From a11341f57a2e9afdd16b8b3101de27508cd0c0b0 Mon Sep 17 00:00:00 2001 From: Viktor Liu Date: Sun, 17 May 2026 16:34:14 +0200 Subject: [PATCH] Add ExtendedClipboard pseudo-encoding for UTF-8 bidirectional clipboard --- client/vnc/server/extclipboard.go | 150 +++++++++++++++++++++++++ client/vnc/server/extclipboard_test.go | 96 ++++++++++++++++ client/vnc/server/session.go | 130 ++++++++++++++++++++- 3 files changed, 371 insertions(+), 5 deletions(-) create mode 100644 client/vnc/server/extclipboard.go create mode 100644 client/vnc/server/extclipboard_test.go diff --git a/client/vnc/server/extclipboard.go b/client/vnc/server/extclipboard.go new file mode 100644 index 000000000..495f0dc63 --- /dev/null +++ b/client/vnc/server/extclipboard.go @@ -0,0 +1,150 @@ +//go:build !js && !ios && !android + +package server + +import ( + "bytes" + "compress/zlib" + "encoding/binary" + "fmt" + "io" +) + +// ExtendedClipboard is an RFB community extension (pseudo-encoding +// 0xC0A1E5CE) that replaces legacy CutText with a Caps/Notify/Request/ +// Provide/Peek handshake. Wins versus legacy CutText: +// - UTF-8 text format (legacy is Latin-1). +// - Pull-based: a Notify announces "I have new content", the peer fetches +// via Request only when it actually needs the data. Saves bandwidth on +// a high-latency relay path versus pushing every change. +// - zlib-compressed payloads. +// - Caps negotiation so each side knows the other's per-format max size. +// +// The extension reuses message opcodes 3 (ServerCutText) and 6 (ClientCutText) +// and signals "extended" by encoding the length field as a negative int32; +// the absolute value is the payload size in bytes. The first 4 bytes of +// payload are a flags word: top byte is the action, low 16 bits are the +// format mask. 
+const pseudoEncExtendedClipboard = -1063131698 // 0xC0A1E5CE as int32
+
+const (
+	// Action flags: one bit each, bits 24-28 of the flags word.
+	extClipActionCaps    uint32 = 1 << 24
+	extClipActionRequest uint32 = 1 << 25
+	extClipActionPeek    uint32 = 1 << 26
+	extClipActionNotify  uint32 = 1 << 27
+	extClipActionProvide uint32 = 1 << 28
+	extClipActionMask    uint32 = 0x1F << 24
+
+	// Format flags: one bit each, inside the low 16 bits of the flags word.
+	extClipFormatText  uint32 = 1 << 0
+	extClipFormatRTF   uint32 = 1 << 1
+	extClipFormatHTML  uint32 = 1 << 2
+	extClipFormatDIB   uint32 = 1 << 3
+	extClipFormatFiles uint32 = 1 << 4
+	extClipFormatMask  uint32 = 0xFFFF
+
+	// extClipMaxText is the largest text payload we accept. It reuses the
+	// legacy maxCutTextBytes limit (1 MiB), is advertised in Caps and is
+	// enforced when a Provide is parsed.
+	extClipMaxText = maxCutTextBytes
+
+	// extClipMaxPayload is the largest raw on-wire payload we are willing
+	// to read for one extended CutText message: the text body plus slack
+	// for the flags header, length prefixes, NUL and zlib framing.
+	extClipMaxPayload = 1024 + extClipMaxText
+)
+
+// buildExtClipCaps returns the Caps payload: the flags word (Caps action,
+// text format) followed by one big-endian uint32 maximum size per format
+// bit set, in ascending bit order. Only text is advertised, so the result
+// is always 8 bytes.
+func buildExtClipCaps() []byte {
+	var caps [8]byte
+	binary.BigEndian.PutUint32(caps[:4], extClipActionCaps|extClipFormatText)
+	binary.BigEndian.PutUint32(caps[4:], uint32(extClipMaxText))
+	return caps[:]
+}
+
+// buildExtClipNotify returns a 4-byte Notify payload announcing that new
+// clipboard content is available in the given format mask. It carries no
+// data; the peer is expected to pull with Request when it wants to paste.
+func buildExtClipNotify(formats uint32) []byte {
+	var msg [4]byte
+	binary.BigEndian.PutUint32(msg[:], formats|extClipActionNotify)
+	return msg[:]
+}
+
+// buildExtClipRequest emits a Request asking the peer to send Provide for
+// the given format mask. Sent in response to an inbound Notify.
+func buildExtClipRequest(formats uint32) []byte {
+	var msg [4]byte
+	binary.BigEndian.PutUint32(msg[:], formats|extClipActionRequest)
+	return msg[:]
+}
+
+// buildExtClipProvideText returns a Provide payload carrying text. The
+// result is the uncompressed 4-byte flags word (Provide action, text
+// format) followed by a zlib stream of one record: a big-endian uint32
+// size that counts the trailing NUL, the text bytes, and the NUL. A fresh
+// zlib writer is used for every call, so each Provide is an independent
+// stream as the extension requires.
+func buildExtClipProvideText(text string) ([]byte, error) {
+	// make zeroes the slice, which leaves the terminating NUL in place
+	// after the text is copied in.
+	record := make([]byte, 4+len(text)+1)
+	binary.BigEndian.PutUint32(record[:4], uint32(len(text)+1))
+	copy(record[4:], text)
+
+	// The flags word precedes the compressed stream in the same buffer.
+	var out bytes.Buffer
+	var flags [4]byte
+	binary.BigEndian.PutUint32(flags[:], extClipActionProvide|extClipFormatText)
+	out.Write(flags[:])
+
+	zw := zlib.NewWriter(&out)
+	if _, err := zw.Write(record); err != nil {
+		return nil, fmt.Errorf("zlib write: %w", err)
+	}
+	if err := zw.Close(); err != nil {
+		return nil, fmt.Errorf("zlib close: %w", err)
+	}
+	return out.Bytes(), nil
+}
+
+// parseExtClipProvideText decompresses a Provide payload (the bytes after
+// the 4-byte flags header) and returns the UTF-8 text record if the text
+// format bit is set. Records for other formats are skipped. The trailing
+// NUL byte the spec appends to text records is stripped.
+func parseExtClipProvideText(flags uint32, payload []byte) (string, error) {
+	// flags is the full flags word of the Provide message; payload is the
+	// zlib stream that follows it. Returns the text with its trailing NUL
+	// removed, "" with a nil error when the message carries no text, or an
+	// error when the stream is malformed or the record exceeds
+	// extClipMaxText.
+	//
+	// Without the text bit there is nothing for us in this message, so do
+	// not spend time inflating formats we would discard anyway.
+	if flags&extClipFormatText == 0 {
+		return "", nil
+	}
+
+	zr, err := zlib.NewReader(bytes.NewReader(payload))
+	if err != nil {
+		return "", fmt.Errorf("zlib reader: %w", err)
+	}
+	defer zr.Close()
+
+	// Records appear in ascending format-bit order and text is the lowest
+	// bit, so the text record is always first. Only that record is read:
+	// whatever RTF/HTML/DIB/Files data follows can neither exhaust the
+	// decompression budget nor turn an otherwise valid text into an error.
+	// The limit (size prefix + largest accepted record) bounds how much a
+	// hostile stream can make us inflate.
+	limited := io.LimitReader(zr, 4+int64(extClipMaxText))
+
+	var sizeBuf [4]byte
+	if _, err := io.ReadFull(limited, sizeBuf[:]); err != nil {
+		// A stream that ends before any record is treated as "no text".
+		if err == io.EOF {
+			return "", nil
+		}
+		return "", fmt.Errorf("read record size: %w", err)
+	}
+	size := binary.BigEndian.Uint32(sizeBuf[:])
+	if size > uint32(extClipMaxText) {
+		return "", fmt.Errorf("record too large: %d", size)
+	}
+	rec := make([]byte, size)
+	if _, err := io.ReadFull(limited, rec); err != nil {
+		return "", fmt.Errorf("read record: %w", err)
+	}
+	// The record size counts the terminating NUL; drop it when present.
+	if len(rec) > 0 && rec[len(rec)-1] == 0 {
+		rec = rec[:len(rec)-1]
+	}
+	return string(rec), nil
+}
diff --git a/client/vnc/server/extclipboard_test.go b/client/vnc/server/extclipboard_test.go
new file mode 100644
index 000000000..fd9601a79
--- /dev/null
+++ b/client/vnc/server/extclipboard_test.go
@@ -0,0 +1,96 @@
+//go:build !js && !ios && !android
+
+package server
+
+import (
+	"encoding/binary"
+	"strings"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+func TestBuildExtClipCaps(t *testing.T) {
+	payload := buildExtClipCaps()
+	require.Len(t, payload, 8, "Caps with one format should be 4 bytes flags + 4 bytes size")
+
+	flags := binary.BigEndian.Uint32(payload[0:4])
+	assert.Equal(t, extClipActionCaps, flags&extClipActionMask, "action should be Caps")
+	assert.Equal(t, extClipFormatText, flags&extClipFormatMask, "should advertise text format")
+
+	maxSize := binary.BigEndian.Uint32(payload[4:8])
+	assert.Equal(t, uint32(extClipMaxText), maxSize, "should advertise extClipMaxText")
+}
+
+func TestBuildExtClipNotify(t *testing.T) {
+	payload := buildExtClipNotify(extClipFormatText)
+	require.Len(t, payload, 4)
+	flags := binary.BigEndian.Uint32(payload)
+	assert.Equal(t, extClipActionNotify, flags&extClipActionMask)
+	assert.Equal(t, extClipFormatText, flags&extClipFormatMask)
+}
+
+func TestBuildExtClipRequest(t *testing.T) {
+	payload := buildExtClipRequest(extClipFormatText)
+	require.Len(t, payload, 4)
+	flags := binary.BigEndian.Uint32(payload)
+	assert.Equal(t, extClipActionRequest, flags&extClipActionMask)
+	assert.Equal(t, extClipFormatText, flags&extClipFormatMask)
+}
+
+func TestExtClipProvideRoundTripASCII(t *testing.T) {
+	const original = "hello world"
+	payload, err := buildExtClipProvideText(original)
+	require.NoError(t, err)
+
+	flags := binary.BigEndian.Uint32(payload[0:4])
+	require.Equal(t, extClipActionProvide, flags&extClipActionMask)
+	require.Equal(t, extClipFormatText, flags&extClipFormatMask)
+
+	text, err := parseExtClipProvideText(flags, payload[4:])
+	require.NoError(t, err)
+	assert.Equal(t, original, text)
+}
+
+func TestExtClipProvideRoundTripUTF8(t *testing.T) {
+	original := "héllo 🦀 世界"
+	payload, err := buildExtClipProvideText(original)
+	require.NoError(t, err)
+
+	flags := binary.BigEndian.Uint32(payload[0:4])
+	text, err := parseExtClipProvideText(flags, payload[4:])
+	require.NoError(t, err)
+	assert.Equal(t, original, text, "UTF-8 should round-trip without mangling")
+}
+
+func TestExtClipProvideRoundTripEmpty(t *testing.T) {
+	payload, err := buildExtClipProvideText("")
+	require.NoError(t, err)
+
+	flags := binary.BigEndian.Uint32(payload[0:4])
+	text, err := parseExtClipProvideText(flags, payload[4:])
+	require.NoError(t, err)
+	assert.Empty(t, text)
+}
+
+func TestExtClipProvideRoundTripLarge(t *testing.T) {
+	original := strings.Repeat("abcd", 200000) // 800,000 bytes, below cap
+	payload, err := buildExtClipProvideText(original)
+	require.NoError(t, err)
+	assert.Less(t, len(payload), len(original)/2,
+		"highly repetitive text should compress significantly")
+
+	flags := binary.BigEndian.Uint32(payload[0:4])
+	text, err := parseExtClipProvideText(flags, payload[4:])
+	require.NoError(t, err)
+	assert.Equal(t, original, text)
+}
+
+// TestParseExtClipProvideTextRejectsOversized feeds the parser a well-formed
+// Provide whose text is longer than extClipMaxText and expects the record
+// to be refused rather than returned.
+func TestParseExtClipProvideTextRejectsOversized(t *testing.T) {
+	payload, err := buildExtClipProvideText(strings.Repeat("a", extClipMaxText+1))
+	require.NoError(t, err)
+
+	flags := binary.BigEndian.Uint32(payload[0:4])
+	text, err := parseExtClipProvideText(flags, payload[4:])
+	assert.Error(t, err, "record larger than extClipMaxText should be rejected")
+	assert.Empty(t, text)
+}
+
+// TestParseExtClipProvideTextRejectsGarbage checks that bytes which are not
+// a zlib stream produce an error instead of a panic or bogus text.
+func TestParseExtClipProvideTextRejectsGarbage(t *testing.T) {
+	var fakePayload [4]byte
+	// Four zero bytes are not a valid zlib header, so the reader cannot
+	// even be constructed.
+	_, err := parseExtClipProvideText(extClipActionProvide|extClipFormatText, fakePayload[:])
+	assert.Error(t, err)
+}
diff --git a/client/vnc/server/session.go b/client/vnc/server/session.go
index e8d9a5904..aa656c28c 100644
--- a/client/vnc/server/session.go
+++ b/client/vnc/server/session.go
@@ -60,6 +60,8 @@ type session struct {
 	clientSupportsDesktopName bool
 	clientSupportsLastRect bool
 	clientSupportsQEMUKey bool
+	clientSupportsExtClipboard bool
+	extClipCapsSent bool
 	// prevFrame, curFrame and idleFrames live on the encoder goroutine and
 	// must not be touched elsewhere.
curFrame holds a session-owned copy of // the capturer's latest frame so the encoder works on a stable buffer @@ -132,12 +134,21 @@ func (s *session) clipboardPoll(done <-chan struct{}) { if len(text) > maxCutTextBytes { text = text[:maxCutTextBytes] } - if text != "" && text != lastClip { - lastClip = text - if err := s.sendServerCutText(text); err != nil { - s.log.Debugf("send clipboard to client: %v", err) + if text == "" || text == lastClip { + continue + } + lastClip = text + s.encMu.RLock() + ext := s.clientSupportsExtClipboard + s.encMu.RUnlock() + if ext { + if err := s.writeExtClipMessage(buildExtClipNotify(extClipFormatText)); err != nil { + s.log.Debugf("send ext clipboard notify: %v", err) return } + } else if err := s.sendServerCutText(text); err != nil { + s.log.Debugf("send clipboard to client: %v", err) + return } } } @@ -319,6 +330,9 @@ func (s *session) handleSetEncodings() error { case pseudoEncQEMUExtendedKeyEvent: s.clientSupportsQEMUKey = true encs = append(encs, "qemu-key") + case pseudoEncExtendedClipboard: + s.clientSupportsExtClipboard = true + encs = append(encs, "ext-clipboard") case encTight: s.useTight = true if s.tight == nil { @@ -327,10 +341,19 @@ func (s *session) handleSetEncodings() error { encs = append(encs, "tight") } } + sendExtClipCaps := s.clientSupportsExtClipboard && !s.extClipCapsSent + if sendExtClipCaps { + s.extClipCapsSent = true + } s.encMu.Unlock() if len(encs) > 0 { s.log.Debugf("client supports encodings: %s", strings.Join(encs, ", ")) } + if sendExtClipCaps { + if err := s.writeExtClipMessage(buildExtClipCaps()); err != nil { + return fmt.Errorf("send ext clipboard caps: %w", err) + } + } return nil } @@ -795,7 +818,16 @@ func (s *session) handleCutText() error { if _, err := io.ReadFull(s.conn, header[:]); err != nil { return fmt.Errorf("read CutText header: %w", err) } - length := binary.BigEndian.Uint32(header[3:7]) + rawLen := int32(binary.BigEndian.Uint32(header[3:7])) + if rawLen < 0 { + // Negative length 
signals ExtendedClipboard; absolute value is the
+		// payload size. Guard against MinInt32 overflow before negating.
+		if rawLen == -2147483648 {
+			return fmt.Errorf("ext clipboard payload too large")
+		}
+		return s.handleExtCutText(uint32(-rawLen))
+	}
+	length := uint32(rawLen)
 	if length > maxCutTextBytes {
 		return fmt.Errorf("cut text too large: %d bytes", length)
 	}
@@ -807,6 +839,94 @@ func (s *session) handleCutText() error {
 	return nil
 }
 
+// handleExtCutText reads and dispatches one ExtendedClipboard message (Caps,
+// Notify, Request, Peek or Provide) that arrived as a negative-length
+// CutText. payloadLen is the absolute value of that length; the payload
+// itself is still unread on s.conn.
+//
+// A payload shorter than the flags word, larger than extClipMaxPayload, or
+// one that cannot be read in full returns an error. Unknown actions,
+// formats other than text, and Provide payloads that fail to parse are
+// logged or ignored and return nil.
+func (s *session) handleExtCutText(payloadLen uint32) error {
+	if payloadLen < 4 {
+		return fmt.Errorf("ext clipboard payload too short: %d", payloadLen)
+	}
+	if payloadLen > extClipMaxPayload {
+		return fmt.Errorf("ext clipboard payload too large: %d", payloadLen)
+	}
+	buf := make([]byte, payloadLen)
+	if _, err := io.ReadFull(s.conn, buf); err != nil {
+		return fmt.Errorf("read ext clipboard payload: %w", err)
+	}
+	// First four bytes are the flags word: action bits plus format mask.
+	flags := binary.BigEndian.Uint32(buf[0:4])
+	action := flags & extClipActionMask
+	formats := flags & extClipFormatMask
+	rest := buf[4:]
+
+	switch action {
+	case extClipActionCaps:
+		// Client max sizes are informational for us today: we only emit
+		// text and already cap it at extClipMaxText.
+		return nil
+	case extClipActionRequest:
+		// The peer wants our clipboard; only text can be provided.
+		if formats&extClipFormatText != 0 {
+			return s.sendExtClipProvideText()
+		}
+		return nil
+	case extClipActionPeek:
+		// Answer with the formats we can offer, which is always text.
+		return s.writeExtClipMessage(buildExtClipNotify(extClipFormatText))
+	case extClipActionNotify:
+		// The peer has new content; pull it right away if it includes text.
+		if formats&extClipFormatText != 0 {
+			return s.writeExtClipMessage(buildExtClipRequest(extClipFormatText))
+		}
+		return nil
+	case extClipActionProvide:
+		if len(rest) == 0 {
+			return nil
+		}
+		text, err := parseExtClipProvideText(flags, rest)
+		if err != nil {
+			// A bad Provide is not worth dropping the session for.
+			s.log.Debugf("parse ext clipboard provide: %v", err)
+			return nil
+		}
+		if text != "" {
+			s.injector.SetClipboard(text)
+		}
+		return nil
+	default:
+		s.log.Debugf("unknown ext clipboard action 0x%x", action)
+		return nil
+	}
+}
+
+// sendExtClipProvideText answers an inbound Request(text) with the current
+// host clipboard contents. Over-long text is truncated so that the record
+// put on the wire (text plus trailing NUL) is at most extClipMaxText bytes,
+// and the cut never lands inside a multi-byte UTF-8 sequence.
+func (s *session) sendExtClipProvideText() error {
+	text := s.injector.GetClipboard()
+	// buildExtClipProvideText appends a NUL and counts it in the record
+	// size, and parseExtClipProvideText rejects records larger than
+	// extClipMaxText, so one byte has to be held back for the NUL.
+	const maxBody = extClipMaxText - 1
+	if len(text) > maxBody {
+		cut := maxBody
+		// Bytes of the form 10xxxxxx are UTF-8 continuation bytes. If the
+		// first dropped byte is one, the cut is mid-sequence: step back to
+		// the lead byte. Valid UTF-8 needs at most three steps; the bound
+		// keeps malformed input from being trimmed without limit.
+		for i := 0; i < 3 && cut > 0 && text[cut]&0xC0 == 0x80; i++ {
+			cut--
+		}
+		text = text[:cut]
+	}
+	payload, err := buildExtClipProvideText(text)
+	if err != nil {
+		return fmt.Errorf("build provide: %w", err)
+	}
+	return s.writeExtClipMessage(payload)
+}
+
+// writeExtClipMessage frames an ExtendedClipboard payload as a ServerCutText
+// message (type byte, three zero padding bytes, length, payload) whose
+// length field is the negated payload size, then writes it in a single
+// call while holding writeMu. An empty payload is a no-op.
+func (s *session) writeExtClipMessage(payload []byte) error {
+	if len(payload) == 0 {
+		return nil
+	}
+	buf := make([]byte, 8+len(payload))
+	buf[0] = serverCutText
+	// buf[1:4] = padding (zero)
+	// Negative length, stored as two's complement, marks the message as
+	// extended.
+	binary.BigEndian.PutUint32(buf[4:8], uint32(-int32(len(payload))))
+	copy(buf[8:], payload)
+
+	s.writeMu.Lock()
+	_, err := s.conn.Write(buf)
+	s.writeMu.Unlock()
+	return err
+}
+
 // handleTypeText handles the NetBird-specific PasteAndType message used by
 // the dashboard's Paste button. Wire format mirrors CutText: 3-byte
 // padding + 4-byte length + text bytes.