Add ExtendedClipboard pseudo-encoding for UTF-8 bidirectional clipboard

This commit is contained in:
Viktor Liu
2026-05-17 16:34:14 +02:00
parent b135d462d6
commit a11341f57a
3 changed files with 371 additions and 5 deletions

View File

@@ -0,0 +1,150 @@
//go:build !js && !ios && !android
package server
import (
"bytes"
"compress/zlib"
"encoding/binary"
"fmt"
"io"
)
// pseudoEncExtendedClipboard is the pseudo-encoding number (0xC0A1E5CE) that
// a client lists in SetEncodings to opt in to ExtendedClipboard, an RFB
// community extension that replaces legacy CutText with a Caps/Notify/
// Request/Provide/Peek handshake. Advantages over legacy CutText:
// - UTF-8 text format (legacy is Latin-1).
// - Pull-based: a Notify announces "I have new content", the peer fetches
// via Request only when it actually needs the data. Saves bandwidth on
// a high-latency relay path versus pushing every change.
// - zlib-compressed payloads.
// - Caps negotiation so each side knows the other's per-format max size.
//
// The extension reuses message opcodes 3 (ServerCutText) and 6 (ClientCutText)
// and signals "extended" by encoding the length field as a negative int32;
// the absolute value is the payload size in bytes. The first 4 bytes of
// payload are a flags word: the top byte carries the action and the low
// 16 bits carry the format mask.
//
// The value is written as a negative decimal because 0xC0A1E5CE does not fit
// in a signed 32-bit integer; -1063131698 is the same 32-bit pattern.
const pseudoEncExtendedClipboard = -1063131698 // 0xC0A1E5CE as int32
const (
// Action bits, carried in the top byte of the flags word.
// Caps advertises the accepted formats plus one maximum size per format.
extClipActionCaps uint32 = 0x01000000
// Request asks the peer to answer with a Provide for the given formats.
extClipActionRequest uint32 = 0x02000000
// Peek asks which formats are available; this server answers it with a
// Notify.
extClipActionPeek uint32 = 0x04000000
// Notify announces that new clipboard content is available; no data is
// attached.
extClipActionNotify uint32 = 0x08000000
// Provide carries the zlib-compressed clipboard records themselves.
extClipActionProvide uint32 = 0x10000000
// extClipActionMask isolates the five action bits above.
extClipActionMask uint32 = 0x1F000000
// Format bits, carried in the low 16 bits of the flags word. Only the text
// format is produced and consumed by this package; the others are
// recognised so their records can be skipped.
extClipFormatText uint32 = 0x00000001
extClipFormatRTF uint32 = 0x00000002
extClipFormatHTML uint32 = 0x00000004
extClipFormatDIB uint32 = 0x00000008
extClipFormatFiles uint32 = 0x00000010
// extClipFormatMask isolates the 16-bit format field.
extClipFormatMask uint32 = 0x0000FFFF
// extClipMaxText caps our accepted text payload. Mirrors the legacy
// maxCutTextBytes (1 MiB); advertised in Caps and enforced on Provide.
extClipMaxText = maxCutTextBytes
// extClipMaxPayload bounds the raw on-wire payload we will read for an
// extended CutText message. Includes flags header, length prefixes, NUL,
// and zlib framing overhead on top of the text body.
extClipMaxPayload = extClipMaxText + 1024
)
// buildExtClipCaps returns the 8-byte Caps payload this server sends: a
// big-endian flags word (Caps action with only the text format bit set)
// followed by one big-endian uint32 giving the maximum text size we accept.
// The wire layout carries one size per advertised format bit, in ascending
// bit order; text is the only format advertised here.
func buildExtClipCaps() []byte {
	out := make([]byte, 0, 8)
	out = binary.BigEndian.AppendUint32(out, extClipActionCaps|extClipFormatText)
	return binary.BigEndian.AppendUint32(out, uint32(extClipMaxText))
}
// buildExtClipNotify returns the 4-byte Notify payload: a big-endian flags
// word combining the Notify action with the caller's format mask. It tells
// the peer that new clipboard content exists in those formats without
// shipping any data; the peer is expected to pull it with a Request.
func buildExtClipNotify(formats uint32) []byte {
	var word [4]byte
	binary.BigEndian.PutUint32(word[:], extClipActionNotify|formats)
	return word[:]
}
// buildExtClipRequest returns the 4-byte Request payload: a big-endian flags
// word combining the Request action with the caller's format mask. It asks
// the peer to reply with a Provide for those formats and is sent in response
// to an inbound Notify.
func buildExtClipRequest(formats uint32) []byte {
	return binary.BigEndian.AppendUint32(make([]byte, 0, 4), extClipActionRequest|formats)
}
// buildExtClipProvideText returns a Provide payload carrying text as UTF-8.
//
// Layout of the result: a 4-byte big-endian flags word (Provide action, text
// format) followed by a zlib stream. The uncompressed stream is a single
// record: a 4-byte big-endian length that counts the trailing NUL, the text
// bytes, then the NUL. A fresh zlib writer is used on every call because the
// extension spec requires an independent zlib context per Provide.
//
// An error is returned only if the zlib writer fails.
func buildExtClipProvideText(text string) ([]byte, error) {
	// Assemble the uncompressed record first so it reaches the compressor
	// in a single Write.
	record := make([]byte, 4, 4+len(text)+1)
	binary.BigEndian.PutUint32(record, uint32(len(text)+1))
	record = append(record, text...)
	record = append(record, 0)

	var compressed bytes.Buffer
	zw := zlib.NewWriter(&compressed)
	if _, err := zw.Write(record); err != nil {
		return nil, fmt.Errorf("zlib write: %w", err)
	}
	if err := zw.Close(); err != nil {
		return nil, fmt.Errorf("zlib close: %w", err)
	}

	out := make([]byte, 4, 4+compressed.Len())
	binary.BigEndian.PutUint32(out, extClipActionProvide|extClipFormatText)
	return append(out, compressed.Bytes()...), nil
}
// parseExtClipProvideText inflates the body of a Provide message (the bytes
// that follow the 4-byte flags header) and extracts the text record.
//
// The inflated stream holds one record per format bit set in flags, in
// ascending bit order from text up to files. Each record is a 4-byte
// big-endian size followed by that many bytes. Records of non-text formats
// are read and discarded. For the text record a single trailing NUL, if
// present, is stripped.
//
// Returns "" with a nil error when the text bit is not set, or when the
// stream ends cleanly before the text record's size field. Returns an error
// when the zlib header is invalid, a record declares more than
// extClipMaxText bytes, or the stream is truncated. Total inflated input is
// capped at extClipMaxText+16 bytes so a small compressed payload cannot
// expand without bound.
func parseExtClipProvideText(flags uint32, payload []byte) (string, error) {
	inflater, err := zlib.NewReader(bytes.NewReader(payload))
	if err != nil {
		return "", fmt.Errorf("zlib reader: %w", err)
	}
	defer inflater.Close()

	src := io.LimitReader(inflater, int64(extClipMaxText)+16)

	var (
		result string
		hdr    [4]byte
	)
	for format := uint32(1); format <= extClipFormatFiles; format <<= 1 {
		if flags&format == 0 {
			continue
		}
		isText := format == extClipFormatText

		if _, err := io.ReadFull(src, hdr[:]); err != nil {
			// A stream that ends exactly where the text record would
			// start is treated as "no text", not as a failure.
			if isText && err == io.EOF {
				return "", nil
			}
			return "", fmt.Errorf("read record size: %w", err)
		}

		n := binary.BigEndian.Uint32(hdr[:])
		if n > uint32(extClipMaxText) {
			return "", fmt.Errorf("record too large: %d", n)
		}

		data := make([]byte, n)
		if _, err := io.ReadFull(src, data); err != nil {
			return "", fmt.Errorf("read record: %w", err)
		}
		if !isText {
			continue
		}
		result = string(bytes.TrimSuffix(data, []byte{0}))
	}
	return result, nil
}

View File

@@ -0,0 +1,96 @@
//go:build !js && !ios && !android
package server
import (
"encoding/binary"
"strings"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestBuildExtClipCaps verifies the Caps payload layout: a flags word with
// the Caps action and the text format bit, then the advertised text limit.
func TestBuildExtClipCaps(t *testing.T) {
	caps := buildExtClipCaps()
	require.Len(t, caps, 8, "Caps with one format should be 4 bytes flags + 4 bytes size")

	word := binary.BigEndian.Uint32(caps[:4])
	limit := binary.BigEndian.Uint32(caps[4:])

	assert.Equal(t, extClipActionCaps, word&extClipActionMask, "action should be Caps")
	assert.Equal(t, extClipFormatText, word&extClipFormatMask, "should advertise text format")
	assert.Equal(t, uint32(extClipMaxText), limit, "should advertise extClipMaxText")
}
// TestBuildExtClipNotify verifies that a Notify payload is a bare flags word
// carrying the Notify action and the requested format mask.
func TestBuildExtClipNotify(t *testing.T) {
	msg := buildExtClipNotify(extClipFormatText)
	require.Len(t, msg, 4)

	word := binary.BigEndian.Uint32(msg)
	assert.Equal(t, extClipActionNotify, word&extClipActionMask)
	assert.Equal(t, extClipFormatText, word&extClipFormatMask)
}
// TestBuildExtClipRequest verifies that a Request payload is a bare flags
// word carrying the Request action and the requested format mask.
func TestBuildExtClipRequest(t *testing.T) {
	msg := buildExtClipRequest(extClipFormatText)
	require.Len(t, msg, 4)

	word := binary.BigEndian.Uint32(msg)
	assert.Equal(t, extClipActionRequest, word&extClipActionMask)
	assert.Equal(t, extClipFormatText, word&extClipFormatMask)
}
// TestExtClipProvideRoundTripASCII builds a Provide for plain ASCII text,
// checks its flags word, and confirms the parser recovers the same string.
func TestExtClipProvideRoundTripASCII(t *testing.T) {
	const want = "hello world"

	msg, err := buildExtClipProvideText(want)
	require.NoError(t, err)

	header := binary.BigEndian.Uint32(msg[:4])
	require.Equal(t, extClipActionProvide, header&extClipActionMask)
	require.Equal(t, extClipFormatText, header&extClipFormatMask)

	got, err := parseExtClipProvideText(header, msg[4:])
	require.NoError(t, err)
	assert.Equal(t, want, got)
}
// TestExtClipProvideRoundTripUTF8 confirms that multi-byte UTF-8 text
// (accented Latin, emoji, CJK) survives a build/parse round trip unchanged.
func TestExtClipProvideRoundTripUTF8(t *testing.T) {
	want := "héllo 🦀 世界"

	msg, err := buildExtClipProvideText(want)
	require.NoError(t, err)

	header := binary.BigEndian.Uint32(msg[:4])
	got, err := parseExtClipProvideText(header, msg[4:])
	require.NoError(t, err)
	assert.Equal(t, want, got, "UTF-8 should round-trip without mangling")
}
// TestExtClipProvideRoundTripEmpty confirms that an empty string can be
// built into a Provide and parses back to an empty string without error.
func TestExtClipProvideRoundTripEmpty(t *testing.T) {
	msg, err := buildExtClipProvideText("")
	require.NoError(t, err)

	header := binary.BigEndian.Uint32(msg[:4])
	got, err := parseExtClipProvideText(header, msg[4:])
	require.NoError(t, err)
	assert.Empty(t, got)
}
// TestExtClipProvideRoundTripLarge round-trips a large, highly repetitive
// input that stays under the text cap, and checks that compression shrinks
// the payload to less than half the input size.
func TestExtClipProvideRoundTripLarge(t *testing.T) {
	want := strings.Repeat("abcd", 200000) // 800 KiB, below cap

	msg, err := buildExtClipProvideText(want)
	require.NoError(t, err)
	assert.Less(t, len(msg), len(want)/2,
		"highly repetitive text should compress significantly")

	header := binary.BigEndian.Uint32(msg[:4])
	got, err := parseExtClipProvideText(header, msg[4:])
	require.NoError(t, err)
	assert.Equal(t, want, got)
}
func TestParseExtClipProvideTextRejectsOversized(t *testing.T) {
var fakePayload [4]byte
// 4 bytes of zlib-compressed garbage won't decode; we want to ensure we
// don't panic, not that we accept it.
_, err := parseExtClipProvideText(extClipActionProvide|extClipFormatText, fakePayload[:])
assert.Error(t, err)
}

View File

@@ -60,6 +60,8 @@ type session struct {
clientSupportsDesktopName bool
clientSupportsLastRect bool
clientSupportsQEMUKey bool
clientSupportsExtClipboard bool
extClipCapsSent bool
// prevFrame, curFrame and idleFrames live on the encoder goroutine and
// must not be touched elsewhere. curFrame holds a session-owned copy of
// the capturer's latest frame so the encoder works on a stable buffer
@@ -132,12 +134,21 @@ func (s *session) clipboardPoll(done <-chan struct{}) {
if len(text) > maxCutTextBytes {
text = text[:maxCutTextBytes]
}
if text != "" && text != lastClip {
lastClip = text
if err := s.sendServerCutText(text); err != nil {
s.log.Debugf("send clipboard to client: %v", err)
if text == "" || text == lastClip {
continue
}
lastClip = text
s.encMu.RLock()
ext := s.clientSupportsExtClipboard
s.encMu.RUnlock()
if ext {
if err := s.writeExtClipMessage(buildExtClipNotify(extClipFormatText)); err != nil {
s.log.Debugf("send ext clipboard notify: %v", err)
return
}
} else if err := s.sendServerCutText(text); err != nil {
s.log.Debugf("send clipboard to client: %v", err)
return
}
}
}
@@ -319,6 +330,9 @@ func (s *session) handleSetEncodings() error {
case pseudoEncQEMUExtendedKeyEvent:
s.clientSupportsQEMUKey = true
encs = append(encs, "qemu-key")
case pseudoEncExtendedClipboard:
s.clientSupportsExtClipboard = true
encs = append(encs, "ext-clipboard")
case encTight:
s.useTight = true
if s.tight == nil {
@@ -327,10 +341,19 @@ func (s *session) handleSetEncodings() error {
encs = append(encs, "tight")
}
}
sendExtClipCaps := s.clientSupportsExtClipboard && !s.extClipCapsSent
if sendExtClipCaps {
s.extClipCapsSent = true
}
s.encMu.Unlock()
if len(encs) > 0 {
s.log.Debugf("client supports encodings: %s", strings.Join(encs, ", "))
}
if sendExtClipCaps {
if err := s.writeExtClipMessage(buildExtClipCaps()); err != nil {
return fmt.Errorf("send ext clipboard caps: %w", err)
}
}
return nil
}
@@ -795,7 +818,16 @@ func (s *session) handleCutText() error {
if _, err := io.ReadFull(s.conn, header[:]); err != nil {
return fmt.Errorf("read CutText header: %w", err)
}
length := binary.BigEndian.Uint32(header[3:7])
rawLen := int32(binary.BigEndian.Uint32(header[3:7]))
if rawLen < 0 {
// Negative length signals ExtendedClipboard; absolute value is the
// payload size. Guard against MinInt32 overflow before negating.
if rawLen == -2147483648 {
return fmt.Errorf("ext clipboard payload too large")
}
return s.handleExtCutText(uint32(-rawLen))
}
length := uint32(rawLen)
if length > maxCutTextBytes {
return fmt.Errorf("cut text too large: %d bytes", length)
}
@@ -807,6 +839,94 @@ func (s *session) handleCutText() error {
return nil
}
// handleExtCutText reads and dispatches one ExtendedClipboard message that
// arrived as a negative-length CutText. payloadLen is the absolute value of
// that length, i.e. the number of bytes still to be read from the
// connection (flags word included).
//
// Dispatch by action:
//   - Caps: ignored; we only emit text and already cap it ourselves.
//   - Request: answered with a Provide when the text bit is set.
//   - Peek: answered with a Notify for text.
//   - Notify: answered with a Request for text when the text bit is set.
//   - Provide: the text record, if any, is pushed to the host clipboard.
//
// Unknown actions, formats other than text (RTF/HTML/DIB/Files) and
// Provide bodies that fail to parse are logged or ignored without ending
// the session. An error is returned only for an out-of-range length, a
// failed read, or a failed reply write.
func (s *session) handleExtCutText(payloadLen uint32) error {
	switch {
	case payloadLen < 4:
		return fmt.Errorf("ext clipboard payload too short: %d", payloadLen)
	case payloadLen > extClipMaxPayload:
		return fmt.Errorf("ext clipboard payload too large: %d", payloadLen)
	}

	msg := make([]byte, payloadLen)
	if _, err := io.ReadFull(s.conn, msg); err != nil {
		return fmt.Errorf("read ext clipboard payload: %w", err)
	}

	flags := binary.BigEndian.Uint32(msg[:4])
	action := flags & extClipActionMask
	hasText := flags&extClipFormatMask&extClipFormatText != 0

	switch action {
	case extClipActionCaps:
		// Client max sizes are informational for us today: we only emit
		// text and already cap it at extClipMaxText.
		return nil

	case extClipActionRequest:
		if !hasText {
			return nil
		}
		return s.sendExtClipProvideText()

	case extClipActionPeek:
		return s.writeExtClipMessage(buildExtClipNotify(extClipFormatText))

	case extClipActionNotify:
		if !hasText {
			return nil
		}
		return s.writeExtClipMessage(buildExtClipRequest(extClipFormatText))

	case extClipActionProvide:
		body := msg[4:]
		if len(body) == 0 {
			return nil
		}
		text, err := parseExtClipProvideText(flags, body)
		switch {
		case err != nil:
			// A malformed Provide is logged and dropped, not fatal.
			s.log.Debugf("parse ext clipboard provide: %v", err)
		case text != "":
			s.injector.SetClipboard(text)
		}
		return nil

	default:
		s.log.Debugf("unknown ext clipboard action 0x%x", action)
		return nil
	}
}
// sendExtClipProvideText answers an inbound Request(text) with the current
// host clipboard contents, capped to extClipMaxText bytes.
//
// When the clipboard is longer than the cap, the cut point is moved back to
// the start of a UTF-8 sequence so the truncated text never ends in a
// partial multi-byte character; the Provide text format is UTF-8 and a
// split sequence would put invalid bytes on the wire.
//
// Returns an error if the Provide payload cannot be built or written.
func (s *session) sendExtClipProvideText() error {
	text := s.injector.GetClipboard()
	if len(text) > extClipMaxText {
		cut := extClipMaxText
		// text[cut] is the first byte that would be dropped. If it is a
		// continuation byte (10xxxxxx) the cut lands inside a sequence, so
		// step back to its lead byte. A sequence has at most three
		// continuation bytes, which bounds the walk even for malformed
		// input.
		for back := 0; back < 3 && cut > 0 && text[cut]&0xC0 == 0x80; back++ {
			cut--
		}
		text = text[:cut]
	}
	payload, err := buildExtClipProvideText(text)
	if err != nil {
		return fmt.Errorf("build provide: %w", err)
	}
	return s.writeExtClipMessage(payload)
}
// writeExtClipMessage wraps an ExtendedClipboard payload in a ServerCutText
// frame and sends it to the client while holding writeMu.
//
// Frame layout: message type byte, three zero padding bytes, a 4-byte
// big-endian length holding the negated payload size (the negative sign is
// what marks the message as extended), then the payload. An empty payload
// is a no-op. The returned error is whatever the connection write reports.
func (s *session) writeExtClipMessage(payload []byte) error {
	n := len(payload)
	if n == 0 {
		return nil
	}

	frame := make([]byte, 8, 8+n)
	frame[0] = serverCutText
	// frame[1:4] stays zero: padding.
	binary.BigEndian.PutUint32(frame[4:], uint32(-int32(n)))
	frame = append(frame, payload...)

	s.writeMu.Lock()
	defer s.writeMu.Unlock()
	_, err := s.conn.Write(frame)
	return err
}
// handleTypeText handles the NetBird-specific PasteAndType message used by
// the dashboard's Paste button. Wire format mirrors CutText: 3-byte
// padding + 4-byte length + text bytes.