From cd005ef9a999c6a8c00fb80b6de0d572308530ff Mon Sep 17 00:00:00 2001 From: Viktor Liu Date: Sun, 17 May 2026 08:13:52 +0200 Subject: [PATCH] Add CopyRect detection and emission for tile-aligned moves --- client/vnc/server/copyrect.go | 189 +++++++++++++++++++++++++++++ client/vnc/server/copyrect_test.go | 160 ++++++++++++++++++++++++ client/vnc/server/rfb.go | 43 +++++-- client/vnc/server/session.go | 90 +++++++++++--- 4 files changed, 458 insertions(+), 24 deletions(-) create mode 100644 client/vnc/server/copyrect.go create mode 100644 client/vnc/server/copyrect_test.go diff --git a/client/vnc/server/copyrect.go b/client/vnc/server/copyrect.go new file mode 100644 index 000000000..b0356a37e --- /dev/null +++ b/client/vnc/server/copyrect.go @@ -0,0 +1,189 @@ +package server + +import ( + "hash/maphash" + "image" +) + +// copyRectDetector finds tiles in the current frame that match the content +// of some tile-aligned region of the previous frame, so we can emit them as +// CopyRect rectangles (16 wire bytes) instead of re-encoding the pixels. +// +// The detector keeps two structures: +// - tileHash, a flat slice of one hash per tile-aligned position, used as +// the source of truth for the previous frame's tile content. +// - prevTiles, a hash → position lookup used during findTileMatch. +// +// updateDirty rehashes only the tiles that changed this frame, so the +// steady-state cost is proportional to the dirty set, not the framebuffer. +// A full rebuild from scratch is only done on the first frame or when the +// detector has not yet been initialized for the current resolution. +// +// Limitations: +// - Only tile-aligned source positions are considered. Sub-tile-aligned +// moves (e.g. window dragged by 7 pixels) are not detected. This still +// covers the common case of vertical/horizontal scrolling, which always +// produces tile-aligned matches at the tile granularity. +// - 64-bit maphash collisions are assumed not to happen. The probability +// for any single frame's hash universe is ~2^-32 * tileCount² which is +// vanishingly small at typical resolutions; if we ever observe one we +// can fall back to a full memcmp verification. +type copyRectDetector struct { + seed maphash.Seed + tileSize int + w, h int + cols, rows int + // tileHash[ty*cols + tx] is the current hash of the tile at (tx, ty) + // in the previous frame. Lookup uses this to detect stale prevTiles + // entries — incremental updates may leave hash→pos entries pointing + // at a tile whose content has since changed. + tileHash []uint64 + // prevTiles maps a tile hash to a (x, y) origin in the previous frame. + prevTiles map[uint64][2]int + // hash is reused across hash computations to keep the per-tile lookup + // path allocation-free. + hash maphash.Hash +} + +func newCopyRectDetector(tileSize int) *copyRectDetector { + d := ©RectDetector{ + seed: maphash.MakeSeed(), + tileSize: tileSize, + prevTiles: make(map[uint64][2]int), + } + d.hash.SetSeed(d.seed) + return d +} + +// resize ensures the per-tile tables match the given framebuffer size. +// Called from rebuild before each full hash sweep. +func (d *copyRectDetector) resize(w, h int) { + if d.w == w && d.h == h && d.tileHash != nil { + return + } + d.w, d.h = w, h + d.cols = w / d.tileSize + d.rows = h / d.tileSize + d.tileHash = make([]uint64, d.cols*d.rows) +} + +// hashTile computes the 64-bit maphash of one tile-aligned tile of frame. +func (d *copyRectDetector) hashTile(frame *image.RGBA, tx, ty int) uint64 { + d.hash.Reset() + ts := d.tileSize + stride := frame.Stride + rowBytes := ts * 4 + base := ty*stride + tx*4 + for row := 0; row < ts; row++ { + off := base + row*stride + _, _ = d.hash.Write(frame.Pix[off : off+rowBytes]) + } + return d.hash.Sum64() +} + +// rebuild discards everything and rehashes the whole frame. O(w*h). Use +// for the first frame or after the detector has been resized. Steady-state +// updates should go through updateDirty instead. +func (d *copyRectDetector) rebuild(frame *image.RGBA, w, h int) { + d.resize(w, h) + if d.prevTiles == nil { + d.prevTiles = make(map[uint64][2]int) + } else { + clear(d.prevTiles) + } + ts := d.tileSize + for ty := 0; ty+ts <= h; ty += ts { + for tx := 0; tx+ts <= w; tx += ts { + sum := d.hashTile(frame, tx, ty) + d.tileHash[(ty/ts)*d.cols+(tx/ts)] = sum + if _, exists := d.prevTiles[sum]; !exists { + d.prevTiles[sum] = [2]int{tx, ty} + } + } + } +} + +// updateDirty rehashes only the tiles named in dirty (each entry is +// [x, y, w, h] with w and h equal to tileSize). O(len(dirty)) work, which +// in the common case is a tiny fraction of the whole framebuffer. +// +// The prevTiles map is replaced on collision rather than first-wins so a +// newly-hashed tile claims the slot. Old, stale entries pointing at tiles +// that no longer carry that hash are filtered at lookup time via tileHash. +func (d *copyRectDetector) updateDirty(frame *image.RGBA, w, h int, dirty [][4]int) { + if d.w != w || d.h != h || d.tileHash == nil { + d.rebuild(frame, w, h) + return + } + ts := d.tileSize + for _, r := range dirty { + if r[2] != ts || r[3] != ts { + continue + } + tx, ty := r[0], r[1] + if tx+ts > w || ty+ts > h { + continue + } + sum := d.hashTile(frame, tx, ty) + d.tileHash[(ty/ts)*d.cols+(tx/ts)] = sum + // Latest-wins on collision: ensures the most recent owner of this + // hash is the one we'll return on lookup. The previous owner's + // entry, if any, gets shadowed; if its content has changed it's + // stale anyway and findTileMatch's verification will skip it. + d.prevTiles[sum] = [2]int{tx, ty} + } +} + +// findTileMatch hashes the current-frame tile at (dstX, dstY) and looks up +// its hash in the previous-frame map. Returns (srcX, srcY, true) when a +// matching tile-aligned tile exists at a different position whose stored +// hash still equals the requested hash (so the result is not stale). +func (d *copyRectDetector) findTileMatch(cur *image.RGBA, dstX, dstY int) (int, int, bool) { + if len(d.prevTiles) == 0 || d.tileHash == nil { + return 0, 0, false + } + ts := d.tileSize + if dstX+ts > cur.Rect.Dx() || dstY+ts > cur.Rect.Dy() { + return 0, 0, false + } + sum := d.hashTile(cur, dstX, dstY) + pos, ok := d.prevTiles[sum] + if !ok { + return 0, 0, false + } + if pos[0] == dstX && pos[1] == dstY { + return 0, 0, false + } + // Reject stale entries: the position the map points at must still + // carry the same hash according to our per-tile array. + if d.tileHash[(pos[1]/ts)*d.cols+(pos[0]/ts)] != sum { + return 0, 0, false + } + return pos[0], pos[1], true +} + +// extractCopyRectTiles examines the diff-produced (per-tile) dirty list and +// pulls out any tiles whose current-frame content matches a prev-frame tile +// at a different position. Returns the CopyRect candidates and the residual +// dirty tiles that still need pixel encoding. +type copyRectMove struct { + srcX, srcY int + dstX, dstY int +} + +func (d *copyRectDetector) extractCopyRectTiles(cur *image.RGBA, dirtyTiles [][4]int) (moves []copyRectMove, remaining [][4]int) { + ts := d.tileSize + remaining = dirtyTiles[:0:cap(dirtyTiles)] + for _, r := range dirtyTiles { + if r[2] == ts && r[3] == ts { + if sx, sy, ok := d.findTileMatch(cur, r[0], r[1]); ok { + moves = append(moves, copyRectMove{ + srcX: sx, srcY: sy, dstX: r[0], dstY: r[1], + }) + continue + } + } + remaining = append(remaining, r) + } + return moves, remaining +} diff --git a/client/vnc/server/copyrect_test.go b/client/vnc/server/copyrect_test.go new file mode 100644 index 000000000..8b5691b56 --- /dev/null +++ b/client/vnc/server/copyrect_test.go @@ -0,0 +1,160 @@ +package server + +import ( + "image" + "testing" +) + +// fillTile paints a tileSize×tileSize block of img at (x,y) with the colour +// derived from (r,g,b) so the test can construct distinct-content tiles. +func fillTile(img *image.RGBA, x, y, ts int, r, g, b byte) { + for row := 0; row < ts; row++ { + off := (y+row)*img.Stride + x*4 + for col := 0; col < ts; col++ { + img.Pix[off+col*4+0] = r + img.Pix[off+col*4+1] = g + img.Pix[off+col*4+2] = b + img.Pix[off+col*4+3] = 0xff + } + } +} + +// copyTile copies a tileSize×tileSize block from src(sx,sy) to dst(dx,dy). +func copyTile(dst, src *image.RGBA, sx, sy, dx, dy, ts int) { + for row := 0; row < ts; row++ { + srcOff := (sy+row)*src.Stride + sx*4 + dstOff := (dy+row)*dst.Stride + dx*4 + copy(dst.Pix[dstOff:dstOff+ts*4], src.Pix[srcOff:srcOff+ts*4]) + } +} + +func TestCopyRectDetector_DetectsVerticalScroll(t *testing.T) { + const w, h = 256, 192 // 4×3 tiles at 64px + const ts = 64 + + prev := image.NewRGBA(image.Rect(0, 0, w, h)) + cur := image.NewRGBA(image.Rect(0, 0, w, h)) + + // prev: 12 tiles each with a unique colour. + for ty := 0; ty < 3; ty++ { + for tx := 0; tx < 4; tx++ { + fillTile(prev, tx*ts, ty*ts, ts, byte(tx*40), byte(ty*60), 0x80) + } + } + // cur: simulate a single-tile-row scroll upward — every tile copied from + // the row below in prev, top row is new content. + for ty := 0; ty < 2; ty++ { + for tx := 0; tx < 4; tx++ { + copyTile(cur, prev, tx*ts, (ty+1)*ts, tx*ts, ty*ts, ts) + } + } + // Bottom row of cur: new colour, not a match. + for tx := 0; tx < 4; tx++ { + fillTile(cur, tx*ts, 2*ts, ts, 0xff, 0xff, 0xff) + } + + d := newCopyRectDetector(ts) + d.rebuild(prev, w, h) + + tiles := diffTiles(prev, cur, w, h, ts) + moves, remaining := d.extractCopyRectTiles(cur, tiles) + + // Expect 8 CopyRect moves (top two rows) and 4 residual tiles (bottom row). + if len(moves) != 8 { + t.Fatalf("moves: want 8, got %d", len(moves)) + } + if len(remaining) != 4 { + t.Fatalf("remaining: want 4, got %d", len(remaining)) + } + // Spot-check one move: cur (0, 0) should map to prev (0, 64). + var found bool + for _, m := range moves { + if m.dstX == 0 && m.dstY == 0 { + if m.srcX != 0 || m.srcY != ts { + t.Fatalf("move at (0,0): src=(%d,%d), want (0,%d)", m.srcX, m.srcY, ts) + } + found = true + } + } + if !found { + t.Fatalf("no move for dst (0,0)") + } +} + +func TestCopyRectDetector_RejectsSelfMatch(t *testing.T) { + const w, h = 128, 128 + const ts = 64 + + prev := image.NewRGBA(image.Rect(0, 0, w, h)) + cur := image.NewRGBA(image.Rect(0, 0, w, h)) + + // prev: 4 tiles, all unique + fillTile(prev, 0, 0, ts, 0x10, 0x20, 0x30) + fillTile(prev, ts, 0, ts, 0x40, 0x50, 0x60) + fillTile(prev, 0, ts, ts, 0x70, 0x80, 0x90) + fillTile(prev, ts, ts, ts, 0xa0, 0xb0, 0xc0) + + // cur: tile (0,0) unchanged, others changed but content same as prev's (0,0). + fillTile(cur, 0, 0, ts, 0x10, 0x20, 0x30) // self-match + fillTile(cur, ts, 0, ts, 0xff, 0xff, 0xff) + fillTile(cur, 0, ts, ts, 0xff, 0xff, 0xff) + fillTile(cur, ts, ts, ts, 0xff, 0xff, 0xff) + + d := newCopyRectDetector(ts) + d.rebuild(prev, w, h) + + // Tile (0,0) is not in the dirty list (it's unchanged) so it should not + // produce a move even though its hash matches prev (0,0). + tiles := diffTiles(prev, cur, w, h, ts) + moves, _ := d.extractCopyRectTiles(cur, tiles) + for _, m := range moves { + if m.dstX == 0 && m.dstY == 0 { + t.Fatalf("unexpected move at (0,0)") + } + } +} + +func TestCopyRectDetector_PassThroughWhenNoMatch(t *testing.T) { + const w, h = 64, 64 + const ts = 64 + + prev := image.NewRGBA(image.Rect(0, 0, w, h)) + cur := image.NewRGBA(image.Rect(0, 0, w, h)) + fillTile(prev, 0, 0, ts, 0x11, 0x22, 0x33) + fillTile(cur, 0, 0, ts, 0xaa, 0xbb, 0xcc) // wholly different + + d := newCopyRectDetector(ts) + d.rebuild(prev, w, h) + tiles := diffTiles(prev, cur, w, h, ts) + moves, remaining := d.extractCopyRectTiles(cur, tiles) + + if len(moves) != 0 { + t.Fatalf("expected 0 moves, got %d", len(moves)) + } + if len(remaining) != 1 { + t.Fatalf("expected 1 residual tile, got %d", len(remaining)) + } +} + +func TestEncodeCopyRectBody_Layout(t *testing.T) { + got := encodeCopyRectBody(100, 200, 300, 400, 64, 48) + if len(got) != 16 { + t.Fatalf("CopyRect body length: want 16, got %d", len(got)) + } + // Dest position + if got[0] != 0x01 || got[1] != 0x2c || got[2] != 0x01 || got[3] != 0x90 { + t.Fatalf("bad dest bytes: % x", got[0:4]) + } + // Width, height + if got[4] != 0 || got[5] != 64 || got[6] != 0 || got[7] != 48 { + t.Fatalf("bad size bytes: % x", got[4:8]) + } + // Encoding = 1 + if got[11] != 0x01 { + t.Fatalf("bad encoding byte: 0x%02x", got[11]) + } + // Source position + if got[12] != 0 || got[13] != 100 || got[14] != 0 || got[15] != 200 { + t.Fatalf("bad src bytes: % x", got[12:16]) + } +} diff --git a/client/vnc/server/rfb.go b/client/vnc/server/rfb.go index 4822c0abf..affc8c0a8 100644 --- a/client/vnc/server/rfb.go +++ b/client/vnc/server/rfb.go @@ -46,10 +46,11 @@ const ( serverCutText = 3 // Encoding types. - encRaw = 0 - encHextile = 5 - encZlib = 6 - encTight = 7 + encRaw = 0 + encCopyRect = 1 + encHextile = 5 + encZlib = 6 + encTight = 7 // Tight compression-control byte top nibble. Stream-reset bits 0-3 // (one per zlib stream) are unused while we run a single stream. @@ -140,6 +141,22 @@ func parsePixelFormat(pf []byte) clientPixelFormat { } } +// encodeCopyRectBody emits the per-rect payload for a CopyRect rectangle: +// the 12-byte rect header (dst position + size + encoding=1) plus a 4-byte +// source position. Used inside multi-rect FramebufferUpdate messages, so +// the 4-byte FU header is the caller's responsibility. +func encodeCopyRectBody(srcX, srcY, dstX, dstY, w, h int) []byte { + buf := make([]byte, 12+4) + binary.BigEndian.PutUint16(buf[0:2], uint16(dstX)) + binary.BigEndian.PutUint16(buf[2:4], uint16(dstY)) + binary.BigEndian.PutUint16(buf[4:6], uint16(w)) + binary.BigEndian.PutUint16(buf[6:8], uint16(h)) + binary.BigEndian.PutUint32(buf[8:12], uint32(encCopyRect)) + binary.BigEndian.PutUint16(buf[12:14], uint16(srcX)) + binary.BigEndian.PutUint16(buf[14:16], uint16(srcY)) + return buf +} + // encodeRawRect encodes a framebuffer region as a raw RFB rectangle. // The returned buffer includes the FramebufferUpdate header (1 rectangle). func encodeRawRect(img *image.RGBA, pf clientPixelFormat, x, y, w, h int) []byte { @@ -311,13 +328,14 @@ func encodeZlibRect(img *image.RGBA, pf clientPixelFormat, x, y, w, h int, z *zl return buf } -// diffRects compares two RGBA images and returns a list of dirty rectangles. -// Divides the screen into tiles and checks each for changes. -func diffRects(prev, cur *image.RGBA, w, h, tileSize int) [][4]int { +// diffTiles compares two RGBA images and returns a tile-ordered list of +// dirty tiles, one entry per tile. Tile order is top-to-bottom, left-to- +// right within each row. The caller decides whether to coalesce or hand +// the list off to the CopyRect detector first. +func diffTiles(prev, cur *image.RGBA, w, h, tileSize int) [][4]int { if prev == nil { return [][4]int{{0, 0, w, h}} } - var rects [][4]int for ty := 0; ty < h; ty += tileSize { th := min(tileSize, h-ty) @@ -328,7 +346,14 @@ func diffRects(prev, cur *image.RGBA, w, h, tileSize int) [][4]int { } } } - return coalesceRects(rects) + return rects +} + +// diffRects is the legacy convenience: diff then coalesce. Used by paths +// that don't go through the CopyRect detector and by tests that exercise +// the diff-plus-coalesce pipeline as one unit. +func diffRects(prev, cur *image.RGBA, w, h, tileSize int) [][4]int { + return coalesceRects(diffTiles(prev, cur, w, h, tileSize)) } // coalesceRects merges adjacent dirty tiles into larger rectangles to cut diff --git a/client/vnc/server/session.go b/client/vnc/server/session.go index 3b09bda01..9fb21eeb9 100644 --- a/client/vnc/server/session.go +++ b/client/vnc/server/session.go @@ -47,13 +47,15 @@ type session struct { // messageLoop writes these on SetPixelFormat/SetEncodings, which RFB // clients may send at any time after the handshake, while encoderLoop // reads them on every frame. - encMu sync.RWMutex - pf clientPixelFormat - useZlib bool - useHextile bool - useTight bool - zlib *zlibState - tight *tightState + encMu sync.RWMutex + pf clientPixelFormat + useZlib bool + useHextile bool + useTight bool + useCopyRect bool + zlib *zlibState + tight *tightState + copyRectDet *copyRectDetector // prevFrame, curFrame and idleFrames live on the encoder goroutine and // must not be touched elsewhere. curFrame holds a session-owned copy of // the capturer's latest frame so the encoder works on a stable buffer @@ -319,6 +321,12 @@ func (s *session) handleSetEncodings() error { for i := range int(numEnc) { enc := int32(binary.BigEndian.Uint32(buf[i*4 : i*4+4])) switch enc { + case encCopyRect: + s.useCopyRect = true + if s.copyRectDet == nil { + s.copyRectDet = newCopyRectDetector(tileSize) + } + encs = append(encs, "copyrect") case encZlib: s.useZlib = true if s.zlib == nil { @@ -410,8 +418,8 @@ func (s *session) processFBRequest(req fbRequest) error { } if req.incremental && s.prevFrame != nil { - rects := diffRects(s.prevFrame, img, s.serverW, s.serverH, tileSize) - if len(rects) == 0 { + tiles := diffTiles(s.prevFrame, img, s.serverW, s.serverH, tileSize) + if len(tiles) == 0 { // Nothing changed. Back off briefly before responding to reduce // CPU usage when the screen is static. The client re-requests // immediately after receiving our empty response, so without @@ -423,17 +431,33 @@ func (s *session) processFBRequest(req fbRequest) error { return s.sendEmptyUpdate() } s.idleFrames = 0 - if s.shouldPromoteToFullFrame(rects) { + + // Snapshot the dirty set before extractCopyRectTiles consumes it. + // extract mutates in place, so without the copy we lose the + // move-destination positions needed to incrementally update the + // CopyRect index after the swap. + dirty := make([][4]int, len(tiles)) + copy(dirty, tiles) + + var moves []copyRectMove + if s.useCopyRect && s.copyRectDet != nil { + moves, tiles = s.copyRectDet.extractCopyRectTiles(img, tiles) + } + + rects := coalesceRects(tiles) + if s.shouldPromoteToFullFrame(rects) && len(moves) == 0 { if err := s.sendFullUpdate(img); err != nil { return err } s.swapPrevCur() + s.refreshCopyRectIndex() return nil } - if err := s.sendDirtyRects(img, rects); err != nil { + if err := s.sendDirtyAndMoves(img, moves, rects); err != nil { return err } s.swapPrevCur() + s.updateCopyRectIndex(dirty) return nil } @@ -443,9 +467,30 @@ func (s *session) processFBRequest(req fbRequest) error { return err } s.swapPrevCur() + s.refreshCopyRectIndex() return nil } +// refreshCopyRectIndex does a full hash sweep of the just-swapped prevFrame. +// Used after full-frame sends, where we don't have a per-tile dirty list to +// drive an incremental update. +func (s *session) refreshCopyRectIndex() { + if s.copyRectDet == nil || s.prevFrame == nil { + return + } + s.copyRectDet.rebuild(s.prevFrame, s.serverW, s.serverH) +} + +// updateCopyRectIndex incrementally updates the CopyRect detector's hash +// tables for the tiles that just changed. On first use (or after resize) +// updateDirty internally falls back to a full rebuild. +func (s *session) updateCopyRectIndex(dirty [][4]int) { + if s.copyRectDet == nil || s.prevFrame == nil { + return + } + s.copyRectDet.updateDirty(s.prevFrame, s.serverW, s.serverH, dirty) +} + // captureFrame returns a session-owned frame for this encode cycle. // Capturers that implement captureIntoer (Linux X11, macOS) write directly // into curFrame, saving a per-frame full-screen memcpy. Capturers that @@ -529,12 +574,19 @@ func (s *session) sendFullUpdate(img *image.RGBA) error { return err } -func (s *session) sendDirtyRects(img *image.RGBA, rects [][4]int) error { - // Build a multi-rectangle FramebufferUpdate. - // Header: type(1) + padding(1) + numRects(2) +// sendDirtyAndMoves writes one FramebufferUpdate combining CopyRect moves +// (cheap, 16 bytes each) and pixel-encoded dirty rects. Moves come first so +// their source tiles are read from the client's pre-update framebuffer state, +// before any subsequent rect overwrites them. +func (s *session) sendDirtyAndMoves(img *image.RGBA, moves []copyRectMove, rects [][4]int) error { + if len(moves) == 0 && len(rects) == 0 { + return nil + } + + total := len(moves) + len(rects) header := make([]byte, 4) header[0] = serverFramebufferUpdate - binary.BigEndian.PutUint16(header[2:4], uint16(len(rects))) + binary.BigEndian.PutUint16(header[2:4], uint16(total)) s.writeMu.Lock() defer s.writeMu.Unlock() @@ -543,6 +595,14 @@ func (s *session) sendDirtyRects(img *image.RGBA, rects [][4]int) error { return err } + ts := tileSize + for _, m := range moves { + body := encodeCopyRectBody(m.srcX, m.srcY, m.dstX, m.dstY, ts, ts) + if _, err := s.conn.Write(body); err != nil { + return err + } + } + for _, r := range rects { x, y, w, h := r[0], r[1], r[2], r[3] rectBuf := s.encodeTile(img, x, y, w, h)