Add embedded VNC server with JWT auth and per-peer toggle

2026-05-19 15:19:55 +00:00 · 2026-05-16 09:19:34 +02:00
parent e916f12cca
commit 9f0aa1ce26
83 changed files with 12693 additions and 1245 deletions
--- a/client/cmd/up.go
+++ b/client/cmd/up.go
@@ -361,6 +361,9 @@ func setupSetConfigReq(customDNSAddressConverted []byte, cmd *cobra.Command, pro
 	if cmd.Flag(serverSSHAllowedFlag).Changed {
 		req.ServerSSHAllowed = &serverSSHAllowed
 	}
+	if cmd.Flag(serverVNCAllowedFlag).Changed {
+		req.ServerVNCAllowed = &serverVNCAllowed
+	}
 	if cmd.Flag(enableSSHRootFlag).Changed {
 		req.EnableSSHRoot = &enableSSHRoot
 	}
@@ -467,6 +470,9 @@ func setupConfig(customDNSAddressConverted []byte, cmd *cobra.Command, configFil
 	if cmd.Flag(serverSSHAllowedFlag).Changed {
 		ic.ServerSSHAllowed = &serverSSHAllowed
 	}
+	if cmd.Flag(serverVNCAllowedFlag).Changed {
+		ic.ServerVNCAllowed = &serverVNCAllowed
+	}

 	if cmd.Flag(enableSSHRootFlag).Changed {
 		ic.EnableSSHRoot = &enableSSHRoot
@@ -595,6 +601,9 @@ func setupLoginRequest(providedSetupKey string, customDNSAddressConverted []byte
 	if cmd.Flag(serverSSHAllowedFlag).Changed {
 		loginRequest.ServerSSHAllowed = &serverSSHAllowed
 	}
+	if cmd.Flag(serverVNCAllowedFlag).Changed {
+		loginRequest.ServerVNCAllowed = &serverVNCAllowed
+	}

 	if cmd.Flag(enableSSHRootFlag).Changed {
 		loginRequest.EnableSSHRoot = &enableSSHRoot
--- a/client/cmd/vnc_agent.go
+++ b/client/cmd/vnc_agent.go
@@ -0,0 +1,67 @@
+//go:build windows
+
+package cmd
+
+import (
+	"fmt"
+	"net/netip"
+	"os"
+
+	log "github.com/sirupsen/logrus"
+	"github.com/spf13/cobra"
+
+	vncserver "github.com/netbirdio/netbird/client/vnc/server"
+)
+
+// vncAgentPort holds the value of the vnc-agent --port flag: the TCP port,
+// kept as a string, that the agent binds on 127.0.0.1.
+var vncAgentPort string
+
+// init declares the --port flag (default "15900") on vncAgentCmd and attaches
+// the command to rootCmd.
+func init() {
+	vncAgentCmd.Flags().StringVar(&vncAgentPort, "port", "15900", "Port for the VNC agent to listen on")
+	rootCmd.AddCommand(vncAgentCmd)
+}
+
+// vncAgentCmd is the hidden "vnc-agent" subcommand: a VNC server for the
+// current user session that listens on 127.0.0.1 only. The NetBird service
+// (Session 0) spawns it via CreateProcessAsUser into the interactive console
+// session and passes a token in the NB_VNC_AGENT_TOKEN environment variable;
+// the command refuses to run when that variable is empty.
+var vncAgentCmd = &cobra.Command{
+	Use:          "vnc-agent",
+	Short:        "Run VNC capture agent (internal, spawned by service)",
+	Hidden:       true,
+	SilenceUsage: true,
+	RunE: func(cmd *cobra.Command, args []string) error {
+		// The service reads the agent's stderr and relogs it, so emit
+		// JSON with caller info to keep the stream machine-parseable.
+		log.SetReportCaller(true)
+		log.SetFormatter(&log.JSONFormatter{})
+		log.SetOutput(os.Stderr)
+
+		log.Infof("VNC agent starting on 127.0.0.1:%s (session %d)", vncAgentPort, vncserver.GetCurrentSessionID())
+
+		agentToken := os.Getenv("NB_VNC_AGENT_TOKEN")
+		if agentToken == "" {
+			return fmt.Errorf("NB_VNC_AGENT_TOKEN not set; agent requires a token from the service")
+		}
+
+		// Built-in auth is switched off on the agent; the agent token is
+		// set instead.
+		srv := vncserver.New(vncserver.NewDesktopCapturer(), vncserver.NewWindowsInputInjector(), "")
+		srv.SetDisableAuth(true)
+		srv.SetAgentToken(agentToken)
+
+		listenAddr, err := netip.ParseAddrPort("127.0.0.1:" + vncAgentPort)
+		if err != nil {
+			return fmt.Errorf("parse listen addr: %w", err)
+		}
+
+		// 127.0.0.0/8 is handed to Start as the network argument.
+		ctx := cmd.Context()
+		loopbackNet := netip.PrefixFrom(netip.AddrFrom4([4]byte{127, 0, 0, 0}), 8)
+		if err := srv.Start(ctx, listenAddr, loopbackNet); err != nil {
+			return fmt.Errorf("start vnc server: %w", err)
+		}
+		log.Infof("vnc-agent listening on 127.0.0.1:%s, ready", vncAgentPort)
+
+		// Serve until the command context is cancelled.
+		<-ctx.Done()
+		log.Info("vnc-agent context cancelled, shutting down")
+		return srv.Stop()
+	},
+}
--- a/client/cmd/vnc_flags.go
+++ b/client/cmd/vnc_flags.go
@@ -0,0 +1,9 @@
+package cmd
+
+// serverVNCAllowedFlag is the name of the `up` flag that toggles the embedded
+// VNC server on this peer.
+const serverVNCAllowedFlag = "allow-server-vnc"
+
+// serverVNCAllowed receives the parsed value of serverVNCAllowedFlag.
+var serverVNCAllowed bool
+
+// init registers serverVNCAllowedFlag (default false) as a persistent flag of
+// upCmd.
+func init() {
+	upCmd.PersistentFlags().BoolVar(&serverVNCAllowed, serverVNCAllowedFlag, false, "Allow embedded VNC server on peer")
+}
--- a/client/internal/auth/auth.go
+++ b/client/internal/auth/auth.go
@@ -315,6 +315,7 @@ func (a *Auth) setSystemInfoFlags(info *system.Info) {
 		a.config.RosenpassEnabled,
 		a.config.RosenpassPermissive,
 		a.config.ServerSSHAllowed,
+		a.config.ServerVNCAllowed,
 		a.config.DisableClientRoutes,
 		a.config.DisableServerRoutes,
 		a.config.DisableDNS,
--- a/client/internal/connect.go
+++ b/client/internal/connect.go
@@ -562,6 +562,7 @@ func createEngineConfig(key wgtypes.Key, config *profilemanager.Config, peerConf
 		RosenpassEnabled:              config.RosenpassEnabled,
 		RosenpassPermissive:           config.RosenpassPermissive,
 		ServerSSHAllowed:              util.ReturnBoolWithDefaultTrue(config.ServerSSHAllowed),
+		ServerVNCAllowed:              config.ServerVNCAllowed != nil && *config.ServerVNCAllowed,
 		EnableSSHRoot:                 config.EnableSSHRoot,
 		EnableSSHSFTP:                 config.EnableSSHSFTP,
 		EnableSSHLocalPortForwarding:  config.EnableSSHLocalPortForwarding,
@@ -644,6 +645,7 @@ func loginToManagement(ctx context.Context, client mgm.Client, pubSSHKey []byte,
 		config.RosenpassEnabled,
 		config.RosenpassPermissive,
 		config.ServerSSHAllowed,
+		config.ServerVNCAllowed,
 		config.DisableClientRoutes,
 		config.DisableServerRoutes,
 		config.DisableDNS,
--- a/client/internal/debug/debug.go
+++ b/client/internal/debug/debug.go
@@ -636,6 +636,9 @@ func (g *BundleGenerator) addCommonConfigFields(configContent *strings.Builder)
 	if g.internalConfig.SSHJWTCacheTTL != nil {
 		configContent.WriteString(fmt.Sprintf("SSHJWTCacheTTL: %d\n", *g.internalConfig.SSHJWTCacheTTL))
 	}
+	if g.internalConfig.ServerVNCAllowed != nil {
+		configContent.WriteString(fmt.Sprintf("ServerVNCAllowed: %v\n", *g.internalConfig.ServerVNCAllowed))
+	}

 	configContent.WriteString(fmt.Sprintf("DisableClientRoutes: %v\n", g.internalConfig.DisableClientRoutes))
 	configContent.WriteString(fmt.Sprintf("DisableServerRoutes: %v\n", g.internalConfig.DisableServerRoutes))
--- a/client/internal/debug/debug_test.go
+++ b/client/internal/debug/debug_test.go
@@ -862,6 +862,7 @@ func TestAddConfig_AllFieldsCovered(t *testing.T) {
 		RosenpassEnabled:              true,
 		RosenpassPermissive:           true,
 		ServerSSHAllowed:              &bTrue,
+		ServerVNCAllowed:              &bTrue,
 		EnableSSHRoot:                 &bTrue,
 		EnableSSHSFTP:                 &bTrue,
 		EnableSSHLocalPortForwarding:  &bTrue,
--- a/client/internal/engine.go
+++ b/client/internal/engine.go
@@ -123,6 +123,7 @@ type EngineConfig struct {
 	RosenpassPermissive bool

 	ServerSSHAllowed              bool
+	ServerVNCAllowed              bool
 	EnableSSHRoot                 *bool
 	EnableSSHSFTP                 *bool
 	EnableSSHLocalPortForwarding  *bool
@@ -205,6 +206,7 @@ type Engine struct {
 	networkMonitor *networkmonitor.NetworkMonitor

 	sshServer sshServer
+	vncSrv    vncServer

 	statusRecorder *peer.Status

@@ -320,6 +322,10 @@ func (e *Engine) Stop() error {
 		log.Warnf("failed to stop SSH server: %v", err)
 	}

+	if err := e.stopVNCServer(); err != nil {
+		log.Warnf("failed to stop VNC server: %v", err)
+	}
+
 	e.cleanupSSHConfig()

 	if e.ingressGatewayMgr != nil {
@@ -1010,6 +1016,7 @@ func (e *Engine) updateChecksIfNew(checks []*mgmProto.Checks) error {
 		e.config.RosenpassEnabled,
 		e.config.RosenpassPermissive,
 		&e.config.ServerSSHAllowed,
+		&e.config.ServerVNCAllowed,
 		e.config.DisableClientRoutes,
 		e.config.DisableServerRoutes,
 		e.config.DisableDNS,
@@ -1057,6 +1064,10 @@ func (e *Engine) updateConfig(conf *mgmProto.PeerConfig) error {
 		}
 	}

+	if err := e.updateVNC(conf.GetSshConfig()); err != nil {
+		log.Warnf("failed handling VNC server setup: %v", err)
+	}
+
 	state := e.statusRecorder.GetLocalPeerState()
 	state.IP = e.wgInterface.Address().String()
 	state.IPv6 = e.wgInterface.Address().IPv6String()
@@ -1182,6 +1193,7 @@ func (e *Engine) receiveManagementEvents() {
 			e.config.RosenpassEnabled,
 			e.config.RosenpassPermissive,
 			&e.config.ServerSSHAllowed,
+			&e.config.ServerVNCAllowed,
 			e.config.DisableClientRoutes,
 			e.config.DisableServerRoutes,
 			e.config.DisableDNS,
@@ -1371,6 +1383,11 @@ func (e *Engine) updateNetworkMap(networkMap *mgmProto.NetworkMap) error {
 		e.updateSSHServerAuth(networkMap.GetSshAuth())
 	}

+	// VNC auth: always sync, including nil so cleared auth on the management
+	// side is applied locally, and so it isn't skipped on the RemotePeersIsEmpty
+	// cleanup path.
+	e.updateVNCServerAuth(networkMap.GetVncAuth())
+
 	// must set the exclude list after the peers are added. Without it the manager can not figure out the peers parameters from the store
 	excludedLazyPeers := e.toExcludedLazyPeers(forwardingRules, remotePeers)
 	e.connMgr.SetExcludeList(e.ctx, excludedLazyPeers)
@@ -1826,6 +1843,7 @@ func (e *Engine) readInitialSettings() ([]*route.Route, *nbdns.Config, bool, err
 		e.config.RosenpassEnabled,
 		e.config.RosenpassPermissive,
 		&e.config.ServerSSHAllowed,
+		&e.config.ServerVNCAllowed,
 		e.config.DisableClientRoutes,
 		e.config.DisableServerRoutes,
 		e.config.DisableDNS,
--- a/client/internal/engine_vnc.go
+++ b/client/internal/engine_vnc.go
@@ -0,0 +1,238 @@
+package internal
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"net/netip"
+
+	log "github.com/sirupsen/logrus"
+
+	firewallManager "github.com/netbirdio/netbird/client/firewall/manager"
+	nftypes "github.com/netbirdio/netbird/client/internal/netflow/types"
+	sshauth "github.com/netbirdio/netbird/client/ssh/auth"
+	vncserver "github.com/netbirdio/netbird/client/vnc/server"
+	mgmProto "github.com/netbirdio/netbird/shared/management/proto"
+	sshuserhash "github.com/netbirdio/netbird/shared/sshauth"
+)
+
+const (
+	// vncExternalPort is the TCP port on the local NetBird address that the
+	// inbound DNAT rule redirects from.
+	vncExternalPort uint16 = 5900
+	// vncInternalPort is the TCP port the embedded VNC server listens on and
+	// the DNAT rule redirects to.
+	vncInternalPort uint16 = 25900
+)
+
+// vncServer is the lifecycle surface the engine requires from a VNC server
+// implementation.
+type vncServer interface {
+	// Start begins serving on addr; network is passed through from the
+	// caller (the engine supplies the WireGuard interface's network).
+	Start(ctx context.Context, addr netip.AddrPort, network netip.Prefix) error
+	// Stop shuts the server down.
+	Stop() error
+}
+
+// setupVNCPortRedirection installs an inbound TCP DNAT rule on the local
+// NetBird address that redirects vncExternalPort to vncInternalPort.
+//
+// It is a no-op returning nil when the firewall or the WireGuard interface is
+// unset, and returns an error when the local address is invalid or the
+// firewall rejects the rule.
+func (e *Engine) setupVNCPortRedirection() error {
+	if e.firewall == nil || e.wgInterface == nil {
+		return nil
+	}
+
+	ip := e.wgInterface.Address().IP
+	if !ip.IsValid() {
+		return errors.New("invalid local NetBird address")
+	}
+
+	err := e.firewall.AddInboundDNAT(ip, firewallManager.ProtocolTCP, vncExternalPort, vncInternalPort)
+	if err != nil {
+		return fmt.Errorf("add VNC port redirection: %w", err)
+	}
+
+	log.Infof("VNC port redirection: %s:%d -> %s:%d", ip, vncExternalPort, ip, vncInternalPort)
+	return nil
+}
+
+// cleanupVNCPortRedirection removes the inbound TCP DNAT rule that redirects
+// vncExternalPort to vncInternalPort on the local NetBird address.
+//
+// It is a no-op returning nil when the firewall or the WireGuard interface is
+// unset, and returns an error when the local address is invalid or the
+// firewall fails to remove the rule.
+func (e *Engine) cleanupVNCPortRedirection() error {
+	if e.firewall == nil || e.wgInterface == nil {
+		return nil
+	}
+
+	ip := e.wgInterface.Address().IP
+	if !ip.IsValid() {
+		return errors.New("invalid local NetBird address")
+	}
+
+	err := e.firewall.RemoveInboundDNAT(ip, firewallManager.ProtocolTCP, vncExternalPort, vncInternalPort)
+	if err != nil {
+		return fmt.Errorf("remove VNC port redirection: %w", err)
+	}
+	return nil
+}
+
+// updateVNC reconciles the VNC server with the engine configuration.
+//
+// The server is stopped when ServerVNCAllowed is false or BlockInbound is
+// true, started when it is allowed but not yet running, and otherwise only
+// has its JWT settings refreshed. sshConf carries the JWT identity provider
+// config, which VNC shares with SSH.
+func (e *Engine) updateVNC(sshConf *mgmProto.SSHConfig) error {
+	switch {
+	case !e.config.ServerVNCAllowed:
+		if e.vncSrv != nil {
+			log.Info("VNC server disabled, stopping")
+		}
+		return e.stopVNCServer()
+
+	case e.config.BlockInbound:
+		log.Info("VNC server disabled because inbound connections are blocked")
+		return e.stopVNCServer()
+
+	case e.vncSrv == nil:
+		return e.startVNCServer(sshConf)
+
+	default:
+		// Already running: pick up a JWT config management may have changed.
+		e.updateVNCServerJWT(sshConf)
+		return nil
+	}
+}
+
+// vncJWTConfigFromProto converts the JWT section of the management SSH config
+// into the VNC server's JWTConfig. It returns nil when sshConf carries no JWT
+// config. When the audiences list is empty, a non-empty single audience field
+// is used as the only audience.
+func vncJWTConfigFromProto(sshConf *mgmProto.SSHConfig) *vncserver.JWTConfig {
+	protoJWT := sshConf.GetJwtConfig()
+	if protoJWT == nil {
+		return nil
+	}
+
+	audiences := protoJWT.GetAudiences()
+	if len(audiences) == 0 && protoJWT.GetAudience() != "" {
+		audiences = []string{protoJWT.GetAudience()}
+	}
+
+	return &vncserver.JWTConfig{
+		Issuer:       protoJWT.GetIssuer(),
+		Audiences:    audiences,
+		KeysLocation: protoJWT.GetKeysLocation(),
+		MaxTokenAge:  protoJWT.GetMaxTokenAge(),
+	}
+}
+
+// startVNCServer creates the platform VNC server, starts it on the NetBird
+// address at vncInternalPort and records it in e.vncSrv.
+//
+// It returns an error when the WireGuard interface is missing or the server
+// fails to start. When newPlatformVNC reports no support it returns nil
+// without starting anything. A failure to install the vncExternalPort ->
+// vncInternalPort redirection is only logged.
+func (e *Engine) startVNCServer(sshConf *mgmProto.SSHConfig) error {
+	if e.wgInterface == nil {
+		return errors.New("wg interface not initialized")
+	}
+
+	capturer, injector, ok := newPlatformVNC()
+	if !ok {
+		log.Debug("VNC server not supported on this platform")
+		return nil
+	}
+
+	netbirdIP := e.wgInterface.Address().IP
+
+	srv := vncserver.New(capturer, injector, "")
+	if vncNeedsServiceMode() {
+		log.Info("VNC: running in Session 0, enabling service mode (agent proxy)")
+		srv.SetServiceMode(true)
+	}
+
+	if jwtConfig := vncJWTConfigFromProto(sshConf); jwtConfig != nil {
+		srv.SetJWTConfig(jwtConfig)
+		log.Debugf("VNC: JWT authentication configured (issuer=%s)", jwtConfig.Issuer)
+	}
+
+	if netstackNet := e.wgInterface.GetNet(); netstackNet != nil {
+		srv.SetNetstackNet(netstackNet)
+	}
+
+	listenAddr := netip.AddrPortFrom(netbirdIP, vncInternalPort)
+	network := e.wgInterface.Address().Network
+	if err := srv.Start(e.ctx, listenAddr, network); err != nil {
+		return fmt.Errorf("start VNC server: %w", err)
+	}
+
+	e.vncSrv = srv
+
+	// Only firewall implementations that expose RegisterNetstackService are
+	// told about the listening port.
+	if registrar, ok := e.firewall.(interface {
+		RegisterNetstackService(protocol nftypes.Protocol, port uint16)
+	}); ok {
+		registrar.RegisterNetstackService(nftypes.TCP, vncInternalPort)
+		log.Debugf("registered VNC service for TCP:%d", vncInternalPort)
+	}
+
+	if err := e.setupVNCPortRedirection(); err != nil {
+		log.Warnf("setup VNC port redirection: %v", err)
+	}
+
+	log.Info("VNC server enabled")
+	return nil
+}
+
+// updateVNCServerJWT pushes the JWT validation settings from sshConf to the
+// running VNC server; VNC uses the same identity provider config as SSH.
+//
+// It does nothing when no server is running, when the server is not a
+// *vncserver.Server, or when sshConf carries no JWT config (the server then
+// keeps whatever it was configured with before).
+func (e *Engine) updateVNCServerJWT(sshConf *mgmProto.SSHConfig) {
+	if e.vncSrv == nil {
+		return
+	}
+
+	vncSrv, ok := e.vncSrv.(*vncserver.Server)
+	if !ok {
+		return
+	}
+
+	if jwtConfig := vncJWTConfigFromProto(sshConf); jwtConfig != nil {
+		vncSrv.SetJWTConfig(jwtConfig)
+	}
+}
+
+// updateVNCServerAuth pushes the fine-grained VNC access control received
+// from management to the running VNC server.
+//
+// A nil vncAuth is treated as an empty message: updateNetworkMap calls this
+// on every network map, including with nil, so that auth cleared on the
+// management side is also cleared locally. In that case the server receives
+// a config with no authorized users, no machine users and an empty claim.
+//
+// It does nothing when no server is running or the server is not a
+// *vncserver.Server. If any user hash is not exactly 16 bytes long the whole
+// update is dropped and the previously applied config stays in effect.
+func (e *Engine) updateVNCServerAuth(vncAuth *mgmProto.VNCAuth) {
+	if e.vncSrv == nil {
+		return
+	}
+
+	vncSrv, ok := e.vncSrv.(*vncserver.Server)
+	if !ok {
+		return
+	}
+
+	// The generated getters are nil-safe, so a nil vncAuth yields empty values.
+	protoUsers := vncAuth.GetAuthorizedUsers()
+	authorizedUsers := make([]sshuserhash.UserIDHash, len(protoUsers))
+	for i, hash := range protoUsers {
+		if len(hash) != 16 {
+			log.Warnf("invalid VNC auth hash length %d, expected 16", len(hash))
+			return
+		}
+		authorizedUsers[i] = sshuserhash.UserIDHash(hash)
+	}
+
+	protoMachineUsers := vncAuth.GetMachineUsers()
+	machineUsers := make(map[string][]uint32, len(protoMachineUsers))
+	for osUser, indexes := range protoMachineUsers {
+		machineUsers[osUser] = indexes.GetIndexes()
+	}
+
+	vncSrv.UpdateVNCAuth(&sshauth.Config{
+		UserIDClaim:     vncAuth.GetUserIDClaim(),
+		AuthorizedUsers: authorizedUsers,
+		MachineUsers:    machineUsers,
+	})
+}
+
+// GetVNCServerStatus reports whether the engine currently holds a VNC server
+// instance, i.e. e.vncSrv is non-nil.
+// NOTE(review): e.vncSrv is read here without any visible locking — confirm
+// that callers outside the engine's update path are synchronized.
+func (e *Engine) GetVNCServerStatus() bool {
+	return e.vncSrv != nil
+}
+
+func (e *Engine) stopVNCServer() error {
+	if e.vncSrv == nil {
+		return nil
+	}
+
+	if err := e.cleanupVNCPortRedirection(); err != nil {
+		log.Warnf("cleanup VNC port redirection: %v", err)
+	}
+
+	if registrar, ok := e.firewall.(interface {
+		UnregisterNetstackService(protocol nftypes.Protocol, port uint16)
+	}); ok {
+		registrar.UnregisterNetstackService(nftypes.TCP, vncInternalPort)
+	}
+
+	log.Info("stopping VNC server")
+	err := e.vncSrv.Stop()
+	e.vncSrv = nil
+	if err != nil {
+		return fmt.Errorf("stop VNC server: %w", err)
+	}
+	return nil
+}
--- a/client/internal/engine_vnc_console_freebsd.go
+++ b/client/internal/engine_vnc_console_freebsd.go
@@ -0,0 +1,31 @@
+//go:build freebsd
+
+package internal
+
+import (
+	"fmt"
+
+	log "github.com/sirupsen/logrus"
+
+	vncserver "github.com/netbirdio/netbird/client/vnc/server"
+)
+
+// newConsoleVNC builds the FreeBSD console fallback: vt(4) framebuffer
+// for capture, /dev/uinput for input. The uinput device requires the
+// `uinput` kernel module (`kldload uinput`); without it, input init
+// fails and we drop to a stub injector so the user still gets a
+// view-only screen mirror.
+//
+// An error is returned only when the framebuffer poller reports a zero
+// width or height; the poller is closed before returning in that case.
+func newConsoleVNC() (vncserver.ScreenCapturer, vncserver.InputInjector, error) {
+	poller := vncserver.NewFBPoller("")
+	w, h := poller.Width(), poller.Height()
+	if w == 0 || h == 0 {
+		poller.Close()
+		return nil, nil, fmt.Errorf("vt framebuffer init failed (vt may not allow mmap on this driver)")
+	}
+
+	inj, err := vncserver.NewUInputInjector(w, h)
+	if err != nil {
+		// Input is optional here: keep the capturer and fall back to view-only.
+		log.Infof("VNC console: uinput unavailable (%v); view-only mode. Run `kldload uinput` to enable input.", err)
+		return poller, &vncserver.StubInputInjector{}, nil
+	}
+	return poller, inj, nil
+}
--- a/client/internal/engine_vnc_console_linux.go
+++ b/client/internal/engine_vnc_console_linux.go
@@ -0,0 +1,28 @@
+//go:build linux && !android
+
+package internal
+
+import (
+	"fmt"
+
+	vncserver "github.com/netbirdio/netbird/client/vnc/server"
+)
+
+// newConsoleVNC assembles the console VNC backend for machines without a
+// running X server: a framebuffer poller for capture plus a uinput injector
+// for input. newPlatformVNC uses it as the automatic fallback when X is
+// unreachable.
+//
+// Both parts are required. If the poller reports a zero dimension or the
+// uinput injector cannot be created, the poller is closed and an error is
+// returned so the caller can drop back to a stub.
+func newConsoleVNC() (vncserver.ScreenCapturer, vncserver.InputInjector, error) {
+	fb := vncserver.NewFBPoller("")
+
+	width, height := fb.Width(), fb.Height()
+	if width == 0 || height == 0 {
+		fb.Close()
+		return nil, nil, fmt.Errorf("framebuffer capturer init failed (is /dev/fb0 readable?)")
+	}
+
+	injector, injErr := vncserver.NewUInputInjector(width, height)
+	if injErr != nil {
+		fb.Close()
+		return nil, nil, fmt.Errorf("uinput init: %w", injErr)
+	}
+
+	return fb, injector, nil
+}
--- a/client/internal/engine_vnc_darwin.go
+++ b/client/internal/engine_vnc_darwin.go
@@ -0,0 +1,23 @@
+//go:build darwin && !ios
+
+package internal
+
+import (
+	log "github.com/sirupsen/logrus"
+
+	vncserver "github.com/netbirdio/netbird/client/vnc/server"
+)
+
+// newPlatformVNC returns the macOS screen capturer and input injector. The
+// platform is always reported as supported; when the native input injector
+// cannot be created a stub injector is returned in its place.
+func newPlatformVNC() (vncserver.ScreenCapturer, vncserver.InputInjector, bool) {
+	capturer := vncserver.NewMacPoller()
+
+	injector, err := vncserver.NewMacInputInjector()
+	if err == nil {
+		return capturer, injector, true
+	}
+
+	log.Debugf("VNC: macOS input injector: %v", err)
+	return capturer, &vncserver.StubInputInjector{}, true
+}
+
+// vncNeedsServiceMode always reports false on macOS: the VNC server is never
+// switched into service mode here.
+func vncNeedsServiceMode() bool {
+	return false
+}
--- a/client/internal/engine_vnc_stub.go
+++ b/client/internal/engine_vnc_stub.go
@@ -0,0 +1,13 @@
+//go:build (!windows && !darwin && !freebsd && !(linux && !android)) || (darwin && ios)
+
+package internal
+
+import vncserver "github.com/netbirdio/netbird/client/vnc/server"
+
+// newPlatformVNC is the stub for platforms excluded by the build constraint
+// above: it returns no capturer, no injector, and false to signal that VNC is
+// unsupported.
+func newPlatformVNC() (vncserver.ScreenCapturer, vncserver.InputInjector, bool) {
+	return nil, nil, false
+}
+
+// vncNeedsServiceMode always reports false in the stub build.
+func vncNeedsServiceMode() bool {
+	return false
+}
--- a/client/internal/engine_vnc_windows.go
+++ b/client/internal/engine_vnc_windows.go
@@ -0,0 +1,13 @@
+//go:build windows
+
+package internal
+
+import vncserver "github.com/netbirdio/netbird/client/vnc/server"
+
+// newPlatformVNC returns the Windows desktop capturer and input injector and
+// always reports the platform as supported.
+func newPlatformVNC() (vncserver.ScreenCapturer, vncserver.InputInjector, bool) {
+	return vncserver.NewDesktopCapturer(), vncserver.NewWindowsInputInjector(), true
+}
+
+// vncNeedsServiceMode reports whether the current process runs in session 0,
+// in which case the engine enables service mode on the VNC server.
+func vncNeedsServiceMode() bool {
+	return vncserver.GetCurrentSessionID() == 0
+}
--- a/client/internal/engine_vnc_x11.go
+++ b/client/internal/engine_vnc_x11.go
@@ -0,0 +1,35 @@
+//go:build (linux && !android) || freebsd
+
+package internal
+
+import (
+	log "github.com/sirupsen/logrus"
+
+	vncserver "github.com/netbirdio/netbird/client/vnc/server"
+)
+
+// newPlatformVNC picks the capture/input backend in order of preference:
+// X11 when an X server is reachable, otherwise the framebuffer console
+// backend from newConsoleVNC. When neither is usable it returns stub
+// implementations together with false.
+func newPlatformVNC() (vncserver.ScreenCapturer, vncserver.InputInjector, bool) {
+	// NewX11InputInjector probes DISPLAY (and /proc) eagerly, so a non-nil
+	// error here means no X.
+	x11Injector, x11Err := vncserver.NewX11InputInjector("")
+	if x11Err == nil {
+		return vncserver.NewX11Poller(""), x11Injector, true
+	}
+	log.Debugf("VNC: X11 not available: %v", x11Err)
+
+	// Headless / pre-X states (kernel console, login manager without X,
+	// physical server in recovery): stream the framebuffer and inject input
+	// via /dev/uinput.
+	capturer, injector, consoleErr := newConsoleVNC()
+	if consoleErr != nil {
+		log.Debugf("VNC: framebuffer console fallback unavailable: %v", consoleErr)
+		return &vncserver.StubCapturer{}, &vncserver.StubInputInjector{}, false
+	}
+
+	log.Infof("VNC: using framebuffer console capture (%dx%d)", capturer.Width(), capturer.Height())
+	return capturer, injector, true
+}
+
+// vncNeedsServiceMode always reports false on Linux and FreeBSD: the VNC
+// server is never switched into service mode here.
+func vncNeedsServiceMode() bool {
+	return false
+}
--- a/client/internal/profilemanager/config.go
+++ b/client/internal/profilemanager/config.go
@@ -65,6 +65,7 @@ type ConfigInput struct {
 	StateFilePath                 string
 	PreSharedKey                  *string
 	ServerSSHAllowed              *bool
+	ServerVNCAllowed              *bool
 	EnableSSHRoot                 *bool
 	EnableSSHSFTP                 *bool
 	EnableSSHLocalPortForwarding  *bool
@@ -116,6 +117,7 @@ type Config struct {
 	RosenpassEnabled              bool
 	RosenpassPermissive           bool
 	ServerSSHAllowed              *bool
+	ServerVNCAllowed              *bool
 	EnableSSHRoot                 *bool
 	EnableSSHSFTP                 *bool
 	EnableSSHLocalPortForwarding  *bool
@@ -418,6 +420,21 @@ func (config *Config) apply(input ConfigInput) (updated bool, err error) {
 		updated = true
 	}

+	if input.ServerVNCAllowed != nil {
+		if config.ServerVNCAllowed == nil || *input.ServerVNCAllowed != *config.ServerVNCAllowed {
+			if *input.ServerVNCAllowed {
+				log.Infof("enabling VNC server")
+			} else {
+				log.Infof("disabling VNC server")
+			}
+			config.ServerVNCAllowed = input.ServerVNCAllowed
+			updated = true
+		}
+	} else if config.ServerVNCAllowed == nil {
+		config.ServerVNCAllowed = util.True()
+		updated = true
+	}
+
 	if input.EnableSSHRoot != nil && input.EnableSSHRoot != config.EnableSSHRoot {
 		if *input.EnableSSHRoot {
 			log.Infof("enabling SSH root login")
--- a/client/internal/statemanager/manager.go
+++ b/client/internal/statemanager/manager.go
@@ -74,6 +74,14 @@ func New(filePath string) *Manager {
 	}
 }

+// FilePath reports the path of the state file backing this manager.
+// Calling it on a nil *Manager is safe and yields the empty string.
+func (m *Manager) FilePath() string {
+	if m != nil {
+		return m.filePath
+	}
+	return ""
+}
+
 // Start starts the state manager periodic save routine
 func (m *Manager) Start() {
 	if m == nil {
--- a/client/proto/daemon.pb.go
+++ b/client/proto/daemon.pb.go
--- a/client/proto/daemon.proto
+++ b/client/proto/daemon.proto
@@ -205,6 +205,8 @@ message LoginRequest {
  optional bool disableSSHAuth = 38;
  optional int32 sshJWTCacheTTL = 39;
  optional bool disable_ipv6 = 40;
+
+  optional bool serverVNCAllowed = 41;
 }

 message LoginResponse {
@@ -314,6 +316,8 @@ message GetConfigResponse {
  int32 sshJWTCacheTTL = 26;

  bool disable_ipv6 = 27;
+
+  bool serverVNCAllowed = 28;
 }

 // PeerState contains the latest state of a peer
@@ -394,6 +398,11 @@ message SSHServerState {
  repeated SSHSessionInfo sessions = 2;
 }

+// VNCServerState contains the latest state of the VNC server
+message VNCServerState {
+  // enabled is true while the daemon's engine holds a started VNC server.
+  bool enabled = 1;
+}
+
 // FullStatus contains the full state held by the Status instance
 message FullStatus {
  ManagementState managementState = 1;
@@ -408,6 +417,7 @@ message FullStatus {

  bool lazyConnectionEnabled = 9;
  SSHServerState sshServerState = 10;
+  VNCServerState vncServerState = 11;
 }

 // Networks
@@ -678,6 +688,8 @@ message SetConfigRequest {
  optional bool disableSSHAuth = 33;
  optional int32 sshJWTCacheTTL = 34;
  optional bool disable_ipv6 = 35;
+
+  optional bool serverVNCAllowed = 36;
 }

 message SetConfigResponse{}
--- a/client/server/server.go
+++ b/client/server/server.go
@@ -376,6 +376,7 @@ func (s *Server) SetConfig(callerCtx context.Context, msg *proto.SetConfigReques
 	config.RosenpassPermissive = msg.RosenpassPermissive
 	config.DisableAutoConnect = msg.DisableAutoConnect
 	config.ServerSSHAllowed = msg.ServerSSHAllowed
+	config.ServerVNCAllowed = msg.ServerVNCAllowed
 	config.NetworkMonitor = msg.NetworkMonitor
 	config.DisableClientRoutes = msg.DisableClientRoutes
 	config.DisableServerRoutes = msg.DisableServerRoutes
@@ -1136,6 +1137,7 @@ func (s *Server) Status(
 		pbFullStatus := fullStatus.ToProto()
 		pbFullStatus.Events = s.statusRecorder.GetEventHistory()
 		pbFullStatus.SshServerState = s.getSSHServerState()
+		pbFullStatus.VncServerState = s.getVNCServerState()
 		statusResponse.FullStatus = pbFullStatus
 	}

@@ -1175,6 +1177,26 @@ func (s *Server) getSSHServerState() *proto.SSHServerState {
 	return sshServerState
 }

+// getVNCServerState builds the VNC section of the full status. It returns nil
+// when there is no connect client or the client has no engine yet; otherwise
+// Enabled mirrors the engine's GetVNCServerStatus.
+func (s *Server) getVNCServerState() *proto.VNCServerState {
+	// Hold the mutex only long enough to copy the client reference.
+	s.mutex.Lock()
+	client := s.connectClient
+	s.mutex.Unlock()
+
+	if client != nil {
+		if eng := client.Engine(); eng != nil {
+			return &proto.VNCServerState{Enabled: eng.GetVNCServerStatus()}
+		}
+	}
+	return nil
+}
+
 // GetPeerSSHHostKey retrieves SSH host key for a specific peer
 func (s *Server) GetPeerSSHHostKey(
 	ctx context.Context,
@@ -1531,6 +1553,7 @@ func (s *Server) GetConfig(ctx context.Context, req *proto.GetConfigRequest) (*p
 		Mtu:                           int64(cfg.MTU),
 		DisableAutoConnect:            cfg.DisableAutoConnect,
 		ServerSSHAllowed:              *cfg.ServerSSHAllowed,
+		ServerVNCAllowed:              cfg.ServerVNCAllowed != nil && *cfg.ServerVNCAllowed,
 		RosenpassEnabled:              cfg.RosenpassEnabled,
 		RosenpassPermissive:           cfg.RosenpassPermissive,
 		LazyConnectionEnabled:         cfg.LazyConnectionEnabled,
--- a/client/server/setconfig_test.go
+++ b/client/server/setconfig_test.go
@@ -58,6 +58,7 @@ func TestSetConfig_AllFieldsSaved(t *testing.T) {
 	rosenpassEnabled := true
 	rosenpassPermissive := true
 	serverSSHAllowed := true
+	serverVNCAllowed := true
 	interfaceName := "utun100"
 	wireguardPort := int64(51820)
 	preSharedKey := "test-psk"
@@ -83,6 +84,7 @@ func TestSetConfig_AllFieldsSaved(t *testing.T) {
 		RosenpassEnabled:      &rosenpassEnabled,
 		RosenpassPermissive:   &rosenpassPermissive,
 		ServerSSHAllowed:      &serverSSHAllowed,
+		ServerVNCAllowed:      &serverVNCAllowed,
 		InterfaceName:         &interfaceName,
 		WireguardPort:         &wireguardPort,
 		OptionalPreSharedKey:  &preSharedKey,
@@ -127,6 +129,8 @@ func TestSetConfig_AllFieldsSaved(t *testing.T) {
 	require.Equal(t, rosenpassPermissive, cfg.RosenpassPermissive)
 	require.NotNil(t, cfg.ServerSSHAllowed)
 	require.Equal(t, serverSSHAllowed, *cfg.ServerSSHAllowed)
+	require.NotNil(t, cfg.ServerVNCAllowed)
+	require.Equal(t, serverVNCAllowed, *cfg.ServerVNCAllowed)
 	require.Equal(t, interfaceName, cfg.WgIface)
 	require.Equal(t, int(wireguardPort), cfg.WgPort)
 	require.Equal(t, preSharedKey, cfg.PreSharedKey)
@@ -179,6 +183,7 @@ func verifyAllFieldsCovered(t *testing.T, req *proto.SetConfigRequest) {
 		"RosenpassEnabled":              true,
 		"RosenpassPermissive":           true,
 		"ServerSSHAllowed":              true,
+		"ServerVNCAllowed":              true,
 		"InterfaceName":                 true,
 		"WireguardPort":                 true,
 		"OptionalPreSharedKey":          true,
@@ -240,6 +245,7 @@ func TestCLIFlags_MappedToSetConfig(t *testing.T) {
 		"enable-rosenpass":                  "RosenpassEnabled",
 		"rosenpass-permissive":              "RosenpassPermissive",
 		"allow-server-ssh":                  "ServerSSHAllowed",
+		"allow-server-vnc":                  "ServerVNCAllowed",
 		"interface-name":                    "InterfaceName",
 		"wireguard-port":                    "WireguardPort",
 		"preshared-key":                     "OptionalPreSharedKey",
--- a/client/ssh/server/executor_windows.go
+++ b/client/ssh/server/executor_windows.go
@@ -200,8 +200,8 @@ func newLsaString(s string) lsaString {
 	}
 }

-// generateS4UUserToken creates a Windows token using S4U authentication
-// This is the exact approach OpenSSH for Windows uses for public key authentication
+// generateS4UUserToken creates a Windows token using S4U authentication.
+// This is the same approach OpenSSH for Windows uses for public key authentication.
 func generateS4UUserToken(logger *log.Entry, username, domain string) (windows.Handle, error) {
 	userCpn := buildUserCpn(username, domain)

--- a/client/ssh/server/server.go
+++ b/client/ssh/server/server.go
@@ -551,27 +551,7 @@ func (s *Server) checkTokenAge(token *gojwt.Token, jwtConfig *JWTConfig) error {
 		maxTokenAge = DefaultJWTMaxTokenAge
 	}

-	claims, ok := token.Claims.(gojwt.MapClaims)
-	if !ok {
-		userID := extractUserID(token)
-		return fmt.Errorf("token has invalid claims format (user=%s)", userID)
-	}
-
-	iat, ok := claims["iat"].(float64)
-	if !ok {
-		userID := extractUserID(token)
-		return fmt.Errorf("token missing iat claim (user=%s)", userID)
-	}
-
-	issuedAt := time.Unix(int64(iat), 0)
-	tokenAge := time.Since(issuedAt)
-	maxAge := time.Duration(maxTokenAge) * time.Second
-	if tokenAge > maxAge {
-		userID := getUserIDFromClaims(claims)
-		return fmt.Errorf("token expired for user=%s: age=%v, max=%v", userID, tokenAge, maxAge)
-	}
-
-	return nil
+	return jwt.CheckTokenAge(token, time.Duration(maxTokenAge)*time.Second)
 }

 func (s *Server) extractAndValidateUser(token *gojwt.Token) (*auth.UserAuth, error) {
@@ -602,27 +582,7 @@ func (s *Server) hasSSHAccess(userAuth *auth.UserAuth) bool {
 }

 func extractUserID(token *gojwt.Token) string {
-	if token == nil {
-		return "unknown"
-	}
-	claims, ok := token.Claims.(gojwt.MapClaims)
-	if !ok {
-		return "unknown"
-	}
-	return getUserIDFromClaims(claims)
-}
-
-func getUserIDFromClaims(claims gojwt.MapClaims) string {
-	if sub, ok := claims["sub"].(string); ok && sub != "" {
-		return sub
-	}
-	if userID, ok := claims["user_id"].(string); ok && userID != "" {
-		return userID
-	}
-	if email, ok := claims["email"].(string); ok && email != "" {
-		return email
-	}
-	return "unknown"
+	return jwt.UserIDFromToken(token)
 }

 func (s *Server) parseTokenWithoutValidation(tokenString string) (map[string]interface{}, error) {
--- a/client/status/status.go
+++ b/client/status/status.go
@@ -131,6 +131,10 @@ type SSHServerStateOutput struct {
 	Sessions []SSHSessionOutput `json:"sessions" yaml:"sessions"`
 }

+// VNCServerStateOutput is the VNC server section of the status output,
+// serialized under the "vncServer" key in JSON and YAML.
+type VNCServerStateOutput struct {
+	// Enabled is copied from the daemon's VNCServerState.enabled field.
+	Enabled bool `json:"enabled" yaml:"enabled"`
+}
+
 type OutputOverview struct {
 	Peers                   PeersStateOutput           `json:"peers" yaml:"peers"`
 	CliVersion              string                     `json:"cliVersion" yaml:"cliVersion"`
@@ -153,6 +157,7 @@ type OutputOverview struct {
 	LazyConnectionEnabled   bool                       `json:"lazyConnectionEnabled" yaml:"lazyConnectionEnabled"`
 	ProfileName             string                     `json:"profileName" yaml:"profileName"`
 	SSHServerState          SSHServerStateOutput       `json:"sshServer" yaml:"sshServer"`
+	VNCServerState          VNCServerStateOutput       `json:"vncServer" yaml:"vncServer"`
 }

 // ConvertToStatusOutputOverview converts protobuf status to the output overview.
@@ -173,6 +178,9 @@ func ConvertToStatusOutputOverview(pbFullStatus *proto.FullStatus, opts ConvertO

 	relayOverview := mapRelays(pbFullStatus.GetRelays())
 	sshServerOverview := mapSSHServer(pbFullStatus.GetSshServerState())
+	vncServerOverview := VNCServerStateOutput{
+		Enabled: pbFullStatus.GetVncServerState().GetEnabled(),
+	}
 	peersOverview := mapPeers(pbFullStatus.GetPeers(), opts.StatusFilter, opts.PrefixNamesFilter, opts.PrefixNamesFilterMap, opts.IPsFilter, opts.ConnectionTypeFilter)

 	overview := OutputOverview{
@@ -197,6 +205,7 @@ func ConvertToStatusOutputOverview(pbFullStatus *proto.FullStatus, opts ConvertO
 		LazyConnectionEnabled:   pbFullStatus.GetLazyConnectionEnabled(),
 		ProfileName:             opts.ProfileName,
 		SSHServerState:          sshServerOverview,
+		VNCServerState:          vncServerOverview,
 	}

 	if opts.Anonymize {
@@ -533,6 +542,11 @@ func (o *OutputOverview) GeneralSummary(showURL bool, showRelays bool, showNameS
 		}
 	}

+	vncServerStatus := "Disabled"
+	if o.VNCServerState.Enabled {
+		vncServerStatus = "Enabled"
+	}
+
 	peersCountString := fmt.Sprintf("%d/%d Connected", o.Peers.Connected, o.Peers.Total)

 	var forwardingRulesString string
@@ -563,6 +577,7 @@ func (o *OutputOverview) GeneralSummary(showURL bool, showRelays bool, showNameS
 			"Quantum resistance: %s\n"+
 			"Lazy connection: %s\n"+
 			"SSH Server: %s\n"+
+			"VNC Server: %s\n"+
 			"Networks: %s\n"+
 			"%s"+
 			"Peers count: %s\n",
@@ -581,6 +596,7 @@ func (o *OutputOverview) GeneralSummary(showURL bool, showRelays bool, showNameS
 		rosenpassEnabledStatus,
 		lazyConnectionEnabledStatus,
 		sshServerStatus,
+		vncServerStatus,
 		networks,
 		forwardingRulesString,
 		peersCountString,
--- a/client/status/status_test.go
+++ b/client/status/status_test.go
@@ -404,6 +404,9 @@ func TestParsingToJSON(t *testing.T) {
 		  "sshServer":{
 		    "enabled":false,
 			"sessions":[]
+		  },
+		  "vncServer":{
+		    "enabled":false
 		  }
        }`
 	// @formatter:on
@@ -513,6 +516,8 @@ profileName: ""
 sshServer:
    enabled: false
    sessions: []
+vncServer:
+    enabled: false
 `

 	assert.Equal(t, expectedYAML, yaml)
@@ -582,6 +587,7 @@ Interface type: Kernel
 Quantum resistance: false
 Lazy connection: false
 SSH Server: Disabled
+VNC Server: Disabled
 Networks: 10.10.0.0/24
 Peers count: 2/2 Connected
 `, lastConnectionUpdate1, lastHandshake1, lastConnectionUpdate2, lastHandshake2, runtime.GOOS, runtime.GOARCH, overview.CliVersion)
@@ -607,6 +613,7 @@ Interface type: Kernel
 Quantum resistance: false
 Lazy connection: false
 SSH Server: Disabled
+VNC Server: Disabled
 Networks: 10.10.0.0/24
 Peers count: 2/2 Connected
 `
--- a/client/system/info.go
+++ b/client/system/info.go
@@ -62,6 +62,7 @@ type Info struct {
 	RosenpassEnabled    bool
 	RosenpassPermissive bool
 	ServerSSHAllowed    bool
+	ServerVNCAllowed    bool

 	DisableClientRoutes bool
 	DisableServerRoutes bool
@@ -83,6 +84,7 @@ type Info struct {
 func (i *Info) SetFlags(
 	rosenpassEnabled, rosenpassPermissive bool,
 	serverSSHAllowed *bool,
+	serverVNCAllowed *bool,
 	disableClientRoutes, disableServerRoutes,
 	disableDNS, disableFirewall, blockLANAccess, blockInbound, disableIPv6, lazyConnectionEnabled bool,
 	enableSSHRoot, enableSSHSFTP, enableSSHLocalPortForwarding, enableSSHRemotePortForwarding *bool,
@@ -93,6 +95,9 @@ func (i *Info) SetFlags(
 	if serverSSHAllowed != nil {
 		i.ServerSSHAllowed = *serverSSHAllowed
 	}
+	if serverVNCAllowed != nil {
+		i.ServerVNCAllowed = *serverVNCAllowed
+	}

 	i.DisableClientRoutes = disableClientRoutes
 	i.DisableServerRoutes = disableServerRoutes
--- a/client/ui/client_ui.go
+++ b/client/ui/client_ui.go
@@ -249,6 +249,7 @@ type serviceClient struct {
 	mQuit              *systray.MenuItem
 	mNetworks          *systray.MenuItem
 	mAllowSSH          *systray.MenuItem
+	mAllowVNC          *systray.MenuItem
 	mAutoConnect       *systray.MenuItem
 	mEnableRosenpass   *systray.MenuItem
 	mLazyConnEnabled   *systray.MenuItem
@@ -1045,6 +1046,7 @@ func (s *serviceClient) onTrayReady() {

 	s.mSettings = systray.AddMenuItem("Settings", disabledMenuDescr)
 	s.mAllowSSH = s.mSettings.AddSubMenuItemCheckbox("Allow SSH", allowSSHMenuDescr, false)
+	s.mAllowVNC = s.mSettings.AddSubMenuItemCheckbox("Allow VNC", allowVNCMenuDescr, false)
 	s.mAutoConnect = s.mSettings.AddSubMenuItemCheckbox("Connect on Startup", autoConnectMenuDescr, false)
 	s.mEnableRosenpass = s.mSettings.AddSubMenuItemCheckbox("Enable Quantum-Resistance", quantumResistanceMenuDescr, false)
 	s.mLazyConnEnabled = s.mSettings.AddSubMenuItemCheckbox("Enable Lazy Connections", lazyConnMenuDescr, false)
@@ -1452,6 +1454,7 @@ func protoConfigToConfig(cfg *proto.GetConfigResponse) *profilemanager.Config {

 	config.DisableAutoConnect = cfg.DisableAutoConnect
 	config.ServerSSHAllowed = &cfg.ServerSSHAllowed
+	config.ServerVNCAllowed = &cfg.ServerVNCAllowed
 	config.RosenpassEnabled = cfg.RosenpassEnabled
 	config.RosenpassPermissive = cfg.RosenpassPermissive
 	config.DisableNotifications = &cfg.DisableNotifications
@@ -1547,6 +1550,12 @@ func (s *serviceClient) loadSettings() {
 		s.mAllowSSH.Uncheck()
 	}

+	if cfg.ServerVNCAllowed {
+		s.mAllowVNC.Check()
+	} else {
+		s.mAllowVNC.Uncheck()
+	}
+
 	if cfg.DisableAutoConnect {
 		s.mAutoConnect.Uncheck()
 	} else {
@@ -1586,6 +1595,7 @@ func (s *serviceClient) loadSettings() {
 func (s *serviceClient) updateConfig() error {
 	disableAutoStart := !s.mAutoConnect.Checked()
 	sshAllowed := s.mAllowSSH.Checked()
+	vncAllowed := s.mAllowVNC.Checked()
 	rosenpassEnabled := s.mEnableRosenpass.Checked()
 	lazyConnectionEnabled := s.mLazyConnEnabled.Checked()
 	blockInbound := s.mBlockInbound.Checked()
@@ -1614,6 +1624,7 @@ func (s *serviceClient) updateConfig() error {
 		Username:              currUser.Username,
 		DisableAutoConnect:    &disableAutoStart,
 		ServerSSHAllowed:      &sshAllowed,
+		ServerVNCAllowed:      &vncAllowed,
 		RosenpassEnabled:      &rosenpassEnabled,
 		LazyConnectionEnabled: &lazyConnectionEnabled,
 		BlockInbound:          &blockInbound,
--- a/client/ui/const.go
+++ b/client/ui/const.go
@@ -2,6 +2,7 @@ package main

 const (
 	allowSSHMenuDescr          = "Allow SSH connections"
+	allowVNCMenuDescr          = "Allow embedded VNC server"
 	autoConnectMenuDescr       = "Connect automatically when the service starts"
 	quantumResistanceMenuDescr = "Enable post-quantum security via Rosenpass"
 	lazyConnMenuDescr          = "[Experimental] Enable lazy connections"
--- a/client/ui/event_handler.go
+++ b/client/ui/event_handler.go
@@ -39,6 +39,8 @@ func (h *eventHandler) listen(ctx context.Context) {
 			h.handleDisconnectClick()
 		case <-h.client.mAllowSSH.ClickedCh:
 			h.handleAllowSSHClick()
+		case <-h.client.mAllowVNC.ClickedCh:
+			h.handleAllowVNCClick()
 		case <-h.client.mAutoConnect.ClickedCh:
 			h.handleAutoConnectClick()
 		case <-h.client.mEnableRosenpass.ClickedCh:
@@ -134,6 +136,15 @@ func (h *eventHandler) handleAllowSSHClick() {

 }

+func (h *eventHandler) handleAllowVNCClick() {
+	h.toggleCheckbox(h.client.mAllowVNC)
+	if err := h.updateConfigWithErr(); err != nil {
+		h.toggleCheckbox(h.client.mAllowVNC) // revert checkbox state on error
+		log.Errorf("failed to update config: %v", err)
+		h.client.notifier.Send("Error", "Failed to update VNC settings")
+	}
+}
+
 func (h *eventHandler) handleAutoConnectClick() {
 	h.toggleCheckbox(h.client.mAutoConnect)
 	if err := h.updateConfigWithErr(); err != nil {
--- a/client/vnc/server/agent_windows.go
+++ b/client/vnc/server/agent_windows.go
@@ -0,0 +1,816 @@
+//go:build windows
+
+package server
+
+import (
+	"bufio"
+	crand "crypto/rand"
+	"encoding/binary"
+	"encoding/hex"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"io"
+	"net"
+	"os"
+	"runtime"
+	"strconv"
+	"strings"
+	"sync"
+	"time"
+	"unsafe"
+
+	log "github.com/sirupsen/logrus"
+	"golang.org/x/sys/windows"
+)
+
+const (
+	agentPort = "15900"
+
+	// agentTokenLen is the length of the random authentication token
+	// used to verify that connections to the agent come from the service.
+	agentTokenLen = 32
+
+	stillActive = 259
+
+	tokenPrimary          = 1
+	securityImpersonation = 2
+	tokenSessionID        = 12
+
+	createUnicodeEnvironment = 0x00000400
+	createNoWindow           = 0x08000000
+	createSuspended          = 0x00000004
+	createBreakawayFromJob   = 0x01000000
+)
+
+var (
+	kernel32 = windows.NewLazySystemDLL("kernel32.dll")
+	advapi32 = windows.NewLazySystemDLL("advapi32.dll")
+	userenv  = windows.NewLazySystemDLL("userenv.dll")
+
+	procWTSGetActiveConsoleSessionId = kernel32.NewProc("WTSGetActiveConsoleSessionId")
+	procCreateJobObjectW             = kernel32.NewProc("CreateJobObjectW")
+	procSetInformationJobObject      = kernel32.NewProc("SetInformationJobObject")
+	procAssignProcessToJobObject     = kernel32.NewProc("AssignProcessToJobObject")
+	procSetTokenInformation          = advapi32.NewProc("SetTokenInformation")
+	procCreateEnvironmentBlock       = userenv.NewProc("CreateEnvironmentBlock")
+	procDestroyEnvironmentBlock      = userenv.NewProc("DestroyEnvironmentBlock")
+
+	wtsapi32                       = windows.NewLazySystemDLL("wtsapi32.dll")
+	procWTSEnumerateSessionsW      = wtsapi32.NewProc("WTSEnumerateSessionsW")
+	procWTSFreeMemory              = wtsapi32.NewProc("WTSFreeMemory")
+	procWTSQuerySessionInformation = wtsapi32.NewProc("WTSQuerySessionInformationW")
+
+	iphlpapi              = windows.NewLazySystemDLL("iphlpapi.dll")
+	procGetExtendedTcpTable = iphlpapi.NewProc("GetExtendedTcpTable")
+)
+
+// GetCurrentSessionID returns the session ID of the current process, or 0 if it cannot be queried.
+func GetCurrentSessionID() uint32 {
+	var token windows.Token
+	if err := windows.OpenProcessToken(windows.CurrentProcess(),
+		windows.TOKEN_QUERY, &token); err != nil {
+		return 0
+	}
+	defer token.Close()
+	var id uint32
+	var ret uint32
+	_ = windows.GetTokenInformation(token, windows.TokenSessionId,
+		(*byte)(unsafe.Pointer(&id)), 4, &ret)
+	return id
+}
+
+func getConsoleSessionID() uint32 {
+	r, _, _ := procWTSGetActiveConsoleSessionId.Call()
+	return uint32(r)
+}
+
+const (
+	wtsActive       = 0
+	wtsConnected    = 1
+	wtsDisconnected = 4
+)
+
+type wtsSessionInfo struct {
+	SessionID      uint32
+	WinStationName [66]byte // actually *uint16, but we just need the struct size
+	State          uint32
+}
+
+// getActiveSessionID returns the session ID of the best session to attach to.
+// On a Windows Server with no console display attached, session 1 still
+// reports WTSActive (login screen "owns" the console), so a naive
+// first-active-wins pick lands on a session with no actual rendering.
+// Preference order:
+//  1. Active session with a user logged in (RDP user in session ≥2)
+//  2. Active session without a user (console at login screen)
+//  3. Console session ID
+func getActiveSessionID() uint32 {
+	var sessionInfo uintptr
+	var count uint32
+
+	r, _, _ := procWTSEnumerateSessionsW.Call(
+		0, // WTS_CURRENT_SERVER_HANDLE
+		0, // reserved
+		1, // version
+		uintptr(unsafe.Pointer(&sessionInfo)),
+		uintptr(unsafe.Pointer(&count)),
+	)
+	if r == 0 || count == 0 {
+		return getConsoleSessionID()
+	}
+	defer func() { _, _, _ = procWTSFreeMemory.Call(sessionInfo) }()
+
+	type wtsSession struct {
+		SessionID uint32
+		Station   *uint16
+		State     uint32
+	}
+	sessions := unsafe.Slice((*wtsSession)(unsafe.Pointer(sessionInfo)), count)
+
+	var withUser uint32
+	var withUserFound bool
+	var anyActive uint32
+	var anyActiveFound bool
+	for _, s := range sessions {
+		if s.SessionID == 0 {
+			continue
+		}
+		if s.State != wtsActive {
+			continue
+		}
+		if !anyActiveFound {
+			anyActive = s.SessionID
+			anyActiveFound = true
+		}
+		if !withUserFound && wtsSessionHasUser(s.SessionID) {
+			withUser = s.SessionID
+			withUserFound = true
+		}
+	}
+	if withUserFound {
+		return withUser
+	}
+	if anyActiveFound {
+		return anyActive
+	}
+	return getConsoleSessionID()
+}
+
+// reapOrphanOnPort finds any process listening on 127.0.0.1:portStr and,
+// if it's a netbird vnc-agent left over from a previous service instance,
+// terminates it. Verified by matching the full image path against our own
+// executable so we never kill an unrelated process that uses the same port.
+func reapOrphanOnPort(portStr string) {
+	port64, err := strconv.ParseUint(portStr, 10, 16)
+	if err != nil {
+		return
+	}
+	port := uint16(port64)
+	pid := tcpListenerPID(port)
+	if pid == 0 || pid == uint32(windows.GetCurrentProcessId()) {
+		return
+	}
+	h, err := windows.OpenProcess(windows.PROCESS_QUERY_LIMITED_INFORMATION|windows.PROCESS_TERMINATE|windows.SYNCHRONIZE, false, pid)
+	if err != nil {
+		log.Warnf("reap on port %d: open PID=%d: %v", port, pid, err)
+		return
+	}
+	defer windows.CloseHandle(h)
+	if !isOurAgentProcess(h) {
+		log.Warnf("reap on port %d: PID=%d is not a netbird vnc-agent, leaving it alone", port, pid)
+		return
+	}
+	if err := windows.TerminateProcess(h, 0); err != nil {
+		log.Warnf("reap on port %d: terminate PID=%d: %v", port, pid, err)
+		return
+	}
+	log.Infof("reaped orphan vnc-agent PID=%d holding port %d", pid, port)
+}
+
+// isOurAgentProcess returns true if the given process handle points at a
+// netbird.exe binary at the same path as the current process. We compare
+// full paths (case-insensitive on Windows) so co-installed netbird binaries
+// from a different install dir or unrelated apps named netbird.exe don't
+// get killed.
+func isOurAgentProcess(h windows.Handle) bool {
+	var size uint32 = windows.MAX_PATH
+	buf := make([]uint16, size)
+	if err := windows.QueryFullProcessImageName(h, 0, &buf[0], &size); err != nil {
+		return false
+	}
+	target := strings.ToLower(windows.UTF16ToString(buf[:size]))
+	selfExe, err := os.Executable()
+	if err != nil {
+		return false
+	}
+	return target == strings.ToLower(selfExe)
+}
+
+// tcpListenerPID returns the PID of the process listening on 127.0.0.1:port,
+// or 0 if none. Uses GetExtendedTcpTable with TCP_TABLE_OWNER_PID_LISTENER.
+func tcpListenerPID(port uint16) uint32 {
+	const tcpTableOwnerPidListener = 3
+	const afInet = 2
+
+	// MIB_TCPROW_OWNER_PID layout: state(4) + localAddr(4) + localPort(4) +
+	// remoteAddr(4) + remotePort(4) + owningPid(4) = 24 bytes.
+	const rowSize = 24
+
+	var size uint32
+	_, _, _ = procGetExtendedTcpTable.Call(0, uintptr(unsafe.Pointer(&size)), 0, afInet, tcpTableOwnerPidListener, 0)
+	if size == 0 {
+		return 0
+	}
+	buf := make([]byte, size)
+	r, _, _ := procGetExtendedTcpTable.Call(
+		uintptr(unsafe.Pointer(&buf[0])),
+		uintptr(unsafe.Pointer(&size)),
+		0, afInet, tcpTableOwnerPidListener, 0,
+	)
+	if r != 0 {
+		return 0
+	}
+	count := binary.LittleEndian.Uint32(buf[:4])
+	for i := uint32(0); i < count; i++ {
+		off := 4 + int(i)*rowSize
+		if off+rowSize > len(buf) {
+			break
+		}
+		// The port is in network byte order in the low 16 bits, i.e. the first two bytes in memory.
+		localPort := uint16(buf[off+8])<<8 | uint16(buf[off+9])
+		if localPort != port {
+			continue
+		}
+		localAddr := binary.LittleEndian.Uint32(buf[off+4 : off+8])
+		// 0x0100007f == 127.0.0.1 in network byte order on little-endian.
+		// We accept 0.0.0.0 too in case the orphan bound to all interfaces.
+		if localAddr != 0x0100007f && localAddr != 0 {
+			continue
+		}
+		return binary.LittleEndian.Uint32(buf[off+20 : off+24])
+	}
+	return 0
+}
+
+const wtsUserName = 5 // WTS_INFO_CLASS value WTSUserName
+
+// wtsSessionHasUser returns true if the session has a non-empty user name,
+// i.e. someone is logged in (vs. the login/Welcome screen). The console
+// session at the lock screen has WTSUserName == "".
+func wtsSessionHasUser(sessionID uint32) bool {
+	var buf uintptr
+	var bytesReturned uint32
+	r, _, _ := procWTSQuerySessionInformation.Call(
+		0, // WTS_CURRENT_SERVER_HANDLE
+		uintptr(sessionID),
+		uintptr(wtsUserName),
+		uintptr(unsafe.Pointer(&buf)),
+		uintptr(unsafe.Pointer(&bytesReturned)),
+	)
+	if r == 0 || buf == 0 {
+		return false
+	}
+	defer func() { _, _, _ = procWTSFreeMemory.Call(buf) }()
+	// First UTF-16 code unit non-zero ⇒ non-empty username.
+	return *(*uint16)(unsafe.Pointer(buf)) != 0
+}
+
+// getSystemTokenForSession duplicates the current SYSTEM token and sets its
+// session ID so the spawned process runs in the target session. Using a SYSTEM
+// token gives access to both Default and Winlogon desktops plus UIPI bypass.
+func getSystemTokenForSession(sessionID uint32) (windows.Token, error) {
+	var cur windows.Token
+	if err := windows.OpenProcessToken(windows.CurrentProcess(),
+		windows.MAXIMUM_ALLOWED, &cur); err != nil {
+		return 0, fmt.Errorf("OpenProcessToken: %w", err)
+	}
+	defer cur.Close()
+
+	var dup windows.Token
+	if err := windows.DuplicateTokenEx(cur, windows.MAXIMUM_ALLOWED, nil,
+		securityImpersonation, tokenPrimary, &dup); err != nil {
+		return 0, fmt.Errorf("DuplicateTokenEx: %w", err)
+	}
+
+	sid := sessionID
+	r, _, err := procSetTokenInformation.Call(
+		uintptr(dup),
+		uintptr(tokenSessionID),
+		uintptr(unsafe.Pointer(&sid)),
+		unsafe.Sizeof(sid),
+	)
+	if r == 0 {
+		dup.Close()
+		return 0, fmt.Errorf("SetTokenInformation(SessionId=%d): %w", sessionID, err)
+	}
+	return dup, nil
+}
+
+const agentTokenEnvVar = "NB_VNC_AGENT_TOKEN"
+
+// injectEnvVar appends a KEY=VALUE entry to a Unicode environment block.
+// The block is a sequence of null-terminated UTF-16 strings, terminated by
+// an extra null. Returns the new []uint16 backing slice; the caller must
+// hold the returned slice alive until CreateProcessAsUser completes.
+func injectEnvVar(envBlock uintptr, key, value string) []uint16 {
+	entry := key + "=" + value
+
+	// Walk the existing block to find its total length.
+	ptr := (*uint16)(unsafe.Pointer(envBlock))
+	var totalChars int
+	for {
+		ch := *(*uint16)(unsafe.Pointer(uintptr(unsafe.Pointer(ptr)) + uintptr(totalChars)*2))
+		if ch == 0 {
+			// Check for double-null terminator.
+			next := *(*uint16)(unsafe.Pointer(uintptr(unsafe.Pointer(ptr)) + uintptr(totalChars+1)*2))
+			totalChars++
+			if next == 0 {
+				// End of block (don't count the final null yet, we'll rebuild).
+				break
+			}
+		} else {
+			totalChars++
+		}
+	}
+
+	entryUTF16, _ := windows.UTF16FromString(entry)
+	// New block: existing entries + new entry (null-terminated) + final null.
+	newLen := totalChars + len(entryUTF16) + 1
+	newBlock := make([]uint16, newLen)
+	// Copy existing entries (up to but not including the final null).
+	for i := range totalChars {
+		newBlock[i] = *(*uint16)(unsafe.Pointer(uintptr(unsafe.Pointer(ptr)) + uintptr(i)*2))
+	}
+	copy(newBlock[totalChars:], entryUTF16)
+	newBlock[newLen-1] = 0 // final null terminator
+
+	return newBlock
+}
+
+func spawnAgentInSession(sessionID uint32, port string, authToken string, jobHandle windows.Handle) (windows.Handle, error) {
+	token, err := getSystemTokenForSession(sessionID)
+	if err != nil {
+		return 0, fmt.Errorf("get SYSTEM token for session %d: %w", sessionID, err)
+	}
+	defer token.Close()
+
+	var envBlock uintptr
+	r, _, e := procCreateEnvironmentBlock.Call(
+		uintptr(unsafe.Pointer(&envBlock)),
+		uintptr(token),
+		0,
+	)
+	if r == 0 {
+		// Without an environment block we cannot inject NB_VNC_AGENT_TOKEN;
+		// the agent would start unauthenticated. Abort instead of launching.
+		return 0, fmt.Errorf("CreateEnvironmentBlock: %w", e)
+	}
+	defer func() { _, _, _ = procDestroyEnvironmentBlock.Call(envBlock) }()
+
+	// Inject the auth token into the environment block so it doesn't appear
+	// in the process command line (visible via tasklist/wmic). injectedBlock
+	// must stay alive until CreateProcessAsUser returns.
+	injectedBlock := injectEnvVar(envBlock, agentTokenEnvVar, authToken)
+
+	exePath, err := os.Executable()
+	if err != nil {
+		return 0, fmt.Errorf("get executable path: %w", err)
+	}
+
+	cmdLine := fmt.Sprintf(`"%s" vnc-agent --port %s`, exePath, port)
+	cmdLineW, err := windows.UTF16PtrFromString(cmdLine)
+	if err != nil {
+		return 0, fmt.Errorf("UTF16 cmdline: %w", err)
+	}
+
+	// Create an inheritable pipe for the agent's stderr so we can relog
+	// its output in the service process.
+	var sa windows.SecurityAttributes
+	sa.Length = uint32(unsafe.Sizeof(sa))
+	sa.InheritHandle = 1
+
+	var stderrRead, stderrWrite windows.Handle
+	if err := windows.CreatePipe(&stderrRead, &stderrWrite, &sa, 0); err != nil {
+		return 0, fmt.Errorf("create stderr pipe: %w", err)
+	}
+	// The read end must NOT be inherited by the child.
+	_ = windows.SetHandleInformation(stderrRead, windows.HANDLE_FLAG_INHERIT, 0)
+
+	desktop, _ := windows.UTF16PtrFromString(`WinSta0\Default`)
+	si := windows.StartupInfo{
+		Cb:         uint32(unsafe.Sizeof(windows.StartupInfo{})),
+		Desktop:    desktop,
+		Flags:      windows.STARTF_USESHOWWINDOW | windows.STARTF_USESTDHANDLES,
+		ShowWindow: 0,
+		StdErr:     stderrWrite,
+		StdOutput:  stderrWrite,
+	}
+	var pi windows.ProcessInformation
+
+	var envPtr *uint16
+	if len(injectedBlock) > 0 {
+		envPtr = &injectedBlock[0]
+	} else if envBlock != 0 {
+		envPtr = (*uint16)(unsafe.Pointer(envBlock))
+	}
+
+	// CREATE_SUSPENDED so we can assign the process to our Job Object
+	// before it executes. Without this the agent could spawn its own child
+	// processes and have them inherit the SCM service-job (not ours), or
+	// briefly listen on the agent port before we tear it down on rollback.
+	// CREATE_BREAKAWAY_FROM_JOB lets the child leave the SCM-managed
+	// service job; harmless if that job allows breakaway, and is required
+	// before AssignProcessToJobObject can succeed in the no-nested-jobs case.
+	err = windows.CreateProcessAsUser(
+		token, nil, cmdLineW,
+		nil, nil, true, // inheritHandles=true for the pipe
+		createUnicodeEnvironment|createNoWindow|createSuspended|createBreakawayFromJob,
+		envPtr, nil, &si, &pi,
+	)
+	runtime.KeepAlive(injectedBlock)
+	// Close the write end in the parent so reads will get EOF when the child exits.
+	_ = windows.CloseHandle(stderrWrite)
+	if err != nil {
+		_ = windows.CloseHandle(stderrRead)
+		return 0, fmt.Errorf("CreateProcessAsUser: %w", err)
+	}
+
+	if jobHandle != 0 {
+		r, _, e := procAssignProcessToJobObject.Call(uintptr(jobHandle), uintptr(pi.Process))
+		if r == 0 {
+			log.Warnf("assign agent to job object: %v (orphan possible on service crash)", e)
+		}
+	}
+
+	if _, err := windows.ResumeThread(pi.Thread); err != nil {
+		log.Warnf("resume agent main thread: %v", err)
+	}
+	_ = windows.CloseHandle(pi.Thread)
+
+	// Relog agent output in the service with a [vnc-agent] prefix.
+	go relogAgentOutput(stderrRead)
+
+	log.Infof("spawned agent PID=%d in session %d on port %s", pi.ProcessId, sessionID, port)
+	return pi.Process, nil
+}
+
+// sessionManager monitors the active console session and ensures a VNC agent
+// process is running in it. When the session changes (e.g., user switch, RDP
+// connect/disconnect), it kills the old agent and spawns a new one.
+type sessionManager struct {
+	port           string
+	mu             sync.Mutex
+	agentProc      windows.Handle
+	everSpawned    bool
+	agentStartedAt time.Time
+	spawnFailures  int
+	nextSpawnAt    time.Time
+	sessionID      uint32
+	authToken      string
+	done           chan struct{}
+	// jobHandle owns the agent processes via a Windows Job Object with
+	// JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE. When the service exits or crashes,
+	// the OS closes the handle and terminates every assigned agent: no
+	// orphaned listeners holding the agent port across restarts.
+	jobHandle windows.Handle
+}
+
+func newSessionManager(port string) *sessionManager {
+	m := &sessionManager{port: port, sessionID: ^uint32(0), done: make(chan struct{})}
+	if h, err := createKillOnCloseJob(); err != nil {
+		log.Warnf("create job object for vnc-agent (orphan agents possible after crash): %v", err)
+	} else {
+		m.jobHandle = h
+	}
+	return m
+}
+
+// createKillOnCloseJob returns a Job Object configured so that closing its
+// handle (process exit or explicit Close) terminates every process assigned
+// to it. Used to keep orphaned vnc-agent processes from outliving the service.
+func createKillOnCloseJob() (windows.Handle, error) {
+	r, _, e := procCreateJobObjectW.Call(0, 0)
+	if r == 0 {
+		return 0, fmt.Errorf("CreateJobObject: %w", e)
+	}
+	job := windows.Handle(r)
+
+	// JOBOBJECT_EXTENDED_LIMIT_INFORMATION on amd64 = 144 bytes.
+	//
+	//  JOBOBJECT_BASIC_LIMIT_INFORMATION  (64 bytes with alignment padding)
+	//    PerProcessUserTimeLimit  LARGE_INTEGER  off  0
+	//    PerJobUserTimeLimit      LARGE_INTEGER  off  8
+	//    LimitFlags               DWORD          off 16
+	//    [4 byte pad to align SIZE_T]
+	//    MinimumWorkingSetSize    SIZE_T         off 24
+	//    MaximumWorkingSetSize    SIZE_T         off 32
+	//    ActiveProcessLimit       DWORD          off 40
+	//    [4 byte pad to align ULONG_PTR]
+	//    Affinity                 ULONG_PTR      off 48
+	//    PriorityClass            DWORD          off 56
+	//    SchedulingClass          DWORD          off 60
+	//  IO_COUNTERS (48)  +  4 * SIZE_T (32)  =  144 total.
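+	// We only set LimitFlags; the rest stays zero. NOTE(review): the size and
+	// offsets above assume 8-byte SIZE_T/ULONG_PTR; confirm no 32-bit build uses this.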
+	const sizeofExtended = 144
+	const offsetLimitFlags = 16
+	const jobObjectExtendedLimitInformation = 9
+	const jobObjectLimitKillOnJobClose = 0x00002000
+
+	var info [sizeofExtended]byte
+	binary.LittleEndian.PutUint32(info[offsetLimitFlags:offsetLimitFlags+4], jobObjectLimitKillOnJobClose)
+
+	r, _, e = procSetInformationJobObject.Call(
+		uintptr(job),
+		uintptr(jobObjectExtendedLimitInformation),
+		uintptr(unsafe.Pointer(&info[0])),
+		uintptr(sizeofExtended),
+	)
+	if r == 0 {
+		_ = windows.CloseHandle(job)
+		return 0, fmt.Errorf("SetInformationJobObject(KILL_ON_JOB_CLOSE): %w", e)
+	}
+	return job, nil
+}
+
+// generateAuthToken creates a new random hex token for agent authentication.
+func generateAuthToken() string {
+	b := make([]byte, agentTokenLen)
+	if _, err := crand.Read(b); err != nil {
+		log.Warnf("generate agent auth token: %v", err)
+		return ""
+	}
+	return hex.EncodeToString(b)
+}
+
+// AuthToken returns the current agent authentication token.
+func (m *sessionManager) AuthToken() string {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+	return m.authToken
+}
+
+// Stop signals the session manager to exit its polling loop and closes the
+// Job Object handle, which Windows uses as the trigger to terminate every
+// agent process this manager spawned.
+func (m *sessionManager) Stop() {
+	select {
+	case <-m.done:
+	default:
+		close(m.done)
+	}
+	m.mu.Lock()
+	if m.jobHandle != 0 {
+		_ = windows.CloseHandle(m.jobHandle)
+		m.jobHandle = 0
+	}
+	m.mu.Unlock()
+}
+
+func (m *sessionManager) run() {
+	ticker := time.NewTicker(2 * time.Second)
+	defer ticker.Stop()
+
+	for {
+		if !m.tick() {
+			return
+		}
+		select {
+		case <-m.done:
+			m.mu.Lock()
+			m.killAgent()
+			m.mu.Unlock()
+			return
+		case <-ticker.C:
+		}
+	}
+}
+
+// tick performs one session/agent-state update. Returns false if the manager
+// should permanently stop (e.g. missing SYSTEM privileges).
+func (m *sessionManager) tick() bool {
+	sid := getActiveSessionID()
+
+	m.mu.Lock()
+	defer m.mu.Unlock()
+
+	m.handleSessionChange(sid)
+	m.reapExitedAgent()
+	return m.maybeSpawnAgent(sid)
+}
+
+func (m *sessionManager) handleSessionChange(sid uint32) {
+	if sid == m.sessionID {
+		return
+	}
+	log.Infof("active session changed: %d -> %d", m.sessionID, sid)
+	m.killAgent()
+	m.sessionID = sid
+}
+
+func (m *sessionManager) reapExitedAgent() {
+	if m.agentProc == 0 {
+		return
+	}
+	var code uint32
+	if err := windows.GetExitCodeProcess(m.agentProc, &code); err != nil {
+		log.Debugf("GetExitCodeProcess: %v", err)
+		return
+	}
+	if code == stillActive {
+		return
+	}
+	m.scheduleNextSpawn(code, time.Since(m.agentStartedAt))
+	if err := windows.CloseHandle(m.agentProc); err != nil {
+		log.Debugf("close agent handle: %v", err)
+	}
+	m.agentProc = 0
+}
+
+// scheduleNextSpawn applies an exponential backoff on fast crashes (<5s) and
+// resets immediately otherwise.
+func (m *sessionManager) scheduleNextSpawn(exitCode uint32, lifetime time.Duration) {
+	if lifetime < 5*time.Second {
+		m.spawnFailures++
+		backoff := time.Duration(1<<min(m.spawnFailures, 5)) * time.Second
+		if backoff > 30*time.Second {
+			backoff = 30 * time.Second
+		}
+		m.nextSpawnAt = time.Now().Add(backoff)
+		log.Warnf("agent exited (code=%d) after %v, retrying in %v (failures=%d)", exitCode, lifetime.Round(time.Millisecond), backoff, m.spawnFailures)
+		return
+	}
+	m.spawnFailures = 0
+	m.nextSpawnAt = time.Time{}
+	log.Infof("agent exited (code=%d) after %v, respawning", exitCode, lifetime.Round(time.Second))
+}
+
+// maybeSpawnAgent spawns a new agent if there's no current one and the backoff
+// window has elapsed. Returns false to permanently stop the manager when the
+// service lacks the privileges needed to spawn cross-session.
+func (m *sessionManager) maybeSpawnAgent(sid uint32) bool {
+	if m.agentProc != 0 || sid == 0xFFFFFFFF || !time.Now().After(m.nextSpawnAt) {
+		return true
+	}
+	// Reap any orphan still holding the agent port from a previous service
+	// instance, only on our very first spawn. Afterwards we own the agent's
+	// lifecycle; a port-release race is absorbed by the spawn-failure backoff.
+	if !m.everSpawned {
+		reapOrphanOnPort(m.port)
+	}
+	m.authToken = generateAuthToken()
+	if m.authToken == "" {
+		// Never launch an agent without a token; retry on the next tick.
+		return true
+	}
+	h, err := spawnAgentInSession(sid, m.port, m.authToken, m.jobHandle)
+	if err != nil {
+		m.authToken = ""
+		if errors.Is(err, windows.ERROR_PRIVILEGE_NOT_HELD) {
+			// SE_TCB_NAME (token-impersonation across sessions) is only granted
+			// to SYSTEM, so retrying can never succeed: log once and give up.
+			log.Warnf("VNC service mode disabled: agent spawn requires SYSTEM privileges (got: %v)", err)
+			return false
+		}
+		log.Warnf("spawn agent in session %d: %v", sid, err)
+		return true
+	}
+	m.agentProc = h
+	m.agentStartedAt = time.Now()
+	m.everSpawned = true
+	return true
+}
+
+func (m *sessionManager) killAgent() {
+	if m.agentProc == 0 {
+		return
+	}
+	_ = windows.TerminateProcess(m.agentProc, 0)
+	_ = windows.CloseHandle(m.agentProc)
+	m.agentProc = 0
+	log.Info("killed old agent")
+}
+
+// relogAgentOutput reads log lines from the agent's stderr pipe and
+// relogs them with the service's formatter. Each line is tried as JSON
+// first (the agent's normal log format); plain-text lines (e.g. cobra
+// error output, panic stack traces) are forwarded verbatim so failures
+// during early agent startup remain visible.
+func relogAgentOutput(pipe windows.Handle) {
+	// f owns the handle from here on and f.Close is its only close: a second
+	// CloseHandle could hit an unrelated handle that reused the same value.
+	f := os.NewFile(uintptr(pipe), "vnc-agent-stderr")
+	defer f.Close()
+
+	entry := log.WithField("component", "vnc-agent")
+	scanner := bufio.NewScanner(f)
+	scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024)
+	for scanner.Scan() {
+		line := scanner.Bytes()
+		if len(line) == 0 {
+			continue
+		}
+		var m map[string]any
+		if line[0] != '{' || json.Unmarshal(line, &m) != nil {
+			entry.Warn(string(line))
+			continue
+		}
+		msg, _ := m["msg"].(string)
+		if msg == "" {
+			continue
+		}
+
+		fields := make(log.Fields)
+		for k, v := range m {
+			switch k {
+			case "msg", "level", "time", "func":
+				continue
+			case "caller":
+				fields["source"] = v
+			default:
+				fields[k] = v
+			}
+		}
+		e := entry.WithFields(fields)
+
+		switch m["level"] {
+		case "error":
+			e.Error(msg)
+		case "warning":
+			e.Warn(msg)
+		case "debug":
+			e.Debug(msg)
+		case "trace":
+			e.Trace(msg)
+		default:
+			e.Info(msg)
+		}
+	}
+	if err := scanner.Err(); err != nil {
+		entry.Warnf("read agent output: %v", err)
+	}
+}
+
+// proxyToAgent connects to the agent, sends the auth token, then proxies
+// the VNC client connection bidirectionally.
+func proxyToAgent(client net.Conn, port string, authToken string) {
+	defer client.Close()
+
+	addr := "127.0.0.1:" + port
+	var agentConn net.Conn
+	var err error
+	for range 50 {
+		agentConn, err = net.DialTimeout("tcp", addr, time.Second)
+		if err == nil {
+			break
+		}
+		time.Sleep(200 * time.Millisecond)
+	}
+	if err != nil {
+		log.Warnf("proxy cannot reach agent at %s: %v", addr, err)
+		return
+	}
+	defer agentConn.Close()
+
+	// Send the auth token so the agent can verify this connection
+	// comes from the trusted service process.
+	tokenBytes, _ := hex.DecodeString(authToken)
+	if _, err := agentConn.Write(tokenBytes); err != nil {
+		log.Warnf("send auth token to agent: %v", err)
+		return
+	}
+
+	log.Debugf("proxy connected to agent, starting bidirectional copy")
+
+	done := make(chan struct{}, 2)
+	cp := func(label string, dst, src net.Conn) {
+		n, err := io.Copy(dst, src)
+		log.Debugf("proxy %s: %d bytes, err=%v", label, n, err)
+		done <- struct{}{}
+	}
+	go cp("client→agent", agentConn, client)
+	go cp("agent→client", client, agentConn)
+	<-done
+}
+
+// logCleanupCall invokes a Windows syscall used solely as a cleanup primitive
+// (CloseClipboard, ReleaseDC, etc.) and logs failures at trace level. The
+// indirection lets us satisfy errcheck without scattering ignored returns at
+// each call site, while still capturing diagnostic info when the OS reports
+// a failure.
+func logCleanupCall(name string, proc *windows.LazyProc) {
+	r, _, err := proc.Call()
+	if r == 0 && err != nil && err != windows.NTE_OP_OK {
+		log.Tracef("%s: %v", name, err)
+	}
+}
+
+// logCleanupCallArgs is logCleanupCall for procs that take arguments; common
+// pattern for release-by-handle syscalls.
+func logCleanupCallArgs(name string, proc *windows.LazyProc, args ...uintptr) {
+	r, _, err := proc.Call(args...)
+	if r == 0 && err != nil && err != windows.NTE_OP_OK {
+		log.Tracef("%s: %v", name, err)
+	}
+}
--- a/client/vnc/server/capture_darwin.go
+++ b/client/vnc/server/capture_darwin.go
@@ -0,0 +1,597 @@
+//go:build darwin && !ios
+
+package server
+
+import (
+	"errors"
+	"fmt"
+	"hash/maphash"
+	"image"
+	"os"
+	"runtime"
+	"strconv"
+	"sync"
+	"sync/atomic"
+	"time"
+	"unsafe"
+
+	"github.com/ebitengine/purego"
+	log "github.com/sirupsen/logrus"
+)
+
+// darwinCaptureOnce guards the one-time framework loading performed by
+// initDarwinCapture.
+var darwinCaptureOnce sync.Once
+
+// Function pointers bound to CoreGraphics / CoreFoundation symbols by
+// initDarwinCapture. The two screen-capture access functions are bound only
+// when their symbol can be resolved and stay nil otherwise, so callers must
+// nil-check them. darwinCaptureReady is set to true at the end of a
+// successful initDarwinCapture run.
+var (
+	cgMainDisplayID                func() uint32
+	cgDisplayPixelsWide            func(uint32) uintptr
+	cgDisplayPixelsHigh            func(uint32) uintptr
+	cgDisplayCreateImage           func(uint32) uintptr
+	cgImageGetWidth                func(uintptr) uintptr
+	cgImageGetHeight               func(uintptr) uintptr
+	cgImageGetBytesPerRow          func(uintptr) uintptr
+	cgImageGetBitsPerPixel         func(uintptr) uintptr
+	cgImageGetDataProvider         func(uintptr) uintptr
+	cgDataProviderCopyData         func(uintptr) uintptr
+	cgImageRelease                 func(uintptr)
+	cfDataGetLength                func(uintptr) int64
+	cfDataGetBytePtr               func(uintptr) uintptr
+	cfRelease                      func(uintptr)
+	cgPreflightScreenCaptureAccess func() bool
+	cgRequestScreenCaptureAccess   func() bool
+	darwinCaptureReady             bool
+)
+
+// initDarwinCapture loads CoreGraphics and CoreFoundation via purego and
+// binds the package-level function pointers. It runs at most once; if either
+// framework fails to load the failure is logged at debug level and
+// darwinCaptureReady stays false.
+func initDarwinCapture() {
+	darwinCaptureOnce.Do(func() {
+		cg, err := purego.Dlopen("/System/Library/Frameworks/CoreGraphics.framework/CoreGraphics", purego.RTLD_NOW|purego.RTLD_GLOBAL)
+		if err != nil {
+			log.Debugf("load CoreGraphics: %v", err)
+			return
+		}
+		cf, err := purego.Dlopen("/System/Library/Frameworks/CoreFoundation.framework/CoreFoundation", purego.RTLD_NOW|purego.RTLD_GLOBAL)
+		if err != nil {
+			log.Debugf("load CoreFoundation: %v", err)
+			return
+		}
+
+		// Mandatory symbols: display geometry, image capture and raw pixel access.
+		purego.RegisterLibFunc(&cgMainDisplayID, cg, "CGMainDisplayID")
+		purego.RegisterLibFunc(&cgDisplayPixelsWide, cg, "CGDisplayPixelsWide")
+		purego.RegisterLibFunc(&cgDisplayPixelsHigh, cg, "CGDisplayPixelsHigh")
+		purego.RegisterLibFunc(&cgDisplayCreateImage, cg, "CGDisplayCreateImage")
+		purego.RegisterLibFunc(&cgImageGetWidth, cg, "CGImageGetWidth")
+		purego.RegisterLibFunc(&cgImageGetHeight, cg, "CGImageGetHeight")
+		purego.RegisterLibFunc(&cgImageGetBytesPerRow, cg, "CGImageGetBytesPerRow")
+		purego.RegisterLibFunc(&cgImageGetBitsPerPixel, cg, "CGImageGetBitsPerPixel")
+		purego.RegisterLibFunc(&cgImageGetDataProvider, cg, "CGImageGetDataProvider")
+		purego.RegisterLibFunc(&cgDataProviderCopyData, cg, "CGDataProviderCopyData")
+		purego.RegisterLibFunc(&cgImageRelease, cg, "CGImageRelease")
+		purego.RegisterLibFunc(&cfDataGetLength, cf, "CFDataGetLength")
+		purego.RegisterLibFunc(&cfDataGetBytePtr, cf, "CFDataGetBytePtr")
+		purego.RegisterLibFunc(&cfRelease, cf, "CFRelease")
+
+		// Screen capture permission APIs (macOS 11+). Might not exist on older versions.
+		// They are resolved with Dlsym first so a missing symbol simply leaves
+		// the corresponding function pointer nil.
+		if sym, err := purego.Dlsym(cg, "CGPreflightScreenCaptureAccess"); err == nil {
+			purego.RegisterFunc(&cgPreflightScreenCaptureAccess, sym)
+		}
+		if sym, err := purego.Dlsym(cg, "CGRequestScreenCaptureAccess"); err == nil {
+			purego.RegisterFunc(&cgRequestScreenCaptureAccess, sym)
+		}
+
+		darwinCaptureReady = true
+	})
+}
+
+
+// CGCapturer captures the macOS main display using Core Graphics.
+type CGCapturer struct {
+	// displayID is the CoreGraphics display handed to CGDisplayCreateImage.
+	displayID uint32
+	// w, h are the output dimensions: native pixels divided by downscale.
+	w, h int
+	// downscale is 1 for pixel-perfect, 2 for Retina 2:1 box-filter downscale.
+	downscale int
+	// hashSeed, lastHash and hasHash implement unchanged-frame detection:
+	// the raw frame bytes are hashed on every capture and compared with the
+	// previous hash once hasHash is set.
+	hashSeed maphash.Seed
+	lastHash uint64
+	hasHash  bool
+}
+
+// NewCGCapturer builds a capturer bound to the main display. It prompts for
+// Screen Recording permission when that has not been granted, takes one
+// probe capture to learn the native pixel size, and decides whether frames
+// are delivered at native size or downscaled 2:1.
+func NewCGCapturer() (*CGCapturer, error) {
+	initDarwinCapture()
+	if !darwinCaptureReady {
+		return nil, fmt.Errorf("CoreGraphics not available")
+	}
+
+	// The preflight/request functions are nil when the running macOS does
+	// not export them; in that case no prompt is attempted.
+	needsPermission := cgPreflightScreenCaptureAccess != nil && !cgPreflightScreenCaptureAccess()
+	if needsPermission {
+		if cgRequestScreenCaptureAccess != nil {
+			cgRequestScreenCaptureAccess()
+		}
+		openPrivacyPane("Privacy_ScreenCapture")
+		log.Warn("Screen Recording permission not granted. " +
+			"Opened System Settings > Privacy & Security > Screen Recording; enable netbird and restart.")
+	}
+
+	mainDisplay := cgMainDisplayID()
+	capturer := &CGCapturer{
+		displayID: mainDisplay,
+		downscale: 1,
+		hashSeed:  maphash.MakeSeed(),
+	}
+
+	// CGDisplayPixelsWide/High report logical points on Retina displays while
+	// CGDisplayCreateImage yields native pixels (often 2x), so the size of a
+	// real capture is the only trustworthy source for the native dimensions.
+	probe, err := capturer.Capture()
+	if err != nil {
+		return nil, fmt.Errorf("probe capture: %w", err)
+	}
+	// The probe stored a frame hash; forget it so the first real capture is
+	// not reported as unchanged.
+	capturer.hasHash = false
+
+	nativeW, nativeH := probe.Rect.Dx(), probe.Rect.Dy()
+	if nativeW == 0 || nativeH == 0 {
+		return nil, errors.New("display dimensions are zero")
+	}
+
+	logicalW := int(cgDisplayPixelsWide(mainDisplay))
+	logicalH := int(cgDisplayPixelsHigh(mainDisplay))
+
+	// Halving both axes cuts the pixel count 4x, which shrinks conversion,
+	// diffing and wire data accordingly. Only done for even native sizes that
+	// are at least twice the logical size, and only when not disabled.
+	atLeastDouble := nativeW >= 2*logicalW && nativeH >= 2*logicalH
+	evenSize := nativeW%2 == 0 && nativeH%2 == 0
+	if !retinaDownscaleDisabled() && atLeastDouble && evenSize {
+		capturer.downscale = 2
+	}
+	capturer.w = nativeW / capturer.downscale
+	capturer.h = nativeH / capturer.downscale
+
+	log.Infof("macOS capturer ready: %dx%d (native %dx%d, logical %dx%d, downscale=%d, display=%d)",
+		capturer.w, capturer.h, nativeW, nativeH, logicalW, logicalH, capturer.downscale, mainDisplay)
+	return capturer, nil
+}
+
+// retinaDownscaleDisabled reports whether the environment variable named by
+// EnvVNCDisableDownscale holds a true boolean. An unset or empty variable
+// means "not disabled"; an unparsable value is logged and also treated as
+// "not disabled".
+func retinaDownscaleDisabled() bool {
+	raw := os.Getenv(EnvVNCDisableDownscale)
+	if raw == "" {
+		return false
+	}
+	disabled, err := strconv.ParseBool(raw)
+	if err == nil {
+		return disabled
+	}
+	log.Warnf("parse %s: %v", EnvVNCDisableDownscale, err)
+	return false
+}
+
+// Width reports the width of delivered frames in pixels (after downscale).
+func (c *CGCapturer) Width() int {
+	return c.w
+}
+
+// Height reports the height of delivered frames in pixels (after downscale).
+func (c *CGCapturer) Height() int {
+	return c.h
+}
+
+// CaptureInto grabs the main display and writes it into dst as RGBA,
+// skipping the per-frame image.RGBA allocation that Capture does.
+//
+// dst must already have the output dimensions (native size divided by the
+// downscale factor); otherwise an error is returned and dst is left
+// untouched. Returns errFrameUnchanged, also without touching dst, when the
+// raw frame hash matches the previously delivered frame.
+func (c *CGCapturer) CaptureInto(dst *image.RGBA) error {
+	cgImage := cgDisplayCreateImage(c.displayID)
+	if cgImage == 0 {
+		return fmt.Errorf("CGDisplayCreateImage returned nil (screen recording permission?)")
+	}
+	defer cgImageRelease(cgImage)
+
+	w := int(cgImageGetWidth(cgImage))
+	h := int(cgImageGetHeight(cgImage))
+	bytesPerRow := int(cgImageGetBytesPerRow(cgImage))
+	bpp := int(cgImageGetBitsPerPixel(cgImage))
+
+	provider := cgImageGetDataProvider(cgImage)
+	if provider == 0 {
+		return fmt.Errorf("CGImageGetDataProvider returned nil")
+	}
+	cfData := cgDataProviderCopyData(provider)
+	if cfData == 0 {
+		return fmt.Errorf("CGDataProviderCopyData returned nil")
+	}
+	defer cfRelease(cfData)
+
+	dataLen := int(cfDataGetLength(cfData))
+	dataPtr := cfDataGetBytePtr(cfData)
+	if dataPtr == 0 || dataLen == 0 {
+		return fmt.Errorf("empty image data")
+	}
+	// src aliases the CFData buffer and is only valid until the deferred
+	// cfRelease above runs.
+	src := unsafe.Slice((*byte)(unsafe.Pointer(dataPtr)), dataLen)
+
+	hash := maphash.Bytes(c.hashSeed, src)
+	if c.hasHash && hash == c.lastHash {
+		return errFrameUnchanged
+	}
+
+	ds := c.downscale
+	if ds < 1 {
+		ds = 1
+	}
+	outW := w / ds
+	outH := h / ds
+	if dst.Rect.Dx() != outW || dst.Rect.Dy() != outH {
+		return fmt.Errorf("dst size mismatch: dst=%dx%d capturer=%dx%d",
+			dst.Rect.Dx(), dst.Rect.Dy(), outW, outH)
+	}
+
+	// Record the hash only once the frame is certain to be delivered. If it
+	// were stored before the size check, a rejected call would make the next
+	// call with a correct dst report "unchanged" without ever filling it.
+	c.lastHash = hash
+	c.hasHash = true
+
+	bytesPerPixel := bpp / 8
+	switch {
+	case bytesPerPixel == 4 && ds == 1:
+		convertBGRAToRGBA(dst.Pix, dst.Stride, src, bytesPerRow, w, h)
+	case bytesPerPixel == 4 && ds == 2:
+		convertBGRAToRGBADownscale2(dst.Pix, dst.Stride, src, bytesPerRow, outW, outH)
+	default:
+		// Same per-pixel fallback that Capture uses.
+		convertBGRAToRGBAGeneric(dst.Pix, dst.Stride, src, bytesPerRow, outW, outH, bytesPerPixel, ds)
+	}
+	return nil
+}
+
+// Capture grabs the main display and returns it as a newly allocated RGBA
+// image whose size is the native image size divided by c.downscale. It
+// returns errFrameUnchanged (and no image) when the hash of the raw frame
+// bytes equals the hash recorded by the previous capture.
+func (c *CGCapturer) Capture() (*image.RGBA, error) {
+	cgImage := cgDisplayCreateImage(c.displayID)
+	if cgImage == 0 {
+		return nil, fmt.Errorf("CGDisplayCreateImage returned nil (screen recording permission?)")
+	}
+	defer cgImageRelease(cgImage)
+
+	w := int(cgImageGetWidth(cgImage))
+	h := int(cgImageGetHeight(cgImage))
+	bytesPerRow := int(cgImageGetBytesPerRow(cgImage))
+	bpp := int(cgImageGetBitsPerPixel(cgImage))
+
+	provider := cgImageGetDataProvider(cgImage)
+	if provider == 0 {
+		return nil, fmt.Errorf("CGImageGetDataProvider returned nil")
+	}
+
+	cfData := cgDataProviderCopyData(provider)
+	if cfData == 0 {
+		return nil, fmt.Errorf("CGDataProviderCopyData returned nil")
+	}
+	defer cfRelease(cfData)
+
+	dataLen := int(cfDataGetLength(cfData))
+	dataPtr := cfDataGetBytePtr(cfData)
+	if dataPtr == 0 || dataLen == 0 {
+		return nil, fmt.Errorf("empty image data")
+	}
+
+	// src aliases the CFData buffer; it must not be used after the deferred
+	// cfRelease has run, which is why the pixels are converted into img below.
+	src := unsafe.Slice((*byte)(unsafe.Pointer(dataPtr)), dataLen)
+
+	hash := maphash.Bytes(c.hashSeed, src)
+	if c.hasHash && hash == c.lastHash {
+		return nil, errFrameUnchanged
+	}
+	c.lastHash = hash
+	c.hasHash = true
+
+	// Treat an unset downscale factor as "no downscale".
+	ds := c.downscale
+	if ds < 1 {
+		ds = 1
+	}
+	outW := w / ds
+	outH := h / ds
+	img := image.NewRGBA(image.Rect(0, 0, outW, outH))
+
+	// Fast paths exist for 4-byte pixels at 1:1 and 2:1; everything else goes
+	// through the per-pixel fallback.
+	bytesPerPixel := bpp / 8
+	switch {
+	case bytesPerPixel == 4 && ds == 1:
+		convertBGRAToRGBA(img.Pix, img.Stride, src, bytesPerRow, w, h)
+	case bytesPerPixel == 4 && ds == 2:
+		convertBGRAToRGBADownscale2(img.Pix, img.Stride, src, bytesPerRow, outW, outH)
+	default:
+		convertBGRAToRGBAGeneric(img.Pix, img.Stride, src, bytesPerRow, outW, outH, bytesPerPixel, ds)
+	}
+
+	return img, nil
+}
+
+// convertBGRAToRGBAGeneric is the per-pixel fallback used when no fast path
+// matches. For every output pixel it samples the source pixel at
+// (col*ds, row*ds), reads its first three bytes as B, G, R and stores them
+// as R, G, B plus an opaque alpha. bytesPerPixel is the source pixel size;
+// the destination is always 4 bytes per pixel.
+func convertBGRAToRGBAGeneric(dst []byte, dstStride int, src []byte, srcStride, outW, outH, bytesPerPixel, ds int) {
+	srcStep := ds * bytesPerPixel
+	for y := 0; y < outH; y++ {
+		in := y * ds * srcStride
+		out := y * dstStride
+		for x := 0; x < outW; x++ {
+			r, g, b := src[in+2], src[in+1], src[in]
+			dst[out] = r
+			dst[out+1] = g
+			dst[out+2] = b
+			dst[out+3] = 0xff
+			in += srcStep
+			out += 4
+		}
+	}
+}
+
+// convertBGRAToRGBADownscale2 box-filters a BGRA source down by two in each
+// direction: every output pixel is the per-channel mean of a 2x2 source
+// block, written as RGBA with opaque alpha. outW and outH are the output
+// dimensions, so the source must cover 2*outW by 2*outH pixels. Images with
+// at least 32 output rows are split into row bands handled by up to
+// GOMAXPROCS goroutines.
+func convertBGRAToRGBADownscale2(dst []byte, dstStride int, src []byte, srcStride, outW, outH int) {
+	// averageRows fills output rows [from, to).
+	averageRows := func(from, to int) {
+		for y := from; y < to; y++ {
+			upper := 2 * y * srcStride
+			lower := upper + srcStride
+			out := y * dstStride
+			for x := 0; x < outW; x++ {
+				u := upper + x*8
+				l := lower + x*8
+				blue := (uint32(src[u]) + uint32(src[u+4]) + uint32(src[l]) + uint32(src[l+4])) / 4
+				green := (uint32(src[u+1]) + uint32(src[u+5]) + uint32(src[l+1]) + uint32(src[l+5])) / 4
+				red := (uint32(src[u+2]) + uint32(src[u+6]) + uint32(src[l+2]) + uint32(src[l+6])) / 4
+				o := out + x*4
+				dst[o+0] = byte(red)
+				dst[o+1] = byte(green)
+				dst[o+2] = byte(blue)
+				dst[o+3] = 0xff
+			}
+		}
+	}
+
+	// Small images are not worth the goroutine overhead.
+	bands := min(runtime.GOMAXPROCS(0), outH)
+	if bands < 1 || outH < 32 {
+		bands = 1
+	}
+	if bands == 1 {
+		averageRows(0, outH)
+		return
+	}
+
+	rowsPerBand := (outH + bands - 1) / bands
+	var wg sync.WaitGroup
+	for start := 0; start < outH; start += rowsPerBand {
+		end := min(start+rowsPerBand, outH)
+		wg.Add(1)
+		go func(from, to int) {
+			defer wg.Done()
+			averageRows(from, to)
+		}(start, end)
+	}
+	wg.Wait()
+}
+
+// convertBGRAToRGBA copies a w x h BGRA image from src into dst as RGBA,
+// forcing alpha to 0xff. Each pixel is handled as one uint32 word; the masks
+// assume a little-endian host (memory bytes B,G,R,A load as 0xAARRGGBB).
+// Images with at least 64 rows are split into row bands converted by up to
+// GOMAXPROCS goroutines.
+//
+// An empty image (w <= 0 or h <= 0) is a no-op. Without that guard a
+// zero-width image with rows would panic when taking the address of the
+// first element of an empty row.
+func convertBGRAToRGBA(dst []byte, dstStride int, src []byte, srcStride, w, h int) {
+	if w <= 0 || h <= 0 {
+		return
+	}
+
+	workers := runtime.GOMAXPROCS(0)
+	if workers > h {
+		workers = h
+	}
+	if workers < 1 || h < 64 {
+		workers = 1
+	}
+
+	// convertRows converts rows [y0, y1).
+	convertRows := func(y0, y1 int) {
+		rowBytes := w * 4
+		for row := y0; row < y1; row++ {
+			dstRow := dst[row*dstStride : row*dstStride+rowBytes]
+			srcRow := src[row*srcStride : row*srcStride+rowBytes]
+			// Reinterpret each row as w packed pixels.
+			dstU := unsafe.Slice((*uint32)(unsafe.Pointer(&dstRow[0])), w)
+			srcU := unsafe.Slice((*uint32)(unsafe.Pointer(&srcRow[0])), w)
+			for i, p := range srcU {
+				// Keep G (and the A byte), swap the B and R bytes, then force A.
+				dstU[i] = (p & 0xff00ff00) | ((p & 0x000000ff) << 16) | ((p & 0x00ff0000) >> 16) | 0xff000000
+			}
+		}
+	}
+
+	if workers == 1 {
+		convertRows(0, h)
+		return
+	}
+
+	var wg sync.WaitGroup
+	chunk := (h + workers - 1) / workers
+	for i := 0; i < workers; i++ {
+		y0 := i * chunk
+		y1 := y0 + chunk
+		if y1 > h {
+			y1 = h
+		}
+		// Rounding the chunk size up can leave trailing workers without rows.
+		if y0 >= y1 {
+			break
+		}
+		wg.Add(1)
+		go func(y0, y1 int) {
+			defer wg.Done()
+			convertRows(y0, y1)
+		}(y0, y1)
+	}
+	wg.Wait()
+}
+
+// MacPoller wraps CGCapturer with a staleness-cached on-demand Capture:
+// sessions drive captures themselves from their encoder goroutine, so we
+// don't need a background ticker. The last result is cached for a short
+// window so concurrent sessions coalesce into one capture.
+//
+// The capturer is allocated lazily on first use and released when all
+// clients disconnect. Init is retried with backoff because the user may
+// grant Screen Recording permission while the server is already running.
+type MacPoller struct {
+	// mu guards every field below except clients, which is atomic.
+	mu sync.Mutex
+
+	// capturer is nil until initialised and is reset to nil after a capture
+	// error, on the last client disconnect and on Close.
+	capturer *CGCapturer
+	// w, h cache the capturer's dimensions from the last successful init.
+	w, h int
+
+	// lastFrame / lastAt back the short-lived frame cache used by Capture.
+	lastFrame *image.RGBA
+	lastAt    time.Time
+
+	// clients counts connected sessions.
+	clients atomic.Int32
+	// initFails counts consecutive init failures; initBackoffUntil is the
+	// earliest time the next init attempt is allowed.
+	initFails        int
+	initBackoffUntil time.Time
+	// closed makes every later init attempt fail.
+	closed bool
+}
+
+// macInitRetryBackoffFor maps the number of consecutive init failures to the
+// pause before the next attempt: 2s for the first five, 10s up to fifteen,
+// 30s beyond that. Screen Recording permission is a one-off user action, so
+// hammering the API after many failures gains nothing.
+func macInitRetryBackoffFor(fails int) time.Duration {
+	if fails > 15 {
+		return 30 * time.Second
+	}
+	if fails > 5 {
+		return 10 * time.Second
+	}
+	return 2 * time.Second
+}
+
+// NewMacPoller returns an idle poller; the underlying capturer is only
+// created once something asks for a frame, a dimension or connects.
+func NewMacPoller() *MacPoller {
+	return new(MacPoller)
+}
+
+// Wake does nothing and exists only for API compatibility. With on-demand
+// capture there is no background retry loop to kick: initialisation is
+// attempted by the next call that needs the capturer (Capture, CaptureInto,
+// Width, Height or the first ClientConnect).
+func (p *MacPoller) Wake() {
+	// intentional no-op
+}
+
+// ClientConnect registers one more active client. The first client triggers
+// an eager capturer init so that the first frame request does not have to
+// pay for it; an init failure is ignored here and retried on demand.
+func (p *MacPoller) ClientConnect() {
+	if p.clients.Add(1) != 1 {
+		return
+	}
+	p.mu.Lock()
+	defer p.mu.Unlock()
+	_ = p.ensureCapturerLocked()
+}
+
+// ClientDisconnect unregisters one client. When the count reaches zero the
+// capturer and the cached frame are dropped.
+func (p *MacPoller) ClientDisconnect() {
+	if p.clients.Add(-1) != 0 {
+		return
+	}
+	p.mu.Lock()
+	defer p.mu.Unlock()
+	p.capturer = nil
+	p.lastFrame = nil
+}
+
+// Close marks the poller as closed and drops the capturer and cached frame.
+// Every later init attempt fails with "poller closed".
+func (p *MacPoller) Close() {
+	p.mu.Lock()
+	defer p.mu.Unlock()
+
+	p.closed = true
+	p.capturer, p.lastFrame = nil, nil
+}
+
+// Width reports the frame width, initialising the capturer first if needed.
+// If init fails, the width stored by the last successful init (zero if there
+// was none) is returned.
+func (p *MacPoller) Width() int {
+	p.mu.Lock()
+	defer p.mu.Unlock()
+
+	_ = p.ensureCapturerLocked()
+	return p.w
+}
+
+// Height reports the frame height, initialising the capturer first if
+// needed. If init fails, the height stored by the last successful init (zero
+// if there was none) is returned.
+func (p *MacPoller) Height() int {
+	p.mu.Lock()
+	defer p.mu.Unlock()
+
+	_ = p.ensureCapturerLocked()
+	return p.h
+}
+
+// CaptureInto captures straight into dst through the underlying capturer,
+// without consulting or updating the frame cache. errFrameUnchanged is
+// passed through as-is; any other capture error drops the capturer so the
+// next call re-initialises it.
+func (p *MacPoller) CaptureInto(dst *image.RGBA) error {
+	p.mu.Lock()
+	defer p.mu.Unlock()
+
+	if err := p.ensureCapturerLocked(); err != nil {
+		return err
+	}
+
+	err := p.capturer.CaptureInto(dst)
+	switch {
+	case err == nil:
+		return nil
+	case errors.Is(err, errFrameUnchanged):
+		// The session treats this as "no change"; dst still holds the
+		// pixels from its previous capture cycle, so its diff stays valid.
+		return err
+	default:
+		p.capturer = nil
+		return fmt.Errorf("macos capture: %w", err)
+	}
+}
+
+// Capture returns the current frame. A frame captured less than freshWindow
+// ago is served from the cache. When the capturer reports
+// errFrameUnchanged the cached frame is re-served and its timestamp
+// refreshed; with no cached frame that error is returned to the caller.
+func (p *MacPoller) Capture() (*image.RGBA, error) {
+	p.mu.Lock()
+	defer p.mu.Unlock()
+
+	cacheFresh := p.lastFrame != nil && time.Since(p.lastAt) < freshWindow
+	if cacheFresh {
+		return p.lastFrame, nil
+	}
+	if err := p.ensureCapturerLocked(); err != nil {
+		return nil, err
+	}
+
+	frame, err := p.capturer.Capture()
+	switch {
+	case err == nil:
+		p.lastFrame = frame
+		p.lastAt = time.Now()
+		return frame, nil
+	case !errors.Is(err, errFrameUnchanged):
+		// Forget the capturer so the next call retries init; the display
+		// stream can die if the session changes or permissions are revoked.
+		p.capturer = nil
+		return nil, fmt.Errorf("macos capture: %w", err)
+	case p.lastFrame != nil:
+		p.lastAt = time.Now()
+		return p.lastFrame, nil
+	default:
+		return nil, err
+	}
+}
+
+// ensureCapturerLocked makes sure p.capturer is usable, creating it when
+// necessary. It fails if the poller is closed, while a retry backoff is
+// still running, or if creating the capturer fails (which extends the
+// backoff). The first failure and every tenth one are logged as warnings,
+// the rest at debug level. Caller must hold p.mu.
+func (p *MacPoller) ensureCapturerLocked() error {
+	switch {
+	case p.closed:
+		return fmt.Errorf("poller closed")
+	case p.capturer != nil:
+		return nil
+	case time.Now().Before(p.initBackoffUntil):
+		return fmt.Errorf("macOS capturer unavailable (retry scheduled)")
+	}
+
+	capturer, err := NewCGCapturer()
+	if err == nil {
+		p.initFails = 0
+		p.capturer = capturer
+		p.w, p.h = capturer.Width(), capturer.Height()
+		return nil
+	}
+
+	p.initFails++
+	p.initBackoffUntil = time.Now().Add(macInitRetryBackoffFor(p.initFails))
+	logf := log.Debugf
+	if p.initFails == 1 || p.initFails%10 == 0 {
+		logf = log.Warnf
+	}
+	logf("macOS capturer: %v (attempt %d)", err, p.initFails)
+	return err
+}
+
+// Compile-time check that *MacPoller implements ScreenCapturer.
+var _ ScreenCapturer = (*MacPoller)(nil)
--- a/client/vnc/server/capture_dxgi_windows.go
+++ b/client/vnc/server/capture_dxgi_windows.go
@@ -0,0 +1,99 @@
+//go:build windows
+
+package server
+
+import (
+	"errors"
+	"fmt"
+	"image"
+
+	"github.com/kirides/go-d3d/d3d11"
+	"github.com/kirides/go-d3d/outputduplication"
+)
+
+// dxgiCapturer captures the desktop using DXGI Desktop Duplication.
+// Provides GPU-accelerated capture with native dirty rect tracking.
+// Only works from the interactive user session, not Session 0.
+//
+// Uses a double-buffer: DXGI writes into img, then we copy to the current
+// output buffer and hand it out. Alternating between two output buffers
+// avoids allocating a new image.RGBA per frame (~8MB at 1080p, 30fps).
+type dxgiCapturer struct {
+	// dup, device and ctx are the duplication and D3D11 objects; close
+	// releases them and sets them to nil.
+	dup    *outputduplication.OutputDuplicator
+	device *d3d11.ID3D11Device
+	ctx    *d3d11.ID3D11DeviceContext
+	// img is the buffer GetImage writes the latest desktop image into.
+	img *image.RGBA
+	// out holds the two buffers handed to callers; outIdx selects the one
+	// the next capture will fill.
+	out    [2]*image.RGBA
+	outIdx int
+	// width and height are the screen dimensions sampled at construction.
+	width  int
+	height int
+}
+
+// newDXGICapturer creates a D3D11 device and an output duplication for
+// output 0, allocates the working and output buffers at the current screen
+// size, and makes one best-effort attempt to fetch an initial frame. On
+// failure everything created so far is released.
+func newDXGICapturer() (*dxgiCapturer, error) {
+	device, deviceCtx, err := d3d11.NewD3D11Device()
+	if err != nil {
+		return nil, fmt.Errorf("create D3D11 device: %w", err)
+	}
+	// releaseDevice undoes the device creation on any later failure.
+	releaseDevice := func() {
+		device.Release()
+		deviceCtx.Release()
+	}
+
+	dup, err := outputduplication.NewIDXGIOutputDuplication(device, deviceCtx, 0)
+	if err != nil {
+		releaseDevice()
+		return nil, fmt.Errorf("create output duplication: %w", err)
+	}
+
+	width, height := screenSize()
+	if width == 0 || height == 0 {
+		dup.Release()
+		releaseDevice()
+		return nil, fmt.Errorf("screen dimensions are zero")
+	}
+
+	bounds := image.Rect(0, 0, width, height)
+	capturer := &dxgiCapturer{
+		dup:    dup,
+		device: device,
+		ctx:    deviceCtx,
+		img:    image.NewRGBA(bounds),
+		width:  width,
+		height: height,
+	}
+	capturer.out[0] = image.NewRGBA(bounds)
+	capturer.out[1] = image.NewRGBA(bounds)
+
+	// Try to fetch a first frame with a generous timeout so there is
+	// something to show right away. The result is deliberately ignored.
+	_ = dup.GetImage(capturer.img, 2000)
+
+	return capturer, nil
+}
+
+// capture fetches the latest desktop image (100ms timeout) and returns a
+// copy of it in one of the two output buffers. When no new image arrived in
+// time, the previously fetched contents of c.img are returned again.
+func (c *dxgiCapturer) capture() (*image.RGBA, error) {
+	if err := c.dup.GetImage(c.img, 100); err != nil {
+		if !errors.Is(err, outputduplication.ErrNoImageYet) {
+			return nil, err
+		}
+	}
+
+	// The DesktopCapturer hands the returned pointer to VNC sessions that
+	// read pixels concurrently, so two pre-allocated buffers are used in
+	// turn rather than allocating one per frame.
+	frame := c.out[c.outIdx]
+	c.outIdx = 1 - c.outIdx
+	copy(frame.Pix, c.img.Pix)
+	return frame, nil
+}
+
+// close releases the duplication, the device context and the device, in
+// that order, and nils each field so that calling close again is a no-op.
+func (c *dxgiCapturer) close() {
+	if c.dup != nil {
+		c.dup.Release()
+		c.dup = nil
+	}
+	if c.ctx != nil {
+		c.ctx.Release()
+		c.ctx = nil
+	}
+	if c.device != nil {
+		c.device.Release()
+		c.device = nil
+	}
+}
--- a/client/vnc/server/capture_fb_freebsd.go
+++ b/client/vnc/server/capture_fb_freebsd.go
@@ -0,0 +1,148 @@
+//go:build freebsd
+
+package server
+
+import (
+	"fmt"
+	"image"
+	"sync"
+	"unsafe"
+
+	log "github.com/sirupsen/logrus"
+	"golang.org/x/sys/unix"
+)
+
+// FreeBSD vt(4) framebuffer ioctl numbers from sys/fbio.h.
+//
+//	#define FBIOGTYPE _IOR('F', 0, struct fbtype)
+//
+// _IOR(g, n, t) on FreeBSD: IOC_OUT (0x40000000, the kernel copies data out
+// to userspace) | (sizeof(t) & 0x1fff)<<16 | (g<<8) | n.
+// sizeof(struct fbtype)=24 → 0x40000000 | 0x180000 | 0x4600 | 0 = 0x40184600.
+const fbioGType = 0x40184600
+
+func defaultFBPath() string { return "/dev/ttyv0" }
+
+// fbType mirrors FreeBSD's struct fbtype: six 32-bit ints, 24 bytes in
+// total, which is the size encoded in fbioGType.
+type fbType struct {
+	FbType   int32
+	FbHeight int32 // height in pixels
+	FbWidth  int32 // width in pixels
+	FbDepth  int32 // bits per pixel
+	FbCMSize int32
+	FbSize   int32 // used as the mmap length, in bytes
+}
+
+// FBCapturer reads pixels from FreeBSD's vt(4) framebuffer device. The
+// vt(4) console exposes the active framebuffer via ttyv0 with FBIOGTYPE
+// for geometry and mmap for backing memory. Pixel layout is assumed to
+// be 32bpp BGRA (the common case for KMS-backed vt); fbtype doesn't
+// expose channel offsets, so we don't try to handle exotic layouts here.
+type FBCapturer struct {
+	// mu serialises CaptureInto and Close.
+	mu   sync.Mutex
+	path string
+	// fd is the open device; Close sets it to -1.
+	fd int
+	// mmap is the read-only mapping of the framebuffer; nil after Close.
+	mmap []byte
+	w, h int
+	// bpp is bits per pixel; stride is bytes per row, computed as
+	// width * bytes-per-pixel.
+	bpp       int
+	stride    int
+	closeOnce sync.Once
+}
+
+// NewFBCapturer opens the given vt(4) device (defaultFBPath() when path is
+// empty), queries its geometry with FBIOGTYPE and maps the framebuffer
+// read-only.
+//
+// It fails when the device cannot be opened, the ioctl fails, the depth is
+// not 16/24/32 bpp, the geometry is non-positive, the reported framebuffer
+// size cannot hold height*stride bytes, or the mmap fails. The size check
+// matters because captures read height*stride bytes from the mapping; a
+// shorter mapping would otherwise panic on the first capture.
+func NewFBCapturer(path string) (*FBCapturer, error) {
+	if path == "" {
+		path = defaultFBPath()
+	}
+	fd, err := unix.Open(path, unix.O_RDWR, 0)
+	if err != nil {
+		return nil, fmt.Errorf("open %s: %w", path, err)
+	}
+
+	var fbt fbType
+	if _, _, e := unix.Syscall(unix.SYS_IOCTL, uintptr(fd), fbioGType, uintptr(unsafe.Pointer(&fbt))); e != 0 {
+		unix.Close(fd)
+		// %w keeps the errno available to errors.Is / errors.As.
+		return nil, fmt.Errorf("FBIOGTYPE: %w", e)
+	}
+	if fbt.FbDepth != 16 && fbt.FbDepth != 24 && fbt.FbDepth != 32 {
+		unix.Close(fd)
+		return nil, fmt.Errorf("unsupported framebuffer depth: %d", fbt.FbDepth)
+	}
+	if fbt.FbWidth <= 0 || fbt.FbHeight <= 0 || fbt.FbSize <= 0 {
+		unix.Close(fd)
+		return nil, fmt.Errorf("invalid framebuffer geometry: %dx%d size=%d", fbt.FbWidth, fbt.FbHeight, fbt.FbSize)
+	}
+
+	bpp := int(fbt.FbDepth)
+	w, h := int(fbt.FbWidth), int(fbt.FbHeight)
+	// fbtype carries no line length, so rows are assumed to be tightly packed.
+	stride := w * (bpp / 8)
+	if need := h * stride; int(fbt.FbSize) < need {
+		unix.Close(fd)
+		return nil, fmt.Errorf("framebuffer size %d too small for %dx%d at %d bpp (need %d bytes)",
+			fbt.FbSize, w, h, bpp, need)
+	}
+
+	mm, err := unix.Mmap(fd, 0, int(fbt.FbSize), unix.PROT_READ, unix.MAP_SHARED)
+	if err != nil {
+		unix.Close(fd)
+		return nil, fmt.Errorf("mmap %s: %w (vt may not support mmap on this driver, e.g. virtio_gpu)", path, err)
+	}
+
+	c := &FBCapturer{
+		path:   path,
+		fd:     fd, // valid fd >= 0; we use -1 as the closed sentinel
+		mmap:   mm,
+		w:      w,
+		h:      h,
+		bpp:    bpp,
+		stride: stride,
+	}
+	log.Infof("framebuffer capturer ready: %s %dx%d bpp=%d (freebsd vt)", path, c.w, c.h, c.bpp)
+	return c, nil
+}
+
+// Width reports the framebuffer width in pixels as read at open time.
+func (c *FBCapturer) Width() int {
+	return c.w
+}
+
+// Height reports the framebuffer height in pixels as read at open time.
+func (c *FBCapturer) Height() int {
+	return c.h
+}
+
+// Capture returns the current framebuffer contents in a newly allocated
+// RGBA image of the framebuffer's size.
+func (c *FBCapturer) Capture() (*image.RGBA, error) {
+	frame := image.NewRGBA(image.Rect(0, 0, c.w, c.h))
+	err := c.CaptureInto(frame)
+	if err != nil {
+		return nil, err
+	}
+	return frame, nil
+}
+
+// CaptureInto converts the mapped framebuffer into dst.Pix as RGBA. dst must
+// have exactly the framebuffer's dimensions. 32bpp data is assumed to be
+// BGRA, because the FreeBSD fbtype struct doesn't expose channel offsets.
+//
+// Calling it after Close returns an error: Close drops the mapping, and
+// slicing the nil mapping would otherwise panic.
+func (c *FBCapturer) CaptureInto(dst *image.RGBA) error {
+	c.mu.Lock()
+	defer c.mu.Unlock()
+	if c.mmap == nil {
+		return fmt.Errorf("framebuffer capturer closed")
+	}
+	if dst.Rect.Dx() != c.w || dst.Rect.Dy() != c.h {
+		return fmt.Errorf("dst size mismatch: dst=%dx%d fb=%dx%d",
+			dst.Rect.Dx(), dst.Rect.Dy(), c.w, c.h)
+	}
+	switch c.bpp {
+	case 32:
+		// vt(4) on KMS framebuffers is BGRA: byte 0=B, 1=G, 2=R.
+		swizzleBGRAtoRGBA(dst.Pix, c.mmap[:c.h*c.stride])
+	case 24:
+		swizzleFB24(dst.Pix, dst.Stride, c.mmap, c.stride, c.w, c.h)
+	case 16:
+		swizzleFB16RGB565(dst.Pix, dst.Stride, c.mmap, c.stride, c.w, c.h)
+	}
+	return nil
+}
+
+// Close releases the framebuffer mmap and file descriptor. Serialized with
+// CaptureInto via c.mu so an in-flight capture can't read freed memory.
+// closeOnce makes repeated calls no-ops; errors from munmap and close are
+// deliberately ignored because nothing can be done about them at teardown.
+func (c *FBCapturer) Close() {
+	c.closeOnce.Do(func() {
+		c.mu.Lock()
+		defer c.mu.Unlock()
+		if c.mmap != nil {
+			_ = unix.Munmap(c.mmap)
+			c.mmap = nil
+		}
+		if c.fd >= 0 {
+			_ = unix.Close(c.fd)
+			c.fd = -1
+		}
+	})
+}
--- a/client/vnc/server/capture_fb_linux.go
+++ b/client/vnc/server/capture_fb_linux.go
@@ -0,0 +1,230 @@
+//go:build linux && !android
+
+package server
+
+import (
+	"encoding/binary"
+	"fmt"
+	"image"
+	"sync"
+	"unsafe"
+
+	log "github.com/sirupsen/logrus"
+	"golang.org/x/sys/unix"
+)
+
+// Linux framebuffer ioctls (linux/fb.h): FBIOGET_VSCREENINFO fills a
+// fbVarScreenInfo, FBIOGET_FSCREENINFO fills a fbFixScreenInfo.
+const (
+	fbioGetVScreenInfo = 0x4600
+	fbioGetFScreenInfo = 0x4602
+)
+
+func defaultFBPath() string { return "/dev/fb0" }
+
+// fbVarScreenInfo mirrors the kernel's fb_var_screeninfo. Every field is
+// mapped in order so the struct has the kernel's size (the ioctl writes the
+// whole struct), with each fb_bitfield flattened into offset / length /
+// msb_right and _pad standing in for the trailing reserved words. Only the
+// resolution, bits-per-pixel and colour channel fields are read by this file.
+type fbVarScreenInfo struct {
+	Xres, Yres                       uint32
+	XresVirtual, YresVirtual         uint32
+	XOffset, YOffset                 uint32
+	BitsPerPixel                     uint32
+	Grayscale                        uint32
+	RedOffset, RedLen, RedMSBR       uint32
+	GreenOffset, GreenLen, GreenMSBR uint32
+	BlueOffset, BlueLen, BlueMSBR    uint32
+	TranspOffset, TranspLen, TranspM uint32
+	NonStd                           uint32
+	Activate                         uint32
+	Height, Width                    uint32
+	AccelFlags                       uint32
+	PixClock                         uint32
+	LeftMargin, RightMargin          uint32
+	UpperMargin, LowerMargin         uint32
+	HsyncLen, VsyncLen               uint32
+	Sync                             uint32
+	Vmode                            uint32
+	Rotate                           uint32
+	Colorspace                       uint32
+	_pad                             [4]uint32
+}
+
+// fbFixScreenInfo mirrors fb_fix_screeninfo. We only need LineLength.
+//
+// smem_start and mmio_start are `unsigned long` in the kernel header, i.e.
+// pointer-sized, so they are declared as uintptr. Using a fixed 64-bit type
+// would shift every later field on 32-bit Linux and make LineLength read the
+// wrong bytes.
+type fbFixScreenInfo struct {
+	IDStr        [16]byte
+	SmemStart    uintptr
+	SmemLen      uint32
+	Type         uint32
+	TypeAux      uint32
+	Visual       uint32
+	XPanStep     uint16
+	YPanStep     uint16
+	YWrapStep    uint16
+	_pad0        uint16 // explicit form of the padding C inserts before line_length
+	LineLength   uint32 // bytes per framebuffer row
+	MmioStart    uintptr
+	MmioLen      uint32
+	Accel        uint32
+	Capabilities uint16
+	_reserved    [2]uint16
+}
+
+// FBCapturer reads pixels straight from the Linux framebuffer device.
+// Used as a fallback when X11 isn't available, e.g. on a headless box at
+// the kernel console or the display manager's pre-login screen on machines
+// without an Xorg server. The framebuffer must be mmap()-able under our
+// process privileges (typically the netbird service runs as root).
+type FBCapturer struct {
+	// mu serialises CaptureInto and Close.
+	mu   sync.Mutex
+	path string
+	// fd is the open device; Close sets it to -1.
+	fd int
+	// mmap is the read-only mapping of the framebuffer; nil after Close.
+	mmap []byte
+	w, h int
+	// bpp is bits per pixel; stride is the driver-reported bytes per row.
+	bpp    int
+	stride int
+	// rOff/gOff/bOff are the channel bit offsets and rLen/gLen/bLen the
+	// channel bit lengths reported by the driver.
+	rOff      uint32
+	gOff      uint32
+	bOff      uint32
+	rLen      uint32
+	gLen      uint32
+	bLen      uint32
+	closeOnce sync.Once
+}
+
+// NewFBCapturer opens the given framebuffer device (/dev/fbN, or
+// defaultFBPath() when path is empty), queries its current geometry and
+// pixel format, and maps line_length*yres bytes read-only.
+//
+// It fails when the device cannot be opened, either ioctl fails, the depth
+// is not 16/24/32 bpp, the mapped size would be non-positive, a row is
+// shorter than xres pixels, or the mmap fails. The row-length check matters
+// because captures read xres*bytes-per-pixel bytes from every row; a shorter
+// line_length would run past the end of the mapping on the last row.
+func NewFBCapturer(path string) (*FBCapturer, error) {
+	if path == "" {
+		path = defaultFBPath()
+	}
+	fd, err := unix.Open(path, unix.O_RDONLY, 0)
+	if err != nil {
+		return nil, fmt.Errorf("open %s: %w", path, err)
+	}
+
+	var vinfo fbVarScreenInfo
+	if _, _, e := unix.Syscall(unix.SYS_IOCTL, uintptr(fd), fbioGetVScreenInfo, uintptr(unsafe.Pointer(&vinfo))); e != 0 {
+		unix.Close(fd)
+		// %w keeps the errno available to errors.Is / errors.As.
+		return nil, fmt.Errorf("FBIOGET_VSCREENINFO: %w", e)
+	}
+	var finfo fbFixScreenInfo
+	if _, _, e := unix.Syscall(unix.SYS_IOCTL, uintptr(fd), fbioGetFScreenInfo, uintptr(unsafe.Pointer(&finfo))); e != 0 {
+		unix.Close(fd)
+		return nil, fmt.Errorf("FBIOGET_FSCREENINFO: %w", e)
+	}
+
+	bpp := int(vinfo.BitsPerPixel)
+	if bpp != 16 && bpp != 24 && bpp != 32 {
+		unix.Close(fd)
+		return nil, fmt.Errorf("unsupported framebuffer bpp: %d", bpp)
+	}
+
+	size := int(finfo.LineLength) * int(vinfo.Yres)
+	if size <= 0 {
+		unix.Close(fd)
+		return nil, fmt.Errorf("invalid framebuffer dimensions: stride=%d h=%d", finfo.LineLength, vinfo.Yres)
+	}
+	if rowBytes := int(vinfo.Xres) * (bpp / 8); int(finfo.LineLength) < rowBytes {
+		unix.Close(fd)
+		return nil, fmt.Errorf("invalid framebuffer stride: line_length=%d < %d bytes for width %d at %d bpp",
+			finfo.LineLength, rowBytes, vinfo.Xres, bpp)
+	}
+
+	mm, err := unix.Mmap(fd, 0, size, unix.PROT_READ, unix.MAP_SHARED)
+	if err != nil {
+		unix.Close(fd)
+		return nil, fmt.Errorf("mmap %s: %w", path, err)
+	}
+
+	c := &FBCapturer{
+		path:   path,
+		fd:     fd,
+		mmap:   mm,
+		w:      int(vinfo.Xres),
+		h:      int(vinfo.Yres),
+		bpp:    bpp,
+		stride: int(finfo.LineLength),
+		rOff:   vinfo.RedOffset,
+		gOff:   vinfo.GreenOffset,
+		bOff:   vinfo.BlueOffset,
+		rLen:   vinfo.RedLen,
+		gLen:   vinfo.GreenLen,
+		bLen:   vinfo.BlueLen,
+	}
+	log.Infof("framebuffer capturer ready: %s %dx%d bpp=%d r=%d/%d g=%d/%d b=%d/%d",
+		path, c.w, c.h, c.bpp, c.rOff, c.rLen, c.gOff, c.gLen, c.bOff, c.bLen)
+	return c, nil
+}
+
+// Width reports the visible framebuffer width (xres) read at open time.
+func (c *FBCapturer) Width() int {
+	return c.w
+}
+
+// Height reports the visible framebuffer height (yres) read at open time.
+func (c *FBCapturer) Height() int {
+	return c.h
+}
+
+// Capture returns the current framebuffer contents in a newly allocated
+// RGBA image of the framebuffer's size.
+func (c *FBCapturer) Capture() (*image.RGBA, error) {
+	frame := image.NewRGBA(image.Rect(0, 0, c.w, c.h))
+	err := c.CaptureInto(frame)
+	if err != nil {
+		return nil, err
+	}
+	return frame, nil
+}
+
+// CaptureInto converts the mapped framebuffer into dst.Pix as RGBA. dst must
+// have exactly the framebuffer's dimensions. 32bpp data is decoded using the
+// driver-reported channel offsets; 24bpp and 16bpp use fixed layouts.
+//
+// Calling it after Close returns an error: Close drops the mapping, and
+// reading rows from the nil mapping would otherwise panic.
+func (c *FBCapturer) CaptureInto(dst *image.RGBA) error {
+	c.mu.Lock()
+	defer c.mu.Unlock()
+
+	if c.mmap == nil {
+		return fmt.Errorf("framebuffer capturer closed")
+	}
+	if dst.Rect.Dx() != c.w || dst.Rect.Dy() != c.h {
+		return fmt.Errorf("dst size mismatch: dst=%dx%d fb=%dx%d",
+			dst.Rect.Dx(), dst.Rect.Dy(), c.w, c.h)
+	}
+
+	switch c.bpp {
+	case 32:
+		swizzleFB32(dst.Pix, dst.Stride, c.mmap, c.stride, c.w, c.h, channelShifts{R: c.rOff, G: c.gOff, B: c.bOff})
+	case 24:
+		swizzleFB24(dst.Pix, dst.Stride, c.mmap, c.stride, c.w, c.h)
+	case 16:
+		swizzleFB16RGB565(dst.Pix, dst.Stride, c.mmap, c.stride, c.w, c.h)
+	}
+	return nil
+}
+
+// Close releases the framebuffer mmap and file descriptor. Serialized with
+// CaptureInto via c.mu so an in-flight capture can't read freed memory.
+// closeOnce makes repeated calls no-ops; errors from munmap and close are
+// deliberately ignored because nothing can be done about them at teardown.
+func (c *FBCapturer) Close() {
+	c.closeOnce.Do(func() {
+		c.mu.Lock()
+		defer c.mu.Unlock()
+		if c.mmap != nil {
+			_ = unix.Munmap(c.mmap)
+			c.mmap = nil
+		}
+		if c.fd >= 0 {
+			_ = unix.Close(c.fd)
+			c.fd = -1
+		}
+	})
+}
+
+// channelShifts carries the bit positions of the red, green and blue
+// channels inside a packed 32-bit framebuffer pixel. Passing them as one
+// value keeps the conversion signature manageable.
+type channelShifts struct {
+	R, G, B uint32
+}
+
+// swizzleFB32 converts a w x h 32-bit framebuffer with arbitrary channel
+// positions into RGBA. Each source pixel is read as a little-endian uint32;
+// each channel is the low byte of that word shifted right by its offset, and
+// alpha is always written as 0xff.
+func swizzleFB32(dst []byte, dstStride int, src []byte, srcStride, w, h int, shifts channelShifts) {
+	rowBytes := w * 4
+	for y := 0; y < h; y++ {
+		in := src[y*srcStride : y*srcStride+rowBytes]
+		out := dst[y*dstStride:]
+		for off := 0; off < rowBytes; off += 4 {
+			px := binary.LittleEndian.Uint32(in[off : off+4])
+			out[off+0] = byte(px >> shifts.R)
+			out[off+1] = byte(px >> shifts.G)
+			out[off+2] = byte(px >> shifts.B)
+			out[off+3] = 0xff
+		}
+	}
+}
+
--- a/client/vnc/server/capture_fb_unix.go
+++ b/client/vnc/server/capture_fb_unix.go
@@ -0,0 +1,150 @@
+//go:build (linux && !android) || freebsd
+
+package server
+
+import (
+	"image"
+	"sync"
+)
+
+// FBPoller wraps FBCapturer with the same lifecycle (ClientConnect /
+// ClientDisconnect, lazy init) as X11Poller, so it slots into the same
+// session plumbing without code changes upstream. The concrete
+// FBCapturer is platform-specific (capture_fb_linux.go / _freebsd.go);
+// this file owns the cross-platform glue.
+type FBPoller struct {
+	// mu guards every field below.
+	mu       sync.Mutex
+	// path is the framebuffer device handed to NewFBCapturer.
+	path     string
+	// capturer is nil until first use and again after the last client leaves.
+	capturer *FBCapturer
+	// w, h cache the dimensions reported by the most recently opened capturer.
+	w, h     int
+	// clients counts ClientConnect calls not yet matched by ClientDisconnect.
+	clients  int32
+}
+
+// NewFBPoller builds a poller for the framebuffer device at path. The device
+// is not opened here; that happens on first use. An empty path selects the
+// platform default supplied by defaultFBPath.
+func NewFBPoller(path string) *FBPoller {
+	poller := &FBPoller{path: path}
+	if poller.path == "" {
+		poller.path = defaultFBPath()
+	}
+	return poller
+}
+
+// ClientConnect registers one more active client. When the count goes from
+// zero to one the capturer is opened right away; a failure is ignored here
+// because later calls retry through ensureCapturerLocked.
+func (p *FBPoller) ClientConnect() {
+	p.mu.Lock()
+	defer p.mu.Unlock()
+	p.clients++
+	if p.clients != 1 {
+		return
+	}
+	_ = p.ensureCapturerLocked()
+}
+
+// ClientDisconnect unregisters one client. Once no clients remain, any open
+// capturer is closed and forgotten so the next use reopens the device.
+func (p *FBPoller) ClientDisconnect() {
+	p.mu.Lock()
+	defer p.mu.Unlock()
+	p.clients--
+	if p.clients > 0 || p.capturer == nil {
+		return
+	}
+	p.capturer.Close()
+	p.capturer = nil
+}
+
+// Width returns the framebuffer width, doing lazy init if needed.
+// If the capturer cannot be opened the error is dropped and the cached value
+// is returned, which is 0 when no capturer has ever been opened.
+func (p *FBPoller) Width() int {
+	p.mu.Lock()
+	defer p.mu.Unlock()
+	_ = p.ensureCapturerLocked()
+	return p.w
+}
+
+// Height returns the framebuffer height, doing lazy init if needed.
+// If the capturer cannot be opened the error is dropped and the cached value
+// is returned, which is 0 when no capturer has ever been opened.
+func (p *FBPoller) Height() int {
+	p.mu.Lock()
+	defer p.mu.Unlock()
+	_ = p.ensureCapturerLocked()
+	return p.h
+}
+
+// Capture takes a fresh frame. The capturer is opened first if necessary;
+// an open failure is returned to the caller. p.mu is held for the whole
+// capture, so calls are serialized.
+func (p *FBPoller) Capture() (*image.RGBA, error) {
+	p.mu.Lock()
+	defer p.mu.Unlock()
+	if err := p.ensureCapturerLocked(); err != nil {
+		return nil, err
+	}
+	return p.capturer.Capture()
+}
+
+// CaptureInto fills dst directly. The capturer is opened first if necessary;
+// an open failure, or any error from FBCapturer.CaptureInto (such as a size
+// mismatch), is returned to the caller.
+func (p *FBPoller) CaptureInto(dst *image.RGBA) error {
+	p.mu.Lock()
+	defer p.mu.Unlock()
+	if err := p.ensureCapturerLocked(); err != nil {
+		return err
+	}
+	return p.capturer.CaptureInto(dst)
+}
+
+// Close shuts down the open capturer, if any. The poller itself stays
+// usable: the next call that needs a capturer opens a new one.
+func (p *FBPoller) Close() {
+	p.mu.Lock()
+	defer p.mu.Unlock()
+	if p.capturer == nil {
+		return
+	}
+	p.capturer.Close()
+	p.capturer = nil
+}
+
+// ensureCapturerLocked opens the framebuffer capturer when none is open and
+// caches its dimensions. Caller must hold p.mu. On failure the poller state
+// is left unchanged and the error from NewFBCapturer is returned.
+func (p *FBPoller) ensureCapturerLocked() error {
+	if p.capturer == nil {
+		fb, err := NewFBCapturer(p.path)
+		if err != nil {
+			return err
+		}
+		p.capturer = fb
+		p.w, p.h = fb.Width(), fb.Height()
+	}
+	return nil
+}
+
+
+// Compile-time checks that *FBPoller satisfies both capture interfaces.
+var _ ScreenCapturer = (*FBPoller)(nil)
+var _ captureIntoer = (*FBPoller)(nil)
+
+// swizzleFB24 converts a packed 24-bit framebuffer, stored as B,G,R byte
+// triplets, into RGBA with alpha forced to 0xff. srcStride and dstStride are
+// the byte distances between rows; w and h are the size in pixels.
+// Shared between Linux and FreeBSD framebuffer paths.
+func swizzleFB24(dst []byte, dstStride int, src []byte, srcStride, w, h int) {
+	for row := 0; row < h; row++ {
+		in := src[row*srcStride : row*srcStride+w*3]
+		out := dst[row*dstStride:]
+		for col := 0; col < w; col++ {
+			bgr := in[col*3 : col*3+3]
+			o := col * 4
+			out[o+0] = bgr[2]
+			out[o+1] = bgr[1]
+			out[o+2] = bgr[0]
+			out[o+3] = 0xff
+		}
+	}
+}
+
+// swizzleFB16RGB565 converts a 16-bit RGB565 framebuffer into RGBA. Each
+// pixel is a little-endian uint16 with red in the top 5 bits, green in the
+// middle 6 and blue in the low 5. Channels are widened to 8 bits by
+// replicating their high bits into the low bits, so full intensity maps to
+// 0xff. Alpha is forced to 0xff.
+func swizzleFB16RGB565(dst []byte, dstStride int, src []byte, srcStride, w, h int) {
+	for row := 0; row < h; row++ {
+		in := src[row*srcStride : row*srcStride+w*2]
+		out := dst[row*dstStride:]
+		for col := 0; col < w; col++ {
+			lo, hi := in[col*2], in[col*2+1]
+			px := uint16(hi)<<8 | uint16(lo)
+			r5 := byte(px>>11) & 0x1f
+			g6 := byte(px>>5) & 0x3f
+			b5 := byte(px) & 0x1f
+			o := col * 4
+			out[o+0] = r5<<3 | r5>>2
+			out[o+1] = g6<<2 | g6>>4
+			out[o+2] = b5<<3 | b5>>2
+			out[o+3] = 0xff
+		}
+	}
+}
--- a/client/vnc/server/capture_windows.go
+++ b/client/vnc/server/capture_windows.go
@@ -0,0 +1,544 @@
+//go:build windows
+
+package server
+
+import (
+	"fmt"
+	"image"
+	"runtime"
+	"sync"
+	"sync/atomic"
+	"time"
+	"unsafe"
+
+	log "github.com/sirupsen/logrus"
+	"golang.org/x/sys/windows"
+)
+
+// Lazily resolved Win32 entry points. The DLLs and procedures are looked up
+// on first Call, not at package init.
+var (
+	gdi32  = windows.NewLazySystemDLL("gdi32.dll")
+	user32 = windows.NewLazySystemDLL("user32.dll")
+
+	// GDI screen-capture primitives.
+	procGetDC            = user32.NewProc("GetDC")
+	procReleaseDC        = user32.NewProc("ReleaseDC")
+	procCreateCompatDC   = gdi32.NewProc("CreateCompatibleDC")
+	procCreateDIBSection = gdi32.NewProc("CreateDIBSection")
+	procSelectObject     = gdi32.NewProc("SelectObject")
+	procDeleteObject     = gdi32.NewProc("DeleteObject")
+	procDeleteDC         = gdi32.NewProc("DeleteDC")
+	procBitBlt           = gdi32.NewProc("BitBlt")
+	procGetSystemMetrics = user32.NewProc("GetSystemMetrics")
+
+	// Desktop switching for service/Session 0 capture.
+	procOpenInputDesktop          = user32.NewProc("OpenInputDesktop")
+	procSetThreadDesktop          = user32.NewProc("SetThreadDesktop")
+	procCloseDesktop              = user32.NewProc("CloseDesktop")
+	procOpenWindowStation         = user32.NewProc("OpenWindowStationW")
+	procSetProcessWindowStation   = user32.NewProc("SetProcessWindowStation")
+	procCloseWindowStation        = user32.NewProc("CloseWindowStation")
+	procGetUserObjectInformationW = user32.NewProc("GetUserObjectInformationW")
+)
+
+// uoiName is the GetUserObjectInformationW index that requests the object's
+// name (Win32 UOI_NAME).
+const uoiName = 2
+
+// Win32 constants used by the GDI capture path.
+const (
+	smCxScreen   = 0          // GetSystemMetrics index used for the screen width (SM_CXSCREEN)
+	smCyScreen   = 1          // GetSystemMetrics index used for the screen height (SM_CYSCREEN)
+	srccopy      = 0x00CC0020 // BitBlt raster op: plain source copy (SRCCOPY)
+	captureBlt   = 0x40000000 // BitBlt flag: include layered windows (CAPTUREBLT)
+	dibRgbColors = 0          // CreateDIBSection usage argument (DIB_RGB_COLORS)
+)
+
+// bitmapInfoHeader describes the DIB section requested from
+// CreateDIBSection. The field order and widths appear to mirror the Win32
+// BITMAPINFOHEADER layout, so they must not be reordered. allocGDI sets
+// only Size, Width, Height, Planes and BitCount; the rest stay zero.
+type bitmapInfoHeader struct {
+	Size          uint32
+	Width         int32
+	Height        int32
+	Planes        uint16
+	BitCount      uint16
+	Compression   uint32
+	SizeImage     uint32
+	XPelsPerMeter int32
+	YPelsPerMeter int32
+	ClrUsed       uint32
+	ClrImportant  uint32
+}
+
+// bitmapInfo is the structure passed by pointer to CreateDIBSection. It
+// carries only the header; no colour table follows it.
+type bitmapInfo struct {
+	Header bitmapInfoHeader
+}
+
+// setupInteractiveWindowStation associates the current process with WinSta0,
+// the interactive window station. This is required for a SYSTEM service in
+// Session 0 to call OpenInputDesktop for screen capture and input injection.
+//
+// It returns an error when WinSta0 cannot be opened or cannot be made the
+// process window station. On success the station handle is intentionally
+// left open, since the process keeps using it.
+func setupInteractiveWindowStation() error {
+	name, err := windows.UTF16PtrFromString("WinSta0")
+	if err != nil {
+		return fmt.Errorf("UTF16 WinSta0: %w", err)
+	}
+	// The third return value of Call carries the thread's last error; it is
+	// only meaningful when the primary result signals failure.
+	hWinSta, _, err := procOpenWindowStation.Call(
+		uintptr(unsafe.Pointer(name)),
+		0,
+		uintptr(windows.MAXIMUM_ALLOWED),
+	)
+	if hWinSta == 0 {
+		return fmt.Errorf("OpenWindowStation(WinSta0): %w", err)
+	}
+	r, _, err := procSetProcessWindowStation.Call(hWinSta)
+	if r == 0 {
+		// Not adopted by the process, so release the handle opened above.
+		_, _, _ = procCloseWindowStation.Call(hWinSta)
+		return fmt.Errorf("SetProcessWindowStation: %w", err)
+	}
+	log.Info("process window station set to WinSta0 (interactive)")
+	return nil
+}
+
+// screenSize returns the screen width and height as reported by
+// GetSystemMetrics. Call errors are ignored, so callers should treat a zero
+// dimension as failure (newGDICapturer does).
+func screenSize() (int, int) {
+	w, _, _ := procGetSystemMetrics.Call(uintptr(smCxScreen))
+	h, _, _ := procGetSystemMetrics.Call(uintptr(smCyScreen))
+	return int(w), int(h)
+}
+
+// getDesktopName returns the name of the desktop referred to by hDesk, as
+// reported by GetUserObjectInformationW. The call's result is not checked:
+// if it fails the zeroed buffer yields an empty string.
+func getDesktopName(hDesk uintptr) string {
+	var buf [256]uint16
+	var needed uint32
+	// 512 is the size of buf in bytes (256 UTF-16 code units).
+	_, _, _ = procGetUserObjectInformationW.Call(hDesk, uoiName,
+		uintptr(unsafe.Pointer(&buf[0])), 512,
+		uintptr(unsafe.Pointer(&needed)))
+	return windows.UTF16ToString(buf[:])
+}
+
+// switchToInputDesktop opens the desktop currently receiving user input
+// and sets it as the calling OS thread's desktop. Must be called from a
+// goroutine locked to its OS thread via runtime.LockOSThread().
+//
+// It returns whether SetThreadDesktop succeeded, plus the desktop's name.
+// The name is returned even when SetThreadDesktop fails; both results are
+// zero values when the input desktop cannot be opened at all.
+//
+// NOTE(review): CloseDesktop is invoked on the handle that was just made the
+// thread's desktop. The Win32 documentation describes CloseDesktop as failing
+// while a thread is using the handle, in which case each successful call here
+// would leave one desktop handle open. Confirm, and consider closing the
+// previously assigned desktop handle instead.
+func switchToInputDesktop() (bool, string) {
+	hDesk, _, _ := procOpenInputDesktop.Call(0, 0, uintptr(windows.MAXIMUM_ALLOWED))
+	if hDesk == 0 {
+		return false, ""
+	}
+	name := getDesktopName(hDesk)
+	ret, _, _ := procSetThreadDesktop.Call(hDesk)
+	_, _, _ = procCloseDesktop.Call(hDesk)
+	return ret != 0, name
+}
+
+// gdiCapturer captures the desktop screen using GDI BitBlt.
+// GDI objects (DC, DIBSection) are allocated once and reused across frames.
+type gdiCapturer struct {
+	// mu serializes capture calls.
+	mu     sync.Mutex
+	// width and height are sampled once in newGDICapturer and size the DIB.
+	width  int
+	height int
+
+	// Pre-allocated GDI resources, reused across captures.
+	memDC uintptr // memory DC that BitBlt copies the screen into
+	bmp   uintptr // DIB section selected into memDC
+	bits  uintptr // address of the DIB section's pixel memory
+}
+
+// newGDICapturer sizes a capturer to the current screen and pre-allocates
+// its GDI resources. It fails when either screen dimension is reported as
+// zero or when allocGDI cannot create the DC or DIB section.
+func newGDICapturer() (*gdiCapturer, error) {
+	width, height := screenSize()
+	if width == 0 || height == 0 {
+		return nil, fmt.Errorf("screen dimensions are zero")
+	}
+	gc := &gdiCapturer{width: width, height: height}
+	allocErr := gc.allocGDI()
+	if allocErr != nil {
+		return nil, allocErr
+	}
+	return gc, nil
+}
+
+// allocGDI pre-allocates the compatible DC and DIB section for reuse.
+//
+// It creates a memory DC compatible with the screen, a top-down 32-bit DIB
+// section of c.width x c.height, and selects the DIB into the DC. On success
+// c.memDC, c.bmp and c.bits are set. On any failure every object created so
+// far is released and c is left untouched.
+func (c *gdiCapturer) allocGDI() error {
+	screenDC, _, _ := procGetDC.Call(0)
+	if screenDC == 0 {
+		return fmt.Errorf("GetDC returned 0")
+	}
+	defer func() { _, _, _ = procReleaseDC.Call(0, screenDC) }()
+
+	memDC, _, _ := procCreateCompatDC.Call(screenDC)
+	if memDC == 0 {
+		return fmt.Errorf("CreateCompatibleDC returned 0")
+	}
+
+	bi := bitmapInfo{
+		Header: bitmapInfoHeader{
+			Size:     uint32(unsafe.Sizeof(bitmapInfoHeader{})),
+			Width:    int32(c.width),
+			Height:   -int32(c.height), // negative = top-down DIB
+			Planes:   1,
+			BitCount: 32,
+		},
+	}
+
+	var bits uintptr
+	bmp, _, _ := procCreateDIBSection.Call(
+		screenDC,
+		uintptr(unsafe.Pointer(&bi)),
+		dibRgbColors,
+		uintptr(unsafe.Pointer(&bits)),
+		0, 0,
+	)
+	if bmp == 0 || bits == 0 {
+		// A bitmap handle without a pixel pointer is unusable; free it too
+		// rather than leaking it.
+		if bmp != 0 {
+			_, _, _ = procDeleteObject.Call(bmp)
+		}
+		_, _, _ = procDeleteDC.Call(memDC)
+		return fmt.Errorf("CreateDIBSection returned 0")
+	}
+
+	// SelectObject returns 0 when the bitmap could not be selected. BitBlt
+	// would then draw into the DC's default bitmap and every captured frame
+	// would read back empty, so treat it as an allocation failure.
+	if prev, _, _ := procSelectObject.Call(memDC, bmp); prev == 0 {
+		_, _, _ = procDeleteDC.Call(memDC)
+		_, _, _ = procDeleteObject.Call(bmp)
+		return fmt.Errorf("SelectObject returned 0")
+	}
+
+	c.memDC = memDC
+	c.bmp = bmp
+	c.bits = bits
+	return nil
+}
+
+// close releases the capturer's GDI resources by delegating to freeGDI.
+func (c *gdiCapturer) close() { c.freeGDI() }
+
+// freeGDI releases pre-allocated GDI resources. It is safe to call more than
+// once: handles are zeroed after release.
+//
+// The memory DC is deleted before the bitmap. The bitmap is still selected
+// into that DC, and GDI objects should not be deleted while selected into a
+// device context; destroying the DC first releases the selection.
+func (c *gdiCapturer) freeGDI() {
+	if c.memDC != 0 {
+		_, _, _ = procDeleteDC.Call(c.memDC)
+		c.memDC = 0
+	}
+	if c.bmp != 0 {
+		_, _, _ = procDeleteObject.Call(c.bmp)
+		c.bmp = 0
+	}
+	// bits pointed into the DIB section's memory, which is gone now.
+	c.bits = 0
+}
+
+// capture copies the current screen contents into the pre-allocated DIB
+// section and returns them as a newly allocated RGBA image. It fails when
+// the GDI resources are not allocated, when the screen DC cannot be
+// obtained, or when BitBlt reports failure.
+func (c *gdiCapturer) capture() (*image.RGBA, error) {
+	c.mu.Lock()
+	defer c.mu.Unlock()
+
+	if c.memDC == 0 {
+		return nil, fmt.Errorf("GDI resources not allocated")
+	}
+
+	screenDC, _, _ := procGetDC.Call(0)
+	if screenDC == 0 {
+		return nil, fmt.Errorf("GetDC returned 0")
+	}
+	defer func() { _, _, _ = procReleaseDC.Call(0, screenDC) }()
+
+	// SRCCOPY|CAPTUREBLT: CAPTUREBLT forces inclusion of layered/topmost
+	// windows in the capture and is required for GDI BitBlt to return live
+	// pixels when the session is rendered through RDP / DWM-composited
+	// surfaces. Without it BitBlt reads the backing-store DIB which is
+	// often empty (all-black) on RDP and headless sessions.
+	ret, _, _ := procBitBlt.Call(c.memDC, 0, 0, uintptr(c.width), uintptr(c.height),
+		screenDC, 0, 0, srccopy|captureBlt)
+	if ret == 0 {
+		return nil, fmt.Errorf("BitBlt returned 0")
+	}
+
+	// View the DIB section's pixel memory (4 bytes per pixel) as a byte slice.
+	n := c.width * c.height * 4
+	raw := unsafe.Slice((*byte)(unsafe.Pointer(c.bits)), n)
+
+	// GDI gives BGRA, the RFB encoder expects RGBA (img.Pix layout).
+	// Swap R and B in bulk using uint32 operations (one load + mask + shift
+	// per pixel instead of three separate byte assignments).
+	img := image.NewRGBA(image.Rect(0, 0, c.width, c.height))
+	swizzleBGRAtoRGBA(img.Pix, raw)
+	return img, nil
+}
+
+// DesktopCapturer captures the interactive desktop, handling desktop transitions
+// (login screen, UAC prompts). Frames are captured on demand by a single
+// dedicated OS-locked worker goroutine (required because DXGI's D3D11 device
+// context is not thread-safe). Sessions drive
+// timing by calling Capture(); a short staleness cache coalesces concurrent
+// requests. Capture pauses automatically when no clients are connected.
+type DesktopCapturer struct {
+	// mu guards w, h, lastFrame and lastAt.
+	mu   sync.Mutex
+	// w, h hold the screen size recorded when the worker last created a capturer.
+	w, h int
+
+	// lastFrame/lastAt implement a small staleness cache so multiple
+	// near-simultaneous Capture calls share one DXGI round-trip.
+	lastFrame *image.RGBA
+	lastAt    time.Time
+
+	// clients tracks the number of active VNC sessions. When zero, the
+	// worker goroutine releases the underlying capturer.
+	clients atomic.Int32
+
+	// reqCh carries capture requests from sessions to the OS-locked worker.
+	reqCh chan captureReq
+	// wake is signaled when a client connects and the worker should resume.
+	wake chan struct{}
+	// done is closed when Close is called, terminating the worker.
+	done chan struct{}
+}
+
+// captureReq is a single capture request awaiting a reply. Reply channel is
+// buffered to size 1 so the worker never blocks on a sender that's gone.
+type captureReq struct {
+	reply chan captureReply
+}
+
+// captureReply is the worker's answer to a captureReq: the captured frame on
+// success, or the error that prevented the capture.
+type captureReply struct {
+	img *image.RGBA
+	err error
+}
+
+// NewDesktopCapturer creates an on-demand capturer for the active desktop.
+// It starts the worker goroutine immediately; call Close to stop it.
+func NewDesktopCapturer() *DesktopCapturer {
+	c := &DesktopCapturer{
+		// Buffered so ClientConnect never blocks when a wake-up is already pending.
+		wake:  make(chan struct{}, 1),
+		done:  make(chan struct{}),
+		// Unbuffered: a request is handed directly to the worker.
+		reqCh: make(chan captureReq),
+	}
+	go c.worker()
+	return c
+}
+
+// ClientConnect increments the active client count, resuming capture if needed.
+// The wake-up is sent without blocking: if one is already pending the extra
+// signal is dropped.
+func (c *DesktopCapturer) ClientConnect() {
+	c.clients.Add(1)
+	select {
+	case c.wake <- struct{}{}:
+	default:
+	}
+}
+
+// ClientDisconnect decrements the active client count. It does not signal
+// the worker; the worker observes the new count the next time it checks.
+func (c *DesktopCapturer) ClientDisconnect() {
+	c.clients.Add(-1)
+}
+
+// Close stops the capture loop and releases resources. It closes done, which
+// makes the worker exit and unblocks any Capture call waiting on it.
+//
+// Close may be called repeatedly and from several goroutines. The
+// check-then-close runs under c.mu so two concurrent callers cannot both
+// reach close(c.done), which would panic.
+func (c *DesktopCapturer) Close() {
+	c.mu.Lock()
+	defer c.mu.Unlock()
+	select {
+	case <-c.done:
+	default:
+		close(c.done)
+	}
+}
+
+// Width reports the current screen width. While the stored width is still
+// zero, one Capture is issued to make the worker initialise, and the width
+// is read again. validateCapturer depends on Width/Height becoming non-zero
+// promptly after ClientConnect so it doesn't reject brand-new sessions.
+func (c *DesktopCapturer) Width() int {
+	read := func() int {
+		c.mu.Lock()
+		defer c.mu.Unlock()
+		return c.w
+	}
+	if width := read(); width != 0 {
+		return width
+	}
+	// The frame and error are irrelevant; only the side effect on c.w matters.
+	_, _ = c.Capture()
+	return read()
+}
+
+// Height reports the current screen height. While the stored height is still
+// zero, one Capture is issued to make the worker initialise, and the height
+// is read again (same approach as Width).
+func (c *DesktopCapturer) Height() int {
+	read := func() int {
+		c.mu.Lock()
+		defer c.mu.Unlock()
+		return c.h
+	}
+	if height := read(); height != 0 {
+		return height
+	}
+	// The frame and error are irrelevant; only the side effect on c.h matters.
+	_, _ = c.Capture()
+	return read()
+}
+
+// Capture returns a freshly captured frame, serving from a short staleness
+// cache when multiple sessions ask within freshWindow of each other. All
+// real DXGI/GDI work happens on the OS-locked worker goroutine.
+//
+// The call blocks until the worker accepts and answers the request, or until
+// the capturer is closed, in which case an error is returned. Errors reported
+// by the worker are passed through and do not update the cache.
+func (c *DesktopCapturer) Capture() (*image.RGBA, error) {
+	c.mu.Lock()
+	if c.lastFrame != nil && time.Since(c.lastAt) < freshWindow {
+		img := c.lastFrame
+		c.mu.Unlock()
+		return img, nil
+	}
+	// Release the lock before blocking on the worker.
+	c.mu.Unlock()
+
+	// Buffered so the worker's reply never blocks, even if we leave via done.
+	reply := make(chan captureReply, 1)
+	select {
+	case c.reqCh <- captureReq{reply: reply}:
+	case <-c.done:
+		return nil, fmt.Errorf("capturer closed")
+	}
+	select {
+	case r := <-reply:
+		if r.err != nil {
+			return nil, r.err
+		}
+		c.mu.Lock()
+		c.lastFrame = r.img
+		c.lastAt = time.Now()
+		c.mu.Unlock()
+		return r.img, nil
+	case <-c.done:
+		return nil, fmt.Errorf("capturer closed")
+	}
+}
+
+// waitForClient blocks until a client connects or the capturer is closed.
+// It returns true immediately when at least one client is already connected,
+// true when a wake-up arrives, and false when done is closed.
+func (c *DesktopCapturer) waitForClient() bool {
+	if c.clients.Load() > 0 {
+		return true
+	}
+	select {
+	case <-c.wake:
+		return true
+	case <-c.done:
+		return false
+	}
+}
+
+// worker owns DXGI/GDI state on its OS-locked thread and services capture
+// requests from sessions. No capture ticker: a frame is grabbed only when
+// a session asks for one (throttled by Capture()'s staleness cache).
+//
+// Each loop iteration first releases the capturer if no clients remain, then
+// waits for a client, then waits a bounded time for one request. The bounded
+// wait is what lets the loop notice that the last client has gone: nothing
+// signals the worker on disconnect.
+func (c *DesktopCapturer) worker() {
+	runtime.LockOSThread()
+
+	// When running as a Windows service (Session 0), we need to attach to the
+	// interactive window station before OpenInputDesktop will succeed.
+	if err := setupInteractiveWindowStation(); err != nil {
+		log.Warnf("attach to interactive window station: %v", err)
+	}
+
+	w := &captureWorker{c: c}
+	defer w.closeCapturer()
+
+	for {
+		// Drop the capturer when all clients have disconnected so we don't
+		// hold the DXGI duplication or GDI DC on an idle peer. This has to
+		// happen before waitForClient, which blocks for as long as the
+		// client count stays at zero.
+		if c.clients.Load() <= 0 {
+			w.closeCapturer()
+		}
+		if !c.waitForClient() {
+			return
+		}
+		if !w.handleNextRequest() {
+			return
+		}
+	}
+}
+
+// frameCapturer is the per-backend interface used by the worker. DXGI and
+// GDI implementations both satisfy it.
+type frameCapturer interface {
+	capture() (*image.RGBA, error)
+	close()
+}
+
+// captureWorker owns the worker goroutine's mutable state. Extracted into a
+// struct so the request/desktop/init logic can live on small methods and the
+// outer worker() stays a thin loop.
+type captureWorker struct {
+	c             *DesktopCapturer
+	cap           frameCapturer
+	desktopFails  int
+	lastDesktop   string
+	nextInitRetry time.Time
+}
+
+// handleNextRequest waits for shutdown, a capture request, or an idle
+// timeout, and runs a received request through prepCapturer/capture.
+// Returns false when the worker should exit.
+//
+// The idle timeout returns control to the worker loop even when no request
+// arrives, so the loop can re-check the client count and release the
+// capturer once the last client has left.
+func (w *captureWorker) handleNextRequest() bool {
+	const idleRecheck = time.Second
+
+	idle := time.NewTimer(idleRecheck)
+	defer idle.Stop()
+
+	select {
+	case <-w.c.done:
+		return false
+	case req := <-w.c.reqCh:
+		w.serveRequest(req)
+		return true
+	case <-idle.C:
+		return true
+	}
+}
+
+// serveRequest answers one capture request. Exactly one reply is sent on
+// req.reply: the captured frame, or the error from preparing or running the
+// capture. A capture failure also drops the current capturer and delays the
+// next re-initialisation attempt by 100ms.
+func (w *captureWorker) serveRequest(req captureReq) {
+	fc, err := w.prepCapturer()
+	if err != nil {
+		req.reply <- captureReply{err: err}
+		return
+	}
+	img, err := fc.capture()
+	if err != nil {
+		log.Debugf("capture: %v", err)
+		// Rebuild the backend on the next request instead of reusing one
+		// that just failed.
+		w.closeCapturer()
+		w.nextInitRetry = time.Now().Add(100 * time.Millisecond)
+		req.reply <- captureReply{err: err}
+		return
+	}
+	req.reply <- captureReply{img: img}
+}
+
+// prepCapturer switches to the input desktop, handles desktop-change
+// teardown, and creates the underlying capturer on demand. Backoff state is
+// tracked across calls via w.nextInitRetry.
+//
+// It returns the capturer to use, or an error when there is no interactive
+// desktop, when creation is still backing off, or when creation fails (which
+// schedules the next attempt 500ms later).
+func (w *captureWorker) prepCapturer() (frameCapturer, error) {
+	if err := w.refreshDesktop(); err != nil {
+		return nil, err
+	}
+	if w.cap != nil {
+		return w.cap, nil
+	}
+	if time.Now().Before(w.nextInitRetry) {
+		return nil, fmt.Errorf("capturer init backing off")
+	}
+	fc, err := w.createCapturer()
+	if err != nil {
+		w.nextInitRetry = time.Now().Add(500 * time.Millisecond)
+		return nil, err
+	}
+	w.cap = fc
+	// Publish the screen size so Width/Height stop reporting zero.
+	sw, sh := screenSize()
+	w.c.mu.Lock()
+	w.c.w, w.c.h = sw, sh
+	w.c.mu.Unlock()
+	log.Infof("screen capturer ready: %dx%d", sw, sh)
+	return w.cap, nil
+}
+
+// refreshDesktop tracks the active input desktop. When it changes (lock
+// screen, fast-user-switch) the existing capturer is dropped so the next
+// call rebuilds one against the new desktop.
+//
+// It returns an error when the input desktop cannot be attached. Failures
+// are counted and logged on the first occurrence and every 100th after that;
+// recovery is logged once and resets the counter.
+func (w *captureWorker) refreshDesktop() error {
+	ok, desk := switchToInputDesktop()
+	if !ok {
+		w.desktopFails++
+		// Rate-limit the warning: this runs on every capture request.
+		if w.desktopFails == 1 || w.desktopFails%100 == 0 {
+			log.Warnf("switchToInputDesktop failed (count=%d), no interactive desktop session?", w.desktopFails)
+		}
+		return fmt.Errorf("no interactive desktop")
+	}
+	if w.desktopFails > 0 {
+		log.Infof("switchToInputDesktop recovered after %d failures, desktop=%q", w.desktopFails, desk)
+		w.desktopFails = 0
+	}
+	if desk != w.lastDesktop {
+		log.Infof("desktop changed: %q -> %q", w.lastDesktop, desk)
+		w.lastDesktop = desk
+		w.closeCapturer()
+	}
+	return nil
+}
+
+// createCapturer builds a capture backend, preferring DXGI Desktop
+// Duplication and falling back to GDI BitBlt when DXGI cannot be set up.
+// It returns the GDI error when both backends fail.
+func (w *captureWorker) createCapturer() (frameCapturer, error) {
+	dxgi, dxgiErr := newDXGICapturer()
+	if dxgiErr != nil {
+		log.Debugf("DXGI unavailable (%v), falling back to GDI", dxgiErr)
+		gdi, gdiErr := newGDICapturer()
+		if gdiErr != nil {
+			return nil, gdiErr
+		}
+		log.Info("using GDI BitBlt for capture")
+		return gdi, nil
+	}
+	log.Info("using DXGI Desktop Duplication for capture")
+	return dxgi, nil
+}
+
+// closeCapturer shuts down and forgets the current backend. It does nothing
+// when no backend is active.
+func (w *captureWorker) closeCapturer() {
+	if w.cap == nil {
+		return
+	}
+	w.cap.close()
+	w.cap = nil
+}
--- a/client/vnc/server/capture_x11.go
+++ b/client/vnc/server/capture_x11.go
@@ -0,0 +1,479 @@
+//go:build (linux && !android) || freebsd
+
+package server
+
+import (
+	"fmt"
+	"image"
+	"os"
+	"os/exec"
+	"strings"
+	"sync"
+	"sync/atomic"
+	"time"
+
+	log "github.com/sirupsen/logrus"
+
+	"github.com/jezek/xgb"
+	"github.com/jezek/xgb/xproto"
+)
+
+// X11Capturer captures the screen from an X11 display using the MIT-SHM extension.
+// When SHM setup fails it falls back to plain GetImage requests (useSHM false).
+type X11Capturer struct {
+	// mu serializes Capture and CaptureInto.
+	mu      sync.Mutex
+	conn    *xgb.Conn
+	// screen is the first root screen reported by the X server.
+	screen  *xproto.ScreenInfo
+	// w, h are the root screen's size in pixels, read once at construction.
+	w, h    int
+	// shmID, shmAddr and shmSeg are filled in by initSHM on success.
+	shmID   int
+	shmAddr []byte
+	shmSeg  uint32 // shm.Seg
+	useSHM  bool
+	// bufs double-buffers output images so the X11Poller's capture loop can
+	// overwrite one while the session is still encoding the other. Before
+	// this, a single reused buffer would race with the reader. Allocation
+	// happens on first use and on geometry change.
+	bufs [2]*image.RGBA
+	// cur is the index into bufs handed out by the latest nextBuffer call.
+	cur  int
+}
+
+// detectX11Display locates a running X server and exports DISPLAY (and
+// XAUTHORITY when it can be determined) into this process's environment.
+// Needed when running as a system service, where these variables are not
+// set. It does nothing when DISPLAY is already set.
+func detectX11Display() {
+	if os.Getenv("DISPLAY") != "" {
+		return
+	}
+
+	// Prefer scanning /proc (Linux); when that finds nothing, fall back to
+	// the X socket directory, which also works on systems without /proc.
+	if !detectX11FromProc() {
+		detectX11FromSockets()
+	}
+}
+
+// detectX11FromProc walks the directories under /proc, reads each one's
+// cmdline, and looks for an X server command line naming a display. The
+// first match is exported via setDisplayEnv. It returns false when /proc
+// cannot be read or no match is found.
+func detectX11FromProc() bool {
+	procEntries, err := os.ReadDir("/proc")
+	if err != nil {
+		return false
+	}
+	for _, ent := range procEntries {
+		if !ent.IsDir() {
+			continue
+		}
+		raw, readErr := os.ReadFile("/proc/" + ent.Name() + "/cmdline")
+		if readErr != nil {
+			// Entry without a readable cmdline: skip it.
+			continue
+		}
+		display, auth := parseXorgArgs(splitCmdline(raw))
+		if display == "" {
+			continue
+		}
+		setDisplayEnv(display, auth)
+		return true
+	}
+	return false
+}
+
+// detectX11FromSockets derives DISPLAY from the socket names in
+// /tmp/.X11-unix and asks ps for the server's -auth file. Works on FreeBSD
+// and other systems without /proc. It returns false when the directory
+// cannot be read or holds no "X<n>" entry.
+func detectX11FromSockets() bool {
+	sockets, err := os.ReadDir("/tmp/.X11-unix")
+	if err != nil {
+		return false
+	}
+
+	// os.ReadDir returns entries sorted by name, so the first "X<n>" entry
+	// is the one used.
+	for _, ent := range sockets {
+		sock := ent.Name()
+		if len(sock) >= 2 && sock[0] == 'X' {
+			display := ":" + sock[1:]
+			os.Setenv("DISPLAY", display)
+			log.Infof("auto-detected DISPLAY=%s (from socket)", display)
+
+			// XAUTHORITY is optional: export it only when ps reveals it.
+			auth := findXorgAuthFromPS()
+			if auth != "" {
+				os.Setenv("XAUTHORITY", auth)
+				log.Infof("auto-detected XAUTHORITY=%s (from ps)", auth)
+			}
+			return true
+		}
+	}
+	return false
+}
+
+// findXorgAuthFromPS runs "ps auxww" and returns the argument following
+// -auth on the first line that mentions "Xorg" or "/X " and carries such an
+// argument. It returns "" when ps fails or nothing matches.
+func findXorgAuthFromPS() string {
+	out, err := exec.Command("ps", "auxww").Output()
+	if err != nil {
+		return ""
+	}
+	for _, line := range strings.Split(string(out), "\n") {
+		if strings.Contains(line, "Xorg") || strings.Contains(line, "/X ") {
+			fields := strings.Fields(line)
+			// Stop one short of the end: -auth needs a following value.
+			for i := 0; i+1 < len(fields); i++ {
+				if fields[i] == "-auth" {
+					return fields[i+1]
+				}
+			}
+		}
+	}
+	return ""
+}
+
+// parseXorgArgs inspects a process's argv and, when argv[0] looks like an X
+// server, extracts the display (the last argument starting with ':') and the
+// value following the last "-auth". Both results are empty when args is
+// empty or argv[0] does not look like an X server.
+//
+// argv[0] is accepted when it equals "Xorg", ends in 'X', or contains "/X".
+func parseXorgArgs(args []string) (display, auth string) {
+	if len(args) == 0 {
+		return "", ""
+	}
+	base := args[0]
+	// "X" and any "/Xorg" path are already covered by the suffix and "/X" checks.
+	looksLikeX := base == "Xorg" || strings.HasSuffix(base, "X") || strings.Contains(base, "/X")
+	if !looksLikeX {
+		return "", ""
+	}
+	rest := args[1:]
+	for i, arg := range rest {
+		switch {
+		case strings.HasPrefix(arg, ":"):
+			display = arg
+		case arg == "-auth" && i+1 < len(rest):
+			auth = rest[i+1]
+		}
+	}
+	return display, auth
+}
+
+// setDisplayEnv exports DISPLAY, and XAUTHORITY when auth is non-empty, into
+// the process environment and logs what was set. Errors from os.Setenv are
+// not checked.
+func setDisplayEnv(display, auth string) {
+	os.Setenv("DISPLAY", display)
+	log.Infof("auto-detected DISPLAY=%s", display)
+	if auth != "" {
+		os.Setenv("XAUTHORITY", auth)
+		log.Infof("auto-detected XAUTHORITY=%s", auth)
+	}
+}
+
+// splitCmdline converts NUL-separated command-line bytes (as read from
+// /proc/<pid>/cmdline) into a list of arguments, dropping empty entries.
+// It returns nil when data contains no non-empty argument.
+func splitCmdline(data []byte) []string {
+	parts := splitNull(data)
+	var args []string
+	for i := range parts {
+		if len(parts[i]) == 0 {
+			continue
+		}
+		args = append(args, string(parts[i]))
+	}
+	return args
+}
+
+// splitNull cuts data at every NUL byte and returns the pieces, which alias
+// data rather than copying it. Empty pieces between consecutive NULs are
+// kept. Bytes after the last NUL are returned only when non-empty, so a
+// trailing NUL adds no empty final piece. Empty or nil input yields nil.
+func splitNull(data []byte) [][]byte {
+	var parts [][]byte
+	begin := 0
+	for end := 0; end < len(data); end++ {
+		if data[end] != 0 {
+			continue
+		}
+		parts = append(parts, data[begin:end])
+		begin = end + 1
+	}
+	if tail := data[begin:]; len(tail) > 0 {
+		parts = append(parts, tail)
+	}
+	return parts
+}
+
+// NewX11Capturer connects to the X11 display and sets up shared memory capture.
+//
+// An empty display triggers auto-detection, after which DISPLAY is read from
+// the environment. It fails when no display can be determined, when the
+// connection cannot be established, or when the server reports no screens.
+// A failure to set up SHM is not an error: the capturer then uses GetImage.
+func NewX11Capturer(display string) (*X11Capturer, error) {
+	if display == "" {
+		detectX11Display()
+		display = os.Getenv("DISPLAY")
+	}
+	if display == "" {
+		return nil, fmt.Errorf("DISPLAY not set and no Xorg process found")
+	}
+
+	conn, err := xgb.NewConnDisplay(display)
+	if err != nil {
+		return nil, fmt.Errorf("connect to X11 display %s: %w", display, err)
+	}
+
+	setup := xproto.Setup(conn)
+	if len(setup.Roots) == 0 {
+		conn.Close()
+		return nil, fmt.Errorf("no X11 screens")
+	}
+	// Only the first root screen is captured.
+	screen := setup.Roots[0]
+
+	c := &X11Capturer{
+		conn:   conn,
+		screen: &screen,
+		w:      int(screen.WidthInPixels),
+		h:      int(screen.HeightInPixels),
+	}
+
+	if err := c.initSHM(); err != nil {
+		log.Debugf("X11 SHM not available, using slow GetImage: %v", err)
+	}
+
+	log.Infof("X11 capturer ready: %dx%d (display=%s, shm=%v)", c.w, c.h, display, c.useSHM)
+	return c, nil
+}
+
+// initSHM is implemented in capture_x11_shm_linux.go (requires SysV SHM).
+// On platforms without SysV SHM (FreeBSD), a stub returns an error and
+// the capturer falls back to GetImage.
+
+// Width returns the screen width in pixels, as recorded at construction.
+func (c *X11Capturer) Width() int { return c.w }
+
+// Height returns the screen height in pixels, as recorded at construction.
+func (c *X11Capturer) Height() int { return c.h }
+
+// Capture returns the current screen as an RGBA image, using the SHM path
+// when it was set up and GetImage otherwise. The GetImage path returns one
+// of the capturer's two internal buffers, so that image is overwritten by a
+// later capture.
+func (c *X11Capturer) Capture() (*image.RGBA, error) {
+	c.mu.Lock()
+	defer c.mu.Unlock()
+
+	if c.useSHM {
+		return c.captureSHM()
+	}
+	return c.captureGetImage()
+}
+
+// CaptureInto fills the caller's destination buffer in one pass. The
+// source path (SHM or fallback GetImage) writes directly into dst.Pix
+// instead of going through the X11Capturer's internal double-buffer,
+// saving one full-frame memcpy per capture.
+//
+// dst must have exactly the capturer's dimensions; otherwise an error is
+// returned and dst is left untouched.
+func (c *X11Capturer) CaptureInto(dst *image.RGBA) error {
+	c.mu.Lock()
+	defer c.mu.Unlock()
+	if dst.Rect.Dx() != c.w || dst.Rect.Dy() != c.h {
+		return fmt.Errorf("dst size mismatch: dst=%dx%d capturer=%dx%d",
+			dst.Rect.Dx(), dst.Rect.Dy(), c.w, c.h)
+	}
+	if c.useSHM {
+		return c.captureSHMInto(dst)
+	}
+	return c.captureGetImageInto(dst)
+}
+
+// captureGetImageInto fetches the whole root window with a GetImage request
+// and swizzles the reply into dst.Pix. It returns an error when the request
+// fails or the reply holds fewer than w*h*4 bytes (4 bytes per pixel are
+// assumed). Caller must hold c.mu.
+func (c *X11Capturer) captureGetImageInto(dst *image.RGBA) error {
+	// 0xFFFFFFFF is the plane mask: request all planes.
+	cookie := xproto.GetImage(c.conn, xproto.ImageFormatZPixmap,
+		xproto.Drawable(c.screen.Root),
+		0, 0, uint16(c.w), uint16(c.h), 0xFFFFFFFF)
+	reply, err := cookie.Reply()
+	if err != nil {
+		return fmt.Errorf("GetImage: %w", err)
+	}
+	n := c.w * c.h * 4
+	if len(reply.Data) < n {
+		return fmt.Errorf("GetImage returned %d bytes, expected %d", len(reply.Data), n)
+	}
+	swizzleBGRAtoRGBA(dst.Pix, reply.Data)
+	return nil
+}
+
+// captureSHM is implemented in capture_x11_shm_linux.go.
+
+// captureGetImage fetches the whole root window with a GetImage request and
+// swizzles it into the next internal double-buffer image. The buffer index
+// advances only after the reply has been validated, so a failed request
+// leaves the previously returned frame's buffer untouched. Caller must hold
+// c.mu.
+func (c *X11Capturer) captureGetImage() (*image.RGBA, error) {
+	// 0xFFFFFFFF is the plane mask: request all planes.
+	cookie := xproto.GetImage(c.conn, xproto.ImageFormatZPixmap,
+		xproto.Drawable(c.screen.Root),
+		0, 0, uint16(c.w), uint16(c.h), 0xFFFFFFFF)
+
+	reply, err := cookie.Reply()
+	if err != nil {
+		return nil, fmt.Errorf("GetImage: %w", err)
+	}
+
+	data := reply.Data
+	// 4 bytes per pixel are assumed.
+	n := c.w * c.h * 4
+	if len(data) < n {
+		return nil, fmt.Errorf("GetImage returned %d bytes, expected %d", len(data), n)
+	}
+
+	img := c.nextBuffer()
+	swizzleBGRAtoRGBA(img.Pix, data)
+	return img, nil
+}
+
+// nextBuffer flips the double-buffer index and returns the image at the new
+// index for the next capture to fill. A missing buffer, or one whose size no
+// longer matches the capturer's geometry, is replaced by a fresh allocation.
+func (c *X11Capturer) nextBuffer() *image.RGBA {
+	c.cur ^= 1
+	if buf := c.bufs[c.cur]; buf != nil && buf.Rect.Dx() == c.w && buf.Rect.Dy() == c.h {
+		return buf
+	}
+	fresh := image.NewRGBA(image.Rect(0, 0, c.w, c.h))
+	c.bufs[c.cur] = fresh
+	return fresh
+}
+
+// Close releases X11 resources: the SHM state first, then the X connection.
+// It does not take c.mu, so callers must make sure no capture is in flight
+// (X11Poller calls it while holding its own mutex).
+func (c *X11Capturer) Close() {
+	c.closeSHM()
+	c.conn.Close()
+}
+
+// closeSHM is implemented in capture_x11_shm_linux.go.
+
+// X11Poller wraps X11Capturer with a staleness-cached on-demand Capture:
+// sessions drive captures themselves through the encoder goroutine, so we
+// don't need a background ticker. The last result is cached for a short
+// window so concurrent sessions coalesce into one capture.
+//
+// The capturer is allocated lazily on first use and released when all
+// clients disconnect, so an idle peer holds no X connection or SHM segment.
+type X11Poller struct {
+	// mu guards capturer, w, h, lastFrame, lastAt and initBackoffUntil.
+	mu sync.Mutex
+
+	capturer *X11Capturer
+	// w, h cache the dimensions of the most recently opened capturer.
+	w, h     int
+	// closed at Close so callers can stop waiting on retry backoff.
+	done chan struct{}
+
+	// lastFrame/lastAt implement a small cache: multiple near-simultaneous
+	// Capture calls (multi-client, or input-coalesced) return the same
+	// frame instead of hammering the X server.
+	lastFrame *image.RGBA
+	lastAt    time.Time
+
+	// initBackoffUntil throttles capturer re-init when the X server is
+	// unavailable or flapping.
+	initBackoffUntil time.Time
+
+	// clients counts active sessions; updated atomically, outside mu.
+	clients atomic.Int32
+	// display is passed to NewX11Capturer; empty means auto-detect.
+	display string
+}
+
+// initRetryBackoff gates capturer re-init attempts after a failure so we
+// don't spin on X server errors. It is applied after both failed
+// initialisation and failed capture.
+const initRetryBackoff = 2 * time.Second
+
+// NewX11Poller creates a lazy on-demand capturer for the given X display.
+// No connection is made here; the capturer is opened on first use.
+func NewX11Poller(display string) *X11Poller {
+	return &X11Poller{
+		display: display,
+		done:    make(chan struct{}),
+	}
+}
+
+// ClientConnect increments the active client count. The first client triggers
+// eager capturer initialisation so that the first FBUpdateRequest doesn't
+// pay the X11 connect + SHM attach latency. An initialisation error is
+// ignored here; later calls retry through ensureCapturerLocked.
+func (p *X11Poller) ClientConnect() {
+	if p.clients.Add(1) == 1 {
+		p.mu.Lock()
+		_ = p.ensureCapturerLocked()
+		p.mu.Unlock()
+	}
+}
+
+// ClientDisconnect lowers the active client count. When it reaches zero the
+// capturer is closed and the cached frame dropped, so an idle peer holds no
+// X resources.
+func (p *X11Poller) ClientDisconnect() {
+	if p.clients.Add(-1) != 0 {
+		return
+	}
+	p.mu.Lock()
+	defer p.mu.Unlock()
+	if p.capturer == nil {
+		return
+	}
+	p.capturer.Close()
+	p.capturer = nil
+	p.lastFrame = nil
+}
+
+// Close releases all resources. Subsequent Capture calls will fail.
+//
+// It closes done (once), shuts down the capturer if one is open, and drops
+// the cached frame. Without dropping the cache, a Capture issued within
+// freshWindow of the last capture would still return the stale frame after
+// Close. Safe to call multiple times.
+func (p *X11Poller) Close() {
+	p.mu.Lock()
+	defer p.mu.Unlock()
+	select {
+	case <-p.done:
+	default:
+		close(p.done)
+	}
+	if p.capturer != nil {
+		p.capturer.Close()
+		p.capturer = nil
+	}
+	p.lastFrame = nil
+}
+
+// Width returns the screen width. Triggers lazy init if needed.
+// An init failure is ignored and the cached value returned, which is 0 when
+// no capturer has ever been opened.
+func (p *X11Poller) Width() int {
+	p.mu.Lock()
+	defer p.mu.Unlock()
+	_ = p.ensureCapturerLocked()
+	return p.w
+}
+
+// Height returns the screen height. Triggers lazy init if needed.
+// An init failure is ignored and the cached value returned, which is 0 when
+// no capturer has ever been opened.
+func (p *X11Poller) Height() int {
+	p.mu.Lock()
+	defer p.mu.Unlock()
+	_ = p.ensureCapturerLocked()
+	return p.h
+}
+
+// Capture returns a fresh frame, serving from the short-lived cache if a
+// previous caller captured within freshWindow. The cache is consulted before
+// the capturer is checked, so a cached frame is returned even while no
+// capturer is open.
+//
+// A capture failure closes the capturer and starts the re-init backoff; the
+// wrapped error is returned.
+func (p *X11Poller) Capture() (*image.RGBA, error) {
+	p.mu.Lock()
+	defer p.mu.Unlock()
+
+	if p.lastFrame != nil && time.Since(p.lastAt) < freshWindow {
+		return p.lastFrame, nil
+	}
+	if err := p.ensureCapturerLocked(); err != nil {
+		return nil, err
+	}
+	img, err := p.capturer.Capture()
+	if err != nil {
+		// Drop the capturer so the next call re-inits; the X connection may
+		// have died (e.g. Xorg restart).
+		p.capturer.Close()
+		p.capturer = nil
+		p.initBackoffUntil = time.Now().Add(initRetryBackoff)
+		return nil, fmt.Errorf("x11 capture: %w", err)
+	}
+	p.lastFrame = img
+	p.lastAt = time.Now()
+	return img, nil
+}
+
+// CaptureInto fills dst directly via the underlying capturer, bypassing
+// the freshness cache. The session's prevFrame/curFrame swap means each
+// session needs its own buffer anyway, so caching wouldn't help.
+//
+// Any capturer error, including a dst size mismatch, closes the capturer and
+// starts the re-init backoff; the wrapped error is returned.
+func (p *X11Poller) CaptureInto(dst *image.RGBA) error {
+	p.mu.Lock()
+	defer p.mu.Unlock()
+
+	if err := p.ensureCapturerLocked(); err != nil {
+		return err
+	}
+	if err := p.capturer.CaptureInto(dst); err != nil {
+		p.capturer.Close()
+		p.capturer = nil
+		p.initBackoffUntil = time.Now().Add(initRetryBackoff)
+		return fmt.Errorf("x11 capture: %w", err)
+	}
+	return nil
+}
+
+// ensureCapturerLocked initialises the underlying X11Capturer if not
+// already open. Caller must hold p.mu.
+//
+// It returns an error when the poller has been closed, when a previous
+// failure's backoff has not yet elapsed, or when NewX11Capturer fails (which
+// starts a new backoff). On success the capturer's dimensions are cached.
+func (p *X11Poller) ensureCapturerLocked() error {
+	if p.capturer != nil {
+		return nil
+	}
+	// Non-blocking check: refuse to reopen after Close.
+	select {
+	case <-p.done:
+		return fmt.Errorf("x11 capturer closed")
+	default:
+	}
+	if time.Now().Before(p.initBackoffUntil) {
+		return fmt.Errorf("x11 capturer unavailable (retry scheduled)")
+	}
+	c, err := NewX11Capturer(p.display)
+	if err != nil {
+		p.initBackoffUntil = time.Now().Add(initRetryBackoff)
+		log.Debugf("X11 capturer: %v", err)
+		return err
+	}
+	p.capturer = c
+	p.w, p.h = c.Width(), c.Height()
+	return nil
+}
--- a/client/vnc/server/capture_x11_shm_linux.go
+++ b/client/vnc/server/capture_x11_shm_linux.go
@@ -0,0 +1,96 @@
+//go:build linux && !android
+
+package server
+
+import (
+	"fmt"
+	"image"
+
+	"github.com/jezek/xgb/shm"
+	"github.com/jezek/xgb/xproto"
+	log "github.com/sirupsen/logrus"
+	"golang.org/x/sys/unix"
+)
+
+// initSHM sets up MIT-SHM capture: it initialises the X SHM extension,
+// allocates a private SysV shared-memory segment of c.w*c.h*4 bytes,
+// attaches it to this process and then to the X server. On success the
+// segment id, mapping and X segment id are stored on c and c.useSHM is set.
+// On any failure the local mapping is released again and an error is
+// returned; c is left unchanged.
+func (c *X11Capturer) initSHM() error {
+	if err := shm.Init(c.conn); err != nil {
+		return fmt.Errorf("init SHM extension: %w", err)
+	}
+
+	// 4 bytes per pixel for the full c.w x c.h frame.
+	size := c.w * c.h * 4
+	id, err := unix.SysvShmGet(unix.IPC_PRIVATE, size, unix.IPC_CREAT|0600)
+	if err != nil {
+		return fmt.Errorf("shmget: %w", err)
+	}
+
+	addr, err := unix.SysvShmAttach(id, 0, 0)
+	if err != nil {
+		// Nothing is attached yet, so remove the segment explicitly.
+		if _, ctlErr := unix.SysvShmCtl(id, unix.IPC_RMID, nil); ctlErr != nil {
+			log.Debugf("shmctl IPC_RMID on attach failure: %v", ctlErr)
+		}
+		return fmt.Errorf("shmat: %w", err)
+	}
+
+	// Mark the segment for removal right after attaching; per shmctl(2) it
+	// is only destroyed once the last attachment is gone, so the failure
+	// paths below only need to detach.
+	if _, err := unix.SysvShmCtl(id, unix.IPC_RMID, nil); err != nil {
+		log.Debugf("shmctl IPC_RMID: %v", err)
+	}
+
+	seg, err := shm.NewSegId(c.conn)
+	if err != nil {
+		if detachErr := unix.SysvShmDetach(addr); detachErr != nil {
+			log.Debugf("shmdt on new-seg failure: %v", detachErr)
+		}
+		return fmt.Errorf("new SHM seg: %w", err)
+	}
+
+	// Attach the segment on the X server side (last argument false:
+	// not read-only) and wait for the server to confirm.
+	if err := shm.AttachChecked(c.conn, seg, uint32(id), false).Check(); err != nil {
+		if detachErr := unix.SysvShmDetach(addr); detachErr != nil {
+			log.Debugf("shmdt on attach-checked failure: %v", detachErr)
+		}
+		return fmt.Errorf("SHM attach to X: %w", err)
+	}
+
+	c.shmID = id
+	c.shmAddr = addr
+	c.shmSeg = uint32(seg)
+	c.useSHM = true
+	return nil
+}
+
+// captureSHM refreshes the shared-memory segment via fillSHM and swizzles
+// its BGRA contents into the RGBA buffer handed out by nextBuffer.
+func (c *X11Capturer) captureSHM() (*image.RGBA, error) {
+	fillErr := c.fillSHM()
+	if fillErr != nil {
+		return nil, fillErr
+	}
+	out := c.nextBuffer()
+	frameBytes := c.w * c.h * 4
+	swizzleBGRAtoRGBA(out.Pix, c.shmAddr[:frameBytes])
+	return out, nil
+}
+
+// captureSHMInto performs one SHM GetImage and swizzles the result straight
+// into the caller's dst, bypassing the capturer's internal double-buffer.
+func (c *X11Capturer) captureSHMInto(dst *image.RGBA) error {
+	fillErr := c.fillSHM()
+	if fillErr == nil {
+		swizzleBGRAtoRGBA(dst.Pix, c.shmAddr[:c.w*c.h*4])
+	}
+	return fillErr
+}
+
+func (c *X11Capturer) fillSHM() error {
+	cookie := shm.GetImage(c.conn, xproto.Drawable(c.screen.Root),
+		0, 0, uint16(c.w), uint16(c.h), 0xFFFFFFFF,
+		xproto.ImageFormatZPixmap, shm.Seg(c.shmSeg), 0)
+	if _, err := cookie.Reply(); err != nil {
+		return fmt.Errorf("SHM GetImage: %w", err)
+	}
+	return nil
+}
+
+// closeSHM detaches the shared-memory segment from the X server and from
+// this process. It is a no-op unless initSHM succeeded. The SHM state is
+// cleared afterwards so a repeated call cannot detach twice and no slice
+// into the unmapped segment is left on the capturer.
+func (c *X11Capturer) closeSHM() {
+	if !c.useSHM {
+		return
+	}
+	c.useSHM = false
+	shm.Detach(c.conn, shm.Seg(c.shmSeg))
+	if err := unix.SysvShmDetach(c.shmAddr); err != nil {
+		log.Debugf("shmdt on close: %v", err)
+	}
+	// The mapping is gone; drop the reference so it cannot be read again.
+	c.shmAddr = nil
+}
--- a/client/vnc/server/capture_x11_shm_stub.go
+++ b/client/vnc/server/capture_x11_shm_stub.go
@@ -0,0 +1,24 @@
+//go:build freebsd
+
+package server
+
+import (
+	"fmt"
+	"image"
+)
+
+// initSHM is the stub for builds without SysV SHM support; it always
+// returns an error.
+func (c *X11Capturer) initSHM() error {
+	return fmt.Errorf("SysV SHM not available on this platform")
+}
+
+// captureSHM is the stub counterpart of the SHM capture path; it always
+// returns a nil image and an error.
+func (c *X11Capturer) captureSHM() (*image.RGBA, error) {
+	return nil, fmt.Errorf("SHM capture not available on this platform")
+}
+
+// captureSHMInto is the stub counterpart of the SHM capture-into path; the
+// destination is ignored and an error is always returned.
+func (c *X11Capturer) captureSHMInto(_ *image.RGBA) error {
+	return fmt.Errorf("SHM capture not available on this platform")
+}
+
+// closeSHM is a no-op in this stub: initSHM never succeeds here, so there
+// is no segment to release.
+func (c *X11Capturer) closeSHM() {
+	// no SHM to close on this platform
+}
--- a/client/vnc/server/coalesce_test.go
+++ b/client/vnc/server/coalesce_test.go
@@ -0,0 +1,75 @@
+package server
+
+import (
+	"reflect"
+	"testing"
+)
+
+func TestCoalesceRects(t *testing.T) {
+	cases := []struct {
+		name string
+		in   [][4]int
+		want [][4]int
+	}{
+		{
+			name: "empty",
+			in:   nil,
+			want: nil,
+		},
+		{
+			name: "single",
+			in:   [][4]int{{0, 0, 64, 64}},
+			want: [][4]int{{0, 0, 64, 64}},
+		},
+		{
+			name: "horizontal_run",
+			in:   [][4]int{{0, 0, 64, 64}, {64, 0, 64, 64}, {128, 0, 64, 64}},
+			want: [][4]int{{0, 0, 192, 64}},
+		},
+		{
+			name: "vertical_run",
+			in:   [][4]int{{0, 0, 64, 64}, {0, 64, 64, 64}, {0, 128, 64, 64}},
+			want: [][4]int{{0, 0, 64, 192}},
+		},
+		{
+			name: "block_2x2",
+			in: [][4]int{
+				{0, 0, 64, 64}, {64, 0, 64, 64},
+				{0, 64, 64, 64}, {64, 64, 64, 64},
+			},
+			want: [][4]int{{0, 0, 128, 128}},
+		},
+		{
+			name: "no_merge_gap",
+			in:   [][4]int{{0, 0, 64, 64}, {192, 0, 64, 64}},
+			want: [][4]int{{0, 0, 64, 64}, {192, 0, 64, 64}},
+		},
+		{
+			name: "two_disjoint_columns",
+			in: [][4]int{
+				{0, 0, 64, 64}, {192, 0, 64, 64},
+				{0, 64, 64, 64}, {192, 64, 64, 64},
+			},
+			want: [][4]int{{0, 0, 64, 128}, {192, 0, 64, 128}},
+		},
+		{
+			name: "misaligned_widths_no_vertical_merge",
+			in: [][4]int{
+				{0, 0, 128, 64},
+				{0, 64, 64, 64},
+			},
+			want: [][4]int{{0, 0, 128, 64}, {0, 64, 64, 64}},
+		},
+	}
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			got := coalesceRects(tc.in)
+			if len(got) == 0 && len(tc.want) == 0 {
+				return
+			}
+			if !reflect.DeepEqual(got, tc.want) {
+				t.Fatalf("got %v want %v", got, tc.want)
+			}
+		})
+	}
+}
--- a/client/vnc/server/hextile_test.go
+++ b/client/vnc/server/hextile_test.go
@@ -0,0 +1,188 @@
+package server
+
+import (
+	"image"
+	"testing"
+)
+
+// decodeHextile decodes an encoded Hextile rect back into pixels so a test
+// can compare it with the source image. It implements just enough of the
+// noVNC Hextile decoder to validate our encoder.
+//
+// buf must start with the 12-byte rectangle header: x, y, w, h as
+// big-endian uint16 followed by the encoding as big-endian uint32, which
+// must equal encHextile. The sub-tile stream follows. pf describes how
+// pixels are packed in the stream. The test is failed via t.Fatalf when the
+// header is too short or the encoding is not Hextile.
+func decodeHextile(t *testing.T, buf []byte, pf clientPixelFormat) *image.RGBA {
+	t.Helper()
+	if len(buf) < 12 {
+		t.Fatalf("buf too short: %d", len(buf))
+	}
+	x := int(uint16(buf[0])<<8 | uint16(buf[1]))
+	y := int(uint16(buf[2])<<8 | uint16(buf[3]))
+	w := int(uint16(buf[4])<<8 | uint16(buf[5]))
+	h := int(uint16(buf[6])<<8 | uint16(buf[7]))
+	enc := uint32(buf[8])<<24 | uint32(buf[9])<<16 | uint32(buf[10])<<8 | uint32(buf[11])
+	if enc != encHextile {
+		t.Fatalf("not hextile: %d", enc)
+	}
+	body := buf[12:]
+	bytesPerPixel := max(int(pf.bpp)/8, 1)
+	out := image.NewRGBA(image.Rect(x, y, x+w, y+h))
+
+	// bg and fg persist across sub-tiles: a sub-tile that does not specify
+	// them reuses the values from the previous one.
+	var bg, fg [3]byte
+	pos := 0
+	// readPixel consumes one pixel at body[pos] in the client's byte order
+	// and splits it into channels with the shifts and max values from pf.
+	// Channels are masked but not rescaled.
+	readPixel := func() [3]byte {
+		var v uint32
+		if pf.bigEndian != 0 {
+			for i := 0; i < bytesPerPixel; i++ {
+				v |= uint32(body[pos+i]) << (8 * (bytesPerPixel - 1 - i))
+			}
+		} else {
+			for i := 0; i < bytesPerPixel; i++ {
+				v |= uint32(body[pos+i]) << (8 * i)
+			}
+		}
+		pos += bytesPerPixel
+		r := byte((v >> pf.rShift) & uint32(pf.rMax))
+		g := byte((v >> pf.gShift) & uint32(pf.gMax))
+		b := byte((v >> pf.bShift) & uint32(pf.bMax))
+		return [3]byte{r, g, b}
+	}
+	// Walk the rect in hextileSubSize sub-tiles, row by row; edge tiles
+	// are clipped to the rect.
+	for sy := 0; sy < h; sy += hextileSubSize {
+		sh := min(hextileSubSize, h-sy)
+		for sx := 0; sx < w; sx += hextileSubSize {
+			sw := min(hextileSubSize, w-sx)
+			flags := body[pos]
+			pos++
+			// Raw sub-tile: sw*sh pixels follow in row-major order.
+			if flags&hextileRaw != 0 {
+				for ry := 0; ry < sh; ry++ {
+					for rx := 0; rx < sw; rx++ {
+						px := readPixel()
+						i := (sy+ry)*out.Stride + (sx+rx)*4
+						out.Pix[i+0] = px[0]
+						out.Pix[i+1] = px[1]
+						out.Pix[i+2] = px[2]
+						out.Pix[i+3] = 0xff
+					}
+				}
+				continue
+			}
+			if flags&hextileBackgroundSpecified != 0 {
+				bg = readPixel()
+			}
+			if flags&hextileForegroundSpecified != 0 {
+				fg = readPixel()
+			}
+			// Fill sub-tile with bg.
+			for ry := 0; ry < sh; ry++ {
+				for rx := 0; rx < sw; rx++ {
+					i := (sy+ry)*out.Stride + (sx+rx)*4
+					out.Pix[i+0] = bg[0]
+					out.Pix[i+1] = bg[1]
+					out.Pix[i+2] = bg[2]
+					out.Pix[i+3] = 0xff
+				}
+			}
+			if flags&hextileAnySubrects == 0 {
+				continue
+			}
+			// Subrect list: a count byte, then per subrect an optional
+			// colour followed by packed x/y and (w-1)/(h-1) nibbles.
+			n := int(body[pos])
+			pos++
+			for k := 0; k < n; k++ {
+				color := fg
+				if flags&hextileSubrectsColoured != 0 {
+					color = readPixel()
+				}
+				xy := body[pos]
+				wh := body[pos+1]
+				pos += 2
+				rxr := int(xy >> 4)
+				ryr := int(xy & 0x0f)
+				rwr := int(wh>>4) + 1
+				rhr := int(wh&0x0f) + 1
+				for ry := 0; ry < rhr; ry++ {
+					for rx := 0; rx < rwr; rx++ {
+						i := (sy+ryr+ry)*out.Stride + (sx+rxr+rx)*4
+						out.Pix[i+0] = color[0]
+						out.Pix[i+1] = color[1]
+						out.Pix[i+2] = color[2]
+						out.Pix[i+3] = 0xff
+					}
+				}
+			}
+		}
+	}
+	return out
+}
+
+// makeUniformImage returns a w x h RGBA image anchored at the origin in
+// which every pixel is the opaque colour (r, g, b).
+func makeUniformImage(w, h int, r, g, b byte) *image.RGBA {
+	canvas := image.NewRGBA(image.Rect(0, 0, w, h))
+	pixel := [4]byte{r, g, b, 0xff}
+	for off := 0; off < len(canvas.Pix); off += 4 {
+		copy(canvas.Pix[off:off+4], pixel[:])
+	}
+	return canvas
+}
+
+// makeTwoColorImage returns a w x h image with a uniform background
+// (0x10, 0x20, 0x30) and a vertical foreground bar (0xa0, 0xb0, 0xc0)
+// covering columns [w/4, w/2) on every row.
+func makeTwoColorImage(w, h int) *image.RGBA {
+	canvas := makeUniformImage(w, h, 0x10, 0x20, 0x30)
+	bar := [3]byte{0xa0, 0xb0, 0xc0}
+	barStart, barEnd := w/4, w/2
+	for row := 0; row < h; row++ {
+		rowOff := row * canvas.Stride
+		for col := barStart; col < barEnd; col++ {
+			// Only RGB is rewritten; alpha keeps the background's 0xff.
+			off := rowOff + col*4
+			canvas.Pix[off+0] = bar[0]
+			canvas.Pix[off+1] = bar[1]
+			canvas.Pix[off+2] = bar[2]
+		}
+	}
+	return canvas
+}
+
+// compareImages fails the test unless want and got have the same bounds
+// and identical R, G and B values at every pixel. Alpha is not compared.
+func compareImages(t *testing.T, want, got *image.RGBA) {
+	t.Helper()
+	if want.Rect != got.Rect {
+		t.Fatalf("rect mismatch: %v vs %v", want.Rect, got.Rect)
+	}
+	width, height := want.Rect.Dx(), want.Rect.Dy()
+	for y := 0; y < height; y++ {
+		wantRow, gotRow := y*want.Stride, y*got.Stride
+		for x := 0; x < width; x++ {
+			i, j := wantRow+x*4, gotRow+x*4
+			same := want.Pix[i] == got.Pix[j] &&
+				want.Pix[i+1] == got.Pix[j+1] &&
+				want.Pix[i+2] == got.Pix[j+2]
+			if !same {
+				t.Fatalf("pixel mismatch at (%d,%d): want %v got %v",
+					x, y, want.Pix[i:i+3], got.Pix[j:j+3])
+			}
+		}
+	}
+}
+
+func TestEncodeHextileRect_Uniform(t *testing.T) {
+	pf := defaultClientPixelFormat()
+	img := makeUniformImage(64, 64, 0x33, 0x66, 0x99)
+	buf := encodeHextileRect(img, pf, 0, 0, 64, 64)
+	got := decodeHextile(t, buf, pf)
+	compareImages(t, img, got)
+}
+
+func TestEncodeHextileRect_TwoColor(t *testing.T) {
+	pf := defaultClientPixelFormat()
+	img := makeTwoColorImage(64, 64)
+	buf := encodeHextileRect(img, pf, 0, 0, 64, 64)
+	got := decodeHextile(t, buf, pf)
+	compareImages(t, img, got)
+}
+
+func TestEncodeHextileRect_Multicolor(t *testing.T) {
+	pf := defaultClientPixelFormat()
+	img := makeBenchImage(64, 64, 42)
+	buf := encodeHextileRect(img, pf, 0, 0, 64, 64)
+	got := decodeHextile(t, buf, pf)
+	compareImages(t, img, got)
+}
+
+func TestEncodeHextileRect_NonAligned(t *testing.T) {
+	pf := defaultClientPixelFormat()
+	img := makeTwoColorImage(50, 33) // not a multiple of 16
+	buf := encodeHextileRect(img, pf, 0, 0, 50, 33)
+	got := decodeHextile(t, buf, pf)
+	compareImages(t, img, got)
+}
--- a/client/vnc/server/input_darwin.go
+++ b/client/vnc/server/input_darwin.go
@@ -0,0 +1,613 @@
+//go:build darwin && !ios
+
+package server
+
+import (
+	"fmt"
+	"os/exec"
+	"strings"
+	"sync"
+
+	"github.com/ebitengine/purego"
+	log "github.com/sirupsen/logrus"
+)
+
+// Core Graphics event constants. The values mirror the C enums of the same
+// names (CGEventSourceStateID, CGEventType, CGMouseButton,
+// CGEventTapLocation) so they can be passed straight to the purego-bound
+// functions declared below.
+const (
+	kCGEventSourceStateCombinedSessionState int32 = 0
+
+	// CGEventType values for the mouse and keyboard events this file posts.
+	kCGEventLeftMouseDown     int32 = 1
+	kCGEventLeftMouseUp       int32 = 2
+	kCGEventRightMouseDown    int32 = 3
+	kCGEventRightMouseUp      int32 = 4
+	kCGEventMouseMoved        int32 = 5
+	kCGEventLeftMouseDragged  int32 = 6
+	kCGEventRightMouseDragged int32 = 7
+	kCGEventKeyDown           int32 = 10
+	kCGEventKeyUp             int32 = 11
+	kCGEventOtherMouseDown    int32 = 25
+	kCGEventOtherMouseUp      int32 = 26
+
+	// CGMouseButton values passed as the button argument of mouse events.
+	kCGMouseButtonLeft   int32 = 0
+	kCGMouseButtonRight  int32 = 1
+	kCGMouseButtonCenter int32 = 2
+
+	// Tap location handed to CGEventPost for every synthesized event.
+	kCGHIDEventTap int32 = 0
+
+	// IOKit power management constants.
+	kIOPMUserActiveLocal  int32  = 0
+	kIOPMAssertionLevelOn uint32 = 255
+	kCFStringEncodingUTF8 uint32 = 0x08000100
+)
+
+// darwinInputOnce guards the one-time library loading in initDarwinInput.
+var darwinInputOnce sync.Once
+
+// Function bindings and shared state for macOS input injection. The
+// function variables stay nil until initDarwinInput / initPowerAssertions
+// register them.
+var (
+	cgEventSourceCreate        func(int32) uintptr
+	cgEventCreateKeyboardEvent func(uintptr, uint16, bool) uintptr
+	// CGEventCreateMouseEvent takes CGPoint as two separate float64 args.
+	// purego can't handle array/struct types but individual float64s work.
+	cgEventCreateMouseEvent func(uintptr, int32, float64, float64, int32) uintptr
+	cgEventPost             func(int32, uintptr)
+
+	// CGEventCreateScrollWheelEvent is variadic, call via SyscallN.
+	cgEventCreateScrollWheelEventAddr uintptr
+
+	// axIsProcessTrusted is optional: it stays nil when ApplicationServices
+	// or the symbol cannot be loaded.
+	axIsProcessTrusted func() bool
+
+	// IOKit power-management bindings used to wake the display and inhibit
+	// idle sleep while a VNC client is driving input.
+	iopmAssertionDeclareUserActivity func(uintptr, int32, *uint32) int32
+	iopmAssertionCreateWithName      func(uintptr, uint32, uintptr, *uint32) int32
+	iopmAssertionRelease             func(uint32) int32
+	cfStringCreateWithCString        func(uintptr, string, uint32) uintptr
+
+	// Cached CFStrings for assertion name and idle-sleep type.
+	pmAssertionNameCFStr      uintptr
+	pmPreventIdleDisplayCFStr uintptr
+
+	// Assertion IDs. userActivityID is reused across input events so repeated
+	// calls refresh the same assertion rather than create new ones.
+	// pmMu guards userActivityID and the preventSleep* fields below.
+	pmMu             sync.Mutex
+	userActivityID   uint32
+	preventSleepID   uint32
+	preventSleepHeld bool
+	preventSleepRef  int // refcount across concurrent injectors/sessions
+
+	// darwinInputReady is set at the end of a successful initDarwinInput.
+	darwinInputReady bool
+	// darwinEventSource is created lazily by ensureEventSource; 0 means
+	// not yet created.
+	darwinEventSource uintptr
+)
+
+func initDarwinInput() {
+	darwinInputOnce.Do(func() {
+		cg, err := purego.Dlopen("/System/Library/Frameworks/CoreGraphics.framework/CoreGraphics", purego.RTLD_NOW|purego.RTLD_GLOBAL)
+		if err != nil {
+			log.Debugf("load CoreGraphics for input: %v", err)
+			return
+		}
+
+		purego.RegisterLibFunc(&cgEventSourceCreate, cg, "CGEventSourceCreate")
+		purego.RegisterLibFunc(&cgEventCreateKeyboardEvent, cg, "CGEventCreateKeyboardEvent")
+		purego.RegisterLibFunc(&cgEventCreateMouseEvent, cg, "CGEventCreateMouseEvent")
+		purego.RegisterLibFunc(&cgEventPost, cg, "CGEventPost")
+
+		sym, err := purego.Dlsym(cg, "CGEventCreateScrollWheelEvent")
+		if err == nil {
+			cgEventCreateScrollWheelEventAddr = sym
+		}
+
+		if ax, err := purego.Dlopen("/System/Library/Frameworks/ApplicationServices.framework/ApplicationServices", purego.RTLD_NOW|purego.RTLD_GLOBAL); err == nil {
+			if sym, err := purego.Dlsym(ax, "AXIsProcessTrusted"); err == nil {
+				purego.RegisterFunc(&axIsProcessTrusted, sym)
+			}
+		}
+
+		initPowerAssertions()
+
+		darwinInputReady = true
+	})
+}
+
+func initPowerAssertions() {
+	iokit, err := purego.Dlopen("/System/Library/Frameworks/IOKit.framework/IOKit", purego.RTLD_NOW|purego.RTLD_GLOBAL)
+	if err != nil {
+		log.Debugf("load IOKit: %v", err)
+		return
+	}
+	cf, err := purego.Dlopen("/System/Library/Frameworks/CoreFoundation.framework/CoreFoundation", purego.RTLD_NOW|purego.RTLD_GLOBAL)
+	if err != nil {
+		log.Debugf("load CoreFoundation for power assertions: %v", err)
+		return
+	}
+
+	purego.RegisterLibFunc(&cfStringCreateWithCString, cf, "CFStringCreateWithCString")
+	purego.RegisterLibFunc(&iopmAssertionDeclareUserActivity, iokit, "IOPMAssertionDeclareUserActivity")
+	purego.RegisterLibFunc(&iopmAssertionCreateWithName, iokit, "IOPMAssertionCreateWithName")
+	purego.RegisterLibFunc(&iopmAssertionRelease, iokit, "IOPMAssertionRelease")
+
+	pmAssertionNameCFStr = cfStringCreateWithCString(0, "NetBird VNC input", kCFStringEncodingUTF8)
+	pmPreventIdleDisplayCFStr = cfStringCreateWithCString(0, "PreventUserIdleDisplaySleep", kCFStringEncodingUTF8)
+}
+
+// wakeDisplay declares user activity to IOKit so macOS treats synthesized
+// input like real HID activity and wakes a sleeping display. It runs on
+// every key and pointer event; the kernel coalesces repeated calls cheaply.
+// The assertion ID is kept only when the call succeeds.
+func wakeDisplay() {
+	bound := iopmAssertionDeclareUserActivity != nil && pmAssertionNameCFStr != 0
+	if !bound {
+		return
+	}
+
+	pmMu.Lock()
+	defer pmMu.Unlock()
+
+	assertionID := userActivityID
+	if rc := iopmAssertionDeclareUserActivity(pmAssertionNameCFStr, kIOPMUserActiveLocal, &assertionID); rc != 0 {
+		log.Tracef("IOPMAssertionDeclareUserActivity returned %d", rc)
+		return
+	}
+	userActivityID = assertionID
+}
+
+// holdPreventIdleSleep takes a reference on the assertion that keeps the
+// display from idle-sleeping while a VNC session is active. Only the first
+// reference creates the IOKit assertion, so concurrent sessions share one
+// and none of them drops it for the others.
+func holdPreventIdleSleep() {
+	bound := iopmAssertionCreateWithName != nil &&
+		pmPreventIdleDisplayCFStr != 0 &&
+		pmAssertionNameCFStr != 0
+	if !bound {
+		return
+	}
+
+	pmMu.Lock()
+	defer pmMu.Unlock()
+
+	preventSleepRef++
+	if preventSleepRef > 1 {
+		return
+	}
+
+	var assertionID uint32
+	rc := iopmAssertionCreateWithName(pmPreventIdleDisplayCFStr, kIOPMAssertionLevelOn, pmAssertionNameCFStr, &assertionID)
+	if rc != 0 {
+		log.Debugf("IOPMAssertionCreateWithName returned %d", rc)
+		// Reset the refcount on failure so a later successful hold can take it.
+		preventSleepRef = 0
+		return
+	}
+	preventSleepID, preventSleepHeld = assertionID, true
+}
+
+// releasePreventIdleSleep drops one reference on the idle-sleep assertion.
+// The IOKit assertion itself is released only when the last reference goes
+// away; calls made while nothing is held are ignored.
+func releasePreventIdleSleep() {
+	if iopmAssertionRelease == nil {
+		return
+	}
+
+	pmMu.Lock()
+	defer pmMu.Unlock()
+
+	if held := preventSleepHeld && preventSleepRef != 0; !held {
+		return
+	}
+	preventSleepRef--
+	if preventSleepRef > 0 {
+		return
+	}
+
+	rc := iopmAssertionRelease(preventSleepID)
+	if rc != 0 {
+		log.Debugf("IOPMAssertionRelease returned %d", rc)
+	}
+	preventSleepHeld, preventSleepID = false, 0
+}
+
+func ensureEventSource() uintptr {
+	if darwinEventSource != 0 {
+		return darwinEventSource
+	}
+	darwinEventSource = cgEventSourceCreate(kCGEventSourceStateCombinedSessionState)
+	return darwinEventSource
+}
+
+// MacInputInjector injects keyboard and mouse events via Core Graphics.
+type MacInputInjector struct {
+	// lastButtons is the button mask of the previous InjectPointer call,
+	// used to detect button press/release transitions.
+	lastButtons uint8
+	// pbcopyPath and pbpastePath are the resolved clipboard helper
+	// binaries; empty when the tool was not found, which disables the
+	// corresponding clipboard operation.
+	pbcopyPath  string
+	pbpastePath string
+}
+
+// NewMacInputInjector creates a macOS input injector. It fails when
+// CoreGraphics could not be loaded. Missing clipboard tools are only
+// logged. A successful call takes a reference on the idle-sleep assertion,
+// which Close releases.
+func NewMacInputInjector() (*MacInputInjector, error) {
+	initDarwinInput()
+	if !darwinInputReady {
+		return nil, fmt.Errorf("CoreGraphics not available for input injection")
+	}
+	checkMacPermissions()
+
+	injector := new(MacInputInjector)
+	lookups := []struct {
+		tool string
+		dst  *string
+	}{
+		{"pbcopy", &injector.pbcopyPath},
+		{"pbpaste", &injector.pbpastePath},
+	}
+	for _, l := range lookups {
+		if resolved, err := exec.LookPath(l.tool); err == nil {
+			*l.dst = resolved
+		}
+	}
+	if missing := injector.pbcopyPath == "" || injector.pbpastePath == ""; missing {
+		log.Debugf("clipboard tools not found (pbcopy=%q, pbpaste=%q)", injector.pbcopyPath, injector.pbpastePath)
+	}
+
+	holdPreventIdleSleep()
+
+	log.Info("macOS input injector ready")
+	return injector, nil
+}
+
+// checkMacPermissions warns and opens the Privacy pane when the
+// Accessibility permission is missing. It relies on AXIsProcessTrusted,
+// which returns immediately; the earlier osascript probe blocked for 120s
+// (AppleEvent timeout) when access was denied and pushed VNC server startup
+// past client deadlines. When the binding is unavailable the check is
+// skipped. The Screen Recording hint is always logged.
+func checkMacPermissions() {
+	trusted := axIsProcessTrusted == nil || axIsProcessTrusted()
+	if !trusted {
+		openPrivacyPane("Privacy_Accessibility")
+		log.Warn("Accessibility permission not granted. Input injection will not work. " +
+			"Opened System Settings > Privacy & Security > Accessibility; enable netbird.")
+	}
+
+	log.Info("Screen Recording permission is required for screen capture. " +
+		"If the screen appears black, grant in System Settings > Privacy & Security > Screen Recording.")
+}
+
+// openPrivacyPane opens the given Privacy pane in System Settings so the
+// user can toggle the permission without navigating manually. It does not
+// block: the `open` helper is started and then reaped in the background, so
+// the finished child does not linger as a zombie for the lifetime of the
+// service. Failures are logged at debug level only.
+func openPrivacyPane(pane string) {
+	url := "x-apple.systempreferences:com.apple.preference.security?" + pane
+	cmd := exec.Command("open", url)
+	if err := cmd.Start(); err != nil {
+		log.Debugf("open privacy pane %s: %v", pane, err)
+		return
+	}
+	// After a successful Start, Wait must be called to release the
+	// resources associated with the child process.
+	go func() {
+		if err := cmd.Wait(); err != nil {
+			log.Debugf("open privacy pane %s: %v", pane, err)
+		}
+	}()
+}
+
+// InjectKey simulates a key press or release.
+func (m *MacInputInjector) InjectKey(keysym uint32, down bool) {
+	wakeDisplay()
+	src := ensureEventSource()
+	if src == 0 {
+		return
+	}
+	keycode := keysymToMacKeycode(keysym)
+	if keycode == 0xFFFF {
+		return
+	}
+	event := cgEventCreateKeyboardEvent(src, keycode, down)
+	if event == 0 {
+		return
+	}
+	cgEventPost(kCGHIDEventTap, event)
+	cfRelease(event)
+}
+
+// InjectPointer simulates mouse movement and button events. px/py are
+// framebuffer coordinates within a serverW x serverH frame; the event is
+// dropped when either dimension is zero or no event source is available.
+// The button mask is remembered for transition detection on the next call.
+func (m *MacInputInjector) InjectPointer(buttonMask uint8, px, py, serverW, serverH int) {
+	wakeDisplay()
+	if serverW == 0 || serverH == 0 {
+		return
+	}
+
+	source := ensureEventSource()
+	if source == 0 {
+		return
+	}
+
+	logicalX, logicalY := scalePxToLogical(px, py, serverW, serverH)
+	m.dispatchPointer(source, buttonMask, logicalX, logicalY)
+	m.lastButtons = buttonMask
+}
+
+// scalePxToLogical converts framebuffer coordinates (physical pixels) into
+// the logical points CGEventCreateMouseEvent expects, by scaling with the
+// ratio between the main display's logical size and the framebuffer size.
+//
+// It falls back to a 1:1 mapping when any of the display bindings is
+// unavailable, when the display reports a non-positive size, or when
+// serverW/serverH is not positive (which would otherwise divide by zero).
+func scalePxToLogical(px, py, serverW, serverH int) (float64, float64) {
+	x, y := float64(px), float64(py)
+	// All three bindings are called below, so all three must be present.
+	if cgDisplayPixelsWide == nil || cgDisplayPixelsHigh == nil || cgMainDisplayID == nil {
+		return x, y
+	}
+	if serverW <= 0 || serverH <= 0 {
+		return x, y
+	}
+	displayID := cgMainDisplayID()
+	logicalW := int(cgDisplayPixelsWide(displayID))
+	logicalH := int(cgDisplayPixelsHigh(displayID))
+	if logicalW <= 0 || logicalH <= 0 {
+		return x, y
+	}
+	return x * float64(logicalW) / float64(serverW),
+		y * float64(logicalH) / float64(serverH)
+}
+
+func (m *MacInputInjector) dispatchPointer(src uintptr, buttonMask uint8, x, y float64) {
+	leftDown := buttonMask&0x01 != 0
+	rightDown := buttonMask&0x04 != 0
+	middleDown := buttonMask&0x02 != 0
+	m.postMoveOrDrag(src, leftDown, rightDown, x, y)
+	m.postButtonTransitions(src, buttonMask, x, y)
+	m.postScrollWheel(src, buttonMask)
+	_ = middleDown
+}
+
+// postMoveOrDrag posts the positional event for a pointer update: a left
+// drag when the left button is held, otherwise a right drag when the right
+// button is held, otherwise a plain mouse move.
+func (m *MacInputInjector) postMoveOrDrag(src uintptr, leftDown, rightDown bool, x, y float64) {
+	eventType, button := kCGEventMouseMoved, kCGMouseButtonLeft
+	if leftDown {
+		eventType = kCGEventLeftMouseDragged
+	} else if rightDown {
+		eventType, button = kCGEventRightMouseDragged, kCGMouseButtonRight
+	}
+	m.postMouse(src, eventType, x, y, button)
+}
+
+// postButtonTransitions compares buttonMask with m.lastButtons and posts a
+// down or up event at (x, y) for every button whose state changed. Buttons
+// are processed in the order left, right, middle.
+func (m *MacInputInjector) postButtonTransitions(src uintptr, buttonMask uint8, x, y float64) {
+	buttons := [...]struct {
+		bit      uint8
+		down, up int32
+		button   int32
+	}{
+		{0x01, kCGEventLeftMouseDown, kCGEventLeftMouseUp, kCGMouseButtonLeft},
+		{0x04, kCGEventRightMouseDown, kCGEventRightMouseUp, kCGMouseButtonRight},
+		{0x02, kCGEventOtherMouseDown, kCGEventOtherMouseUp, kCGMouseButtonCenter},
+	}
+	for _, b := range buttons {
+		pressed := buttonMask&b.bit != 0
+		wasPressed := m.lastButtons&b.bit != 0
+		switch {
+		case pressed && !wasPressed:
+			m.postMouse(src, b.down, x, y, b.button)
+		case wasPressed && !pressed:
+			m.postMouse(src, b.up, x, y, b.button)
+		}
+	}
+}
+
+func (m *MacInputInjector) postScrollWheel(src uintptr, buttonMask uint8) {
+	if buttonMask&0x08 != 0 {
+		m.postScroll(src, 3)
+	}
+	if buttonMask&0x10 != 0 {
+		m.postScroll(src, -3)
+	}
+}
+
+// postMouse creates a mouse event of the given type at (x, y), posts it to
+// the HID event tap and releases it. It does nothing when the binding is
+// missing or the event cannot be created.
+func (m *MacInputInjector) postMouse(src uintptr, eventType int32, x, y float64, button int32) {
+	if cgEventCreateMouseEvent == nil {
+		return
+	}
+	if ev := cgEventCreateMouseEvent(src, eventType, x, y, button); ev != 0 {
+		cgEventPost(kCGHIDEventTap, ev)
+		cfRelease(ev)
+	}
+}
+
+func (m *MacInputInjector) postScroll(src uintptr, deltaY int32) {
+	if cgEventCreateScrollWheelEventAddr == 0 {
+		return
+	}
+	// CGEventCreateScrollWheelEvent(source, units, wheelCount, wheel1delta)
+	// units=0 (pixel), wheelCount=1, wheel1delta=deltaY
+	// Variadic C function: pass args as uintptr via SyscallN.
+	r1, _, _ := purego.SyscallN(cgEventCreateScrollWheelEventAddr,
+		src, 0, 1, uintptr(uint32(deltaY)))
+	if r1 == 0 {
+		return
+	}
+	cgEventPost(kCGHIDEventTap, r1)
+	cfRelease(r1)
+}
+
+// SetClipboard sets the macOS clipboard by piping text into pbcopy. It is a
+// no-op when pbcopy was not found; failures are logged at trace level.
+func (m *MacInputInjector) SetClipboard(text string) {
+	if m.pbcopyPath == "" {
+		return
+	}
+	pbcopy := exec.Command(m.pbcopyPath)
+	pbcopy.Stdin = strings.NewReader(text)
+	err := pbcopy.Run()
+	if err == nil {
+		return
+	}
+	log.Tracef("set clipboard via pbcopy: %v", err)
+}
+
+// TypeText synthesizes the given text as keystrokes via Core Graphics.
+// The dashboard's Paste button uses it so the host clipboard reaches the
+// focused remote app even when that app ignores pbpaste-style clipboard
+// sync (e.g. login screens, locked-down apps). Only ASCII printable runes
+// are typed; others are skipped. At most 4096 runes are processed, counting
+// skipped ones.
+func (m *MacInputInjector) TypeText(text string) {
+	wakeDisplay()
+	source := ensureEventSource()
+	if source == 0 {
+		return
+	}
+	const maxChars = 4096
+	remaining := maxChars
+	for _, r := range text {
+		if remaining == 0 {
+			break
+		}
+		remaining--
+		typeRune(source, r)
+	}
+}
+
+// typeRune posts the key-down/key-up pair for a single ASCII rune and
+// wraps it in Shift-down/Shift-up when the rune needs Shift. Runes without
+// a keysym or keycode mapping are silently skipped.
+func typeRune(src uintptr, r rune) {
+	const shiftKey = uint16(0x38) // kVK_Shift
+
+	keysym, needShift, known := keysymForASCIIRune(r)
+	if !known {
+		return
+	}
+	code := keysymToMacKeycode(keysym)
+	if code == 0xFFFF {
+		return
+	}
+
+	if needShift {
+		postKey(src, shiftKey, true)
+		// Released after the key itself has gone down and up.
+		defer postKey(src, shiftKey, false)
+	}
+	postKey(src, code, true)
+	postKey(src, code, false)
+}
+
+// postKey creates a keyboard event for keycode (press when down is true,
+// release otherwise), posts it to the HID event tap and releases it.
+func postKey(src uintptr, keycode uint16, down bool) {
+	if ev := cgEventCreateKeyboardEvent(src, keycode, down); ev != 0 {
+		cgEventPost(kCGHIDEventTap, ev)
+		cfRelease(ev)
+	}
+}
+
+// GetClipboard returns the macOS clipboard contents as printed by pbpaste.
+// It returns "" when pbpaste was not found or the command fails; failures
+// are logged at trace level.
+func (m *MacInputInjector) GetClipboard() string {
+	if m.pbpastePath == "" {
+		return ""
+	}
+	contents, err := exec.Command(m.pbpastePath).Output()
+	if err == nil {
+		return string(contents)
+	}
+	log.Tracef("get clipboard via pbpaste: %v", err)
+	return ""
+}
+
+// Close releases the idle-sleep assertion held for the injector's lifetime.
+// It drops the reference taken by NewMacInputInjector; the underlying IOKit
+// assertion is only released once the last reference is gone.
+func (m *MacInputInjector) Close() {
+	releasePreventIdleSleep()
+}
+
+// keysymToMacKeycode maps a keysym to a macOS virtual keycode. ASCII
+// letters (either case) and digits are resolved through the lookup arrays,
+// everything else through specialKeyMap. It returns 0xFFFF for keysyms
+// without a mapping.
+func keysymToMacKeycode(keysym uint32) uint16 {
+	switch {
+	case keysym >= 'a' && keysym <= 'z':
+		return asciiToMacKey[keysym-'a']
+	case keysym >= 'A' && keysym <= 'Z':
+		return asciiToMacKey[keysym-'A']
+	case keysym >= '0' && keysym <= '9':
+		return digitToMacKey[keysym-'0']
+	}
+	code, known := specialKeyMap[keysym]
+	if !known {
+		return 0xFFFF
+	}
+	return code
+}
+
+// asciiToMacKey holds the macOS virtual keycode for each letter a-z,
+// indexed by letter offset (0 = 'a'). keysymToMacKeycode uses it for both
+// lower- and upper-case letters.
+var asciiToMacKey = [26]uint16{
+	0x00, 0x0B, 0x08, 0x02, 0x0E, 0x03, 0x05, 0x04, // a-h
+	0x22, 0x26, 0x28, 0x25, 0x2E, 0x2D, 0x1F, 0x23, // i-p
+	0x0C, 0x0F, 0x01, 0x11, 0x20, 0x09, 0x0D, 0x07, // q-x
+	0x10, 0x06, // y-z
+}
+
+// digitToMacKey holds the macOS virtual keycode for each digit of the main
+// number row, indexed by digit value (0-9).
+var digitToMacKey = [10]uint16{
+	0x1D, 0x12, 0x13, 0x14, 0x15, 0x17, 0x16, 0x1A, 0x1C, 0x19,
+}
+
+// specialKeyMap maps keysyms that are neither ASCII letters nor digits to
+// macOS virtual keycodes. keysymToMacKeycode consults it as the fallback
+// after the letter and digit tables. Shifted punctuation maps to the same
+// keycode as its unshifted key; this table does not add the Shift modifier.
+var specialKeyMap = map[uint32]uint16{
+	// Whitespace and editing
+	0x0020: 0x31, // space
+	0xff08: 0x33, // BackSpace
+	0xff09: 0x30, // Tab
+	0xff0d: 0x24, // Return
+	0xff1b: 0x35, // Escape
+	0xffff: 0x75, // Delete (forward)
+
+	// Navigation
+	0xff50: 0x73, // Home
+	0xff51: 0x7B, // Left
+	0xff52: 0x7E, // Up
+	0xff53: 0x7C, // Right
+	0xff54: 0x7D, // Down
+	0xff55: 0x74, // Page_Up
+	0xff56: 0x79, // Page_Down
+	0xff57: 0x77, // End
+	0xff63: 0x72, // Insert (Help on Mac)
+
+	// Modifiers
+	0xffe1: 0x38, // Shift_L
+	0xffe2: 0x3C, // Shift_R
+	0xffe3: 0x3B, // Control_L
+	0xffe4: 0x3E, // Control_R
+	0xffe5: 0x39, // Caps_Lock
+	0xffe9: 0x3A, // Alt_L (Option)
+	0xffea: 0x3D, // Alt_R (Option)
+	0xffe7: 0x37, // Meta_L (Command)
+	0xffe8: 0x36, // Meta_R (Command)
+	0xffeb: 0x37, // Super_L (Command) - noVNC sends this
+	0xffec: 0x36, // Super_R (Command)
+
+	// Mode_switch / ISO_Level3_Shift (sent by noVNC for macOS Option remap)
+	0xff7e: 0x3A, // Mode_switch -> Option
+	0xfe03: 0x3D, // ISO_Level3_Shift -> Right Option
+
+	// Function keys
+	0xffbe: 0x7A, // F1
+	0xffbf: 0x78, // F2
+	0xffc0: 0x63, // F3
+	0xffc1: 0x76, // F4
+	0xffc2: 0x60, // F5
+	0xffc3: 0x61, // F6
+	0xffc4: 0x62, // F7
+	0xffc5: 0x64, // F8
+	0xffc6: 0x65, // F9
+	0xffc7: 0x6D, // F10
+	0xffc8: 0x67, // F11
+	0xffc9: 0x6F, // F12
+	0xffca: 0x69, // F13
+	0xffcb: 0x6B, // F14
+	0xffcc: 0x71, // F15
+	0xffcd: 0x6A, // F16
+	0xffce: 0x40, // F17
+	0xffcf: 0x4F, // F18
+	0xffd0: 0x50, // F19
+	0xffd1: 0x5A, // F20
+
+	// Punctuation (US keyboard layout, keysym = ASCII code)
+	0x002d: 0x1B, // minus -
+	0x003d: 0x18, // equal =
+	0x005b: 0x21, // bracketleft [
+	0x005d: 0x1E, // bracketright ]
+	0x005c: 0x2A, // backslash
+	0x003b: 0x29, // semicolon ;
+	0x0027: 0x27, // apostrophe '
+	0x0060: 0x32, // grave `
+	0x002c: 0x2B, // comma ,
+	0x002e: 0x2F, // period .
+	0x002f: 0x2C, // slash /
+
+	// Shifted punctuation (noVNC sends these as separate keysyms)
+	0x005f: 0x1B, // underscore _ (shift+minus)
+	0x002b: 0x18, // plus + (shift+equal)
+	0x007b: 0x21, // braceleft { (shift+[)
+	0x007d: 0x1E, // braceright } (shift+])
+	0x007c: 0x2A, // bar | (shift+\)
+	0x003a: 0x29, // colon : (shift+;)
+	0x0022: 0x27, // quotedbl " (shift+')
+	0x007e: 0x32, // tilde ~ (shift+`)
+	0x003c: 0x2B, // less < (shift+,)
+	0x003e: 0x2F, // greater > (shift+.)
+	0x003f: 0x2C, // question ? (shift+/)
+	0x0021: 0x12, // exclam ! (shift+1)
+	0x0040: 0x13, // at @ (shift+2)
+	0x0023: 0x14, // numbersign # (shift+3)
+	0x0024: 0x15, // dollar $ (shift+4)
+	0x0025: 0x17, // percent % (shift+5)
+	0x005e: 0x16, // asciicircum ^ (shift+6)
+	0x0026: 0x1A, // ampersand & (shift+7)
+	0x002a: 0x1C, // asterisk * (shift+8)
+	0x0028: 0x19, // parenleft ( (shift+9)
+	0x0029: 0x1D, // parenright ) (shift+0)
+
+	// Numpad
+	0xffb0: 0x52, // KP_0
+	0xffb1: 0x53, // KP_1
+	0xffb2: 0x54, // KP_2
+	0xffb3: 0x55, // KP_3
+	0xffb4: 0x56, // KP_4
+	0xffb5: 0x57, // KP_5
+	0xffb6: 0x58, // KP_6
+	0xffb7: 0x59, // KP_7
+	0xffb8: 0x5B, // KP_8
+	0xffb9: 0x5C, // KP_9
+	0xffae: 0x41, // KP_Decimal
+	0xffaa: 0x43, // KP_Multiply
+	0xffab: 0x45, // KP_Add
+	0xffad: 0x4E, // KP_Subtract
+	0xffaf: 0x4B, // KP_Divide
+	0xff8d: 0x4C, // KP_Enter
+	0xffbd: 0x51, // KP_Equal
+}
+
+// Compile-time check that *MacInputInjector implements InputInjector.
+var _ InputInjector = (*MacInputInjector)(nil)
--- a/client/vnc/server/input_uinput_unix.go
+++ b/client/vnc/server/input_uinput_unix.go
@@ -0,0 +1,500 @@
+//go:build (linux && !android) || freebsd
+
+package server
+
+import (
+	"encoding/binary"
+	"fmt"
+	"sync"
+	"time"
+	"unicode"
+	"unsafe"
+
+	log "github.com/sirupsen/logrus"
+	"golang.org/x/sys/unix"
+)
+
+// /dev/uinput ioctl numbers. Computed from the kernel _IO/_IOW macros so
+// we don't depend on cgo. UINPUT_IOCTL_BASE = 'U' = 0x55.
+//
+// The _IOW values are spelled out as
+// (direction << 30) | (argument size << 16) | ('U' << 8) | request number,
+// with direction 1 meaning "write". uiSetEvBit, uiSetKeyBit and uiSetAbsBit
+// carry a 4-byte argument and use request numbers 100, 101 and 103.
+// NOTE(review): this bit layout is hard-coded; confirm it is valid for every
+// architecture and OS admitted by this file's build tag.
+const (
+	uiDevCreate  = 0x5501
+	uiDevDestroy = 0x5502
+	// _IOW('U', 3, struct uinput_setup); uinput_setup is 92 bytes on amd64.
+	// NOTE(review): uiDevSetup is not referenced by the code in this file,
+	// which configures the device through the legacy write path instead.
+	uiDevSetup    = (1 << 30) | (92 << 16) | (0x55 << 8) | 3
+	uiSetEvBit    = (1 << 30) | (4 << 16) | (0x55 << 8) | 100
+	uiSetKeyBit   = (1 << 30) | (4 << 16) | (0x55 << 8) | 101
+	uiSetAbsBit   = (1 << 30) | (4 << 16) | (0x55 << 8) | 103
+	uinputAbsSize = 64 // legacy struct uses absmin/absmax/absfuzz/absflat[64].
+)
+
+// Linux input event types and key codes (linux/input-event-codes.h).
+// The groups are, in order: event types (ev*), the single sync code used to
+// flush a batch of events, the two absolute axes this device reports, and
+// the three mouse buttons it advertises.
+const (
+	evSyn = 0x00
+	evKey = 0x01
+	evAbs = 0x03
+	evRep = 0x14
+
+	// Code sent with evSyn to mark the end of one event batch.
+	synReport = 0
+
+	// Axis codes used with evAbs.
+	absX = 0x00
+	absY = 0x01
+
+	// Button codes used with evKey.
+	btnLeft   = 0x110
+	btnRight  = 0x111
+	btnMiddle = 0x112
+)
+
+// inputEvent matches struct input_event for x86_64 (timeval is 16 bytes).
+// Total size 24 bytes; Go's natural alignment matches the kernel layout.
+//
+// TvSec and TvUsec are left zero by emit, the only writer in this file;
+// Type, Code and Value carry the event itself.
+// NOTE(review): the two int64 timestamp fields fix the layout to targets
+// with a 16-byte timeval. Confirm whether 32-bit builds admitted by the
+// build tag need a different layout.
+type inputEvent struct {
+	TvSec  int64
+	TvUsec int64
+	Type   uint16
+	Code   uint16
+	Value  int32
+}
+
+// UInputInjector synthesizes keyboard and mouse events via /dev/uinput.
+// Used as a fallback when X11 isn't running, e.g. at the kernel console
+// or pre-login screen on a server without X. Requires root or
+// CAP_SYS_ADMIN, which the netbird service has.
+//
+// Fields:
+//   - mu serializes every method that writes to the device.
+//   - fd is the open /dev/uinput descriptor; Close sets it to -1.
+//   - closeOnce makes Close idempotent.
+//   - keysymToKey translates X11 keysyms to Linux KEY_* codes.
+//   - prevButtons is the RFB button mask seen by the previous
+//     InjectPointer call, used to derive press/release edges.
+//   - screenW and screenH are the dimensions the absolute axes were
+//     registered with.
+type UInputInjector struct {
+	mu          sync.Mutex
+	fd          int
+	closeOnce   sync.Once
+	keysymToKey map[uint32]uint16
+	prevButtons uint8
+	screenW     int
+	screenH     int
+}
+
+// NewUInputInjector opens /dev/uinput and registers a virtual keyboard +
+// absolute pointer device sized to (w, h). The dimensions are needed
+// because uinput's ABS axes don't autoscale; we always send absolute
+// coordinates and let the kernel route them to the right monitor.
+//
+// It returns an error when either dimension is non-positive, when the
+// device node cannot be opened, or when any configuration step fails. The
+// descriptor is closed on every failure path. OS error numbers are wrapped
+// with %w so callers can match them with errors.Is.
+func NewUInputInjector(w, h int) (*UInputInjector, error) {
+	if w <= 0 || h <= 0 {
+		return nil, fmt.Errorf("invalid screen size: %dx%d", w, h)
+	}
+	fd, err := unix.Open("/dev/uinput", unix.O_WRONLY|unix.O_NONBLOCK, 0)
+	if err != nil {
+		return nil, fmt.Errorf("open /dev/uinput: %w", err)
+	}
+	// Single cleanup point: the descriptor is released unless construction
+	// runs to completion.
+	created := false
+	defer func() {
+		if !created {
+			_ = unix.Close(fd)
+		}
+	}()
+
+	// Event classes the device emits. evRep advertises key auto-repeat so
+	// the kernel input core repeats held keys at the configured rate
+	// (default ~250 ms delay, ~33 ms period). Without it, holding Backspace
+	// etc. only deletes one character.
+	for _, ev := range []uint32{evKey, evAbs, evSyn, evRep} {
+		if err := setBit(fd, uiSetEvBit, ev); err != nil {
+			return nil, err
+		}
+	}
+
+	keymap := buildUInputKeymap()
+	for _, key := range keymap {
+		if err := setBit(fd, uiSetKeyBit, uint32(key)); err != nil {
+			return nil, fmt.Errorf("UI_SET_KEYBIT %d: %w", key, err)
+		}
+	}
+	for _, btn := range []uint16{btnLeft, btnRight, btnMiddle} {
+		if err := setBit(fd, uiSetKeyBit, uint32(btn)); err != nil {
+			return nil, fmt.Errorf("UI_SET_KEYBIT btn %d: %w", btn, err)
+		}
+	}
+	for _, axis := range []uint32{absX, absY} {
+		if err := setBit(fd, uiSetAbsBit, axis); err != nil {
+			return nil, err
+		}
+	}
+
+	if err := writeUInputUserDev(fd, w, h); err != nil {
+		return nil, err
+	}
+	if _, _, e := unix.Syscall(unix.SYS_IOCTL, uintptr(fd), uiDevCreate, 0); e != 0 {
+		return nil, fmt.Errorf("UI_DEV_CREATE: %w", e)
+	}
+	// Give udev a moment to settle before sending events.
+	time.Sleep(50 * time.Millisecond)
+
+	inj := &UInputInjector{
+		fd:          fd,
+		keysymToKey: keymapByKeysym(keymap),
+		screenW:     w,
+		screenH:     h,
+	}
+	created = true
+	log.Infof("uinput injector ready: %dx%d, %d keys", w, h, len(inj.keysymToKey))
+	return inj, nil
+}
+
+// setBit issues one uinput configuration ioctl (op) on fd with code as its
+// integer argument. A failing ioctl is returned with the request number and
+// argument in the message; the underlying errno is wrapped with %w so
+// callers can test it with errors.Is.
+func setBit(fd int, op uintptr, code uint32) error {
+	if _, _, e := unix.Syscall(unix.SYS_IOCTL, uintptr(fd), op, uintptr(code)); e != 0 {
+		return fmt.Errorf("ioctl 0x%x %d: %w", op, code, e)
+	}
+	return nil
+}
+
+// writeUInputUserDev describes the virtual device to the kernel through the
+// legacy uinput_user_dev path: the whole struct is written to fd and the
+// caller then issues UI_DEV_CREATE. The struct is name(80) + id(8) +
+// ff_effects_max(4) + absmax/absmin/absfuzz/absflat[64], i.e.
+// 92 + 4*64*4 = 1116 bytes. Only the name, the id and the X/Y ranges
+// (0..w-1 and 0..h-1) are filled in; everything else stays zero. Fields are
+// encoded little-endian.
+func writeUInputUserDev(fd, w, h int) error {
+	const (
+		nameLen   = 80
+		idOff     = nameLen
+		absmaxOff = nameLen + 8 + 4 // name + id + ff_effects_max
+		absminOff = absmaxOff + uinputAbsSize*4
+		total     = absmaxOff + uinputAbsSize*4*4
+	)
+	le := binary.LittleEndian
+	dev := make([]byte, total)
+
+	copy(dev[:nameLen], "netbird-vnc-uinput")
+
+	// id: BUS_VIRTUAL=0x06, vendor=0x0001, product=0x0001, version=1.
+	for i, v := range [4]uint16{0x06, 0x0001, 0x0001, 0x0001} {
+		le.PutUint16(dev[idOff+2*i:], v)
+	}
+
+	// Axis ranges: absmax first, absmin in the following array.
+	le.PutUint32(dev[absmaxOff+absX*4:], uint32(w-1))
+	le.PutUint32(dev[absmaxOff+absY*4:], uint32(h-1))
+	le.PutUint32(dev[absminOff+absX*4:], 0)
+	le.PutUint32(dev[absminOff+absY*4:], 0)
+
+	_, err := unix.Write(fd, dev)
+	if err != nil {
+		return fmt.Errorf("write uinput_user_dev: %w", err)
+	}
+	return nil
+}
+
+// emit sends one input_event (type, code, value) to the device; the
+// timestamp fields are left zero. The caller must hold u.mu. The write
+// error, if any, is returned unchanged.
+func (u *UInputInjector) emit(typ, code uint16, value int32) error {
+	ev := inputEvent{Type: typ, Code: code, Value: value}
+	// View the struct's memory as raw bytes so it is written exactly as
+	// laid out in memory.
+	raw := unsafe.Slice((*byte)(unsafe.Pointer(&ev)), unsafe.Sizeof(ev))
+	_, err := unix.Write(u.fd, raw)
+	return err
+}
+
+// sync emits an evSyn/synReport event to close the current batch of events.
+// The write error is deliberately discarded. Like emit, it expects the
+// caller to hold u.mu.
+func (u *UInputInjector) sync() {
+	_ = u.emit(evSyn, synReport, 0)
+}
+
+// InjectKey synthesizes a press or release for the given X11 keysym.
+func (u *UInputInjector) InjectKey(keysym uint32, down bool) {
+	u.mu.Lock()
+	defer u.mu.Unlock()
+	code, ok := u.keysymToKey[keysym]
+	if !ok {
+		return
+	}
+	value := int32(0)
+	if down {
+		value = 1
+	}
+	if err := u.emit(evKey, code, value); err != nil {
+		log.Tracef("uinput emit key: %v", err)
+		return
+	}
+	u.sync()
+}
+
+// InjectPointer positions the absolute pointer and emits button presses or
+// releases for every bit that differs between buttonMask and the mask seen
+// on the previous call. (x, y) are in the serverW x serverH framebuffer and
+// are rescaled to the device's registered range. Calls with a framebuffer
+// dimension of 1 or less are ignored entirely. Only RFB mask bits 0x01
+// (left), 0x02 (middle) and 0x04 (right) are translated.
+func (u *UInputInjector) InjectPointer(buttonMask uint8, x, y, serverW, serverH int) {
+	u.mu.Lock()
+	defer u.mu.Unlock()
+
+	if serverW <= 1 || serverH <= 1 {
+		return
+	}
+
+	_ = u.emit(evAbs, absX, int32(x*(u.screenW-1)/(serverW-1)))
+	_ = u.emit(evAbs, absY, int32(y*(u.screenH-1)/(serverH-1)))
+
+	buttons := [...]struct {
+		mask uint8
+		code uint16
+	}{
+		{0x01, btnLeft},
+		{0x02, btnMiddle},
+		{0x04, btnRight},
+	}
+	toggled := buttonMask ^ u.prevButtons
+	for _, b := range buttons {
+		if toggled&b.mask == 0 {
+			continue
+		}
+		var state int32
+		if buttonMask&b.mask != 0 {
+			state = 1
+		}
+		_ = u.emit(evKey, b.code, state)
+	}
+
+	u.prevButtons = buttonMask
+	u.sync()
+}
+
+// SetClipboard is a no-op on the framebuffer console: there is no system
+// clipboard daemon. Use TypeText (Paste button) to deliver host text.
+// The argument is ignored.
+func (u *UInputInjector) SetClipboard(_ string) {
+	// no system clipboard daemon on framebuffer console
+}
+
+// GetClipboard always returns the empty string: there is no clipboard
+// outside X11/Wayland for this injector to read.
+func (u *UInputInjector) GetClipboard() string { return "" }
+
+// TypeText types text one rune at a time as key press/release pairs,
+// wrapping a rune in a Left Shift press/release when keyForRune reports it
+// needs Shift. Runes keyForRune cannot map are skipped. At most the first
+// 4096 runes of text are considered, and skipped runes count toward that
+// limit. Write errors are ignored.
+//
+// This drives the "paste" button: with no console clipboard available,
+// keystroke-by-keystroke entry is the only way to deliver a password to
+// a TTY login prompt.
+func (u *UInputInjector) TypeText(text string) {
+	const maxChars = 4096
+
+	u.mu.Lock()
+	defer u.mu.Unlock()
+
+	budget := maxChars
+	for _, r := range text {
+		if budget <= 0 {
+			break
+		}
+		budget--
+
+		code, needShift, known := keyForRune(r)
+		if !known {
+			continue
+		}
+		if needShift {
+			_ = u.emit(evKey, keyLeftShift, 1)
+			_ = u.emit(evKey, code, 1)
+			_ = u.emit(evKey, code, 0)
+			_ = u.emit(evKey, keyLeftShift, 0)
+		} else {
+			_ = u.emit(evKey, code, 1)
+			_ = u.emit(evKey, code, 0)
+		}
+		u.sync()
+	}
+}
+
+// Close tears down the virtual device (UI_DEV_DESTROY) and closes the
+// descriptor. Only the first call does any work; results of the ioctl and
+// of close are ignored. Afterwards fd is -1.
+func (u *UInputInjector) Close() {
+	u.closeOnce.Do(func() {
+		u.mu.Lock()
+		defer u.mu.Unlock()
+
+		if u.fd < 0 {
+			return
+		}
+		fd := u.fd
+		u.fd = -1
+		_, _, _ = unix.Syscall(unix.SYS_IOCTL, uintptr(fd), uiDevDestroy, 0)
+		_ = unix.Close(fd)
+	})
+}
+
+// Linux KEY_* codes for the small set we care about.
+//
+// These are the named non-letter, non-digit keys used by
+// buildUInputKeymap, keymapByKeysym and the TypeText tables. Letters,
+// digits and F2..F12 appear as numeric literals in those functions instead.
+// The list is grouped loosely by role, not sorted by value.
+const (
+	keyEsc          = 1
+	keyMinus        = 12
+	keyEqual        = 13
+	keyBackspace    = 14
+	keyTab          = 15
+	keyEnter        = 28
+	keyLeftCtrl     = 29
+	keySemicolon    = 39
+	keyApostrophe   = 40
+	keyGrave        = 41
+	keyLeftShift    = 42
+	keyBackslash    = 43
+	keyComma        = 51
+	keyDot          = 52
+	keySlash        = 53
+	keyRightShift   = 54
+	keyLeftAlt      = 56
+	keySpace        = 57
+	keyCapsLock     = 58
+	keyF1           = 59
+	keyLeftBracket  = 26
+	keyRightBracket = 27
+	keyHome         = 102
+	keyUp           = 103
+	keyPageUp       = 104
+	keyLeft         = 105
+	keyRight        = 106
+	keyEnd          = 107
+	keyDown         = 108
+	keyPageDown     = 109
+	keyInsert       = 110
+	keyDelete       = 111
+	keyRightCtrl    = 97
+	keyRightAlt     = 100
+	keyLeftMeta     = 125
+	keyRightMeta    = 126
+)
+
+// buildUInputKeymap lists every Linux KEY_ code the virtual device
+// advertises via UI_SET_KEYBIT: the 26 letters, the top-row digits,
+// F1..F12, and the named modifier, punctuation, navigation and editing
+// keys. Order doesn't matter to the kernel.
+func buildUInputKeymap() []uint16 {
+	// Letter codes follow the physical rows (qwertyuiop / asdfghjkl /
+	// zxcvbnm), so they are not a clean range: KEY_A=30, KEY_B=48, ...
+	letters := [...]uint16{
+		30, 48, 46, 32, 18, 33, 34, 35, 23, 36, 37, 38, 50, // a..m
+		49, 24, 25, 16, 19, 31, 20, 22, 47, 17, 45, 21, 44, // n..z
+	}
+	named := [...]uint16{
+		keyEsc, keyMinus, keyEqual, keyBackspace, keyTab, keyEnter,
+		keyLeftCtrl, keyRightCtrl, keyLeftShift, keyRightShift,
+		keyLeftAlt, keyRightAlt, keyLeftMeta, keyRightMeta,
+		keySpace, keyCapsLock,
+		keyLeftBracket, keyRightBracket, keyBackslash,
+		keySemicolon, keyApostrophe, keyGrave,
+		keyComma, keyDot, keySlash,
+		keyHome, keyEnd, keyPageUp, keyPageDown,
+		keyUp, keyDown, keyLeft, keyRight,
+		keyInsert, keyDelete,
+	}
+
+	codes := make([]uint16, 0, 128)
+	codes = append(codes, letters[:]...)
+	// Top-row digits: KEY_1..KEY_0 = 2..11.
+	for digit := uint16(2); digit < 12; digit++ {
+		codes = append(codes, digit)
+	}
+	// F1..F10 are 59..68; F11 and F12 are 87 and 88.
+	for fn := uint16(59); fn < 69; fn++ {
+		codes = append(codes, fn)
+	}
+	codes = append(codes, 87, 88)
+	codes = append(codes, named[:]...)
+	return codes
+}
+
+// keymapByKeysym maps X11 keysyms (the values our session receives over
+// RFB) onto Linux KEY_ codes. Shifted ASCII keysyms (uppercase letters,
+// "!@#..." etc.) map to the same scan code as their unshifted twin: the
+// client also sends a separate Shift keysym (0xffe1), so the kernel
+// composes the final character from the held modifier + scan code.
+func keymapByKeysym(_ []uint16) map[uint32]uint16 {
+	letters := map[rune]uint16{
+		'a': 30, 'b': 48, 'c': 46, 'd': 32, 'e': 18, 'f': 33, 'g': 34,
+		'h': 35, 'i': 23, 'j': 36, 'k': 37, 'l': 38, 'm': 50,
+		'n': 49, 'o': 24, 'p': 25, 'q': 16, 'r': 19, 's': 31, 't': 20,
+		'u': 22, 'v': 47, 'w': 17, 'x': 45, 'y': 21, 'z': 44,
+	}
+	m := map[uint32]uint16{
+		// Digits.
+		'0': 11, '1': 2, '2': 3, '3': 4, '4': 5, '5': 6, '6': 7,
+		'7': 8, '8': 9, '9': 10,
+		// Shifted digits (US layout).
+		')': 11, '!': 2, '@': 3, '#': 4, '$': 5, '%': 6, '^': 7,
+		'&': 8, '*': 9, '(': 10,
+		// Punctuation (US layout) and shifted twins.
+		' ': keySpace,
+		'-': keyMinus, '_': keyMinus,
+		'=': keyEqual, '+': keyEqual,
+		'[': keyLeftBracket, '{': keyLeftBracket,
+		']': keyRightBracket, '}': keyRightBracket,
+		'\\': keyBackslash, '|': keyBackslash,
+		';': keySemicolon, ':': keySemicolon,
+		'\'': keyApostrophe, '"': keyApostrophe,
+		'`': keyGrave, '~': keyGrave,
+		',': keyComma, '<': keyComma,
+		'.': keyDot, '>': keyDot,
+		'/': keySlash, '?': keySlash,
+		// Special keys (X11 keysyms).
+		0xff08: keyBackspace, 0xff09: keyTab, 0xff0d: keyEnter,
+		0xff1b: keyEsc, 0xffff: keyDelete,
+		0xff50: keyHome, 0xff57: keyEnd,
+		0xff51: keyLeft, 0xff52: keyUp, 0xff53: keyRight, 0xff54: keyDown,
+		0xff55: keyPageUp, 0xff56: keyPageDown, 0xff63: keyInsert,
+		0xffe1: keyLeftShift, 0xffe2: keyRightShift,
+		0xffe3: keyLeftCtrl, 0xffe4: keyRightCtrl,
+		0xffe9: keyLeftAlt, 0xffea: keyRightAlt,
+		0xffeb: keyLeftMeta, 0xffec: keyRightMeta,
+	}
+	// Letters: register both lowercase and uppercase keysyms onto the same
+	// KEY_ code. The client sends Shift separately for uppercase.
+	for r, code := range letters {
+		m[uint32(r)] = code
+		m[uint32(r-'a'+'A')] = code
+	}
+	// Function keys F1..F12 (X11 keysyms 0xffbe..0xffc9 → KEY_F1..KEY_F12).
+	xF := uint32(0xffbe)
+	codes := []uint16{59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 87, 88}
+	for i, c := range codes {
+		m[xF+uint32(i)] = c
+	}
+	return m
+}
+
+// keyForRune maps a rune to the key that types it on a US layout. It
+// returns (keycode, needsShift, ok): keycode is the Linux KEY_ code,
+// needsShift reports whether Shift must be held, and ok is false for runes
+// that cannot be typed (in which case the other results are zero).
+// Handled runes are ASCII letters and digits, '\n' and '\r' (both Enter),
+// and the punctuation listed in punctUnshifted and punctShifted. Used by
+// TypeText to synthesize keystrokes for a paste payload.
+func keyForRune(r rune) (uint16, bool, bool) {
+	// Fixed-size tables indexed by offset from 'a' / '0'. Arrays avoid
+	// building a map on every call, which matters because TypeText calls
+	// this once per pasted rune.
+	letterCodes := [...]uint16{
+		30, 48, 46, 32, 18, 33, 34, 35, 23, 36, 37, 38, 50, // a..m
+		49, 24, 25, 16, 19, 31, 20, 22, 47, 17, 45, 21, 44, // n..z
+	}
+	digitCodes := [...]uint16{11, 2, 3, 4, 5, 6, 7, 8, 9, 10} // '0'..'9'
+
+	switch {
+	case r >= 'a' && r <= 'z':
+		return letterCodes[r-'a'], false, true
+	case r >= 'A' && r <= 'Z':
+		// Same key as the lowercase letter, with Shift held.
+		return letterCodes[unicode.ToLower(r)-'a'], true, true
+	case r >= '0' && r <= '9':
+		return digitCodes[r-'0'], false, true
+	case r == '\n' || r == '\r':
+		return keyEnter, false, true
+	}
+
+	if k, ok := punctUnshifted[r]; ok {
+		return k, false, true
+	}
+	if k, ok := punctShifted[r]; ok {
+		return k, true, true
+	}
+	return 0, false, false
+}
+
+// punctUnshifted maps ASCII punctuation that needs no Shift to its uinput
+// KEY_* code. Split out of keyForRune's switch to keep the function's
+// cognitive complexity below Sonar's threshold.
+// Besides punctuation it also carries space and tab, so keyForRune types
+// those as well. The pairing assumes a US keyboard layout.
+var punctUnshifted = map[rune]uint16{
+	' ':  keySpace,
+	'\t': keyTab,
+	'-':  keyMinus,
+	'=':  keyEqual,
+	'[':  keyLeftBracket,
+	']':  keyRightBracket,
+	'\\': keyBackslash,
+	';':  keySemicolon,
+	'\'': keyApostrophe,
+	'`':  keyGrave,
+	',':  keyComma,
+	'.':  keyDot,
+	'/':  keySlash,
+}
+
+// punctShifted maps ASCII punctuation that requires Shift to its base KEY_*
+// code; the caller adds the shift modifier itself.
+// The numeric values 2..11 on the first two rows are the top-row digit keys
+// (KEY_1..KEY_0), i.e. '!' is Shift+1 and ')' is Shift+0. The pairing
+// assumes a US keyboard layout.
+var punctShifted = map[rune]uint16{
+	'!': 2, '@': 3, '#': 4, '$': 5, '%': 6, '^': 7, '&': 8, '*': 9,
+	'(': 10, ')': 11,
+	'_': keyMinus, '+': keyEqual,
+	'{': keyLeftBracket, '}': keyRightBracket, '|': keyBackslash,
+	':': keySemicolon, '"': keyApostrophe, '~': keyGrave,
+	'<': keyComma, '>': keyDot, '?': keySlash,
+}
+
+var _ InputInjector = (*UInputInjector)(nil)
--- a/client/vnc/server/input_windows.go
+++ b/client/vnc/server/input_windows.go
@@ -0,0 +1,500 @@
+//go:build windows
+
+package server
+
+import (
+	"runtime"
+	"sync"
+	"unsafe"
+
+	log "github.com/sirupsen/logrus"
+	"golang.org/x/sys/windows"
+)
+
+// Win32 entry points used for input injection and for signalling the SAS
+// event. kernel32 and user32 are package-level DLL handles declared outside
+// this section of the file.
+var (
+	procOpenEventW = kernel32.NewProc("OpenEventW")
+	procSendInput  = user32.NewProc("SendInput")
+	procVkKeyScanA = user32.NewProc("VkKeyScanA")
+)
+
+// eventModifyState is the access right requested by signalSAS when opening
+// the named SAS event so that it can be set.
+const eventModifyState = 0x0002
+
+// Values for the Win32 INPUT structure passed to SendInput: the input type
+// discriminator, the MOUSEEVENTF_* flags, the wheel step, and the
+// KEYEVENTF_* flags.
+// NOTE(review): KEYEVENTF_EXTENDEDKEY (0x0001) has no constant in this
+// group.
+const (
+	inputMouse    = 0
+	inputKeyboard = 1
+
+	// Mouse event flags (mouseInput.DwFlags).
+	mouseeventfMove       = 0x0001
+	mouseeventfLeftDown   = 0x0002
+	mouseeventfLeftUp     = 0x0004
+	mouseeventfRightDown  = 0x0008
+	mouseeventfRightUp    = 0x0010
+	mouseeventfMiddleDown = 0x0020
+	mouseeventfMiddleUp   = 0x0040
+	mouseeventfWheel      = 0x0800
+	mouseeventfAbsolute   = 0x8000
+
+	// Amount sent in mouseInput.MouseData for one wheel step.
+	wheelDelta = 120
+
+	// Keyboard event flags (keybdInput.DwFlags).
+	keyeventfKeyUp    = 0x0002
+	keyeventfUnicode  = 0x0004
+	keyeventfScanCode = 0x0008
+)
+
+// winlogonDesktopName is the name of the Windows secure desktop that hosts the
+// logon UI, Ctrl+Alt+Del screen, UAC prompts, and credential dialogs. Its
+// clipboard is isolated from the interactive Default desktop, so pasting via
+// the clipboard API does not work there. We fall back to synthesizing the
+// text as Unicode keystrokes.
+// NOTE(review): nothing in the visible part of this file references this
+// constant; confirm it is used elsewhere in the package.
+const winlogonDesktopName = "Winlogon"
+
+// maxTypedClipboardChars caps the number of characters we will synthesize as
+// keystrokes when falling back on the Winlogon desktop. Passwords are short;
+// a huge clipboard getting typed into the login screen would be surprising.
+// typeUnicodeText applies the cap to UTF-16 code units, not to runes.
+const maxTypedClipboardChars = 4096
+
+// mouseInput is the mouse payload copied into winInput.Data by
+// sendMouseInput. Field names follow the Win32 MOUSEINPUT structure.
+type mouseInput struct {
+	Dx          int32
+	Dy          int32
+	MouseData   uint32
+	DwFlags     uint32
+	Time        uint32
+	DwExtraInfo uintptr
+}
+
+// keybdInput is the keyboard payload copied into winInput.Data by
+// sendKeyInput. Field names follow the Win32 KEYBDINPUT structure. The
+// trailing blank field is padding.
+// NOTE(review): the padding appears sized so the struct fills the 32-byte
+// inputUnion on 64-bit targets; confirm for 32-bit builds.
+type keybdInput struct {
+	WVk         uint16
+	WScan       uint16
+	DwFlags     uint32
+	Time        uint32
+	DwExtraInfo uintptr
+	_           [8]byte
+}
+
+// inputUnion is the raw storage for whichever payload a winInput carries.
+type inputUnion [32]byte
+
+// winInput is the record handed to SendInput: a type discriminator
+// (inputMouse or inputKeyboard), explicit padding, and the payload bytes.
+// NOTE(review): the 4-byte pad and 32-byte union match a 64-bit layout;
+// confirm whether 32-bit Windows builds are supported.
+type winInput struct {
+	Type uint32
+	_    [4]byte
+	Data inputUnion
+}
+
+// sendMouseInput submits a single mouse event to SendInput with the given
+// MOUSEEVENTF_* flags, coordinates and mouse data. When SendInput reports
+// that nothing was inserted, the failure is logged at trace level.
+func sendMouseInput(flags uint32, dx, dy int32, mouseData uint32) {
+	payload := mouseInput{Dx: dx, Dy: dy, MouseData: mouseData, DwFlags: flags}
+	event := winInput{Type: inputMouse}
+	// Copy the payload's bytes into the union storage.
+	raw := unsafe.Slice((*byte)(unsafe.Pointer(&payload)), unsafe.Sizeof(payload))
+	copy(event.Data[:], raw)
+
+	sent, _, callErr := procSendInput.Call(1, uintptr(unsafe.Pointer(&event)), unsafe.Sizeof(event))
+	if sent != 0 {
+		return
+	}
+	log.Tracef("SendInput(mouse flags=0x%x): %v", flags, callErr)
+}
+
+// sendKeyInput submits a single keyboard event to SendInput with the given
+// virtual-key code, scan code (or UTF-16 unit when the Unicode flag is set)
+// and KEYEVENTF_* flags. When SendInput reports that nothing was inserted,
+// the failure is logged at trace level.
+func sendKeyInput(vk uint16, scanCode uint16, flags uint32) {
+	payload := keybdInput{WVk: vk, WScan: scanCode, DwFlags: flags}
+	event := winInput{Type: inputKeyboard}
+	// Copy the payload's bytes into the union storage.
+	raw := unsafe.Slice((*byte)(unsafe.Pointer(&payload)), unsafe.Sizeof(payload))
+	copy(event.Data[:], raw)
+
+	sent, _, callErr := procSendInput.Call(1, uintptr(unsafe.Pointer(&event)), unsafe.Sizeof(event))
+	if sent != 0 {
+		return
+	}
+	log.Tracef("SendInput(key vk=0x%x): %v", vk, callErr)
+}
+
+// sasEventName is the named event signalSAS opens and sets to request a
+// Secure Attention Sequence.
+const sasEventName = `Global\NetBirdVNC_SAS`
+
+// inputCmd is one unit of work queued for the injector thread. dispatch
+// checks the flags in the order isClipboard, isType, isKey; a command with
+// none of them set is treated as a pointer event. keysym/down belong to key
+// events, buttonMask/x/y/serverW/serverH to pointer events, and clipText to
+// clipboard and type-text requests.
+type inputCmd struct {
+	isKey       bool
+	isClipboard bool
+	isType      bool
+	keysym      uint32
+	down        bool
+	buttonMask  uint8
+	x, y        int
+	serverW     int
+	serverH     int
+	clipText    string
+}
+
+// WindowsInputInjector delivers input events from a dedicated OS thread that
+// calls switchToInputDesktop before each injection. SendInput targets the
+// calling thread's desktop, so the injection thread must be on the same
+// desktop the user sees.
+//
+// Fields:
+//   - ch queues commands for the injector goroutine.
+//   - closed is closed by Close to stop the goroutine and reject new work.
+//   - closeOnce makes Close idempotent.
+//   - prevButtonMask is the RFB button mask of the previous pointer event.
+//   - ctrlDown and altDown track modifier state so Ctrl+Alt+Del can be
+//     recognised.
+//
+// The last three fields are only touched from the injector goroutine in the
+// code visible here.
+type WindowsInputInjector struct {
+	ch             chan inputCmd
+	closed         chan struct{}
+	closeOnce      sync.Once
+	prevButtonMask uint8
+	ctrlDown       bool
+	altDown        bool
+}
+
+// NewWindowsInputInjector builds a desktop-aware input injector with a
+// 64-entry command queue and starts its injection goroutine. Call Close to
+// stop that goroutine.
+func NewWindowsInputInjector() *WindowsInputInjector {
+	inj := new(WindowsInputInjector)
+	inj.ch = make(chan inputCmd, 64)
+	inj.closed = make(chan struct{})
+	go inj.loop()
+	return inj
+}
+
+// Close stops the injector loop and may be called any number of times.
+// After it returns, Inject*/SetClipboard/TypeText calls are no-ops. Shutdown
+// is signalled through a dedicated channel instead of closing ch, so a late
+// sender can never panic on a closed channel.
+func (w *WindowsInputInjector) Close() {
+	w.closeOnce.Do(func() { close(w.closed) })
+}
+
+// tryEnqueue queues cmd for the injector goroutine without ever blocking,
+// so callers (the RFB read loop) cannot stall. The command is silently
+// discarded when the injector is already closed or when the queue is full.
+func (w *WindowsInputInjector) tryEnqueue(cmd inputCmd) {
+	select {
+	case <-w.closed:
+		// Closed: drop the command.
+	default:
+		select {
+		case w.ch <- cmd:
+		default:
+			// Queue full: drop the command.
+		}
+	}
+}
+
+// loop is the body of the injector goroutine started by
+// NewWindowsInputInjector. It locks itself to one OS thread, then dispatches
+// queued commands until Close signals w.closed. The thread lock is never
+// released. Commands still queued when w.closed fires are not drained.
+func (w *WindowsInputInjector) loop() {
+	runtime.LockOSThread()
+
+	for {
+		select {
+		case <-w.closed:
+			return
+		case cmd := <-w.ch:
+			w.dispatch(cmd)
+		}
+	}
+}
+
+// dispatch executes one queued command on the injector thread. The command
+// kind is chosen by the first flag set, in the order isClipboard, isType,
+// isKey; a command with no flag set is a pointer event.
+func (w *WindowsInputInjector) dispatch(cmd inputCmd) {
+	// Switch to the current input desktop so SendInput and the clipboard
+	// API target the desktop the user sees. Both return values are
+	// discarded here: a failed switch is not reported and the desktop name
+	// is not inspected.
+	_, _ = switchToInputDesktop()
+
+	switch {
+	case cmd.isClipboard:
+		w.doSetClipboard(cmd.clipText)
+	case cmd.isType:
+		w.typeUnicodeText(cmd.clipText)
+	case cmd.isKey:
+		w.doInjectKey(cmd.keysym, cmd.down)
+	default:
+		w.doInjectPointer(cmd.buttonMask, cmd.x, cmd.y, cmd.serverW, cmd.serverH)
+	}
+}
+
+// InjectKey queues a key event for injection on the input desktop thread.
+// keysym is an X11 keysym; down is true for press and false for release.
+// The event is dropped without notice if the injector is closed or its
+// queue is full.
+func (w *WindowsInputInjector) InjectKey(keysym uint32, down bool) {
+	w.tryEnqueue(inputCmd{isKey: true, keysym: keysym, down: down})
+}
+
+// InjectPointer queues a pointer event for injection on the input desktop
+// thread. When the channel is full (slow desktop switch, hung SendInput)
+// the new sample is dropped, not merged with queued ones, so the read loop
+// never blocks. The next mouse event carries fresher position anyway.
+// A dropped sample also loses any button change it carried.
+func (w *WindowsInputInjector) InjectPointer(buttonMask uint8, x, y, serverW, serverH int) {
+	w.tryEnqueue(inputCmd{buttonMask: buttonMask, x: x, y: y, serverW: serverW, serverH: serverH})
+}
+
+// doInjectKey injects one key press or release on the injector thread.
+//
+// It tracks Ctrl and Alt state so that Delete (or KP_Delete) pressed while
+// both are held is turned into a Secure Attention Sequence request via
+// signalSAS instead of a normal key event. All other keysyms are translated
+// with keysym2VK and sent by virtual-key code; keysyms with no translation
+// are ignored.
+func (w *WindowsInputInjector) doInjectKey(keysym uint32, down bool) {
+	// KEYEVENTF_EXTENDEDKEY marks keys from the extended part of the
+	// keyboard (arrows, Insert/Delete, Home/End, Page Up/Down, Win keys).
+	// KEYEVENTF_SCANCODE must not be used here: it makes SendInput ignore
+	// the virtual-key code and use the scan code, which we leave at 0.
+	const keyeventfExtendedKey = 0x0001
+
+	switch keysym {
+	case 0xffe3, 0xffe4:
+		w.ctrlDown = down
+	case 0xffe9, 0xffea:
+		w.altDown = down
+	}
+
+	if (keysym == 0xff9f || keysym == 0xffff) && w.ctrlDown && w.altDown && down {
+		signalSAS()
+		return
+	}
+
+	vk, _, extended := keysym2VK(keysym)
+	if vk == 0 {
+		return
+	}
+	var flags uint32
+	if !down {
+		flags |= keyeventfKeyUp
+	}
+	if extended {
+		flags |= keyeventfExtendedKey
+	}
+	sendKeyInput(vk, 0, flags)
+}
+
+// signalSAS opens the named SAS event and sets it. A listener in Session 0
+// (startSASListener) reacts by calling SendSAS to trigger the Secure
+// Attention Sequence. Failures at any step are logged as warnings; success
+// is logged at info level.
+func signalSAS() {
+	name, err := windows.UTF16PtrFromString(sasEventName)
+	if err != nil {
+		log.Warnf("SAS UTF16: %v", err)
+		return
+	}
+
+	handle, _, openErr := procOpenEventW.Call(uintptr(eventModifyState), 0, uintptr(unsafe.Pointer(name)))
+	if handle == 0 {
+		log.Warnf("OpenEvent(%s): %v", sasEventName, openErr)
+		return
+	}
+	event := windows.Handle(handle)
+	defer windows.CloseHandle(event)
+
+	if setErr := windows.SetEvent(event); setErr != nil {
+		log.Warnf("SetEvent SAS: %v", setErr)
+		return
+	}
+	log.Info("SAS event signaled")
+}
+
+func (w *WindowsInputInjector) doInjectPointer(buttonMask uint8, x, y, serverW, serverH int) {
+	if serverW == 0 || serverH == 0 {
+		return
+	}
+
+	absX := int32(x * 65535 / serverW)
+	absY := int32(y * 65535 / serverH)
+
+	sendMouseInput(mouseeventfMove|mouseeventfAbsolute, absX, absY, 0)
+
+	changed := buttonMask ^ w.prevButtonMask
+	w.prevButtonMask = buttonMask
+
+	type btnMap struct {
+		bit  uint8
+		down uint32
+		up   uint32
+	}
+	buttons := [...]btnMap{
+		{0x01, mouseeventfLeftDown, mouseeventfLeftUp},
+		{0x02, mouseeventfMiddleDown, mouseeventfMiddleUp},
+		{0x04, mouseeventfRightDown, mouseeventfRightUp},
+	}
+	for _, b := range buttons {
+		if changed&b.bit == 0 {
+			continue
+		}
+		var flags uint32
+		if buttonMask&b.bit != 0 {
+			flags = b.down
+		} else {
+			flags = b.up
+		}
+		sendMouseInput(flags|mouseeventfAbsolute, absX, absY, 0)
+	}
+
+	negWheelDelta := ^uint32(wheelDelta - 1)
+	if changed&0x08 != 0 && buttonMask&0x08 != 0 {
+		sendMouseInput(mouseeventfWheel|mouseeventfAbsolute, absX, absY, wheelDelta)
+	}
+	if changed&0x10 != 0 && buttonMask&0x10 != 0 {
+		sendMouseInput(mouseeventfWheel|mouseeventfAbsolute, absX, absY, negWheelDelta)
+	}
+}
+
+// keysym2VK converts an X11 keysym to a Windows virtual key code.
+//
+// vk is 0 when the keysym has no translation. scan is always 0 in this
+// implementation. extended is true for keys that belong to the extended
+// part of the keyboard (navigation cluster, Insert/Delete, Win keys).
+// Printable ASCII keysyms (0x20..0x7e) are resolved through VkKeyScanA
+// against the current keyboard layout; only the virtual-key byte is kept,
+// the shift state is dropped because the client sends modifiers as
+// separate key events.
+func keysym2VK(keysym uint32) (vk uint16, scan uint16, extended bool) {
+	if keysym >= 0x20 && keysym <= 0x7e {
+		r, _, _ := procVkKeyScanA.Call(uintptr(keysym))
+		// VkKeyScanA returns a SHORT of -1 when no key on the layout
+		// produces the character; report "no translation" instead of
+		// leaking 0xFF as a virtual-key code.
+		if int16(r) == -1 {
+			return 0, 0, false
+		}
+		vk = uint16(r & 0xff)
+		return
+	}
+
+	// F1..F12: keysyms 0xffbe..0xffc9 map onto VK_F1 (0x70) and up.
+	if keysym >= 0xffbe && keysym <= 0xffc9 {
+		vk = uint16(0x70 + keysym - 0xffbe)
+		return
+	}
+
+	switch keysym {
+	case 0xff08:
+		vk = 0x08 // Backspace
+	case 0xff09:
+		vk = 0x09 // Tab
+	case 0xff0d:
+		vk = 0x0d // Return
+	case 0xff1b:
+		vk = 0x1b // Escape
+	case 0xff63:
+		vk, extended = 0x2d, true // Insert
+	case 0xff9f, 0xffff:
+		vk, extended = 0x2e, true // Delete
+	case 0xff50:
+		vk, extended = 0x24, true // Home
+	case 0xff57:
+		vk, extended = 0x23, true // End
+	case 0xff55:
+		vk, extended = 0x21, true // PageUp
+	case 0xff56:
+		vk, extended = 0x22, true // PageDown
+	case 0xff51:
+		vk, extended = 0x25, true // Left
+	case 0xff52:
+		vk, extended = 0x26, true // Up
+	case 0xff53:
+		vk, extended = 0x27, true // Right
+	case 0xff54:
+		vk, extended = 0x28, true // Down
+	case 0xffe1, 0xffe2:
+		vk = 0x10 // Shift
+	case 0xffe3, 0xffe4:
+		vk = 0x11 // Control
+	case 0xffe9, 0xffea:
+		vk = 0x12 // Alt
+	case 0xffe5:
+		vk = 0x14 // CapsLock
+	case 0xffe7, 0xffeb:
+		vk, extended = 0x5B, true // Meta_L / Super_L -> Left Windows
+	case 0xffe8, 0xffec:
+		vk, extended = 0x5C, true // Meta_R / Super_R -> Right Windows
+	case 0xff61:
+		vk = 0x2c // PrintScreen
+	case 0xff13:
+		vk = 0x13 // Pause
+	case 0xff14:
+		vk = 0x91 // ScrollLock
+	}
+	return
+}
+
+// Win32 entry points used by the clipboard helpers: the user32 clipboard
+// API and the kernel32 global-memory calls needed to hand data to it.
+// NOTE(review): there is no GlobalFree binding in this group.
+var (
+	procOpenClipboard              = user32.NewProc("OpenClipboard")
+	procCloseClipboard             = user32.NewProc("CloseClipboard")
+	procEmptyClipboard             = user32.NewProc("EmptyClipboard")
+	procSetClipboardData           = user32.NewProc("SetClipboardData")
+	procGetClipboardData           = user32.NewProc("GetClipboardData")
+	procIsClipboardFormatAvailable = user32.NewProc("IsClipboardFormatAvailable")
+
+	procGlobalAlloc  = kernel32.NewProc("GlobalAlloc")
+	procGlobalLock   = kernel32.NewProc("GlobalLock")
+	procGlobalUnlock = kernel32.NewProc("GlobalUnlock")
+)
+
+// cfUnicodeText is the clipboard format used for all reads and writes here;
+// gmemMoveable is the allocation flag passed to GlobalAlloc for clipboard
+// data.
+const (
+	cfUnicodeText = 13
+	gmemMoveable  = 0x0002
+)
+
+// SetClipboard queues a request to update the Windows clipboard with the
+// given UTF-8 text. The work runs on the input thread so it follows the
+// current input desktop. Secure desktops (Winlogon, UAC) have isolated
+// clipboards we cannot reach, so the call is a no-op there; use TypeText
+// to enter text into a secure desktop instead.
+// NOTE(review): no secure-desktop check is visible in this file; the
+// request is always queued and doSetClipboard always attempts the write.
+// The request is dropped if the injector is closed or its queue is full.
+func (w *WindowsInputInjector) SetClipboard(text string) {
+	w.tryEnqueue(inputCmd{isClipboard: true, clipText: text})
+}
+
+// TypeText queues a request to synthesize the given text as Unicode
+// keystrokes on the current input desktop. Targets the secure desktop
+// when the user is on Winlogon/UAC, where the clipboard is unreachable.
+// The request is dropped if the injector is closed or its queue is full.
+// Text longer than maxTypedClipboardChars is truncated when it is typed.
+func (w *WindowsInputInjector) TypeText(text string) {
+	w.tryEnqueue(inputCmd{isType: true, clipText: text})
+}
+
+// doSetClipboard replaces the clipboard contents of the current desktop
+// with text, stored as CF_UNICODETEXT. It runs on the injector thread.
+// Every failure is logged at trace level and leaves the clipboard as it
+// was, except that a failing SetClipboardData happens after the clipboard
+// has already been emptied.
+//
+// The global memory block holding the text belongs to this function until
+// SetClipboardData succeeds, after which the system owns it. On every
+// earlier exit the block is released with GlobalFree so it does not leak.
+func (w *WindowsInputInjector) doSetClipboard(text string) {
+	utf16, err := windows.UTF16FromString(text)
+	if err != nil {
+		log.Tracef("clipboard UTF16 encode: %v", err)
+		return
+	}
+
+	size := uintptr(len(utf16) * 2)
+	hMem, _, _ := procGlobalAlloc.Call(gmemMoveable, size)
+	if hMem == 0 {
+		log.Tracef("GlobalAlloc for clipboard: allocation returned nil")
+		return
+	}
+	// Registered first so it runs last, after the clipboard is closed.
+	owned := true
+	defer func() {
+		if owned {
+			_, _, _ = kernel32.NewProc("GlobalFree").Call(hMem)
+		}
+	}()
+
+	ptr, _, _ := procGlobalLock.Call(hMem)
+	if ptr == 0 {
+		log.Tracef("GlobalLock for clipboard: lock returned nil")
+		return
+	}
+	copy(unsafe.Slice((*uint16)(unsafe.Pointer(ptr)), len(utf16)), utf16)
+	_, _, _ = procGlobalUnlock.Call(hMem)
+
+	r, _, lerr := procOpenClipboard.Call(0)
+	if r == 0 {
+		log.Tracef("OpenClipboard: %v", lerr)
+		return
+	}
+	defer logCleanupCall("CloseClipboard", procCloseClipboard)
+
+	_, _, _ = procEmptyClipboard.Call()
+	r, _, lerr = procSetClipboardData.Call(cfUnicodeText, hMem)
+	if r == 0 {
+		log.Tracef("SetClipboardData: %v", lerr)
+		return
+	}
+	// Ownership of hMem has passed to the system.
+	owned = false
+}
+
+// typeUnicodeText synthesizes the given text as Unicode keystrokes via
+// SendInput+KEYEVENTF_UNICODE. Used on the Winlogon secure desktop where the
+// clipboard is isolated: this lets a VNC client paste a password into the
+// login or credential prompt by sending ClientCutText.
+//
+// The text is converted to UTF-16 and each code unit is sent as a key-down
+// followed by a key-up. Input longer than maxTypedClipboardChars code units
+// is truncated with a warning; the cut never lands in the middle of a
+// surrogate pair. Text that cannot be converted is logged at trace level
+// and nothing is typed.
+func (w *WindowsInputInjector) typeUnicodeText(text string) {
+	utf16, err := windows.UTF16FromString(text)
+	if err != nil {
+		log.Tracef("clipboard UTF16 encode: %v", err)
+		return
+	}
+	// Drop the terminating NUL added by UTF16FromString.
+	if len(utf16) > 0 && utf16[len(utf16)-1] == 0 {
+		utf16 = utf16[:len(utf16)-1]
+	}
+	if len(utf16) > maxTypedClipboardChars {
+		log.Warnf("clipboard paste on Winlogon truncated to %d chars", maxTypedClipboardChars)
+		utf16 = utf16[:maxTypedClipboardChars]
+		// A high surrogate (0xD800..0xDBFF) left at the end has lost its
+		// low half to the cut; typing it alone would inject a broken
+		// character.
+		if last := utf16[len(utf16)-1]; last >= 0xD800 && last <= 0xDBFF {
+			utf16 = utf16[:len(utf16)-1]
+		}
+	}
+	for _, c := range utf16 {
+		sendKeyInput(0, c, keyeventfUnicode)
+		sendKeyInput(0, c, keyeventfUnicode|keyeventfKeyUp)
+	}
+}
+
+// GetClipboard returns the clipboard's CF_UNICODETEXT content as a UTF-8
+// string. It returns "" when no Unicode text is available or when the
+// clipboard cannot be opened, read or locked; only the open failure is
+// logged. It runs on the calling goroutine, not on the injector thread.
+func (w *WindowsInputInjector) GetClipboard() string {
+	if avail, _, _ := procIsClipboardFormatAvailable.Call(cfUnicodeText); avail == 0 {
+		return ""
+	}
+
+	opened, _, openErr := procOpenClipboard.Call(0)
+	if opened == 0 {
+		log.Tracef("OpenClipboard for read: %v", openErr)
+		return ""
+	}
+	defer logCleanupCall("CloseClipboard", procCloseClipboard)
+
+	handle, _, _ := procGetClipboardData.Call(cfUnicodeText)
+	if handle == 0 {
+		return ""
+	}
+
+	locked, _, _ := procGlobalLock.Call(handle)
+	if locked == 0 {
+		return ""
+	}
+	// Deferred after CloseClipboard, so the unlock runs before the close.
+	defer logCleanupCallArgs("GlobalUnlock", procGlobalUnlock, handle)
+
+	return windows.UTF16PtrToString((*uint16)(unsafe.Pointer(locked)))
+}
+
+var _ InputInjector = (*WindowsInputInjector)(nil)
+
+var _ ScreenCapturer = (*DesktopCapturer)(nil)
--- a/client/vnc/server/input_x11.go
+++ b/client/vnc/server/input_x11.go
@@ -0,0 +1,283 @@
+//go:build (linux && !android) || freebsd
+
+package server
+
+import (
+	"fmt"
+	"os"
+	"os/exec"
+	"strings"
+
+	log "github.com/sirupsen/logrus"
+
+	"github.com/jezek/xgb"
+	"github.com/jezek/xgb/xproto"
+	"github.com/jezek/xgb/xtest"
+)
+
+// X11InputInjector injects keyboard and mouse events via the XTest extension.
+type X11InputInjector struct {
+	conn              *xgb.Conn          // X11 connection; closed by Close
+	root              xproto.Window      // root window of the first screen
+	screen            *xproto.ScreenInfo // first screen; supplies the real pixel dimensions
+	display           string             // display name, passed as DISPLAY to the clipboard tool
+	keysymMap         map[uint32]byte    // keysym → keycode cache built by cacheKeyboardMapping
+	lastButtons       uint8              // RFB button mask from the previous InjectPointer call
+	clipboardTool     string             // resolved path of xclip/xsel; empty when neither was found
+	clipboardToolName string             // "xclip" or "xsel"; selects the command-line flags
+}
+
+// NewX11InputInjector connects to the given X11 display (falling back to
+// $DISPLAY when empty), initializes XTest and returns a ready injector.
+func NewX11InputInjector(display string) (*X11InputInjector, error) {
+	detectX11Display()
+
+	if display == "" {
+		if display = os.Getenv("DISPLAY"); display == "" {
+			return nil, fmt.Errorf("DISPLAY not set and no Xorg process found")
+		}
+	}
+
+	c, err := xgb.NewConnDisplay(display)
+	if err != nil {
+		return nil, fmt.Errorf("connect to X11 display %s: %w", display, err)
+	}
+
+	if err = xtest.Init(c); err != nil {
+		c.Close()
+		return nil, fmt.Errorf("init XTest extension: %w", err)
+	}
+
+	roots := xproto.Setup(c).Roots
+	if len(roots) == 0 {
+		c.Close()
+		return nil, fmt.Errorf("no X11 screens")
+	}
+	first := roots[0] // only the first screen is used
+
+	injector := &X11InputInjector{
+		conn:    c,
+		root:    first.Root,
+		screen:  &first,
+		display: display,
+	}
+	injector.cacheKeyboardMapping()
+	injector.resolveClipboardTool()
+
+	log.Infof("X11 input injector ready (display=%s)", display)
+	return injector, nil
+}
+
+// InjectKey simulates a press (down=true) or release of the key bound to
+// keysym, an X11 KeySym. Keysyms absent from the cached keyboard mapping are
+// dropped without injecting anything.
+func (x *X11InputInjector) InjectKey(keysym uint32, down bool) {
+	code := x.keysymToKeycode(keysym)
+	if code == 0 {
+		return
+	}
+
+	evt := byte(xproto.KeyRelease)
+	if down {
+		evt = xproto.KeyPress
+	}
+
+	xtest.FakeInput(x.conn, evt, code, 0, x.root, 0, 0, 0)
+}
+
+// InjectPointer moves the pointer to (px, py) and applies the RFB button mask.
+// px/py are in the serverW×serverH framebuffer space and are rescaled to the
+// real screen size. Calls with a zero framebuffer dimension are ignored.
+func (x *X11InputInjector) InjectPointer(buttonMask uint8, px, py, serverW, serverH int) {
+	if serverW == 0 || serverH == 0 {
+		return
+	}
+
+	// Rescale onto the actual screen, then move before touching any button.
+	absX := px * int(x.screen.WidthInPixels) / serverW
+	absY := py * int(x.screen.HeightInPixels) / serverH
+	xtest.FakeInput(x.conn, xproto.MotionNotify, 0, 0, x.root, int16(absX), int16(absY), 0)
+
+	press := func(btn byte) {
+		xtest.FakeInput(x.conn, xproto.ButtonPress, btn, 0, x.root, 0, 0, 0)
+	}
+	release := func(btn byte) {
+		xtest.FakeInput(x.conn, xproto.ButtonRelease, btn, 0, x.root, 0, 0, 0)
+	}
+
+	// RFB mask bit n drives X11 button n+1:
+	//   bit0 → 1 left, bit1 → 2 middle, bit2 → 3 right,
+	//   bit3 → 4 scroll up, bit4 → 5 scroll down.
+	for bit := 0; bit < 5; bit++ {
+		mask := uint8(1) << bit
+		btn := byte(bit + 1)
+		isDown := buttonMask&mask != 0
+		wasDown := x.lastButtons&mask != 0
+
+		if btn >= 4 {
+			// Scroll: every event carrying the bit is one press+release.
+			if isDown {
+				press(btn)
+				release(btn)
+			}
+			continue
+		}
+
+		// Regular buttons act only on a change since the previous event.
+		switch {
+		case isDown && !wasDown:
+			press(btn)
+		case wasDown && !isDown:
+			release(btn)
+		}
+	}
+	// Remember the mask so the next call can detect press/release edges.
+	x.lastButtons = buttonMask
+}
+
+// cacheKeyboardMapping fetches the X11 keyboard mapping once and caches it as
+// keysym → keycode (the lowest keycode wins for a keysym bound several times),
+// avoiding a round-trip per keystroke. On error the cache is left empty.
+func (x *X11InputInjector) cacheKeyboardMapping() {
+	setup := xproto.Setup(x.conn)
+	lo, hi := setup.MinKeycode, setup.MaxKeycode
+	span := hi - lo + 1
+
+	reply, err := xproto.GetKeyboardMapping(x.conn, lo, byte(span)).Reply()
+	if err != nil {
+		log.Debugf("cache keyboard mapping: %v", err)
+		x.keysymMap = make(map[uint32]byte)
+		return
+	}
+
+	perKey := int(reply.KeysymsPerKeycode)
+	table := make(map[uint32]byte, int(span)*perKey)
+	for code := int(lo); code <= int(hi); code++ {
+		base := (code - int(lo)) * perKey
+		for level := 0; level < perKey; level++ {
+			sym := uint32(reply.Keysyms[base+level])
+			if sym == 0 {
+				continue
+			}
+			if _, taken := table[sym]; !taken {
+				table[sym] = byte(code)
+			}
+		}
+	}
+	x.keysymMap = table
+}
+
+// keysymToKeycode looks up keysym in the cached keysym-to-keycode mapping.
+// Returns 0 (the map's zero value) if the keysym is not mapped.
+func (x *X11InputInjector) keysymToKeycode(keysym uint32) byte {
+	return x.keysymMap[keysym]
+}
+
+// SetClipboard pipes text on stdin to xclip or xsel so it becomes the X11
+// clipboard. Does nothing when no tool was resolved; failures are only logged.
+func (x *X11InputInjector) SetClipboard(text string) {
+	if x.clipboardTool == "" {
+		return
+	}
+
+	args := []string{"--clipboard", "--input"} // xsel syntax
+	if x.clipboardToolName == "xclip" {
+		args = []string{"-selection", "clipboard"}
+	}
+	cmd := exec.Command(x.clipboardTool, args...)
+	cmd.Env = x.clipboardEnv()
+	cmd.Stdin = strings.NewReader(text)
+	if err := cmd.Run(); err != nil {
+		log.Debugf("set clipboard via %s: %v", x.clipboardToolName, err)
+	}
+}
+
+// TypeText synthesizes the given text as keystrokes via XTest. Stuffing the
+// host clipboard with xclip and relying on Ctrl+V is not enough, because the
+// Paste button is also used where the focused application isn't
+// clipboard-aware (e.g. a TTY login in an X11 session, an SDDM/GDM password
+// field that ignores XSelection, or a kiosk app). Typing covers all of those.
+//
+// Limitations: only the characters known to keysymForASCIIRune are typed;
+// anything else is skipped but still counts toward the 4096-rune cap. Shifted
+// characters are wrapped in a Shift_L press/release when Shift_L is mapped.
+func (x *X11InputInjector) TypeText(text string) {
+	const maxChars = 4096
+	shiftL := x.keysymToKeycode(0xffe1) // Shift_L
+	tap := func(evt, code byte) {
+		xtest.FakeInput(x.conn, evt, code, 0, x.root, 0, 0, 0)
+	}
+	seen := 0
+	for _, r := range text {
+		if seen >= maxChars {
+			break
+		}
+		seen++
+		sym, needShift, ok := keysymForASCIIRune(r)
+		if !ok {
+			continue
+		}
+		code := x.keysymToKeycode(sym)
+		if code == 0 {
+			continue
+		}
+		holdShift := needShift && shiftL != 0
+		if holdShift {
+			tap(xproto.KeyPress, shiftL)
+		}
+		tap(xproto.KeyPress, code)
+		tap(xproto.KeyRelease, code)
+		if holdShift {
+			tap(xproto.KeyRelease, shiftL)
+		}
+	}
+}
+
+// resolveClipboardTool records the first of xclip/xsel found on PATH as the
+// clipboard helper; when neither exists the fields stay empty (sync disabled).
+func (x *X11InputInjector) resolveClipboardTool() {
+	for _, candidate := range [...]string{"xclip", "xsel"} {
+		if resolved, err := exec.LookPath(candidate); err == nil {
+			x.clipboardTool, x.clipboardToolName = resolved, candidate
+			log.Debugf("clipboard tool resolved to %s", resolved)
+			return
+		}
+	}
+	log.Debugf("no clipboard tool (xclip/xsel) found, clipboard sync disabled")
+}
+
+// GetClipboard returns the X11 clipboard text as printed by xclip or xsel,
+// or "" when no tool is available or the tool fails.
+func (x *X11InputInjector) GetClipboard() string {
+	if x.clipboardTool == "" {
+		return ""
+	}
+
+	args := []string{"--clipboard", "--output"} // xsel syntax
+	if x.clipboardToolName == "xclip" {
+		args = []string{"-selection", "clipboard", "-o"}
+	}
+	cmd := exec.Command(x.clipboardTool, args...)
+	cmd.Env = x.clipboardEnv()
+	data, err := cmd.Output()
+	if err != nil {
+		log.Tracef("get clipboard via %s: %v", x.clipboardToolName, err)
+		return ""
+	}
+	return string(data)
+}
+
+// clipboardEnv returns DISPLAY (plus XAUTHORITY when set) for the helper tool.
+func (x *X11InputInjector) clipboardEnv() []string {
+	if auth := os.Getenv("XAUTHORITY"); auth != "" {
+		return []string{"DISPLAY=" + x.display, "XAUTHORITY=" + auth}
+	}
+	return []string{"DISPLAY=" + x.display}
+}
+
+// Close releases X11 resources by closing the underlying X connection.
+func (x *X11InputInjector) Close() {
+	x.conn.Close()
+}
+
+var _ InputInjector = (*X11InputInjector)(nil)
+var _ ScreenCapturer = (*X11Poller)(nil)
--- a/client/vnc/server/keysym_typetext.go
+++ b/client/vnc/server/keysym_typetext.go
@@ -0,0 +1,71 @@
+package server
+
+// keysymForASCIIRune maps an ASCII rune to (X11 keysym for the unshifted
+// version, needsShift, ok), following a US keyboard layout. Used by TypeText
+// implementations on each platform so the caller can explicitly press Shift
+// instead of relying on the server-side modifier state. Returns ok=false for
+// runes outside the supported set; non-ASCII text is dropped by TypeText.
+func keysymForASCIIRune(r rune) (uint32, bool, bool) {
+	switch {
+	case r >= 'a' && r <= 'z', r >= '0' && r <= '9':
+		return uint32(r), false, true
+	case r >= 'A' && r <= 'Z':
+		return uint32(r - 'A' + 'a'), true, true
+	}
+
+	// base is the unshifted key that yields r while Shift is held.
+	var base rune
+	switch r {
+	case ' ', '-', '=', '[', ']', '\\', ';', '\'', '`', ',', '.', '/':
+		return uint32(r), false, true
+	case '\n', '\r':
+		return 0xff0d, false, true // Return
+	case '\t':
+		return 0xff09, false, true // Tab
+	case '!':
+		base = '1'
+	case '@':
+		base = '2'
+	case '#':
+		base = '3'
+	case '$':
+		base = '4'
+	case '%':
+		base = '5'
+	case '^':
+		base = '6'
+	case '&':
+		base = '7'
+	case '*':
+		base = '8'
+	case '(':
+		base = '9'
+	case ')':
+		base = '0'
+	case '_':
+		base = '-'
+	case '+':
+		base = '='
+	case '{':
+		base = '['
+	case '}':
+		base = ']'
+	case '|':
+		base = '\\'
+	case ':':
+		base = ';'
+	case '"':
+		base = '\''
+	case '~':
+		base = '`'
+	case '<':
+		base = ','
+	case '>':
+		base = '.'
+	case '?':
+		base = '/'
+	default:
+		return 0, false, false
+	}
+	return uint32(base), true, true
+}
--- a/client/vnc/server/rfb.go
+++ b/client/vnc/server/rfb.go
@@ -0,0 +1,905 @@
+package server
+
+import (
+	"bytes"
+	"compress/zlib"
+	"crypto/des" //nolint:gosec // RFB protocol-defined DES challenge/response; not used for confidentiality
+	"encoding/binary"
+	"fmt"
+	"image"
+	"image/jpeg"
+	"unsafe"
+
+	log "github.com/sirupsen/logrus"
+)
+
+// rect describes a rectangle on the framebuffer in pixels.
+type rect struct {
+	x, y, w, h int // top-left corner (x, y), then width and height
+}
+
+const (
+	rfbProtocolVersion = "RFB 003.008\n" // protocol version string (RFB 3.8)
+
+	secNone    = 1 // security type: no authentication
+	secVNCAuth = 2 // security type: VNC authentication (see vncAuthEncrypt)
+
+	// Client-to-server message types.
+	clientSetPixelFormat           = 0
+	clientSetEncodings             = 2
+	clientFramebufferUpdateRequest = 3
+	clientKeyEvent                 = 4
+	clientPointerEvent             = 5
+	clientCutText                  = 6
+
+	// clientNetbirdTypeText is a NetBird-specific message that asks the
+	// server to synthesize the given text as keystrokes regardless of the
+	// active desktop. Used by the dashboard's Paste button to push host
+	// clipboard content into a Windows secure desktop (Winlogon, UAC),
+	// where the OS clipboard is isolated. Format mirrors clientCutText:
+	// 1-byte message type + 3-byte padding + 4-byte length + text bytes.
+	// The opcode is in the vendor-specific range (>=128).
+	clientNetbirdTypeText = 250
+
+	// Server-to-client message types.
+	serverFramebufferUpdate = 0
+	serverCutText           = 3
+
+	// Rectangle encoding types, as written into each rectangle header.
+	encRaw     = 0
+	encHextile = 5
+	encZlib    = 6
+	encTight   = 7
+
+	// Tight compression-control byte top nibble. Stream-reset bits 0-3
+	// (one per zlib stream) are unused while we run a single stream.
+	tightFillSubenc  = 0x80
+	tightJPEGSubenc  = 0x90
+	tightBasicFilter = 0x40 // Bit 6 set = explicit filter byte follows.
+	tightFilterCopy  = 0x00 // No-op filter, raw pixel stream.
+
+	// JPEG quality used by the Tight encoder. 70 is a reasonable speed/
+	// quality knee; bandwidth roughly halves vs raw RGB while staying
+	// visually clean for typical desktop content. Large rects (e.g. a
+	// fullscreen video region) drop to a lower quality so the encoder
+	// keeps up at 30+ fps; the visual hit is small for moving content.
+	tightJPEGQuality       = 70
+	tightJPEGQualityMedium = 55
+	tightJPEGQualityLarge  = 40
+	tightJPEGMediumPixels  = 800 * 600  // SVGA-sized area, applies medium tier
+	tightJPEGLargePixels   = 1280 * 720 // 720p-sized area, applies large tier
+	// Minimum rect area (in pixels) before we consider JPEG. Below this,
+	// header overhead dominates and Basic+zlib wins.
+	tightJPEGMinArea = 4096 // 64×64 ≈ 1 tile
+	// Distinct-colour cap below which we still prefer Basic+zlib (text,
+	// UI). Sampled, not exhaustive: cheap to compute, good enough.
+	tightJPEGMinColors = 64
+
+	// Hextile subencoding flags (a bitmask in the first byte of each sub-tile).
+	hextileRaw                 = 0x01
+	hextileBackgroundSpecified = 0x02
+	hextileForegroundSpecified = 0x04
+	hextileAnySubrects         = 0x08
+	hextileSubrectsColoured    = 0x10
+
+	// Hextile sub-tile edge length in pixels, per RFB spec.
+	hextileSubSize = 16
+)
+
+// serverPixelFormat is the default pixel format advertised by the server:
+// 32bpp (depth 24), big-endian, true-colour, 8 bits per channel, 0x00RRGGBB.
+var serverPixelFormat = [16]byte{
+	32,     // bits-per-pixel
+	24,     // depth
+	1,      // big-endian-flag
+	1,      // true-colour-flag
+	0, 255, // red-max
+	0, 255, // green-max
+	0, 255, // blue-max
+	16,      // red-shift
+	8,       // green-shift
+	0,       // blue-shift
+	0, 0, 0, // padding
+}
+
+// clientPixelFormat holds the negotiated pixel format from the client.
+type clientPixelFormat struct {
+	bpp       uint8  // bits per pixel; encoders use max(bpp/8, 1) bytes per pixel
+	bigEndian uint8  // non-zero: multi-byte pixels are sent most significant byte first
+	rMax      uint16 // maximum red value (255 for an 8-bit channel)
+	gMax      uint16 // maximum green value
+	bMax      uint16 // maximum blue value
+	rShift    uint8  // left shift that places red inside the pixel value
+	gShift    uint8  // left shift that places green
+	bShift    uint8  // left shift that places blue
+}
+
+// defaultClientPixelFormat decodes the advertised serverPixelFormat into a
+// clientPixelFormat; 16-bit channel maxes are read big-endian.
+func defaultClientPixelFormat() clientPixelFormat {
+	raw := serverPixelFormat[:]
+	u16 := binary.BigEndian.Uint16
+
+	var pf clientPixelFormat
+	pf.bpp, pf.bigEndian = raw[0], raw[2]
+	pf.rMax, pf.gMax, pf.bMax = u16(raw[4:6]), u16(raw[6:8]), u16(raw[8:10])
+	pf.rShift, pf.gShift, pf.bShift = raw[10], raw[11], raw[12]
+	return pf
+}
+
+// parsePixelFormat decodes the first 13 bytes of a pixel-format record; the
+// bytes at offsets 1 and 3 are not read. Panics if pf is shorter than that.
+func parsePixelFormat(pf []byte) clientPixelFormat {
+	u16 := binary.BigEndian.Uint16
+
+	var out clientPixelFormat
+	out.bpp, out.bigEndian = pf[0], pf[2]
+	out.rMax, out.gMax, out.bMax = u16(pf[4:6]), u16(pf[6:8]), u16(pf[8:10])
+	out.rShift, out.gShift, out.bShift = pf[10], pf[11], pf[12]
+
+	return out
+}
+
+// encodeRawRect encodes a framebuffer region as a raw RFB rectangle.
+// The returned buffer is a complete FramebufferUpdate message holding that
+// one rectangle: 4-byte message header, 12-byte rectangle header, then the
+// pixels converted to the client's pixel format.
+func encodeRawRect(img *image.RGBA, pf clientPixelFormat, x, y, w, h int) []byte {
+	const header = 4 + 12 // message header + rectangle header
+	bpp := max(int(pf.bpp)/8, 1)
+	out := make([]byte, header+w*h*bpp)
+
+	// FramebufferUpdate header: message type, padding, rectangle count.
+	out[0], out[1] = serverFramebufferUpdate, 0
+	binary.BigEndian.PutUint16(out[2:4], 1)
+
+	// Rectangle header: x, y, width, height, then the encoding type.
+	for i, v := range [4]int{x, y, w, h} {
+		binary.BigEndian.PutUint16(out[4+2*i:], uint16(v))
+	}
+	binary.BigEndian.PutUint32(out[12:16], uint32(encRaw))
+
+	// Pixel data fills the rest of the buffer.
+	writePixels(out[header:], img, pf, rect{x, y, w, h}, bpp)
+	return out
+}
+
+// writePixels writes rectangle r of img into dst in the client's pixel format.
+// 4-byte pixels with full 8-bit channels take the fast path, which skips the
+// per-channel *max/255 scaling; every other format uses the generic writer.
+func writePixels(dst []byte, img *image.RGBA, pf clientPixelFormat, r rect, bytesPerPixel int) {
+	fullChannels := pf.rMax == 255 && pf.gMax == 255 && pf.bMax == 255
+	if bytesPerPixel != 4 || !fullChannels {
+		writePixelsGeneric(dst, img, pf, r, bytesPerPixel)
+		return
+	}
+	writePixelsFast32(dst, img, pf, r)
+}
+
+// writePixelsFast32 is the 4-bytes-per-pixel, 8-bit-per-channel writer: each
+// pixel is R<<rShift | G<<gShift | B<<bShift stored in the client's byte order.
+func writePixelsFast32(dst []byte, img *image.RGBA, pf clientPixelFormat, r rect) {
+	be := pf.bigEndian != 0
+	out := 0
+	for dy := range r.h {
+		src := (r.y+dy)*img.Stride + r.x*4
+		for range r.w {
+			px := uint32(img.Pix[src])<<pf.rShift |
+				uint32(img.Pix[src+1])<<pf.gShift |
+				uint32(img.Pix[src+2])<<pf.bShift
+			if be {
+				binary.BigEndian.PutUint32(dst[out:out+4], px)
+			} else {
+				binary.LittleEndian.PutUint32(dst[out:out+4], px)
+			}
+			src += 4
+			out += 4
+		}
+	}
+}
+
+// writePixelsGeneric rescales each 8-bit channel to the client's channel max.
+func writePixelsGeneric(dst []byte, img *image.RGBA, pf clientPixelFormat, r rect, bytesPerPixel int) {
+	for dy := range r.h {
+		src := (r.y+dy)*img.Stride + r.x*4
+		for dx := range r.w {
+			red := uint32(img.Pix[src]) * uint32(pf.rMax) / 255
+			green := uint32(img.Pix[src+1]) * uint32(pf.gMax) / 255
+			blue := uint32(img.Pix[src+2]) * uint32(pf.bMax) / 255
+			packed := (red << pf.rShift) | (green << pf.gShift) | (blue << pf.bShift)
+			at := (dy*r.w + dx) * bytesPerPixel
+			emitPixelBytes(dst[at:at+bytesPerPixel], packed, bytesPerPixel, pf.bigEndian != 0)
+			src += 4
+		}
+	}
+}
+
+// emitPixelBytes stores the low bytesPerPixel bytes of pixel into dst, most
+// significant byte first when bigEndian is set, least significant otherwise.
+func emitPixelBytes(dst []byte, pixel uint32, bytesPerPixel int, bigEndian bool) {
+	for i := range bytesPerPixel {
+		shift := i * 8
+		if bigEndian {
+			shift = (bytesPerPixel - 1 - i) * 8
+		}
+		dst[i] = byte(pixel >> uint(shift))
+	}
+}
+
+// vncAuthEncrypt computes the VNC authentication response: the first 16 bytes
+// of challenge, DES-encrypted as two independent 8-byte blocks. The key is the
+// password cut or zero-padded to 8 bytes with each byte's bit order reversed.
+func vncAuthEncrypt(challenge []byte, password string) ([]byte, error) {
+	var key [8]byte
+	for i := 0; i < len(password) && i < len(key); i++ {
+		key[i] = reverseBits(password[i])
+	}
+
+	block, err := des.NewCipher(key[:]) //nolint:gosec // RFB protocol-defined DES challenge/response; not a confidentiality cipher
+	if err != nil {
+		return nil, fmt.Errorf("des.NewCipher: %w", err)
+	}
+	if len(challenge) < 16 { //nolint:gosec // explicit length check disarms G602
+		return nil, fmt.Errorf("vnc auth challenge too short: %d", len(challenge))
+	}
+
+	out := make([]byte, 16)
+	for off := 0; off < 16; off += des.BlockSize {
+		block.Encrypt(out[off:off+des.BlockSize], challenge[off:off+des.BlockSize])
+	}
+	return out, nil
+}
+
+// reverseBits mirrors the bit order of b (bit 0 ↔ bit 7, bit 1 ↔ bit 6, ...).
+func reverseBits(b byte) byte {
+	var out byte
+	for bit := range 8 {
+		out |= (b >> bit & 1) << (7 - bit)
+	}
+	return out
+}
+
+// encodeZlibRect encodes a framebuffer region using Zlib compression and
+// returns a complete single-rectangle FramebufferUpdate message, or nil when
+// the compressor reports an error.
+//
+// The zlib stream is continuous for the entire VNC session: noVNC creates
+// one inflate context at startup and reuses it for all zlib-encoded rects.
+// We must NOT reset the zlib writer between calls.
+func encodeZlibRect(img *image.RGBA, pf clientPixelFormat, x, y, w, h int, z *zlibState) []byte {
+	bpp := max(int(pf.bpp)/8, 1)
+
+	// Drop the previous rect's output; the deflate dictionary stays intact.
+	z.buf.Reset()
+
+	// Convert the whole rect into the session-lived scratch buffer first,
+	// growing it only when this rect needs more room than it has.
+	stride := w * bpp
+	need := stride * h
+	if cap(z.scratch) < need {
+		z.scratch = make([]byte, need)
+	}
+	pixels := z.scratch[:need]
+	writePixels(pixels, img, pf, rect{x, y, w, h}, bpp)
+
+	// Feed zlib one row per Write call to amortise the per-Write overhead.
+	for row := range h {
+		line := pixels[row*stride : (row+1)*stride]
+		if _, err := z.w.Write(line); err != nil {
+			log.Debugf("zlib write row %d: %v", row, err)
+			return nil
+		}
+	}
+	if err := z.w.Flush(); err != nil {
+		log.Debugf("zlib flush: %v", err)
+		return nil
+	}
+	payload := z.buf.Bytes()
+
+	// Message layout: 4-byte FramebufferUpdate header (one rectangle),
+	// 12-byte rectangle header, 4-byte payload length, compressed payload.
+	const header = 4 + 12 + 4
+	msg := make([]byte, header+len(payload))
+	msg[0], msg[1] = serverFramebufferUpdate, 0
+	binary.BigEndian.PutUint16(msg[2:4], 1)
+	for i, v := range [4]int{x, y, w, h} {
+		binary.BigEndian.PutUint16(msg[4+2*i:], uint16(v))
+	}
+	binary.BigEndian.PutUint32(msg[12:16], uint32(encZlib))
+	binary.BigEndian.PutUint32(msg[16:20], uint32(len(payload)))
+	copy(msg[header:], payload)
+	return msg
+}
+
+// diffRects compares two RGBA frames tile by tile and returns the changed
+// regions as coalesced {x, y, w, h}; a nil prev marks the whole frame dirty.
+func diffRects(prev, cur *image.RGBA, w, h, tileSize int) [][4]int {
+	if prev == nil {
+		return [][4]int{{0, 0, w, h}}
+	}
+	var dirty [][4]int
+	for y := 0; y < h; y += tileSize {
+		tileH := min(tileSize, h-y)
+		for x := 0; x < w; x += tileSize {
+			tileW := min(tileSize, w-x)
+			if !tileChanged(prev, cur, x, y, tileW, tileH) {
+				continue
+			}
+			dirty = append(dirty, [4]int{x, y, tileW, tileH})
+		}
+	}
+	return coalesceRects(dirty)
+}
+
+// coalesceRects merges adjacent dirty tiles into larger rectangles to cut
+// per-rect framing overhead. Input must be tile-ordered (top-to-bottom rows,
+// left-to-right within each row), as produced by diffRects. Two passes:
+//  1. Horizontal: within a row, merge tiles whose x-extents touch.
+//  2. Vertical: merge a row's run with a run directly above it when they
+//     share the same [x, x+w] extent and are vertically adjacent.
+//
+// Inputs with fewer than two rects are returned unchanged. Larger merged
+// rects still encode correctly: Hextile-solid and Zlib paths both work on
+// arbitrary sizes.
+func coalesceRects(in [][4]int) [][4]int {
+	if len(in) <= 1 {
+		return in
+	}
+	merger := newRectCoalescer(len(in))
+	merger.curY = in[0][1]
+	for i := range in {
+		merger.consume(in[i])
+	}
+	merger.flushCurrentRow()
+	return merger.out
+}
+
+// rectCoalescer is the working state for coalesceRects, lifted out so the
+// algorithm can be split across small methods without long parameter lists
+// and to keep each method's cognitive complexity below Sonar's threshold.
+type rectCoalescer struct {
+	out                        [][4]int // merged rects so far, each {x, y, w, h}
+	prevRowStart, prevRowEnd   int      // out[prevRowStart:prevRowEnd] are the runs a finished row may merge into; prevRowStart is not reassigned by the methods shown here
+	curRowStart                int      // index in out of the current row's first run
+	curY                       int      // y of the tile row currently being consumed
+}
+
+func newRectCoalescer(cap int) *rectCoalescer {
+	return &rectCoalescer{out: make([][4]int, 0, cap)} // out starts empty with room for cap rects
+}
+
+// consume processes one rect from the (row-ordered) input.
+func (c *rectCoalescer) consume(r [4]int) {
+	if r[1] != c.curY { // r starts a new tile row
+		c.flushCurrentRow()        // fold the finished row into earlier output first
+		c.prevRowEnd = len(c.out)  // everything emitted so far becomes a merge candidate
+		c.curRowStart = len(c.out) // the new row's runs are appended from here
+		c.curY = r[1]
+	}
+	if c.tryHorizontalMerge(r) {
+		return
+	}
+	c.out = append(c.out, r) // r begins a new run in the current row
+}
+
+// tryHorizontalMerge widens the last run of the current row by r when both
+// share y and height and r starts exactly where that run ends.
+func (c *rectCoalescer) tryHorizontalMerge(r [4]int) bool {
+	if len(c.out) <= c.curRowStart {
+		return false // the current row has no run yet
+	}
+	tail := &c.out[len(c.out)-1]
+	if tail[1] != r[1] || tail[3] != r[3] || tail[0]+tail[2] != r[0] {
+		return false
+	}
+	tail[2] += r[2]
+	return true
+}
+
+// flushCurrentRow merges each run in the current row with any earlier run
+// that has identical x extent and sits directly above it.
+func (c *rectCoalescer) flushCurrentRow() {
+	for i := c.curRowStart; i < len(c.out); {
+		// A successful merge deletes out[i], so the next candidate slides
+		// into index i; only advance when nothing was removed.
+		if !c.mergeWithPrevRow(i) {
+			i++
+		}
+	}
+}
+
+// mergeWithPrevRow tries to grow a candidate run downward so it swallows
+// out[i]. On success out[i] is deleted from the slice and true is returned.
+func (c *rectCoalescer) mergeWithPrevRow(i int) bool {
+	cur := c.out[i]
+	for j := c.prevRowStart; j < c.prevRowEnd; j++ {
+		above := &c.out[j]
+		if above[0] != cur[0] || above[2] != cur[2] || above[1]+above[3] != cur[1] {
+			continue
+		}
+		above[3] += cur[3]
+		c.out = append(c.out[:i], c.out[i+1:]...)
+		return true
+	}
+	return false
+}
+
+// tileChanged reports whether any pixel row of the w×h tile at (x, y) differs
+// between prev and cur. Both images are indexed with prev's stride, so they
+// are expected to share the same layout.
+func tileChanged(prev, cur *image.RGBA, x, y, w, h int) bool {
+	stride, rowLen := prev.Stride, w*4
+	for dy := range h {
+		start := (y+dy)*stride + x*4
+		if !bytes.Equal(prev.Pix[start:start+rowLen], cur.Pix[start:start+rowLen]) {
+			return true
+		}
+	}
+	return false
+}
+
+// tileIsUniform reports whether every pixel in the given rectangle of img is
+// the same RGBA value, and returns that pixel packed as 0xAABBGGRR (R in the
+// low byte) when so. The packing is fixed little-endian on every host.
+func tileIsUniform(img *image.RGBA, x, y, w, h int) (uint32, bool) {
+	if w <= 0 || h <= 0 {
+		return 0, false
+	}
+	stride := img.Stride
+	base := y*stride + x*4
+	first := binary.LittleEndian.Uint32(img.Pix[base : base+4])
+	rowBytes := w * 4
+	for row := 0; row < h; row++ {
+		p := base + row*stride
+		for col := 0; col < rowBytes; col += 4 {
+			if binary.LittleEndian.Uint32(img.Pix[p+col:p+col+4]) != first {
+				return 0, false
+			}
+		}
+	}
+	return first, true
+}
+
+// encodePixel packs one RGB colour into the client's requested pixel format,
+// honouring bpp, channel maxes, shifts and endianness. Channels are rescaled
+// from 0..255 to 0..max unless every max is already 255. Returns the number
+// of bytes written to dst, max(bpp/8, 1); dst must be at least that long.
+func encodePixel(dst []byte, pf clientPixelFormat, r, g, b byte) int {
+	n := max(int(pf.bpp)/8, 1)
+
+	rv, gv, bv := uint32(r), uint32(g), uint32(b)
+	if pf.rMax != 255 || pf.gMax != 255 || pf.bMax != 255 {
+		// Rescale the 8-bit channels to the client's channel range.
+		rv = rv * uint32(pf.rMax) / 255
+		gv = gv * uint32(pf.gMax) / 255
+		bv = bv * uint32(pf.bMax) / 255
+	}
+	packed := rv<<pf.rShift | gv<<pf.gShift | bv<<pf.bShift
+
+	for i := range n {
+		shift := uint(i * 8)
+		if pf.bigEndian != 0 {
+			shift = uint((n - 1 - i) * 8)
+		}
+		dst[i] = byte(packed >> shift)
+	}
+	return n
+}
+
+// encodeHextileSolidRect emits a Hextile-encoded rectangle whose every pixel
+// is the same color. Only the first sub-tile carries the colour; all later
+// sub-tiles inherit the background via a zero subencoding byte, collapsing a
+// uniform 64×64 tile from ~16 KB raw (or ~1-2 KB zlib) down to ~20 bytes on
+// the wire.
+//
+// The returned buffer starts with the 12-byte rect header + the hextile
+// body. Callers assembling a multi-rect FramebufferUpdate append this after
+// their own message header.
+func encodeHextileSolidRect(r, g, b byte, pf clientPixelFormat, rc rect) []byte {
+	const rectHeader = 12
+	bpp := max(int(pf.bpp)/8, 1)
+
+	// Sub-tile grid, rounded up: right/bottom sub-tiles may be smaller than 16.
+	subtiles := ((rc.w + hextileSubSize - 1) / hextileSubSize) *
+		((rc.h + hextileSubSize - 1) / hextileSubSize)
+
+	// Body: flag byte + background pixel for the first sub-tile, then one
+	// 0x00 byte ("same background as before") for each remaining sub-tile.
+	// make() zero-fills, so those trailing bytes need no explicit write.
+	out := make([]byte, rectHeader+1+bpp+(subtiles-1))
+
+	// Rectangle header: x, y, width, height, then the encoding type.
+	for i, v := range [4]int{rc.x, rc.y, rc.w, rc.h} {
+		binary.BigEndian.PutUint16(out[2*i:], uint16(v))
+	}
+	binary.BigEndian.PutUint32(out[8:12], uint32(encHextile))
+
+	// First sub-tile: the only one that spells out the colour.
+	out[rectHeader] = hextileBackgroundSpecified
+	encodePixel(out[rectHeader+1:rectHeader+1+bpp], pf, r, g, b)
+
+	return out
+}
+
+// encodeHextileRect emits a full Hextile-encoded rectangle. Each 16×16
+// sub-tile is classified as 1-color (background only), 2-color (background
+// + foreground subrects), or raw. The 1-color and 2-color paths are
+// significantly cheaper than zlib on UI content (text, icons, flat
+// backgrounds) and avoid the persistent zlib stream's inter-rect
+// serialization point, so they parallelize trivially.
+//
+// The returned buffer starts with the 12-byte rect header + hextile body.
+func encodeHextileRect(img *image.RGBA, pf clientPixelFormat, x, y, w, h int) []byte {
+	bpp := max(int(pf.bpp)/8, 1)
+
+	// eachSubtile visits the sub-tile grid row by row, left to right, with
+	// coordinates relative to the rect; edge sub-tiles are clipped to it.
+	eachSubtile := func(visit func(sx, sy, sw, sh int)) {
+		for sy := 0; sy < h; sy += hextileSubSize {
+			sh := min(hextileSubSize, h-sy)
+			for sx := 0; sx < w; sx += hextileSubSize {
+				visit(sx, sy, min(hextileSubSize, w-sx), sh)
+			}
+		}
+	}
+
+	// Reserve for the worst case: every sub-tile raw (flag byte + pixels).
+	worst := 0
+	eachSubtile(func(_, _, sw, sh int) { worst += 1 + sw*sh*bpp })
+	out := make([]byte, 12, 12+worst)
+
+	// Rectangle header: x, y, width, height, then the encoding type.
+	for i, v := range [4]int{x, y, w, h} {
+		binary.BigEndian.PutUint16(out[2*i:], uint16(v))
+	}
+	binary.BigEndian.PutUint32(out[8:12], uint32(encHextile))
+
+	// Encode in the same order, carrying the background between sub-tiles.
+	var bg hextileBgState
+	eachSubtile(func(sx, sy, sw, sh int) {
+		out = appendHextileSubtile(out, img, pf, rect{x + sx, y + sy, sw, sh}, &bg, bpp)
+	})
+	return out
+}
+
+// hextileBgState carries the running background across sub-tile encodes so
+// we can omit the BackgroundSpecified flag when it hasn't changed.
+type hextileBgState struct {
+	prev  uint32 // background pixel last written to the stream, as packed by classifySubtile
+	valid bool   // false until a background was emitted, and again after any raw sub-tile
+}
+
+// appendHextileSubtile appends the Hextile encoding of one sub-tile (rc, 16×16
+// or a smaller edge tile) to buf; state carries the background between calls.
+func appendHextileSubtile(buf []byte, img *image.RGBA, pf clientPixelFormat, rc rect, state *hextileBgState, bytesPerPixel int) []byte {
+	x, y, w, h := rc.x, rc.y, rc.w, rc.h
+	c0, c1, only2, c0Count, c1Count := classifySubtile(img, x, y, w, h)
+
+	if !only2 {
+		// More than two distinct colours: send the sub-tile as raw pixels.
+		buf = append(buf, hextileRaw)
+		buf = appendRawPixels(buf, img, pf, rc, bytesPerPixel)
+		state.valid = false // a raw sub-tile invalidates the carried background
+		return buf
+	}
+
+	if c1Count == 0 {
+		// Single colour: background only, reduced to one 0 byte when unchanged.
+		if state.valid && state.prev == c0 {
+			return append(buf, 0)
+		}
+		buf = append(buf, hextileBackgroundSpecified)
+		buf = appendPackedPixelFromRGBA(buf, pf, c0, bytesPerPixel)
+		state.prev = c0
+		state.valid = true
+		return buf
+	}
+
+	// Two colours. Background = majority (c0 on a tie); foreground =
+	// minority, emitted as 1-row subrects of fg runs.
+	bg, fg := c0, c1
+	if c1Count > c0Count {
+		bg, fg = c1, c0
+	}
+	subrects := collectFgSubrects(img, x, y, w, h, bg)
+	// Cap at 255 (the count is a single byte on the wire). On overflow fall
+	// through to raw: that's the simplest correct fallback.
+	if len(subrects) <= 255 {
+		flags := byte(hextileForegroundSpecified | hextileAnySubrects)
+		emitBg := !state.valid || state.prev != bg
+		if emitBg {
+			flags |= hextileBackgroundSpecified
+		}
+		buf = append(buf, flags)
+		if emitBg {
+			buf = appendPackedPixelFromRGBA(buf, pf, bg, bytesPerPixel)
+			state.prev = bg
+			state.valid = true
+		}
+		buf = appendPackedPixelFromRGBA(buf, pf, fg, bytesPerPixel)
+		buf = append(buf, byte(len(subrects)))
+		for _, sr := range subrects {
+			buf = append(buf, byte((sr[0]<<4)|sr[1]), byte(((sr[2]-1)<<4)|(sr[3]-1))) // x|y nibbles, then (w-1)|(h-1)
+		}
+		return buf
+	}
+
+	// Raw fallback for a two-colour sub-tile with too many subrects.
+	buf = append(buf, hextileRaw)
+	buf = appendRawPixels(buf, img, pf, rc, bytesPerPixel)
+	// Raw sub-tiles invalidate the persistent background.
+	state.valid = false
+	return buf
+}
+
+// classifySubtile scans the sub-tile and reports up to two distinct pixel
+// values (packed in host byte order) plus their counts; c0 is the top-left
+// pixel. The scan stops at the first third colour and returns only2=false
+// with zero counts, in which case the caller falls back to raw.
+func classifySubtile(img *image.RGBA, x, y, w, h int) (c0, c1 uint32, only2 bool, c0Count, c1Count int) {
+	origin := y*img.Stride + x*4
+	c0 = *(*uint32)(unsafe.Pointer(&img.Pix[origin]))
+	for dy := range h {
+		line := origin + dy*img.Stride
+		for dx := range w {
+			px := *(*uint32)(unsafe.Pointer(&img.Pix[line+dx*4]))
+			if px == c0 {
+				c0Count++
+				continue
+			}
+			if c1Count == 0 {
+				c1, c1Count = px, 1
+				continue
+			}
+			if px != c1 {
+				return c0, c1, false, 0, 0
+			}
+			c1Count++
+		}
+	}
+	return c0, c1, true, c0Count, c1Count
+}
+
+// collectFgSubrects walks the sub-tile row by row, emitting one subrect per
+// horizontal run of pixels not equal to bg (bg packed in host byte order,
+// as classifySubtile returns it). Each subrect is [subX, subY, width,
+// height] relative to the sub-tile, with height always 1. The inner loop
+// runs one column past the edge so a run touching the right border closes.
+func collectFgSubrects(img *image.RGBA, x, y, w, h int, bg uint32) [][4]int {
+	var runs [][4]int
+	for row := range h {
+		line := (y+row)*img.Stride + x*4
+		runStart := -1 // -1 while outside a foreground run
+		for col := 0; col <= w; col++ {
+			fg := col < w && *(*uint32)(unsafe.Pointer(&img.Pix[line+col*4])) != bg
+			switch {
+			case fg && runStart < 0:
+				runStart = col
+			case !fg && runStart >= 0:
+				runs = append(runs, [4]int{runStart, row, col - runStart, 1})
+				runStart = -1
+			}
+		}
+	}
+	return runs
+}
+
+// appendPackedPixelFromRGBA appends one pixel to buf in the client's pixel
+// format. px carries red in bits 0-7, green in bits 8-15 and blue in bits
+// 16-23; the top byte is ignored. encodePixel does the packing and only the
+// first bytesPerPixel bytes of its output are appended.
+func appendPackedPixelFromRGBA(buf []byte, pf clientPixelFormat, px uint32, bytesPerPixel int) []byte {
+	var packed [4]byte
+	encodePixel(packed[:], pf, byte(px), byte(px>>8), byte(px>>16))
+	return append(buf, packed[:bytesPerPixel]...)
+}
+
+// appendRawPixels grows buf by rc.w*rc.h*bytesPerPixel bytes and has
+// writePixels fill that new tail with the pixels of rc in the client's
+// pixel format. Existing content of buf is left untouched.
+func appendRawPixels(buf []byte, img *image.RGBA, pf clientPixelFormat, rc rect, bytesPerPixel int) []byte {
+	payloadLen := rc.w * rc.h * bytesPerPixel
+	tail := len(buf)
+	buf = append(buf, make([]byte, payloadLen)...)
+	writePixels(buf[tail:], img, pf, rc, bytesPerPixel)
+	return buf
+}
+
+// tightState holds the per-session JPEG scratch buffer and reused encoders
+// so per-rect encoding stays alloc-free in the steady state.
+//
+// jpegBuf is reset and refilled by encodeTightJPEG on every call; zlib is
+// the writer/output pair encodeTightBasic compresses through. Nothing here
+// is locked, so a tightState is presumably owned by a single session
+// goroutine — confirm against the caller.
+type tightState struct {
+	jpegBuf *bytes.Buffer
+	zlib    *zlibState
+	scratch []byte // RGB-packed pixel scratch; encodeTightBasic stages its 3-byte-per-pixel stream here.
+	// colorSeen is reused by sampledColorCountInto per rect; cleared via the Go
+	// runtime's map-clear fast path to avoid a fresh allocation each call.
+	colorSeen map[uint32]struct{}
+}
+
+// newTightState returns a tightState with an empty JPEG buffer, a fresh
+// zlibState and a colour-sampling set pre-sized for 64 entries. The scratch
+// slice is left nil and is allocated lazily by encodeTightBasic.
+func newTightState() *tightState {
+	return &tightState{
+		jpegBuf:   &bytes.Buffer{},
+		zlib:      newZlibState(),
+		colorSeen: make(map[uint32]struct{}, 64),
+	}
+}
+
+// encodeTightRect encodes one rect with the Tight encoding and returns the
+// rect header followed by the Tight body (no FramebufferUpdate header).
+//
+// Sub-encoding selection, in order:
+//   - Fill when tileIsUniform reports a single colour;
+//   - JPEG when the rect covers at least tightJPEGMinArea pixels and the
+//     sampled colour count reaches tightJPEGMinColors;
+//   - Basic+zlib otherwise, and also when the JPEG encoder fails.
+//
+// pf is accepted for signature parity but is not consulted here.
+func encodeTightRect(img *image.RGBA, pf clientPixelFormat, x, y, w, h int, t *tightState) []byte {
+	pixel, uniform := tileIsUniform(img, x, y, w, h)
+	if uniform {
+		r, g, b := byte(pixel), byte(pixel>>8), byte(pixel>>16)
+		return encodeTightFill(x, y, w, h, r, g, b)
+	}
+
+	// The area test comes first so small rects skip the colour sampling.
+	photoLike := w*h >= tightJPEGMinArea &&
+		sampledColorCountInto(t.colorSeen, img, x, y, w, h, tightJPEGMinColors) >= tightJPEGMinColors
+	if photoLike {
+		if out, ok := encodeTightJPEG(img, x, y, w, h, t); ok {
+			return out
+		}
+	}
+	return encodeTightBasic(img, x, y, w, h, t)
+}
+
+// writeTightRectHeader fills buf[0:12] with the rect header: x, y, w and h
+// as big-endian uint16 values (each truncated to 16 bits) followed by the
+// Tight encoding identifier as a big-endian uint32.
+func writeTightRectHeader(buf []byte, x, y, w, h int) {
+	for i, v := range [4]int{x, y, w, h} {
+		binary.BigEndian.PutUint16(buf[2*i:2*i+2], uint16(v))
+	}
+	binary.BigEndian.PutUint32(buf[8:12], uint32(encTight))
+}
+
+// appendTightLength appends n as a Tight compact length: one, two or three
+// bytes, least-significant group first. The first two bytes carry 7 payload
+// bits each and use their top bit as a continuation flag; the third byte
+// carries a full 8 bits and has no flag. The format therefore holds 22 bits,
+// and anything above 0x3fffff is silently truncated, so callers must keep
+// lengths within that range.
+func appendTightLength(buf []byte, n int) []byte {
+	low := byte(n & 0x7f)
+	mid := byte((n >> 7) & 0x7f)
+	high := byte((n >> 14) & 0xff)
+	switch {
+	case n <= 0x7f:
+		return append(buf, low)
+	case n <= 0x3fff:
+		return append(buf, low|0x80, mid)
+	default:
+		return append(buf, low|0x80, mid|0x80, high)
+	}
+}
+
+// encodeTightFill builds the 16-byte message for a single-colour rect: the
+// 12-byte rect header, the Fill sub-encoding byte, then the colour as three
+// bytes in R, G, B order. The colour is always written as 24-bit RGB,
+// independent of the negotiated pixel format.
+func encodeTightFill(x, y, w, h int, r, g, b byte) []byte {
+	out := make([]byte, 12, 12+1+3)
+	writeTightRectHeader(out, x, y, w, h)
+	return append(out, tightFillSubenc, r, g, b)
+}
+
+// encodeTightJPEG compresses the rect as a baseline JPEG. Returns ok=false
+// if the encoder errors so the caller can fall back to Basic.
+func encodeTightJPEG(img *image.RGBA, x, y, w, h int, t *tightState) ([]byte, bool) {
+	t.jpegBuf.Reset()
+	sub := img.SubImage(image.Rect(img.Rect.Min.X+x, img.Rect.Min.Y+y, img.Rect.Min.X+x+w, img.Rect.Min.Y+y+h))
+	if err := jpeg.Encode(t.jpegBuf, sub, &jpeg.Options{Quality: tightQualityFor(w * h)}); err != nil {
+		return nil, false
+	}
+	jpegBytes := t.jpegBuf.Bytes()
+	buf := make([]byte, 0, 12+1+3+len(jpegBytes))
+	buf = buf[:12]
+	writeTightRectHeader(buf, x, y, w, h)
+	buf = append(buf, tightJPEGSubenc)
+	buf = appendTightLength(buf, len(jpegBytes))
+	buf = append(buf, jpegBytes...)
+	return buf, true
+}
+
+// encodeTightBasic emits Basic+zlib with the no-op (CopyFilter) filter.
+// Pixels are sent as 24-bit RGB ("TPIXEL" format) which most clients
+// negotiate when the server advertises 32bpp true colour. Streams under
+// 12 bytes ship uncompressed per RFB Tight spec.
+//
+// The pixel stream is staged in t.scratch (grown on demand and kept) and
+// compressed through t.zlib, whose writer is flushed but never closed here,
+// so the deflate stream continues across calls. Returns nil when the zlib
+// writer reports an error.
+//
+// NOTE(review): the compact length written by appendTightLength holds at
+// most 22 bits. Confirm callers bound rect sizes so the compressed stream
+// cannot exceed 0x3fffff bytes; nothing in this function checks it.
+func encodeTightBasic(img *image.RGBA, x, y, w, h int, t *tightState) []byte {
+	pixelStream := w * h * 3
+	if cap(t.scratch) < pixelStream {
+		t.scratch = make([]byte, pixelStream)
+	}
+	scratch := t.scratch[:pixelStream]
+	stride := img.Stride
+	off := 0
+	// Repack RGBA rows into tightly packed RGB, dropping the alpha byte.
+	for row := y; row < y+h; row++ {
+		p := row*stride + x*4
+		for col := 0; col < w; col++ {
+			scratch[off+0] = img.Pix[p]
+			scratch[off+1] = img.Pix[p+1]
+			scratch[off+2] = img.Pix[p+2]
+			p += 4
+			off += 3
+		}
+	}
+
+	// Sub-encoding byte: stream 0, no resets, basic encoding (top nibble
+	// = 0x40 = explicit filter follows).
+	subenc := byte(tightBasicFilter)
+	filter := byte(tightFilterCopy)
+
+	// Tiny payloads are sent verbatim with no length prefix and never touch
+	// the zlib stream.
+	if pixelStream < 12 {
+		buf := make([]byte, 0, 12+2+pixelStream)
+		buf = buf[:12]
+		writeTightRectHeader(buf, x, y, w, h)
+		buf = append(buf, subenc, filter)
+		buf = append(buf, scratch...)
+		return buf
+	}
+
+	z := t.zlib
+	// Only the output buffer is reset; the writer keeps its history.
+	z.buf.Reset()
+	if _, err := z.w.Write(scratch); err != nil {
+		log.Debugf("tight zlib write: %v", err)
+		return nil
+	}
+	if err := z.w.Flush(); err != nil {
+		log.Debugf("tight zlib flush: %v", err)
+		return nil
+	}
+	compressed := z.buf.Bytes()
+
+	// compressed aliases z.buf, so it is copied into the result before the
+	// next call resets that buffer.
+	buf := make([]byte, 0, 12+2+5+len(compressed))
+	buf = buf[:12]
+	writeTightRectHeader(buf, x, y, w, h)
+	buf = append(buf, subenc, filter)
+	buf = appendTightLength(buf, len(compressed))
+	buf = append(buf, compressed...)
+	return buf
+}
+
+// tightQualityFor picks the JPEG quality for a rect of the given pixel
+// count: tightJPEGQualityLarge at or above tightJPEGLargePixels,
+// tightJPEGQualityMedium at or above tightJPEGMediumPixels, and
+// tightJPEGQuality for anything smaller.
+func tightQualityFor(pixels int) int {
+	if pixels >= tightJPEGLargePixels {
+		return tightJPEGQualityLarge
+	}
+	if pixels >= tightJPEGMediumPixels {
+		return tightJPEGQualityMedium
+	}
+	return tightJPEGQuality
+}
+
+// sampledColorCountInto estimates distinct-colour count by checking up to
+// maxColors samples. The caller-provided `seen` map is cleared and reused so
+// per-rect Tight encoding stays alloc-free. Cheap O(maxColors) per call.
+func sampledColorCountInto(seen map[uint32]struct{}, img *image.RGBA, x, y, w, h, maxColors int) int {
+	clear(seen)
+	stride := img.Stride
+	step := max((w*h)/(maxColors*4), 1)
+	var idx int
+	for row := 0; row < h; row++ {
+		p := (y+row)*stride + x*4
+		for col := 0; col < w; col++ {
+			if idx%step == 0 {
+				px := *(*uint32)(unsafe.Pointer(&img.Pix[p+col*4]))
+				seen[px&0x00ffffff] = struct{}{}
+				if len(seen) > maxColors {
+					return len(seen)
+				}
+			}
+			idx++
+		}
+	}
+	return len(seen)
+}
+
+// zlibState holds the persistent zlib writer, output buffer, and a scratch
+// slice reused by encodeZlibRect to stage the packed pixel stream before
+// handing it to the deflate writer. The scratch grows to the largest rect
+// we've seen and is kept for the session lifetime.
+//
+// w writes its compressed output into buf (see newZlibState), so callers
+// reset buf before a write and read the result from buf after a Flush.
+type zlibState struct {
+	buf     *bytes.Buffer
+	w       *zlib.Writer
+	scratch []byte
+}
+
+// newZlibState returns a zlibState whose writer compresses at
+// zlib.BestSpeed into a fresh output buffer. The scratch slice starts nil.
+func newZlibState() *zlibState {
+	buf := &bytes.Buffer{}
+	// NewWriterLevel only fails for an invalid level; BestSpeed is a valid
+	// constant, so the error is deliberately discarded.
+	w, _ := zlib.NewWriterLevel(buf, zlib.BestSpeed)
+	return &zlibState{buf: buf, w: w}
+}
+
+// Close closes the underlying zlib writer and returns its error. The output
+// buffer is not touched.
+func (z *zlibState) Close() error {
+	return z.w.Close()
+}
--- a/client/vnc/server/rfb_bench_test.go
+++ b/client/vnc/server/rfb_bench_test.go
@@ -0,0 +1,405 @@
+package server
+
+import (
+	"image"
+	"math/rand"
+	"testing"
+)
+
+// benchRects lists the representative frame sizes shared by the
+// table-driven benchmarks in this file: two full-screen resolutions and two
+// square tile sizes. Each entry becomes one sub-benchmark named after it.
+var benchRects = []struct {
+	name string
+	w, h int
+}{
+	{"1080p_full", 1920, 1080},
+	{"720p_full", 1280, 720},
+	{"256x256_tile", 256, 256},
+	{"64x64_tile", 64, 64},
+}
+
+// makeBenchImage returns a w×h RGBA image filled with pseudo-random bytes
+// drawn from a generator seeded with seed, so the same arguments always
+// produce the same pixels. Every alpha byte is then forced to 0xff so the
+// fast path and slow path produce identical output.
+func makeBenchImage(w, h int, seed int64) *image.RGBA {
+	img := image.NewRGBA(image.Rect(0, 0, w, h))
+	_, _ = rand.New(rand.NewSource(seed)).Read(img.Pix)
+	for px := 0; px+3 < len(img.Pix); px += 4 {
+		img.Pix[px+3] = 0xff
+	}
+	return img
+}
+
+// makeBenchImagePartial returns a (prev, cur) pair of w×h frames for diff
+// benchmarks. prev is makeBenchImage with seed 1; cur is a copy of it whose
+// first changedRows rows (clamped to h) are overwritten with bytes from a
+// generator seeded with 2. Alpha stays 0xff throughout cur.
+func makeBenchImagePartial(w, h, changedRows int) (*image.RGBA, *image.RGBA) {
+	prev := makeBenchImage(w, h, 1)
+	cur := image.NewRGBA(prev.Rect)
+	copy(cur.Pix, prev.Pix)
+
+	dirty := cur.Pix[:min(changedRows, h)*cur.Stride]
+	_, _ = rand.New(rand.NewSource(2)).Read(dirty)
+	// Rows outside the dirty prefix were copied from prev and already carry
+	// opaque alpha, so only the rewritten bytes need fixing up.
+	for i := 3; i < len(dirty); i += 4 {
+		dirty[i] = 0xff
+	}
+	return prev, cur
+}
+
+// BenchmarkEncodeRawRect measures encodeRawRect on a full-frame rect for
+// each benchRects size, using the default client pixel format and random
+// pixel data. Throughput is reported against the 4-byte-per-pixel source.
+func BenchmarkEncodeRawRect(b *testing.B) {
+	pf := defaultClientPixelFormat()
+	for _, r := range benchRects {
+		// Built outside b.Run so image generation is not part of the timing.
+		img := makeBenchImage(r.w, r.h, 1)
+		b.Run(r.name, func(b *testing.B) {
+			b.SetBytes(int64(r.w * r.h * 4))
+			b.ReportAllocs()
+			for i := 0; i < b.N; i++ {
+				_ = encodeRawRect(img, pf, 0, 0, r.w, r.h)
+			}
+		})
+	}
+}
+
+// BenchmarkEncodeZlibRect measures encodeZlibRect on a full-frame rect for
+// each benchRects size. One zlibState is created per size and reused for
+// every iteration, so the writer is not re-created inside the timed loop.
+func BenchmarkEncodeZlibRect(b *testing.B) {
+	pf := defaultClientPixelFormat()
+	for _, r := range benchRects {
+		img := makeBenchImage(r.w, r.h, 1)
+		z := newZlibState()
+		b.Run(r.name, func(b *testing.B) {
+			b.SetBytes(int64(r.w * r.h * 4))
+			b.ReportAllocs()
+			for i := 0; i < b.N; i++ {
+				_ = encodeZlibRect(img, pf, 0, 0, r.w, r.h, z)
+			}
+		})
+	}
+}
+
+// BenchmarkWritePixels isolates the per-pixel pack loop from the allocation
+// and FramebufferUpdate-header overhead.
+func BenchmarkWritePixels(b *testing.B) {
+	pf := defaultClientPixelFormat()
+	for _, r := range benchRects {
+		img := makeBenchImage(r.w, r.h, 1)
+		// Destination is allocated once, sized for 4 output bytes per pixel.
+		dst := make([]byte, r.w*r.h*4)
+		b.Run(r.name, func(b *testing.B) {
+			b.SetBytes(int64(r.w * r.h * 4))
+			b.ReportAllocs()
+			for i := 0; i < b.N; i++ {
+				writePixels(dst, img, pf, rect{0, 0, r.w, r.h}, 4)
+			}
+		})
+	}
+}
+
+// BenchmarkWritePixelsScaled forces the general (non-fast) path by using a
+// pixel format with non-255 channel maxes.
+func BenchmarkWritePixelsScaled(b *testing.B) {
+	pf := defaultClientPixelFormat()
+	pf.rMax, pf.gMax, pf.bMax = 31, 63, 31 // 16bpp-ish; exercises the divide path
+	pf.bpp = 16
+	for _, r := range benchRects {
+		img := makeBenchImage(r.w, r.h, 1)
+		// Two output bytes per pixel to match the 16bpp format.
+		dst := make([]byte, r.w*r.h*2)
+		b.Run(r.name, func(b *testing.B) {
+			// Throughput is still counted in 4-byte source pixels so the
+			// numbers are comparable with BenchmarkWritePixels.
+			b.SetBytes(int64(r.w * r.h * 4))
+			b.ReportAllocs()
+			for i := 0; i < b.N; i++ {
+				writePixels(dst, img, pf, rect{0, 0, r.w, r.h}, 2)
+			}
+		})
+	}
+}
+
+// BenchmarkSwizzleBGRAtoRGBA measures swizzleBGRAtoRGBA on random buffers
+// sized like each benchRects frame at 4 bytes per pixel. Source and
+// destination are allocated once per size, outside the timed loop.
+func BenchmarkSwizzleBGRAtoRGBA(b *testing.B) {
+	for _, r := range benchRects {
+		size := r.w * r.h * 4
+		src := make([]byte, size)
+		dst := make([]byte, size)
+		rng := rand.New(rand.NewSource(1))
+		_, _ = rng.Read(src)
+		b.Run(r.name, func(b *testing.B) {
+			b.SetBytes(int64(size))
+			b.ReportAllocs()
+			for i := 0; i < b.N; i++ {
+				swizzleBGRAtoRGBA(dst, src)
+			}
+		})
+	}
+}
+
+// BenchmarkSwizzleBGRAtoRGBANaive is the naive byte-by-byte implementation
+// that the Linux SHM capturer used before the uint32 rewrite, kept here so
+// we can compare the cost directly.
+func BenchmarkSwizzleBGRAtoRGBANaive(b *testing.B) {
+	for _, r := range benchRects {
+		size := r.w * r.h * 4
+		src := make([]byte, size)
+		dst := make([]byte, size)
+		rng := rand.New(rand.NewSource(1))
+		_, _ = rng.Read(src)
+		b.Run(r.name, func(b *testing.B) {
+			b.SetBytes(int64(size))
+			b.ReportAllocs()
+			for i := 0; i < b.N; i++ {
+				// Reference loop: swap bytes 0 and 2 of each pixel, keep
+				// byte 1, and force the fourth byte to opaque.
+				for j := 0; j < size; j += 4 {
+					dst[j+0] = src[j+2]
+					dst[j+1] = src[j+1]
+					dst[j+2] = src[j+0]
+					dst[j+3] = 0xff
+				}
+			}
+		})
+	}
+}
+
+// BenchmarkEncodeUniformTile_Zlib measures the cost of sending a uniform
+// 64×64 dirty tile via zlib (the old path before the Hextile fast path).
+func BenchmarkEncodeUniformTile_Zlib(b *testing.B) {
+	pf := defaultClientPixelFormat()
+	img := image.NewRGBA(image.Rect(0, 0, 64, 64))
+	// Fill every pixel with the same opaque colour (0x33, 0x66, 0x99).
+	for i := 0; i < len(img.Pix); i += 4 {
+		img.Pix[i+0] = 0x33
+		img.Pix[i+1] = 0x66
+		img.Pix[i+2] = 0x99
+		img.Pix[i+3] = 0xff
+	}
+	z := newZlibState()
+	b.ReportAllocs()
+	var bytesOut int
+	for i := 0; i < b.N; i++ {
+		out := encodeZlibRect(img, pf, 0, 0, 64, 64, z)
+		bytesOut = len(out)
+	}
+	// wire_bytes is the size of the last iteration's output only.
+	b.ReportMetric(float64(bytesOut), "wire_bytes")
+}
+
+// BenchmarkEncodeUniformTile_Hextile measures the new fast path: uniform
+// 64×64 tile emitted as Hextile SolidFill.
+func BenchmarkEncodeUniformTile_Hextile(b *testing.B) {
+	pf := defaultClientPixelFormat()
+	b.ReportAllocs()
+	var bytesOut int
+	for i := 0; i < b.N; i++ {
+		// Same colour and tile size as BenchmarkEncodeUniformTile_Zlib so the
+		// two wire_bytes figures can be compared directly.
+		out := encodeHextileSolidRect(0x33, 0x66, 0x99, pf, rect{0, 0, 64, 64})
+		bytesOut = len(out)
+	}
+	b.ReportMetric(float64(bytesOut), "wire_bytes")
+}
+
+// BenchmarkTileIsUniform measures tileIsUniform on a 64×64 tile in which
+// every pixel is identical (zero colour, opaque alpha). Presumably this is
+// the slowest case, since no differing pixel can end the scan early —
+// confirm against tileIsUniform.
+func BenchmarkTileIsUniform(b *testing.B) {
+	img := image.NewRGBA(image.Rect(0, 0, 64, 64))
+	for i := 0; i < len(img.Pix); i += 4 {
+		img.Pix[i+3] = 0xff
+	}
+	b.ReportAllocs()
+	for i := 0; i < b.N; i++ {
+		_, _ = tileIsUniform(img, 0, 0, 64, 64)
+	}
+}
+
+// BenchmarkEncodeManyTilesVsFullFrame exercises the bandwidth + CPU
+// trade-off that motivates the full-frame promotion path: encoding a burst
+// of N dirty 64×64 tiles as separate zlib rects vs emitting one big zlib
+// rect for the whole frame.
+func BenchmarkEncodeManyTilesVsFullFrame(b *testing.B) {
+	pf := defaultClientPixelFormat()
+	const w, h = 1920, 1080
+	img := makeBenchImage(w, h, 1)
+
+	// Build the list of every tile in the frame (worst case: entire screen dirty).
+	// Tiles on the right and bottom edges are clipped to the frame bounds.
+	var tiles [][4]int
+	for ty := 0; ty < h; ty += tileSize {
+		th := tileSize
+		if ty+th > h {
+			th = h - ty
+		}
+		for tx := 0; tx < w; tx += tileSize {
+			tw := tileSize
+			if tx+tw > w {
+				tw = w - tx
+			}
+			tiles = append(tiles, [4]int{tx, ty, tw, th})
+		}
+	}
+	nTiles := len(tiles)
+
+	b.Run("per_tile_zlib", func(b *testing.B) {
+		z := newZlibState()
+		b.SetBytes(int64(w * h * 4))
+		b.ReportAllocs()
+		var totalOut int
+		for i := 0; i < b.N; i++ {
+			// Reset per iteration so the metric reflects one frame's worth
+			// of tiles, not the running total.
+			totalOut = 0
+			for _, r := range tiles {
+				out := encodeZlibRect(img, pf, r[0], r[1], r[2], r[3], z)
+				totalOut += len(out)
+			}
+		}
+		b.ReportMetric(float64(totalOut), "wire_bytes")
+		b.ReportMetric(float64(nTiles), "tiles")
+	})
+
+	b.Run("full_frame_zlib", func(b *testing.B) {
+		z := newZlibState()
+		b.SetBytes(int64(w * h * 4))
+		b.ReportAllocs()
+		var totalOut int
+		for i := 0; i < b.N; i++ {
+			out := encodeZlibRect(img, pf, 0, 0, w, h, z)
+			totalOut = len(out)
+		}
+		b.ReportMetric(float64(totalOut), "wire_bytes")
+	})
+}
+
+// BenchmarkShouldPromoteToFullFrame verifies the threshold check itself is
+// cheap. It runs on every frame, so regressions here hit all workloads.
+func BenchmarkShouldPromoteToFullFrame(b *testing.B) {
+	const w, h = 1920, 1080
+	// Only the server dimensions are set; the session has no connection or
+	// capturer, so the check must not depend on them.
+	s := &session{serverW: w, serverH: h}
+	// Build a worst-case rect list (every tile dirty, 510 entries).
+	var rects [][4]int
+	for ty := 0; ty < h; ty += tileSize {
+		th := tileSize
+		if ty+th > h {
+			th = h - ty
+		}
+		for tx := 0; tx < w; tx += tileSize {
+			tw := tileSize
+			if tx+tw > w {
+				tw = w - tx
+			}
+			rects = append(rects, [4]int{tx, ty, tw, th})
+		}
+	}
+	b.ReportAllocs()
+	for i := 0; i < b.N; i++ {
+		_ = s.shouldPromoteToFullFrame(rects)
+	}
+}
+
+// BenchmarkEncodeCoalescedVsPerTile compares per-tile encoding vs the
+// coalesced rect list emitted by diffRects, on a horizontal-band dirty
+// pattern (e.g. a scrolling status bar) where coalescing pays off.
+func BenchmarkEncodeCoalescedVsPerTile(b *testing.B) {
+	pf := defaultClientPixelFormat()
+	const w, h = 1920, 1080
+	img := makeBenchImage(w, h, 1)
+
+	// Dirty band: tileSize rows starting at y=200, spanning the full width.
+	var perTile [][4]int
+	for tx := 0; tx < w; tx += tileSize {
+		tw := tileSize
+		if tx+tw > w {
+			tw = w - tx
+		}
+		perTile = append(perTile, [4]int{tx, 200, tw, tileSize})
+	}
+	// coalesceRects gets its own copy so perTile stays intact for the
+	// per_tile sub-benchmark.
+	coalesced := coalesceRects(append([][4]int(nil), perTile...))
+
+	b.Run("per_tile", func(b *testing.B) {
+		z := newZlibState()
+		b.ReportAllocs()
+		var bytesOut int
+		for i := 0; i < b.N; i++ {
+			bytesOut = 0
+			for _, r := range perTile {
+				out := encodeZlibRect(img, pf, r[0], r[1], r[2], r[3], z)
+				bytesOut += len(out)
+			}
+		}
+		b.ReportMetric(float64(bytesOut), "wire_bytes")
+		b.ReportMetric(float64(len(perTile)), "rects")
+	})
+
+	b.Run("coalesced", func(b *testing.B) {
+		z := newZlibState()
+		b.ReportAllocs()
+		var bytesOut int
+		for i := 0; i < b.N; i++ {
+			bytesOut = 0
+			for _, r := range coalesced {
+				out := encodeZlibRect(img, pf, r[0], r[1], r[2], r[3], z)
+				bytesOut += len(out)
+			}
+		}
+		b.ReportMetric(float64(bytesOut), "wire_bytes")
+		b.ReportMetric(float64(len(coalesced)), "rects")
+	})
+}
+
+// BenchmarkCoalesceRects measures coalesceRects on a 1920×1080 frame with
+// every tile dirty. The timed loop also includes allocating and copying the
+// input slice, so the reported cost is slightly higher than coalesceRects
+// alone.
+func BenchmarkCoalesceRects(b *testing.B) {
+	const w, h = 1920, 1080
+	// Worst case: every tile dirty.
+	var allTiles [][4]int
+	for ty := 0; ty < h; ty += tileSize {
+		th := tileSize
+		if ty+th > h {
+			th = h - ty
+		}
+		for tx := 0; tx < w; tx += tileSize {
+			tw := tileSize
+			if tx+tw > w {
+				tw = w - tx
+			}
+			allTiles = append(allTiles, [4]int{tx, ty, tw, th})
+		}
+	}
+	b.ReportAllocs()
+	for i := 0; i < b.N; i++ {
+		// Fresh copy per iteration; presumably coalesceRects may reorder or
+		// overwrite its argument — confirm against its implementation.
+		in := make([][4]int, len(allTiles))
+		copy(in, allTiles)
+		_ = coalesceRects(in)
+	}
+}
+
+// BenchmarkEncodeTightVsZlib_Photo compares Tight (which routes random/
+// photographic content to JPEG) against the persistent Zlib stream. JPEG
+// at quality 70 should be 5-15× smaller on this kind of content.
+//
+// NOTE(review): the quality actually used comes from tightQualityFor and
+// varies with rect area; confirm that 70 still matches the constants.
+func BenchmarkEncodeTightVsZlib_Photo(b *testing.B) {
+	pf := defaultClientPixelFormat()
+	for _, r := range []struct {
+		name string
+		w, h int
+	}{
+		{"256x256", 256, 256},
+		{"512x512", 512, 512},
+		{"1080p", 1920, 1080},
+	} {
+		// Both encoders are fed the same random image for a given size.
+		img := makeBenchImage(r.w, r.h, 1)
+		b.Run(r.name+"/zlib", func(b *testing.B) {
+			z := newZlibState()
+			b.SetBytes(int64(r.w * r.h * 4))
+			b.ReportAllocs()
+			var bytesOut int
+			for i := 0; i < b.N; i++ {
+				out := encodeZlibRect(img, pf, 0, 0, r.w, r.h, z)
+				bytesOut = len(out)
+			}
+			b.ReportMetric(float64(bytesOut), "wire_bytes")
+		})
+		b.Run(r.name+"/tight", func(b *testing.B) {
+			t := newTightState()
+			b.SetBytes(int64(r.w * r.h * 4))
+			b.ReportAllocs()
+			var bytesOut int
+			for i := 0; i < b.N; i++ {
+				out := encodeTightRect(img, pf, 0, 0, r.w, r.h, t)
+				bytesOut = len(out)
+			}
+			b.ReportMetric(float64(bytesOut), "wire_bytes")
+		})
+	}
+}
+
+// BenchmarkDiffRects measures diffRects for each benchRects size on a frame
+// pair that differs only in its first 100 rows (fewer when the frame is
+// shorter than that).
+func BenchmarkDiffRects(b *testing.B) {
+	for _, r := range benchRects {
+		prev, cur := makeBenchImagePartial(r.w, r.h, 100)
+		b.Run(r.name, func(b *testing.B) {
+			b.SetBytes(int64(r.w * r.h * 4))
+			b.ReportAllocs()
+			for i := 0; i < b.N; i++ {
+				_ = diffRects(prev, cur, r.w, r.h, tileSize)
+			}
+		})
+	}
+}
--- a/client/vnc/server/server.go
+++ b/client/vnc/server/server.go
@@ -0,0 +1,754 @@
+package server
+
+import (
+	"context"
+	"crypto/subtle"
+	"encoding/binary"
+	"encoding/hex"
+	"errors"
+	"fmt"
+	"image"
+	"io"
+	"net"
+	"net/netip"
+	"strings"
+	"sync"
+	"time"
+
+	gojwt "github.com/golang-jwt/jwt/v5"
+	log "github.com/sirupsen/logrus"
+	"golang.zx2c4.com/wireguard/tun/netstack"
+
+	sshauth "github.com/netbirdio/netbird/client/ssh/auth"
+	nbjwt "github.com/netbirdio/netbird/shared/auth/jwt"
+)
+
+// Connection modes sent by the client in the session header. The value is
+// carried in connectionHeader.mode.
+const (
+	ModeAttach  byte = 0 // Capture current display
+	ModeSession byte = 1 // Virtual session as specified user
+)
+
+// RFB security-failure reason codes sent to the client. These prefixes are
+// stable so dashboard/noVNC integrations can branch on them without parsing
+// free text. Format: "CODE: human message" (built by codeMessage). The
+// three JWT codes and AUTH_FORBIDDEN are the values jwtErrorCode can return.
+const (
+	RejectCodeJWTMissing    = "AUTH_JWT_MISSING"
+	RejectCodeJWTExpired    = "AUTH_JWT_EXPIRED"
+	RejectCodeJWTInvalid    = "AUTH_JWT_INVALID"
+	RejectCodeAuthForbidden = "AUTH_FORBIDDEN"
+	RejectCodeAuthConfig    = "AUTH_CONFIG"
+	RejectCodeSessionError  = "SESSION_ERROR"
+	RejectCodeCapturerError = "CAPTURER_ERROR"
+	RejectCodeUnsupportedOS = "UNSUPPORTED"
+	RejectCodeBadRequest    = "BAD_REQUEST"
+)
+
+// EnvVNCDisableDownscale is the name of the environment variable that
+// disables any platform-specific framebuffer downscaling (e.g. Retina 2:1).
+// Set it to 1/true to send the native resolution.
+const EnvVNCDisableDownscale = "NB_VNC_DISABLE_DOWNSCALE"
+
+// freshWindow is how long an on-demand capturer may reuse its last result
+// before triggering a new capture. Short enough to feel responsive, long
+// enough to coalesce bursty multi-session requests. 16 ms is roughly one
+// frame interval at 60 fps.
+const freshWindow = 16 * time.Millisecond
+
+// ScreenCapturer grabs desktop frames for the VNC server.
+//
+// A capturer that reports a non-positive Width or Height is treated as not
+// ready yet: validateCapturer polls both until they turn positive.
+type ScreenCapturer interface {
+	// Width returns the current screen width in pixels.
+	Width() int
+	// Height returns the current screen height in pixels.
+	Height() int
+	// Capture returns the current desktop as an RGBA image.
+	Capture() (*image.RGBA, error)
+}
+
+// captureIntoer is implemented by capturers that can write directly into a
+// caller-provided buffer, skipping the per-frame snapshot copy that the
+// session would otherwise need to make. Linux and macOS implement this.
+// It is an optional extension of ScreenCapturer, presumably discovered via
+// a type assertion at the call site — confirm in the session code.
+type captureIntoer interface {
+	CaptureInto(dst *image.RGBA) error
+}
+
+// errFrameUnchanged is returned by capturers that hash the raw source
+// bytes (currently macOS) when the new frame is byte-identical to the
+// last one, so the encoder can short-circuit to an empty update. It is a
+// sentinel, not a failure; compare against it with errors.Is.
+var errFrameUnchanged = errors.New("frame unchanged")
+
+// InputInjector delivers keyboard and mouse events to the OS. None of the
+// methods return an error, so implementations have no way to report a
+// failed injection to the caller.
+type InputInjector interface {
+	// InjectKey simulates a key press or release. keysym is an X11 KeySym.
+	InjectKey(keysym uint32, down bool)
+	// InjectPointer simulates mouse movement and button state.
+	InjectPointer(buttonMask uint8, x, y, serverW, serverH int)
+	// SetClipboard sets the system clipboard to the given text.
+	SetClipboard(text string)
+	// GetClipboard returns the current system clipboard text.
+	GetClipboard() string
+	// TypeText synthesizes the given text as keystrokes on the active
+	// desktop. Used by the dashboard's Paste button to push host clipboard
+	// content into a secure desktop (Winlogon/UAC) where the clipboard is
+	// isolated. On platforms or sessions without keystroke synthesis it
+	// may be a no-op.
+	TypeText(text string)
+}
+
+// JWTConfig holds JWT validation configuration for VNC auth.
+//
+// NOTE(review): MaxTokenAge is presumably in seconds, matching
+// defaultJWTMaxTokenAge (10 * 60, "10 minutes") — confirm where it is read.
+type JWTConfig struct {
+	Issuer       string
+	KeysLocation string
+	MaxTokenAge  int64
+	Audiences    []string
+}
+
+// connectionHeader is sent by the client before the RFB handshake to specify
+// the VNC session mode and authenticate. mode holds one of the Mode*
+// constants. An empty jwt means the client supplied no token, which
+// authenticateJWT rejects.
+type connectionHeader struct {
+	mode      byte
+	username  string
+	jwt       string
+	sessionID uint32 // Windows session ID (0 = console/auto)
+	// width and height request the virtual display geometry for session mode.
+	// Zero means use the default.
+	width  uint16
+	height uint16
+}
+
+// Server is the embedded VNC server that listens on the WireGuard interface.
+// It supports two operating modes:
+//   - Direct mode: captures the screen and handles VNC sessions in-process.
+//     Used when running in a user session with desktop access.
+//   - Service mode: proxies VNC connections to an agent process spawned in
+//     the active console session. Used when running as a Windows service in
+//     Session 0.
+//
+// Within direct mode, each connection can request one of two session modes
+// via the connection header:
+//   - Attach: capture the current physical display.
+//   - Session: start a virtual Xvfb display as the requested user.
+//
+// Locking: Start, Stop, SetJWTConfig, SetNetstackNet and UpdateVNCAuth take
+// mu. SetServiceMode, SetDisableAuth and SetAgentToken write their fields
+// without it, so they are presumably meant to be called before Start —
+// confirm with the callers.
+type Server struct {
+	capturer    ScreenCapturer
+	injector    InputInjector
+	password    string
+	serviceMode bool
+	disableAuth bool
+	localAddr   netip.Addr   // NetBird WireGuard IP this server is bound to
+	network     netip.Prefix // NetBird overlay network
+	log         *log.Entry
+
+	mu           sync.Mutex
+	listener     net.Listener
+	ctx          context.Context
+	cancel       context.CancelFunc
+	vmgr         virtualSessionManager
+	jwtConfig    *JWTConfig
+	jwtValidator *nbjwt.Validator
+	jwtExtractor *nbjwt.ClaimsExtractor
+	authorizer   *sshauth.Authorizer
+	netstackNet  *netstack.Net
+	agentToken   []byte // raw token bytes for agent-mode auth
+}
+
+// vncSession provides capturer and injector for a virtual display session.
+// Instances are handed out by virtualSessionManager.GetOrCreate.
+//
+// NOTE(review): ClientConnect/ClientDisconnect look like a per-session
+// client reference count; confirm with the implementation.
+type vncSession interface {
+	Capturer() ScreenCapturer
+	Injector() InputInjector
+	Display() string
+	ClientConnect()
+	ClientDisconnect()
+}
+
+// virtualSessionManager is implemented by sessionManager on Linux. Start
+// obtains one from platformSessionManager and Stop calls StopAll on it when
+// it is non-nil.
+type virtualSessionManager interface {
+	// GetOrCreate returns an existing session for the user or starts a new one
+	// with the requested geometry. width/height of 0 means use the default.
+	GetOrCreate(username string, width, height uint16) (vncSession, error)
+	// StopAll is invoked by Server.Stop during shutdown.
+	StopAll()
+}
+
+// New creates a VNC server with the given screen capturer and input injector.
+// password is handed to every session the server creates. The server gets a
+// fresh authorizer and a logger tagged component=vnc-server; it does not
+// listen until Start is called.
+func New(capturer ScreenCapturer, injector InputInjector, password string) *Server {
+	return &Server{
+		capturer:   capturer,
+		injector:   injector,
+		password:   password,
+		authorizer: sshauth.NewAuthorizer(),
+		log:        log.WithField("component", "vnc-server"),
+	}
+}
+
+// SetServiceMode enables proxy-to-agent mode for Windows service operation.
+// Start reads the flag to choose the accept loop and to decide whether to
+// run platformInit. The write is not guarded by s.mu.
+func (s *Server) SetServiceMode(enabled bool) {
+	s.serviceMode = enabled
+}
+
+// SetJWTConfig configures JWT authentication for VNC connections.
+// Pass nil to disable JWT (public mode).
+func (s *Server) SetJWTConfig(config *JWTConfig) {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	s.jwtConfig = config
+	// Drop the cached validator and extractor so they are rebuilt from the
+	// new configuration.
+	s.jwtValidator = nil
+	s.jwtExtractor = nil
+}
+
+// SetDisableAuth disables authentication entirely when disable is true and
+// re-enables it when false. The write is not guarded by s.mu.
+func (s *Server) SetDisableAuth(disable bool) {
+	s.disableAuth = disable
+}
+
+// SetAgentToken sets a hex-encoded token that must be presented by incoming
+// connections before any VNC data. Used in agent mode to verify that only the
+// trusted service process connects.
+//
+// An empty string is ignored and does not clear a previously set token. A
+// string that is not valid hex is logged at warn level and also leaves the
+// token unchanged.
+//
+// NOTE(review): if no token was set before, a malformed value leaves the
+// server without one. Confirm that verifyAgentToken does not treat a missing
+// token as "no check required" in agent mode.
+func (s *Server) SetAgentToken(hexToken string) {
+	if hexToken == "" {
+		return
+	}
+	b, err := hex.DecodeString(hexToken)
+	if err != nil {
+		s.log.Warnf("invalid agent token: %v", err)
+		return
+	}
+	s.agentToken = b
+}
+
+// SetNetstackNet sets the netstack network for userspace-only listening.
+// When set, the VNC server listens via netstack instead of a real OS socket.
+// Start reads the value when it creates the listener, so a change only
+// takes effect on the next Start.
+func (s *Server) SetNetstackNet(n *netstack.Net) {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	s.netstackNet = n
+}
+
+// UpdateVNCAuth updates the fine-grained authorization configuration by
+// forwarding config to the authorizer. The cached JWT validator and claims
+// extractor are dropped as well.
+func (s *Server) UpdateVNCAuth(config *sshauth.Config) {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	s.jwtValidator = nil
+	s.jwtExtractor = nil
+	s.authorizer.Update(config)
+}
+
+// Start begins listening for VNC connections on the given address.
+// network is the NetBird overlay prefix used to validate connection sources.
+//
+// The listener comes from the netstack network when one was set with
+// SetNetstackNet and from a regular TCP socket otherwise. In service mode
+// platformInit runs and connections go to serviceAcceptLoop; otherwise
+// acceptLoop handles them in-process.
+//
+// Start returns an error if the server is already running, if network is
+// invalid, or if the listen call fails. On a listen failure the derived
+// context is cancelled and the session manager is stopped, so a failed Start
+// leaves nothing behind and can be retried.
+func (s *Server) Start(ctx context.Context, addr netip.AddrPort, network netip.Prefix) error {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	if s.listener != nil {
+		return fmt.Errorf("server already running")
+	}
+
+	if !network.IsValid() {
+		return fmt.Errorf("invalid overlay network prefix")
+	}
+
+	s.ctx, s.cancel = context.WithCancel(ctx)
+	s.vmgr = s.platformSessionManager()
+	s.localAddr = addr.Addr()
+	s.network = network
+
+	// Undo the setup above if we bail out before the listener is installed.
+	// Runs before the deferred Unlock, so it still holds s.mu.
+	started := false
+	defer func() {
+		if started {
+			return
+		}
+		s.cancel()
+		s.cancel = nil
+		if s.vmgr != nil {
+			s.vmgr.StopAll()
+		}
+	}()
+
+	var listener net.Listener
+	var listenDesc string
+	if s.netstackNet != nil {
+		ln, err := s.netstackNet.ListenTCPAddrPort(addr)
+		if err != nil {
+			return fmt.Errorf("listen on netstack %s: %w", addr, err)
+		}
+		listener = ln
+		listenDesc = fmt.Sprintf("netstack %s", addr)
+	} else {
+		tcpAddr := net.TCPAddrFromAddrPort(addr)
+		ln, err := net.ListenTCP("tcp", tcpAddr)
+		if err != nil {
+			return fmt.Errorf("listen on %s: %w", addr, err)
+		}
+		listener = ln
+		listenDesc = addr.String()
+	}
+	s.listener = listener
+	started = true
+
+	if s.serviceMode {
+		s.platformInit()
+		go s.serviceAcceptLoop()
+	} else {
+		go s.acceptLoop()
+	}
+
+	s.log.Infof("started on %s (service_mode=%v)", listenDesc, s.serviceMode)
+	return nil
+}
+
+// Stop shuts down the server: it cancels the server context, stops all
+// virtual sessions, runs platformShutdown in service mode, closes the
+// capturer if it has a Close method, and finally closes the listener.
+// Calling Stop on a server that was never started is safe because every
+// step is nil-checked.
+//
+// NOTE(review): nothing in this function closes connections that are
+// already being served; they presumably end via the stopped sessions and
+// closed capturer — confirm if a hard disconnect is expected.
+func (s *Server) Stop() error {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	// Cancel first so the accept loop treats the upcoming listener error as
+	// a shutdown rather than a transient failure.
+	if s.cancel != nil {
+		s.cancel()
+		s.cancel = nil
+	}
+
+	if s.vmgr != nil {
+		s.vmgr.StopAll()
+	}
+
+	if s.serviceMode {
+		s.platformShutdown()
+	}
+
+	// Close is optional on capturers, hence the interface assertion.
+	if c, ok := s.capturer.(interface{ Close() }); ok {
+		c.Close()
+	}
+
+	if s.listener != nil {
+		err := s.listener.Close()
+		// Cleared even on error so a later Start is not refused.
+		s.listener = nil
+		if err != nil {
+			return fmt.Errorf("close VNC listener: %w", err)
+		}
+	}
+
+	s.log.Info("stopped")
+	return nil
+}
+
+// acceptLoop handles VNC connections directly (user session mode).
+func (s *Server) acceptLoop() {
+	for {
+		conn, err := s.listener.Accept()
+		if err != nil {
+			select {
+			case <-s.ctx.Done():
+				return
+			default:
+			}
+			s.log.Debugf("accept VNC connection: %v", err)
+			continue
+		}
+
+		go s.handleConnection(conn)
+	}
+}
+
+func (s *Server) validateCapturer(capturer ScreenCapturer) error {
+	// Quick check first: if already ready, return immediately.
+	if capturer.Width() > 0 && capturer.Height() > 0 {
+		return nil
+	}
+	// Capturer not ready: poke any retry loop that supports it so it doesn't
+	// wait out its full backoff (e.g. macOS waiting for Screen Recording).
+	if w, ok := capturer.(interface{ Wake() }); ok {
+		w.Wake()
+	}
+	// Wait up to 5s for the capturer to become ready.
+	for range 50 {
+		time.Sleep(100 * time.Millisecond)
+		if capturer.Width() > 0 && capturer.Height() > 0 {
+			return nil
+		}
+	}
+	return errors.New("no display available (check X11 / framebuffer on Linux/FreeBSD or Screen Recording permission on macOS)")
+}
+
+// isAllowedSource decides whether a peer address may talk to the server.
+// Every rejection is logged at warn level. The checks run in this order:
+//   - the address must be a *net.TCPAddr holding a valid IP;
+//   - loopback peers are accepted when the server itself is bound to a
+//     loopback address;
+//   - the server's own WireGuard IP is refused (prevents local privilege
+//     escalation);
+//   - the peer must fall inside the configured NetBird overlay prefix.
+//
+// Matches the SSH server's connectionValidator logic.
+func (s *Server) isAllowedSource(addr net.Addr) bool {
+	tcpAddr, isTCP := addr.(*net.TCPAddr)
+	if !isTCP {
+		s.log.Warnf("connection rejected: non-TCP address %s", addr)
+		return false
+	}
+
+	parsed, valid := netip.AddrFromSlice(tcpAddr.IP)
+	if !valid {
+		s.log.Warnf("connection rejected: invalid remote IP %s", tcpAddr.IP)
+		return false
+	}
+	// Normalise IPv4-mapped IPv6 so comparisons see the plain IPv4 form.
+	remoteIP := parsed.Unmap()
+
+	switch {
+	case remoteIP.IsLoopback() && s.localAddr.IsLoopback():
+		return true
+	case remoteIP == s.localAddr:
+		s.log.Warnf("connection rejected from own IP %s", remoteIP)
+		return false
+	case !s.network.IsValid():
+		s.log.Warnf("connection rejected: overlay network not configured")
+		return false
+	case !s.network.Contains(remoteIP):
+		s.log.Warnf("connection rejected from non-NetBird IP %s", remoteIP)
+		return false
+	}
+	return true
+}
+
+// handleConnection runs the full lifecycle of one direct-mode connection:
+// source check, agent-token check, connection header, JWT authorization,
+// session resources, capturer readiness, and finally the RFB session.
+// Any failing step ends the connection without serving it.
+//
+// NOTE(review): on failure of verifyAgentToken, authorizeJWT or
+// acquireSessionResources this function returns without closing conn, so
+// those helpers are presumably responsible for rejecting/closing it —
+// confirm. Likewise no read deadline is set here before the header is
+// read; check whether readConnectionHeader applies one.
+func (s *Server) handleConnection(conn net.Conn) {
+	connLog := s.log.WithField("remote", conn.RemoteAddr().String())
+
+	if !s.isAllowedSource(conn.RemoteAddr()) {
+		conn.Close()
+		return
+	}
+	if !s.verifyAgentToken(conn, connLog) {
+		return
+	}
+	header, err := readConnectionHeader(conn)
+	if err != nil {
+		connLog.Warnf("read connection header: %v", err)
+		conn.Close()
+		return
+	}
+	// connLog is reassigned (not shadowed) with the logger authorizeJWT
+	// returns.
+	connLog, ok := s.authorizeJWT(conn, header, connLog)
+	if !ok {
+		return
+	}
+
+	capturer, injector, sessionCleanup, ok := s.acquireSessionResources(conn, header, &connLog)
+	if !ok {
+		return
+	}
+	defer sessionCleanup()
+
+	if err := s.validateCapturer(capturer); err != nil {
+		rejectConnection(conn, codeMessage(RejectCodeCapturerError, fmt.Sprintf("screen capturer: %v", err)))
+		connLog.Warnf("capturer not ready: %v", err)
+		return
+	}
+
+	// Dimensions are sampled once here, after the capturer reported ready.
+	sess := &session{
+		conn:     conn,
+		capturer: capturer,
+		injector: injector,
+		serverW:  capturer.Width(),
+		serverH:  capturer.Height(),
+		password: s.password,
+		log:      connLog,
+	}
+	sess.serve()
+}
+
+// codeMessage joins a stable reject code and a human-readable message as
+// "CODE: message". Dashboards split on the first ": " to recover the code
+// without parsing the free-text suffix.
+func codeMessage(code, msg string) string {
+	return strings.Join([]string{code, msg}, ": ")
+}
+
+// jwtErrorCode maps a JWT auth error to a stable reject code:
+//   - an error wrapping nbjwt.ErrTokenExpired gives RejectCodeJWTExpired;
+//   - a message containing "JWT required but not provided" gives
+//     RejectCodeJWTMissing;
+//   - a message mentioning authorization gives RejectCodeAuthForbidden;
+//   - anything else, including a nil error, gives RejectCodeJWTInvalid.
+//
+// Apart from the expiry sentinel the mapping relies on substrings of the
+// error text, so rewording those errors changes the code clients receive.
+func jwtErrorCode(err error) string {
+	if err == nil {
+		return RejectCodeJWTInvalid
+	}
+	if errors.Is(err, nbjwt.ErrTokenExpired) {
+		return RejectCodeJWTExpired
+	}
+
+	text := err.Error()
+	if strings.Contains(text, "JWT required but not provided") {
+		return RejectCodeJWTMissing
+	}
+	if strings.Contains(text, "authorize") || strings.Contains(text, "not authorized") {
+		return RejectCodeAuthForbidden
+	}
+	return RejectCodeJWTInvalid
+}
+
+// rejectConnection refuses conn through a minimal RFB handshake that carries
+// a security failure reason, so VNC clients display the error message instead
+// of a generic "unexpected disconnect." The connection is always closed
+// before returning.
+func rejectConnection(conn net.Conn, reason string) {
+	defer conn.Close()
+
+	// Announce an RFB 3.8 server; without the greeting nothing else can follow.
+	if _, err := io.WriteString(conn, "RFB 003.008\n"); err != nil {
+		return
+	}
+
+	// Consume the 12-byte client version, waiting at most 2 seconds. Errors
+	// are ignored on purpose so a short-lived or pre-handshake client still
+	// gets the failure reason below.
+	_ = conn.SetReadDeadline(time.Now().Add(2 * time.Second))
+	_, _ = io.ReadFull(conn, make([]byte, 12))
+	_ = conn.SetReadDeadline(time.Time{})
+
+	// Failure frame: a zero security-type count, then the big-endian reason
+	// length, then the reason text.
+	frame := make([]byte, 0, 1+4+len(reason))
+	frame = append(frame, 0)
+	frame = binary.BigEndian.AppendUint32(frame, uint32(len(reason)))
+	frame = append(frame, reason...)
+	_, _ = conn.Write(frame)
+}
+
+// defaultJWTMaxTokenAge is the fallback maximum JWT age, expressed in
+// seconds. checkTokenAge uses it when the JWT config is absent or does not
+// set a positive MaxTokenAge.
+const defaultJWTMaxTokenAge = 10 * 60 // 10 minutes
+
+// authenticateJWT verifies the JWT carried in the connection header and
+// checks that its user may open a VNC session.
+//
+// The token must be present, pass validator.ValidateAndParse, satisfy the
+// configured maximum age, and yield a non-empty user ID. Authorization then
+// depends on header.mode: ModeSession authorizes the user against the
+// requested OS username, while every other mode (attach) authorizes against
+// the wildcard OS user "*".
+//
+// On success it returns the user ID taken from the token.
+func (s *Server) authenticateJWT(header *connectionHeader) (string, error) {
+	if header.jwt == "" {
+		return "", fmt.Errorf("JWT required but not provided")
+	}
+
+	// Lazily build the validator/extractor and snapshot both under the lock,
+	// so the validation below runs without holding it.
+	s.mu.Lock()
+	initErr := s.ensureJWTValidator()
+	validator, extractor := s.jwtValidator, s.jwtExtractor
+	s.mu.Unlock()
+	if initErr != nil {
+		return "", fmt.Errorf("initialize JWT validator: %w", initErr)
+	}
+
+	token, err := validator.ValidateAndParse(context.Background(), header.jwt)
+	if err != nil {
+		return "", fmt.Errorf("validate JWT: %w", err)
+	}
+	if err := s.checkTokenAge(token); err != nil {
+		return "", err
+	}
+
+	userAuth, err := extractor.ToUserAuth(token)
+	if err != nil {
+		return "", fmt.Errorf("extract user from JWT: %w", err)
+	}
+	userID := userAuth.UserId
+	if userID == "" {
+		return "", fmt.Errorf("JWT has no user ID")
+	}
+
+	if header.mode == ModeSession {
+		// Session mode: the user must be mapped to the requested OS username.
+		if _, err := s.authorizer.Authorize(userID, header.username); err != nil {
+			return "", fmt.Errorf("authorize session for %s: %w", header.username, err)
+		}
+		return userID, nil
+	}
+
+	// Attach mode: membership in the authorized list is enough (wildcard OS user).
+	if _, err := s.authorizer.Authorize(userID, "*"); err != nil {
+		return "", fmt.Errorf("user not authorized for VNC: %w", err)
+	}
+	return userID, nil
+}
+
+// ensureJWTValidator builds the JWT validator and claims extractor on first
+// use and is a no-op once both exist. Must be called with mu held.
+//
+// It fails when no JWT config has been set. The extractor is configured with
+// the first configured audience (if any) and with the authorizer's user-ID
+// claim (if non-empty).
+func (s *Server) ensureJWTValidator() error {
+	switch {
+	case s.jwtValidator != nil && s.jwtExtractor != nil:
+		return nil
+	case s.jwtConfig == nil:
+		return fmt.Errorf("no JWT config")
+	}
+
+	cfg := s.jwtConfig
+	// NOTE(review): the meaning of the trailing false is defined by
+	// nbjwt.NewValidator and is not visible here; confirm before changing it.
+	s.jwtValidator = nbjwt.NewValidator(cfg.Issuer, cfg.Audiences, cfg.KeysLocation, false)
+
+	var extractorOpts []nbjwt.ClaimsExtractorOption
+	if len(cfg.Audiences) > 0 {
+		extractorOpts = append(extractorOpts, nbjwt.WithAudience(cfg.Audiences[0]))
+	}
+	if userIDClaim := s.authorizer.GetUserIDClaim(); userIDClaim != "" {
+		extractorOpts = append(extractorOpts, nbjwt.WithUserIDClaim(userIDClaim))
+	}
+	s.jwtExtractor = nbjwt.NewClaimsExtractor(extractorOpts...)
+
+	return nil
+}
+
+// checkTokenAge delegates to nbjwt.CheckTokenAge with the configured maximum
+// token age. The limit is the config's MaxTokenAge (seconds) when a config
+// is present and the value is positive, otherwise defaultJWTMaxTokenAge.
+func (s *Server) checkTokenAge(token *gojwt.Token) error {
+	seconds := defaultJWTMaxTokenAge
+	if cfg := s.jwtConfig; cfg != nil && cfg.MaxTokenAge > 0 {
+		seconds = int(cfg.MaxTokenAge)
+	}
+	limit := time.Duration(seconds) * time.Second
+	return nbjwt.CheckTokenAge(token, limit)
+}
+
+// readConnectionHeader parses the NetBird preamble that may precede the RFB
+// handshake.
+//
+// Wire format (all integers big-endian):
+//
+//	[mode: 1 byte] [username_len: 2 bytes] [username: N bytes]
+//	[jwt_len: 2 bytes] [jwt: N bytes]
+//	[session_id: 4 bytes]              optional, defaults to 0
+//	[width: 2 bytes] [height: 2 bytes] optional, defaults to 0
+//
+// The first three bytes are awaited for 2 seconds only: our WASM proxy sends
+// the header immediately after connecting, whereas standard VNC clients send
+// nothing first (the server speaks first in RFB). If those bytes cannot be
+// read for any reason the connection is treated as attach mode with no
+// credentials. The remainder of the header shares one 5 second deadline.
+//
+// A missing JWT length, session ID or geometry leaves the field at its zero
+// value. An error is returned for a username longer than 256 bytes, a JWT of
+// 8192 bytes or more, a truncated username or JWT body, or a failure to set
+// the read deadline. The read deadline is cleared before returning.
+func readConnectionHeader(conn net.Conn) (*connectionHeader, error) {
+	if err := conn.SetReadDeadline(time.Now().Add(2 * time.Second)); err != nil {
+		return nil, fmt.Errorf("set deadline: %w", err)
+	}
+	defer conn.SetReadDeadline(time.Time{}) //nolint:errcheck
+
+	var fixed [3]byte
+	if _, err := io.ReadFull(conn, fixed[:]); err != nil {
+		// Timeout or error: assume no header, use attach mode.
+		return &connectionHeader{mode: ModeAttach}, nil
+	}
+
+	// A header is present: allow more time for the variable-length fields.
+	if err := conn.SetReadDeadline(time.Now().Add(5 * time.Second)); err != nil {
+		return nil, fmt.Errorf("set deadline: %w", err)
+	}
+
+	parsed := &connectionHeader{mode: fixed[0]}
+
+	if nameLen := binary.BigEndian.Uint16(fixed[1:3]); nameLen > 0 {
+		if nameLen > 256 {
+			return nil, fmt.Errorf("username too long: %d", nameLen)
+		}
+		name := make([]byte, nameLen)
+		if _, err := io.ReadFull(conn, name); err != nil {
+			return nil, fmt.Errorf("read username: %w", err)
+		}
+		parsed.username = string(name)
+	}
+
+	// JWT length and data. An unreadable length is tolerated (no token).
+	var lenBuf [2]byte
+	if _, err := io.ReadFull(conn, lenBuf[:]); err == nil {
+		tokenLen := binary.BigEndian.Uint16(lenBuf[:])
+		if tokenLen >= 8192 {
+			return nil, fmt.Errorf("jwt too long: %d (max 8191)", tokenLen)
+		}
+		if tokenLen > 0 {
+			raw := make([]byte, tokenLen)
+			if _, err := io.ReadFull(conn, raw); err != nil {
+				return nil, fmt.Errorf("read JWT: %w", err)
+			}
+			parsed.jwt = string(raw)
+		}
+	}
+
+	// Both optional trailers are 4 bytes wide, so one scratch buffer serves.
+	var trailer [4]byte
+
+	// Optional Windows session ID. Missing = 0 (console/auto).
+	if _, err := io.ReadFull(conn, trailer[:]); err == nil {
+		parsed.sessionID = binary.BigEndian.Uint32(trailer[:])
+	}
+
+	// Optional requested viewport size. Missing = 0 (default).
+	if _, err := io.ReadFull(conn, trailer[:]); err == nil {
+		parsed.width = binary.BigEndian.Uint16(trailer[0:2])
+		parsed.height = binary.BigEndian.Uint16(trailer[2:4])
+	}
+
+	return parsed, nil
+}
+
+// verifyAgentToken checks the agent token prefix when one is configured.
+//
+// With no token configured it returns true without touching the connection.
+// Otherwise it reads exactly len(agentToken) bytes within 5 seconds and
+// compares them in constant time. It returns false, with the connection
+// closed, when the deadline cannot be set, the read fails, or the token does
+// not match.
+func (s *Server) verifyAgentToken(conn net.Conn, connLog *log.Entry) bool {
+	expected := s.agentToken
+	if len(expected) == 0 {
+		return true
+	}
+
+	// Every failure path below ends with the connection closed.
+	accepted := false
+	defer func() {
+		if !accepted {
+			conn.Close()
+		}
+	}()
+
+	if err := conn.SetReadDeadline(time.Now().Add(5 * time.Second)); err != nil {
+		connLog.Debugf("set agent token deadline: %v", err)
+		return false
+	}
+	presented := make([]byte, len(expected))
+	if _, err := io.ReadFull(conn, presented); err != nil {
+		connLog.Warnf("agent auth: read token: %v", err)
+		return false
+	}
+	// Failing to clear the deadline is logged but does not reject the peer.
+	if err := conn.SetReadDeadline(time.Time{}); err != nil {
+		connLog.Debugf("clear agent token deadline: %v", err)
+	}
+	if subtle.ConstantTimeCompare(presented, expected) != 1 {
+		connLog.Warn("agent auth: invalid token, rejecting")
+		return false
+	}
+
+	accepted = true
+	return true
+}
+
+// authorizeJWT gates the connection on JWT authentication unless auth is
+// disabled.
+//
+// It returns the log entry to use from here on and whether the connection
+// may proceed. On success with auth enabled, the entry is enriched with a
+// "jwt_user" field. On rejection the client receives an RFB failure reason
+// via rejectConnection (which closes the connection) and ok is false.
+func (s *Server) authorizeJWT(conn net.Conn, header *connectionHeader, connLog *log.Entry) (*log.Entry, bool) {
+	switch {
+	case s.disableAuth:
+		return connLog, true
+	case s.jwtConfig == nil:
+		rejectConnection(conn, codeMessage(RejectCodeAuthConfig, "auth enabled but no identity provider configured"))
+		connLog.Warn("auth rejected: no identity provider configured")
+		return connLog, false
+	}
+
+	userID, authErr := s.authenticateJWT(header)
+	if authErr == nil {
+		return connLog.WithField("jwt_user", userID), true
+	}
+
+	rejectConnection(conn, codeMessage(jwtErrorCode(authErr), authErr.Error()))
+	connLog.Warnf("auth rejected: %v", authErr)
+	return connLog, false
+}
+
+// acquireSessionResources picks the capturer and injector for this
+// connection according to header.mode, and returns a cleanup func to run
+// when the session ends. ModeSession is served from a per-user virtual
+// session; any other mode attaches to the server's shared capturer and
+// injector. ok is false when the connection was rejected, in which case the
+// caller must just return.
+func (s *Server) acquireSessionResources(conn net.Conn, header *connectionHeader, connLog **log.Entry) (ScreenCapturer, InputInjector, func(), bool) {
+	if header.mode == ModeSession {
+		return s.acquireVirtualSession(conn, header, connLog)
+	}
+	shared := s.acquireAttachSession()
+	return shared, s.injector, attachSessionCleanup, true
+}
+
+// acquireVirtualSession resolves the virtual session for a ModeSession
+// connection.
+//
+// The connection is rejected (RFB failure reason sent, connection closed,
+// ok=false) when no virtual session manager is available, when the header
+// carries no username, or when the manager cannot get or create the session.
+// On success the session is told a client connected, *connLog is replaced by
+// an entry carrying a "vnc_user" field, and the session's ClientDisconnect is
+// returned as the cleanup func.
+func (s *Server) acquireVirtualSession(conn net.Conn, header *connectionHeader, connLog **log.Entry) (ScreenCapturer, InputInjector, func(), bool) {
+	entry := *connLog
+	user := header.username
+
+	if s.vmgr == nil {
+		rejectConnection(conn, codeMessage(RejectCodeUnsupportedOS, "virtual sessions not supported on this platform"))
+		entry.Warn("session rejected: not supported on this platform")
+		return nil, nil, nil, false
+	}
+	if user == "" {
+		rejectConnection(conn, codeMessage(RejectCodeBadRequest, "session mode requires a username"))
+		entry.Warn("session rejected: no username provided")
+		return nil, nil, nil, false
+	}
+
+	vs, err := s.vmgr.GetOrCreate(user, header.width, header.height)
+	if err != nil {
+		rejectConnection(conn, codeMessage(RejectCodeSessionError, fmt.Sprintf("create virtual session: %v", err)))
+		entry.Warnf("create virtual session for %s: %v", user, err)
+		return nil, nil, nil, false
+	}
+	vs.ClientConnect()
+
+	// Publish the enriched logger to the caller before logging through it.
+	entry = entry.WithField("vnc_user", user)
+	*connLog = entry
+	entry.Infof("session mode: user=%s display=%s", user, vs.Display())
+
+	return vs.Capturer(), vs.Injector(), vs.ClientDisconnect, true
+}
+
+// acquireAttachSession returns the server's shared capturer for an attach
+// mode connection, first notifying it of the new client when it implements
+// an optional ClientConnect method.
+//
+// NOTE(review): attach mode pairs this with attachSessionCleanup, which is a
+// no-op, so no counterpart to ClientConnect is invoked when the session
+// ends. Confirm the capturer does not rely on balanced connect/disconnect
+// notifications.
+func (s *Server) acquireAttachSession() ScreenCapturer {
+	type clientConnector interface{ ClientConnect() }
+
+	shared := s.capturer
+	if notifier, ok := shared.(clientConnector); ok {
+		notifier.ClientConnect()
+	}
+	return shared
+}
+
+// attachSessionCleanup is the no-op cleanup used by attach mode. It is
+// returned by acquireSessionResources as the per-session cleanup func when
+// the connection attaches to the shared capturer. It is a named func rather
+// than an inline closure so the empty body is unambiguous.
+func attachSessionCleanup() {
+	// Attach mode keeps the shared capturer; nothing to release per session.
+}
--- a/client/vnc/server/server_darwin.go
+++ b/client/vnc/server/server_darwin.go
@@ -0,0 +1,21 @@
+//go:build darwin && !ios
+
+package server
+
+// platformInit is the platform-specific initialization hook; macOS needs
+// none.
+func (s *Server) platformInit() {
+	// no-op on macOS
+}
+
+// serviceAcceptLoop is the service-mode accept loop. Service mode is not
+// supported on macOS, so it logs a warning and runs the regular acceptLoop
+// (direct mode) instead.
+func (s *Server) serviceAcceptLoop() {
+	s.log.Warn("service mode not supported on macOS, falling back to direct mode")
+	s.acceptLoop()
+}
+
+// platformSessionManager returns the platform's virtual session manager.
+// macOS has none, so it returns nil.
+// NOTE(review): presumably this value ends up in Server.vmgr, in which case
+// session-mode connections are rejected as unsupported; confirm in the
+// caller.
+func (s *Server) platformSessionManager() virtualSessionManager {
+	return nil
+}
+
+// platformShutdown is the platform-specific shutdown hook; macOS has
+// nothing to release.
+func (s *Server) platformShutdown() {
+	// no-op on this platform
+}
--- a/client/vnc/server/server_stub.go
+++ b/client/vnc/server/server_stub.go
@@ -0,0 +1,21 @@
+//go:build (!windows && !darwin && !freebsd && !(linux && !android)) || (darwin && ios)
+
+package server
+
+// platformInit is the platform-specific initialization hook. This stub is
+// built for platforms without a dedicated implementation and does nothing.
+func (s *Server) platformInit() {
+	// no-op on unsupported platforms
+}
+
+// serviceAcceptLoop is the service-mode accept loop. Service mode is not
+// supported on the platforms this stub is built for, so it logs a warning
+// and runs the regular acceptLoop (direct mode) instead.
+func (s *Server) serviceAcceptLoop() {
+	s.log.Warn("service mode not supported on this platform, falling back to direct mode")
+	s.acceptLoop()
+}
+
+// platformSessionManager returns the platform's virtual session manager.
+// The platforms this stub is built for have none, so it returns nil.
+// NOTE(review): presumably this value ends up in Server.vmgr, in which case
+// session-mode connections are rejected as unsupported; confirm in the
+// caller.
+func (s *Server) platformSessionManager() virtualSessionManager {
+	return nil
+}
+
+// platformShutdown is the platform-specific shutdown hook; this stub has
+// nothing to release.
+func (s *Server) platformShutdown() {
+	// no-op on this platform
+}
--- a/client/vnc/server/server_test.go
+++ b/client/vnc/server/server_test.go
@@ -0,0 +1,412 @@
+package server
+
+import (
+	"encoding/binary"
+	"encoding/hex"
+	"image"
+	"io"
+	"net"
+	"net/netip"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+// testCapturer is a fake screen capturer for test sessions that always
+// reports a 100x100 screen and produces blank frames of that size.
+type testCapturer struct{}
+
+// Width reports the fake screen width in pixels.
+func (t *testCapturer) Width() int { return 100 }
+
+// Height reports the fake screen height in pixels.
+func (t *testCapturer) Height() int { return 100 }
+
+// Capture returns a freshly allocated blank frame of Width x Height pixels.
+func (t *testCapturer) Capture() (*image.RGBA, error) {
+	bounds := image.Rect(0, 0, t.Width(), t.Height())
+	return image.NewRGBA(bounds), nil
+}
+
+// startTestServer starts a Server on an ephemeral loopback port, backed by
+// testCapturer and StubInputInjector, and registers its shutdown with
+// t.Cleanup. disableAuth and jwtConfig (when non-nil) are applied before the
+// server starts. It returns the listener address and the server.
+func startTestServer(t *testing.T, disableAuth bool, jwtConfig *JWTConfig) (net.Addr, *Server) {
+	t.Helper()
+
+	server := New(&testCapturer{}, &StubInputInjector{}, "")
+	server.SetDisableAuth(disableAuth)
+	if jwtConfig != nil {
+		server.SetJWTConfig(jwtConfig)
+	}
+
+	listenOn := netip.MustParseAddrPort("127.0.0.1:0")
+	overlay := netip.MustParsePrefix("127.0.0.0/8")
+	require.NoError(t, server.Start(t.Context(), listenOn, overlay))
+	t.Cleanup(func() { _ = server.Stop() })
+
+	// Swap in a non-loopback local address: source validation then admits
+	// the 127.0.0.1 client through the overlay-prefix check rather than
+	// treating it as the server's own IP or a loopback-to-loopback peer.
+	server.localAddr = netip.MustParseAddr("10.99.99.1")
+
+	return server.listener.Addr(), server
+}
+
+// TestAuthEnabled_NoJWTConfig_RejectsConnection verifies that a server with
+// auth enabled but no JWT config rejects the connection through an RFB
+// security failure whose reason mentions the missing identity provider.
+func TestAuthEnabled_NoJWTConfig_RejectsConnection(t *testing.T) {
+	addr, _ := startTestServer(t, false, nil)
+
+	conn, err := net.Dial("tcp", addr.String())
+	require.NoError(t, err)
+	defer conn.Close()
+	// Bound every read/write so a server regression fails fast instead of
+	// hanging until the global test timeout.
+	require.NoError(t, conn.SetDeadline(time.Now().Add(10*time.Second)))
+
+	// Send session header: attach mode, no username, no JWT.
+	header := make([]byte, 13) // ModeAttach + usernameLen=0 + jwtLen=0 + sessionID=0 + width=0 + height=0
+	header[0] = ModeAttach
+	_, err = conn.Write(header)
+	require.NoError(t, err)
+
+	// Server should send RFB version then security failure.
+	var version [12]byte
+	_, err = io.ReadFull(conn, version[:])
+	require.NoError(t, err)
+	assert.Equal(t, "RFB 003.008\n", string(version[:]))
+
+	// Write client version to proceed through handshake.
+	_, err = conn.Write(version[:])
+	require.NoError(t, err)
+
+	// Read security types: 0 means failure, followed by reason.
+	var numTypes [1]byte
+	_, err = io.ReadFull(conn, numTypes[:])
+	require.NoError(t, err)
+	assert.Equal(t, byte(0), numTypes[0], "should have 0 security types (failure)")
+
+	var reasonLen [4]byte
+	_, err = io.ReadFull(conn, reasonLen[:])
+	require.NoError(t, err)
+
+	reason := make([]byte, binary.BigEndian.Uint32(reasonLen[:]))
+	_, err = io.ReadFull(conn, reason)
+	require.NoError(t, err)
+	assert.Contains(t, string(reason), "identity provider", "rejection reason should mention missing IdP config")
+}
+
+// TestAuthDisabled_AllowsConnection verifies that with auth disabled a
+// client without a JWT gets through to RFB security negotiation and is
+// offered at least one security type.
+func TestAuthDisabled_AllowsConnection(t *testing.T) {
+	addr, _ := startTestServer(t, true, nil)
+
+	conn, err := net.Dial("tcp", addr.String())
+	require.NoError(t, err)
+	defer conn.Close()
+	// Bound every read/write so a server regression fails fast instead of
+	// hanging until the global test timeout.
+	require.NoError(t, conn.SetDeadline(time.Now().Add(10*time.Second)))
+
+	// Send session header: attach mode, no username, no JWT.
+	header := make([]byte, 13) // ModeAttach + usernameLen=0 + jwtLen=0 + sessionID=0 + width=0 + height=0
+	header[0] = ModeAttach
+	_, err = conn.Write(header)
+	require.NoError(t, err)
+
+	// Server should send RFB version.
+	var version [12]byte
+	_, err = io.ReadFull(conn, version[:])
+	require.NoError(t, err)
+	assert.Equal(t, "RFB 003.008\n", string(version[:]))
+
+	// Write client version.
+	_, err = conn.Write(version[:])
+	require.NoError(t, err)
+
+	// Should get security types (not 0 = failure).
+	var numTypes [1]byte
+	_, err = io.ReadFull(conn, numTypes[:])
+	require.NoError(t, err)
+	assert.NotEqual(t, byte(0), numTypes[0], "should have at least one security type (auth disabled)")
+}
+
+// TestAuthEnabled_InvalidJWT_RejectedBeforeRFB checks that handleConnection
+// itself enforces JWT authentication: a JWT-shaped but bogus token must be
+// turned away with an RFB security failure carrying one of the server's
+// reject codes, without ever reaching security negotiation.
+func TestAuthEnabled_InvalidJWT_RejectedBeforeRFB(t *testing.T) {
+	addr, _ := startTestServer(t, false, &JWTConfig{
+		Issuer:       "https://example.invalid",
+		KeysLocation: "https://example.invalid/.well-known/jwks.json",
+		Audiences:    []string{"test"},
+	})
+
+	// Three-segment "JWT" with bogus base64. The server's authenticateJWT path
+	// must catch this regardless of the IdP being unreachable.
+	const bogusJWT = "abc.def.ghi"
+
+	// mode + empty username, then the length-prefixed JWT, then zeroed
+	// session ID (4 bytes) and geometry (4 bytes).
+	header := []byte{ModeAttach, 0, 0}
+	header = binary.BigEndian.AppendUint16(header, uint16(len(bogusJWT)))
+	header = append(header, bogusJWT...)
+	header = append(header, make([]byte, 4+4)...)
+
+	conn, err := net.Dial("tcp", addr.String())
+	require.NoError(t, err)
+	defer conn.Close()
+	require.NoError(t, conn.SetDeadline(time.Now().Add(10*time.Second)))
+
+	_, err = conn.Write(header)
+	require.NoError(t, err)
+
+	// Exchange RFB versions so the server delivers its failure reason.
+	var version [12]byte
+	_, err = io.ReadFull(conn, version[:])
+	require.NoError(t, err)
+	_, err = conn.Write(version[:])
+	require.NoError(t, err)
+
+	var numTypes [1]byte
+	_, err = io.ReadFull(conn, numTypes[:])
+	require.NoError(t, err)
+	require.Equal(t, byte(0), numTypes[0], "must fail security negotiation")
+
+	var reasonLen [4]byte
+	_, err = io.ReadFull(conn, reasonLen[:])
+	require.NoError(t, err)
+	reasonBytes := make([]byte, binary.BigEndian.Uint32(reasonLen[:]))
+	_, err = io.ReadFull(conn, reasonBytes)
+	require.NoError(t, err)
+
+	// The reason must carry one of the server's reject codes, proving the
+	// rejection came from authenticateJWT in handleConnection.
+	r := string(reasonBytes)
+	acceptedCodes := []string{RejectCodeJWTInvalid, RejectCodeJWTExpired, RejectCodeAuthForbidden}
+	hasJWTReject := false
+	for i := 0; i < len(acceptedCodes) && !hasJWTReject; i++ {
+		hasJWTReject = strings.Contains(r, acceptedCodes[i])
+	}
+	assert.True(t, hasJWTReject, "reason %q must include an AUTH_JWT_* code", r)
+}
+
+// TestAuth_NoUnauthBytesPastHeader proves the server does not send any RFB
+// content to a connection that fails source validation. Specifically, the
+// server must close immediately and the client must see EOF before any RFB
+// version greeting is written.
+func TestAuth_NoUnauthBytesPastHeader(t *testing.T) {
+	srv := New(&testCapturer{}, &StubInputInjector{}, "")
+	srv.SetDisableAuth(true)
+	addr := netip.MustParseAddrPort("127.0.0.1:0")
+	// Tight overlay that excludes 127.0.0.0/8 and a non-loopback local IP, so
+	// the loopback short-circuit in isAllowedSource doesn't apply.
+	require.NoError(t, srv.Start(t.Context(), addr, netip.MustParsePrefix("10.99.0.0/16")))
+	srv.localAddr = netip.MustParseAddr("10.99.99.1")
+	t.Cleanup(func() { _ = srv.Stop() })
+
+	conn, err := net.Dial("tcp", srv.listener.Addr().String())
+	require.NoError(t, err)
+	defer conn.Close()
+	require.NoError(t, conn.SetDeadline(time.Now().Add(5*time.Second)))
+
+	// Reading even one byte must EOF: the source IP (127.0.0.1) is outside
+	// the configured overlay, so handleConnection closes before writing.
+	// Requiring io.EOF (rather than any error) also rules out a server that
+	// merely stalls until the client deadline fires.
+	var b [1]byte
+	_, err = io.ReadFull(conn, b[:])
+	require.ErrorIs(t, err, io.EOF, "non-overlay client must see EOF, not an RFB greeting")
+}
+
+// TestAuthEnabled_EmptyJWT_Rejected verifies that with auth enabled and a
+// JWT config present, a connection that supplies no JWT fails RFB security
+// negotiation.
+func TestAuthEnabled_EmptyJWT_Rejected(t *testing.T) {
+	// Auth enabled with a (bogus) JWT config: connections without JWT should be rejected.
+	addr, _ := startTestServer(t, false, &JWTConfig{
+		Issuer:       "https://example.com",
+		KeysLocation: "https://example.com/.well-known/jwks.json",
+		Audiences:    []string{"test"},
+	})
+
+	conn, err := net.Dial("tcp", addr.String())
+	require.NoError(t, err)
+	defer conn.Close()
+	// Bound every read/write so a server regression fails fast instead of
+	// hanging until the global test timeout.
+	require.NoError(t, conn.SetDeadline(time.Now().Add(10*time.Second)))
+
+	// Send session header with empty JWT.
+	header := make([]byte, 13) // ModeAttach + usernameLen=0 + jwtLen=0 + sessionID=0 + width=0 + height=0
+	header[0] = ModeAttach
+	_, err = conn.Write(header)
+	require.NoError(t, err)
+
+	var version [12]byte
+	_, err = io.ReadFull(conn, version[:])
+	require.NoError(t, err)
+
+	_, err = conn.Write(version[:])
+	require.NoError(t, err)
+
+	var numTypes [1]byte
+	_, err = io.ReadFull(conn, numTypes[:])
+	require.NoError(t, err)
+	assert.Equal(t, byte(0), numTypes[0], "should reject with 0 security types")
+}
+
+// TestIsAllowedSource is a table test for Server.isAllowedSource. It covers
+// non-TCP peers, the server's own IP, addresses inside and outside the
+// overlay prefix, v4-mapped IPv6 peers, both sides of the loopback rule, and
+// the fail-closed behavior for an invalid overlay prefix.
+func TestIsAllowedSource(t *testing.T) {
+	tests := []struct {
+		name      string
+		localAddr netip.Addr
+		network   netip.Prefix
+		remote    net.Addr
+		want      bool
+	}{
+		{
+			name:      "non-tcp address rejected",
+			localAddr: netip.MustParseAddr("10.99.99.1"),
+			network:   netip.MustParsePrefix("10.99.0.0/16"),
+			remote:    &net.UDPAddr{IP: net.ParseIP("10.99.99.2"), Port: 1234},
+			want:      false,
+		},
+		{
+			name:      "own IP rejected",
+			localAddr: netip.MustParseAddr("10.99.99.1"),
+			network:   netip.MustParsePrefix("10.99.0.0/16"),
+			remote:    &net.TCPAddr{IP: net.ParseIP("10.99.99.1"), Port: 5900},
+			want:      false,
+		},
+		{
+			name:      "non-overlay IP rejected",
+			localAddr: netip.MustParseAddr("10.99.99.1"),
+			network:   netip.MustParsePrefix("10.99.0.0/16"),
+			remote:    &net.TCPAddr{IP: net.ParseIP("192.168.1.1"), Port: 5900},
+			want:      false,
+		},
+		{
+			name:      "overlay IP allowed",
+			localAddr: netip.MustParseAddr("10.99.99.1"),
+			network:   netip.MustParsePrefix("10.99.0.0/16"),
+			remote:    &net.TCPAddr{IP: net.ParseIP("10.99.99.2"), Port: 5900},
+			want:      true,
+		},
+		{
+			name:      "v4-mapped v6 in overlay allowed (unmapped)",
+			localAddr: netip.MustParseAddr("10.99.99.1"),
+			network:   netip.MustParsePrefix("10.99.0.0/16"),
+			remote:    &net.TCPAddr{IP: net.ParseIP("::ffff:10.99.99.2"), Port: 5900},
+			want:      true,
+		},
+		{
+			name:      "loopback allowed only when local is loopback",
+			localAddr: netip.MustParseAddr("127.0.0.1"),
+			network:   netip.MustParsePrefix("127.0.0.0/8"),
+			remote:    &net.TCPAddr{IP: net.ParseIP("127.0.0.5"), Port: 5900},
+			want:      true,
+		},
+		{
+			// Negative side of the loopback rule: a loopback peer gets no
+			// free pass when the server itself is bound to an overlay IP.
+			name:      "loopback rejected when local is not loopback",
+			localAddr: netip.MustParseAddr("10.99.99.1"),
+			network:   netip.MustParsePrefix("10.99.0.0/16"),
+			remote:    &net.TCPAddr{IP: net.ParseIP("127.0.0.1"), Port: 5900},
+			want:      false,
+		},
+		{
+			name:      "invalid network rejected (fail-closed)",
+			localAddr: netip.MustParseAddr("10.99.99.1"),
+			network:   netip.Prefix{},
+			remote:    &net.TCPAddr{IP: net.ParseIP("10.99.99.2"), Port: 5900},
+			want:      false,
+		},
+	}
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			srv := New(&testCapturer{}, &StubInputInjector{}, "")
+			srv.localAddr = tc.localAddr
+			srv.network = tc.network
+			assert.Equal(t, tc.want, srv.isAllowedSource(tc.remote))
+		})
+	}
+}
+
+// TestStart_InvalidNetworkRejected verifies that Start refuses a zero-value
+// (invalid) overlay prefix and says so in the returned error.
+func TestStart_InvalidNetworkRejected(t *testing.T) {
+	var noOverlay netip.Prefix // zero value, not a valid prefix
+
+	server := New(&testCapturer{}, &StubInputInjector{}, "")
+	listenOn := netip.MustParseAddrPort("127.0.0.1:0")
+
+	startErr := server.Start(t.Context(), listenOn, noOverlay)
+	require.Error(t, startErr, "Start must refuse an invalid overlay prefix")
+	assert.Contains(t, startErr.Error(), "invalid overlay network prefix")
+}
+
+// TestAgentToken_MismatchClosesConnection verifies that a client presenting
+// a wrong agent token is disconnected before any RFB greeting is sent.
+func TestAgentToken_MismatchClosesConnection(t *testing.T) {
+	server := New(&testCapturer{}, &StubInputInjector{}, "")
+	server.SetDisableAuth(true)
+	server.SetAgentToken("deadbeefcafebabe")
+
+	listenOn := netip.MustParseAddrPort("127.0.0.1:0")
+	overlay := netip.MustParsePrefix("127.0.0.0/8")
+	require.NoError(t, server.Start(t.Context(), listenOn, overlay))
+	server.localAddr = netip.MustParseAddr("10.99.99.1")
+	t.Cleanup(func() { _ = server.Stop() })
+
+	conn, err := net.Dial("tcp", server.listener.Addr().String())
+	require.NoError(t, err)
+	defer conn.Close()
+	require.NoError(t, conn.SetDeadline(time.Now().Add(10*time.Second)))
+
+	// Send a wrong token of the right length (8 bytes hex-decoded). The write
+	// result is deliberately ignored: the server may already have closed, and
+	// either way the read below must fail.
+	wrongToken := []byte{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}
+	_, _ = conn.Write(wrongToken)
+
+	// Server must close without sending the RFB greeting.
+	var greeting [12]byte
+	_, err = io.ReadFull(conn, greeting[:])
+	require.Error(t, err, "server must close the connection on bad agent token")
+}
+
+// TestAgentToken_MatchAllowsHandshake verifies that a client presenting the
+// configured agent token (sent as raw, hex-decoded bytes) followed by a
+// connection header is allowed through to the RFB greeting.
+func TestAgentToken_MatchAllowsHandshake(t *testing.T) {
+	const tokenHex = "deadbeefcafebabe"
+	rawToken, err := hex.DecodeString(tokenHex)
+	require.NoError(t, err)
+
+	server := New(&testCapturer{}, &StubInputInjector{}, "")
+	server.SetDisableAuth(true)
+	server.SetAgentToken(tokenHex)
+
+	listenOn := netip.MustParseAddrPort("127.0.0.1:0")
+	overlay := netip.MustParsePrefix("127.0.0.0/8")
+	require.NoError(t, server.Start(t.Context(), listenOn, overlay))
+	server.localAddr = netip.MustParseAddr("10.99.99.1")
+	t.Cleanup(func() { _ = server.Stop() })
+
+	conn, err := net.Dial("tcp", server.listener.Addr().String())
+	require.NoError(t, err)
+	defer conn.Close()
+	require.NoError(t, conn.SetDeadline(time.Now().Add(10*time.Second)))
+
+	_, err = conn.Write(rawToken)
+	require.NoError(t, err)
+
+	// Follow the token with a session header so handleConnection can proceed
+	// past readConnectionHeader.
+	sessionHeader := make([]byte, 13) // ModeAttach + usernameLen=0 + jwtLen=0 + sessionID=0 + width=0 + height=0
+	sessionHeader[0] = ModeAttach
+	_, err = conn.Write(sessionHeader)
+	require.NoError(t, err)
+
+	// With a matching token the server proceeds to the RFB greeting.
+	var greeting [12]byte
+	_, err = io.ReadFull(conn, greeting[:])
+	require.NoError(t, err, "server must keep the connection open after a valid agent token")
+	assert.Equal(t, "RFB 003.008\n", string(greeting[:]))
+}
+
+// TestSessionMode_RejectedWhenNoVMGR verifies that a ModeSession request on
+// a server without a virtual session manager is rejected with the
+// unsupported-OS reject code instead of crashing.
+func TestSessionMode_RejectedWhenNoVMGR(t *testing.T) {
+	// Default platformSessionManager() on non-Linux returns nil, so ModeSession
+	// must be rejected with the UNSUPPORTED reason rather than crashing.
+	srv := New(&testCapturer{}, &StubInputInjector{}, "")
+	srv.SetDisableAuth(true)
+
+	addr := netip.MustParseAddrPort("127.0.0.1:0")
+	network := netip.MustParsePrefix("127.0.0.0/8")
+	require.NoError(t, srv.Start(t.Context(), addr, network))
+	srv.localAddr = netip.MustParseAddr("10.99.99.1")
+	// Force vmgr to nil regardless of platform so the test is deterministic.
+	srv.vmgr = nil
+	t.Cleanup(func() { _ = srv.Stop() })
+
+	conn, err := net.Dial("tcp", srv.listener.Addr().String())
+	require.NoError(t, err)
+	defer conn.Close()
+	require.NoError(t, conn.SetDeadline(time.Now().Add(10*time.Second)))
+
+	// ModeSession with no username/JWT, so we exit on the vmgr==nil branch
+	// before username validation runs. The full 13-byte header is sent
+	// (including zeroed session ID and geometry) so the server does not sit
+	// out its header read deadline waiting for the optional trailing fields.
+	header := make([]byte, 13) // ModeSession + usernameLen=0 + jwtLen=0 + sessionID=0 + width=0 + height=0
+	header[0] = ModeSession
+	_, err = conn.Write(header)
+	require.NoError(t, err)
+
+	var version [12]byte
+	_, err = io.ReadFull(conn, version[:])
+	require.NoError(t, err)
+	_, err = conn.Write(version[:])
+	require.NoError(t, err)
+
+	var numTypes [1]byte
+	_, err = io.ReadFull(conn, numTypes[:])
+	require.NoError(t, err)
+	assert.Equal(t, byte(0), numTypes[0])
+
+	var reasonLen [4]byte
+	_, err = io.ReadFull(conn, reasonLen[:])
+	require.NoError(t, err)
+	reason := make([]byte, binary.BigEndian.Uint32(reasonLen[:]))
+	_, err = io.ReadFull(conn, reason)
+	require.NoError(t, err)
+	assert.Contains(t, string(reason), RejectCodeUnsupportedOS)
+}
--- a/client/vnc/server/server_windows.go
+++ b/client/vnc/server/server_windows.go
@@ -0,0 +1,312 @@
+//go:build windows
+
+package server
+
+import (
+	"bytes"
+	"context"
+	"fmt"
+	"io"
+	"net"
+	"unsafe"
+
+	log "github.com/sirupsen/logrus"
+	"golang.org/x/sys/windows"
+	"golang.org/x/sys/windows/registry"
+)
+
+var (
+	// sas.dll is resolved lazily; createSASEvent probes for the SendSAS
+	// export with procSendSAS.Find before the proc is ever called.
+	sasDLL      = windows.NewLazySystemDLL("sas.dll")
+	procSendSAS = sasDLL.NewProc("SendSAS")
+
+	// advapi32 export that parses an SDDL string into a security descriptor.
+	procConvertStringSecurityDescriptorToSecurityDescriptor = advapi32.NewProc("ConvertStringSecurityDescriptorToSecurityDescriptorW")
+)
+
+// sasSecurityAttributes builds a SECURITY_ATTRIBUTES that grants
+// EVENT_MODIFY_STATE only to the SYSTEM account, preventing unprivileged
+// local processes from triggering the Secure Attention Sequence.
+func sasSecurityAttributes() (*windows.SecurityAttributes, error) {
+	// SDDL: grant full access to SYSTEM (creates/waits) and EVENT_MODIFY_STATE
+	// to the interactive user (IU) so the VNC agent in the console session can
+	// signal it. Other local users and network users are denied.
+	sddl, err := windows.UTF16PtrFromString("D:(A;;GA;;;SY)(A;;0x0002;;;IU)")
+	if err != nil {
+		return nil, err
+	}
+	var sd uintptr
+	r, _, lerr := procConvertStringSecurityDescriptorToSecurityDescriptor.Call(
+		uintptr(unsafe.Pointer(sddl)),
+		1, // SDDL_REVISION_1
+		uintptr(unsafe.Pointer(&sd)),
+		0,
+	)
+	if r == 0 {
+		return nil, lerr
+	}
+	return &windows.SecurityAttributes{
+		Length:             uint32(unsafe.Sizeof(windows.SecurityAttributes{})),
+		SecurityDescriptor: (*windows.SECURITY_DESCRIPTOR)(unsafe.Pointer(sd)),
+		InheritHandle:      0,
+	}, nil
+}
+
+// sasOriginalState tracks the SoftwareSASGeneration value present before we
+// changed it, so disableSoftwareSAS can restore the machine to its prior
+// state on shutdown instead of leaving the policy enabled.
+type sasOriginalState struct {
+	had   bool   // true if the value existed before we wrote
+	value uint32 // its prior DWORD value, if had == true
+
+	// modified is true only while a value written by enableSoftwareSAS is in
+	// place. disableSoftwareSAS does nothing unless it is set, so a shutdown
+	// that was never preceded by a successful enable leaves the registry
+	// untouched.
+	modified bool
+}
+
+// savedSASState is the snapshot shared by enableSoftwareSAS and
+// disableSoftwareSAS. It is not synchronized.
+var savedSASState sasOriginalState
+
+// enableSoftwareSAS sets the SoftwareSASGeneration registry value to 1 so
+// that services may trigger the Secure Attention Sequence via SendSAS. The
+// previous value is snapshotted so disableSoftwareSAS can put the system back
+// as it was. Failures are logged and otherwise ignored; the snapshot is only
+// committed once the write has succeeded.
+func enableSoftwareSAS() {
+	key, _, err := registry.CreateKey(
+		registry.LOCAL_MACHINE,
+		`SOFTWARE\Microsoft\Windows\CurrentVersion\Policies\System`,
+		registry.SET_VALUE|registry.QUERY_VALUE,
+	)
+	if err != nil {
+		log.Warnf("open SoftwareSASGeneration registry key: %v", err)
+		return
+	}
+	defer key.Close()
+
+	// Take the snapshot only when no earlier enable is still in effect;
+	// otherwise a repeated call would record our own 1 as the original.
+	snapshot := savedSASState
+	if !snapshot.modified {
+		snapshot = sasOriginalState{}
+		if prev, _, err := key.GetIntegerValue("SoftwareSASGeneration"); err == nil {
+			snapshot.had = true
+			snapshot.value = uint32(prev)
+		}
+	}
+
+	if err := key.SetDWordValue("SoftwareSASGeneration", 1); err != nil {
+		log.Warnf("set SoftwareSASGeneration: %v", err)
+		return
+	}
+	snapshot.modified = true
+	savedSASState = snapshot
+	log.Debug("SoftwareSASGeneration registry key set to 1 (services allowed)")
+}
+
+// disableSoftwareSAS restores the SoftwareSASGeneration value to its
+// pre-enable state: the previous DWORD is written back if one existed,
+// otherwise the value is deleted. It is a no-op unless enableSoftwareSAS
+// actually changed the registry, which makes it idempotent and safe to call
+// when enableSoftwareSAS never ran. If the restore fails the snapshot is kept
+// so a later call can retry.
+func disableSoftwareSAS() {
+	if !savedSASState.modified {
+		return
+	}
+
+	key, err := registry.OpenKey(
+		registry.LOCAL_MACHINE,
+		`SOFTWARE\Microsoft\Windows\CurrentVersion\Policies\System`,
+		registry.SET_VALUE,
+	)
+	if err != nil {
+		log.Debugf("open SoftwareSASGeneration for restore: %v", err)
+		return
+	}
+	defer key.Close()
+
+	if savedSASState.had {
+		if err := key.SetDWordValue("SoftwareSASGeneration", savedSASState.value); err != nil {
+			log.Warnf("restore SoftwareSASGeneration to %d: %v", savedSASState.value, err)
+			return
+		}
+	} else if err := key.DeleteValue("SoftwareSASGeneration"); err != nil {
+		log.Debugf("delete SoftwareSASGeneration: %v", err)
+		return
+	}
+	savedSASState = sasOriginalState{}
+}
+
+// startSASListener creates a named event with a restricted DACL and waits for
+// the VNC input injector to signal it. When signaled, it calls SendSAS(FALSE)
+// from Session 0 to trigger the Secure Attention Sequence (Ctrl+Alt+Del).
+// Access to the event is governed by the DACL from sasSecurityAttributes:
+// SYSTEM gets full access and interactive users may only signal it.
+//
+// sas.dll / SendSAS is part of the Desktop Experience feature: present on
+// client SKUs (Win10/11) and Server SKUs with Desktop Experience installed,
+// missing on Server Core. We probe for the symbol at startup; if absent we
+// don't register the listener and the agent will silently drop SAS keysyms,
+// rather than panicking the entire service every time the user clicks
+// Ctrl+Alt+Del.
+//
+// The listener goroutine is only started when createSASEvent succeeds and
+// runs until ctx is cancelled.
+func startSASListener(ctx context.Context) {
+	ev, ok := createSASEvent()
+	if !ok {
+		return
+	}
+	log.Info("SAS listener ready (Session 0)")
+	go runSASListenerLoop(ctx, ev)
+}
+
+// createSASEvent prepares the named event handle on which the SAS listener
+// waits for client signals. Returns ok=false (with the failure already
+// logged) when the platform doesn't support SAS or the event cannot be
+// created; the caller must not spawn the listener goroutine in that case.
+//
+// As a side effect it calls enableSoftwareSAS once SendSAS has been found,
+// before the event itself is created.
+func createSASEvent() (windows.Handle, bool) {
+	if err := procSendSAS.Find(); err != nil {
+		log.Warnf("SAS unavailable on this Windows SKU (sas.dll/SendSAS not present): %v", err)
+		return 0, false
+	}
+	enableSoftwareSAS()
+	namePtr, err := windows.UTF16PtrFromString(sasEventName)
+	if err != nil {
+		log.Warnf("SAS listener UTF16: %v", err)
+		return 0, false
+	}
+	sa, err := sasSecurityAttributes()
+	if err != nil {
+		log.Warnf("build SAS security descriptor: %v", err)
+		return 0, false
+	}
+	// manualReset=0, initialState=0: auto-reset event, initially unsignaled.
+	ev, err := windows.CreateEvent(sa, 0, 0, namePtr)
+	if err != nil {
+		// x/sys/windows also reports ERROR_ALREADY_EXISTS as an error while
+		// the OS still hands back a handle to the pre-existing object. That
+		// object was not created with our DACL, so it is refused, but the
+		// handle must be released rather than leaked.
+		if ev != 0 {
+			_ = windows.CloseHandle(ev)
+		}
+		log.Warnf("SAS CreateEvent: %v", err)
+		return 0, false
+	}
+	return ev, true
+}
+
+// runSASListenerLoop blocks on ev and invokes SendSAS each time it is
+// signalled, until ctx is cancelled or waiting on the handle fails. It owns
+// ev and closes it on return. Recovers from panics inside SendSAS so a
+// future ABI surprise doesn't tear down the service; after a recovered panic
+// the listener stops.
+func runSASListenerLoop(ctx context.Context, ev windows.Handle) {
+	defer windows.CloseHandle(ev)
+	defer func() {
+		if r := recover(); r != nil {
+			log.Warnf("SAS listener recovered from panic: %v", r)
+		}
+	}()
+	// The wait is bounded so ctx cancellation is noticed within pollMillis.
+	const pollMillis = 500
+	for ctx.Err() == nil {
+		ret, err := windows.WaitForSingleObject(ev, pollMillis)
+		if err != nil {
+			// WAIT_FAILED returns immediately; retrying on a broken handle
+			// would spin at full speed, so give up instead.
+			log.Warnf("SAS listener wait: %v", err)
+			return
+		}
+		if ret != windows.WAIT_OBJECT_0 {
+			continue
+		}
+		// AsUser=FALSE: the caller is a service, not the interactive user.
+		// SendSAS is declared VOID, so the register value returned by Call
+		// carries no success/failure information and is ignored.
+		_, _, _ = procSendSAS.Call(0)
+		log.Info("SendSAS called from Session 0")
+	}
+}
+
+// enablePrivilege switches on the privilege called name (for example
+// "SeTcbPrivilege") in the access token of the current process. It returns
+// the first error hit while opening the token, converting or looking up the
+// name, or adjusting the token.
+//
+// NOTE(review): AdjustTokenPrivileges can report success even when the
+// privilege was not actually granted (ERROR_NOT_ALL_ASSIGNED) — confirm
+// whether callers need to detect that case.
+func enablePrivilege(name string) error {
+	const access = windows.TOKEN_ADJUST_PRIVILEGES | windows.TOKEN_QUERY
+
+	var tok windows.Token
+	err := windows.OpenProcessToken(windows.CurrentProcess(), access, &tok)
+	if err != nil {
+		return err
+	}
+	defer tok.Close()
+
+	wideName, err := windows.UTF16PtrFromString(name)
+	if err != nil {
+		return fmt.Errorf("UTF16 privilege name: %w", err)
+	}
+
+	// Resolve the privilege name to its LUID on the local system.
+	var id windows.LUID
+	if err = windows.LookupPrivilegeValue(nil, wideName, &id); err != nil {
+		return err
+	}
+
+	state := windows.Tokenprivileges{PrivilegeCount: 1}
+	state.Privileges[0].Luid = id
+	state.Privileges[0].Attributes = windows.SE_PRIVILEGE_ENABLED
+	return windows.AdjustTokenPrivileges(tok, false, &state, 0, nil, nil)
+}
+
+// platformSessionManager returns nil: the Windows build provides no virtual
+// session manager.
+func (s *Server) platformSessionManager() virtualSessionManager {
+	return nil
+}
+
+// platformShutdown restores any machine state mutated by platformInit. On
+// Windows that is the SoftwareSASGeneration registry value, which
+// disableSoftwareSAS puts back.
+func (s *Server) platformShutdown() {
+	disableSoftwareSAS()
+}
+
+// platformInit prepares the Session 0 service: it first tries to enable the
+// privileges needed for agent spawning and SendSAS, then starts the SAS
+// listener bound to the server context. A privilege that cannot be enabled
+// is logged at debug level and does not stop initialisation.
+func (s *Server) platformInit() {
+	wanted := [...]string{"SeTcbPrivilege", "SeAssignPrimaryTokenPrivilege"}
+	for i := range wanted {
+		err := enablePrivilege(wanted[i])
+		if err == nil {
+			continue
+		}
+		log.Debugf("enable %s: %v", wanted[i], err)
+	}
+	startSASListener(s.ctx)
+}
+
+// serviceAcceptLoop is the accept loop of the Session 0 service. It starts a
+// session manager for the user-session agent and hands every accepted
+// connection to handleServiceConnection on its own goroutine, where source
+// IP and JWT are checked before proxying. The loop ends, stopping the
+// session manager, once Accept fails after the server context was cancelled;
+// any other Accept error is logged and the loop keeps going.
+func (s *Server) serviceAcceptLoop() {
+	sm := newSessionManager(agentPort)
+	go sm.run()
+
+	log.Infof("service mode, proxying connections to agent on 127.0.0.1:%s", agentPort)
+
+	for {
+		conn, err := s.listener.Accept()
+		if err == nil {
+			go s.handleServiceConnection(conn, sm)
+			continue
+		}
+		if s.ctx.Err() != nil {
+			sm.Stop()
+			return
+		}
+		s.log.Debugf("accept VNC connection: %v", err)
+	}
+}
+
+// handleServiceConnection vets one inbound connection in the Session 0
+// service and, if accepted, hands it to the user-session agent.
+//
+// Steps, in order:
+//  1. Close the connection when isAllowedSource rejects the remote address.
+//  2. Read the connection header through a TeeReader so every consumed byte
+//     is also kept in headerBuf.
+//  3. Unless auth is disabled, require a configured jwtConfig and a header
+//     that passes authenticateJWT; failures are answered via
+//     rejectConnection.
+//  4. Proxy the buffered header bytes followed by the rest of the stream to
+//     the agent, passing the session manager's auth token.
+func (s *Server) handleServiceConnection(conn net.Conn, sm *sessionManager) {
+	connLog := s.log.WithField("remote", conn.RemoteAddr().String())
+
+	if !s.isAllowedSource(conn.RemoteAddr()) {
+		conn.Close()
+		return
+	}
+
+	// Everything readConnectionHeader consumes is mirrored into headerBuf so
+	// it can be replayed to the agent afterwards.
+	var headerBuf bytes.Buffer
+	tee := io.TeeReader(conn, &headerBuf)
+	teeConn := &prefixConn{Reader: tee, Conn: conn}
+
+	header, err := readConnectionHeader(teeConn)
+	if err != nil {
+		connLog.Debugf("read connection header: %v", err)
+		conn.Close()
+		return
+	}
+
+	// NOTE(review): conn is not closed here on the reject paths; presumably
+	// rejectConnection closes it — confirm.
+	if !s.disableAuth {
+		if s.jwtConfig == nil {
+			rejectConnection(conn, codeMessage(RejectCodeAuthConfig, "auth enabled but no identity provider configured"))
+			connLog.Warn("auth rejected: no identity provider configured")
+			return
+		}
+		if _, err := s.authenticateJWT(header); err != nil {
+			rejectConnection(conn, codeMessage(jwtErrorCode(err), err.Error()))
+			connLog.Warnf("auth rejected: %v", err)
+			return
+		}
+	}
+
+	// Replay buffered header bytes + remaining stream to the agent.
+	replayConn := &prefixConn{
+		Reader: io.MultiReader(&headerBuf, conn),
+		Conn:   conn,
+	}
+	proxyToAgent(replayConn, agentPort, sm.AuthToken())
+}
+
+// prefixConn wraps a net.Conn, overriding Read to use a different reader.
+// Every other net.Conn method (Write, Close, deadlines, addresses) is
+// promoted from the embedded Conn unchanged.
+type prefixConn struct {
+	io.Reader
+	net.Conn
+}
+
+// Read pulls from the embedded Reader rather than from Conn. Defining it
+// explicitly also resolves the ambiguity between the two embedded Read
+// methods.
+func (p *prefixConn) Read(b []byte) (int, error) {
+	return p.Reader.Read(b)
+}
--- a/client/vnc/server/server_x11.go
+++ b/client/vnc/server/server_x11.go
@@ -0,0 +1,21 @@
+//go:build (linux && !android) || freebsd
+
+package server
+
+// platformInit performs platform-specific start-up work; this build has
+// none.
+func (s *Server) platformInit() {
+	// no-op on X11
+}
+
+// serviceAcceptLoop has no service-mode implementation in this build (Linux
+// and FreeBSD, per the build constraint). It logs a warning and runs the
+// direct accept loop instead.
+func (s *Server) serviceAcceptLoop() {
+	s.log.Warn("service mode not supported on Linux, falling back to direct mode")
+	s.acceptLoop()
+}
+
+// platformSessionManager returns the virtual session manager for this
+// platform, created by newSessionManager with the server's logger.
+func (s *Server) platformSessionManager() virtualSessionManager {
+	return newSessionManager(s.log)
+}
+
+// platformShutdown undoes platformInit; there is nothing to undo in this
+// build.
+func (s *Server) platformShutdown() {
+	// no-op on this platform
+}
--- a/client/vnc/server/session.go
+++ b/client/vnc/server/session.go
@@ -0,0 +1,672 @@
+package server
+
+import (
+	"bytes"
+	"crypto/rand"
+	"encoding/binary"
+	"errors"
+	"fmt"
+	"image"
+	"io"
+	"net"
+	"strings"
+	"sync"
+	"time"
+
+	log "github.com/sirupsen/logrus"
+)
+
+const (
+	// readDeadline is how far ahead messageLoop pushes the connection
+	// deadline before reading each client message.
+	readDeadline = 60 * time.Second
+	// maxCutTextBytes caps clipboard and type-text payloads in both
+	// directions.
+	maxCutTextBytes = 1 << 20 // 1 MiB
+)
+
+const tileSize = 64 // pixels per tile for dirty-rect detection
+
+// fullFramePromoteNum/Den trigger full-frame encoding when the dirty area
+// exceeds num/den of the screen. Once past the crossover (benchmarks put it
+// around 60% at 1080p) a single zlib rect is faster than many per-tile
+// encodes AND produces about the same wire bytes: the per-tile path keeps
+// restarting zlib dictionaries and re-emitting rect headers.
+const (
+	fullFramePromoteNum = 60
+	fullFramePromoteDen = 100
+)
+
+// session holds the state of one RFB client connection: the socket, the
+// capture and input back-ends, the negotiated pixel format and encodings,
+// and the frame buffers used for dirty-rectangle detection.
+type session struct {
+	conn     net.Conn
+	capturer ScreenCapturer
+	injector InputInjector
+	serverW  int
+	serverH  int
+	password string
+	log      *log.Entry
+
+	// writeMu serializes writes to conn between the encoder goroutine and
+	// the clipboard poller.
+	writeMu sync.Mutex
+	// pf and useZlib/zlib are written by messageLoop before the first FB
+	// update request arrives (SetPixelFormat/SetEncodings happen during the
+	// client handshake), and only read from the encoder goroutine. Fine
+	// without locks because of that ordering invariant.
+	// NOTE(review): nothing visible here stops a client from sending
+	// SetPixelFormat/SetEncodings later in the session — confirm the
+	// invariant holds for the clients in use.
+	pf         clientPixelFormat
+	useZlib    bool
+	useHextile bool
+	useTight   bool
+	zlib       *zlibState
+	tight      *tightState
+	// prevFrame, curFrame and idleFrames live on the encoder goroutine and
+	// must not be touched elsewhere. curFrame holds a session-owned copy of
+	// the capturer's latest frame so the encoder works on a stable buffer
+	// even when the capturer double-buffers and recycles memory underneath.
+	prevFrame  *image.RGBA
+	curFrame   *image.RGBA
+	idleFrames int
+
+	// encodeCh carries framebuffer-update requests from the read loop to the
+	// encoder goroutine. Buffered size 1: RFB clients have one outstanding
+	// request at a time, so a new request always replaces any pending one.
+	encodeCh chan fbRequest
+}
+
+// fbRequest is one FramebufferUpdateRequest handed from the read loop to the
+// encoder goroutine. Only the incremental flag is kept; the requested
+// region is read off the wire but not stored.
+type fbRequest struct {
+	incremental bool
+}
+
+// addr returns the remote address of the client connection as a string.
+func (s *session) addr() string { return s.conn.RemoteAddr().String() }
+
+// serve runs the full RFB session lifecycle: handshake, then the clipboard
+// poller and encoder goroutines, then the client message loop until the
+// client disconnects or a protocol/IO error occurs. The connection is always
+// closed before serve returns.
+func (s *session) serve() {
+	defer s.conn.Close()
+	s.pf = defaultClientPixelFormat()
+	s.encodeCh = make(chan fbRequest, 1)
+
+	if err := s.handshake(); err != nil {
+		s.log.Warnf("handshake with %s: %v", s.addr(), err)
+		return
+	}
+	s.log.Infof("client connected: %s", s.addr())
+
+	done := make(chan struct{})
+	defer close(done)
+	go s.clipboardPoll(done)
+
+	encoderDone := make(chan struct{})
+	go s.encoderLoop(encoderDone)
+	defer func() {
+		// Close the connection before waiting for the encoder. If the
+		// encoder is blocked in a Write to a client that stopped reading,
+		// only closing the socket unblocks it; waiting first could hang
+		// this goroutine indefinitely. The outer deferred Close then
+		// becomes a harmless second close.
+		_ = s.conn.Close()
+		close(s.encodeCh)
+		<-encoderDone
+	}()
+
+	if err := s.messageLoop(); err != nil && err != io.EOF {
+		s.log.Warnf("client %s disconnected: %v", s.addr(), err)
+	} else {
+		s.log.Infof("client disconnected: %s", s.addr())
+	}
+}
+
+// clipboardPoll samples the server-side clipboard every two seconds and
+// forwards it to the VNC client whenever it is non-empty and differs from
+// the last text sent. Text longer than maxCutTextBytes is truncated first.
+// The loop stops when done is closed or when sending to the client fails.
+func (s *session) clipboardPoll(done <-chan struct{}) {
+	const interval = 2 * time.Second
+	tick := time.NewTicker(interval)
+	defer tick.Stop()
+
+	previous := ""
+	for {
+		select {
+		case <-done:
+			return
+		case <-tick.C:
+		}
+
+		current := s.injector.GetClipboard()
+		if len(current) > maxCutTextBytes {
+			current = current[:maxCutTextBytes]
+		}
+		if current == "" || current == previous {
+			continue
+		}
+		previous = current
+		if err := s.sendServerCutText(current); err != nil {
+			s.log.Debugf("send clipboard to client: %v", err)
+			return
+		}
+	}
+}
+
+// handshake performs the RFB connection setup in protocol order: version
+// exchange, security-type negotiation and authentication, ClientInit, and
+// finally ServerInit. The client's version string and ClientInit byte are
+// read but not inspected.
+func (s *session) handshake() error {
+	// Version exchange: ours first, then the client's 12-byte reply.
+	if _, err := io.WriteString(s.conn, rfbProtocolVersion); err != nil {
+		return fmt.Errorf("send version: %w", err)
+	}
+	// One scratch buffer serves all fixed-size client replies.
+	var scratch [12]byte
+	if _, err := io.ReadFull(s.conn, scratch[:]); err != nil {
+		return fmt.Errorf("read client version: %w", err)
+	}
+
+	// Security: offer our types, read the client's pick, run it.
+	if err := s.sendSecurityTypes(); err != nil {
+		return err
+	}
+	if _, err := io.ReadFull(s.conn, scratch[:1]); err != nil {
+		return fmt.Errorf("read security type: %w", err)
+	}
+	if err := s.handleSecurity(scratch[0]); err != nil {
+		return err
+	}
+
+	// ClientInit is a single byte; ServerInit answers it.
+	if _, err := io.ReadFull(s.conn, scratch[:1]); err != nil {
+		return fmt.Errorf("read ClientInit: %w", err)
+	}
+	return s.sendServerInit()
+}
+
+func (s *session) sendSecurityTypes() error {
+	if s.password == "" {
+		_, err := s.conn.Write([]byte{1, secNone})
+		return err
+	}
+	_, err := s.conn.Write([]byte{1, secVNCAuth})
+	return err
+}
+
+// handleSecurity runs the security type the client selected. VNC
+// authentication goes through the challenge/response exchange in doVNCAuth.
+// None is answered with an OK SecurityResult, but only when no password is
+// configured: with a password set, None was never offered, and accepting it
+// would let a client skip authentication simply by picking it. Any other
+// type is rejected. A returned error makes the caller drop the connection.
+func (s *session) handleSecurity(secType byte) error {
+	switch secType {
+	case secVNCAuth:
+		return s.doVNCAuth()
+	case secNone:
+		if s.password != "" {
+			return fmt.Errorf("security type None selected by %s but VNC authentication is required", s.addr())
+		}
+		return binary.Write(s.conn, binary.BigEndian, uint32(0))
+	default:
+		return fmt.Errorf("unsupported security type: %d", secType)
+	}
+}
+
+// doVNCAuth runs the VNC challenge/response exchange: send a random 16-byte
+// challenge, read the 16-byte response, and compare it with the challenge
+// encrypted under the session password. With an empty password any response
+// is accepted. On a mismatch the client receives a failure SecurityResult
+// followed by a reason string, and an error is returned.
+func (s *session) doVNCAuth() error {
+	var challenge, response [16]byte
+	if _, err := rand.Read(challenge[:]); err != nil {
+		return fmt.Errorf("generate challenge: %w", err)
+	}
+	if _, err := s.conn.Write(challenge[:]); err != nil {
+		return fmt.Errorf("send challenge: %w", err)
+	}
+	if _, err := io.ReadFull(s.conn, response[:]); err != nil {
+		return fmt.Errorf("read auth response: %w", err)
+	}
+
+	accepted := true
+	if s.password != "" {
+		expected, err := vncAuthEncrypt(challenge[:], s.password)
+		if err != nil {
+			return fmt.Errorf("vnc auth encrypt: %w", err)
+		}
+		accepted = bytes.Equal(expected, response[:])
+	}
+
+	// SecurityResult: 0 means OK, 1 means failed.
+	status := uint32(1)
+	if accepted {
+		status = 0
+	}
+	if err := binary.Write(s.conn, binary.BigEndian, status); err != nil {
+		return fmt.Errorf("send auth result: %w", err)
+	}
+	if accepted {
+		return nil
+	}
+
+	// Best effort: the connection is dropped right after, so failures to
+	// deliver the reason text are deliberately ignored.
+	const reason = "authentication failed"
+	_ = binary.Write(s.conn, binary.BigEndian, uint32(len(reason)))
+	_, _ = s.conn.Write([]byte(reason))
+	return fmt.Errorf("authentication failed from %s", s.addr())
+}
+
+func (s *session) sendServerInit() error {
+	name := []byte("NetBird VNC")
+	buf := make([]byte, 0, 4+16+4+len(name))
+
+	// Framebuffer width and height.
+	buf = append(buf, byte(s.serverW>>8), byte(s.serverW))
+	buf = append(buf, byte(s.serverH>>8), byte(s.serverH))
+
+	// Server pixel format.
+	buf = append(buf, serverPixelFormat[:]...)
+
+	// Desktop name.
+	buf = append(buf,
+		byte(len(name)>>24), byte(len(name)>>16),
+		byte(len(name)>>8), byte(len(name)),
+	)
+	buf = append(buf, name...)
+
+	_, err := s.conn.Write(buf)
+	return err
+}
+
+// messageLoop reads client-to-server messages until an error occurs and
+// dispatches each to its handler by message type. It returns the first read
+// or handler error; an unknown message type is an error as well, because
+// its payload length is unknown and the stream can no longer be parsed.
+func (s *session) messageLoop() error {
+	for {
+		var msgType [1]byte
+		// Arm the deadline before waiting for the next message so an idle
+		// or stalled client is dropped after readDeadline.
+		// NOTE(review): SetDeadline covers writes as well as reads, so it
+		// also applies to writes made concurrently by the encoder goroutine
+		// — confirm that is intended.
+		if err := s.conn.SetDeadline(time.Now().Add(readDeadline)); err != nil {
+			return fmt.Errorf("set deadline: %w", err)
+		}
+		if _, err := io.ReadFull(s.conn, msgType[:]); err != nil {
+			return err
+		}
+
+		var err error
+		switch msgType[0] {
+		case clientSetPixelFormat:
+			err = s.handleSetPixelFormat()
+		case clientSetEncodings:
+			err = s.handleSetEncodings()
+		case clientFramebufferUpdateRequest:
+			err = s.handleFBUpdateRequest()
+		case clientKeyEvent:
+			err = s.handleKeyEvent()
+		case clientPointerEvent:
+			err = s.handlePointerEvent()
+		case clientCutText:
+			err = s.handleCutText()
+		case clientNetbirdTypeText:
+			err = s.handleTypeText()
+		default:
+			return fmt.Errorf("unknown client message type: %d", msgType[0])
+		}
+		// Clear the deadline only after the full message has been read and
+		// processed so payload reads in the handlers stay bounded.
+		_ = s.conn.SetDeadline(time.Time{})
+		if err != nil {
+			return err
+		}
+	}
+}
+
+// handleSetPixelFormat reads the body of a SetPixelFormat message (three
+// padding bytes followed by the 16-byte pixel format) and stores the parsed
+// format as the session's client pixel format.
+func (s *session) handleSetPixelFormat() error {
+	const padding = 3
+	var body [padding + 16]byte
+	_, err := io.ReadFull(s.conn, body[:])
+	if err != nil {
+		return fmt.Errorf("read SetPixelFormat: %w", err)
+	}
+	s.pf = parsePixelFormat(body[padding:])
+	return nil
+}
+
+// handleSetEncodings reads a SetEncodings message and records which of the
+// encodings this server implements (Zlib, Hextile, Tight) the client
+// accepts. Each message replaces the previous selection: an encoding absent
+// from the new list is switched off again. The zlib and tight encoder states
+// are created on first use and kept across messages. Encodings the server
+// does not know are ignored.
+//
+// Returns an error when the message cannot be read or lists more than
+// maxEncodings entries.
+func (s *session) handleSetEncodings() error {
+	var header [3]byte // 1 padding + 2 number-of-encodings
+	if _, err := io.ReadFull(s.conn, header[:]); err != nil {
+		return fmt.Errorf("read SetEncodings header: %w", err)
+	}
+	numEnc := binary.BigEndian.Uint16(header[1:3])
+	// RFB clients advertise a handful of real encodings plus pseudo-encodings.
+	// Cap to keep a malicious client from forcing a 256 KiB allocation per
+	// SetEncodings message.
+	const maxEncodings = 64
+	if numEnc > maxEncodings {
+		return fmt.Errorf("SetEncodings: too many encodings (%d)", numEnc)
+	}
+	buf := make([]byte, int(numEnc)*4)
+	if _, err := io.ReadFull(s.conn, buf); err != nil {
+		return fmt.Errorf("read SetEncodings payload: %w", err)
+	}
+
+	// Collect the selection in locals and publish it once at the end, so the
+	// session flags never pass through a half-updated state.
+	var wantZlib, wantHextile, wantTight bool
+	var encs []string
+	for i := range int(numEnc) {
+		enc := int32(binary.BigEndian.Uint32(buf[i*4 : i*4+4]))
+		switch enc {
+		case encZlib:
+			wantZlib = true
+			if s.zlib == nil {
+				s.zlib = newZlibState()
+			}
+			encs = append(encs, "zlib")
+		case encHextile:
+			wantHextile = true
+			encs = append(encs, "hextile")
+		case encTight:
+			wantTight = true
+			if s.tight == nil {
+				s.tight = newTightState()
+			}
+			encs = append(encs, "tight")
+		}
+	}
+	s.useZlib, s.useHextile, s.useTight = wantZlib, wantHextile, wantTight
+
+	if len(encs) > 0 {
+		s.log.Debugf("client supports encodings: %s", strings.Join(encs, ", "))
+	}
+	return nil
+}
+
+// handleFBUpdateRequest parses the request and hands it to the encoder
+// goroutine. It never blocks on capture/encode, so the input dispatch loop
+// stays responsive even when a previous frame is still being encoded.
+//
+// Only the incremental flag (first payload byte) is used; the requested
+// x/y/width/height in the remaining eight bytes are read and discarded.
+func (s *session) handleFBUpdateRequest() error {
+	var req [9]byte
+	if _, err := io.ReadFull(s.conn, req[:]); err != nil {
+		return fmt.Errorf("read FBUpdateRequest: %w", err)
+	}
+	r := fbRequest{incremental: req[0] == 1}
+	// Channel is size 1. If a request is already pending, replace it with
+	// this fresher one so the encoder always works on the latest ask.
+	select {
+	case s.encodeCh <- r:
+	default:
+		// Buffer full: drop the stale request if it is still there (the
+		// encoder may have taken it in the meantime), then try once more.
+		// Every step is non-blocking, so the request is dropped if the
+		// second send cannot proceed either.
+		select {
+		case <-s.encodeCh:
+		default:
+		}
+		select {
+		case s.encodeCh <- r:
+		default:
+		}
+	}
+	return nil
+}
+
+// encoderLoop owns the capture → diff → encode → write pipeline. Running it
+// off the read loop prevents a slow encode (zlib full-frame, many dirty
+// tiles) from blocking inbound input events.
+//
+// It processes requests from encodeCh until that channel is closed or a
+// request fails, and closes done on exit in either case.
+func (s *session) encoderLoop(done chan<- struct{}) {
+	defer close(done)
+	for req := range s.encodeCh {
+		if err := s.processFBRequest(req); err != nil {
+			s.log.Debugf("encode: %v", err)
+			// On write/capture error, close the connection so messageLoop
+			// exits and the session terminates cleanly.
+			s.conn.Close()
+			// Keep receiving until encodeCh is closed so requests queued
+			// in the meantime are discarded.
+			drainRequests(s.encodeCh)
+			return
+		}
+	}
+}
+
+// processFBRequest serves one framebuffer update request on the encoder
+// goroutine: capture a frame, then send either an empty update, a set of
+// dirty rectangles, or a full frame. Only errors from writing to the client
+// are returned; capture errors are answered with an empty update.
+//
+// After a non-empty update, and after an empty diff, the captured frame
+// becomes prevFrame for the next diff via swapPrevCur.
+func (s *session) processFBRequest(req fbRequest) error {
+	img, err := s.captureFrame()
+	if errors.Is(err, errFrameUnchanged) {
+		// macOS hashes the raw capture bytes and short-circuits when the
+		// screen is byte-identical. Treat as "no dirty rects" to skip the
+		// diff and send an empty update.
+		// NOTE(review): this path is taken for non-incremental requests as
+		// well, so a client asking for a full refresh gets an empty update
+		// here — confirm that is acceptable.
+		s.idleFrames++
+		delay := min(s.idleFrames*5, 100)
+		time.Sleep(time.Duration(delay) * time.Millisecond)
+		return s.sendEmptyUpdate()
+	}
+	if err != nil {
+		// Capture failures are transient on Windows: a Ctrl+Alt+Del or
+		// sign-out switches the OS to the secure desktop, and the DXGI
+		// duplicator on the previous desktop returns an error until the
+		// capturer reattaches on the new desktop. Don't tear down the
+		// session. Back off briefly and reply with an empty update so
+		// the client keeps re-requesting.
+		s.log.Debugf("capture (transient): %v", err)
+		time.Sleep(100 * time.Millisecond)
+		return s.sendEmptyUpdate()
+	}
+
+	// Incremental path: needs a previous frame to diff against; the very
+	// first request falls through to a full update.
+	if req.incremental && s.prevFrame != nil {
+		rects := diffRects(s.prevFrame, img, s.serverW, s.serverH, tileSize)
+		if len(rects) == 0 {
+			// Nothing changed. Back off briefly before responding to reduce
+			// CPU usage when the screen is static. The client re-requests
+			// immediately after receiving our empty response, so without
+			// this delay we'd spin at ~1000fps checking for changes.
+			s.idleFrames++
+			delay := min(s.idleFrames*5, 100) // 5ms → 100ms adaptive backoff
+			time.Sleep(time.Duration(delay) * time.Millisecond)
+			s.swapPrevCur()
+			return s.sendEmptyUpdate()
+		}
+		s.idleFrames = 0
+		if s.shouldPromoteToFullFrame(rects) {
+			if err := s.sendFullUpdate(img); err != nil {
+				return err
+			}
+			s.swapPrevCur()
+			return nil
+		}
+		if err := s.sendDirtyRects(img, rects); err != nil {
+			return err
+		}
+		s.swapPrevCur()
+		return nil
+	}
+
+	// Full update.
+	s.idleFrames = 0
+	if err := s.sendFullUpdate(img); err != nil {
+		return err
+	}
+	s.swapPrevCur()
+	return nil
+}
+
+// captureFrame produces the frame for the current encode cycle in the
+// session-owned curFrame buffer and returns it. curFrame is (re)allocated
+// whenever it is missing or its size differs from the server dimensions.
+//
+// Capturers implementing captureIntoer render straight into curFrame, which
+// saves a full-screen copy per frame. All others hand back their own buffer,
+// whose pixels are copied into curFrame (reallocated to the source bounds if
+// they differ) so prevFrame stays stable across the next capture.
+func (s *session) captureFrame() (*image.RGBA, error) {
+	width, height := s.serverW, s.serverH
+	if cur := s.curFrame; cur == nil || cur.Rect.Dx() != width || cur.Rect.Dy() != height {
+		s.curFrame = image.NewRGBA(image.Rect(0, 0, width, height))
+	}
+
+	direct, ok := s.capturer.(captureIntoer)
+	if !ok {
+		captured, err := s.capturer.Capture()
+		if err != nil {
+			return nil, err
+		}
+		if s.curFrame.Rect != captured.Rect {
+			s.curFrame = image.NewRGBA(captured.Rect)
+		}
+		copy(s.curFrame.Pix, captured.Pix)
+		return s.curFrame, nil
+	}
+
+	if err := direct.CaptureInto(s.curFrame); err != nil {
+		return nil, err
+	}
+	return s.curFrame, nil
+}
+
+// shouldPromoteToFullFrame returns true when the dirty rect set covers a
+// large enough fraction of the screen that a single full-frame zlib rect
+// beats per-tile encoding on both CPU time and wire bytes. The crossover
+// is measured via BenchmarkEncodeManyTilesVsFullFrame.
+func (s *session) shouldPromoteToFullFrame(rects [][4]int) bool {
+	if s.serverW == 0 || s.serverH == 0 {
+		return false
+	}
+	var dirty int
+	for _, r := range rects {
+		dirty += r[2] * r[3]
+	}
+	return dirty*fullFramePromoteDen > s.serverW*s.serverH*fullFramePromoteNum
+}
+
+// swapPrevCur makes the just-encoded frame the new prevFrame (for the next
+// diff) and lets the old prevFrame buffer become the next curFrame. Avoids
+// an 8 MB copy per frame compared to the old savePrevFrame path.
+// After the first frame the old prevFrame is nil, so curFrame is nil until
+// captureFrame allocates it again.
+func (s *session) swapPrevCur() {
+	s.prevFrame, s.curFrame = s.curFrame, s.prevFrame
+}
+
+// sendEmptyUpdate sends a FramebufferUpdate with zero rectangles.
+func (s *session) sendEmptyUpdate() error {
+	var buf [4]byte
+	buf[0] = serverFramebufferUpdate
+	s.writeMu.Lock()
+	_, err := s.conn.Write(buf[:])
+	s.writeMu.Unlock()
+	return err
+}
+
+// sendFullUpdate encodes the whole screen as a single rectangle and writes
+// it to the client: Zlib when the client negotiated it and a zlib state
+// exists, Raw otherwise. Encoding happens before the write lock is taken.
+func (s *session) sendFullUpdate(img *image.RGBA) error {
+	var payload []byte
+	switch {
+	case s.useZlib && s.zlib != nil:
+		payload = encodeZlibRect(img, s.pf, 0, 0, s.serverW, s.serverH, s.zlib)
+	default:
+		payload = encodeRawRect(img, s.pf, 0, 0, s.serverW, s.serverH)
+	}
+
+	s.writeMu.Lock()
+	defer s.writeMu.Unlock()
+	_, err := s.conn.Write(payload)
+	return err
+}
+
+func (s *session) sendDirtyRects(img *image.RGBA, rects [][4]int) error {
+	// Build a multi-rectangle FramebufferUpdate.
+	// Header: type(1) + padding(1) + numRects(2)
+	header := make([]byte, 4)
+	header[0] = serverFramebufferUpdate
+	binary.BigEndian.PutUint16(header[2:4], uint16(len(rects)))
+
+	s.writeMu.Lock()
+	defer s.writeMu.Unlock()
+
+	if _, err := s.conn.Write(header); err != nil {
+		return err
+	}
+
+	for _, r := range rects {
+		x, y, w, h := r[0], r[1], r[2], r[3]
+		rectBuf := s.encodeTile(img, x, y, w, h)
+		if _, err := s.conn.Write(rectBuf); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// encodeTile produces the on-wire rect bytes for a single dirty tile,
+// picking the cheapest encoding available, in this order:
+//   - Hextile SolidFill when the client accepts Hextile and the tile is a
+//     single colour (~20 bytes for a 64×64 tile instead of ~1-2 KB zlib,
+//     ~16 KB raw).
+//   - Tight when the client negotiated it and the pixel format is
+//     Tight-compatible.
+//   - Zlib when the client negotiated it.
+//   - Raw otherwise.
+//
+// Output omits the 4-byte FramebufferUpdate header; callers combine multiple
+// tiles into one message.
+func (s *session) encodeTile(img *image.RGBA, x, y, w, h int) []byte {
+	if s.useHextile {
+		if pixel, uniform := tileIsUniform(img, x, y, w, h); uniform {
+			// The uniform pixel is unpacked as R in the low byte, then G,
+			// then B.
+			r := byte(pixel)
+			g := byte(pixel >> 8)
+			b := byte(pixel >> 16)
+			return encodeHextileSolidRect(r, g, b, s.pf, rect{x, y, w, h})
+		}
+		// Full Hextile encoder disabled pending investigation of 16×16
+		// red-tile artifacts on Windows. Solid-fill fast path is safe.
+	}
+	// Larger merged rects: prefer Tight (JPEG for photo-like, Basic+zlib
+	// otherwise) when the client supports it AND the negotiated format is
+	// compatible with Tight's mandatory 24-bit RGB TPIXEL encoding. Tight is
+	// dramatically better than RFB Zlib on photographic content and
+	// competitive on UI.
+	if s.useTight && s.tight != nil && pfIsTightCompatible(s.pf) {
+		return encodeTightRect(img, s.pf, x, y, w, h, s.tight)
+	}
+	// The Zlib and Raw encoders return a complete message; [4:] strips its
+	// 4-byte FramebufferUpdate header.
+	if s.useZlib && s.zlib != nil {
+		return encodeZlibRect(img, s.pf, x, y, w, h, s.zlib)[4:]
+	}
+	return encodeRawRect(img, s.pf, x, y, w, h)[4:]
+}
+
+// handleKeyEvent reads the body of a KeyEvent message (down flag, two
+// padding bytes, big-endian keysym) and forwards it to the input injector.
+func (s *session) handleKeyEvent() error {
+	var body [7]byte
+	_, err := io.ReadFull(s.conn, body[:])
+	if err != nil {
+		return fmt.Errorf("read KeyEvent: %w", err)
+	}
+	pressed := body[0] == 1
+	s.injector.InjectKey(binary.BigEndian.Uint32(body[3:]), pressed)
+	return nil
+}
+
+// handlePointerEvent reads the body of a PointerEvent message (button mask,
+// then big-endian x and y) and forwards it to the input injector together
+// with the server framebuffer dimensions.
+func (s *session) handlePointerEvent() error {
+	var body [5]byte
+	_, err := io.ReadFull(s.conn, body[:])
+	if err != nil {
+		return fmt.Errorf("read PointerEvent: %w", err)
+	}
+	posX := int(binary.BigEndian.Uint16(body[1:3]))
+	posY := int(binary.BigEndian.Uint16(body[3:]))
+	s.injector.InjectPointer(body[0], posX, posY, s.serverW, s.serverH)
+	return nil
+}
+
+// handleCutText reads a ClientCutText message (three padding bytes, a
+// big-endian uint32 length, then the text) and places the text on the
+// server clipboard. A declared length above maxCutTextBytes is rejected
+// before anything is allocated.
+func (s *session) handleCutText() error {
+	var hdr [7]byte // 3 padding + 4 length
+	if _, err := io.ReadFull(s.conn, hdr[:]); err != nil {
+		return fmt.Errorf("read CutText header: %w", err)
+	}
+	size := binary.BigEndian.Uint32(hdr[3:])
+	if size > maxCutTextBytes {
+		return fmt.Errorf("cut text too large: %d bytes", size)
+	}
+
+	text := make([]byte, size)
+	if _, err := io.ReadFull(s.conn, text); err != nil {
+		return fmt.Errorf("read CutText payload: %w", err)
+	}
+	s.injector.SetClipboard(string(text))
+	return nil
+}
+
+// handleTypeText serves the NetBird-specific PasteAndType message sent by
+// the dashboard's Paste button. The wire format is the same as CutText
+// (three padding bytes, big-endian uint32 length, text bytes), but the text
+// is passed to the injector's TypeText instead of the clipboard. A declared
+// length above maxCutTextBytes is rejected before anything is allocated.
+func (s *session) handleTypeText() error {
+	var hdr [7]byte // 3 padding + 4 length
+	if _, err := io.ReadFull(s.conn, hdr[:]); err != nil {
+		return fmt.Errorf("read TypeText header: %w", err)
+	}
+	size := binary.BigEndian.Uint32(hdr[3:])
+	if size > maxCutTextBytes {
+		return fmt.Errorf("type text too large: %d bytes", size)
+	}
+
+	text := make([]byte, size)
+	if _, err := io.ReadFull(s.conn, text); err != nil {
+		return fmt.Errorf("read TypeText payload: %w", err)
+	}
+	s.injector.TypeText(string(text))
+	return nil
+}
+
+// sendServerCutText sends clipboard text from the server to the client.
+func (s *session) sendServerCutText(text string) error {
+	data := []byte(text)
+	buf := make([]byte, 8+len(data))
+	buf[0] = serverCutText
+	// buf[1:4] = padding (zero)
+	binary.BigEndian.PutUint32(buf[4:8], uint32(len(data)))
+	copy(buf[8:], data)
+
+	s.writeMu.Lock()
+	_, err := s.conn.Write(buf)
+	s.writeMu.Unlock()
+	return err
+}
+
+// drainRequests consumes any pending requests so the sender's close completes
+// cleanly after the encoder loop has decided to exit on error. Returns the
+// number of drained requests to defeat empty-block lints; callers ignore it.
+func drainRequests(ch chan fbRequest) int {
+	var drained int
+	for range ch {
+		drained++
+	}
+	return drained
+}
+
+// pfIsTightCompatible reports whether the client pixel format satisfies
+// Tight's TPIXEL constraint: 32 bits per pixel, 8-bit red/green/blue
+// channels (max 255 each) at the standard shifts R=16, G=8, B=0. Any other
+// format has to use Zlib/Hextile/Raw, which honour pf in full.
+func pfIsTightCompatible(pf clientPixelFormat) bool {
+	if pf.bpp != 32 {
+		return false
+	}
+	if pf.rMax != 255 || pf.gMax != 255 || pf.bMax != 255 {
+		return false
+	}
+	return pf.rShift == 16 && pf.gShift == 8 && pf.bShift == 0
+}
--- a/client/vnc/server/shutdown_state.go
+++ b/client/vnc/server/shutdown_state.go
@@ -0,0 +1,80 @@
+//go:build unix
+
+package server
+
+import (
+	"fmt"
+	"os"
+	"strings"
+	"syscall"
+
+	log "github.com/sirupsen/logrus"
+)
+
+// ShutdownState tracks VNC virtual session processes for crash recovery.
+// Persisted by the state manager; on restart, residual processes are killed.
+// It serializes to JSON with a single "processes" field.
+type ShutdownState struct {
+	// Processes maps a description to its PID (e.g., "xvfb:50" -> 1234).
+	// Cleanup uses the description to decide which command lines count as
+	// ours before killing the PID.
+	Processes map[string]int `json:"processes,omitempty"`
+}
+
+// Name returns the state name for the state manager; it is the constant
+// "vnc_sessions_state".
+func (s *ShutdownState) Name() string {
+	return "vnc_sessions_state"
+}
+
+// Cleanup kills any residual VNC session processes left from a crash.
+//
+// For every recorded PID that isOurProcess still attributes to us, the whole
+// process group is sent SIGTERM; if that fails, the single process is sent
+// SIGKILL instead. Entries with a PID of 1 or below are skipped. Kill
+// failures are only logged; the method always returns nil and clears
+// Processes once the list has been walked.
+func (s *ShutdownState) Cleanup() error {
+	if len(s.Processes) == 0 {
+		return nil
+	}
+
+	for desc, pid := range s.Processes {
+		// Refuse PID 1 along with non-positive values: negating it for the
+		// group kill below would yield kill(-1, …), which signals every
+		// process the caller is allowed to signal.
+		if pid <= 1 {
+			continue
+		}
+		if !isOurProcess(pid, desc) {
+			log.Debugf("cleanup:skipping PID %d (%s), not ours", pid, desc)
+			continue
+		}
+		log.Infof("cleanup:killing residual process %d (%s)", pid, desc)
+		// Kill the process group (negative PID) to get children too.
+		if err := syscall.Kill(-pid, syscall.SIGTERM); err != nil {
+			// Try individual process if group kill fails.
+			if killErr := syscall.Kill(pid, syscall.SIGKILL); killErr != nil {
+				log.Debugf("cleanup: kill pid %d (%s): group kill: %v, single kill: %v", pid, desc, err, killErr)
+			}
+		}
+	}
+
+	s.Processes = nil
+	return nil
+}
+
+// isOurProcess verifies the PID still belongs to a VNC-related process by
+// checking /proc/<pid>/cmdline against the kind of process named in desc.
+// It returns false when the process no longer exists, when the command line
+// cannot be read (which includes systems without /proc), or when desc
+// matches none of the known kinds.
+func isOurProcess(pid int, desc string) bool {
+	// Check if the process exists at all (signal 0 delivers nothing).
+	if err := syscall.Kill(pid, 0); err != nil {
+		return false
+	}
+
+	// On Linux, verify via /proc cmdline.
+	cmdline, err := os.ReadFile(fmt.Sprintf("/proc/%d/cmdline", pid))
+	if err != nil {
+		log.Debugf("cleanup: cannot read /proc/%d/cmdline: %v, treating PID as foreign", pid, err)
+		return false
+	}
+
+	cmd := string(cmdline)
+	// Match against expected process types. These are plain substring
+	// checks over the whole command line.
+	if strings.Contains(desc, "xvfb") || strings.Contains(desc, "xorg") {
+		return strings.Contains(cmd, "Xvfb") || strings.Contains(cmd, "Xorg")
+	}
+	if strings.Contains(desc, "desktop") {
+		return strings.Contains(cmd, "session") || strings.Contains(cmd, "plasma") ||
+			strings.Contains(cmd, "gnome") || strings.Contains(cmd, "xfce") ||
+			strings.Contains(cmd, "dbus-launch")
+	}
+	return false
+}
--- a/client/vnc/server/stubs.go
+++ b/client/vnc/server/stubs.go
@@ -0,0 +1,46 @@
+package server
+
+import (
+	"fmt"
+	"image"
+)
+
+// StubCapturer is a placeholder for platforms without screen capture support.
+// It reports a 0x0 screen and every Capture call fails.
+type StubCapturer struct{}
+
+// Width returns 0 on unsupported platforms.
+func (c *StubCapturer) Width() int { return 0 }
+
+// Height returns 0 on unsupported platforms.
+func (c *StubCapturer) Height() int { return 0 }
+
+// Capture always returns a nil image together with a "not supported" error on
+// unsupported platforms.
+func (c *StubCapturer) Capture() (*image.RGBA, error) {
+	return nil, fmt.Errorf("screen capture not supported on this platform")
+}
+
+// StubInputInjector is a placeholder for platforms without input injection support.
+// Every input method silently discards its arguments, and GetClipboard always
+// returns the empty string.
+type StubInputInjector struct{}
+
+// InjectKey is a no-op on unsupported platforms.
+func (s *StubInputInjector) InjectKey(_ uint32, _ bool) {
+	// no-op
+}
+
+// InjectPointer is a no-op on unsupported platforms.
+func (s *StubInputInjector) InjectPointer(_ uint8, _, _, _, _ int) {
+	// no-op
+}
+
+// SetClipboard is a no-op on unsupported platforms; the text is dropped.
+func (s *StubInputInjector) SetClipboard(_ string) {
+	// no-op
+}
+
+// GetClipboard returns empty on unsupported platforms.
+func (s *StubInputInjector) GetClipboard() string { return "" }
+
+// TypeText is a no-op on unsupported platforms; the text is dropped.
+func (s *StubInputInjector) TypeText(_ string) {
+	// no-op
+}
--- a/client/vnc/server/swizzle.go
+++ b/client/vnc/server/swizzle.go
@@ -0,0 +1,29 @@
+package server
+
+import "unsafe"
+
+// swizzleBGRAtoRGBA converts BGRA pixels from src into RGBA pixels in dst by
+// swapping the B and R channels. dst and src may be the same slice, which
+// converts the buffer in place. Only whole pixels that fit in both slices are
+// converted, i.e. min(len(dst), len(src)) / 4 of them; trailing bytes are left
+// untouched.
+//
+// The alpha byte is forced to 0xff so callers that capture from X11 GetImage
+// (where the X server leaves the pad byte as zero) still get an opaque image.
+//
+// On little-endian hosts with uint32-aligned buffers each pixel is handled as
+// one uint32 read-modify-write, which is meaningfully faster than the naive
+// per-byte stores for large buffers like framebuffers. Big-endian hosts and
+// unaligned buffers take a byte-wise loop instead: the word trick below
+// assumes little-endian byte order, and reinterpreting an unaligned []byte as
+// []uint32 is not a valid pointer conversion.
+func swizzleBGRAtoRGBA(dst, src []byte) {
+	n := min(len(dst), len(src)) / 4
+	if n == 0 {
+		return
+	}
+	if !canSwizzleWords(dst, src) {
+		for i := 0; i < n*4; i += 4 {
+			// Read all channels before writing so dst == src keeps working.
+			b, g, r := src[i], src[i+1], src[i+2]
+			dst[i], dst[i+1], dst[i+2], dst[i+3] = r, g, b, 0xff
+		}
+		return
+	}
+	dp := unsafe.Slice((*uint32)(unsafe.Pointer(&dst[0])), n)
+	sp := unsafe.Slice((*uint32)(unsafe.Pointer(&src[0])), n)
+	for i := range n {
+		p := sp[i]
+		// p in memory: B, G, R, A -> as uint32 little-endian: 0xAARRGGBB
+		// Want memory: R, G, B, 0xFF -> uint32 little-endian: 0xFFBBGGRR
+		dp[i] = 0xFF000000 | (p & 0x0000FF00) | ((p & 0x00FF0000) >> 16) | ((p & 0x000000FF) << 16)
+	}
+}
+
+// canSwizzleWords reports whether the uint32 fast path of swizzleBGRAtoRGBA is
+// valid for the given non-empty buffers: the host must be little-endian and
+// both buffers must start on a uint32-aligned address.
+func canSwizzleWords(dst, src []byte) bool {
+	probe := uint32(1)
+	if *(*byte)(unsafe.Pointer(&probe)) != 1 {
+		return false // big-endian host
+	}
+	const mask = unsafe.Alignof(uint32(0)) - 1
+	return uintptr(unsafe.Pointer(&dst[0]))&mask == 0 &&
+		uintptr(unsafe.Pointer(&src[0]))&mask == 0
+}
+
--- a/client/vnc/server/tight_test.go
+++ b/client/vnc/server/tight_test.go
@@ -0,0 +1,84 @@
+package server
+
+import (
+	"bytes"
+	"image/jpeg"
+	"testing"
+)
+
+// decodeTightLength decodes the 1-3 byte compact length found at the start of
+// buf. The first two bytes each carry 7 payload bits plus a continuation flag
+// in the top bit; a third byte, if present, contributes all 8 of its bits. It
+// returns the decoded length and the number of bytes consumed.
+func decodeTightLength(buf []byte) (n, consumed int) {
+	const more = 0x80
+
+	first := buf[0]
+	n = int(first &^ more)
+	if first&more == 0 {
+		return n, 1
+	}
+
+	second := buf[1]
+	n |= int(second&^more) << 7
+	if second&more == 0 {
+		return n, 2
+	}
+
+	return n | int(buf[2])<<14, 3
+}
+
+// TestEncodeTightFill checks that a single-colour 64x64 image is encoded as a
+// fill rectangle: 12 leading bytes (presumably the RFB rectangle header —
+// confirm against encodeTightRect), one sub-encoding byte and the three
+// colour bytes.
+func TestEncodeTightFill(t *testing.T) {
+	pf := defaultClientPixelFormat()
+	img := makeUniformImage(64, 64, 0x12, 0x34, 0x56)
+	tstate := newTightState()
+	buf := encodeTightRect(img, pf, 0, 0, 64, 64, tstate)
+	if len(buf) != 12+1+3 {
+		t.Fatalf("fill rect should be 16 bytes, got %d", len(buf))
+	}
+	if buf[12] != tightFillSubenc {
+		t.Fatalf("expected fill subenc, got 0x%02x", buf[12])
+	}
+	// The fill colour must come out in the same byte order it was given to
+	// makeUniformImage.
+	if buf[13] != 0x12 || buf[14] != 0x34 || buf[15] != 0x56 {
+		t.Fatalf("wrong fill colour: %v", buf[13:16])
+	}
+}
+
+// TestEncodeTightBasic checks that a two-colour 64x64 image is encoded with
+// the basic+filter sub-encoding and the copy filter, followed by a non-zero
+// compact length for the compressed stream.
+func TestEncodeTightBasic(t *testing.T) {
+	pf := defaultClientPixelFormat()
+	img := makeTwoColorImage(64, 64)
+	tstate := newTightState()
+	buf := encodeTightRect(img, pf, 0, 0, 64, 64, tstate)
+	// Only the high nibble of the control byte is compared here.
+	if buf[12]&0xf0 != tightBasicFilter {
+		t.Fatalf("expected basic+filter subenc, got 0x%02x", buf[12])
+	}
+	if buf[13] != tightFilterCopy {
+		t.Fatalf("expected copy filter, got 0x%02x", buf[13])
+	}
+	// Length prefix and zlib stream follow.
+	n, _ := decodeTightLength(buf[14:])
+	if n == 0 {
+		t.Fatalf("zero-length basic stream")
+	}
+}
+
+// TestEncodeTightJPEG checks that a many-colour 128x128 image is encoded with
+// the JPEG sub-encoding and that the embedded payload is a decodable JPEG.
+func TestEncodeTightJPEG(t *testing.T) {
+	pf := defaultClientPixelFormat()
+	img := makeBenchImage(128, 128, 7) // random → many colours
+	tstate := newTightState()
+	buf := encodeTightRect(img, pf, 0, 0, 128, 128, tstate)
+	// Validate sizes explicitly so a malformed rect fails the test with a
+	// message instead of panicking on an out-of-range index.
+	if len(buf) < 14 {
+		t.Fatalf("JPEG rect too short: %d bytes", len(buf))
+	}
+	if buf[12] != tightJPEGSubenc {
+		t.Fatalf("expected JPEG subenc, got 0x%02x", buf[12])
+	}
+	n, consumed := decodeTightLength(buf[13:])
+	start := 13 + consumed
+	if n <= 0 || start+n > len(buf) {
+		t.Fatalf("JPEG length %d at offset %d does not fit in %d-byte rect", n, start, len(buf))
+	}
+	jpegBytes := buf[start : start+n]
+	if _, err := jpeg.Decode(bytes.NewReader(jpegBytes)); err != nil {
+		t.Fatalf("emitted JPEG bytes do not decode: %v", err)
+	}
+}
+
+// TestSampledColorCount checks the colour sampler at both extremes: a uniform
+// image must report exactly one colour, and a random image must report more
+// colours than the cap passed in.
+func TestSampledColorCount(t *testing.T) {
+	uniform := makeUniformImage(64, 64, 0x10, 0x20, 0x30)
+	if c := sampledColorCountInto(map[uint32]struct{}{}, uniform, 0, 0, 64, 64, 32); c != 1 {
+		t.Fatalf("uniform should be 1 colour, got %d", c)
+	}
+	rnd := makeBenchImage(128, 128, 1)
+	if c := sampledColorCountInto(map[uint32]struct{}{}, rnd, 0, 0, 128, 128, 16); c <= 16 {
+		t.Fatalf("random image should exceed colour cap, got %d", c)
+	}
+}
+
--- a/client/vnc/server/virtual_x11.go
+++ b/client/vnc/server/virtual_x11.go
@@ -0,0 +1,725 @@
+//go:build (linux && !android) || freebsd
+
+package server
+
+import (
+	"fmt"
+	"os"
+	"os/exec"
+	"os/user"
+	"path/filepath"
+	"strconv"
+	"strings"
+	"sync"
+	"syscall"
+	"time"
+
+	log "github.com/sirupsen/logrus"
+)
+
+const (
+	// sessionIdleTimeout is how long a virtual session is kept after its last
+	// VNC client disconnects before it is destroyed.
+	sessionIdleTimeout = 5 * time.Minute
+
+	// defaultSessionWidth and defaultSessionHeight are the display geometry
+	// used when a caller requests 0 for the respective dimension.
+	defaultSessionWidth  uint16 = 1280
+	defaultSessionHeight uint16 = 800
+)
+
+// VirtualSession manages a virtual X11 display (Xvfb, or Xorg with the dummy
+// driver) with a desktop session running as a target user. Screen capture and
+// input injection are exposed through Capturer and Injector, which hand out
+// the X11Poller/X11InputInjector pointed at the virtual display.
+type VirtualSession struct {
+	mu        sync.Mutex
+	display   string
+	user      *user.User
+	uid       uint32
+	gid       uint32
+	groups    []uint32
+	width     uint16
+	height    uint16
+	xvfb      *exec.Cmd
+	desktop   *exec.Cmd
+	poller    *X11Poller
+	injector  *X11InputInjector
+	log       *log.Entry
+	stopped   bool
+	clients   int
+	idleTimer *time.Timer
+	onIdle    func() // called when idle timeout fires or Xvfb dies
+}
+
+// StartVirtualSession launches a virtual X11 display plus desktop session for
+// username and returns the running session. It must be called as root, since
+// the desktop is started under the target user's credentials. A width or
+// height of 0 selects the corresponding default dimension.
+func StartVirtualSession(username string, width, height uint16, logger *log.Entry) (*VirtualSession, error) {
+	if os.Getuid() != 0 {
+		return nil, fmt.Errorf("virtual sessions require root privileges")
+	}
+
+	// An X server is mandatory: Xvfb is preferred, Xorg is only acceptable
+	// when its dummy video driver is installed.
+	if _, xvfbErr := exec.LookPath("Xvfb"); xvfbErr != nil {
+		_, xorgErr := exec.LookPath("Xorg")
+		switch {
+		case xorgErr != nil:
+			return nil, fmt.Errorf("neither Xvfb nor Xorg found (install xvfb or xserver-xorg)")
+		case !hasDummyDriver():
+			return nil, fmt.Errorf("xvfb not found and xorg dummy driver not installed (install xvfb or xf86-video-dummy)")
+		}
+	}
+
+	account, err := user.Lookup(username)
+	if err != nil {
+		return nil, fmt.Errorf("lookup user %s: %w", username, err)
+	}
+
+	uid64, err := strconv.ParseUint(account.Uid, 10, 32)
+	if err != nil {
+		return nil, fmt.Errorf("parse uid: %w", err)
+	}
+	gid64, err := strconv.ParseUint(account.Gid, 10, 32)
+	if err != nil {
+		return nil, fmt.Errorf("parse gid: %w", err)
+	}
+
+	// Missing supplementary groups are not fatal; the session simply starts
+	// with whatever list (possibly none) could be resolved.
+	extraGroups, groupsErr := supplementaryGroups(account)
+	if groupsErr != nil {
+		logger.Debugf("supplementary groups for %s: %v", username, groupsErr)
+	}
+
+	session := &VirtualSession{
+		user:   account,
+		uid:    uint32(uid64),
+		gid:    uint32(gid64),
+		groups: extraGroups,
+		width:  defaultSessionWidth,
+		height: defaultSessionHeight,
+		log:    logger.WithField("vnc_user", username),
+	}
+	if width != 0 {
+		session.width = width
+	}
+	if height != 0 {
+		session.height = height
+	}
+
+	if startErr := session.start(); startErr != nil {
+		return nil, startErr
+	}
+	return session, nil
+}
+
+// start brings the session up: it picks a free display, launches the X
+// server, waits for its socket, grants the target user access, attaches the
+// poller and input injector, and finally launches the desktop session. On any
+// failure after the X server was launched, everything started so far is torn
+// down again and the session is marked stopped.
+func (vs *VirtualSession) start() error {
+	// Hold vs.mu for the whole startup. The monitor goroutines spawned by
+	// startXvfb/startDesktop take vs.mu before touching session state, so they
+	// can neither observe half-initialised fields nor race with the writes
+	// below if a child process dies early.
+	vs.mu.Lock()
+	defer vs.mu.Unlock()
+
+	display, err := findFreeDisplay()
+	if err != nil {
+		return fmt.Errorf("find free display: %w", err)
+	}
+	vs.display = display
+
+	if err := vs.startXvfb(); err != nil {
+		return err
+	}
+
+	// abort tears down a partially started session. Marking it stopped first
+	// tells monitorXvfb that the X server exit is intentional, so it neither
+	// logs an "exited unexpectedly" warning nor repeats the cleanup (which
+	// would close the injector a second time).
+	abort := func() {
+		vs.stopped = true
+		vs.stopXvfb()
+	}
+
+	socketPath := fmt.Sprintf("/tmp/.X11-unix/X%s", vs.display[1:])
+	if err := waitForPath(socketPath, 5*time.Second); err != nil {
+		abort()
+		return fmt.Errorf("wait for X11 socket %s: %w", socketPath, err)
+	}
+
+	// Grant the target user access to the display via xhost. Failure is only
+	// logged; startup continues regardless.
+	xhostCmd := exec.Command("xhost", "+SI:localuser:"+vs.user.Username)
+	xhostCmd.Env = []string{"DISPLAY=" + vs.display}
+	if out, err := xhostCmd.CombinedOutput(); err != nil {
+		vs.log.Debugf("xhost: %s (%v)", strings.TrimSpace(string(out)), err)
+	}
+
+	vs.poller = NewX11Poller(vs.display)
+
+	injector, err := NewX11InputInjector(vs.display)
+	if err != nil {
+		abort()
+		return fmt.Errorf("create X11 injector for %s: %w", vs.display, err)
+	}
+	vs.injector = injector
+
+	if err := vs.startDesktop(); err != nil {
+		vs.injector.Close()
+		abort()
+		return fmt.Errorf("start desktop: %w", err)
+	}
+
+	vs.log.Infof("virtual session started: display=%s user=%s", vs.display, vs.user.Username)
+	return nil
+}
+
+// ClientConnect registers one more attached VNC client and, if an idle
+// shutdown was pending, cancels it.
+func (vs *VirtualSession) ClientConnect() {
+	vs.mu.Lock()
+	defer vs.mu.Unlock()
+
+	vs.clients++
+	if pending := vs.idleTimer; pending != nil {
+		pending.Stop()
+		vs.idleTimer = nil
+	}
+}
+
+// ClientDisconnect decrements the client count. When the last client
+// disconnects, it (re)starts the idle timer that destroys the session after
+// sessionIdleTimeout. The count never drops below zero.
+func (vs *VirtualSession) ClientDisconnect() {
+	vs.mu.Lock()
+	defer vs.mu.Unlock()
+	vs.clients--
+	if vs.clients > 0 {
+		return
+	}
+	vs.clients = 0
+	// Stop a timer armed by an earlier disconnect before replacing it, so a
+	// surplus ClientDisconnect call cannot leave a second timer running.
+	if vs.idleTimer != nil {
+		vs.idleTimer.Stop()
+	}
+	vs.log.Infof("no VNC clients connected, session will be destroyed in %s", sessionIdleTimeout)
+	vs.idleTimer = time.AfterFunc(sessionIdleTimeout, vs.idleExpired)
+}
+
+// idleExpired is called by the idle timer. It stops the session and
+// notifies the session manager via onIdle so it removes us from the map.
+// onIdle is optional; nothing is called when it is nil.
+func (vs *VirtualSession) idleExpired() {
+	vs.log.Info("idle timeout reached, destroying virtual session")
+	// Stop must run before onIdle: it takes and releases vs.mu internally.
+	vs.Stop()
+	// onIdle acquires sessionManager.mu; safe because Stop() has released vs.mu.
+	if vs.onIdle != nil {
+		vs.onIdle()
+	}
+}
+
+// isAlive reports whether the session is still usable: it has not been
+// stopped and the X server's Unix socket is still present on disk.
+func (vs *VirtualSession) isAlive() bool {
+	// Snapshot the guarded fields, then do the filesystem check unlocked.
+	vs.mu.Lock()
+	stopped, display := vs.stopped, vs.display
+	vs.mu.Unlock()
+
+	if stopped {
+		return false
+	}
+	_, err := os.Stat(fmt.Sprintf("/tmp/.X11-unix/X%s", display[1:]))
+	return err == nil
+}
+
+// Capturer returns the screen capturer for this virtual session, i.e. the
+// X11 poller attached to the virtual display.
+func (vs *VirtualSession) Capturer() ScreenCapturer {
+	return vs.poller
+}
+
+// Injector returns the input injector for this virtual session, i.e. the
+// X11 input injector attached to the virtual display.
+func (vs *VirtualSession) Injector() InputInjector {
+	return vs.injector
+}
+
+// Display returns the X11 display string (e.g., ":99").
+func (vs *VirtualSession) Display() string {
+	return vs.display
+}
+
+// Stop terminates the virtual session, killing the desktop and Xvfb. It also
+// cancels a pending idle timer and closes the input injector. Stop is
+// idempotent: calls after the first one return immediately.
+func (vs *VirtualSession) Stop() {
+	vs.mu.Lock()
+	defer vs.mu.Unlock()
+
+	if vs.stopped {
+		return
+	}
+	vs.stopped = true
+
+	// Cancel the idle shutdown, as the monitor goroutines do on their
+	// teardown paths; otherwise the timer lingers until it fires against an
+	// already stopped session.
+	if vs.idleTimer != nil {
+		vs.idleTimer.Stop()
+		vs.idleTimer = nil
+	}
+
+	if vs.injector != nil {
+		vs.injector.Close()
+	}
+
+	vs.stopDesktop()
+	vs.stopXvfb()
+
+	vs.log.Info("virtual session stopped")
+}
+
+// startXvfb launches the X server for the session: Xvfb when it is found in
+// PATH, otherwise Xorg with the dummy video driver.
+func (vs *VirtualSession) startXvfb() error {
+	if _, err := exec.LookPath("Xvfb"); err == nil {
+		return vs.startXvfbDirect()
+	}
+	return vs.startXorgDummy()
+}
+
+// startXvfbDirect launches Xvfb on vs.display with a single 24-bit screen of
+// the session's geometry, in its own session (setsid), and starts the
+// goroutine that watches for its exit.
+func (vs *VirtualSession) startXvfbDirect() error {
+	argv := []string{
+		vs.display,
+		"-screen", "0", fmt.Sprintf("%dx%dx24", vs.width, vs.height),
+		"-ac",
+		"-nolisten", "tcp",
+	}
+	server := exec.Command("Xvfb", argv...)
+	server.SysProcAttr = &syscall.SysProcAttr{Setsid: true, Pdeathsig: syscall.SIGTERM}
+	vs.xvfb = server
+
+	if err := server.Start(); err != nil {
+		return fmt.Errorf("start Xvfb on %s: %w", vs.display, err)
+	}
+	vs.log.Infof("Xvfb started on %s (pid=%d)", vs.display, server.Process.Pid)
+
+	go vs.monitorXvfb()
+
+	return nil
+}
+
+// startXorgDummy is the fallback X server launcher used when Xvfb is not
+// installed. It writes a temporary Xorg config selecting the dummy video
+// driver at the session's geometry, starts Xorg with it, and removes the
+// config once the server has exited (or immediately if startup fails).
+func (vs *VirtualSession) startXorgDummy() error {
+	xorgConf := fmt.Sprintf(`Section "Device"
+    Identifier "dummy"
+    Driver "dummy"
+    VideoRam 256000
+EndSection
+Section "Screen"
+    Identifier "screen"
+    Device "dummy"
+    DefaultDepth 24
+    SubSection "Display"
+        Depth 24
+        Modes "%dx%d"
+    EndSubSection
+EndSection
+`, vs.width, vs.height)
+
+	tmp, err := os.CreateTemp("", fmt.Sprintf("nbvnc-dummy-%s-*.conf", vs.display[1:]))
+	if err != nil {
+		return fmt.Errorf("create Xorg dummy config: %w", err)
+	}
+	confPath := tmp.Name()
+
+	// discardOpen closes the still-open temp file and deletes it.
+	discardOpen := func() {
+		tmp.Close()
+		os.Remove(confPath)
+	}
+
+	if _, writeErr := tmp.WriteString(xorgConf); writeErr != nil {
+		discardOpen()
+		return fmt.Errorf("write Xorg dummy config: %w", writeErr)
+	}
+	if chmodErr := tmp.Chmod(0600); chmodErr != nil {
+		discardOpen()
+		return fmt.Errorf("chmod Xorg dummy config: %w", chmodErr)
+	}
+	if closeErr := tmp.Close(); closeErr != nil {
+		os.Remove(confPath)
+		return fmt.Errorf("close Xorg dummy config: %w", closeErr)
+	}
+
+	server := exec.Command("Xorg", vs.display,
+		"-config", confPath,
+		"-noreset",
+		"-nolisten", "tcp",
+		"-ac",
+	)
+	server.SysProcAttr = &syscall.SysProcAttr{Setsid: true, Pdeathsig: syscall.SIGTERM}
+	vs.xvfb = server
+
+	if startErr := server.Start(); startErr != nil {
+		os.Remove(confPath)
+		return fmt.Errorf("start Xorg dummy on %s: %w", vs.display, startErr)
+	}
+	vs.log.Infof("Xorg (dummy driver) started on %s (pid=%d)", vs.display, server.Process.Pid)
+
+	// The config must outlive the server, so it is only removed after the
+	// monitor has seen the process exit.
+	go func() {
+		vs.monitorXvfb()
+		os.Remove(confPath)
+	}()
+
+	return nil
+}
+
+// monitorXvfb waits for the Xvfb/Xorg process to exit. If it exits
+// unexpectedly (not via Stop), the session is marked as dead and the
+// onIdle callback fires so the session manager removes it from the map.
+// The next GetOrCreate call for this user will create a fresh session.
+func (vs *VirtualSession) monitorXvfb() {
+	// Blocks until the X server process has terminated.
+	if err := vs.xvfb.Wait(); err != nil {
+		vs.log.Debugf("X server exited: %v", err)
+	}
+
+	vs.mu.Lock()
+	// stopped already being set means the exit was requested elsewhere and
+	// that path owns the cleanup; nothing is repeated here.
+	alreadyStopped := vs.stopped
+	if !alreadyStopped {
+		vs.log.Warn("X server exited unexpectedly, marking session as dead")
+		vs.stopped = true
+		if vs.idleTimer != nil {
+			vs.idleTimer.Stop()
+			vs.idleTimer = nil
+		}
+		if vs.injector != nil {
+			vs.injector.Close()
+		}
+		vs.stopDesktop()
+	}
+	// Copy the callback while locked, but invoke it only after unlocking.
+	onIdle := vs.onIdle
+	vs.mu.Unlock()
+
+	if !alreadyStopped && onIdle != nil {
+		onIdle()
+	}
+}
+
+// stopXvfb signals the X server's process group: SIGTERM first, then SIGKILL
+// after a 200ms grace period. It is a no-op when no X server was started.
+// Signal errors are logged at debug level only.
+func (vs *VirtualSession) stopXvfb() {
+	server := vs.xvfb
+	if server == nil || server.Process == nil {
+		return
+	}
+	// A negative PID addresses the whole process group.
+	group := -server.Process.Pid
+	if termErr := syscall.Kill(group, syscall.SIGTERM); termErr != nil {
+		vs.log.Debugf("SIGTERM xvfb group: %v", termErr)
+	}
+	time.Sleep(200 * time.Millisecond)
+	if killErr := syscall.Kill(group, syscall.SIGKILL); killErr != nil {
+		vs.log.Debugf("SIGKILL xvfb group: %v", killErr)
+	}
+}
+
+// startDesktop launches the detected desktop session as the target user on
+// the virtual display, in its own session (setsid), and starts the goroutine
+// that watches for its exit.
+func (vs *VirtualSession) startDesktop() error {
+	argv := detectDesktopSession()
+
+	// Most desktop environments (XFCE, MATE, etc.) fail immediately without a
+	// session bus, so run them under dbus-launch whenever it is available.
+	if _, err := exec.LookPath("dbus-launch"); err == nil {
+		argv = append([]string{"dbus-launch", "--exit-with-session"}, argv...)
+	}
+
+	desktop := exec.Command(argv[0], argv[1:]...)
+	desktop.Dir = vs.user.HomeDir
+	desktop.Env = vs.buildUserEnv()
+	desktop.SysProcAttr = &syscall.SysProcAttr{
+		// Run with the target user's identity instead of the caller's.
+		Credential: &syscall.Credential{
+			Uid:    vs.uid,
+			Gid:    vs.gid,
+			Groups: vs.groups,
+		},
+		Setsid:    true,
+		Pdeathsig: syscall.SIGTERM,
+	}
+	vs.desktop = desktop
+
+	if err := desktop.Start(); err != nil {
+		return fmt.Errorf("start desktop session (%v): %w", argv, err)
+	}
+	vs.log.Infof("desktop session started: %v (pid=%d)", argv, desktop.Process.Pid)
+
+	go vs.monitorDesktop()
+
+	return nil
+}
+
+// monitorDesktop waits for the desktop-session process to exit. When the user
+// logs out of GNOME/KDE/XFCE/etc., the session process terminates while Xvfb
+// keeps running, leaving a blank root window. Tear the whole virtual session
+// down so the next connect starts fresh with a login.
+func (vs *VirtualSession) monitorDesktop() {
+	// Blocks until the desktop process has terminated.
+	if err := vs.desktop.Wait(); err != nil {
+		vs.log.Debugf("desktop session exited: %v", err)
+	}
+
+	vs.mu.Lock()
+	// stopped already being set means the teardown was initiated elsewhere
+	// and that path owns the cleanup; nothing is repeated here.
+	alreadyStopped := vs.stopped
+	if !alreadyStopped {
+		vs.log.Info("desktop session exited (logout), tearing down virtual session")
+		vs.stopped = true
+		if vs.idleTimer != nil {
+			vs.idleTimer.Stop()
+			vs.idleTimer = nil
+		}
+		if vs.injector != nil {
+			vs.injector.Close()
+		}
+		vs.stopXvfb()
+	}
+	// Copy the callback while locked, but invoke it only after unlocking.
+	onIdle := vs.onIdle
+	vs.mu.Unlock()
+
+	if !alreadyStopped && onIdle != nil {
+		onIdle()
+	}
+}
+
+// stopDesktop signals the desktop session's process group: SIGTERM first,
+// then SIGKILL after a 200ms grace period. It is a no-op when no desktop
+// process was started. Signal errors are logged at debug level only.
+func (vs *VirtualSession) stopDesktop() {
+	desktop := vs.desktop
+	if desktop == nil || desktop.Process == nil {
+		return
+	}
+	// A negative PID addresses the whole process group.
+	group := -desktop.Process.Pid
+	if termErr := syscall.Kill(group, syscall.SIGTERM); termErr != nil {
+		vs.log.Debugf("SIGTERM desktop group: %v", termErr)
+	}
+	time.Sleep(200 * time.Millisecond)
+	if killErr := syscall.Kill(group, syscall.SIGKILL); killErr != nil {
+		vs.log.Debugf("SIGKILL desktop group: %v", killErr)
+	}
+}
+
+// buildUserEnv assembles the complete environment for the desktop process:
+// the virtual display, the target user's identity and shell, a fixed PATH,
+// and the per-user runtime directory and session bus address under
+// /run/user/<uid>. Nothing is inherited from the caller's environment.
+func (vs *VirtualSession) buildUserEnv() []string {
+	uid := vs.user.Uid
+	name := vs.user.Username
+	runtimeDir := "/run/user/" + uid
+
+	return []string{
+		"DISPLAY=" + vs.display,
+		"HOME=" + vs.user.HomeDir,
+		"USER=" + name,
+		"LOGNAME=" + name,
+		"SHELL=" + getUserShell(uid),
+		"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
+		"XDG_RUNTIME_DIR=" + runtimeDir,
+		"DBUS_SESSION_BUS_ADDRESS=unix:path=" + runtimeDir + "/bus",
+	}
+}
+
+// detectDesktopSession picks the command line used to start the desktop. It
+// prefers a session declared in the xsessions directories (Linux: /usr/share,
+// FreeBSD: /usr/local/share); failing that it tries a fixed list of well-known
+// session binaries, and as a last resort returns plain "xterm".
+func detectDesktopSession() []string {
+	for _, dir := range [...]string{"/usr/share/xsessions", "/usr/local/share/xsessions"} {
+		if argv := findXSession(dir); argv != nil {
+			return argv
+		}
+	}
+
+	// No usable .desktop entry: probe common session commands in order.
+	knownSessions := [...]string{
+		"startplasma-x11",
+		"gnome-session",
+		"xfce4-session",
+		"mate-session",
+		"cinnamon-session",
+		"openbox-session",
+		"xterm",
+	}
+	for _, bin := range knownSessions {
+		if _, err := exec.LookPath(bin); err == nil {
+			return []string{bin}
+		}
+	}
+	return []string{"xterm"}
+}
+
+// sessionPriority defines preference order for desktop environments.
+// Lower number = higher priority. Unknown sessions get 100.
+// Keys are lower-case keywords matched as substrings of the .desktop file
+// name and its Exec command (see sessionPriorityFor).
+var sessionPriority = map[string]int{
+	"plasma":   1, // KDE
+	"gnome":    2,
+	"xfce":     3,
+	"mate":     4,
+	"cinnamon": 5,
+	"lxqt":     6,
+	"lxde":     7,
+	"budgie":   8,
+	"openbox":  20,
+	"fluxbox":  21,
+	"i3":       22,
+	"xinit":    50, // generic user session
+	"lightdm":  50,
+	"default":  50,
+}
+
+// findXSession returns the command line of the most preferred desktop session
+// declared in dir whose executable can actually be found in PATH, or nil when
+// dir is unreadable or contains no usable session.
+//
+// Candidates are tried in priority order, so a stale .desktop file whose
+// binary has been uninstalled no longer hides the other sessions in dir.
+func findXSession(dir string) []string {
+	entries, err := os.ReadDir(dir)
+	if err != nil {
+		return nil
+	}
+	candidates := collectSessionCandidates(dir, entries)
+	for len(candidates) > 0 {
+		best := bestSessionCandidate(candidates)
+		parts := strings.Fields(best.cmd)
+		if len(parts) > 0 {
+			if _, err := exec.LookPath(parts[0]); err == nil {
+				return parts
+			}
+		}
+		// Drop the rejected candidate and retry with the rest. The first
+		// element equal to best is the one bestSessionCandidate selected,
+		// because it keeps the earliest entry among equal priorities.
+		for i, c := range candidates {
+			if c == best {
+				candidates = append(candidates[:i], candidates[i+1:]...)
+				break
+			}
+		}
+	}
+	return nil
+}
+
+// sessionCandidate is one desktop session discovered in an xsessions
+// directory: its Exec command line and its preference rank (lower is better).
+type sessionCandidate struct {
+	cmd      string
+	priority int
+}
+
+// collectSessionCandidates parses every directory entry of dir and returns
+// the ones that describe a usable session, in directory order. The result is
+// nil when none qualify.
+func collectSessionCandidates(dir string, entries []os.DirEntry) []sessionCandidate {
+	var found []sessionCandidate
+	for _, entry := range entries {
+		if candidate, usable := parseSessionEntry(dir, entry); usable {
+			found = append(found, candidate)
+		}
+	}
+	return found
+}
+
+// parseSessionEntry turns one directory entry into a session candidate. Only
+// readable "*.desktop" files whose Exec value is neither empty nor the literal
+// "default" qualify; for anything else the boolean result is false.
+func parseSessionEntry(dir string, e os.DirEntry) (sessionCandidate, bool) {
+	var none sessionCandidate
+
+	fileName := e.Name()
+	if !strings.HasSuffix(fileName, ".desktop") {
+		return none, false
+	}
+	raw, err := os.ReadFile(filepath.Join(dir, fileName))
+	if err != nil {
+		return none, false
+	}
+
+	switch execCmd := extractExecLine(raw); execCmd {
+	case "", "default":
+		return none, false
+	default:
+		return sessionCandidate{
+			cmd:      execCmd,
+			priority: sessionPriorityFor(fileName, execCmd),
+		}, true
+	}
+}
+
+// extractExecLine returns the whitespace-trimmed value of the first line in
+// data that starts with "Exec=", or "" when there is no such line.
+func extractExecLine(data []byte) string {
+	const key = "Exec="
+	for _, line := range strings.Split(string(data), "\n") {
+		if !strings.HasPrefix(line, key) {
+			continue
+		}
+		return strings.TrimSpace(line[len(key):])
+	}
+	return ""
+}
+
+// sessionPriorityFor ranks a session by looking for sessionPriority keywords
+// in the lower-cased file name and Exec command. The best (lowest) matching
+// rank wins; sessions matching no keyword get 100.
+func sessionPriorityFor(name, execCmd string) int {
+	haystack := strings.ToLower(name + " " + execCmd)
+	rank := 100
+	for keyword, candidate := range sessionPriority {
+		if candidate >= rank {
+			continue // cannot improve on what we already have
+		}
+		if strings.Contains(haystack, keyword) {
+			rank = candidate
+		}
+	}
+	return rank
+}
+
+// bestSessionCandidate returns the candidate with the lowest priority value.
+// Among equal priorities the earliest one is kept. candidates must not be
+// empty.
+func bestSessionCandidate(candidates []sessionCandidate) sessionCandidate {
+	winner := 0
+	for i := 1; i < len(candidates); i++ {
+		if candidates[i].priority < candidates[winner].priority {
+			winner = i
+		}
+	}
+	return candidates[winner]
+}
+
+// findFreeDisplay returns the first display in :50..:199 for which neither
+// the X lock file nor the X11 Unix socket exists, or an error when all of
+// them are taken.
+func findFreeDisplay() (string, error) {
+	exists := func(path string) bool {
+		_, err := os.Stat(path)
+		return err == nil
+	}
+
+	const firstDisplay, endDisplay = 50, 200
+	for num := firstDisplay; num < endDisplay; num++ {
+		lock := fmt.Sprintf("/tmp/.X%d-lock", num)
+		socket := fmt.Sprintf("/tmp/.X11-unix/X%d", num)
+		if exists(lock) || exists(socket) {
+			continue
+		}
+		return fmt.Sprintf(":%d", num), nil
+	}
+	return "", fmt.Errorf("no free X11 display found (checked :50-:199)")
+}
+
+// waitForPath checks every 50ms whether path exists and returns nil as soon
+// as it does. Once timeout has elapsed it returns an error instead; with a
+// non-positive timeout the path is never checked at all.
+func waitForPath(path string, timeout time.Duration) error {
+	deadline := time.Now().Add(timeout)
+	for {
+		if !time.Now().Before(deadline) {
+			return fmt.Errorf("timeout waiting for %s", path)
+		}
+		if _, statErr := os.Stat(path); statErr == nil {
+			return nil
+		}
+		time.Sleep(50 * time.Millisecond)
+	}
+}
+
+// getUserShell returns the login shell recorded in /etc/passwd for the given
+// numeric UID (as a decimal string). It falls back to "/bin/sh" when the file
+// cannot be read, when no entry matches, or when the matching entry leaves
+// the shell field empty.
+func getUserShell(uid string) string {
+	const defaultShell = "/bin/sh"
+
+	data, err := os.ReadFile("/etc/passwd")
+	if err != nil {
+		return defaultShell
+	}
+	for _, line := range strings.Split(string(data), "\n") {
+		fields := strings.Split(line, ":")
+		if len(fields) < 7 || fields[2] != uid {
+			continue
+		}
+		// passwd(5): an empty shell field means the default shell. Returning
+		// it verbatim would export an empty SHELL to the desktop session.
+		if shell := strings.TrimSpace(fields[6]); shell != "" {
+			return shell
+		}
+		return defaultShell
+	}
+	return defaultShell
+}
+
+// supplementaryGroups resolves the group IDs that u belongs to and converts
+// them to uint32. IDs that are not valid 32-bit unsigned decimals are skipped.
+// A lookup failure is returned as-is together with a nil slice.
+func supplementaryGroups(u *user.User) ([]uint32, error) {
+	rawIDs, err := u.GroupIds()
+	if err != nil {
+		return nil, err
+	}
+
+	var numeric []uint32
+	for _, raw := range rawIDs {
+		if parsed, parseErr := strconv.ParseUint(raw, 10, 32); parseErr == nil {
+			numeric = append(numeric, uint32(parsed))
+		}
+	}
+	return numeric, nil
+}
+
+// sessionManager tracks active virtual sessions by username.
+// mu guards the sessions map; each user has at most one entry.
+type sessionManager struct {
+	mu       sync.Mutex
+	sessions map[string]*VirtualSession
+	log      *log.Entry
+}
+
+// newSessionManager returns an empty session manager that logs through logger.
+func newSessionManager(logger *log.Entry) *sessionManager {
+	return &sessionManager{
+		sessions: make(map[string]*VirtualSession),
+		log:      logger,
+	}
+}
+
+// GetOrCreate returns an existing virtual session or creates a new one with
+// the requested geometry. If a previous session for this user is alive it is
+// reused regardless of the requested geometry; the first caller's size wins
+// until the session idles out. If a previous session is stopped or its X
+// server died, it is replaced.
+//
+// A newly created session gets an onIdle callback that removes it from the
+// manager again, provided it is still the session registered for username.
+func (sm *sessionManager) GetOrCreate(username string, width, height uint16) (vncSession, error) {
+	sm.mu.Lock()
+	defer sm.mu.Unlock()
+
+	if vs, ok := sm.sessions[username]; ok {
+		if vs.isAlive() {
+			return vs, nil
+		}
+		sm.log.Infof("replacing dead virtual session for %s", username)
+		vs.Stop()
+		delete(sm.sessions, username)
+	}
+
+	vs, err := StartVirtualSession(username, width, height, sm.log)
+	if err != nil {
+		return nil, err
+	}
+	// The session's monitor goroutines are already running and read vs.onIdle
+	// while holding vs.mu, so the callback has to be published under that
+	// same lock to avoid a data race.
+	vs.mu.Lock()
+	vs.onIdle = func() {
+		sm.mu.Lock()
+		defer sm.mu.Unlock()
+		// Only remove the entry if it has not been replaced in the meantime.
+		if cur, ok := sm.sessions[username]; ok && cur == vs {
+			delete(sm.sessions, username)
+			sm.log.Infof("removed idle virtual session for %s", username)
+		}
+	}
+	vs.mu.Unlock()
+	sm.sessions[username] = vs
+	return vs, nil
+}
+
+// hasDummyDriver reports whether the Xorg dummy video driver module exists at
+// one of the usual installation paths.
+func hasDummyDriver() bool {
+	knownLocations := [...]string{
+		"/usr/lib/xorg/modules/drivers/dummy_drv.so",                  // Debian/Ubuntu
+		"/usr/lib64/xorg/modules/drivers/dummy_drv.so",                // RHEL/Fedora
+		"/usr/local/lib/xorg/modules/drivers/dummy_drv.so",            // FreeBSD
+		"/usr/lib/x86_64-linux-gnu/xorg/modules/drivers/dummy_drv.so", // Debian multiarch
+	}
+	for _, module := range knownLocations {
+		_, statErr := os.Stat(module)
+		if statErr == nil {
+			return true
+		}
+	}
+	return false
+}
+
+// StopAll stops every tracked virtual session and empties the manager,
+// logging each session as it is stopped.
+func (sm *sessionManager) StopAll() {
+	sm.mu.Lock()
+	defer sm.mu.Unlock()
+
+	for owner, session := range sm.sessions {
+		session.Stop()
+		delete(sm.sessions, owner)
+		sm.log.Infof("stopped virtual session for %s", owner)
+	}
+}
--- a/client/wasm/cmd/main.go
+++ b/client/wasm/cmd/main.go
@@ -19,8 +19,8 @@ import (
 	nbstatus "github.com/netbirdio/netbird/client/status"
 	wasmcapture "github.com/netbirdio/netbird/client/wasm/internal/capture"
 	"github.com/netbirdio/netbird/client/wasm/internal/http"
-	"github.com/netbirdio/netbird/client/wasm/internal/rdp"
 	"github.com/netbirdio/netbird/client/wasm/internal/ssh"
+	"github.com/netbirdio/netbird/client/wasm/internal/vnc"
 	"github.com/netbirdio/netbird/util"
 )

@@ -364,29 +364,133 @@ func createProxyRequestMethod(client *netbird.Client) js.Func {
 	})
 }

-// createRDPProxyMethod creates the RDP proxy method
-func createRDPProxyMethod(client *netbird.Client) js.Func {
+// createVNCProxyMethod creates the VNC proxy method for raw TCP-over-WebSocket bridging.
+// JS signature: createVNCProxy(hostname, port, mode?, username?, jwt?, sessionID?, width?, height?)
+// mode: "attach" (default) or "session"
+// username: required when mode is "session"
+// jwt: authentication token (from OIDC session)
+// sessionID: Windows session ID (0 = console/auto)
+// width/height: requested viewport size for session mode (0 = server default)
+func createVNCProxyMethod(client *netbird.Client) js.Func {
 	return js.FuncOf(func(_ js.Value, args []js.Value) any {
-		if len(args) < 2 {
-			return js.ValueOf("error: hostname and port required")
+		params, err := parseVNCProxyArgs(args)
+		if err != nil {
+			if params.rejectViaPromise {
+				return createPromise(func(resolve, reject js.Value) {
+					reject.Invoke(js.ValueOf(err.Error()))
+				})
+			}
+			return js.ValueOf(err.Error())
 		}
-
-		if args[0].Type() != js.TypeString {
-			return createPromise(func(resolve, reject js.Value) {
-				reject.Invoke(js.ValueOf("hostname parameter must be a string"))
-			})
-		}
-		if args[1].Type() != js.TypeString {
-			return createPromise(func(resolve, reject js.Value) {
-				reject.Invoke(js.ValueOf("port parameter must be a string"))
-			})
-		}
-
-		proxy := rdp.NewRDCleanPathProxy(client)
-		return proxy.CreateProxy(args[0].String(), args[1].String())
+		proxy := vnc.NewVNCProxy(client)
+		return proxy.CreateProxy(vnc.ProxyRequest{
+			Hostname:  params.hostname,
+			Port:      params.port,
+			Mode:      params.mode,
+			Username:  params.username,
+			JWT:       params.jwt,
+			SessionID: params.sessionID,
+			Width:     params.width,
+			Height:    params.height,
+		})
 	})
 }

+// vncProxyParams holds the arguments of the JS createVNCProxy call after
+// validation. Optional arguments that were omitted, or had an unexpected JS
+// type, keep their zero value (mode defaults to "attach").
+type vncProxyParams struct {
+	hostname         string
+	port             string
+	mode             string
+	username         string
+	jwt              string
+	sessionID        uint32
+	width            uint16
+	height           uint16
+	rejectViaPromise bool // true when the JS caller expects a rejected Promise instead of a plain string return
+}
+
+// parseVNCProxyArgs validates the JS arguments of createVNCProxyMethod in
+// three stages (required args, optional strings, optional numbers) and stops
+// at the first failure. The params are returned even on error, because
+// rejectViaPromise tells the caller how to report that error to JS.
+func parseVNCProxyArgs(args []js.Value) (vncProxyParams, error) {
+	var params vncProxyParams
+	stages := [...]func([]js.Value, *vncProxyParams) error{
+		parseVNCProxyRequiredArgs,
+		parseVNCProxyOptionalStrings,
+		parseVNCProxyOptionalNumbers,
+	}
+	for _, stage := range stages {
+		if err := stage(args, &params); err != nil {
+			return params, err
+		}
+	}
+	return params, nil
+}
+
+// parseVNCProxyRequiredArgs checks that hostname and port are present and are
+// JS strings, stores them in p and sets the default mode "attach". A missing
+// argument is reported as a plain error; a wrongly typed one additionally
+// sets rejectViaPromise.
+func parseVNCProxyRequiredArgs(args []js.Value, p *vncProxyParams) error {
+	switch {
+	case len(args) < 2:
+		return fmt.Errorf("hostname and port required")
+	case args[0].Type() != js.TypeString:
+		p.rejectViaPromise = true
+		return fmt.Errorf("hostname parameter must be a string")
+	case args[1].Type() != js.TypeString:
+		p.rejectViaPromise = true
+		return fmt.Errorf("port parameter must be a string")
+	}
+
+	p.hostname, p.port = args[0].String(), args[1].String()
+	p.mode = "attach"
+	return nil
+}
+
+// parseVNCProxyOptionalStrings reads the optional mode, username and jwt
+// arguments (positions 2-4). Arguments that are absent or not JS strings are
+// ignored. The resulting mode must be "attach" or "session"; anything else is
+// rejected via Promise.
+func parseVNCProxyOptionalStrings(args []js.Value, p *vncProxyParams) error {
+	// stringAt returns args[idx] when it exists and is a JS string.
+	stringAt := func(idx int) (string, bool) {
+		if idx >= len(args) || args[idx].Type() != js.TypeString {
+			return "", false
+		}
+		return args[idx].String(), true
+	}
+
+	if mode, ok := stringAt(2); ok {
+		p.mode = mode
+	}
+	switch p.mode {
+	case "attach", "session":
+		// valid
+	default:
+		p.rejectViaPromise = true
+		return fmt.Errorf("invalid mode %q: expected \"attach\" or \"session\"", p.mode)
+	}
+
+	if username, ok := stringAt(3); ok {
+		p.username = username
+	}
+	if token, ok := stringAt(4); ok {
+		p.jwt = token
+	}
+	return nil
+}
+
+// parseVNCProxyOptionalNumbers reads the optional sessionID, width and height
+// arguments (positions 5-7). Arguments that are absent or not JS numbers are
+// ignored and leave the zero value in place; out-of-range values are rejected
+// via Promise.
+func parseVNCProxyOptionalNumbers(args []js.Value, p *vncProxyParams) error {
+	// intAt returns args[idx] as an int when it exists and is a JS number.
+	intAt := func(idx int) (int, bool) {
+		if idx >= len(args) || args[idx].Type() != js.TypeNumber {
+			return 0, false
+		}
+		return args[idx].Int(), true
+	}
+
+	if id, ok := intAt(5); ok {
+		if id < 0 || id > 0xFFFFFFFF {
+			p.rejectViaPromise = true
+			return fmt.Errorf("invalid sessionID %d: must be 0..0xFFFFFFFF", id)
+		}
+		p.sessionID = uint32(id)
+	}
+
+	// A width/height of 0 means "use server default", so only out-of-range
+	// values are refused; attach mode, which omits both, keeps working.
+	if w, ok := intAt(6); ok {
+		if w < 0 || w > 0xFFFF {
+			p.rejectViaPromise = true
+			return fmt.Errorf("invalid width %d: must be 0..65535", w)
+		}
+		p.width = uint16(w)
+	}
+	if h, ok := intAt(7); ok {
+		if h < 0 || h > 0xFFFF {
+			p.rejectViaPromise = true
+			return fmt.Errorf("invalid height %d: must be 0..65535", h)
+		}
+		p.height = uint16(h)
+	}
+	return nil
+}
+
 // getStatusOverview is a helper to get the status overview
 func getStatusOverview(client *netbird.Client) (nbstatus.OutputOverview, error) {
 	fullStatus, err := client.Status()
@@ -676,7 +780,7 @@ func createClientObject(client *netbird.Client) js.Value {
 	obj["detectSSHServerType"] = createDetectSSHServerMethod(client)
 	obj["createSSHConnection"] = createSSHMethod(client)
 	obj["proxyRequest"] = createProxyRequestMethod(client)
-	obj["createRDPProxy"] = createRDPProxyMethod(client)
+	obj["createVNCProxy"] = createVNCProxyMethod(client)
 	obj["status"] = createStatusMethod(client)
 	obj["statusSummary"] = createStatusSummaryMethod(client)
 	obj["statusDetail"] = createStatusDetailMethod(client)
--- a/client/wasm/internal/vnc/proxy.go
+++ b/client/wasm/internal/vnc/proxy.go
@@ -0,0 +1,427 @@
+//go:build js
+
+package vnc
+
+import (
+	"context"
+	"fmt"
+	"io"
+	"net"
+	"sync"
+	"sync/atomic"
+	"syscall/js"
+	"time"
+
+	log "github.com/sirupsen/logrus"
+)
+
+const (
+	// vncProxyHost and vncProxyScheme form the synthetic proxy URL
+	// (<scheme>://<host>/<proxyID>) that newProxyPromise resolves with.
+	vncProxyHost   = "vnc.proxy.local"
+	vncProxyScheme = "ws"
+	// vncDialTimeout bounds the dial to the VNC server in connectToVNC.
+	vncDialTimeout = 15 * time.Second
+
+	// Connection modes matching server/server.go constants.
+	// The selected mode is the first byte of the session header.
+	modeAttach  byte = 0
+	modeSession byte = 1
+)
+
+// VNCProxy bridges WebSocket connections from noVNC in the browser
+// to TCP VNC server connections through the NetBird tunnel.
+//
+// mu guards activeConnections, destinations and pendingHandlers. All three
+// maps are keyed by proxy ID; destinations and pendingHandlers are allocated
+// lazily on first registration. nextID is an atomic counter used to mint
+// proxy IDs of the form "vnc_proxy_<n>".
+type VNCProxy struct {
+	// nbClient dials the VNC server; connectToVNC calls it with network "tcp".
+	nbClient interface {
+		Dial(ctx context.Context, network, address string) (net.Conn, error)
+	}
+	// activeConnections holds connections that have received their WebSocket;
+	// destinations holds the target registered for each proxy ID. Both entries
+	// are removed by cleanupConnection.
+	activeConnections map[string]*vncConnection
+	destinations      map[string]vncDestination
+	// pendingHandlers holds the js.Func for handleVNCWebSocket_<id> between
+	// CreateProxy and handleWebSocketConnection so we can move it onto the
+	// vncConnection for later release.
+	pendingHandlers map[string]js.Func
+	mu              sync.Mutex
+	nextID          atomic.Uint64
+}
+
+// vncDestination describes one VNC target. address is the host:port that is
+// dialed; every other field is serialized by sendSessionHeader and sent to
+// the server ahead of the RFB handshake. mode is modeAttach or modeSession.
+type vncDestination struct {
+	address   string
+	mode      byte
+	username  string
+	jwt       string
+	sessionID uint32 // Windows session ID (0 = auto/console)
+	width     uint16 // Requested viewport width for session mode (0 = default)
+	height    uint16 // Requested viewport height for session mode (0 = default)
+}
+
+// vncConnection is the per-WebSocket state of one proxied VNC session.
+//
+// id is the proxy ID the connection was created for. mu guards vncConn,
+// which stays nil until connectToVNC has established the server connection
+// and is reset to nil by cleanupConnection. wsHandlers is the JS object
+// received by handleWebSocketConnection: Go attaches onGoMessage/onGoClose to
+// it and calls its close, receiveFromGo or send methods. Cancelling ctx (via
+// cancel) is the signal that makes connectToVNC tear the connection down.
+type vncConnection struct {
+	id          string
+	destination vncDestination
+	mu          sync.Mutex
+	vncConn     net.Conn
+	wsHandlers  js.Value
+	ctx         context.Context
+	cancel      context.CancelFunc
+	// Go-side callbacks exposed to JS. js.FuncOf pins the Go closure in a
+	// global handle map and MUST be released, otherwise every connection
+	// leaks the Go memory the closure captures.
+	// wsHandlerFn is the global handleVNCWebSocket_<id> entry point;
+	// onMessageFn and onCloseFn are the onGoMessage/onGoClose callbacks.
+	wsHandlerFn js.Func
+	onMessageFn js.Func
+	onCloseFn   js.Func
+}
+
+// NewVNCProxy builds a VNCProxy that reaches VNC servers through client.
+// Only the active-connection table is allocated here; the destination and
+// pending-handler maps are created on first use.
+func NewVNCProxy(client interface {
+	Dial(ctx context.Context, network, address string) (net.Conn, error)
+}) *VNCProxy {
+	proxy := new(VNCProxy)
+	proxy.nbClient = client
+	proxy.activeConnections = map[string]*vncConnection{}
+	return proxy
+}
+
+// ProxyRequest bundles the per-call parameters for CreateProxy so the JS
+// boundary doesn't drown callers in a wide positional argument list.
+//
+// Hostname and Port are joined into the address that is dialed. Mode is
+// "attach" or "session"; CreateProxy treats every value other than "session"
+// as attach. Username, JWT, SessionID, Width and Height are copied verbatim
+// into the destination and sent to the server in the session header.
+type ProxyRequest struct {
+	Hostname  string
+	Port      string
+	Mode      string
+	Username  string
+	JWT       string
+	SessionID uint32
+	Width     uint16
+	Height    uint16
+}
+
+// CreateProxy creates a new proxy endpoint for the given VNC destination.
+// req.Mode is "attach" (capture current display) or "session" (virtual session).
+// req.Username is required for session mode. req.Width/Height request the
+// virtual display geometry for session mode; 0 means use the server default.
+// Returns a JS Promise that resolves to the WebSocket proxy URL.
+func (p *VNCProxy) CreateProxy(req ProxyRequest) js.Value {
+	hostname, port, mode, username, jwt := req.Hostname, req.Port, req.Mode, req.Username, req.JWT
+	sessionID, width, height := req.SessionID, req.Width, req.Height
+	address := net.JoinHostPort(hostname, port)
+
+	var m byte
+	if mode == "session" {
+		m = modeSession
+	}
+
+	dest := vncDestination{
+		address:   address,
+		mode:      m,
+		username:  username,
+		jwt:       jwt,
+		sessionID: sessionID,
+		width:     width,
+		height:    height,
+	}
+	return p.newProxyPromise(address, mode, username, dest)
+}
+
+// newProxyPromise wraps the JS Promise creation + executor lifecycle so
+// CreateProxy stays a thin parameter-bundling entrypoint.
+func (p *VNCProxy) newProxyPromise(address, mode, username string, dest vncDestination) js.Value {
+
+	var executor js.Func
+	executor = js.FuncOf(func(_ js.Value, args []js.Value) any {
+		resolve := args[0]
+
+		go func() {
+			defer executor.Release()
+
+			proxyID := fmt.Sprintf("vnc_proxy_%d", p.nextID.Add(1))
+
+			p.mu.Lock()
+			if p.destinations == nil {
+				p.destinations = make(map[string]vncDestination)
+			}
+			p.destinations[proxyID] = dest
+			p.mu.Unlock()
+
+			proxyURL := fmt.Sprintf("%s://%s/%s", vncProxyScheme, vncProxyHost, proxyID)
+
+			handlerFn := js.FuncOf(func(_ js.Value, args []js.Value) any {
+				if len(args) < 1 {
+					return js.ValueOf("error: requires WebSocket argument")
+				}
+				p.handleWebSocketConnection(args[0], proxyID)
+				return nil
+			})
+			p.mu.Lock()
+			if p.pendingHandlers == nil {
+				p.pendingHandlers = make(map[string]js.Func)
+			}
+			p.pendingHandlers[proxyID] = handlerFn
+			p.mu.Unlock()
+			js.Global().Set(fmt.Sprintf("handleVNCWebSocket_%s", proxyID), handlerFn)
+
+			log.Infof("created VNC proxy: %s -> %s (mode=%s, user=%s)", proxyURL, address, mode, username)
+			resolve.Invoke(proxyURL)
+		}()
+
+		return nil
+	})
+	return js.Global().Get("Promise").New(executor)
+}
+
+func (p *VNCProxy) handleWebSocketConnection(ws js.Value, proxyID string) {
+	p.mu.Lock()
+	dest, ok := p.destinations[proxyID]
+	handlerFn := p.pendingHandlers[proxyID]
+	delete(p.pendingHandlers, proxyID)
+	p.mu.Unlock()
+
+	if !ok {
+		log.Errorf("no destination for VNC proxy %s", proxyID)
+		return
+	}
+
+	ctx, cancel := context.WithCancel(context.Background())
+
+	conn := &vncConnection{
+		id:          proxyID,
+		destination: dest,
+		wsHandlers:  ws,
+		ctx:         ctx,
+		cancel:      cancel,
+		wsHandlerFn: handlerFn,
+	}
+
+	p.mu.Lock()
+	p.activeConnections[proxyID] = conn
+	p.mu.Unlock()
+
+	p.setupWebSocketHandlers(ws, conn)
+	go p.connectToVNC(conn)
+
+	log.Infof("VNC proxy WebSocket connection established for %s", proxyID)
+}
+
+func (p *VNCProxy) setupWebSocketHandlers(ws js.Value, conn *vncConnection) {
+	conn.onMessageFn = js.FuncOf(func(_ js.Value, args []js.Value) any {
+		if len(args) < 1 {
+			return nil
+		}
+		data := args[0]
+		go p.handleWebSocketMessage(conn, data)
+		return nil
+	})
+	ws.Set("onGoMessage", conn.onMessageFn)
+
+	conn.onCloseFn = js.FuncOf(func(_ js.Value, _ []js.Value) any {
+		log.Debug("VNC WebSocket closed by JavaScript")
+		conn.cancel()
+		return nil
+	})
+	ws.Set("onGoClose", conn.onCloseFn)
+}
+
+func (p *VNCProxy) handleWebSocketMessage(conn *vncConnection, data js.Value) {
+	if !data.InstanceOf(js.Global().Get("Uint8Array")) {
+		return
+	}
+
+	length := data.Get("length").Int()
+	buf := make([]byte, length)
+	js.CopyBytesToGo(buf, data)
+
+	conn.mu.Lock()
+	vncConn := conn.vncConn
+	conn.mu.Unlock()
+
+	if vncConn == nil {
+		return
+	}
+
+	if _, err := vncConn.Write(buf); err != nil {
+		log.Debugf("write to VNC server: %v", err)
+	}
+}
+
+// connectToVNC dials the VNC server for conn, sends the NetBird session
+// header, starts the server-to-browser read loop and then blocks until the
+// connection context is cancelled, at which point it cleans up. On a dial or
+// header failure it closes the WebSocket and cleans up immediately.
+//
+// The server connection is published on conn only after the session header
+// has been written in full. The onGoMessage handler writes to conn.vncConn as
+// soon as it is non-nil, so publishing it earlier would allow browser bytes
+// to reach the server ahead of, or interleaved with, the header.
+func (p *VNCProxy) connectToVNC(conn *vncConnection) {
+	// fail closes the WebSocket so noVNC fires a disconnect event, then
+	// releases everything tied to conn.
+	fail := func(reason string) {
+		if conn.wsHandlers.Get("close").Truthy() {
+			conn.wsHandlers.Call("close", 1006, reason)
+		}
+		p.cleanupConnection(conn)
+	}
+
+	ctx, cancel := context.WithTimeout(conn.ctx, vncDialTimeout)
+	defer cancel()
+
+	vncConn, err := p.nbClient.Dial(ctx, "tcp", conn.destination.address)
+	if err != nil {
+		log.Errorf("VNC connect to %s: %v", conn.destination.address, err)
+		fail(fmt.Sprintf("connect to peer: %v", err))
+		return
+	}
+
+	// Send the NetBird VNC session header before the RFB handshake.
+	if err := p.sendSessionHeader(vncConn, conn.destination); err != nil {
+		log.Errorf("send VNC session header: %v", err)
+		// vncConn has not been stored on conn yet, so cleanupConnection
+		// would not close it; do it here.
+		if closeErr := vncConn.Close(); closeErr != nil {
+			log.Debugf("close VNC connection: %v", closeErr)
+		}
+		fail(fmt.Sprintf("send session header: %v", err))
+		return
+	}
+
+	// From here on the onGoMessage handler may write to the server.
+	conn.mu.Lock()
+	conn.vncConn = vncConn
+	conn.mu.Unlock()
+
+	// WS→TCP is handled by the onGoMessage handler set in setupWebSocketHandlers,
+	// which writes directly to the VNC connection as data arrives from JS.
+	// Only the TCP→WS direction needs a read loop here.
+	go p.forwardConnToWS(conn)
+
+	<-conn.ctx.Done()
+	p.cleanupConnection(conn)
+}
+
+// sendSessionHeader writes mode, username, JWT, Windows session ID, and the
+// requested viewport size to the VNC server.
+// Format: [mode:1] [username_len:2] [username:N] [jwt_len:2] [jwt:N]
+//
+//	[session_id:4] [width:2] [height:2]
+func (p *VNCProxy) sendSessionHeader(conn net.Conn, dest vncDestination) error {
+	usernameBytes := []byte(dest.username)
+	jwtBytes := []byte(dest.jwt)
+	if len(usernameBytes) > 0xFFFF {
+		return fmt.Errorf("username too long: %d bytes (max %d)", len(usernameBytes), 0xFFFF)
+	}
+	if len(jwtBytes) > 0xFFFF {
+		return fmt.Errorf("jwt too long: %d bytes (max %d)", len(jwtBytes), 0xFFFF)
+	}
+	hdr := make([]byte, 3+len(usernameBytes)+2+len(jwtBytes)+4+4)
+	hdr[0] = dest.mode
+	hdr[1] = byte(len(usernameBytes) >> 8)
+	hdr[2] = byte(len(usernameBytes))
+	off := 3
+	copy(hdr[off:], usernameBytes)
+	off += len(usernameBytes)
+	hdr[off] = byte(len(jwtBytes) >> 8)
+	hdr[off+1] = byte(len(jwtBytes))
+	off += 2
+	copy(hdr[off:], jwtBytes)
+	off += len(jwtBytes)
+	hdr[off] = byte(dest.sessionID >> 24)
+	hdr[off+1] = byte(dest.sessionID >> 16)
+	hdr[off+2] = byte(dest.sessionID >> 8)
+	hdr[off+3] = byte(dest.sessionID)
+	off += 4
+	hdr[off] = byte(dest.width >> 8)
+	hdr[off+1] = byte(dest.width)
+	hdr[off+2] = byte(dest.height >> 8)
+	hdr[off+3] = byte(dest.height)
+
+	for off := 0; off < len(hdr); {
+		n, err := conn.Write(hdr[off:])
+		if err != nil {
+			return fmt.Errorf("write session header: %w", err)
+		}
+		off += n
+	}
+	return nil
+}
+
+func (p *VNCProxy) forwardConnToWS(conn *vncConnection) {
+	buf := make([]byte, 32*1024)
+
+	for {
+		if conn.ctx.Err() != nil {
+			return
+		}
+		vc, ok := conn.snapshotVNC()
+		if !ok {
+			return
+		}
+		if err := vc.SetReadDeadline(time.Now().Add(30 * time.Second)); err != nil {
+			log.Debugf("set VNC read deadline: %v", err)
+		}
+		n, err := vc.Read(buf)
+		if err != nil {
+			if p.handleConnReadError(conn, err) {
+				return
+			}
+			continue
+		}
+		if n > 0 {
+			p.sendToWebSocket(conn, buf[:n])
+		}
+	}
+}
+
+// snapshotVNC reads the current server connection under c.mu. The boolean is
+// false when no connection is set, which is the case before the server has
+// been reached and after cleanup.
+func (c *vncConnection) snapshotVNC() (net.Conn, bool) {
+	c.mu.Lock()
+	current := c.vncConn
+	c.mu.Unlock()
+	return current, current != nil
+}
+
+// handleConnReadError classifies an error from the VNC read loop. Returns
+// true if the caller should exit; false to retry (transient timeout).
+func (p *VNCProxy) handleConnReadError(conn *vncConnection, err error) bool {
+	if conn.ctx.Err() != nil {
+		return true
+	}
+	if netErr, ok := err.(interface{ Timeout() bool }); ok && netErr.Timeout() {
+		// Read timeout: connection might be stale. The next iteration will
+		// fail too and trigger the close path.
+		return false
+	}
+	if err != io.EOF {
+		log.Debugf("read from VNC connection: %v", err)
+	}
+	// Close the WebSocket to notify noVNC, and cancel the local context so
+	// cleanupConnection isn't left waiting on the JS close callback that
+	// may never fire on hard errors.
+	if conn.wsHandlers.Get("close").Truthy() {
+		conn.wsHandlers.Call("close", 1006, "VNC connection lost")
+	}
+	conn.cancel()
+	return true
+}
+
+func (p *VNCProxy) sendToWebSocket(conn *vncConnection, data []byte) {
+	if conn.wsHandlers.Get("receiveFromGo").Truthy() {
+		uint8Array := js.Global().Get("Uint8Array").New(len(data))
+		js.CopyBytesToJS(uint8Array, data)
+		conn.wsHandlers.Call("receiveFromGo", uint8Array.Get("buffer"))
+	} else if conn.wsHandlers.Get("send").Truthy() {
+		uint8Array := js.Global().Get("Uint8Array").New(len(data))
+		js.CopyBytesToJS(uint8Array, data)
+		conn.wsHandlers.Call("send", uint8Array.Get("buffer"))
+	}
+}
+
+// cleanupConnection tears down everything associated with conn: it cancels
+// the connection context, closes the server connection if one is set, removes
+// the global JS entry point, releases the js.Func handles and drops the
+// proxy's bookkeeping for conn.id. The destination is removed as well, so
+// the proxy ID cannot be used for another connection afterwards.
+func (p *VNCProxy) cleanupConnection(conn *vncConnection) {
+	log.Debugf("cleaning up VNC connection %s", conn.id)
+	conn.cancel()
+
+	// Detach vncConn under the lock so snapshotVNC reports the connection as
+	// gone; the Close itself happens outside the critical section.
+	conn.mu.Lock()
+	vncConn := conn.vncConn
+	conn.vncConn = nil
+	conn.mu.Unlock()
+
+	if vncConn != nil {
+		if err := vncConn.Close(); err != nil {
+			log.Debugf("close VNC connection: %v", err)
+		}
+	}
+
+	// Remove the global JS handler registered in newProxyPromise.
+	globalName := fmt.Sprintf("handleVNCWebSocket_%s", conn.id)
+	js.Global().Delete(globalName)
+
+	// Release all js.Func handles; js.FuncOf pins the Go closure and the
+	// allocations it captures until Release is called.
+	conn.wsHandlerFn.Release()
+	conn.onMessageFn.Release()
+	conn.onCloseFn.Release()
+
+	// Forget every map entry keyed by this proxy ID.
+	p.mu.Lock()
+	delete(p.activeConnections, conn.id)
+	delete(p.destinations, conn.id)
+	delete(p.pendingHandlers, conn.id)
+	p.mu.Unlock()
+}