Proxy macOS VNC connections from the LaunchDaemon to a per-user agent via launchctl asuser

This commit is contained in:
Viktor Liu
2026-05-19 17:24:10 +02:00
parent 62b36112ea
commit 7d61975f6c
9 changed files with 536 additions and 74 deletions

View File

@@ -1,4 +1,4 @@
//go:build windows
//go:build windows || (darwin && !ios)
package cmd
@@ -20,30 +20,30 @@ func init() {
rootCmd.AddCommand(vncAgentCmd)
}
// vncAgentCmd runs a VNC server in the current user session, listening on
// localhost. It is spawned by the NetBird service (Session 0) via
// CreateProcessAsUser into the interactive console session.
// vncAgentCmd runs a VNC server inside the user's interactive session,
// listening on localhost. The NetBird service spawns it: on Windows via
// CreateProcessAsUser into the console session, on macOS via
// launchctl asuser into the Aqua session.
var vncAgentCmd = &cobra.Command{
Use: "vnc-agent",
Short: "Run VNC capture agent (internal, spawned by service)",
Hidden: true,
RunE: func(cmd *cobra.Command, args []string) error {
// Agent's stderr is piped to the service which relogs it.
// Use JSON format with caller info for structured parsing.
log.SetReportCaller(true)
log.SetFormatter(&log.JSONFormatter{})
log.SetOutput(os.Stderr)
sessionID := vncserver.GetCurrentSessionID()
log.Infof("VNC agent starting on 127.0.0.1:%d (session %d)", vncAgentPort, sessionID)
log.Infof("VNC agent starting on 127.0.0.1:%d", vncAgentPort)
token := os.Getenv("NB_VNC_AGENT_TOKEN")
if token == "" {
return fmt.Errorf("NB_VNC_AGENT_TOKEN not set; agent requires a token from the service")
}
capturer := vncserver.NewDesktopCapturer()
injector := vncserver.NewWindowsInputInjector()
capturer, injector, err := newAgentResources()
if err != nil {
return err
}
srv := vncserver.New(capturer, injector)
srv.SetDisableAuth(true)
srv.SetAgentToken(token)

View File

@@ -0,0 +1,18 @@
//go:build darwin && !ios
package cmd
import (
"fmt"
vncserver "github.com/netbirdio/netbird/client/vnc/server"
)
// newAgentResources builds the macOS screen capturer and input injector
// the VNC agent serves from. The capturer is created first; if the input
// injector cannot be constructed the error is wrapped and no resources are
// returned.
func newAgentResources() (vncserver.ScreenCapturer, vncserver.InputInjector, error) {
	screen := vncserver.NewMacPoller()

	input, injErr := vncserver.NewMacInputInjector()
	if injErr == nil {
		return screen, input, nil
	}
	return nil, nil, fmt.Errorf("macOS input injector: %w", injErr)
}

View File

@@ -0,0 +1,15 @@
//go:build windows
package cmd
import (
log "github.com/sirupsen/logrus"
vncserver "github.com/netbirdio/netbird/client/vnc/server"
)
// newAgentResources builds the Windows desktop capturer and input injector
// the VNC agent serves from, logging the session the agent runs in first.
// It never fails; the error result exists to match the other platforms.
func newAgentResources() (vncserver.ScreenCapturer, vncserver.InputInjector, error) {
	id := vncserver.GetCurrentSessionID()
	log.Infof("VNC agent running in Windows session %d", id)

	capturer := vncserver.NewDesktopCapturer()
	injector := vncserver.NewWindowsInputInjector()
	return capturer, injector, nil
}

View File

@@ -3,6 +3,8 @@
package internal
import (
"os"
log "github.com/sirupsen/logrus"
vncserver "github.com/netbirdio/netbird/client/vnc/server"
@@ -23,6 +25,10 @@ func newPlatformVNC() (vncserver.ScreenCapturer, vncserver.InputInjector, bool)
return capturer, injector, true
}
// vncNeedsServiceMode reports whether the running process is a system
// LaunchDaemon (root, parented by launchd). Daemons sit in the global
// bootstrap namespace and cannot talk to WindowServer; we route capture
// through a per-user agent in that case.
func vncNeedsServiceMode() bool {
return false
return os.Geteuid() == 0 && os.Getppid() == 1
}

View File

@@ -0,0 +1,315 @@
//go:build darwin && !ios
package server
import (
"bytes"
"context"
"errors"
"fmt"
"net"
"os"
"os/exec"
"strconv"
"sync"
"syscall"
"time"
log "github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
// darwinAgentManager spawns a per-user VNC agent on demand and keeps it
// alive across multiple client connections within the same console-user
// session. A new agent is spawned the first time a client connects, or
// whenever the console user changes underneath us.
//
// Lifecycle is lazy by design: a daemon that never receives a VNC
// connection never spawns anything. The trade-off versus an eager spawn
// (the Windows model) is that the first VNC client pays the launchctl
// asuser + listen-readiness wait, ~hundreds of milliseconds in practice.
// That cost only repeats on user switch.
type darwinAgentManager struct {
// mu serializes ensure, stop and the console-user watcher; the
// authToken, uid and running fields are only touched while it is held.
mu sync.Mutex
// authToken is the per-spawn token handed to the current agent; ensure
// returns it to callers. Cleared when the agent is killed.
authToken string
// port is the loopback port the agent is asked to listen on.
port uint16
// uid is the console user the current agent was spawned for.
uid uint32
// running records whether this manager believes it has a live agent.
running bool
}
// newDarwinAgentManager returns a manager bound to the shared agent port
// and starts its console-user watcher, which runs until ctx is cancelled.
// No agent is spawned here; that happens on the first ensure call.
func newDarwinAgentManager(ctx context.Context) *darwinAgentManager {
	mgr := new(darwinAgentManager)
	mgr.port = agentPort
	go mgr.watchConsoleUser(ctx)
	return mgr
}
// watchConsoleUser polls the console owner every two seconds and tears
// down the cached agent as soon as that owner differs from the user the
// agent was spawned for (logout, fast user switch, login window). An agent
// left running would keep serving a session whose TCC grant and
// WindowServer access belong to someone no longer at the screen. Killing
// it drops the loopback connection the daemon proxies into, so the client
// disconnects and its next connect goes through ensure() for the new uid.
// The loop exits when ctx is cancelled.
func (m *darwinAgentManager) watchConsoleUser(ctx context.Context) {
	ticker := time.NewTicker(2 * time.Second)
	defer ticker.Stop()

	for {
		select {
		case <-ctx.Done():
			return
		case <-ticker.C:
		}

		// Look the console user up before taking the lock so the stat
		// never runs inside the critical section.
		current, lookupErr := consoleUserID()

		m.mu.Lock()
		previous := m.uid
		stale := m.running && (lookupErr != nil || current != m.uid)
		if stale {
			m.killLocked()
		}
		m.mu.Unlock()

		switch {
		case !stale:
			// No agent, or it still belongs to the console user.
		case lookupErr != nil:
			log.Infof("console user gone (was uid=%d): %v; agent stopped", previous, lookupErr)
		default:
			log.Infof("console user changed %d -> %d; agent stopped, will respawn on next connect", previous, current)
		}
	}
}
// ensure returns a token good for proxyToAgent. It spawns or respawns the
// per-user agent process as needed and waits until it is listening on the
// loopback port. Each ensure call is serialized so concurrent VNC clients
// share the same agent.
//
// The manager lock is held across the spawn and the readiness wait (up to
// five seconds), so concurrent callers and the console-user watcher block
// until the agent is up or the attempt has failed.
//
// Errors wrap errNoConsoleUser when nobody is logged in at the console;
// any other error means the agent could not be started.
func (m *darwinAgentManager) ensure(ctx context.Context) (string, error) {
	consoleUID, err := consoleUserID()
	if err != nil {
		return "", fmt.Errorf("no console user: %w", err)
	}

	m.mu.Lock()
	defer m.mu.Unlock()

	if m.running && m.uid == consoleUID && vncAgentRunning() {
		return m.authToken, nil
	}

	// Forget the cached agent and sweep every vnc-agent process, tracked
	// or not. An agent this manager does not know about (for example one
	// left behind by a daemon that crashed) would otherwise keep the
	// loopback port: the new agent could not bind, the readiness dial
	// would succeed against the stale one, and we would cache a token
	// that the listening process was never given.
	m.running, m.authToken, m.uid = false, "", 0
	killAllVNCAgents()

	token := generateAuthToken()
	if token == "" {
		return "", fmt.Errorf("generate agent auth token")
	}
	if err := spawnAgentForUser(consoleUID, m.port, token); err != nil {
		return "", err
	}
	if err := waitForAgent(ctx, m.port, 5*time.Second); err != nil {
		// Do not leave a half-started agent behind for the next attempt.
		killAllVNCAgents()
		return "", fmt.Errorf("agent did not start listening: %w", err)
	}

	m.authToken = token
	m.uid = consoleUID
	m.running = true
	log.Infof("spawned VNC agent for console uid=%d on port %d", consoleUID, m.port)
	return token, nil
}
// stop terminates the spawned agent, if any. Intended for daemon shutdown.
// It holds the manager lock while killLocked runs, so it waits for any
// in-flight ensure call and may itself block while agents are given time
// to exit. Calling it when no agent is running is a no-op.
func (m *darwinAgentManager) stop() {
m.mu.Lock()
defer m.mu.Unlock()
m.killLocked()
}
// killLocked kills every vnc-agent process and clears the cached agent
// state. It does nothing unless the manager believes an agent is running.
// The caller must hold m.mu.
func (m *darwinAgentManager) killLocked() {
	if m.running {
		killAllVNCAgents()
		m.running, m.authToken, m.uid = false, "", 0
	}
}
// errNoConsoleUser is the sentinel for the "login window showing, no user
// signed in" state. Callers match it with errors.Is so that condition can
// be reported to the VNC client separately from other failures.
var errNoConsoleUser = errors.New("no user logged into console")

// consoleUserID reports the uid that owns /dev/console, i.e. the user
// whose Aqua session is currently at the screen. At the login window the
// device is owned by root, which is reported as errNoConsoleUser rather
// than as uid 0.
func consoleUserID() (uint32, error) {
	fi, statErr := os.Stat("/dev/console")
	if statErr != nil {
		return 0, fmt.Errorf("stat /dev/console: %w", statErr)
	}

	switch raw := fi.Sys().(type) {
	case *syscall.Stat_t:
		if raw.Uid == 0 {
			return 0, errNoConsoleUser
		}
		return raw.Uid, nil
	default:
		return 0, fmt.Errorf("/dev/console stat has unexpected type")
	}
}
// spawnAgentForUser uses launchctl asuser to start a netbird vnc-agent
// process inside the target user's launchd bootstrap namespace. That is
// the only spawn mode on macOS that gives the child access to the user's
// WindowServer. The agent's stderr is relogged into the daemon log so
// startup failures are not silently lost when the readiness check times
// out.
//
// uid selects the user session, port is passed to the agent as --port,
// and token is handed over through the agentTokenEnvVar environment
// variable rather than on the command line. The function returns once the
// process has been started; it does not wait for the agent to listen.
func spawnAgentForUser(uid uint32, port uint16, token string) error {
	exe, err := os.Executable()
	if err != nil {
		return fmt.Errorf("resolve own executable: %w", err)
	}

	cmd := exec.Command(
		"/bin/launchctl", "asuser", strconv.FormatUint(uint64(uid), 10),
		exe, "vnc-agent", "--port", strconv.FormatUint(uint64(port), 10),
	)
	cmd.Env = append(os.Environ(), agentTokenEnvVar+"="+token)

	stderr, err := cmd.StderrPipe()
	if err != nil {
		return fmt.Errorf("agent stderr pipe: %w", err)
	}
	if err := cmd.Start(); err != nil {
		return fmt.Errorf("launchctl asuser: %w", err)
	}

	go func() {
		// Drain stderr to EOF before reaping. Wait closes the pipe as
		// soon as it sees the process exit, so calling it concurrently
		// with the reader can drop the agent's final lines, which are
		// the ones that explain a crash. Wait also closes the pipe for
		// us, so no explicit Close is needed.
		relogAgentStream(stderr)
		if err := cmd.Wait(); err != nil {
			log.Debugf("vnc-agent exited: %v", err)
		}
	}()
	return nil
}
// waitForAgent dials the loopback port until the agent answers. Used to
// gate proxy attempts until the spawned process has finished its Start.
//
// It retries roughly every 100 ms (each dial capped at 200 ms) until wait
// has elapsed. It returns nil as soon as a connection is accepted, ctx's
// error if ctx is cancelled first, and a timeout error otherwise. Both the
// dial and the pause between attempts observe ctx, so cancellation is not
// delayed by an in-flight attempt.
func waitForAgent(ctx context.Context, port uint16, wait time.Duration) error {
	addr := fmt.Sprintf("127.0.0.1:%d", port)
	deadline := time.Now().Add(wait)
	dialer := net.Dialer{Timeout: 200 * time.Millisecond}

	for time.Now().Before(deadline) {
		if err := ctx.Err(); err != nil {
			return err
		}

		c, err := dialer.DialContext(ctx, "tcp", addr)
		if err == nil {
			// Only probing for liveness; the real proxy dials again.
			_ = c.Close()
			return nil
		}

		pause := time.NewTimer(100 * time.Millisecond)
		select {
		case <-ctx.Done():
			pause.Stop()
			return ctx.Err()
		case <-pause.C:
		}
	}
	return fmt.Errorf("timeout dialing %s", addr)
}
// vncAgentRunning reports whether at least one vnc-agent process exists
// anywhere on the system. The daemon is the only thing that spawns such
// agents, so any match is treated as "the" agent. A failed process scan is
// logged at debug level and reported as "not running".
func vncAgentRunning() bool {
	found, scanErr := vncAgentPIDs()
	if scanErr == nil {
		return len(found) != 0
	}
	log.Debugf("scan for vnc-agent: %v", scanErr)
	return false
}
// killAllVNCAgents asks every process whose argv contains "vnc-agent" to
// exit with SIGTERM, polls for up to two seconds while they go away, and
// sends SIGKILL to whatever is still present afterwards. The scan covers
// kern.proc.all rather than kern.proc.uid because launchctl asuser
// preserves the caller's uid (root) on the spawned child, so a uid-scoped
// filter would never match. Signal delivery is best effort: a target that
// has already exited is not an error.
func killAllVNCAgents() {
	targets, err := vncAgentPIDs()
	if err != nil {
		log.Debugf("scan for vnc-agent: %v", err)
		return
	}
	if len(targets) == 0 {
		return
	}

	for _, pid := range targets {
		_ = syscall.Kill(pid, syscall.SIGTERM)
	}

	// Grace period: re-scan every 100 ms until nothing matches. A failed
	// re-scan yields no pids and therefore also ends the wait.
	for giveUp := time.Now().Add(2 * time.Second); time.Now().Before(giveUp); time.Sleep(100 * time.Millisecond) {
		if alive, _ := vncAgentPIDs(); len(alive) == 0 {
			return
		}
	}

	stubborn, _ := vncAgentPIDs()
	for _, pid := range stubborn {
		_ = syscall.Kill(pid, syscall.SIGKILL)
	}
}
// vncAgentPIDs walks the full process table and collects the pid of every
// process whose argv contains "vnc-agent". Pids 0 and 1 are skipped
// defensively, and processes whose argv cannot be read are ignored. An
// error is returned only when the process table itself cannot be listed.
func vncAgentPIDs() ([]int, error) {
	table, err := unix.SysctlKinfoProcSlice("kern.proc.all")
	if err != nil {
		return nil, fmt.Errorf("sysctl kern.proc.all: %w", err)
	}

	var matches []int
	for idx := range table {
		candidate := int(table[idx].Proc.P_pid)
		if candidate <= 1 {
			continue
		}
		if args, argErr := procArgv(candidate); argErr == nil && argvIsVNCAgent(args) {
			matches = append(matches, candidate)
		}
	}
	return matches, nil
}
// procArgv reads the kernel's stored argv for pid via the kern.procargs2
// sysctl and returns the decoded arguments. It returns the sysctl error
// unchanged when the process cannot be inspected, and a parse error when
// the returned buffer is malformed.
func procArgv(pid int) ([]string, error) {
	raw, err := unix.SysctlRaw("kern.procargs2", pid)
	if err != nil {
		return nil, err
	}
	return parseProcArgs2(raw)
}

// parseProcArgs2 decodes a kern.procargs2 buffer. Format: 4-byte
// little-endian argc, the executable path (NUL-terminated) plus zero
// padding, then argv[0..argc) each NUL-terminated, then envp. Only argv is
// needed, so parsing stops after argc entries or at the first unterminated
// string, whichever comes first.
func parseProcArgs2(raw []byte) ([]string, error) {
	if len(raw) < 4 {
		return nil, fmt.Errorf("procargs2 truncated")
	}
	argc := uint32(raw[0]) | uint32(raw[1])<<8 | uint32(raw[2])<<16 | uint32(raw[3])<<24
	body := raw[4:]

	// Skip the executable path (NUL-terminated) and any zero padding that
	// follows before argv[0].
	end := bytes.IndexByte(body, 0)
	if end < 0 {
		return nil, fmt.Errorf("procargs2 path unterminated")
	}
	body = body[end+1:]
	for len(body) > 0 && body[0] == 0 {
		body = body[1:]
	}

	// Every argument occupies at least its terminating NUL, so the buffer
	// cannot hold more than len(body) of them. Bounding the count by that
	// keeps a bogus argc from driving a huge allocation.
	limit := len(body)
	if uint64(argc) < uint64(limit) {
		limit = int(argc)
	}

	args := make([]string, 0, limit)
	for len(args) < limit {
		end := bytes.IndexByte(body, 0)
		if end < 0 {
			break
		}
		args = append(args, string(body[:end]))
		body = body[end+1:]
	}
	return args, nil
}
// argvIsVNCAgent reports whether any element of argv is exactly the
// "vnc-agent" subcommand. The match is on whole arguments, so strings that
// merely contain that text do not count.
func argvIsVNCAgent(argv []string) bool {
	for i := 0; i < len(argv); i++ {
		if argv[i] == "vnc-agent" {
			return true
		}
	}
	return false
}

View File

@@ -3,8 +3,10 @@
package server
import (
"bufio"
crand "crypto/rand"
"encoding/hex"
"encoding/json"
"fmt"
"io"
"net"
@@ -23,6 +25,12 @@ const (
// agentTokenLen is the size of the random per-spawn token in bytes.
agentTokenLen = 32
// agentTokenEnvVar names the environment variable the daemon uses to
// hand the per-spawn token to the agent child. Out-of-band channels
// like this keep the secret out of the command line, where listings
// such as `ps` or Windows tasklist would expose it.
agentTokenEnvVar = "NB_VNC_AGENT_TOKEN" // #nosec G101 -- env var name, not a credential
)
// generateAuthToken returns a fresh hex-encoded random token for one
@@ -73,6 +81,60 @@ func proxyToAgent(client net.Conn, port uint16, authToken string) {
<-done
}
// relogAgentStream reads log lines from the agent's stderr and re-emits
// them through the daemon's logrus, so the merged log keeps a single
// format. JSON lines (the agent's normal output) are parsed and dispatched
// by level; plain-text lines (cobra errors, panic traces) are forwarded
// verbatim so early-startup failures stay visible.
//
// Agent "fatal" and "panic" entries are relogged at error level: they must
// stay prominent, but re-emitting them at their original level would take
// the daemon down with the agent. The function returns when r reaches EOF
// or fails; it does not close r.
func relogAgentStream(r io.Reader) {
	entry := log.WithField("component", "vnc-agent")
	scanner := bufio.NewScanner(r)
	scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024)
	for scanner.Scan() {
		line := scanner.Bytes()
		if len(line) == 0 {
			continue
		}
		if line[0] != '{' {
			entry.Warn(string(line))
			continue
		}
		var m map[string]any
		if err := json.Unmarshal(line, &m); err != nil {
			entry.Warn(string(line))
			continue
		}
		msg, _ := m["msg"].(string)
		if msg == "" {
			continue
		}
		fields := make(log.Fields)
		for k, v := range m {
			switch k {
			case "msg", "level", "time", "func":
				// Re-derived by the daemon's own logger.
				continue
			case "caller":
				// Renamed so it cannot collide with the daemon's own
				// caller field.
				fields["source"] = v
			default:
				fields[k] = v
			}
		}
		e := entry.WithFields(fields)
		switch m["level"] {
		case "panic", "fatal", "error":
			e.Error(msg)
		case "warning":
			e.Warn(msg)
		case "debug":
			e.Debug(msg)
		case "trace":
			e.Trace(msg)
		default:
			e.Info(msg)
		}
	}
	if err := scanner.Err(); err != nil {
		// The scanner gives up on a line longer than its buffer (or on a
		// read error). Say so instead of going silent, then keep
		// consuming the stream so the agent never blocks writing to a
		// full pipe.
		entry.Warnf("agent log stream aborted: %v", err)
		_, _ = io.Copy(io.Discard, r)
	}
}
// dialAgentWithRetry retries the loopback connect for up to ~10 s so the
// daemon does not race the agent's first listen. Returns the live conn or
// the final error.

View File

@@ -3,9 +3,7 @@
package server
import (
"bufio"
"encoding/binary"
"encoding/json"
"errors"
"fmt"
"os"
@@ -285,7 +283,6 @@ func getSystemTokenForSession(sessionID uint32) (windows.Token, error) {
return dup, nil
}
const agentTokenEnvVar = "NB_VNC_AGENT_TOKEN" // #nosec G101 -- env var name, not a credential
// injectEnvVar appends a KEY=VALUE entry to a Unicode environment block.
// The block is a sequence of null-terminated UTF-16 strings, terminated by
@@ -661,63 +658,12 @@ func (m *sessionManager) killAgent() {
}
// relogAgentOutput reads log lines from the agent's stderr pipe and
// relogs them with the service's formatter. Each line is tried as JSON
// first (the agent's normal log format); plain-text lines (e.g. cobra
// error output, panic stack traces) are forwarded verbatim so failures
// during early agent startup remain visible.
// relogs them with the service's formatter.
func relogAgentOutput(pipe windows.Handle) {
defer func() { _ = windows.CloseHandle(pipe) }()
f := os.NewFile(uintptr(pipe), "vnc-agent-stderr")
defer f.Close()
entry := log.WithField("component", "vnc-agent")
scanner := bufio.NewScanner(f)
scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024)
for scanner.Scan() {
line := scanner.Bytes()
if len(line) == 0 {
continue
}
if line[0] != '{' {
entry.Warn(string(line))
continue
}
var m map[string]any
if err := json.Unmarshal(line, &m); err != nil {
entry.Warn(string(line))
continue
}
msg, _ := m["msg"].(string)
if msg == "" {
continue
}
fields := make(log.Fields)
for k, v := range m {
switch k {
case "msg", "level", "time", "func":
continue
case "caller":
fields["source"] = v
default:
fields[k] = v
}
}
e := entry.WithFields(fields)
switch m["level"] {
case "error":
e.Error(msg)
case "warning":
e.Warn(msg)
case "debug":
e.Debug(msg)
case "trace":
e.Trace(msg)
default:
e.Info(msg)
}
}
relogAgentStream(f)
}
// logCleanupCall invokes a Windows syscall used solely as a cleanup primitive

View File

@@ -44,6 +44,7 @@ const (
RejectCodeCapturerError = "CAPTURER_ERROR"
RejectCodeUnsupportedOS = "UNSUPPORTED"
RejectCodeBadRequest = "BAD_REQUEST"
RejectCodeNoConsoleUser = "NO_CONSOLE_USER"
)
// EnvVNCDisableDownscale disables any platform-specific framebuffer
@@ -812,7 +813,14 @@ func (s *Server) verifyAgentToken(conn net.Conn, connLog *log.Entry) bool {
return false
}
if _, err := io.ReadFull(conn, buf); err != nil {
connLog.Warnf("agent auth: read token: %v", err)
if errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) {
// Connect-then-close probes (port liveness checks) hit this
// path on every dial; logging them would just flood the
// daemon log without surfacing a real failure.
connLog.Tracef("agent auth: read token: %v", err)
} else {
connLog.Warnf("agent auth: read token: %v", err)
}
conn.Close()
return false
}

View File

@@ -2,20 +2,112 @@
package server
import (
"bytes"
"errors"
"io"
"net"
log "github.com/sirupsen/logrus"
)
// platformInit is the platform-specific initialization hook; this
// implementation does nothing.
func (s *Server) platformInit() {
// no-op on macOS
}
// serviceAcceptLoop is not supported on macOS.
func (s *Server) serviceAcceptLoop() {
s.log.Warn("service mode not supported on macOS, falling back to direct mode")
s.acceptLoop()
func (s *Server) platformShutdown() {
// no-op on macOS
}
func (s *Server) platformSessionManager() virtualSessionManager {
return nil
}
func (s *Server) platformShutdown() {
// no-op on this platform
// serviceAcceptLoop runs in a LaunchDaemon and proxies each VNC
// connection to a per-user agent. The agent is spawned lazily on the
// first connection (and respawned after a console-user change) via
// launchctl asuser, which is the only mechanism that lands a child
// inside the user's Aqua session — where WindowServer and TCC grants
// for screen capture work.
//
// The loop ends when the server context is cancelled or the listener has
// been closed; the agent manager is stopped on the way out. Any other
// accept error is logged and the loop keeps accepting.
func (s *Server) serviceAcceptLoop() {
	mgr := newDarwinAgentManager(s.ctx)
	defer mgr.stop()

	log.Infof("service mode, proxying connections to per-user agent on 127.0.0.1:%d", agentPort)

	for {
		conn, err := s.listener.Accept()
		if err != nil {
			// A closed listener fails every subsequent Accept with
			// net.ErrClosed; retrying would only spin the CPU.
			if s.ctx.Err() != nil || errors.Is(err, net.ErrClosed) {
				return
			}
			s.log.Debugf("accept VNC connection: %v", err)
			continue
		}

		enableTCPKeepAlive(conn, s.log)
		conn = newMetricsConn(conn, s.sessionRecorder)
		go s.handleServiceConnectionDarwin(conn, mgr)
	}
}
// handleServiceConnectionDarwin vets one accepted connection and, when it
// passes, proxies it to the per-user agent. The steps run in order:
// source-address check, connection-header read, JWT authentication
// (unless auth is disabled), agent spawn via mgr.ensure, and finally the
// proxy with the already-consumed header bytes replayed in front.
// NOTE(review): the reject paths below do not call conn.Close() here;
// presumably rejectConnection closes the connection — confirm.
func (s *Server) handleServiceConnectionDarwin(conn net.Conn, mgr *darwinAgentManager) {
connLog := s.log.WithField("remote", conn.RemoteAddr().String())
// Disallowed sources are dropped without a reject message.
if !s.isAllowedSource(conn.RemoteAddr()) {
conn.Close()
return
}
// Everything readConnectionHeader consumes is also copied into
// headerBuf so it can be forwarded to the agent later.
var headerBuf bytes.Buffer
tee := io.TeeReader(conn, &headerBuf)
teeConn := &darwinPrefixConn{Reader: tee, Conn: conn}
header, err := readConnectionHeader(teeConn)
if err != nil {
connLog.Debugf("read connection header: %v", err)
conn.Close()
return
}
if !s.disableAuth {
// Auth is required but there is nothing to validate against.
if s.jwtConfig == nil {
rejectConnection(conn, codeMessage(RejectCodeAuthConfig, "auth enabled but no identity provider configured"))
connLog.Warn("auth rejected: no identity provider configured")
return
}
if _, err := s.authenticateJWT(header); err != nil {
rejectConnection(conn, codeMessage(jwtErrorCode(err), err.Error()))
connLog.Warnf("auth rejected: %v", err)
return
}
}
// Spawn (or reuse) the agent only after the client has been accepted.
token, err := mgr.ensure(s.ctx)
if err != nil {
// "Nobody logged in" gets its own reject code; every other failure
// is reported as a capturer error.
code := RejectCodeCapturerError
if errors.Is(err, errNoConsoleUser) {
code = RejectCodeNoConsoleUser
}
rejectConnection(conn, codeMessage(code, err.Error()))
connLog.Warnf("spawn per-user agent: %v", err)
return
}
// Reads see the buffered header bytes first, then the live connection.
replayConn := &darwinPrefixConn{
Reader: io.MultiReader(&headerBuf, conn),
Conn: conn,
}
proxyToAgent(replayConn, agentPort, token)
}
// darwinPrefixConn replays the already-consumed connection-header bytes
// in front of the proxy stream, mirroring the Windows prefixConn shape.
// Reads are served from Reader; every other net.Conn method is promoted
// from the embedded Conn.
type darwinPrefixConn struct {
io.Reader
net.Conn
}
// Read is declared explicitly because both embedded fields provide a Read
// method at the same depth, which would otherwise make the selector
// ambiguous. It always reads from Reader.
func (p *darwinPrefixConn) Read(b []byte) (int, error) { return p.Reader.Read(b) }