diff --git a/client/cmd/vnc_agent.go b/client/cmd/vnc_agent.go index c5b036b85..15605784d 100644 --- a/client/cmd/vnc_agent.go +++ b/client/cmd/vnc_agent.go @@ -1,4 +1,4 @@ -//go:build windows +//go:build windows || (darwin && !ios) package cmd @@ -20,30 +20,30 @@ func init() { rootCmd.AddCommand(vncAgentCmd) } -// vncAgentCmd runs a VNC server in the current user session, listening on -// localhost. It is spawned by the NetBird service (Session 0) via -// CreateProcessAsUser into the interactive console session. +// vncAgentCmd runs a VNC server inside the user's interactive session, +// listening on localhost. The NetBird service spawns it: on Windows via +// CreateProcessAsUser into the console session, on macOS via +// launchctl asuser into the Aqua session. var vncAgentCmd = &cobra.Command{ Use: "vnc-agent", Short: "Run VNC capture agent (internal, spawned by service)", Hidden: true, RunE: func(cmd *cobra.Command, args []string) error { - // Agent's stderr is piped to the service which relogs it. - // Use JSON format with caller info for structured parsing. log.SetReportCaller(true) log.SetFormatter(&log.JSONFormatter{}) log.SetOutput(os.Stderr) - sessionID := vncserver.GetCurrentSessionID() - log.Infof("VNC agent starting on 127.0.0.1:%d (session %d)", vncAgentPort, sessionID) + log.Infof("VNC agent starting on 127.0.0.1:%d", vncAgentPort) token := os.Getenv("NB_VNC_AGENT_TOKEN") if token == "" { return fmt.Errorf("NB_VNC_AGENT_TOKEN not set; agent requires a token from the service") } - capturer := vncserver.NewDesktopCapturer() - injector := vncserver.NewWindowsInputInjector() + capturer, injector, err := newAgentResources() + if err != nil { + return err + } srv := vncserver.New(capturer, injector) srv.SetDisableAuth(true) srv.SetAgentToken(token) diff --git a/client/cmd/vnc_agent_darwin.go b/client/cmd/vnc_agent_darwin.go new file mode 100644 index 000000000..6bf264602 --- /dev/null +++ b/client/cmd/vnc_agent_darwin.go @@ -0,0 +1,18 @@ +//go:build darwin && !ios + +package cmd + +import ( + "fmt" + + vncserver "github.com/netbirdio/netbird/client/vnc/server" +) + +func newAgentResources() (vncserver.ScreenCapturer, vncserver.InputInjector, error) { + capturer := vncserver.NewMacPoller() + injector, err := vncserver.NewMacInputInjector() + if err != nil { + return nil, nil, fmt.Errorf("macOS input injector: %w", err) + } + return capturer, injector, nil +} diff --git a/client/cmd/vnc_agent_windows.go b/client/cmd/vnc_agent_windows.go new file mode 100644 index 000000000..ea1247b55 --- /dev/null +++ b/client/cmd/vnc_agent_windows.go @@ -0,0 +1,15 @@ +//go:build windows + +package cmd + +import ( + log "github.com/sirupsen/logrus" + + vncserver "github.com/netbirdio/netbird/client/vnc/server" +) + +func newAgentResources() (vncserver.ScreenCapturer, vncserver.InputInjector, error) { + sessionID := vncserver.GetCurrentSessionID() + log.Infof("VNC agent running in Windows session %d", sessionID) + return vncserver.NewDesktopCapturer(), vncserver.NewWindowsInputInjector(), nil +} diff --git a/client/internal/engine_vnc_darwin.go b/client/internal/engine_vnc_darwin.go index 0f182cdb0..309d14f5c 100644 --- a/client/internal/engine_vnc_darwin.go +++ b/client/internal/engine_vnc_darwin.go @@ -3,6 +3,8 @@ package internal import ( + "os" + log "github.com/sirupsen/logrus" vncserver "github.com/netbirdio/netbird/client/vnc/server" @@ -23,6 +25,10 @@ func newPlatformVNC() (vncserver.ScreenCapturer, vncserver.InputInjector, bool) return capturer, injector, true } +// vncNeedsServiceMode reports whether the running process is a system +// LaunchDaemon (root, parented by launchd). Daemons sit in the global +// bootstrap namespace and cannot talk to WindowServer; we route capture +// through a per-user agent in that case. func vncNeedsServiceMode() bool { - return false + return os.Geteuid() == 0 && os.Getppid() == 1 } diff --git a/client/vnc/server/agent_darwin.go b/client/vnc/server/agent_darwin.go new file mode 100644 index 000000000..7b24cbc8e --- /dev/null +++ b/client/vnc/server/agent_darwin.go @@ -0,0 +1,315 @@ +//go:build darwin && !ios + +package server + +import ( + "bytes" + "context" + "errors" + "fmt" + "net" + "os" + "os/exec" + "strconv" + "sync" + "syscall" + "time" + + log "github.com/sirupsen/logrus" + "golang.org/x/sys/unix" +) + +// darwinAgentManager spawns a per-user VNC agent on demand and keeps it +// alive across multiple client connections within the same console-user +// session. A new agent is spawned the first time a client connects, or +// whenever the console user changes underneath us. +// +// Lifecycle is lazy by design: a daemon that never receives a VNC +// connection never spawns anything. The trade-off versus an eager spawn +// (the Windows model) is that the first VNC client pays the launchctl +// asuser + listen-readiness wait, ~hundreds of milliseconds in practice. +// That cost only repeats on user switch. +type darwinAgentManager struct { + mu sync.Mutex + authToken string + port uint16 + uid uint32 + running bool +} + +func newDarwinAgentManager(ctx context.Context) *darwinAgentManager { + m := &darwinAgentManager{port: agentPort} + go m.watchConsoleUser(ctx) + return m +} + +// watchConsoleUser kills the cached agent whenever the console user +// changes (logout, fast user switch, login window). Without it the daemon +// keeps proxying to an agent whose TCC grant and WindowServer access +// belong to a user who is no longer at the screen, so the new user only +// ever sees the locked-screen wallpaper. Killing the agent breaks the +// loopback TCP that the daemon proxies into, the client disconnects, and +// the next reconnect runs ensure() against the new console uid. +func (m *darwinAgentManager) watchConsoleUser(ctx context.Context) { + t := time.NewTicker(2 * time.Second) + defer t.Stop() + for { + select { + case <-ctx.Done(): + return + case <-t.C: + uid, err := consoleUserID() + m.mu.Lock() + if !m.running { + m.mu.Unlock() + continue + } + if err != nil || uid != m.uid { + prev := m.uid + m.killLocked() + m.mu.Unlock() + if err != nil { + log.Infof("console user gone (was uid=%d): %v; agent stopped", prev, err) + } else { + log.Infof("console user changed %d -> %d; agent stopped, will respawn on next connect", prev, uid) + } + continue + } + m.mu.Unlock() + } + } +} + +// ensure returns a token good for proxyToAgent. It spawns or respawns the +// per-user agent process as needed and waits until it is listening on the +// loopback port. Each ensure call is serialized so concurrent VNC clients +// share the same agent. +func (m *darwinAgentManager) ensure(ctx context.Context) (string, error) { + consoleUID, err := consoleUserID() + if err != nil { + return "", fmt.Errorf("no console user: %w", err) + } + m.mu.Lock() + defer m.mu.Unlock() + if m.running && m.uid == consoleUID && vncAgentRunning() { + return m.authToken, nil + } + m.killLocked() + + token := generateAuthToken() + if token == "" { + return "", fmt.Errorf("generate agent auth token") + } + if err := spawnAgentForUser(consoleUID, m.port, token); err != nil { + return "", err + } + if err := waitForAgent(ctx, m.port, 5*time.Second); err != nil { + killAllVNCAgents() + return "", fmt.Errorf("agent did not start listening: %w", err) + } + m.authToken = token + m.uid = consoleUID + m.running = true + log.Infof("spawned VNC agent for console uid=%d on port %d", consoleUID, m.port) + return token, nil +} + +// stop terminates the spawned agent, if any. Intended for daemon shutdown. +func (m *darwinAgentManager) stop() { + m.mu.Lock() + defer m.mu.Unlock() + m.killLocked() +} + +func (m *darwinAgentManager) killLocked() { + if !m.running { + return + } + killAllVNCAgents() + m.running = false + m.authToken = "" + m.uid = 0 +} + +// errNoConsoleUser is the sentinel callers use to recognise the +// "login window showing, no user signed in" state and surface it as a +// distinct condition to the VNC client. +var errNoConsoleUser = errors.New("no user logged into console") + +// consoleUserID returns the uid of the user currently sitting at the +// console (the one whose Aqua session is active). Returns +// errNoConsoleUser when nobody is logged in: at the login window +// /dev/console is owned by root. +func consoleUserID() (uint32, error) { + info, err := os.Stat("/dev/console") + if err != nil { + return 0, fmt.Errorf("stat /dev/console: %w", err) + } + st, ok := info.Sys().(*syscall.Stat_t) + if !ok { + return 0, fmt.Errorf("/dev/console stat has unexpected type") + } + if st.Uid == 0 { + return 0, errNoConsoleUser + } + return st.Uid, nil +} + +// spawnAgentForUser uses launchctl asuser to start a netbird vnc-agent +// process inside the target user's launchd bootstrap namespace. That is +// the only spawn mode on macOS that gives the child access to the user's +// WindowServer. The agent's stderr is relogged into the daemon log so +// startup failures are not silently lost when the readiness check times +// out. +func spawnAgentForUser(uid uint32, port uint16, token string) error { + exe, err := os.Executable() + if err != nil { + return fmt.Errorf("resolve own executable: %w", err) + } + cmd := exec.Command( + "/bin/launchctl", "asuser", strconv.FormatUint(uint64(uid), 10), + exe, "vnc-agent", "--port", strconv.FormatUint(uint64(port), 10), + ) + cmd.Env = append(os.Environ(), agentTokenEnvVar+"="+token) + stderr, err := cmd.StderrPipe() + if err != nil { + return fmt.Errorf("agent stderr pipe: %w", err) + } + if err := cmd.Start(); err != nil { + return fmt.Errorf("launchctl asuser: %w", err) + } + go func() { + defer stderr.Close() + relogAgentStream(stderr) + }() + go func() { _ = cmd.Wait() }() + return nil +} + +// waitForAgent dials the loopback port until the agent answers. Used to +// gate proxy attempts until the spawned process has finished its Start. +func waitForAgent(ctx context.Context, port uint16, wait time.Duration) error { + addr := fmt.Sprintf("127.0.0.1:%d", port) + deadline := time.Now().Add(wait) + for time.Now().Before(deadline) { + if ctx.Err() != nil { + return ctx.Err() + } + c, err := net.DialTimeout("tcp", addr, 200*time.Millisecond) + if err == nil { + _ = c.Close() + return nil + } + time.Sleep(100 * time.Millisecond) + } + return fmt.Errorf("timeout dialing %s", addr) +} + +// vncAgentRunning reports whether any vnc-agent process exists on the +// system. The daemon owns the only port-15900 listener model, so any +// match is "the" agent. +func vncAgentRunning() bool { + pids, err := vncAgentPIDs() + if err != nil { + log.Debugf("scan for vnc-agent: %v", err) + return false + } + return len(pids) > 0 +} + +// killAllVNCAgents sends SIGTERM to every process whose argv contains +// "vnc-agent", waits briefly for them to exit, and escalates to SIGKILL +// for any that remain. We enumerate kern.proc.all rather than +// kern.proc.uid because launchctl asuser preserves the caller's uid +// (root) on the spawned child, so a uid-scoped filter would never match. +func killAllVNCAgents() { + pids, err := vncAgentPIDs() + if err != nil { + log.Debugf("scan for vnc-agent: %v", err) + return + } + for _, pid := range pids { + _ = syscall.Kill(pid, syscall.SIGTERM) + } + if len(pids) == 0 { + return + } + deadline := time.Now().Add(2 * time.Second) + for time.Now().Before(deadline) { + remaining, _ := vncAgentPIDs() + if len(remaining) == 0 { + return + } + time.Sleep(100 * time.Millisecond) + } + leftover, _ := vncAgentPIDs() + for _, pid := range leftover { + _ = syscall.Kill(pid, syscall.SIGKILL) + } +} + +// vncAgentPIDs returns the pids of every process whose argv contains +// "vnc-agent". Skips pid 0 and 1 defensively. +func vncAgentPIDs() ([]int, error) { + procs, err := unix.SysctlKinfoProcSlice("kern.proc.all") + if err != nil { + return nil, fmt.Errorf("sysctl kern.proc.all: %w", err) + } + var out []int + for i := range procs { + pid := int(procs[i].Proc.P_pid) + if pid <= 1 { + continue + } + argv, err := procArgv(pid) + if err != nil || !argvIsVNCAgent(argv) { + continue + } + out = append(out, pid) + } + return out, nil +} + +// procArgv reads the kernel's stored argv for pid via the kern.procargs2 +// sysctl. Format: 4-byte argc, then argv[0..argc) each NUL-terminated, +// then envp, then padding. We only need argv so we stop after argc. +func procArgv(pid int) ([]string, error) { + raw, err := unix.SysctlRaw("kern.procargs2", pid) + if err != nil { + return nil, err + } + if len(raw) < 4 { + return nil, fmt.Errorf("procargs2 truncated") + } + argc := int(raw[0]) | int(raw[1])<<8 | int(raw[2])<<16 | int(raw[3])<<24 + body := raw[4:] + // Skip the executable path (NUL-terminated) and any zero padding that + // follows before argv[0]. + end := bytes.IndexByte(body, 0) + if end < 0 { + return nil, fmt.Errorf("procargs2 path unterminated") + } + body = body[end+1:] + for len(body) > 0 && body[0] == 0 { + body = body[1:] + } + args := make([]string, 0, argc) + for i := 0; i < argc; i++ { + end := bytes.IndexByte(body, 0) + if end < 0 { + break + } + args = append(args, string(body[:end])) + body = body[end+1:] + } + return args, nil +} + +func argvIsVNCAgent(argv []string) bool { + for _, a := range argv { + if a == "vnc-agent" { + return true + } + } + return false +} diff --git a/client/vnc/server/agent_ipc.go b/client/vnc/server/agent_ipc.go index e0124ee3a..8cf0ea8a6 100644 --- a/client/vnc/server/agent_ipc.go +++ b/client/vnc/server/agent_ipc.go @@ -3,8 +3,10 @@ package server import ( + "bufio" crand "crypto/rand" "encoding/hex" + "encoding/json" "fmt" "io" "net" @@ -23,6 +25,12 @@ const ( // agentTokenLen is the size of the random per-spawn token in bytes. agentTokenLen = 32 + + // agentTokenEnvVar names the environment variable the daemon uses to + // hand the per-spawn token to the agent child. Out-of-band channels + // like this keep the secret out of the command line, where listings + // such as `ps` or Windows tasklist would expose it. + agentTokenEnvVar = "NB_VNC_AGENT_TOKEN" // #nosec G101 -- env var name, not a credential ) // generateAuthToken returns a fresh hex-encoded random token for one @@ -73,6 +81,60 @@ func proxyToAgent(client net.Conn, port uint16, authToken string) { <-done } +// relogAgentStream reads log lines from the agent's stderr and re-emits +// them through the daemon's logrus, so the merged log keeps a single +// format. JSON lines (the agent's normal output) are parsed and dispatched +// by level; plain-text lines (cobra errors, panic traces) are forwarded +// verbatim so early-startup failures stay visible. +func relogAgentStream(r io.Reader) { + entry := log.WithField("component", "vnc-agent") + scanner := bufio.NewScanner(r) + scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024) + for scanner.Scan() { + line := scanner.Bytes() + if len(line) == 0 { + continue + } + if line[0] != '{' { + entry.Warn(string(line)) + continue + } + var m map[string]any + if err := json.Unmarshal(line, &m); err != nil { + entry.Warn(string(line)) + continue + } + msg, _ := m["msg"].(string) + if msg == "" { + continue + } + fields := make(log.Fields) + for k, v := range m { + switch k { + case "msg", "level", "time", "func": + continue + case "caller": + fields["source"] = v + default: + fields[k] = v + } + } + e := entry.WithFields(fields) + switch m["level"] { + case "error": + e.Error(msg) + case "warning": + e.Warn(msg) + case "debug": + e.Debug(msg) + case "trace": + e.Trace(msg) + default: + e.Info(msg) + } + } +} + // dialAgentWithRetry retries the loopback connect for up to ~10 s so the // daemon does not race the agent's first listen. Returns the live conn or // the final error. diff --git a/client/vnc/server/agent_windows.go b/client/vnc/server/agent_windows.go index 4bacbdd6f..318076aaa 100644 --- a/client/vnc/server/agent_windows.go +++ b/client/vnc/server/agent_windows.go @@ -3,9 +3,7 @@ package server import ( - "bufio" "encoding/binary" - "encoding/json" "errors" "fmt" "os" @@ -285,7 +283,6 @@ func getSystemTokenForSession(sessionID uint32) (windows.Token, error) { return dup, nil } -const agentTokenEnvVar = "NB_VNC_AGENT_TOKEN" // #nosec G101 -- env var name, not a credential // injectEnvVar appends a KEY=VALUE entry to a Unicode environment block. // The block is a sequence of null-terminated UTF-16 strings, terminated by @@ -661,63 +658,12 @@ func (m *sessionManager) killAgent() { } // relogAgentOutput reads log lines from the agent's stderr pipe and -// relogs them with the service's formatter. Each line is tried as JSON -// first (the agent's normal log format); plain-text lines (e.g. cobra -// error output, panic stack traces) are forwarded verbatim so failures -// during early agent startup remain visible. +// relogs them with the service's formatter. func relogAgentOutput(pipe windows.Handle) { defer func() { _ = windows.CloseHandle(pipe) }() f := os.NewFile(uintptr(pipe), "vnc-agent-stderr") defer f.Close() - - entry := log.WithField("component", "vnc-agent") - scanner := bufio.NewScanner(f) - scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024) - for scanner.Scan() { - line := scanner.Bytes() - if len(line) == 0 { - continue - } - if line[0] != '{' { - entry.Warn(string(line)) - continue - } - var m map[string]any - if err := json.Unmarshal(line, &m); err != nil { - entry.Warn(string(line)) - continue - } - msg, _ := m["msg"].(string) - if msg == "" { - continue - } - - fields := make(log.Fields) - for k, v := range m { - switch k { - case "msg", "level", "time", "func": - continue - case "caller": - fields["source"] = v - default: - fields[k] = v - } - } - e := entry.WithFields(fields) - - switch m["level"] { - case "error": - e.Error(msg) - case "warning": - e.Warn(msg) - case "debug": - e.Debug(msg) - case "trace": - e.Trace(msg) - default: - e.Info(msg) - } - } + relogAgentStream(f) } // logCleanupCall invokes a Windows syscall used solely as a cleanup primitive diff --git a/client/vnc/server/server.go b/client/vnc/server/server.go index 076825074..46fcc679f 100644 --- a/client/vnc/server/server.go +++ b/client/vnc/server/server.go @@ -44,6 +44,7 @@ const ( RejectCodeCapturerError = "CAPTURER_ERROR" RejectCodeUnsupportedOS = "UNSUPPORTED" RejectCodeBadRequest = "BAD_REQUEST" + RejectCodeNoConsoleUser = "NO_CONSOLE_USER" ) // EnvVNCDisableDownscale disables any platform-specific framebuffer @@ -812,7 +813,14 @@ func (s *Server) verifyAgentToken(conn net.Conn, connLog *log.Entry) bool { return false } if _, err := io.ReadFull(conn, buf); err != nil { - connLog.Warnf("agent auth: read token: %v", err) + if errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) { + // Connect-then-close probes (port liveness checks) hit this + // path on every dial; logging them would just flood the + // daemon log without surfacing a real failure. + connLog.Tracef("agent auth: read token: %v", err) + } else { + connLog.Warnf("agent auth: read token: %v", err) + } conn.Close() return false } diff --git a/client/vnc/server/server_darwin.go b/client/vnc/server/server_darwin.go index 121704222..357b979b1 100644 --- a/client/vnc/server/server_darwin.go +++ b/client/vnc/server/server_darwin.go @@ -2,20 +2,112 @@ package server +import ( + "bytes" + "errors" + "io" + "net" + + log "github.com/sirupsen/logrus" +) + func (s *Server) platformInit() { // no-op on macOS } -// serviceAcceptLoop is not supported on macOS. -func (s *Server) serviceAcceptLoop() { - s.log.Warn("service mode not supported on macOS, falling back to direct mode") - s.acceptLoop() +func (s *Server) platformShutdown() { + // no-op on macOS } func (s *Server) platformSessionManager() virtualSessionManager { return nil } -func (s *Server) platformShutdown() { - // no-op on this platform +// serviceAcceptLoop runs in a LaunchDaemon and proxies each VNC +// connection to a per-user agent. The agent is spawned lazily on the +// first connection (and respawned after a console-user change) via +// launchctl asuser, which is the only mechanism that lands a child +// inside the user's Aqua session — where WindowServer and TCC grants +// for screen capture work. +func (s *Server) serviceAcceptLoop() { + mgr := newDarwinAgentManager(s.ctx) + defer mgr.stop() + + log.Infof("service mode, proxying connections to per-user agent on 127.0.0.1:%d", agentPort) + + for { + conn, err := s.listener.Accept() + if err != nil { + select { + case <-s.ctx.Done(): + return + default: + } + s.log.Debugf("accept VNC connection: %v", err) + continue + } + + enableTCPKeepAlive(conn, s.log) + conn = newMetricsConn(conn, s.sessionRecorder) + go s.handleServiceConnectionDarwin(conn, mgr) + } } + +func (s *Server) handleServiceConnectionDarwin(conn net.Conn, mgr *darwinAgentManager) { + connLog := s.log.WithField("remote", conn.RemoteAddr().String()) + + if !s.isAllowedSource(conn.RemoteAddr()) { + conn.Close() + return + } + + var headerBuf bytes.Buffer + tee := io.TeeReader(conn, &headerBuf) + teeConn := &darwinPrefixConn{Reader: tee, Conn: conn} + + header, err := readConnectionHeader(teeConn) + if err != nil { + connLog.Debugf("read connection header: %v", err) + conn.Close() + return + } + + if !s.disableAuth { + if s.jwtConfig == nil { + rejectConnection(conn, codeMessage(RejectCodeAuthConfig, "auth enabled but no identity provider configured")) + connLog.Warn("auth rejected: no identity provider configured") + return + } + if _, err := s.authenticateJWT(header); err != nil { + rejectConnection(conn, codeMessage(jwtErrorCode(err), err.Error())) + connLog.Warnf("auth rejected: %v", err) + return + } + } + + token, err := mgr.ensure(s.ctx) + if err != nil { + code := RejectCodeCapturerError + if errors.Is(err, errNoConsoleUser) { + code = RejectCodeNoConsoleUser + } + rejectConnection(conn, codeMessage(code, err.Error())) + connLog.Warnf("spawn per-user agent: %v", err) + return + } + + replayConn := &darwinPrefixConn{ + Reader: io.MultiReader(&headerBuf, conn), + Conn: conn, + } + proxyToAgent(replayConn, agentPort, token) +} + +// darwinPrefixConn replays the already-consumed connection-header bytes +// in front of the proxy stream, mirroring the Windows prefixConn shape. +type darwinPrefixConn struct { + io.Reader + net.Conn +} + +func (p *darwinPrefixConn) Read(b []byte) (int, error) { return p.Reader.Read(b) }