Add embedded VNC server with JWT auth, DXGI capture, and dashboard integration

This commit is contained in:
Viktor Liu
2026-04-14 12:31:00 +02:00
parent 3098f48b25
commit b754df1171
85 changed files with 10457 additions and 2011 deletions

View File

@@ -0,0 +1,242 @@
//go:build (linux && !android) || freebsd
package server
import (
"fmt"
"os"
"os/exec"
"strings"
log "github.com/sirupsen/logrus"
"github.com/jezek/xgb"
"github.com/jezek/xgb/xproto"
"github.com/jezek/xgb/xtest"
)
// X11InputInjector delivers keyboard and pointer input to an X11 display
// through the XTest extension and syncs the clipboard through an external
// command-line helper when one is installed.
type X11InputInjector struct {
	// X server connection, the display name it was opened with, and the
	// screen/root window that fake input is addressed to.
	conn    *xgb.Conn
	display string
	screen  *xproto.ScreenInfo
	root    xproto.Window

	// keysymMap caches keysym -> keycode lookups; it is filled once by
	// cacheKeyboardMapping.
	keysymMap map[uint32]byte
	// lastButtons is the RFB button mask passed to the previous
	// InjectPointer call.
	lastButtons uint8

	// clipboardTool is the resolved path of the clipboard helper and
	// clipboardToolName its bare name ("xclip" or "xsel"). Both stay empty
	// when no helper was found.
	clipboardTool     string
	clipboardToolName string
}
// NewX11InputInjector opens a connection to an X11 display and prepares it
// for input injection.
//
// detectX11Display is always invoked first. When display is empty, the
// DISPLAY environment variable is used instead. An error is returned when no
// display name can be determined, the connection cannot be established, the
// XTest extension fails to initialize, or the server reports no screens.
// On success the keyboard mapping is cached and a clipboard helper is looked
// up before the injector is handed back.
func NewX11InputInjector(display string) (*X11InputInjector, error) {
	detectX11Display()

	if display == "" {
		if display = os.Getenv("DISPLAY"); display == "" {
			return nil, fmt.Errorf("DISPLAY not set and no Xorg process found")
		}
	}

	conn, err := xgb.NewConnDisplay(display)
	if err != nil {
		return nil, fmt.Errorf("connect to X11 display %s: %w", display, err)
	}

	if initErr := xtest.Init(conn); initErr != nil {
		conn.Close()
		return nil, fmt.Errorf("init XTest extension: %w", initErr)
	}

	roots := xproto.Setup(conn).Roots
	if len(roots) == 0 {
		conn.Close()
		return nil, fmt.Errorf("no X11 screens")
	}

	// Only the first screen reported by the server is used.
	first := roots[0]
	injector := &X11InputInjector{
		conn:    conn,
		display: display,
		screen:  &first,
		root:    first.Root,
	}
	injector.cacheKeyboardMapping()
	injector.resolveClipboardTool()

	log.Infof("X11 input injector ready (display=%s)", display)
	return injector, nil
}
// InjectKey sends a fake key press (down == true) or key release
// (down == false) for the given X11 keysym. Keysyms without a cached keycode
// are silently ignored.
func (x *X11InputInjector) InjectKey(keysym uint32, down bool) {
	code := x.keysymToKeycode(keysym)
	if code == 0 {
		return
	}

	kind := byte(xproto.KeyRelease)
	if down {
		kind = xproto.KeyPress
	}
	xtest.FakeInput(x.conn, kind, code, 0, x.root, 0, 0, 0)
}
// InjectPointer moves the pointer and replays the button changes encoded in
// an RFB pointer event.
//
// px and py are coordinates inside a serverW x serverH framebuffer. They are
// scaled to the X11 screen size and clamped to the screen before being sent,
// so out-of-range client coordinates pin the pointer to the nearest edge
// instead of wrapping around in the int16 conversion. buttonMask is the RFB
// button mask carried by the event. Events with a non-positive serverW or
// serverH are dropped.
func (x *X11InputInjector) InjectPointer(buttonMask uint8, px, py, serverW, serverH int) {
	if serverW <= 0 || serverH <= 0 {
		return
	}

	// FakeInput takes signed 16-bit root coordinates.
	const maxCoord = 1<<15 - 1
	clamp := func(v, limit int) int {
		if limit > maxCoord {
			limit = maxCoord
		}
		if v > limit {
			v = limit
		}
		if v < 0 {
			v = 0
		}
		return v
	}

	// Scale to actual screen coordinates.
	screenW := int(x.screen.WidthInPixels)
	screenH := int(x.screen.HeightInPixels)
	absX := clamp(px*screenW/serverW, screenW-1)
	absY := clamp(py*screenH/serverH, screenH-1)

	// Move pointer.
	xtest.FakeInput(x.conn, xproto.MotionNotify, 0, 0, x.root, int16(absX), int16(absY), 0)

	// Handle button events. RFB button mask: bit0=left, bit1=middle, bit2=right,
	// bit3=scrollUp, bit4=scrollDown. X11 buttons: 1=left, 2=middle, 3=right,
	// 4=scrollUp, 5=scrollDown.
	type btnMap struct {
		rfbBit uint8
		x11Btn byte
	}
	buttons := [...]btnMap{
		{0x01, 1}, // left
		{0x02, 2}, // middle
		{0x04, 3}, // right
		{0x08, 4}, // scroll up
		{0x10, 5}, // scroll down
	}

	for _, b := range buttons {
		pressed := buttonMask&b.rfbBit != 0
		wasPressed := x.lastButtons&b.rfbBit != 0

		if b.x11Btn >= 4 {
			// Scroll: every event with the bit set becomes one full
			// press+release click, regardless of the previous mask.
			if pressed {
				xtest.FakeInput(x.conn, xproto.ButtonPress, b.x11Btn, 0, x.root, 0, 0, 0)
				xtest.FakeInput(x.conn, xproto.ButtonRelease, b.x11Btn, 0, x.root, 0, 0, 0)
			}
			continue
		}

		// Regular buttons: only emit an event when the state changed.
		switch {
		case pressed && !wasPressed:
			xtest.FakeInput(x.conn, xproto.ButtonPress, b.x11Btn, 0, x.root, 0, 0, 0)
		case !pressed && wasPressed:
			xtest.FakeInput(x.conn, xproto.ButtonRelease, b.x11Btn, 0, x.root, 0, 0, 0)
		}
	}
	x.lastButtons = buttonMask
}
// cacheKeyboardMapping fetches the X11 keyboard mapping once and stores it
// as a keysym-to-keycode map, avoiding a round-trip per keystroke.
//
// Keycodes are scanned in ascending order and, for each keycode, its keysyms
// in reply order; the first keycode seen for a keysym wins. If the request
// fails, an empty map is stored so lookups simply miss. A reply carrying
// fewer keysyms than the keycode range implies is consumed as far as it goes
// rather than indexed out of range.
func (x *X11InputInjector) cacheKeyboardMapping() {
	setup := xproto.Setup(x.conn)
	minKeycode := setup.MinKeycode
	maxKeycode := setup.MaxKeycode

	reply, err := xproto.GetKeyboardMapping(x.conn, minKeycode,
		byte(maxKeycode-minKeycode+1)).Reply()
	if err != nil {
		log.Debugf("cache keyboard mapping: %v", err)
		x.keysymMap = make(map[uint32]byte)
		return
	}

	keysymsPerKeycode := int(reply.KeysymsPerKeycode)
	m := make(map[uint32]byte, len(reply.Keysyms))
	for i := int(minKeycode); i <= int(maxKeycode); i++ {
		offset := (i - int(minKeycode)) * keysymsPerKeycode
		if offset+keysymsPerKeycode > len(reply.Keysyms) {
			// Short reply: keep what was collected so far.
			log.Debugf("cache keyboard mapping: reply truncated at keycode %d", i)
			break
		}
		for _, sym := range reply.Keysyms[offset : offset+keysymsPerKeycode] {
			ks := uint32(sym)
			if ks == 0 {
				continue
			}
			if _, exists := m[ks]; !exists {
				m[ks] = byte(i)
			}
		}
	}
	x.keysymMap = m
}
// keysymToKeycode resolves a keysym through the cached mapping.
// A keysym that is absent from the cache yields 0.
func (x *X11InputInjector) keysymToKeycode(keysym uint32) byte {
	code, found := x.keysymMap[keysym]
	if !found {
		return 0
	}
	return code
}
// SetClipboard writes text to the X11 clipboard selection by piping it into
// the resolved helper (xclip or xsel). It does nothing when no helper was
// found; a failing helper is only logged at debug level.
func (x *X11InputInjector) SetClipboard(text string) {
	if x.clipboardTool == "" {
		return
	}

	// Default to xsel's flags; xclip uses its own spelling.
	args := []string{"--clipboard", "--input"}
	if x.clipboardToolName == "xclip" {
		args = []string{"-selection", "clipboard"}
	}

	cmd := exec.Command(x.clipboardTool, args...)
	cmd.Env = x.clipboardEnv()
	cmd.Stdin = strings.NewReader(text)

	err := cmd.Run()
	if err != nil {
		log.Debugf("set clipboard via %s: %v", x.clipboardToolName, err)
	}
}
// resolveClipboardTool searches PATH for a clipboard helper, preferring
// xclip over xsel, and records the first one found. When neither exists the
// clipboard fields stay empty, which disables clipboard sync.
func (x *X11InputInjector) resolveClipboardTool() {
	candidates := []string{"xclip", "xsel"}
	for _, candidate := range candidates {
		resolved, err := exec.LookPath(candidate)
		if err != nil {
			continue
		}
		x.clipboardTool = resolved
		x.clipboardToolName = candidate
		log.Debugf("clipboard tool resolved to %s", resolved)
		return
	}
	log.Debugf("no clipboard tool (xclip/xsel) found, clipboard sync disabled")
}
// GetClipboard returns the current contents of the X11 clipboard selection
// as printed by the resolved helper (xclip or xsel). It returns an empty
// string when no helper was found or when the helper fails; failures are
// logged at trace level.
func (x *X11InputInjector) GetClipboard() string {
	if x.clipboardTool == "" {
		return ""
	}

	// Default to xsel's flags; xclip uses its own spelling.
	args := []string{"--clipboard", "--output"}
	if x.clipboardToolName == "xclip" {
		args = []string{"-selection", "clipboard", "-o"}
	}

	cmd := exec.Command(x.clipboardTool, args...)
	cmd.Env = x.clipboardEnv()

	data, err := cmd.Output()
	if err != nil {
		log.Tracef("get clipboard via %s: %v", x.clipboardToolName, err)
		return ""
	}
	return string(data)
}
// clipboardEnv builds the environment handed to the clipboard helper: the
// injector's DISPLAY, followed by XAUTHORITY when this process has it set to
// a non-empty value. Nothing else is passed through.
func (x *X11InputInjector) clipboardEnv() []string {
	displayVar := "DISPLAY=" + x.display

	xauth := os.Getenv("XAUTHORITY")
	if xauth == "" {
		return []string{displayVar}
	}
	return []string{displayVar, "XAUTHORITY=" + xauth}
}
// Close closes the connection to the X server held by the injector.
func (x *X11InputInjector) Close() {
x.conn.Close()
}
// Compile-time checks that the X11 types satisfy the package interfaces.
var (
	_ InputInjector  = (*X11InputInjector)(nil)
	_ ScreenCapturer = (*X11Poller)(nil)
)