//go:build (linux && !android) || freebsd package server import ( "fmt" "os" "os/exec" "strings" log "github.com/sirupsen/logrus" "github.com/jezek/xgb" "github.com/jezek/xgb/xproto" "github.com/jezek/xgb/xtest" ) // X11InputInjector injects keyboard and mouse events via the XTest extension. type X11InputInjector struct { conn *xgb.Conn root xproto.Window screen *xproto.ScreenInfo display string keysymMap map[uint32]byte lastButtons uint8 clipboardTool string clipboardToolName string } // NewX11InputInjector connects to the X11 display and initializes XTest. func NewX11InputInjector(display string) (*X11InputInjector, error) { detectX11Display() if display == "" { display = os.Getenv("DISPLAY") } if display == "" { return nil, fmt.Errorf("DISPLAY not set and no Xorg process found") } conn, err := xgb.NewConnDisplay(display) if err != nil { return nil, fmt.Errorf("connect to X11 display %s: %w", display, err) } if err := xtest.Init(conn); err != nil { conn.Close() return nil, fmt.Errorf("init XTest extension: %w", err) } setup := xproto.Setup(conn) if len(setup.Roots) == 0 { conn.Close() return nil, fmt.Errorf("no X11 screens") } screen := setup.Roots[0] inj := &X11InputInjector{ conn: conn, root: screen.Root, screen: &screen, display: display, } inj.cacheKeyboardMapping() inj.resolveClipboardTool() log.Infof("X11 input injector ready (display=%s)", display) return inj, nil } // InjectKey simulates a key press or release. keysym is an X11 KeySym. func (x *X11InputInjector) InjectKey(keysym uint32, down bool) { keycode := x.keysymToKeycode(keysym) if keycode == 0 { return } var eventType byte if down { eventType = xproto.KeyPress } else { eventType = xproto.KeyRelease } xtest.FakeInput(x.conn, eventType, keycode, 0, x.root, 0, 0, 0) } // InjectPointer simulates mouse movement and button events. func (x *X11InputInjector) InjectPointer(buttonMask uint8, px, py, serverW, serverH int) { if serverW == 0 || serverH == 0 { return } // Scale to actual screen coordinates. screenW := int(x.screen.WidthInPixels) screenH := int(x.screen.HeightInPixels) absX := px * screenW / serverW absY := py * screenH / serverH // Move pointer. xtest.FakeInput(x.conn, xproto.MotionNotify, 0, 0, x.root, int16(absX), int16(absY), 0) // Handle button events. RFB button mask: bit0=left, bit1=middle, bit2=right, // bit3=scrollUp, bit4=scrollDown. X11 buttons: 1=left, 2=middle, 3=right, // 4=scrollUp, 5=scrollDown. type btnMap struct { rfbBit uint8 x11Btn byte } buttons := [...]btnMap{ {0x01, 1}, // left {0x02, 2}, // middle {0x04, 3}, // right {0x08, 4}, // scroll up {0x10, 5}, // scroll down } for _, b := range buttons { pressed := buttonMask&b.rfbBit != 0 wasPressed := x.lastButtons&b.rfbBit != 0 if b.x11Btn >= 4 { // Scroll: send press+release on each scroll event. if pressed { xtest.FakeInput(x.conn, xproto.ButtonPress, b.x11Btn, 0, x.root, 0, 0, 0) xtest.FakeInput(x.conn, xproto.ButtonRelease, b.x11Btn, 0, x.root, 0, 0, 0) } } else { if pressed && !wasPressed { xtest.FakeInput(x.conn, xproto.ButtonPress, b.x11Btn, 0, x.root, 0, 0, 0) } else if !pressed && wasPressed { xtest.FakeInput(x.conn, xproto.ButtonRelease, b.x11Btn, 0, x.root, 0, 0, 0) } } } x.lastButtons = buttonMask } // cacheKeyboardMapping fetches the X11 keyboard mapping once and stores it // as a keysym-to-keycode map, avoiding a round-trip per keystroke. func (x *X11InputInjector) cacheKeyboardMapping() { setup := xproto.Setup(x.conn) minKeycode := setup.MinKeycode maxKeycode := setup.MaxKeycode reply, err := xproto.GetKeyboardMapping(x.conn, minKeycode, byte(maxKeycode-minKeycode+1)).Reply() if err != nil { log.Debugf("cache keyboard mapping: %v", err) x.keysymMap = make(map[uint32]byte) return } m := make(map[uint32]byte, int(maxKeycode-minKeycode+1)*int(reply.KeysymsPerKeycode)) keysymsPerKeycode := int(reply.KeysymsPerKeycode) for i := int(minKeycode); i <= int(maxKeycode); i++ { offset := (i - int(minKeycode)) * keysymsPerKeycode for j := 0; j < keysymsPerKeycode; j++ { ks := uint32(reply.Keysyms[offset+j]) if ks != 0 { if _, exists := m[ks]; !exists { m[ks] = byte(i) } } } } x.keysymMap = m } // keysymToKeycode looks up a cached keysym-to-keycode mapping. // Returns 0 if the keysym is not mapped. func (x *X11InputInjector) keysymToKeycode(keysym uint32) byte { return x.keysymMap[keysym] } // SetClipboard sets the X11 clipboard using xclip or xsel. func (x *X11InputInjector) SetClipboard(text string) { if x.clipboardTool == "" { return } var cmd *exec.Cmd if x.clipboardToolName == "xclip" { cmd = exec.Command(x.clipboardTool, "-selection", "clipboard") } else { cmd = exec.Command(x.clipboardTool, "--clipboard", "--input") } cmd.Env = x.clipboardEnv() cmd.Stdin = strings.NewReader(text) if err := cmd.Run(); err != nil { log.Debugf("set clipboard via %s: %v", x.clipboardToolName, err) } } // TypeText synthesizes the given text as keystrokes via XTest. We can // no longer just stuff the host clipboard with xclip and expect Ctrl+V // to do the rest, because the Paste button is also used at places where // the focused application isn't a clipboard-aware one (e.g. a TTY login // in an X11 session, an SDDM/GDM password field that ignores XSelection, // or a kiosk app). Typing keystrokes covers all of those. // // Limitation: only ASCII printable characters are typed. Non-ASCII runes // are skipped: a paste workflow for them needs Wayland-aware text input // or layout introspection that we don't have. func (x *X11InputInjector) TypeText(text string) { const maxChars = 4096 count := 0 for _, r := range text { if count >= maxChars { break } count++ keysym, shift, ok := keysymForASCIIRune(r) if !ok { continue } keycode := x.keysymToKeycode(keysym) if keycode == 0 { continue } var shiftCode byte if shift { shiftCode = x.keysymToKeycode(0xffe1) // Shift_L if shiftCode != 0 { xtest.FakeInput(x.conn, xproto.KeyPress, shiftCode, 0, x.root, 0, 0, 0) } } xtest.FakeInput(x.conn, xproto.KeyPress, keycode, 0, x.root, 0, 0, 0) xtest.FakeInput(x.conn, xproto.KeyRelease, keycode, 0, x.root, 0, 0, 0) if shift && shiftCode != 0 { xtest.FakeInput(x.conn, xproto.KeyRelease, shiftCode, 0, x.root, 0, 0, 0) } } } func (x *X11InputInjector) resolveClipboardTool() { for _, name := range []string{"xclip", "xsel"} { path, err := exec.LookPath(name) if err == nil { x.clipboardTool = path x.clipboardToolName = name log.Debugf("clipboard tool resolved to %s", path) return } } log.Debugf("no clipboard tool (xclip/xsel) found, clipboard sync disabled") } // GetClipboard reads the X11 clipboard using xclip or xsel. func (x *X11InputInjector) GetClipboard() string { if x.clipboardTool == "" { return "" } var cmd *exec.Cmd if x.clipboardToolName == "xclip" { cmd = exec.Command(x.clipboardTool, "-selection", "clipboard", "-o") } else { cmd = exec.Command(x.clipboardTool, "--clipboard", "--output") } cmd.Env = x.clipboardEnv() out, err := cmd.Output() if err != nil { // Exit status 1 just means there is no STRING selection set yet, // which is the steady state on a fresh Xvfb session — logging it // every clipboard poll (2s) floods the trace stream. return "" } return string(out) } func (x *X11InputInjector) clipboardEnv() []string { env := []string{"DISPLAY=" + x.display} if auth := os.Getenv("XAUTHORITY"); auth != "" { env = append(env, "XAUTHORITY="+auth) } return env } // Close releases X11 resources. func (x *X11InputInjector) Close() { x.conn.Close() } var _ InputInjector = (*X11InputInjector)(nil) var _ ScreenCapturer = (*X11Poller)(nil)