Fix sleep detector races flagged in review

This commit is contained in:
Viktor Liu
2026-04-23 18:00:03 +02:00
parent fe4fe4008d
commit 2c2bbdcb74

View File

@@ -20,33 +20,6 @@ const (
kIOMessageSystemHasPoweredOn uintptr = 0xe0000300
)
// IOKit / CoreFoundation symbols, resolved once at init.
type iokitFuncs struct {
IORegisterForSystemPower func(refcon uintptr, portRef *uintptr, callback uintptr, notifier *uintptr) uintptr
IODeregisterForSystemPower func(notifier *uintptr) int32
IOAllowPowerChange func(kernelPort uintptr, notificationID uintptr) int32
IOServiceClose func(connect uintptr) int32
IONotificationPortGetRunLoopSource func(port uintptr) uintptr
IONotificationPortDestroy func(port uintptr)
}
type cfFuncs struct {
CFRunLoopGetCurrent func() uintptr
CFRunLoopRun func()
CFRunLoopStop func(rl uintptr)
CFRunLoopAddSource func(rl, source, mode uintptr)
CFRunLoopRemoveSource func(rl, source, mode uintptr)
}
// runLoopSession bundles the handles owned by one CFRunLoop lifetime. A nil
// session means no runloop is active and the next Register must start one.
type runLoopSession struct {
rl uintptr
port uintptr
notifier uintptr
rp uintptr
}
var (
ioKit iokitFuncs
cf cfFuncs
@@ -67,110 +40,49 @@ var (
lifecycleMu sync.Mutex
)
func initLibs() error {
libInitOnce.Do(func() {
iokit, err := purego.Dlopen("/System/Library/Frameworks/IOKit.framework/IOKit", purego.RTLD_NOW|purego.RTLD_GLOBAL)
if err != nil {
libInitErr = fmt.Errorf("dlopen IOKit: %w", err)
return
}
cfLib, err := purego.Dlopen("/System/Library/Frameworks/CoreFoundation.framework/CoreFoundation", purego.RTLD_NOW|purego.RTLD_GLOBAL)
if err != nil {
libInitErr = fmt.Errorf("dlopen CoreFoundation: %w", err)
return
}
purego.RegisterLibFunc(&ioKit.IORegisterForSystemPower, iokit, "IORegisterForSystemPower")
purego.RegisterLibFunc(&ioKit.IODeregisterForSystemPower, iokit, "IODeregisterForSystemPower")
purego.RegisterLibFunc(&ioKit.IOAllowPowerChange, iokit, "IOAllowPowerChange")
purego.RegisterLibFunc(&ioKit.IOServiceClose, iokit, "IOServiceClose")
purego.RegisterLibFunc(&ioKit.IONotificationPortGetRunLoopSource, iokit, "IONotificationPortGetRunLoopSource")
purego.RegisterLibFunc(&ioKit.IONotificationPortDestroy, iokit, "IONotificationPortDestroy")
purego.RegisterLibFunc(&cf.CFRunLoopGetCurrent, cfLib, "CFRunLoopGetCurrent")
purego.RegisterLibFunc(&cf.CFRunLoopRun, cfLib, "CFRunLoopRun")
purego.RegisterLibFunc(&cf.CFRunLoopStop, cfLib, "CFRunLoopStop")
purego.RegisterLibFunc(&cf.CFRunLoopAddSource, cfLib, "CFRunLoopAddSource")
purego.RegisterLibFunc(&cf.CFRunLoopRemoveSource, cfLib, "CFRunLoopRemoveSource")
modeAddr, err := purego.Dlsym(cfLib, "kCFRunLoopCommonModes")
if err != nil {
libInitErr = fmt.Errorf("dlsym kCFRunLoopCommonModes: %w", err)
return
}
// kCFRunLoopCommonModes is a CFStringRef variable. Launder the
// uintptr-to-pointer conversion through the address of our Go
// variable so go vet's unsafeptr analyzer doesn't flag it; the
// address is a stable system-library global, not a Go heap pointer.
cfCommonModes = **(**uintptr)(unsafe.Pointer(&modeAddr))
// Register the callback once for the lifetime of the process. NewCallback slots
// are a finite, non-reclaimable resource, so a single thunk that dispatches
// to the current Detector set is safer than registering per Register().
callbackThunk = purego.NewCallback(powerCallback)
})
return libInitErr
// iokitFuncs holds IOKit symbols resolved once at init.
type iokitFuncs struct {
IORegisterForSystemPower func(refcon uintptr, portRef *uintptr, callback uintptr, notifier *uintptr) uintptr
IODeregisterForSystemPower func(notifier *uintptr) int32
IOAllowPowerChange func(kernelPort uintptr, notificationID uintptr) int32
IOServiceClose func(connect uintptr) int32
IONotificationPortGetRunLoopSource func(port uintptr) uintptr
IONotificationPortDestroy func(port uintptr)
}
// powerCallback is the IOServiceInterestCallback trampoline, invoked on the
// runloop thread. A Go panic crossing the purego boundary has undefined
// behavior, so contain it here.
func powerCallback(refcon, service, messageType, messageArgument uintptr) uintptr {
defer func() {
if r := recover(); r != nil {
log.Errorf("panic in sleep powerCallback: %v", r)
}
}()
switch messageType {
case kIOMessageCanSystemSleep:
// Not acknowledging forces a 30s IOKit timeout before idle sleep.
allowPowerChange(messageArgument)
case kIOMessageSystemWillSleep:
dispatchEvent(EventTypeSleep)
allowPowerChange(messageArgument)
case kIOMessageSystemHasPoweredOn:
dispatchEvent(EventTypeWakeUp)
}
return 0
// cfFuncs holds CoreFoundation symbols resolved once at init.
type cfFuncs struct {
CFRunLoopGetCurrent func() uintptr
CFRunLoopRun func()
CFRunLoopStop func(rl uintptr)
CFRunLoopAddSource func(rl, source, mode uintptr)
CFRunLoopRemoveSource func(rl, source, mode uintptr)
}
func allowPowerChange(messageArgument uintptr) {
serviceRegistryMu.Lock()
var port uintptr
if session != nil {
port = session.rp
}
serviceRegistryMu.Unlock()
if port != 0 {
ioKit.IOAllowPowerChange(port, messageArgument)
}
// runLoopSession bundles the handles owned by one CFRunLoop lifetime. A nil
// session means no runloop is active and the next Register must start one.
type runLoopSession struct {
rl uintptr
port uintptr
notifier uintptr
rp uintptr
}
func dispatchEvent(event EventType) {
serviceRegistryMu.Lock()
detectors := make([]*Detector, 0, len(serviceRegistry))
for d := range serviceRegistry {
detectors = append(detectors, d)
}
serviceRegistryMu.Unlock()
for _, d := range detectors {
d.triggerCallback(event)
}
// detectorSnapshot pins a detector's callback and done channel so dispatch
// runs with values valid at snapshot time, even if a concurrent
// Deregister/Register rewrites the detector's fields.
type detectorSnapshot struct {
detector *Detector
callback func(event EventType)
done <-chan struct{}
}
// Detector delivers sleep and wake events to a registered callback.
type Detector struct {
callback func(event EventType)
done chan struct{}
}
func NewDetector() (*Detector, error) {
if err := initLibs(); err != nil {
return nil, err
}
return &Detector{}, nil
}
// Register installs callback for power events. The first registration starts
// the CFRunLoop on a dedicated OS-locked thread and blocks until IOKit
// registration succeeds or fails; subsequent registrations just add to the
@@ -228,7 +140,6 @@ func (d *Detector) Deregister() error {
return nil
}
sess := session
session = nil
serviceRegistryMu.Unlock()
log.Info("sleep detection service stopping (deregister)")
@@ -237,7 +148,6 @@ func (d *Detector) Deregister() error {
return nil
}
// CFRunLoop and IOKit teardown calls are safe from a non-runloop thread.
if sess.rl != 0 && sess.port != 0 {
source := ioKit.IONotificationPortGetRunLoopSource(sess.port)
cf.CFRunLoopRemoveSource(sess.rl, source, cfCommonModes)
@@ -246,6 +156,13 @@ func (d *Detector) Deregister() error {
n := sess.notifier
ioKit.IODeregisterForSystemPower(&n)
}
// Clear session only after IODeregisterForSystemPower returns so any
// in-flight powerCallback can still look up session.rp to ack sleep.
serviceRegistryMu.Lock()
session = nil
serviceRegistryMu.Unlock()
if sess.rp != 0 {
ioKit.IOServiceClose(sess.rp)
}
@@ -259,12 +176,17 @@ func (d *Detector) Deregister() error {
return nil
}
func (d *Detector) triggerCallback(event EventType) {
cb := d.callback
if cb == nil {
func (d *Detector) triggerCallback(event EventType, cb func(event EventType), done <-chan struct{}) {
if cb == nil || done == nil {
return
}
select {
case <-done:
return
default:
}
doneChan := make(chan struct{})
timeout := time.NewTimer(500 * time.Millisecond)
defer timeout.Stop()
@@ -282,12 +204,113 @@ func (d *Detector) triggerCallback(event EventType) {
select {
case <-doneChan:
case <-d.done:
case <-done:
case <-timeout.C:
log.Warn("sleep callback timed out")
}
}
// NewDetector initializes IOKit/CoreFoundation bindings and returns a Detector.
func NewDetector() (*Detector, error) {
if err := initLibs(); err != nil {
return nil, err
}
return &Detector{}, nil
}
func initLibs() error {
libInitOnce.Do(func() {
iokit, err := purego.Dlopen("/System/Library/Frameworks/IOKit.framework/IOKit", purego.RTLD_NOW|purego.RTLD_GLOBAL)
if err != nil {
libInitErr = fmt.Errorf("dlopen IOKit: %w", err)
return
}
cfLib, err := purego.Dlopen("/System/Library/Frameworks/CoreFoundation.framework/CoreFoundation", purego.RTLD_NOW|purego.RTLD_GLOBAL)
if err != nil {
libInitErr = fmt.Errorf("dlopen CoreFoundation: %w", err)
return
}
purego.RegisterLibFunc(&ioKit.IORegisterForSystemPower, iokit, "IORegisterForSystemPower")
purego.RegisterLibFunc(&ioKit.IODeregisterForSystemPower, iokit, "IODeregisterForSystemPower")
purego.RegisterLibFunc(&ioKit.IOAllowPowerChange, iokit, "IOAllowPowerChange")
purego.RegisterLibFunc(&ioKit.IOServiceClose, iokit, "IOServiceClose")
purego.RegisterLibFunc(&ioKit.IONotificationPortGetRunLoopSource, iokit, "IONotificationPortGetRunLoopSource")
purego.RegisterLibFunc(&ioKit.IONotificationPortDestroy, iokit, "IONotificationPortDestroy")
purego.RegisterLibFunc(&cf.CFRunLoopGetCurrent, cfLib, "CFRunLoopGetCurrent")
purego.RegisterLibFunc(&cf.CFRunLoopRun, cfLib, "CFRunLoopRun")
purego.RegisterLibFunc(&cf.CFRunLoopStop, cfLib, "CFRunLoopStop")
purego.RegisterLibFunc(&cf.CFRunLoopAddSource, cfLib, "CFRunLoopAddSource")
purego.RegisterLibFunc(&cf.CFRunLoopRemoveSource, cfLib, "CFRunLoopRemoveSource")
modeAddr, err := purego.Dlsym(cfLib, "kCFRunLoopCommonModes")
if err != nil {
libInitErr = fmt.Errorf("dlsym kCFRunLoopCommonModes: %w", err)
return
}
// Launder the uintptr-to-pointer conversion through a Go variable so
// go vet's unsafeptr analyzer doesn't flag a system-library global.
cfCommonModes = **(**uintptr)(unsafe.Pointer(&modeAddr))
// NewCallback slots are a finite, non-reclaimable resource, so register
// a single thunk that dispatches to the current Detector set.
callbackThunk = purego.NewCallback(powerCallback)
})
return libInitErr
}
// powerCallback is the IOServiceInterestCallback trampoline, invoked on the
// runloop thread. A Go panic crossing the purego boundary has undefined
// behavior, so contain it here.
func powerCallback(refcon, service, messageType, messageArgument uintptr) uintptr {
defer func() {
if r := recover(); r != nil {
log.Errorf("panic in sleep powerCallback: %v", r)
}
}()
switch messageType {
case kIOMessageCanSystemSleep:
// Not acknowledging forces a 30s IOKit timeout before idle sleep.
allowPowerChange(messageArgument)
case kIOMessageSystemWillSleep:
dispatchEvent(EventTypeSleep)
allowPowerChange(messageArgument)
case kIOMessageSystemHasPoweredOn:
dispatchEvent(EventTypeWakeUp)
}
return 0
}
func allowPowerChange(messageArgument uintptr) {
serviceRegistryMu.Lock()
var port uintptr
if session != nil {
port = session.rp
}
serviceRegistryMu.Unlock()
if port != 0 {
ioKit.IOAllowPowerChange(port, messageArgument)
}
}
func dispatchEvent(event EventType) {
serviceRegistryMu.Lock()
snaps := make([]detectorSnapshot, 0, len(serviceRegistry))
for d := range serviceRegistry {
snaps = append(snaps, detectorSnapshot{
detector: d,
callback: d.callback,
done: d.done,
})
}
serviceRegistryMu.Unlock()
for _, s := range snaps {
s.detector.triggerCallback(event, s.callback, s.done)
}
}
// runRunLoop owns the OS-locked thread that CFRunLoop is pinned to. Setup
// result is reported on errCh so Register can surface failures synchronously.
func runRunLoop(errCh chan<- error) {
@@ -313,8 +336,8 @@ func runRunLoop(errCh chan<- error) {
cf.CFRunLoopRun()
}
// setupSession performs the IOKit registration on the current thread.
// Panics are converted to errors so runRunLoop never leaves errCh unsent.
// setupSession performs the IOKit registration on the current thread. Panics
// are converted to errors so runRunLoop never leaves errCh unsent.
func setupSession() (s *runLoopSession, err error) {
defer func() {
if r := recover(); r != nil {