Compare commits

...

1 Commits

Author SHA1 Message Date
Viktor Liu
f49ec5db2e Recover from tun device read/write panics and restart the client 2026-06-12 15:41:54 +02:00
3 changed files with 123 additions and 10 deletions

View File

@@ -1,10 +1,13 @@
package device
import (
"fmt"
"net/netip"
"runtime/debug"
"sync"
"sync/atomic"
log "github.com/sirupsen/logrus"
"golang.zx2c4.com/wireguard/tun"
)
@@ -41,10 +44,13 @@ type PacketCapture interface {
type FilteredDevice struct {
tun.Device
filter PacketFilter
capture atomic.Pointer[PacketCapture]
mutex sync.RWMutex
closeOnce sync.Once
filter PacketFilter
capture atomic.Pointer[PacketCapture]
// panicHandler is invoked after a panic in the underlying device is
// recovered in Read or Write.
panicHandler atomic.Pointer[func()]
mutex sync.RWMutex
closeOnce sync.Once
}
// newDeviceFilter constructor function
@@ -70,7 +76,7 @@ func (d *FilteredDevice) Close() error {
// Read wraps read method with filtering feature
func (d *FilteredDevice) Read(bufs [][]byte, sizes []int, offset int) (n int, err error) {
if n, err = d.Device.Read(bufs, sizes, offset); err != nil {
if n, err = d.deviceRead(bufs, sizes, offset); err != nil {
return 0, err
}
@@ -112,7 +118,7 @@ func (d *FilteredDevice) Write(bufs [][]byte, offset int) (int, error) {
d.mutex.RUnlock()
if filter == nil {
return d.Device.Write(bufs, offset)
return d.deviceWrite(bufs, offset)
}
filteredBufs := make([][]byte, 0, len(bufs))
@@ -125,9 +131,44 @@ func (d *FilteredDevice) Write(bufs [][]byte, offset int) (int, error) {
}
}
n, err := d.Device.Write(filteredBufs, offset)
n += dropped
return n, err
n, err := d.deviceWrite(filteredBufs, offset)
if err != nil {
return n, err
}
return n + dropped, nil
}
// deviceRead calls the underlying device Read, recovering from panics in the
// wintun read path and converting them into errors.
func (d *FilteredDevice) deviceRead(bufs [][]byte, sizes []int, offset int) (n int, err error) {
defer d.recoverFromPanic("read", &n, &err)
return d.Device.Read(bufs, sizes, offset)
}
// deviceWrite calls the underlying device Write, recovering from panics in the
// wintun write path and converting them into errors.
func (d *FilteredDevice) deviceWrite(bufs [][]byte, offset int) (n int, err error) {
defer d.recoverFromPanic("write", &n, &err)
return d.Device.Write(bufs, offset)
}
// recoverFromPanic converts a panic in the underlying device into a regular
// error and invokes the registered panic handler. The wintun read path is
// known to panic on zero-length packets that third-party filter drivers can
// place in the ring.
func (d *FilteredDevice) recoverFromPanic(op string, n *int, err *error) {
r := recover()
if r == nil {
return
}
log.Errorf("recovered panic in tun device %s: %v\n%s", op, r, debug.Stack())
*n = 0
*err = fmt.Errorf("tun device %s panic: %v", op, r)
if handler := d.panicHandler.Load(); handler != nil {
(*handler)()
}
}
// SetFilter sets packet filter to device
@@ -137,6 +178,17 @@ func (d *FilteredDevice) SetFilter(filter PacketFilter) {
d.mutex.Unlock()
}
// SetPanicHandler registers a handler invoked after a recovered panic in Read
// or Write. The device is unusable after such a panic; the handler should
// trigger recreation of the interface. Pass nil to remove.
func (d *FilteredDevice) SetPanicHandler(handler func()) {
if handler == nil {
d.panicHandler.Store(nil)
return
}
d.panicHandler.Store(&handler)
}
// SetCapture sets or clears the packet capture sink. Pass nil to disable.
// Uses atomic store so the hot path (Read/Write) is a single pointer load
// with no locking overhead when capture is off.

View File

@@ -221,3 +221,60 @@ func TestDeviceWrapperRead(t *testing.T) {
}
})
}
func TestDeviceWrapperReadPanic(t *testing.T) {
ctrl := gomock.NewController(t)
defer ctrl.Finish()
tun := mocks.NewMockDevice(ctrl)
tun.EXPECT().Read(gomock.Any(), gomock.Any(), gomock.Any()).
DoAndReturn(func(bufs [][]byte, sizes []int, offset int) (int, error) {
// Reproduce the wintun zero-length packet panic (index out of range).
packet := make([]byte, 0)
return int(packet[0]), nil
})
wrapped := newDeviceFilter(tun)
handlerCalled := false
wrapped.SetPanicHandler(func() { handlerCalled = true })
n, err := wrapped.Read([][]byte{{}}, []int{0}, 0)
if err == nil {
t.Errorf("expected error from recovered panic, got nil")
}
if n != 0 {
t.Errorf("expected n=0, got %d", n)
}
if !handlerCalled {
t.Errorf("expected panic handler to be called")
}
}
func TestDeviceWrapperWritePanic(t *testing.T) {
ctrl := gomock.NewController(t)
defer ctrl.Finish()
tun := mocks.NewMockDevice(ctrl)
tun.EXPECT().Write(gomock.Any(), gomock.Any()).
DoAndReturn(func(bufs [][]byte, offset int) (int, error) {
packet := make([]byte, 0)
return int(packet[0]), nil
})
wrapped := newDeviceFilter(tun)
handlerCalled := false
wrapped.SetPanicHandler(func() { handlerCalled = true })
n, err := wrapped.Write([][]byte{{0x45, 0x00}}, 0)
if err == nil {
t.Errorf("expected error from recovered panic, got nil")
}
if n != 0 {
t.Errorf("expected n=0, got %d", n)
}
if !handlerCalled {
t.Errorf("expected panic handler to be called")
}
}

View File

@@ -240,7 +240,7 @@ type Engine struct {
syncStore syncstore.Store
syncStoreDir string
flowManager nftypes.FlowManager
flowManager nftypes.FlowManager
// auto-update
updateManager *updater.Manager
@@ -531,6 +531,10 @@ func (e *Engine) Start(netbirdConfig *mgmProto.NetbirdConfig, mgmtURL *url.URL)
return fmt.Errorf("create wg interface: %w", err)
}
if filteredDevice := e.wgInterface.GetDevice(); filteredDevice != nil {
filteredDevice.SetPanicHandler(e.triggerClientRestart)
}
if err := e.createFirewall(); err != nil {
e.close()
return err