mirror of
https://github.com/netbirdio/netbird.git
synced 2026-05-18 22:59:57 +00:00
Merge branch 'main' into proto-ipv6-overlay
This commit is contained in:
@@ -5,6 +5,7 @@ import (
|
||||
"fmt"
|
||||
"net"
|
||||
"net/netip"
|
||||
"runtime"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
@@ -177,7 +178,12 @@ func getDefaultGateway() (gateway net.IP, localIP net.IP, err error) {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
_, gateway, localIP, err = router.Route(net.IPv4zero)
|
||||
dst := net.IPv4zero
|
||||
if runtime.GOOS == "linux" {
|
||||
// go-netroute v0.4.0 rejects unspecified destinations client-side on Linux.
|
||||
dst = net.IPv4(0, 0, 0, 1)
|
||||
}
|
||||
_, gateway, localIP, err = router.Route(dst)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
@@ -196,7 +202,12 @@ func getDefaultGateway6() (gateway net.IP, localIP net.IP, err error) {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
_, gateway, localIP, err = router.Route(net.IPv6zero)
|
||||
dst := net.IPv6zero
|
||||
if runtime.GOOS == "linux" {
|
||||
// ::2
|
||||
dst = net.IP{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2}
|
||||
}
|
||||
_, gateway, localIP, err = router.Route(dst)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
@@ -344,6 +344,22 @@ func GetNextHop(ip netip.Addr) (Nexthop, error) {
|
||||
if err != nil {
|
||||
return Nexthop{}, fmt.Errorf("new netroute: %w", err)
|
||||
}
|
||||
|
||||
// go-netroute v0.4.0 rejects unspecified destinations on Linux with a hard
|
||||
// client-side check. Substitute the lowest non-loopback address so the
|
||||
// lookup falls through to the default route (::1 / 127.0.0.1 would match
|
||||
// loopback, ::/0.0.0.0 are unspec). BSD/Windows pass the query straight to
|
||||
// the kernel and need no substitution.
|
||||
if runtime.GOOS == "linux" && ip.IsUnspecified() {
|
||||
if ip.Is6() {
|
||||
// ::2
|
||||
ip = netip.AddrFrom16([16]byte{15: 2})
|
||||
} else {
|
||||
// 0.0.0.1
|
||||
ip = netip.AddrFrom4([4]byte{0, 0, 0, 1})
|
||||
}
|
||||
}
|
||||
|
||||
intf, gateway, preferredSrc, err := r.Route(ip.AsSlice())
|
||||
if err != nil {
|
||||
log.Debugf("Failed to get route for %s: %v", ip, err)
|
||||
|
||||
@@ -355,9 +355,13 @@ func TestAddRouteToNonVPNIntf(t *testing.T) {
|
||||
require.NoError(t, err, "Should be able to get IPv4 default route")
|
||||
t.Logf("Initial IPv4 next hop: %s", initialNextHopV4)
|
||||
|
||||
if testCase.prefix.Addr().Is6() && !testCase.expectError {
|
||||
ensureIPv6DefaultRoute(t)
|
||||
}
|
||||
|
||||
initialNextHopV6, err := GetNextHop(netip.IPv6Unspecified())
|
||||
if testCase.prefix.Addr().Is6() &&
|
||||
(errors.Is(err, vars.ErrRouteNotFound) || initialNextHopV6.Intf != nil && strings.HasPrefix(initialNextHopV6.Intf.Name, "utun")) {
|
||||
initialNextHopV6.Intf != nil && strings.HasPrefix(initialNextHopV6.Intf.Name, "utun") {
|
||||
t.Skip("Skipping test as no ipv6 default route is available")
|
||||
}
|
||||
if err != nil && !errors.Is(err, vars.ErrRouteNotFound) {
|
||||
|
||||
30
client/internal/routemanager/systemops/v6route_bsd_test.go
Normal file
30
client/internal/routemanager/systemops/v6route_bsd_test.go
Normal file
@@ -0,0 +1,30 @@
|
||||
//go:build darwin || dragonfly || freebsd || netbsd || openbsd
|
||||
|
||||
package systemops
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"os/exec"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// ensureIPv6DefaultRoute makes sure an IPv6 default route is present for the
// duration of the test by asking route(8) to add one over lo0.
//
// Outcomes:
//   - the route was added: a cleanup is registered that deletes the IPv6
//     default route again when the test finishes;
//   - route(8) reports "route already in table": the existing default is kept
//     and nothing is registered for cleanup;
//   - any other failure: the test is skipped with the command output.
func ensureIPv6DefaultRoute(t *testing.T) {
	t.Helper()

	addCmd := exec.Command("route", "-6", "add", "default", "-iface", "lo0")
	addOut, addErr := addCmd.CombinedOutput()

	switch {
	case addErr == nil:
		// Route installed by us; continue to cleanup registration.
	case bytes.Contains(addOut, []byte("route already in table")):
		// A default already exists; nothing to install or clean up.
		return
	default:
		t.Skipf("install IPv6 fallback default route: %v: %s", addErr, addOut)
	}

	t.Cleanup(func() {
		delOut, delErr := exec.Command("route", "-6", "delete", "default").CombinedOutput()
		if delErr != nil {
			// Best effort: a failed removal is only logged, never fatal.
			t.Logf("delete IPv6 fallback default route: %v: %s", delErr, delOut)
		}
	})
}
|
||||
41
client/internal/routemanager/systemops/v6route_linux_test.go
Normal file
41
client/internal/routemanager/systemops/v6route_linux_test.go
Normal file
@@ -0,0 +1,41 @@
|
||||
//go:build linux && !android
|
||||
|
||||
package systemops
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"net"
|
||||
"syscall"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
"github.com/vishvananda/netlink"
|
||||
)
|
||||
|
||||
// ensureIPv6DefaultRoute installs a low-preference IPv6 default route via the
|
||||
// loopback interface so route lookups for global IPv6 prefixes resolve in
|
||||
// environments without v6 connectivity. Any pre-existing default route wins
|
||||
// because of its lower metric.
|
||||
func ensureIPv6DefaultRoute(t *testing.T) {
|
||||
t.Helper()
|
||||
|
||||
lo, err := netlink.LinkByName("lo")
|
||||
require.NoError(t, err, "find loopback interface")
|
||||
|
||||
route := &netlink.Route{
|
||||
Dst: &net.IPNet{IP: net.IPv6zero, Mask: net.CIDRMask(0, 128)},
|
||||
LinkIndex: lo.Attrs().Index,
|
||||
Priority: 1 << 20,
|
||||
}
|
||||
if err := netlink.RouteAdd(route); err != nil {
|
||||
if errors.Is(err, syscall.EEXIST) {
|
||||
return
|
||||
}
|
||||
t.Skipf("install IPv6 fallback default route: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
if err := netlink.RouteDel(route); err != nil && !errors.Is(err, syscall.ESRCH) {
|
||||
t.Logf("delete IPv6 fallback default route: %v", err)
|
||||
}
|
||||
})
|
||||
}
|
||||
@@ -0,0 +1,34 @@
|
||||
//go:build windows
|
||||
|
||||
package systemops
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"os/exec"
|
||||
"testing"
|
||||
)
|
||||
|
||||
const loopbackIfaceWindows = "Loopback Pseudo-Interface 1"
|
||||
|
||||
// ensureIPv6DefaultRoute installs an IPv6 default route via the loopback
|
||||
// interface so route lookups for global IPv6 prefixes resolve in environments
|
||||
// without v6 connectivity. If a default already exists it is left alone.
|
||||
func ensureIPv6DefaultRoute(t *testing.T) {
|
||||
t.Helper()
|
||||
|
||||
script := `New-NetRoute -DestinationPrefix "::/0" -InterfaceAlias "` + loopbackIfaceWindows + `" -RouteMetric 9999 -PolicyStore ActiveStore -ErrorAction Stop`
|
||||
out, err := exec.Command("powershell", "-Command", script).CombinedOutput()
|
||||
if err != nil {
|
||||
// Existing default; nothing to install or clean up.
|
||||
if bytes.Contains(out, []byte("already exists")) {
|
||||
return
|
||||
}
|
||||
t.Skipf("install IPv6 fallback default route: %v: %s", err, out)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
script := `Remove-NetRoute -DestinationPrefix "::/0" -InterfaceAlias "` + loopbackIfaceWindows + `" -Confirm:$false -ErrorAction Stop`
|
||||
if out, err := exec.Command("powershell", "-Command", script).CombinedOutput(); err != nil {
|
||||
t.Logf("delete IPv6 fallback default route: %v: %s", err, out)
|
||||
}
|
||||
})
|
||||
}
|
||||
@@ -6,7 +6,6 @@ import (
|
||||
"fmt"
|
||||
"net"
|
||||
"runtime"
|
||||
"time"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
|
||||
@@ -28,6 +27,10 @@ func NewWGIfaceMonitor() *WGIfaceMonitor {
|
||||
|
||||
// Start begins monitoring the WireGuard interface.
|
||||
// It relies on the provided context cancellation to stop.
|
||||
//
|
||||
// On Linux the watcher is event-driven (RTNLGRP_LINK netlink subscription)
|
||||
// to avoid the allocation churn of repeatedly dumping the kernel link
|
||||
// table; on other platforms it falls back to a low-frequency poll.
|
||||
func (m *WGIfaceMonitor) Start(ctx context.Context, ifaceName string) (shouldRestart bool, err error) {
|
||||
defer close(m.done)
|
||||
|
||||
@@ -56,31 +59,7 @@ func (m *WGIfaceMonitor) Start(ctx context.Context, ifaceName string) (shouldRes
|
||||
|
||||
log.Infof("Interface monitor: watching %s (index: %d)", ifaceName, expectedIndex)
|
||||
|
||||
ticker := time.NewTicker(2 * time.Second)
|
||||
defer ticker.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
log.Infof("Interface monitor: stopped for %s", ifaceName)
|
||||
return false, fmt.Errorf("wg interface monitor stopped: %v", ctx.Err())
|
||||
case <-ticker.C:
|
||||
currentIndex, err := getInterfaceIndex(ifaceName)
|
||||
if err != nil {
|
||||
// Interface was deleted
|
||||
log.Infof("Interface monitor: %s deleted", ifaceName)
|
||||
return true, fmt.Errorf("interface %s deleted: %w", ifaceName, err)
|
||||
}
|
||||
|
||||
// Check if interface index changed (interface was recreated)
|
||||
if currentIndex != expectedIndex {
|
||||
log.Infof("Interface monitor: %s recreated (index changed from %d to %d), restarting engine",
|
||||
ifaceName, expectedIndex, currentIndex)
|
||||
return true, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return watchInterface(ctx, ifaceName, expectedIndex)
|
||||
}
|
||||
|
||||
// getInterfaceIndex returns the index of a network interface by name.
|
||||
|
||||
134
client/internal/wg_iface_monitor_linux.go
Normal file
134
client/internal/wg_iface_monitor_linux.go
Normal file
@@ -0,0 +1,134 @@
|
||||
//go:build linux
|
||||
|
||||
package internal
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"syscall"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
"github.com/vishvananda/netlink"
|
||||
)
|
||||
|
||||
// watchInterface uses an RTNLGRP_LINK netlink subscription to detect
|
||||
// deletion or recreation of the WireGuard interface.
|
||||
//
|
||||
// The previous implementation polled net.InterfaceByName every 2 s, which
|
||||
// on Linux issues syscall.NetlinkRIB(RTM_GETLINK, ...) and dumps the
|
||||
// entire kernel link table on every call. On hosts with many veth
|
||||
// interfaces (containers, bridges) the resulting allocation churn was on
|
||||
// the order of ~1 GB/day from this single ticker, which on small ARM
|
||||
// hosts manifested as a slow RSS climb (see netbirdio/netbird#3678).
|
||||
//
|
||||
// The event-driven version below allocates only when the kernel actually
|
||||
// publishes a link event for the tracked interface — typically zero
|
||||
// allocations between events.
|
||||
func watchInterface(ctx context.Context, ifaceName string, expectedIndex int) (bool, error) {
|
||||
done := make(chan struct{})
|
||||
defer close(done)
|
||||
|
||||
// Buffer the channel to absorb event bursts (e.g. when many veth
|
||||
// pairs are created/destroyed at once by container runtimes).
|
||||
linkChan := make(chan netlink.LinkUpdate, 32)
|
||||
if err := netlink.LinkSubscribe(linkChan, done); err != nil {
|
||||
// Return shouldRestart=true so the engine recovers monitoring
|
||||
// via triggerClientRestart instead of silently losing it for
|
||||
// the rest of the process lifetime.
|
||||
return true, fmt.Errorf("subscribe to link updates: %w", err)
|
||||
}
|
||||
|
||||
// Race window: the interface could have been deleted (or recreated)
|
||||
// between the initial getInterfaceIndex() in Start and LinkSubscribe
|
||||
// completing its handshake with the kernel. Re-check explicitly so we
|
||||
// do not block forever waiting for an event that already fired.
|
||||
if currentIndex, err := getInterfaceIndex(ifaceName); err != nil {
|
||||
log.Infof("Interface monitor: %s deleted before subscription completed", ifaceName)
|
||||
return true, fmt.Errorf("interface %s deleted: %w", ifaceName, err)
|
||||
} else if currentIndex != expectedIndex {
|
||||
log.Infof("Interface monitor: %s recreated (index changed from %d to %d) before subscription completed",
|
||||
ifaceName, expectedIndex, currentIndex)
|
||||
return true, nil
|
||||
}
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
log.Infof("Interface monitor: stopped for %s", ifaceName)
|
||||
return false, fmt.Errorf("wg interface monitor stopped: %w", ctx.Err())
|
||||
|
||||
case update, ok := <-linkChan:
|
||||
if !ok {
|
||||
// The vishvananda/netlink subscription goroutine closes
|
||||
// the channel on receive errors. Signal the engine to
|
||||
// restart so monitoring is re-established instead of
|
||||
// silently ending.
|
||||
log.Warnf("Interface monitor: link subscription channel closed unexpectedly for %s", ifaceName)
|
||||
return true, fmt.Errorf("link subscription channel closed unexpectedly")
|
||||
}
|
||||
if restart, err := inspectLinkEvent(update, ifaceName, expectedIndex); restart {
|
||||
return true, err
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// inspectLinkEvent classifies a single netlink link update against the
|
||||
// tracked WireGuard interface. It returns (true, err) when the engine
|
||||
// should restart monitoring; (false, nil) means the event is unrelated
|
||||
// and the caller should keep waiting.
|
||||
//
|
||||
// The error component, when non-nil, describes the kernel-side reason
|
||||
// (deletion or rename); the recreation case returns (true, nil) since
|
||||
// no error condition is reported.
|
||||
func inspectLinkEvent(update netlink.LinkUpdate, ifaceName string, expectedIndex int) (bool, error) {
|
||||
eventIndex := int(update.Index)
|
||||
eventName := ""
|
||||
if attrs := update.Attrs(); attrs != nil {
|
||||
eventName = attrs.Name
|
||||
}
|
||||
|
||||
switch update.Header.Type {
|
||||
case syscall.RTM_DELLINK:
|
||||
return inspectDelLink(eventIndex, ifaceName, expectedIndex)
|
||||
case syscall.RTM_NEWLINK:
|
||||
return inspectNewLink(eventIndex, eventName, ifaceName, expectedIndex)
|
||||
}
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// inspectDelLink reports a restart when an RTM_DELLINK arrives for the
|
||||
// tracked interface index.
|
||||
func inspectDelLink(eventIndex int, ifaceName string, expectedIndex int) (bool, error) {
|
||||
if eventIndex != expectedIndex {
|
||||
return false, nil
|
||||
}
|
||||
log.Infof("Interface monitor: %s deleted", ifaceName)
|
||||
return true, fmt.Errorf("interface %s deleted", ifaceName)
|
||||
}
|
||||
|
||||
// inspectNewLink reports a restart when an RTM_NEWLINK either:
|
||||
//
|
||||
// 1. Introduces a link with our name at a different index (recreation
|
||||
// after a delete), or
|
||||
//
|
||||
// 2. Reports a link still at our index but with a different name
|
||||
// (in-place rename). The previous polling implementation caught
|
||||
// this implicitly because net.InterfaceByName(ifaceName) would
|
||||
// start failing; the event-driven version has to test it.
|
||||
//
|
||||
// Same name + same index is just a flag/state change on the existing
|
||||
// interface and is ignored.
|
||||
func inspectNewLink(eventIndex int, eventName, ifaceName string, expectedIndex int) (bool, error) {
|
||||
if eventName == ifaceName && eventIndex != expectedIndex {
|
||||
log.Infof("Interface monitor: %s recreated (index changed from %d to %d), restarting engine",
|
||||
ifaceName, expectedIndex, eventIndex)
|
||||
return true, nil
|
||||
}
|
||||
if eventIndex == expectedIndex && eventName != "" && eventName != ifaceName {
|
||||
log.Infof("Interface monitor: %s renamed to %s (index %d), restarting engine",
|
||||
ifaceName, eventName, expectedIndex)
|
||||
return true, fmt.Errorf("interface %s renamed to %s", ifaceName, eventName)
|
||||
}
|
||||
return false, nil
|
||||
}
|
||||
56
client/internal/wg_iface_monitor_other.go
Normal file
56
client/internal/wg_iface_monitor_other.go
Normal file
@@ -0,0 +1,56 @@
|
||||
//go:build !linux
|
||||
|
||||
package internal
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// watchInterface polls net.InterfaceByName at a fixed interval to detect
|
||||
// deletion or recreation of the WireGuard interface.
|
||||
//
|
||||
// This is the fallback used on non-Linux desktop and server platforms
|
||||
// (darwin, windows, freebsd). It is also compiled on android and ios so
|
||||
// the package builds on every supported GOOS, but it is never reached
|
||||
// at runtime there because Start() in wg_iface_monitor.go exits early
|
||||
// on mobile platforms.
|
||||
//
|
||||
// The Linux build (see wg_iface_monitor_linux.go) uses an event-driven
|
||||
// RTNLGRP_LINK netlink subscription instead, because on Linux
|
||||
// net.InterfaceByName issues syscall.NetlinkRIB(RTM_GETLINK, ...) which
|
||||
// dumps the entire kernel link table on every call and produces
|
||||
// significant allocation churn (netbirdio/netbird#3678).
|
||||
//
|
||||
// Windows is also reported in #3678 as affected by RSS climb. A future
|
||||
// follow-up could implement an event-driven watcher there using
|
||||
// NotifyIpInterfaceChange from iphlpapi.
|
||||
func watchInterface(ctx context.Context, ifaceName string, expectedIndex int) (bool, error) {
|
||||
ticker := time.NewTicker(2 * time.Second)
|
||||
defer ticker.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
log.Infof("Interface monitor: stopped for %s", ifaceName)
|
||||
return false, fmt.Errorf("wg interface monitor stopped: %w", ctx.Err())
|
||||
case <-ticker.C:
|
||||
currentIndex, err := getInterfaceIndex(ifaceName)
|
||||
if err != nil {
|
||||
// Interface was deleted
|
||||
log.Infof("Interface monitor: %s deleted", ifaceName)
|
||||
return true, fmt.Errorf("interface %s deleted: %w", ifaceName, err)
|
||||
}
|
||||
|
||||
// Check if interface index changed (interface was recreated)
|
||||
if currentIndex != expectedIndex {
|
||||
log.Infof("Interface monitor: %s recreated (index changed from %d to %d), restarting engine",
|
||||
ifaceName, expectedIndex, currentIndex)
|
||||
return true, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user