[client] Use native windows sock opts to avoid routing loops (#4314)

- Move `util/grpc` and `util/net` to `client` so `internal` packages can be accessed
 - Add methods to return the next best interface after the NetBird interface.
- Use `IP_UNICAST_IF` sock opt to force the outgoing interface for the NetBird `net.Dialer` and `net.ListenerConfig` to avoid routing loops. The interface is picked by the new route lookup method.
- Some refactoring to avoid import cycles
- Old behavior is available through `NB_USE_LEGACY_ROUTING=true` env var
This commit is contained in:
Viktor Liu
2025-09-20 09:31:04 +02:00
committed by GitHub
parent 90577682e4
commit 55126f990c
77 changed files with 1180 additions and 606 deletions

View File

@@ -36,9 +36,9 @@ import (
"github.com/netbirdio/netbird/client/internal/routemanager/vars"
"github.com/netbirdio/netbird/client/internal/routeselector"
"github.com/netbirdio/netbird/client/internal/statemanager"
nbnet "github.com/netbirdio/netbird/client/net"
"github.com/netbirdio/netbird/route"
relayClient "github.com/netbirdio/netbird/shared/relay/client"
nbnet "github.com/netbirdio/netbird/util/net"
"github.com/netbirdio/netbird/version"
)
@@ -108,6 +108,10 @@ func NewManager(config ManagerConfig) *DefaultManager {
notifier := notifier.NewNotifier()
sysOps := systemops.NewSysOps(config.WGInterface, notifier)
if runtime.GOOS == "windows" && config.WGInterface != nil {
nbnet.SetVPNInterfaceName(config.WGInterface.Name())
}
dm := &DefaultManager{
ctx: mCTX,
stop: cancel,
@@ -208,7 +212,7 @@ func (m *DefaultManager) Init() error {
return nil
}
if err := m.sysOps.CleanupRouting(nil); err != nil {
if err := m.sysOps.CleanupRouting(nil, nbnet.AdvancedRouting()); err != nil {
log.Warnf("Failed cleaning up routing: %v", err)
}
@@ -219,7 +223,7 @@ func (m *DefaultManager) Init() error {
ips := resolveURLsToIPs(initialAddresses)
if err := m.sysOps.SetupRouting(ips, m.stateManager); err != nil {
if err := m.sysOps.SetupRouting(ips, m.stateManager, nbnet.AdvancedRouting()); err != nil {
return fmt.Errorf("setup routing: %w", err)
}
@@ -285,11 +289,15 @@ func (m *DefaultManager) Stop(stateManager *statemanager.Manager) {
}
if !nbnet.CustomRoutingDisabled() && !m.disableClientRoutes {
if err := m.sysOps.CleanupRouting(stateManager); err != nil {
if err := m.sysOps.CleanupRouting(stateManager, nbnet.AdvancedRouting()); err != nil {
log.Errorf("Error cleaning up routing: %v", err)
} else {
log.Info("Routing cleanup complete")
}
if runtime.GOOS == "windows" {
nbnet.SetVPNInterfaceName("")
}
}
m.mux.Lock()

View File

@@ -12,11 +12,11 @@ import (
"github.com/netbirdio/netbird/client/internal/statemanager"
)
func (r *SysOps) SetupRouting([]net.IP, *statemanager.Manager) error {
func (r *SysOps) SetupRouting([]net.IP, *statemanager.Manager, bool) error {
return nil
}
func (r *SysOps) CleanupRouting(*statemanager.Manager) error {
func (r *SysOps) CleanupRouting(*statemanager.Manager, bool) error {
return nil
}

View File

@@ -3,7 +3,6 @@
package systemops
import (
"context"
"errors"
"fmt"
"net"
@@ -22,7 +21,7 @@ import (
"github.com/netbirdio/netbird/client/internal/routemanager/util"
"github.com/netbirdio/netbird/client/internal/routemanager/vars"
"github.com/netbirdio/netbird/client/internal/statemanager"
nbnet "github.com/netbirdio/netbird/util/net"
"github.com/netbirdio/netbird/client/net/hooks"
)
const localSubnetsCacheTTL = 15 * time.Minute
@@ -96,9 +95,9 @@ func (r *SysOps) cleanupRefCounter(stateManager *statemanager.Manager) error {
return nil
}
// TODO: Remove hooks selectively
nbnet.RemoveDialerHooks()
nbnet.RemoveListenerHooks()
hooks.RemoveWriteHooks()
hooks.RemoveCloseHooks()
hooks.RemoveAddressRemoveHooks()
if err := r.refCounter.Flush(); err != nil {
return fmt.Errorf("flush route manager: %w", err)
@@ -290,12 +289,7 @@ func (r *SysOps) genericRemoveVPNRoute(prefix netip.Prefix, intf *net.Interface)
}
func (r *SysOps) setupHooks(initAddresses []net.IP, stateManager *statemanager.Manager) error {
beforeHook := func(connID nbnet.ConnectionID, ip net.IP) error {
prefix, err := util.GetPrefixFromIP(ip)
if err != nil {
return fmt.Errorf("convert ip to prefix: %w", err)
}
beforeHook := func(connID hooks.ConnectionID, prefix netip.Prefix) error {
if _, err := r.refCounter.IncrementWithID(string(connID), prefix, struct{}{}); err != nil {
return fmt.Errorf("adding route reference: %v", err)
}
@@ -304,7 +298,7 @@ func (r *SysOps) setupHooks(initAddresses []net.IP, stateManager *statemanager.M
return nil
}
afterHook := func(connID nbnet.ConnectionID) error {
afterHook := func(connID hooks.ConnectionID) error {
if err := r.refCounter.DecrementWithID(string(connID)); err != nil {
return fmt.Errorf("remove route reference: %w", err)
}
@@ -317,36 +311,20 @@ func (r *SysOps) setupHooks(initAddresses []net.IP, stateManager *statemanager.M
var merr *multierror.Error
for _, ip := range initAddresses {
if err := beforeHook("init", ip); err != nil {
merr = multierror.Append(merr, fmt.Errorf("add initial route for %s: %w", ip, err))
prefix, err := util.GetPrefixFromIP(ip)
if err != nil {
merr = multierror.Append(merr, fmt.Errorf("invalid IP address %s: %w", ip, err))
continue
}
if err := beforeHook("init", prefix); err != nil {
merr = multierror.Append(merr, fmt.Errorf("add initial route for %s: %w", prefix, err))
}
}
nbnet.AddDialerHook(func(ctx context.Context, connID nbnet.ConnectionID, resolvedIPs []net.IPAddr) error {
if ctx.Err() != nil {
return ctx.Err()
}
hooks.AddWriteHook(beforeHook)
hooks.AddCloseHook(afterHook)
var merr *multierror.Error
for _, ip := range resolvedIPs {
merr = multierror.Append(merr, beforeHook(connID, ip.IP))
}
return nberrors.FormatErrorOrNil(merr)
})
nbnet.AddDialerCloseHook(func(connID nbnet.ConnectionID, conn *net.Conn) error {
return afterHook(connID)
})
nbnet.AddListenerWriteHook(func(connID nbnet.ConnectionID, ip *net.IPAddr, data []byte) error {
return beforeHook(connID, ip.IP)
})
nbnet.AddListenerCloseHook(func(connID nbnet.ConnectionID, conn net.PacketConn) error {
return afterHook(connID)
})
nbnet.AddListenerAddressRemoveHook(func(connID nbnet.ConnectionID, prefix netip.Prefix) error {
hooks.AddAddressRemoveHook(func(connID hooks.ConnectionID, prefix netip.Prefix) error {
if _, err := r.refCounter.Decrement(prefix); err != nil {
return fmt.Errorf("remove route reference: %w", err)
}

View File

@@ -22,6 +22,7 @@ import (
"github.com/netbirdio/netbird/client/iface"
"github.com/netbirdio/netbird/client/internal/routemanager/vars"
nbnet "github.com/netbirdio/netbird/client/net"
)
type dialer interface {
@@ -143,10 +144,11 @@ func TestAddVPNRoute(t *testing.T) {
wgInterface := createWGInterface(t, fmt.Sprintf("utun53%d", n), "100.65.75.2/24", 33100+n)
r := NewSysOps(wgInterface, nil)
err := r.SetupRouting(nil, nil)
advancedRouting := nbnet.AdvancedRouting()
err := r.SetupRouting(nil, nil, advancedRouting)
require.NoError(t, err)
t.Cleanup(func() {
assert.NoError(t, r.CleanupRouting(nil))
assert.NoError(t, r.CleanupRouting(nil, advancedRouting))
})
intf, err := net.InterfaceByName(wgInterface.Name())
@@ -341,10 +343,11 @@ func TestAddRouteToNonVPNIntf(t *testing.T) {
wgInterface := createWGInterface(t, fmt.Sprintf("utun54%d", n), "100.65.75.2/24", 33200+n)
r := NewSysOps(wgInterface, nil)
err := r.SetupRouting(nil, nil)
advancedRouting := nbnet.AdvancedRouting()
err := r.SetupRouting(nil, nil, advancedRouting)
require.NoError(t, err)
t.Cleanup(func() {
assert.NoError(t, r.CleanupRouting(nil))
assert.NoError(t, r.CleanupRouting(nil, advancedRouting))
})
initialNextHopV4, err := GetNextHop(netip.IPv4Unspecified())
@@ -484,10 +487,11 @@ func setupTestEnv(t *testing.T) {
})
r := NewSysOps(wgInterface, nil)
err := r.SetupRouting(nil, nil)
advancedRouting := nbnet.AdvancedRouting()
err := r.SetupRouting(nil, nil, advancedRouting)
require.NoError(t, err, "setupRouting should not return err")
t.Cleanup(func() {
assert.NoError(t, r.CleanupRouting(nil))
assert.NoError(t, r.CleanupRouting(nil, advancedRouting))
})
index, err := net.InterfaceByName(wgInterface.Name())

View File

@@ -12,14 +12,14 @@ import (
"github.com/netbirdio/netbird/client/internal/statemanager"
)
func (r *SysOps) SetupRouting([]net.IP, *statemanager.Manager) error {
func (r *SysOps) SetupRouting([]net.IP, *statemanager.Manager, bool) error {
r.mu.Lock()
defer r.mu.Unlock()
r.prefixes = make(map[netip.Prefix]struct{})
return nil
}
func (r *SysOps) CleanupRouting(*statemanager.Manager) error {
func (r *SysOps) CleanupRouting(*statemanager.Manager, bool) error {
r.mu.Lock()
defer r.mu.Unlock()

View File

@@ -20,7 +20,7 @@ import (
"github.com/netbirdio/netbird/client/internal/routemanager/sysctl"
"github.com/netbirdio/netbird/client/internal/routemanager/vars"
"github.com/netbirdio/netbird/client/internal/statemanager"
nbnet "github.com/netbirdio/netbird/util/net"
nbnet "github.com/netbirdio/netbird/client/net"
)
// IPRule contains IP rule information for debugging
@@ -94,15 +94,15 @@ func getSetupRules() []ruleParams {
// Rule 2 (VPN Traffic Routing): Directs all remaining traffic to the 'NetbirdVPNTableID' custom routing table.
// This table is where a default route or other specific routes received from the management server are configured,
// enabling VPN connectivity.
func (r *SysOps) SetupRouting(initAddresses []net.IP, stateManager *statemanager.Manager) (err error) {
if !nbnet.AdvancedRouting() {
func (r *SysOps) SetupRouting(initAddresses []net.IP, stateManager *statemanager.Manager, advancedRouting bool) (err error) {
if !advancedRouting {
log.Infof("Using legacy routing setup")
return r.setupRefCounter(initAddresses, stateManager)
}
defer func() {
if err != nil {
if cleanErr := r.CleanupRouting(stateManager); cleanErr != nil {
if cleanErr := r.CleanupRouting(stateManager, advancedRouting); cleanErr != nil {
log.Errorf("Error cleaning up routing: %v", cleanErr)
}
}
@@ -132,8 +132,8 @@ func (r *SysOps) SetupRouting(initAddresses []net.IP, stateManager *statemanager
// CleanupRouting performs a thorough cleanup of the routing configuration established by 'setupRouting'.
// It systematically removes the three rules and any associated routing table entries to ensure a clean state.
// The function uses error aggregation to report any errors encountered during the cleanup process.
func (r *SysOps) CleanupRouting(stateManager *statemanager.Manager) error {
if !nbnet.AdvancedRouting() {
func (r *SysOps) CleanupRouting(stateManager *statemanager.Manager, advancedRouting bool) error {
if !advancedRouting {
return r.cleanupRefCounter(stateManager)
}

View File

@@ -20,11 +20,11 @@ import (
"github.com/netbirdio/netbird/client/internal/statemanager"
)
func (r *SysOps) SetupRouting(initAddresses []net.IP, stateManager *statemanager.Manager) error {
func (r *SysOps) SetupRouting(initAddresses []net.IP, stateManager *statemanager.Manager, advancedRouting bool) error {
return r.setupRefCounter(initAddresses, stateManager)
}
func (r *SysOps) CleanupRouting(stateManager *statemanager.Manager) error {
func (r *SysOps) CleanupRouting(stateManager *statemanager.Manager, advancedRouting bool) error {
return r.cleanupRefCounter(stateManager)
}

View File

@@ -17,7 +17,7 @@ import (
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
nbnet "github.com/netbirdio/netbird/util/net"
nbnet "github.com/netbirdio/netbird/client/net"
)
type PacketExpectation struct {

View File

@@ -8,6 +8,7 @@ import (
"net/netip"
"os"
"runtime/debug"
"sort"
"strconv"
"sync"
"syscall"
@@ -19,9 +20,16 @@ import (
"golang.org/x/sys/windows"
"github.com/netbirdio/netbird/client/internal/statemanager"
nbnet "github.com/netbirdio/netbird/client/net"
)
const InfiniteLifetime = 0xffffffff
func init() {
nbnet.GetBestInterfaceFunc = GetBestInterface
}
const (
InfiniteLifetime = 0xffffffff
)
type RouteUpdateType int
@@ -77,6 +85,14 @@ type MIB_IPFORWARD_TABLE2 struct {
Table [1]MIB_IPFORWARD_ROW2 // Flexible array member
}
// candidateRoute represents a potential route for selection during route lookup
type candidateRoute struct {
interfaceIndex uint32
prefixLength uint8
routeMetric uint32
interfaceMetric int
}
// IP_ADDRESS_PREFIX is defined in https://learn.microsoft.com/en-us/windows/win32/api/netioapi/ns-netioapi-ip_address_prefix
type IP_ADDRESS_PREFIX struct {
Prefix SOCKADDR_INET
@@ -177,11 +193,20 @@ const (
RouteDeleted
)
func (r *SysOps) SetupRouting(initAddresses []net.IP, stateManager *statemanager.Manager) error {
func (r *SysOps) SetupRouting(initAddresses []net.IP, stateManager *statemanager.Manager, advancedRouting bool) error {
if advancedRouting {
return nil
}
log.Infof("Using legacy routing setup with ref counters")
return r.setupRefCounter(initAddresses, stateManager)
}
func (r *SysOps) CleanupRouting(stateManager *statemanager.Manager) error {
func (r *SysOps) CleanupRouting(stateManager *statemanager.Manager, advancedRouting bool) error {
if advancedRouting {
return nil
}
return r.cleanupRefCounter(stateManager)
}
@@ -635,10 +660,7 @@ func getWindowsRoutingTable() (*MIB_IPFORWARD_TABLE2, error) {
func freeWindowsRoutingTable(table *MIB_IPFORWARD_TABLE2) {
if table != nil {
ret, _, _ := procFreeMibTable.Call(uintptr(unsafe.Pointer(table)))
if ret != 0 {
log.Warnf("FreeMibTable failed with return code: %d", ret)
}
_, _, _ = procFreeMibTable.Call(uintptr(unsafe.Pointer(table)))
}
}
@@ -652,8 +674,7 @@ func parseWindowsRoutingTable(table *MIB_IPFORWARD_TABLE2) []DetailedRoute {
entryPtr := basePtr + uintptr(i)*entrySize
entry := (*MIB_IPFORWARD_ROW2)(unsafe.Pointer(entryPtr))
detailed := buildWindowsDetailedRoute(entry)
if detailed != nil {
if detailed := buildWindowsDetailedRoute(entry); detailed != nil {
detailedRoutes = append(detailedRoutes, *detailed)
}
}
@@ -802,6 +823,46 @@ func addZone(ip netip.Addr, interfaceIndex int) netip.Addr {
return ip
}
// parseCandidatesFromTable extracts all matching candidate routes from the routing table
func parseCandidatesFromTable(table *MIB_IPFORWARD_TABLE2, dest netip.Addr, skipInterfaceIndex int) []candidateRoute {
var candidates []candidateRoute
entrySize := unsafe.Sizeof(MIB_IPFORWARD_ROW2{})
basePtr := uintptr(unsafe.Pointer(&table.Table[0]))
for i := uint32(0); i < table.NumEntries; i++ {
entryPtr := basePtr + uintptr(i)*entrySize
entry := (*MIB_IPFORWARD_ROW2)(unsafe.Pointer(entryPtr))
if candidate := parseCandidateRoute(entry, dest, skipInterfaceIndex); candidate != nil {
candidates = append(candidates, *candidate)
}
}
return candidates
}
// parseCandidateRoute extracts candidate route information from a MIB_IPFORWARD_ROW2 entry
// Returns nil if the route doesn't match the destination or should be skipped
func parseCandidateRoute(entry *MIB_IPFORWARD_ROW2, dest netip.Addr, skipInterfaceIndex int) *candidateRoute {
if skipInterfaceIndex > 0 && int(entry.InterfaceIndex) == skipInterfaceIndex {
return nil
}
destPrefix := parseIPPrefix(entry.DestinationPrefix, int(entry.InterfaceIndex))
if !destPrefix.IsValid() || !destPrefix.Contains(dest) {
return nil
}
interfaceMetric := getInterfaceMetric(entry.InterfaceIndex, entry.DestinationPrefix.Prefix.sin6_family)
return &candidateRoute{
interfaceIndex: entry.InterfaceIndex,
prefixLength: entry.DestinationPrefix.PrefixLength,
routeMetric: entry.Metric,
interfaceMetric: interfaceMetric,
}
}
// getInterfaceMetric retrieves the interface metric for a given interface and address family
func getInterfaceMetric(interfaceIndex uint32, family int16) int {
if interfaceIndex == 0 {
@@ -821,6 +882,75 @@ func getInterfaceMetric(interfaceIndex uint32, family int16) int {
return int(ipInterfaceRow.Metric)
}
// sortRouteCandidates sorts route candidates by priority: prefix length -> route metric -> interface metric
func sortRouteCandidates(candidates []candidateRoute) {
sort.Slice(candidates, func(i, j int) bool {
if candidates[i].prefixLength != candidates[j].prefixLength {
return candidates[i].prefixLength > candidates[j].prefixLength
}
if candidates[i].routeMetric != candidates[j].routeMetric {
return candidates[i].routeMetric < candidates[j].routeMetric
}
return candidates[i].interfaceMetric < candidates[j].interfaceMetric
})
}
// GetBestInterface finds the best interface for reaching a destination,
// excluding the VPN interface to avoid routing loops.
//
// Route selection priority:
// 1. Longest prefix match (most specific route)
// 2. Lowest route metric
// 3. Lowest interface metric
func GetBestInterface(dest netip.Addr, vpnIntf string) (*net.Interface, error) {
var skipInterfaceIndex int
if vpnIntf != "" {
if iface, err := net.InterfaceByName(vpnIntf); err == nil {
skipInterfaceIndex = iface.Index
} else {
return nil, fmt.Errorf("get VPN interface %s: %w", vpnIntf, err)
}
}
table, err := getWindowsRoutingTable()
if err != nil {
return nil, fmt.Errorf("get routing table: %w", err)
}
defer freeWindowsRoutingTable(table)
candidates := parseCandidatesFromTable(table, dest, skipInterfaceIndex)
if len(candidates) == 0 {
return nil, fmt.Errorf("no route to %s", dest)
}
// Sort routes: prefix length -> route metric -> interface metric
sortRouteCandidates(candidates)
for _, candidate := range candidates {
iface, err := net.InterfaceByIndex(int(candidate.interfaceIndex))
if err != nil {
log.Warnf("failed to get interface by index %d: %v", candidate.interfaceIndex, err)
continue
}
if iface.Flags&net.FlagLoopback != 0 && !dest.IsLoopback() {
continue
}
if iface.Flags&net.FlagUp == 0 {
log.Debugf("interface %s is down, trying next route", iface.Name)
continue
}
log.Debugf("route lookup for %s: selected interface %s (index %d), route metric %d, interface metric %d",
dest, iface.Name, iface.Index, candidate.routeMetric, candidate.interfaceMetric)
return iface, nil
}
return nil, fmt.Errorf("no usable interface found for %s", dest)
}
// formatRouteAge formats the route age in seconds to a human-readable string
func formatRouteAge(ageSeconds uint32) string {
if ageSeconds == 0 {

View File

@@ -15,7 +15,7 @@ import (
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
nbnet "github.com/netbirdio/netbird/util/net"
nbnet "github.com/netbirdio/netbird/client/net"
)
var (

View File

@@ -12,18 +12,8 @@ func GetPrefixFromIP(ip net.IP) (netip.Prefix, error) {
if !ok {
return netip.Prefix{}, fmt.Errorf("parse IP address: %s", ip)
}
addr = addr.Unmap()
var prefixLength int
switch {
case addr.Is4():
prefixLength = 32
case addr.Is6():
prefixLength = 128
default:
return netip.Prefix{}, fmt.Errorf("invalid IP address: %s", addr)
}
prefix := netip.PrefixFrom(addr, prefixLength)
prefix := netip.PrefixFrom(addr, addr.BitLen())
return prefix, nil
}