mirror of
https://github.com/netbirdio/netbird.git
synced 2026-04-18 08:16:39 +00:00
[client] Use native windows sock opts to avoid routing loops (#4314)
- Move `util/grpc` and `util/net` to `client` so `internal` packages can be accessed - Add methods to return the next best interface after the NetBird interface. - Use `IP_UNICAST_IF` sock opt to force the outgoing interface for the NetBird `net.Dialer` and `net.ListenerConfig` to avoid routing loops. The interface is picked by the new route lookup method. - Some refactoring to avoid import cycles - Old behavior is available through `NB_USE_LEGACY_ROUTING=true` env var
This commit is contained in:
@@ -12,11 +12,11 @@ import (
|
||||
"github.com/netbirdio/netbird/client/internal/statemanager"
|
||||
)
|
||||
|
||||
func (r *SysOps) SetupRouting([]net.IP, *statemanager.Manager) error {
|
||||
func (r *SysOps) SetupRouting([]net.IP, *statemanager.Manager, bool) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (r *SysOps) CleanupRouting(*statemanager.Manager) error {
|
||||
func (r *SysOps) CleanupRouting(*statemanager.Manager, bool) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
@@ -3,7 +3,6 @@
|
||||
package systemops
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"net"
|
||||
@@ -22,7 +21,7 @@ import (
|
||||
"github.com/netbirdio/netbird/client/internal/routemanager/util"
|
||||
"github.com/netbirdio/netbird/client/internal/routemanager/vars"
|
||||
"github.com/netbirdio/netbird/client/internal/statemanager"
|
||||
nbnet "github.com/netbirdio/netbird/util/net"
|
||||
"github.com/netbirdio/netbird/client/net/hooks"
|
||||
)
|
||||
|
||||
const localSubnetsCacheTTL = 15 * time.Minute
|
||||
@@ -96,9 +95,9 @@ func (r *SysOps) cleanupRefCounter(stateManager *statemanager.Manager) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// TODO: Remove hooks selectively
|
||||
nbnet.RemoveDialerHooks()
|
||||
nbnet.RemoveListenerHooks()
|
||||
hooks.RemoveWriteHooks()
|
||||
hooks.RemoveCloseHooks()
|
||||
hooks.RemoveAddressRemoveHooks()
|
||||
|
||||
if err := r.refCounter.Flush(); err != nil {
|
||||
return fmt.Errorf("flush route manager: %w", err)
|
||||
@@ -290,12 +289,7 @@ func (r *SysOps) genericRemoveVPNRoute(prefix netip.Prefix, intf *net.Interface)
|
||||
}
|
||||
|
||||
func (r *SysOps) setupHooks(initAddresses []net.IP, stateManager *statemanager.Manager) error {
|
||||
beforeHook := func(connID nbnet.ConnectionID, ip net.IP) error {
|
||||
prefix, err := util.GetPrefixFromIP(ip)
|
||||
if err != nil {
|
||||
return fmt.Errorf("convert ip to prefix: %w", err)
|
||||
}
|
||||
|
||||
beforeHook := func(connID hooks.ConnectionID, prefix netip.Prefix) error {
|
||||
if _, err := r.refCounter.IncrementWithID(string(connID), prefix, struct{}{}); err != nil {
|
||||
return fmt.Errorf("adding route reference: %v", err)
|
||||
}
|
||||
@@ -304,7 +298,7 @@ func (r *SysOps) setupHooks(initAddresses []net.IP, stateManager *statemanager.M
|
||||
|
||||
return nil
|
||||
}
|
||||
afterHook := func(connID nbnet.ConnectionID) error {
|
||||
afterHook := func(connID hooks.ConnectionID) error {
|
||||
if err := r.refCounter.DecrementWithID(string(connID)); err != nil {
|
||||
return fmt.Errorf("remove route reference: %w", err)
|
||||
}
|
||||
@@ -317,36 +311,20 @@ func (r *SysOps) setupHooks(initAddresses []net.IP, stateManager *statemanager.M
|
||||
var merr *multierror.Error
|
||||
|
||||
for _, ip := range initAddresses {
|
||||
if err := beforeHook("init", ip); err != nil {
|
||||
merr = multierror.Append(merr, fmt.Errorf("add initial route for %s: %w", ip, err))
|
||||
prefix, err := util.GetPrefixFromIP(ip)
|
||||
if err != nil {
|
||||
merr = multierror.Append(merr, fmt.Errorf("invalid IP address %s: %w", ip, err))
|
||||
continue
|
||||
}
|
||||
if err := beforeHook("init", prefix); err != nil {
|
||||
merr = multierror.Append(merr, fmt.Errorf("add initial route for %s: %w", prefix, err))
|
||||
}
|
||||
}
|
||||
|
||||
nbnet.AddDialerHook(func(ctx context.Context, connID nbnet.ConnectionID, resolvedIPs []net.IPAddr) error {
|
||||
if ctx.Err() != nil {
|
||||
return ctx.Err()
|
||||
}
|
||||
hooks.AddWriteHook(beforeHook)
|
||||
hooks.AddCloseHook(afterHook)
|
||||
|
||||
var merr *multierror.Error
|
||||
for _, ip := range resolvedIPs {
|
||||
merr = multierror.Append(merr, beforeHook(connID, ip.IP))
|
||||
}
|
||||
return nberrors.FormatErrorOrNil(merr)
|
||||
})
|
||||
|
||||
nbnet.AddDialerCloseHook(func(connID nbnet.ConnectionID, conn *net.Conn) error {
|
||||
return afterHook(connID)
|
||||
})
|
||||
|
||||
nbnet.AddListenerWriteHook(func(connID nbnet.ConnectionID, ip *net.IPAddr, data []byte) error {
|
||||
return beforeHook(connID, ip.IP)
|
||||
})
|
||||
|
||||
nbnet.AddListenerCloseHook(func(connID nbnet.ConnectionID, conn net.PacketConn) error {
|
||||
return afterHook(connID)
|
||||
})
|
||||
|
||||
nbnet.AddListenerAddressRemoveHook(func(connID nbnet.ConnectionID, prefix netip.Prefix) error {
|
||||
hooks.AddAddressRemoveHook(func(connID hooks.ConnectionID, prefix netip.Prefix) error {
|
||||
if _, err := r.refCounter.Decrement(prefix); err != nil {
|
||||
return fmt.Errorf("remove route reference: %w", err)
|
||||
}
|
||||
|
||||
@@ -22,6 +22,7 @@ import (
|
||||
|
||||
"github.com/netbirdio/netbird/client/iface"
|
||||
"github.com/netbirdio/netbird/client/internal/routemanager/vars"
|
||||
nbnet "github.com/netbirdio/netbird/client/net"
|
||||
)
|
||||
|
||||
type dialer interface {
|
||||
@@ -143,10 +144,11 @@ func TestAddVPNRoute(t *testing.T) {
|
||||
wgInterface := createWGInterface(t, fmt.Sprintf("utun53%d", n), "100.65.75.2/24", 33100+n)
|
||||
|
||||
r := NewSysOps(wgInterface, nil)
|
||||
err := r.SetupRouting(nil, nil)
|
||||
advancedRouting := nbnet.AdvancedRouting()
|
||||
err := r.SetupRouting(nil, nil, advancedRouting)
|
||||
require.NoError(t, err)
|
||||
t.Cleanup(func() {
|
||||
assert.NoError(t, r.CleanupRouting(nil))
|
||||
assert.NoError(t, r.CleanupRouting(nil, advancedRouting))
|
||||
})
|
||||
|
||||
intf, err := net.InterfaceByName(wgInterface.Name())
|
||||
@@ -341,10 +343,11 @@ func TestAddRouteToNonVPNIntf(t *testing.T) {
|
||||
wgInterface := createWGInterface(t, fmt.Sprintf("utun54%d", n), "100.65.75.2/24", 33200+n)
|
||||
|
||||
r := NewSysOps(wgInterface, nil)
|
||||
err := r.SetupRouting(nil, nil)
|
||||
advancedRouting := nbnet.AdvancedRouting()
|
||||
err := r.SetupRouting(nil, nil, advancedRouting)
|
||||
require.NoError(t, err)
|
||||
t.Cleanup(func() {
|
||||
assert.NoError(t, r.CleanupRouting(nil))
|
||||
assert.NoError(t, r.CleanupRouting(nil, advancedRouting))
|
||||
})
|
||||
|
||||
initialNextHopV4, err := GetNextHop(netip.IPv4Unspecified())
|
||||
@@ -484,10 +487,11 @@ func setupTestEnv(t *testing.T) {
|
||||
})
|
||||
|
||||
r := NewSysOps(wgInterface, nil)
|
||||
err := r.SetupRouting(nil, nil)
|
||||
advancedRouting := nbnet.AdvancedRouting()
|
||||
err := r.SetupRouting(nil, nil, advancedRouting)
|
||||
require.NoError(t, err, "setupRouting should not return err")
|
||||
t.Cleanup(func() {
|
||||
assert.NoError(t, r.CleanupRouting(nil))
|
||||
assert.NoError(t, r.CleanupRouting(nil, advancedRouting))
|
||||
})
|
||||
|
||||
index, err := net.InterfaceByName(wgInterface.Name())
|
||||
|
||||
@@ -12,14 +12,14 @@ import (
|
||||
"github.com/netbirdio/netbird/client/internal/statemanager"
|
||||
)
|
||||
|
||||
func (r *SysOps) SetupRouting([]net.IP, *statemanager.Manager) error {
|
||||
func (r *SysOps) SetupRouting([]net.IP, *statemanager.Manager, bool) error {
|
||||
r.mu.Lock()
|
||||
defer r.mu.Unlock()
|
||||
r.prefixes = make(map[netip.Prefix]struct{})
|
||||
return nil
|
||||
}
|
||||
|
||||
func (r *SysOps) CleanupRouting(*statemanager.Manager) error {
|
||||
func (r *SysOps) CleanupRouting(*statemanager.Manager, bool) error {
|
||||
r.mu.Lock()
|
||||
defer r.mu.Unlock()
|
||||
|
||||
|
||||
@@ -20,7 +20,7 @@ import (
|
||||
"github.com/netbirdio/netbird/client/internal/routemanager/sysctl"
|
||||
"github.com/netbirdio/netbird/client/internal/routemanager/vars"
|
||||
"github.com/netbirdio/netbird/client/internal/statemanager"
|
||||
nbnet "github.com/netbirdio/netbird/util/net"
|
||||
nbnet "github.com/netbirdio/netbird/client/net"
|
||||
)
|
||||
|
||||
// IPRule contains IP rule information for debugging
|
||||
@@ -94,15 +94,15 @@ func getSetupRules() []ruleParams {
|
||||
// Rule 2 (VPN Traffic Routing): Directs all remaining traffic to the 'NetbirdVPNTableID' custom routing table.
|
||||
// This table is where a default route or other specific routes received from the management server are configured,
|
||||
// enabling VPN connectivity.
|
||||
func (r *SysOps) SetupRouting(initAddresses []net.IP, stateManager *statemanager.Manager) (err error) {
|
||||
if !nbnet.AdvancedRouting() {
|
||||
func (r *SysOps) SetupRouting(initAddresses []net.IP, stateManager *statemanager.Manager, advancedRouting bool) (err error) {
|
||||
if !advancedRouting {
|
||||
log.Infof("Using legacy routing setup")
|
||||
return r.setupRefCounter(initAddresses, stateManager)
|
||||
}
|
||||
|
||||
defer func() {
|
||||
if err != nil {
|
||||
if cleanErr := r.CleanupRouting(stateManager); cleanErr != nil {
|
||||
if cleanErr := r.CleanupRouting(stateManager, advancedRouting); cleanErr != nil {
|
||||
log.Errorf("Error cleaning up routing: %v", cleanErr)
|
||||
}
|
||||
}
|
||||
@@ -132,8 +132,8 @@ func (r *SysOps) SetupRouting(initAddresses []net.IP, stateManager *statemanager
|
||||
// CleanupRouting performs a thorough cleanup of the routing configuration established by 'setupRouting'.
|
||||
// It systematically removes the three rules and any associated routing table entries to ensure a clean state.
|
||||
// The function uses error aggregation to report any errors encountered during the cleanup process.
|
||||
func (r *SysOps) CleanupRouting(stateManager *statemanager.Manager) error {
|
||||
if !nbnet.AdvancedRouting() {
|
||||
func (r *SysOps) CleanupRouting(stateManager *statemanager.Manager, advancedRouting bool) error {
|
||||
if !advancedRouting {
|
||||
return r.cleanupRefCounter(stateManager)
|
||||
}
|
||||
|
||||
|
||||
@@ -20,11 +20,11 @@ import (
|
||||
"github.com/netbirdio/netbird/client/internal/statemanager"
|
||||
)
|
||||
|
||||
func (r *SysOps) SetupRouting(initAddresses []net.IP, stateManager *statemanager.Manager) error {
|
||||
func (r *SysOps) SetupRouting(initAddresses []net.IP, stateManager *statemanager.Manager, advancedRouting bool) error {
|
||||
return r.setupRefCounter(initAddresses, stateManager)
|
||||
}
|
||||
|
||||
func (r *SysOps) CleanupRouting(stateManager *statemanager.Manager) error {
|
||||
func (r *SysOps) CleanupRouting(stateManager *statemanager.Manager, advancedRouting bool) error {
|
||||
return r.cleanupRefCounter(stateManager)
|
||||
}
|
||||
|
||||
|
||||
@@ -17,7 +17,7 @@ import (
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
nbnet "github.com/netbirdio/netbird/util/net"
|
||||
nbnet "github.com/netbirdio/netbird/client/net"
|
||||
)
|
||||
|
||||
type PacketExpectation struct {
|
||||
|
||||
@@ -8,6 +8,7 @@ import (
|
||||
"net/netip"
|
||||
"os"
|
||||
"runtime/debug"
|
||||
"sort"
|
||||
"strconv"
|
||||
"sync"
|
||||
"syscall"
|
||||
@@ -19,9 +20,16 @@ import (
|
||||
"golang.org/x/sys/windows"
|
||||
|
||||
"github.com/netbirdio/netbird/client/internal/statemanager"
|
||||
nbnet "github.com/netbirdio/netbird/client/net"
|
||||
)
|
||||
|
||||
const InfiniteLifetime = 0xffffffff
|
||||
func init() {
|
||||
nbnet.GetBestInterfaceFunc = GetBestInterface
|
||||
}
|
||||
|
||||
const (
|
||||
InfiniteLifetime = 0xffffffff
|
||||
)
|
||||
|
||||
type RouteUpdateType int
|
||||
|
||||
@@ -77,6 +85,14 @@ type MIB_IPFORWARD_TABLE2 struct {
|
||||
Table [1]MIB_IPFORWARD_ROW2 // Flexible array member
|
||||
}
|
||||
|
||||
// candidateRoute represents a potential route for selection during route lookup
|
||||
type candidateRoute struct {
|
||||
interfaceIndex uint32
|
||||
prefixLength uint8
|
||||
routeMetric uint32
|
||||
interfaceMetric int
|
||||
}
|
||||
|
||||
// IP_ADDRESS_PREFIX is defined in https://learn.microsoft.com/en-us/windows/win32/api/netioapi/ns-netioapi-ip_address_prefix
|
||||
type IP_ADDRESS_PREFIX struct {
|
||||
Prefix SOCKADDR_INET
|
||||
@@ -177,11 +193,20 @@ const (
|
||||
RouteDeleted
|
||||
)
|
||||
|
||||
func (r *SysOps) SetupRouting(initAddresses []net.IP, stateManager *statemanager.Manager) error {
|
||||
func (r *SysOps) SetupRouting(initAddresses []net.IP, stateManager *statemanager.Manager, advancedRouting bool) error {
|
||||
if advancedRouting {
|
||||
return nil
|
||||
}
|
||||
|
||||
log.Infof("Using legacy routing setup with ref counters")
|
||||
return r.setupRefCounter(initAddresses, stateManager)
|
||||
}
|
||||
|
||||
func (r *SysOps) CleanupRouting(stateManager *statemanager.Manager) error {
|
||||
func (r *SysOps) CleanupRouting(stateManager *statemanager.Manager, advancedRouting bool) error {
|
||||
if advancedRouting {
|
||||
return nil
|
||||
}
|
||||
|
||||
return r.cleanupRefCounter(stateManager)
|
||||
}
|
||||
|
||||
@@ -635,10 +660,7 @@ func getWindowsRoutingTable() (*MIB_IPFORWARD_TABLE2, error) {
|
||||
|
||||
func freeWindowsRoutingTable(table *MIB_IPFORWARD_TABLE2) {
|
||||
if table != nil {
|
||||
ret, _, _ := procFreeMibTable.Call(uintptr(unsafe.Pointer(table)))
|
||||
if ret != 0 {
|
||||
log.Warnf("FreeMibTable failed with return code: %d", ret)
|
||||
}
|
||||
_, _, _ = procFreeMibTable.Call(uintptr(unsafe.Pointer(table)))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -652,8 +674,7 @@ func parseWindowsRoutingTable(table *MIB_IPFORWARD_TABLE2) []DetailedRoute {
|
||||
entryPtr := basePtr + uintptr(i)*entrySize
|
||||
entry := (*MIB_IPFORWARD_ROW2)(unsafe.Pointer(entryPtr))
|
||||
|
||||
detailed := buildWindowsDetailedRoute(entry)
|
||||
if detailed != nil {
|
||||
if detailed := buildWindowsDetailedRoute(entry); detailed != nil {
|
||||
detailedRoutes = append(detailedRoutes, *detailed)
|
||||
}
|
||||
}
|
||||
@@ -802,6 +823,46 @@ func addZone(ip netip.Addr, interfaceIndex int) netip.Addr {
|
||||
return ip
|
||||
}
|
||||
|
||||
// parseCandidatesFromTable extracts all matching candidate routes from the routing table
|
||||
func parseCandidatesFromTable(table *MIB_IPFORWARD_TABLE2, dest netip.Addr, skipInterfaceIndex int) []candidateRoute {
|
||||
var candidates []candidateRoute
|
||||
entrySize := unsafe.Sizeof(MIB_IPFORWARD_ROW2{})
|
||||
basePtr := uintptr(unsafe.Pointer(&table.Table[0]))
|
||||
|
||||
for i := uint32(0); i < table.NumEntries; i++ {
|
||||
entryPtr := basePtr + uintptr(i)*entrySize
|
||||
entry := (*MIB_IPFORWARD_ROW2)(unsafe.Pointer(entryPtr))
|
||||
|
||||
if candidate := parseCandidateRoute(entry, dest, skipInterfaceIndex); candidate != nil {
|
||||
candidates = append(candidates, *candidate)
|
||||
}
|
||||
}
|
||||
|
||||
return candidates
|
||||
}
|
||||
|
||||
// parseCandidateRoute extracts candidate route information from a MIB_IPFORWARD_ROW2 entry
|
||||
// Returns nil if the route doesn't match the destination or should be skipped
|
||||
func parseCandidateRoute(entry *MIB_IPFORWARD_ROW2, dest netip.Addr, skipInterfaceIndex int) *candidateRoute {
|
||||
if skipInterfaceIndex > 0 && int(entry.InterfaceIndex) == skipInterfaceIndex {
|
||||
return nil
|
||||
}
|
||||
|
||||
destPrefix := parseIPPrefix(entry.DestinationPrefix, int(entry.InterfaceIndex))
|
||||
if !destPrefix.IsValid() || !destPrefix.Contains(dest) {
|
||||
return nil
|
||||
}
|
||||
|
||||
interfaceMetric := getInterfaceMetric(entry.InterfaceIndex, entry.DestinationPrefix.Prefix.sin6_family)
|
||||
|
||||
return &candidateRoute{
|
||||
interfaceIndex: entry.InterfaceIndex,
|
||||
prefixLength: entry.DestinationPrefix.PrefixLength,
|
||||
routeMetric: entry.Metric,
|
||||
interfaceMetric: interfaceMetric,
|
||||
}
|
||||
}
|
||||
|
||||
// getInterfaceMetric retrieves the interface metric for a given interface and address family
|
||||
func getInterfaceMetric(interfaceIndex uint32, family int16) int {
|
||||
if interfaceIndex == 0 {
|
||||
@@ -821,6 +882,75 @@ func getInterfaceMetric(interfaceIndex uint32, family int16) int {
|
||||
return int(ipInterfaceRow.Metric)
|
||||
}
|
||||
|
||||
// sortRouteCandidates sorts route candidates by priority: prefix length -> route metric -> interface metric
|
||||
func sortRouteCandidates(candidates []candidateRoute) {
|
||||
sort.Slice(candidates, func(i, j int) bool {
|
||||
if candidates[i].prefixLength != candidates[j].prefixLength {
|
||||
return candidates[i].prefixLength > candidates[j].prefixLength
|
||||
}
|
||||
if candidates[i].routeMetric != candidates[j].routeMetric {
|
||||
return candidates[i].routeMetric < candidates[j].routeMetric
|
||||
}
|
||||
return candidates[i].interfaceMetric < candidates[j].interfaceMetric
|
||||
})
|
||||
}
|
||||
|
||||
// GetBestInterface finds the best interface for reaching a destination,
|
||||
// excluding the VPN interface to avoid routing loops.
|
||||
//
|
||||
// Route selection priority:
|
||||
// 1. Longest prefix match (most specific route)
|
||||
// 2. Lowest route metric
|
||||
// 3. Lowest interface metric
|
||||
func GetBestInterface(dest netip.Addr, vpnIntf string) (*net.Interface, error) {
|
||||
var skipInterfaceIndex int
|
||||
if vpnIntf != "" {
|
||||
if iface, err := net.InterfaceByName(vpnIntf); err == nil {
|
||||
skipInterfaceIndex = iface.Index
|
||||
} else {
|
||||
return nil, fmt.Errorf("get VPN interface %s: %w", vpnIntf, err)
|
||||
}
|
||||
}
|
||||
|
||||
table, err := getWindowsRoutingTable()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("get routing table: %w", err)
|
||||
}
|
||||
defer freeWindowsRoutingTable(table)
|
||||
|
||||
candidates := parseCandidatesFromTable(table, dest, skipInterfaceIndex)
|
||||
|
||||
if len(candidates) == 0 {
|
||||
return nil, fmt.Errorf("no route to %s", dest)
|
||||
}
|
||||
|
||||
// Sort routes: prefix length -> route metric -> interface metric
|
||||
sortRouteCandidates(candidates)
|
||||
|
||||
for _, candidate := range candidates {
|
||||
iface, err := net.InterfaceByIndex(int(candidate.interfaceIndex))
|
||||
if err != nil {
|
||||
log.Warnf("failed to get interface by index %d: %v", candidate.interfaceIndex, err)
|
||||
continue
|
||||
}
|
||||
|
||||
if iface.Flags&net.FlagLoopback != 0 && !dest.IsLoopback() {
|
||||
continue
|
||||
}
|
||||
|
||||
if iface.Flags&net.FlagUp == 0 {
|
||||
log.Debugf("interface %s is down, trying next route", iface.Name)
|
||||
continue
|
||||
}
|
||||
|
||||
log.Debugf("route lookup for %s: selected interface %s (index %d), route metric %d, interface metric %d",
|
||||
dest, iface.Name, iface.Index, candidate.routeMetric, candidate.interfaceMetric)
|
||||
return iface, nil
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("no usable interface found for %s", dest)
|
||||
}
|
||||
|
||||
// formatRouteAge formats the route age in seconds to a human-readable string
|
||||
func formatRouteAge(ageSeconds uint32) string {
|
||||
if ageSeconds == 0 {
|
||||
|
||||
@@ -15,7 +15,7 @@ import (
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
nbnet "github.com/netbirdio/netbird/util/net"
|
||||
nbnet "github.com/netbirdio/netbird/client/net"
|
||||
)
|
||||
|
||||
var (
|
||||
|
||||
Reference in New Issue
Block a user