diff --git a/client/internal/routemanager/systemops/systemops_darwin.go b/client/internal/routemanager/systemops/systemops_darwin.go index 15b151af6..d6875ff95 100644 --- a/client/internal/routemanager/systemops/systemops_darwin.go +++ b/client/internal/routemanager/systemops/systemops_darwin.go @@ -6,6 +6,8 @@ import ( "errors" "fmt" "net/netip" + "os" + "time" "github.com/hashicorp/go-multierror" log "github.com/sirupsen/logrus" @@ -17,6 +19,11 @@ import ( nbnet "github.com/netbirdio/netbird/client/net" ) +// scopedRouteBudget bounds retries for the scoped default route. Installing or +// deleting it matters enough that we're willing to spend longer waiting for the +// kernel reply than for per-prefix exclusion routes. +const scopedRouteBudget = 5 * time.Second + // setupAdvancedRouting installs an RTF_IFSCOPE default route per address family // pinned to the current physical egress, so IP_BOUND_IF scoped lookups can // resolve gateway'd destinations while the VPN's split default owns the @@ -34,6 +41,12 @@ import ( // the table. Any subsequent reconnect via nbnet.NewDialer picks up the // populated bound-iface cache and gets IP_BOUND_IF set cleanly. func (r *SysOps) setupAdvancedRouting() error { + // Drop any previously-cached egress interface before reinstalling. On a + // refresh, a family that no longer resolves would otherwise keep the stale + // binding, causing new sockets to scope to an interface without a matching + // scoped default. 
+ nbnet.ClearBoundInterfaces() + if err := r.flushScopedDefaults(); err != nil { log.Warnf("flush residual scoped defaults: %v", err) } @@ -72,14 +85,12 @@ func (r *SysOps) installScopedDefaultFor(unspec netip.Addr) (bool, error) { } return false, fmt.Errorf("get default nexthop for %s: %w", unspec, err) } - if nexthop.Intf == nil || !nexthop.IP.IsValid() { - return false, fmt.Errorf("unusable default nexthop for %s (iface=%v gw=%v)", - unspec, nexthop.Intf, nexthop.IP) + if nexthop.Intf == nil { + return false, fmt.Errorf("unusable default nexthop for %s (no interface)", unspec) } if err := r.addScopedDefault(unspec, nexthop); err != nil { - return false, fmt.Errorf("add scoped default via %s on %s: %w", - nexthop.IP, nexthop.Intf.Name, err) + return false, fmt.Errorf("add scoped default on %s: %w", nexthop.Intf.Name, err) } af := unix.AF_INET @@ -87,8 +98,11 @@ func (r *SysOps) installScopedDefaultFor(unspec netip.Addr) (bool, error) { af = unix.AF_INET6 } nbnet.SetBoundInterface(af, nexthop.Intf) - log.Infof("installed scoped default route via %s on %s for %s", - nexthop.IP, nexthop.Intf.Name, afOf(unspec)) + via := "point-to-point" + if nexthop.IP.IsValid() { + via = nexthop.IP.String() + } + log.Infof("installed scoped default route via %s on %s for %s", via, nexthop.Intf.Name, afOf(unspec)) return true, nil } @@ -172,16 +186,17 @@ func (r *SysOps) deleteScopedRoute(rtMsg *route.RouteMessage) error { Index: rtMsg.Index, Addrs: rtMsg.Addrs, } - return r.writeRouteMessage(del) + return r.writeRouteMessage(del, scopedRouteBudget) } func (r *SysOps) scopedRouteSocket(action int, unspec netip.Addr, nexthop Nexthop) error { - flags := unix.RTF_UP | unix.RTF_STATIC | unix.RTF_GATEWAY | unix.RTF_IFSCOPE | routeProtoFlag + flags := unix.RTF_UP | unix.RTF_STATIC | unix.RTF_IFSCOPE | routeProtoFlag msg := &route.RouteMessage{ Type: action, Flags: flags, Version: unix.RTM_VERSION, + ID: uintptr(os.Getpid()), Seq: r.getSeq(), Index: nexthop.Intf.Index, } @@ -197,16 +212,25 
@@ func (r *SysOps) scopedRouteSocket(action int, unspec netip.Addr, nexthop Nextho if err != nil { return fmt.Errorf("build netmask: %w", err) } - gw, err := addrToRouteAddr(nexthop.IP.Unmap()) - if err != nil { - return fmt.Errorf("build gateway: %w", err) - } addrs[unix.RTAX_DST] = dst addrs[unix.RTAX_NETMASK] = mask - addrs[unix.RTAX_GATEWAY] = gw + + if nexthop.IP.IsValid() { + msg.Flags |= unix.RTF_GATEWAY + gw, err := addrToRouteAddr(nexthop.IP.Unmap()) + if err != nil { + return fmt.Errorf("build gateway: %w", err) + } + addrs[unix.RTAX_GATEWAY] = gw + } else { + addrs[unix.RTAX_GATEWAY] = &route.LinkAddr{ + Index: nexthop.Intf.Index, + Name: nexthop.Intf.Name, + } + } msg.Addrs = addrs - return r.writeRouteMessage(msg) + return r.writeRouteMessage(msg, scopedRouteBudget) } func afOf(a netip.Addr) string { diff --git a/client/internal/routemanager/systemops/systemops_unix.go b/client/internal/routemanager/systemops/systemops_unix.go index aee80ab15..2d3f9b69a 100644 --- a/client/internal/routemanager/systemops/systemops_unix.go +++ b/client/internal/routemanager/systemops/systemops_unix.go @@ -25,6 +25,9 @@ import ( const ( envRouteProtoFlag = "NB_ROUTE_PROTO_FLAG" + + // routeBudget bounds retries for per-prefix exclusion route programming. + routeBudget = 1 * time.Second ) var routeProtoFlag int @@ -138,7 +141,7 @@ func (r *SysOps) routeSocket(action int, prefix netip.Prefix, nexthop Nexthop) e return fmt.Errorf("build route message: %w", err) } - if err := r.writeRouteMessage(msg); err != nil { + if err := r.writeRouteMessage(msg, routeBudget); err != nil { a := "add" if action == unix.RTM_DELETE { a = "remove" @@ -149,13 +152,13 @@ func (r *SysOps) routeSocket(action int, prefix netip.Prefix, nexthop Nexthop) e } // writeRouteMessage sends a route message over AF_ROUTE and waits for the -// kernel's matching reply, retrying transient failures. Callers do not need to -// manage sockets or seq numbers themselves. 
-func (r *SysOps) writeRouteMessage(msg *route.RouteMessage) error { +// kernel's matching reply, retrying transient failures until budget elapses. +// Callers do not need to manage sockets or seq numbers themselves. +func (r *SysOps) writeRouteMessage(msg *route.RouteMessage, budget time.Duration) error { expBackOff := backoff.NewExponentialBackOff() expBackOff.InitialInterval = 50 * time.Millisecond expBackOff.MaxInterval = 500 * time.Millisecond - expBackOff.MaxElapsedTime = 1 * time.Second + expBackOff.MaxElapsedTime = budget return backoff.Retry(func() error { return routeMessageRoundtrip(msg) }, expBackOff) } @@ -176,6 +179,13 @@ func routeMessageRoundtrip(msg *route.RouteMessage) error { return backoff.Permanent(fmt.Errorf("set recv timeout: %w", err)) } + // AF_ROUTE is a broadcast channel: every route socket on the host sees + // every RTM_* event. With concurrent route programming the default + // per-socket queue overflows and our own reply gets dropped. + if err := unix.SetsockoptInt(fd, unix.SOL_SOCKET, unix.SO_RCVBUF, 1<<20); err != nil { + log.Debugf("set SO_RCVBUF on route socket: %v", err) + } + bytes, err := msg.Marshal() if err != nil { return backoff.Permanent(fmt.Errorf("marshal: %w", err)) @@ -200,7 +210,9 @@ func readRouteResponse(fd, wantType, wantSeq int) error { deadline := time.Now().Add(time.Second) for { if time.Now().After(deadline) { - return backoff.Permanent(fmt.Errorf("read: timeout waiting for route reply type=%d seq=%d", wantType, wantSeq)) + // Transient: under concurrent pressure the kernel can drop our reply + // from the socket buffer. Let backoff.Retry re-send the request. 
+ return fmt.Errorf("read: timeout waiting for route reply type=%d seq=%d", wantType, wantSeq) } n, err := unix.Read(fd, resp) if err != nil { @@ -214,11 +226,13 @@ func readRouteResponse(fd, wantType, wantSeq int) error { continue } hdr := (*unix.RtMsghdr)(unsafe.Pointer(&resp[0])) + // Darwin reflects the sender's pid on replies; matching (Type, Seq, Pid) + // uniquely identifies our own reply among broadcast traffic. if int(hdr.Type) != wantType || int(hdr.Seq) != wantSeq || hdr.Pid != pid { continue } if hdr.Errno != 0 { - return backoff.Permanent(fmt.Errorf("kernel errno %d", hdr.Errno)) + return backoff.Permanent(fmt.Errorf("kernel: %w", syscall.Errno(hdr.Errno))) } return nil } @@ -229,6 +243,7 @@ func (r *SysOps) buildRouteMessage(action int, prefix netip.Prefix, nexthop Next Type: action, Flags: unix.RTF_UP | routeProtoFlag, Version: unix.RTM_VERSION, + ID: uintptr(os.Getpid()), Seq: r.getSeq(), } diff --git a/client/net/env_mobile.go b/client/net/env_mobile.go index f29d448ed..80b0fad8d 100644 --- a/client/net/env_mobile.go +++ b/client/net/env_mobile.go @@ -4,6 +4,7 @@ package net // Init initializes the network environment for mobile platforms. func Init() { + // no-op on mobile: routing scope is owned by the VPN extension. } // AdvancedRouting reports whether routing loops can be avoided without using exclusion routes. @@ -15,6 +16,7 @@ func AdvancedRouting() bool { // SetVPNInterfaceName is a no-op on mobile. func SetVPNInterfaceName(string) { + // no-op on mobile: the VPN extension manages the interface. } // GetVPNInterfaceName returns an empty string on mobile. diff --git a/client/net/net_darwin.go b/client/net/net_darwin.go index 1e8d81d62..00d858a6a 100644 --- a/client/net/net_darwin.go +++ b/client/net/net_darwin.go @@ -60,6 +60,11 @@ func ClearBoundInterfaces() { boundIface6 = nil } +// boundInterfaceFor returns the cached egress interface for a socket's address +// family, falling back to the other family if the preferred slot is empty. 
+// The kernel stores both IP_BOUND_IF and IPV6_BOUND_IF in inp_boundifp, so +// either setsockopt scopes the socket; preferring same-family still matters +// when v4 and v6 defaults egress different NICs. func boundInterfaceFor(network, address string) *net.Interface { if iface := zoneInterface(address); iface != nil { return iface @@ -68,10 +73,14 @@ func boundInterfaceFor(network, address string) *net.Interface { boundIfaceMu.RLock() defer boundIfaceMu.RUnlock() + primary, secondary := boundIface4, boundIface6 if isV6Network(network) { - return boundIface6 + primary, secondary = boundIface6, boundIface4 } - return boundIface4 + if primary != nil { + return primary + } + return secondary } func isV6Network(network string) bool { @@ -129,6 +138,7 @@ func applyBoundIfToSocket(network, address string, c syscall.RawConn) error { iface := boundInterfaceFor(network, address) if iface == nil { + log.Debugf("no bound iface cached for %s to %s, skipping BOUND_IF", network, address) return nil } diff --git a/client/server/state.go b/client/server/state.go index 8dca6bde1..f2d823465 100644 --- a/client/server/state.go +++ b/client/server/state.go @@ -12,7 +12,6 @@ import ( "github.com/netbirdio/netbird/client/internal" "github.com/netbirdio/netbird/client/internal/routemanager/systemops" "github.com/netbirdio/netbird/client/internal/statemanager" - nbnet "github.com/netbirdio/netbird/client/net" "github.com/netbirdio/netbird/client/proto" ) @@ -138,10 +137,8 @@ func restoreResidualState(ctx context.Context, statePath string) error { } // clean up any remaining routes independently of the state file - if !nbnet.AdvancedRouting() { - if err := systemops.New(nil, nil).FlushMarkedRoutes(); err != nil { - merr = multierror.Append(merr, fmt.Errorf("flush marked routes: %w", err)) - } + if err := systemops.New(nil, nil).FlushMarkedRoutes(); err != nil { + merr = multierror.Append(merr, fmt.Errorf("flush marked routes: %w", err)) } return nberrors.FormatErrorOrNil(merr)