mirror of
https://github.com/netbirdio/netbird.git
synced 2026-05-07 01:10:03 +00:00
Address review
This commit is contained in:
@@ -6,6 +6,8 @@ import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"net/netip"
|
||||
"os"
|
||||
"time"
|
||||
|
||||
"github.com/hashicorp/go-multierror"
|
||||
log "github.com/sirupsen/logrus"
|
||||
@@ -17,6 +19,11 @@ import (
|
||||
nbnet "github.com/netbirdio/netbird/client/net"
|
||||
)
|
||||
|
||||
// scopedRouteBudget bounds retries for the scoped default route. Installing or
|
||||
// deleting it matters enough that we're willing to spend longer waiting for the
|
||||
// kernel reply than for per-prefix exclusion routes.
|
||||
const scopedRouteBudget = 5 * time.Second
|
||||
|
||||
// setupAdvancedRouting installs an RTF_IFSCOPE default route per address family
|
||||
// pinned to the current physical egress, so IP_BOUND_IF scoped lookups can
|
||||
// resolve gateway'd destinations while the VPN's split default owns the
|
||||
@@ -34,6 +41,12 @@ import (
|
||||
// the table. Any subsequent reconnect via nbnet.NewDialer picks up the
|
||||
// populated bound-iface cache and gets IP_BOUND_IF set cleanly.
|
||||
func (r *SysOps) setupAdvancedRouting() error {
|
||||
// Drop any previously-cached egress interface before reinstalling. On a
|
||||
// refresh, a family that no longer resolves would otherwise keep the stale
|
||||
// binding, causing new sockets to scope to an interface without a matching
|
||||
// scoped default.
|
||||
nbnet.ClearBoundInterfaces()
|
||||
|
||||
if err := r.flushScopedDefaults(); err != nil {
|
||||
log.Warnf("flush residual scoped defaults: %v", err)
|
||||
}
|
||||
@@ -72,14 +85,12 @@ func (r *SysOps) installScopedDefaultFor(unspec netip.Addr) (bool, error) {
|
||||
}
|
||||
return false, fmt.Errorf("get default nexthop for %s: %w", unspec, err)
|
||||
}
|
||||
if nexthop.Intf == nil || !nexthop.IP.IsValid() {
|
||||
return false, fmt.Errorf("unusable default nexthop for %s (iface=%v gw=%v)",
|
||||
unspec, nexthop.Intf, nexthop.IP)
|
||||
if nexthop.Intf == nil {
|
||||
return false, fmt.Errorf("unusable default nexthop for %s (no interface)", unspec)
|
||||
}
|
||||
|
||||
if err := r.addScopedDefault(unspec, nexthop); err != nil {
|
||||
return false, fmt.Errorf("add scoped default via %s on %s: %w",
|
||||
nexthop.IP, nexthop.Intf.Name, err)
|
||||
return false, fmt.Errorf("add scoped default on %s: %w", nexthop.Intf.Name, err)
|
||||
}
|
||||
|
||||
af := unix.AF_INET
|
||||
@@ -87,8 +98,11 @@ func (r *SysOps) installScopedDefaultFor(unspec netip.Addr) (bool, error) {
|
||||
af = unix.AF_INET6
|
||||
}
|
||||
nbnet.SetBoundInterface(af, nexthop.Intf)
|
||||
log.Infof("installed scoped default route via %s on %s for %s",
|
||||
nexthop.IP, nexthop.Intf.Name, afOf(unspec))
|
||||
via := "point-to-point"
|
||||
if nexthop.IP.IsValid() {
|
||||
via = nexthop.IP.String()
|
||||
}
|
||||
log.Infof("installed scoped default route via %s on %s for %s", via, nexthop.Intf.Name, afOf(unspec))
|
||||
return true, nil
|
||||
}
|
||||
|
||||
@@ -172,16 +186,17 @@ func (r *SysOps) deleteScopedRoute(rtMsg *route.RouteMessage) error {
|
||||
Index: rtMsg.Index,
|
||||
Addrs: rtMsg.Addrs,
|
||||
}
|
||||
return r.writeRouteMessage(del)
|
||||
return r.writeRouteMessage(del, scopedRouteBudget)
|
||||
}
|
||||
|
||||
func (r *SysOps) scopedRouteSocket(action int, unspec netip.Addr, nexthop Nexthop) error {
|
||||
flags := unix.RTF_UP | unix.RTF_STATIC | unix.RTF_GATEWAY | unix.RTF_IFSCOPE | routeProtoFlag
|
||||
flags := unix.RTF_UP | unix.RTF_STATIC | unix.RTF_IFSCOPE | routeProtoFlag
|
||||
|
||||
msg := &route.RouteMessage{
|
||||
Type: action,
|
||||
Flags: flags,
|
||||
Version: unix.RTM_VERSION,
|
||||
ID: uintptr(os.Getpid()),
|
||||
Seq: r.getSeq(),
|
||||
Index: nexthop.Intf.Index,
|
||||
}
|
||||
@@ -197,16 +212,25 @@ func (r *SysOps) scopedRouteSocket(action int, unspec netip.Addr, nexthop Nextho
|
||||
if err != nil {
|
||||
return fmt.Errorf("build netmask: %w", err)
|
||||
}
|
||||
gw, err := addrToRouteAddr(nexthop.IP.Unmap())
|
||||
if err != nil {
|
||||
return fmt.Errorf("build gateway: %w", err)
|
||||
}
|
||||
addrs[unix.RTAX_DST] = dst
|
||||
addrs[unix.RTAX_NETMASK] = mask
|
||||
addrs[unix.RTAX_GATEWAY] = gw
|
||||
|
||||
if nexthop.IP.IsValid() {
|
||||
msg.Flags |= unix.RTF_GATEWAY
|
||||
gw, err := addrToRouteAddr(nexthop.IP.Unmap())
|
||||
if err != nil {
|
||||
return fmt.Errorf("build gateway: %w", err)
|
||||
}
|
||||
addrs[unix.RTAX_GATEWAY] = gw
|
||||
} else {
|
||||
addrs[unix.RTAX_GATEWAY] = &route.LinkAddr{
|
||||
Index: nexthop.Intf.Index,
|
||||
Name: nexthop.Intf.Name,
|
||||
}
|
||||
}
|
||||
msg.Addrs = addrs
|
||||
|
||||
return r.writeRouteMessage(msg)
|
||||
return r.writeRouteMessage(msg, scopedRouteBudget)
|
||||
}
|
||||
|
||||
func afOf(a netip.Addr) string {
|
||||
|
||||
@@ -25,6 +25,9 @@ import (
|
||||
|
||||
const (
|
||||
envRouteProtoFlag = "NB_ROUTE_PROTO_FLAG"
|
||||
|
||||
// routeBudget bounds retries for per-prefix exclusion route programming.
|
||||
routeBudget = 1 * time.Second
|
||||
)
|
||||
|
||||
var routeProtoFlag int
|
||||
@@ -138,7 +141,7 @@ func (r *SysOps) routeSocket(action int, prefix netip.Prefix, nexthop Nexthop) e
|
||||
return fmt.Errorf("build route message: %w", err)
|
||||
}
|
||||
|
||||
if err := r.writeRouteMessage(msg); err != nil {
|
||||
if err := r.writeRouteMessage(msg, routeBudget); err != nil {
|
||||
a := "add"
|
||||
if action == unix.RTM_DELETE {
|
||||
a = "remove"
|
||||
@@ -149,13 +152,13 @@ func (r *SysOps) routeSocket(action int, prefix netip.Prefix, nexthop Nexthop) e
|
||||
}
|
||||
|
||||
// writeRouteMessage sends a route message over AF_ROUTE and waits for the
|
||||
// kernel's matching reply, retrying transient failures. Callers do not need to
|
||||
// manage sockets or seq numbers themselves.
|
||||
func (r *SysOps) writeRouteMessage(msg *route.RouteMessage) error {
|
||||
// kernel's matching reply, retrying transient failures until budget elapses.
|
||||
// Callers do not need to manage sockets or seq numbers themselves.
|
||||
func (r *SysOps) writeRouteMessage(msg *route.RouteMessage, budget time.Duration) error {
|
||||
expBackOff := backoff.NewExponentialBackOff()
|
||||
expBackOff.InitialInterval = 50 * time.Millisecond
|
||||
expBackOff.MaxInterval = 500 * time.Millisecond
|
||||
expBackOff.MaxElapsedTime = 1 * time.Second
|
||||
expBackOff.MaxElapsedTime = budget
|
||||
|
||||
return backoff.Retry(func() error { return routeMessageRoundtrip(msg) }, expBackOff)
|
||||
}
|
||||
@@ -176,6 +179,13 @@ func routeMessageRoundtrip(msg *route.RouteMessage) error {
|
||||
return backoff.Permanent(fmt.Errorf("set recv timeout: %w", err))
|
||||
}
|
||||
|
||||
// AF_ROUTE is a broadcast channel: every route socket on the host sees
|
||||
// every RTM_* event. With concurrent route programming the default
|
||||
// per-socket queue overflows and our own reply gets dropped.
|
||||
if err := unix.SetsockoptInt(fd, unix.SOL_SOCKET, unix.SO_RCVBUF, 1<<20); err != nil {
|
||||
log.Debugf("set SO_RCVBUF on route socket: %v", err)
|
||||
}
|
||||
|
||||
bytes, err := msg.Marshal()
|
||||
if err != nil {
|
||||
return backoff.Permanent(fmt.Errorf("marshal: %w", err))
|
||||
@@ -200,7 +210,9 @@ func readRouteResponse(fd, wantType, wantSeq int) error {
|
||||
deadline := time.Now().Add(time.Second)
|
||||
for {
|
||||
if time.Now().After(deadline) {
|
||||
return backoff.Permanent(fmt.Errorf("read: timeout waiting for route reply type=%d seq=%d", wantType, wantSeq))
|
||||
// Transient: under concurrent pressure the kernel can drop our reply
|
||||
// from the socket buffer. Let backoff.Retry re-send with a fresh seq.
|
||||
return fmt.Errorf("read: timeout waiting for route reply type=%d seq=%d", wantType, wantSeq)
|
||||
}
|
||||
n, err := unix.Read(fd, resp)
|
||||
if err != nil {
|
||||
@@ -214,11 +226,13 @@ func readRouteResponse(fd, wantType, wantSeq int) error {
|
||||
continue
|
||||
}
|
||||
hdr := (*unix.RtMsghdr)(unsafe.Pointer(&resp[0]))
|
||||
// Darwin reflects the sender's pid on replies; matching (Type, Seq, Pid)
|
||||
// uniquely identifies our own reply among broadcast traffic.
|
||||
if int(hdr.Type) != wantType || int(hdr.Seq) != wantSeq || hdr.Pid != pid {
|
||||
continue
|
||||
}
|
||||
if hdr.Errno != 0 {
|
||||
return backoff.Permanent(fmt.Errorf("kernel errno %d", hdr.Errno))
|
||||
return backoff.Permanent(fmt.Errorf("kernel: %w", syscall.Errno(hdr.Errno)))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -229,6 +243,7 @@ func (r *SysOps) buildRouteMessage(action int, prefix netip.Prefix, nexthop Next
|
||||
Type: action,
|
||||
Flags: unix.RTF_UP | routeProtoFlag,
|
||||
Version: unix.RTM_VERSION,
|
||||
ID: uintptr(os.Getpid()),
|
||||
Seq: r.getSeq(),
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user