From 2e0e3a3601d90a5d7a6b39dc16ebde2522fdf580 Mon Sep 17 00:00:00 2001 From: Viktor Liu <17948409+lixmal@users.noreply.github.com> Date: Mon, 20 Apr 2026 17:01:01 +0900 Subject: [PATCH 01/25] [client] Replace exclusion routes with scoped default + IP_BOUND_IF on macOS (#5918) --- client/iface/udpmux/universal.go | 4 +- .../systemops/systemops_bsd_other.go | 10 + .../systemops/systemops_darwin.go | 241 ++++++++++++++++++ .../systemops/systemops_generic.go | 15 +- .../routemanager/systemops/systemops_js.go | 4 - .../routemanager/systemops/systemops_linux.go | 7 - .../systemops/systemops_nonlinux.go | 4 - .../routemanager/systemops/systemops_unix.go | 162 +++++++----- client/net/dialer_init_darwin.go | 5 + client/net/dialer_init_generic.go | 2 +- client/net/env_android.go | 24 -- .../{env_windows.go => env_bound_iface.go} | 19 +- client/net/env_generic.go | 2 +- client/net/env_mobile.go | 25 ++ client/net/listener_init_darwin.go | 5 + client/net/listener_init_generic.go | 2 +- client/net/net_darwin.go | 160 ++++++++++++ client/server/state.go | 7 +- 18 files changed, 579 insertions(+), 119 deletions(-) create mode 100644 client/internal/routemanager/systemops/systemops_bsd_other.go create mode 100644 client/internal/routemanager/systemops/systemops_darwin.go create mode 100644 client/net/dialer_init_darwin.go delete mode 100644 client/net/env_android.go rename client/net/{env_windows.go => env_bound_iface.go} (71%) create mode 100644 client/net/env_mobile.go create mode 100644 client/net/listener_init_darwin.go create mode 100644 client/net/net_darwin.go diff --git a/client/iface/udpmux/universal.go b/client/iface/udpmux/universal.go index 43bfedaaa..89a7eefb9 100644 --- a/client/iface/udpmux/universal.go +++ b/client/iface/udpmux/universal.go @@ -171,7 +171,7 @@ func (u *UDPConn) performFilterCheck(addr net.Addr) error { } if u.address.Network.Contains(a) { - log.Warnf("Address %s is part of the NetBird network %s, refusing to write", addr, u.address) + log.Warnf("address %s is part of the NetBird network %s, refusing to write", addr, u.address) return fmt.Errorf("address %s is part of the NetBird network %s, refusing to write", addr, u.address) } @@ -181,7 +181,7 @@ func (u *UDPConn) performFilterCheck(addr net.Addr) error { u.addrCache.Store(addr.String(), isRouted) if isRouted { // Extra log, as the error only shows up with ICE logging enabled - log.Infof("Address %s is part of routed network %s, refusing to write", addr, prefix) + log.Infof("address %s is part of routed network %s, refusing to write", addr, prefix) return fmt.Errorf("address %s is part of routed network %s, refusing to write", addr, prefix) } } diff --git a/client/internal/routemanager/systemops/systemops_bsd_other.go b/client/internal/routemanager/systemops/systemops_bsd_other.go new file mode 100644 index 000000000..3f09219aa --- /dev/null +++ b/client/internal/routemanager/systemops/systemops_bsd_other.go @@ -0,0 +1,10 @@ +//go:build (dragonfly || freebsd || netbsd || openbsd) && !darwin + +package systemops + +// Non-darwin BSDs don't support the IP_BOUND_IF + scoped default model. They +// always fall through to the ref-counter exclusion-route path; these stubs +// exist only so systemops_unix.go compiles. +func (r *SysOps) setupAdvancedRouting() error { return nil } +func (r *SysOps) cleanupAdvancedRouting() error { return nil } +func (r *SysOps) flushPlatformExtras() error { return nil } diff --git a/client/internal/routemanager/systemops/systemops_darwin.go b/client/internal/routemanager/systemops/systemops_darwin.go new file mode 100644 index 000000000..d6875ff95 --- /dev/null +++ b/client/internal/routemanager/systemops/systemops_darwin.go @@ -0,0 +1,241 @@ +//go:build darwin && !ios + +package systemops + +import ( + "errors" + "fmt" + "net/netip" + "os" + "time" + + "github.com/hashicorp/go-multierror" + log "github.com/sirupsen/logrus" + "golang.org/x/net/route" + "golang.org/x/sys/unix" + + nberrors "github.com/netbirdio/netbird/client/errors" + "github.com/netbirdio/netbird/client/internal/routemanager/vars" + nbnet "github.com/netbirdio/netbird/client/net" +) + +// scopedRouteBudget bounds retries for the scoped default route. Installing or +// deleting it matters enough that we're willing to spend longer waiting for the +// kernel reply than for per-prefix exclusion routes. +const scopedRouteBudget = 5 * time.Second + +// setupAdvancedRouting installs an RTF_IFSCOPE default route per address family +// pinned to the current physical egress, so IP_BOUND_IF scoped lookups can +// resolve gateway'd destinations while the VPN's split default owns the +// unscoped table. +// +// Timing note: this runs during routeManager.Init, which happens before the +// VPN interface is created and before any peer routes propagate. The initial +// mgmt / signal / relay TCP dials always fire before this runs, so those +// sockets miss the IP_BOUND_IF binding and rely on the kernel's normal route +// lookup, which at that point correctly picks the physical default. Those +// already-established TCP flows keep their originally-selected interface for +// their lifetime on Darwin because the kernel caches the egress route +// per-socket at connect time; adding the VPN's 0/1 + 128/1 split default +// afterwards does not migrate them since the original en0 default stays in +// the table. Any subsequent reconnect via nbnet.NewDialer picks up the +// populated bound-iface cache and gets IP_BOUND_IF set cleanly. +func (r *SysOps) setupAdvancedRouting() error { + // Drop any previously-cached egress interface before reinstalling. On a + // refresh, a family that no longer resolves would otherwise keep the stale + // binding, causing new sockets to scope to an interface without a matching + // scoped default. + nbnet.ClearBoundInterfaces() + + if err := r.flushScopedDefaults(); err != nil { + log.Warnf("flush residual scoped defaults: %v", err) + } + + var merr *multierror.Error + installed := 0 + + for _, unspec := range []netip.Addr{netip.IPv4Unspecified(), netip.IPv6Unspecified()} { + ok, err := r.installScopedDefaultFor(unspec) + if err != nil { + merr = multierror.Append(merr, err) + continue + } + if ok { + installed++ + } + } + + if installed == 0 && merr != nil { + return nberrors.FormatErrorOrNil(merr) + } + if merr != nil { + log.Warnf("advanced routing setup partially succeeded: %v", nberrors.FormatErrorOrNil(merr)) + } + return nil +} + +// installScopedDefaultFor resolves the physical default nexthop for the given +// address family, installs a scoped default via it, and caches the iface for +// subsequent IP_BOUND_IF / IPV6_BOUND_IF socket binds. +func (r *SysOps) installScopedDefaultFor(unspec netip.Addr) (bool, error) { + nexthop, err := GetNextHop(unspec) + if err != nil { + if errors.Is(err, vars.ErrRouteNotFound) { + return false, nil + } + return false, fmt.Errorf("get default nexthop for %s: %w", unspec, err) + } + if nexthop.Intf == nil { + return false, fmt.Errorf("unusable default nexthop for %s (no interface)", unspec) + } + + if err := r.addScopedDefault(unspec, nexthop); err != nil { + return false, fmt.Errorf("add scoped default on %s: %w", nexthop.Intf.Name, err) + } + + af := unix.AF_INET + if unspec.Is6() { + af = unix.AF_INET6 + } + nbnet.SetBoundInterface(af, nexthop.Intf) + via := "point-to-point" + if nexthop.IP.IsValid() { + via = nexthop.IP.String() + } + log.Infof("installed scoped default route via %s on %s for %s", via, nexthop.Intf.Name, afOf(unspec)) + return true, nil +} + +func (r *SysOps) cleanupAdvancedRouting() error { + nbnet.ClearBoundInterfaces() + return r.flushScopedDefaults() +} + +// flushPlatformExtras runs darwin-specific residual cleanup hooked into the +// generic FlushMarkedRoutes path, so a crashed daemon's scoped defaults get +// removed on the next boot regardless of whether a profile is brought up. +func (r *SysOps) flushPlatformExtras() error { + return r.flushScopedDefaults() +} + +// flushScopedDefaults removes any scoped default routes tagged with routeProtoFlag. +// Safe to call at startup to clear residual entries from a prior session. +func (r *SysOps) flushScopedDefaults() error { + rib, err := retryFetchRIB() + if err != nil { + return fmt.Errorf("fetch routing table: %w", err) + } + + msgs, err := route.ParseRIB(route.RIBTypeRoute, rib) + if err != nil { + return fmt.Errorf("parse routing table: %w", err) + } + + var merr *multierror.Error + removed := 0 + + for _, msg := range msgs { + rtMsg, ok := msg.(*route.RouteMessage) + if !ok { + continue + } + if rtMsg.Flags&routeProtoFlag == 0 { + continue + } + if rtMsg.Flags&unix.RTF_IFSCOPE == 0 { + continue + } + + info, err := MsgToRoute(rtMsg) + if err != nil { + log.Debugf("skip scoped flush: %v", err) + continue + } + if !info.Dst.IsValid() || info.Dst.Bits() != 0 { + continue + } + + if err := r.deleteScopedRoute(rtMsg); err != nil { + merr = multierror.Append(merr, fmt.Errorf("delete scoped default %s on index %d: %w", + info.Dst, rtMsg.Index, err)) + continue + } + removed++ + log.Debugf("flushed residual scoped default %s on index %d", info.Dst, rtMsg.Index) + } + + if removed > 0 { + log.Infof("flushed %d residual scoped default route(s)", removed) + } + return nberrors.FormatErrorOrNil(merr) +} + +func (r *SysOps) addScopedDefault(unspec netip.Addr, nexthop Nexthop) error { + return r.scopedRouteSocket(unix.RTM_ADD, unspec, nexthop) +} + +func (r *SysOps) deleteScopedRoute(rtMsg *route.RouteMessage) error { + // Preserve identifying flags from the stored route (including RTF_GATEWAY + // only if present); kernel-set bits like RTF_DONE don't belong on RTM_DELETE. + keep := unix.RTF_UP | unix.RTF_STATIC | unix.RTF_GATEWAY | unix.RTF_IFSCOPE | routeProtoFlag + del := &route.RouteMessage{ + Type: unix.RTM_DELETE, + Flags: rtMsg.Flags & keep, + Version: unix.RTM_VERSION, + Seq: r.getSeq(), + Index: rtMsg.Index, + Addrs: rtMsg.Addrs, + } + return r.writeRouteMessage(del, scopedRouteBudget) +} + +func (r *SysOps) scopedRouteSocket(action int, unspec netip.Addr, nexthop Nexthop) error { + flags := unix.RTF_UP | unix.RTF_STATIC | unix.RTF_IFSCOPE | routeProtoFlag + + msg := &route.RouteMessage{ + Type: action, + Flags: flags, + Version: unix.RTM_VERSION, + ID: uintptr(os.Getpid()), + Seq: r.getSeq(), + Index: nexthop.Intf.Index, + } + + const numAddrs = unix.RTAX_NETMASK + 1 + addrs := make([]route.Addr, numAddrs) + + dst, err := addrToRouteAddr(unspec) + if err != nil { + return fmt.Errorf("build destination: %w", err) + } + mask, err := prefixToRouteNetmask(netip.PrefixFrom(unspec, 0)) + if err != nil { + return fmt.Errorf("build netmask: %w", err) + } + addrs[unix.RTAX_DST] = dst + addrs[unix.RTAX_NETMASK] = mask + + if nexthop.IP.IsValid() { + msg.Flags |= unix.RTF_GATEWAY + gw, err := addrToRouteAddr(nexthop.IP.Unmap()) + if err != nil { + return fmt.Errorf("build gateway: %w", err) + } + addrs[unix.RTAX_GATEWAY] = gw + } else { + addrs[unix.RTAX_GATEWAY] = &route.LinkAddr{ + Index: nexthop.Intf.Index, + Name: nexthop.Intf.Name, + } + } + msg.Addrs = addrs + + return r.writeRouteMessage(msg, scopedRouteBudget) +} + +func afOf(a netip.Addr) string { + if a.Is4() { + return "IPv4" + } + return "IPv6" +} diff --git a/client/internal/routemanager/systemops/systemops_generic.go b/client/internal/routemanager/systemops/systemops_generic.go index ec219c7fe..4211eb057 100644 --- a/client/internal/routemanager/systemops/systemops_generic.go +++ b/client/internal/routemanager/systemops/systemops_generic.go @@ -21,6 +21,7 @@ import ( "github.com/netbirdio/netbird/client/internal/routemanager/util" "github.com/netbirdio/netbird/client/internal/routemanager/vars" "github.com/netbirdio/netbird/client/internal/statemanager" + nbnet "github.com/netbirdio/netbird/client/net" "github.com/netbirdio/netbird/client/net/hooks" ) @@ -31,8 +32,6 @@ var splitDefaultv4_2 = netip.PrefixFrom(netip.AddrFrom4([4]byte{128}), 1) var splitDefaultv6_1 = netip.PrefixFrom(netip.IPv6Unspecified(), 1) var splitDefaultv6_2 = netip.PrefixFrom(netip.AddrFrom16([16]byte{0x80}), 1) -var ErrRoutingIsSeparate = errors.New("routing is separate") - func (r *SysOps) setupRefCounter(initAddresses []net.IP, stateManager *statemanager.Manager) error { stateManager.RegisterState(&ShutdownState{}) @@ -397,12 +396,16 @@ func ipToAddr(ip net.IP, intf *net.Interface) (netip.Addr, error) { } // IsAddrRouted checks if the candidate address would route to the vpn, in which case it returns true and the matched prefix. +// When advanced routing is active the WG socket is bound to the physical interface (fwmark on linux, +// IP_UNICAST_IF on windows, IP_BOUND_IF on darwin) and bypasses the main routing table, so the check is skipped. func IsAddrRouted(addr netip.Addr, vpnRoutes []netip.Prefix) (bool, netip.Prefix) { - localRoutes, err := hasSeparateRouting() + if nbnet.AdvancedRouting() { + return false, netip.Prefix{} + } + + localRoutes, err := GetRoutesFromTable() if err != nil { - if !errors.Is(err, ErrRoutingIsSeparate) { - log.Errorf("Failed to get routes: %v", err) - } + log.Errorf("Failed to get routes: %v", err) return false, netip.Prefix{} } diff --git a/client/internal/routemanager/systemops/systemops_js.go b/client/internal/routemanager/systemops/systemops_js.go index 808507fc9..242571b3d 100644 --- a/client/internal/routemanager/systemops/systemops_js.go +++ b/client/internal/routemanager/systemops/systemops_js.go @@ -22,10 +22,6 @@ func GetRoutesFromTable() ([]netip.Prefix, error) { return []netip.Prefix{}, nil } -func hasSeparateRouting() ([]netip.Prefix, error) { - return []netip.Prefix{}, nil -} - // GetDetailedRoutesFromTable returns empty routes for WASM. func GetDetailedRoutesFromTable() ([]DetailedRoute, error) { return []DetailedRoute{}, nil diff --git a/client/internal/routemanager/systemops/systemops_linux.go b/client/internal/routemanager/systemops/systemops_linux.go index bd10f131f..39a9fd978 100644 --- a/client/internal/routemanager/systemops/systemops_linux.go +++ b/client/internal/routemanager/systemops/systemops_linux.go @@ -894,13 +894,6 @@ func getAddressFamily(prefix netip.Prefix) int { return netlink.FAMILY_V6 } -func hasSeparateRouting() ([]netip.Prefix, error) { - if !nbnet.AdvancedRouting() { - return GetRoutesFromTable() - } - return nil, ErrRoutingIsSeparate -} - func isOpErr(err error) bool { // EAFTNOSUPPORT when ipv6 is disabled via sysctl, EOPNOTSUPP when disabled in boot options or otherwise not supported if errors.Is(err, syscall.EAFNOSUPPORT) || errors.Is(err, syscall.EOPNOTSUPP) { diff --git a/client/internal/routemanager/systemops/systemops_nonlinux.go b/client/internal/routemanager/systemops/systemops_nonlinux.go index 905a7bc12..016a62ebd 100644 --- a/client/internal/routemanager/systemops/systemops_nonlinux.go +++ b/client/internal/routemanager/systemops/systemops_nonlinux.go @@ -48,10 +48,6 @@ func EnableIPForwarding() error { return nil } -func hasSeparateRouting() ([]netip.Prefix, error) { - return GetRoutesFromTable() -} - // GetIPRules returns IP rules for debugging (not supported on non-Linux platforms) func GetIPRules() ([]IPRule, error) { log.Infof("IP rules collection is not supported on %s", runtime.GOOS) diff --git a/client/internal/routemanager/systemops/systemops_unix.go b/client/internal/routemanager/systemops/systemops_unix.go index 7089178fb..2d3f9b69a 100644 --- a/client/internal/routemanager/systemops/systemops_unix.go +++ b/client/internal/routemanager/systemops/systemops_unix.go @@ -25,6 +25,9 @@ import ( const ( envRouteProtoFlag = "NB_ROUTE_PROTO_FLAG" + + // routeBudget bounds retries for per-prefix exclusion route programming. + routeBudget = 1 * time.Second ) var routeProtoFlag int @@ -41,26 +44,42 @@ func init() { } func (r *SysOps) SetupRouting(initAddresses []net.IP, stateManager *statemanager.Manager, advancedRouting bool) error { + if advancedRouting { + return r.setupAdvancedRouting() + } + + log.Infof("Using legacy routing setup with ref counters") return r.setupRefCounter(initAddresses, stateManager) } func (r *SysOps) CleanupRouting(stateManager *statemanager.Manager, advancedRouting bool) error { + if advancedRouting { + return r.cleanupAdvancedRouting() + } + return r.cleanupRefCounter(stateManager) } // FlushMarkedRoutes removes single IP exclusion routes marked with the configured RTF_PROTO flag. +// On darwin it also flushes residual RTF_IFSCOPE scoped default routes so a +// crashed prior session can't leave crud in the table. func (r *SysOps) FlushMarkedRoutes() error { + var merr *multierror.Error + + if err := r.flushPlatformExtras(); err != nil { + merr = multierror.Append(merr, fmt.Errorf("flush platform extras: %w", err)) + } + rib, err := retryFetchRIB() if err != nil { - return fmt.Errorf("fetch routing table: %w", err) + return nberrors.FormatErrorOrNil(multierror.Append(merr, fmt.Errorf("fetch routing table: %w", err))) } msgs, err := route.ParseRIB(route.RIBTypeRoute, rib) if err != nil { - return fmt.Errorf("parse routing table: %w", err) + return nberrors.FormatErrorOrNil(multierror.Append(merr, fmt.Errorf("parse routing table: %w", err))) } - var merr *multierror.Error flushedCount := 0 for _, msg := range msgs { @@ -117,12 +136,12 @@ func (r *SysOps) routeSocket(action int, prefix netip.Prefix, nexthop Nexthop) e return fmt.Errorf("invalid prefix: %s", prefix) } - expBackOff := backoff.NewExponentialBackOff() - expBackOff.InitialInterval = 50 * time.Millisecond - expBackOff.MaxInterval = 500 * time.Millisecond - expBackOff.MaxElapsedTime = 1 * time.Second + msg, err := r.buildRouteMessage(action, prefix, nexthop) + if err != nil { + return fmt.Errorf("build route message: %w", err) + } - if err := backoff.Retry(r.routeOp(action, prefix, nexthop), expBackOff); err != nil { + if err := r.writeRouteMessage(msg, routeBudget); err != nil { a := "add" if action == unix.RTM_DELETE { a = "remove" @@ -132,50 +151,91 @@ func (r *SysOps) routeSocket(action int, prefix netip.Prefix, nexthop Nexthop) e return nil } -func (r *SysOps) routeOp(action int, prefix netip.Prefix, nexthop Nexthop) func() error { - operation := func() error { - fd, err := unix.Socket(syscall.AF_ROUTE, syscall.SOCK_RAW, syscall.AF_UNSPEC) - if err != nil { - return fmt.Errorf("open routing socket: %w", err) +// writeRouteMessage sends a route message over AF_ROUTE and waits for the +// kernel's matching reply, retrying transient failures until budget elapses. +// Callers do not need to manage sockets or seq numbers themselves. +func (r *SysOps) writeRouteMessage(msg *route.RouteMessage, budget time.Duration) error { + expBackOff := backoff.NewExponentialBackOff() + expBackOff.InitialInterval = 50 * time.Millisecond + expBackOff.MaxInterval = 500 * time.Millisecond + expBackOff.MaxElapsedTime = budget + + return backoff.Retry(func() error { return routeMessageRoundtrip(msg) }, expBackOff) +} + +func routeMessageRoundtrip(msg *route.RouteMessage) error { + fd, err := unix.Socket(syscall.AF_ROUTE, syscall.SOCK_RAW, syscall.AF_UNSPEC) + if err != nil { + return fmt.Errorf("open routing socket: %w", err) + } + defer func() { + if err := unix.Close(fd); err != nil && !errors.Is(err, unix.EBADF) { + log.Warnf("close routing socket: %v", err) } - defer func() { - if err := unix.Close(fd); err != nil && !errors.Is(err, unix.EBADF) { - log.Warnf("failed to close routing socket: %v", err) + }() + + tv := unix.Timeval{Sec: 1} + if err := unix.SetsockoptTimeval(fd, unix.SOL_SOCKET, unix.SO_RCVTIMEO, &tv); err != nil { + return backoff.Permanent(fmt.Errorf("set recv timeout: %w", err)) + } + + // AF_ROUTE is a broadcast channel: every route socket on the host sees + // every RTM_* event. With concurrent route programming the default + // per-socket queue overflows and our own reply gets dropped. + if err := unix.SetsockoptInt(fd, unix.SOL_SOCKET, unix.SO_RCVBUF, 1<<20); err != nil { + log.Debugf("set SO_RCVBUF on route socket: %v", err) + } + + bytes, err := msg.Marshal() + if err != nil { + return backoff.Permanent(fmt.Errorf("marshal: %w", err)) + } + + if _, err = unix.Write(fd, bytes); err != nil { + if errors.Is(err, unix.ENOBUFS) || errors.Is(err, unix.EAGAIN) { + return fmt.Errorf("write: %w", err) + } + return backoff.Permanent(fmt.Errorf("write: %w", err)) + } + return readRouteResponse(fd, msg.Type, msg.Seq) +} + +// readRouteResponse reads from the AF_ROUTE socket until it sees a reply +// matching our write (same type, seq, and pid). AF_ROUTE SOCK_RAW is a +// broadcast channel: interface up/down, third-party route changes and neighbor +// discovery events can all land between our write and read, so we must filter. +func readRouteResponse(fd, wantType, wantSeq int) error { + pid := int32(os.Getpid()) + resp := make([]byte, 2048) + deadline := time.Now().Add(time.Second) + for { + if time.Now().After(deadline) { + // Transient: under concurrent pressure the kernel can drop our reply + // from the socket buffer. Let backoff.Retry re-send with a fresh seq. + return fmt.Errorf("read: timeout waiting for route reply type=%d seq=%d", wantType, wantSeq) + } + n, err := unix.Read(fd, resp) + if err != nil { + if errors.Is(err, unix.EAGAIN) || errors.Is(err, unix.EWOULDBLOCK) { + // SO_RCVTIMEO fired while waiting; loop to re-check the absolute deadline. + continue } - }() - - msg, err := r.buildRouteMessage(action, prefix, nexthop) - if err != nil { - return backoff.Permanent(fmt.Errorf("build route message: %w", err)) + return backoff.Permanent(fmt.Errorf("read: %w", err)) } - - msgBytes, err := msg.Marshal() - if err != nil { - return backoff.Permanent(fmt.Errorf("marshal route message: %w", err)) + if n < int(unsafe.Sizeof(unix.RtMsghdr{})) { + continue } - - if _, err = unix.Write(fd, msgBytes); err != nil { - if errors.Is(err, unix.ENOBUFS) || errors.Is(err, unix.EAGAIN) { - return fmt.Errorf("write: %w", err) - } - return backoff.Permanent(fmt.Errorf("write: %w", err)) + hdr := (*unix.RtMsghdr)(unsafe.Pointer(&resp[0])) + // Darwin reflects the sender's pid on replies; matching (Type, Seq, Pid) + // uniquely identifies our own reply among broadcast traffic. + if int(hdr.Type) != wantType || int(hdr.Seq) != wantSeq || hdr.Pid != pid { + continue } - - respBuf := make([]byte, 2048) - n, err := unix.Read(fd, respBuf) - if err != nil { - return backoff.Permanent(fmt.Errorf("read route response: %w", err)) + if hdr.Errno != 0 { + return backoff.Permanent(fmt.Errorf("kernel: %w", syscall.Errno(hdr.Errno))) } - - if n > 0 { - if err := r.parseRouteResponse(respBuf[:n]); err != nil { - return backoff.Permanent(err) - } - } - return nil } - return operation } func (r *SysOps) buildRouteMessage(action int, prefix netip.Prefix, nexthop Nexthop) (msg *route.RouteMessage, err error) { @@ -183,6 +243,7 @@ func (r *SysOps) buildRouteMessage(action int, prefix netip.Prefix, nexthop Next Type: action, Flags: unix.RTF_UP | routeProtoFlag, Version: unix.RTM_VERSION, + ID: uintptr(os.Getpid()), Seq: r.getSeq(), } @@ -221,19 +282,6 @@ func (r *SysOps) buildRouteMessage(action int, prefix netip.Prefix, nexthop Next return msg, nil } -func (r *SysOps) parseRouteResponse(buf []byte) error { - if len(buf) < int(unsafe.Sizeof(unix.RtMsghdr{})) { - return nil - } - - rtMsg := (*unix.RtMsghdr)(unsafe.Pointer(&buf[0])) - if rtMsg.Errno != 0 { - return fmt.Errorf("parse: %d", rtMsg.Errno) - } - - return nil -} - // addrToRouteAddr converts a netip.Addr to the appropriate route.Addr (*route.Inet4Addr or *route.Inet6Addr). func addrToRouteAddr(addr netip.Addr) (route.Addr, error) { if addr.Is4() { diff --git a/client/net/dialer_init_darwin.go b/client/net/dialer_init_darwin.go new file mode 100644 index 000000000..e18909ff7 --- /dev/null +++ b/client/net/dialer_init_darwin.go @@ -0,0 +1,5 @@ +package net + +func (d *Dialer) init() { + d.Dialer.Control = applyBoundIfToSocket +} diff --git a/client/net/dialer_init_generic.go b/client/net/dialer_init_generic.go index 18ebc6ad1..78973b47d 100644 --- a/client/net/dialer_init_generic.go +++ b/client/net/dialer_init_generic.go @@ -1,4 +1,4 @@ -//go:build !linux && !windows +//go:build !linux && !windows && !darwin package net diff --git a/client/net/env_android.go b/client/net/env_android.go deleted file mode 100644 index 9d89951a1..000000000 --- a/client/net/env_android.go +++ /dev/null @@ -1,24 +0,0 @@ -//go:build android - -package net - -// Init initializes the network environment for Android -func Init() { - // No initialization needed on Android -} - -// AdvancedRouting reports whether routing loops can be avoided without using exclusion routes. -// Always returns true on Android since we cannot handle routes dynamically. -func AdvancedRouting() bool { - return true -} - -// SetVPNInterfaceName is a no-op on Android -func SetVPNInterfaceName(name string) { - // No-op on Android - not needed for Android VPN service -} - -// GetVPNInterfaceName returns empty string on Android -func GetVPNInterfaceName() string { - return "" -} diff --git a/client/net/env_windows.go b/client/net/env_bound_iface.go similarity index 71% rename from client/net/env_windows.go rename to client/net/env_bound_iface.go index 7e8868ba5..593988c2c 100644 --- a/client/net/env_windows.go +++ b/client/net/env_bound_iface.go @@ -1,4 +1,4 @@ -//go:build windows +//go:build (darwin && !ios) || windows package net @@ -24,17 +24,22 @@ func Init() { } func checkAdvancedRoutingSupport() bool { - var err error - var legacyRouting bool + legacyRouting := false if val := os.Getenv(envUseLegacyRouting); val != "" { - legacyRouting, err = strconv.ParseBool(val) + parsed, err := strconv.ParseBool(val) if err != nil { - log.Warnf("failed to parse %s: %v", envUseLegacyRouting, err) + log.Warnf("ignoring unparsable %s=%q: %v", envUseLegacyRouting, val, err) + } else { + legacyRouting = parsed } } - if legacyRouting || netstack.IsEnabled() { - log.Info("advanced routing has been requested to be disabled") + if legacyRouting { + log.Infof("advanced routing disabled: legacy routing requested via %s", envUseLegacyRouting) + return false + } + if netstack.IsEnabled() { + log.Info("advanced routing disabled: netstack mode is enabled") return false } diff --git a/client/net/env_generic.go b/client/net/env_generic.go index f467930c3..18c10bb78 100644 --- a/client/net/env_generic.go +++ b/client/net/env_generic.go @@ -1,4 +1,4 @@ -//go:build !linux && !windows && !android +//go:build !linux && !windows && !darwin package net diff --git a/client/net/env_mobile.go b/client/net/env_mobile.go new file mode 100644 index 000000000..80b0fad8d --- /dev/null +++ b/client/net/env_mobile.go @@ -0,0 +1,25 @@ +//go:build ios || android + +package net + +// Init initializes the network environment for mobile platforms. +func Init() { + // no-op on mobile: routing scope is owned by the VPN extension. +} + +// AdvancedRouting reports whether routing loops can be avoided without using exclusion routes. +// Always returns true on mobile since routes cannot be handled dynamically and the VPN extension +// owns the routing scope. +func AdvancedRouting() bool { + return true +} + +// SetVPNInterfaceName is a no-op on mobile. +func SetVPNInterfaceName(string) { + // no-op on mobile: the VPN extension manages the interface. +} + +// GetVPNInterfaceName returns an empty string on mobile. +func GetVPNInterfaceName() string { + return "" +} diff --git a/client/net/listener_init_darwin.go b/client/net/listener_init_darwin.go new file mode 100644 index 000000000..f2fcc80ed --- /dev/null +++ b/client/net/listener_init_darwin.go @@ -0,0 +1,5 @@ +package net + +func (l *ListenerConfig) init() { + l.ListenConfig.Control = applyBoundIfToSocket +} diff --git a/client/net/listener_init_generic.go b/client/net/listener_init_generic.go index 4f8f17ab2..65a785222 100644 --- a/client/net/listener_init_generic.go +++ b/client/net/listener_init_generic.go @@ -1,4 +1,4 @@ -//go:build !linux && !windows +//go:build !linux && !windows && !darwin package net diff --git a/client/net/net_darwin.go b/client/net/net_darwin.go new file mode 100644 index 000000000..00d858a6a --- /dev/null +++ b/client/net/net_darwin.go @@ -0,0 +1,160 @@ +package net + +import ( + "fmt" + "net" + "net/netip" + "strconv" + "strings" + "sync" + "syscall" + + log "github.com/sirupsen/logrus" + "golang.org/x/sys/unix" +) + +// On darwin IPV6_BOUND_IF also scopes v4-mapped egress from dual-stack +// (IPV6_V6ONLY=0) AF_INET6 sockets, so a single setsockopt on "udp6"/"tcp6" +// covers both families. Setting IP_BOUND_IF on an AF_INET6 socket returns +// EINVAL regardless of V6ONLY because the IPPROTO_IP ctloutput path is +// dispatched by socket domain (AF_INET only) not by inp_vflag. + +// boundIface holds the physical interface chosen at routing setup time. Sockets +// created via nbnet.NewDialer / nbnet.NewListener bind to it via IP_BOUND_IF +// (IPv4) or IPV6_BOUND_IF (IPv6 / dual-stack) so their scoped route lookup +// hits the RTF_IFSCOPE default installed by the routemanager, rather than +// following the VPN's split default. +var ( + boundIfaceMu sync.RWMutex + boundIface4 *net.Interface + boundIface6 *net.Interface +) + +// SetBoundInterface records the egress interface for an address family. Called +// by the routemanager after a scoped default route has been installed. +// af must be unix.AF_INET or unix.AF_INET6; other values are ignored. +// nil iface is rejected — use ClearBoundInterfaces to clear all slots. +func SetBoundInterface(af int, iface *net.Interface) { + if iface == nil { + log.Warnf("SetBoundInterface: nil iface for AF %d, ignored", af) + return + } + boundIfaceMu.Lock() + defer boundIfaceMu.Unlock() + switch af { + case unix.AF_INET: + boundIface4 = iface + case unix.AF_INET6: + boundIface6 = iface + default: + log.Warnf("SetBoundInterface: unsupported address family %d", af) + } +} + +// ClearBoundInterfaces resets the cached egress interfaces. Called by the +// routemanager during cleanup. +func ClearBoundInterfaces() { + boundIfaceMu.Lock() + defer boundIfaceMu.Unlock() + boundIface4 = nil + boundIface6 = nil +} + +// boundInterfaceFor returns the cached egress interface for a socket's address +// family, falling back to the other family if the preferred slot is empty. +// The kernel stores both IP_BOUND_IF and IPV6_BOUND_IF in inp_boundifp, so +// either setsockopt scopes the socket; preferring same-family still matters +// when v4 and v6 defaults egress different NICs. +func boundInterfaceFor(network, address string) *net.Interface { + if iface := zoneInterface(address); iface != nil { + return iface + } + + boundIfaceMu.RLock() + defer boundIfaceMu.RUnlock() + + primary, secondary := boundIface4, boundIface6 + if isV6Network(network) { + primary, secondary = boundIface6, boundIface4 + } + if primary != nil { + return primary + } + return secondary +} + +func isV6Network(network string) bool { + return strings.HasSuffix(network, "6") +} + +// zoneInterface extracts an explicit interface from an IPv6 link-local zone (e.g. fe80::1%en0). +func zoneInterface(address string) *net.Interface { + if address == "" { + return nil + } + addr, err := netip.ParseAddrPort(address) + if err != nil { + a, err := netip.ParseAddr(address) + if err != nil { + return nil + } + addr = netip.AddrPortFrom(a, 0) + } + zone := addr.Addr().Zone() + if zone == "" { + return nil + } + if iface, err := net.InterfaceByName(zone); err == nil { + return iface + } + if idx, err := strconv.Atoi(zone); err == nil { + if iface, err := net.InterfaceByIndex(idx); err == nil { + return iface + } + } + return nil +} + +func setIPv4BoundIf(fd uintptr, iface *net.Interface) error { + if err := unix.SetsockoptInt(int(fd), unix.IPPROTO_IP, unix.IP_BOUND_IF, iface.Index); err != nil { + return fmt.Errorf("set IP_BOUND_IF: %w (interface: %s, index: %d)", err, iface.Name, iface.Index) + } + return nil +} + +func setIPv6BoundIf(fd uintptr, iface *net.Interface) error { + if err := unix.SetsockoptInt(int(fd), unix.IPPROTO_IPV6, unix.IPV6_BOUND_IF, iface.Index); err != nil { + return fmt.Errorf("set IPV6_BOUND_IF: %w (interface: %s, index: %d)", err, iface.Name, iface.Index) + } + return nil +} + +// applyBoundIfToSocket binds the socket to the cached physical egress interface +// so scoped route lookup avoids the VPN utun and egresses the underlay directly. +func applyBoundIfToSocket(network, address string, c syscall.RawConn) error { + if !AdvancedRouting() { + return nil + } + + iface := boundInterfaceFor(network, address) + if iface == nil { + log.Debugf("no bound iface cached for %s to %s, skipping BOUND_IF", network, address) + return nil + } + + isV6 := isV6Network(network) + var controlErr error + if err := c.Control(func(fd uintptr) { + if isV6 { + controlErr = setIPv6BoundIf(fd, iface) + } else { + controlErr = setIPv4BoundIf(fd, iface) + } + if controlErr == nil { + log.Debugf("set BOUND_IF=%d on %s for %s to %s", iface.Index, iface.Name, network, address) + } + }); err != nil { + return fmt.Errorf("control: %w", err) + } + return controlErr +} diff --git a/client/server/state.go b/client/server/state.go index 8dca6bde1..f2d823465 100644 --- a/client/server/state.go +++ b/client/server/state.go @@ -12,7 +12,6 @@ import ( "github.com/netbirdio/netbird/client/internal" "github.com/netbirdio/netbird/client/internal/routemanager/systemops" "github.com/netbirdio/netbird/client/internal/statemanager" - nbnet "github.com/netbirdio/netbird/client/net" "github.com/netbirdio/netbird/client/proto" ) @@ -138,10 +137,8 @@ func restoreResidualState(ctx context.Context, statePath string) error { } // clean up any remaining routes independently of the state file - if !nbnet.AdvancedRouting() { - if err := systemops.New(nil, nil).FlushMarkedRoutes(); err != nil { - merr = multierror.Append(merr, fmt.Errorf("flush marked routes: %w", err)) - } + if err := systemops.New(nil, nil).FlushMarkedRoutes(); err != nil { + merr = multierror.Append(merr, fmt.Errorf("flush marked routes: %w", err)) } return nberrors.FormatErrorOrNil(merr) From 95213f715714177b7c9672b3f264f84c9a31cf04 Mon Sep 17 00:00:00 2001 From: Viktor Liu <17948409+lixmal@users.noreply.github.com> Date: Mon, 20 Apr 2026 17:24:11 +0900 Subject: [PATCH 02/25] [client] Use Match host+exec instead of Host+Match in SSH client config (#5903) --- client/ssh/config/manager.go | 25 ++++++++++++------------- client/ssh/config/manager_test.go | 31 +++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 13 deletions(-) diff --git a/client/ssh/config/manager.go b/client/ssh/config/manager.go index cc47fd2d2..6e584b2c3 100644 --- a/client/ssh/config/manager.go +++ b/client/ssh/config/manager.go @@ -187,24 +187,23 @@ func (m *Manager) buildPeerConfig(allHostPatterns []string) (string, error) { return "", fmt.Errorf("get NetBird executable path: %w", err) } - hostLine := strings.Join(deduplicatedPatterns, " ") - config := fmt.Sprintf("Host %s\n", hostLine) - config += fmt.Sprintf(" Match exec \"%s ssh detect %%h %%p\"\n", execPath) - config += " PreferredAuthentications password,publickey,keyboard-interactive\n" - config += " PasswordAuthentication yes\n" - config += " PubkeyAuthentication yes\n" - config += " BatchMode no\n" - config += fmt.Sprintf(" ProxyCommand %s ssh proxy %%h %%p\n", execPath) - config += " StrictHostKeyChecking no\n" + hostList := strings.Join(deduplicatedPatterns, ",") + config := fmt.Sprintf("Match host \"%s\" exec \"%s ssh detect %%h %%p\"\n", hostList, execPath) + config += " PreferredAuthentications password,publickey,keyboard-interactive\n" + config += " PasswordAuthentication yes\n" + config += " PubkeyAuthentication yes\n" + config += " BatchMode no\n" + config += fmt.Sprintf(" ProxyCommand %s ssh proxy %%h %%p\n", execPath) + config += " StrictHostKeyChecking no\n" if runtime.GOOS == "windows" { - config += " UserKnownHostsFile NUL\n" + config += " UserKnownHostsFile NUL\n" } else { - config += " UserKnownHostsFile /dev/null\n" + config += " UserKnownHostsFile /dev/null\n" } - config += " CheckHostIP no\n" - config += " LogLevel ERROR\n\n" + config += " CheckHostIP no\n" + config += " LogLevel ERROR\n\n" return config, nil } diff --git a/client/ssh/config/manager_test.go b/client/ssh/config/manager_test.go index dc3ad95b3..e7380c7f2 100644 --- a/client/ssh/config/manager_test.go +++ b/client/ssh/config/manager_test.go @@ -116,6 +116,37 @@ func TestManager_PeerLimit(t *testing.T) { assert.True(t, os.IsNotExist(err), "SSH config should not be created with too many peers") } +func TestManager_MatchHostFormat(t *testing.T) { + tempDir, err := os.MkdirTemp("", "netbird-ssh-config-test") + require.NoError(t, err) + defer func() { assert.NoError(t, os.RemoveAll(tempDir)) }() + + manager := &Manager{ + sshConfigDir: filepath.Join(tempDir, "ssh_config.d"), + sshConfigFile: "99-netbird.conf", + } + + peers := []PeerSSHInfo{ + {Hostname: "peer1", IP: "100.125.1.1", FQDN: "peer1.nb.internal"}, + {Hostname: "peer2", IP: "100.125.1.2", FQDN: "peer2.nb.internal"}, + } + + err = manager.SetupSSHClientConfig(peers) + require.NoError(t, err) + + configPath := filepath.Join(manager.sshConfigDir, manager.sshConfigFile) + content, err := os.ReadFile(configPath) + require.NoError(t, err) + configStr := string(content) + + // Must use "Match host" with comma-separated patterns, not a bare "Host" directive. + // A bare "Host" followed by "Match exec" is incorrect per ssh_config(5): the Host block + // ends at the next Match keyword, making it a no-op and leaving the Match exec unscoped. + assert.NotContains(t, configStr, "\nHost ", "should not use bare Host directive") + assert.Contains(t, configStr, "Match host \"100.125.1.1,peer1.nb.internal,peer1,100.125.1.2,peer2.nb.internal,peer2\"", + "should use Match host with comma-separated patterns") +} + func TestManager_ForcedSSHConfig(t *testing.T) { // Set force environment variable t.Setenv(EnvForceSSHConfig, "true") From e3611265159bd372f77ca404d32c86127e036525 Mon Sep 17 00:00:00 2001 From: Michael Uray <25169478+MichaelUray@users.noreply.github.com> Date: Mon, 20 Apr 2026 10:36:19 +0200 Subject: [PATCH 03/25] [client] Fix WGIface.Close deadlock when DNS filter hook re-enters GetDevice (#5916) WGIface.Close() took w.mu and held it across w.tun.Close(). The underlying wireguard-go device waits for its send/receive goroutines to drain before Close() returns, and some of those goroutines re-enter WGIface during shutdown. In particular, the userspace packet filter DNS hook in client/internal/dns.ServiceViaMemory.filterDNSTraffic calls s.wgInterface.GetDevice() on every packet, which also needs w.mu. With the Close-side holding the mutex, the read goroutine blocks in GetDevice and Close waits forever for that goroutine to exit: goroutine N (TestDNSPermanent_updateUpstream): WGIface.Close -> holds w.mu -> tun.Close -> sync.WaitGroup.Wait goroutine M (wireguard read routine): FilteredDevice.Read -> filterOutbound -> udpHooksDrop -> filterDNSTraffic.func1 -> WGIface.GetDevice -> sync.Mutex.Lock This surfaces as a 5 minute test timeout on the macOS Client/Unit CI job (panic: test timed out after 5m0s, running tests: TestDNSPermanent_updateUpstream). Release w.mu before calling w.tun.Close(). The other Close steps (wgProxyFactory.Free, waitUntilRemoved, Destroy) do not mutate any fields guarded by w.mu beyond what Free() already does, so the lock is not needed once the tun has started shutting down. A new unit test in iface_close_test.go uses a fake WGTunDevice to reproduce the deadlock deterministically without requiring CAP_NET_ADMIN. --- client/iface/iface.go | 11 ++- client/iface/iface_close_test.go | 113 +++++++++++++++++++++++++++++++ 2 files changed, 122 insertions(+), 2 deletions(-) create mode 100644 client/iface/iface_close_test.go diff --git a/client/iface/iface.go b/client/iface/iface.go index 9b331d68c..655dd1682 100644 --- a/client/iface/iface.go +++ b/client/iface/iface.go @@ -217,7 +217,6 @@ func (w *WGIface) RemoveAllowedIP(peerKey string, allowedIP netip.Prefix) error // Close closes the tunnel interface func (w *WGIface) Close() error { w.mu.Lock() - defer w.mu.Unlock() var result *multierror.Error @@ -225,7 +224,15 @@ func (w *WGIface) Close() error { result = multierror.Append(result, fmt.Errorf("failed to free WireGuard proxy: %w", err)) } - if err := w.tun.Close(); err != nil { + // Release w.mu before calling w.tun.Close(): the underlying + // wireguard-go device.Close() waits for its send/receive goroutines + // to drain. Some of those goroutines re-enter WGIface methods that + // take w.mu (e.g. the packet filter DNS hook calls GetDevice()), so + // holding the mutex here would deadlock the shutdown path. + tun := w.tun + w.mu.Unlock() + + if err := tun.Close(); err != nil { result = multierror.Append(result, fmt.Errorf("failed to close wireguard interface %s: %w", w.Name(), err)) } diff --git a/client/iface/iface_close_test.go b/client/iface/iface_close_test.go new file mode 100644 index 000000000..171e15d0a --- /dev/null +++ b/client/iface/iface_close_test.go @@ -0,0 +1,113 @@ +//go:build !android + +package iface + +import ( + "errors" + "sync" + "testing" + "time" + + wgdevice "golang.zx2c4.com/wireguard/device" + "golang.zx2c4.com/wireguard/tun/netstack" + + "github.com/netbirdio/netbird/client/iface/device" + "github.com/netbirdio/netbird/client/iface/udpmux" + "github.com/netbirdio/netbird/client/iface/wgaddr" + "github.com/netbirdio/netbird/client/iface/wgproxy" +) + +// fakeTunDevice implements WGTunDevice and lets the test control when +// Close() returns. It mimics the wireguard-go shutdown path, which blocks +// until its goroutines drain. Some of those goroutines (e.g. the packet +// filter DNS hook in client/internal/dns) call back into WGIface, so if +// WGIface.Close() held w.mu across tun.Close() the shutdown would +// deadlock. +type fakeTunDevice struct { + closeStarted chan struct{} + unblockClose chan struct{} +} + +func (f *fakeTunDevice) Create() (device.WGConfigurer, error) { + return nil, errors.New("not implemented") +} +func (f *fakeTunDevice) Up() (*udpmux.UniversalUDPMuxDefault, error) { + return nil, errors.New("not implemented") +} +func (f *fakeTunDevice) UpdateAddr(wgaddr.Address) error { return nil } +func (f *fakeTunDevice) WgAddress() wgaddr.Address { return wgaddr.Address{} } +func (f *fakeTunDevice) MTU() uint16 { return DefaultMTU } +func (f *fakeTunDevice) DeviceName() string { return "nb-close-test" } +func (f *fakeTunDevice) FilteredDevice() *device.FilteredDevice { return nil } +func (f *fakeTunDevice) Device() *wgdevice.Device { return nil } +func (f *fakeTunDevice) GetNet() *netstack.Net { return nil } +func (f *fakeTunDevice) GetICEBind() device.EndpointManager { return nil } + +func (f *fakeTunDevice) Close() error { + close(f.closeStarted) + <-f.unblockClose + return nil +} + +type fakeProxyFactory struct{} + +func (fakeProxyFactory) GetProxy() wgproxy.Proxy { return nil } +func (fakeProxyFactory) GetProxyPort() uint16 { return 0 } +func (fakeProxyFactory) Free() error { return nil } + +// TestWGIface_CloseReleasesMutexBeforeTunClose guards against a deadlock +// that surfaces as a macOS test-timeout in +// TestDNSPermanent_updateUpstream: WGIface.Close() used to hold w.mu +// while waiting for the wireguard-go device goroutines to finish, and +// one of those goroutines (the DNS filter hook) calls back into +// WGIface.GetDevice() which needs the same mutex. The fix is to drop +// the lock before tun.Close() returns control. +func TestWGIface_CloseReleasesMutexBeforeTunClose(t *testing.T) { + tun := &fakeTunDevice{ + closeStarted: make(chan struct{}), + unblockClose: make(chan struct{}), + } + w := &WGIface{ + tun: tun, + wgProxyFactory: fakeProxyFactory{}, + } + + closeDone := make(chan error, 1) + go func() { + closeDone <- w.Close() + }() + + select { + case <-tun.closeStarted: + case <-time.After(2 * time.Second): + close(tun.unblockClose) + t.Fatal("tun.Close() was never invoked") + } + + // Simulate the WireGuard read goroutine calling back into WGIface + // via the packet filter's DNS hook. If Close() still held w.mu + // during tun.Close(), this would block until the test timeout. + getDeviceDone := make(chan struct{}) + var wg sync.WaitGroup + wg.Add(1) + go func() { + defer wg.Done() + _ = w.GetDevice() + close(getDeviceDone) + }() + + select { + case <-getDeviceDone: + case <-time.After(2 * time.Second): + close(tun.unblockClose) + wg.Wait() + t.Fatal("GetDevice() deadlocked while WGIface.Close was closing the tun") + } + + close(tun.unblockClose) + select { + case <-closeDone: + case <-time.After(2 * time.Second): + t.Fatal("WGIface.Close() never returned after the tun was unblocked") + } +} From 7f023ce8014084385e677e67c541677336e25431 Mon Sep 17 00:00:00 2001 From: Zoltan Papp Date: Mon, 20 Apr 2026 11:26:30 +0200 Subject: [PATCH 04/25] [client] Android debug bundle support (#5888) Add Android debug bundle support with Troubleshoot UI --- Makefile | 2 +- client/android/client.go | 117 ++++++++++++++++++++-- client/android/platform_files.go | 1 + client/internal/connect.go | 3 + client/internal/debug/debug.go | 18 ++-- client/internal/debug/debug_android.go | 41 ++++++++ client/internal/debug/debug_nonandroid.go | 25 +++++ client/internal/engine.go | 2 + client/internal/mobile_dependency.go | 4 + 9 files changed, 191 insertions(+), 22 deletions(-) create mode 100644 client/internal/debug/debug_android.go create mode 100644 client/internal/debug/debug_nonandroid.go diff --git a/Makefile b/Makefile index 43379e115..5d52b94fa 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ GOLANGCI_LINT := $(shell pwd)/bin/golangci-lint $(GOLANGCI_LINT): @echo "Installing golangci-lint..." @mkdir -p ./bin - @GOBIN=$(shell pwd)/bin go install github.com/golangci/golangci-lint/cmd/golangci-lint@latest + @GOBIN=$(shell pwd)/bin go install github.com/golangci/golangci-lint/v2/cmd/golangci-lint@latest # Lint only changed files (fast, for pre-push) lint: $(GOLANGCI_LINT) diff --git a/client/android/client.go b/client/android/client.go index d35bf4279..37e17a363 100644 --- a/client/android/client.go +++ b/client/android/client.go @@ -8,6 +8,7 @@ import ( "os" "slices" "sync" + "time" "golang.org/x/exp/maps" @@ -15,6 +16,7 @@ import ( "github.com/netbirdio/netbird/client/iface/device" "github.com/netbirdio/netbird/client/internal" + "github.com/netbirdio/netbird/client/internal/debug" "github.com/netbirdio/netbird/client/internal/dns" "github.com/netbirdio/netbird/client/internal/listener" "github.com/netbirdio/netbird/client/internal/peer" @@ -26,6 +28,7 @@ import ( "github.com/netbirdio/netbird/formatter" "github.com/netbirdio/netbird/route" "github.com/netbirdio/netbird/shared/management/domain" + types "github.com/netbirdio/netbird/upload-server/types" ) // ConnectionListener export internal Listener for mobile @@ -68,7 +71,30 @@ type Client struct { uiVersion string networkChangeListener listener.NetworkChangeListener + stateMu sync.RWMutex connectClient *internal.ConnectClient + config *profilemanager.Config + cacheDir string +} + +func (c *Client) setState(cfg *profilemanager.Config, cacheDir string, cc *internal.ConnectClient) { + c.stateMu.Lock() + defer c.stateMu.Unlock() + c.config = cfg + c.cacheDir = cacheDir + c.connectClient = cc +} + +func (c *Client) stateSnapshot() (*profilemanager.Config, string, *internal.ConnectClient) { + c.stateMu.RLock() + defer c.stateMu.RUnlock() + return c.config, c.cacheDir, c.connectClient +} + +func (c *Client) getConnectClient() *internal.ConnectClient { + c.stateMu.RLock() + defer c.stateMu.RUnlock() + return c.connectClient } // NewClient instantiate a new Client @@ -93,6 +119,7 @@ func (c *Client) Run(platformFiles PlatformFiles, urlOpener URLOpener, isAndroid cfgFile := platformFiles.ConfigurationFilePath() stateFile := platformFiles.StateFilePath() + cacheDir := platformFiles.CacheDir() log.Infof("Starting client with config: %s, state: %s", cfgFile, stateFile) @@ -124,8 +151,9 @@ func (c *Client) Run(platformFiles PlatformFiles, urlOpener URLOpener, isAndroid // todo do not throw error in case of cancelled context ctx = internal.CtxInitState(ctx) - c.connectClient = internal.NewConnectClient(ctx, cfg, c.recorder) - return c.connectClient.RunOnAndroid(c.tunAdapter, c.iFaceDiscover, c.networkChangeListener, slices.Clone(dns.items), dnsReadyListener, stateFile) + connectClient := internal.NewConnectClient(ctx, cfg, c.recorder) + c.setState(cfg, cacheDir, connectClient) + return connectClient.RunOnAndroid(c.tunAdapter, c.iFaceDiscover, c.networkChangeListener, slices.Clone(dns.items), dnsReadyListener, stateFile, cacheDir) } // RunWithoutLogin we apply this type of run function when the backed has been started without UI (i.e. after reboot). @@ -135,6 +163,7 @@ func (c *Client) RunWithoutLogin(platformFiles PlatformFiles, dns *DNSList, dnsR cfgFile := platformFiles.ConfigurationFilePath() stateFile := platformFiles.StateFilePath() + cacheDir := platformFiles.CacheDir() log.Infof("Starting client without login with config: %s, state: %s", cfgFile, stateFile) @@ -157,8 +186,9 @@ func (c *Client) RunWithoutLogin(platformFiles PlatformFiles, dns *DNSList, dnsR // todo do not throw error in case of cancelled context ctx = internal.CtxInitState(ctx) - c.connectClient = internal.NewConnectClient(ctx, cfg, c.recorder) - return c.connectClient.RunOnAndroid(c.tunAdapter, c.iFaceDiscover, c.networkChangeListener, slices.Clone(dns.items), dnsReadyListener, stateFile) + connectClient := internal.NewConnectClient(ctx, cfg, c.recorder) + c.setState(cfg, cacheDir, connectClient) + return connectClient.RunOnAndroid(c.tunAdapter, c.iFaceDiscover, c.networkChangeListener, slices.Clone(dns.items), dnsReadyListener, stateFile, cacheDir) } // Stop the internal client and free the resources @@ -173,11 +203,12 @@ func (c *Client) Stop() { } func (c *Client) RenewTun(fd int) error { - if c.connectClient == nil { + cc := c.getConnectClient() + if cc == nil { return fmt.Errorf("engine not running") } - e := c.connectClient.Engine() + e := cc.Engine() if e == nil { return fmt.Errorf("engine not initialized") } @@ -185,6 +216,73 @@ func (c *Client) RenewTun(fd int) error { return e.RenewTun(fd) } +// DebugBundle generates a debug bundle, uploads it, and returns the upload key. +// It works both with and without a running engine. +func (c *Client) DebugBundle(platformFiles PlatformFiles, anonymize bool) (string, error) { + cfg, cacheDir, cc := c.stateSnapshot() + + // If the engine hasn't been started, load config from disk + if cfg == nil { + var err error + cfg, err = profilemanager.UpdateOrCreateConfig(profilemanager.ConfigInput{ + ConfigPath: platformFiles.ConfigurationFilePath(), + }) + if err != nil { + return "", fmt.Errorf("load config: %w", err) + } + cacheDir = platformFiles.CacheDir() + } + + deps := debug.GeneratorDependencies{ + InternalConfig: cfg, + StatusRecorder: c.recorder, + TempDir: cacheDir, + } + + if cc != nil { + resp, err := cc.GetLatestSyncResponse() + if err != nil { + log.Warnf("get latest sync response: %v", err) + } + deps.SyncResponse = resp + + if e := cc.Engine(); e != nil { + if cm := e.GetClientMetrics(); cm != nil { + deps.ClientMetrics = cm + } + } + } + + bundleGenerator := debug.NewBundleGenerator( + deps, + debug.BundleConfig{ + Anonymize: anonymize, + IncludeSystemInfo: true, + }, + ) + + path, err := bundleGenerator.Generate() + if err != nil { + return "", fmt.Errorf("generate debug bundle: %w", err) + } + defer func() { + if err := os.Remove(path); err != nil { + log.Errorf("failed to remove debug bundle file: %v", err) + } + }() + + uploadCtx, cancel := context.WithTimeout(context.Background(), 2*time.Minute) + defer cancel() + + key, err := debug.UploadDebugBundle(uploadCtx, types.DefaultBundleURL, cfg.ManagementURL.String(), path) + if err != nil { + return "", fmt.Errorf("upload debug bundle: %w", err) + } + + log.Infof("debug bundle uploaded with key %s", key) + return key, nil +} + // SetTraceLogLevel configure the logger to trace level func (c *Client) SetTraceLogLevel() { log.SetLevel(log.TraceLevel) @@ -214,12 +312,13 @@ func (c *Client) PeersList() *PeerInfoArray { } func (c *Client) Networks() *NetworkArray { - if c.connectClient == nil { + cc := c.getConnectClient() + if cc == nil { log.Error("not connected") return nil } - engine := c.connectClient.Engine() + engine := cc.Engine() if engine == nil { log.Error("could not get engine") return nil @@ -300,7 +399,7 @@ func (c *Client) toggleRoute(command routeCommand) error { } func (c *Client) getRouteManager() (routemanager.Manager, error) { - client := c.connectClient + client := c.getConnectClient() if client == nil { return nil, fmt.Errorf("not connected") } diff --git a/client/android/platform_files.go b/client/android/platform_files.go index f0c369750..3be40c0bd 100644 --- a/client/android/platform_files.go +++ b/client/android/platform_files.go @@ -7,4 +7,5 @@ package android type PlatformFiles interface { ConfigurationFilePath() string StateFilePath() string + CacheDir() string } diff --git a/client/internal/connect.go b/client/internal/connect.go index bc2bd84d9..ac498f719 100644 --- a/client/internal/connect.go +++ b/client/internal/connect.go @@ -94,6 +94,7 @@ func (c *ConnectClient) RunOnAndroid( dnsAddresses []netip.AddrPort, dnsReadyListener dns.ReadyListener, stateFilePath string, + cacheDir string, ) error { // in case of non Android os these variables will be nil mobileDependency := MobileDependency{ @@ -103,6 +104,7 @@ func (c *ConnectClient) RunOnAndroid( HostDNSAddresses: dnsAddresses, DnsReadyListener: dnsReadyListener, StateFilePath: stateFilePath, + TempDir: cacheDir, } return c.run(mobileDependency, nil, "") } @@ -338,6 +340,7 @@ func (c *ConnectClient) run(mobileDependency MobileDependency, runningChan chan log.Error(err) return wrapErr(err) } + engineConfig.TempDir = mobileDependency.TempDir relayManager := relayClient.NewManager(engineCtx, relayURLs, myPrivateKey.PublicKey().String(), engineConfig.MTU) c.statusRecorder.SetRelayMgr(relayManager) diff --git a/client/internal/debug/debug.go b/client/internal/debug/debug.go index 6a8eae324..bddb9a69e 100644 --- a/client/internal/debug/debug.go +++ b/client/internal/debug/debug.go @@ -16,7 +16,6 @@ import ( "path/filepath" "runtime" "runtime/pprof" - "slices" "sort" "strings" "time" @@ -31,7 +30,6 @@ import ( "github.com/netbirdio/netbird/client/internal/updater/installer" nbstatus "github.com/netbirdio/netbird/client/status" mgmProto "github.com/netbirdio/netbird/shared/management/proto" - "github.com/netbirdio/netbird/util" ) const readmeContent = `Netbird debug bundle @@ -234,6 +232,7 @@ type BundleGenerator struct { statusRecorder *peer.Status syncResponse *mgmProto.SyncResponse logPath string + tempDir string cpuProfile []byte refreshStatus func() // Optional callback to refresh status before bundle generation clientMetrics MetricsExporter @@ -256,6 +255,7 @@ type GeneratorDependencies struct { StatusRecorder *peer.Status SyncResponse *mgmProto.SyncResponse LogPath string + TempDir string // Directory for temporary bundle zip files. If empty, os.TempDir() is used. CPUProfile []byte RefreshStatus func() // Optional callback to refresh status before bundle generation ClientMetrics MetricsExporter @@ -275,6 +275,7 @@ func NewBundleGenerator(deps GeneratorDependencies, cfg BundleConfig) *BundleGen statusRecorder: deps.StatusRecorder, syncResponse: deps.SyncResponse, logPath: deps.LogPath, + tempDir: deps.TempDir, cpuProfile: deps.CPUProfile, refreshStatus: deps.RefreshStatus, clientMetrics: deps.ClientMetrics, @@ -287,7 +288,7 @@ func NewBundleGenerator(deps GeneratorDependencies, cfg BundleConfig) *BundleGen // Generate creates a debug bundle and returns the location. func (g *BundleGenerator) Generate() (resp string, err error) { - bundlePath, err := os.CreateTemp("", "netbird.debug.*.zip") + bundlePath, err := os.CreateTemp(g.tempDir, "netbird.debug.*.zip") if err != nil { return "", fmt.Errorf("create zip file: %w", err) } @@ -373,15 +374,8 @@ func (g *BundleGenerator) createArchive() error { log.Errorf("failed to add wg show output: %v", err) } - if g.logPath != "" && !slices.Contains(util.SpecialLogs, g.logPath) { - if err := g.addLogfile(); err != nil { - log.Errorf("failed to add log file to debug bundle: %v", err) - if err := g.trySystemdLogFallback(); err != nil { - log.Errorf("failed to add systemd logs as fallback: %v", err) - } - } - } else if err := g.trySystemdLogFallback(); err != nil { - log.Errorf("failed to add systemd logs: %v", err) + if err := g.addPlatformLog(); err != nil { + log.Errorf("failed to add logs to debug bundle: %v", err) } if err := g.addUpdateLogs(); err != nil { diff --git a/client/internal/debug/debug_android.go b/client/internal/debug/debug_android.go new file mode 100644 index 000000000..a4e2b3e98 --- /dev/null +++ b/client/internal/debug/debug_android.go @@ -0,0 +1,41 @@ +//go:build android + +package debug + +import ( + "fmt" + "io" + "os/exec" + + log "github.com/sirupsen/logrus" +) + +func (g *BundleGenerator) addPlatformLog() error { + cmd := exec.Command("/system/bin/logcat", "-d") + stdout, err := cmd.StdoutPipe() + if err != nil { + return fmt.Errorf("logcat stdout pipe: %w", err) + } + + if err := cmd.Start(); err != nil { + return fmt.Errorf("start logcat: %w", err) + } + + var logReader io.Reader = stdout + if g.anonymize { + var pw *io.PipeWriter + logReader, pw = io.Pipe() + go anonymizeLog(stdout, pw, g.anonymizer) + } + + if err := g.addFileToZip(logReader, "logcat.txt"); err != nil { + return fmt.Errorf("add logcat to zip: %w", err) + } + + if err := cmd.Wait(); err != nil { + return fmt.Errorf("wait logcat: %w", err) + } + + log.Debug("added logcat output to debug bundle") + return nil +} diff --git a/client/internal/debug/debug_nonandroid.go b/client/internal/debug/debug_nonandroid.go new file mode 100644 index 000000000..117238dec --- /dev/null +++ b/client/internal/debug/debug_nonandroid.go @@ -0,0 +1,25 @@ +//go:build !android + +package debug + +import ( + "slices" + + log "github.com/sirupsen/logrus" + + "github.com/netbirdio/netbird/util" +) + +func (g *BundleGenerator) addPlatformLog() error { + if g.logPath != "" && !slices.Contains(util.SpecialLogs, g.logPath) { + if err := g.addLogfile(); err != nil { + log.Errorf("failed to add log file to debug bundle: %v", err) + if err := g.trySystemdLogFallback(); err != nil { + return err + } + } + } else if err := g.trySystemdLogFallback(); err != nil { + return err + } + return nil +} diff --git a/client/internal/engine.go b/client/internal/engine.go index be2d8bbf3..b49e02c6d 100644 --- a/client/internal/engine.go +++ b/client/internal/engine.go @@ -140,6 +140,7 @@ type EngineConfig struct { ProfileConfig *profilemanager.Config LogPath string + TempDir string } // EngineServices holds the external service dependencies required by the Engine. @@ -1095,6 +1096,7 @@ func (e *Engine) handleBundle(params *mgmProto.BundleParameters) (*mgmProto.JobR StatusRecorder: e.statusRecorder, SyncResponse: syncResponse, LogPath: e.config.LogPath, + TempDir: e.config.TempDir, ClientMetrics: e.clientMetrics, RefreshStatus: func() { e.RunHealthProbes(true) diff --git a/client/internal/mobile_dependency.go b/client/internal/mobile_dependency.go index 7c95e2b99..310d61a25 100644 --- a/client/internal/mobile_dependency.go +++ b/client/internal/mobile_dependency.go @@ -22,4 +22,8 @@ type MobileDependency struct { DnsManager dns.IosDnsManager FileDescriptor int32 StateFilePath string + + // TempDir is a writable directory for temporary files (e.g., debug bundle zip). + // On Android, this should be set to the app's cache directory. + TempDir string } From 3098f48b25e2a613d8f70466243d992c498e0fd2 Mon Sep 17 00:00:00 2001 From: Zoltan Papp Date: Mon, 20 Apr 2026 11:49:38 +0200 Subject: [PATCH 05/25] [client] fix ios network addresses mac filter (#5906) * fix(client): skip MAC address filter for network addresses on iOS iOS does not expose hardware (MAC) addresses due to Apple's privacy restrictions (since iOS 14), causing networkAddresses() to return an empty list because all interfaces are filtered out by the HardwareAddr check. Move networkAddresses() to platform-specific files so iOS can skip this filter. --- client/system/info.go | 54 ---------------------------- client/system/info_ios.go | 62 ++++++++++++++++++++++++++++++++ client/system/network_addr.go | 66 +++++++++++++++++++++++++++++++++++ 3 files changed, 128 insertions(+), 54 deletions(-) create mode 100644 client/system/network_addr.go diff --git a/client/system/info.go b/client/system/info.go index f2546cfe6..175d1f07f 100644 --- a/client/system/info.go +++ b/client/system/info.go @@ -2,7 +2,6 @@ package system import ( "context" - "net" "net/netip" "strings" @@ -145,59 +144,6 @@ func extractDeviceName(ctx context.Context, defaultName string) string { return v } -func networkAddresses() ([]NetworkAddress, error) { - interfaces, err := net.Interfaces() - if err != nil { - return nil, err - } - - var netAddresses []NetworkAddress - for _, iface := range interfaces { - if iface.Flags&net.FlagUp == 0 { - continue - } - if iface.HardwareAddr.String() == "" { - continue - } - addrs, err := iface.Addrs() - if err != nil { - continue - } - - for _, address := range addrs { - ipNet, ok := address.(*net.IPNet) - if !ok { - continue - } - - if ipNet.IP.IsLoopback() { - continue - } - - netAddr := NetworkAddress{ - NetIP: netip.MustParsePrefix(ipNet.String()), - Mac: iface.HardwareAddr.String(), - } - - if isDuplicated(netAddresses, netAddr) { - continue - } - - netAddresses = append(netAddresses, netAddr) - } - } - return netAddresses, nil -} - -func isDuplicated(addresses []NetworkAddress, addr NetworkAddress) bool { - for _, duplicated := range addresses { - if duplicated.NetIP == addr.NetIP { - return true - } - } - return false -} - // GetInfoWithChecks retrieves and parses the system information with applied checks. func GetInfoWithChecks(ctx context.Context, checks []*proto.Checks) (*Info, error) { log.Debugf("gathering system information with checks: %d", len(checks)) diff --git a/client/system/info_ios.go b/client/system/info_ios.go index 81936cf1d..ad42b1edf 100644 --- a/client/system/info_ios.go +++ b/client/system/info_ios.go @@ -2,6 +2,8 @@ package system import ( "context" + "net" + "net/netip" "runtime" log "github.com/sirupsen/logrus" @@ -42,6 +44,66 @@ func GetInfo(ctx context.Context) *Info { return gio } +// networkAddresses returns the list of network addresses on iOS. +// On iOS, hardware (MAC) addresses are not available due to Apple's privacy +// restrictions (iOS returns a fixed 02:00:00:00:00:00 placeholder), so we +// leave Mac empty to match Android's behavior. We also skip the HardwareAddr +// check that other platforms use and filter out link-local addresses as they +// are not useful for posture checks. +func networkAddresses() ([]NetworkAddress, error) { + interfaces, err := net.Interfaces() + if err != nil { + return nil, err + } + + var netAddresses []NetworkAddress + for _, iface := range interfaces { + if iface.Flags&net.FlagUp == 0 { + continue + } + addrs, err := iface.Addrs() + if err != nil { + continue + } + + for _, address := range addrs { + netAddr, ok := toNetworkAddress(address) + if !ok { + continue + } + if isDuplicated(netAddresses, netAddr) { + continue + } + netAddresses = append(netAddresses, netAddr) + } + } + return netAddresses, nil +} + +func toNetworkAddress(address net.Addr) (NetworkAddress, bool) { + ipNet, ok := address.(*net.IPNet) + if !ok { + return NetworkAddress{}, false + } + if ipNet.IP.IsLoopback() || ipNet.IP.IsLinkLocalUnicast() || ipNet.IP.IsMulticast() { + return NetworkAddress{}, false + } + prefix, err := netip.ParsePrefix(ipNet.String()) + if err != nil { + return NetworkAddress{}, false + } + return NetworkAddress{NetIP: prefix, Mac: ""}, true +} + +func isDuplicated(addresses []NetworkAddress, addr NetworkAddress) bool { + for _, duplicated := range addresses { + if duplicated.NetIP == addr.NetIP { + return true + } + } + return false +} + // checkFileAndProcess checks if the file path exists and if a process is running at that path. func checkFileAndProcess(paths []string) ([]File, error) { return []File{}, nil diff --git a/client/system/network_addr.go b/client/system/network_addr.go new file mode 100644 index 000000000..5423cf8ad --- /dev/null +++ b/client/system/network_addr.go @@ -0,0 +1,66 @@ +//go:build !ios + +package system + +import ( + "net" + "net/netip" +) + +func networkAddresses() ([]NetworkAddress, error) { + interfaces, err := net.Interfaces() + if err != nil { + return nil, err + } + + var netAddresses []NetworkAddress + for _, iface := range interfaces { + if iface.Flags&net.FlagUp == 0 { + continue + } + if iface.HardwareAddr.String() == "" { + continue + } + addrs, err := iface.Addrs() + if err != nil { + continue + } + + mac := iface.HardwareAddr.String() + for _, address := range addrs { + netAddr, ok := toNetworkAddress(address, mac) + if !ok { + continue + } + if isDuplicated(netAddresses, netAddr) { + continue + } + netAddresses = append(netAddresses, netAddr) + } + } + return netAddresses, nil +} + +func toNetworkAddress(address net.Addr, mac string) (NetworkAddress, bool) { + ipNet, ok := address.(*net.IPNet) + if !ok { + return NetworkAddress{}, false + } + if ipNet.IP.IsLoopback() { + return NetworkAddress{}, false + } + prefix, err := netip.ParsePrefix(ipNet.String()) + if err != nil { + return NetworkAddress{}, false + } + return NetworkAddress{NetIP: prefix, Mac: mac}, true +} + +func isDuplicated(addresses []NetworkAddress, addr NetworkAddress) bool { + for _, duplicated := range addresses { + if duplicated.NetIP == addr.NetIP { + return true + } + } + return false +} From 45d9ee52c00ff48cfa9c5a6d7919eb187d508659 Mon Sep 17 00:00:00 2001 From: Misha Bragin Date: Tue, 21 Apr 2026 10:21:11 +0200 Subject: [PATCH 06/25] [self-hosted] add reverse proxy retention fields to combined YAML (#5930) --- combined/config.yaml.example | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/combined/config.yaml.example b/combined/config.yaml.example index dce658d89..af85b0477 100644 --- a/combined/config.yaml.example +++ b/combined/config.yaml.example @@ -119,6 +119,8 @@ server: # Reverse proxy settings (optional) # reverseProxy: - # trustedHTTPProxies: [] - # trustedHTTPProxiesCount: 0 - # trustedPeers: [] + # trustedHTTPProxies: [] # CIDRs of trusted reverse proxies (e.g. ["10.0.0.0/8"]) + # trustedHTTPProxiesCount: 0 # Number of trusted proxies in front of the server (alternative to trustedHTTPProxies) + # trustedPeers: [] # CIDRs of trusted peer networks (e.g. ["100.64.0.0/10"]) + # accessLogRetentionDays: 7 # Days to retain HTTP access logs. 0 (or unset) defaults to 7. Negative values disable cleanup (logs kept indefinitely). + # accessLogCleanupIntervalHours: 24 # How often (in hours) to run the access-log cleanup job. 0 (or unset) is treated as "not set" and defaults to 24 hours; cleanup remains enabled. To disable cleanup, set accessLogRetentionDays to a negative value. From 06dfa9d4a5c7035b3abb2e1407c5636d0f0115db Mon Sep 17 00:00:00 2001 From: Misha Bragin Date: Tue, 21 Apr 2026 13:59:35 +0200 Subject: [PATCH 07/25] [management] replace mailru/easyjson with netbirdio/easyjson fork (#5938) --- go.mod | 2 ++ go.sum | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/go.mod b/go.mod index 5172b1a78..1b5861a37 100644 --- a/go.mod +++ b/go.mod @@ -323,3 +323,5 @@ replace github.com/pion/ice/v4 => github.com/netbirdio/ice/v4 v4.0.0-20250908184 replace github.com/libp2p/go-netroute => github.com/netbirdio/go-netroute v0.0.0-20240611143515-f59b0e1d3944 replace github.com/dexidp/dex => github.com/netbirdio/dex v0.244.0 + +replace github.com/mailru/easyjson => github.com/netbirdio/easyjson v0.9.0 diff --git a/go.sum b/go.sum index 9293ce73b..3772946e1 100644 --- a/go.sum +++ b/go.sum @@ -400,8 +400,6 @@ github.com/lufia/plan9stats v0.0.0-20240513124658-fba389f38bae h1:dIZY4ULFcto4tA github.com/lufia/plan9stats v0.0.0-20240513124658-fba389f38bae/go.mod h1:ilwx/Dta8jXAgpFYFvSWEMwxmbWXyiUHkd5FwyKhb5k= github.com/magiconair/properties v1.8.10 h1:s31yESBquKXCV9a/ScB3ESkOjUYYv+X0rg8SYxI99mE= github.com/magiconair/properties v1.8.10/go.mod h1:Dhd985XPs7jluiymwWYZ0G4Z61jb3vdS329zhj2hYo0= -github.com/mailru/easyjson v0.9.0 h1:PrnmzHw7262yW8sTBwxi1PdJA3Iw/EKBa8psRf7d9a4= -github.com/mailru/easyjson v0.9.0/go.mod h1:1+xMtQp2MRNVL/V1bOzuP3aP8VNwRW55fQUto+XFtTU= github.com/mattermost/xml-roundtrip-validator v0.1.0 h1:RXbVD2UAl7A7nOTR4u7E3ILa4IbtvKBHw64LDsmu9hU= github.com/mattermost/xml-roundtrip-validator v0.1.0/go.mod h1:qccnGMcpgwcNaBnxqpJpWWUiPNr5H3O8eDgGV9gT5To= github.com/mattn/go-isatty v0.0.9/go.mod h1:YNRxwqDuOph6SZLI9vUUz6OYw3QyUt7WiY2yME+cCiQ= @@ -449,6 +447,8 @@ github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/netbirdio/dex v0.244.0 h1:1GOvi8wnXYassnKGildzNqRHq0RbcfEUw7LKYpKIN7U= github.com/netbirdio/dex v0.244.0/go.mod h1:STGInJhPcAflrHmDO7vyit2kSq03PdL+8zQPoGALtcU= +github.com/netbirdio/easyjson v0.9.0 h1:6Nw2lghSVuy8RSkAYDhDv1thBVEmfVbKZnV7T7Z6Aus= +github.com/netbirdio/easyjson v0.9.0/go.mod h1:1+xMtQp2MRNVL/V1bOzuP3aP8VNwRW55fQUto+XFtTU= github.com/netbirdio/go-netroute v0.0.0-20240611143515-f59b0e1d3944 h1:TDtJKmM6Sf8uYFx/dMeqNOL90KUoRscdfpFZ3Im89uk= github.com/netbirdio/go-netroute v0.0.0-20240611143515-f59b0e1d3944/go.mod h1:sHA6TRxjQ6RLbnI+3R4DZo2Eseg/iKiPRfNmcuNySVQ= github.com/netbirdio/ice/v4 v4.0.0-20250908184934-6202be846b51 h1:Ov4qdafATOgGMB1wbSuh+0aAHcwz9hdvB6VZjh1mVMI= From 5a89e6621bf41cb6cb0da3040ffe68ae790e3c02 Mon Sep 17 00:00:00 2001 From: Zoltan Papp Date: Tue, 21 Apr 2026 15:52:08 +0200 Subject: [PATCH 08/25] [client] Supress ICE signaling (#5820) * [client] Suppress ICE signaling and periodic offers in force-relay mode When NB_FORCE_RELAY is enabled, skip WorkerICE creation entirely, suppress ICE credentials in offer/answer messages, disable the periodic ICE candidate monitor, and fix isConnectedOnAllWay to only check relay status so the guard stops sending unnecessary offers. * [client] Dynamically suppress ICE based on remote peer's offer credentials Track whether the remote peer includes ICE credentials in its offers/answers. When remote stops sending ICE credentials, skip ICE listener dispatch, suppress ICE credentials in responses, and exclude ICE from the guard connectivity check. When remote resumes sending ICE credentials, re-enable all ICE behavior. * [client] Fix nil SessionID panic and force ICE teardown on relay-only transition Fix nil pointer dereference in signalOfferAnswer when SessionID is nil (relay-only offers). Close stale ICE agent immediately when remote peer stops sending ICE credentials to avoid traffic black-hole during the ICE disconnect timeout. * [client] Add relay-only fallback check when ICE is unavailable Ensure the relay connection is supported with the peer when ICE is disabled to prevent connectivity issues. * [client] Add tri-state connection status to guard for smarter ICE retry (#5828) * [client] Add tri-state connection status to guard for smarter ICE retry Refactor isConnectedOnAllWay to return a ConnStatus enum (Connected, Disconnected, PartiallyConnected) instead of a boolean. When relay is up but ICE is not (PartiallyConnected), limit ICE offers to 3 retries with exponential backoff then fall back to hourly attempts, reducing unnecessary signaling traffic. Fully disconnected peers continue to retry aggressively. External events (relay/ICE disconnect, signal/relay reconnect) reset retry state to give ICE a fresh chance. * [client] Clarify guard ICE retry state and trace log trigger Split iceRetryState.attempt into shouldRetry (pure predicate) and enterHourlyMode (explicit state transition) so the caller in reconnectLoopWithRetry reads top-to-bottom. Restore the original trace-log behavior in isConnectedOnAllWay so it only logs on full disconnection, not on the new PartiallyConnected state. * [client] Extract pure evalConnStatus and add unit tests Split isConnectedOnAllWay into a thin method that snapshots state and a pure evalConnStatus helper that takes a connStatusInputs struct, so the tri-state decision logic can be exercised without constructing full Worker or Handshaker objects. Add table-driven tests covering force-relay, ICE-unavailable and fully-available code paths, plus unit tests for iceRetryState budget/hourly transitions and reset. * [client] Improve grammar in logs and refactor ICE credential checks --- client/internal/engine.go | 2 +- client/internal/peer/conn.go | 105 ++++++--- client/internal/peer/conn_status.go | 14 ++ client/internal/peer/conn_status_eval_test.go | 201 ++++++++++++++++++ client/internal/peer/env.go | 2 +- client/internal/peer/guard/guard.go | 68 ++++-- client/internal/peer/guard/ice_retry_state.go | 61 ++++++ .../peer/guard/ice_retry_state_test.go | 103 +++++++++ client/internal/peer/guard/sr_watcher.go | 8 +- client/internal/peer/handshaker.go | 56 ++++- client/internal/peer/signaler.go | 10 +- 11 files changed, 567 insertions(+), 63 deletions(-) create mode 100644 client/internal/peer/conn_status_eval_test.go create mode 100644 client/internal/peer/guard/ice_retry_state.go create mode 100644 client/internal/peer/guard/ice_retry_state_test.go diff --git a/client/internal/engine.go b/client/internal/engine.go index b49e02c6d..09d80a87d 100644 --- a/client/internal/engine.go +++ b/client/internal/engine.go @@ -570,7 +570,7 @@ func (e *Engine) Start(netbirdConfig *mgmProto.NetbirdConfig, mgmtURL *url.URL) e.connMgr.Start(e.ctx) e.srWatcher = guard.NewSRWatcher(e.signal, e.relayManager, e.mobileDep.IFaceDiscover, iceCfg) - e.srWatcher.Start() + e.srWatcher.Start(peer.IsForceRelayed()) e.receiveSignalEvents() e.receiveManagementEvents() diff --git a/client/internal/peer/conn.go b/client/internal/peer/conn.go index 8d1585b3f..1e416bfe7 100644 --- a/client/internal/peer/conn.go +++ b/client/internal/peer/conn.go @@ -185,17 +185,20 @@ func (conn *Conn) Open(engineCtx context.Context) error { conn.workerRelay = NewWorkerRelay(conn.ctx, conn.Log, isController(conn.config), conn.config, conn, conn.relayManager) - relayIsSupportedLocally := conn.workerRelay.RelayIsSupportedLocally() - workerICE, err := NewWorkerICE(conn.ctx, conn.Log, conn.config, conn, conn.signaler, conn.iFaceDiscover, conn.statusRecorder, relayIsSupportedLocally) - if err != nil { - return err + forceRelay := IsForceRelayed() + if !forceRelay { + relayIsSupportedLocally := conn.workerRelay.RelayIsSupportedLocally() + workerICE, err := NewWorkerICE(conn.ctx, conn.Log, conn.config, conn, conn.signaler, conn.iFaceDiscover, conn.statusRecorder, relayIsSupportedLocally) + if err != nil { + return err + } + conn.workerICE = workerICE } - conn.workerICE = workerICE conn.handshaker = NewHandshaker(conn.Log, conn.config, conn.signaler, conn.workerICE, conn.workerRelay, conn.metricsStages) conn.handshaker.AddRelayListener(conn.workerRelay.OnNewOffer) - if !isForceRelayed() { + if !forceRelay { conn.handshaker.AddICEListener(conn.workerICE.OnNewOffer) } @@ -251,7 +254,9 @@ func (conn *Conn) Close(signalToRemote bool) { conn.wgWatcherCancel() } conn.workerRelay.CloseConn() - conn.workerICE.Close() + if conn.workerICE != nil { + conn.workerICE.Close() + } if conn.wgProxyRelay != nil { err := conn.wgProxyRelay.CloseConn() @@ -294,7 +299,9 @@ func (conn *Conn) OnRemoteAnswer(answer OfferAnswer) { // OnRemoteCandidate Handles ICE connection Candidate provided by the remote peer. func (conn *Conn) OnRemoteCandidate(candidate ice.Candidate, haRoutes route.HAMap) { conn.dumpState.RemoteCandidate() - conn.workerICE.OnRemoteCandidate(candidate, haRoutes) + if conn.workerICE != nil { + conn.workerICE.OnRemoteCandidate(candidate, haRoutes) + } } // SetOnConnected sets a handler function to be triggered by Conn when a new connection to a remote peer established @@ -712,33 +719,35 @@ func (conn *Conn) evalStatus() ConnStatus { return StatusConnecting } -func (conn *Conn) isConnectedOnAllWay() (connected bool) { - // would be better to protect this with a mutex, but it could cause deadlock with Close function - +// isConnectedOnAllWay evaluates the overall connection status based on ICE and Relay transports. +// +// The result is a tri-state: +// - ConnStatusConnected: all available transports are up +// - ConnStatusPartiallyConnected: relay is up but ICE is still pending/reconnecting +// - ConnStatusDisconnected: no working transport +func (conn *Conn) isConnectedOnAllWay() (status guard.ConnStatus) { defer func() { - if !connected { + if status == guard.ConnStatusDisconnected { conn.logTraceConnState() } }() - // For JS platform: only relay connection is supported - if runtime.GOOS == "js" { - return conn.statusRelay.Get() == worker.StatusConnected + iceWorkerCreated := conn.workerICE != nil + + var iceInProgress bool + if iceWorkerCreated { + iceInProgress = conn.workerICE.InProgress() } - // For non-JS platforms: check ICE connection status - if conn.statusICE.Get() == worker.StatusDisconnected && !conn.workerICE.InProgress() { - return false - } - - // If relay is supported with peer, it must also be connected - if conn.workerRelay.IsRelayConnectionSupportedWithPeer() { - if conn.statusRelay.Get() == worker.StatusDisconnected { - return false - } - } - - return true + return evalConnStatus(connStatusInputs{ + forceRelay: IsForceRelayed(), + peerUsesRelay: conn.workerRelay.IsRelayConnectionSupportedWithPeer(), + relayConnected: conn.statusRelay.Get() == worker.StatusConnected, + remoteSupportsICE: conn.handshaker.RemoteICESupported(), + iceWorkerCreated: iceWorkerCreated, + iceStatusConnecting: conn.statusICE.Get() != worker.StatusDisconnected, + iceInProgress: iceInProgress, + }) } func (conn *Conn) enableWgWatcherIfNeeded(enabledTime time.Time) { @@ -926,3 +935,43 @@ func isController(config ConnConfig) bool { func isRosenpassEnabled(remoteRosenpassPubKey []byte) bool { return remoteRosenpassPubKey != nil } + +func evalConnStatus(in connStatusInputs) guard.ConnStatus { + // "Relay up and needed" — the peer uses relay and the transport is connected. + relayUsedAndUp := in.peerUsesRelay && in.relayConnected + + // Force-relay mode: ICE never runs. Relay is the only transport and must be up. + if in.forceRelay { + return boolToConnStatus(relayUsedAndUp) + } + + // Remote peer doesn't support ICE, or we haven't created the worker yet: + // relay is the only possible transport. + if !in.remoteSupportsICE || !in.iceWorkerCreated { + return boolToConnStatus(relayUsedAndUp) + } + + // ICE counts as "up" when the status is anything other than Disconnected, OR + // when a negotiation is currently in progress (so we don't spam offers while one is in flight). + iceUp := in.iceStatusConnecting || in.iceInProgress + + // Relay side is acceptable if the peer doesn't rely on relay, or relay is connected. + relayOK := !in.peerUsesRelay || in.relayConnected + + switch { + case iceUp && relayOK: + return guard.ConnStatusConnected + case relayUsedAndUp: + // Relay is up but ICE is down — partially connected. + return guard.ConnStatusPartiallyConnected + default: + return guard.ConnStatusDisconnected + } +} + +func boolToConnStatus(connected bool) guard.ConnStatus { + if connected { + return guard.ConnStatusConnected + } + return guard.ConnStatusDisconnected +} diff --git a/client/internal/peer/conn_status.go b/client/internal/peer/conn_status.go index 73acc5ef5..b43e245f3 100644 --- a/client/internal/peer/conn_status.go +++ b/client/internal/peer/conn_status.go @@ -13,6 +13,20 @@ const ( StatusConnected ) +// connStatusInputs is the primitive-valued snapshot of the state that drives the +// tri-state connection classification. Extracted so the decision logic can be unit-tested +// without constructing full Worker/Handshaker objects. +type connStatusInputs struct { + forceRelay bool // NB_FORCE_RELAY or JS/WASM + peerUsesRelay bool // remote peer advertises relay support AND local has relay + relayConnected bool // statusRelay reports Connected (independent of whether peer uses relay) + remoteSupportsICE bool // remote peer sent ICE credentials + iceWorkerCreated bool // local WorkerICE exists (false in force-relay mode) + iceStatusConnecting bool // statusICE is anything other than Disconnected + iceInProgress bool // a negotiation is currently in flight +} + + // ConnStatus describe the status of a peer's connection type ConnStatus int32 diff --git a/client/internal/peer/conn_status_eval_test.go b/client/internal/peer/conn_status_eval_test.go new file mode 100644 index 000000000..66393cafe --- /dev/null +++ b/client/internal/peer/conn_status_eval_test.go @@ -0,0 +1,201 @@ +package peer + +import ( + "testing" + + "github.com/netbirdio/netbird/client/internal/peer/guard" +) + +func TestEvalConnStatus_ForceRelay(t *testing.T) { + tests := []struct { + name string + in connStatusInputs + want guard.ConnStatus + }{ + { + name: "force relay, peer uses relay, relay up", + in: connStatusInputs{ + forceRelay: true, + peerUsesRelay: true, + relayConnected: true, + }, + want: guard.ConnStatusConnected, + }, + { + name: "force relay, peer uses relay, relay down", + in: connStatusInputs{ + forceRelay: true, + peerUsesRelay: true, + relayConnected: false, + }, + want: guard.ConnStatusDisconnected, + }, + { + name: "force relay, peer does NOT use relay - disconnected forever", + in: connStatusInputs{ + forceRelay: true, + peerUsesRelay: false, + relayConnected: true, + }, + want: guard.ConnStatusDisconnected, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + if got := evalConnStatus(tc.in); got != tc.want { + t.Fatalf("evalConnStatus = %v, want %v", got, tc.want) + } + }) + } +} + +func TestEvalConnStatus_ICEUnavailable(t *testing.T) { + tests := []struct { + name string + in connStatusInputs + want guard.ConnStatus + }{ + { + name: "remote does not support ICE, peer uses relay, relay up", + in: connStatusInputs{ + peerUsesRelay: true, + relayConnected: true, + remoteSupportsICE: false, + iceWorkerCreated: true, + }, + want: guard.ConnStatusConnected, + }, + { + name: "remote does not support ICE, peer uses relay, relay down", + in: connStatusInputs{ + peerUsesRelay: true, + relayConnected: false, + remoteSupportsICE: false, + iceWorkerCreated: true, + }, + want: guard.ConnStatusDisconnected, + }, + { + name: "ICE worker not yet created, relay up", + in: connStatusInputs{ + peerUsesRelay: true, + relayConnected: true, + remoteSupportsICE: true, + iceWorkerCreated: false, + }, + want: guard.ConnStatusConnected, + }, + { + name: "remote does not support ICE, peer does not use relay", + in: connStatusInputs{ + peerUsesRelay: false, + relayConnected: false, + remoteSupportsICE: false, + iceWorkerCreated: true, + }, + want: guard.ConnStatusDisconnected, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + if got := evalConnStatus(tc.in); got != tc.want { + t.Fatalf("evalConnStatus = %v, want %v", got, tc.want) + } + }) + } +} + +func TestEvalConnStatus_FullyAvailable(t *testing.T) { + base := connStatusInputs{ + remoteSupportsICE: true, + iceWorkerCreated: true, + } + + tests := []struct { + name string + mutator func(*connStatusInputs) + want guard.ConnStatus + }{ + { + name: "ICE connected, relay connected, peer uses relay", + mutator: func(in *connStatusInputs) { + in.peerUsesRelay = true + in.relayConnected = true + in.iceStatusConnecting = true + }, + want: guard.ConnStatusConnected, + }, + { + name: "ICE connected, peer does NOT use relay", + mutator: func(in *connStatusInputs) { + in.peerUsesRelay = false + in.relayConnected = false + in.iceStatusConnecting = true + }, + want: guard.ConnStatusConnected, + }, + { + name: "ICE InProgress only, peer does NOT use relay", + mutator: func(in *connStatusInputs) { + in.peerUsesRelay = false + in.iceStatusConnecting = false + in.iceInProgress = true + }, + want: guard.ConnStatusConnected, + }, + { + name: "ICE down, relay up, peer uses relay -> partial", + mutator: func(in *connStatusInputs) { + in.peerUsesRelay = true + in.relayConnected = true + in.iceStatusConnecting = false + in.iceInProgress = false + }, + want: guard.ConnStatusPartiallyConnected, + }, + { + name: "ICE down, peer does NOT use relay -> disconnected", + mutator: func(in *connStatusInputs) { + in.peerUsesRelay = false + in.relayConnected = false + in.iceStatusConnecting = false + in.iceInProgress = false + }, + want: guard.ConnStatusDisconnected, + }, + { + name: "ICE up, peer uses relay but relay down -> partial (relay required, ICE ignored)", + mutator: func(in *connStatusInputs) { + in.peerUsesRelay = true + in.relayConnected = false + in.iceStatusConnecting = true + }, + // relayOK = false (peer uses relay but it's down), iceUp = true + // first switch arm fails (relayOK false), relayUsedAndUp = false (relay down), + // falls into default: Disconnected. + want: guard.ConnStatusDisconnected, + }, + { + name: "ICE down, relay up but peer does not use relay -> disconnected", + mutator: func(in *connStatusInputs) { + in.peerUsesRelay = false + in.relayConnected = true // not actually used since peer doesn't rely on it + in.iceStatusConnecting = false + in.iceInProgress = false + }, + want: guard.ConnStatusDisconnected, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + in := base + tc.mutator(&in) + if got := evalConnStatus(in); got != tc.want { + t.Fatalf("evalConnStatus = %v, want %v (inputs: %+v)", got, tc.want, in) + } + }) + } +} diff --git a/client/internal/peer/env.go b/client/internal/peer/env.go index 7f500c410..b4ba9ad7b 100644 --- a/client/internal/peer/env.go +++ b/client/internal/peer/env.go @@ -10,7 +10,7 @@ const ( EnvKeyNBForceRelay = "NB_FORCE_RELAY" ) -func isForceRelayed() bool { +func IsForceRelayed() bool { if runtime.GOOS == "js" { return true } diff --git a/client/internal/peer/guard/guard.go b/client/internal/peer/guard/guard.go index d93403730..2e5efbcc5 100644 --- a/client/internal/peer/guard/guard.go +++ b/client/internal/peer/guard/guard.go @@ -8,7 +8,19 @@ import ( log "github.com/sirupsen/logrus" ) -type isConnectedFunc func() bool +// ConnStatus represents the connection state as seen by the guard. +type ConnStatus int + +const ( + // ConnStatusDisconnected means neither ICE nor Relay is connected. + ConnStatusDisconnected ConnStatus = iota + // ConnStatusPartiallyConnected means Relay is connected but ICE is not. + ConnStatusPartiallyConnected + // ConnStatusConnected means all required connections are established. + ConnStatusConnected +) + +type connStatusFunc func() ConnStatus // Guard is responsible for the reconnection logic. // It will trigger to send an offer to the peer then has connection issues. @@ -20,14 +32,14 @@ type isConnectedFunc func() bool // - ICE candidate changes type Guard struct { log *log.Entry - isConnectedOnAllWay isConnectedFunc + isConnectedOnAllWay connStatusFunc timeout time.Duration srWatcher *SRWatcher relayedConnDisconnected chan struct{} iCEConnDisconnected chan struct{} } -func NewGuard(log *log.Entry, isConnectedFn isConnectedFunc, timeout time.Duration, srWatcher *SRWatcher) *Guard { +func NewGuard(log *log.Entry, isConnectedFn connStatusFunc, timeout time.Duration, srWatcher *SRWatcher) *Guard { return &Guard{ log: log, isConnectedOnAllWay: isConnectedFn, @@ -57,8 +69,17 @@ func (g *Guard) SetICEConnDisconnected() { } } -// reconnectLoopWithRetry periodically check the connection status. -// Try to send offer while the P2P is not established or while the Relay is not connected if is it supported +// reconnectLoopWithRetry periodically checks the connection status and sends offers to re-establish connectivity. +// +// Behavior depends on the connection state reported by isConnectedOnAllWay: +// - Connected: no action, the peer is fully reachable. +// - Disconnected (neither ICE nor Relay): retries aggressively with exponential backoff (800ms doubling +// up to timeout), never gives up. This ensures rapid recovery when the peer has no connectivity at all. +// - PartiallyConnected (Relay up, ICE not): retries up to 3 times with exponential backoff, then switches +// to one attempt per hour. This limits signaling traffic when relay already provides connectivity. +// +// External events (relay/ICE disconnect, signal/relay reconnect, candidate changes) reset the retry +// counter and backoff ticker, giving ICE a fresh chance after network conditions change. func (g *Guard) reconnectLoopWithRetry(ctx context.Context, callback func()) { srReconnectedChan := g.srWatcher.NewListener() defer g.srWatcher.RemoveListener(srReconnectedChan) @@ -68,36 +89,47 @@ func (g *Guard) reconnectLoopWithRetry(ctx context.Context, callback func()) { tickerChannel := ticker.C + iceState := &iceRetryState{log: g.log} + defer iceState.reset() + for { select { - case t := <-tickerChannel: - if t.IsZero() { - g.log.Infof("retry timed out, stop periodic offer sending") - // after backoff timeout the ticker.C will be closed. We need to a dummy channel to avoid loop - tickerChannel = make(<-chan time.Time) - continue + case <-tickerChannel: + switch g.isConnectedOnAllWay() { + case ConnStatusConnected: + // all good, nothing to do + case ConnStatusDisconnected: + callback() + case ConnStatusPartiallyConnected: + if iceState.shouldRetry() { + callback() + } else { + iceState.enterHourlyMode() + ticker.Stop() + tickerChannel = iceState.hourlyC() + } } - if !g.isConnectedOnAllWay() { - callback() - } case <-g.relayedConnDisconnected: g.log.Debugf("Relay connection changed, reset reconnection ticker") ticker.Stop() - ticker = g.prepareExponentTicker(ctx) + ticker = g.newReconnectTicker(ctx) tickerChannel = ticker.C + iceState.reset() case <-g.iCEConnDisconnected: g.log.Debugf("ICE connection changed, reset reconnection ticker") ticker.Stop() - ticker = g.prepareExponentTicker(ctx) + ticker = g.newReconnectTicker(ctx) tickerChannel = ticker.C + iceState.reset() case <-srReconnectedChan: g.log.Debugf("has network changes, reset reconnection ticker") ticker.Stop() - ticker = g.prepareExponentTicker(ctx) + ticker = g.newReconnectTicker(ctx) tickerChannel = ticker.C + iceState.reset() case <-ctx.Done(): g.log.Debugf("context is done, stop reconnect loop") @@ -120,7 +152,7 @@ func (g *Guard) initialTicker(ctx context.Context) *backoff.Ticker { return backoff.NewTicker(bo) } -func (g *Guard) prepareExponentTicker(ctx context.Context) *backoff.Ticker { +func (g *Guard) newReconnectTicker(ctx context.Context) *backoff.Ticker { bo := backoff.WithContext(&backoff.ExponentialBackOff{ InitialInterval: 800 * time.Millisecond, RandomizationFactor: 0.1, diff --git a/client/internal/peer/guard/ice_retry_state.go b/client/internal/peer/guard/ice_retry_state.go new file mode 100644 index 000000000..01dc1bf2d --- /dev/null +++ b/client/internal/peer/guard/ice_retry_state.go @@ -0,0 +1,61 @@ +package guard + +import ( + "time" + + log "github.com/sirupsen/logrus" +) + +const ( + // maxICERetries is the maximum number of ICE offer attempts when relay is connected + maxICERetries = 3 + // iceRetryInterval is the periodic retry interval after ICE retries are exhausted + iceRetryInterval = 1 * time.Hour +) + +// iceRetryState tracks the limited ICE retry attempts when relay is already connected. +// After maxICERetries attempts it switches to a periodic hourly retry. +type iceRetryState struct { + log *log.Entry + retries int + hourly *time.Ticker +} + +func (s *iceRetryState) reset() { + s.retries = 0 + if s.hourly != nil { + s.hourly.Stop() + s.hourly = nil + } +} + +// shouldRetry reports whether the caller should send another ICE offer on this tick. +// Returns false when the per-cycle retry budget is exhausted and the caller must switch +// to the hourly ticker via enterHourlyMode + hourlyC. +func (s *iceRetryState) shouldRetry() bool { + if s.hourly != nil { + s.log.Debugf("hourly ICE retry attempt") + return true + } + + s.retries++ + if s.retries <= maxICERetries { + s.log.Debugf("ICE retry attempt %d/%d", s.retries, maxICERetries) + return true + } + + return false +} + +// enterHourlyMode starts the hourly retry ticker. Must be called after shouldRetry returns false. +func (s *iceRetryState) enterHourlyMode() { + s.log.Infof("ICE retries exhausted (%d/%d), switching to hourly retry", maxICERetries, maxICERetries) + s.hourly = time.NewTicker(iceRetryInterval) +} + +func (s *iceRetryState) hourlyC() <-chan time.Time { + if s.hourly == nil { + return nil + } + return s.hourly.C +} diff --git a/client/internal/peer/guard/ice_retry_state_test.go b/client/internal/peer/guard/ice_retry_state_test.go new file mode 100644 index 000000000..6a5b5a76f --- /dev/null +++ b/client/internal/peer/guard/ice_retry_state_test.go @@ -0,0 +1,103 @@ +package guard + +import ( + "testing" + + log "github.com/sirupsen/logrus" +) + +func newTestRetryState() *iceRetryState { + return &iceRetryState{log: log.NewEntry(log.StandardLogger())} +} + +func TestICERetryState_AllowsInitialBudget(t *testing.T) { + s := newTestRetryState() + + for i := 1; i <= maxICERetries; i++ { + if !s.shouldRetry() { + t.Fatalf("shouldRetry returned false on attempt %d, want true (budget = %d)", i, maxICERetries) + } + } +} + +func TestICERetryState_ExhaustsAfterBudget(t *testing.T) { + s := newTestRetryState() + + for i := 0; i < maxICERetries; i++ { + _ = s.shouldRetry() + } + + if s.shouldRetry() { + t.Fatalf("shouldRetry returned true after budget exhausted, want false") + } +} + +func TestICERetryState_HourlyCNilBeforeEnterHourlyMode(t *testing.T) { + s := newTestRetryState() + + if s.hourlyC() != nil { + t.Fatalf("hourlyC returned non-nil channel before enterHourlyMode") + } +} + +func TestICERetryState_EnterHourlyModeArmsTicker(t *testing.T) { + s := newTestRetryState() + for i := 0; i < maxICERetries+1; i++ { + _ = s.shouldRetry() + } + + s.enterHourlyMode() + defer s.reset() + + if s.hourlyC() == nil { + t.Fatalf("hourlyC returned nil after enterHourlyMode") + } +} + +func TestICERetryState_ShouldRetryTrueInHourlyMode(t *testing.T) { + s := newTestRetryState() + s.enterHourlyMode() + defer s.reset() + + if !s.shouldRetry() { + t.Fatalf("shouldRetry returned false in hourly mode, want true") + } + + // Subsequent calls also return true — we keep retrying on each hourly tick. + if !s.shouldRetry() { + t.Fatalf("second shouldRetry returned false in hourly mode, want true") + } +} + +func TestICERetryState_ResetRestoresBudget(t *testing.T) { + s := newTestRetryState() + for i := 0; i < maxICERetries+1; i++ { + _ = s.shouldRetry() + } + s.enterHourlyMode() + + s.reset() + + if s.hourlyC() != nil { + t.Fatalf("hourlyC returned non-nil channel after reset") + } + if s.retries != 0 { + t.Fatalf("retries = %d after reset, want 0", s.retries) + } + + for i := 1; i <= maxICERetries; i++ { + if !s.shouldRetry() { + t.Fatalf("shouldRetry returned false on attempt %d after reset, want true", i) + } + } +} + +func TestICERetryState_ResetIsIdempotent(t *testing.T) { + s := newTestRetryState() + s.reset() + s.reset() // second call must not panic or re-stop a nil ticker + + if s.hourlyC() != nil { + t.Fatalf("hourlyC non-nil after double reset") + } +} diff --git a/client/internal/peer/guard/sr_watcher.go b/client/internal/peer/guard/sr_watcher.go index 6f4f5ad4f..0befd7438 100644 --- a/client/internal/peer/guard/sr_watcher.go +++ b/client/internal/peer/guard/sr_watcher.go @@ -39,7 +39,7 @@ func NewSRWatcher(signalClient chNotifier, relayManager chNotifier, iFaceDiscove return srw } -func (w *SRWatcher) Start() { +func (w *SRWatcher) Start(disableICEMonitor bool) { w.mu.Lock() defer w.mu.Unlock() @@ -50,8 +50,10 @@ func (w *SRWatcher) Start() { ctx, cancel := context.WithCancel(context.Background()) w.cancelIceMonitor = cancel - iceMonitor := NewICEMonitor(w.iFaceDiscover, w.iceConfig, GetICEMonitorPeriod()) - go iceMonitor.Start(ctx, w.onICEChanged) + if !disableICEMonitor { + iceMonitor := NewICEMonitor(w.iFaceDiscover, w.iceConfig, GetICEMonitorPeriod()) + go iceMonitor.Start(ctx, w.onICEChanged) + } w.signalClient.SetOnReconnectedListener(w.onReconnected) w.relayManager.SetOnReconnectedListener(w.onReconnected) diff --git a/client/internal/peer/handshaker.go b/client/internal/peer/handshaker.go index 9b50cecd1..741dfce60 100644 --- a/client/internal/peer/handshaker.go +++ b/client/internal/peer/handshaker.go @@ -4,6 +4,7 @@ import ( "context" "errors" "sync" + "sync/atomic" log "github.com/sirupsen/logrus" @@ -43,6 +44,10 @@ type OfferAnswer struct { SessionID *ICESessionID } +func (o *OfferAnswer) hasICECredentials() bool { + return o.IceCredentials.UFrag != "" && o.IceCredentials.Pwd != "" +} + type Handshaker struct { mu sync.Mutex log *log.Entry @@ -59,6 +64,10 @@ type Handshaker struct { relayListener *AsyncOfferListener iceListener func(remoteOfferAnswer *OfferAnswer) + // remoteICESupported tracks whether the remote peer includes ICE credentials in its offers/answers. + // When false, the local side skips ICE listener dispatch and suppresses ICE credentials in responses. + remoteICESupported atomic.Bool + // remoteOffersCh is a channel used to wait for remote credentials to proceed with the connection remoteOffersCh chan OfferAnswer // remoteAnswerCh is a channel used to wait for remote credentials answer (confirmation of our offer) to proceed with the connection @@ -66,7 +75,7 @@ type Handshaker struct { } func NewHandshaker(log *log.Entry, config ConnConfig, signaler *Signaler, ice *WorkerICE, relay *WorkerRelay, metricsStages *MetricsStages) *Handshaker { - return &Handshaker{ + h := &Handshaker{ log: log, config: config, signaler: signaler, @@ -76,6 +85,13 @@ func NewHandshaker(log *log.Entry, config ConnConfig, signaler *Signaler, ice *W remoteOffersCh: make(chan OfferAnswer), remoteAnswerCh: make(chan OfferAnswer), } + // assume remote supports ICE until we learn otherwise from received offers + h.remoteICESupported.Store(ice != nil) + return h +} + +func (h *Handshaker) RemoteICESupported() bool { + return h.remoteICESupported.Load() } func (h *Handshaker) AddRelayListener(offer func(remoteOfferAnswer *OfferAnswer)) { @@ -90,18 +106,20 @@ func (h *Handshaker) Listen(ctx context.Context) { for { select { case remoteOfferAnswer := <-h.remoteOffersCh: - h.log.Infof("received offer, running version %s, remote WireGuard listen port %d, session id: %s", remoteOfferAnswer.Version, remoteOfferAnswer.WgListenPort, remoteOfferAnswer.SessionIDString()) + h.log.Infof("received offer, running version %s, remote WireGuard listen port %d, session id: %s, remote ICE supported: %t", remoteOfferAnswer.Version, remoteOfferAnswer.WgListenPort, remoteOfferAnswer.SessionIDString(), remoteOfferAnswer.hasICECredentials()) // Record signaling received for reconnection attempts if h.metricsStages != nil { h.metricsStages.RecordSignalingReceived() } + h.updateRemoteICEState(&remoteOfferAnswer) + if h.relayListener != nil { h.relayListener.Notify(&remoteOfferAnswer) } - if h.iceListener != nil { + if h.iceListener != nil && h.RemoteICESupported() { h.iceListener(&remoteOfferAnswer) } @@ -110,18 +128,20 @@ func (h *Handshaker) Listen(ctx context.Context) { continue } case remoteOfferAnswer := <-h.remoteAnswerCh: - h.log.Infof("received answer, running version %s, remote WireGuard listen port %d, session id: %s", remoteOfferAnswer.Version, remoteOfferAnswer.WgListenPort, remoteOfferAnswer.SessionIDString()) + h.log.Infof("received answer, running version %s, remote WireGuard listen port %d, session id: %s, remote ICE supported: %t", remoteOfferAnswer.Version, remoteOfferAnswer.WgListenPort, remoteOfferAnswer.SessionIDString(), remoteOfferAnswer.hasICECredentials()) // Record signaling received for reconnection attempts if h.metricsStages != nil { h.metricsStages.RecordSignalingReceived() } + h.updateRemoteICEState(&remoteOfferAnswer) + if h.relayListener != nil { h.relayListener.Notify(&remoteOfferAnswer) } - if h.iceListener != nil { + if h.iceListener != nil && h.RemoteICESupported() { h.iceListener(&remoteOfferAnswer) } case <-ctx.Done(): @@ -183,15 +203,18 @@ func (h *Handshaker) sendAnswer() error { } func (h *Handshaker) buildOfferAnswer() OfferAnswer { - uFrag, pwd := h.ice.GetLocalUserCredentials() - sid := h.ice.SessionID() answer := OfferAnswer{ - IceCredentials: IceCredentials{uFrag, pwd}, WgListenPort: h.config.LocalWgPort, Version: version.NetbirdVersion(), RosenpassPubKey: h.config.RosenpassConfig.PubKey, RosenpassAddr: h.config.RosenpassConfig.Addr, - SessionID: &sid, + } + + if h.ice != nil && h.RemoteICESupported() { + uFrag, pwd := h.ice.GetLocalUserCredentials() + sid := h.ice.SessionID() + answer.IceCredentials = IceCredentials{uFrag, pwd} + answer.SessionID = &sid } if addr, err := h.relay.RelayInstanceAddress(); err == nil { @@ -200,3 +223,18 @@ func (h *Handshaker) buildOfferAnswer() OfferAnswer { return answer } + +func (h *Handshaker) updateRemoteICEState(offer *OfferAnswer) { + hasICE := offer.hasICECredentials() + prev := h.remoteICESupported.Swap(hasICE) + if prev != hasICE { + if hasICE { + h.log.Infof("remote peer started sending ICE credentials") + } else { + h.log.Infof("remote peer stopped sending ICE credentials") + if h.ice != nil { + h.ice.Close() + } + } + } +} diff --git a/client/internal/peer/signaler.go b/client/internal/peer/signaler.go index b28906625..f6eb87cca 100644 --- a/client/internal/peer/signaler.go +++ b/client/internal/peer/signaler.go @@ -46,9 +46,13 @@ func (s *Signaler) Ready() bool { // SignalOfferAnswer signals either an offer or an answer to remote peer func (s *Signaler) signalOfferAnswer(offerAnswer OfferAnswer, remoteKey string, bodyType sProto.Body_Type) error { - sessionIDBytes, err := offerAnswer.SessionID.Bytes() - if err != nil { - log.Warnf("failed to get session ID bytes: %v", err) + var sessionIDBytes []byte + if offerAnswer.SessionID != nil { + var err error + sessionIDBytes, err = offerAnswer.SessionID.Bytes() + if err != nil { + log.Warnf("failed to get session ID bytes: %v", err) + } } msg, err := signal.MarshalCredential( s.wgPrivateKey, From 75e408f51cb54f82c42d4c04d65f3a38e42c433a Mon Sep 17 00:00:00 2001 From: Viktor Liu <17948409+lixmal@users.noreply.github.com> Date: Wed, 22 Apr 2026 00:56:56 +0900 Subject: [PATCH 09/25] [client] Prefer systemd-resolved stub over file mode regardless of resolv.conf header (#5935) --- client/internal/dns/file_parser_unix.go | 1 + client/internal/dns/host_unix.go | 134 ++++++++++++++++++------ client/internal/dns/host_unix_test.go | 76 ++++++++++++++ 3 files changed, 180 insertions(+), 31 deletions(-) create mode 100644 client/internal/dns/host_unix_test.go diff --git a/client/internal/dns/file_parser_unix.go b/client/internal/dns/file_parser_unix.go index 8dacb4e51..50ba74c0c 100644 --- a/client/internal/dns/file_parser_unix.go +++ b/client/internal/dns/file_parser_unix.go @@ -13,6 +13,7 @@ import ( const ( defaultResolvConfPath = "/etc/resolv.conf" + nsswitchConfPath = "/etc/nsswitch.conf" ) type resolvConf struct { diff --git a/client/internal/dns/host_unix.go b/client/internal/dns/host_unix.go index 422fed4e5..d7301d725 100644 --- a/client/internal/dns/host_unix.go +++ b/client/internal/dns/host_unix.go @@ -46,12 +46,12 @@ type restoreHostManager interface { } func newHostManager(wgInterface string) (hostManager, error) { - osManager, err := getOSDNSManagerType() + osManager, reason, err := getOSDNSManagerType() if err != nil { return nil, fmt.Errorf("get os dns manager type: %w", err) } - log.Infof("System DNS manager discovered: %s", osManager) + log.Infof("System DNS manager discovered: %s (%s)", osManager, reason) mgr, err := newHostManagerFromType(wgInterface, osManager) // need to explicitly return nil mgr on error to avoid returning a non-nil interface containing a nil value if err != nil { @@ -74,17 +74,49 @@ func newHostManagerFromType(wgInterface string, osManager osManagerType) (restor } } -func getOSDNSManagerType() (osManagerType, error) { +func getOSDNSManagerType() (osManagerType, string, error) { + resolved := isSystemdResolvedRunning() + nss := isLibnssResolveUsed() + stub := checkStub() + + // Prefer systemd-resolved whenever it owns libc resolution, regardless of + // who wrote /etc/resolv.conf. File-mode rewrites do not affect lookups + // that go through nss-resolve, and in foreign mode they can loop back + // through resolved as an upstream. + if resolved && (nss || stub) { + return systemdManager, fmt.Sprintf("systemd-resolved active (nss-resolve=%t, stub=%t)", nss, stub), nil + } + + mgr, reason, rejected, err := scanResolvConfHeader() + if err != nil { + return 0, "", err + } + if reason != "" { + return mgr, reason, nil + } + + fallback := fmt.Sprintf("no manager matched (resolved=%t, nss-resolve=%t, stub=%t)", resolved, nss, stub) + if len(rejected) > 0 { + fallback += "; rejected: " + strings.Join(rejected, ", ") + } + return fileManager, fallback, nil +} + +// scanResolvConfHeader walks /etc/resolv.conf header comments and returns the +// matching manager. If reason is empty the caller should pick file mode and +// use rejected for diagnostics. +func scanResolvConfHeader() (osManagerType, string, []string, error) { file, err := os.Open(defaultResolvConfPath) if err != nil { - return 0, fmt.Errorf("unable to open %s for checking owner, got error: %w", defaultResolvConfPath, err) + return 0, "", nil, fmt.Errorf("unable to open %s for checking owner, got error: %w", defaultResolvConfPath, err) } defer func() { - if err := file.Close(); err != nil { - log.Errorf("close file %s: %s", defaultResolvConfPath, err) + if cerr := file.Close(); cerr != nil { + log.Errorf("close file %s: %s", defaultResolvConfPath, cerr) } }() + var rejected []string scanner := bufio.NewScanner(file) for scanner.Scan() { text := scanner.Text() @@ -92,41 +124,48 @@ func getOSDNSManagerType() (osManagerType, error) { continue } if text[0] != '#' { - return fileManager, nil + break } - if strings.Contains(text, fileGeneratedResolvConfContentHeader) { - return netbirdManager, nil - } - if strings.Contains(text, "NetworkManager") && isDbusListenerRunning(networkManagerDest, networkManagerDbusObjectNode) && isNetworkManagerSupported() { - return networkManager, nil - } - if strings.Contains(text, "systemd-resolved") && isSystemdResolvedRunning() { - if checkStub() { - return systemdManager, nil - } else { - return fileManager, nil - } - } - if strings.Contains(text, "resolvconf") { - if isSystemdResolveConfMode() { - return systemdManager, nil - } - - return resolvConfManager, nil + if mgr, reason, rej := matchResolvConfHeader(text); reason != "" { + return mgr, reason, nil, nil + } else if rej != "" { + rejected = append(rejected, rej) } } if err := scanner.Err(); err != nil && err != io.EOF { - return 0, fmt.Errorf("scan: %w", err) + return 0, "", nil, fmt.Errorf("scan: %w", err) } - - return fileManager, nil + return 0, "", rejected, nil } -// checkStub checks if the stub resolver is disabled in systemd-resolved. If it is disabled, we fall back to file manager. +// matchResolvConfHeader inspects a single comment line. Returns either a +// definitive (manager, reason) or a non-empty rejected diagnostic. +func matchResolvConfHeader(text string) (osManagerType, string, string) { + if strings.Contains(text, fileGeneratedResolvConfContentHeader) { + return netbirdManager, "netbird-managed resolv.conf header detected", "" + } + if strings.Contains(text, "NetworkManager") { + if isDbusListenerRunning(networkManagerDest, networkManagerDbusObjectNode) && isNetworkManagerSupported() { + return networkManager, "NetworkManager header + supported version on dbus", "" + } + return 0, "", "NetworkManager header (no dbus or unsupported version)" + } + if strings.Contains(text, "resolvconf") { + if isSystemdResolveConfMode() { + return systemdManager, "resolvconf header in systemd-resolved compatibility mode", "" + } + return resolvConfManager, "resolvconf header detected", "" + } + return 0, "", "" +} + +// checkStub reports whether systemd-resolved's stub (127.0.0.53) is listed +// in /etc/resolv.conf. On parse failure we assume it is, to avoid dropping +// into file mode while resolved is active. func checkStub() bool { rConf, err := parseDefaultResolvConf() if err != nil { - log.Warnf("failed to parse resolv conf: %s", err) + log.Warnf("failed to parse resolv conf, assuming stub is active: %s", err) return true } @@ -139,3 +178,36 @@ func checkStub() bool { return false } + +// isLibnssResolveUsed reports whether nss-resolve is listed before dns on +// the hosts: line of /etc/nsswitch.conf. When it is, libc lookups are +// delegated to systemd-resolved regardless of /etc/resolv.conf. +func isLibnssResolveUsed() bool { + bs, err := os.ReadFile(nsswitchConfPath) + if err != nil { + log.Debugf("read %s: %v", nsswitchConfPath, err) + return false + } + return parseNsswitchResolveAhead(bs) +} + +func parseNsswitchResolveAhead(data []byte) bool { + for _, line := range strings.Split(string(data), "\n") { + if i := strings.IndexByte(line, '#'); i >= 0 { + line = line[:i] + } + fields := strings.Fields(line) + if len(fields) < 2 || fields[0] != "hosts:" { + continue + } + for _, module := range fields[1:] { + switch module { + case "dns": + return false + case "resolve": + return true + } + } + } + return false +} diff --git a/client/internal/dns/host_unix_test.go b/client/internal/dns/host_unix_test.go new file mode 100644 index 000000000..e936281d3 --- /dev/null +++ b/client/internal/dns/host_unix_test.go @@ -0,0 +1,76 @@ +//go:build (linux && !android) || freebsd + +package dns + +import "testing" + +func TestParseNsswitchResolveAhead(t *testing.T) { + tests := []struct { + name string + in string + want bool + }{ + { + name: "resolve before dns with action token", + in: "hosts: mymachines resolve [!UNAVAIL=return] files myhostname dns\n", + want: true, + }, + { + name: "dns before resolve", + in: "hosts: files mdns4_minimal [NOTFOUND=return] dns resolve\n", + want: false, + }, + { + name: "debian default with only dns", + in: "hosts: files mdns4_minimal [NOTFOUND=return] dns mymachines\n", + want: false, + }, + { + name: "neither resolve nor dns", + in: "hosts: files myhostname\n", + want: false, + }, + { + name: "no hosts line", + in: "passwd: files systemd\ngroup: files systemd\n", + want: false, + }, + { + name: "empty", + in: "", + want: false, + }, + { + name: "comments and blank lines ignored", + in: "# comment\n\n# another\nhosts: resolve dns\n", + want: true, + }, + { + name: "trailing inline comment", + in: "hosts: resolve [!UNAVAIL=return] dns # fallback\n", + want: true, + }, + { + name: "hosts token must be the first field", + in: " hosts: resolve dns\n", + want: true, + }, + { + name: "other db line mentioning resolve is ignored", + in: "networks: resolve\nhosts: dns\n", + want: false, + }, + { + name: "only resolve, no dns", + in: "hosts: files resolve\n", + want: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := parseNsswitchResolveAhead([]byte(tt.in)); got != tt.want { + t.Errorf("parseNsswitchResolveAhead() = %v, want %v", got, tt.want) + } + }) + } +} From 064ec1c83226d4a12fd8b73f07597d029e06af05 Mon Sep 17 00:00:00 2001 From: Viktor Liu <17948409+lixmal@users.noreply.github.com> Date: Wed, 22 Apr 2026 00:57:16 +0900 Subject: [PATCH 10/25] [client] Trust wg interface in firewalld to bypass owner-flagged chains (#5928) --- client/firewall/firewalld/firewalld.go | 11 + client/firewall/firewalld/firewalld_linux.go | 260 ++++++++++++++++++ .../firewalld/firewalld_linux_test.go | 49 ++++ client/firewall/firewalld/firewalld_other.go | 25 ++ client/firewall/iptables/manager_linux.go | 18 ++ client/firewall/nftables/manager_linux.go | 5 + client/firewall/nftables/router_linux.go | 18 ++ client/firewall/uspfilter/allow_netbird.go | 9 + client/firewall/uspfilter/common/iface.go | 1 + client/firewall/uspfilter/filter_test.go | 8 + client/internal/engine.go | 3 + 11 files changed, 407 insertions(+) create mode 100644 client/firewall/firewalld/firewalld.go create mode 100644 client/firewall/firewalld/firewalld_linux.go create mode 100644 client/firewall/firewalld/firewalld_linux_test.go create mode 100644 client/firewall/firewalld/firewalld_other.go diff --git a/client/firewall/firewalld/firewalld.go b/client/firewall/firewalld/firewalld.go new file mode 100644 index 000000000..188ea61dd --- /dev/null +++ b/client/firewall/firewalld/firewalld.go @@ -0,0 +1,11 @@ +// Package firewalld integrates with the firewalld daemon so NetBird can place +// its wg interface into firewalld's "trusted" zone. This is required because +// firewalld's nftables chains are created with NFT_CHAIN_OWNER on recent +// versions, which returns EPERM to any other process that tries to insert +// rules into them. The workaround mirrors what Tailscale does: let firewalld +// itself add the accept rules to its own chains by trusting the interface. +package firewalld + +// TrustedZone is the firewalld zone name used for interfaces whose traffic +// should bypass firewalld filtering. +const TrustedZone = "trusted" diff --git a/client/firewall/firewalld/firewalld_linux.go b/client/firewall/firewalld/firewalld_linux.go new file mode 100644 index 000000000..924a04b0a --- /dev/null +++ b/client/firewall/firewalld/firewalld_linux.go @@ -0,0 +1,260 @@ +//go:build linux + +package firewalld + +import ( + "context" + "errors" + "fmt" + "os/exec" + "strings" + "sync" + "time" + + "github.com/godbus/dbus/v5" + log "github.com/sirupsen/logrus" +) + +const ( + dbusDest = "org.fedoraproject.FirewallD1" + dbusPath = "/org/fedoraproject/FirewallD1" + dbusRootIface = "org.fedoraproject.FirewallD1" + dbusZoneIface = "org.fedoraproject.FirewallD1.zone" + + errZoneAlreadySet = "ZONE_ALREADY_SET" + errAlreadyEnabled = "ALREADY_ENABLED" + errUnknownIface = "UNKNOWN_INTERFACE" + errNotEnabled = "NOT_ENABLED" + + // callTimeout bounds each individual DBus or firewall-cmd invocation. + // A fresh context is created for each call so a slow DBus probe can't + // exhaust the deadline before the firewall-cmd fallback gets to run. + callTimeout = 3 * time.Second +) + +var ( + errDBusUnavailable = errors.New("firewalld dbus unavailable") + + // trustLogOnce ensures the "added to trusted zone" message is logged at + // Info level only for the first successful add per process; repeat adds + // from other init paths are quieter. + trustLogOnce sync.Once + + parentCtxMu sync.RWMutex + parentCtx context.Context = context.Background() +) + +// SetParentContext installs a parent context whose cancellation aborts any +// in-flight TrustInterface call. It does not affect UntrustInterface, which +// always uses a fresh Background-rooted timeout so cleanup can still run +// during engine shutdown when the engine context is already cancelled. +func SetParentContext(ctx context.Context) { + parentCtxMu.Lock() + parentCtx = ctx + parentCtxMu.Unlock() +} + +func getParentContext() context.Context { + parentCtxMu.RLock() + defer parentCtxMu.RUnlock() + return parentCtx +} + +// TrustInterface places iface into firewalld's trusted zone if firewalld is +// running. It is idempotent and best-effort: errors are returned so callers +// can log, but a non-running firewalld is not an error. Only the first +// successful call per process logs at Info. Respects the parent context set +// via SetParentContext so startup-time cancellation unblocks it. +func TrustInterface(iface string) error { + parent := getParentContext() + if !isRunning(parent) { + return nil + } + if err := addTrusted(parent, iface); err != nil { + return fmt.Errorf("add %s to firewalld trusted zone: %w", iface, err) + } + trustLogOnce.Do(func() { + log.Infof("added %s to firewalld trusted zone", iface) + }) + log.Debugf("firewalld: ensured %s is in trusted zone", iface) + return nil +} + +// UntrustInterface removes iface from firewalld's trusted zone if firewalld +// is running. Idempotent. Uses a Background-rooted timeout so it still runs +// during shutdown after the engine context has been cancelled. +func UntrustInterface(iface string) error { + if !isRunning(context.Background()) { + return nil + } + if err := removeTrusted(context.Background(), iface); err != nil { + return fmt.Errorf("remove %s from firewalld trusted zone: %w", iface, err) + } + return nil +} + +func newCallContext(parent context.Context) (context.Context, context.CancelFunc) { + return context.WithTimeout(parent, callTimeout) +} + +func isRunning(parent context.Context) bool { + ctx, cancel := newCallContext(parent) + ok, err := isRunningDBus(ctx) + cancel() + if err == nil { + return ok + } + if errors.Is(err, errDBusUnavailable) || errors.Is(err, context.DeadlineExceeded) { + ctx, cancel = newCallContext(parent) + defer cancel() + return isRunningCLI(ctx) + } + return false +} + +func addTrusted(parent context.Context, iface string) error { + ctx, cancel := newCallContext(parent) + err := addDBus(ctx, iface) + cancel() + if err == nil { + return nil + } + if !errors.Is(err, errDBusUnavailable) { + log.Debugf("firewalld: dbus add failed, falling back to firewall-cmd: %v", err) + } + ctx, cancel = newCallContext(parent) + defer cancel() + return addCLI(ctx, iface) +} + +func removeTrusted(parent context.Context, iface string) error { + ctx, cancel := newCallContext(parent) + err := removeDBus(ctx, iface) + cancel() + if err == nil { + return nil + } + if !errors.Is(err, errDBusUnavailable) { + log.Debugf("firewalld: dbus remove failed, falling back to firewall-cmd: %v", err) + } + ctx, cancel = newCallContext(parent) + defer cancel() + return removeCLI(ctx, iface) +} + +func isRunningDBus(ctx context.Context) (bool, error) { + conn, err := dbus.SystemBus() + if err != nil { + return false, fmt.Errorf("%w: %v", errDBusUnavailable, err) + } + obj := conn.Object(dbusDest, dbusPath) + + var zone string + if err := obj.CallWithContext(ctx, dbusRootIface+".getDefaultZone", 0).Store(&zone); err != nil { + return false, fmt.Errorf("firewalld getDefaultZone: %w", err) + } + return true, nil +} + +func isRunningCLI(ctx context.Context) bool { + if _, err := exec.LookPath("firewall-cmd"); err != nil { + return false + } + return exec.CommandContext(ctx, "firewall-cmd", "--state").Run() == nil +} + +func addDBus(ctx context.Context, iface string) error { + conn, err := dbus.SystemBus() + if err != nil { + return fmt.Errorf("%w: %v", errDBusUnavailable, err) + } + obj := conn.Object(dbusDest, dbusPath) + + call := obj.CallWithContext(ctx, dbusZoneIface+".addInterface", 0, TrustedZone, iface) + if call.Err == nil { + return nil + } + + if dbusErrContains(call.Err, errAlreadyEnabled) { + return nil + } + + if dbusErrContains(call.Err, errZoneAlreadySet) { + move := obj.CallWithContext(ctx, dbusZoneIface+".changeZoneOfInterface", 0, TrustedZone, iface) + if move.Err != nil { + return fmt.Errorf("firewalld changeZoneOfInterface: %w", move.Err) + } + return nil + } + + return fmt.Errorf("firewalld addInterface: %w", call.Err) +} + +func removeDBus(ctx context.Context, iface string) error { + conn, err := dbus.SystemBus() + if err != nil { + return fmt.Errorf("%w: %v", errDBusUnavailable, err) + } + obj := conn.Object(dbusDest, dbusPath) + + call := obj.CallWithContext(ctx, dbusZoneIface+".removeInterface", 0, TrustedZone, iface) + if call.Err == nil { + return nil + } + + if dbusErrContains(call.Err, errUnknownIface) || dbusErrContains(call.Err, errNotEnabled) { + return nil + } + + return fmt.Errorf("firewalld removeInterface: %w", call.Err) +} + +func addCLI(ctx context.Context, iface string) error { + if _, err := exec.LookPath("firewall-cmd"); err != nil { + return fmt.Errorf("firewall-cmd not available: %w", err) + } + + // --change-interface (no --permanent) binds the interface for the + // current runtime only; we do not want membership to persist across + // reboots because netbird re-asserts it on every startup. + out, err := exec.CommandContext(ctx, + "firewall-cmd", "--zone="+TrustedZone, "--change-interface="+iface, + ).CombinedOutput() + if err != nil { + return fmt.Errorf("firewall-cmd change-interface: %w: %s", err, strings.TrimSpace(string(out))) + } + return nil +} + +func removeCLI(ctx context.Context, iface string) error { + if _, err := exec.LookPath("firewall-cmd"); err != nil { + return fmt.Errorf("firewall-cmd not available: %w", err) + } + + out, err := exec.CommandContext(ctx, + "firewall-cmd", "--zone="+TrustedZone, "--remove-interface="+iface, + ).CombinedOutput() + if err != nil { + msg := strings.TrimSpace(string(out)) + if strings.Contains(msg, errUnknownIface) || strings.Contains(msg, errNotEnabled) { + return nil + } + return fmt.Errorf("firewall-cmd remove-interface: %w: %s", err, msg) + } + return nil +} + +func dbusErrContains(err error, code string) bool { + if err == nil { + return false + } + var de dbus.Error + if errors.As(err, &de) { + for _, b := range de.Body { + if s, ok := b.(string); ok && strings.Contains(s, code) { + return true + } + } + } + return strings.Contains(err.Error(), code) +} diff --git a/client/firewall/firewalld/firewalld_linux_test.go b/client/firewall/firewalld/firewalld_linux_test.go new file mode 100644 index 000000000..d812745fc --- /dev/null +++ b/client/firewall/firewalld/firewalld_linux_test.go @@ -0,0 +1,49 @@ +//go:build linux + +package firewalld + +import ( + "errors" + "testing" + + "github.com/godbus/dbus/v5" +) + +func TestDBusErrContains(t *testing.T) { + tests := []struct { + name string + err error + code string + want bool + }{ + {"nil error", nil, errZoneAlreadySet, false}, + {"plain error match", errors.New("ZONE_ALREADY_SET: wt0"), errZoneAlreadySet, true}, + {"plain error miss", errors.New("something else"), errZoneAlreadySet, false}, + { + "dbus.Error body match", + dbus.Error{Name: "org.fedoraproject.FirewallD1.Exception", Body: []any{"ZONE_ALREADY_SET: wt0"}}, + errZoneAlreadySet, + true, + }, + { + "dbus.Error body miss", + dbus.Error{Name: "org.fedoraproject.FirewallD1.Exception", Body: []any{"INVALID_INTERFACE"}}, + errAlreadyEnabled, + false, + }, + { + "dbus.Error non-string body falls back to Error()", + dbus.Error{Name: "x", Body: []any{123}}, + "x", + true, + }, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + got := dbusErrContains(tc.err, tc.code) + if got != tc.want { + t.Fatalf("dbusErrContains(%v, %q) = %v; want %v", tc.err, tc.code, got, tc.want) + } + }) + } +} diff --git a/client/firewall/firewalld/firewalld_other.go b/client/firewall/firewalld/firewalld_other.go new file mode 100644 index 000000000..cfa28221d --- /dev/null +++ b/client/firewall/firewalld/firewalld_other.go @@ -0,0 +1,25 @@ +//go:build !linux + +package firewalld + +import "context" + +// SetParentContext is a no-op on non-Linux platforms because firewalld only +// runs on Linux. +func SetParentContext(context.Context) { + // intentionally empty: firewalld is a Linux-only daemon +} + +// TrustInterface is a no-op on non-Linux platforms because firewalld only +// runs on Linux. +func TrustInterface(string) error { + // intentionally empty: firewalld is a Linux-only daemon + return nil +} + +// UntrustInterface is a no-op on non-Linux platforms because firewalld only +// runs on Linux. +func UntrustInterface(string) error { + // intentionally empty: firewalld is a Linux-only daemon + return nil +} diff --git a/client/firewall/iptables/manager_linux.go b/client/firewall/iptables/manager_linux.go index a1d4467d5..7d8cd7f8c 100644 --- a/client/firewall/iptables/manager_linux.go +++ b/client/firewall/iptables/manager_linux.go @@ -12,6 +12,7 @@ import ( log "github.com/sirupsen/logrus" nberrors "github.com/netbirdio/netbird/client/errors" + "github.com/netbirdio/netbird/client/firewall/firewalld" firewall "github.com/netbirdio/netbird/client/firewall/manager" "github.com/netbirdio/netbird/client/iface/wgaddr" "github.com/netbirdio/netbird/client/internal/statemanager" @@ -86,6 +87,12 @@ func (m *Manager) Init(stateManager *statemanager.Manager) error { log.Warnf("raw table not available, notrack rules will be disabled: %v", err) } + // Trust after all fatal init steps so a later failure doesn't leave the + // interface in firewalld's trusted zone without a corresponding Close. + if err := firewalld.TrustInterface(m.wgIface.Name()); err != nil { + log.Warnf("failed to trust interface in firewalld: %v", err) + } + // persist early to ensure cleanup of chains go func() { if err := stateManager.PersistState(context.Background()); err != nil { @@ -191,6 +198,12 @@ func (m *Manager) Close(stateManager *statemanager.Manager) error { merr = multierror.Append(merr, fmt.Errorf("reset router: %w", err)) } + // Appending to merr intentionally blocks DeleteState below so ShutdownState + // stays persisted and the crash-recovery path retries firewalld cleanup. + if err := firewalld.UntrustInterface(m.wgIface.Name()); err != nil { + merr = multierror.Append(merr, err) + } + // attempt to delete state only if all other operations succeeded if merr == nil { if err := stateManager.DeleteState(&ShutdownState{}); err != nil { @@ -217,6 +230,11 @@ func (m *Manager) AllowNetbird() error { if err != nil { return fmt.Errorf("allow netbird interface traffic: %w", err) } + + if err := firewalld.TrustInterface(m.wgIface.Name()); err != nil { + log.Warnf("failed to trust interface in firewalld: %v", err) + } + return nil } diff --git a/client/firewall/nftables/manager_linux.go b/client/firewall/nftables/manager_linux.go index 0b5b61e04..8cd5cc6b3 100644 --- a/client/firewall/nftables/manager_linux.go +++ b/client/firewall/nftables/manager_linux.go @@ -14,6 +14,7 @@ import ( log "github.com/sirupsen/logrus" "golang.org/x/sys/unix" + "github.com/netbirdio/netbird/client/firewall/firewalld" firewall "github.com/netbirdio/netbird/client/firewall/manager" "github.com/netbirdio/netbird/client/iface/wgaddr" "github.com/netbirdio/netbird/client/internal/statemanager" @@ -217,6 +218,10 @@ func (m *Manager) AllowNetbird() error { return fmt.Errorf("flush allow input netbird rules: %w", err) } + if err := firewalld.TrustInterface(m.wgIface.Name()); err != nil { + log.Warnf("failed to trust interface in firewalld: %v", err) + } + return nil } diff --git a/client/firewall/nftables/router_linux.go b/client/firewall/nftables/router_linux.go index 904daf7cb..8cc0d2792 100644 --- a/client/firewall/nftables/router_linux.go +++ b/client/firewall/nftables/router_linux.go @@ -19,6 +19,7 @@ import ( "golang.org/x/sys/unix" nberrors "github.com/netbirdio/netbird/client/errors" + "github.com/netbirdio/netbird/client/firewall/firewalld" firewall "github.com/netbirdio/netbird/client/firewall/manager" nbid "github.com/netbirdio/netbird/client/internal/acl/id" "github.com/netbirdio/netbird/client/internal/routemanager/ipfwdstate" @@ -40,6 +41,8 @@ const ( chainNameForward = "FORWARD" chainNameMangleForward = "netbird-mangle-forward" + firewalldTableName = "firewalld" + userDataAcceptForwardRuleIif = "frwacceptiif" userDataAcceptForwardRuleOif = "frwacceptoif" userDataAcceptInputRule = "inputaccept" @@ -133,6 +136,10 @@ func (r *router) Reset() error { merr = multierror.Append(merr, fmt.Errorf("remove accept filter rules: %w", err)) } + if err := firewalld.UntrustInterface(r.wgIface.Name()); err != nil { + merr = multierror.Append(merr, err) + } + if err := r.removeNatPreroutingRules(); err != nil { merr = multierror.Append(merr, fmt.Errorf("remove filter prerouting rules: %w", err)) } @@ -280,6 +287,10 @@ func (r *router) createContainers() error { log.Errorf("failed to add accept rules for the forward chain: %s", err) } + if err := firewalld.TrustInterface(r.wgIface.Name()); err != nil { + log.Warnf("failed to trust interface in firewalld: %v", err) + } + if err := r.refreshRulesMap(); err != nil { log.Errorf("failed to refresh rules: %s", err) } @@ -1319,6 +1330,13 @@ func (r *router) isExternalChain(chain *nftables.Chain) bool { return false } + // Skip firewalld-owned chains. Firewalld creates its chains with the + // NFT_CHAIN_OWNER flag, so inserting rules into them returns EPERM. + // We delegate acceptance to firewalld by trusting the interface instead. + if chain.Table.Name == firewalldTableName { + return false + } + // Skip all iptables-managed tables in the ip family if chain.Table.Family == nftables.TableFamilyIPv4 && isIptablesTable(chain.Table.Name) { return false diff --git a/client/firewall/uspfilter/allow_netbird.go b/client/firewall/uspfilter/allow_netbird.go index 6a6533344..b120cdf12 100644 --- a/client/firewall/uspfilter/allow_netbird.go +++ b/client/firewall/uspfilter/allow_netbird.go @@ -3,6 +3,9 @@ package uspfilter import ( + log "github.com/sirupsen/logrus" + + "github.com/netbirdio/netbird/client/firewall/firewalld" "github.com/netbirdio/netbird/client/internal/statemanager" ) @@ -16,6 +19,9 @@ func (m *Manager) Close(stateManager *statemanager.Manager) error { if m.nativeFirewall != nil { return m.nativeFirewall.Close(stateManager) } + if err := firewalld.UntrustInterface(m.wgIface.Name()); err != nil { + log.Warnf("failed to untrust interface in firewalld: %v", err) + } return nil } @@ -24,5 +30,8 @@ func (m *Manager) AllowNetbird() error { if m.nativeFirewall != nil { return m.nativeFirewall.AllowNetbird() } + if err := firewalld.TrustInterface(m.wgIface.Name()); err != nil { + log.Warnf("failed to trust interface in firewalld: %v", err) + } return nil } diff --git a/client/firewall/uspfilter/common/iface.go b/client/firewall/uspfilter/common/iface.go index 7296953db..9c06eb3f7 100644 --- a/client/firewall/uspfilter/common/iface.go +++ b/client/firewall/uspfilter/common/iface.go @@ -9,6 +9,7 @@ import ( // IFaceMapper defines subset methods of interface required for manager type IFaceMapper interface { + Name() string SetFilter(device.PacketFilter) error Address() wgaddr.Address GetWGDevice() *wgdevice.Device diff --git a/client/firewall/uspfilter/filter_test.go b/client/firewall/uspfilter/filter_test.go index 39e8efa2c..5fb9fef0e 100644 --- a/client/firewall/uspfilter/filter_test.go +++ b/client/firewall/uspfilter/filter_test.go @@ -31,12 +31,20 @@ var logger = log.NewFromLogrus(logrus.StandardLogger()) var flowLogger = netflow.NewManager(nil, []byte{}, nil).GetLogger() type IFaceMock struct { + NameFunc func() string SetFilterFunc func(device.PacketFilter) error AddressFunc func() wgaddr.Address GetWGDeviceFunc func() *wgdevice.Device GetDeviceFunc func() *device.FilteredDevice } +func (i *IFaceMock) Name() string { + if i.NameFunc == nil { + return "wgtest" + } + return i.NameFunc() +} + func (i *IFaceMock) GetWGDevice() *wgdevice.Device { if i.GetWGDeviceFunc == nil { return nil diff --git a/client/internal/engine.go b/client/internal/engine.go index 09d80a87d..8d7e02bd5 100644 --- a/client/internal/engine.go +++ b/client/internal/engine.go @@ -26,6 +26,7 @@ import ( nberrors "github.com/netbirdio/netbird/client/errors" "github.com/netbirdio/netbird/client/firewall" + "github.com/netbirdio/netbird/client/firewall/firewalld" firewallManager "github.com/netbirdio/netbird/client/firewall/manager" "github.com/netbirdio/netbird/client/iface" "github.com/netbirdio/netbird/client/iface/device" @@ -604,6 +605,8 @@ func (e *Engine) createFirewall() error { return nil } + firewalld.SetParentContext(e.ctx) + var err error e.firewall, err = firewall.NewFirewall(e.wgInterface, e.stateManager, e.flowManager.GetLogger(), e.config.DisableServerRoutes, e.config.MTU) if err != nil { From eb3aa96257eee3a6dcef868e3d8816286f242c2e Mon Sep 17 00:00:00 2001 From: Vlad <4941176+crn4@users.noreply.github.com> Date: Tue, 21 Apr 2026 18:37:04 +0200 Subject: [PATCH 11/25] [management] check policy for changes before actual db update (#5405) --- management/server/policy.go | 114 +++++++----- management/server/types/policy.go | 38 ++++ management/server/types/policy_test.go | 193 ++++++++++++++++++++ management/server/types/policyrule.go | 105 +++++++++++ management/server/types/policyrule_test.go | 194 +++++++++++++++++++++ 5 files changed, 600 insertions(+), 44 deletions(-) create mode 100644 management/server/types/policy_test.go create mode 100644 management/server/types/policyrule_test.go diff --git a/management/server/policy.go b/management/server/policy.go index 3e84c3d10..48297ca11 100644 --- a/management/server/policy.go +++ b/management/server/policy.go @@ -5,6 +5,7 @@ import ( _ "embed" "github.com/rs/xid" + "github.com/sirupsen/logrus" "github.com/netbirdio/netbird/management/server/permissions/modules" "github.com/netbirdio/netbird/management/server/permissions/operations" @@ -46,25 +47,40 @@ func (am *DefaultAccountManager) SavePolicy(ctx context.Context, accountID, user var isUpdate = policy.ID != "" var updateAccountPeers bool var action = activity.PolicyAdded + var unchanged bool err = am.Store.ExecuteInTransaction(ctx, func(transaction store.Store) error { - if err = validatePolicy(ctx, transaction, accountID, policy); err != nil { - return err - } - - updateAccountPeers, err = arePolicyChangesAffectPeers(ctx, transaction, accountID, policy, isUpdate) + existingPolicy, err := validatePolicy(ctx, transaction, accountID, policy) if err != nil { return err } - saveFunc := transaction.CreatePolicy if isUpdate { - action = activity.PolicyUpdated - saveFunc = transaction.SavePolicy - } + if policy.Equal(existingPolicy) { + logrus.WithContext(ctx).Tracef("policy update skipped because equal to stored one - policy id %s", policy.ID) + unchanged = true + return nil + } - if err = saveFunc(ctx, policy); err != nil { - return err + action = activity.PolicyUpdated + + updateAccountPeers, err = arePolicyChangesAffectPeersWithExisting(ctx, transaction, policy, existingPolicy) + if err != nil { + return err + } + + if err = transaction.SavePolicy(ctx, policy); err != nil { + return err + } + } else { + updateAccountPeers, err = arePolicyChangesAffectPeers(ctx, transaction, policy) + if err != nil { + return err + } + + if err = transaction.CreatePolicy(ctx, policy); err != nil { + return err + } } return transaction.IncrementNetworkSerial(ctx, accountID) @@ -73,6 +89,10 @@ func (am *DefaultAccountManager) SavePolicy(ctx context.Context, accountID, user return nil, err } + if unchanged { + return policy, nil + } + am.StoreEvent(ctx, userID, policy.ID, accountID, action, policy.EventMeta()) if updateAccountPeers { @@ -101,7 +121,7 @@ func (am *DefaultAccountManager) DeletePolicy(ctx context.Context, accountID, po return err } - updateAccountPeers, err = arePolicyChangesAffectPeers(ctx, transaction, accountID, policy, false) + updateAccountPeers, err = arePolicyChangesAffectPeers(ctx, transaction, policy) if err != nil { return err } @@ -138,34 +158,37 @@ func (am *DefaultAccountManager) ListPolicies(ctx context.Context, accountID, us return am.Store.GetAccountPolicies(ctx, store.LockingStrengthNone, accountID) } -// arePolicyChangesAffectPeers checks if changes to a policy will affect any associated peers. -func arePolicyChangesAffectPeers(ctx context.Context, transaction store.Store, accountID string, policy *types.Policy, isUpdate bool) (bool, error) { - if isUpdate { - existingPolicy, err := transaction.GetPolicyByID(ctx, store.LockingStrengthNone, accountID, policy.ID) - if err != nil { - return false, err - } - - if !policy.Enabled && !existingPolicy.Enabled { - return false, nil - } - - for _, rule := range existingPolicy.Rules { - if rule.SourceResource.Type != "" || rule.DestinationResource.Type != "" { - return true, nil - } - } - - hasPeers, err := anyGroupHasPeersOrResources(ctx, transaction, policy.AccountID, existingPolicy.RuleGroups()) - if err != nil { - return false, err - } - - if hasPeers { +// arePolicyChangesAffectPeers checks if a policy (being created or deleted) will affect any associated peers. +func arePolicyChangesAffectPeers(ctx context.Context, transaction store.Store, policy *types.Policy) (bool, error) { + for _, rule := range policy.Rules { + if rule.SourceResource.Type != "" || rule.DestinationResource.Type != "" { return true, nil } } + return anyGroupHasPeersOrResources(ctx, transaction, policy.AccountID, policy.RuleGroups()) +} + +func arePolicyChangesAffectPeersWithExisting(ctx context.Context, transaction store.Store, policy *types.Policy, existingPolicy *types.Policy) (bool, error) { + if !policy.Enabled && !existingPolicy.Enabled { + return false, nil + } + + for _, rule := range existingPolicy.Rules { + if rule.SourceResource.Type != "" || rule.DestinationResource.Type != "" { + return true, nil + } + } + + hasPeers, err := anyGroupHasPeersOrResources(ctx, transaction, policy.AccountID, existingPolicy.RuleGroups()) + if err != nil { + return false, err + } + + if hasPeers { + return true, nil + } + for _, rule := range policy.Rules { if rule.SourceResource.Type != "" || rule.DestinationResource.Type != "" { return true, nil @@ -175,12 +198,15 @@ func arePolicyChangesAffectPeers(ctx context.Context, transaction store.Store, a return anyGroupHasPeersOrResources(ctx, transaction, policy.AccountID, policy.RuleGroups()) } -// validatePolicy validates the policy and its rules. -func validatePolicy(ctx context.Context, transaction store.Store, accountID string, policy *types.Policy) error { +// validatePolicy validates the policy and its rules. For updates it returns +// the existing policy loaded from the store so callers can avoid a second read. +func validatePolicy(ctx context.Context, transaction store.Store, accountID string, policy *types.Policy) (*types.Policy, error) { + var existingPolicy *types.Policy if policy.ID != "" { - existingPolicy, err := transaction.GetPolicyByID(ctx, store.LockingStrengthNone, accountID, policy.ID) + var err error + existingPolicy, err = transaction.GetPolicyByID(ctx, store.LockingStrengthNone, accountID, policy.ID) if err != nil { - return err + return nil, err } // TODO: Refactor to support multiple rules per policy @@ -191,7 +217,7 @@ func validatePolicy(ctx context.Context, transaction store.Store, accountID stri for _, rule := range policy.Rules { if rule.ID != "" && !existingRuleIDs[rule.ID] { - return status.Errorf(status.InvalidArgument, "invalid rule ID: %s", rule.ID) + return nil, status.Errorf(status.InvalidArgument, "invalid rule ID: %s", rule.ID) } } } else { @@ -201,12 +227,12 @@ func validatePolicy(ctx context.Context, transaction store.Store, accountID stri groups, err := transaction.GetGroupsByIDs(ctx, store.LockingStrengthNone, accountID, policy.RuleGroups()) if err != nil { - return err + return nil, err } postureChecks, err := transaction.GetPostureChecksByIDs(ctx, store.LockingStrengthNone, accountID, policy.SourcePostureChecks) if err != nil { - return err + return nil, err } for i, rule := range policy.Rules { @@ -225,7 +251,7 @@ func validatePolicy(ctx context.Context, transaction store.Store, accountID stri policy.SourcePostureChecks = getValidPostureCheckIDs(postureChecks, policy.SourcePostureChecks) } - return nil + return existingPolicy, nil } // getValidPostureCheckIDs filters and returns only the valid posture check IDs from the provided list. diff --git a/management/server/types/policy.go b/management/server/types/policy.go index d4e1a8816..d410aec8d 100644 --- a/management/server/types/policy.go +++ b/management/server/types/policy.go @@ -93,6 +93,44 @@ func (p *Policy) Copy() *Policy { return c } +func (p *Policy) Equal(other *Policy) bool { + if p == nil || other == nil { + return p == other + } + + if p.ID != other.ID || + p.AccountID != other.AccountID || + p.Name != other.Name || + p.Description != other.Description || + p.Enabled != other.Enabled { + return false + } + + if !stringSlicesEqualUnordered(p.SourcePostureChecks, other.SourcePostureChecks) { + return false + } + + if len(p.Rules) != len(other.Rules) { + return false + } + + otherRules := make(map[string]*PolicyRule, len(other.Rules)) + for _, r := range other.Rules { + otherRules[r.ID] = r + } + for _, r := range p.Rules { + otherRule, ok := otherRules[r.ID] + if !ok { + return false + } + if !r.Equal(otherRule) { + return false + } + } + + return true +} + // EventMeta returns activity event meta related to this policy func (p *Policy) EventMeta() map[string]any { return map[string]any{"name": p.Name} diff --git a/management/server/types/policy_test.go b/management/server/types/policy_test.go new file mode 100644 index 000000000..b1d7aabc2 --- /dev/null +++ b/management/server/types/policy_test.go @@ -0,0 +1,193 @@ +package types + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestPolicyEqual_SameRulesDifferentOrder(t *testing.T) { + a := &Policy{ + ID: "pol1", + AccountID: "acc1", + Name: "test", + Enabled: true, + Rules: []*PolicyRule{ + {ID: "r1", PolicyID: "pol1", Ports: []string{"80"}}, + {ID: "r2", PolicyID: "pol1", Ports: []string{"443"}}, + }, + } + b := &Policy{ + ID: "pol1", + AccountID: "acc1", + Name: "test", + Enabled: true, + Rules: []*PolicyRule{ + {ID: "r2", PolicyID: "pol1", Ports: []string{"443"}}, + {ID: "r1", PolicyID: "pol1", Ports: []string{"80"}}, + }, + } + assert.True(t, a.Equal(b)) +} + +func TestPolicyEqual_DifferentRules(t *testing.T) { + a := &Policy{ + ID: "pol1", + Enabled: true, + Rules: []*PolicyRule{ + {ID: "r1", PolicyID: "pol1", Ports: []string{"80"}}, + }, + } + b := &Policy{ + ID: "pol1", + Enabled: true, + Rules: []*PolicyRule{ + {ID: "r1", PolicyID: "pol1", Ports: []string{"443"}}, + }, + } + assert.False(t, a.Equal(b)) +} + +func TestPolicyEqual_DifferentRuleCount(t *testing.T) { + a := &Policy{ + ID: "pol1", + Rules: []*PolicyRule{ + {ID: "r1", PolicyID: "pol1"}, + }, + } + b := &Policy{ + ID: "pol1", + Rules: []*PolicyRule{ + {ID: "r1", PolicyID: "pol1"}, + {ID: "r2", PolicyID: "pol1"}, + }, + } + assert.False(t, a.Equal(b)) +} + +func TestPolicyEqual_PostureChecksDifferentOrder(t *testing.T) { + a := &Policy{ + ID: "pol1", + SourcePostureChecks: []string{"pc3", "pc1", "pc2"}, + } + b := &Policy{ + ID: "pol1", + SourcePostureChecks: []string{"pc1", "pc2", "pc3"}, + } + assert.True(t, a.Equal(b)) +} + +func TestPolicyEqual_DifferentPostureChecks(t *testing.T) { + a := &Policy{ + ID: "pol1", + SourcePostureChecks: []string{"pc1", "pc2"}, + } + b := &Policy{ + ID: "pol1", + SourcePostureChecks: []string{"pc1", "pc3"}, + } + assert.False(t, a.Equal(b)) +} + +func TestPolicyEqual_DifferentScalarFields(t *testing.T) { + base := Policy{ + ID: "pol1", + AccountID: "acc1", + Name: "test", + Description: "desc", + Enabled: true, + } + + other := base + other.Name = "changed" + assert.False(t, base.Equal(&other)) + + other = base + other.Enabled = false + assert.False(t, base.Equal(&other)) + + other = base + other.Description = "changed" + assert.False(t, base.Equal(&other)) +} + +func TestPolicyEqual_NilCases(t *testing.T) { + var a *Policy + var b *Policy + assert.True(t, a.Equal(b)) + + a = &Policy{ID: "pol1"} + assert.False(t, a.Equal(nil)) +} + +func TestPolicyEqual_RulesMismatchByID(t *testing.T) { + a := &Policy{ + ID: "pol1", + Rules: []*PolicyRule{ + {ID: "r1", PolicyID: "pol1"}, + }, + } + b := &Policy{ + ID: "pol1", + Rules: []*PolicyRule{ + {ID: "r2", PolicyID: "pol1"}, + }, + } + assert.False(t, a.Equal(b)) +} + +func TestPolicyEqual_FullScenario(t *testing.T) { + a := &Policy{ + ID: "pol1", + AccountID: "acc1", + Name: "Web Access", + Description: "Allow web access", + Enabled: true, + SourcePostureChecks: []string{"pc2", "pc1"}, + Rules: []*PolicyRule{ + { + ID: "r1", + PolicyID: "pol1", + Name: "HTTP", + Enabled: true, + Action: PolicyTrafficActionAccept, + Protocol: PolicyRuleProtocolTCP, + Bidirectional: true, + Sources: []string{"g2", "g1"}, + Destinations: []string{"g4", "g3"}, + Ports: []string{"443", "80", "8080"}, + PortRanges: []RulePortRange{ + {Start: 8000, End: 9000}, + {Start: 80, End: 80}, + }, + }, + }, + } + b := &Policy{ + ID: "pol1", + AccountID: "acc1", + Name: "Web Access", + Description: "Allow web access", + Enabled: true, + SourcePostureChecks: []string{"pc1", "pc2"}, + Rules: []*PolicyRule{ + { + ID: "r1", + PolicyID: "pol1", + Name: "HTTP", + Enabled: true, + Action: PolicyTrafficActionAccept, + Protocol: PolicyRuleProtocolTCP, + Bidirectional: true, + Sources: []string{"g1", "g2"}, + Destinations: []string{"g3", "g4"}, + Ports: []string{"80", "8080", "443"}, + PortRanges: []RulePortRange{ + {Start: 80, End: 80}, + {Start: 8000, End: 9000}, + }, + }, + }, + } + assert.True(t, a.Equal(b)) +} diff --git a/management/server/types/policyrule.go b/management/server/types/policyrule.go index bb75dd555..52c494a6a 100644 --- a/management/server/types/policyrule.go +++ b/management/server/types/policyrule.go @@ -1,6 +1,8 @@ package types import ( + "slices" + "github.com/netbirdio/netbird/shared/management/proto" ) @@ -118,3 +120,106 @@ func (pm *PolicyRule) Copy() *PolicyRule { } return rule } + +func (pm *PolicyRule) Equal(other *PolicyRule) bool { + if pm == nil || other == nil { + return pm == other + } + + if pm.ID != other.ID || + pm.PolicyID != other.PolicyID || + pm.Name != other.Name || + pm.Description != other.Description || + pm.Enabled != other.Enabled || + pm.Action != other.Action || + pm.Bidirectional != other.Bidirectional || + pm.Protocol != other.Protocol || + pm.SourceResource != other.SourceResource || + pm.DestinationResource != other.DestinationResource || + pm.AuthorizedUser != other.AuthorizedUser { + return false + } + + if !stringSlicesEqualUnordered(pm.Sources, other.Sources) { + return false + } + if !stringSlicesEqualUnordered(pm.Destinations, other.Destinations) { + return false + } + if !stringSlicesEqualUnordered(pm.Ports, other.Ports) { + return false + } + if !portRangeSlicesEqualUnordered(pm.PortRanges, other.PortRanges) { + return false + } + if !authorizedGroupsEqual(pm.AuthorizedGroups, other.AuthorizedGroups) { + return false + } + + return true +} + +func stringSlicesEqualUnordered(a, b []string) bool { + if len(a) != len(b) { + return false + } + if len(a) == 0 { + return true + } + sorted1 := make([]string, len(a)) + sorted2 := make([]string, len(b)) + copy(sorted1, a) + copy(sorted2, b) + slices.Sort(sorted1) + slices.Sort(sorted2) + return slices.Equal(sorted1, sorted2) +} + +func portRangeSlicesEqualUnordered(a, b []RulePortRange) bool { + if len(a) != len(b) { + return false + } + if len(a) == 0 { + return true + } + cmp := func(x, y RulePortRange) int { + if x.Start != y.Start { + if x.Start < y.Start { + return -1 + } + return 1 + } + if x.End != y.End { + if x.End < y.End { + return -1 + } + return 1 + } + return 0 + } + sorted1 := make([]RulePortRange, len(a)) + sorted2 := make([]RulePortRange, len(b)) + copy(sorted1, a) + copy(sorted2, b) + slices.SortFunc(sorted1, cmp) + slices.SortFunc(sorted2, cmp) + return slices.EqualFunc(sorted1, sorted2, func(x, y RulePortRange) bool { + return x.Start == y.Start && x.End == y.End + }) +} + +func authorizedGroupsEqual(a, b map[string][]string) bool { + if len(a) != len(b) { + return false + } + for k, va := range a { + vb, ok := b[k] + if !ok { + return false + } + if !stringSlicesEqualUnordered(va, vb) { + return false + } + } + return true +} diff --git a/management/server/types/policyrule_test.go b/management/server/types/policyrule_test.go new file mode 100644 index 000000000..816e72abb --- /dev/null +++ b/management/server/types/policyrule_test.go @@ -0,0 +1,194 @@ +package types + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestPolicyRuleEqual_SamePortsDifferentOrder(t *testing.T) { + a := &PolicyRule{ + ID: "rule1", + PolicyID: "pol1", + Ports: []string{"443", "80", "22"}, + } + b := &PolicyRule{ + ID: "rule1", + PolicyID: "pol1", + Ports: []string{"22", "443", "80"}, + } + assert.True(t, a.Equal(b)) +} + +func TestPolicyRuleEqual_DifferentPorts(t *testing.T) { + a := &PolicyRule{ + ID: "rule1", + PolicyID: "pol1", + Ports: []string{"443", "80"}, + } + b := &PolicyRule{ + ID: "rule1", + PolicyID: "pol1", + Ports: []string{"443", "22"}, + } + assert.False(t, a.Equal(b)) +} + +func TestPolicyRuleEqual_SourcesDestinationsDifferentOrder(t *testing.T) { + a := &PolicyRule{ + ID: "rule1", + PolicyID: "pol1", + Sources: []string{"g1", "g2", "g3"}, + Destinations: []string{"g4", "g5"}, + } + b := &PolicyRule{ + ID: "rule1", + PolicyID: "pol1", + Sources: []string{"g3", "g1", "g2"}, + Destinations: []string{"g5", "g4"}, + } + assert.True(t, a.Equal(b)) +} + +func TestPolicyRuleEqual_DifferentSources(t *testing.T) { + a := &PolicyRule{ + ID: "rule1", + PolicyID: "pol1", + Sources: []string{"g1", "g2"}, + } + b := &PolicyRule{ + ID: "rule1", + PolicyID: "pol1", + Sources: []string{"g1", "g3"}, + } + assert.False(t, a.Equal(b)) +} + +func TestPolicyRuleEqual_PortRangesDifferentOrder(t *testing.T) { + a := &PolicyRule{ + ID: "rule1", + PolicyID: "pol1", + PortRanges: []RulePortRange{ + {Start: 8000, End: 9000}, + {Start: 80, End: 80}, + }, + } + b := &PolicyRule{ + ID: "rule1", + PolicyID: "pol1", + PortRanges: []RulePortRange{ + {Start: 80, End: 80}, + {Start: 8000, End: 9000}, + }, + } + assert.True(t, a.Equal(b)) +} + +func TestPolicyRuleEqual_DifferentPortRanges(t *testing.T) { + a := &PolicyRule{ + ID: "rule1", + PolicyID: "pol1", + PortRanges: []RulePortRange{ + {Start: 80, End: 80}, + }, + } + b := &PolicyRule{ + ID: "rule1", + PolicyID: "pol1", + PortRanges: []RulePortRange{ + {Start: 80, End: 443}, + }, + } + assert.False(t, a.Equal(b)) +} + +func TestPolicyRuleEqual_AuthorizedGroupsDifferentValueOrder(t *testing.T) { + a := &PolicyRule{ + ID: "rule1", + PolicyID: "pol1", + AuthorizedGroups: map[string][]string{ + "g1": {"u1", "u2", "u3"}, + }, + } + b := &PolicyRule{ + ID: "rule1", + PolicyID: "pol1", + AuthorizedGroups: map[string][]string{ + "g1": {"u3", "u1", "u2"}, + }, + } + assert.True(t, a.Equal(b)) +} + +func TestPolicyRuleEqual_DifferentAuthorizedGroups(t *testing.T) { + a := &PolicyRule{ + ID: "rule1", + PolicyID: "pol1", + AuthorizedGroups: map[string][]string{ + "g1": {"u1"}, + }, + } + b := &PolicyRule{ + ID: "rule1", + PolicyID: "pol1", + AuthorizedGroups: map[string][]string{ + "g2": {"u1"}, + }, + } + assert.False(t, a.Equal(b)) +} + +func TestPolicyRuleEqual_DifferentScalarFields(t *testing.T) { + base := PolicyRule{ + ID: "rule1", + PolicyID: "pol1", + Name: "test", + Description: "desc", + Enabled: true, + Action: PolicyTrafficActionAccept, + Bidirectional: true, + Protocol: PolicyRuleProtocolTCP, + } + + other := base + other.Name = "changed" + assert.False(t, base.Equal(&other)) + + other = base + other.Enabled = false + assert.False(t, base.Equal(&other)) + + other = base + other.Action = PolicyTrafficActionDrop + assert.False(t, base.Equal(&other)) + + other = base + other.Protocol = PolicyRuleProtocolUDP + assert.False(t, base.Equal(&other)) +} + +func TestPolicyRuleEqual_NilCases(t *testing.T) { + var a *PolicyRule + var b *PolicyRule + assert.True(t, a.Equal(b)) + + a = &PolicyRule{ID: "rule1"} + assert.False(t, a.Equal(nil)) +} + +func TestPolicyRuleEqual_EmptySlices(t *testing.T) { + a := &PolicyRule{ + ID: "rule1", + PolicyID: "pol1", + Ports: []string{}, + Sources: nil, + } + b := &PolicyRule{ + ID: "rule1", + PolicyID: "pol1", + Ports: nil, + Sources: []string{}, + } + assert.True(t, a.Equal(b)) +} + From 2fb50aef6be47eca9158750f1195d2e5194fb867 Mon Sep 17 00:00:00 2001 From: Zoltan Papp Date: Tue, 21 Apr 2026 19:05:58 +0200 Subject: [PATCH 12/25] [client] allow UDP packet loss in TestICEBind_HandlesConcurrentMixedTraffic (#5953) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The test writes 500 packets per family and asserted exact-count delivery within a 5s window, even though its own comment says "Some packet loss is acceptable for UDP". On FreeBSD/QEMU runners the writer loops cannot always finish all 500 before the 5s deadline closes the readers (we have seen 411/500 in CI). The real assertion of this test is the routing check — IPv4 peer only gets v4- packets, IPv6 peer only gets v6- packets — which remains strict. Replace the exact-count assertions with a >=80% delivery threshold so runner speed variance no longer causes false failures. --- client/iface/bind/ice_bind_test.go | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/client/iface/bind/ice_bind_test.go b/client/iface/bind/ice_bind_test.go index 1fdd955c9..f49e68508 100644 --- a/client/iface/bind/ice_bind_test.go +++ b/client/iface/bind/ice_bind_test.go @@ -239,8 +239,12 @@ func TestICEBind_HandlesConcurrentMixedTraffic(t *testing.T) { ipv6Count++ } - assert.Equal(t, packetsPerFamily, ipv4Count) - assert.Equal(t, packetsPerFamily, ipv6Count) + // Allow some UDP packet loss under load (e.g. FreeBSD/QEMU runners). The + // routing-correctness checks above are the real assertions; the counts + // are a sanity bound to catch a totally silent path. + minDelivered := packetsPerFamily * 80 / 100 + assert.GreaterOrEqual(t, ipv4Count, minDelivered, "IPv4 delivery below threshold") + assert.GreaterOrEqual(t, ipv6Count, minDelivered, "IPv6 delivery below threshold") } func TestICEBind_DetectsAddressFamilyFromConnection(t *testing.T) { From 703353d3540cf4507475b7a0c614a6431ae9ee1c Mon Sep 17 00:00:00 2001 From: Zoltan Papp Date: Tue, 21 Apr 2026 19:06:47 +0200 Subject: [PATCH 13/25] [flow] fix goroutine leak in TestReceive_ProtocolErrorStreamReconnect (#5951) The Receive goroutine could outlive the test and call t.Logf after teardown, panicking with "Log in goroutine after ... has completed". Register a cleanup that waits for the goroutine to exit; ordering is LIFO so it runs after client.Close, which is what unblocks Receive. --- flow/client/client_test.go | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/flow/client/client_test.go b/flow/client/client_test.go index 55157acbc..c8f5f4af4 100644 --- a/flow/client/client_test.go +++ b/flow/client/client_test.go @@ -457,6 +457,18 @@ func TestReceive_ProtocolErrorStreamReconnect(t *testing.T) { client, err := flow.NewClient("http://"+server.addr, "test-payload", "test-signature", 1*time.Second) require.NoError(t, err) + + // Cleanups run LIFO: the goroutine-drain registered here runs after Close below, + // which is when Receive has actually returned. Without this, the Receive goroutine + // can outlive the test and call t.Logf after teardown, panicking. + receiveDone := make(chan struct{}) + t.Cleanup(func() { + select { + case <-receiveDone: + case <-time.After(2 * time.Second): + t.Error("Receive goroutine did not exit after Close") + } + }) t.Cleanup(func() { err := client.Close() assert.NoError(t, err, "failed to close flow") @@ -468,6 +480,7 @@ func TestReceive_ProtocolErrorStreamReconnect(t *testing.T) { receivedAfterReconnect := make(chan struct{}) go func() { + defer close(receiveDone) err := client.Receive(ctx, 1*time.Second, func(msg *proto.FlowEventAck) error { if msg.IsInitiator || len(msg.EventId) == 0 { return nil From 1165058fad020a42d6cdd72f5ab1c7248ee2c3ce Mon Sep 17 00:00:00 2001 From: Zoltan Papp Date: Tue, 21 Apr 2026 19:07:20 +0200 Subject: [PATCH 14/25] [client] fix port collision in TestUpload (#5950) * [debug] fix port collision in TestUpload TestUpload hardcoded :8080, so it failed deterministically when anything was already on that port and collided across concurrent test runs. Bind a :0 listener in the test to get a kernel-assigned free port, and add Server.Serve so tests can hand the listener in without reaching into unexported state. * [debug] drop test-only Server.Serve, use SERVER_ADDRESS env The previous commit added a Server.Serve method on the upload-server, used only by TestUpload. That left production with an unused function. Reserve an ephemeral loopback port in the test, release it, and pass the address through SERVER_ADDRESS (which the server already reads). A small wait helper ensures the server is accepting connections before the upload runs, so the close/rebind gap does not cause a false failure. --- client/internal/debug/upload_test.go | 34 +++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/client/internal/debug/upload_test.go b/client/internal/debug/upload_test.go index e833c196d..f224b8d3f 100644 --- a/client/internal/debug/upload_test.go +++ b/client/internal/debug/upload_test.go @@ -3,10 +3,12 @@ package debug import ( "context" "errors" + "net" "net/http" "os" "path/filepath" "testing" + "time" "github.com/stretchr/testify/require" @@ -19,8 +21,10 @@ func TestUpload(t *testing.T) { t.Skip("Skipping upload test on docker ci") } testDir := t.TempDir() - testURL := "http://localhost:8080" + addr := reserveLoopbackPort(t) + testURL := "http://" + addr t.Setenv("SERVER_URL", testURL) + t.Setenv("SERVER_ADDRESS", addr) t.Setenv("STORE_DIR", testDir) srv := server.NewServer() go func() { @@ -33,6 +37,7 @@ func TestUpload(t *testing.T) { t.Errorf("Failed to stop server: %v", err) } }) + waitForServer(t, addr) file := filepath.Join(t.TempDir(), "tmpfile") fileContent := []byte("test file content") @@ -47,3 +52,30 @@ func TestUpload(t *testing.T) { require.NoError(t, err) require.Equal(t, fileContent, createdFileContent) } + +// reserveLoopbackPort binds an ephemeral port on loopback to learn a free +// address, then releases it so the server under test can rebind. The close/ +// rebind window is racy in theory; on loopback with a kernel-assigned port +// it's essentially never contended in practice. +func reserveLoopbackPort(t *testing.T) string { + t.Helper() + l, err := net.Listen("tcp", "127.0.0.1:0") + require.NoError(t, err) + addr := l.Addr().String() + require.NoError(t, l.Close()) + return addr +} + +func waitForServer(t *testing.T, addr string) { + t.Helper() + deadline := time.Now().Add(5 * time.Second) + for time.Now().Before(deadline) { + c, err := net.DialTimeout("tcp", addr, 100*time.Millisecond) + if err == nil { + _ = c.Close() + return + } + time.Sleep(20 * time.Millisecond) + } + t.Fatalf("server did not start listening on %s in time", addr) +} From 57b23c5b25ebcc1eb560f354c1fc50aa1435beaa Mon Sep 17 00:00:00 2001 From: Bethuel Mmbaga Date: Tue, 21 Apr 2026 23:06:52 +0300 Subject: [PATCH 15/25] [management] Propagate context changes to upstream middleware (#5956) --- .../server/http/middleware/auth_middleware.go | 44 ++++++++++--------- .../server/telemetry/http_api_metrics.go | 16 ++----- 2 files changed, 27 insertions(+), 33 deletions(-) diff --git a/management/server/http/middleware/auth_middleware.go b/management/server/http/middleware/auth_middleware.go index 63be672e6..8106380f2 100644 --- a/management/server/http/middleware/auth_middleware.go +++ b/management/server/http/middleware/auth_middleware.go @@ -12,6 +12,7 @@ import ( "go.opentelemetry.io/otel/metric" "github.com/netbirdio/management-integrations/integrations" + serverauth "github.com/netbirdio/netbird/management/server/auth" nbcontext "github.com/netbirdio/netbird/management/server/context" "github.com/netbirdio/netbird/management/server/http/middleware/bypass" @@ -87,17 +88,14 @@ func (m *AuthMiddleware) Handler(h http.Handler) http.Handler { switch authType { case "bearer": - request, err := m.checkJWTFromRequest(r, authHeader) - if err != nil { + if err := m.checkJWTFromRequest(r, authHeader); err != nil { log.WithContext(r.Context()).Errorf("Error when validating JWT: %s", err.Error()) util.WriteError(r.Context(), status.Errorf(status.Unauthorized, "token invalid"), w) return } - - h.ServeHTTP(w, request) + h.ServeHTTP(w, r) case "token": - request, err := m.checkPATFromRequest(r, authHeader) - if err != nil { + if err := m.checkPATFromRequest(r, authHeader); err != nil { log.WithContext(r.Context()).Debugf("Error when validating PAT: %s", err.Error()) // Check if it's a status error, otherwise default to Unauthorized if _, ok := status.FromError(err); !ok { @@ -106,7 +104,7 @@ func (m *AuthMiddleware) Handler(h http.Handler) http.Handler { util.WriteError(r.Context(), err, w) return } - h.ServeHTTP(w, request) + h.ServeHTTP(w, r) default: util.WriteError(r.Context(), status.Errorf(status.Unauthorized, "no valid authentication provided"), w) return @@ -115,19 +113,19 @@ func (m *AuthMiddleware) Handler(h http.Handler) http.Handler { } // CheckJWTFromRequest checks if the JWT is valid -func (m *AuthMiddleware) checkJWTFromRequest(r *http.Request, authHeaderParts []string) (*http.Request, error) { +func (m *AuthMiddleware) checkJWTFromRequest(r *http.Request, authHeaderParts []string) error { token, err := getTokenFromJWTRequest(authHeaderParts) // If an error occurs, call the error handler and return an error if err != nil { - return r, fmt.Errorf("error extracting token: %w", err) + return fmt.Errorf("error extracting token: %w", err) } ctx := r.Context() userAuth, validatedToken, err := m.authManager.ValidateAndParseToken(ctx, token) if err != nil { - return r, err + return err } if impersonate, ok := r.URL.Query()["account"]; ok && len(impersonate) == 1 { @@ -143,7 +141,7 @@ func (m *AuthMiddleware) checkJWTFromRequest(r *http.Request, authHeaderParts [] // we need to call this method because if user is new, we will automatically add it to existing or create a new account accountId, _, err := m.ensureAccount(ctx, userAuth) if err != nil { - return r, err + return err } if userAuth.AccountId != accountId { @@ -153,7 +151,7 @@ func (m *AuthMiddleware) checkJWTFromRequest(r *http.Request, authHeaderParts [] userAuth, err = m.authManager.EnsureUserAccessByJWTGroups(ctx, userAuth, validatedToken) if err != nil { - return r, err + return err } err = m.syncUserJWTGroups(ctx, userAuth) @@ -164,17 +162,19 @@ func (m *AuthMiddleware) checkJWTFromRequest(r *http.Request, authHeaderParts [] _, err = m.getUserFromUserAuth(ctx, userAuth) if err != nil { log.WithContext(ctx).Errorf("HTTP server failed to update user from user auth: %s", err) - return r, err + return err } - return nbcontext.SetUserAuthInRequest(r, userAuth), nil + // propagates ctx change to upstream middleware + *r = *nbcontext.SetUserAuthInRequest(r, userAuth) + return nil } // CheckPATFromRequest checks if the PAT is valid -func (m *AuthMiddleware) checkPATFromRequest(r *http.Request, authHeaderParts []string) (*http.Request, error) { +func (m *AuthMiddleware) checkPATFromRequest(r *http.Request, authHeaderParts []string) error { token, err := getTokenFromPATRequest(authHeaderParts) if err != nil { - return r, fmt.Errorf("error extracting token: %w", err) + return fmt.Errorf("error extracting token: %w", err) } if m.patUsageTracker != nil { @@ -183,22 +183,22 @@ func (m *AuthMiddleware) checkPATFromRequest(r *http.Request, authHeaderParts [] if m.rateLimiter != nil && !isTerraformRequest(r) { if !m.rateLimiter.Allow(token) { - return r, status.Errorf(status.TooManyRequests, "too many requests") + return status.Errorf(status.TooManyRequests, "too many requests") } } ctx := r.Context() user, pat, accDomain, accCategory, err := m.authManager.GetPATInfo(ctx, token) if err != nil { - return r, fmt.Errorf("invalid Token: %w", err) + return fmt.Errorf("invalid Token: %w", err) } if time.Now().After(pat.GetExpirationDate()) { - return r, fmt.Errorf("token expired") + return fmt.Errorf("token expired") } err = m.authManager.MarkPATUsed(ctx, pat.ID) if err != nil { - return r, err + return err } userAuth := auth.UserAuth{ @@ -216,7 +216,9 @@ func (m *AuthMiddleware) checkPATFromRequest(r *http.Request, authHeaderParts [] } } - return nbcontext.SetUserAuthInRequest(r, userAuth), nil + // propagates ctx change to upstream middleware + *r = *nbcontext.SetUserAuthInRequest(r, userAuth) + return nil } func isTerraformRequest(r *http.Request) bool { diff --git a/management/server/telemetry/http_api_metrics.go b/management/server/telemetry/http_api_metrics.go index 28e8457e2..e48e6d64a 100644 --- a/management/server/telemetry/http_api_metrics.go +++ b/management/server/telemetry/http_api_metrics.go @@ -193,20 +193,12 @@ func (m *HTTPMiddleware) Handler(h http.Handler) http.Handler { } }) - h.ServeHTTP(w, r.WithContext(ctx)) + // Hold on to req so auth's in-place ctx update is visible after ServeHTTP. + req := r.WithContext(ctx) + h.ServeHTTP(w, req) close(handlerDone) - userAuth, err := nbContext.GetUserAuthFromContext(r.Context()) - if err == nil { - if userAuth.AccountId != "" { - //nolint - ctx = context.WithValue(ctx, nbContext.AccountIDKey, userAuth.AccountId) - } - if userAuth.UserId != "" { - //nolint - ctx = context.WithValue(ctx, nbContext.UserIDKey, userAuth.UserId) - } - } + ctx = req.Context() if w.Status() > 399 { log.WithContext(ctx).Errorf("HTTP response %v: %v %v status %v", reqID, r.Method, r.URL, w.Status()) From a822a33240da6f32b53e9f4c7ee4144262ef2de4 Mon Sep 17 00:00:00 2001 From: Viktor Liu <17948409+lixmal@users.noreply.github.com> Date: Wed, 22 Apr 2026 17:35:22 +0900 Subject: [PATCH 16/25] [self-hosted] Use cscli lapi status for CrowdSec readiness in installer (#5949) --- infrastructure_files/getting-started.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/infrastructure_files/getting-started.sh b/infrastructure_files/getting-started.sh index 08da48264..2a3f840b4 100755 --- a/infrastructure_files/getting-started.sh +++ b/infrastructure_files/getting-started.sh @@ -472,7 +472,7 @@ start_services_and_show_instructions() { if [[ "$ENABLE_CROWDSEC" == "true" ]]; then echo "Registering CrowdSec bouncer..." local cs_retries=0 - while ! $DOCKER_COMPOSE_COMMAND exec -T crowdsec cscli capi status >/dev/null 2>&1; do + while ! $DOCKER_COMPOSE_COMMAND exec -T crowdsec cscli lapi status >/dev/null 2>&1; do cs_retries=$((cs_retries + 1)) if [[ $cs_retries -ge 30 ]]; then echo "WARNING: CrowdSec did not become ready. Skipping CrowdSec setup." > /dev/stderr From 801de8c68d4725f313344d4b6f684c3a86e59b90 Mon Sep 17 00:00:00 2001 From: Viktor Liu <17948409+lixmal@users.noreply.github.com> Date: Wed, 22 Apr 2026 22:10:14 +0900 Subject: [PATCH 17/25] [client] Add TTL-based refresh to mgmt DNS cache via handler chain (#5945) --- client/internal/dns/handler_chain.go | 94 ++++ client/internal/dns/handler_chain_test.go | 164 ++++++ client/internal/dns/mgmt/mgmt.go | 489 ++++++++++++++---- client/internal/dns/mgmt/mgmt_refresh_test.go | 408 +++++++++++++++ client/internal/dns/mgmt/mgmt_test.go | 55 ++ client/internal/dns/server.go | 1 + 6 files changed, 1114 insertions(+), 97 deletions(-) create mode 100644 client/internal/dns/mgmt/mgmt_refresh_test.go diff --git a/client/internal/dns/handler_chain.go b/client/internal/dns/handler_chain.go index 6fbdedc59..57e7722d4 100644 --- a/client/internal/dns/handler_chain.go +++ b/client/internal/dns/handler_chain.go @@ -1,7 +1,10 @@ package dns import ( + "context" "fmt" + "math" + "net" "slices" "strconv" "strings" @@ -192,6 +195,12 @@ func (c *HandlerChain) logHandlers() { } func (c *HandlerChain) ServeDNS(w dns.ResponseWriter, r *dns.Msg) { + c.dispatch(w, r, math.MaxInt) +} + +// dispatch routes a DNS request through the chain, skipping handlers with +// priority > maxPriority. Shared by ServeDNS and ResolveInternal. +func (c *HandlerChain) dispatch(w dns.ResponseWriter, r *dns.Msg, maxPriority int) { if len(r.Question) == 0 { return } @@ -216,6 +225,9 @@ func (c *HandlerChain) ServeDNS(w dns.ResponseWriter, r *dns.Msg) { // Try handlers in priority order for _, entry := range handlers { + if entry.Priority > maxPriority { + continue + } if !c.isHandlerMatch(qname, entry) { continue } @@ -273,6 +285,55 @@ func (c *HandlerChain) logResponse(logger *log.Entry, cw *ResponseWriterChain, q cw.response.Len(), meta, time.Since(startTime)) } +// ResolveInternal runs an in-process DNS query against the chain, skipping any +// handler with priority > maxPriority. Used by internal callers (e.g. the mgmt +// cache refresher) that must bypass themselves to avoid loops. Honors ctx +// cancellation; on ctx.Done the dispatch goroutine is left to drain on its own +// (bounded by the invoked handler's internal timeout). +func (c *HandlerChain) ResolveInternal(ctx context.Context, r *dns.Msg, maxPriority int) (*dns.Msg, error) { + if len(r.Question) == 0 { + return nil, fmt.Errorf("empty question") + } + + base := &internalResponseWriter{} + done := make(chan struct{}) + go func() { + c.dispatch(base, r, maxPriority) + close(done) + }() + + select { + case <-done: + case <-ctx.Done(): + // Prefer a completed response if dispatch finished concurrently with cancellation. + select { + case <-done: + default: + return nil, fmt.Errorf("resolve %s: %w", strings.ToLower(r.Question[0].Name), ctx.Err()) + } + } + + if base.response == nil || base.response.Rcode == dns.RcodeRefused { + return nil, fmt.Errorf("no handler resolved %s at priority ≤ %d", + strings.ToLower(r.Question[0].Name), maxPriority) + } + return base.response, nil +} + +// HasRootHandlerAtOrBelow reports whether any "." handler is registered at +// priority ≤ maxPriority. +func (c *HandlerChain) HasRootHandlerAtOrBelow(maxPriority int) bool { + c.mu.RLock() + defer c.mu.RUnlock() + + for _, h := range c.handlers { + if h.Pattern == "." && h.Priority <= maxPriority { + return true + } + } + return false +} + func (c *HandlerChain) isHandlerMatch(qname string, entry HandlerEntry) bool { switch { case entry.Pattern == ".": @@ -291,3 +352,36 @@ func (c *HandlerChain) isHandlerMatch(qname string, entry HandlerEntry) bool { } } } + +// internalResponseWriter captures a dns.Msg for in-process chain queries. +type internalResponseWriter struct { + response *dns.Msg +} + +func (w *internalResponseWriter) WriteMsg(m *dns.Msg) error { w.response = m; return nil } +func (w *internalResponseWriter) LocalAddr() net.Addr { return nil } +func (w *internalResponseWriter) RemoteAddr() net.Addr { return nil } + +// Write unpacks raw DNS bytes so handlers that call Write instead of WriteMsg +// still surface their answer to ResolveInternal. +func (w *internalResponseWriter) Write(p []byte) (int, error) { + msg := new(dns.Msg) + if err := msg.Unpack(p); err != nil { + return 0, err + } + w.response = msg + return len(p), nil +} + +func (w *internalResponseWriter) Close() error { return nil } +func (w *internalResponseWriter) TsigStatus() error { return nil } + +// TsigTimersOnly is part of dns.ResponseWriter. +func (w *internalResponseWriter) TsigTimersOnly(bool) { + // no-op: in-process queries carry no TSIG state. +} + +// Hijack is part of dns.ResponseWriter. +func (w *internalResponseWriter) Hijack() { + // no-op: in-process queries have no underlying connection to hand off. +} diff --git a/client/internal/dns/handler_chain_test.go b/client/internal/dns/handler_chain_test.go index fa9525069..034a760dc 100644 --- a/client/internal/dns/handler_chain_test.go +++ b/client/internal/dns/handler_chain_test.go @@ -1,11 +1,15 @@ package dns_test import ( + "context" + "net" "testing" + "time" "github.com/miekg/dns" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" nbdns "github.com/netbirdio/netbird/client/internal/dns" "github.com/netbirdio/netbird/client/internal/dns/test" @@ -1042,3 +1046,163 @@ func TestHandlerChain_AddRemoveRoundtrip(t *testing.T) { }) } } + +// answeringHandler writes a fixed A record to ack the query. Used to verify +// which handler ResolveInternal dispatches to. +type answeringHandler struct { + name string + ip string +} + +func (h *answeringHandler) ServeDNS(w dns.ResponseWriter, r *dns.Msg) { + resp := &dns.Msg{} + resp.SetReply(r) + resp.Answer = []dns.RR{&dns.A{ + Hdr: dns.RR_Header{Name: r.Question[0].Name, Rrtype: dns.TypeA, Class: dns.ClassINET, Ttl: 60}, + A: net.ParseIP(h.ip).To4(), + }} + _ = w.WriteMsg(resp) +} + +func (h *answeringHandler) String() string { return h.name } + +func TestHandlerChain_ResolveInternal_SkipsAboveMaxPriority(t *testing.T) { + chain := nbdns.NewHandlerChain() + + high := &answeringHandler{name: "high", ip: "10.0.0.1"} + low := &answeringHandler{name: "low", ip: "10.0.0.2"} + + chain.AddHandler("example.com.", high, nbdns.PriorityMgmtCache) + chain.AddHandler("example.com.", low, nbdns.PriorityUpstream) + + r := new(dns.Msg) + r.SetQuestion("example.com.", dns.TypeA) + + resp, err := chain.ResolveInternal(context.Background(), r, nbdns.PriorityUpstream) + assert.NoError(t, err) + assert.NotNil(t, resp) + assert.Equal(t, 1, len(resp.Answer)) + a, ok := resp.Answer[0].(*dns.A) + assert.True(t, ok) + assert.Equal(t, "10.0.0.2", a.A.String(), "should skip mgmtCache handler and resolve via upstream") +} + +func TestHandlerChain_ResolveInternal_ErrorWhenNoMatch(t *testing.T) { + chain := nbdns.NewHandlerChain() + high := &answeringHandler{name: "high", ip: "10.0.0.1"} + chain.AddHandler("example.com.", high, nbdns.PriorityMgmtCache) + + r := new(dns.Msg) + r.SetQuestion("example.com.", dns.TypeA) + + _, err := chain.ResolveInternal(context.Background(), r, nbdns.PriorityUpstream) + assert.Error(t, err, "no handler at or below maxPriority should error") +} + +// rawWriteHandler packs a response and calls ResponseWriter.Write directly +// (instead of WriteMsg), exercising the internalResponseWriter.Write path. +type rawWriteHandler struct { + ip string +} + +func (h *rawWriteHandler) ServeDNS(w dns.ResponseWriter, r *dns.Msg) { + resp := &dns.Msg{} + resp.SetReply(r) + resp.Answer = []dns.RR{&dns.A{ + Hdr: dns.RR_Header{Name: r.Question[0].Name, Rrtype: dns.TypeA, Class: dns.ClassINET, Ttl: 60}, + A: net.ParseIP(h.ip).To4(), + }} + packed, err := resp.Pack() + if err != nil { + return + } + _, _ = w.Write(packed) +} + +func TestHandlerChain_ResolveInternal_CapturesRawWrite(t *testing.T) { + chain := nbdns.NewHandlerChain() + chain.AddHandler("example.com.", &rawWriteHandler{ip: "10.0.0.3"}, nbdns.PriorityUpstream) + + r := new(dns.Msg) + r.SetQuestion("example.com.", dns.TypeA) + + resp, err := chain.ResolveInternal(context.Background(), r, nbdns.PriorityUpstream) + assert.NoError(t, err) + require.NotNil(t, resp) + require.Len(t, resp.Answer, 1) + a, ok := resp.Answer[0].(*dns.A) + require.True(t, ok) + assert.Equal(t, "10.0.0.3", a.A.String(), "handlers calling Write(packed) must still surface their answer") +} + +func TestHandlerChain_ResolveInternal_EmptyQuestion(t *testing.T) { + chain := nbdns.NewHandlerChain() + _, err := chain.ResolveInternal(context.Background(), new(dns.Msg), nbdns.PriorityUpstream) + assert.Error(t, err) +} + +// hangingHandler blocks indefinitely until closed, simulating a wedged upstream. +type hangingHandler struct { + block chan struct{} +} + +func (h *hangingHandler) ServeDNS(w dns.ResponseWriter, r *dns.Msg) { + <-h.block + resp := &dns.Msg{} + resp.SetReply(r) + _ = w.WriteMsg(resp) +} + +func (h *hangingHandler) String() string { return "hangingHandler" } + +func TestHandlerChain_ResolveInternal_HonorsContextTimeout(t *testing.T) { + chain := nbdns.NewHandlerChain() + h := &hangingHandler{block: make(chan struct{})} + defer close(h.block) + + chain.AddHandler("example.com.", h, nbdns.PriorityUpstream) + + r := new(dns.Msg) + r.SetQuestion("example.com.", dns.TypeA) + + ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond) + defer cancel() + + start := time.Now() + _, err := chain.ResolveInternal(ctx, r, nbdns.PriorityUpstream) + elapsed := time.Since(start) + + assert.Error(t, err) + assert.ErrorIs(t, err, context.DeadlineExceeded) + assert.Less(t, elapsed, 500*time.Millisecond, "ResolveInternal must return shortly after ctx deadline") +} + +func TestHandlerChain_HasRootHandlerAtOrBelow(t *testing.T) { + chain := nbdns.NewHandlerChain() + h := &answeringHandler{name: "h", ip: "10.0.0.1"} + + assert.False(t, chain.HasRootHandlerAtOrBelow(nbdns.PriorityUpstream), "empty chain") + + chain.AddHandler("example.com.", h, nbdns.PriorityUpstream) + assert.False(t, chain.HasRootHandlerAtOrBelow(nbdns.PriorityUpstream), "non-root handler does not count") + + chain.AddHandler(".", h, nbdns.PriorityMgmtCache) + assert.False(t, chain.HasRootHandlerAtOrBelow(nbdns.PriorityUpstream), "root handler above threshold excluded") + + chain.AddHandler(".", h, nbdns.PriorityDefault) + assert.True(t, chain.HasRootHandlerAtOrBelow(nbdns.PriorityUpstream), "root handler at PriorityDefault included") + + chain.RemoveHandler(".", nbdns.PriorityDefault) + assert.False(t, chain.HasRootHandlerAtOrBelow(nbdns.PriorityUpstream)) + + // Primary nsgroup case: root handler lands at PriorityUpstream. + chain.AddHandler(".", h, nbdns.PriorityUpstream) + assert.True(t, chain.HasRootHandlerAtOrBelow(nbdns.PriorityUpstream), "root at PriorityUpstream included") + chain.RemoveHandler(".", nbdns.PriorityUpstream) + + // Fallback case: original /etc/resolv.conf entries land at PriorityFallback. + chain.AddHandler(".", h, nbdns.PriorityFallback) + assert.True(t, chain.HasRootHandlerAtOrBelow(nbdns.PriorityUpstream), "root at PriorityFallback included") + chain.RemoveHandler(".", nbdns.PriorityFallback) + assert.False(t, chain.HasRootHandlerAtOrBelow(nbdns.PriorityUpstream)) +} diff --git a/client/internal/dns/mgmt/mgmt.go b/client/internal/dns/mgmt/mgmt.go index 314af51d9..988e427fb 100644 --- a/client/internal/dns/mgmt/mgmt.go +++ b/client/internal/dns/mgmt/mgmt.go @@ -2,40 +2,83 @@ package mgmt import ( "context" + "errors" "fmt" "net" - "net/netip" "net/url" + "os" + "slices" "strings" "sync" + "sync/atomic" "time" "github.com/miekg/dns" log "github.com/sirupsen/logrus" + "golang.org/x/sync/singleflight" dnsconfig "github.com/netbirdio/netbird/client/internal/dns/config" + "github.com/netbirdio/netbird/client/internal/dns/resutil" "github.com/netbirdio/netbird/shared/management/domain" ) -const dnsTimeout = 5 * time.Second +const ( + dnsTimeout = 5 * time.Second + defaultTTL = 300 * time.Second + refreshBackoff = 30 * time.Second -// Resolver caches critical NetBird infrastructure domains + // envMgmtCacheTTL overrides defaultTTL for integration/dev testing. + envMgmtCacheTTL = "NB_MGMT_CACHE_TTL" +) + +// ChainResolver lets the cache refresh stale entries through the DNS handler +// chain instead of net.DefaultResolver, avoiding loopback when NetBird is the +// system resolver. +type ChainResolver interface { + ResolveInternal(ctx context.Context, msg *dns.Msg, maxPriority int) (*dns.Msg, error) + HasRootHandlerAtOrBelow(maxPriority int) bool +} + +// cachedRecord holds DNS records plus timestamps used for TTL refresh. +// records and cachedAt are set at construction and treated as immutable; +// lastFailedRefresh and consecFailures are mutable and must be accessed under +// Resolver.mutex. +type cachedRecord struct { + records []dns.RR + cachedAt time.Time + lastFailedRefresh time.Time + consecFailures int +} + +// Resolver caches critical NetBird infrastructure domains. +// records, refreshing, mgmtDomain and serverDomains are all guarded by mutex. type Resolver struct { - records map[dns.Question][]dns.RR + records map[dns.Question]*cachedRecord mgmtDomain *domain.Domain serverDomains *dnsconfig.ServerDomains mutex sync.RWMutex -} -type ipsResponse struct { - ips []netip.Addr - err error + chain ChainResolver + chainMaxPriority int + refreshGroup singleflight.Group + + // refreshing tracks questions whose refresh is running via the OS + // fallback path. A ServeDNS hit for a question in this map indicates + // the OS resolver routed the recursive query back to us (loop). Only + // the OS path arms this so chain-path refreshes don't produce false + // positives. The atomic bool is CAS-flipped once per refresh to + // throttle the warning log. + refreshing map[dns.Question]*atomic.Bool + + cacheTTL time.Duration } // NewResolver creates a new management domains cache resolver. func NewResolver() *Resolver { return &Resolver{ - records: make(map[dns.Question][]dns.RR), + records: make(map[dns.Question]*cachedRecord), + refreshing: make(map[dns.Question]*atomic.Bool), + cacheTTL: resolveCacheTTL(), } } @@ -44,7 +87,19 @@ func (m *Resolver) String() string { return "MgmtCacheResolver" } -// ServeDNS implements dns.Handler interface. +// SetChainResolver wires the handler chain used to refresh stale cache entries. +// maxPriority caps which handlers may answer refresh queries (typically +// PriorityUpstream, so upstream/default/fallback handlers are consulted and +// mgmt/route/local handlers are skipped). +func (m *Resolver) SetChainResolver(chain ChainResolver, maxPriority int) { + m.mutex.Lock() + m.chain = chain + m.chainMaxPriority = maxPriority + m.mutex.Unlock() +} + +// ServeDNS serves cached A/AAAA records. Stale entries are returned +// immediately and refreshed asynchronously (stale-while-revalidate). func (m *Resolver) ServeDNS(w dns.ResponseWriter, r *dns.Msg) { if len(r.Question) == 0 { m.continueToNext(w, r) @@ -60,7 +115,14 @@ func (m *Resolver) ServeDNS(w dns.ResponseWriter, r *dns.Msg) { } m.mutex.RLock() - records, found := m.records[question] + cached, found := m.records[question] + inflight := m.refreshing[question] + var shouldRefresh bool + if found { + stale := time.Since(cached.cachedAt) > m.cacheTTL + inBackoff := !cached.lastFailedRefresh.IsZero() && time.Since(cached.lastFailedRefresh) < refreshBackoff + shouldRefresh = stale && !inBackoff + } m.mutex.RUnlock() if !found { @@ -68,12 +130,23 @@ func (m *Resolver) ServeDNS(w dns.ResponseWriter, r *dns.Msg) { return } + if inflight != nil && inflight.CompareAndSwap(false, true) { + log.Warnf("mgmt cache: possible resolver loop for domain=%s: served stale while an OS-fallback refresh was inflight (if NetBird is the system resolver, the OS-path predicate is wrong)", + question.Name) + } + + // Skip scheduling a refresh goroutine if one is already inflight for + // this question; singleflight would dedup anyway but skipping avoids + // a parked goroutine per stale hit under bursty load. + if shouldRefresh && inflight == nil { + m.scheduleRefresh(question, cached) + } + resp := &dns.Msg{} resp.SetReply(r) resp.Authoritative = false resp.RecursionAvailable = true - - resp.Answer = append(resp.Answer, records...) + resp.Answer = cloneRecordsWithTTL(cached.records, m.responseTTL(cached.cachedAt)) log.Debugf("serving %d cached records for domain=%s", len(resp.Answer), question.Name) @@ -98,101 +171,260 @@ func (m *Resolver) continueToNext(w dns.ResponseWriter, r *dns.Msg) { } } -// AddDomain manually adds a domain to cache by resolving it. +// AddDomain resolves a domain and stores its A/AAAA records in the cache. +// A family that resolves NODATA (nil err, zero records) evicts any stale +// entry for that qtype. func (m *Resolver) AddDomain(ctx context.Context, d domain.Domain) error { dnsName := strings.ToLower(dns.Fqdn(d.PunycodeString())) ctx, cancel := context.WithTimeout(ctx, dnsTimeout) defer cancel() - ips, err := lookupIPWithExtraTimeout(ctx, d) - if err != nil { - return err + aRecords, aaaaRecords, errA, errAAAA := m.lookupBoth(ctx, d, dnsName) + + if errA != nil && errAAAA != nil { + return fmt.Errorf("resolve %s: %w", d.SafeString(), errors.Join(errA, errAAAA)) } - var aRecords, aaaaRecords []dns.RR - for _, ip := range ips { - if ip.Is4() { - rr := &dns.A{ - Hdr: dns.RR_Header{ - Name: dnsName, - Rrtype: dns.TypeA, - Class: dns.ClassINET, - Ttl: 300, - }, - A: ip.AsSlice(), - } - aRecords = append(aRecords, rr) - } else if ip.Is6() { - rr := &dns.AAAA{ - Hdr: dns.RR_Header{ - Name: dnsName, - Rrtype: dns.TypeAAAA, - Class: dns.ClassINET, - Ttl: 300, - }, - AAAA: ip.AsSlice(), - } - aaaaRecords = append(aaaaRecords, rr) + if len(aRecords) == 0 && len(aaaaRecords) == 0 { + if err := errors.Join(errA, errAAAA); err != nil { + return fmt.Errorf("resolve %s: no A/AAAA records: %w", d.SafeString(), err) } + return fmt.Errorf("resolve %s: no A/AAAA records", d.SafeString()) } + now := time.Now() m.mutex.Lock() + defer m.mutex.Unlock() - if len(aRecords) > 0 { - aQuestion := dns.Question{ - Name: dnsName, - Qtype: dns.TypeA, - Qclass: dns.ClassINET, - } - m.records[aQuestion] = aRecords - } + m.applyFamilyRecords(dnsName, dns.TypeA, aRecords, errA, now) + m.applyFamilyRecords(dnsName, dns.TypeAAAA, aaaaRecords, errAAAA, now) - if len(aaaaRecords) > 0 { - aaaaQuestion := dns.Question{ - Name: dnsName, - Qtype: dns.TypeAAAA, - Qclass: dns.ClassINET, - } - m.records[aaaaQuestion] = aaaaRecords - } - - m.mutex.Unlock() - - log.Debugf("added domain=%s with %d A records and %d AAAA records", + log.Debugf("added/updated domain=%s with %d A records and %d AAAA records", d.SafeString(), len(aRecords), len(aaaaRecords)) return nil } -func lookupIPWithExtraTimeout(ctx context.Context, d domain.Domain) ([]netip.Addr, error) { - log.Infof("looking up IP for mgmt domain=%s", d.SafeString()) - defer log.Infof("done looking up IP for mgmt domain=%s", d.SafeString()) - resultChan := make(chan *ipsResponse, 1) +// applyFamilyRecords writes records, evicts on NODATA, leaves the cache +// untouched on error. Caller holds m.mutex. +func (m *Resolver) applyFamilyRecords(dnsName string, qtype uint16, records []dns.RR, err error, now time.Time) { + q := dns.Question{Name: dnsName, Qtype: qtype, Qclass: dns.ClassINET} + switch { + case len(records) > 0: + m.records[q] = &cachedRecord{records: records, cachedAt: now} + case err == nil: + delete(m.records, q) + } +} - go func() { - ips, err := net.DefaultResolver.LookupNetIP(ctx, "ip", d.PunycodeString()) - resultChan <- &ipsResponse{ - err: err, - ips: ips, +// scheduleRefresh kicks off an async refresh. DoChan spawns one goroutine per +// unique in-flight key; bursty stale hits share its channel. expected is the +// cachedRecord pointer observed by the caller; the refresh only mutates the +// cache if that pointer is still the one stored, so a stale in-flight refresh +// can't clobber a newer entry written by AddDomain or a competing refresh. +func (m *Resolver) scheduleRefresh(question dns.Question, expected *cachedRecord) { + key := question.Name + "|" + dns.TypeToString[question.Qtype] + _ = m.refreshGroup.DoChan(key, func() (any, error) { + return nil, m.refreshQuestion(question, expected) + }) +} + +// refreshQuestion replaces the cached records on success, or marks the entry +// failed (arming the backoff) on failure. While this runs, ServeDNS can detect +// a resolver loop by spotting a query for this same question arriving on us. +// expected pins the cache entry observed at schedule time; mutations only apply +// if m.records[question] still points at it. +func (m *Resolver) refreshQuestion(question dns.Question, expected *cachedRecord) error { + ctx, cancel := context.WithTimeout(context.Background(), dnsTimeout) + defer cancel() + + d, err := domain.FromString(strings.TrimSuffix(question.Name, ".")) + if err != nil { + m.markRefreshFailed(question, expected) + return fmt.Errorf("parse domain: %w", err) + } + + records, err := m.lookupRecords(ctx, d, question) + if err != nil { + fails := m.markRefreshFailed(question, expected) + logf := log.Warnf + if fails == 0 || fails > 1 { + logf = log.Debugf } - }() - - var resp *ipsResponse - - select { - case <-time.After(dnsTimeout + time.Millisecond*500): - log.Warnf("timed out waiting for IP for mgmt domain=%s", d.SafeString()) - return nil, fmt.Errorf("timed out waiting for ips to be available for domain %s", d.SafeString()) - case <-ctx.Done(): - return nil, ctx.Err() - case resp = <-resultChan: + logf("refresh mgmt cache domain=%s type=%s: %v (consecutive failures=%d)", + d.SafeString(), dns.TypeToString[question.Qtype], err, fails) + return err } - if resp.err != nil { - return nil, fmt.Errorf("resolve domain %s: %w", d.SafeString(), resp.err) + // NOERROR/NODATA: family gone upstream, evict so we stop serving stale. + if len(records) == 0 { + m.mutex.Lock() + if m.records[question] == expected { + delete(m.records, question) + m.mutex.Unlock() + log.Infof("removed mgmt cache domain=%s type=%s: no records returned", + d.SafeString(), dns.TypeToString[question.Qtype]) + return nil + } + m.mutex.Unlock() + log.Debugf("skipping refresh evict for domain=%s type=%s: entry changed during refresh", + d.SafeString(), dns.TypeToString[question.Qtype]) + return nil } - return resp.ips, nil + + now := time.Now() + m.mutex.Lock() + if m.records[question] != expected { + m.mutex.Unlock() + log.Debugf("skipping refresh write for domain=%s type=%s: entry changed during refresh", + d.SafeString(), dns.TypeToString[question.Qtype]) + return nil + } + m.records[question] = &cachedRecord{records: records, cachedAt: now} + m.mutex.Unlock() + + log.Infof("refreshed mgmt cache domain=%s type=%s", + d.SafeString(), dns.TypeToString[question.Qtype]) + return nil +} + +func (m *Resolver) markRefreshing(question dns.Question) { + m.mutex.Lock() + m.refreshing[question] = &atomic.Bool{} + m.mutex.Unlock() +} + +func (m *Resolver) clearRefreshing(question dns.Question) { + m.mutex.Lock() + delete(m.refreshing, question) + m.mutex.Unlock() +} + +// markRefreshFailed arms the backoff and returns the new consecutive-failure +// count so callers can downgrade subsequent failure logs to debug. +func (m *Resolver) markRefreshFailed(question dns.Question, expected *cachedRecord) int { + m.mutex.Lock() + defer m.mutex.Unlock() + c, ok := m.records[question] + if !ok || c != expected { + return 0 + } + c.lastFailedRefresh = time.Now() + c.consecFailures++ + return c.consecFailures +} + +// lookupBoth resolves A and AAAA via chain or OS. Per-family errors let +// callers tell records, NODATA (nil err, no records), and failure apart. +func (m *Resolver) lookupBoth(ctx context.Context, d domain.Domain, dnsName string) (aRecords, aaaaRecords []dns.RR, errA, errAAAA error) { + m.mutex.RLock() + chain := m.chain + maxPriority := m.chainMaxPriority + m.mutex.RUnlock() + + if chain != nil && chain.HasRootHandlerAtOrBelow(maxPriority) { + aRecords, errA = m.lookupViaChain(ctx, chain, maxPriority, dnsName, dns.TypeA) + aaaaRecords, errAAAA = m.lookupViaChain(ctx, chain, maxPriority, dnsName, dns.TypeAAAA) + return + } + + // TODO: drop once every supported OS registers a fallback resolver. Safe + // today: no root handler at priority ≤ PriorityUpstream means NetBird is + // not the system resolver, so net.DefaultResolver will not loop back. + aRecords, errA = m.osLookup(ctx, d, dnsName, dns.TypeA) + aaaaRecords, errAAAA = m.osLookup(ctx, d, dnsName, dns.TypeAAAA) + return +} + +// lookupRecords resolves a single record type via chain or OS. The OS branch +// arms the loop detector for the duration of its call so that ServeDNS can +// spot the OS resolver routing the recursive query back to us. +func (m *Resolver) lookupRecords(ctx context.Context, d domain.Domain, q dns.Question) ([]dns.RR, error) { + m.mutex.RLock() + chain := m.chain + maxPriority := m.chainMaxPriority + m.mutex.RUnlock() + + if chain != nil && chain.HasRootHandlerAtOrBelow(maxPriority) { + return m.lookupViaChain(ctx, chain, maxPriority, q.Name, q.Qtype) + } + + // TODO: drop once every supported OS registers a fallback resolver. + m.markRefreshing(q) + defer m.clearRefreshing(q) + + return m.osLookup(ctx, d, q.Name, q.Qtype) +} + +// lookupViaChain resolves via the handler chain and rewrites each RR to use +// dnsName as owner and m.cacheTTL as TTL, so CNAME-backed domains don't cache +// target-owned records or upstream TTLs. NODATA returns (nil, nil). +func (m *Resolver) lookupViaChain(ctx context.Context, chain ChainResolver, maxPriority int, dnsName string, qtype uint16) ([]dns.RR, error) { + msg := &dns.Msg{} + msg.SetQuestion(dnsName, qtype) + msg.RecursionDesired = true + + resp, err := chain.ResolveInternal(ctx, msg, maxPriority) + if err != nil { + return nil, fmt.Errorf("chain resolve: %w", err) + } + if resp == nil { + return nil, fmt.Errorf("chain resolve returned nil response") + } + if resp.Rcode != dns.RcodeSuccess { + return nil, fmt.Errorf("chain resolve rcode=%s", dns.RcodeToString[resp.Rcode]) + } + + ttl := uint32(m.cacheTTL.Seconds()) + owners := cnameOwners(dnsName, resp.Answer) + var filtered []dns.RR + for _, rr := range resp.Answer { + h := rr.Header() + if h.Class != dns.ClassINET || h.Rrtype != qtype { + continue + } + if !owners[strings.ToLower(dns.Fqdn(h.Name))] { + continue + } + if cp := cloneIPRecord(rr, dnsName, ttl); cp != nil { + filtered = append(filtered, cp) + } + } + return filtered, nil +} + +// osLookup resolves a single family via net.DefaultResolver using resutil, +// which disambiguates NODATA from NXDOMAIN and Unmaps v4-mapped-v6. NODATA +// returns (nil, nil). +func (m *Resolver) osLookup(ctx context.Context, d domain.Domain, dnsName string, qtype uint16) ([]dns.RR, error) { + network := resutil.NetworkForQtype(qtype) + if network == "" { + return nil, fmt.Errorf("unsupported qtype %s", dns.TypeToString[qtype]) + } + + log.Infof("looking up IP for mgmt domain=%s type=%s", d.SafeString(), dns.TypeToString[qtype]) + defer log.Infof("done looking up IP for mgmt domain=%s type=%s", d.SafeString(), dns.TypeToString[qtype]) + + result := resutil.LookupIP(ctx, net.DefaultResolver, network, d.PunycodeString(), qtype) + if result.Rcode == dns.RcodeSuccess { + return resutil.IPsToRRs(dnsName, result.IPs, uint32(m.cacheTTL.Seconds())), nil + } + + if result.Err != nil { + return nil, fmt.Errorf("resolve %s type=%s: %w", d.SafeString(), dns.TypeToString[qtype], result.Err) + } + return nil, fmt.Errorf("resolve %s type=%s: rcode=%s", d.SafeString(), dns.TypeToString[qtype], dns.RcodeToString[result.Rcode]) +} + +// responseTTL returns the remaining cache lifetime in seconds (rounded up), +// so downstream resolvers don't cache an answer for longer than we will. +func (m *Resolver) responseTTL(cachedAt time.Time) uint32 { + remaining := m.cacheTTL - time.Since(cachedAt) + if remaining <= 0 { + return 0 + } + return uint32((remaining + time.Second - 1) / time.Second) } // PopulateFromConfig extracts and caches domains from the client configuration. @@ -224,19 +456,12 @@ func (m *Resolver) RemoveDomain(d domain.Domain) error { m.mutex.Lock() defer m.mutex.Unlock() - aQuestion := dns.Question{ - Name: dnsName, - Qtype: dns.TypeA, - Qclass: dns.ClassINET, - } - delete(m.records, aQuestion) - - aaaaQuestion := dns.Question{ - Name: dnsName, - Qtype: dns.TypeAAAA, - Qclass: dns.ClassINET, - } - delete(m.records, aaaaQuestion) + qA := dns.Question{Name: dnsName, Qtype: dns.TypeA, Qclass: dns.ClassINET} + qAAAA := dns.Question{Name: dnsName, Qtype: dns.TypeAAAA, Qclass: dns.ClassINET} + delete(m.records, qA) + delete(m.records, qAAAA) + delete(m.refreshing, qA) + delete(m.refreshing, qAAAA) log.Debugf("removed domain=%s from cache", d.SafeString()) return nil @@ -394,3 +619,73 @@ func (m *Resolver) extractDomainsFromServerDomains(serverDomains dnsconfig.Serve return domains } + +// cloneIPRecord returns a deep copy of rr retargeted to owner with ttl. Non +// A/AAAA records return nil. +func cloneIPRecord(rr dns.RR, owner string, ttl uint32) dns.RR { + switch r := rr.(type) { + case *dns.A: + cp := *r + cp.Hdr.Name = owner + cp.Hdr.Ttl = ttl + cp.A = slices.Clone(r.A) + return &cp + case *dns.AAAA: + cp := *r + cp.Hdr.Name = owner + cp.Hdr.Ttl = ttl + cp.AAAA = slices.Clone(r.AAAA) + return &cp + } + return nil +} + +// cloneRecordsWithTTL clones A/AAAA records preserving their owner and +// stamping ttl so the response shares no memory with the cached slice. +func cloneRecordsWithTTL(records []dns.RR, ttl uint32) []dns.RR { + out := make([]dns.RR, 0, len(records)) + for _, rr := range records { + if cp := cloneIPRecord(rr, rr.Header().Name, ttl); cp != nil { + out = append(out, cp) + } + } + return out +} + +// cnameOwners returns dnsName plus every target reachable by following CNAMEs +// in answer, iterating until fixed point so out-of-order chains resolve. +func cnameOwners(dnsName string, answer []dns.RR) map[string]bool { + owners := map[string]bool{dnsName: true} + for { + added := false + for _, rr := range answer { + cname, ok := rr.(*dns.CNAME) + if !ok { + continue + } + name := strings.ToLower(dns.Fqdn(cname.Hdr.Name)) + if !owners[name] { + continue + } + target := strings.ToLower(dns.Fqdn(cname.Target)) + if !owners[target] { + owners[target] = true + added = true + } + } + if !added { + return owners + } + } +} + +// resolveCacheTTL reads the cache TTL override env var; invalid or empty +// values fall back to defaultTTL. Called once per Resolver from NewResolver. +func resolveCacheTTL() time.Duration { + if v := os.Getenv(envMgmtCacheTTL); v != "" { + if d, err := time.ParseDuration(v); err == nil && d > 0 { + return d + } + } + return defaultTTL +} diff --git a/client/internal/dns/mgmt/mgmt_refresh_test.go b/client/internal/dns/mgmt/mgmt_refresh_test.go new file mode 100644 index 000000000..9faa5a0b8 --- /dev/null +++ b/client/internal/dns/mgmt/mgmt_refresh_test.go @@ -0,0 +1,408 @@ +package mgmt + +import ( + "context" + "errors" + "net" + "sync" + "sync/atomic" + "testing" + "time" + + "github.com/miekg/dns" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/netbirdio/netbird/client/internal/dns/test" + "github.com/netbirdio/netbird/shared/management/domain" +) + +type fakeChain struct { + mu sync.Mutex + calls map[string]int + answers map[string][]dns.RR + err error + hasRoot bool + onLookup func() +} + +func newFakeChain() *fakeChain { + return &fakeChain{ + calls: map[string]int{}, + answers: map[string][]dns.RR{}, + hasRoot: true, + } +} + +func (f *fakeChain) HasRootHandlerAtOrBelow(maxPriority int) bool { + f.mu.Lock() + defer f.mu.Unlock() + return f.hasRoot +} + +func (f *fakeChain) ResolveInternal(ctx context.Context, msg *dns.Msg, maxPriority int) (*dns.Msg, error) { + f.mu.Lock() + q := msg.Question[0] + key := q.Name + "|" + dns.TypeToString[q.Qtype] + f.calls[key]++ + answers := f.answers[key] + err := f.err + onLookup := f.onLookup + f.mu.Unlock() + + if onLookup != nil { + onLookup() + } + if err != nil { + return nil, err + } + resp := &dns.Msg{} + resp.SetReply(msg) + resp.Answer = answers + return resp, nil +} + +func (f *fakeChain) setAnswer(name string, qtype uint16, ip string) { + f.mu.Lock() + defer f.mu.Unlock() + key := name + "|" + dns.TypeToString[qtype] + hdr := dns.RR_Header{Name: name, Rrtype: qtype, Class: dns.ClassINET, Ttl: 60} + switch qtype { + case dns.TypeA: + f.answers[key] = []dns.RR{&dns.A{Hdr: hdr, A: net.ParseIP(ip).To4()}} + case dns.TypeAAAA: + f.answers[key] = []dns.RR{&dns.AAAA{Hdr: hdr, AAAA: net.ParseIP(ip).To16()}} + } +} + +func (f *fakeChain) callCount(name string, qtype uint16) int { + f.mu.Lock() + defer f.mu.Unlock() + return f.calls[name+"|"+dns.TypeToString[qtype]] +} + +// waitFor polls the predicate until it returns true or the deadline passes. +func waitFor(t *testing.T, d time.Duration, fn func() bool) { + t.Helper() + deadline := time.Now().Add(d) + for time.Now().Before(deadline) { + if fn() { + return + } + time.Sleep(5 * time.Millisecond) + } + t.Fatalf("condition not met within %s", d) +} + +func queryA(t *testing.T, r *Resolver, name string) *dns.Msg { + t.Helper() + msg := new(dns.Msg) + msg.SetQuestion(name, dns.TypeA) + w := &test.MockResponseWriter{} + r.ServeDNS(w, msg) + return w.GetLastResponse() +} + +func firstA(t *testing.T, resp *dns.Msg) string { + t.Helper() + require.NotNil(t, resp) + require.Greater(t, len(resp.Answer), 0, "expected at least one answer") + a, ok := resp.Answer[0].(*dns.A) + require.True(t, ok, "expected A record") + return a.A.String() +} + +func TestResolver_CacheTTLGatesRefresh(t *testing.T) { + // Same cached entry age, different cacheTTL values: the shorter TTL must + // trigger a background refresh, the longer one must not. Proves that the + // per-Resolver cacheTTL field actually drives the stale decision. + cachedAt := time.Now().Add(-100 * time.Millisecond) + + newRec := func() *cachedRecord { + return &cachedRecord{ + records: []dns.RR{&dns.A{ + Hdr: dns.RR_Header{Name: "mgmt.example.com.", Rrtype: dns.TypeA, Class: dns.ClassINET, Ttl: 60}, + A: net.ParseIP("10.0.0.1").To4(), + }}, + cachedAt: cachedAt, + } + } + q := dns.Question{Name: "mgmt.example.com.", Qtype: dns.TypeA, Qclass: dns.ClassINET} + + t.Run("short TTL treats entry as stale and refreshes", func(t *testing.T) { + r := NewResolver() + r.cacheTTL = 10 * time.Millisecond + chain := newFakeChain() + chain.setAnswer(q.Name, dns.TypeA, "10.0.0.2") + r.SetChainResolver(chain, 50) + r.records[q] = newRec() + + resp := queryA(t, r, q.Name) + assert.Equal(t, "10.0.0.1", firstA(t, resp), "stale entry must be served while refresh runs") + + waitFor(t, time.Second, func() bool { + return chain.callCount(q.Name, dns.TypeA) >= 1 + }) + }) + + t.Run("long TTL keeps entry fresh and skips refresh", func(t *testing.T) { + r := NewResolver() + r.cacheTTL = time.Hour + chain := newFakeChain() + chain.setAnswer(q.Name, dns.TypeA, "10.0.0.2") + r.SetChainResolver(chain, 50) + r.records[q] = newRec() + + resp := queryA(t, r, q.Name) + assert.Equal(t, "10.0.0.1", firstA(t, resp)) + + time.Sleep(50 * time.Millisecond) + assert.Equal(t, 0, chain.callCount(q.Name, dns.TypeA), "fresh entry must not trigger refresh") + }) +} + +func TestResolver_ServeFresh_NoRefresh(t *testing.T) { + r := NewResolver() + chain := newFakeChain() + chain.setAnswer("mgmt.example.com.", dns.TypeA, "10.0.0.2") + r.SetChainResolver(chain, 50) + + r.records[dns.Question{Name: "mgmt.example.com.", Qtype: dns.TypeA, Qclass: dns.ClassINET}] = &cachedRecord{ + records: []dns.RR{&dns.A{ + Hdr: dns.RR_Header{Name: "mgmt.example.com.", Rrtype: dns.TypeA, Class: dns.ClassINET, Ttl: 60}, + A: net.ParseIP("10.0.0.1").To4(), + }}, + cachedAt: time.Now(), // fresh + } + + resp := queryA(t, r, "mgmt.example.com.") + assert.Equal(t, "10.0.0.1", firstA(t, resp)) + + time.Sleep(20 * time.Millisecond) + assert.Equal(t, 0, chain.callCount("mgmt.example.com.", dns.TypeA), "fresh entry must not trigger refresh") +} + +func TestResolver_StaleTriggersAsyncRefresh(t *testing.T) { + r := NewResolver() + chain := newFakeChain() + chain.setAnswer("mgmt.example.com.", dns.TypeA, "10.0.0.2") + r.SetChainResolver(chain, 50) + + q := dns.Question{Name: "mgmt.example.com.", Qtype: dns.TypeA, Qclass: dns.ClassINET} + r.records[q] = &cachedRecord{ + records: []dns.RR{&dns.A{ + Hdr: dns.RR_Header{Name: q.Name, Rrtype: dns.TypeA, Class: dns.ClassINET, Ttl: 60}, + A: net.ParseIP("10.0.0.1").To4(), + }}, + cachedAt: time.Now().Add(-2 * defaultTTL), // stale + } + + // First query: serves stale immediately. + resp := queryA(t, r, "mgmt.example.com.") + assert.Equal(t, "10.0.0.1", firstA(t, resp), "stale entry must be served while refresh runs") + + waitFor(t, time.Second, func() bool { + return chain.callCount("mgmt.example.com.", dns.TypeA) >= 1 + }) + + // Next query should now return the refreshed IP. + waitFor(t, time.Second, func() bool { + resp := queryA(t, r, "mgmt.example.com.") + return resp != nil && len(resp.Answer) > 0 && firstA(t, resp) == "10.0.0.2" + }) +} + +func TestResolver_ConcurrentStaleHitsCollapseRefresh(t *testing.T) { + r := NewResolver() + chain := newFakeChain() + chain.setAnswer("mgmt.example.com.", dns.TypeA, "10.0.0.2") + + var inflight atomic.Int32 + var maxInflight atomic.Int32 + chain.onLookup = func() { + cur := inflight.Add(1) + defer inflight.Add(-1) + for { + prev := maxInflight.Load() + if cur <= prev || maxInflight.CompareAndSwap(prev, cur) { + break + } + } + time.Sleep(50 * time.Millisecond) // hold inflight long enough to collide + } + + r.SetChainResolver(chain, 50) + + q := dns.Question{Name: "mgmt.example.com.", Qtype: dns.TypeA, Qclass: dns.ClassINET} + r.records[q] = &cachedRecord{ + records: []dns.RR{&dns.A{ + Hdr: dns.RR_Header{Name: q.Name, Rrtype: dns.TypeA, Class: dns.ClassINET, Ttl: 60}, + A: net.ParseIP("10.0.0.1").To4(), + }}, + cachedAt: time.Now().Add(-2 * defaultTTL), + } + + var wg sync.WaitGroup + for i := 0; i < 50; i++ { + wg.Add(1) + go func() { + defer wg.Done() + queryA(t, r, "mgmt.example.com.") + }() + } + wg.Wait() + + waitFor(t, 2*time.Second, func() bool { + return inflight.Load() == 0 + }) + + calls := chain.callCount("mgmt.example.com.", dns.TypeA) + assert.LessOrEqual(t, calls, 2, "singleflight must collapse concurrent refreshes (got %d)", calls) + assert.Equal(t, int32(1), maxInflight.Load(), "only one refresh should run concurrently") +} + +func TestResolver_RefreshFailureArmsBackoff(t *testing.T) { + r := NewResolver() + chain := newFakeChain() + chain.err = errors.New("boom") + r.SetChainResolver(chain, 50) + + q := dns.Question{Name: "mgmt.example.com.", Qtype: dns.TypeA, Qclass: dns.ClassINET} + r.records[q] = &cachedRecord{ + records: []dns.RR{&dns.A{ + Hdr: dns.RR_Header{Name: q.Name, Rrtype: dns.TypeA, Class: dns.ClassINET, Ttl: 60}, + A: net.ParseIP("10.0.0.1").To4(), + }}, + cachedAt: time.Now().Add(-2 * defaultTTL), + } + + // First stale hit triggers a refresh attempt that fails. + resp := queryA(t, r, "mgmt.example.com.") + assert.Equal(t, "10.0.0.1", firstA(t, resp), "stale entry served while refresh fails") + + waitFor(t, time.Second, func() bool { + return chain.callCount("mgmt.example.com.", dns.TypeA) == 1 + }) + waitFor(t, time.Second, func() bool { + r.mutex.RLock() + defer r.mutex.RUnlock() + c, ok := r.records[q] + return ok && !c.lastFailedRefresh.IsZero() + }) + + // Subsequent stale hits within backoff window should not schedule more refreshes. + for i := 0; i < 10; i++ { + queryA(t, r, "mgmt.example.com.") + } + time.Sleep(50 * time.Millisecond) + assert.Equal(t, 1, chain.callCount("mgmt.example.com.", dns.TypeA), "backoff must suppress further refreshes") +} + +func TestResolver_NoRootHandler_SkipsChain(t *testing.T) { + r := NewResolver() + chain := newFakeChain() + chain.hasRoot = false + chain.setAnswer("mgmt.example.com.", dns.TypeA, "10.0.0.2") + r.SetChainResolver(chain, 50) + + // With hasRoot=false the chain must not be consulted. Use a short + // deadline so the OS fallback returns quickly without waiting on a + // real network call in CI. + ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond) + defer cancel() + _, _, _, _ = r.lookupBoth(ctx, domain.Domain("mgmt.example.com"), "mgmt.example.com.") + + assert.Equal(t, 0, chain.callCount("mgmt.example.com.", dns.TypeA), + "chain must not be used when no root handler is registered at the bound priority") +} + +func TestResolver_ServeDuringRefreshSetsLoopFlag(t *testing.T) { + // ServeDNS being invoked for a question while a refresh for that question + // is inflight indicates a resolver loop (OS resolver sent the recursive + // query back to us). The inflightRefresh.loopLoggedOnce flag must be set. + r := NewResolver() + + q := dns.Question{Name: "mgmt.example.com.", Qtype: dns.TypeA, Qclass: dns.ClassINET} + r.records[q] = &cachedRecord{ + records: []dns.RR{&dns.A{ + Hdr: dns.RR_Header{Name: q.Name, Rrtype: dns.TypeA, Class: dns.ClassINET, Ttl: 60}, + A: net.ParseIP("10.0.0.1").To4(), + }}, + cachedAt: time.Now(), + } + + // Simulate an inflight refresh. + r.markRefreshing(q) + defer r.clearRefreshing(q) + + resp := queryA(t, r, "mgmt.example.com.") + assert.Equal(t, "10.0.0.1", firstA(t, resp), "stale entry must still be served to avoid breaking external queries") + + r.mutex.RLock() + inflight := r.refreshing[q] + r.mutex.RUnlock() + require.NotNil(t, inflight) + assert.True(t, inflight.Load(), "loop flag must be set once a ServeDNS during refresh was observed") +} + +func TestResolver_LoopFlagOnlyTrippedOncePerRefresh(t *testing.T) { + r := NewResolver() + + q := dns.Question{Name: "mgmt.example.com.", Qtype: dns.TypeA, Qclass: dns.ClassINET} + r.records[q] = &cachedRecord{ + records: []dns.RR{&dns.A{ + Hdr: dns.RR_Header{Name: q.Name, Rrtype: dns.TypeA, Class: dns.ClassINET, Ttl: 60}, + A: net.ParseIP("10.0.0.1").To4(), + }}, + cachedAt: time.Now(), + } + + r.markRefreshing(q) + defer r.clearRefreshing(q) + + // Multiple ServeDNS calls during the same refresh must not re-set the flag + // (CompareAndSwap from false -> true returns true only on the first call). + for range 5 { + queryA(t, r, "mgmt.example.com.") + } + + r.mutex.RLock() + inflight := r.refreshing[q] + r.mutex.RUnlock() + assert.True(t, inflight.Load()) +} + +func TestResolver_NoLoopFlagWhenNotRefreshing(t *testing.T) { + r := NewResolver() + + q := dns.Question{Name: "mgmt.example.com.", Qtype: dns.TypeA, Qclass: dns.ClassINET} + r.records[q] = &cachedRecord{ + records: []dns.RR{&dns.A{ + Hdr: dns.RR_Header{Name: q.Name, Rrtype: dns.TypeA, Class: dns.ClassINET, Ttl: 60}, + A: net.ParseIP("10.0.0.1").To4(), + }}, + cachedAt: time.Now(), + } + + queryA(t, r, "mgmt.example.com.") + + r.mutex.RLock() + _, ok := r.refreshing[q] + r.mutex.RUnlock() + assert.False(t, ok, "no refresh inflight means no loop tracking") +} + +func TestResolver_AddDomain_UsesChainWhenRootRegistered(t *testing.T) { + r := NewResolver() + chain := newFakeChain() + chain.setAnswer("mgmt.example.com.", dns.TypeA, "10.0.0.2") + chain.setAnswer("mgmt.example.com.", dns.TypeAAAA, "fd00::2") + r.SetChainResolver(chain, 50) + + require.NoError(t, r.AddDomain(context.Background(), domain.Domain("mgmt.example.com"))) + + resp := queryA(t, r, "mgmt.example.com.") + assert.Equal(t, "10.0.0.2", firstA(t, resp)) + assert.Equal(t, 1, chain.callCount("mgmt.example.com.", dns.TypeA)) + assert.Equal(t, 1, chain.callCount("mgmt.example.com.", dns.TypeAAAA)) +} diff --git a/client/internal/dns/mgmt/mgmt_test.go b/client/internal/dns/mgmt/mgmt_test.go index 9e8a746f3..276cbba0a 100644 --- a/client/internal/dns/mgmt/mgmt_test.go +++ b/client/internal/dns/mgmt/mgmt_test.go @@ -6,6 +6,7 @@ import ( "net/url" "strings" "testing" + "time" "github.com/miekg/dns" "github.com/stretchr/testify/assert" @@ -23,6 +24,60 @@ func TestResolver_NewResolver(t *testing.T) { assert.False(t, resolver.MatchSubdomains()) } +func TestResolveCacheTTL(t *testing.T) { + tests := []struct { + name string + value string + want time.Duration + }{ + {"unset falls back to default", "", defaultTTL}, + {"valid duration", "45s", 45 * time.Second}, + {"valid minutes", "2m", 2 * time.Minute}, + {"malformed falls back to default", "not-a-duration", defaultTTL}, + {"zero falls back to default", "0s", defaultTTL}, + {"negative falls back to default", "-5s", defaultTTL}, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + t.Setenv(envMgmtCacheTTL, tc.value) + got := resolveCacheTTL() + assert.Equal(t, tc.want, got, "parsed TTL should match") + }) + } +} + +func TestNewResolver_CacheTTLFromEnv(t *testing.T) { + t.Setenv(envMgmtCacheTTL, "7s") + r := NewResolver() + assert.Equal(t, 7*time.Second, r.cacheTTL, "NewResolver should evaluate cacheTTL once from env") +} + +func TestResolver_ResponseTTL(t *testing.T) { + now := time.Now() + tests := []struct { + name string + cacheTTL time.Duration + cachedAt time.Time + wantMin uint32 + wantMax uint32 + }{ + {"fresh entry returns full TTL", 60 * time.Second, now, 59, 60}, + {"half-aged entry returns half TTL", 60 * time.Second, now.Add(-30 * time.Second), 29, 31}, + {"expired entry returns zero", 60 * time.Second, now.Add(-61 * time.Second), 0, 0}, + {"exactly expired returns zero", 10 * time.Second, now.Add(-10 * time.Second), 0, 0}, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + r := &Resolver{cacheTTL: tc.cacheTTL} + got := r.responseTTL(tc.cachedAt) + assert.GreaterOrEqual(t, got, tc.wantMin, "remaining TTL should be >= wantMin") + assert.LessOrEqual(t, got, tc.wantMax, "remaining TTL should be <= wantMax") + }) + } +} + func TestResolver_ExtractDomainFromURL(t *testing.T) { tests := []struct { name string diff --git a/client/internal/dns/server.go b/client/internal/dns/server.go index f7865047b..d4f54dec5 100644 --- a/client/internal/dns/server.go +++ b/client/internal/dns/server.go @@ -212,6 +212,7 @@ func newDefaultServer( ctx, stop := context.WithCancel(ctx) mgmtCacheResolver := mgmt.NewResolver() + mgmtCacheResolver.SetChainResolver(handlerChain, PriorityUpstream) defaultServer := &DefaultServer{ ctx: ctx, From 5da05ecca655831e99bd348316a1b10941ef4c24 Mon Sep 17 00:00:00 2001 From: Zoltan Papp Date: Wed, 22 Apr 2026 20:54:18 +0200 Subject: [PATCH 18/25] [client] increase gRPC health check timeout to 5s (#5961) Bump the IsHealthy() context timeout from 1s to 5s for both the management and signal gRPC clients to reduce false negatives on slower or congested connections. --- shared/management/client/grpc.go | 4 +++- shared/signal/client/grpc.go | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/shared/management/client/grpc.go b/shared/management/client/grpc.go index a01e51abc..e9bea7ffb 100644 --- a/shared/management/client/grpc.go +++ b/shared/management/client/grpc.go @@ -30,6 +30,8 @@ import ( const ConnectTimeout = 10 * time.Second +const healthCheckTimeout = 5 * time.Second + const ( // EnvMaxRecvMsgSize overrides the default gRPC max receive message size (4 MB) // for the management client connection. Value is in bytes. @@ -532,7 +534,7 @@ func (c *GrpcClient) IsHealthy() bool { case connectivity.Ready: } - ctx, cancel := context.WithTimeout(c.ctx, 1*time.Second) + ctx, cancel := context.WithTimeout(c.ctx, healthCheckTimeout) defer cancel() _, err := c.realClient.GetServerKey(ctx, &proto.Empty{}) diff --git a/shared/signal/client/grpc.go b/shared/signal/client/grpc.go index 5368b57a2..d0f598dd7 100644 --- a/shared/signal/client/grpc.go +++ b/shared/signal/client/grpc.go @@ -23,6 +23,8 @@ import ( "github.com/netbirdio/netbird/util/wsproxy" ) +const healthCheckTimeout = 5 * time.Second + // ConnStateNotifier is a wrapper interface of the status recorder type ConnStateNotifier interface { MarkSignalDisconnected(error) @@ -263,7 +265,7 @@ func (c *GrpcClient) IsHealthy() bool { case connectivity.Ready: } - ctx, cancel := context.WithTimeout(c.ctx, 1*time.Second) + ctx, cancel := context.WithTimeout(c.ctx, healthCheckTimeout) defer cancel() _, err := c.realClient.Send(ctx, &proto.EncryptedMessage{ Key: c.key.PublicKey().String(), From b6038e8acda35545ff551847458bd191f2f008c3 Mon Sep 17 00:00:00 2001 From: Vlad <4941176+crn4@users.noreply.github.com> Date: Thu, 23 Apr 2026 15:13:22 +0200 Subject: [PATCH 19/25] [management] refactor: changeable pat rate limiting (#5946) --- management/internals/server/boot.go | 12 +- management/server/http/handler.go | 44 +---- .../server/http/middleware/auth_middleware.go | 13 +- .../http/middleware/auth_middleware_test.go | 22 ++- .../server/http/middleware/rate_limiter.go | 97 ++++++++++ .../http/middleware/rate_limiter_test.go | 171 ++++++++++++++++++ .../testing/testing_tools/channel/channel.go | 4 +- 7 files changed, 304 insertions(+), 59 deletions(-) diff --git a/management/internals/server/boot.go b/management/internals/server/boot.go index 24dfb641b..2b40c0aad 100644 --- a/management/internals/server/boot.go +++ b/management/internals/server/boot.go @@ -30,6 +30,7 @@ import ( nbcache "github.com/netbirdio/netbird/management/server/cache" nbContext "github.com/netbirdio/netbird/management/server/context" nbhttp "github.com/netbirdio/netbird/management/server/http" + "github.com/netbirdio/netbird/management/server/http/middleware" "github.com/netbirdio/netbird/management/server/store" "github.com/netbirdio/netbird/management/server/telemetry" mgmtProto "github.com/netbirdio/netbird/shared/management/proto" @@ -109,7 +110,7 @@ func (s *BaseServer) EventStore() activity.Store { func (s *BaseServer) APIHandler() http.Handler { return Create(s, func() http.Handler { - httpAPIHandler, err := nbhttp.NewAPIHandler(context.Background(), s.AccountManager(), s.NetworksManager(), s.ResourcesManager(), s.RoutesManager(), s.GroupsManager(), s.GeoLocationManager(), s.AuthManager(), s.Metrics(), s.IntegratedValidator(), s.ProxyController(), s.PermissionsManager(), s.PeersManager(), s.SettingsManager(), s.ZonesManager(), s.RecordsManager(), s.NetworkMapController(), s.IdpManager(), s.ServiceManager(), s.ReverseProxyDomainManager(), s.AccessLogsManager(), s.ReverseProxyGRPCServer(), s.Config.ReverseProxy.TrustedHTTPProxies) + httpAPIHandler, err := nbhttp.NewAPIHandler(context.Background(), s.AccountManager(), s.NetworksManager(), s.ResourcesManager(), s.RoutesManager(), s.GroupsManager(), s.GeoLocationManager(), s.AuthManager(), s.Metrics(), s.IntegratedValidator(), s.ProxyController(), s.PermissionsManager(), s.PeersManager(), s.SettingsManager(), s.ZonesManager(), s.RecordsManager(), s.NetworkMapController(), s.IdpManager(), s.ServiceManager(), s.ReverseProxyDomainManager(), s.AccessLogsManager(), s.ReverseProxyGRPCServer(), s.Config.ReverseProxy.TrustedHTTPProxies, s.RateLimiter()) if err != nil { log.Fatalf("failed to create API handler: %v", err) } @@ -117,6 +118,15 @@ func (s *BaseServer) APIHandler() http.Handler { }) } +func (s *BaseServer) RateLimiter() *middleware.APIRateLimiter { + return Create(s, func() *middleware.APIRateLimiter { + cfg, enabled := middleware.RateLimiterConfigFromEnv() + limiter := middleware.NewAPIRateLimiter(cfg) + limiter.SetEnabled(enabled) + return limiter + }) +} + func (s *BaseServer) GRPCServer() *grpc.Server { return Create(s, func() *grpc.Server { trustedPeers := s.Config.ReverseProxy.TrustedPeers diff --git a/management/server/http/handler.go b/management/server/http/handler.go index ad36b9d46..56b2d8203 100644 --- a/management/server/http/handler.go +++ b/management/server/http/handler.go @@ -5,9 +5,6 @@ import ( "fmt" "net/http" "net/netip" - "os" - "strconv" - "time" "github.com/gorilla/mux" "github.com/rs/cors" @@ -66,14 +63,11 @@ import ( ) const ( - apiPrefix = "/api" - rateLimitingEnabledKey = "NB_API_RATE_LIMITING_ENABLED" - rateLimitingBurstKey = "NB_API_RATE_LIMITING_BURST" - rateLimitingRPMKey = "NB_API_RATE_LIMITING_RPM" + apiPrefix = "/api" ) // NewAPIHandler creates the Management service HTTP API handler registering all the available endpoints. -func NewAPIHandler(ctx context.Context, accountManager account.Manager, networksManager nbnetworks.Manager, resourceManager resources.Manager, routerManager routers.Manager, groupsManager nbgroups.Manager, LocationManager geolocation.Geolocation, authManager auth.Manager, appMetrics telemetry.AppMetrics, integratedValidator integrated_validator.IntegratedValidator, proxyController port_forwarding.Controller, permissionsManager permissions.Manager, peersManager nbpeers.Manager, settingsManager settings.Manager, zManager zones.Manager, rManager records.Manager, networkMapController network_map.Controller, idpManager idpmanager.Manager, serviceManager service.Manager, reverseProxyDomainManager *manager.Manager, reverseProxyAccessLogsManager accesslogs.Manager, proxyGRPCServer *nbgrpc.ProxyServiceServer, trustedHTTPProxies []netip.Prefix) (http.Handler, error) { +func NewAPIHandler(ctx context.Context, accountManager account.Manager, networksManager nbnetworks.Manager, resourceManager resources.Manager, routerManager routers.Manager, groupsManager nbgroups.Manager, LocationManager geolocation.Geolocation, authManager auth.Manager, appMetrics telemetry.AppMetrics, integratedValidator integrated_validator.IntegratedValidator, proxyController port_forwarding.Controller, permissionsManager permissions.Manager, peersManager nbpeers.Manager, settingsManager settings.Manager, zManager zones.Manager, rManager records.Manager, networkMapController network_map.Controller, idpManager idpmanager.Manager, serviceManager service.Manager, reverseProxyDomainManager *manager.Manager, reverseProxyAccessLogsManager accesslogs.Manager, proxyGRPCServer *nbgrpc.ProxyServiceServer, trustedHTTPProxies []netip.Prefix, rateLimiter *middleware.APIRateLimiter) (http.Handler, error) { // Register bypass paths for unauthenticated endpoints if err := bypass.AddBypassPath("/api/instance"); err != nil { @@ -94,34 +88,10 @@ func NewAPIHandler(ctx context.Context, accountManager account.Manager, networks return nil, fmt.Errorf("failed to add bypass path: %w", err) } - var rateLimitingConfig *middleware.RateLimiterConfig - if os.Getenv(rateLimitingEnabledKey) == "true" { - rpm := 6 - if v := os.Getenv(rateLimitingRPMKey); v != "" { - value, err := strconv.Atoi(v) - if err != nil { - log.Warnf("parsing %s env var: %v, using default %d", rateLimitingRPMKey, err, rpm) - } else { - rpm = value - } - } - - burst := 500 - if v := os.Getenv(rateLimitingBurstKey); v != "" { - value, err := strconv.Atoi(v) - if err != nil { - log.Warnf("parsing %s env var: %v, using default %d", rateLimitingBurstKey, err, burst) - } else { - burst = value - } - } - - rateLimitingConfig = &middleware.RateLimiterConfig{ - RequestsPerMinute: float64(rpm), - Burst: burst, - CleanupInterval: 6 * time.Hour, - LimiterTTL: 24 * time.Hour, - } + if rateLimiter == nil { + log.Warn("NewAPIHandler: nil rate limiter, rate limiting disabled") + rateLimiter = middleware.NewAPIRateLimiter(nil) + rateLimiter.SetEnabled(false) } authMiddleware := middleware.NewAuthMiddleware( @@ -129,7 +99,7 @@ func NewAPIHandler(ctx context.Context, accountManager account.Manager, networks accountManager.GetAccountIDFromUserAuth, accountManager.SyncUserJWTGroups, accountManager.GetUserFromUserAuth, - rateLimitingConfig, + rateLimiter, appMetrics.GetMeter(), ) diff --git a/management/server/http/middleware/auth_middleware.go b/management/server/http/middleware/auth_middleware.go index 8106380f2..6d075d9c2 100644 --- a/management/server/http/middleware/auth_middleware.go +++ b/management/server/http/middleware/auth_middleware.go @@ -43,14 +43,9 @@ func NewAuthMiddleware( ensureAccount EnsureAccountFunc, syncUserJWTGroups SyncUserJWTGroupsFunc, getUserFromUserAuth GetUserFromUserAuthFunc, - rateLimiterConfig *RateLimiterConfig, + rateLimiter *APIRateLimiter, meter metric.Meter, ) *AuthMiddleware { - var rateLimiter *APIRateLimiter - if rateLimiterConfig != nil { - rateLimiter = NewAPIRateLimiter(rateLimiterConfig) - } - var patUsageTracker *PATUsageTracker if meter != nil { var err error @@ -181,10 +176,8 @@ func (m *AuthMiddleware) checkPATFromRequest(r *http.Request, authHeaderParts [] m.patUsageTracker.IncrementUsage(token) } - if m.rateLimiter != nil && !isTerraformRequest(r) { - if !m.rateLimiter.Allow(token) { - return status.Errorf(status.TooManyRequests, "too many requests") - } + if !isTerraformRequest(r) && !m.rateLimiter.Allow(token) { + return status.Errorf(status.TooManyRequests, "too many requests") } ctx := r.Context() diff --git a/management/server/http/middleware/auth_middleware_test.go b/management/server/http/middleware/auth_middleware_test.go index f397c63a4..8f736fbfd 100644 --- a/management/server/http/middleware/auth_middleware_test.go +++ b/management/server/http/middleware/auth_middleware_test.go @@ -196,6 +196,8 @@ func TestAuthMiddleware_Handler(t *testing.T) { GetPATInfoFunc: mockGetAccountInfoFromPAT, } + disabledLimiter := NewAPIRateLimiter(nil) + disabledLimiter.SetEnabled(false) authMiddleware := NewAuthMiddleware( mockAuth, func(ctx context.Context, userAuth nbauth.UserAuth) (string, string, error) { @@ -207,7 +209,7 @@ func TestAuthMiddleware_Handler(t *testing.T) { func(ctx context.Context, userAuth nbauth.UserAuth) (*types.User, error) { return &types.User{}, nil }, - nil, + disabledLimiter, nil, ) @@ -266,7 +268,7 @@ func TestAuthMiddleware_RateLimiting(t *testing.T) { func(ctx context.Context, userAuth nbauth.UserAuth) (*types.User, error) { return &types.User{}, nil }, - rateLimitConfig, + NewAPIRateLimiter(rateLimitConfig), nil, ) @@ -318,7 +320,7 @@ func TestAuthMiddleware_RateLimiting(t *testing.T) { func(ctx context.Context, userAuth nbauth.UserAuth) (*types.User, error) { return &types.User{}, nil }, - rateLimitConfig, + NewAPIRateLimiter(rateLimitConfig), nil, ) @@ -361,7 +363,7 @@ func TestAuthMiddleware_RateLimiting(t *testing.T) { func(ctx context.Context, userAuth nbauth.UserAuth) (*types.User, error) { return &types.User{}, nil }, - rateLimitConfig, + NewAPIRateLimiter(rateLimitConfig), nil, ) @@ -405,7 +407,7 @@ func TestAuthMiddleware_RateLimiting(t *testing.T) { func(ctx context.Context, userAuth nbauth.UserAuth) (*types.User, error) { return &types.User{}, nil }, - rateLimitConfig, + NewAPIRateLimiter(rateLimitConfig), nil, ) @@ -469,7 +471,7 @@ func TestAuthMiddleware_RateLimiting(t *testing.T) { func(ctx context.Context, userAuth nbauth.UserAuth) (*types.User, error) { return &types.User{}, nil }, - rateLimitConfig, + NewAPIRateLimiter(rateLimitConfig), nil, ) @@ -528,7 +530,7 @@ func TestAuthMiddleware_RateLimiting(t *testing.T) { func(ctx context.Context, userAuth nbauth.UserAuth) (*types.User, error) { return &types.User{}, nil }, - rateLimitConfig, + NewAPIRateLimiter(rateLimitConfig), nil, ) @@ -583,7 +585,7 @@ func TestAuthMiddleware_RateLimiting(t *testing.T) { func(ctx context.Context, userAuth nbauth.UserAuth) (*types.User, error) { return &types.User{}, nil }, - rateLimitConfig, + NewAPIRateLimiter(rateLimitConfig), nil, ) @@ -670,6 +672,8 @@ func TestAuthMiddleware_Handler_Child(t *testing.T) { GetPATInfoFunc: mockGetAccountInfoFromPAT, } + disabledLimiter := NewAPIRateLimiter(nil) + disabledLimiter.SetEnabled(false) authMiddleware := NewAuthMiddleware( mockAuth, func(ctx context.Context, userAuth nbauth.UserAuth) (string, string, error) { @@ -681,7 +685,7 @@ func TestAuthMiddleware_Handler_Child(t *testing.T) { func(ctx context.Context, userAuth nbauth.UserAuth) (*types.User, error) { return &types.User{}, nil }, - nil, + disabledLimiter, nil, ) diff --git a/management/server/http/middleware/rate_limiter.go b/management/server/http/middleware/rate_limiter.go index 936b34319..bfd44afee 100644 --- a/management/server/http/middleware/rate_limiter.go +++ b/management/server/http/middleware/rate_limiter.go @@ -4,14 +4,27 @@ import ( "context" "net" "net/http" + "os" + "strconv" "sync" + "sync/atomic" "time" + log "github.com/sirupsen/logrus" "golang.org/x/time/rate" "github.com/netbirdio/netbird/shared/management/http/util" ) +const ( + RateLimitingEnabledEnv = "NB_API_RATE_LIMITING_ENABLED" + RateLimitingBurstEnv = "NB_API_RATE_LIMITING_BURST" + RateLimitingRPMEnv = "NB_API_RATE_LIMITING_RPM" + + defaultAPIRPM = 6 + defaultAPIBurst = 500 +) + // RateLimiterConfig holds configuration for the API rate limiter type RateLimiterConfig struct { // RequestsPerMinute defines the rate at which tokens are replenished @@ -34,6 +47,43 @@ func DefaultRateLimiterConfig() *RateLimiterConfig { } } +func RateLimiterConfigFromEnv() (cfg *RateLimiterConfig, enabled bool) { + rpm := defaultAPIRPM + if v := os.Getenv(RateLimitingRPMEnv); v != "" { + value, err := strconv.Atoi(v) + if err != nil { + log.Warnf("parsing %s env var: %v, using default %d", RateLimitingRPMEnv, err, rpm) + } else { + rpm = value + } + } + if rpm <= 0 { + log.Warnf("%s=%d is non-positive, using default %d", RateLimitingRPMEnv, rpm, defaultAPIRPM) + rpm = defaultAPIRPM + } + + burst := defaultAPIBurst + if v := os.Getenv(RateLimitingBurstEnv); v != "" { + value, err := strconv.Atoi(v) + if err != nil { + log.Warnf("parsing %s env var: %v, using default %d", RateLimitingBurstEnv, err, burst) + } else { + burst = value + } + } + if burst <= 0 { + log.Warnf("%s=%d is non-positive, using default %d", RateLimitingBurstEnv, burst, defaultAPIBurst) + burst = defaultAPIBurst + } + + return &RateLimiterConfig{ + RequestsPerMinute: float64(rpm), + Burst: burst, + CleanupInterval: 6 * time.Hour, + LimiterTTL: 24 * time.Hour, + }, os.Getenv(RateLimitingEnabledEnv) == "true" +} + // limiterEntry holds a rate limiter and its last access time type limiterEntry struct { limiter *rate.Limiter @@ -46,6 +96,7 @@ type APIRateLimiter struct { limiters map[string]*limiterEntry mu sync.RWMutex stopChan chan struct{} + enabled atomic.Bool } // NewAPIRateLimiter creates a new API rate limiter with the given configuration @@ -59,14 +110,53 @@ func NewAPIRateLimiter(config *RateLimiterConfig) *APIRateLimiter { limiters: make(map[string]*limiterEntry), stopChan: make(chan struct{}), } + rl.enabled.Store(true) go rl.cleanupLoop() return rl } +func (rl *APIRateLimiter) SetEnabled(enabled bool) { + rl.enabled.Store(enabled) +} + +func (rl *APIRateLimiter) Enabled() bool { + return rl.enabled.Load() +} + +func (rl *APIRateLimiter) UpdateConfig(config *RateLimiterConfig) { + if config == nil { + return + } + if config.RequestsPerMinute <= 0 || config.Burst <= 0 { + log.Warnf("UpdateConfig: ignoring invalid rpm=%v burst=%d", config.RequestsPerMinute, config.Burst) + return + } + + newRPS := rate.Limit(config.RequestsPerMinute / 60.0) + newBurst := config.Burst + + rl.mu.Lock() + rl.config.RequestsPerMinute = config.RequestsPerMinute + rl.config.Burst = newBurst + snapshot := make([]*rate.Limiter, 0, len(rl.limiters)) + for _, entry := range rl.limiters { + snapshot = append(snapshot, entry.limiter) + } + rl.mu.Unlock() + + for _, l := range snapshot { + l.SetLimit(newRPS) + l.SetBurst(newBurst) + } +} + // Allow checks if a request for the given key (token) is allowed func (rl *APIRateLimiter) Allow(key string) bool { + if !rl.enabled.Load() { + return true + } limiter := rl.getLimiter(key) return limiter.Allow() } @@ -74,6 +164,9 @@ func (rl *APIRateLimiter) Allow(key string) bool { // Wait blocks until the rate limiter allows another request for the given key // Returns an error if the context is canceled func (rl *APIRateLimiter) Wait(ctx context.Context, key string) error { + if !rl.enabled.Load() { + return nil + } limiter := rl.getLimiter(key) return limiter.Wait(ctx) } @@ -153,6 +246,10 @@ func (rl *APIRateLimiter) Reset(key string) { // Returns 429 Too Many Requests if the rate limit is exceeded. func (rl *APIRateLimiter) Middleware(next http.Handler) http.Handler { return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if !rl.enabled.Load() { + next.ServeHTTP(w, r) + return + } clientIP := getClientIP(r) if !rl.Allow(clientIP) { util.WriteErrorResponse("rate limit exceeded, please try again later", http.StatusTooManyRequests, w) diff --git a/management/server/http/middleware/rate_limiter_test.go b/management/server/http/middleware/rate_limiter_test.go index 68f804e57..4b97d1874 100644 --- a/management/server/http/middleware/rate_limiter_test.go +++ b/management/server/http/middleware/rate_limiter_test.go @@ -1,8 +1,10 @@ package middleware import ( + "fmt" "net/http" "net/http/httptest" + "sync" "testing" "time" @@ -156,3 +158,172 @@ func TestAPIRateLimiter_Reset(t *testing.T) { // Should be allowed again assert.True(t, rl.Allow("test-key")) } + +func TestAPIRateLimiter_SetEnabled(t *testing.T) { + rl := NewAPIRateLimiter(&RateLimiterConfig{ + RequestsPerMinute: 60, + Burst: 1, + CleanupInterval: time.Minute, + LimiterTTL: time.Minute, + }) + defer rl.Stop() + + assert.True(t, rl.Allow("key")) + assert.False(t, rl.Allow("key"), "burst exhausted while enabled") + + rl.SetEnabled(false) + assert.False(t, rl.Enabled()) + for i := 0; i < 5; i++ { + assert.True(t, rl.Allow("key"), "disabled limiter must always allow") + } + + rl.SetEnabled(true) + assert.True(t, rl.Enabled()) + assert.False(t, rl.Allow("key"), "re-enabled limiter retains prior bucket state") +} + +func TestAPIRateLimiter_UpdateConfig(t *testing.T) { + rl := NewAPIRateLimiter(&RateLimiterConfig{ + RequestsPerMinute: 60, + Burst: 2, + CleanupInterval: time.Minute, + LimiterTTL: time.Minute, + }) + defer rl.Stop() + + assert.True(t, rl.Allow("k1")) + assert.True(t, rl.Allow("k1")) + assert.False(t, rl.Allow("k1"), "burst=2 exhausted") + + rl.UpdateConfig(&RateLimiterConfig{ + RequestsPerMinute: 60, + Burst: 10, + CleanupInterval: time.Minute, + LimiterTTL: time.Minute, + }) + + // New burst applies to existing keys in place; bucket refills up to new burst over time, + // but importantly newly-added keys use the updated config immediately. + assert.True(t, rl.Allow("k2")) + for i := 0; i < 9; i++ { + assert.True(t, rl.Allow("k2")) + } + assert.False(t, rl.Allow("k2"), "new burst=10 exhausted") +} + +func TestAPIRateLimiter_UpdateConfig_NilIgnored(t *testing.T) { + rl := NewAPIRateLimiter(&RateLimiterConfig{ + RequestsPerMinute: 60, + Burst: 1, + CleanupInterval: time.Minute, + LimiterTTL: time.Minute, + }) + defer rl.Stop() + + rl.UpdateConfig(nil) // must not panic or zero the config + + assert.True(t, rl.Allow("k")) + assert.False(t, rl.Allow("k")) +} + +func TestAPIRateLimiter_UpdateConfig_NonPositiveIgnored(t *testing.T) { + rl := NewAPIRateLimiter(&RateLimiterConfig{ + RequestsPerMinute: 60, + Burst: 1, + CleanupInterval: time.Minute, + LimiterTTL: time.Minute, + }) + defer rl.Stop() + + assert.True(t, rl.Allow("k")) + assert.False(t, rl.Allow("k")) + + rl.UpdateConfig(&RateLimiterConfig{RequestsPerMinute: 0, Burst: 0, CleanupInterval: time.Minute, LimiterTTL: time.Minute}) + rl.UpdateConfig(&RateLimiterConfig{RequestsPerMinute: -1, Burst: 5, CleanupInterval: time.Minute, LimiterTTL: time.Minute}) + rl.UpdateConfig(&RateLimiterConfig{RequestsPerMinute: 60, Burst: -1, CleanupInterval: time.Minute, LimiterTTL: time.Minute}) + + rl.Reset("k") + assert.True(t, rl.Allow("k")) + assert.False(t, rl.Allow("k"), "burst should still be 1 — invalid UpdateConfig calls were ignored") +} + +func TestAPIRateLimiter_ConcurrentAllowAndUpdate(t *testing.T) { + rl := NewAPIRateLimiter(&RateLimiterConfig{ + RequestsPerMinute: 600, + Burst: 10, + CleanupInterval: time.Minute, + LimiterTTL: time.Minute, + }) + defer rl.Stop() + + var wg sync.WaitGroup + stop := make(chan struct{}) + + for i := 0; i < 8; i++ { + wg.Add(1) + go func(id int) { + defer wg.Done() + key := fmt.Sprintf("k%d", id) + for { + select { + case <-stop: + return + default: + rl.Allow(key) + } + } + }(i) + } + + wg.Add(1) + go func() { + defer wg.Done() + for i := 0; i < 200; i++ { + select { + case <-stop: + return + default: + rl.UpdateConfig(&RateLimiterConfig{ + RequestsPerMinute: float64(30 + (i % 90)), + Burst: 1 + (i % 20), + CleanupInterval: time.Minute, + LimiterTTL: time.Minute, + }) + rl.SetEnabled(i%2 == 0) + } + } + }() + + time.Sleep(100 * time.Millisecond) + close(stop) + wg.Wait() +} + +func TestRateLimiterConfigFromEnv(t *testing.T) { + t.Setenv(RateLimitingEnabledEnv, "true") + t.Setenv(RateLimitingRPMEnv, "42") + t.Setenv(RateLimitingBurstEnv, "7") + + cfg, enabled := RateLimiterConfigFromEnv() + assert.True(t, enabled) + assert.Equal(t, float64(42), cfg.RequestsPerMinute) + assert.Equal(t, 7, cfg.Burst) + + t.Setenv(RateLimitingEnabledEnv, "false") + _, enabled = RateLimiterConfigFromEnv() + assert.False(t, enabled) + + t.Setenv(RateLimitingEnabledEnv, "") + t.Setenv(RateLimitingRPMEnv, "") + t.Setenv(RateLimitingBurstEnv, "") + cfg, enabled = RateLimiterConfigFromEnv() + assert.False(t, enabled) + assert.Equal(t, float64(defaultAPIRPM), cfg.RequestsPerMinute) + assert.Equal(t, defaultAPIBurst, cfg.Burst) + + t.Setenv(RateLimitingRPMEnv, "0") + t.Setenv(RateLimitingBurstEnv, "-5") + cfg, _ = RateLimiterConfigFromEnv() + assert.Equal(t, float64(defaultAPIRPM), cfg.RequestsPerMinute, "non-positive rpm must fall back to default") + assert.Equal(t, defaultAPIBurst, cfg.Burst, "non-positive burst must fall back to default") +} diff --git a/management/server/http/testing/testing_tools/channel/channel.go b/management/server/http/testing/testing_tools/channel/channel.go index 0203d6177..1a8b83c7e 100644 --- a/management/server/http/testing/testing_tools/channel/channel.go +++ b/management/server/http/testing/testing_tools/channel/channel.go @@ -135,7 +135,7 @@ func BuildApiBlackBoxWithDBState(t testing_tools.TB, sqlFile string, expectedPee customZonesManager := zonesManager.NewManager(store, am, permissionsManager, "") zoneRecordsManager := recordsManager.NewManager(store, am, permissionsManager) - apiHandler, err := http2.NewAPIHandler(context.Background(), am, networksManager, resourcesManager, routersManager, groupsManager, geoMock, authManagerMock, metrics, validatorMock, proxyController, permissionsManager, peersManager, settingsManager, customZonesManager, zoneRecordsManager, networkMapController, nil, serviceManager, nil, nil, nil, nil) + apiHandler, err := http2.NewAPIHandler(context.Background(), am, networksManager, resourcesManager, routersManager, groupsManager, geoMock, authManagerMock, metrics, validatorMock, proxyController, permissionsManager, peersManager, settingsManager, customZonesManager, zoneRecordsManager, networkMapController, nil, serviceManager, nil, nil, nil, nil, nil) if err != nil { t.Fatalf("Failed to create API handler: %v", err) } @@ -264,7 +264,7 @@ func BuildApiBlackBoxWithDBStateAndPeerChannel(t testing_tools.TB, sqlFile strin customZonesManager := zonesManager.NewManager(store, am, permissionsManager, "") zoneRecordsManager := recordsManager.NewManager(store, am, permissionsManager) - apiHandler, err := http2.NewAPIHandler(context.Background(), am, networksManager, resourcesManager, routersManager, groupsManager, geoMock, authManagerMock, metrics, validatorMock, proxyController, permissionsManager, peersManager, settingsManager, customZonesManager, zoneRecordsManager, networkMapController, nil, serviceManager, nil, nil, nil, nil) + apiHandler, err := http2.NewAPIHandler(context.Background(), am, networksManager, resourcesManager, routersManager, groupsManager, geoMock, authManagerMock, metrics, validatorMock, proxyController, permissionsManager, peersManager, settingsManager, customZonesManager, zoneRecordsManager, networkMapController, nil, serviceManager, nil, nil, nil, nil, nil) if err != nil { t.Fatalf("Failed to create API handler: %v", err) } From fa0d58d093a883ddd6183c52e1b92e0888b2bafd Mon Sep 17 00:00:00 2001 From: Pascal Fischer <32096965+pascal-fischer@users.noreply.github.com> Date: Thu, 23 Apr 2026 16:01:54 +0200 Subject: [PATCH 20/25] [management] exclude peers for expiration job that have already been marked expired (#5970) --- management/server/account_test.go | 23 +++++++++++ management/server/management_proto_test.go | 4 +- management/server/peer.go | 4 ++ management/server/store/sql_store.go | 2 +- management/server/store/sql_store_test.go | 40 ++++++++++++++----- .../testdata/store_with_expired_peers.sql | 1 + 6 files changed, 62 insertions(+), 12 deletions(-) diff --git a/management/server/account_test.go b/management/server/account_test.go index 4453d064e..bcc73d52f 100644 --- a/management/server/account_test.go +++ b/management/server/account_test.go @@ -2311,6 +2311,29 @@ func TestAccount_GetExpiredPeers(t *testing.T) { } } +func TestGetExpiredPeers_SkipsAlreadyExpired(t *testing.T) { + ctx := context.Background() + + testStore, cleanUp, err := store.NewTestStoreFromSQL(ctx, "testdata/store_with_expired_peers.sql", t.TempDir()) + t.Cleanup(cleanUp) + require.NoError(t, err) + + accountID := "bf1c8084-ba50-4ce7-9439-34653001fc3b" + + // Verify the already-expired peer is excluded at the store level + peers, err := testStore.GetAccountPeersWithExpiration(ctx, store.LockingStrengthNone, accountID) + require.NoError(t, err) + + for _, peer := range peers { + assert.NotEqual(t, "cg05lnblo1hkg2j514p0", peer.ID, "already expired peer should be excluded by the store query") + assert.False(t, peer.Status.LoginExpired, "returned peers should not already be marked as login expired") + } + + // Only the non-expired peer with expiration enabled should be returned + require.Len(t, peers, 1) + assert.Equal(t, "notexpired01", peers[0].ID) +} + func TestAccount_GetInactivePeers(t *testing.T) { type test struct { name string diff --git a/management/server/management_proto_test.go b/management/server/management_proto_test.go index 4e6eb0a33..18d85315d 100644 --- a/management/server/management_proto_test.go +++ b/management/server/management_proto_test.go @@ -267,8 +267,8 @@ func Test_SyncProtocol(t *testing.T) { } // expired peers come separately. - if len(networkMap.GetOfflinePeers()) != 1 { - t.Fatal("expecting SyncResponse to have NetworkMap with 1 offline peer") + if len(networkMap.GetOfflinePeers()) != 2 { + t.Fatal("expecting SyncResponse to have NetworkMap with 2 offline peer") } expiredPeerPubKey := "RlSy2vzoG2HyMBTUImXOiVhCBiiBa5qD5xzMxkiFDW4=" diff --git a/management/server/peer.go b/management/server/peer.go index a02e34e0d..a95ae17a3 100644 --- a/management/server/peer.go +++ b/management/server/peer.go @@ -1405,6 +1405,10 @@ func (am *DefaultAccountManager) getExpiredPeers(ctx context.Context, accountID var peers []*nbpeer.Peer for _, peer := range peersWithExpiry { + if peer.Status.LoginExpired { + continue + } + expired, _ := peer.LoginExpired(settings.PeerLoginExpiration) if expired { peers = append(peers, peer) diff --git a/management/server/store/sql_store.go b/management/server/store/sql_store.go index 8189548b7..0ff57b752 100644 --- a/management/server/store/sql_store.go +++ b/management/server/store/sql_store.go @@ -3310,7 +3310,7 @@ func (s *SqlStore) GetAccountPeersWithExpiration(ctx context.Context, lockStreng var peers []*nbpeer.Peer result := tx. - Where("login_expiration_enabled = ? AND user_id IS NOT NULL AND user_id != ''", true). + Where("login_expiration_enabled = ? AND peer_status_login_expired != ? AND user_id IS NOT NULL AND user_id != ''", true, true). Find(&peers, accountIDCondition, accountID) if err := result.Error; err != nil { log.WithContext(ctx).Errorf("failed to get peers with expiration from the store: %s", result.Error) diff --git a/management/server/store/sql_store_test.go b/management/server/store/sql_store_test.go index 8ea6c2ae5..5a5616abc 100644 --- a/management/server/store/sql_store_test.go +++ b/management/server/store/sql_store_test.go @@ -2729,7 +2729,7 @@ func TestSqlStore_GetAccountPeers(t *testing.T) { { name: "should retrieve peers for an existing account ID", accountID: "bf1c8084-ba50-4ce7-9439-34653001fc3b", - expectedCount: 4, + expectedCount: 5, }, { name: "should return no peers for a non-existing account ID", @@ -2751,7 +2751,7 @@ func TestSqlStore_GetAccountPeers(t *testing.T) { name: "should filter peers by partial name", accountID: "bf1c8084-ba50-4ce7-9439-34653001fc3b", nameFilter: "host", - expectedCount: 3, + expectedCount: 4, }, { name: "should filter peers by ip", @@ -2777,14 +2777,16 @@ func TestSqlStore_GetAccountPeersWithExpiration(t *testing.T) { require.NoError(t, err) tests := []struct { - name string - accountID string - expectedCount int + name string + accountID string + expectedCount int + expectedPeerIDs []string }{ { - name: "should retrieve peers with expiration for an existing account ID", - accountID: "bf1c8084-ba50-4ce7-9439-34653001fc3b", - expectedCount: 1, + name: "should retrieve only non-expired peers with expiration enabled", + accountID: "bf1c8084-ba50-4ce7-9439-34653001fc3b", + expectedCount: 1, + expectedPeerIDs: []string{"notexpired01"}, }, { name: "should return no peers with expiration for a non-existing account ID", @@ -2803,10 +2805,30 @@ func TestSqlStore_GetAccountPeersWithExpiration(t *testing.T) { peers, err := store.GetAccountPeersWithExpiration(context.Background(), LockingStrengthNone, tt.accountID) require.NoError(t, err) require.Len(t, peers, tt.expectedCount) + for i, peer := range peers { + assert.Equal(t, tt.expectedPeerIDs[i], peer.ID) + } }) } } +func TestSqlStore_GetAccountPeersWithExpiration_ExcludesAlreadyExpired(t *testing.T) { + store, cleanup, err := NewTestStoreFromSQL(context.Background(), "../testdata/store_with_expired_peers.sql", t.TempDir()) + t.Cleanup(cleanup) + require.NoError(t, err) + + accountID := "bf1c8084-ba50-4ce7-9439-34653001fc3b" + + peers, err := store.GetAccountPeersWithExpiration(context.Background(), LockingStrengthNone, accountID) + require.NoError(t, err) + + // Verify the already-expired peer (cg05lnblo1hkg2j514p0) is not returned + for _, peer := range peers { + assert.NotEqual(t, "cg05lnblo1hkg2j514p0", peer.ID, "already expired peer should not be returned") + assert.False(t, peer.Status.LoginExpired, "returned peers should not have LoginExpired set") + } +} + func TestSqlStore_GetAccountPeersWithInactivity(t *testing.T) { store, cleanup, err := NewTestStoreFromSQL(context.Background(), "../testdata/store_with_expired_peers.sql", t.TempDir()) t.Cleanup(cleanup) @@ -2887,7 +2909,7 @@ func TestSqlStore_GetUserPeers(t *testing.T) { name: "should retrieve peers for another valid account ID and user ID", accountID: "bf1c8084-ba50-4ce7-9439-34653001fc3b", userID: "edafee4e-63fb-11ec-90d6-0242ac120003", - expectedCount: 2, + expectedCount: 3, }, { name: "should return no peers for existing account ID with empty user ID", diff --git a/management/server/testdata/store_with_expired_peers.sql b/management/server/testdata/store_with_expired_peers.sql index dfcaeee6f..189bd1262 100644 --- a/management/server/testdata/store_with_expired_peers.sql +++ b/management/server/testdata/store_with_expired_peers.sql @@ -31,6 +31,7 @@ INSERT INTO peers VALUES('cfvprsrlo1hqoo49ohog','bf1c8084-ba50-4ce7-9439-3465300 INSERT INTO peers VALUES('cg05lnblo1hkg2j514p0','bf1c8084-ba50-4ce7-9439-34653001fc3b','RlSy2vzoG2HyMBTUImXOiVhCBiiBa5qD5xzMxkiFDW4=','','"100.64.39.54"','expiredhost','linux','Linux','22.04','x86_64','Ubuntu','','development','','',NULL,'','','','{"Cloud":"","Platform":""}',NULL,'expiredhost','expiredhost','2023-03-02 09:19:57.276717255+01:00',0,1,0,'edafee4e-63fb-11ec-90d6-0242ac120003','ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIMbK5ZXJsGOOWoBT4OmkPtgdPZe2Q7bDuS/zjn2CZxhK',0,1,0,'2023-03-02 09:14:21.791679181+01:00','2024-10-02 17:00:32.527947+02:00',0,'""','','',0); INSERT INTO peers VALUES('cg3161rlo1hs9cq94gdg','bf1c8084-ba50-4ce7-9439-34653001fc3b','mVABSKj28gv+JRsf7e0NEGKgSOGTfU/nPB2cpuG56HU=','','"100.64.117.96"','testhost','linux','Linux','22.04','x86_64','Ubuntu','','development','','',NULL,'','','','{"Cloud":"","Platform":""}',NULL,'testhost','testhost','2023-03-06 18:21:27.252010027+01:00',0,0,0,'edafee4e-63fb-11ec-90d6-0242ac120003','ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAINWvvUkFFcrj48CWTkNUb/do/n52i1L5dH4DhGu+4ZuM',0,0,0,'2023-03-07 09:02:47.442857106+01:00','2024-10-02 17:00:32.527947+02:00',0,'""','','',0); INSERT INTO peers VALUES('csrnkiq7qv9d8aitqd50','bf1c8084-ba50-4ce7-9439-34653001fc3b','nVABSKj28gv+JRsf7e0NEGKgSOGTfU/nPB2cpuG56HX=','','"100.64.117.97"','testhost','linux','Linux','22.04','x86_64','Ubuntu','','development','','',NULL,'','','','{"Cloud":"","Platform":""}',NULL,'testhost','testhost-1','2023-03-06 18:21:27.252010027+01:00',0,0,0,'f4f6d672-63fb-11ec-90d6-0242ac120003','ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAINWvvUkFFcrj48CWTkNUb/do/n52i1L5dH4DhGu+4ZuM',0,0,1,'2023-03-07 09:02:47.442857106+01:00','2024-10-02 17:00:32.527947+02:00',0,'""','','',0); +INSERT INTO peers VALUES('notexpired01','bf1c8084-ba50-4ce7-9439-34653001fc3b','oVABSKj28gv+JRsf7e0NEGKgSOGTfU/nPB2cpuG56HY=','','"100.64.117.98"','activehost','linux','Linux','22.04','x86_64','Ubuntu','','development','','',NULL,'','','','{"Cloud":"","Platform":""}',NULL,'activehost','activehost','2023-03-06 18:21:27.252010027+01:00',0,0,0,'edafee4e-63fb-11ec-90d6-0242ac120003','ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAINWvvUkFFcrj48CWTkNUb/do/n52i1L5dH4DhGu+4ZuM',0,1,0,'2023-03-07 09:02:47.442857106+01:00','2024-10-02 17:00:32.527947+02:00',0,'""','','',0); INSERT INTO users VALUES('f4f6d672-63fb-11ec-90d6-0242ac120003','bf1c8084-ba50-4ce7-9439-34653001fc3b','user',0,0,'','[]',0,NULL,'2024-10-02 17:00:32.528196+02:00','api',0,''); INSERT INTO users VALUES('edafee4e-63fb-11ec-90d6-0242ac120003','bf1c8084-ba50-4ce7-9439-34653001fc3b','admin',0,0,'','[]',0,NULL,'2024-10-02 17:00:32.528196+02:00','api',0,''); INSERT INTO installations VALUES(1,''); From c07c726ea7a7d6bc9b34c1a5dc138785c7bd1214 Mon Sep 17 00:00:00 2001 From: alsruf36 <33592711+alsruf36@users.noreply.github.com> Date: Fri, 24 Apr 2026 01:20:54 +0900 Subject: [PATCH 21/25] [proxy] Set session cookie path to root (#5915) --- proxy/internal/auth/middleware.go | 1 + proxy/internal/auth/middleware_test.go | 9 +++++++++ 2 files changed, 10 insertions(+) diff --git a/proxy/internal/auth/middleware.go b/proxy/internal/auth/middleware.go index 055e4510f..3b383f8b4 100644 --- a/proxy/internal/auth/middleware.go +++ b/proxy/internal/auth/middleware.go @@ -433,6 +433,7 @@ func setSessionCookie(w http.ResponseWriter, token string, expiration time.Durat http.SetCookie(w, &http.Cookie{ Name: auth.SessionCookieName, Value: token, + Path: "/", HttpOnly: true, Secure: true, SameSite: http.SameSiteLaxMode, diff --git a/proxy/internal/auth/middleware_test.go b/proxy/internal/auth/middleware_test.go index 16d09800c..2c93d7912 100644 --- a/proxy/internal/auth/middleware_test.go +++ b/proxy/internal/auth/middleware_test.go @@ -391,6 +391,15 @@ func TestProtect_SchemeAuthRedirectsWithCookie(t *testing.T) { assert.Equal(t, http.SameSiteLaxMode, sessionCookie.SameSite) } +func TestSetSessionCookieHasRootPath(t *testing.T) { + w := httptest.NewRecorder() + setSessionCookie(w, "test-token", time.Hour) + + cookies := w.Result().Cookies() + require.Len(t, cookies, 1) + assert.Equal(t, "/", cookies[0].Path, "session cookie must be scoped to root so it applies to all paths") +} + func TestProtect_FailedAuthDoesNotSetCookie(t *testing.T) { mw := NewMiddleware(log.StandardLogger(), nil, nil) kp := generateTestKeyPair(t) From f732b01a055d4de1fd736f2e8b42196c31f291a5 Mon Sep 17 00:00:00 2001 From: Zoltan Papp Date: Thu, 23 Apr 2026 21:19:21 +0200 Subject: [PATCH 22/25] [management] unify peer-update test timeout via constant (#5952) peerShouldReceiveUpdate waited 500ms for the expected update message, and every outer wrapper across the management/server test suite paired it with a 1s goroutine-drain timeout. Both were too tight for slower CI runners (MySQL, FreeBSD, loaded sqlite), producing intermittent "Timed out waiting for update message" failures in tests like TestDNSAccountPeersUpdate, TestPeerAccountPeersUpdate, and TestNameServerAccountPeersUpdate. Introduce peerUpdateTimeout (5s) next to the helper and use it both in the helper and in every outer wrapper so the two timeouts stay in sync. Only runs down on failure; passing tests return as soon as the channel delivers, so there is no slowdown on green runs. --- management/server/account_test.go | 9 ++++++++- management/server/dns_test.go | 6 +++--- management/server/group_test.go | 14 +++++++------- management/server/nameserver_test.go | 4 ++-- management/server/peer_test.go | 16 ++++++++-------- management/server/policy_test.go | 14 +++++++------- management/server/posture_checks_test.go | 10 +++++----- management/server/route_test.go | 12 ++++++------ management/server/user_test.go | 4 ++-- 9 files changed, 48 insertions(+), 41 deletions(-) diff --git a/management/server/account_test.go b/management/server/account_test.go index bcc73d52f..bef791d77 100644 --- a/management/server/account_test.go +++ b/management/server/account_test.go @@ -3253,6 +3253,13 @@ func setupNetworkMapTest(t *testing.T) (*DefaultAccountManager, *update_channel. return manager, updateManager, account, peer1, peer2, peer3 } +// peerUpdateTimeout bounds how long peerShouldReceiveUpdate and its outer +// wrappers wait for an expected update message. Sized for slow CI runners +// (MySQL, FreeBSD, loaded sqlite) where the channel publish can take +// seconds. Only runs down on failure; passing tests return immediately +// when the channel delivers. +const peerUpdateTimeout = 5 * time.Second + func peerShouldNotReceiveUpdate(t *testing.T, updateMessage <-chan *network_map.UpdateMessage) { t.Helper() select { @@ -3271,7 +3278,7 @@ func peerShouldReceiveUpdate(t *testing.T, updateMessage <-chan *network_map.Upd if msg == nil { t.Errorf("Received nil update message, expected valid message") } - case <-time.After(500 * time.Millisecond): + case <-time.After(peerUpdateTimeout): t.Error("Timed out waiting for update message") } } diff --git a/management/server/dns_test.go b/management/server/dns_test.go index 0e37a3b22..c443223c6 100644 --- a/management/server/dns_test.go +++ b/management/server/dns_test.go @@ -458,7 +458,7 @@ func TestDNSAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) @@ -478,7 +478,7 @@ func TestDNSAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) @@ -518,7 +518,7 @@ func TestDNSAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) diff --git a/management/server/group_test.go b/management/server/group_test.go index fa818e532..5821b90a3 100644 --- a/management/server/group_test.go +++ b/management/server/group_test.go @@ -620,7 +620,7 @@ func TestGroupAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) @@ -638,7 +638,7 @@ func TestGroupAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) @@ -656,7 +656,7 @@ func TestGroupAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) @@ -689,7 +689,7 @@ func TestGroupAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) @@ -730,7 +730,7 @@ func TestGroupAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) @@ -757,7 +757,7 @@ func TestGroupAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) @@ -804,7 +804,7 @@ func TestGroupAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) diff --git a/management/server/nameserver_test.go b/management/server/nameserver_test.go index d10d4464f..b2c8300d6 100644 --- a/management/server/nameserver_test.go +++ b/management/server/nameserver_test.go @@ -1087,7 +1087,7 @@ func TestNameServerAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) @@ -1105,7 +1105,7 @@ func TestNameServerAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) diff --git a/management/server/peer_test.go b/management/server/peer_test.go index 6f8d924fd..050baa595 100644 --- a/management/server/peer_test.go +++ b/management/server/peer_test.go @@ -1907,7 +1907,7 @@ func TestPeerAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) @@ -1929,7 +1929,7 @@ func TestPeerAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) @@ -1994,7 +1994,7 @@ func TestPeerAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) @@ -2012,7 +2012,7 @@ func TestPeerAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) @@ -2058,7 +2058,7 @@ func TestPeerAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) @@ -2076,7 +2076,7 @@ func TestPeerAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) @@ -2113,7 +2113,7 @@ func TestPeerAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) @@ -2131,7 +2131,7 @@ func TestPeerAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) diff --git a/management/server/policy_test.go b/management/server/policy_test.go index a3f987732..a553b7d05 100644 --- a/management/server/policy_test.go +++ b/management/server/policy_test.go @@ -1231,7 +1231,7 @@ func TestPolicyAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) @@ -1263,7 +1263,7 @@ func TestPolicyAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) @@ -1294,7 +1294,7 @@ func TestPolicyAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) @@ -1314,7 +1314,7 @@ func TestPolicyAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) @@ -1355,7 +1355,7 @@ func TestPolicyAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) @@ -1373,7 +1373,7 @@ func TestPolicyAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } @@ -1393,7 +1393,7 @@ func TestPolicyAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) diff --git a/management/server/posture_checks_test.go b/management/server/posture_checks_test.go index 7f0a48dc7..394f0d896 100644 --- a/management/server/posture_checks_test.go +++ b/management/server/posture_checks_test.go @@ -244,7 +244,7 @@ func TestPostureCheckAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) @@ -273,7 +273,7 @@ func TestPostureCheckAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) @@ -292,7 +292,7 @@ func TestPostureCheckAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) @@ -395,7 +395,7 @@ func TestPostureCheckAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) @@ -438,7 +438,7 @@ func TestPostureCheckAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) diff --git a/management/server/route_test.go b/management/server/route_test.go index 91b2cf982..91bd8b050 100644 --- a/management/server/route_test.go +++ b/management/server/route_test.go @@ -2070,7 +2070,7 @@ func TestRouteAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } @@ -2107,7 +2107,7 @@ func TestRouteAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) @@ -2127,7 +2127,7 @@ func TestRouteAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) @@ -2145,7 +2145,7 @@ func TestRouteAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) @@ -2185,7 +2185,7 @@ func TestRouteAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) @@ -2225,7 +2225,7 @@ func TestRouteAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) diff --git a/management/server/user_test.go b/management/server/user_test.go index 8fdfbd633..c77ea53d1 100644 --- a/management/server/user_test.go +++ b/management/server/user_test.go @@ -1586,7 +1586,7 @@ func TestUserAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) @@ -1609,7 +1609,7 @@ func TestUserAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) From d6f08e48408a7a09995076c4bd595832555eb407 Mon Sep 17 00:00:00 2001 From: Maycon Santos Date: Fri, 24 Apr 2026 13:13:27 +0200 Subject: [PATCH 23/25] [misc] Update sign pipeline version (#5981) --- .github/workflows/release.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 5ada1033d..1d29c8406 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -9,7 +9,7 @@ on: pull_request: env: - SIGN_PIPE_VER: "v0.1.2" + SIGN_PIPE_VER: "v0.1.3" GORELEASER_VER: "v2.14.3" PRODUCT_NAME: "NetBird" COPYRIGHT: "NetBird GmbH" From 34167c8a160d66668eb5592fb2589b13adcd8ee0 Mon Sep 17 00:00:00 2001 From: Maycon Santos Date: Mon, 27 Apr 2026 10:55:38 +0200 Subject: [PATCH 24/25] [misc] Update release pipeline version (#5995) --- .github/workflows/release.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 1d29c8406..826c05ff3 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -9,7 +9,7 @@ on: pull_request: env: - SIGN_PIPE_VER: "v0.1.3" + SIGN_PIPE_VER: "v0.1.4" GORELEASER_VER: "v2.14.3" PRODUCT_NAME: "NetBird" COPYRIGHT: "NetBird GmbH" From 154b81645a5922d0fa1fbfff6785798a14640e02 Mon Sep 17 00:00:00 2001 From: Vlad <4941176+crn4@users.noreply.github.com> Date: Mon, 27 Apr 2026 16:02:54 +0200 Subject: [PATCH 25/25] [management] removed legacy network map code (#5565) --- .../network_map/controller/controller.go | 269 +- .../controllers/network_map/interface.go | 3 - management/server/account_test.go | 98 +- .../http/handlers/peers/peers_handler.go | 2 +- management/server/peer_test.go | 11 - management/server/route_test.go | 245 -- management/server/store/sql_store.go | 2 - management/server/types/account.go | 403 --- management/server/types/account_test.go | 399 --- management/server/types/holder.go | 47 - management/server/types/networkmap.go | 67 - .../types/networkmap_comparison_test.go | 592 ----- .../server/types/networkmap_components.go | 2 - .../types/networkmap_components_test.go | 787 ++++++ .../server/types/networkmap_golden_test.go | 967 ------- management/server/types/networkmapbuilder.go | 2317 ----------------- 16 files changed, 807 insertions(+), 5404 deletions(-) delete mode 100644 management/server/types/holder.go delete mode 100644 management/server/types/networkmap.go delete mode 100644 management/server/types/networkmap_comparison_test.go create mode 100644 management/server/types/networkmap_components_test.go delete mode 100644 management/server/types/networkmap_golden_test.go delete mode 100644 management/server/types/networkmapbuilder.go diff --git a/management/internals/controllers/network_map/controller/controller.go b/management/internals/controllers/network_map/controller/controller.go index 4b414df6f..4b47ecaa0 100644 --- a/management/internals/controllers/network_map/controller/controller.go +++ b/management/internals/controllers/network_map/controller/controller.go @@ -7,7 +7,6 @@ import ( "os" "slices" "strconv" - "strings" "sync" "sync/atomic" "time" @@ -16,11 +15,9 @@ import ( "golang.org/x/exp/maps" "golang.org/x/mod/semver" - nbdns "github.com/netbirdio/netbird/dns" "github.com/netbirdio/netbird/management/internals/controllers/network_map" "github.com/netbirdio/netbird/management/internals/controllers/network_map/controller/cache" "github.com/netbirdio/netbird/management/internals/modules/peers/ephemeral" - "github.com/netbirdio/netbird/management/internals/modules/zones" "github.com/netbirdio/netbird/management/internals/server/config" "github.com/netbirdio/netbird/management/internals/shared/grpc" "github.com/netbirdio/netbird/management/server/account" @@ -58,13 +55,6 @@ type Controller struct { proxyController port_forwarding.Controller integratedPeerValidator integrated_validator.IntegratedValidator - - holder *types.Holder - - expNewNetworkMap bool - expNewNetworkMapAIDs map[string]struct{} - - compactedNetworkMap bool } type bufferUpdate struct { @@ -81,29 +71,6 @@ func NewController(ctx context.Context, store store.Store, metrics telemetry.App log.Fatal(fmt.Errorf("error creating metrics: %w", err)) } - newNetworkMapBuilder, err := strconv.ParseBool(os.Getenv(network_map.EnvNewNetworkMapBuilder)) - if err != nil { - log.WithContext(ctx).Warnf("failed to parse %s, using default value false: %v", network_map.EnvNewNetworkMapBuilder, err) - newNetworkMapBuilder = false - } - - compactedNetworkMap := true - compactedEnv := os.Getenv(types.EnvNewNetworkMapCompacted) - parsedCompactedNmap, err := strconv.ParseBool(compactedEnv) - if err != nil && len(compactedEnv) > 0 { - log.WithContext(ctx).Warnf("failed to parse %s, using default value true: %v", types.EnvNewNetworkMapCompacted, err) - } - if err == nil && !parsedCompactedNmap { - log.WithContext(ctx).Info("disabling compacted mode") - compactedNetworkMap = false - } - - ids := strings.Split(os.Getenv(network_map.EnvNewNetworkMapAccounts), ",") - expIDs := make(map[string]struct{}, len(ids)) - for _, id := range ids { - expIDs[id] = struct{}{} - } - return &Controller{ repo: newRepository(store), metrics: nMetrics, @@ -117,12 +84,6 @@ func NewController(ctx context.Context, store store.Store, metrics telemetry.App proxyController: proxyController, EphemeralPeersManager: ephemeralPeersManager, - - holder: types.NewHolder(), - expNewNetworkMap: newNetworkMapBuilder, - expNewNetworkMapAIDs: expIDs, - - compactedNetworkMap: compactedNetworkMap, } } @@ -153,17 +114,9 @@ func (c *Controller) CountStreams() int { func (c *Controller) sendUpdateAccountPeers(ctx context.Context, accountID string) error { log.WithContext(ctx).Tracef("updating peers for account %s from %s", accountID, util.GetCallerName()) - var ( - account *types.Account - err error - ) - if c.experimentalNetworkMap(accountID) { - account = c.getAccountFromHolderOrInit(ctx, accountID) - } else { - account, err = c.requestBuffer.GetAccountWithBackpressure(ctx, accountID) - if err != nil { - return fmt.Errorf("failed to get account: %v", err) - } + account, err := c.requestBuffer.GetAccountWithBackpressure(ctx, accountID) + if err != nil { + return fmt.Errorf("failed to get account: %v", err) } globalStart := time.Now() @@ -197,10 +150,6 @@ func (c *Controller) sendUpdateAccountPeers(ctx context.Context, accountID strin routers := account.GetResourceRoutersMap() groupIDToUserIDs := account.GetActiveGroupUsers() - if c.experimentalNetworkMap(accountID) { - c.initNetworkMapBuilderIfNeeded(account, approvedPeersMap) - } - proxyNetworkMaps, err := c.proxyController.GetProxyNetworkMapsAll(ctx, accountID, account.Peers) if err != nil { log.WithContext(ctx).Errorf("failed to get proxy network maps: %v", err) @@ -243,16 +192,7 @@ func (c *Controller) sendUpdateAccountPeers(ctx context.Context, accountID strin c.metrics.CountCalcPostureChecksDuration(time.Since(start)) start = time.Now() - var remotePeerNetworkMap *types.NetworkMap - - switch { - case c.experimentalNetworkMap(accountID): - remotePeerNetworkMap = c.getPeerNetworkMapExp(ctx, p.AccountID, p.ID, approvedPeersMap, peersCustomZone, accountZones, c.accountManagerMetrics) - case c.compactedNetworkMap: - remotePeerNetworkMap = account.GetPeerNetworkMapFromComponents(ctx, p.ID, peersCustomZone, accountZones, approvedPeersMap, resourcePolicies, routers, c.accountManagerMetrics, groupIDToUserIDs) - default: - remotePeerNetworkMap = account.GetPeerNetworkMap(ctx, p.ID, peersCustomZone, accountZones, approvedPeersMap, resourcePolicies, routers, c.accountManagerMetrics, groupIDToUserIDs) - } + remotePeerNetworkMap := account.GetPeerNetworkMapFromComponents(ctx, p.ID, peersCustomZone, accountZones, approvedPeersMap, resourcePolicies, routers, c.accountManagerMetrics, groupIDToUserIDs) c.metrics.CountCalcPeerNetworkMapDuration(time.Since(start)) @@ -318,10 +258,6 @@ func (c *Controller) bufferSendUpdateAccountPeers(ctx context.Context, accountID // UpdatePeers updates all peers that belong to an account. // Should be called when changes have to be synced to peers. func (c *Controller) UpdateAccountPeers(ctx context.Context, accountID string) error { - if err := c.RecalculateNetworkMapCache(ctx, accountID); err != nil { - return fmt.Errorf("recalculate network map cache: %v", err) - } - return c.sendUpdateAccountPeers(ctx, accountID) } @@ -371,16 +307,7 @@ func (c *Controller) UpdateAccountPeer(ctx context.Context, accountId string, pe return err } - var remotePeerNetworkMap *types.NetworkMap - - switch { - case c.experimentalNetworkMap(accountId): - remotePeerNetworkMap = c.getPeerNetworkMapExp(ctx, peer.AccountID, peer.ID, approvedPeersMap, peersCustomZone, accountZones, c.accountManagerMetrics) - case c.compactedNetworkMap: - remotePeerNetworkMap = account.GetPeerNetworkMapFromComponents(ctx, peerId, peersCustomZone, accountZones, approvedPeersMap, resourcePolicies, routers, c.accountManagerMetrics, groupIDToUserIDs) - default: - remotePeerNetworkMap = account.GetPeerNetworkMap(ctx, peerId, peersCustomZone, accountZones, approvedPeersMap, resourcePolicies, routers, c.accountManagerMetrics, groupIDToUserIDs) - } + remotePeerNetworkMap := account.GetPeerNetworkMapFromComponents(ctx, peerId, peersCustomZone, accountZones, approvedPeersMap, resourcePolicies, routers, c.accountManagerMetrics, groupIDToUserIDs) proxyNetworkMap, ok := proxyNetworkMaps[peer.ID] if ok { @@ -451,17 +378,9 @@ func (c *Controller) GetValidatedPeerWithMap(ctx context.Context, isRequiresAppr return peer, emptyMap, nil, 0, nil } - var ( - account *types.Account - err error - ) - if c.experimentalNetworkMap(accountID) { - account = c.getAccountFromHolderOrInit(ctx, accountID) - } else { - account, err = c.requestBuffer.GetAccountWithBackpressure(ctx, accountID) - if err != nil { - return nil, nil, nil, 0, err - } + account, err := c.requestBuffer.GetAccountWithBackpressure(ctx, accountID) + if err != nil { + return nil, nil, nil, 0, err } account.InjectProxyPolicies(ctx) @@ -493,20 +412,10 @@ func (c *Controller) GetValidatedPeerWithMap(ctx context.Context, isRequiresAppr return nil, nil, nil, 0, err } - var networkMap *types.NetworkMap - - if c.experimentalNetworkMap(accountID) { - networkMap = c.getPeerNetworkMapExp(ctx, peer.AccountID, peer.ID, approvedPeersMap, peersCustomZone, accountZones, c.accountManagerMetrics) - } else { - resourcePolicies := account.GetResourcePoliciesMap() - routers := account.GetResourceRoutersMap() - groupIDToUserIDs := account.GetActiveGroupUsers() - if c.compactedNetworkMap { - networkMap = account.GetPeerNetworkMapFromComponents(ctx, peer.ID, peersCustomZone, accountZones, approvedPeersMap, resourcePolicies, routers, c.accountManagerMetrics, groupIDToUserIDs) - } else { - networkMap = account.GetPeerNetworkMap(ctx, peer.ID, peersCustomZone, accountZones, approvedPeersMap, resourcePolicies, routers, c.accountManagerMetrics, groupIDToUserIDs) - } - } + resourcePolicies := account.GetResourcePoliciesMap() + routers := account.GetResourceRoutersMap() + groupIDToUserIDs := account.GetActiveGroupUsers() + networkMap := account.GetPeerNetworkMapFromComponents(ctx, peer.ID, peersCustomZone, accountZones, approvedPeersMap, resourcePolicies, routers, c.accountManagerMetrics, groupIDToUserIDs) proxyNetworkMap, ok := proxyNetworkMaps[peer.ID] if ok { @@ -518,108 +427,6 @@ func (c *Controller) GetValidatedPeerWithMap(ctx context.Context, isRequiresAppr return peer, networkMap, postureChecks, dnsFwdPort, nil } -func (c *Controller) initNetworkMapBuilderIfNeeded(account *types.Account, validatedPeers map[string]struct{}) { - c.enrichAccountFromHolder(account) - account.InitNetworkMapBuilderIfNeeded(validatedPeers) -} - -func (c *Controller) getPeerNetworkMapExp( - ctx context.Context, - accountId string, - peerId string, - validatedPeers map[string]struct{}, - peersCustomZone nbdns.CustomZone, - accountZones []*zones.Zone, - metrics *telemetry.AccountManagerMetrics, -) *types.NetworkMap { - account := c.getAccountFromHolderOrInit(ctx, accountId) - if account == nil { - log.WithContext(ctx).Warnf("account %s not found in holder when getting peer network map", accountId) - return &types.NetworkMap{ - Network: &types.Network{}, - } - } - - return account.GetPeerNetworkMapExp(ctx, peerId, peersCustomZone, accountZones, validatedPeers, metrics) -} - -func (c *Controller) onPeersAddedUpdNetworkMapCache(account *types.Account, peerIds ...string) { - c.enrichAccountFromHolder(account) - account.OnPeersAddedUpdNetworkMapCache(peerIds...) -} - -func (c *Controller) onPeerDeletedUpdNetworkMapCache(account *types.Account, peerId string) error { - c.enrichAccountFromHolder(account) - return account.OnPeerDeletedUpdNetworkMapCache(peerId) -} - -func (c *Controller) UpdatePeerInNetworkMapCache(accountId string, peer *nbpeer.Peer) { - account := c.getAccountFromHolder(accountId) - if account == nil { - return - } - account.UpdatePeerInNetworkMapCache(peer) -} - -func (c *Controller) recalculateNetworkMapCache(account *types.Account, validatedPeers map[string]struct{}) { - account.RecalculateNetworkMapCache(validatedPeers) - c.updateAccountInHolder(account) -} - -func (c *Controller) RecalculateNetworkMapCache(ctx context.Context, accountId string) error { - if c.experimentalNetworkMap(accountId) { - account, err := c.requestBuffer.GetAccountWithBackpressure(ctx, accountId) - if err != nil { - return err - } - validatedPeers, err := c.integratedPeerValidator.GetValidatedPeers(ctx, account.Id, maps.Values(account.Groups), maps.Values(account.Peers), account.Settings.Extra) - if err != nil { - log.WithContext(ctx).Errorf("failed to get validate peers: %v", err) - return err - } - c.recalculateNetworkMapCache(account, validatedPeers) - } - return nil -} - -func (c *Controller) experimentalNetworkMap(accountId string) bool { - _, ok := c.expNewNetworkMapAIDs[accountId] - return c.expNewNetworkMap || ok -} - -func (c *Controller) enrichAccountFromHolder(account *types.Account) { - a := c.holder.GetAccount(account.Id) - if a == nil { - c.holder.AddAccount(account) - return - } - account.NetworkMapCache = a.NetworkMapCache - if account.NetworkMapCache == nil { - return - } - c.holder.AddAccount(account) -} - -func (c *Controller) getAccountFromHolder(accountID string) *types.Account { - return c.holder.GetAccount(accountID) -} - -func (c *Controller) getAccountFromHolderOrInit(ctx context.Context, accountID string) *types.Account { - a := c.holder.GetAccount(accountID) - if a != nil { - return a - } - account, err := c.holder.LoadOrStoreFunc(ctx, accountID, c.requestBuffer.GetAccountWithBackpressure) - if err != nil { - return nil - } - return account -} - -func (c *Controller) updateAccountInHolder(account *types.Account) { - c.holder.AddAccount(account) -} - // GetDNSDomain returns the configured dnsDomain func (c *Controller) GetDNSDomain(settings *types.Settings) string { if settings == nil { @@ -756,16 +563,7 @@ func isPeerInPolicySourceGroups(account *types.Account, peerID string, policy *t } func (c *Controller) OnPeersUpdated(ctx context.Context, accountID string, peerIDs []string) error { - peers, err := c.repo.GetPeersByIDs(ctx, accountID, peerIDs) - if err != nil { - return fmt.Errorf("failed to get peers by ids: %w", err) - } - - for _, peer := range peers { - c.UpdatePeerInNetworkMapCache(accountID, peer) - } - - err = c.bufferSendUpdateAccountPeers(ctx, accountID) + err := c.bufferSendUpdateAccountPeers(ctx, accountID) if err != nil { log.WithContext(ctx).Errorf("failed to buffer update account peers for peer update in account %s: %v", accountID, err) } @@ -775,14 +573,6 @@ func (c *Controller) OnPeersUpdated(ctx context.Context, accountID string, peerI func (c *Controller) OnPeersAdded(ctx context.Context, accountID string, peerIDs []string) error { log.WithContext(ctx).Debugf("OnPeersAdded call to add peers: %v", peerIDs) - if c.experimentalNetworkMap(accountID) { - account, err := c.requestBuffer.GetAccountWithBackpressure(ctx, accountID) - if err != nil { - return err - } - log.WithContext(ctx).Debugf("peers are ready to be added to networkmap cache: %v", peerIDs) - c.onPeersAddedUpdNetworkMapCache(account, peerIDs...) - } return c.bufferSendUpdateAccountPeers(ctx, accountID) } @@ -817,19 +607,6 @@ func (c *Controller) OnPeersDeleted(ctx context.Context, accountID string, peerI MessageType: network_map.MessageTypeNetworkMap, }) c.peersUpdateManager.CloseChannel(ctx, peerID) - - if c.experimentalNetworkMap(accountID) { - account, err := c.requestBuffer.GetAccountWithBackpressure(ctx, accountID) - if err != nil { - log.WithContext(ctx).Errorf("failed to get account %s: %v", accountID, err) - continue - } - err = c.onPeerDeletedUpdNetworkMapCache(account, peerID) - if err != nil { - log.WithContext(ctx).Errorf("failed to update network map cache for deleted peer %s in account %s: %v", peerID, accountID, err) - continue - } - } } return c.bufferSendUpdateAccountPeers(ctx, accountID) @@ -872,21 +649,11 @@ func (c *Controller) GetNetworkMap(ctx context.Context, peerID string) (*types.N return nil, err } - var networkMap *types.NetworkMap - - if c.experimentalNetworkMap(peer.AccountID) { - networkMap = c.getPeerNetworkMapExp(ctx, peer.AccountID, peerID, validatedPeers, peersCustomZone, accountZones, nil) - } else { - account.InjectProxyPolicies(ctx) - resourcePolicies := account.GetResourcePoliciesMap() - routers := account.GetResourceRoutersMap() - groupIDToUserIDs := account.GetActiveGroupUsers() - if c.compactedNetworkMap { - networkMap = account.GetPeerNetworkMapFromComponents(ctx, peer.ID, peersCustomZone, accountZones, validatedPeers, resourcePolicies, routers, nil, groupIDToUserIDs) - } else { - networkMap = account.GetPeerNetworkMap(ctx, peer.ID, peersCustomZone, accountZones, validatedPeers, resourcePolicies, routers, nil, groupIDToUserIDs) - } - } + account.InjectProxyPolicies(ctx) + resourcePolicies := account.GetResourcePoliciesMap() + routers := account.GetResourceRoutersMap() + groupIDToUserIDs := account.GetActiveGroupUsers() + networkMap := account.GetPeerNetworkMapFromComponents(ctx, peer.ID, peersCustomZone, accountZones, validatedPeers, resourcePolicies, routers, nil, groupIDToUserIDs) proxyNetworkMap, ok := proxyNetworkMaps[peer.ID] if ok { diff --git a/management/internals/controllers/network_map/interface.go b/management/internals/controllers/network_map/interface.go index 64caac861..cfea2d3de 100644 --- a/management/internals/controllers/network_map/interface.go +++ b/management/internals/controllers/network_map/interface.go @@ -12,9 +12,6 @@ import ( ) const ( - EnvNewNetworkMapBuilder = "NB_EXPERIMENT_NETWORK_MAP" - EnvNewNetworkMapAccounts = "NB_EXPERIMENT_NETWORK_MAP_ACCOUNTS" - DnsForwarderPort = nbdns.ForwarderServerPort OldForwarderPort = nbdns.ForwarderClientPort DnsForwarderPortMinVersion = "v0.59.0" diff --git a/management/server/account_test.go b/management/server/account_test.go index bef791d77..756c42421 100644 --- a/management/server/account_test.go +++ b/management/server/account_test.go @@ -408,7 +408,7 @@ func TestAccount_GetPeerNetworkMap(t *testing.T) { } customZone := account.GetPeersCustomZone(context.Background(), "netbird.io") - networkMap := account.GetPeerNetworkMap(context.Background(), testCase.peerID, customZone, nil, validatedPeers, account.GetResourcePoliciesMap(), account.GetResourceRoutersMap(), nil, account.GetActiveGroupUsers()) + networkMap := account.GetPeerNetworkMapFromComponents(context.Background(), testCase.peerID, customZone, nil, validatedPeers, account.GetResourcePoliciesMap(), account.GetResourceRoutersMap(), nil, account.GetActiveGroupUsers()) assert.Len(t, networkMap.Peers, len(testCase.expectedPeers)) assert.Len(t, networkMap.OfflinePeers, len(testCase.expectedOfflinePeers)) } @@ -1171,11 +1171,6 @@ func TestAccountManager_AddPeerWithUserID(t *testing.T) { assert.Equal(t, peer.IP.String(), fmt.Sprint(ev.Meta["ip"])) } -func TestAccountManager_NetworkUpdates_SaveGroup_Experimental(t *testing.T) { - t.Setenv(network_map.EnvNewNetworkMapBuilder, "true") - testAccountManager_NetworkUpdates_SaveGroup(t) -} - func TestAccountManager_NetworkUpdates_SaveGroup(t *testing.T) { testAccountManager_NetworkUpdates_SaveGroup(t) } @@ -1231,11 +1226,6 @@ func testAccountManager_NetworkUpdates_SaveGroup(t *testing.T) { wg.Wait() } -func TestAccountManager_NetworkUpdates_DeletePolicy_Experimental(t *testing.T) { - t.Setenv(network_map.EnvNewNetworkMapBuilder, "true") - testAccountManager_NetworkUpdates_DeletePolicy(t) -} - func TestAccountManager_NetworkUpdates_DeletePolicy(t *testing.T) { testAccountManager_NetworkUpdates_DeletePolicy(t) } @@ -1274,11 +1264,6 @@ func testAccountManager_NetworkUpdates_DeletePolicy(t *testing.T) { wg.Wait() } -func TestAccountManager_NetworkUpdates_SavePolicy_Experimental(t *testing.T) { - t.Setenv(network_map.EnvNewNetworkMapBuilder, "true") - testAccountManager_NetworkUpdates_SavePolicy(t) -} - func TestAccountManager_NetworkUpdates_SavePolicy(t *testing.T) { testAccountManager_NetworkUpdates_SavePolicy(t) } @@ -1332,11 +1317,6 @@ func testAccountManager_NetworkUpdates_SavePolicy(t *testing.T) { wg.Wait() } -func TestAccountManager_NetworkUpdates_DeletePeer_Experimental(t *testing.T) { - t.Setenv(network_map.EnvNewNetworkMapBuilder, "true") - testAccountManager_NetworkUpdates_DeletePeer(t) -} - func TestAccountManager_NetworkUpdates_DeletePeer(t *testing.T) { testAccountManager_NetworkUpdates_DeletePeer(t) } @@ -1397,11 +1377,6 @@ func testAccountManager_NetworkUpdates_DeletePeer(t *testing.T) { wg.Wait() } -func TestAccountManager_NetworkUpdates_DeleteGroup_Experimental(t *testing.T) { - t.Setenv(network_map.EnvNewNetworkMapBuilder, "true") - testAccountManager_NetworkUpdates_DeleteGroup(t) -} - func TestAccountManager_NetworkUpdates_DeleteGroup(t *testing.T) { testAccountManager_NetworkUpdates_DeleteGroup(t) } @@ -1633,75 +1608,6 @@ func TestFileStore_GetRoutesByPrefix(t *testing.T) { assert.Contains(t, routeIDs, route.ID("route-2")) } -func TestAccount_GetRoutesToSync(t *testing.T) { - _, prefix, err := route.ParseNetwork("192.168.64.0/24") - if err != nil { - t.Fatal(err) - } - _, prefix2, err := route.ParseNetwork("192.168.0.0/24") - if err != nil { - t.Fatal(err) - } - account := &types.Account{ - Peers: map[string]*nbpeer.Peer{ - "peer-1": {Key: "peer-1", Meta: nbpeer.PeerSystemMeta{GoOS: "linux"}}, "peer-2": {Key: "peer-2", Meta: nbpeer.PeerSystemMeta{GoOS: "linux"}}, "peer-3": {Key: "peer-1", Meta: nbpeer.PeerSystemMeta{GoOS: "linux"}}, - }, - Groups: map[string]*types.Group{"group1": {ID: "group1", Peers: []string{"peer-1", "peer-2"}}}, - Routes: map[route.ID]*route.Route{ - "route-1": { - ID: "route-1", - Network: prefix, - NetID: "network-1", - Description: "network-1", - Peer: "peer-1", - NetworkType: 0, - Masquerade: false, - Metric: 999, - Enabled: true, - Groups: []string{"group1"}, - }, - "route-2": { - ID: "route-2", - Network: prefix2, - NetID: "network-2", - Description: "network-2", - Peer: "peer-2", - NetworkType: 0, - Masquerade: false, - Metric: 999, - Enabled: true, - Groups: []string{"group1"}, - }, - "route-3": { - ID: "route-3", - Network: prefix, - NetID: "network-1", - Description: "network-1", - Peer: "peer-2", - NetworkType: 0, - Masquerade: false, - Metric: 999, - Enabled: true, - Groups: []string{"group1"}, - }, - }, - } - - routes := account.GetRoutesToSync(context.Background(), "peer-2", []*nbpeer.Peer{{Key: "peer-1"}, {Key: "peer-3"}}, account.GetPeerGroups("peer-2")) - - assert.Len(t, routes, 2) - routeIDs := make(map[route.ID]struct{}, 2) - for _, r := range routes { - routeIDs[r.ID] = struct{}{} - } - assert.Contains(t, routeIDs, route.ID("route-2")) - assert.Contains(t, routeIDs, route.ID("route-3")) - - emptyRoutes := account.GetRoutesToSync(context.Background(), "peer-3", []*nbpeer.Peer{{Key: "peer-1"}, {Key: "peer-2"}}, account.GetPeerGroups("peer-3")) - - assert.Len(t, emptyRoutes, 0) -} - func TestAccount_Copy(t *testing.T) { account := &types.Account{ Id: "account1", @@ -1824,9 +1730,7 @@ func TestAccount_Copy(t *testing.T) { AccountID: "account1", }, }, - NetworkMapCache: &types.NetworkMapBuilder{}, } - account.InitOnce() err := hasNilField(account) if err != nil { t.Fatal(err) diff --git a/management/server/http/handlers/peers/peers_handler.go b/management/server/http/handlers/peers/peers_handler.go index 6b9a69f04..bf6937a49 100644 --- a/management/server/http/handlers/peers/peers_handler.go +++ b/management/server/http/handlers/peers/peers_handler.go @@ -417,7 +417,7 @@ func (h *Handler) GetAccessiblePeers(w http.ResponseWriter, r *http.Request) { dnsDomain := h.networkMapController.GetDNSDomain(account.Settings) - netMap := account.GetPeerNetworkMap(r.Context(), peerID, dns.CustomZone{}, nil, validPeers, account.GetResourcePoliciesMap(), account.GetResourceRoutersMap(), nil, account.GetActiveGroupUsers()) + netMap := account.GetPeerNetworkMapFromComponents(r.Context(), peerID, dns.CustomZone{}, nil, validPeers, account.GetResourcePoliciesMap(), account.GetResourceRoutersMap(), nil, account.GetActiveGroupUsers()) util.WriteJSONObject(r.Context(), w, toAccessiblePeers(netMap, dnsDomain)) } diff --git a/management/server/peer_test.go b/management/server/peer_test.go index 050baa595..17202597a 100644 --- a/management/server/peer_test.go +++ b/management/server/peer_test.go @@ -179,11 +179,6 @@ func TestAccountManager_GetNetworkMap(t *testing.T) { testGetNetworkMapGeneral(t) } -func TestAccountManager_GetNetworkMap_Experimental(t *testing.T) { - t.Setenv(network_map.EnvNewNetworkMapBuilder, "true") - testGetNetworkMapGeneral(t) -} - func testGetNetworkMapGeneral(t *testing.T) { manager, _, err := createManager(t) if err != nil { @@ -1016,11 +1011,6 @@ func BenchmarkUpdateAccountPeers(b *testing.B) { } } -func TestUpdateAccountPeers_Experimental(t *testing.T) { - t.Setenv(network_map.EnvNewNetworkMapBuilder, "true") - testUpdateAccountPeers(t) -} - func TestUpdateAccountPeers(t *testing.T) { testUpdateAccountPeers(t) } @@ -1600,7 +1590,6 @@ func Test_RegisterPeerRollbackOnFailure(t *testing.T) { } func Test_LoginPeer(t *testing.T) { - t.Setenv(network_map.EnvNewNetworkMapBuilder, "true") if runtime.GOOS == "windows" { t.Skip("The SQLite store is not properly supported by Windows yet") } diff --git a/management/server/route_test.go b/management/server/route_test.go index 91bd8b050..d0caf4b9b 100644 --- a/management/server/route_test.go +++ b/management/server/route_test.go @@ -2,10 +2,8 @@ package server import ( "context" - "fmt" "net" "net/netip" - "sort" "testing" "time" @@ -1840,11 +1838,6 @@ func TestAccount_getPeersRoutesFirewall(t *testing.T) { }, } - validatedPeers := make(map[string]struct{}) - for p := range account.Peers { - validatedPeers[p] = struct{}{} - } - t.Run("check applied policies for the route", func(t *testing.T) { route1 := account.Routes["route1"] policies := types.GetAllRoutePoliciesFromGroups(account, route1.AccessControlGroups) @@ -1858,116 +1851,6 @@ func TestAccount_getPeersRoutesFirewall(t *testing.T) { policies = types.GetAllRoutePoliciesFromGroups(account, route3.AccessControlGroups) assert.Len(t, policies, 0) }) - - t.Run("check peer routes firewall rules", func(t *testing.T) { - routesFirewallRules := account.GetPeerRoutesFirewallRules(context.Background(), "peerA", validatedPeers) - assert.Len(t, routesFirewallRules, 4) - - expectedRoutesFirewallRules := []*types.RouteFirewallRule{ - { - SourceRanges: []string{ - fmt.Sprintf(types.AllowedIPsFormat, peerCIp), - fmt.Sprintf(types.AllowedIPsFormat, peerHIp), - fmt.Sprintf(types.AllowedIPsFormat, peerBIp), - }, - Action: "accept", - Destination: "192.168.0.0/16", - Protocol: "all", - Port: 80, - RouteID: "route1:peerA", - }, - { - SourceRanges: []string{ - fmt.Sprintf(types.AllowedIPsFormat, peerCIp), - fmt.Sprintf(types.AllowedIPsFormat, peerHIp), - fmt.Sprintf(types.AllowedIPsFormat, peerBIp), - }, - Action: "accept", - Destination: "192.168.0.0/16", - Protocol: "all", - Port: 320, - RouteID: "route1:peerA", - }, - } - additionalFirewallRule := []*types.RouteFirewallRule{ - { - SourceRanges: []string{ - fmt.Sprintf(types.AllowedIPsFormat, peerJIp), - }, - Action: "accept", - Destination: "192.168.10.0/16", - Protocol: "tcp", - Port: 80, - RouteID: "route4:peerA", - }, - { - SourceRanges: []string{ - fmt.Sprintf(types.AllowedIPsFormat, peerKIp), - }, - Action: "accept", - Destination: "192.168.10.0/16", - Protocol: "all", - RouteID: "route4:peerA", - }, - } - - assert.ElementsMatch(t, orderRuleSourceRanges(routesFirewallRules), orderRuleSourceRanges(append(expectedRoutesFirewallRules, additionalFirewallRule...))) - - // peerD is also the routing peer for route1, should contain same routes firewall rules as peerA - routesFirewallRules = account.GetPeerRoutesFirewallRules(context.Background(), "peerD", validatedPeers) - assert.Len(t, routesFirewallRules, 2) - for _, rule := range expectedRoutesFirewallRules { - rule.RouteID = "route1:peerD" - } - assert.ElementsMatch(t, orderRuleSourceRanges(routesFirewallRules), orderRuleSourceRanges(expectedRoutesFirewallRules)) - - // peerE is a single routing peer for route 2 and route 3 - routesFirewallRules = account.GetPeerRoutesFirewallRules(context.Background(), "peerE", validatedPeers) - assert.Len(t, routesFirewallRules, 3) - - expectedRoutesFirewallRules = []*types.RouteFirewallRule{ - { - SourceRanges: []string{"100.65.250.202/32", "100.65.13.186/32"}, - Action: "accept", - Destination: existingNetwork.String(), - Protocol: "tcp", - PortRange: types.RulePortRange{Start: 80, End: 350}, - RouteID: "route2", - }, - { - SourceRanges: []string{"0.0.0.0/0"}, - Action: "accept", - Destination: "192.0.2.0/32", - Protocol: "all", - Domains: domain.List{"example.com"}, - IsDynamic: true, - RouteID: "route3", - }, - { - SourceRanges: []string{"::/0"}, - Action: "accept", - Destination: "192.0.2.0/32", - Protocol: "all", - Domains: domain.List{"example.com"}, - IsDynamic: true, - RouteID: "route3", - }, - } - assert.ElementsMatch(t, orderRuleSourceRanges(routesFirewallRules), orderRuleSourceRanges(expectedRoutesFirewallRules)) - - // peerC is part of route1 distribution groups but should not receive the routes firewall rules - routesFirewallRules = account.GetPeerRoutesFirewallRules(context.Background(), "peerC", validatedPeers) - assert.Len(t, routesFirewallRules, 0) - }) - -} - -// orderList is a helper function to sort a list of strings -func orderRuleSourceRanges(ruleList []*types.RouteFirewallRule) []*types.RouteFirewallRule { - for _, rule := range ruleList { - sort.Strings(rule.SourceRanges) - } - return ruleList } func TestRouteAccountPeersUpdate(t *testing.T) { @@ -2665,11 +2548,6 @@ func TestAccount_GetPeerNetworkResourceFirewallRules(t *testing.T) { }, } - validatedPeers := make(map[string]struct{}) - for p := range account.Peers { - validatedPeers[p] = struct{}{} - } - t.Run("validate applied policies for different network resources", func(t *testing.T) { // Test case: Resource1 is directly applied to the policy (policyResource1) policies := account.GetPoliciesForNetworkResource("resource1") @@ -2693,127 +2571,4 @@ func TestAccount_GetPeerNetworkResourceFirewallRules(t *testing.T) { policies = account.GetPoliciesForNetworkResource("resource6") assert.Len(t, policies, 1, "resource6 should have exactly 1 policy applied via access control groups") }) - - t.Run("validate routing peer firewall rules for network resources", func(t *testing.T) { - resourcePoliciesMap := account.GetResourcePoliciesMap() - resourceRoutersMap := account.GetResourceRoutersMap() - _, routes, sourcePeers := account.GetNetworkResourcesRoutesToSync(context.Background(), "peerA", resourcePoliciesMap, resourceRoutersMap) - firewallRules := account.GetPeerNetworkResourceFirewallRules(context.Background(), account.Peers["peerA"], validatedPeers, routes, resourcePoliciesMap) - assert.Len(t, firewallRules, 4) - assert.Len(t, sourcePeers, 5) - - expectedFirewallRules := []*types.RouteFirewallRule{ - { - SourceRanges: []string{ - fmt.Sprintf(types.AllowedIPsFormat, peerCIp), - fmt.Sprintf(types.AllowedIPsFormat, peerHIp), - fmt.Sprintf(types.AllowedIPsFormat, peerBIp), - }, - Action: "accept", - Destination: "192.168.0.0/16", - Protocol: "all", - Port: 80, - RouteID: "resource2:peerA", - }, - { - SourceRanges: []string{ - fmt.Sprintf(types.AllowedIPsFormat, peerCIp), - fmt.Sprintf(types.AllowedIPsFormat, peerHIp), - fmt.Sprintf(types.AllowedIPsFormat, peerBIp), - }, - Action: "accept", - Destination: "192.168.0.0/16", - Protocol: "all", - Port: 320, - RouteID: "resource2:peerA", - }, - } - - additionalFirewallRules := []*types.RouteFirewallRule{ - { - SourceRanges: []string{ - fmt.Sprintf(types.AllowedIPsFormat, peerJIp), - }, - Action: "accept", - Destination: "192.0.2.0/32", - Protocol: "tcp", - Port: 80, - Domains: domain.List{"example.com"}, - IsDynamic: true, - RouteID: "resource4:peerA", - }, - { - SourceRanges: []string{ - fmt.Sprintf(types.AllowedIPsFormat, peerKIp), - }, - Action: "accept", - Destination: "192.0.2.0/32", - Protocol: "all", - Domains: domain.List{"example.com"}, - IsDynamic: true, - RouteID: "resource4:peerA", - }, - } - assert.ElementsMatch(t, orderRuleSourceRanges(firewallRules), orderRuleSourceRanges(append(expectedFirewallRules, additionalFirewallRules...))) - - // peerD is also the routing peer for resource2 - _, routes, sourcePeers = account.GetNetworkResourcesRoutesToSync(context.Background(), "peerD", resourcePoliciesMap, resourceRoutersMap) - firewallRules = account.GetPeerNetworkResourceFirewallRules(context.Background(), account.Peers["peerD"], validatedPeers, routes, resourcePoliciesMap) - assert.Len(t, firewallRules, 2) - for _, rule := range expectedFirewallRules { - rule.RouteID = "resource2:peerD" - } - assert.ElementsMatch(t, orderRuleSourceRanges(firewallRules), orderRuleSourceRanges(expectedFirewallRules)) - assert.Len(t, sourcePeers, 3) - - // peerE is a single routing peer for resource1 and resource3 - // PeerE should only receive rules for resource1 since resource3 has no applied policy - _, routes, sourcePeers = account.GetNetworkResourcesRoutesToSync(context.Background(), "peerE", resourcePoliciesMap, resourceRoutersMap) - firewallRules = account.GetPeerNetworkResourceFirewallRules(context.Background(), account.Peers["peerE"], validatedPeers, routes, resourcePoliciesMap) - assert.Len(t, firewallRules, 1) - assert.Len(t, sourcePeers, 2) - - expectedFirewallRules = []*types.RouteFirewallRule{ - { - SourceRanges: []string{"100.65.250.202/32", "100.65.13.186/32"}, - Action: "accept", - Destination: "10.10.10.0/24", - Protocol: "tcp", - PortRange: types.RulePortRange{Start: 80, End: 350}, - RouteID: "resource1:peerE", - }, - } - assert.ElementsMatch(t, orderRuleSourceRanges(firewallRules), orderRuleSourceRanges(expectedFirewallRules)) - - // peerC is part of distribution groups for resource2 but should not receive the firewall rules - firewallRules = account.GetPeerRoutesFirewallRules(context.Background(), "peerC", validatedPeers) - assert.Len(t, firewallRules, 0) - - // peerL is the single routing peer for resource5 - _, routes, sourcePeers = account.GetNetworkResourcesRoutesToSync(context.Background(), "peerL", resourcePoliciesMap, resourceRoutersMap) - assert.Len(t, routes, 1) - firewallRules = account.GetPeerNetworkResourceFirewallRules(context.Background(), account.Peers["peerL"], validatedPeers, routes, resourcePoliciesMap) - assert.Len(t, firewallRules, 1) - assert.Len(t, sourcePeers, 1) - - expectedFirewallRules = []*types.RouteFirewallRule{ - { - SourceRanges: []string{"100.65.29.67/32"}, - Action: "accept", - Destination: "10.12.12.1/32", - Protocol: "tcp", - Port: 8080, - RouteID: "resource5:peerL", - }, - } - assert.ElementsMatch(t, orderRuleSourceRanges(firewallRules), orderRuleSourceRanges(expectedFirewallRules)) - - _, routes, sourcePeers = account.GetNetworkResourcesRoutesToSync(context.Background(), "peerM", resourcePoliciesMap, resourceRoutersMap) - assert.Len(t, routes, 1) - assert.Len(t, sourcePeers, 0) - - _, routes, sourcePeers = account.GetNetworkResourcesRoutesToSync(context.Background(), "peerN", resourcePoliciesMap, resourceRoutersMap) - assert.Len(t, routes, 1) - assert.Len(t, sourcePeers, 2) - }) } diff --git a/management/server/store/sql_store.go b/management/server/store/sql_store.go index 0ff57b752..0a716d08d 100644 --- a/management/server/store/sql_store.go +++ b/management/server/store/sql_store.go @@ -1196,7 +1196,6 @@ func (s *SqlStore) getAccountGorm(ctx context.Context, accountID string) (*types account.NameServerGroups[ns.ID] = &ns } account.NameServerGroupsG = nil - account.InitOnce() return &account, nil } @@ -1635,7 +1634,6 @@ func (s *SqlStore) getAccount(ctx context.Context, accountID string) (*types.Acc if sExtraIntegratedValidatorGroups.Valid { _ = json.Unmarshal([]byte(sExtraIntegratedValidatorGroups.String), &account.Settings.Extra.IntegratedValidatorGroups) } - account.InitOnce() return &account, nil } diff --git a/management/server/types/account.go b/management/server/types/account.go index c448813db..e7c1e2dce 100644 --- a/management/server/types/account.go +++ b/management/server/types/account.go @@ -8,7 +8,6 @@ import ( "slices" "strconv" "strings" - "sync" "time" "github.com/hashicorp/go-multierror" @@ -27,7 +26,6 @@ import ( networkTypes "github.com/netbirdio/netbird/management/server/networks/types" nbpeer "github.com/netbirdio/netbird/management/server/peer" "github.com/netbirdio/netbird/management/server/posture" - "github.com/netbirdio/netbird/management/server/telemetry" "github.com/netbirdio/netbird/management/server/util" "github.com/netbirdio/netbird/route" "github.com/netbirdio/netbird/shared/management/domain" @@ -110,16 +108,9 @@ type Account struct { NetworkResources []*resourceTypes.NetworkResource `gorm:"foreignKey:AccountID;references:id"` Onboarding AccountOnboarding `gorm:"foreignKey:AccountID;references:id;constraint:OnDelete:CASCADE"` - NetworkMapCache *NetworkMapBuilder `gorm:"-"` - nmapInitOnce *sync.Once `gorm:"-"` - ReverseProxyFreeDomainNonce string } -func (a *Account) InitOnce() { - a.nmapInitOnce = &sync.Once{} -} - // this class is used by gorm only type PrimaryAccountInfo struct { IsDomainPrimaryAccount bool @@ -155,108 +146,6 @@ func (o AccountOnboarding) IsEqual(onboarding AccountOnboarding) bool { o.SignupFormPending == onboarding.SignupFormPending } -// GetRoutesToSync returns the enabled routes for the peer ID and the routes -// from the ACL peers that have distribution groups associated with the peer ID. -// Please mind, that the returned route.Route objects will contain Peer.Key instead of Peer.ID. -func (a *Account) GetRoutesToSync(ctx context.Context, peerID string, aclPeers []*nbpeer.Peer, peerGroups LookupMap) []*route.Route { - routes, peerDisabledRoutes := a.getRoutingPeerRoutes(ctx, peerID) - peerRoutesMembership := make(LookupMap) - for _, r := range append(routes, peerDisabledRoutes...) { - peerRoutesMembership[string(r.GetHAUniqueID())] = struct{}{} - } - - for _, peer := range aclPeers { - activeRoutes, _ := a.getRoutingPeerRoutes(ctx, peer.ID) - groupFilteredRoutes := a.filterRoutesByGroups(activeRoutes, peerGroups) - filteredRoutes := a.filterRoutesFromPeersOfSameHAGroup(groupFilteredRoutes, peerRoutesMembership) - routes = append(routes, filteredRoutes...) - } - - return routes -} - -// filterRoutesFromPeersOfSameHAGroup filters and returns a list of routes that don't share the same HA route membership -func (a *Account) filterRoutesFromPeersOfSameHAGroup(routes []*route.Route, peerMemberships LookupMap) []*route.Route { - var filteredRoutes []*route.Route - for _, r := range routes { - _, found := peerMemberships[string(r.GetHAUniqueID())] - if !found { - filteredRoutes = append(filteredRoutes, r) - } - } - return filteredRoutes -} - -// filterRoutesByGroups returns a list with routes that have distribution groups in the group's map -func (a *Account) filterRoutesByGroups(routes []*route.Route, groupListMap LookupMap) []*route.Route { - var filteredRoutes []*route.Route - for _, r := range routes { - for _, groupID := range r.Groups { - _, found := groupListMap[groupID] - if found { - filteredRoutes = append(filteredRoutes, r) - break - } - } - } - return filteredRoutes -} - -// getRoutingPeerRoutes returns the enabled and disabled lists of routes that the given routing peer serves -// Please mind, that the returned route.Route objects will contain Peer.Key instead of Peer.ID. -// If the given is not a routing peer, then the lists are empty. -func (a *Account) getRoutingPeerRoutes(ctx context.Context, peerID string) (enabledRoutes []*route.Route, disabledRoutes []*route.Route) { - - peer := a.GetPeer(peerID) - if peer == nil { - log.WithContext(ctx).Errorf("peer %s that doesn't exist under account %s", peerID, a.Id) - return enabledRoutes, disabledRoutes - } - - seenRoute := make(map[route.ID]struct{}) - - takeRoute := func(r *route.Route, id string) { - if _, ok := seenRoute[r.ID]; ok { - return - } - seenRoute[r.ID] = struct{}{} - - if r.Enabled { - r.Peer = peer.Key - enabledRoutes = append(enabledRoutes, r) - return - } - disabledRoutes = append(disabledRoutes, r) - } - - for _, r := range a.Routes { - for _, groupID := range r.PeerGroups { - group := a.GetGroup(groupID) - if group == nil { - log.WithContext(ctx).Errorf("route %s has peers group %s that doesn't exist under account %s", r.ID, groupID, a.Id) - continue - } - for _, id := range group.Peers { - if id != peerID { - continue - } - - newPeerRoute := r.Copy() - newPeerRoute.Peer = id - newPeerRoute.PeerGroups = nil - newPeerRoute.ID = route.ID(string(r.ID) + ":" + id) // we have to provide unique route id when distribute network map - takeRoute(newPeerRoute, id) - break - } - } - if r.Peer == peerID { - takeRoute(r.Copy(), peerID) - } - } - - return enabledRoutes, disabledRoutes -} - // GetRoutesByPrefixOrDomains return list of routes by account and route prefix func (a *Account) GetRoutesByPrefixOrDomains(prefix netip.Prefix, domains domain.List) []*route.Route { var routes []*route.Route @@ -276,106 +165,6 @@ func (a *Account) GetGroup(groupID string) *Group { return a.Groups[groupID] } -// GetPeerNetworkMap returns the networkmap for the given peer ID. -func (a *Account) GetPeerNetworkMap( - ctx context.Context, - peerID string, - peersCustomZone nbdns.CustomZone, - accountZones []*zones.Zone, - validatedPeersMap map[string]struct{}, - resourcePolicies map[string][]*Policy, - routers map[string]map[string]*routerTypes.NetworkRouter, - metrics *telemetry.AccountManagerMetrics, - groupIDToUserIDs map[string][]string, -) *NetworkMap { - start := time.Now() - peer := a.Peers[peerID] - if peer == nil { - return &NetworkMap{ - Network: a.Network.Copy(), - } - } - - if _, ok := validatedPeersMap[peerID]; !ok { - return &NetworkMap{ - Network: a.Network.Copy(), - } - } - - peerGroups := a.GetPeerGroups(peerID) - - aclPeers, firewallRules, authorizedUsers, enableSSH := a.GetPeerConnectionResources(ctx, peer, validatedPeersMap, groupIDToUserIDs) - // exclude expired peers - var peersToConnect []*nbpeer.Peer - var expiredPeers []*nbpeer.Peer - for _, p := range aclPeers { - expired, _ := p.LoginExpired(a.Settings.PeerLoginExpiration) - if a.Settings.PeerLoginExpirationEnabled && expired { - expiredPeers = append(expiredPeers, p) - continue - } - peersToConnect = append(peersToConnect, p) - } - - routesUpdate := a.GetRoutesToSync(ctx, peerID, peersToConnect, peerGroups) - routesFirewallRules := a.GetPeerRoutesFirewallRules(ctx, peerID, validatedPeersMap) - isRouter, networkResourcesRoutes, sourcePeers := a.GetNetworkResourcesRoutesToSync(ctx, peerID, resourcePolicies, routers) - var networkResourcesFirewallRules []*RouteFirewallRule - if isRouter { - networkResourcesFirewallRules = a.GetPeerNetworkResourceFirewallRules(ctx, peer, validatedPeersMap, networkResourcesRoutes, resourcePolicies) - } - peersToConnectIncludingRouters := a.addNetworksRoutingPeers(networkResourcesRoutes, peer, peersToConnect, expiredPeers, isRouter, sourcePeers) - - dnsManagementStatus := a.getPeerDNSManagementStatus(peerID) - dnsUpdate := nbdns.Config{ - ServiceEnable: dnsManagementStatus, - } - - if dnsManagementStatus { - var zones []nbdns.CustomZone - - if peersCustomZone.Domain != "" { - records := filterZoneRecordsForPeers(peer, peersCustomZone, peersToConnectIncludingRouters, expiredPeers) - zones = append(zones, nbdns.CustomZone{ - Domain: peersCustomZone.Domain, - Records: records, - }) - } - - filteredAccountZones := filterPeerAppliedZones(ctx, accountZones, peerGroups) - zones = append(zones, filteredAccountZones...) - - dnsUpdate.CustomZones = zones - dnsUpdate.NameServerGroups = getPeerNSGroups(a, peerID) - } - - nm := &NetworkMap{ - Peers: peersToConnectIncludingRouters, - Network: a.Network.Copy(), - Routes: slices.Concat(networkResourcesRoutes, routesUpdate), - DNSConfig: dnsUpdate, - OfflinePeers: expiredPeers, - FirewallRules: firewallRules, - RoutesFirewallRules: slices.Concat(networkResourcesFirewallRules, routesFirewallRules), - AuthorizedUsers: authorizedUsers, - EnableSSH: enableSSH, - } - - if metrics != nil { - objectCount := int64(len(peersToConnectIncludingRouters) + len(expiredPeers) + len(routesUpdate) + len(networkResourcesRoutes) + len(firewallRules) + +len(networkResourcesFirewallRules) + len(routesFirewallRules)) - metrics.CountNetworkMapObjects(objectCount) - metrics.CountGetPeerNetworkMapDuration(time.Since(start)) - - if objectCount > 5000 { - log.WithContext(ctx).Tracef("account: %s has a total resource count of %d objects, "+ - "peers to connect: %d, expired peers: %d, routes: %d, firewall rules: %d, network resources routes: %d, network resources firewall rules: %d, routes firewall rules: %d", - a.Id, objectCount, len(peersToConnectIncludingRouters), len(expiredPeers), len(routesUpdate), len(firewallRules), len(networkResourcesRoutes), len(networkResourcesFirewallRules), len(routesFirewallRules)) - } - } - - return nm -} - func (a *Account) addNetworksRoutingPeers( networkResourcesRoutes []*route.Route, peer *nbpeer.Peer, @@ -421,39 +210,6 @@ func (a *Account) addNetworksRoutingPeers( return peersToConnect } -func getPeerNSGroups(account *Account, peerID string) []*nbdns.NameServerGroup { - groupList := account.GetPeerGroups(peerID) - - var peerNSGroups []*nbdns.NameServerGroup - - for _, nsGroup := range account.NameServerGroups { - if !nsGroup.Enabled { - continue - } - for _, gID := range nsGroup.Groups { - _, found := groupList[gID] - if found { - if !peerIsNameserver(account.GetPeer(peerID), nsGroup) { - peerNSGroups = append(peerNSGroups, nsGroup.Copy()) - break - } - } - } - } - - return peerNSGroups -} - -// peerIsNameserver returns true if the peer is a nameserver for a nsGroup -func peerIsNameserver(peer *nbpeer.Peer, nsGroup *nbdns.NameServerGroup) bool { - for _, ns := range nsGroup.NameServers { - if peer.IP.Equal(ns.IP.AsSlice()) { - return true - } - } - return false -} - func AddPeerLabelsToAccount(ctx context.Context, account *Account, peerLabels LookupMap) { for _, peer := range account.Peers { label, err := GetPeerHostLabel(peer.Name, peerLabels) @@ -800,19 +556,6 @@ func (a *Account) GetPeerGroupsList(peerID string) []string { return grps } -func (a *Account) getPeerDNSManagementStatus(peerID string) bool { - peerGroups := a.GetPeerGroups(peerID) - enabled := true - for _, groupID := range a.DNSSettings.DisabledManagementGroups { - _, found := peerGroups[groupID] - if found { - enabled = false - break - } - } - return enabled -} - func (a *Account) GetPeerGroups(peerID string) LookupMap { groupList := make(LookupMap) for groupID, group := range a.Groups { @@ -941,8 +684,6 @@ func (a *Account) Copy() *Account { NetworkResources: networkResources, Services: services, Onboarding: a.Onboarding, - NetworkMapCache: a.NetworkMapCache, - nmapInitOnce: a.nmapInitOnce, Domains: domains, } } @@ -1304,31 +1045,6 @@ func (a *Account) GetPostureChecks(postureChecksID string) *posture.Checks { return nil } -// GetPeerRoutesFirewallRules gets the routes firewall rules associated with a routing peer ID for the account. -func (a *Account) GetPeerRoutesFirewallRules(ctx context.Context, peerID string, validatedPeersMap map[string]struct{}) []*RouteFirewallRule { - routesFirewallRules := make([]*RouteFirewallRule, 0, len(a.Routes)) - - enabledRoutes, _ := a.getRoutingPeerRoutes(ctx, peerID) - for _, route := range enabledRoutes { - // If no access control groups are specified, accept all traffic. - if len(route.AccessControlGroups) == 0 { - defaultPermit := getDefaultPermit(route) - routesFirewallRules = append(routesFirewallRules, defaultPermit...) - continue - } - - distributionPeers := a.getDistributionGroupsPeers(route) - - for _, accessGroup := range route.AccessControlGroups { - policies := GetAllRoutePoliciesFromGroups(a, []string{accessGroup}) - rules := a.getRouteFirewallRules(ctx, peerID, policies, route, validatedPeersMap, distributionPeers) - routesFirewallRules = append(routesFirewallRules, rules...) - } - } - - return routesFirewallRules -} - func (a *Account) getRouteFirewallRules(ctx context.Context, peerID string, policies []*Policy, route *route.Route, validatedPeersMap map[string]struct{}, distributionPeers map[string]struct{}) []*RouteFirewallRule { var fwRules []*RouteFirewallRule for _, policy := range policies { @@ -1387,50 +1103,6 @@ func (a *Account) getRulePeers(rule *PolicyRule, postureChecks []string, peerID return distributionGroupPeers } -func (a *Account) getDistributionGroupsPeers(route *route.Route) map[string]struct{} { - distPeers := make(map[string]struct{}) - for _, id := range route.Groups { - group := a.Groups[id] - if group == nil { - continue - } - - for _, pID := range group.Peers { - distPeers[pID] = struct{}{} - } - } - return distPeers -} - -func getDefaultPermit(route *route.Route) []*RouteFirewallRule { - var rules []*RouteFirewallRule - - sources := []string{"0.0.0.0/0"} - if route.Network.Addr().Is6() { - sources = []string{"::/0"} - } - rule := RouteFirewallRule{ - SourceRanges: sources, - Action: string(PolicyTrafficActionAccept), - Destination: route.Network.String(), - Protocol: string(PolicyRuleProtocolALL), - Domains: route.Domains, - IsDynamic: route.IsDynamic(), - RouteID: route.ID, - } - - rules = append(rules, &rule) - - // dynamic routes always contain an IPv4 placeholder as destination, hence we must add IPv6 rules additionally - if route.IsDynamic() { - ruleV6 := rule - ruleV6.SourceRanges = []string{"::/0"} - rules = append(rules, &ruleV6) - } - - return rules -} - // GetAllRoutePoliciesFromGroups retrieves route policies associated with the specified access control groups // and returns a list of policies that have rules with destinations matching the specified groups. func GetAllRoutePoliciesFromGroups(account *Account, accessControlGroups []string) []*Policy { @@ -1508,65 +1180,6 @@ func (a *Account) GetResourcePoliciesMap() map[string][]*Policy { return resourcePolicies } -// GetNetworkResourcesRoutesToSync returns network routes for syncing with a specific peer and its ACL peers. -func (a *Account) GetNetworkResourcesRoutesToSync(ctx context.Context, peerID string, resourcePolicies map[string][]*Policy, routers map[string]map[string]*routerTypes.NetworkRouter) (bool, []*route.Route, map[string]struct{}) { - var isRoutingPeer bool - var routes []*route.Route - allSourcePeers := make(map[string]struct{}, len(a.Peers)) - - for _, resource := range a.NetworkResources { - if !resource.Enabled { - continue - } - - var addSourcePeers bool - - networkRoutingPeers, exists := routers[resource.NetworkID] - if exists { - if router, ok := networkRoutingPeers[peerID]; ok { - isRoutingPeer, addSourcePeers = true, true - routes = append(routes, a.getNetworkResourcesRoutes(resource, peerID, router, resourcePolicies)...) - } - } - - addedResourceRoute := false - for _, policy := range resourcePolicies[resource.ID] { - var peers []string - if policy.Rules[0].SourceResource.Type == ResourceTypePeer && policy.Rules[0].SourceResource.ID != "" { - peers = []string{policy.Rules[0].SourceResource.ID} - } else { - peers = a.getUniquePeerIDsFromGroupsIDs(ctx, policy.SourceGroups()) - } - if addSourcePeers { - for _, pID := range a.getPostureValidPeers(peers, policy.SourcePostureChecks) { - allSourcePeers[pID] = struct{}{} - } - } else if slices.Contains(peers, peerID) && a.validatePostureChecksOnPeer(ctx, policy.SourcePostureChecks, peerID) { - // add routes for the resource if the peer is in the distribution group - for peerId, router := range networkRoutingPeers { - routes = append(routes, a.getNetworkResourcesRoutes(resource, peerId, router, resourcePolicies)...) - } - addedResourceRoute = true - } - if addedResourceRoute { - break - } - } - } - - return isRoutingPeer, routes, allSourcePeers -} - -func (a *Account) getPostureValidPeers(inputPeers []string, postureChecksIDs []string) []string { - var dest []string - for _, peerID := range inputPeers { - if a.validatePostureChecksOnPeer(context.Background(), postureChecksIDs, peerID) { - dest = append(dest, peerID) - } - } - return dest -} - func (a *Account) getUniquePeerIDsFromGroupsIDs(ctx context.Context, groups []string) []string { peerIDs := make(map[string]struct{}, len(groups)) // we expect at least one peer per group as initial capacity for _, groupID := range groups { @@ -1658,22 +1271,6 @@ func (a *Account) GetPoliciesAppliedInNetwork(networkID string) []string { return result } -// getNetworkResourcesRoutes convert the network resources list to routes list. -func (a *Account) getNetworkResourcesRoutes(resource *resourceTypes.NetworkResource, peerId string, router *routerTypes.NetworkRouter, resourcePolicies map[string][]*Policy) []*route.Route { - resourceAppliedPolicies := resourcePolicies[resource.ID] - - var routes []*route.Route - // distribute the resource routes only if there is policy applied to it - if len(resourceAppliedPolicies) > 0 { - peer := a.GetPeer(peerId) - if peer != nil { - routes = append(routes, resource.ToRoute(peer, router)) - } - } - - return routes -} - func (a *Account) GetResourceRoutersMap() map[string]map[string]*routerTypes.NetworkRouter { routers := make(map[string]map[string]*routerTypes.NetworkRouter) diff --git a/management/server/types/account_test.go b/management/server/types/account_test.go index 00ba29b7f..9b1c9e31d 100644 --- a/management/server/types/account_test.go +++ b/management/server/types/account_test.go @@ -4,8 +4,6 @@ import ( "context" "fmt" "net" - "net/netip" - "slices" "testing" "github.com/miekg/dns" @@ -19,7 +17,6 @@ import ( routerTypes "github.com/netbirdio/netbird/management/server/networks/routers/types" networkTypes "github.com/netbirdio/netbird/management/server/networks/types" nbpeer "github.com/netbirdio/netbird/management/server/peer" - "github.com/netbirdio/netbird/management/server/posture" "github.com/netbirdio/netbird/route" ) @@ -451,402 +448,6 @@ func Test_AddNetworksRoutingPeersHandlesNoMissingPeers(t *testing.T) { require.Len(t, result, 0) } -const ( - accID = "accountID" - network1ID = "network1ID" - group1ID = "group1" - accNetResourcePeer1ID = "peer1" - accNetResourcePeer2ID = "peer2" - accNetResourceRouter1ID = "router1" - accNetResource1ID = "resource1ID" - accNetResourceRestrictPostureCheckID = "restrictPostureCheck" - accNetResourceRelaxedPostureCheckID = "relaxedPostureCheck" - accNetResourceLockedPostureCheckID = "lockedPostureCheck" - accNetResourceLinuxPostureCheckID = "linuxPostureCheck" -) - -var ( - accNetResourcePeer1IP = net.IP{192, 168, 1, 1} - accNetResourcePeer2IP = net.IP{192, 168, 1, 2} - accNetResourceRouter1IP = net.IP{192, 168, 1, 3} - accNetResourceValidPeers = map[string]struct{}{accNetResourcePeer1ID: {}, accNetResourcePeer2ID: {}} -) - -func getBasicAccountsWithResource() *Account { - return &Account{ - Id: accID, - Peers: map[string]*nbpeer.Peer{ - accNetResourcePeer1ID: { - ID: accNetResourcePeer1ID, - AccountID: accID, - Key: "peer1Key", - IP: accNetResourcePeer1IP, - Meta: nbpeer.PeerSystemMeta{ - GoOS: "linux", - WtVersion: "0.35.1", - KernelVersion: "4.4.0", - }, - }, - accNetResourcePeer2ID: { - ID: accNetResourcePeer2ID, - AccountID: accID, - Key: "peer2Key", - IP: accNetResourcePeer2IP, - Meta: nbpeer.PeerSystemMeta{ - GoOS: "windows", - WtVersion: "0.34.1", - KernelVersion: "4.4.0", - }, - }, - accNetResourceRouter1ID: { - ID: accNetResourceRouter1ID, - AccountID: accID, - Key: "router1Key", - IP: accNetResourceRouter1IP, - Meta: nbpeer.PeerSystemMeta{ - GoOS: "linux", - WtVersion: "0.35.1", - KernelVersion: "4.4.0", - }, - }, - }, - Groups: map[string]*Group{ - group1ID: { - ID: group1ID, - Peers: []string{accNetResourcePeer1ID, accNetResourcePeer2ID}, - }, - }, - Networks: []*networkTypes.Network{ - { - ID: network1ID, - AccountID: accID, - Name: "network1", - }, - }, - NetworkRouters: []*routerTypes.NetworkRouter{ - { - ID: accNetResourceRouter1ID, - NetworkID: network1ID, - AccountID: accID, - Peer: accNetResourceRouter1ID, - PeerGroups: []string{}, - Masquerade: false, - Metric: 100, - Enabled: true, - }, - }, - NetworkResources: []*resourceTypes.NetworkResource{ - { - ID: accNetResource1ID, - AccountID: accID, - NetworkID: network1ID, - Address: "10.10.10.0/24", - Prefix: netip.MustParsePrefix("10.10.10.0/24"), - Type: resourceTypes.NetworkResourceType("subnet"), - Enabled: true, - }, - }, - Policies: []*Policy{ - { - ID: "policy1ID", - AccountID: accID, - Enabled: true, - Rules: []*PolicyRule{ - { - ID: "rule1ID", - Enabled: true, - Sources: []string{group1ID}, - DestinationResource: Resource{ - ID: accNetResource1ID, - Type: "Host", - }, - Protocol: PolicyRuleProtocolTCP, - Ports: []string{"80"}, - Action: PolicyTrafficActionAccept, - }, - }, - SourcePostureChecks: nil, - }, - }, - PostureChecks: []*posture.Checks{ - { - ID: accNetResourceRestrictPostureCheckID, - Name: accNetResourceRestrictPostureCheckID, - Checks: posture.ChecksDefinition{ - NBVersionCheck: &posture.NBVersionCheck{ - MinVersion: "0.35.0", - }, - }, - }, - { - ID: accNetResourceRelaxedPostureCheckID, - Name: accNetResourceRelaxedPostureCheckID, - Checks: posture.ChecksDefinition{ - NBVersionCheck: &posture.NBVersionCheck{ - MinVersion: "0.0.1", - }, - }, - }, - { - ID: accNetResourceLockedPostureCheckID, - Name: accNetResourceLockedPostureCheckID, - Checks: posture.ChecksDefinition{ - NBVersionCheck: &posture.NBVersionCheck{ - MinVersion: "7.7.7", - }, - }, - }, - { - ID: accNetResourceLinuxPostureCheckID, - Name: accNetResourceLinuxPostureCheckID, - Checks: posture.ChecksDefinition{ - OSVersionCheck: &posture.OSVersionCheck{ - Linux: &posture.MinKernelVersionCheck{ - MinKernelVersion: "0.0.0"}, - }, - }, - }, - }, - } -} - -func Test_NetworksNetMapGenWithNoPostureChecks(t *testing.T) { - account := getBasicAccountsWithResource() - - // all peers should match the policy - - // validate for peer1 - isRouter, networkResourcesRoutes, sourcePeers := account.GetNetworkResourcesRoutesToSync(context.Background(), accNetResourcePeer1ID, account.GetResourcePoliciesMap(), account.GetResourceRoutersMap()) - assert.False(t, isRouter, "expected router status") - assert.Len(t, networkResourcesRoutes, 1, "expected network resource route don't match") - assert.Len(t, sourcePeers, 0, "expected source peers don't match") - - // validate for peer2 - isRouter, networkResourcesRoutes, sourcePeers = account.GetNetworkResourcesRoutesToSync(context.Background(), accNetResourcePeer2ID, account.GetResourcePoliciesMap(), account.GetResourceRoutersMap()) - assert.False(t, isRouter, "expected router status") - assert.Len(t, networkResourcesRoutes, 1, "expected network resource route don't match") - assert.Len(t, sourcePeers, 0, "expected source peers don't match") - - // validate routes for router1 - isRouter, networkResourcesRoutes, sourcePeers = account.GetNetworkResourcesRoutesToSync(context.Background(), accNetResourceRouter1ID, account.GetResourcePoliciesMap(), account.GetResourceRoutersMap()) - assert.True(t, isRouter, "should be router") - assert.Len(t, networkResourcesRoutes, 1, "expected network resource route don't match") - assert.Len(t, sourcePeers, 2, "expected source peers don't match") - assert.NotNil(t, sourcePeers[accNetResourcePeer1ID], "expected source peers don't match") - assert.NotNil(t, sourcePeers[accNetResourcePeer2ID], "expected source peers don't match") - - // validate rules for router1 - rules := account.GetPeerNetworkResourceFirewallRules(context.Background(), account.Peers[accNetResourceRouter1ID], accNetResourceValidPeers, networkResourcesRoutes, account.GetResourcePoliciesMap()) - assert.Len(t, rules, 1, "expected rules count don't match") - assert.Equal(t, uint16(80), rules[0].Port, "should have port 80") - assert.Equal(t, "tcp", rules[0].Protocol, "should have protocol tcp") - if !slices.Contains(rules[0].SourceRanges, accNetResourcePeer1IP.String()+"/32") { - t.Errorf("%s should have source range of peer1 %s", rules[0].SourceRanges, accNetResourcePeer1IP.String()) - } - if !slices.Contains(rules[0].SourceRanges, accNetResourcePeer2IP.String()+"/32") { - t.Errorf("%s should have source range of peer2 %s", rules[0].SourceRanges, accNetResourcePeer2IP.String()) - } -} - -func Test_NetworksNetMapGenWithPostureChecks(t *testing.T) { - account := getBasicAccountsWithResource() - - // should allow peer1 to match the policy - policy := account.Policies[0] - policy.SourcePostureChecks = []string{accNetResourceRestrictPostureCheckID} - - // validate for peer1 - isRouter, networkResourcesRoutes, sourcePeers := account.GetNetworkResourcesRoutesToSync(context.Background(), accNetResourcePeer1ID, account.GetResourcePoliciesMap(), account.GetResourceRoutersMap()) - assert.False(t, isRouter, "expected router status") - assert.Len(t, networkResourcesRoutes, 1, "expected network resource route don't match") - assert.Len(t, sourcePeers, 0, "expected source peers don't match") - - // validate for peer2 - isRouter, networkResourcesRoutes, sourcePeers = account.GetNetworkResourcesRoutesToSync(context.Background(), accNetResourcePeer2ID, account.GetResourcePoliciesMap(), account.GetResourceRoutersMap()) - assert.False(t, isRouter, "expected router status") - assert.Len(t, networkResourcesRoutes, 0, "expected network resource route don't match") - assert.Len(t, sourcePeers, 0, "expected source peers don't match") - - // validate routes for router1 - isRouter, networkResourcesRoutes, sourcePeers = account.GetNetworkResourcesRoutesToSync(context.Background(), accNetResourceRouter1ID, account.GetResourcePoliciesMap(), account.GetResourceRoutersMap()) - assert.True(t, isRouter, "should be router") - assert.Len(t, networkResourcesRoutes, 1, "expected network resource route don't match") - assert.Len(t, sourcePeers, 1, "expected source peers don't match") - assert.NotNil(t, sourcePeers[accNetResourcePeer1ID], "expected source peers don't match") - - // validate rules for router1 - rules := account.GetPeerNetworkResourceFirewallRules(context.Background(), account.Peers[accNetResourceRouter1ID], accNetResourceValidPeers, networkResourcesRoutes, account.GetResourcePoliciesMap()) - assert.Len(t, rules, 1, "expected rules count don't match") - assert.Equal(t, uint16(80), rules[0].Port, "should have port 80") - assert.Equal(t, "tcp", rules[0].Protocol, "should have protocol tcp") - if !slices.Contains(rules[0].SourceRanges, accNetResourcePeer1IP.String()+"/32") { - t.Errorf("%s should have source range of peer1 %s", rules[0].SourceRanges, accNetResourcePeer1IP.String()) - } - if slices.Contains(rules[0].SourceRanges, accNetResourcePeer2IP.String()+"/32") { - t.Errorf("%s should not have source range of peer2 %s", rules[0].SourceRanges, accNetResourcePeer2IP.String()) - } -} - -func Test_NetworksNetMapGenWithNoMatchedPostureChecks(t *testing.T) { - account := getBasicAccountsWithResource() - - // should not match any peer - policy := account.Policies[0] - policy.SourcePostureChecks = []string{accNetResourceLockedPostureCheckID} - - // validate for peer1 - isRouter, networkResourcesRoutes, sourcePeers := account.GetNetworkResourcesRoutesToSync(context.Background(), accNetResourcePeer1ID, account.GetResourcePoliciesMap(), account.GetResourceRoutersMap()) - assert.False(t, isRouter, "expected router status") - assert.Len(t, networkResourcesRoutes, 0, "expected network resource route don't match") - assert.Len(t, sourcePeers, 0, "expected source peers don't match") - - // validate for peer2 - isRouter, networkResourcesRoutes, sourcePeers = account.GetNetworkResourcesRoutesToSync(context.Background(), accNetResourcePeer2ID, account.GetResourcePoliciesMap(), account.GetResourceRoutersMap()) - assert.False(t, isRouter, "expected router status") - assert.Len(t, networkResourcesRoutes, 0, "expected network resource route don't match") - assert.Len(t, sourcePeers, 0, "expected source peers don't match") - - // validate routes for router1 - isRouter, networkResourcesRoutes, sourcePeers = account.GetNetworkResourcesRoutesToSync(context.Background(), accNetResourceRouter1ID, account.GetResourcePoliciesMap(), account.GetResourceRoutersMap()) - assert.True(t, isRouter, "should be router") - assert.Len(t, networkResourcesRoutes, 1, "expected network resource route don't match") - assert.Len(t, sourcePeers, 0, "expected source peers don't match") - - // validate rules for router1 - rules := account.GetPeerNetworkResourceFirewallRules(context.Background(), account.Peers[accNetResourceRouter1ID], accNetResourceValidPeers, networkResourcesRoutes, account.GetResourcePoliciesMap()) - assert.Len(t, rules, 0, "expected rules count don't match") -} - -func Test_NetworksNetMapGenWithTwoPoliciesAndPostureChecks(t *testing.T) { - account := getBasicAccountsWithResource() - - // should allow peer1 to match the policy - policy := account.Policies[0] - policy.SourcePostureChecks = []string{accNetResourceRestrictPostureCheckID} - - // should allow peer1 and peer2 to match the policy - newPolicy := &Policy{ - ID: "policy2ID", - AccountID: accID, - Enabled: true, - Rules: []*PolicyRule{ - { - ID: "policy2ID", - Enabled: true, - Sources: []string{group1ID}, - DestinationResource: Resource{ - ID: accNetResource1ID, - Type: "Host", - }, - Protocol: PolicyRuleProtocolTCP, - Ports: []string{"22"}, - Action: PolicyTrafficActionAccept, - }, - }, - SourcePostureChecks: []string{accNetResourceRelaxedPostureCheckID}, - } - - account.Policies = append(account.Policies, newPolicy) - - // validate for peer1 - isRouter, networkResourcesRoutes, sourcePeers := account.GetNetworkResourcesRoutesToSync(context.Background(), accNetResourcePeer1ID, account.GetResourcePoliciesMap(), account.GetResourceRoutersMap()) - assert.False(t, isRouter, "expected router status") - assert.Len(t, networkResourcesRoutes, 1, "expected network resource route don't match") - assert.Len(t, sourcePeers, 0, "expected source peers don't match") - - // validate for peer2 - isRouter, networkResourcesRoutes, sourcePeers = account.GetNetworkResourcesRoutesToSync(context.Background(), accNetResourcePeer2ID, account.GetResourcePoliciesMap(), account.GetResourceRoutersMap()) - assert.False(t, isRouter, "expected router status") - assert.Len(t, networkResourcesRoutes, 1, "expected network resource route don't match") - assert.Len(t, sourcePeers, 0, "expected source peers don't match") - - // validate routes for router1 - isRouter, networkResourcesRoutes, sourcePeers = account.GetNetworkResourcesRoutesToSync(context.Background(), accNetResourceRouter1ID, account.GetResourcePoliciesMap(), account.GetResourceRoutersMap()) - assert.True(t, isRouter, "should be router") - assert.Len(t, networkResourcesRoutes, 1, "expected network resource route don't match") - assert.Len(t, sourcePeers, 2, "expected source peers don't match") - assert.NotNil(t, sourcePeers[accNetResourcePeer1ID], "expected source peers don't match") - assert.NotNil(t, sourcePeers[accNetResourcePeer2ID], "expected source peers don't match") - - // validate rules for router1 - rules := account.GetPeerNetworkResourceFirewallRules(context.Background(), account.Peers[accNetResourceRouter1ID], accNetResourceValidPeers, networkResourcesRoutes, account.GetResourcePoliciesMap()) - assert.Len(t, rules, 2, "expected rules count don't match") - assert.Equal(t, uint16(80), rules[0].Port, "should have port 80") - assert.Equal(t, "tcp", rules[0].Protocol, "should have protocol tcp") - if !slices.Contains(rules[0].SourceRanges, accNetResourcePeer1IP.String()+"/32") { - t.Errorf("%s should have source range of peer1 %s", rules[0].SourceRanges, accNetResourcePeer1IP.String()) - } - if slices.Contains(rules[0].SourceRanges, accNetResourcePeer2IP.String()+"/32") { - t.Errorf("%s should not have source range of peer2 %s", rules[0].SourceRanges, accNetResourcePeer2IP.String()) - } - - assert.Equal(t, uint16(22), rules[1].Port, "should have port 22") - assert.Equal(t, "tcp", rules[1].Protocol, "should have protocol tcp") - if !slices.Contains(rules[1].SourceRanges, accNetResourcePeer1IP.String()+"/32") { - t.Errorf("%s should have source range of peer1 %s", rules[1].SourceRanges, accNetResourcePeer1IP.String()) - } - if !slices.Contains(rules[1].SourceRanges, accNetResourcePeer2IP.String()+"/32") { - t.Errorf("%s should have source range of peer2 %s", rules[1].SourceRanges, accNetResourcePeer2IP.String()) - } -} - -func Test_NetworksNetMapGenWithTwoPostureChecks(t *testing.T) { - account := getBasicAccountsWithResource() - - // two posture checks should match only the peers that match both checks - policy := account.Policies[0] - policy.SourcePostureChecks = []string{accNetResourceRelaxedPostureCheckID, accNetResourceLinuxPostureCheckID} - - // validate for peer1 - isRouter, networkResourcesRoutes, sourcePeers := account.GetNetworkResourcesRoutesToSync(context.Background(), accNetResourcePeer1ID, account.GetResourcePoliciesMap(), account.GetResourceRoutersMap()) - assert.False(t, isRouter, "expected router status") - assert.Len(t, networkResourcesRoutes, 1, "expected network resource route don't match") - assert.Len(t, sourcePeers, 0, "expected source peers don't match") - - // validate for peer2 - isRouter, networkResourcesRoutes, sourcePeers = account.GetNetworkResourcesRoutesToSync(context.Background(), accNetResourcePeer2ID, account.GetResourcePoliciesMap(), account.GetResourceRoutersMap()) - assert.False(t, isRouter, "expected router status") - assert.Len(t, networkResourcesRoutes, 0, "expected network resource route don't match") - assert.Len(t, sourcePeers, 0, "expected source peers don't match") - - // validate routes for router1 - isRouter, networkResourcesRoutes, sourcePeers = account.GetNetworkResourcesRoutesToSync(context.Background(), accNetResourceRouter1ID, account.GetResourcePoliciesMap(), account.GetResourceRoutersMap()) - assert.True(t, isRouter, "should be router") - assert.Len(t, networkResourcesRoutes, 1, "expected network resource route don't match") - assert.Len(t, sourcePeers, 1, "expected source peers don't match") - assert.NotNil(t, sourcePeers[accNetResourcePeer1ID], "expected source peers don't match") - - // validate rules for router1 - rules := account.GetPeerNetworkResourceFirewallRules(context.Background(), account.Peers[accNetResourceRouter1ID], accNetResourceValidPeers, networkResourcesRoutes, account.GetResourcePoliciesMap()) - assert.Len(t, rules, 1, "expected rules count don't match") - assert.Equal(t, uint16(80), rules[0].Port, "should have port 80") - assert.Equal(t, "tcp", rules[0].Protocol, "should have protocol tcp") - if !slices.Contains(rules[0].SourceRanges, accNetResourcePeer1IP.String()+"/32") { - t.Errorf("%s should have source range of peer1 %s", rules[0].SourceRanges, accNetResourcePeer1IP.String()) - } - if slices.Contains(rules[0].SourceRanges, accNetResourcePeer2IP.String()+"/32") { - t.Errorf("%s should not have source range of peer2 %s", rules[0].SourceRanges, accNetResourcePeer2IP.String()) - } -} - -func Test_NetworksNetMapGenShouldExcludeOtherRouters(t *testing.T) { - account := getBasicAccountsWithResource() - - account.Peers["router2Id"] = &nbpeer.Peer{Key: "router2Key", ID: "router2Id", AccountID: accID, IP: net.IP{192, 168, 1, 4}} - account.NetworkRouters = append(account.NetworkRouters, &routerTypes.NetworkRouter{ - ID: "router2Id", - NetworkID: network1ID, - AccountID: accID, - Peer: "router2Id", - }) - - // validate routes for router1 - isRouter, networkResourcesRoutes, sourcePeers := account.GetNetworkResourcesRoutesToSync(context.Background(), accNetResourceRouter1ID, account.GetResourcePoliciesMap(), account.GetResourceRoutersMap()) - assert.True(t, isRouter, "should be router") - assert.Len(t, networkResourcesRoutes, 1, "expected network resource route don't match") - assert.Len(t, sourcePeers, 2, "expected source peers don't match") -} - func Test_ExpandPortsAndRanges_SSHRuleExpansion(t *testing.T) { tests := []struct { name string diff --git a/management/server/types/holder.go b/management/server/types/holder.go deleted file mode 100644 index de8ac8110..000000000 --- a/management/server/types/holder.go +++ /dev/null @@ -1,47 +0,0 @@ -package types - -import ( - "context" - "sync" -) - -type Holder struct { - mu sync.RWMutex - accounts map[string]*Account -} - -func NewHolder() *Holder { - return &Holder{ - accounts: make(map[string]*Account), - } -} - -func (h *Holder) GetAccount(id string) *Account { - h.mu.RLock() - defer h.mu.RUnlock() - return h.accounts[id] -} - -func (h *Holder) AddAccount(account *Account) { - h.mu.Lock() - defer h.mu.Unlock() - a := h.accounts[account.Id] - if a != nil && a.Network.CurrentSerial() >= account.Network.CurrentSerial() { - return - } - h.accounts[account.Id] = account -} - -func (h *Holder) LoadOrStoreFunc(ctx context.Context, id string, accGetter func(context.Context, string) (*Account, error)) (*Account, error) { - h.mu.Lock() - defer h.mu.Unlock() - if acc, ok := h.accounts[id]; ok { - return acc, nil - } - account, err := accGetter(ctx, id) - if err != nil { - return nil, err - } - h.accounts[id] = account - return account, nil -} diff --git a/management/server/types/networkmap.go b/management/server/types/networkmap.go deleted file mode 100644 index 68c988a93..000000000 --- a/management/server/types/networkmap.go +++ /dev/null @@ -1,67 +0,0 @@ -package types - -import ( - "context" - - nbdns "github.com/netbirdio/netbird/dns" - "github.com/netbirdio/netbird/management/internals/modules/zones" - nbpeer "github.com/netbirdio/netbird/management/server/peer" - "github.com/netbirdio/netbird/management/server/telemetry" -) - -func (a *Account) initNetworkMapBuilder(validatedPeers map[string]struct{}) { - if a.NetworkMapCache != nil { - return - } - a.nmapInitOnce.Do(func() { - a.NetworkMapCache = NewNetworkMapBuilder(a, validatedPeers) - }) -} - -func (a *Account) InitNetworkMapBuilderIfNeeded(validatedPeers map[string]struct{}) { - a.initNetworkMapBuilder(validatedPeers) -} - -func (a *Account) GetPeerNetworkMapExp( - ctx context.Context, - peerID string, - peersCustomZone nbdns.CustomZone, - accountZones []*zones.Zone, - validatedPeers map[string]struct{}, - metrics *telemetry.AccountManagerMetrics, -) *NetworkMap { - a.initNetworkMapBuilder(validatedPeers) - return a.NetworkMapCache.GetPeerNetworkMap(ctx, peerID, peersCustomZone, accountZones, validatedPeers, metrics) -} - -func (a *Account) OnPeerAddedUpdNetworkMapCache(peerId string) error { - if a.NetworkMapCache == nil { - return nil - } - return a.NetworkMapCache.OnPeerAddedIncremental(a, peerId) -} - -func (a *Account) OnPeersAddedUpdNetworkMapCache(peerIds ...string) { - if a.NetworkMapCache == nil { - return - } - a.NetworkMapCache.EnqueuePeersForIncrementalAdd(a, peerIds...) -} - -func (a *Account) OnPeerDeletedUpdNetworkMapCache(peerId string) error { - if a.NetworkMapCache == nil { - return nil - } - return a.NetworkMapCache.OnPeerDeleted(a, peerId) -} - -func (a *Account) UpdatePeerInNetworkMapCache(peer *nbpeer.Peer) { - if a.NetworkMapCache == nil { - return - } - a.NetworkMapCache.UpdatePeer(peer) -} - -func (a *Account) RecalculateNetworkMapCache(validatedPeers map[string]struct{}) { - a.initNetworkMapBuilder(validatedPeers) -} diff --git a/management/server/types/networkmap_comparison_test.go b/management/server/types/networkmap_comparison_test.go deleted file mode 100644 index c5844cca0..000000000 --- a/management/server/types/networkmap_comparison_test.go +++ /dev/null @@ -1,592 +0,0 @@ -package types - -import ( - "context" - "encoding/json" - "fmt" - "net" - "net/netip" - "os" - "path/filepath" - "sort" - "testing" - "time" - - "github.com/stretchr/testify/require" - - nbdns "github.com/netbirdio/netbird/dns" - resourceTypes "github.com/netbirdio/netbird/management/server/networks/resources/types" - routerTypes "github.com/netbirdio/netbird/management/server/networks/routers/types" - networkTypes "github.com/netbirdio/netbird/management/server/networks/types" - nbpeer "github.com/netbirdio/netbird/management/server/peer" - "github.com/netbirdio/netbird/management/server/posture" - "github.com/netbirdio/netbird/route" -) - -func TestNetworkMapComponents_CompareWithLegacy(t *testing.T) { - account := createTestAccount() - ctx := context.Background() - - peerID := testingPeerID - validatedPeersMap := make(map[string]struct{}) - for i := range numPeers { - pid := fmt.Sprintf("peer-%d", i) - if pid == offlinePeerID { - continue - } - validatedPeersMap[pid] = struct{}{} - } - - peersCustomZone := nbdns.CustomZone{} - resourcePolicies := account.GetResourcePoliciesMap() - routers := account.GetResourceRoutersMap() - groupIDToUserIDs := account.GetActiveGroupUsers() - - legacyNetworkMap := account.GetPeerNetworkMap( - ctx, - peerID, - peersCustomZone, - nil, - validatedPeersMap, - resourcePolicies, - routers, - nil, - groupIDToUserIDs, - ) - - components := account.GetPeerNetworkMapComponents( - ctx, - peerID, - peersCustomZone, - nil, - validatedPeersMap, - resourcePolicies, - routers, - groupIDToUserIDs, - ) - - if components == nil { - t.Fatal("GetPeerNetworkMapComponents returned nil") - } - - newNetworkMap := CalculateNetworkMapFromComponents(ctx, components) - - if newNetworkMap == nil { - t.Fatal("CalculateNetworkMapFromComponents returned nil") - } - - compareNetworkMaps(t, legacyNetworkMap, newNetworkMap) -} - -func TestNetworkMapComponents_GoldenFileComparison(t *testing.T) { - account := createTestAccount() - ctx := context.Background() - - peerID := testingPeerID - validatedPeersMap := make(map[string]struct{}) - for i := range numPeers { - pid := fmt.Sprintf("peer-%d", i) - if pid == offlinePeerID { - continue - } - validatedPeersMap[pid] = struct{}{} - } - - peersCustomZone := nbdns.CustomZone{} - resourcePolicies := account.GetResourcePoliciesMap() - routers := account.GetResourceRoutersMap() - groupIDToUserIDs := account.GetActiveGroupUsers() - - legacyNetworkMap := account.GetPeerNetworkMap( - ctx, - peerID, - peersCustomZone, - nil, - validatedPeersMap, - resourcePolicies, - routers, - nil, - groupIDToUserIDs, - ) - - components := account.GetPeerNetworkMapComponents( - ctx, - peerID, - peersCustomZone, - nil, - validatedPeersMap, - resourcePolicies, - routers, - groupIDToUserIDs, - ) - - require.NotNil(t, components, "GetPeerNetworkMapComponents returned nil") - - newNetworkMap := CalculateNetworkMapFromComponents(ctx, components) - require.NotNil(t, newNetworkMap, "CalculateNetworkMapFromComponents returned nil") - - normalizeAndSortNetworkMap(legacyNetworkMap) - normalizeAndSortNetworkMap(newNetworkMap) - - componentsJSON, err := json.MarshalIndent(components, "", " ") - require.NoError(t, err, "error marshaling components to JSON") - - legacyJSON, err := json.MarshalIndent(legacyNetworkMap, "", " ") - require.NoError(t, err, "error marshaling legacy network map to JSON") - - newJSON, err := json.MarshalIndent(newNetworkMap, "", " ") - require.NoError(t, err, "error marshaling new network map to JSON") - - goldenDir := filepath.Join("testdata", "comparison") - err = os.MkdirAll(goldenDir, 0755) - require.NoError(t, err) - - legacyGoldenPath := filepath.Join(goldenDir, "legacy_networkmap.json") - err = os.WriteFile(legacyGoldenPath, legacyJSON, 0644) - require.NoError(t, err, "error writing legacy golden file") - - newGoldenPath := filepath.Join(goldenDir, "components_networkmap.json") - err = os.WriteFile(newGoldenPath, newJSON, 0644) - require.NoError(t, err, "error writing components golden file") - - componentsPath := filepath.Join(goldenDir, "components.json") - err = os.WriteFile(componentsPath, componentsJSON, 0644) - require.NoError(t, err, "error writing components golden file") - - require.JSONEq(t, string(legacyJSON), string(newJSON), - "NetworkMaps from legacy and components approaches do not match.\n"+ - "Legacy JSON saved to: %s\n"+ - "Components JSON saved to: %s", - legacyGoldenPath, newGoldenPath) - - t.Logf("✅ NetworkMaps are identical") - t.Logf(" Legacy NetworkMap: %s", legacyGoldenPath) - t.Logf(" Components NetworkMap: %s", newGoldenPath) -} - -func normalizeAndSortNetworkMap(nm *NetworkMap) { - if nm == nil { - return - } - - sort.Slice(nm.Peers, func(i, j int) bool { - return nm.Peers[i].ID < nm.Peers[j].ID - }) - - sort.Slice(nm.OfflinePeers, func(i, j int) bool { - return nm.OfflinePeers[i].ID < nm.OfflinePeers[j].ID - }) - - sort.Slice(nm.Routes, func(i, j int) bool { - return string(nm.Routes[i].ID) < string(nm.Routes[j].ID) - }) - - sort.Slice(nm.FirewallRules, func(i, j int) bool { - if nm.FirewallRules[i].PeerIP != nm.FirewallRules[j].PeerIP { - return nm.FirewallRules[i].PeerIP < nm.FirewallRules[j].PeerIP - } - if nm.FirewallRules[i].Direction != nm.FirewallRules[j].Direction { - return nm.FirewallRules[i].Direction < nm.FirewallRules[j].Direction - } - if nm.FirewallRules[i].Protocol != nm.FirewallRules[j].Protocol { - return nm.FirewallRules[i].Protocol < nm.FirewallRules[j].Protocol - } - if nm.FirewallRules[i].Port != nm.FirewallRules[j].Port { - return nm.FirewallRules[i].Port < nm.FirewallRules[j].Port - } - return nm.FirewallRules[i].PolicyID < nm.FirewallRules[j].PolicyID - }) - - for i := range nm.RoutesFirewallRules { - sort.Strings(nm.RoutesFirewallRules[i].SourceRanges) - } - - sort.Slice(nm.RoutesFirewallRules, func(i, j int) bool { - if nm.RoutesFirewallRules[i].Destination != nm.RoutesFirewallRules[j].Destination { - return nm.RoutesFirewallRules[i].Destination < nm.RoutesFirewallRules[j].Destination - } - - minLen := len(nm.RoutesFirewallRules[i].SourceRanges) - if len(nm.RoutesFirewallRules[j].SourceRanges) < minLen { - minLen = len(nm.RoutesFirewallRules[j].SourceRanges) - } - for k := 0; k < minLen; k++ { - if nm.RoutesFirewallRules[i].SourceRanges[k] != nm.RoutesFirewallRules[j].SourceRanges[k] { - return nm.RoutesFirewallRules[i].SourceRanges[k] < nm.RoutesFirewallRules[j].SourceRanges[k] - } - } - if len(nm.RoutesFirewallRules[i].SourceRanges) != len(nm.RoutesFirewallRules[j].SourceRanges) { - return len(nm.RoutesFirewallRules[i].SourceRanges) < len(nm.RoutesFirewallRules[j].SourceRanges) - } - - if string(nm.RoutesFirewallRules[i].RouteID) != string(nm.RoutesFirewallRules[j].RouteID) { - return string(nm.RoutesFirewallRules[i].RouteID) < string(nm.RoutesFirewallRules[j].RouteID) - } - - if nm.RoutesFirewallRules[i].PolicyID != nm.RoutesFirewallRules[j].PolicyID { - return nm.RoutesFirewallRules[i].PolicyID < nm.RoutesFirewallRules[j].PolicyID - } - - if nm.RoutesFirewallRules[i].Port != nm.RoutesFirewallRules[j].Port { - return nm.RoutesFirewallRules[i].Port < nm.RoutesFirewallRules[j].Port - } - - return nm.RoutesFirewallRules[i].Protocol < nm.RoutesFirewallRules[j].Protocol - }) - - if nm.DNSConfig.CustomZones != nil { - for i := range nm.DNSConfig.CustomZones { - sort.Slice(nm.DNSConfig.CustomZones[i].Records, func(a, b int) bool { - return nm.DNSConfig.CustomZones[i].Records[a].Name < nm.DNSConfig.CustomZones[i].Records[b].Name - }) - } - } - - if len(nm.DNSConfig.NameServerGroups) != 0 { - sort.Slice(nm.DNSConfig.NameServerGroups, func(a, b int) bool { - return nm.DNSConfig.NameServerGroups[a].Name < nm.DNSConfig.NameServerGroups[b].Name - }) - } -} - -func compareNetworkMaps(t *testing.T, legacy, current *NetworkMap) { - t.Helper() - - if legacy.Network.Serial != current.Network.Serial { - t.Errorf("Network Serial mismatch: legacy=%d, current=%d", legacy.Network.Serial, current.Network.Serial) - } - - if len(legacy.Peers) != len(current.Peers) { - t.Errorf("Peers count mismatch: legacy=%d, current=%d", len(legacy.Peers), len(current.Peers)) - } - - legacyPeerIDs := make(map[string]bool) - for _, p := range legacy.Peers { - legacyPeerIDs[p.ID] = true - } - - for _, p := range current.Peers { - if !legacyPeerIDs[p.ID] { - t.Errorf("Current NetworkMap contains peer %s not in legacy", p.ID) - } - } - - if len(legacy.OfflinePeers) != len(current.OfflinePeers) { - t.Errorf("OfflinePeers count mismatch: legacy=%d, current=%d", len(legacy.OfflinePeers), len(current.OfflinePeers)) - } - - if len(legacy.FirewallRules) != len(current.FirewallRules) { - t.Logf("FirewallRules count mismatch: legacy=%d, current=%d", len(legacy.FirewallRules), len(current.FirewallRules)) - } - - if len(legacy.Routes) != len(current.Routes) { - t.Logf("Routes count mismatch: legacy=%d, current=%d", len(legacy.Routes), len(current.Routes)) - } - - if len(legacy.RoutesFirewallRules) != len(current.RoutesFirewallRules) { - t.Logf("RoutesFirewallRules count mismatch: legacy=%d, current=%d", len(legacy.RoutesFirewallRules), len(current.RoutesFirewallRules)) - } - - if legacy.DNSConfig.ServiceEnable != current.DNSConfig.ServiceEnable { - t.Errorf("DNSConfig.ServiceEnable mismatch: legacy=%v, current=%v", legacy.DNSConfig.ServiceEnable, current.DNSConfig.ServiceEnable) - } -} - -const ( - numPeers = 100 - devGroupID = "group-dev" - opsGroupID = "group-ops" - allGroupID = "group-all" - routeID = route.ID("route-main") - routeHA1ID = route.ID("route-ha-1") - routeHA2ID = route.ID("route-ha-2") - policyIDDevOps = "policy-dev-ops" - policyIDAll = "policy-all" - policyIDPosture = "policy-posture" - policyIDDrop = "policy-drop" - postureCheckID = "posture-check-ver" - networkResourceID = "res-database" - networkID = "net-database" - networkRouterID = "router-database" - nameserverGroupID = "ns-group-main" - testingPeerID = "peer-60" - expiredPeerID = "peer-98" - offlinePeerID = "peer-99" - routingPeerID = "peer-95" - testAccountID = "account-comparison-test" -) - -func createTestAccount() *Account { - peers := make(map[string]*nbpeer.Peer) - devGroupPeers, opsGroupPeers, allGroupPeers := []string{}, []string{}, []string{} - - for i := range numPeers { - peerID := fmt.Sprintf("peer-%d", i) - ip := net.IP{100, 64, 0, byte(i + 1)} - wtVersion := "0.25.0" - if i%2 == 0 { - wtVersion = "0.40.0" - } - - p := &nbpeer.Peer{ - ID: peerID, IP: ip, Key: fmt.Sprintf("key-%s", peerID), DNSLabel: fmt.Sprintf("peer%d", i+1), - Status: &nbpeer.PeerStatus{Connected: true, LastSeen: time.Now()}, - UserID: "user-admin", Meta: nbpeer.PeerSystemMeta{WtVersion: wtVersion, GoOS: "linux"}, - } - - if peerID == expiredPeerID { - p.LoginExpirationEnabled = true - pastTimestamp := time.Now().Add(-2 * time.Hour) - p.LastLogin = &pastTimestamp - } - - peers[peerID] = p - allGroupPeers = append(allGroupPeers, peerID) - if i < numPeers/2 { - devGroupPeers = append(devGroupPeers, peerID) - } else { - opsGroupPeers = append(opsGroupPeers, peerID) - } - } - - groups := map[string]*Group{ - allGroupID: {ID: allGroupID, Name: "All", Peers: allGroupPeers}, - devGroupID: {ID: devGroupID, Name: "Developers", Peers: devGroupPeers}, - opsGroupID: {ID: opsGroupID, Name: "Operations", Peers: opsGroupPeers}, - } - - policies := []*Policy{ - { - ID: policyIDAll, Name: "Default-Allow", Enabled: true, - Rules: []*PolicyRule{{ - ID: policyIDAll, Name: "Allow All", Enabled: true, Action: PolicyTrafficActionAccept, - Protocol: PolicyRuleProtocolALL, Bidirectional: true, - Sources: []string{allGroupID}, Destinations: []string{allGroupID}, - }}, - }, - { - ID: policyIDDevOps, Name: "Dev to Ops Web Access", Enabled: true, - Rules: []*PolicyRule{{ - ID: policyIDDevOps, Name: "Dev -> Ops (HTTP Range)", Enabled: true, Action: PolicyTrafficActionAccept, - Protocol: PolicyRuleProtocolTCP, Bidirectional: false, - PortRanges: []RulePortRange{{Start: 8080, End: 8090}}, - Sources: []string{devGroupID}, Destinations: []string{opsGroupID}, - }}, - }, - { - ID: policyIDDrop, Name: "Drop DB traffic", Enabled: true, - Rules: []*PolicyRule{{ - ID: policyIDDrop, Name: "Drop DB", Enabled: true, Action: PolicyTrafficActionDrop, - Protocol: PolicyRuleProtocolTCP, Ports: []string{"5432"}, Bidirectional: true, - Sources: []string{devGroupID}, Destinations: []string{opsGroupID}, - }}, - }, - { - ID: policyIDPosture, Name: "Posture Check for DB Resource", Enabled: true, - SourcePostureChecks: []string{postureCheckID}, - Rules: []*PolicyRule{{ - ID: policyIDPosture, Name: "Allow DB Access", Enabled: true, Action: PolicyTrafficActionAccept, - Protocol: PolicyRuleProtocolALL, Bidirectional: true, - Sources: []string{opsGroupID}, DestinationResource: Resource{ID: networkResourceID}, - }}, - }, - } - - routes := map[route.ID]*route.Route{ - routeID: { - ID: routeID, Network: netip.MustParsePrefix("192.168.10.0/24"), - Peer: peers["peer-75"].Key, - PeerID: "peer-75", - Description: "Route to internal resource", Enabled: true, - PeerGroups: []string{devGroupID, opsGroupID}, - Groups: []string{devGroupID, opsGroupID}, - AccessControlGroups: []string{devGroupID}, - }, - routeHA1ID: { - ID: routeHA1ID, Network: netip.MustParsePrefix("10.10.0.0/16"), - Peer: peers["peer-80"].Key, - PeerID: "peer-80", - Description: "HA Route 1", Enabled: true, Metric: 1000, - PeerGroups: []string{allGroupID}, - Groups: []string{allGroupID}, - AccessControlGroups: []string{allGroupID}, - }, - routeHA2ID: { - ID: routeHA2ID, Network: netip.MustParsePrefix("10.10.0.0/16"), - Peer: peers["peer-90"].Key, - PeerID: "peer-90", - Description: "HA Route 2", Enabled: true, Metric: 900, - PeerGroups: []string{devGroupID, opsGroupID}, - Groups: []string{devGroupID, opsGroupID}, - AccessControlGroups: []string{allGroupID}, - }, - } - - account := &Account{ - Id: testAccountID, Peers: peers, Groups: groups, Policies: policies, Routes: routes, - Network: &Network{ - Identifier: "net-comparison-test", Net: net.IPNet{IP: net.IP{100, 64, 0, 0}, Mask: net.CIDRMask(16, 32)}, Serial: 1, - }, - DNSSettings: DNSSettings{DisabledManagementGroups: []string{opsGroupID}}, - NameServerGroups: map[string]*nbdns.NameServerGroup{ - nameserverGroupID: { - ID: nameserverGroupID, Name: "Main NS", Enabled: true, Groups: []string{devGroupID}, - NameServers: []nbdns.NameServer{{IP: netip.MustParseAddr("8.8.8.8"), NSType: nbdns.UDPNameServerType, Port: 53}}, - }, - }, - PostureChecks: []*posture.Checks{ - {ID: postureCheckID, Name: "Check version", Checks: posture.ChecksDefinition{ - NBVersionCheck: &posture.NBVersionCheck{MinVersion: "0.26.0"}, - }}, - }, - NetworkResources: []*resourceTypes.NetworkResource{ - {ID: networkResourceID, NetworkID: networkID, AccountID: testAccountID, Enabled: true, Address: "db.netbird.cloud"}, - }, - Networks: []*networkTypes.Network{{ID: networkID, Name: "DB Network", AccountID: testAccountID}}, - NetworkRouters: []*routerTypes.NetworkRouter{ - {ID: networkRouterID, NetworkID: networkID, Peer: routingPeerID, Enabled: true, AccountID: testAccountID}, - }, - Settings: &Settings{PeerLoginExpirationEnabled: true, PeerLoginExpiration: 1 * time.Hour}, - } - - for _, p := range account.Policies { - p.AccountID = account.Id - } - for _, r := range account.Routes { - r.AccountID = account.Id - } - - return account -} - -func BenchmarkLegacyNetworkMap(b *testing.B) { - account := createTestAccount() - ctx := context.Background() - peerID := testingPeerID - validatedPeersMap := make(map[string]struct{}) - for i := range numPeers { - pid := fmt.Sprintf("peer-%d", i) - if pid != offlinePeerID { - validatedPeersMap[pid] = struct{}{} - } - } - - peersCustomZone := nbdns.CustomZone{} - resourcePolicies := account.GetResourcePoliciesMap() - routers := account.GetResourceRoutersMap() - groupIDToUserIDs := account.GetActiveGroupUsers() - - b.ResetTimer() - for i := 0; i < b.N; i++ { - _ = account.GetPeerNetworkMap( - ctx, - peerID, - peersCustomZone, - nil, - validatedPeersMap, - resourcePolicies, - routers, - nil, - groupIDToUserIDs, - ) - } -} - -func BenchmarkComponentsNetworkMap(b *testing.B) { - account := createTestAccount() - ctx := context.Background() - peerID := testingPeerID - validatedPeersMap := make(map[string]struct{}) - for i := range numPeers { - pid := fmt.Sprintf("peer-%d", i) - if pid != offlinePeerID { - validatedPeersMap[pid] = struct{}{} - } - } - - peersCustomZone := nbdns.CustomZone{} - resourcePolicies := account.GetResourcePoliciesMap() - routers := account.GetResourceRoutersMap() - groupIDToUserIDs := account.GetActiveGroupUsers() - - b.ResetTimer() - for i := 0; i < b.N; i++ { - components := account.GetPeerNetworkMapComponents( - ctx, - peerID, - peersCustomZone, - nil, - validatedPeersMap, - resourcePolicies, - routers, - groupIDToUserIDs, - ) - _ = CalculateNetworkMapFromComponents(ctx, components) - } -} - -func BenchmarkComponentsCreation(b *testing.B) { - account := createTestAccount() - ctx := context.Background() - peerID := testingPeerID - validatedPeersMap := make(map[string]struct{}) - for i := range numPeers { - pid := fmt.Sprintf("peer-%d", i) - if pid != offlinePeerID { - validatedPeersMap[pid] = struct{}{} - } - } - - peersCustomZone := nbdns.CustomZone{} - resourcePolicies := account.GetResourcePoliciesMap() - routers := account.GetResourceRoutersMap() - groupIDToUserIDs := account.GetActiveGroupUsers() - - b.ResetTimer() - for i := 0; i < b.N; i++ { - _ = account.GetPeerNetworkMapComponents( - ctx, - peerID, - peersCustomZone, - nil, - validatedPeersMap, - resourcePolicies, - routers, - groupIDToUserIDs, - ) - } -} - -func BenchmarkCalculationFromComponents(b *testing.B) { - account := createTestAccount() - ctx := context.Background() - peerID := testingPeerID - validatedPeersMap := make(map[string]struct{}) - for i := range numPeers { - pid := fmt.Sprintf("peer-%d", i) - if pid != offlinePeerID { - validatedPeersMap[pid] = struct{}{} - } - } - - peersCustomZone := nbdns.CustomZone{} - resourcePolicies := account.GetResourcePoliciesMap() - routers := account.GetResourceRoutersMap() - groupIDToUserIDs := account.GetActiveGroupUsers() - - components := account.GetPeerNetworkMapComponents( - ctx, - peerID, - peersCustomZone, - nil, - validatedPeersMap, - resourcePolicies, - routers, - groupIDToUserIDs, - ) - - b.ResetTimer() - for i := 0; i < b.N; i++ { - _ = CalculateNetworkMapFromComponents(ctx, components) - } -} diff --git a/management/server/types/networkmap_components.go b/management/server/types/networkmap_components.go index 23d84a994..6f84c8d30 100644 --- a/management/server/types/networkmap_components.go +++ b/management/server/types/networkmap_components.go @@ -19,8 +19,6 @@ import ( "github.com/netbirdio/netbird/shared/management/domain" ) -const EnvNewNetworkMapCompacted = "NB_NETWORK_MAP_COMPACTED" - type NetworkMapComponents struct { PeerID string diff --git a/management/server/types/networkmap_components_test.go b/management/server/types/networkmap_components_test.go new file mode 100644 index 000000000..dde639ccb --- /dev/null +++ b/management/server/types/networkmap_components_test.go @@ -0,0 +1,787 @@ +package types_test + +import ( + "context" + "net" + "net/netip" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + nbdns "github.com/netbirdio/netbird/dns" + resourceTypes "github.com/netbirdio/netbird/management/server/networks/resources/types" + routerTypes "github.com/netbirdio/netbird/management/server/networks/routers/types" + networkTypes "github.com/netbirdio/netbird/management/server/networks/types" + nbpeer "github.com/netbirdio/netbird/management/server/peer" + "github.com/netbirdio/netbird/management/server/posture" + "github.com/netbirdio/netbird/management/server/types" + "github.com/netbirdio/netbird/route" +) + +func networkMapFromComponents(t *testing.T, account *types.Account, peerID string, validatedPeers map[string]struct{}) *types.NetworkMap { + t.Helper() + return account.GetPeerNetworkMapFromComponents( + context.Background(), + peerID, + account.GetPeersCustomZone(context.Background(), "netbird.io"), + nil, + validatedPeers, + account.GetResourcePoliciesMap(), + account.GetResourceRoutersMap(), + nil, + account.GetActiveGroupUsers(), + ) +} + +func allPeersValidated(account *types.Account, excludePeerIDs ...string) map[string]struct{} { + excludeSet := make(map[string]struct{}, len(excludePeerIDs)) + for _, id := range excludePeerIDs { + excludeSet[id] = struct{}{} + } + validated := make(map[string]struct{}, len(account.Peers)) + for id := range account.Peers { + if _, excluded := excludeSet[id]; !excluded { + validated[id] = struct{}{} + } + } + return validated +} + +func peerIDs(peers []*nbpeer.Peer) []string { + ids := make([]string, len(peers)) + for i, p := range peers { + ids[i] = p.ID + } + return ids +} + +func TestNetworkMapComponents_RegularPeerConnectivity(t *testing.T) { + account := createComponentTestAccount() + validated := allPeersValidated(account) + + nm := networkMapFromComponents(t, account, "peer-src-1", validated) + + assert.NotNil(t, nm) + assert.Contains(t, peerIDs(nm.Peers), "peer-dst-1", "should see peer from destination group via bidirectional policy") + assert.Contains(t, peerIDs(nm.Peers), "peer-router-1", "should see router peer via resource policy") + assert.NotContains(t, peerIDs(nm.Peers), "peer-src-1", "should not see itself") + assert.Empty(t, nm.OfflinePeers, "no expired peers expected") +} + +func TestNetworkMapComponents_IntraGroupConnectivity(t *testing.T) { + account := createComponentTestAccount() + account.Policies = append(account.Policies, &types.Policy{ + ID: "policy-intra-src", Name: "Intra-source connectivity", Enabled: true, AccountID: account.Id, + Rules: []*types.PolicyRule{{ + ID: "rule-intra-src", Name: "src <-> src", Enabled: true, + Action: types.PolicyTrafficActionAccept, Protocol: types.PolicyRuleProtocolALL, + Bidirectional: true, + Sources: []string{"group-src"}, Destinations: []string{"group-src"}, + }}, + }) + validated := allPeersValidated(account) + + nm := networkMapFromComponents(t, account, "peer-src-1", validated) + assert.Contains(t, peerIDs(nm.Peers), "peer-src-2", "should see peer from same group with intra-group policy") +} + +func TestNetworkMapComponents_FirewallRules(t *testing.T) { + account := createComponentTestAccount() + validated := allPeersValidated(account) + + nm := networkMapFromComponents(t, account, "peer-src-1", validated) + + require.NotEmpty(t, nm.FirewallRules, "firewall rules should be generated") + + var hasAcceptAll bool + for _, rule := range nm.FirewallRules { + if rule.Protocol == string(types.PolicyRuleProtocolALL) && rule.Action == string(types.PolicyTrafficActionAccept) { + hasAcceptAll = true + } + } + assert.True(t, hasAcceptAll, "should have an accept-all firewall rule from the base policy") +} + +func TestNetworkMapComponents_LoginExpiration(t *testing.T) { + account := createComponentTestAccount() + account.Settings.PeerLoginExpirationEnabled = true + account.Settings.PeerLoginExpiration = 1 * time.Hour + + expiredTime := time.Now().Add(-2 * time.Hour) + account.Peers["peer-dst-1"].LoginExpirationEnabled = true + account.Peers["peer-dst-1"].LastLogin = &expiredTime + + validated := allPeersValidated(account) + nm := networkMapFromComponents(t, account, "peer-src-1", validated) + + assert.Contains(t, peerIDs(nm.OfflinePeers), "peer-dst-1", "expired peer should be in OfflinePeers") + assert.NotContains(t, peerIDs(nm.Peers), "peer-dst-1", "expired peer should NOT be in active Peers") +} + +func TestNetworkMapComponents_InvalidatedPeerExcluded(t *testing.T) { + account := createComponentTestAccount() + validated := allPeersValidated(account, "peer-dst-1") + + nm := networkMapFromComponents(t, account, "peer-src-1", validated) + + assert.NotContains(t, peerIDs(nm.Peers), "peer-dst-1", "non-validated peer should be excluded") + assert.NotContains(t, peerIDs(nm.OfflinePeers), "peer-dst-1", "non-validated peer should not be in offline peers either") +} + +func TestNetworkMapComponents_NonValidatedTargetPeer(t *testing.T) { + account := createComponentTestAccount() + validated := allPeersValidated(account, "peer-src-1") + + nm := networkMapFromComponents(t, account, "peer-src-1", validated) + + assert.Empty(t, nm.Peers, "non-validated target peer should get empty network map") + assert.Empty(t, nm.FirewallRules) +} + +func TestNetworkMapComponents_NetworkResourceRoutes_SourcePeer(t *testing.T) { + account := createComponentTestAccount() + validated := allPeersValidated(account) + + nm := networkMapFromComponents(t, account, "peer-src-1", validated) + + var hasResourceRoute bool + for _, r := range nm.Routes { + if r.Network.String() == "10.200.0.1/32" { + hasResourceRoute = true + break + } + } + assert.True(t, hasResourceRoute, "source peer should receive route to network resource via router") + assert.Contains(t, peerIDs(nm.Peers), "peer-router-1", "source peer should see the routing peer") +} + +func TestNetworkMapComponents_NetworkResourceRoutes_RouterPeer(t *testing.T) { + account := createComponentTestAccount() + validated := allPeersValidated(account) + + nm := networkMapFromComponents(t, account, "peer-router-1", validated) + + var hasResourceRoute bool + for _, r := range nm.Routes { + if r.Network.String() == "10.200.0.1/32" { + hasResourceRoute = true + break + } + } + assert.True(t, hasResourceRoute, "router peer should receive network resource route") + assert.NotEmpty(t, nm.RoutesFirewallRules, "router peer should have route firewall rules for the resource") +} + +func TestNetworkMapComponents_NetworkResourceRoutes_UnrelatedPeer(t *testing.T) { + account := createComponentTestAccount() + validated := allPeersValidated(account) + + nm := networkMapFromComponents(t, account, "peer-dst-1", validated) + + for _, r := range nm.Routes { + assert.NotEqual(t, "10.200.0.1/32", r.Network.String(), "unrelated peer should not receive network resource route") + } +} + +func TestNetworkMapComponents_NetworkResource_WithPostureCheck(t *testing.T) { + account := createComponentTestAccount() + validated := allPeersValidated(account) + + account.PostureChecks = []*posture.Checks{ + {ID: "pc-version", Name: "Version check", Checks: posture.ChecksDefinition{ + NBVersionCheck: &posture.NBVersionCheck{MinVersion: "0.30.0"}, + }}, + } + account.Policies = append(account.Policies, &types.Policy{ + ID: "policy-posture-resource", Name: "Posture resource access", Enabled: true, AccountID: account.Id, + SourcePostureChecks: []string{"pc-version"}, + Rules: []*types.PolicyRule{{ + ID: "rule-posture-resource", Name: "Posture -> Resource", Enabled: true, + Action: types.PolicyTrafficActionAccept, Protocol: types.PolicyRuleProtocolALL, + Sources: []string{"group-src"}, + DestinationResource: types.Resource{ID: "resource-guarded"}, + }}, + }) + + account.NetworkResources = append(account.NetworkResources, &resourceTypes.NetworkResource{ + ID: "resource-guarded", NetworkID: "net-guarded", AccountID: account.Id, Enabled: true, + Type: resourceTypes.Host, Prefix: netip.MustParsePrefix("10.200.1.1/32"), Address: "10.200.1.1/32", + }) + account.Networks = append(account.Networks, &networkTypes.Network{ + ID: "net-guarded", Name: "Guarded Net", AccountID: account.Id, + }) + account.NetworkRouters = append(account.NetworkRouters, &routerTypes.NetworkRouter{ + ID: "router-guarded", NetworkID: "net-guarded", Peer: "peer-router-1", Enabled: true, AccountID: account.Id, + }) + + t.Run("peer passes posture check", func(t *testing.T) { + account.Peers["peer-src-1"].Meta.WtVersion = "0.35.0" + nm := networkMapFromComponents(t, account, "peer-src-1", validated) + + var hasGuardedRoute bool + for _, r := range nm.Routes { + if r.Network.String() == "10.200.1.1/32" { + hasGuardedRoute = true + } + } + assert.True(t, hasGuardedRoute, "peer passing posture check should get guarded resource route") + }) + + t.Run("peer fails posture check", func(t *testing.T) { + account.Peers["peer-src-1"].Meta.WtVersion = "0.20.0" + nm := networkMapFromComponents(t, account, "peer-src-1", validated) + + for _, r := range nm.Routes { + assert.NotEqual(t, "10.200.1.1/32", r.Network.String(), "peer failing posture check should NOT get guarded resource route") + } + }) +} + +func TestNetworkMapComponents_NetworkResource_MultiplePostureChecks(t *testing.T) { + account := createComponentTestAccount() + validated := allPeersValidated(account) + + account.PostureChecks = []*posture.Checks{ + {ID: "pc-version", Name: "Version", Checks: posture.ChecksDefinition{ + NBVersionCheck: &posture.NBVersionCheck{MinVersion: "0.30.0"}, + }}, + {ID: "pc-os", Name: "OS check", Checks: posture.ChecksDefinition{ + OSVersionCheck: &posture.OSVersionCheck{Linux: &posture.MinKernelVersionCheck{MinKernelVersion: "5.0"}}, + }}, + } + + account.Policies = append(account.Policies, &types.Policy{ + ID: "policy-multi-posture", Name: "Multi posture", Enabled: true, AccountID: account.Id, + SourcePostureChecks: []string{"pc-version", "pc-os"}, + Rules: []*types.PolicyRule{{ + ID: "rule-multi-posture", Name: "Multi posture rule", Enabled: true, + Action: types.PolicyTrafficActionAccept, Protocol: types.PolicyRuleProtocolALL, + Sources: []string{"group-src"}, + DestinationResource: types.Resource{ID: "resource-strict"}, + }}, + }) + + account.NetworkResources = append(account.NetworkResources, &resourceTypes.NetworkResource{ + ID: "resource-strict", NetworkID: "net-strict", AccountID: account.Id, Enabled: true, + Type: resourceTypes.Host, Prefix: netip.MustParsePrefix("10.200.2.1/32"), Address: "10.200.2.1/32", + }) + account.Networks = append(account.Networks, &networkTypes.Network{ + ID: "net-strict", Name: "Strict Net", AccountID: account.Id, + }) + account.NetworkRouters = append(account.NetworkRouters, &routerTypes.NetworkRouter{ + ID: "router-strict", NetworkID: "net-strict", Peer: "peer-router-1", Enabled: true, AccountID: account.Id, + }) + + t.Run("passes both posture checks", func(t *testing.T) { + account.Peers["peer-src-1"].Meta.WtVersion = "0.35.0" + account.Peers["peer-src-1"].Meta.GoOS = "linux" + account.Peers["peer-src-1"].Meta.KernelVersion = "6.1.0" + nm := networkMapFromComponents(t, account, "peer-src-1", validated) + + var found bool + for _, r := range nm.Routes { + if r.Network.String() == "10.200.2.1/32" { + found = true + } + } + assert.True(t, found, "peer passing both checks should get resource route") + }) + + t.Run("fails version posture check", func(t *testing.T) { + account.Peers["peer-src-1"].Meta.WtVersion = "0.20.0" + account.Peers["peer-src-1"].Meta.KernelVersion = "6.1.0" + nm := networkMapFromComponents(t, account, "peer-src-1", validated) + + for _, r := range nm.Routes { + assert.NotEqual(t, "10.200.2.1/32", r.Network.String(), "peer failing version check should NOT get resource route") + } + }) + + t.Run("fails OS posture check", func(t *testing.T) { + account.Peers["peer-src-1"].Meta.WtVersion = "0.35.0" + account.Peers["peer-src-1"].Meta.KernelVersion = "4.0.0" + nm := networkMapFromComponents(t, account, "peer-src-1", validated) + + for _, r := range nm.Routes { + assert.NotEqual(t, "10.200.2.1/32", r.Network.String(), "peer failing OS check should NOT get resource route") + } + }) +} + +func TestNetworkMapComponents_RouterPeerFirewallRules(t *testing.T) { + account := createComponentTestAccount() + validated := allPeersValidated(account) + + nm := networkMapFromComponents(t, account, "peer-router-1", validated) + + var resourceFWRules []*types.RouteFirewallRule + for _, rule := range nm.RoutesFirewallRules { + if rule.Destination == "10.200.0.1/32" { + resourceFWRules = append(resourceFWRules, rule) + } + } + assert.NotEmpty(t, resourceFWRules, "router should have firewall rules for the network resource") + + var hasSourcePeerIP bool + for _, rule := range resourceFWRules { + for _, sr := range rule.SourceRanges { + if sr == account.Peers["peer-src-1"].IP.String()+"/32" || sr == account.Peers["peer-src-2"].IP.String()+"/32" { + hasSourcePeerIP = true + } + } + } + assert.True(t, hasSourcePeerIP, "resource firewall rules should include source peer IPs") +} + +func TestNetworkMapComponents_DNSManagement(t *testing.T) { + account := createComponentTestAccount() + validated := allPeersValidated(account) + + t.Run("peer in DNS-enabled group", func(t *testing.T) { + nm := networkMapFromComponents(t, account, "peer-src-1", validated) + assert.True(t, nm.DNSConfig.ServiceEnable, "peer in non-disabled group should have DNS enabled") + }) + + t.Run("peer in DNS-disabled group", func(t *testing.T) { + nm := networkMapFromComponents(t, account, "peer-dst-1", validated) + assert.False(t, nm.DNSConfig.ServiceEnable, "peer in DNS-disabled group should have DNS disabled") + }) +} + +func TestNetworkMapComponents_NameServerGroups(t *testing.T) { + account := createComponentTestAccount() + validated := allPeersValidated(account) + + nm := networkMapFromComponents(t, account, "peer-src-1", validated) + assert.True(t, nm.DNSConfig.ServiceEnable) + + var hasNSGroup bool + for _, ns := range nm.DNSConfig.NameServerGroups { + if ns.ID == "ns-main" { + hasNSGroup = true + } + } + assert.True(t, hasNSGroup, "peer in NS group should receive nameserver configuration") +} + +func TestNetworkMapComponents_RoutesWithHADeduplication(t *testing.T) { + account := createComponentTestAccount() + validated := allPeersValidated(account) + + account.Routes["route-ha-1"] = &route.Route{ + ID: "route-ha-1", Network: netip.MustParsePrefix("172.16.0.0/16"), + Peer: account.Peers["peer-dst-1"].Key, PeerID: "peer-dst-1", + Enabled: true, Metric: 100, AccountID: account.Id, + Groups: []string{"group-src", "group-dst"}, PeerGroups: []string{"group-dst"}, + } + account.Routes["route-ha-2"] = &route.Route{ + ID: "route-ha-2", Network: netip.MustParsePrefix("172.16.0.0/16"), + Peer: account.Peers["peer-src-1"].Key, PeerID: "peer-src-1", + Enabled: true, Metric: 200, AccountID: account.Id, + Groups: []string{"group-src", "group-dst"}, PeerGroups: []string{"group-src"}, + } + + nm := networkMapFromComponents(t, account, "peer-src-1", validated) + + haCount := 0 + for _, r := range nm.Routes { + if r.Network.String() == "172.16.0.0/16" { + haCount++ + } + } + assert.Equal(t, 1, haCount, "peer should only receive one route from HA group (not both, since it's a member of one)") +} + +func TestNetworkMapComponents_RoutesFirewallRulesForAccessControl(t *testing.T) { + account := createComponentTestAccount() + validated := allPeersValidated(account) + + account.Routes["route-acl"] = &route.Route{ + ID: "route-acl", Network: netip.MustParsePrefix("192.168.100.0/24"), + Peer: account.Peers["peer-src-1"].Key, PeerID: "peer-src-1", + Enabled: true, Metric: 100, AccountID: account.Id, + Groups: []string{"group-dst"}, + PeerGroups: []string{"group-src"}, + AccessControlGroups: []string{"group-dst"}, + } + + nm := networkMapFromComponents(t, account, "peer-src-1", validated) + + var hasFWRule bool + for _, rule := range nm.RoutesFirewallRules { + if rule.Destination == "192.168.100.0/24" { + hasFWRule = true + } + } + assert.True(t, hasFWRule, "routing peer should have firewall rules for route with access control groups") +} + +func TestNetworkMapComponents_RoutesDefaultPermit(t *testing.T) { + account := createComponentTestAccount() + validated := allPeersValidated(account) + + account.Routes["route-open"] = &route.Route{ + ID: "route-open", Network: netip.MustParsePrefix("10.99.0.0/16"), + Peer: account.Peers["peer-src-1"].Key, PeerID: "peer-src-1", + Enabled: true, Metric: 100, AccountID: account.Id, + Groups: []string{"group-src"}, + PeerGroups: []string{"group-src"}, + } + + nm := networkMapFromComponents(t, account, "peer-src-1", validated) + + var hasFWRule bool + for _, rule := range nm.RoutesFirewallRules { + if rule.Destination == "10.99.0.0/16" { + hasFWRule = true + } + } + assert.True(t, hasFWRule, "route without access control groups should have default permit firewall rules") +} + +func TestNetworkMapComponents_SSHAuthorizedUsers(t *testing.T) { + account := createComponentTestAccount() + validated := allPeersValidated(account) + + account.Peers["peer-dst-1"].SSHEnabled = true + + account.Policies = append(account.Policies, &types.Policy{ + ID: "policy-ssh", Name: "SSH Access", Enabled: true, AccountID: account.Id, + Rules: []*types.PolicyRule{{ + ID: "rule-ssh", Name: "SSH to dst", Enabled: true, + Action: types.PolicyTrafficActionAccept, Protocol: types.PolicyRuleProtocolALL, + Bidirectional: true, + Sources: []string{"group-src"}, Destinations: []string{"group-dst"}, + }}, + }) + + nm := networkMapFromComponents(t, account, "peer-dst-1", validated) + assert.True(t, nm.EnableSSH, "SSH-enabled peer with matching policy should have EnableSSH") +} + +func TestNetworkMapComponents_DisabledPolicyIgnored(t *testing.T) { + account := createComponentTestAccount() + validated := allPeersValidated(account) + + for _, p := range account.Policies { + p.Enabled = false + } + + nm := networkMapFromComponents(t, account, "peer-src-1", validated) + assert.Empty(t, nm.Peers, "with all policies disabled, peer should see no other peers") + assert.Empty(t, nm.FirewallRules) +} + +func TestNetworkMapComponents_DisabledRouteIgnored(t *testing.T) { + account := createComponentTestAccount() + validated := allPeersValidated(account) + + for _, r := range account.Routes { + r.Enabled = false + } + for _, r := range account.NetworkResources { + r.Enabled = false + } + + nm := networkMapFromComponents(t, account, "peer-src-1", validated) + assert.Empty(t, nm.Routes, "disabled routes should not appear in network map") +} + +func TestNetworkMapComponents_DisabledNetworkResourceIgnored(t *testing.T) { + account := createComponentTestAccount() + validated := allPeersValidated(account) + + for _, r := range account.NetworkResources { + r.Enabled = false + } + + nm := networkMapFromComponents(t, account, "peer-router-1", validated) + + for _, r := range nm.Routes { + assert.NotEqual(t, "10.200.0.1/32", r.Network.String(), "disabled resource should not generate routes") + } +} + +func TestNetworkMapComponents_BidirectionalPolicy(t *testing.T) { + account := createComponentTestAccount() + validated := allPeersValidated(account) + + nmSrc := networkMapFromComponents(t, account, "peer-src-1", validated) + nmDst := networkMapFromComponents(t, account, "peer-dst-1", validated) + + assert.Contains(t, peerIDs(nmSrc.Peers), "peer-dst-1", "src should see dst via bidirectional policy") + assert.Contains(t, peerIDs(nmDst.Peers), "peer-src-1", "dst should see src via bidirectional policy") +} + +func TestNetworkMapComponents_DropPolicy(t *testing.T) { + account := createComponentTestAccount() + validated := allPeersValidated(account) + + account.Policies = append(account.Policies, &types.Policy{ + ID: "policy-drop", Name: "Drop traffic", Enabled: true, AccountID: account.Id, + Rules: []*types.PolicyRule{{ + ID: "rule-drop", Name: "Drop src->dst", Enabled: true, + Action: types.PolicyTrafficActionDrop, Protocol: types.PolicyRuleProtocolTCP, + Ports: []string{"5432"}, + Sources: []string{"group-src"}, Destinations: []string{"group-dst"}, + }}, + }) + + nm := networkMapFromComponents(t, account, "peer-src-1", validated) + + var hasDropRule bool + for _, rule := range nm.FirewallRules { + if rule.Action == string(types.PolicyTrafficActionDrop) && rule.Port == "5432" { + hasDropRule = true + } + } + assert.True(t, hasDropRule, "drop policy should generate drop firewall rule") +} + +func TestNetworkMapComponents_PortRangePolicy(t *testing.T) { + account := createComponentTestAccount() + validated := allPeersValidated(account) + + account.Peers["peer-src-1"].Meta.WtVersion = "0.50.0" + + account.Policies = append(account.Policies, &types.Policy{ + ID: "policy-range", Name: "Port range", Enabled: true, AccountID: account.Id, + Rules: []*types.PolicyRule{{ + ID: "rule-range", Name: "Range rule", Enabled: true, + Action: types.PolicyTrafficActionAccept, Protocol: types.PolicyRuleProtocolTCP, + PortRanges: []types.RulePortRange{{Start: 8080, End: 8090}}, + Sources: []string{"group-src"}, Destinations: []string{"group-dst"}, + }}, + }) + + nm := networkMapFromComponents(t, account, "peer-src-1", validated) + + var hasRangeRule bool + for _, rule := range nm.FirewallRules { + if rule.PortRange.Start == 8080 && rule.PortRange.End == 8090 { + hasRangeRule = true + } + } + assert.True(t, hasRangeRule, "port range policy should generate corresponding firewall rule") +} + +func TestNetworkMapComponents_MultipleNetworkResources(t *testing.T) { + account := createComponentTestAccount() + validated := allPeersValidated(account) + + account.NetworkResources = append(account.NetworkResources, &resourceTypes.NetworkResource{ + ID: "resource-2", NetworkID: "net-1", AccountID: account.Id, Enabled: true, + Type: resourceTypes.Host, Prefix: netip.MustParsePrefix("10.200.0.2/32"), Address: "10.200.0.2/32", + }) + account.Groups["group-res2"] = &types.Group{ID: "group-res2", Name: "Resource 2 Group", Peers: []string{"peer-src-1", "peer-src-2"}, + Resources: []types.Resource{{ID: "resource-2"}}, + } + account.Policies = append(account.Policies, &types.Policy{ + ID: "policy-res2", Name: "Resource 2 Policy", Enabled: true, AccountID: account.Id, + Rules: []*types.PolicyRule{{ + ID: "rule-res2", Name: "Access Resource 2", Enabled: true, + Action: types.PolicyTrafficActionAccept, Protocol: types.PolicyRuleProtocolALL, + Sources: []string{"group-src"}, + DestinationResource: types.Resource{ID: "resource-2"}, + }}, + }) + + nm := networkMapFromComponents(t, account, "peer-router-1", validated) + + resourceRouteCount := 0 + for _, r := range nm.Routes { + if r.Network.String() == "10.200.0.1/32" || r.Network.String() == "10.200.0.2/32" { + resourceRouteCount++ + } + } + assert.Equal(t, 2, resourceRouteCount, "router should have routes for both network resources") +} + +func TestNetworkMapComponents_DomainNetworkResource(t *testing.T) { + account := createComponentTestAccount() + validated := allPeersValidated(account) + + account.NetworkResources = append(account.NetworkResources, &resourceTypes.NetworkResource{ + ID: "resource-domain", NetworkID: "net-1", AccountID: account.Id, Enabled: true, + Type: resourceTypes.Domain, Domain: "api.example.com", Address: "api.example.com", + }) + account.Groups["group-res-domain"] = &types.Group{ + ID: "group-res-domain", Name: "Domain Resource Group", + Resources: []types.Resource{{ID: "resource-domain"}}, + } + account.Policies = append(account.Policies, &types.Policy{ + ID: "policy-domain", Name: "Domain resource policy", Enabled: true, AccountID: account.Id, + Rules: []*types.PolicyRule{{ + ID: "rule-domain", Name: "Access domain resource", Enabled: true, + Action: types.PolicyTrafficActionAccept, Protocol: types.PolicyRuleProtocolALL, + Sources: []string{"group-src"}, + DestinationResource: types.Resource{ID: "resource-domain"}, + }}, + }) + + nm := networkMapFromComponents(t, account, "peer-src-1", validated) + + var hasDomainRoute bool + for _, r := range nm.Routes { + if r.NetworkType == route.DomainNetwork && len(r.Domains) > 0 && r.Domains[0].SafeString() == "api.example.com" { + hasDomainRoute = true + } + } + assert.True(t, hasDomainRoute, "source peer should receive domain route for domain network resource") +} + +func TestNetworkMapComponents_NetworkEmpty(t *testing.T) { + account := createComponentTestAccount() + validated := allPeersValidated(account) + + nm := networkMapFromComponents(t, account, "nonexistent-peer", validated) + + assert.NotNil(t, nm) + assert.Empty(t, nm.Peers) + assert.Empty(t, nm.FirewallRules) + assert.NotNil(t, nm.Network) +} + +func TestNetworkMapComponents_RouterExcludesOtherNetworkRoutes(t *testing.T) { + account := createComponentTestAccount() + validated := allPeersValidated(account) + + account.NetworkResources = append(account.NetworkResources, &resourceTypes.NetworkResource{ + ID: "resource-other", NetworkID: "net-other", AccountID: account.Id, Enabled: true, + Type: resourceTypes.Host, Prefix: netip.MustParsePrefix("10.200.99.1/32"), Address: "10.200.99.1/32", + }) + account.Networks = append(account.Networks, &networkTypes.Network{ + ID: "net-other", Name: "Other Net", AccountID: account.Id, + }) + account.NetworkRouters = append(account.NetworkRouters, &routerTypes.NetworkRouter{ + ID: "router-other", NetworkID: "net-other", Peer: "peer-dst-1", Enabled: true, AccountID: account.Id, + }) + account.Groups["group-res-other"] = &types.Group{ID: "group-res-other", Name: "Other resource group", + Resources: []types.Resource{{ID: "resource-other"}}, + } + account.Policies = append(account.Policies, &types.Policy{ + ID: "policy-other-resource", Name: "Other resource policy", Enabled: true, AccountID: account.Id, + Rules: []*types.PolicyRule{{ + ID: "rule-other", Name: "Other resource access", Enabled: true, + Action: types.PolicyTrafficActionAccept, Protocol: types.PolicyRuleProtocolALL, + Sources: []string{"group-src"}, + DestinationResource: types.Resource{ID: "resource-other"}, + }}, + }) + + nm := networkMapFromComponents(t, account, "peer-router-1", validated) + + for _, r := range nm.Routes { + assert.NotEqual(t, "10.200.99.1/32", r.Network.String(), "router-1 should NOT get routes for other network's resources") + } +} + +func createComponentTestAccount() *types.Account { + peers := map[string]*nbpeer.Peer{ + "peer-src-1": { + ID: "peer-src-1", IP: net.IP{100, 64, 0, 1}, Key: "key-src-1", DNSLabel: "src1", + Status: &nbpeer.PeerStatus{Connected: true, LastSeen: time.Now()}, UserID: "user-1", + Meta: nbpeer.PeerSystemMeta{WtVersion: "0.35.0", GoOS: "linux"}, + }, + "peer-src-2": { + ID: "peer-src-2", IP: net.IP{100, 64, 0, 2}, Key: "key-src-2", DNSLabel: "src2", + Status: &nbpeer.PeerStatus{Connected: true, LastSeen: time.Now()}, UserID: "user-1", + Meta: nbpeer.PeerSystemMeta{WtVersion: "0.35.0", GoOS: "linux"}, + }, + "peer-dst-1": { + ID: "peer-dst-1", IP: net.IP{100, 64, 0, 3}, Key: "key-dst-1", DNSLabel: "dst1", + Status: &nbpeer.PeerStatus{Connected: true, LastSeen: time.Now()}, UserID: "user-2", + Meta: nbpeer.PeerSystemMeta{WtVersion: "0.35.0", GoOS: "linux"}, + }, + "peer-router-1": { + ID: "peer-router-1", IP: net.IP{100, 64, 0, 10}, Key: "key-router-1", DNSLabel: "router1", + Status: &nbpeer.PeerStatus{Connected: true, LastSeen: time.Now()}, UserID: "user-1", + Meta: nbpeer.PeerSystemMeta{WtVersion: "0.35.0", GoOS: "linux"}, + }, + } + + groups := map[string]*types.Group{ + "group-src": {ID: "group-src", Name: "Sources", Peers: []string{"peer-src-1", "peer-src-2"}}, + "group-dst": {ID: "group-dst", Name: "Destinations", Peers: []string{"peer-dst-1"}}, + "group-all": {ID: "group-all", Name: "All", Peers: []string{"peer-src-1", "peer-src-2", "peer-dst-1", "peer-router-1"}}, + "group-res": { + ID: "group-res", Name: "Resource Group", + Resources: []types.Resource{{ID: "resource-1"}}, + }, + } + + policies := []*types.Policy{ + { + ID: "policy-base", Name: "Base connectivity", Enabled: true, + Rules: []*types.PolicyRule{{ + ID: "rule-base", Name: "Allow src <-> dst", Enabled: true, + Action: types.PolicyTrafficActionAccept, Protocol: types.PolicyRuleProtocolALL, + Bidirectional: true, + Sources: []string{"group-src"}, Destinations: []string{"group-dst"}, + }}, + }, + { + ID: "policy-resource", Name: "Network resource access", Enabled: true, + Rules: []*types.PolicyRule{{ + ID: "rule-resource", Name: "Source -> Resource", Enabled: true, + Action: types.PolicyTrafficActionAccept, Protocol: types.PolicyRuleProtocolALL, + Sources: []string{"group-src"}, + DestinationResource: types.Resource{ID: "resource-1"}, + }}, + }, + } + + routes := map[route.ID]*route.Route{ + "route-main": { + ID: "route-main", Network: netip.MustParsePrefix("192.168.10.0/24"), + Peer: peers["peer-dst-1"].Key, PeerID: "peer-dst-1", + Enabled: true, Metric: 100, + Groups: []string{"group-src", "group-dst"}, PeerGroups: []string{"group-dst"}, + }, + } + + users := map[string]*types.User{ + "user-1": {Id: "user-1", Role: types.UserRoleAdmin, IsServiceUser: false, AutoGroups: []string{"group-all"}}, + "user-2": {Id: "user-2", Role: types.UserRoleUser, IsServiceUser: false, AutoGroups: []string{"group-all"}}, + } + + account := &types.Account{ + Id: "account-components-test", Peers: peers, Groups: groups, Policies: policies, Routes: routes, + Users: users, + Network: &types.Network{ + Identifier: "net-test", Net: net.IPNet{IP: net.IP{100, 64, 0, 0}, Mask: net.CIDRMask(16, 32)}, Serial: 1, + }, + DNSSettings: types.DNSSettings{DisabledManagementGroups: []string{"group-dst"}}, + NameServerGroups: map[string]*nbdns.NameServerGroup{ + "ns-main": { + ID: "ns-main", Name: "Main NS", Enabled: true, Groups: []string{"group-src"}, + NameServers: []nbdns.NameServer{{IP: netip.MustParseAddr("8.8.8.8"), NSType: nbdns.UDPNameServerType, Port: 53}}, + }, + }, + PostureChecks: []*posture.Checks{}, + NetworkResources: []*resourceTypes.NetworkResource{ + { + ID: "resource-1", NetworkID: "net-1", AccountID: "account-components-test", Enabled: true, + Type: resourceTypes.Host, Prefix: netip.MustParsePrefix("10.200.0.1/32"), Address: "10.200.0.1/32", + }, + }, + Networks: []*networkTypes.Network{ + {ID: "net-1", Name: "Resource Net", AccountID: "account-components-test"}, + }, + NetworkRouters: []*routerTypes.NetworkRouter{ + {ID: "router-1", NetworkID: "net-1", Peer: "peer-router-1", Enabled: true, AccountID: "account-components-test"}, + }, + Settings: &types.Settings{PeerLoginExpirationEnabled: false, PeerLoginExpiration: 24 * time.Hour}, + } + + for _, p := range account.Policies { + p.AccountID = account.Id + } + for _, r := range account.Routes { + r.AccountID = account.Id + } + + return account +} diff --git a/management/server/types/networkmap_golden_test.go b/management/server/types/networkmap_golden_test.go deleted file mode 100644 index 53261f22d..000000000 --- a/management/server/types/networkmap_golden_test.go +++ /dev/null @@ -1,967 +0,0 @@ -package types_test - -import ( - "context" - "encoding/json" - "fmt" - "net" - "net/netip" - "os" - "path/filepath" - "slices" - "sort" - "testing" - "time" - - "github.com/stretchr/testify/require" - - "github.com/netbirdio/netbird/dns" - "github.com/netbirdio/netbird/management/internals/modules/zones" - resourceTypes "github.com/netbirdio/netbird/management/server/networks/resources/types" - routerTypes "github.com/netbirdio/netbird/management/server/networks/routers/types" - networkTypes "github.com/netbirdio/netbird/management/server/networks/types" - nbpeer "github.com/netbirdio/netbird/management/server/peer" - "github.com/netbirdio/netbird/management/server/posture" - "github.com/netbirdio/netbird/management/server/types" - "github.com/netbirdio/netbird/route" -) - -const ( - numPeers = 100 - devGroupID = "group-dev" - opsGroupID = "group-ops" - allGroupID = "group-all" - sshUsersGroupID = "group-ssh-users" - routeID = route.ID("route-main") - routeHA1ID = route.ID("route-ha-1") - routeHA2ID = route.ID("route-ha-2") - policyIDDevOps = "policy-dev-ops" - policyIDAll = "policy-all" - policyIDPosture = "policy-posture" - policyIDDrop = "policy-drop" - policyIDSSH = "policy-ssh" - postureCheckID = "posture-check-ver" - networkResourceID = "res-database" - networkID = "net-database" - networkRouterID = "router-database" - nameserverGroupID = "ns-group-main" - testingPeerID = "peer-60" // A peer from the "dev" group, should receive the most detailed map. - expiredPeerID = "peer-98" // This peer will be online but with an expired session. - offlinePeerID = "peer-99" // This peer will be completely offline. - routingPeerID = "peer-95" // This peer is used for routing, it has a route to the network. - testAccountID = "account-golden-test" - userAdminID = "user-admin" - userDevID = "user-dev" - userOpsID = "user-ops" -) - -func TestGetPeerNetworkMap_Golden(t *testing.T) { - account := createTestAccountWithEntities() - - ctx := context.Background() - validatedPeersMap := make(map[string]struct{}) - for i := range numPeers { - peerID := fmt.Sprintf("peer-%d", i) - if peerID == offlinePeerID { - continue - } - validatedPeersMap[peerID] = struct{}{} - } - - resourcePolicies := account.GetResourcePoliciesMap() - routers := account.GetResourceRoutersMap() - - legacyNetworkMap := account.GetPeerNetworkMap(ctx, testingPeerID, dns.CustomZone{}, []*zones.Zone{}, validatedPeersMap, resourcePolicies, routers, nil, account.GetActiveGroupUsers()) - normalizeAndSortNetworkMap(legacyNetworkMap) - legacyJSON, err := json.MarshalIndent(toNetworkMapJSON(legacyNetworkMap), "", " ") - require.NoError(t, err, "error marshaling legacy network map to JSON") - - builder := types.NewNetworkMapBuilder(account, validatedPeersMap) - newNetworkMap := builder.GetPeerNetworkMap(ctx, testingPeerID, dns.CustomZone{}, nil, validatedPeersMap, nil) - normalizeAndSortNetworkMap(newNetworkMap) - newJSON, err := json.MarshalIndent(toNetworkMapJSON(newNetworkMap), "", " ") - require.NoError(t, err, "error marshaling new network map to JSON") - - if string(legacyJSON) != string(newJSON) { - legacyFilePath := filepath.Join("testdata", "networkmap_golden.json") - newFilePath := filepath.Join("testdata", "networkmap_golden_new.json") - - err = os.MkdirAll(filepath.Dir(legacyFilePath), 0755) - require.NoError(t, err) - - err = os.WriteFile(legacyFilePath, legacyJSON, 0644) - require.NoError(t, err) - t.Logf("Saved legacy network map to %s", legacyFilePath) - - err = os.WriteFile(newFilePath, newJSON, 0644) - require.NoError(t, err) - t.Logf("Saved new network map to %s", newFilePath) - - require.JSONEq(t, string(legacyJSON), string(newJSON), "network maps from legacy and new builder do not match") - } -} - -func BenchmarkGetPeerNetworkMap(b *testing.B) { - account := createTestAccountWithEntities() - ctx := context.Background() - validatedPeersMap := make(map[string]struct{}) - var peerIDs []string - for i := range numPeers { - peerID := fmt.Sprintf("peer-%d", i) - validatedPeersMap[peerID] = struct{}{} - peerIDs = append(peerIDs, peerID) - } - - b.ResetTimer() - b.Run("old builder", func(b *testing.B) { - for range b.N { - for _, peerID := range peerIDs { - _ = account.GetPeerNetworkMap(ctx, peerID, dns.CustomZone{}, []*zones.Zone{}, validatedPeersMap, nil, nil, nil, account.GetActiveGroupUsers()) - } - } - }) - b.ResetTimer() - b.Run("new builder", func(b *testing.B) { - for range b.N { - builder := types.NewNetworkMapBuilder(account, validatedPeersMap) - for _, peerID := range peerIDs { - _ = builder.GetPeerNetworkMap(ctx, peerID, dns.CustomZone{}, nil, validatedPeersMap, nil) - } - } - }) -} - -func TestGetPeerNetworkMap_Golden_WithNewPeer(t *testing.T) { - account := createTestAccountWithEntities() - - ctx := context.Background() - validatedPeersMap := make(map[string]struct{}) - for i := range numPeers { - peerID := fmt.Sprintf("peer-%d", i) - if peerID == offlinePeerID { - continue - } - validatedPeersMap[peerID] = struct{}{} - } - - builder := types.NewNetworkMapBuilder(account, validatedPeersMap) - - newPeerID := "peer-new-101" - newPeerIP := net.IP{100, 64, 1, 1} - newPeer := &nbpeer.Peer{ - ID: newPeerID, - IP: newPeerIP, - Key: fmt.Sprintf("key-%s", newPeerID), - DNSLabel: "peernew101", - Status: &nbpeer.PeerStatus{Connected: true, LastSeen: time.Now()}, - UserID: "user-admin", - Meta: nbpeer.PeerSystemMeta{WtVersion: "0.26.0", GoOS: "linux"}, - LastLogin: func() *time.Time { t := time.Now(); return &t }(), - } - - account.Peers[newPeerID] = newPeer - - if devGroup, exists := account.Groups[devGroupID]; exists { - devGroup.Peers = append(devGroup.Peers, newPeerID) - } - - if allGroup, exists := account.Groups[allGroupID]; exists { - allGroup.Peers = append(allGroup.Peers, newPeerID) - } - - validatedPeersMap[newPeerID] = struct{}{} - - if account.Network != nil { - account.Network.Serial++ - } - - resourcePolicies := account.GetResourcePoliciesMap() - routers := account.GetResourceRoutersMap() - - legacyNetworkMap := account.GetPeerNetworkMap(ctx, testingPeerID, dns.CustomZone{}, []*zones.Zone{}, validatedPeersMap, resourcePolicies, routers, nil, account.GetActiveGroupUsers()) - normalizeAndSortNetworkMap(legacyNetworkMap) - legacyJSON, err := json.MarshalIndent(toNetworkMapJSON(legacyNetworkMap), "", " ") - require.NoError(t, err, "error marshaling legacy network map to JSON") - - err = builder.OnPeerAddedIncremental(account, newPeerID) - require.NoError(t, err, "error adding peer to cache") - - newNetworkMap := builder.GetPeerNetworkMap(ctx, testingPeerID, dns.CustomZone{}, nil, validatedPeersMap, nil) - normalizeAndSortNetworkMap(newNetworkMap) - newJSON, err := json.MarshalIndent(toNetworkMapJSON(newNetworkMap), "", " ") - require.NoError(t, err, "error marshaling new network map to JSON") - - if string(legacyJSON) != string(newJSON) { - legacyFilePath := filepath.Join("testdata", "networkmap_golden_with_new_peer.json") - newFilePath := filepath.Join("testdata", "networkmap_golden_new_with_onpeeradded.json") - - err = os.MkdirAll(filepath.Dir(legacyFilePath), 0755) - require.NoError(t, err) - - err = os.WriteFile(legacyFilePath, legacyJSON, 0644) - require.NoError(t, err) - t.Logf("Saved legacy network map to %s", legacyFilePath) - - err = os.WriteFile(newFilePath, newJSON, 0644) - require.NoError(t, err) - t.Logf("Saved new network map to %s", newFilePath) - - require.JSONEq(t, string(legacyJSON), string(newJSON), "network maps with new peer from legacy and new builder do not match") - } -} - -func BenchmarkGetPeerNetworkMap_AfterPeerAdded(b *testing.B) { - account := createTestAccountWithEntities() - ctx := context.Background() - validatedPeersMap := make(map[string]struct{}) - var peerIDs []string - for i := range numPeers { - peerID := fmt.Sprintf("peer-%d", i) - validatedPeersMap[peerID] = struct{}{} - peerIDs = append(peerIDs, peerID) - } - builder := types.NewNetworkMapBuilder(account, validatedPeersMap) - newPeerID := "peer-new-101" - newPeer := &nbpeer.Peer{ - ID: newPeerID, - IP: net.IP{100, 64, 1, 1}, - Key: fmt.Sprintf("key-%s", newPeerID), - DNSLabel: "peernew101", - Status: &nbpeer.PeerStatus{Connected: true, LastSeen: time.Now()}, - UserID: "user-admin", - Meta: nbpeer.PeerSystemMeta{WtVersion: "0.26.0", GoOS: "linux"}, - } - - account.Peers[newPeerID] = newPeer - account.Groups[devGroupID].Peers = append(account.Groups[devGroupID].Peers, newPeerID) - account.Groups[allGroupID].Peers = append(account.Groups[allGroupID].Peers, newPeerID) - validatedPeersMap[newPeerID] = struct{}{} - - b.ResetTimer() - b.Run("old builder after add", func(b *testing.B) { - for i := 0; i < b.N; i++ { - for _, testingPeerID := range peerIDs { - _ = account.GetPeerNetworkMap(ctx, testingPeerID, dns.CustomZone{}, []*zones.Zone{}, validatedPeersMap, nil, nil, nil, account.GetActiveGroupUsers()) - } - } - }) - - b.ResetTimer() - b.Run("new builder after add", func(b *testing.B) { - for i := 0; i < b.N; i++ { - _ = builder.OnPeerAddedIncremental(account, newPeerID) - for _, testingPeerID := range peerIDs { - _ = builder.GetPeerNetworkMap(ctx, testingPeerID, dns.CustomZone{}, nil, validatedPeersMap, nil) - } - } - }) -} - -func TestGetPeerNetworkMap_Golden_WithNewRoutingPeer(t *testing.T) { - account := createTestAccountWithEntities() - - ctx := context.Background() - validatedPeersMap := make(map[string]struct{}) - for i := range numPeers { - peerID := fmt.Sprintf("peer-%d", i) - if peerID == offlinePeerID { - continue - } - validatedPeersMap[peerID] = struct{}{} - } - - builder := types.NewNetworkMapBuilder(account, validatedPeersMap) - - newRouterID := "peer-new-router-102" - newRouterIP := net.IP{100, 64, 1, 2} - newRouter := &nbpeer.Peer{ - ID: newRouterID, - IP: newRouterIP, - Key: fmt.Sprintf("key-%s", newRouterID), - DNSLabel: "newrouter102", - Status: &nbpeer.PeerStatus{Connected: true, LastSeen: time.Now()}, - UserID: "user-admin", - Meta: nbpeer.PeerSystemMeta{WtVersion: "0.26.0", GoOS: "linux"}, - LastLogin: func() *time.Time { t := time.Now(); return &t }(), - } - - account.Peers[newRouterID] = newRouter - - if opsGroup, exists := account.Groups[opsGroupID]; exists { - opsGroup.Peers = append(opsGroup.Peers, newRouterID) - } - - if allGroup, exists := account.Groups[allGroupID]; exists { - allGroup.Peers = append(allGroup.Peers, newRouterID) - } - - newRoute := &route.Route{ - ID: route.ID("route-new-router"), - Network: netip.MustParsePrefix("172.16.0.0/24"), - Peer: newRouter.Key, - PeerID: newRouterID, - Description: "Route from new router", - Enabled: true, - PeerGroups: []string{opsGroupID}, - Groups: []string{devGroupID, opsGroupID}, - AccessControlGroups: []string{devGroupID}, - AccountID: account.Id, - } - account.Routes[newRoute.ID] = newRoute - - validatedPeersMap[newRouterID] = struct{}{} - - if account.Network != nil { - account.Network.Serial++ - } - - resourcePolicies := account.GetResourcePoliciesMap() - routers := account.GetResourceRoutersMap() - - legacyNetworkMap := account.GetPeerNetworkMap(ctx, testingPeerID, dns.CustomZone{}, []*zones.Zone{}, validatedPeersMap, resourcePolicies, routers, nil, account.GetActiveGroupUsers()) - normalizeAndSortNetworkMap(legacyNetworkMap) - legacyJSON, err := json.MarshalIndent(toNetworkMapJSON(legacyNetworkMap), "", " ") - require.NoError(t, err, "error marshaling legacy network map to JSON") - - err = builder.OnPeerAddedIncremental(account, newRouterID) - require.NoError(t, err, "error adding router to cache") - - newNetworkMap := builder.GetPeerNetworkMap(ctx, testingPeerID, dns.CustomZone{}, nil, validatedPeersMap, nil) - normalizeAndSortNetworkMap(newNetworkMap) - newJSON, err := json.MarshalIndent(toNetworkMapJSON(newNetworkMap), "", " ") - require.NoError(t, err, "error marshaling new network map to JSON") - - if string(legacyJSON) != string(newJSON) { - legacyFilePath := filepath.Join("testdata", "networkmap_golden_with_new_router.json") - newFilePath := filepath.Join("testdata", "networkmap_golden_new_with_onpeeradded_router.json") - - err = os.MkdirAll(filepath.Dir(legacyFilePath), 0755) - require.NoError(t, err) - - err = os.WriteFile(legacyFilePath, legacyJSON, 0644) - require.NoError(t, err) - t.Logf("Saved legacy network map to %s", legacyFilePath) - - err = os.WriteFile(newFilePath, newJSON, 0644) - require.NoError(t, err) - t.Logf("Saved new network map to %s", newFilePath) - - require.JSONEq(t, string(legacyJSON), string(newJSON), "network maps with new router from legacy and new builder do not match") - } -} - -func BenchmarkGetPeerNetworkMap_AfterRouterPeerAdded(b *testing.B) { - account := createTestAccountWithEntities() - ctx := context.Background() - validatedPeersMap := make(map[string]struct{}) - var peerIDs []string - for i := range numPeers { - peerID := fmt.Sprintf("peer-%d", i) - validatedPeersMap[peerID] = struct{}{} - peerIDs = append(peerIDs, peerID) - } - builder := types.NewNetworkMapBuilder(account, validatedPeersMap) - newRouterID := "peer-new-router-102" - newRouterIP := net.IP{100, 64, 1, 2} - newRouter := &nbpeer.Peer{ - ID: newRouterID, - IP: newRouterIP, - Key: fmt.Sprintf("key-%s", newRouterID), - DNSLabel: "newrouter102", - Status: &nbpeer.PeerStatus{Connected: true, LastSeen: time.Now()}, - UserID: "user-admin", - Meta: nbpeer.PeerSystemMeta{WtVersion: "0.26.0", GoOS: "linux"}, - LastLogin: func() *time.Time { t := time.Now(); return &t }(), - } - - account.Peers[newRouterID] = newRouter - - if opsGroup, exists := account.Groups[opsGroupID]; exists { - opsGroup.Peers = append(opsGroup.Peers, newRouterID) - } - if allGroup, exists := account.Groups[allGroupID]; exists { - allGroup.Peers = append(allGroup.Peers, newRouterID) - } - - newRoute := &route.Route{ - ID: route.ID("route-new-router"), - Network: netip.MustParsePrefix("172.16.0.0/24"), - Peer: newRouter.Key, - PeerID: newRouterID, - Description: "Route from new router", - Enabled: true, - PeerGroups: []string{opsGroupID}, - Groups: []string{devGroupID, opsGroupID}, - AccessControlGroups: []string{devGroupID}, - AccountID: account.Id, - } - account.Routes[newRoute.ID] = newRoute - - validatedPeersMap[newRouterID] = struct{}{} - - b.ResetTimer() - b.Run("old builder after add", func(b *testing.B) { - for i := 0; i < b.N; i++ { - for _, testingPeerID := range peerIDs { - _ = account.GetPeerNetworkMap(ctx, testingPeerID, dns.CustomZone{}, []*zones.Zone{}, validatedPeersMap, nil, nil, nil, account.GetActiveGroupUsers()) - } - } - }) - - b.ResetTimer() - b.Run("new builder after add", func(b *testing.B) { - for i := 0; i < b.N; i++ { - _ = builder.OnPeerAddedIncremental(account, newRouterID) - for _, testingPeerID := range peerIDs { - _ = builder.GetPeerNetworkMap(ctx, testingPeerID, dns.CustomZone{}, nil, validatedPeersMap, nil) - } - } - }) -} - -func TestGetPeerNetworkMap_Golden_WithDeletedPeer(t *testing.T) { - account := createTestAccountWithEntities() - - ctx := context.Background() - validatedPeersMap := make(map[string]struct{}) - for i := range numPeers { - peerID := fmt.Sprintf("peer-%d", i) - if peerID == offlinePeerID { - continue - } - validatedPeersMap[peerID] = struct{}{} - } - - builder := types.NewNetworkMapBuilder(account, validatedPeersMap) - - deletedPeerID := "peer-25" - - delete(account.Peers, deletedPeerID) - - if devGroup, exists := account.Groups[devGroupID]; exists { - devGroup.Peers = slices.DeleteFunc(devGroup.Peers, func(id string) bool { - return id == deletedPeerID - }) - } - - if allGroup, exists := account.Groups[allGroupID]; exists { - allGroup.Peers = slices.DeleteFunc(allGroup.Peers, func(id string) bool { - return id == deletedPeerID - }) - } - - delete(validatedPeersMap, deletedPeerID) - - if account.Network != nil { - account.Network.Serial++ - } - - resourcePolicies := account.GetResourcePoliciesMap() - routers := account.GetResourceRoutersMap() - - legacyNetworkMap := account.GetPeerNetworkMap(ctx, testingPeerID, dns.CustomZone{}, []*zones.Zone{}, validatedPeersMap, resourcePolicies, routers, nil, account.GetActiveGroupUsers()) - normalizeAndSortNetworkMap(legacyNetworkMap) - legacyJSON, err := json.MarshalIndent(toNetworkMapJSON(legacyNetworkMap), "", " ") - require.NoError(t, err, "error marshaling legacy network map to JSON") - - err = builder.OnPeerDeleted(account, deletedPeerID) - require.NoError(t, err, "error deleting peer from cache") - - newNetworkMap := builder.GetPeerNetworkMap(ctx, testingPeerID, dns.CustomZone{}, nil, validatedPeersMap, nil) - normalizeAndSortNetworkMap(newNetworkMap) - newJSON, err := json.MarshalIndent(toNetworkMapJSON(newNetworkMap), "", " ") - require.NoError(t, err, "error marshaling new network map to JSON") - - if string(legacyJSON) != string(newJSON) { - legacyFilePath := filepath.Join("testdata", "networkmap_golden_with_deleted_peer.json") - newFilePath := filepath.Join("testdata", "networkmap_golden_new_with_onpeerdeleted.json") - - err = os.MkdirAll(filepath.Dir(legacyFilePath), 0755) - require.NoError(t, err) - - err = os.WriteFile(legacyFilePath, legacyJSON, 0644) - require.NoError(t, err) - t.Logf("Saved legacy network map to %s", legacyFilePath) - - err = os.WriteFile(newFilePath, newJSON, 0644) - require.NoError(t, err) - t.Logf("Saved new network map to %s", newFilePath) - - require.JSONEq(t, string(legacyJSON), string(newJSON), "network maps with deleted peer from legacy and new builder do not match") - } -} - -func TestGetPeerNetworkMap_Golden_WithDeletedRouterPeer(t *testing.T) { - account := createTestAccountWithEntities() - - ctx := context.Background() - validatedPeersMap := make(map[string]struct{}) - for i := range numPeers { - peerID := fmt.Sprintf("peer-%d", i) - if peerID == offlinePeerID { - continue - } - validatedPeersMap[peerID] = struct{}{} - } - - builder := types.NewNetworkMapBuilder(account, validatedPeersMap) - - deletedRouterID := "peer-75" - - var affectedRoute *route.Route - for _, r := range account.Routes { - if r.PeerID == deletedRouterID { - affectedRoute = r - break - } - } - require.NotNil(t, affectedRoute, "Router peer should have a route") - - for _, group := range account.Groups { - group.Peers = slices.DeleteFunc(group.Peers, func(id string) bool { - return id == deletedRouterID - }) - } - - for routeID, r := range account.Routes { - if r.Peer == account.Peers[deletedRouterID].Key || r.PeerID == deletedRouterID { - delete(account.Routes, routeID) - } - } - delete(account.Peers, deletedRouterID) - delete(validatedPeersMap, deletedRouterID) - - if account.Network != nil { - account.Network.Serial++ - } - - resourcePolicies := account.GetResourcePoliciesMap() - routers := account.GetResourceRoutersMap() - - legacyNetworkMap := account.GetPeerNetworkMap(ctx, testingPeerID, dns.CustomZone{}, []*zones.Zone{}, validatedPeersMap, resourcePolicies, routers, nil, account.GetActiveGroupUsers()) - normalizeAndSortNetworkMap(legacyNetworkMap) - legacyJSON, err := json.MarshalIndent(toNetworkMapJSON(legacyNetworkMap), "", " ") - require.NoError(t, err, "error marshaling legacy network map to JSON") - - err = builder.OnPeerDeleted(account, deletedRouterID) - require.NoError(t, err, "error deleting routing peer from cache") - - newNetworkMap := builder.GetPeerNetworkMap(ctx, testingPeerID, dns.CustomZone{}, nil, validatedPeersMap, nil) - normalizeAndSortNetworkMap(newNetworkMap) - newJSON, err := json.MarshalIndent(toNetworkMapJSON(newNetworkMap), "", " ") - require.NoError(t, err, "error marshaling new network map to JSON") - - if string(legacyJSON) != string(newJSON) { - legacyFilePath := filepath.Join("testdata", "networkmap_golden_with_deleted_router_peer.json") - newFilePath := filepath.Join("testdata", "networkmap_golden_new_with_deleted_router.json") - - err = os.MkdirAll(filepath.Dir(legacyFilePath), 0755) - require.NoError(t, err) - - err = os.WriteFile(legacyFilePath, legacyJSON, 0644) - require.NoError(t, err) - t.Logf("Saved legacy network map to %s", legacyFilePath) - - err = os.WriteFile(newFilePath, newJSON, 0644) - require.NoError(t, err) - t.Logf("Saved new network map to %s", newFilePath) - - require.JSONEq(t, string(legacyJSON), string(newJSON), "network maps with deleted router from legacy and new builder do not match") - } -} - -func BenchmarkGetPeerNetworkMap_AfterPeerDeleted(b *testing.B) { - account := createTestAccountWithEntities() - ctx := context.Background() - validatedPeersMap := make(map[string]struct{}) - var peerIDs []string - for i := range numPeers { - peerID := fmt.Sprintf("peer-%d", i) - validatedPeersMap[peerID] = struct{}{} - peerIDs = append(peerIDs, peerID) - } - - deletedPeerID := "peer-25" - - delete(account.Peers, deletedPeerID) - account.Groups[devGroupID].Peers = slices.DeleteFunc(account.Groups[devGroupID].Peers, func(id string) bool { - return id == deletedPeerID - }) - account.Groups[allGroupID].Peers = slices.DeleteFunc(account.Groups[allGroupID].Peers, func(id string) bool { - return id == deletedPeerID - }) - delete(validatedPeersMap, deletedPeerID) - - builder := types.NewNetworkMapBuilder(account, validatedPeersMap) - - b.ResetTimer() - b.Run("old builder after delete", func(b *testing.B) { - for i := 0; i < b.N; i++ { - for _, testingPeerID := range peerIDs { - _ = account.GetPeerNetworkMap(ctx, testingPeerID, dns.CustomZone{}, []*zones.Zone{}, validatedPeersMap, nil, nil, nil, account.GetActiveGroupUsers()) - } - } - }) - - b.ResetTimer() - b.Run("new builder after delete", func(b *testing.B) { - for i := 0; i < b.N; i++ { - _ = builder.OnPeerDeleted(account, deletedPeerID) - for _, testingPeerID := range peerIDs { - _ = builder.GetPeerNetworkMap(ctx, testingPeerID, dns.CustomZone{}, nil, validatedPeersMap, nil) - } - } - }) -} - -func normalizeAndSortNetworkMap(networkMap *types.NetworkMap) { - for _, peer := range networkMap.Peers { - if peer.Status != nil { - peer.Status.LastSeen = time.Time{} - } - peer.LastLogin = &time.Time{} - } - for _, peer := range networkMap.OfflinePeers { - if peer.Status != nil { - peer.Status.LastSeen = time.Time{} - } - peer.LastLogin = &time.Time{} - } - - sort.Slice(networkMap.Peers, func(i, j int) bool { return networkMap.Peers[i].ID < networkMap.Peers[j].ID }) - sort.Slice(networkMap.OfflinePeers, func(i, j int) bool { return networkMap.OfflinePeers[i].ID < networkMap.OfflinePeers[j].ID }) - sort.Slice(networkMap.Routes, func(i, j int) bool { return networkMap.Routes[i].ID < networkMap.Routes[j].ID }) - - sort.Slice(networkMap.FirewallRules, func(i, j int) bool { - r1, r2 := networkMap.FirewallRules[i], networkMap.FirewallRules[j] - if r1.PeerIP != r2.PeerIP { - return r1.PeerIP < r2.PeerIP - } - if r1.Protocol != r2.Protocol { - return r1.Protocol < r2.Protocol - } - if r1.Direction != r2.Direction { - return r1.Direction < r2.Direction - } - if r1.Action != r2.Action { - return r1.Action < r2.Action - } - return r1.Port < r2.Port - }) - - sort.Slice(networkMap.RoutesFirewallRules, func(i, j int) bool { - r1, r2 := networkMap.RoutesFirewallRules[i], networkMap.RoutesFirewallRules[j] - if r1.RouteID != r2.RouteID { - return r1.RouteID < r2.RouteID - } - if r1.Action != r2.Action { - return r1.Action < r2.Action - } - if r1.Destination != r2.Destination { - return r1.Destination < r2.Destination - } - if len(r1.SourceRanges) > 0 && len(r2.SourceRanges) > 0 { - if r1.SourceRanges[0] != r2.SourceRanges[0] { - return r1.SourceRanges[0] < r2.SourceRanges[0] - } - } - return r1.Port < r2.Port - }) - - for _, ranges := range networkMap.RoutesFirewallRules { - sort.Slice(ranges.SourceRanges, func(i, j int) bool { - return ranges.SourceRanges[i] < ranges.SourceRanges[j] - }) - } -} - -type networkMapJSON struct { - Peers []*nbpeer.Peer `json:"Peers"` - Network *types.Network `json:"Network"` - Routes []*route.Route `json:"Routes"` - DNSConfig dns.Config `json:"DNSConfig"` - OfflinePeers []*nbpeer.Peer `json:"OfflinePeers"` - FirewallRules []*types.FirewallRule `json:"FirewallRules"` - RoutesFirewallRules []*types.RouteFirewallRule `json:"RoutesFirewallRules"` - ForwardingRules []*types.ForwardingRule `json:"ForwardingRules"` - AuthorizedUsers map[string][]string `json:"AuthorizedUsers,omitempty"` - EnableSSH bool `json:"EnableSSH"` -} - -func toNetworkMapJSON(nm *types.NetworkMap) *networkMapJSON { - result := &networkMapJSON{ - Peers: nm.Peers, - Network: nm.Network, - Routes: nm.Routes, - DNSConfig: nm.DNSConfig, - OfflinePeers: nm.OfflinePeers, - FirewallRules: nm.FirewallRules, - RoutesFirewallRules: nm.RoutesFirewallRules, - ForwardingRules: nm.ForwardingRules, - EnableSSH: nm.EnableSSH, - } - - if len(nm.AuthorizedUsers) > 0 { - result.AuthorizedUsers = make(map[string][]string) - localUsers := make([]string, 0, len(nm.AuthorizedUsers)) - for localUser := range nm.AuthorizedUsers { - localUsers = append(localUsers, localUser) - } - sort.Strings(localUsers) - - for _, localUser := range localUsers { - userIDs := nm.AuthorizedUsers[localUser] - sortedUserIDs := make([]string, 0, len(userIDs)) - for userID := range userIDs { - sortedUserIDs = append(sortedUserIDs, userID) - } - sort.Strings(sortedUserIDs) - result.AuthorizedUsers[localUser] = sortedUserIDs - } - } - - return result -} - -func createTestAccountWithEntities() *types.Account { - peers := make(map[string]*nbpeer.Peer) - devGroupPeers, opsGroupPeers, allGroupPeers := []string{}, []string{}, []string{} - - for i := range numPeers { - peerID := fmt.Sprintf("peer-%d", i) - ip := net.IP{100, 64, 0, byte(i + 1)} - wtVersion := "0.25.0" - if i%2 == 0 { - wtVersion = "0.40.0" - } - - p := &nbpeer.Peer{ - ID: peerID, IP: ip, Key: fmt.Sprintf("key-%s", peerID), DNSLabel: fmt.Sprintf("peer%d", i+1), - Status: &nbpeer.PeerStatus{Connected: true, LastSeen: time.Now()}, - UserID: "user-admin", Meta: nbpeer.PeerSystemMeta{WtVersion: wtVersion, GoOS: "linux"}, - } - - if peerID == expiredPeerID { - p.LoginExpirationEnabled = true - pastTimestamp := time.Now().Add(-2 * time.Hour) - p.LastLogin = &pastTimestamp - } - - peers[peerID] = p - allGroupPeers = append(allGroupPeers, peerID) - if i < numPeers/2 { - devGroupPeers = append(devGroupPeers, peerID) - } else { - opsGroupPeers = append(opsGroupPeers, peerID) - } - - } - - groups := map[string]*types.Group{ - allGroupID: {ID: allGroupID, Name: "All", Peers: allGroupPeers}, - devGroupID: {ID: devGroupID, Name: "Developers", Peers: devGroupPeers}, - opsGroupID: {ID: opsGroupID, Name: "Operations", Peers: opsGroupPeers}, - sshUsersGroupID: {ID: sshUsersGroupID, Name: "SSH Users", Peers: []string{}}, - } - - policies := []*types.Policy{ - { - ID: policyIDAll, Name: "Default-Allow", Enabled: true, - Rules: []*types.PolicyRule{{ - ID: policyIDAll, Name: "Allow All", Enabled: true, Action: types.PolicyTrafficActionAccept, - Protocol: types.PolicyRuleProtocolALL, Bidirectional: true, - Sources: []string{allGroupID}, Destinations: []string{allGroupID}, - }}, - }, - { - ID: policyIDDevOps, Name: "Dev to Ops Web Access", Enabled: true, - Rules: []*types.PolicyRule{{ - ID: policyIDDevOps, Name: "Dev -> Ops (HTTP Range)", Enabled: true, Action: types.PolicyTrafficActionAccept, - Protocol: types.PolicyRuleProtocolTCP, Bidirectional: false, - PortRanges: []types.RulePortRange{{Start: 8080, End: 8090}}, - Sources: []string{devGroupID}, Destinations: []string{opsGroupID}, - }}, - }, - { - ID: policyIDDrop, Name: "Drop DB traffic", Enabled: true, - Rules: []*types.PolicyRule{{ - ID: policyIDDrop, Name: "Drop DB", Enabled: true, Action: types.PolicyTrafficActionDrop, - Protocol: types.PolicyRuleProtocolTCP, Ports: []string{"5432"}, Bidirectional: true, - Sources: []string{devGroupID}, Destinations: []string{opsGroupID}, - }}, - }, - { - ID: policyIDPosture, Name: "Posture Check for DB Resource", Enabled: true, - SourcePostureChecks: []string{postureCheckID}, - Rules: []*types.PolicyRule{{ - ID: policyIDPosture, Name: "Allow DB Access", Enabled: true, Action: types.PolicyTrafficActionAccept, - Protocol: types.PolicyRuleProtocolALL, Bidirectional: true, - Sources: []string{opsGroupID}, DestinationResource: types.Resource{ID: networkResourceID}, - }}, - }, - { - ID: policyIDSSH, Name: "SSH Access Policy", Enabled: true, - Rules: []*types.PolicyRule{{ - ID: policyIDSSH, Name: "Allow SSH to Ops", Enabled: true, Action: types.PolicyTrafficActionAccept, - Protocol: types.PolicyRuleProtocolNetbirdSSH, Bidirectional: false, - Sources: []string{devGroupID}, Destinations: []string{opsGroupID}, - AuthorizedGroups: map[string][]string{sshUsersGroupID: {"root", "admin"}}, - }}, - }, - } - - routes := map[route.ID]*route.Route{ - routeID: { - ID: routeID, Network: netip.MustParsePrefix("192.168.10.0/24"), - Peer: peers["peer-75"].Key, - PeerID: "peer-75", - Description: "Route to internal resource", Enabled: true, - PeerGroups: []string{devGroupID, opsGroupID}, - Groups: []string{devGroupID, opsGroupID}, - AccessControlGroups: []string{devGroupID}, - }, - routeHA1ID: { - ID: routeHA1ID, Network: netip.MustParsePrefix("10.10.0.0/16"), - Peer: peers["peer-80"].Key, - PeerID: "peer-80", - Description: "HA Route 1", Enabled: true, Metric: 1000, - PeerGroups: []string{allGroupID}, - Groups: []string{allGroupID}, - AccessControlGroups: []string{allGroupID}, - }, - routeHA2ID: { - ID: routeHA2ID, Network: netip.MustParsePrefix("10.10.0.0/16"), - Peer: peers["peer-90"].Key, - PeerID: "peer-90", - Description: "HA Route 2", Enabled: true, Metric: 900, - PeerGroups: []string{devGroupID, opsGroupID}, - Groups: []string{devGroupID, opsGroupID}, - AccessControlGroups: []string{allGroupID}, - }, - } - - users := map[string]*types.User{ - userAdminID: {Id: userAdminID, Role: types.UserRoleAdmin, IsServiceUser: false, AccountID: testAccountID, AutoGroups: []string{allGroupID}}, - userDevID: {Id: userDevID, Role: types.UserRoleUser, IsServiceUser: false, AccountID: testAccountID, AutoGroups: []string{sshUsersGroupID, devGroupID}}, - userOpsID: {Id: userOpsID, Role: types.UserRoleUser, IsServiceUser: false, AccountID: testAccountID, AutoGroups: []string{sshUsersGroupID, opsGroupID}}, - } - - account := &types.Account{ - Id: testAccountID, Peers: peers, Groups: groups, Policies: policies, Routes: routes, - Users: users, - Network: &types.Network{ - Identifier: "net-golden-test", Net: net.IPNet{IP: net.IP{100, 64, 0, 0}, Mask: net.CIDRMask(16, 32)}, Serial: 1, - }, - DNSSettings: types.DNSSettings{DisabledManagementGroups: []string{opsGroupID}}, - NameServerGroups: map[string]*dns.NameServerGroup{ - nameserverGroupID: { - ID: nameserverGroupID, Name: "Main NS", Enabled: true, Groups: []string{devGroupID}, - NameServers: []dns.NameServer{{IP: netip.MustParseAddr("8.8.8.8"), NSType: dns.UDPNameServerType, Port: 53}}, - }, - }, - PostureChecks: []*posture.Checks{ - {ID: postureCheckID, Name: "Check version", Checks: posture.ChecksDefinition{ - NBVersionCheck: &posture.NBVersionCheck{MinVersion: "0.26.0"}, - }}, - }, - NetworkResources: []*resourceTypes.NetworkResource{ - {ID: networkResourceID, NetworkID: networkID, AccountID: testAccountID, Enabled: true, Address: "db.netbird.cloud"}, - }, - Networks: []*networkTypes.Network{{ID: networkID, Name: "DB Network", AccountID: testAccountID}}, - NetworkRouters: []*routerTypes.NetworkRouter{ - {ID: networkRouterID, NetworkID: networkID, Peer: routingPeerID, Enabled: true, AccountID: testAccountID}, - }, - Settings: &types.Settings{PeerLoginExpirationEnabled: true, PeerLoginExpiration: 1 * time.Hour}, - } - - for _, p := range account.Policies { - p.AccountID = account.Id - } - for _, r := range account.Routes { - r.AccountID = account.Id - } - - return account -} - -func TestGetPeerNetworkMap_Golden_New_WithOnPeerAddedRouter_Batched(t *testing.T) { - account := createTestAccountWithEntities() - - ctx := context.Background() - validatedPeersMap := make(map[string]struct{}) - for i := range numPeers { - peerID := fmt.Sprintf("peer-%d", i) - if peerID == offlinePeerID { - continue - } - validatedPeersMap[peerID] = struct{}{} - } - - builder := types.NewNetworkMapBuilder(account, validatedPeersMap) - - newRouterID := "peer-new-router-102" - newRouterIP := net.IP{100, 64, 1, 2} - newRouter := &nbpeer.Peer{ - ID: newRouterID, - IP: newRouterIP, - Key: fmt.Sprintf("key-%s", newRouterID), - DNSLabel: "newrouter102", - Status: &nbpeer.PeerStatus{Connected: true, LastSeen: time.Now()}, - UserID: "user-admin", - Meta: nbpeer.PeerSystemMeta{WtVersion: "0.26.0", GoOS: "linux"}, - LastLogin: func() *time.Time { t := time.Now(); return &t }(), - } - - account.Peers[newRouterID] = newRouter - - if opsGroup, exists := account.Groups[opsGroupID]; exists { - opsGroup.Peers = append(opsGroup.Peers, newRouterID) - } - if allGroup, exists := account.Groups[allGroupID]; exists { - allGroup.Peers = append(allGroup.Peers, newRouterID) - } - - newRoute := &route.Route{ - ID: route.ID("route-new-router"), - Network: netip.MustParsePrefix("172.16.0.0/24"), - Peer: newRouter.Key, - PeerID: newRouterID, - Description: "Route from new router", - Enabled: true, - PeerGroups: []string{opsGroupID}, - Groups: []string{devGroupID, opsGroupID}, - AccessControlGroups: []string{devGroupID}, - AccountID: account.Id, - } - account.Routes[newRoute.ID] = newRoute - - validatedPeersMap[newRouterID] = struct{}{} - - if account.Network != nil { - account.Network.Serial++ - } - - builder.EnqueuePeersForIncrementalAdd(account, newRouterID) - - time.Sleep(100 * time.Millisecond) - - networkMap := builder.GetPeerNetworkMap(ctx, testingPeerID, dns.CustomZone{}, nil, validatedPeersMap, nil) - - normalizeAndSortNetworkMap(networkMap) - - jsonData, err := json.MarshalIndent(networkMap, "", " ") - require.NoError(t, err, "error marshaling network map to JSON") - - goldenFilePath := filepath.Join("testdata", "networkmap_golden_new_with_onpeeradded_router.json") - - t.Log("Update golden file with OnPeerAdded router...") - err = os.MkdirAll(filepath.Dir(goldenFilePath), 0755) - require.NoError(t, err) - err = os.WriteFile(goldenFilePath, jsonData, 0644) - require.NoError(t, err) - - expectedJSON, err := os.ReadFile(goldenFilePath) - require.NoError(t, err, "error reading golden file") - - require.JSONEq(t, string(expectedJSON), string(jsonData), "network map from NEW builder with OnPeerAdded router does not match golden file") -} diff --git a/management/server/types/networkmapbuilder.go b/management/server/types/networkmapbuilder.go deleted file mode 100644 index 6448b8403..000000000 --- a/management/server/types/networkmapbuilder.go +++ /dev/null @@ -1,2317 +0,0 @@ -package types - -import ( - "context" - "fmt" - "slices" - "strconv" - "strings" - "sync" - "time" - - log "github.com/sirupsen/logrus" - "golang.org/x/exp/maps" - - "github.com/netbirdio/netbird/client/ssh/auth" - nbdns "github.com/netbirdio/netbird/dns" - "github.com/netbirdio/netbird/management/internals/modules/zones" - resourceTypes "github.com/netbirdio/netbird/management/server/networks/resources/types" - routerTypes "github.com/netbirdio/netbird/management/server/networks/routers/types" - nbpeer "github.com/netbirdio/netbird/management/server/peer" - "github.com/netbirdio/netbird/management/server/telemetry" - "github.com/netbirdio/netbird/route" -) - -const ( - allPeers = "0.0.0.0" - allWildcard = "0.0.0.0/0" - v6AllWildcard = "::/0" - fw = "fw:" - rfw = "route-fw:" - - szAddPeerBatch = 10 - maxPeerAddRetries = 20 -) - -type NetworkMapCache struct { - globalRoutes map[route.ID]*route.Route - globalRules map[string]*FirewallRule //ruleId - globalRouteRules map[string]*RouteFirewallRule //ruleId - globalPeers map[string]*nbpeer.Peer - - groupToPeers map[string][]string - peerToGroups map[string][]string - policyToRules map[string][]*PolicyRule //policyId - groupToPolicies map[string][]*Policy - groupToRoutes map[string][]*route.Route - peerToRoutes map[string][]*route.Route - - peerACLs map[string]*PeerACLView - peerRoutes map[string]*PeerRoutesView - peerDNS map[string]*nbdns.Config - peerSSH map[string]*PeerSSHView - - groupIDToUserIDs map[string][]string - allowedUserIDs map[string]struct{} - - resourceRouters map[string]map[string]*routerTypes.NetworkRouter - resourcePolicies map[string][]*Policy - - globalResources map[string]*resourceTypes.NetworkResource // resourceId - - acgToRoutes map[string]map[route.ID]*RouteOwnerInfo // routeID -> owner info - noACGRoutes map[route.ID]*RouteOwnerInfo - - mu sync.RWMutex -} - -type RouteOwnerInfo struct { - PeerID string - RouteID route.ID -} - -type PeerACLView struct { - ConnectedPeerIDs []string - FirewallRuleIDs []string -} - -type PeerRoutesView struct { - OwnRouteIDs []route.ID - NetworkResourceIDs []route.ID - InheritedRouteIDs []route.ID - RouteFirewallRuleIDs []string -} - -type PeerSSHView struct { - EnableSSH bool - AuthorizedUsers map[string]map[string]struct{} -} - -type NetworkMapBuilder struct { - account *Account - cache *NetworkMapCache - validatedPeers map[string]struct{} - - apb addPeerBatch -} - -type addPeerBatch struct { - mu sync.Mutex - sg *sync.Cond - ids []string - la *Account - retryCount map[string]int -} - -func NewNetworkMapBuilder(account *Account, validatedPeers map[string]struct{}) *NetworkMapBuilder { - builder := &NetworkMapBuilder{ - cache: &NetworkMapCache{ - globalRoutes: make(map[route.ID]*route.Route), - globalRules: make(map[string]*FirewallRule), - globalRouteRules: make(map[string]*RouteFirewallRule), - globalPeers: make(map[string]*nbpeer.Peer), - groupToPeers: make(map[string][]string), - peerToGroups: make(map[string][]string), - policyToRules: make(map[string][]*PolicyRule), - groupToPolicies: make(map[string][]*Policy), - groupToRoutes: make(map[string][]*route.Route), - peerToRoutes: make(map[string][]*route.Route), - peerACLs: make(map[string]*PeerACLView), - peerRoutes: make(map[string]*PeerRoutesView), - peerDNS: make(map[string]*nbdns.Config), - peerSSH: make(map[string]*PeerSSHView), - groupIDToUserIDs: make(map[string][]string), - allowedUserIDs: make(map[string]struct{}), - globalResources: make(map[string]*resourceTypes.NetworkResource), - acgToRoutes: make(map[string]map[route.ID]*RouteOwnerInfo), - noACGRoutes: make(map[route.ID]*RouteOwnerInfo), - }, - validatedPeers: make(map[string]struct{}), - } - builder.apb.sg = sync.NewCond(&builder.apb.mu) - builder.apb.ids = make([]string, 0, szAddPeerBatch) - builder.apb.la = account - builder.apb.retryCount = make(map[string]int) - - maps.Copy(builder.validatedPeers, validatedPeers) - - builder.initialBuild(account) - - go builder.incAddPeerLoop() - return builder -} - -func (b *NetworkMapBuilder) initialBuild(account *Account) { - b.cache.mu.Lock() - defer b.cache.mu.Unlock() - - b.account = account - - start := time.Now() - - b.buildGlobalIndexes(account) - - resourceRouters := account.GetResourceRoutersMap() - resourcePolicies := account.GetResourcePoliciesMap() - b.cache.resourceRouters = resourceRouters - b.cache.resourcePolicies = resourcePolicies - - for peerID := range account.Peers { - b.buildPeerACLView(account, peerID) - b.buildPeerRoutesView(account, peerID) - b.buildPeerDNSView(account, peerID) - } - - log.Debugf("NetworkMapBuilder: Initial build completed in %v for account %s", time.Since(start), account.Id) -} - -func (b *NetworkMapBuilder) buildGlobalIndexes(account *Account) { - clear(b.cache.globalPeers) - clear(b.cache.groupToPeers) - clear(b.cache.peerToGroups) - clear(b.cache.policyToRules) - clear(b.cache.groupToPolicies) - clear(b.cache.globalRoutes) - clear(b.cache.globalRules) - clear(b.cache.globalRouteRules) - clear(b.cache.globalResources) - clear(b.cache.groupToRoutes) - clear(b.cache.peerToRoutes) - clear(b.cache.acgToRoutes) - clear(b.cache.noACGRoutes) - clear(b.cache.groupIDToUserIDs) - clear(b.cache.allowedUserIDs) - clear(b.cache.peerSSH) - - maps.Copy(b.cache.globalPeers, account.Peers) - - b.cache.groupIDToUserIDs = account.GetActiveGroupUsers() - b.cache.allowedUserIDs = b.buildAllowedUserIDs(account) - - for groupID, group := range account.Groups { - peersCopy := make([]string, len(group.Peers)) - copy(peersCopy, group.Peers) - b.cache.groupToPeers[groupID] = peersCopy - - for _, peerID := range group.Peers { - b.cache.peerToGroups[peerID] = append(b.cache.peerToGroups[peerID], groupID) - } - } - - for _, policy := range account.Policies { - if !policy.Enabled { - continue - } - - b.cache.policyToRules[policy.ID] = policy.Rules - - affectedGroups := make(map[string]struct{}) - for _, rule := range policy.Rules { - if !rule.Enabled { - continue - } - - for _, groupID := range rule.Sources { - affectedGroups[groupID] = struct{}{} - } - for _, groupID := range rule.Destinations { - affectedGroups[groupID] = struct{}{} - } - if rule.SourceResource.Type == ResourceTypePeer && rule.SourceResource.ID != "" { - groupId := rule.SourceResource.ID - affectedGroups[groupId] = struct{}{} - b.cache.peerToGroups[rule.SourceResource.ID] = append(b.cache.peerToGroups[rule.SourceResource.ID], groupId) - } - if rule.DestinationResource.Type == ResourceTypePeer && rule.DestinationResource.ID != "" { - groupId := rule.DestinationResource.ID - affectedGroups[groupId] = struct{}{} - b.cache.peerToGroups[rule.DestinationResource.ID] = append(b.cache.peerToGroups[rule.DestinationResource.ID], groupId) - } - } - - for groupID := range affectedGroups { - b.cache.groupToPolicies[groupID] = append(b.cache.groupToPolicies[groupID], policy) - } - } - - for _, resource := range account.NetworkResources { - if !resource.Enabled { - continue - } - b.cache.globalResources[resource.ID] = resource - } - - for _, r := range account.Routes { - if !r.Enabled { - continue - } - for _, groupID := range r.PeerGroups { - b.cache.groupToRoutes[groupID] = append(b.cache.groupToRoutes[groupID], r) - } - if r.Peer != "" { - if peer, ok := b.cache.globalPeers[r.Peer]; ok { - b.cache.peerToRoutes[peer.ID] = append(b.cache.peerToRoutes[peer.ID], r) - } - } - } -} - -func (b *NetworkMapBuilder) buildPeerACLView(account *Account, peerID string) { - peer := account.GetPeer(peerID) - if peer == nil { - return - } - - allPotentialPeers, firewallRules, authorizedUsers, sshEnabled := b.getPeerConnectionResources(account, peer, b.validatedPeers) - - isRouter, networkResourcesRoutes, sourcePeers := b.getNetworkResourcesForPeer(account, peer) - - var emptyExpiredPeers []*nbpeer.Peer - finalAllPeers := b.addNetworksRoutingPeers( - networkResourcesRoutes, - peer, - allPotentialPeers, - emptyExpiredPeers, - isRouter, - sourcePeers, - ) - - view := &PeerACLView{ - ConnectedPeerIDs: make([]string, 0, len(finalAllPeers)), - FirewallRuleIDs: make([]string, 0, len(firewallRules)), - } - - for _, p := range finalAllPeers { - view.ConnectedPeerIDs = append(view.ConnectedPeerIDs, p.ID) - } - - for _, rule := range firewallRules { - ruleID := b.generateFirewallRuleID(rule) - view.FirewallRuleIDs = append(view.FirewallRuleIDs, ruleID) - b.cache.globalRules[ruleID] = rule - } - - b.cache.peerACLs[peerID] = view - b.cache.peerSSH[peerID] = &PeerSSHView{ - EnableSSH: sshEnabled, - AuthorizedUsers: authorizedUsers, - } -} - -func (b *NetworkMapBuilder) getPeerConnectionResources(account *Account, peer *nbpeer.Peer, - validatedPeersMap map[string]struct{}, -) ([]*nbpeer.Peer, []*FirewallRule, map[string]map[string]struct{}, bool) { - peerID := peer.ID - ctx := context.Background() - - peerGroups := b.cache.peerToGroups[peerID] - peerGroupsMap := make(map[string]struct{}, len(peerGroups)) - for _, groupID := range peerGroups { - peerGroupsMap[groupID] = struct{}{} - } - - rulesExists := make(map[string]struct{}) - peersExists := make(map[string]struct{}) - fwRules := make([]*FirewallRule, 0) - peers := make([]*nbpeer.Peer, 0) - - authorizedUsers := make(map[string]map[string]struct{}) - sshEnabled := false - - for _, group := range peerGroups { - policies := b.cache.groupToPolicies[group] - for _, policy := range policies { - if isValid := account.validatePostureChecksOnPeer(ctx, policy.SourcePostureChecks, peerID); !isValid { - continue - } - rules := b.cache.policyToRules[policy.ID] - for _, rule := range rules { - var sourcePeers, destinationPeers []*nbpeer.Peer - var peerInSources, peerInDestinations bool - - if rule.SourceResource.Type == ResourceTypePeer && rule.SourceResource.ID != "" { - peerInSources = rule.SourceResource.ID == peerID - } else { - peerInSources = b.isPeerInGroupscached(rule.Sources, peerGroupsMap) - } - - if rule.DestinationResource.Type == ResourceTypePeer && rule.DestinationResource.ID != "" { - peerInDestinations = rule.DestinationResource.ID == peerID - } else { - peerInDestinations = b.isPeerInGroupscached(rule.Destinations, peerGroupsMap) - } - - if !peerInSources && !peerInDestinations { - continue - } - - if rule.SourceResource.Type == ResourceTypePeer && rule.SourceResource.ID != "" { - peer := account.GetPeer(rule.SourceResource.ID) - if peer != nil { - sourcePeers = []*nbpeer.Peer{peer} - } - } else { - sourcePeers = b.getPeersFromGroupscached(account, rule.Sources, peerID, policy.SourcePostureChecks, validatedPeersMap) - } - - if rule.DestinationResource.Type == ResourceTypePeer && rule.DestinationResource.ID != "" { - peer := account.GetPeer(rule.DestinationResource.ID) - if peer != nil { - destinationPeers = []*nbpeer.Peer{peer} - } - } else { - destinationPeers = b.getPeersFromGroupscached(account, rule.Destinations, peerID, nil, validatedPeersMap) - } - - if rule.Bidirectional { - if peerInSources { - b.generateResourcescached( - rule, destinationPeers, FirewallRuleDirectionIN, - peer, &peers, &fwRules, peersExists, rulesExists, - ) - } - if peerInDestinations { - b.generateResourcescached( - rule, sourcePeers, FirewallRuleDirectionOUT, - peer, &peers, &fwRules, peersExists, rulesExists, - ) - } - } - - if peerInSources { - b.generateResourcescached( - rule, destinationPeers, FirewallRuleDirectionOUT, - peer, &peers, &fwRules, peersExists, rulesExists, - ) - } - - if peerInDestinations { - b.generateResourcescached( - rule, sourcePeers, FirewallRuleDirectionIN, - peer, &peers, &fwRules, peersExists, rulesExists, - ) - - if rule.Protocol == PolicyRuleProtocolNetbirdSSH { - sshEnabled = true - switch { - case len(rule.AuthorizedGroups) > 0: - for groupID, localUsers := range rule.AuthorizedGroups { - userIDs, ok := b.cache.groupIDToUserIDs[groupID] - if !ok { - continue - } - - if len(localUsers) == 0 { - localUsers = []string{auth.Wildcard} - } - - for _, localUser := range localUsers { - if authorizedUsers[localUser] == nil { - authorizedUsers[localUser] = make(map[string]struct{}) - } - for _, userID := range userIDs { - authorizedUsers[localUser][userID] = struct{}{} - } - } - } - case rule.AuthorizedUser != "": - if authorizedUsers[auth.Wildcard] == nil { - authorizedUsers[auth.Wildcard] = make(map[string]struct{}) - } - authorizedUsers[auth.Wildcard][rule.AuthorizedUser] = struct{}{} - default: - authorizedUsers[auth.Wildcard] = maps.Clone(b.cache.allowedUserIDs) - } - } else if policyRuleImpliesLegacySSH(rule) && peer.SSHEnabled { - sshEnabled = true - authorizedUsers[auth.Wildcard] = maps.Clone(b.cache.allowedUserIDs) - } - } - } - } - } - - return peers, fwRules, authorizedUsers, sshEnabled -} - -func (b *NetworkMapBuilder) isPeerInGroupscached(groupIDs []string, peerGroupsMap map[string]struct{}) bool { - for _, groupID := range groupIDs { - if _, exists := peerGroupsMap[groupID]; exists { - return true - } - } - return false -} - -func (b *NetworkMapBuilder) getPeersFromGroupscached(account *Account, groupIDs []string, - excludePeerID string, postureChecksIDs []string, validatedPeersMap map[string]struct{}, -) []*nbpeer.Peer { - ctx := context.Background() - uniquePeers := make(map[string]*nbpeer.Peer) - - for _, groupID := range groupIDs { - peerIDs := b.cache.groupToPeers[groupID] - for _, peerID := range peerIDs { - if peerID == excludePeerID { - continue - } - - if _, ok := validatedPeersMap[peerID]; !ok { - continue - } - - peer := b.cache.globalPeers[peerID] - if peer == nil { - continue - } - - if len(postureChecksIDs) > 0 { - if !account.validatePostureChecksOnPeer(ctx, postureChecksIDs, peerID) { - continue - } - } - - uniquePeers[peerID] = peer - } - } - - result := make([]*nbpeer.Peer, 0, len(uniquePeers)) - for _, peer := range uniquePeers { - result = append(result, peer) - } - - return result -} - -func (b *NetworkMapBuilder) generateResourcescached( - rule *PolicyRule, groupPeers []*nbpeer.Peer, direction int, targetPeer *nbpeer.Peer, - peers *[]*nbpeer.Peer, rules *[]*FirewallRule, peersExists map[string]struct{}, rulesExists map[string]struct{}, -) { - for _, peer := range groupPeers { - if peer == nil { - continue - } - if _, ok := peersExists[peer.ID]; !ok { - *peers = append(*peers, peer) - peersExists[peer.ID] = struct{}{} - } - - fr := FirewallRule{ - PolicyID: rule.ID, - PeerIP: peer.IP.String(), - Direction: direction, - Action: string(rule.Action), - Protocol: firewallRuleProtocol(rule.Protocol), - } - - var s strings.Builder - s.WriteString(rule.ID) - s.WriteString(fr.PeerIP) - s.WriteString(strconv.Itoa(direction)) - s.WriteString(fr.Protocol) - s.WriteString(fr.Action) - s.WriteString(strings.Join(rule.Ports, ",")) - - ruleID := s.String() - - if _, ok := rulesExists[ruleID]; ok { - continue - } - rulesExists[ruleID] = struct{}{} - - if len(rule.Ports) == 0 && len(rule.PortRanges) == 0 { - *rules = append(*rules, &fr) - continue - } - - *rules = append(*rules, expandPortsAndRanges(fr, rule, targetPeer)...) - } -} - -func (b *NetworkMapBuilder) getNetworkResourcesForPeer(account *Account, peer *nbpeer.Peer) (bool, []*route.Route, map[string]struct{}) { - ctx := context.Background() - peerID := peer.ID - - var isRoutingPeer bool - var routes []*route.Route - allSourcePeers := make(map[string]struct{}) - - peerGroups := b.cache.peerToGroups[peerID] - peerGroupsMap := make(map[string]struct{}, len(peerGroups)) - for _, groupID := range peerGroups { - peerGroupsMap[groupID] = struct{}{} - } - - for _, resource := range b.cache.globalResources { - - networkRoutingPeers := b.cache.resourceRouters[resource.NetworkID] - resourcePolicies := b.cache.resourcePolicies[resource.ID] - if len(resourcePolicies) == 0 { - continue - } - - isRouterForThisResource := false - - if networkRoutingPeers != nil { - if router, ok := networkRoutingPeers[peerID]; ok && router.Enabled { - isRoutingPeer = true - isRouterForThisResource = true - if rt := b.createNetworkResourceRoutes(resource, peerID, router, resourcePolicies); rt != nil { - routes = append(routes, rt) - } - } - } - - hasAccessAsClient := false - if !isRouterForThisResource { - for _, policy := range resourcePolicies { - if b.isPeerInGroupscached(policy.SourceGroups(), peerGroupsMap) { - if account.validatePostureChecksOnPeer(ctx, policy.SourcePostureChecks, peerID) { - hasAccessAsClient = true - break - } - } - } - } - - if hasAccessAsClient && networkRoutingPeers != nil { - for routerPeerID, router := range networkRoutingPeers { - if router.Enabled { - if rt := b.createNetworkResourceRoutes(resource, routerPeerID, router, resourcePolicies); rt != nil { - routes = append(routes, rt) - } - } - } - } - - if isRouterForThisResource { - for _, policy := range resourcePolicies { - var peersWithAccess []*nbpeer.Peer - if policy.Rules[0].SourceResource.Type == ResourceTypePeer && policy.Rules[0].SourceResource.ID != "" { - peersWithAccess = []*nbpeer.Peer{peer} - } else { - peersWithAccess = b.getPeersFromGroupscached(account, policy.SourceGroups(), "", policy.SourcePostureChecks, b.validatedPeers) - } - for _, p := range peersWithAccess { - allSourcePeers[p.ID] = struct{}{} - } - } - } - } - - return isRoutingPeer, routes, allSourcePeers -} - -func (b *NetworkMapBuilder) createNetworkResourceRoutes( - resource *resourceTypes.NetworkResource, routerPeerID string, - router *routerTypes.NetworkRouter, resourcePolicies []*Policy, -) *route.Route { - if len(resourcePolicies) > 0 { - peer := b.cache.globalPeers[routerPeerID] - if peer != nil { - return resource.ToRoute(peer, router) - } - } - return nil -} - -func (b *NetworkMapBuilder) addNetworksRoutingPeers( - networkResourcesRoutes []*route.Route, peer *nbpeer.Peer, peersToConnect []*nbpeer.Peer, - expiredPeers []*nbpeer.Peer, isRouter bool, sourcePeers map[string]struct{}, -) []*nbpeer.Peer { - - networkRoutesPeers := make(map[string]struct{}, len(networkResourcesRoutes)) - for _, r := range networkResourcesRoutes { - networkRoutesPeers[r.PeerID] = struct{}{} - } - - delete(sourcePeers, peer.ID) - delete(networkRoutesPeers, peer.ID) - - for _, existingPeer := range peersToConnect { - delete(sourcePeers, existingPeer.ID) - delete(networkRoutesPeers, existingPeer.ID) - } - for _, expPeer := range expiredPeers { - delete(sourcePeers, expPeer.ID) - delete(networkRoutesPeers, expPeer.ID) - } - - missingPeers := make(map[string]struct{}, len(sourcePeers)+len(networkRoutesPeers)) - if isRouter { - for p := range sourcePeers { - missingPeers[p] = struct{}{} - } - } - for p := range networkRoutesPeers { - missingPeers[p] = struct{}{} - } - - for p := range missingPeers { - if missingPeer := b.cache.globalPeers[p]; missingPeer != nil { - peersToConnect = append(peersToConnect, missingPeer) - } - } - - return peersToConnect -} - -func (b *NetworkMapBuilder) buildPeerRoutesView(account *Account, peerID string) { - ctx := context.Background() - peer := account.GetPeer(peerID) - if peer == nil { - return - } - resourcePolicies := b.cache.resourcePolicies - - view := &PeerRoutesView{ - OwnRouteIDs: make([]route.ID, 0), - NetworkResourceIDs: make([]route.ID, 0), - RouteFirewallRuleIDs: make([]string, 0), - } - - enabledRoutes, disabledRoutes := b.getRoutingPeerRoutes(peerID) - for _, rt := range enabledRoutes { - if rt.PeerID != "" && rt.PeerID != peerID { - if b.cache.globalPeers[rt.PeerID] == nil { - continue - } - } - - view.OwnRouteIDs = append(view.OwnRouteIDs, rt.ID) - b.cache.globalRoutes[rt.ID] = rt - } - - aclView := b.cache.peerACLs[peerID] - if aclView != nil { - peerRoutesMembership := make(LookupMap) - for _, r := range append(enabledRoutes, disabledRoutes...) { - peerRoutesMembership[string(r.GetHAUniqueID())] = struct{}{} - } - - peerGroups := b.cache.peerToGroups[peerID] - peerGroupsMap := make(LookupMap) - for _, groupID := range peerGroups { - peerGroupsMap[groupID] = struct{}{} - } - - for _, aclPeerID := range aclView.ConnectedPeerIDs { - if aclPeerID == peerID { - continue - } - activeRoutes, _ := b.getRoutingPeerRoutes(aclPeerID) - groupFilteredRoutes := account.filterRoutesByGroups(activeRoutes, peerGroupsMap) - haFilteredRoutes := account.filterRoutesFromPeersOfSameHAGroup(groupFilteredRoutes, peerRoutesMembership) - - for _, inheritedRoute := range haFilteredRoutes { - view.InheritedRouteIDs = append(view.InheritedRouteIDs, inheritedRoute.ID) - b.cache.globalRoutes[inheritedRoute.ID] = inheritedRoute - } - } - } - - _, networkResourcesRoutes, _ := b.getNetworkResourcesForPeer(account, peer) - - for _, rt := range networkResourcesRoutes { - view.NetworkResourceIDs = append(view.NetworkResourceIDs, rt.ID) - b.cache.globalRoutes[rt.ID] = rt - } - - allRoutes := slices.Concat(enabledRoutes, networkResourcesRoutes) - b.updateACGIndexForPeer(peerID, allRoutes) - - routeFirewallRules := b.getPeerRoutesFirewallRules(account, peerID, b.validatedPeers) - for _, rule := range routeFirewallRules { - ruleID := b.generateRouteFirewallRuleID(rule) - view.RouteFirewallRuleIDs = append(view.RouteFirewallRuleIDs, ruleID) - b.cache.globalRouteRules[ruleID] = rule - } - - if len(networkResourcesRoutes) > 0 { - networkResourceFirewallRules := account.GetPeerNetworkResourceFirewallRules(ctx, peer, b.validatedPeers, networkResourcesRoutes, resourcePolicies) - for _, rule := range networkResourceFirewallRules { - ruleID := b.generateRouteFirewallRuleID(rule) - view.RouteFirewallRuleIDs = append(view.RouteFirewallRuleIDs, ruleID) - b.cache.globalRouteRules[ruleID] = rule - } - } - - b.cache.peerRoutes[peerID] = view -} - -func (b *NetworkMapBuilder) updateACGIndexForPeer(peerID string, routes []*route.Route) { - for acg, routeMap := range b.cache.acgToRoutes { - for routeID, info := range routeMap { - if info.PeerID == peerID { - delete(routeMap, routeID) - } - } - if len(routeMap) == 0 { - delete(b.cache.acgToRoutes, acg) - } - } - - for routeID, info := range b.cache.noACGRoutes { - if info.PeerID == peerID { - delete(b.cache.noACGRoutes, routeID) - } - } - - for _, rt := range routes { - if !rt.Enabled { - continue - } - - if len(rt.AccessControlGroups) == 0 { - b.cache.noACGRoutes[rt.ID] = &RouteOwnerInfo{ - PeerID: peerID, - RouteID: rt.ID, - } - } else { - for _, acg := range rt.AccessControlGroups { - if b.cache.acgToRoutes[acg] == nil { - b.cache.acgToRoutes[acg] = make(map[route.ID]*RouteOwnerInfo) - } - - b.cache.acgToRoutes[acg][rt.ID] = &RouteOwnerInfo{ - PeerID: peerID, - RouteID: rt.ID, - } - } - } - } -} - -func (b *NetworkMapBuilder) getRoutingPeerRoutes(peerID string) (enabledRoutes []*route.Route, disabledRoutes []*route.Route) { - peer := b.cache.globalPeers[peerID] - if peer == nil { - return enabledRoutes, disabledRoutes - } - - seenRoute := make(map[route.ID]struct{}) - - takeRoute := func(r *route.Route, id string) { - if _, ok := seenRoute[r.ID]; ok { - return - } - seenRoute[r.ID] = struct{}{} - - if r.Enabled { - // maybe here is some mess - here we store peer key (see comment below) - r.Peer = peer.Key - enabledRoutes = append(enabledRoutes, r) - return - } - disabledRoutes = append(disabledRoutes, r) - } - - peerGroups := b.cache.peerToGroups[peerID] - for _, groupID := range peerGroups { - groupRoutes := b.cache.groupToRoutes[groupID] - for _, r := range groupRoutes { - newPeerRoute := r.Copy() - // and here we store peer ID - this logic is taken from original account.getRoutingPeerRoutes - newPeerRoute.Peer = peerID - newPeerRoute.PeerGroups = nil - newPeerRoute.ID = route.ID(string(r.ID) + ":" + peerID) - takeRoute(newPeerRoute, peerID) - } - } - for _, r := range b.cache.peerToRoutes[peerID] { - takeRoute(r.Copy(), peerID) - } - return enabledRoutes, disabledRoutes -} - -func (b *NetworkMapBuilder) getPeerRoutesFirewallRules(account *Account, peerID string, validatedPeersMap map[string]struct{}) []*RouteFirewallRule { - routesFirewallRules := make([]*RouteFirewallRule, 0) - - enabledRoutes, _ := b.getRoutingPeerRoutes(peerID) - for _, route := range enabledRoutes { - if len(route.AccessControlGroups) == 0 { - defaultPermit := getDefaultPermit(route) - routesFirewallRules = append(routesFirewallRules, defaultPermit...) - continue - } - - distributionPeers := b.getDistributionGroupsPeers(route) - - for _, accessGroup := range route.AccessControlGroups { - policies := b.getAllRoutePoliciesFromGroups([]string{accessGroup}) - - rules := b.getRouteFirewallRules(peerID, policies, route, validatedPeersMap, distributionPeers, account) - routesFirewallRules = append(routesFirewallRules, rules...) - } - } - - return routesFirewallRules -} - -func (b *NetworkMapBuilder) getDistributionGroupsPeers(route *route.Route) map[string]struct{} { - distPeers := make(map[string]struct{}) - for _, id := range route.Groups { - groupPeers := b.cache.groupToPeers[id] - if groupPeers == nil { - continue - } - - for _, pID := range groupPeers { - distPeers[pID] = struct{}{} - } - } - return distPeers -} - -func (b *NetworkMapBuilder) getAllRoutePoliciesFromGroups(accessControlGroups []string) []*Policy { - routePolicies := make(map[string]*Policy) - - for _, groupID := range accessControlGroups { - candidatePolicies := b.cache.groupToPolicies[groupID] - - for _, policy := range candidatePolicies { - if _, found := routePolicies[policy.ID]; found { - continue - } - policyRules := b.cache.policyToRules[policy.ID] - for _, rule := range policyRules { - if slices.Contains(rule.Destinations, groupID) { - routePolicies[policy.ID] = policy - break - } - } - } - } - - return maps.Values(routePolicies) -} - -func (b *NetworkMapBuilder) getRouteFirewallRules( - peerID string, policies []*Policy, route *route.Route, validatedPeersMap map[string]struct{}, - distributionPeers map[string]struct{}, account *Account, -) []*RouteFirewallRule { - ctx := context.Background() - var fwRules []*RouteFirewallRule - for _, policy := range policies { - if !policy.Enabled { - continue - } - - for _, rule := range policy.Rules { - if !rule.Enabled { - continue - } - - rulePeers := b.getRulePeers(rule, policy.SourcePostureChecks, peerID, distributionPeers, validatedPeersMap, account) - - rules := generateRouteFirewallRules(ctx, route, rule, rulePeers, FirewallRuleDirectionIN) - fwRules = append(fwRules, rules...) - } - } - return fwRules -} - -func (b *NetworkMapBuilder) getRulePeers( - rule *PolicyRule, postureChecks []string, peerID string, distributionPeers map[string]struct{}, - validatedPeersMap map[string]struct{}, account *Account, -) []*nbpeer.Peer { - distPeersWithPolicy := make(map[string]struct{}) - - for _, id := range rule.Sources { - groupPeers := b.cache.groupToPeers[id] - if groupPeers == nil { - continue - } - - for _, pID := range groupPeers { - if pID == peerID { - continue - } - _, distPeer := distributionPeers[pID] - _, valid := validatedPeersMap[pID] - - if distPeer && valid && account.validatePostureChecksOnPeer(context.Background(), postureChecks, pID) { - distPeersWithPolicy[pID] = struct{}{} - } - } - } - - if rule.SourceResource.Type == ResourceTypePeer && rule.SourceResource.ID != "" { - _, distPeer := distributionPeers[rule.SourceResource.ID] - _, valid := validatedPeersMap[rule.SourceResource.ID] - if distPeer && valid && account.validatePostureChecksOnPeer(context.Background(), postureChecks, rule.SourceResource.ID) { - distPeersWithPolicy[rule.SourceResource.ID] = struct{}{} - } - } - - distributionGroupPeers := make([]*nbpeer.Peer, 0, len(distPeersWithPolicy)) - for pID := range distPeersWithPolicy { - peer := b.cache.globalPeers[pID] - if peer == nil { - continue - } - distributionGroupPeers = append(distributionGroupPeers, peer) - } - return distributionGroupPeers -} - -func (b *NetworkMapBuilder) buildPeerDNSView(account *Account, peerID string) { - peerGroups := b.cache.peerToGroups[peerID] - checkGroups := make(map[string]struct{}, len(peerGroups)) - for _, groupID := range peerGroups { - checkGroups[groupID] = struct{}{} - } - - dnsManagementStatus := b.getPeerDNSManagementStatus(account, checkGroups) - dnsConfig := &nbdns.Config{ - ServiceEnable: dnsManagementStatus, - } - - if dnsManagementStatus { - dnsConfig.NameServerGroups = b.getPeerNSGroups(account, peerID, checkGroups) - } - - b.cache.peerDNS[peerID] = dnsConfig -} - -func (b *NetworkMapBuilder) getPeerDNSManagementStatus(account *Account, checkGroups map[string]struct{}) bool { - - enabled := true - for _, groupID := range account.DNSSettings.DisabledManagementGroups { - _, found := checkGroups[groupID] - if found { - enabled = false - break - } - } - return enabled -} - -func (b *NetworkMapBuilder) getPeerNSGroups(account *Account, peerID string, checkGroups map[string]struct{}) []*nbdns.NameServerGroup { - var peerNSGroups []*nbdns.NameServerGroup - - for _, nsGroup := range account.NameServerGroups { - if !nsGroup.Enabled { - continue - } - for _, gID := range nsGroup.Groups { - _, found := checkGroups[gID] - if found { - peer := b.cache.globalPeers[peerID] - if !peerIsNameserver(peer, nsGroup) { - peerNSGroups = append(peerNSGroups, nsGroup.Copy()) - break - } - } - } - } - - return peerNSGroups -} - -func (b *NetworkMapBuilder) buildAllowedUserIDs(account *Account) map[string]struct{} { - users := make(map[string]struct{}) - for _, nbUser := range account.Users { - if !nbUser.IsBlocked() && !nbUser.IsServiceUser { - users[nbUser.Id] = struct{}{} - } - } - return users -} - -func firewallRuleProtocol(protocol PolicyRuleProtocolType) string { - if protocol == PolicyRuleProtocolNetbirdSSH { - return string(PolicyRuleProtocolTCP) - } - return string(protocol) -} - -// lock should be held -func (b *NetworkMapBuilder) updateAccountLocked(account *Account) *Account { - if account.Network.CurrentSerial() > b.account.Network.CurrentSerial() { - b.account = account - } - return b.account -} - -func (b *NetworkMapBuilder) GetPeerNetworkMap( - ctx context.Context, peerID string, peersCustomZone nbdns.CustomZone, accountZones []*zones.Zone, - validatedPeers map[string]struct{}, metrics *telemetry.AccountManagerMetrics, -) *NetworkMap { - start := time.Now() - - b.cache.mu.RLock() - defer b.cache.mu.RUnlock() - - account := b.account - - peer := account.GetPeer(peerID) - if peer == nil { - return &NetworkMap{Network: account.Network.Copy()} - } - - aclView := b.cache.peerACLs[peerID] - routesView := b.cache.peerRoutes[peerID] - dnsConfig := b.cache.peerDNS[peerID] - sshView := b.cache.peerSSH[peerID] - - if aclView == nil || routesView == nil || dnsConfig == nil { - return &NetworkMap{Network: account.Network.Copy()} - } - - nm := b.assembleNetworkMap(ctx, account, peer, aclView, routesView, dnsConfig, sshView, peersCustomZone, accountZones, validatedPeers) - - if metrics != nil { - objectCount := int64(len(nm.Peers) + len(nm.OfflinePeers) + len(nm.Routes) + len(nm.FirewallRules) + len(nm.RoutesFirewallRules)) - metrics.CountNetworkMapObjects(objectCount) - metrics.CountGetPeerNetworkMapDuration(time.Since(start)) - - if objectCount > 5000 { - log.WithContext(ctx).Tracef("account: %s has a total resource count of %d objects from cache", - account.Id, objectCount) - } - } - - return nm -} - -func (b *NetworkMapBuilder) assembleNetworkMap( - ctx context.Context, account *Account, peer *nbpeer.Peer, aclView *PeerACLView, routesView *PeerRoutesView, - dnsConfig *nbdns.Config, sshView *PeerSSHView, peersCustomZone nbdns.CustomZone, accountZones []*zones.Zone, validatedPeers map[string]struct{}, -) *NetworkMap { - - var peersToConnect []*nbpeer.Peer - var expiredPeers []*nbpeer.Peer - - for _, peerID := range aclView.ConnectedPeerIDs { - if _, ok := validatedPeers[peerID]; !ok { - continue - } - - peer := b.cache.globalPeers[peerID] - if peer == nil { - continue - } - - expired, _ := peer.LoginExpired(account.Settings.PeerLoginExpiration) - if account.Settings.PeerLoginExpirationEnabled && expired { - expiredPeers = append(expiredPeers, peer) - } else { - peersToConnect = append(peersToConnect, peer) - } - } - - var routes []*route.Route - allRouteIDs := slices.Concat(routesView.OwnRouteIDs, routesView.NetworkResourceIDs, routesView.InheritedRouteIDs) - - for _, routeID := range allRouteIDs { - if route := b.cache.globalRoutes[routeID]; route != nil { - routes = append(routes, route) - } - } - - var firewallRules []*FirewallRule - for _, ruleID := range aclView.FirewallRuleIDs { - if rule := b.cache.globalRules[ruleID]; rule != nil { - firewallRules = append(firewallRules, rule) - } else { - log.Debugf("NetworkMapBuilder: peer %s assembling network map has no fwrule %s in globalRules", peer.ID, ruleID) - } - } - - var routesFirewallRules []*RouteFirewallRule - for _, ruleID := range routesView.RouteFirewallRuleIDs { - if rule := b.cache.globalRouteRules[ruleID]; rule != nil { - routesFirewallRules = append(routesFirewallRules, rule) - } - } - - finalDNSConfig := *dnsConfig - if finalDNSConfig.ServiceEnable { - var zones []nbdns.CustomZone - - peerGroupsSlice := b.cache.peerToGroups[peer.ID] - peerGroups := make(LookupMap, len(peerGroupsSlice)) - for _, groupID := range peerGroupsSlice { - peerGroups[groupID] = struct{}{} - } - - if peersCustomZone.Domain != "" { - records := filterZoneRecordsForPeers(peer, peersCustomZone, peersToConnect, expiredPeers) - zones = append(zones, nbdns.CustomZone{ - Domain: peersCustomZone.Domain, - Records: records, - }) - } - - filteredAccountZones := filterPeerAppliedZones(ctx, accountZones, peerGroups) - zones = append(zones, filteredAccountZones...) - - finalDNSConfig.CustomZones = zones - } - - nm := &NetworkMap{ - Peers: peersToConnect, - Network: account.Network.Copy(), - Routes: routes, - DNSConfig: finalDNSConfig, - OfflinePeers: expiredPeers, - FirewallRules: firewallRules, - RoutesFirewallRules: routesFirewallRules, - } - - if sshView != nil { - nm.EnableSSH = sshView.EnableSSH - nm.AuthorizedUsers = sshView.AuthorizedUsers - } - - return nm -} - -func (b *NetworkMapBuilder) generateFirewallRuleID(rule *FirewallRule) string { - var s strings.Builder - s.WriteString(fw) - s.WriteString(rule.PolicyID) - s.WriteRune(':') - s.WriteString(rule.PeerIP) - s.WriteRune(':') - s.WriteString(strconv.Itoa(rule.Direction)) - s.WriteRune(':') - s.WriteString(rule.Protocol) - s.WriteRune(':') - s.WriteString(rule.Action) - s.WriteRune(':') - s.WriteString(rule.Port) - s.WriteRune(':') - s.WriteString(strconv.Itoa(int(rule.PortRange.Start))) - s.WriteRune('-') - s.WriteString(strconv.Itoa(int(rule.PortRange.End))) - return s.String() -} - -func (b *NetworkMapBuilder) generateRouteFirewallRuleID(rule *RouteFirewallRule) string { - var s strings.Builder - s.WriteString(rfw) - s.WriteString(string(rule.RouteID)) - s.WriteRune(':') - s.WriteString(rule.Destination) - s.WriteRune(':') - s.WriteString(rule.Action) - s.WriteRune(':') - s.WriteString(strings.Join(rule.SourceRanges, ",")) - s.WriteRune(':') - s.WriteString(rule.Protocol) - s.WriteRune(':') - s.WriteString(strconv.Itoa(int(rule.Port))) - return s.String() -} - -func (b *NetworkMapBuilder) isPeerInGroups(groupIDs []string, peerGroups []string) bool { - for _, groupID := range groupIDs { - if slices.Contains(peerGroups, groupID) { - return true - } - } - return false -} - -func (b *NetworkMapBuilder) isPeerRouter(account *Account, peerID string) bool { - for _, r := range account.Routes { - if !r.Enabled { - continue - } - - if r.PeerID == peerID { - return true - } - - if peer := b.cache.globalPeers[peerID]; peer != nil { - if r.Peer == peer.Key && r.PeerID == "" { - return true - } - } - } - - routers := account.GetResourceRoutersMap() - for _, networkRouters := range routers { - if router, exists := networkRouters[peerID]; exists && router.Enabled { - return true - } - } - - return false -} - -func (b *NetworkMapBuilder) incAddPeerLoop() { - for { - b.apb.mu.Lock() - if len(b.apb.ids) == 0 { - b.apb.sg.Wait() - } - b.addPeersIncrementally() - b.apb.mu.Unlock() - } -} - -// lock on b.apb level should be held -func (b *NetworkMapBuilder) addPeersIncrementally() { - peers := slices.Clone(b.apb.ids) - clear(b.apb.ids) - b.apb.ids = b.apb.ids[:0] - latestAcc := b.apb.la - b.apb.mu.Unlock() - - tt := time.Now() - b.cache.mu.Lock() - defer b.cache.mu.Unlock() - - account := b.updateAccountLocked(latestAcc) - - log.Debugf("NetworkMapBuilder: Starting incremental add of %d peers", len(peers)) - - allUpdates := make(map[string]*PeerUpdateDelta) - - for _, peerID := range peers { - peer := account.GetPeer(peerID) - if peer == nil { - b.apb.mu.Lock() - retries := b.apb.retryCount[peerID] - b.apb.mu.Unlock() - - if retries >= maxPeerAddRetries { - log.Errorf("NetworkMapBuilder: peer %s not found in account %s after %d retries, giving up", peerID, account.Id, retries) - b.apb.mu.Lock() - delete(b.apb.retryCount, peerID) - b.apb.mu.Unlock() - continue - } - - log.Warnf("NetworkMapBuilder: peer %s not found in account %s, retry %d/%d", peerID, account.Id, retries+1, maxPeerAddRetries) - b.apb.mu.Lock() - b.apb.retryCount[peerID] = retries + 1 - b.apb.mu.Unlock() - b.enqueuePeersForIncrementalAdd(latestAcc, peerID) - continue - } - - b.apb.mu.Lock() - delete(b.apb.retryCount, peerID) - b.apb.mu.Unlock() - - b.validatedPeers[peerID] = struct{}{} - b.cache.globalPeers[peerID] = peer - - peerGroups := b.updateIndexesForNewPeer(account, peerID) - b.buildPeerACLView(account, peerID) - b.buildPeerRoutesView(account, peerID) - b.buildPeerDNSView(account, peerID) - - peerDeltas := b.collectDeltasForNewPeer(account, peerID, peerGroups) - for affectedPeerID, delta := range peerDeltas { - if existing, ok := allUpdates[affectedPeerID]; ok { - existing.mergeFrom(delta) - continue - } - allUpdates[affectedPeerID] = delta - } - } - - for affectedPeerID, delta := range allUpdates { - b.applyDeltaToPeer(account, affectedPeerID, delta) - } - - log.Debugf("NetworkMapBuilder: Added %d peers to cache, affected %d peers, took %s", len(peers), len(allUpdates), time.Since(tt)) - - b.apb.mu.Lock() - if len(b.apb.ids) > 0 { - b.apb.sg.Signal() - } -} - -func (b *NetworkMapBuilder) enqueuePeersForIncrementalAdd(acc *Account, peerIDs ...string) { - b.apb.mu.Lock() - b.apb.ids = append(b.apb.ids, peerIDs...) - if b.apb.la != nil && acc.Network.CurrentSerial() > b.apb.la.Network.CurrentSerial() { - b.apb.la = acc - } - b.apb.sg.Signal() - b.apb.mu.Unlock() -} - -func (b *NetworkMapBuilder) EnqueuePeersForIncrementalAdd(acc *Account, peerIDs ...string) { - b.enqueuePeersForIncrementalAdd(acc, peerIDs...) -} - -type ViewDelta struct { - AddedPeerIDs []string - RemovedPeerIDs []string - AddedRuleIDs []string - RemovedRuleIDs []string -} - -func (b *NetworkMapBuilder) OnPeerAddedIncremental(acc *Account, peerID string) error { - tt := time.Now() - peer := acc.GetPeer(peerID) - if peer == nil { - return fmt.Errorf("NetworkMapBuilder: peer %s not found in account", peerID) - } - - b.cache.mu.Lock() - defer b.cache.mu.Unlock() - - account := b.updateAccountLocked(acc) - - log.Debugf("NetworkMapBuilder: Adding peer %s (IP: %s) to cache", peerID, peer.IP.String()) - - b.validatedPeers[peerID] = struct{}{} - - b.cache.globalPeers[peerID] = peer - - peerGroups := b.updateIndexesForNewPeer(account, peerID) - - b.buildPeerACLView(account, peerID) - b.buildPeerRoutesView(account, peerID) - b.buildPeerDNSView(account, peerID) - - log.Debugf("NetworkMapBuilder: Adding peer %s to cache, views took %s", peerID, time.Since(tt)) - - b.incrementalUpdateAffectedPeers(account, peerID, peerGroups) - - log.Debugf("NetworkMapBuilder: Added peer %s to cache, took %s", peerID, time.Since(tt)) - - return nil -} - -func (b *NetworkMapBuilder) updateIndexesForNewPeer(account *Account, peerID string) []string { - peerGroups := make([]string, 0) - - for groupID, group := range account.Groups { - if slices.Contains(group.Peers, peerID) { - if !slices.Contains(b.cache.groupToPeers[groupID], peerID) { - b.cache.groupToPeers[groupID] = append(b.cache.groupToPeers[groupID], peerID) - } - peerGroups = append(peerGroups, groupID) - } - } - - b.cache.peerToGroups[peerID] = peerGroups - - for _, r := range account.Routes { - if !r.Enabled || b.cache.globalRoutes[r.ID] != nil { - continue - } - for _, groupID := range r.PeerGroups { - if !slices.Contains(b.cache.groupToRoutes[groupID], r) { - b.cache.groupToRoutes[groupID] = append(b.cache.groupToRoutes[groupID], r) - } - } - if r.Peer != "" { - if peer, ok := b.cache.globalPeers[r.Peer]; ok { - if !slices.Contains(b.cache.peerToRoutes[peer.ID], r) { - b.cache.peerToRoutes[peer.ID] = append(b.cache.peerToRoutes[peer.ID], r) - } - } - } - b.cache.globalRoutes[r.ID] = r - } - - return peerGroups -} - -func (b *NetworkMapBuilder) incrementalUpdateAffectedPeers(account *Account, newPeerID string, peerGroups []string) { - updates := b.collectDeltasForNewPeer(account, newPeerID, peerGroups) - for affectedPeerID, delta := range updates { - b.applyDeltaToPeer(account, affectedPeerID, delta) - } -} - -func (b *NetworkMapBuilder) collectDeltasForNewPeer(account *Account, newPeerID string, peerGroups []string) map[string]*PeerUpdateDelta { - updates := b.calculateIncrementalUpdates(account, newPeerID, peerGroups) - - if b.isPeerRouter(account, newPeerID) { - affectedByRoutes := b.findPeersAffectedByNewRouter(account, newPeerID, peerGroups) - for affectedPeerID := range affectedByRoutes { - if affectedPeerID == newPeerID { - continue - } - if _, exists := updates[affectedPeerID]; !exists { - updates[affectedPeerID] = &PeerUpdateDelta{ - PeerID: affectedPeerID, - RebuildRoutesView: true, - } - } else { - updates[affectedPeerID].RebuildRoutesView = true - } - } - } - - return updates -} - -func (b *NetworkMapBuilder) findPeersAffectedByNewRouter(account *Account, newRouterID string, routerGroups []string) map[string]struct{} { - affected := make(map[string]struct{}) - enabledRoutes, _ := b.getRoutingPeerRoutes(newRouterID) - - for _, route := range enabledRoutes { - for _, distGroupID := range route.Groups { - if peers := b.cache.groupToPeers[distGroupID]; peers != nil { - for _, peerID := range peers { - if peerID != newRouterID { - affected[peerID] = struct{}{} - } - } - } - } - - for _, peerGroupID := range route.PeerGroups { - if peers := b.cache.groupToPeers[peerGroupID]; peers != nil { - for _, peerID := range peers { - if peerID != newRouterID { - affected[peerID] = struct{}{} - } - } - } - } - } - - for _, route := range account.Routes { - if !route.Enabled { - continue - } - - routerInPeerGroups := false - for _, peerGroupID := range route.PeerGroups { - if slices.Contains(routerGroups, peerGroupID) { - routerInPeerGroups = true - break - } - } - - if routerInPeerGroups { - for _, distGroupID := range route.Groups { - if peers := b.cache.groupToPeers[distGroupID]; peers != nil { - for _, peerID := range peers { - affected[peerID] = struct{}{} - } - } - } - } - } - - return affected -} - -func (b *NetworkMapBuilder) calculateIncrementalUpdates(account *Account, newPeerID string, peerGroups []string) map[string]*PeerUpdateDelta { - updates := make(map[string]*PeerUpdateDelta) - ctx := context.Background() - - groupAllLn := 0 - if allGroup, err := account.GetGroupAll(); err == nil { - groupAllLn = len(allGroup.Peers) - 1 - } - - newPeer := b.cache.globalPeers[newPeerID] - if newPeer == nil { - return updates - } - - for _, policy := range account.Policies { - if !policy.Enabled { - continue - } - - for _, rule := range policy.Rules { - if !rule.Enabled { - continue - } - var peerInSources, peerInDestinations bool - - if rule.SourceResource.Type == ResourceTypePeer && rule.SourceResource.ID == newPeerID { - peerInSources = true - } else { - peerInSources = b.isPeerInGroups(rule.Sources, peerGroups) - } - - if rule.DestinationResource.Type == ResourceTypePeer && rule.DestinationResource.ID == newPeerID { - peerInDestinations = true - } else { - peerInDestinations = b.isPeerInGroups(rule.Destinations, peerGroups) - } - - if peerInSources { - if len(rule.Destinations) > 0 { - b.addUpdateForPeersInGroups(updates, rule.Destinations, newPeerID, rule, FirewallRuleDirectionIN, groupAllLn) - } - if rule.DestinationResource.Type == ResourceTypePeer && rule.DestinationResource.ID != "" { - b.addUpdateForDirectPeerResource(updates, rule.DestinationResource.ID, newPeerID, rule, FirewallRuleDirectionIN) - } - } - - if peerInDestinations { - if len(rule.Sources) > 0 { - b.addUpdateForPeersInGroups(updates, rule.Sources, newPeerID, rule, FirewallRuleDirectionOUT, groupAllLn) - } - if rule.SourceResource.Type == ResourceTypePeer && rule.SourceResource.ID != "" { - b.addUpdateForDirectPeerResource(updates, rule.SourceResource.ID, newPeerID, rule, FirewallRuleDirectionOUT) - } - } - - if rule.Bidirectional { - if peerInSources { - if len(rule.Destinations) > 0 { - b.addUpdateForPeersInGroups(updates, rule.Destinations, newPeerID, rule, FirewallRuleDirectionOUT, groupAllLn) - } - if rule.DestinationResource.Type == ResourceTypePeer && rule.DestinationResource.ID != "" { - b.addUpdateForDirectPeerResource(updates, rule.DestinationResource.ID, newPeerID, rule, FirewallRuleDirectionOUT) - } - } - if peerInDestinations { - if len(rule.Sources) > 0 { - b.addUpdateForPeersInGroups(updates, rule.Sources, newPeerID, rule, FirewallRuleDirectionIN, groupAllLn) - } - if rule.SourceResource.Type == ResourceTypePeer && rule.SourceResource.ID != "" { - b.addUpdateForDirectPeerResource(updates, rule.SourceResource.ID, newPeerID, rule, FirewallRuleDirectionIN) - } - } - } - } - } - - b.calculateRouteFirewallUpdates(newPeerID, newPeer, peerGroups, updates) - - b.calculateNetworkResourceFirewallUpdates(ctx, account, newPeerID, newPeer, peerGroups, updates) - - b.calculateNewRouterNetworkResourceUpdates(ctx, account, newPeerID, updates) - - return updates -} - -func (b *NetworkMapBuilder) calculateNewRouterNetworkResourceUpdates( - ctx context.Context, account *Account, newPeerID string, - updates map[string]*PeerUpdateDelta, -) { - resourceRouters := b.cache.resourceRouters - - for networkID, routers := range resourceRouters { - router, isRouter := routers[newPeerID] - if !isRouter || !router.Enabled { - continue - } - - for _, resource := range b.cache.globalResources { - if resource.NetworkID != networkID { - continue - } - - policies := b.cache.resourcePolicies[resource.ID] - if len(policies) == 0 { - continue - } - - peersWithAccess := make(map[string]struct{}) - - for _, policy := range policies { - if !policy.Enabled { - continue - } - - sourceGroups := policy.SourceGroups() - for _, sourceGroup := range sourceGroups { - groupPeers := b.cache.groupToPeers[sourceGroup] - for _, peerID := range groupPeers { - if peerID == newPeerID { - continue - } - - if account.validatePostureChecksOnPeer(ctx, policy.SourcePostureChecks, peerID) { - peersWithAccess[peerID] = struct{}{} - } - } - } - } - - for peerID := range peersWithAccess { - delta := updates[peerID] - if delta == nil { - delta = &PeerUpdateDelta{ - PeerID: peerID, - } - updates[peerID] = delta - } - - if !slices.Contains(delta.AddConnectedPeers, newPeerID) { - delta.AddConnectedPeers = append(delta.AddConnectedPeers, newPeerID) - } - - delta.RebuildRoutesView = true - } - } - } -} - -func (b *NetworkMapBuilder) calculateRouteFirewallUpdates( - newPeerID string, newPeer *nbpeer.Peer, - peerGroups []string, updates map[string]*PeerUpdateDelta, -) { - processedPeerRoutes := make(map[string]map[route.ID]struct{}) - - for routeID, info := range b.cache.noACGRoutes { - if info.PeerID == newPeerID { - continue - } - - b.addRouteFirewallUpdate(updates, info.PeerID, string(routeID), newPeer.IP.String()) - - if processedPeerRoutes[info.PeerID] == nil { - processedPeerRoutes[info.PeerID] = make(map[route.ID]struct{}) - } - processedPeerRoutes[info.PeerID][routeID] = struct{}{} - } - - for _, acg := range peerGroups { - routeInfos := b.cache.acgToRoutes[acg] - if routeInfos == nil { - continue - } - - for routeID, info := range routeInfos { - if info.PeerID == newPeerID { - continue - } - - if processedRoutes, exists := processedPeerRoutes[info.PeerID]; exists { - if _, processed := processedRoutes[routeID]; processed { - continue - } - } - - b.addRouteFirewallUpdate(updates, info.PeerID, string(routeID), newPeer.IP.String()) - - if processedPeerRoutes[info.PeerID] == nil { - processedPeerRoutes[info.PeerID] = make(map[route.ID]struct{}) - } - processedPeerRoutes[info.PeerID][routeID] = struct{}{} - } - } -} - -func (b *NetworkMapBuilder) addRouteFirewallUpdate( - updates map[string]*PeerUpdateDelta, peerID string, - routeID string, sourceIP string, -) { - delta := updates[peerID] - if delta == nil { - delta = &PeerUpdateDelta{ - PeerID: peerID, - UpdateRouteFirewallRules: make([]*RouteFirewallRuleUpdate, 0), - } - updates[peerID] = delta - } - - for _, existing := range delta.UpdateRouteFirewallRules { - if existing.RuleID == routeID && existing.AddSourceIP == sourceIP { - return - } - } - - delta.UpdateRouteFirewallRules = append(delta.UpdateRouteFirewallRules, &RouteFirewallRuleUpdate{ - RuleID: routeID, - AddSourceIP: sourceIP, - }) -} - -func (b *NetworkMapBuilder) calculateNetworkResourceFirewallUpdates( - ctx context.Context, account *Account, newPeerID string, - newPeer *nbpeer.Peer, peerGroups []string, updates map[string]*PeerUpdateDelta, -) { - for _, resource := range b.cache.globalResources { - resourcePolicies := b.cache.resourcePolicies - resourceRouters := b.cache.resourceRouters - - policies := resourcePolicies[resource.ID] - peerHasAccess := false - - for _, policy := range policies { - if !policy.Enabled { - continue - } - - sourceGroups := policy.SourceGroups() - for _, sourceGroup := range sourceGroups { - if slices.Contains(peerGroups, sourceGroup) { - if account.validatePostureChecksOnPeer(ctx, policy.SourcePostureChecks, newPeerID) { - peerHasAccess = true - break - } - } - } - - if peerHasAccess { - break - } - } - - if !peerHasAccess { - continue - } - - networkRouters := resourceRouters[resource.NetworkID] - for routerPeerID, router := range networkRouters { - if !router.Enabled || routerPeerID == newPeerID { - continue - } - - delta := updates[routerPeerID] - if delta == nil { - delta = &PeerUpdateDelta{ - PeerID: routerPeerID, - } - updates[routerPeerID] = delta - } - - if !slices.Contains(delta.AddConnectedPeers, newPeerID) { - delta.AddConnectedPeers = append(delta.AddConnectedPeers, newPeerID) - } - - delta.RebuildRoutesView = true - } - } -} - -type PeerUpdateDelta struct { - PeerID string - AddConnectedPeers []string - AddFirewallRules []*FirewallRuleDelta - AddRoutes []route.ID - UpdateRouteFirewallRules []*RouteFirewallRuleUpdate - UpdateDNS bool - RebuildRoutesView bool -} - -func (d *PeerUpdateDelta) mergeFrom(other *PeerUpdateDelta) { - for _, peerID := range other.AddConnectedPeers { - if !slices.Contains(d.AddConnectedPeers, peerID) { - d.AddConnectedPeers = append(d.AddConnectedPeers, peerID) - } - } - - existingRuleIDs := make(map[string]struct{}, len(d.AddFirewallRules)) - for _, rule := range d.AddFirewallRules { - existingRuleIDs[rule.RuleID] = struct{}{} - } - for _, rule := range other.AddFirewallRules { - if _, exists := existingRuleIDs[rule.RuleID]; !exists { - d.AddFirewallRules = append(d.AddFirewallRules, rule) - existingRuleIDs[rule.RuleID] = struct{}{} - } - } - - for _, routeID := range other.AddRoutes { - if !slices.Contains(d.AddRoutes, routeID) { - d.AddRoutes = append(d.AddRoutes, routeID) - } - } - - existingRouteUpdates := make(map[string]map[string]struct{}) - for _, update := range d.UpdateRouteFirewallRules { - if existingRouteUpdates[update.RuleID] == nil { - existingRouteUpdates[update.RuleID] = make(map[string]struct{}) - } - existingRouteUpdates[update.RuleID][update.AddSourceIP] = struct{}{} - } - for _, update := range other.UpdateRouteFirewallRules { - if existingRouteUpdates[update.RuleID] == nil { - existingRouteUpdates[update.RuleID] = make(map[string]struct{}) - } - if _, exists := existingRouteUpdates[update.RuleID][update.AddSourceIP]; !exists { - d.UpdateRouteFirewallRules = append(d.UpdateRouteFirewallRules, update) - existingRouteUpdates[update.RuleID][update.AddSourceIP] = struct{}{} - } - } - - if other.UpdateDNS { - d.UpdateDNS = true - } - if other.RebuildRoutesView { - d.RebuildRoutesView = true - } -} - -type FirewallRuleDelta struct { - Rule *FirewallRule - RuleID string - Direction int -} - -type RouteFirewallRuleUpdate struct { - RuleID string - AddSourceIP string -} - -func (b *NetworkMapBuilder) addUpdateForPeersInGroups( - updates map[string]*PeerUpdateDelta, groupIDs []string, newPeerID string, - rule *PolicyRule, direction int, allGroupLn int, -) { - for _, groupID := range groupIDs { - peers := b.cache.groupToPeers[groupID] - cnt := 0 - for _, peerID := range peers { - if peerID == newPeerID { - continue - } - if _, ok := b.validatedPeers[peerID]; !ok { - continue - } - cnt++ - } - all := false - if allGroupLn > 0 && cnt == allGroupLn { - all = true - } - newPeer := b.cache.globalPeers[newPeerID] - fr := &FirewallRule{ - PolicyID: rule.ID, - PeerIP: newPeer.IP.String(), - Direction: direction, - Action: string(rule.Action), - Protocol: firewallRuleProtocol(rule.Protocol), - } - for _, peerID := range peers { - if peerID == newPeerID { - continue - } - if _, ok := b.validatedPeers[peerID]; !ok { - continue - } - targetPeer := b.cache.globalPeers[peerID] - if targetPeer == nil { - continue - } - - peerIPForRule := fr.PeerIP - if all { - peerIPForRule = allPeers - } - - b.addOrUpdateFirewallRuleInDelta(updates, peerID, newPeerID, rule, direction, fr, peerIPForRule, targetPeer) - } - } -} - -func (b *NetworkMapBuilder) addUpdateForDirectPeerResource( - updates map[string]*PeerUpdateDelta, targetPeerID string, newPeerID string, - rule *PolicyRule, direction int, -) { - if targetPeerID == newPeerID { - return - } - - if _, ok := b.validatedPeers[targetPeerID]; !ok { - return - } - - newPeer := b.cache.globalPeers[newPeerID] - if newPeer == nil { - return - } - - targetPeer := b.cache.globalPeers[targetPeerID] - if targetPeer == nil { - return - } - - fr := &FirewallRule{ - PolicyID: rule.ID, - PeerIP: newPeer.IP.String(), - Direction: direction, - Action: string(rule.Action), - Protocol: firewallRuleProtocol(rule.Protocol), - } - - b.addOrUpdateFirewallRuleInDelta(updates, targetPeerID, newPeerID, rule, direction, fr, fr.PeerIP, targetPeer) -} - -func (b *NetworkMapBuilder) addOrUpdateFirewallRuleInDelta( - updates map[string]*PeerUpdateDelta, targetPeerID string, newPeerID string, - rule *PolicyRule, direction int, baseRule *FirewallRule, peerIP string, targetPeer *nbpeer.Peer, -) { - delta := updates[targetPeerID] - if delta == nil { - delta = &PeerUpdateDelta{ - PeerID: targetPeerID, - AddConnectedPeers: []string{newPeerID}, - AddFirewallRules: make([]*FirewallRuleDelta, 0), - } - updates[targetPeerID] = delta - } else if !slices.Contains(delta.AddConnectedPeers, newPeerID) { - delta.AddConnectedPeers = append(delta.AddConnectedPeers, newPeerID) - } - - baseRule.PeerIP = peerIP - - if len(rule.Ports) > 0 || len(rule.PortRanges) > 0 { - expandedRules := expandPortsAndRanges(*baseRule, rule, targetPeer) - for _, expandedRule := range expandedRules { - ruleID := b.generateFirewallRuleID(expandedRule) - delta.AddFirewallRules = append(delta.AddFirewallRules, &FirewallRuleDelta{ - Rule: expandedRule, - RuleID: ruleID, - Direction: direction, - }) - } - } else { - ruleID := b.generateFirewallRuleID(baseRule) - delta.AddFirewallRules = append(delta.AddFirewallRules, &FirewallRuleDelta{ - Rule: baseRule, - RuleID: ruleID, - Direction: direction, - }) - } -} - -func (b *NetworkMapBuilder) applyDeltaToPeer(account *Account, peerID string, delta *PeerUpdateDelta) { - if len(delta.AddConnectedPeers) > 0 || len(delta.AddFirewallRules) > 0 { - if aclView := b.cache.peerACLs[peerID]; aclView != nil { - for _, connectedPeerID := range delta.AddConnectedPeers { - if !slices.Contains(aclView.ConnectedPeerIDs, connectedPeerID) { - aclView.ConnectedPeerIDs = append(aclView.ConnectedPeerIDs, connectedPeerID) - } - } - - for _, ruleDelta := range delta.AddFirewallRules { - b.cache.globalRules[ruleDelta.RuleID] = ruleDelta.Rule - - if !slices.Contains(aclView.FirewallRuleIDs, ruleDelta.RuleID) { - aclView.FirewallRuleIDs = append(aclView.FirewallRuleIDs, ruleDelta.RuleID) - } - } - } - } - - if delta.RebuildRoutesView { - b.buildPeerRoutesView(account, peerID) - } else if len(delta.UpdateRouteFirewallRules) > 0 { - if routesView := b.cache.peerRoutes[peerID]; routesView != nil { - b.updateRouteFirewallRules(routesView, delta.UpdateRouteFirewallRules) - } - } - - if delta.UpdateDNS { - b.buildPeerDNSView(account, peerID) - } -} - -func (b *NetworkMapBuilder) updateRouteFirewallRules(routesView *PeerRoutesView, updates []*RouteFirewallRuleUpdate) { - for _, update := range updates { - for _, ruleID := range routesView.RouteFirewallRuleIDs { - rule := b.cache.globalRouteRules[ruleID] - if rule == nil { - continue - } - - if string(rule.RouteID) == update.RuleID { - if hasWildcard := slices.Contains(rule.SourceRanges, allWildcard) || slices.Contains(rule.SourceRanges, v6AllWildcard); hasWildcard { - break - } - - sourceIP := update.AddSourceIP - - if strings.Contains(sourceIP, ":") { - sourceIP += "/128" // IPv6 - } else { - sourceIP += "/32" // IPv4 - } - - if !slices.Contains(rule.SourceRanges, sourceIP) { - rule.SourceRanges = append(rule.SourceRanges, sourceIP) - } - break - } - } - } -} - -func (b *NetworkMapBuilder) OnPeerDeleted(acc *Account, peerID string) error { - b.cache.mu.Lock() - defer b.cache.mu.Unlock() - - account := b.updateAccountLocked(acc) - - deletedPeer := b.cache.globalPeers[peerID] - if deletedPeer == nil { - return fmt.Errorf("peer %s not found in cache", peerID) - } - - deletedPeerKey := deletedPeer.Key - peerGroups := b.cache.peerToGroups[peerID] - peerIP := deletedPeer.IP.String() - - log.Debugf("NetworkMapBuilder: Deleting peer %s (IP: %s) from cache", peerID, peerIP) - - delete(b.validatedPeers, peerID) - - routesToDelete := []route.ID{} - - for routeID, r := range account.Routes { - if r.Peer != deletedPeerKey && r.PeerID != peerID { - continue - } - if len(r.PeerGroups) == 0 { - routesToDelete = append(routesToDelete, routeID) - continue - } - newPeerAssigned := false - for _, groupID := range r.PeerGroups { - candidatePeerIDs := b.cache.groupToPeers[groupID] - for _, candidatePeerID := range candidatePeerIDs { - if candidatePeerID == peerID { - continue - } - if candidatePeer := b.cache.globalPeers[candidatePeerID]; candidatePeer != nil { - r.Peer = candidatePeer.Key - r.PeerID = candidatePeerID - newPeerAssigned = true - break - } - } - if newPeerAssigned { - break - } - } - - if !newPeerAssigned { - routesToDelete = append(routesToDelete, routeID) - } - } - - for _, routeID := range routesToDelete { - delete(account.Routes, routeID) - } - - delete(b.cache.peerACLs, peerID) - delete(b.cache.peerRoutes, peerID) - delete(b.cache.peerDNS, peerID) - delete(b.cache.peerSSH, peerID) - - delete(b.cache.globalPeers, peerID) - - for acg, routeMap := range b.cache.acgToRoutes { - for routeID, info := range routeMap { - if info.PeerID == peerID { - delete(routeMap, routeID) - } - } - if len(routeMap) == 0 { - delete(b.cache.acgToRoutes, acg) - } - } - - for _, groupID := range peerGroups { - if peers := b.cache.groupToPeers[groupID]; peers != nil { - b.cache.groupToPeers[groupID] = slices.DeleteFunc(peers, func(id string) bool { - return id == peerID - }) - } - } - delete(b.cache.peerToGroups, peerID) - - affectedPeers := make(map[string]struct{}) - - for _, r := range account.Routes { - for _, groupID := range r.Groups { - if peers := b.cache.groupToPeers[groupID]; peers != nil { - for _, p := range peers { - affectedPeers[p] = struct{}{} - } - } - } - - for _, groupID := range r.PeerGroups { - if peers := b.cache.groupToPeers[groupID]; peers != nil { - for _, p := range peers { - affectedPeers[p] = struct{}{} - } - } - } - } - - for affectedPeerID := range affectedPeers { - if affectedPeerID == peerID { - continue - } - b.buildPeerRoutesView(account, affectedPeerID) - } - - peersToRebuildACL := make(map[string]struct{}) - peerDeletionUpdates := b.findPeersAffectedByDeletedPeerACL(peerID, peerIP, peerGroups, peersToRebuildACL) - for affectedPeerID, updates := range peerDeletionUpdates { - b.applyDeletionUpdates(affectedPeerID, updates) - } - - for affectedPeerID := range peersToRebuildACL { - b.buildPeerACLView(account, affectedPeerID) - } - - b.cleanupUnusedRules() - - log.Debugf("NetworkMapBuilder: Deleted peer %s, affected %d other peers", peerID, len(affectedPeers)) - - return nil -} - -func (b *NetworkMapBuilder) findPeersAffectedByDeletedPeerACL( - deletedPeerID string, - peerIP string, - peerGroups []string, - peersToRebuildACL map[string]struct{}, -) map[string]*PeerDeletionUpdate { - - affected := make(map[string]*PeerDeletionUpdate) - - for peerID, aclView := range b.cache.peerACLs { - if peerID == deletedPeerID { - continue - } - - if slices.Contains(aclView.ConnectedPeerIDs, deletedPeerID) { - peersToRebuildACL[peerID] = struct{}{} - if affected[peerID] == nil { - affected[peerID] = &PeerDeletionUpdate{ - RemovePeerID: deletedPeerID, - PeerIP: peerIP, - } - } - } - } - - affectedRouteOwners := make(map[string]struct{}) - - for _, groupID := range peerGroups { - if routeMap, ok := b.cache.acgToRoutes[groupID]; ok { - for _, info := range routeMap { - if info.PeerID != deletedPeerID { - affectedRouteOwners[info.PeerID] = struct{}{} - } - } - } - } - - for _, info := range b.cache.noACGRoutes { - if info.PeerID != deletedPeerID { - affectedRouteOwners[info.PeerID] = struct{}{} - } - } - - for ownerPeerID := range affectedRouteOwners { - if affected[ownerPeerID] == nil { - affected[ownerPeerID] = &PeerDeletionUpdate{ - RemovePeerID: deletedPeerID, - PeerIP: peerIP, - RemoveFromSourceRanges: true, - } - } else { - affected[ownerPeerID].RemoveFromSourceRanges = true - } - } - - return affected -} - -type PeerDeletionUpdate struct { - RemovePeerID string - RemoveFirewallRuleIDs []string - RemoveRouteIDs []route.ID - RemoveFromSourceRanges bool - PeerIP string -} - -func (b *NetworkMapBuilder) applyDeletionUpdates(peerID string, updates *PeerDeletionUpdate) { - if routesView := b.cache.peerRoutes[peerID]; routesView != nil { - if len(updates.RemoveRouteIDs) > 0 { - routesView.NetworkResourceIDs = slices.DeleteFunc(routesView.NetworkResourceIDs, func(routeID route.ID) bool { - return slices.Contains(updates.RemoveRouteIDs, routeID) - }) - } - - if updates.RemoveFromSourceRanges { - b.removeIPFromRouteFirewallRules(routesView, updates.PeerIP) - } - } -} - -func (b *NetworkMapBuilder) removeIPFromRouteFirewallRules(routesView *PeerRoutesView, peerIP string) { - sourceIPv4 := peerIP + "/32" - sourceIPv6 := peerIP + "/128" - - rulesToRemove := []string{} - - for _, ruleID := range routesView.RouteFirewallRuleIDs { - if rule := b.cache.globalRouteRules[ruleID]; rule != nil { - rule.SourceRanges = slices.DeleteFunc(rule.SourceRanges, func(source string) bool { - return source == sourceIPv4 || source == sourceIPv6 || source == peerIP - }) - - if len(rule.SourceRanges) == 0 { - rulesToRemove = append(rulesToRemove, ruleID) - } - } - } - - if len(rulesToRemove) > 0 { - routesView.RouteFirewallRuleIDs = slices.DeleteFunc(routesView.RouteFirewallRuleIDs, func(ruleID string) bool { - return slices.Contains(rulesToRemove, ruleID) - }) - } -} - -func (b *NetworkMapBuilder) cleanupUnusedRules() { - usedFirewallRules := make(map[string]struct{}) - usedRouteRules := make(map[string]struct{}) - usedRoutes := make(map[route.ID]struct{}) - - for _, aclView := range b.cache.peerACLs { - for _, ruleID := range aclView.FirewallRuleIDs { - usedFirewallRules[ruleID] = struct{}{} - } - } - - for _, routesView := range b.cache.peerRoutes { - for _, ruleID := range routesView.RouteFirewallRuleIDs { - usedRouteRules[ruleID] = struct{}{} - } - - for _, routeID := range routesView.OwnRouteIDs { - usedRoutes[routeID] = struct{}{} - } - for _, routeID := range routesView.NetworkResourceIDs { - usedRoutes[routeID] = struct{}{} - } - } - - for ruleID := range b.cache.globalRules { - if _, used := usedFirewallRules[ruleID]; !used { - delete(b.cache.globalRules, ruleID) - } - } - - for ruleID := range b.cache.globalRouteRules { - if _, used := usedRouteRules[ruleID]; !used { - delete(b.cache.globalRouteRules, ruleID) - } - } - - for routeID := range b.cache.globalRoutes { - if _, used := usedRoutes[routeID]; !used { - delete(b.cache.globalRoutes, routeID) - } - } -} - -func (b *NetworkMapBuilder) UpdatePeer(peer *nbpeer.Peer) { - b.cache.mu.Lock() - defer b.cache.mu.Unlock() - peerStored, ok := b.cache.globalPeers[peer.ID] - if !ok { - return - } - *peerStored = *peer -}