diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 5ada1033d..826c05ff3 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -9,7 +9,7 @@ on: pull_request: env: - SIGN_PIPE_VER: "v0.1.2" + SIGN_PIPE_VER: "v0.1.4" GORELEASER_VER: "v2.14.3" PRODUCT_NAME: "NetBird" COPYRIGHT: "NetBird GmbH" diff --git a/Makefile b/Makefile index 43379e115..5d52b94fa 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ GOLANGCI_LINT := $(shell pwd)/bin/golangci-lint $(GOLANGCI_LINT): @echo "Installing golangci-lint..." @mkdir -p ./bin - @GOBIN=$(shell pwd)/bin go install github.com/golangci/golangci-lint/cmd/golangci-lint@latest + @GOBIN=$(shell pwd)/bin go install github.com/golangci/golangci-lint/v2/cmd/golangci-lint@latest # Lint only changed files (fast, for pre-push) lint: $(GOLANGCI_LINT) diff --git a/client/android/client.go b/client/android/client.go index d35bf4279..37e17a363 100644 --- a/client/android/client.go +++ b/client/android/client.go @@ -8,6 +8,7 @@ import ( "os" "slices" "sync" + "time" "golang.org/x/exp/maps" @@ -15,6 +16,7 @@ import ( "github.com/netbirdio/netbird/client/iface/device" "github.com/netbirdio/netbird/client/internal" + "github.com/netbirdio/netbird/client/internal/debug" "github.com/netbirdio/netbird/client/internal/dns" "github.com/netbirdio/netbird/client/internal/listener" "github.com/netbirdio/netbird/client/internal/peer" @@ -26,6 +28,7 @@ import ( "github.com/netbirdio/netbird/formatter" "github.com/netbirdio/netbird/route" "github.com/netbirdio/netbird/shared/management/domain" + types "github.com/netbirdio/netbird/upload-server/types" ) // ConnectionListener export internal Listener for mobile @@ -68,7 +71,30 @@ type Client struct { uiVersion string networkChangeListener listener.NetworkChangeListener + stateMu sync.RWMutex connectClient *internal.ConnectClient + config *profilemanager.Config + cacheDir string +} + +func (c *Client) setState(cfg *profilemanager.Config, cacheDir string, cc *internal.ConnectClient) { + c.stateMu.Lock() + defer c.stateMu.Unlock() + c.config = cfg + c.cacheDir = cacheDir + c.connectClient = cc +} + +func (c *Client) stateSnapshot() (*profilemanager.Config, string, *internal.ConnectClient) { + c.stateMu.RLock() + defer c.stateMu.RUnlock() + return c.config, c.cacheDir, c.connectClient +} + +func (c *Client) getConnectClient() *internal.ConnectClient { + c.stateMu.RLock() + defer c.stateMu.RUnlock() + return c.connectClient } // NewClient instantiate a new Client @@ -93,6 +119,7 @@ func (c *Client) Run(platformFiles PlatformFiles, urlOpener URLOpener, isAndroid cfgFile := platformFiles.ConfigurationFilePath() stateFile := platformFiles.StateFilePath() + cacheDir := platformFiles.CacheDir() log.Infof("Starting client with config: %s, state: %s", cfgFile, stateFile) @@ -124,8 +151,9 @@ func (c *Client) Run(platformFiles PlatformFiles, urlOpener URLOpener, isAndroid // todo do not throw error in case of cancelled context ctx = internal.CtxInitState(ctx) - c.connectClient = internal.NewConnectClient(ctx, cfg, c.recorder) - return c.connectClient.RunOnAndroid(c.tunAdapter, c.iFaceDiscover, c.networkChangeListener, slices.Clone(dns.items), dnsReadyListener, stateFile) + connectClient := internal.NewConnectClient(ctx, cfg, c.recorder) + c.setState(cfg, cacheDir, connectClient) + return connectClient.RunOnAndroid(c.tunAdapter, c.iFaceDiscover, c.networkChangeListener, slices.Clone(dns.items), dnsReadyListener, stateFile, cacheDir) } // RunWithoutLogin we apply this type of run function when the backed has been started without UI (i.e. after reboot). @@ -135,6 +163,7 @@ func (c *Client) RunWithoutLogin(platformFiles PlatformFiles, dns *DNSList, dnsR cfgFile := platformFiles.ConfigurationFilePath() stateFile := platformFiles.StateFilePath() + cacheDir := platformFiles.CacheDir() log.Infof("Starting client without login with config: %s, state: %s", cfgFile, stateFile) @@ -157,8 +186,9 @@ func (c *Client) RunWithoutLogin(platformFiles PlatformFiles, dns *DNSList, dnsR // todo do not throw error in case of cancelled context ctx = internal.CtxInitState(ctx) - c.connectClient = internal.NewConnectClient(ctx, cfg, c.recorder) - return c.connectClient.RunOnAndroid(c.tunAdapter, c.iFaceDiscover, c.networkChangeListener, slices.Clone(dns.items), dnsReadyListener, stateFile) + connectClient := internal.NewConnectClient(ctx, cfg, c.recorder) + c.setState(cfg, cacheDir, connectClient) + return connectClient.RunOnAndroid(c.tunAdapter, c.iFaceDiscover, c.networkChangeListener, slices.Clone(dns.items), dnsReadyListener, stateFile, cacheDir) } // Stop the internal client and free the resources @@ -173,11 +203,12 @@ func (c *Client) Stop() { } func (c *Client) RenewTun(fd int) error { - if c.connectClient == nil { + cc := c.getConnectClient() + if cc == nil { return fmt.Errorf("engine not running") } - e := c.connectClient.Engine() + e := cc.Engine() if e == nil { return fmt.Errorf("engine not initialized") } @@ -185,6 +216,73 @@ func (c *Client) RenewTun(fd int) error { return e.RenewTun(fd) } +// DebugBundle generates a debug bundle, uploads it, and returns the upload key. +// It works both with and without a running engine. +func (c *Client) DebugBundle(platformFiles PlatformFiles, anonymize bool) (string, error) { + cfg, cacheDir, cc := c.stateSnapshot() + + // If the engine hasn't been started, load config from disk + if cfg == nil { + var err error + cfg, err = profilemanager.UpdateOrCreateConfig(profilemanager.ConfigInput{ + ConfigPath: platformFiles.ConfigurationFilePath(), + }) + if err != nil { + return "", fmt.Errorf("load config: %w", err) + } + cacheDir = platformFiles.CacheDir() + } + + deps := debug.GeneratorDependencies{ + InternalConfig: cfg, + StatusRecorder: c.recorder, + TempDir: cacheDir, + } + + if cc != nil { + resp, err := cc.GetLatestSyncResponse() + if err != nil { + log.Warnf("get latest sync response: %v", err) + } + deps.SyncResponse = resp + + if e := cc.Engine(); e != nil { + if cm := e.GetClientMetrics(); cm != nil { + deps.ClientMetrics = cm + } + } + } + + bundleGenerator := debug.NewBundleGenerator( + deps, + debug.BundleConfig{ + Anonymize: anonymize, + IncludeSystemInfo: true, + }, + ) + + path, err := bundleGenerator.Generate() + if err != nil { + return "", fmt.Errorf("generate debug bundle: %w", err) + } + defer func() { + if err := os.Remove(path); err != nil { + log.Errorf("failed to remove debug bundle file: %v", err) + } + }() + + uploadCtx, cancel := context.WithTimeout(context.Background(), 2*time.Minute) + defer cancel() + + key, err := debug.UploadDebugBundle(uploadCtx, types.DefaultBundleURL, cfg.ManagementURL.String(), path) + if err != nil { + return "", fmt.Errorf("upload debug bundle: %w", err) + } + + log.Infof("debug bundle uploaded with key %s", key) + return key, nil +} + // SetTraceLogLevel configure the logger to trace level func (c *Client) SetTraceLogLevel() { log.SetLevel(log.TraceLevel) @@ -214,12 +312,13 @@ func (c *Client) PeersList() *PeerInfoArray { } func (c *Client) Networks() *NetworkArray { - if c.connectClient == nil { + cc := c.getConnectClient() + if cc == nil { log.Error("not connected") return nil } - engine := c.connectClient.Engine() + engine := cc.Engine() if engine == nil { log.Error("could not get engine") return nil @@ -300,7 +399,7 @@ func (c *Client) toggleRoute(command routeCommand) error { } func (c *Client) getRouteManager() (routemanager.Manager, error) { - client := c.connectClient + client := c.getConnectClient() if client == nil { return nil, fmt.Errorf("not connected") } diff --git a/client/android/platform_files.go b/client/android/platform_files.go index f0c369750..3be40c0bd 100644 --- a/client/android/platform_files.go +++ b/client/android/platform_files.go @@ -7,4 +7,5 @@ package android type PlatformFiles interface { ConfigurationFilePath() string StateFilePath() string + CacheDir() string } diff --git a/client/firewall/firewalld/firewalld.go b/client/firewall/firewalld/firewalld.go new file mode 100644 index 000000000..188ea61dd --- /dev/null +++ b/client/firewall/firewalld/firewalld.go @@ -0,0 +1,11 @@ +// Package firewalld integrates with the firewalld daemon so NetBird can place +// its wg interface into firewalld's "trusted" zone. This is required because +// firewalld's nftables chains are created with NFT_CHAIN_OWNER on recent +// versions, which returns EPERM to any other process that tries to insert +// rules into them. The workaround mirrors what Tailscale does: let firewalld +// itself add the accept rules to its own chains by trusting the interface. +package firewalld + +// TrustedZone is the firewalld zone name used for interfaces whose traffic +// should bypass firewalld filtering. +const TrustedZone = "trusted" diff --git a/client/firewall/firewalld/firewalld_linux.go b/client/firewall/firewalld/firewalld_linux.go new file mode 100644 index 000000000..924a04b0a --- /dev/null +++ b/client/firewall/firewalld/firewalld_linux.go @@ -0,0 +1,260 @@ +//go:build linux + +package firewalld + +import ( + "context" + "errors" + "fmt" + "os/exec" + "strings" + "sync" + "time" + + "github.com/godbus/dbus/v5" + log "github.com/sirupsen/logrus" +) + +const ( + dbusDest = "org.fedoraproject.FirewallD1" + dbusPath = "/org/fedoraproject/FirewallD1" + dbusRootIface = "org.fedoraproject.FirewallD1" + dbusZoneIface = "org.fedoraproject.FirewallD1.zone" + + errZoneAlreadySet = "ZONE_ALREADY_SET" + errAlreadyEnabled = "ALREADY_ENABLED" + errUnknownIface = "UNKNOWN_INTERFACE" + errNotEnabled = "NOT_ENABLED" + + // callTimeout bounds each individual DBus or firewall-cmd invocation. + // A fresh context is created for each call so a slow DBus probe can't + // exhaust the deadline before the firewall-cmd fallback gets to run. + callTimeout = 3 * time.Second +) + +var ( + errDBusUnavailable = errors.New("firewalld dbus unavailable") + + // trustLogOnce ensures the "added to trusted zone" message is logged at + // Info level only for the first successful add per process; repeat adds + // from other init paths are quieter. + trustLogOnce sync.Once + + parentCtxMu sync.RWMutex + parentCtx context.Context = context.Background() +) + +// SetParentContext installs a parent context whose cancellation aborts any +// in-flight TrustInterface call. It does not affect UntrustInterface, which +// always uses a fresh Background-rooted timeout so cleanup can still run +// during engine shutdown when the engine context is already cancelled. +func SetParentContext(ctx context.Context) { + parentCtxMu.Lock() + parentCtx = ctx + parentCtxMu.Unlock() +} + +func getParentContext() context.Context { + parentCtxMu.RLock() + defer parentCtxMu.RUnlock() + return parentCtx +} + +// TrustInterface places iface into firewalld's trusted zone if firewalld is +// running. It is idempotent and best-effort: errors are returned so callers +// can log, but a non-running firewalld is not an error. Only the first +// successful call per process logs at Info. Respects the parent context set +// via SetParentContext so startup-time cancellation unblocks it. +func TrustInterface(iface string) error { + parent := getParentContext() + if !isRunning(parent) { + return nil + } + if err := addTrusted(parent, iface); err != nil { + return fmt.Errorf("add %s to firewalld trusted zone: %w", iface, err) + } + trustLogOnce.Do(func() { + log.Infof("added %s to firewalld trusted zone", iface) + }) + log.Debugf("firewalld: ensured %s is in trusted zone", iface) + return nil +} + +// UntrustInterface removes iface from firewalld's trusted zone if firewalld +// is running. Idempotent. Uses a Background-rooted timeout so it still runs +// during shutdown after the engine context has been cancelled. +func UntrustInterface(iface string) error { + if !isRunning(context.Background()) { + return nil + } + if err := removeTrusted(context.Background(), iface); err != nil { + return fmt.Errorf("remove %s from firewalld trusted zone: %w", iface, err) + } + return nil +} + +func newCallContext(parent context.Context) (context.Context, context.CancelFunc) { + return context.WithTimeout(parent, callTimeout) +} + +func isRunning(parent context.Context) bool { + ctx, cancel := newCallContext(parent) + ok, err := isRunningDBus(ctx) + cancel() + if err == nil { + return ok + } + if errors.Is(err, errDBusUnavailable) || errors.Is(err, context.DeadlineExceeded) { + ctx, cancel = newCallContext(parent) + defer cancel() + return isRunningCLI(ctx) + } + return false +} + +func addTrusted(parent context.Context, iface string) error { + ctx, cancel := newCallContext(parent) + err := addDBus(ctx, iface) + cancel() + if err == nil { + return nil + } + if !errors.Is(err, errDBusUnavailable) { + log.Debugf("firewalld: dbus add failed, falling back to firewall-cmd: %v", err) + } + ctx, cancel = newCallContext(parent) + defer cancel() + return addCLI(ctx, iface) +} + +func removeTrusted(parent context.Context, iface string) error { + ctx, cancel := newCallContext(parent) + err := removeDBus(ctx, iface) + cancel() + if err == nil { + return nil + } + if !errors.Is(err, errDBusUnavailable) { + log.Debugf("firewalld: dbus remove failed, falling back to firewall-cmd: %v", err) + } + ctx, cancel = newCallContext(parent) + defer cancel() + return removeCLI(ctx, iface) +} + +func isRunningDBus(ctx context.Context) (bool, error) { + conn, err := dbus.SystemBus() + if err != nil { + return false, fmt.Errorf("%w: %v", errDBusUnavailable, err) + } + obj := conn.Object(dbusDest, dbusPath) + + var zone string + if err := obj.CallWithContext(ctx, dbusRootIface+".getDefaultZone", 0).Store(&zone); err != nil { + return false, fmt.Errorf("firewalld getDefaultZone: %w", err) + } + return true, nil +} + +func isRunningCLI(ctx context.Context) bool { + if _, err := exec.LookPath("firewall-cmd"); err != nil { + return false + } + return exec.CommandContext(ctx, "firewall-cmd", "--state").Run() == nil +} + +func addDBus(ctx context.Context, iface string) error { + conn, err := dbus.SystemBus() + if err != nil { + return fmt.Errorf("%w: %v", errDBusUnavailable, err) + } + obj := conn.Object(dbusDest, dbusPath) + + call := obj.CallWithContext(ctx, dbusZoneIface+".addInterface", 0, TrustedZone, iface) + if call.Err == nil { + return nil + } + + if dbusErrContains(call.Err, errAlreadyEnabled) { + return nil + } + + if dbusErrContains(call.Err, errZoneAlreadySet) { + move := obj.CallWithContext(ctx, dbusZoneIface+".changeZoneOfInterface", 0, TrustedZone, iface) + if move.Err != nil { + return fmt.Errorf("firewalld changeZoneOfInterface: %w", move.Err) + } + return nil + } + + return fmt.Errorf("firewalld addInterface: %w", call.Err) +} + +func removeDBus(ctx context.Context, iface string) error { + conn, err := dbus.SystemBus() + if err != nil { + return fmt.Errorf("%w: %v", errDBusUnavailable, err) + } + obj := conn.Object(dbusDest, dbusPath) + + call := obj.CallWithContext(ctx, dbusZoneIface+".removeInterface", 0, TrustedZone, iface) + if call.Err == nil { + return nil + } + + if dbusErrContains(call.Err, errUnknownIface) || dbusErrContains(call.Err, errNotEnabled) { + return nil + } + + return fmt.Errorf("firewalld removeInterface: %w", call.Err) +} + +func addCLI(ctx context.Context, iface string) error { + if _, err := exec.LookPath("firewall-cmd"); err != nil { + return fmt.Errorf("firewall-cmd not available: %w", err) + } + + // --change-interface (no --permanent) binds the interface for the + // current runtime only; we do not want membership to persist across + // reboots because netbird re-asserts it on every startup. + out, err := exec.CommandContext(ctx, + "firewall-cmd", "--zone="+TrustedZone, "--change-interface="+iface, + ).CombinedOutput() + if err != nil { + return fmt.Errorf("firewall-cmd change-interface: %w: %s", err, strings.TrimSpace(string(out))) + } + return nil +} + +func removeCLI(ctx context.Context, iface string) error { + if _, err := exec.LookPath("firewall-cmd"); err != nil { + return fmt.Errorf("firewall-cmd not available: %w", err) + } + + out, err := exec.CommandContext(ctx, + "firewall-cmd", "--zone="+TrustedZone, "--remove-interface="+iface, + ).CombinedOutput() + if err != nil { + msg := strings.TrimSpace(string(out)) + if strings.Contains(msg, errUnknownIface) || strings.Contains(msg, errNotEnabled) { + return nil + } + return fmt.Errorf("firewall-cmd remove-interface: %w: %s", err, msg) + } + return nil +} + +func dbusErrContains(err error, code string) bool { + if err == nil { + return false + } + var de dbus.Error + if errors.As(err, &de) { + for _, b := range de.Body { + if s, ok := b.(string); ok && strings.Contains(s, code) { + return true + } + } + } + return strings.Contains(err.Error(), code) +} diff --git a/client/firewall/firewalld/firewalld_linux_test.go b/client/firewall/firewalld/firewalld_linux_test.go new file mode 100644 index 000000000..d812745fc --- /dev/null +++ b/client/firewall/firewalld/firewalld_linux_test.go @@ -0,0 +1,49 @@ +//go:build linux + +package firewalld + +import ( + "errors" + "testing" + + "github.com/godbus/dbus/v5" +) + +func TestDBusErrContains(t *testing.T) { + tests := []struct { + name string + err error + code string + want bool + }{ + {"nil error", nil, errZoneAlreadySet, false}, + {"plain error match", errors.New("ZONE_ALREADY_SET: wt0"), errZoneAlreadySet, true}, + {"plain error miss", errors.New("something else"), errZoneAlreadySet, false}, + { + "dbus.Error body match", + dbus.Error{Name: "org.fedoraproject.FirewallD1.Exception", Body: []any{"ZONE_ALREADY_SET: wt0"}}, + errZoneAlreadySet, + true, + }, + { + "dbus.Error body miss", + dbus.Error{Name: "org.fedoraproject.FirewallD1.Exception", Body: []any{"INVALID_INTERFACE"}}, + errAlreadyEnabled, + false, + }, + { + "dbus.Error non-string body falls back to Error()", + dbus.Error{Name: "x", Body: []any{123}}, + "x", + true, + }, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + got := dbusErrContains(tc.err, tc.code) + if got != tc.want { + t.Fatalf("dbusErrContains(%v, %q) = %v; want %v", tc.err, tc.code, got, tc.want) + } + }) + } +} diff --git a/client/firewall/firewalld/firewalld_other.go b/client/firewall/firewalld/firewalld_other.go new file mode 100644 index 000000000..cfa28221d --- /dev/null +++ b/client/firewall/firewalld/firewalld_other.go @@ -0,0 +1,25 @@ +//go:build !linux + +package firewalld + +import "context" + +// SetParentContext is a no-op on non-Linux platforms because firewalld only +// runs on Linux. +func SetParentContext(context.Context) { + // intentionally empty: firewalld is a Linux-only daemon +} + +// TrustInterface is a no-op on non-Linux platforms because firewalld only +// runs on Linux. +func TrustInterface(string) error { + // intentionally empty: firewalld is a Linux-only daemon + return nil +} + +// UntrustInterface is a no-op on non-Linux platforms because firewalld only +// runs on Linux. +func UntrustInterface(string) error { + // intentionally empty: firewalld is a Linux-only daemon + return nil +} diff --git a/client/firewall/iptables/manager_linux.go b/client/firewall/iptables/manager_linux.go index a1d4467d5..7d8cd7f8c 100644 --- a/client/firewall/iptables/manager_linux.go +++ b/client/firewall/iptables/manager_linux.go @@ -12,6 +12,7 @@ import ( log "github.com/sirupsen/logrus" nberrors "github.com/netbirdio/netbird/client/errors" + "github.com/netbirdio/netbird/client/firewall/firewalld" firewall "github.com/netbirdio/netbird/client/firewall/manager" "github.com/netbirdio/netbird/client/iface/wgaddr" "github.com/netbirdio/netbird/client/internal/statemanager" @@ -86,6 +87,12 @@ func (m *Manager) Init(stateManager *statemanager.Manager) error { log.Warnf("raw table not available, notrack rules will be disabled: %v", err) } + // Trust after all fatal init steps so a later failure doesn't leave the + // interface in firewalld's trusted zone without a corresponding Close. + if err := firewalld.TrustInterface(m.wgIface.Name()); err != nil { + log.Warnf("failed to trust interface in firewalld: %v", err) + } + // persist early to ensure cleanup of chains go func() { if err := stateManager.PersistState(context.Background()); err != nil { @@ -191,6 +198,12 @@ func (m *Manager) Close(stateManager *statemanager.Manager) error { merr = multierror.Append(merr, fmt.Errorf("reset router: %w", err)) } + // Appending to merr intentionally blocks DeleteState below so ShutdownState + // stays persisted and the crash-recovery path retries firewalld cleanup. + if err := firewalld.UntrustInterface(m.wgIface.Name()); err != nil { + merr = multierror.Append(merr, err) + } + // attempt to delete state only if all other operations succeeded if merr == nil { if err := stateManager.DeleteState(&ShutdownState{}); err != nil { @@ -217,6 +230,11 @@ func (m *Manager) AllowNetbird() error { if err != nil { return fmt.Errorf("allow netbird interface traffic: %w", err) } + + if err := firewalld.TrustInterface(m.wgIface.Name()); err != nil { + log.Warnf("failed to trust interface in firewalld: %v", err) + } + return nil } diff --git a/client/firewall/nftables/manager_linux.go b/client/firewall/nftables/manager_linux.go index 0b5b61e04..8cd5cc6b3 100644 --- a/client/firewall/nftables/manager_linux.go +++ b/client/firewall/nftables/manager_linux.go @@ -14,6 +14,7 @@ import ( log "github.com/sirupsen/logrus" "golang.org/x/sys/unix" + "github.com/netbirdio/netbird/client/firewall/firewalld" firewall "github.com/netbirdio/netbird/client/firewall/manager" "github.com/netbirdio/netbird/client/iface/wgaddr" "github.com/netbirdio/netbird/client/internal/statemanager" @@ -217,6 +218,10 @@ func (m *Manager) AllowNetbird() error { return fmt.Errorf("flush allow input netbird rules: %w", err) } + if err := firewalld.TrustInterface(m.wgIface.Name()); err != nil { + log.Warnf("failed to trust interface in firewalld: %v", err) + } + return nil } diff --git a/client/firewall/nftables/router_linux.go b/client/firewall/nftables/router_linux.go index 904daf7cb..8cc0d2792 100644 --- a/client/firewall/nftables/router_linux.go +++ b/client/firewall/nftables/router_linux.go @@ -19,6 +19,7 @@ import ( "golang.org/x/sys/unix" nberrors "github.com/netbirdio/netbird/client/errors" + "github.com/netbirdio/netbird/client/firewall/firewalld" firewall "github.com/netbirdio/netbird/client/firewall/manager" nbid "github.com/netbirdio/netbird/client/internal/acl/id" "github.com/netbirdio/netbird/client/internal/routemanager/ipfwdstate" @@ -40,6 +41,8 @@ const ( chainNameForward = "FORWARD" chainNameMangleForward = "netbird-mangle-forward" + firewalldTableName = "firewalld" + userDataAcceptForwardRuleIif = "frwacceptiif" userDataAcceptForwardRuleOif = "frwacceptoif" userDataAcceptInputRule = "inputaccept" @@ -133,6 +136,10 @@ func (r *router) Reset() error { merr = multierror.Append(merr, fmt.Errorf("remove accept filter rules: %w", err)) } + if err := firewalld.UntrustInterface(r.wgIface.Name()); err != nil { + merr = multierror.Append(merr, err) + } + if err := r.removeNatPreroutingRules(); err != nil { merr = multierror.Append(merr, fmt.Errorf("remove filter prerouting rules: %w", err)) } @@ -280,6 +287,10 @@ func (r *router) createContainers() error { log.Errorf("failed to add accept rules for the forward chain: %s", err) } + if err := firewalld.TrustInterface(r.wgIface.Name()); err != nil { + log.Warnf("failed to trust interface in firewalld: %v", err) + } + if err := r.refreshRulesMap(); err != nil { log.Errorf("failed to refresh rules: %s", err) } @@ -1319,6 +1330,13 @@ func (r *router) isExternalChain(chain *nftables.Chain) bool { return false } + // Skip firewalld-owned chains. Firewalld creates its chains with the + // NFT_CHAIN_OWNER flag, so inserting rules into them returns EPERM. + // We delegate acceptance to firewalld by trusting the interface instead. + if chain.Table.Name == firewalldTableName { + return false + } + // Skip all iptables-managed tables in the ip family if chain.Table.Family == nftables.TableFamilyIPv4 && isIptablesTable(chain.Table.Name) { return false diff --git a/client/firewall/uspfilter/allow_netbird.go b/client/firewall/uspfilter/allow_netbird.go index 6a6533344..b120cdf12 100644 --- a/client/firewall/uspfilter/allow_netbird.go +++ b/client/firewall/uspfilter/allow_netbird.go @@ -3,6 +3,9 @@ package uspfilter import ( + log "github.com/sirupsen/logrus" + + "github.com/netbirdio/netbird/client/firewall/firewalld" "github.com/netbirdio/netbird/client/internal/statemanager" ) @@ -16,6 +19,9 @@ func (m *Manager) Close(stateManager *statemanager.Manager) error { if m.nativeFirewall != nil { return m.nativeFirewall.Close(stateManager) } + if err := firewalld.UntrustInterface(m.wgIface.Name()); err != nil { + log.Warnf("failed to untrust interface in firewalld: %v", err) + } return nil } @@ -24,5 +30,8 @@ func (m *Manager) AllowNetbird() error { if m.nativeFirewall != nil { return m.nativeFirewall.AllowNetbird() } + if err := firewalld.TrustInterface(m.wgIface.Name()); err != nil { + log.Warnf("failed to trust interface in firewalld: %v", err) + } return nil } diff --git a/client/firewall/uspfilter/common/iface.go b/client/firewall/uspfilter/common/iface.go index 7296953db..9c06eb3f7 100644 --- a/client/firewall/uspfilter/common/iface.go +++ b/client/firewall/uspfilter/common/iface.go @@ -9,6 +9,7 @@ import ( // IFaceMapper defines subset methods of interface required for manager type IFaceMapper interface { + Name() string SetFilter(device.PacketFilter) error Address() wgaddr.Address GetWGDevice() *wgdevice.Device diff --git a/client/firewall/uspfilter/filter_test.go b/client/firewall/uspfilter/filter_test.go index 39e8efa2c..5fb9fef0e 100644 --- a/client/firewall/uspfilter/filter_test.go +++ b/client/firewall/uspfilter/filter_test.go @@ -31,12 +31,20 @@ var logger = log.NewFromLogrus(logrus.StandardLogger()) var flowLogger = netflow.NewManager(nil, []byte{}, nil).GetLogger() type IFaceMock struct { + NameFunc func() string SetFilterFunc func(device.PacketFilter) error AddressFunc func() wgaddr.Address GetWGDeviceFunc func() *wgdevice.Device GetDeviceFunc func() *device.FilteredDevice } +func (i *IFaceMock) Name() string { + if i.NameFunc == nil { + return "wgtest" + } + return i.NameFunc() +} + func (i *IFaceMock) GetWGDevice() *wgdevice.Device { if i.GetWGDeviceFunc == nil { return nil diff --git a/client/iface/bind/ice_bind_test.go b/client/iface/bind/ice_bind_test.go index 1fdd955c9..f49e68508 100644 --- a/client/iface/bind/ice_bind_test.go +++ b/client/iface/bind/ice_bind_test.go @@ -239,8 +239,12 @@ func TestICEBind_HandlesConcurrentMixedTraffic(t *testing.T) { ipv6Count++ } - assert.Equal(t, packetsPerFamily, ipv4Count) - assert.Equal(t, packetsPerFamily, ipv6Count) + // Allow some UDP packet loss under load (e.g. FreeBSD/QEMU runners). The + // routing-correctness checks above are the real assertions; the counts + // are a sanity bound to catch a totally silent path. + minDelivered := packetsPerFamily * 80 / 100 + assert.GreaterOrEqual(t, ipv4Count, minDelivered, "IPv4 delivery below threshold") + assert.GreaterOrEqual(t, ipv6Count, minDelivered, "IPv6 delivery below threshold") } func TestICEBind_DetectsAddressFamilyFromConnection(t *testing.T) { diff --git a/client/iface/iface.go b/client/iface/iface.go index 9b331d68c..655dd1682 100644 --- a/client/iface/iface.go +++ b/client/iface/iface.go @@ -217,7 +217,6 @@ func (w *WGIface) RemoveAllowedIP(peerKey string, allowedIP netip.Prefix) error // Close closes the tunnel interface func (w *WGIface) Close() error { w.mu.Lock() - defer w.mu.Unlock() var result *multierror.Error @@ -225,7 +224,15 @@ func (w *WGIface) Close() error { result = multierror.Append(result, fmt.Errorf("failed to free WireGuard proxy: %w", err)) } - if err := w.tun.Close(); err != nil { + // Release w.mu before calling w.tun.Close(): the underlying + // wireguard-go device.Close() waits for its send/receive goroutines + // to drain. Some of those goroutines re-enter WGIface methods that + // take w.mu (e.g. the packet filter DNS hook calls GetDevice()), so + // holding the mutex here would deadlock the shutdown path. + tun := w.tun + w.mu.Unlock() + + if err := tun.Close(); err != nil { result = multierror.Append(result, fmt.Errorf("failed to close wireguard interface %s: %w", w.Name(), err)) } diff --git a/client/iface/iface_close_test.go b/client/iface/iface_close_test.go new file mode 100644 index 000000000..171e15d0a --- /dev/null +++ b/client/iface/iface_close_test.go @@ -0,0 +1,113 @@ +//go:build !android + +package iface + +import ( + "errors" + "sync" + "testing" + "time" + + wgdevice "golang.zx2c4.com/wireguard/device" + "golang.zx2c4.com/wireguard/tun/netstack" + + "github.com/netbirdio/netbird/client/iface/device" + "github.com/netbirdio/netbird/client/iface/udpmux" + "github.com/netbirdio/netbird/client/iface/wgaddr" + "github.com/netbirdio/netbird/client/iface/wgproxy" +) + +// fakeTunDevice implements WGTunDevice and lets the test control when +// Close() returns. It mimics the wireguard-go shutdown path, which blocks +// until its goroutines drain. Some of those goroutines (e.g. the packet +// filter DNS hook in client/internal/dns) call back into WGIface, so if +// WGIface.Close() held w.mu across tun.Close() the shutdown would +// deadlock. +type fakeTunDevice struct { + closeStarted chan struct{} + unblockClose chan struct{} +} + +func (f *fakeTunDevice) Create() (device.WGConfigurer, error) { + return nil, errors.New("not implemented") +} +func (f *fakeTunDevice) Up() (*udpmux.UniversalUDPMuxDefault, error) { + return nil, errors.New("not implemented") +} +func (f *fakeTunDevice) UpdateAddr(wgaddr.Address) error { return nil } +func (f *fakeTunDevice) WgAddress() wgaddr.Address { return wgaddr.Address{} } +func (f *fakeTunDevice) MTU() uint16 { return DefaultMTU } +func (f *fakeTunDevice) DeviceName() string { return "nb-close-test" } +func (f *fakeTunDevice) FilteredDevice() *device.FilteredDevice { return nil } +func (f *fakeTunDevice) Device() *wgdevice.Device { return nil } +func (f *fakeTunDevice) GetNet() *netstack.Net { return nil } +func (f *fakeTunDevice) GetICEBind() device.EndpointManager { return nil } + +func (f *fakeTunDevice) Close() error { + close(f.closeStarted) + <-f.unblockClose + return nil +} + +type fakeProxyFactory struct{} + +func (fakeProxyFactory) GetProxy() wgproxy.Proxy { return nil } +func (fakeProxyFactory) GetProxyPort() uint16 { return 0 } +func (fakeProxyFactory) Free() error { return nil } + +// TestWGIface_CloseReleasesMutexBeforeTunClose guards against a deadlock +// that surfaces as a macOS test-timeout in +// TestDNSPermanent_updateUpstream: WGIface.Close() used to hold w.mu +// while waiting for the wireguard-go device goroutines to finish, and +// one of those goroutines (the DNS filter hook) calls back into +// WGIface.GetDevice() which needs the same mutex. The fix is to drop +// the lock before tun.Close() returns control. +func TestWGIface_CloseReleasesMutexBeforeTunClose(t *testing.T) { + tun := &fakeTunDevice{ + closeStarted: make(chan struct{}), + unblockClose: make(chan struct{}), + } + w := &WGIface{ + tun: tun, + wgProxyFactory: fakeProxyFactory{}, + } + + closeDone := make(chan error, 1) + go func() { + closeDone <- w.Close() + }() + + select { + case <-tun.closeStarted: + case <-time.After(2 * time.Second): + close(tun.unblockClose) + t.Fatal("tun.Close() was never invoked") + } + + // Simulate the WireGuard read goroutine calling back into WGIface + // via the packet filter's DNS hook. If Close() still held w.mu + // during tun.Close(), this would block until the test timeout. + getDeviceDone := make(chan struct{}) + var wg sync.WaitGroup + wg.Add(1) + go func() { + defer wg.Done() + _ = w.GetDevice() + close(getDeviceDone) + }() + + select { + case <-getDeviceDone: + case <-time.After(2 * time.Second): + close(tun.unblockClose) + wg.Wait() + t.Fatal("GetDevice() deadlocked while WGIface.Close was closing the tun") + } + + close(tun.unblockClose) + select { + case <-closeDone: + case <-time.After(2 * time.Second): + t.Fatal("WGIface.Close() never returned after the tun was unblocked") + } +} diff --git a/client/iface/udpmux/universal.go b/client/iface/udpmux/universal.go index 43bfedaaa..89a7eefb9 100644 --- a/client/iface/udpmux/universal.go +++ b/client/iface/udpmux/universal.go @@ -171,7 +171,7 @@ func (u *UDPConn) performFilterCheck(addr net.Addr) error { } if u.address.Network.Contains(a) { - log.Warnf("Address %s is part of the NetBird network %s, refusing to write", addr, u.address) + log.Warnf("address %s is part of the NetBird network %s, refusing to write", addr, u.address) return fmt.Errorf("address %s is part of the NetBird network %s, refusing to write", addr, u.address) } @@ -181,7 +181,7 @@ func (u *UDPConn) performFilterCheck(addr net.Addr) error { u.addrCache.Store(addr.String(), isRouted) if isRouted { // Extra log, as the error only shows up with ICE logging enabled - log.Infof("Address %s is part of routed network %s, refusing to write", addr, prefix) + log.Infof("address %s is part of routed network %s, refusing to write", addr, prefix) return fmt.Errorf("address %s is part of routed network %s, refusing to write", addr, prefix) } } diff --git a/client/internal/connect.go b/client/internal/connect.go index bc2bd84d9..ac498f719 100644 --- a/client/internal/connect.go +++ b/client/internal/connect.go @@ -94,6 +94,7 @@ func (c *ConnectClient) RunOnAndroid( dnsAddresses []netip.AddrPort, dnsReadyListener dns.ReadyListener, stateFilePath string, + cacheDir string, ) error { // in case of non Android os these variables will be nil mobileDependency := MobileDependency{ @@ -103,6 +104,7 @@ func (c *ConnectClient) RunOnAndroid( HostDNSAddresses: dnsAddresses, DnsReadyListener: dnsReadyListener, StateFilePath: stateFilePath, + TempDir: cacheDir, } return c.run(mobileDependency, nil, "") } @@ -338,6 +340,7 @@ func (c *ConnectClient) run(mobileDependency MobileDependency, runningChan chan log.Error(err) return wrapErr(err) } + engineConfig.TempDir = mobileDependency.TempDir relayManager := relayClient.NewManager(engineCtx, relayURLs, myPrivateKey.PublicKey().String(), engineConfig.MTU) c.statusRecorder.SetRelayMgr(relayManager) diff --git a/client/internal/debug/debug.go b/client/internal/debug/debug.go index 6a8eae324..bddb9a69e 100644 --- a/client/internal/debug/debug.go +++ b/client/internal/debug/debug.go @@ -16,7 +16,6 @@ import ( "path/filepath" "runtime" "runtime/pprof" - "slices" "sort" "strings" "time" @@ -31,7 +30,6 @@ import ( "github.com/netbirdio/netbird/client/internal/updater/installer" nbstatus "github.com/netbirdio/netbird/client/status" mgmProto "github.com/netbirdio/netbird/shared/management/proto" - "github.com/netbirdio/netbird/util" ) const readmeContent = `Netbird debug bundle @@ -234,6 +232,7 @@ type BundleGenerator struct { statusRecorder *peer.Status syncResponse *mgmProto.SyncResponse logPath string + tempDir string cpuProfile []byte refreshStatus func() // Optional callback to refresh status before bundle generation clientMetrics MetricsExporter @@ -256,6 +255,7 @@ type GeneratorDependencies struct { StatusRecorder *peer.Status SyncResponse *mgmProto.SyncResponse LogPath string + TempDir string // Directory for temporary bundle zip files. If empty, os.TempDir() is used. CPUProfile []byte RefreshStatus func() // Optional callback to refresh status before bundle generation ClientMetrics MetricsExporter @@ -275,6 +275,7 @@ func NewBundleGenerator(deps GeneratorDependencies, cfg BundleConfig) *BundleGen statusRecorder: deps.StatusRecorder, syncResponse: deps.SyncResponse, logPath: deps.LogPath, + tempDir: deps.TempDir, cpuProfile: deps.CPUProfile, refreshStatus: deps.RefreshStatus, clientMetrics: deps.ClientMetrics, @@ -287,7 +288,7 @@ func NewBundleGenerator(deps GeneratorDependencies, cfg BundleConfig) *BundleGen // Generate creates a debug bundle and returns the location. func (g *BundleGenerator) Generate() (resp string, err error) { - bundlePath, err := os.CreateTemp("", "netbird.debug.*.zip") + bundlePath, err := os.CreateTemp(g.tempDir, "netbird.debug.*.zip") if err != nil { return "", fmt.Errorf("create zip file: %w", err) } @@ -373,15 +374,8 @@ func (g *BundleGenerator) createArchive() error { log.Errorf("failed to add wg show output: %v", err) } - if g.logPath != "" && !slices.Contains(util.SpecialLogs, g.logPath) { - if err := g.addLogfile(); err != nil { - log.Errorf("failed to add log file to debug bundle: %v", err) - if err := g.trySystemdLogFallback(); err != nil { - log.Errorf("failed to add systemd logs as fallback: %v", err) - } - } - } else if err := g.trySystemdLogFallback(); err != nil { - log.Errorf("failed to add systemd logs: %v", err) + if err := g.addPlatformLog(); err != nil { + log.Errorf("failed to add logs to debug bundle: %v", err) } if err := g.addUpdateLogs(); err != nil { diff --git a/client/internal/debug/debug_android.go b/client/internal/debug/debug_android.go new file mode 100644 index 000000000..a4e2b3e98 --- /dev/null +++ b/client/internal/debug/debug_android.go @@ -0,0 +1,41 @@ +//go:build android + +package debug + +import ( + "fmt" + "io" + "os/exec" + + log "github.com/sirupsen/logrus" +) + +func (g *BundleGenerator) addPlatformLog() error { + cmd := exec.Command("/system/bin/logcat", "-d") + stdout, err := cmd.StdoutPipe() + if err != nil { + return fmt.Errorf("logcat stdout pipe: %w", err) + } + + if err := cmd.Start(); err != nil { + return fmt.Errorf("start logcat: %w", err) + } + + var logReader io.Reader = stdout + if g.anonymize { + var pw *io.PipeWriter + logReader, pw = io.Pipe() + go anonymizeLog(stdout, pw, g.anonymizer) + } + + if err := g.addFileToZip(logReader, "logcat.txt"); err != nil { + return fmt.Errorf("add logcat to zip: %w", err) + } + + if err := cmd.Wait(); err != nil { + return fmt.Errorf("wait logcat: %w", err) + } + + log.Debug("added logcat output to debug bundle") + return nil +} diff --git a/client/internal/debug/debug_nonandroid.go b/client/internal/debug/debug_nonandroid.go new file mode 100644 index 000000000..117238dec --- /dev/null +++ b/client/internal/debug/debug_nonandroid.go @@ -0,0 +1,25 @@ +//go:build !android + +package debug + +import ( + "slices" + + log "github.com/sirupsen/logrus" + + "github.com/netbirdio/netbird/util" +) + +func (g *BundleGenerator) addPlatformLog() error { + if g.logPath != "" && !slices.Contains(util.SpecialLogs, g.logPath) { + if err := g.addLogfile(); err != nil { + log.Errorf("failed to add log file to debug bundle: %v", err) + if err := g.trySystemdLogFallback(); err != nil { + return err + } + } + } else if err := g.trySystemdLogFallback(); err != nil { + return err + } + return nil +} diff --git a/client/internal/debug/upload_test.go b/client/internal/debug/upload_test.go index e833c196d..f224b8d3f 100644 --- a/client/internal/debug/upload_test.go +++ b/client/internal/debug/upload_test.go @@ -3,10 +3,12 @@ package debug import ( "context" "errors" + "net" "net/http" "os" "path/filepath" "testing" + "time" "github.com/stretchr/testify/require" @@ -19,8 +21,10 @@ func TestUpload(t *testing.T) { t.Skip("Skipping upload test on docker ci") } testDir := t.TempDir() - testURL := "http://localhost:8080" + addr := reserveLoopbackPort(t) + testURL := "http://" + addr t.Setenv("SERVER_URL", testURL) + t.Setenv("SERVER_ADDRESS", addr) t.Setenv("STORE_DIR", testDir) srv := server.NewServer() go func() { @@ -33,6 +37,7 @@ func TestUpload(t *testing.T) { t.Errorf("Failed to stop server: %v", err) } }) + waitForServer(t, addr) file := filepath.Join(t.TempDir(), "tmpfile") fileContent := []byte("test file content") @@ -47,3 +52,30 @@ func TestUpload(t *testing.T) { require.NoError(t, err) require.Equal(t, fileContent, createdFileContent) } + +// reserveLoopbackPort binds an ephemeral port on loopback to learn a free +// address, then releases it so the server under test can rebind. The close/ +// rebind window is racy in theory; on loopback with a kernel-assigned port +// it's essentially never contended in practice. +func reserveLoopbackPort(t *testing.T) string { + t.Helper() + l, err := net.Listen("tcp", "127.0.0.1:0") + require.NoError(t, err) + addr := l.Addr().String() + require.NoError(t, l.Close()) + return addr +} + +func waitForServer(t *testing.T, addr string) { + t.Helper() + deadline := time.Now().Add(5 * time.Second) + for time.Now().Before(deadline) { + c, err := net.DialTimeout("tcp", addr, 100*time.Millisecond) + if err == nil { + _ = c.Close() + return + } + time.Sleep(20 * time.Millisecond) + } + t.Fatalf("server did not start listening on %s in time", addr) +} diff --git a/client/internal/dns/file_parser_unix.go b/client/internal/dns/file_parser_unix.go index 8dacb4e51..50ba74c0c 100644 --- a/client/internal/dns/file_parser_unix.go +++ b/client/internal/dns/file_parser_unix.go @@ -13,6 +13,7 @@ import ( const ( defaultResolvConfPath = "/etc/resolv.conf" + nsswitchConfPath = "/etc/nsswitch.conf" ) type resolvConf struct { diff --git a/client/internal/dns/handler_chain.go b/client/internal/dns/handler_chain.go index 6fbdedc59..57e7722d4 100644 --- a/client/internal/dns/handler_chain.go +++ b/client/internal/dns/handler_chain.go @@ -1,7 +1,10 @@ package dns import ( + "context" "fmt" + "math" + "net" "slices" "strconv" "strings" @@ -192,6 +195,12 @@ func (c *HandlerChain) logHandlers() { } func (c *HandlerChain) ServeDNS(w dns.ResponseWriter, r *dns.Msg) { + c.dispatch(w, r, math.MaxInt) +} + +// dispatch routes a DNS request through the chain, skipping handlers with +// priority > maxPriority. Shared by ServeDNS and ResolveInternal. +func (c *HandlerChain) dispatch(w dns.ResponseWriter, r *dns.Msg, maxPriority int) { if len(r.Question) == 0 { return } @@ -216,6 +225,9 @@ func (c *HandlerChain) ServeDNS(w dns.ResponseWriter, r *dns.Msg) { // Try handlers in priority order for _, entry := range handlers { + if entry.Priority > maxPriority { + continue + } if !c.isHandlerMatch(qname, entry) { continue } @@ -273,6 +285,55 @@ func (c *HandlerChain) logResponse(logger *log.Entry, cw *ResponseWriterChain, q cw.response.Len(), meta, time.Since(startTime)) } +// ResolveInternal runs an in-process DNS query against the chain, skipping any +// handler with priority > maxPriority. Used by internal callers (e.g. the mgmt +// cache refresher) that must bypass themselves to avoid loops. Honors ctx +// cancellation; on ctx.Done the dispatch goroutine is left to drain on its own +// (bounded by the invoked handler's internal timeout). +func (c *HandlerChain) ResolveInternal(ctx context.Context, r *dns.Msg, maxPriority int) (*dns.Msg, error) { + if len(r.Question) == 0 { + return nil, fmt.Errorf("empty question") + } + + base := &internalResponseWriter{} + done := make(chan struct{}) + go func() { + c.dispatch(base, r, maxPriority) + close(done) + }() + + select { + case <-done: + case <-ctx.Done(): + // Prefer a completed response if dispatch finished concurrently with cancellation. + select { + case <-done: + default: + return nil, fmt.Errorf("resolve %s: %w", strings.ToLower(r.Question[0].Name), ctx.Err()) + } + } + + if base.response == nil || base.response.Rcode == dns.RcodeRefused { + return nil, fmt.Errorf("no handler resolved %s at priority ≤ %d", + strings.ToLower(r.Question[0].Name), maxPriority) + } + return base.response, nil +} + +// HasRootHandlerAtOrBelow reports whether any "." handler is registered at +// priority ≤ maxPriority. +func (c *HandlerChain) HasRootHandlerAtOrBelow(maxPriority int) bool { + c.mu.RLock() + defer c.mu.RUnlock() + + for _, h := range c.handlers { + if h.Pattern == "." && h.Priority <= maxPriority { + return true + } + } + return false +} + func (c *HandlerChain) isHandlerMatch(qname string, entry HandlerEntry) bool { switch { case entry.Pattern == ".": @@ -291,3 +352,36 @@ func (c *HandlerChain) isHandlerMatch(qname string, entry HandlerEntry) bool { } } } + +// internalResponseWriter captures a dns.Msg for in-process chain queries. +type internalResponseWriter struct { + response *dns.Msg +} + +func (w *internalResponseWriter) WriteMsg(m *dns.Msg) error { w.response = m; return nil } +func (w *internalResponseWriter) LocalAddr() net.Addr { return nil } +func (w *internalResponseWriter) RemoteAddr() net.Addr { return nil } + +// Write unpacks raw DNS bytes so handlers that call Write instead of WriteMsg +// still surface their answer to ResolveInternal. +func (w *internalResponseWriter) Write(p []byte) (int, error) { + msg := new(dns.Msg) + if err := msg.Unpack(p); err != nil { + return 0, err + } + w.response = msg + return len(p), nil +} + +func (w *internalResponseWriter) Close() error { return nil } +func (w *internalResponseWriter) TsigStatus() error { return nil } + +// TsigTimersOnly is part of dns.ResponseWriter. +func (w *internalResponseWriter) TsigTimersOnly(bool) { + // no-op: in-process queries carry no TSIG state. +} + +// Hijack is part of dns.ResponseWriter. +func (w *internalResponseWriter) Hijack() { + // no-op: in-process queries have no underlying connection to hand off. +} diff --git a/client/internal/dns/handler_chain_test.go b/client/internal/dns/handler_chain_test.go index fa9525069..034a760dc 100644 --- a/client/internal/dns/handler_chain_test.go +++ b/client/internal/dns/handler_chain_test.go @@ -1,11 +1,15 @@ package dns_test import ( + "context" + "net" "testing" + "time" "github.com/miekg/dns" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" nbdns "github.com/netbirdio/netbird/client/internal/dns" "github.com/netbirdio/netbird/client/internal/dns/test" @@ -1042,3 +1046,163 @@ func TestHandlerChain_AddRemoveRoundtrip(t *testing.T) { }) } } + +// answeringHandler writes a fixed A record to ack the query. Used to verify +// which handler ResolveInternal dispatches to. +type answeringHandler struct { + name string + ip string +} + +func (h *answeringHandler) ServeDNS(w dns.ResponseWriter, r *dns.Msg) { + resp := &dns.Msg{} + resp.SetReply(r) + resp.Answer = []dns.RR{&dns.A{ + Hdr: dns.RR_Header{Name: r.Question[0].Name, Rrtype: dns.TypeA, Class: dns.ClassINET, Ttl: 60}, + A: net.ParseIP(h.ip).To4(), + }} + _ = w.WriteMsg(resp) +} + +func (h *answeringHandler) String() string { return h.name } + +func TestHandlerChain_ResolveInternal_SkipsAboveMaxPriority(t *testing.T) { + chain := nbdns.NewHandlerChain() + + high := &answeringHandler{name: "high", ip: "10.0.0.1"} + low := &answeringHandler{name: "low", ip: "10.0.0.2"} + + chain.AddHandler("example.com.", high, nbdns.PriorityMgmtCache) + chain.AddHandler("example.com.", low, nbdns.PriorityUpstream) + + r := new(dns.Msg) + r.SetQuestion("example.com.", dns.TypeA) + + resp, err := chain.ResolveInternal(context.Background(), r, nbdns.PriorityUpstream) + assert.NoError(t, err) + assert.NotNil(t, resp) + assert.Equal(t, 1, len(resp.Answer)) + a, ok := resp.Answer[0].(*dns.A) + assert.True(t, ok) + assert.Equal(t, "10.0.0.2", a.A.String(), "should skip mgmtCache handler and resolve via upstream") +} + +func TestHandlerChain_ResolveInternal_ErrorWhenNoMatch(t *testing.T) { + chain := nbdns.NewHandlerChain() + high := &answeringHandler{name: "high", ip: "10.0.0.1"} + chain.AddHandler("example.com.", high, nbdns.PriorityMgmtCache) + + r := new(dns.Msg) + r.SetQuestion("example.com.", dns.TypeA) + + _, err := chain.ResolveInternal(context.Background(), r, nbdns.PriorityUpstream) + assert.Error(t, err, "no handler at or below maxPriority should error") +} + +// rawWriteHandler packs a response and calls ResponseWriter.Write directly +// (instead of WriteMsg), exercising the internalResponseWriter.Write path. +type rawWriteHandler struct { + ip string +} + +func (h *rawWriteHandler) ServeDNS(w dns.ResponseWriter, r *dns.Msg) { + resp := &dns.Msg{} + resp.SetReply(r) + resp.Answer = []dns.RR{&dns.A{ + Hdr: dns.RR_Header{Name: r.Question[0].Name, Rrtype: dns.TypeA, Class: dns.ClassINET, Ttl: 60}, + A: net.ParseIP(h.ip).To4(), + }} + packed, err := resp.Pack() + if err != nil { + return + } + _, _ = w.Write(packed) +} + +func TestHandlerChain_ResolveInternal_CapturesRawWrite(t *testing.T) { + chain := nbdns.NewHandlerChain() + chain.AddHandler("example.com.", &rawWriteHandler{ip: "10.0.0.3"}, nbdns.PriorityUpstream) + + r := new(dns.Msg) + r.SetQuestion("example.com.", dns.TypeA) + + resp, err := chain.ResolveInternal(context.Background(), r, nbdns.PriorityUpstream) + assert.NoError(t, err) + require.NotNil(t, resp) + require.Len(t, resp.Answer, 1) + a, ok := resp.Answer[0].(*dns.A) + require.True(t, ok) + assert.Equal(t, "10.0.0.3", a.A.String(), "handlers calling Write(packed) must still surface their answer") +} + +func TestHandlerChain_ResolveInternal_EmptyQuestion(t *testing.T) { + chain := nbdns.NewHandlerChain() + _, err := chain.ResolveInternal(context.Background(), new(dns.Msg), nbdns.PriorityUpstream) + assert.Error(t, err) +} + +// hangingHandler blocks indefinitely until closed, simulating a wedged upstream. +type hangingHandler struct { + block chan struct{} +} + +func (h *hangingHandler) ServeDNS(w dns.ResponseWriter, r *dns.Msg) { + <-h.block + resp := &dns.Msg{} + resp.SetReply(r) + _ = w.WriteMsg(resp) +} + +func (h *hangingHandler) String() string { return "hangingHandler" } + +func TestHandlerChain_ResolveInternal_HonorsContextTimeout(t *testing.T) { + chain := nbdns.NewHandlerChain() + h := &hangingHandler{block: make(chan struct{})} + defer close(h.block) + + chain.AddHandler("example.com.", h, nbdns.PriorityUpstream) + + r := new(dns.Msg) + r.SetQuestion("example.com.", dns.TypeA) + + ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond) + defer cancel() + + start := time.Now() + _, err := chain.ResolveInternal(ctx, r, nbdns.PriorityUpstream) + elapsed := time.Since(start) + + assert.Error(t, err) + assert.ErrorIs(t, err, context.DeadlineExceeded) + assert.Less(t, elapsed, 500*time.Millisecond, "ResolveInternal must return shortly after ctx deadline") +} + +func TestHandlerChain_HasRootHandlerAtOrBelow(t *testing.T) { + chain := nbdns.NewHandlerChain() + h := &answeringHandler{name: "h", ip: "10.0.0.1"} + + assert.False(t, chain.HasRootHandlerAtOrBelow(nbdns.PriorityUpstream), "empty chain") + + chain.AddHandler("example.com.", h, nbdns.PriorityUpstream) + assert.False(t, chain.HasRootHandlerAtOrBelow(nbdns.PriorityUpstream), "non-root handler does not count") + + chain.AddHandler(".", h, nbdns.PriorityMgmtCache) + assert.False(t, chain.HasRootHandlerAtOrBelow(nbdns.PriorityUpstream), "root handler above threshold excluded") + + chain.AddHandler(".", h, nbdns.PriorityDefault) + assert.True(t, chain.HasRootHandlerAtOrBelow(nbdns.PriorityUpstream), "root handler at PriorityDefault included") + + chain.RemoveHandler(".", nbdns.PriorityDefault) + assert.False(t, chain.HasRootHandlerAtOrBelow(nbdns.PriorityUpstream)) + + // Primary nsgroup case: root handler lands at PriorityUpstream. + chain.AddHandler(".", h, nbdns.PriorityUpstream) + assert.True(t, chain.HasRootHandlerAtOrBelow(nbdns.PriorityUpstream), "root at PriorityUpstream included") + chain.RemoveHandler(".", nbdns.PriorityUpstream) + + // Fallback case: original /etc/resolv.conf entries land at PriorityFallback. + chain.AddHandler(".", h, nbdns.PriorityFallback) + assert.True(t, chain.HasRootHandlerAtOrBelow(nbdns.PriorityUpstream), "root at PriorityFallback included") + chain.RemoveHandler(".", nbdns.PriorityFallback) + assert.False(t, chain.HasRootHandlerAtOrBelow(nbdns.PriorityUpstream)) +} diff --git a/client/internal/dns/host_unix.go b/client/internal/dns/host_unix.go index 422fed4e5..d7301d725 100644 --- a/client/internal/dns/host_unix.go +++ b/client/internal/dns/host_unix.go @@ -46,12 +46,12 @@ type restoreHostManager interface { } func newHostManager(wgInterface string) (hostManager, error) { - osManager, err := getOSDNSManagerType() + osManager, reason, err := getOSDNSManagerType() if err != nil { return nil, fmt.Errorf("get os dns manager type: %w", err) } - log.Infof("System DNS manager discovered: %s", osManager) + log.Infof("System DNS manager discovered: %s (%s)", osManager, reason) mgr, err := newHostManagerFromType(wgInterface, osManager) // need to explicitly return nil mgr on error to avoid returning a non-nil interface containing a nil value if err != nil { @@ -74,17 +74,49 @@ func newHostManagerFromType(wgInterface string, osManager osManagerType) (restor } } -func getOSDNSManagerType() (osManagerType, error) { +func getOSDNSManagerType() (osManagerType, string, error) { + resolved := isSystemdResolvedRunning() + nss := isLibnssResolveUsed() + stub := checkStub() + + // Prefer systemd-resolved whenever it owns libc resolution, regardless of + // who wrote /etc/resolv.conf. File-mode rewrites do not affect lookups + // that go through nss-resolve, and in foreign mode they can loop back + // through resolved as an upstream. + if resolved && (nss || stub) { + return systemdManager, fmt.Sprintf("systemd-resolved active (nss-resolve=%t, stub=%t)", nss, stub), nil + } + + mgr, reason, rejected, err := scanResolvConfHeader() + if err != nil { + return 0, "", err + } + if reason != "" { + return mgr, reason, nil + } + + fallback := fmt.Sprintf("no manager matched (resolved=%t, nss-resolve=%t, stub=%t)", resolved, nss, stub) + if len(rejected) > 0 { + fallback += "; rejected: " + strings.Join(rejected, ", ") + } + return fileManager, fallback, nil +} + +// scanResolvConfHeader walks /etc/resolv.conf header comments and returns the +// matching manager. If reason is empty the caller should pick file mode and +// use rejected for diagnostics. +func scanResolvConfHeader() (osManagerType, string, []string, error) { file, err := os.Open(defaultResolvConfPath) if err != nil { - return 0, fmt.Errorf("unable to open %s for checking owner, got error: %w", defaultResolvConfPath, err) + return 0, "", nil, fmt.Errorf("unable to open %s for checking owner, got error: %w", defaultResolvConfPath, err) } defer func() { - if err := file.Close(); err != nil { - log.Errorf("close file %s: %s", defaultResolvConfPath, err) + if cerr := file.Close(); cerr != nil { + log.Errorf("close file %s: %s", defaultResolvConfPath, cerr) } }() + var rejected []string scanner := bufio.NewScanner(file) for scanner.Scan() { text := scanner.Text() @@ -92,41 +124,48 @@ func getOSDNSManagerType() (osManagerType, error) { continue } if text[0] != '#' { - return fileManager, nil + break } - if strings.Contains(text, fileGeneratedResolvConfContentHeader) { - return netbirdManager, nil - } - if strings.Contains(text, "NetworkManager") && isDbusListenerRunning(networkManagerDest, networkManagerDbusObjectNode) && isNetworkManagerSupported() { - return networkManager, nil - } - if strings.Contains(text, "systemd-resolved") && isSystemdResolvedRunning() { - if checkStub() { - return systemdManager, nil - } else { - return fileManager, nil - } - } - if strings.Contains(text, "resolvconf") { - if isSystemdResolveConfMode() { - return systemdManager, nil - } - - return resolvConfManager, nil + if mgr, reason, rej := matchResolvConfHeader(text); reason != "" { + return mgr, reason, nil, nil + } else if rej != "" { + rejected = append(rejected, rej) } } if err := scanner.Err(); err != nil && err != io.EOF { - return 0, fmt.Errorf("scan: %w", err) + return 0, "", nil, fmt.Errorf("scan: %w", err) } - - return fileManager, nil + return 0, "", rejected, nil } -// checkStub checks if the stub resolver is disabled in systemd-resolved. If it is disabled, we fall back to file manager. +// matchResolvConfHeader inspects a single comment line. Returns either a +// definitive (manager, reason) or a non-empty rejected diagnostic. +func matchResolvConfHeader(text string) (osManagerType, string, string) { + if strings.Contains(text, fileGeneratedResolvConfContentHeader) { + return netbirdManager, "netbird-managed resolv.conf header detected", "" + } + if strings.Contains(text, "NetworkManager") { + if isDbusListenerRunning(networkManagerDest, networkManagerDbusObjectNode) && isNetworkManagerSupported() { + return networkManager, "NetworkManager header + supported version on dbus", "" + } + return 0, "", "NetworkManager header (no dbus or unsupported version)" + } + if strings.Contains(text, "resolvconf") { + if isSystemdResolveConfMode() { + return systemdManager, "resolvconf header in systemd-resolved compatibility mode", "" + } + return resolvConfManager, "resolvconf header detected", "" + } + return 0, "", "" +} + +// checkStub reports whether systemd-resolved's stub (127.0.0.53) is listed +// in /etc/resolv.conf. On parse failure we assume it is, to avoid dropping +// into file mode while resolved is active. func checkStub() bool { rConf, err := parseDefaultResolvConf() if err != nil { - log.Warnf("failed to parse resolv conf: %s", err) + log.Warnf("failed to parse resolv conf, assuming stub is active: %s", err) return true } @@ -139,3 +178,36 @@ func checkStub() bool { return false } + +// isLibnssResolveUsed reports whether nss-resolve is listed before dns on +// the hosts: line of /etc/nsswitch.conf. When it is, libc lookups are +// delegated to systemd-resolved regardless of /etc/resolv.conf. +func isLibnssResolveUsed() bool { + bs, err := os.ReadFile(nsswitchConfPath) + if err != nil { + log.Debugf("read %s: %v", nsswitchConfPath, err) + return false + } + return parseNsswitchResolveAhead(bs) +} + +func parseNsswitchResolveAhead(data []byte) bool { + for _, line := range strings.Split(string(data), "\n") { + if i := strings.IndexByte(line, '#'); i >= 0 { + line = line[:i] + } + fields := strings.Fields(line) + if len(fields) < 2 || fields[0] != "hosts:" { + continue + } + for _, module := range fields[1:] { + switch module { + case "dns": + return false + case "resolve": + return true + } + } + } + return false +} diff --git a/client/internal/dns/host_unix_test.go b/client/internal/dns/host_unix_test.go new file mode 100644 index 000000000..e936281d3 --- /dev/null +++ b/client/internal/dns/host_unix_test.go @@ -0,0 +1,76 @@ +//go:build (linux && !android) || freebsd + +package dns + +import "testing" + +func TestParseNsswitchResolveAhead(t *testing.T) { + tests := []struct { + name string + in string + want bool + }{ + { + name: "resolve before dns with action token", + in: "hosts: mymachines resolve [!UNAVAIL=return] files myhostname dns\n", + want: true, + }, + { + name: "dns before resolve", + in: "hosts: files mdns4_minimal [NOTFOUND=return] dns resolve\n", + want: false, + }, + { + name: "debian default with only dns", + in: "hosts: files mdns4_minimal [NOTFOUND=return] dns mymachines\n", + want: false, + }, + { + name: "neither resolve nor dns", + in: "hosts: files myhostname\n", + want: false, + }, + { + name: "no hosts line", + in: "passwd: files systemd\ngroup: files systemd\n", + want: false, + }, + { + name: "empty", + in: "", + want: false, + }, + { + name: "comments and blank lines ignored", + in: "# comment\n\n# another\nhosts: resolve dns\n", + want: true, + }, + { + name: "trailing inline comment", + in: "hosts: resolve [!UNAVAIL=return] dns # fallback\n", + want: true, + }, + { + name: "hosts token must be the first field", + in: " hosts: resolve dns\n", + want: true, + }, + { + name: "other db line mentioning resolve is ignored", + in: "networks: resolve\nhosts: dns\n", + want: false, + }, + { + name: "only resolve, no dns", + in: "hosts: files resolve\n", + want: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := parseNsswitchResolveAhead([]byte(tt.in)); got != tt.want { + t.Errorf("parseNsswitchResolveAhead() = %v, want %v", got, tt.want) + } + }) + } +} diff --git a/client/internal/dns/mgmt/mgmt.go b/client/internal/dns/mgmt/mgmt.go index 314af51d9..988e427fb 100644 --- a/client/internal/dns/mgmt/mgmt.go +++ b/client/internal/dns/mgmt/mgmt.go @@ -2,40 +2,83 @@ package mgmt import ( "context" + "errors" "fmt" "net" - "net/netip" "net/url" + "os" + "slices" "strings" "sync" + "sync/atomic" "time" "github.com/miekg/dns" log "github.com/sirupsen/logrus" + "golang.org/x/sync/singleflight" dnsconfig "github.com/netbirdio/netbird/client/internal/dns/config" + "github.com/netbirdio/netbird/client/internal/dns/resutil" "github.com/netbirdio/netbird/shared/management/domain" ) -const dnsTimeout = 5 * time.Second +const ( + dnsTimeout = 5 * time.Second + defaultTTL = 300 * time.Second + refreshBackoff = 30 * time.Second -// Resolver caches critical NetBird infrastructure domains + // envMgmtCacheTTL overrides defaultTTL for integration/dev testing. + envMgmtCacheTTL = "NB_MGMT_CACHE_TTL" +) + +// ChainResolver lets the cache refresh stale entries through the DNS handler +// chain instead of net.DefaultResolver, avoiding loopback when NetBird is the +// system resolver. +type ChainResolver interface { + ResolveInternal(ctx context.Context, msg *dns.Msg, maxPriority int) (*dns.Msg, error) + HasRootHandlerAtOrBelow(maxPriority int) bool +} + +// cachedRecord holds DNS records plus timestamps used for TTL refresh. +// records and cachedAt are set at construction and treated as immutable; +// lastFailedRefresh and consecFailures are mutable and must be accessed under +// Resolver.mutex. +type cachedRecord struct { + records []dns.RR + cachedAt time.Time + lastFailedRefresh time.Time + consecFailures int +} + +// Resolver caches critical NetBird infrastructure domains. +// records, refreshing, mgmtDomain and serverDomains are all guarded by mutex. type Resolver struct { - records map[dns.Question][]dns.RR + records map[dns.Question]*cachedRecord mgmtDomain *domain.Domain serverDomains *dnsconfig.ServerDomains mutex sync.RWMutex -} -type ipsResponse struct { - ips []netip.Addr - err error + chain ChainResolver + chainMaxPriority int + refreshGroup singleflight.Group + + // refreshing tracks questions whose refresh is running via the OS + // fallback path. A ServeDNS hit for a question in this map indicates + // the OS resolver routed the recursive query back to us (loop). Only + // the OS path arms this so chain-path refreshes don't produce false + // positives. The atomic bool is CAS-flipped once per refresh to + // throttle the warning log. + refreshing map[dns.Question]*atomic.Bool + + cacheTTL time.Duration } // NewResolver creates a new management domains cache resolver. func NewResolver() *Resolver { return &Resolver{ - records: make(map[dns.Question][]dns.RR), + records: make(map[dns.Question]*cachedRecord), + refreshing: make(map[dns.Question]*atomic.Bool), + cacheTTL: resolveCacheTTL(), } } @@ -44,7 +87,19 @@ func (m *Resolver) String() string { return "MgmtCacheResolver" } -// ServeDNS implements dns.Handler interface. +// SetChainResolver wires the handler chain used to refresh stale cache entries. +// maxPriority caps which handlers may answer refresh queries (typically +// PriorityUpstream, so upstream/default/fallback handlers are consulted and +// mgmt/route/local handlers are skipped). +func (m *Resolver) SetChainResolver(chain ChainResolver, maxPriority int) { + m.mutex.Lock() + m.chain = chain + m.chainMaxPriority = maxPriority + m.mutex.Unlock() +} + +// ServeDNS serves cached A/AAAA records. Stale entries are returned +// immediately and refreshed asynchronously (stale-while-revalidate). func (m *Resolver) ServeDNS(w dns.ResponseWriter, r *dns.Msg) { if len(r.Question) == 0 { m.continueToNext(w, r) @@ -60,7 +115,14 @@ func (m *Resolver) ServeDNS(w dns.ResponseWriter, r *dns.Msg) { } m.mutex.RLock() - records, found := m.records[question] + cached, found := m.records[question] + inflight := m.refreshing[question] + var shouldRefresh bool + if found { + stale := time.Since(cached.cachedAt) > m.cacheTTL + inBackoff := !cached.lastFailedRefresh.IsZero() && time.Since(cached.lastFailedRefresh) < refreshBackoff + shouldRefresh = stale && !inBackoff + } m.mutex.RUnlock() if !found { @@ -68,12 +130,23 @@ func (m *Resolver) ServeDNS(w dns.ResponseWriter, r *dns.Msg) { return } + if inflight != nil && inflight.CompareAndSwap(false, true) { + log.Warnf("mgmt cache: possible resolver loop for domain=%s: served stale while an OS-fallback refresh was inflight (if NetBird is the system resolver, the OS-path predicate is wrong)", + question.Name) + } + + // Skip scheduling a refresh goroutine if one is already inflight for + // this question; singleflight would dedup anyway but skipping avoids + // a parked goroutine per stale hit under bursty load. + if shouldRefresh && inflight == nil { + m.scheduleRefresh(question, cached) + } + resp := &dns.Msg{} resp.SetReply(r) resp.Authoritative = false resp.RecursionAvailable = true - - resp.Answer = append(resp.Answer, records...) + resp.Answer = cloneRecordsWithTTL(cached.records, m.responseTTL(cached.cachedAt)) log.Debugf("serving %d cached records for domain=%s", len(resp.Answer), question.Name) @@ -98,101 +171,260 @@ func (m *Resolver) continueToNext(w dns.ResponseWriter, r *dns.Msg) { } } -// AddDomain manually adds a domain to cache by resolving it. +// AddDomain resolves a domain and stores its A/AAAA records in the cache. +// A family that resolves NODATA (nil err, zero records) evicts any stale +// entry for that qtype. func (m *Resolver) AddDomain(ctx context.Context, d domain.Domain) error { dnsName := strings.ToLower(dns.Fqdn(d.PunycodeString())) ctx, cancel := context.WithTimeout(ctx, dnsTimeout) defer cancel() - ips, err := lookupIPWithExtraTimeout(ctx, d) - if err != nil { - return err + aRecords, aaaaRecords, errA, errAAAA := m.lookupBoth(ctx, d, dnsName) + + if errA != nil && errAAAA != nil { + return fmt.Errorf("resolve %s: %w", d.SafeString(), errors.Join(errA, errAAAA)) } - var aRecords, aaaaRecords []dns.RR - for _, ip := range ips { - if ip.Is4() { - rr := &dns.A{ - Hdr: dns.RR_Header{ - Name: dnsName, - Rrtype: dns.TypeA, - Class: dns.ClassINET, - Ttl: 300, - }, - A: ip.AsSlice(), - } - aRecords = append(aRecords, rr) - } else if ip.Is6() { - rr := &dns.AAAA{ - Hdr: dns.RR_Header{ - Name: dnsName, - Rrtype: dns.TypeAAAA, - Class: dns.ClassINET, - Ttl: 300, - }, - AAAA: ip.AsSlice(), - } - aaaaRecords = append(aaaaRecords, rr) + if len(aRecords) == 0 && len(aaaaRecords) == 0 { + if err := errors.Join(errA, errAAAA); err != nil { + return fmt.Errorf("resolve %s: no A/AAAA records: %w", d.SafeString(), err) } + return fmt.Errorf("resolve %s: no A/AAAA records", d.SafeString()) } + now := time.Now() m.mutex.Lock() + defer m.mutex.Unlock() - if len(aRecords) > 0 { - aQuestion := dns.Question{ - Name: dnsName, - Qtype: dns.TypeA, - Qclass: dns.ClassINET, - } - m.records[aQuestion] = aRecords - } + m.applyFamilyRecords(dnsName, dns.TypeA, aRecords, errA, now) + m.applyFamilyRecords(dnsName, dns.TypeAAAA, aaaaRecords, errAAAA, now) - if len(aaaaRecords) > 0 { - aaaaQuestion := dns.Question{ - Name: dnsName, - Qtype: dns.TypeAAAA, - Qclass: dns.ClassINET, - } - m.records[aaaaQuestion] = aaaaRecords - } - - m.mutex.Unlock() - - log.Debugf("added domain=%s with %d A records and %d AAAA records", + log.Debugf("added/updated domain=%s with %d A records and %d AAAA records", d.SafeString(), len(aRecords), len(aaaaRecords)) return nil } -func lookupIPWithExtraTimeout(ctx context.Context, d domain.Domain) ([]netip.Addr, error) { - log.Infof("looking up IP for mgmt domain=%s", d.SafeString()) - defer log.Infof("done looking up IP for mgmt domain=%s", d.SafeString()) - resultChan := make(chan *ipsResponse, 1) +// applyFamilyRecords writes records, evicts on NODATA, leaves the cache +// untouched on error. Caller holds m.mutex. +func (m *Resolver) applyFamilyRecords(dnsName string, qtype uint16, records []dns.RR, err error, now time.Time) { + q := dns.Question{Name: dnsName, Qtype: qtype, Qclass: dns.ClassINET} + switch { + case len(records) > 0: + m.records[q] = &cachedRecord{records: records, cachedAt: now} + case err == nil: + delete(m.records, q) + } +} - go func() { - ips, err := net.DefaultResolver.LookupNetIP(ctx, "ip", d.PunycodeString()) - resultChan <- &ipsResponse{ - err: err, - ips: ips, +// scheduleRefresh kicks off an async refresh. DoChan spawns one goroutine per +// unique in-flight key; bursty stale hits share its channel. expected is the +// cachedRecord pointer observed by the caller; the refresh only mutates the +// cache if that pointer is still the one stored, so a stale in-flight refresh +// can't clobber a newer entry written by AddDomain or a competing refresh. +func (m *Resolver) scheduleRefresh(question dns.Question, expected *cachedRecord) { + key := question.Name + "|" + dns.TypeToString[question.Qtype] + _ = m.refreshGroup.DoChan(key, func() (any, error) { + return nil, m.refreshQuestion(question, expected) + }) +} + +// refreshQuestion replaces the cached records on success, or marks the entry +// failed (arming the backoff) on failure. While this runs, ServeDNS can detect +// a resolver loop by spotting a query for this same question arriving on us. +// expected pins the cache entry observed at schedule time; mutations only apply +// if m.records[question] still points at it. +func (m *Resolver) refreshQuestion(question dns.Question, expected *cachedRecord) error { + ctx, cancel := context.WithTimeout(context.Background(), dnsTimeout) + defer cancel() + + d, err := domain.FromString(strings.TrimSuffix(question.Name, ".")) + if err != nil { + m.markRefreshFailed(question, expected) + return fmt.Errorf("parse domain: %w", err) + } + + records, err := m.lookupRecords(ctx, d, question) + if err != nil { + fails := m.markRefreshFailed(question, expected) + logf := log.Warnf + if fails == 0 || fails > 1 { + logf = log.Debugf } - }() - - var resp *ipsResponse - - select { - case <-time.After(dnsTimeout + time.Millisecond*500): - log.Warnf("timed out waiting for IP for mgmt domain=%s", d.SafeString()) - return nil, fmt.Errorf("timed out waiting for ips to be available for domain %s", d.SafeString()) - case <-ctx.Done(): - return nil, ctx.Err() - case resp = <-resultChan: + logf("refresh mgmt cache domain=%s type=%s: %v (consecutive failures=%d)", + d.SafeString(), dns.TypeToString[question.Qtype], err, fails) + return err } - if resp.err != nil { - return nil, fmt.Errorf("resolve domain %s: %w", d.SafeString(), resp.err) + // NOERROR/NODATA: family gone upstream, evict so we stop serving stale. + if len(records) == 0 { + m.mutex.Lock() + if m.records[question] == expected { + delete(m.records, question) + m.mutex.Unlock() + log.Infof("removed mgmt cache domain=%s type=%s: no records returned", + d.SafeString(), dns.TypeToString[question.Qtype]) + return nil + } + m.mutex.Unlock() + log.Debugf("skipping refresh evict for domain=%s type=%s: entry changed during refresh", + d.SafeString(), dns.TypeToString[question.Qtype]) + return nil } - return resp.ips, nil + + now := time.Now() + m.mutex.Lock() + if m.records[question] != expected { + m.mutex.Unlock() + log.Debugf("skipping refresh write for domain=%s type=%s: entry changed during refresh", + d.SafeString(), dns.TypeToString[question.Qtype]) + return nil + } + m.records[question] = &cachedRecord{records: records, cachedAt: now} + m.mutex.Unlock() + + log.Infof("refreshed mgmt cache domain=%s type=%s", + d.SafeString(), dns.TypeToString[question.Qtype]) + return nil +} + +func (m *Resolver) markRefreshing(question dns.Question) { + m.mutex.Lock() + m.refreshing[question] = &atomic.Bool{} + m.mutex.Unlock() +} + +func (m *Resolver) clearRefreshing(question dns.Question) { + m.mutex.Lock() + delete(m.refreshing, question) + m.mutex.Unlock() +} + +// markRefreshFailed arms the backoff and returns the new consecutive-failure +// count so callers can downgrade subsequent failure logs to debug. +func (m *Resolver) markRefreshFailed(question dns.Question, expected *cachedRecord) int { + m.mutex.Lock() + defer m.mutex.Unlock() + c, ok := m.records[question] + if !ok || c != expected { + return 0 + } + c.lastFailedRefresh = time.Now() + c.consecFailures++ + return c.consecFailures +} + +// lookupBoth resolves A and AAAA via chain or OS. Per-family errors let +// callers tell records, NODATA (nil err, no records), and failure apart. +func (m *Resolver) lookupBoth(ctx context.Context, d domain.Domain, dnsName string) (aRecords, aaaaRecords []dns.RR, errA, errAAAA error) { + m.mutex.RLock() + chain := m.chain + maxPriority := m.chainMaxPriority + m.mutex.RUnlock() + + if chain != nil && chain.HasRootHandlerAtOrBelow(maxPriority) { + aRecords, errA = m.lookupViaChain(ctx, chain, maxPriority, dnsName, dns.TypeA) + aaaaRecords, errAAAA = m.lookupViaChain(ctx, chain, maxPriority, dnsName, dns.TypeAAAA) + return + } + + // TODO: drop once every supported OS registers a fallback resolver. Safe + // today: no root handler at priority ≤ PriorityUpstream means NetBird is + // not the system resolver, so net.DefaultResolver will not loop back. + aRecords, errA = m.osLookup(ctx, d, dnsName, dns.TypeA) + aaaaRecords, errAAAA = m.osLookup(ctx, d, dnsName, dns.TypeAAAA) + return +} + +// lookupRecords resolves a single record type via chain or OS. The OS branch +// arms the loop detector for the duration of its call so that ServeDNS can +// spot the OS resolver routing the recursive query back to us. +func (m *Resolver) lookupRecords(ctx context.Context, d domain.Domain, q dns.Question) ([]dns.RR, error) { + m.mutex.RLock() + chain := m.chain + maxPriority := m.chainMaxPriority + m.mutex.RUnlock() + + if chain != nil && chain.HasRootHandlerAtOrBelow(maxPriority) { + return m.lookupViaChain(ctx, chain, maxPriority, q.Name, q.Qtype) + } + + // TODO: drop once every supported OS registers a fallback resolver. + m.markRefreshing(q) + defer m.clearRefreshing(q) + + return m.osLookup(ctx, d, q.Name, q.Qtype) +} + +// lookupViaChain resolves via the handler chain and rewrites each RR to use +// dnsName as owner and m.cacheTTL as TTL, so CNAME-backed domains don't cache +// target-owned records or upstream TTLs. NODATA returns (nil, nil). +func (m *Resolver) lookupViaChain(ctx context.Context, chain ChainResolver, maxPriority int, dnsName string, qtype uint16) ([]dns.RR, error) { + msg := &dns.Msg{} + msg.SetQuestion(dnsName, qtype) + msg.RecursionDesired = true + + resp, err := chain.ResolveInternal(ctx, msg, maxPriority) + if err != nil { + return nil, fmt.Errorf("chain resolve: %w", err) + } + if resp == nil { + return nil, fmt.Errorf("chain resolve returned nil response") + } + if resp.Rcode != dns.RcodeSuccess { + return nil, fmt.Errorf("chain resolve rcode=%s", dns.RcodeToString[resp.Rcode]) + } + + ttl := uint32(m.cacheTTL.Seconds()) + owners := cnameOwners(dnsName, resp.Answer) + var filtered []dns.RR + for _, rr := range resp.Answer { + h := rr.Header() + if h.Class != dns.ClassINET || h.Rrtype != qtype { + continue + } + if !owners[strings.ToLower(dns.Fqdn(h.Name))] { + continue + } + if cp := cloneIPRecord(rr, dnsName, ttl); cp != nil { + filtered = append(filtered, cp) + } + } + return filtered, nil +} + +// osLookup resolves a single family via net.DefaultResolver using resutil, +// which disambiguates NODATA from NXDOMAIN and Unmaps v4-mapped-v6. NODATA +// returns (nil, nil). +func (m *Resolver) osLookup(ctx context.Context, d domain.Domain, dnsName string, qtype uint16) ([]dns.RR, error) { + network := resutil.NetworkForQtype(qtype) + if network == "" { + return nil, fmt.Errorf("unsupported qtype %s", dns.TypeToString[qtype]) + } + + log.Infof("looking up IP for mgmt domain=%s type=%s", d.SafeString(), dns.TypeToString[qtype]) + defer log.Infof("done looking up IP for mgmt domain=%s type=%s", d.SafeString(), dns.TypeToString[qtype]) + + result := resutil.LookupIP(ctx, net.DefaultResolver, network, d.PunycodeString(), qtype) + if result.Rcode == dns.RcodeSuccess { + return resutil.IPsToRRs(dnsName, result.IPs, uint32(m.cacheTTL.Seconds())), nil + } + + if result.Err != nil { + return nil, fmt.Errorf("resolve %s type=%s: %w", d.SafeString(), dns.TypeToString[qtype], result.Err) + } + return nil, fmt.Errorf("resolve %s type=%s: rcode=%s", d.SafeString(), dns.TypeToString[qtype], dns.RcodeToString[result.Rcode]) +} + +// responseTTL returns the remaining cache lifetime in seconds (rounded up), +// so downstream resolvers don't cache an answer for longer than we will. +func (m *Resolver) responseTTL(cachedAt time.Time) uint32 { + remaining := m.cacheTTL - time.Since(cachedAt) + if remaining <= 0 { + return 0 + } + return uint32((remaining + time.Second - 1) / time.Second) } // PopulateFromConfig extracts and caches domains from the client configuration. @@ -224,19 +456,12 @@ func (m *Resolver) RemoveDomain(d domain.Domain) error { m.mutex.Lock() defer m.mutex.Unlock() - aQuestion := dns.Question{ - Name: dnsName, - Qtype: dns.TypeA, - Qclass: dns.ClassINET, - } - delete(m.records, aQuestion) - - aaaaQuestion := dns.Question{ - Name: dnsName, - Qtype: dns.TypeAAAA, - Qclass: dns.ClassINET, - } - delete(m.records, aaaaQuestion) + qA := dns.Question{Name: dnsName, Qtype: dns.TypeA, Qclass: dns.ClassINET} + qAAAA := dns.Question{Name: dnsName, Qtype: dns.TypeAAAA, Qclass: dns.ClassINET} + delete(m.records, qA) + delete(m.records, qAAAA) + delete(m.refreshing, qA) + delete(m.refreshing, qAAAA) log.Debugf("removed domain=%s from cache", d.SafeString()) return nil @@ -394,3 +619,73 @@ func (m *Resolver) extractDomainsFromServerDomains(serverDomains dnsconfig.Serve return domains } + +// cloneIPRecord returns a deep copy of rr retargeted to owner with ttl. Non +// A/AAAA records return nil. +func cloneIPRecord(rr dns.RR, owner string, ttl uint32) dns.RR { + switch r := rr.(type) { + case *dns.A: + cp := *r + cp.Hdr.Name = owner + cp.Hdr.Ttl = ttl + cp.A = slices.Clone(r.A) + return &cp + case *dns.AAAA: + cp := *r + cp.Hdr.Name = owner + cp.Hdr.Ttl = ttl + cp.AAAA = slices.Clone(r.AAAA) + return &cp + } + return nil +} + +// cloneRecordsWithTTL clones A/AAAA records preserving their owner and +// stamping ttl so the response shares no memory with the cached slice. +func cloneRecordsWithTTL(records []dns.RR, ttl uint32) []dns.RR { + out := make([]dns.RR, 0, len(records)) + for _, rr := range records { + if cp := cloneIPRecord(rr, rr.Header().Name, ttl); cp != nil { + out = append(out, cp) + } + } + return out +} + +// cnameOwners returns dnsName plus every target reachable by following CNAMEs +// in answer, iterating until fixed point so out-of-order chains resolve. +func cnameOwners(dnsName string, answer []dns.RR) map[string]bool { + owners := map[string]bool{dnsName: true} + for { + added := false + for _, rr := range answer { + cname, ok := rr.(*dns.CNAME) + if !ok { + continue + } + name := strings.ToLower(dns.Fqdn(cname.Hdr.Name)) + if !owners[name] { + continue + } + target := strings.ToLower(dns.Fqdn(cname.Target)) + if !owners[target] { + owners[target] = true + added = true + } + } + if !added { + return owners + } + } +} + +// resolveCacheTTL reads the cache TTL override env var; invalid or empty +// values fall back to defaultTTL. Called once per Resolver from NewResolver. +func resolveCacheTTL() time.Duration { + if v := os.Getenv(envMgmtCacheTTL); v != "" { + if d, err := time.ParseDuration(v); err == nil && d > 0 { + return d + } + } + return defaultTTL +} diff --git a/client/internal/dns/mgmt/mgmt_refresh_test.go b/client/internal/dns/mgmt/mgmt_refresh_test.go new file mode 100644 index 000000000..9faa5a0b8 --- /dev/null +++ b/client/internal/dns/mgmt/mgmt_refresh_test.go @@ -0,0 +1,408 @@ +package mgmt + +import ( + "context" + "errors" + "net" + "sync" + "sync/atomic" + "testing" + "time" + + "github.com/miekg/dns" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/netbirdio/netbird/client/internal/dns/test" + "github.com/netbirdio/netbird/shared/management/domain" +) + +type fakeChain struct { + mu sync.Mutex + calls map[string]int + answers map[string][]dns.RR + err error + hasRoot bool + onLookup func() +} + +func newFakeChain() *fakeChain { + return &fakeChain{ + calls: map[string]int{}, + answers: map[string][]dns.RR{}, + hasRoot: true, + } +} + +func (f *fakeChain) HasRootHandlerAtOrBelow(maxPriority int) bool { + f.mu.Lock() + defer f.mu.Unlock() + return f.hasRoot +} + +func (f *fakeChain) ResolveInternal(ctx context.Context, msg *dns.Msg, maxPriority int) (*dns.Msg, error) { + f.mu.Lock() + q := msg.Question[0] + key := q.Name + "|" + dns.TypeToString[q.Qtype] + f.calls[key]++ + answers := f.answers[key] + err := f.err + onLookup := f.onLookup + f.mu.Unlock() + + if onLookup != nil { + onLookup() + } + if err != nil { + return nil, err + } + resp := &dns.Msg{} + resp.SetReply(msg) + resp.Answer = answers + return resp, nil +} + +func (f *fakeChain) setAnswer(name string, qtype uint16, ip string) { + f.mu.Lock() + defer f.mu.Unlock() + key := name + "|" + dns.TypeToString[qtype] + hdr := dns.RR_Header{Name: name, Rrtype: qtype, Class: dns.ClassINET, Ttl: 60} + switch qtype { + case dns.TypeA: + f.answers[key] = []dns.RR{&dns.A{Hdr: hdr, A: net.ParseIP(ip).To4()}} + case dns.TypeAAAA: + f.answers[key] = []dns.RR{&dns.AAAA{Hdr: hdr, AAAA: net.ParseIP(ip).To16()}} + } +} + +func (f *fakeChain) callCount(name string, qtype uint16) int { + f.mu.Lock() + defer f.mu.Unlock() + return f.calls[name+"|"+dns.TypeToString[qtype]] +} + +// waitFor polls the predicate until it returns true or the deadline passes. +func waitFor(t *testing.T, d time.Duration, fn func() bool) { + t.Helper() + deadline := time.Now().Add(d) + for time.Now().Before(deadline) { + if fn() { + return + } + time.Sleep(5 * time.Millisecond) + } + t.Fatalf("condition not met within %s", d) +} + +func queryA(t *testing.T, r *Resolver, name string) *dns.Msg { + t.Helper() + msg := new(dns.Msg) + msg.SetQuestion(name, dns.TypeA) + w := &test.MockResponseWriter{} + r.ServeDNS(w, msg) + return w.GetLastResponse() +} + +func firstA(t *testing.T, resp *dns.Msg) string { + t.Helper() + require.NotNil(t, resp) + require.Greater(t, len(resp.Answer), 0, "expected at least one answer") + a, ok := resp.Answer[0].(*dns.A) + require.True(t, ok, "expected A record") + return a.A.String() +} + +func TestResolver_CacheTTLGatesRefresh(t *testing.T) { + // Same cached entry age, different cacheTTL values: the shorter TTL must + // trigger a background refresh, the longer one must not. Proves that the + // per-Resolver cacheTTL field actually drives the stale decision. + cachedAt := time.Now().Add(-100 * time.Millisecond) + + newRec := func() *cachedRecord { + return &cachedRecord{ + records: []dns.RR{&dns.A{ + Hdr: dns.RR_Header{Name: "mgmt.example.com.", Rrtype: dns.TypeA, Class: dns.ClassINET, Ttl: 60}, + A: net.ParseIP("10.0.0.1").To4(), + }}, + cachedAt: cachedAt, + } + } + q := dns.Question{Name: "mgmt.example.com.", Qtype: dns.TypeA, Qclass: dns.ClassINET} + + t.Run("short TTL treats entry as stale and refreshes", func(t *testing.T) { + r := NewResolver() + r.cacheTTL = 10 * time.Millisecond + chain := newFakeChain() + chain.setAnswer(q.Name, dns.TypeA, "10.0.0.2") + r.SetChainResolver(chain, 50) + r.records[q] = newRec() + + resp := queryA(t, r, q.Name) + assert.Equal(t, "10.0.0.1", firstA(t, resp), "stale entry must be served while refresh runs") + + waitFor(t, time.Second, func() bool { + return chain.callCount(q.Name, dns.TypeA) >= 1 + }) + }) + + t.Run("long TTL keeps entry fresh and skips refresh", func(t *testing.T) { + r := NewResolver() + r.cacheTTL = time.Hour + chain := newFakeChain() + chain.setAnswer(q.Name, dns.TypeA, "10.0.0.2") + r.SetChainResolver(chain, 50) + r.records[q] = newRec() + + resp := queryA(t, r, q.Name) + assert.Equal(t, "10.0.0.1", firstA(t, resp)) + + time.Sleep(50 * time.Millisecond) + assert.Equal(t, 0, chain.callCount(q.Name, dns.TypeA), "fresh entry must not trigger refresh") + }) +} + +func TestResolver_ServeFresh_NoRefresh(t *testing.T) { + r := NewResolver() + chain := newFakeChain() + chain.setAnswer("mgmt.example.com.", dns.TypeA, "10.0.0.2") + r.SetChainResolver(chain, 50) + + r.records[dns.Question{Name: "mgmt.example.com.", Qtype: dns.TypeA, Qclass: dns.ClassINET}] = &cachedRecord{ + records: []dns.RR{&dns.A{ + Hdr: dns.RR_Header{Name: "mgmt.example.com.", Rrtype: dns.TypeA, Class: dns.ClassINET, Ttl: 60}, + A: net.ParseIP("10.0.0.1").To4(), + }}, + cachedAt: time.Now(), // fresh + } + + resp := queryA(t, r, "mgmt.example.com.") + assert.Equal(t, "10.0.0.1", firstA(t, resp)) + + time.Sleep(20 * time.Millisecond) + assert.Equal(t, 0, chain.callCount("mgmt.example.com.", dns.TypeA), "fresh entry must not trigger refresh") +} + +func TestResolver_StaleTriggersAsyncRefresh(t *testing.T) { + r := NewResolver() + chain := newFakeChain() + chain.setAnswer("mgmt.example.com.", dns.TypeA, "10.0.0.2") + r.SetChainResolver(chain, 50) + + q := dns.Question{Name: "mgmt.example.com.", Qtype: dns.TypeA, Qclass: dns.ClassINET} + r.records[q] = &cachedRecord{ + records: []dns.RR{&dns.A{ + Hdr: dns.RR_Header{Name: q.Name, Rrtype: dns.TypeA, Class: dns.ClassINET, Ttl: 60}, + A: net.ParseIP("10.0.0.1").To4(), + }}, + cachedAt: time.Now().Add(-2 * defaultTTL), // stale + } + + // First query: serves stale immediately. + resp := queryA(t, r, "mgmt.example.com.") + assert.Equal(t, "10.0.0.1", firstA(t, resp), "stale entry must be served while refresh runs") + + waitFor(t, time.Second, func() bool { + return chain.callCount("mgmt.example.com.", dns.TypeA) >= 1 + }) + + // Next query should now return the refreshed IP. + waitFor(t, time.Second, func() bool { + resp := queryA(t, r, "mgmt.example.com.") + return resp != nil && len(resp.Answer) > 0 && firstA(t, resp) == "10.0.0.2" + }) +} + +func TestResolver_ConcurrentStaleHitsCollapseRefresh(t *testing.T) { + r := NewResolver() + chain := newFakeChain() + chain.setAnswer("mgmt.example.com.", dns.TypeA, "10.0.0.2") + + var inflight atomic.Int32 + var maxInflight atomic.Int32 + chain.onLookup = func() { + cur := inflight.Add(1) + defer inflight.Add(-1) + for { + prev := maxInflight.Load() + if cur <= prev || maxInflight.CompareAndSwap(prev, cur) { + break + } + } + time.Sleep(50 * time.Millisecond) // hold inflight long enough to collide + } + + r.SetChainResolver(chain, 50) + + q := dns.Question{Name: "mgmt.example.com.", Qtype: dns.TypeA, Qclass: dns.ClassINET} + r.records[q] = &cachedRecord{ + records: []dns.RR{&dns.A{ + Hdr: dns.RR_Header{Name: q.Name, Rrtype: dns.TypeA, Class: dns.ClassINET, Ttl: 60}, + A: net.ParseIP("10.0.0.1").To4(), + }}, + cachedAt: time.Now().Add(-2 * defaultTTL), + } + + var wg sync.WaitGroup + for i := 0; i < 50; i++ { + wg.Add(1) + go func() { + defer wg.Done() + queryA(t, r, "mgmt.example.com.") + }() + } + wg.Wait() + + waitFor(t, 2*time.Second, func() bool { + return inflight.Load() == 0 + }) + + calls := chain.callCount("mgmt.example.com.", dns.TypeA) + assert.LessOrEqual(t, calls, 2, "singleflight must collapse concurrent refreshes (got %d)", calls) + assert.Equal(t, int32(1), maxInflight.Load(), "only one refresh should run concurrently") +} + +func TestResolver_RefreshFailureArmsBackoff(t *testing.T) { + r := NewResolver() + chain := newFakeChain() + chain.err = errors.New("boom") + r.SetChainResolver(chain, 50) + + q := dns.Question{Name: "mgmt.example.com.", Qtype: dns.TypeA, Qclass: dns.ClassINET} + r.records[q] = &cachedRecord{ + records: []dns.RR{&dns.A{ + Hdr: dns.RR_Header{Name: q.Name, Rrtype: dns.TypeA, Class: dns.ClassINET, Ttl: 60}, + A: net.ParseIP("10.0.0.1").To4(), + }}, + cachedAt: time.Now().Add(-2 * defaultTTL), + } + + // First stale hit triggers a refresh attempt that fails. + resp := queryA(t, r, "mgmt.example.com.") + assert.Equal(t, "10.0.0.1", firstA(t, resp), "stale entry served while refresh fails") + + waitFor(t, time.Second, func() bool { + return chain.callCount("mgmt.example.com.", dns.TypeA) == 1 + }) + waitFor(t, time.Second, func() bool { + r.mutex.RLock() + defer r.mutex.RUnlock() + c, ok := r.records[q] + return ok && !c.lastFailedRefresh.IsZero() + }) + + // Subsequent stale hits within backoff window should not schedule more refreshes. + for i := 0; i < 10; i++ { + queryA(t, r, "mgmt.example.com.") + } + time.Sleep(50 * time.Millisecond) + assert.Equal(t, 1, chain.callCount("mgmt.example.com.", dns.TypeA), "backoff must suppress further refreshes") +} + +func TestResolver_NoRootHandler_SkipsChain(t *testing.T) { + r := NewResolver() + chain := newFakeChain() + chain.hasRoot = false + chain.setAnswer("mgmt.example.com.", dns.TypeA, "10.0.0.2") + r.SetChainResolver(chain, 50) + + // With hasRoot=false the chain must not be consulted. Use a short + // deadline so the OS fallback returns quickly without waiting on a + // real network call in CI. + ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond) + defer cancel() + _, _, _, _ = r.lookupBoth(ctx, domain.Domain("mgmt.example.com"), "mgmt.example.com.") + + assert.Equal(t, 0, chain.callCount("mgmt.example.com.", dns.TypeA), + "chain must not be used when no root handler is registered at the bound priority") +} + +func TestResolver_ServeDuringRefreshSetsLoopFlag(t *testing.T) { + // ServeDNS being invoked for a question while a refresh for that question + // is inflight indicates a resolver loop (OS resolver sent the recursive + // query back to us). The inflightRefresh.loopLoggedOnce flag must be set. + r := NewResolver() + + q := dns.Question{Name: "mgmt.example.com.", Qtype: dns.TypeA, Qclass: dns.ClassINET} + r.records[q] = &cachedRecord{ + records: []dns.RR{&dns.A{ + Hdr: dns.RR_Header{Name: q.Name, Rrtype: dns.TypeA, Class: dns.ClassINET, Ttl: 60}, + A: net.ParseIP("10.0.0.1").To4(), + }}, + cachedAt: time.Now(), + } + + // Simulate an inflight refresh. + r.markRefreshing(q) + defer r.clearRefreshing(q) + + resp := queryA(t, r, "mgmt.example.com.") + assert.Equal(t, "10.0.0.1", firstA(t, resp), "stale entry must still be served to avoid breaking external queries") + + r.mutex.RLock() + inflight := r.refreshing[q] + r.mutex.RUnlock() + require.NotNil(t, inflight) + assert.True(t, inflight.Load(), "loop flag must be set once a ServeDNS during refresh was observed") +} + +func TestResolver_LoopFlagOnlyTrippedOncePerRefresh(t *testing.T) { + r := NewResolver() + + q := dns.Question{Name: "mgmt.example.com.", Qtype: dns.TypeA, Qclass: dns.ClassINET} + r.records[q] = &cachedRecord{ + records: []dns.RR{&dns.A{ + Hdr: dns.RR_Header{Name: q.Name, Rrtype: dns.TypeA, Class: dns.ClassINET, Ttl: 60}, + A: net.ParseIP("10.0.0.1").To4(), + }}, + cachedAt: time.Now(), + } + + r.markRefreshing(q) + defer r.clearRefreshing(q) + + // Multiple ServeDNS calls during the same refresh must not re-set the flag + // (CompareAndSwap from false -> true returns true only on the first call). + for range 5 { + queryA(t, r, "mgmt.example.com.") + } + + r.mutex.RLock() + inflight := r.refreshing[q] + r.mutex.RUnlock() + assert.True(t, inflight.Load()) +} + +func TestResolver_NoLoopFlagWhenNotRefreshing(t *testing.T) { + r := NewResolver() + + q := dns.Question{Name: "mgmt.example.com.", Qtype: dns.TypeA, Qclass: dns.ClassINET} + r.records[q] = &cachedRecord{ + records: []dns.RR{&dns.A{ + Hdr: dns.RR_Header{Name: q.Name, Rrtype: dns.TypeA, Class: dns.ClassINET, Ttl: 60}, + A: net.ParseIP("10.0.0.1").To4(), + }}, + cachedAt: time.Now(), + } + + queryA(t, r, "mgmt.example.com.") + + r.mutex.RLock() + _, ok := r.refreshing[q] + r.mutex.RUnlock() + assert.False(t, ok, "no refresh inflight means no loop tracking") +} + +func TestResolver_AddDomain_UsesChainWhenRootRegistered(t *testing.T) { + r := NewResolver() + chain := newFakeChain() + chain.setAnswer("mgmt.example.com.", dns.TypeA, "10.0.0.2") + chain.setAnswer("mgmt.example.com.", dns.TypeAAAA, "fd00::2") + r.SetChainResolver(chain, 50) + + require.NoError(t, r.AddDomain(context.Background(), domain.Domain("mgmt.example.com"))) + + resp := queryA(t, r, "mgmt.example.com.") + assert.Equal(t, "10.0.0.2", firstA(t, resp)) + assert.Equal(t, 1, chain.callCount("mgmt.example.com.", dns.TypeA)) + assert.Equal(t, 1, chain.callCount("mgmt.example.com.", dns.TypeAAAA)) +} diff --git a/client/internal/dns/mgmt/mgmt_test.go b/client/internal/dns/mgmt/mgmt_test.go index 9e8a746f3..276cbba0a 100644 --- a/client/internal/dns/mgmt/mgmt_test.go +++ b/client/internal/dns/mgmt/mgmt_test.go @@ -6,6 +6,7 @@ import ( "net/url" "strings" "testing" + "time" "github.com/miekg/dns" "github.com/stretchr/testify/assert" @@ -23,6 +24,60 @@ func TestResolver_NewResolver(t *testing.T) { assert.False(t, resolver.MatchSubdomains()) } +func TestResolveCacheTTL(t *testing.T) { + tests := []struct { + name string + value string + want time.Duration + }{ + {"unset falls back to default", "", defaultTTL}, + {"valid duration", "45s", 45 * time.Second}, + {"valid minutes", "2m", 2 * time.Minute}, + {"malformed falls back to default", "not-a-duration", defaultTTL}, + {"zero falls back to default", "0s", defaultTTL}, + {"negative falls back to default", "-5s", defaultTTL}, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + t.Setenv(envMgmtCacheTTL, tc.value) + got := resolveCacheTTL() + assert.Equal(t, tc.want, got, "parsed TTL should match") + }) + } +} + +func TestNewResolver_CacheTTLFromEnv(t *testing.T) { + t.Setenv(envMgmtCacheTTL, "7s") + r := NewResolver() + assert.Equal(t, 7*time.Second, r.cacheTTL, "NewResolver should evaluate cacheTTL once from env") +} + +func TestResolver_ResponseTTL(t *testing.T) { + now := time.Now() + tests := []struct { + name string + cacheTTL time.Duration + cachedAt time.Time + wantMin uint32 + wantMax uint32 + }{ + {"fresh entry returns full TTL", 60 * time.Second, now, 59, 60}, + {"half-aged entry returns half TTL", 60 * time.Second, now.Add(-30 * time.Second), 29, 31}, + {"expired entry returns zero", 60 * time.Second, now.Add(-61 * time.Second), 0, 0}, + {"exactly expired returns zero", 10 * time.Second, now.Add(-10 * time.Second), 0, 0}, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + r := &Resolver{cacheTTL: tc.cacheTTL} + got := r.responseTTL(tc.cachedAt) + assert.GreaterOrEqual(t, got, tc.wantMin, "remaining TTL should be >= wantMin") + assert.LessOrEqual(t, got, tc.wantMax, "remaining TTL should be <= wantMax") + }) + } +} + func TestResolver_ExtractDomainFromURL(t *testing.T) { tests := []struct { name string diff --git a/client/internal/dns/server.go b/client/internal/dns/server.go index f7865047b..d4f54dec5 100644 --- a/client/internal/dns/server.go +++ b/client/internal/dns/server.go @@ -212,6 +212,7 @@ func newDefaultServer( ctx, stop := context.WithCancel(ctx) mgmtCacheResolver := mgmt.NewResolver() + mgmtCacheResolver.SetChainResolver(handlerChain, PriorityUpstream) defaultServer := &DefaultServer{ ctx: ctx, diff --git a/client/internal/engine.go b/client/internal/engine.go index be2d8bbf3..8d7e02bd5 100644 --- a/client/internal/engine.go +++ b/client/internal/engine.go @@ -26,6 +26,7 @@ import ( nberrors "github.com/netbirdio/netbird/client/errors" "github.com/netbirdio/netbird/client/firewall" + "github.com/netbirdio/netbird/client/firewall/firewalld" firewallManager "github.com/netbirdio/netbird/client/firewall/manager" "github.com/netbirdio/netbird/client/iface" "github.com/netbirdio/netbird/client/iface/device" @@ -140,6 +141,7 @@ type EngineConfig struct { ProfileConfig *profilemanager.Config LogPath string + TempDir string } // EngineServices holds the external service dependencies required by the Engine. @@ -569,7 +571,7 @@ func (e *Engine) Start(netbirdConfig *mgmProto.NetbirdConfig, mgmtURL *url.URL) e.connMgr.Start(e.ctx) e.srWatcher = guard.NewSRWatcher(e.signal, e.relayManager, e.mobileDep.IFaceDiscover, iceCfg) - e.srWatcher.Start() + e.srWatcher.Start(peer.IsForceRelayed()) e.receiveSignalEvents() e.receiveManagementEvents() @@ -603,6 +605,8 @@ func (e *Engine) createFirewall() error { return nil } + firewalld.SetParentContext(e.ctx) + var err error e.firewall, err = firewall.NewFirewall(e.wgInterface, e.stateManager, e.flowManager.GetLogger(), e.config.DisableServerRoutes, e.config.MTU) if err != nil { @@ -1095,6 +1099,7 @@ func (e *Engine) handleBundle(params *mgmProto.BundleParameters) (*mgmProto.JobR StatusRecorder: e.statusRecorder, SyncResponse: syncResponse, LogPath: e.config.LogPath, + TempDir: e.config.TempDir, ClientMetrics: e.clientMetrics, RefreshStatus: func() { e.RunHealthProbes(true) diff --git a/client/internal/mobile_dependency.go b/client/internal/mobile_dependency.go index 7c95e2b99..310d61a25 100644 --- a/client/internal/mobile_dependency.go +++ b/client/internal/mobile_dependency.go @@ -22,4 +22,8 @@ type MobileDependency struct { DnsManager dns.IosDnsManager FileDescriptor int32 StateFilePath string + + // TempDir is a writable directory for temporary files (e.g., debug bundle zip). + // On Android, this should be set to the app's cache directory. + TempDir string } diff --git a/client/internal/peer/conn.go b/client/internal/peer/conn.go index 8d1585b3f..1e416bfe7 100644 --- a/client/internal/peer/conn.go +++ b/client/internal/peer/conn.go @@ -185,17 +185,20 @@ func (conn *Conn) Open(engineCtx context.Context) error { conn.workerRelay = NewWorkerRelay(conn.ctx, conn.Log, isController(conn.config), conn.config, conn, conn.relayManager) - relayIsSupportedLocally := conn.workerRelay.RelayIsSupportedLocally() - workerICE, err := NewWorkerICE(conn.ctx, conn.Log, conn.config, conn, conn.signaler, conn.iFaceDiscover, conn.statusRecorder, relayIsSupportedLocally) - if err != nil { - return err + forceRelay := IsForceRelayed() + if !forceRelay { + relayIsSupportedLocally := conn.workerRelay.RelayIsSupportedLocally() + workerICE, err := NewWorkerICE(conn.ctx, conn.Log, conn.config, conn, conn.signaler, conn.iFaceDiscover, conn.statusRecorder, relayIsSupportedLocally) + if err != nil { + return err + } + conn.workerICE = workerICE } - conn.workerICE = workerICE conn.handshaker = NewHandshaker(conn.Log, conn.config, conn.signaler, conn.workerICE, conn.workerRelay, conn.metricsStages) conn.handshaker.AddRelayListener(conn.workerRelay.OnNewOffer) - if !isForceRelayed() { + if !forceRelay { conn.handshaker.AddICEListener(conn.workerICE.OnNewOffer) } @@ -251,7 +254,9 @@ func (conn *Conn) Close(signalToRemote bool) { conn.wgWatcherCancel() } conn.workerRelay.CloseConn() - conn.workerICE.Close() + if conn.workerICE != nil { + conn.workerICE.Close() + } if conn.wgProxyRelay != nil { err := conn.wgProxyRelay.CloseConn() @@ -294,7 +299,9 @@ func (conn *Conn) OnRemoteAnswer(answer OfferAnswer) { // OnRemoteCandidate Handles ICE connection Candidate provided by the remote peer. func (conn *Conn) OnRemoteCandidate(candidate ice.Candidate, haRoutes route.HAMap) { conn.dumpState.RemoteCandidate() - conn.workerICE.OnRemoteCandidate(candidate, haRoutes) + if conn.workerICE != nil { + conn.workerICE.OnRemoteCandidate(candidate, haRoutes) + } } // SetOnConnected sets a handler function to be triggered by Conn when a new connection to a remote peer established @@ -712,33 +719,35 @@ func (conn *Conn) evalStatus() ConnStatus { return StatusConnecting } -func (conn *Conn) isConnectedOnAllWay() (connected bool) { - // would be better to protect this with a mutex, but it could cause deadlock with Close function - +// isConnectedOnAllWay evaluates the overall connection status based on ICE and Relay transports. +// +// The result is a tri-state: +// - ConnStatusConnected: all available transports are up +// - ConnStatusPartiallyConnected: relay is up but ICE is still pending/reconnecting +// - ConnStatusDisconnected: no working transport +func (conn *Conn) isConnectedOnAllWay() (status guard.ConnStatus) { defer func() { - if !connected { + if status == guard.ConnStatusDisconnected { conn.logTraceConnState() } }() - // For JS platform: only relay connection is supported - if runtime.GOOS == "js" { - return conn.statusRelay.Get() == worker.StatusConnected + iceWorkerCreated := conn.workerICE != nil + + var iceInProgress bool + if iceWorkerCreated { + iceInProgress = conn.workerICE.InProgress() } - // For non-JS platforms: check ICE connection status - if conn.statusICE.Get() == worker.StatusDisconnected && !conn.workerICE.InProgress() { - return false - } - - // If relay is supported with peer, it must also be connected - if conn.workerRelay.IsRelayConnectionSupportedWithPeer() { - if conn.statusRelay.Get() == worker.StatusDisconnected { - return false - } - } - - return true + return evalConnStatus(connStatusInputs{ + forceRelay: IsForceRelayed(), + peerUsesRelay: conn.workerRelay.IsRelayConnectionSupportedWithPeer(), + relayConnected: conn.statusRelay.Get() == worker.StatusConnected, + remoteSupportsICE: conn.handshaker.RemoteICESupported(), + iceWorkerCreated: iceWorkerCreated, + iceStatusConnecting: conn.statusICE.Get() != worker.StatusDisconnected, + iceInProgress: iceInProgress, + }) } func (conn *Conn) enableWgWatcherIfNeeded(enabledTime time.Time) { @@ -926,3 +935,43 @@ func isController(config ConnConfig) bool { func isRosenpassEnabled(remoteRosenpassPubKey []byte) bool { return remoteRosenpassPubKey != nil } + +func evalConnStatus(in connStatusInputs) guard.ConnStatus { + // "Relay up and needed" — the peer uses relay and the transport is connected. + relayUsedAndUp := in.peerUsesRelay && in.relayConnected + + // Force-relay mode: ICE never runs. Relay is the only transport and must be up. + if in.forceRelay { + return boolToConnStatus(relayUsedAndUp) + } + + // Remote peer doesn't support ICE, or we haven't created the worker yet: + // relay is the only possible transport. + if !in.remoteSupportsICE || !in.iceWorkerCreated { + return boolToConnStatus(relayUsedAndUp) + } + + // ICE counts as "up" when the status is anything other than Disconnected, OR + // when a negotiation is currently in progress (so we don't spam offers while one is in flight). + iceUp := in.iceStatusConnecting || in.iceInProgress + + // Relay side is acceptable if the peer doesn't rely on relay, or relay is connected. + relayOK := !in.peerUsesRelay || in.relayConnected + + switch { + case iceUp && relayOK: + return guard.ConnStatusConnected + case relayUsedAndUp: + // Relay is up but ICE is down — partially connected. + return guard.ConnStatusPartiallyConnected + default: + return guard.ConnStatusDisconnected + } +} + +func boolToConnStatus(connected bool) guard.ConnStatus { + if connected { + return guard.ConnStatusConnected + } + return guard.ConnStatusDisconnected +} diff --git a/client/internal/peer/conn_status.go b/client/internal/peer/conn_status.go index 73acc5ef5..b43e245f3 100644 --- a/client/internal/peer/conn_status.go +++ b/client/internal/peer/conn_status.go @@ -13,6 +13,20 @@ const ( StatusConnected ) +// connStatusInputs is the primitive-valued snapshot of the state that drives the +// tri-state connection classification. Extracted so the decision logic can be unit-tested +// without constructing full Worker/Handshaker objects. +type connStatusInputs struct { + forceRelay bool // NB_FORCE_RELAY or JS/WASM + peerUsesRelay bool // remote peer advertises relay support AND local has relay + relayConnected bool // statusRelay reports Connected (independent of whether peer uses relay) + remoteSupportsICE bool // remote peer sent ICE credentials + iceWorkerCreated bool // local WorkerICE exists (false in force-relay mode) + iceStatusConnecting bool // statusICE is anything other than Disconnected + iceInProgress bool // a negotiation is currently in flight +} + + // ConnStatus describe the status of a peer's connection type ConnStatus int32 diff --git a/client/internal/peer/conn_status_eval_test.go b/client/internal/peer/conn_status_eval_test.go new file mode 100644 index 000000000..66393cafe --- /dev/null +++ b/client/internal/peer/conn_status_eval_test.go @@ -0,0 +1,201 @@ +package peer + +import ( + "testing" + + "github.com/netbirdio/netbird/client/internal/peer/guard" +) + +func TestEvalConnStatus_ForceRelay(t *testing.T) { + tests := []struct { + name string + in connStatusInputs + want guard.ConnStatus + }{ + { + name: "force relay, peer uses relay, relay up", + in: connStatusInputs{ + forceRelay: true, + peerUsesRelay: true, + relayConnected: true, + }, + want: guard.ConnStatusConnected, + }, + { + name: "force relay, peer uses relay, relay down", + in: connStatusInputs{ + forceRelay: true, + peerUsesRelay: true, + relayConnected: false, + }, + want: guard.ConnStatusDisconnected, + }, + { + name: "force relay, peer does NOT use relay - disconnected forever", + in: connStatusInputs{ + forceRelay: true, + peerUsesRelay: false, + relayConnected: true, + }, + want: guard.ConnStatusDisconnected, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + if got := evalConnStatus(tc.in); got != tc.want { + t.Fatalf("evalConnStatus = %v, want %v", got, tc.want) + } + }) + } +} + +func TestEvalConnStatus_ICEUnavailable(t *testing.T) { + tests := []struct { + name string + in connStatusInputs + want guard.ConnStatus + }{ + { + name: "remote does not support ICE, peer uses relay, relay up", + in: connStatusInputs{ + peerUsesRelay: true, + relayConnected: true, + remoteSupportsICE: false, + iceWorkerCreated: true, + }, + want: guard.ConnStatusConnected, + }, + { + name: "remote does not support ICE, peer uses relay, relay down", + in: connStatusInputs{ + peerUsesRelay: true, + relayConnected: false, + remoteSupportsICE: false, + iceWorkerCreated: true, + }, + want: guard.ConnStatusDisconnected, + }, + { + name: "ICE worker not yet created, relay up", + in: connStatusInputs{ + peerUsesRelay: true, + relayConnected: true, + remoteSupportsICE: true, + iceWorkerCreated: false, + }, + want: guard.ConnStatusConnected, + }, + { + name: "remote does not support ICE, peer does not use relay", + in: connStatusInputs{ + peerUsesRelay: false, + relayConnected: false, + remoteSupportsICE: false, + iceWorkerCreated: true, + }, + want: guard.ConnStatusDisconnected, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + if got := evalConnStatus(tc.in); got != tc.want { + t.Fatalf("evalConnStatus = %v, want %v", got, tc.want) + } + }) + } +} + +func TestEvalConnStatus_FullyAvailable(t *testing.T) { + base := connStatusInputs{ + remoteSupportsICE: true, + iceWorkerCreated: true, + } + + tests := []struct { + name string + mutator func(*connStatusInputs) + want guard.ConnStatus + }{ + { + name: "ICE connected, relay connected, peer uses relay", + mutator: func(in *connStatusInputs) { + in.peerUsesRelay = true + in.relayConnected = true + in.iceStatusConnecting = true + }, + want: guard.ConnStatusConnected, + }, + { + name: "ICE connected, peer does NOT use relay", + mutator: func(in *connStatusInputs) { + in.peerUsesRelay = false + in.relayConnected = false + in.iceStatusConnecting = true + }, + want: guard.ConnStatusConnected, + }, + { + name: "ICE InProgress only, peer does NOT use relay", + mutator: func(in *connStatusInputs) { + in.peerUsesRelay = false + in.iceStatusConnecting = false + in.iceInProgress = true + }, + want: guard.ConnStatusConnected, + }, + { + name: "ICE down, relay up, peer uses relay -> partial", + mutator: func(in *connStatusInputs) { + in.peerUsesRelay = true + in.relayConnected = true + in.iceStatusConnecting = false + in.iceInProgress = false + }, + want: guard.ConnStatusPartiallyConnected, + }, + { + name: "ICE down, peer does NOT use relay -> disconnected", + mutator: func(in *connStatusInputs) { + in.peerUsesRelay = false + in.relayConnected = false + in.iceStatusConnecting = false + in.iceInProgress = false + }, + want: guard.ConnStatusDisconnected, + }, + { + name: "ICE up, peer uses relay but relay down -> partial (relay required, ICE ignored)", + mutator: func(in *connStatusInputs) { + in.peerUsesRelay = true + in.relayConnected = false + in.iceStatusConnecting = true + }, + // relayOK = false (peer uses relay but it's down), iceUp = true + // first switch arm fails (relayOK false), relayUsedAndUp = false (relay down), + // falls into default: Disconnected. + want: guard.ConnStatusDisconnected, + }, + { + name: "ICE down, relay up but peer does not use relay -> disconnected", + mutator: func(in *connStatusInputs) { + in.peerUsesRelay = false + in.relayConnected = true // not actually used since peer doesn't rely on it + in.iceStatusConnecting = false + in.iceInProgress = false + }, + want: guard.ConnStatusDisconnected, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + in := base + tc.mutator(&in) + if got := evalConnStatus(in); got != tc.want { + t.Fatalf("evalConnStatus = %v, want %v (inputs: %+v)", got, tc.want, in) + } + }) + } +} diff --git a/client/internal/peer/env.go b/client/internal/peer/env.go index 7f500c410..b4ba9ad7b 100644 --- a/client/internal/peer/env.go +++ b/client/internal/peer/env.go @@ -10,7 +10,7 @@ const ( EnvKeyNBForceRelay = "NB_FORCE_RELAY" ) -func isForceRelayed() bool { +func IsForceRelayed() bool { if runtime.GOOS == "js" { return true } diff --git a/client/internal/peer/guard/guard.go b/client/internal/peer/guard/guard.go index d93403730..2e5efbcc5 100644 --- a/client/internal/peer/guard/guard.go +++ b/client/internal/peer/guard/guard.go @@ -8,7 +8,19 @@ import ( log "github.com/sirupsen/logrus" ) -type isConnectedFunc func() bool +// ConnStatus represents the connection state as seen by the guard. +type ConnStatus int + +const ( + // ConnStatusDisconnected means neither ICE nor Relay is connected. + ConnStatusDisconnected ConnStatus = iota + // ConnStatusPartiallyConnected means Relay is connected but ICE is not. + ConnStatusPartiallyConnected + // ConnStatusConnected means all required connections are established. + ConnStatusConnected +) + +type connStatusFunc func() ConnStatus // Guard is responsible for the reconnection logic. // It will trigger to send an offer to the peer then has connection issues. @@ -20,14 +32,14 @@ type isConnectedFunc func() bool // - ICE candidate changes type Guard struct { log *log.Entry - isConnectedOnAllWay isConnectedFunc + isConnectedOnAllWay connStatusFunc timeout time.Duration srWatcher *SRWatcher relayedConnDisconnected chan struct{} iCEConnDisconnected chan struct{} } -func NewGuard(log *log.Entry, isConnectedFn isConnectedFunc, timeout time.Duration, srWatcher *SRWatcher) *Guard { +func NewGuard(log *log.Entry, isConnectedFn connStatusFunc, timeout time.Duration, srWatcher *SRWatcher) *Guard { return &Guard{ log: log, isConnectedOnAllWay: isConnectedFn, @@ -57,8 +69,17 @@ func (g *Guard) SetICEConnDisconnected() { } } -// reconnectLoopWithRetry periodically check the connection status. -// Try to send offer while the P2P is not established or while the Relay is not connected if is it supported +// reconnectLoopWithRetry periodically checks the connection status and sends offers to re-establish connectivity. +// +// Behavior depends on the connection state reported by isConnectedOnAllWay: +// - Connected: no action, the peer is fully reachable. +// - Disconnected (neither ICE nor Relay): retries aggressively with exponential backoff (800ms doubling +// up to timeout), never gives up. This ensures rapid recovery when the peer has no connectivity at all. +// - PartiallyConnected (Relay up, ICE not): retries up to 3 times with exponential backoff, then switches +// to one attempt per hour. This limits signaling traffic when relay already provides connectivity. +// +// External events (relay/ICE disconnect, signal/relay reconnect, candidate changes) reset the retry +// counter and backoff ticker, giving ICE a fresh chance after network conditions change. func (g *Guard) reconnectLoopWithRetry(ctx context.Context, callback func()) { srReconnectedChan := g.srWatcher.NewListener() defer g.srWatcher.RemoveListener(srReconnectedChan) @@ -68,36 +89,47 @@ func (g *Guard) reconnectLoopWithRetry(ctx context.Context, callback func()) { tickerChannel := ticker.C + iceState := &iceRetryState{log: g.log} + defer iceState.reset() + for { select { - case t := <-tickerChannel: - if t.IsZero() { - g.log.Infof("retry timed out, stop periodic offer sending") - // after backoff timeout the ticker.C will be closed. We need to a dummy channel to avoid loop - tickerChannel = make(<-chan time.Time) - continue + case <-tickerChannel: + switch g.isConnectedOnAllWay() { + case ConnStatusConnected: + // all good, nothing to do + case ConnStatusDisconnected: + callback() + case ConnStatusPartiallyConnected: + if iceState.shouldRetry() { + callback() + } else { + iceState.enterHourlyMode() + ticker.Stop() + tickerChannel = iceState.hourlyC() + } } - if !g.isConnectedOnAllWay() { - callback() - } case <-g.relayedConnDisconnected: g.log.Debugf("Relay connection changed, reset reconnection ticker") ticker.Stop() - ticker = g.prepareExponentTicker(ctx) + ticker = g.newReconnectTicker(ctx) tickerChannel = ticker.C + iceState.reset() case <-g.iCEConnDisconnected: g.log.Debugf("ICE connection changed, reset reconnection ticker") ticker.Stop() - ticker = g.prepareExponentTicker(ctx) + ticker = g.newReconnectTicker(ctx) tickerChannel = ticker.C + iceState.reset() case <-srReconnectedChan: g.log.Debugf("has network changes, reset reconnection ticker") ticker.Stop() - ticker = g.prepareExponentTicker(ctx) + ticker = g.newReconnectTicker(ctx) tickerChannel = ticker.C + iceState.reset() case <-ctx.Done(): g.log.Debugf("context is done, stop reconnect loop") @@ -120,7 +152,7 @@ func (g *Guard) initialTicker(ctx context.Context) *backoff.Ticker { return backoff.NewTicker(bo) } -func (g *Guard) prepareExponentTicker(ctx context.Context) *backoff.Ticker { +func (g *Guard) newReconnectTicker(ctx context.Context) *backoff.Ticker { bo := backoff.WithContext(&backoff.ExponentialBackOff{ InitialInterval: 800 * time.Millisecond, RandomizationFactor: 0.1, diff --git a/client/internal/peer/guard/ice_retry_state.go b/client/internal/peer/guard/ice_retry_state.go new file mode 100644 index 000000000..01dc1bf2d --- /dev/null +++ b/client/internal/peer/guard/ice_retry_state.go @@ -0,0 +1,61 @@ +package guard + +import ( + "time" + + log "github.com/sirupsen/logrus" +) + +const ( + // maxICERetries is the maximum number of ICE offer attempts when relay is connected + maxICERetries = 3 + // iceRetryInterval is the periodic retry interval after ICE retries are exhausted + iceRetryInterval = 1 * time.Hour +) + +// iceRetryState tracks the limited ICE retry attempts when relay is already connected. +// After maxICERetries attempts it switches to a periodic hourly retry. +type iceRetryState struct { + log *log.Entry + retries int + hourly *time.Ticker +} + +func (s *iceRetryState) reset() { + s.retries = 0 + if s.hourly != nil { + s.hourly.Stop() + s.hourly = nil + } +} + +// shouldRetry reports whether the caller should send another ICE offer on this tick. +// Returns false when the per-cycle retry budget is exhausted and the caller must switch +// to the hourly ticker via enterHourlyMode + hourlyC. +func (s *iceRetryState) shouldRetry() bool { + if s.hourly != nil { + s.log.Debugf("hourly ICE retry attempt") + return true + } + + s.retries++ + if s.retries <= maxICERetries { + s.log.Debugf("ICE retry attempt %d/%d", s.retries, maxICERetries) + return true + } + + return false +} + +// enterHourlyMode starts the hourly retry ticker. Must be called after shouldRetry returns false. +func (s *iceRetryState) enterHourlyMode() { + s.log.Infof("ICE retries exhausted (%d/%d), switching to hourly retry", maxICERetries, maxICERetries) + s.hourly = time.NewTicker(iceRetryInterval) +} + +func (s *iceRetryState) hourlyC() <-chan time.Time { + if s.hourly == nil { + return nil + } + return s.hourly.C +} diff --git a/client/internal/peer/guard/ice_retry_state_test.go b/client/internal/peer/guard/ice_retry_state_test.go new file mode 100644 index 000000000..6a5b5a76f --- /dev/null +++ b/client/internal/peer/guard/ice_retry_state_test.go @@ -0,0 +1,103 @@ +package guard + +import ( + "testing" + + log "github.com/sirupsen/logrus" +) + +func newTestRetryState() *iceRetryState { + return &iceRetryState{log: log.NewEntry(log.StandardLogger())} +} + +func TestICERetryState_AllowsInitialBudget(t *testing.T) { + s := newTestRetryState() + + for i := 1; i <= maxICERetries; i++ { + if !s.shouldRetry() { + t.Fatalf("shouldRetry returned false on attempt %d, want true (budget = %d)", i, maxICERetries) + } + } +} + +func TestICERetryState_ExhaustsAfterBudget(t *testing.T) { + s := newTestRetryState() + + for i := 0; i < maxICERetries; i++ { + _ = s.shouldRetry() + } + + if s.shouldRetry() { + t.Fatalf("shouldRetry returned true after budget exhausted, want false") + } +} + +func TestICERetryState_HourlyCNilBeforeEnterHourlyMode(t *testing.T) { + s := newTestRetryState() + + if s.hourlyC() != nil { + t.Fatalf("hourlyC returned non-nil channel before enterHourlyMode") + } +} + +func TestICERetryState_EnterHourlyModeArmsTicker(t *testing.T) { + s := newTestRetryState() + for i := 0; i < maxICERetries+1; i++ { + _ = s.shouldRetry() + } + + s.enterHourlyMode() + defer s.reset() + + if s.hourlyC() == nil { + t.Fatalf("hourlyC returned nil after enterHourlyMode") + } +} + +func TestICERetryState_ShouldRetryTrueInHourlyMode(t *testing.T) { + s := newTestRetryState() + s.enterHourlyMode() + defer s.reset() + + if !s.shouldRetry() { + t.Fatalf("shouldRetry returned false in hourly mode, want true") + } + + // Subsequent calls also return true — we keep retrying on each hourly tick. + if !s.shouldRetry() { + t.Fatalf("second shouldRetry returned false in hourly mode, want true") + } +} + +func TestICERetryState_ResetRestoresBudget(t *testing.T) { + s := newTestRetryState() + for i := 0; i < maxICERetries+1; i++ { + _ = s.shouldRetry() + } + s.enterHourlyMode() + + s.reset() + + if s.hourlyC() != nil { + t.Fatalf("hourlyC returned non-nil channel after reset") + } + if s.retries != 0 { + t.Fatalf("retries = %d after reset, want 0", s.retries) + } + + for i := 1; i <= maxICERetries; i++ { + if !s.shouldRetry() { + t.Fatalf("shouldRetry returned false on attempt %d after reset, want true", i) + } + } +} + +func TestICERetryState_ResetIsIdempotent(t *testing.T) { + s := newTestRetryState() + s.reset() + s.reset() // second call must not panic or re-stop a nil ticker + + if s.hourlyC() != nil { + t.Fatalf("hourlyC non-nil after double reset") + } +} diff --git a/client/internal/peer/guard/sr_watcher.go b/client/internal/peer/guard/sr_watcher.go index 6f4f5ad4f..0befd7438 100644 --- a/client/internal/peer/guard/sr_watcher.go +++ b/client/internal/peer/guard/sr_watcher.go @@ -39,7 +39,7 @@ func NewSRWatcher(signalClient chNotifier, relayManager chNotifier, iFaceDiscove return srw } -func (w *SRWatcher) Start() { +func (w *SRWatcher) Start(disableICEMonitor bool) { w.mu.Lock() defer w.mu.Unlock() @@ -50,8 +50,10 @@ func (w *SRWatcher) Start() { ctx, cancel := context.WithCancel(context.Background()) w.cancelIceMonitor = cancel - iceMonitor := NewICEMonitor(w.iFaceDiscover, w.iceConfig, GetICEMonitorPeriod()) - go iceMonitor.Start(ctx, w.onICEChanged) + if !disableICEMonitor { + iceMonitor := NewICEMonitor(w.iFaceDiscover, w.iceConfig, GetICEMonitorPeriod()) + go iceMonitor.Start(ctx, w.onICEChanged) + } w.signalClient.SetOnReconnectedListener(w.onReconnected) w.relayManager.SetOnReconnectedListener(w.onReconnected) diff --git a/client/internal/peer/handshaker.go b/client/internal/peer/handshaker.go index 9b50cecd1..741dfce60 100644 --- a/client/internal/peer/handshaker.go +++ b/client/internal/peer/handshaker.go @@ -4,6 +4,7 @@ import ( "context" "errors" "sync" + "sync/atomic" log "github.com/sirupsen/logrus" @@ -43,6 +44,10 @@ type OfferAnswer struct { SessionID *ICESessionID } +func (o *OfferAnswer) hasICECredentials() bool { + return o.IceCredentials.UFrag != "" && o.IceCredentials.Pwd != "" +} + type Handshaker struct { mu sync.Mutex log *log.Entry @@ -59,6 +64,10 @@ type Handshaker struct { relayListener *AsyncOfferListener iceListener func(remoteOfferAnswer *OfferAnswer) + // remoteICESupported tracks whether the remote peer includes ICE credentials in its offers/answers. + // When false, the local side skips ICE listener dispatch and suppresses ICE credentials in responses. + remoteICESupported atomic.Bool + // remoteOffersCh is a channel used to wait for remote credentials to proceed with the connection remoteOffersCh chan OfferAnswer // remoteAnswerCh is a channel used to wait for remote credentials answer (confirmation of our offer) to proceed with the connection @@ -66,7 +75,7 @@ type Handshaker struct { } func NewHandshaker(log *log.Entry, config ConnConfig, signaler *Signaler, ice *WorkerICE, relay *WorkerRelay, metricsStages *MetricsStages) *Handshaker { - return &Handshaker{ + h := &Handshaker{ log: log, config: config, signaler: signaler, @@ -76,6 +85,13 @@ func NewHandshaker(log *log.Entry, config ConnConfig, signaler *Signaler, ice *W remoteOffersCh: make(chan OfferAnswer), remoteAnswerCh: make(chan OfferAnswer), } + // assume remote supports ICE until we learn otherwise from received offers + h.remoteICESupported.Store(ice != nil) + return h +} + +func (h *Handshaker) RemoteICESupported() bool { + return h.remoteICESupported.Load() } func (h *Handshaker) AddRelayListener(offer func(remoteOfferAnswer *OfferAnswer)) { @@ -90,18 +106,20 @@ func (h *Handshaker) Listen(ctx context.Context) { for { select { case remoteOfferAnswer := <-h.remoteOffersCh: - h.log.Infof("received offer, running version %s, remote WireGuard listen port %d, session id: %s", remoteOfferAnswer.Version, remoteOfferAnswer.WgListenPort, remoteOfferAnswer.SessionIDString()) + h.log.Infof("received offer, running version %s, remote WireGuard listen port %d, session id: %s, remote ICE supported: %t", remoteOfferAnswer.Version, remoteOfferAnswer.WgListenPort, remoteOfferAnswer.SessionIDString(), remoteOfferAnswer.hasICECredentials()) // Record signaling received for reconnection attempts if h.metricsStages != nil { h.metricsStages.RecordSignalingReceived() } + h.updateRemoteICEState(&remoteOfferAnswer) + if h.relayListener != nil { h.relayListener.Notify(&remoteOfferAnswer) } - if h.iceListener != nil { + if h.iceListener != nil && h.RemoteICESupported() { h.iceListener(&remoteOfferAnswer) } @@ -110,18 +128,20 @@ func (h *Handshaker) Listen(ctx context.Context) { continue } case remoteOfferAnswer := <-h.remoteAnswerCh: - h.log.Infof("received answer, running version %s, remote WireGuard listen port %d, session id: %s", remoteOfferAnswer.Version, remoteOfferAnswer.WgListenPort, remoteOfferAnswer.SessionIDString()) + h.log.Infof("received answer, running version %s, remote WireGuard listen port %d, session id: %s, remote ICE supported: %t", remoteOfferAnswer.Version, remoteOfferAnswer.WgListenPort, remoteOfferAnswer.SessionIDString(), remoteOfferAnswer.hasICECredentials()) // Record signaling received for reconnection attempts if h.metricsStages != nil { h.metricsStages.RecordSignalingReceived() } + h.updateRemoteICEState(&remoteOfferAnswer) + if h.relayListener != nil { h.relayListener.Notify(&remoteOfferAnswer) } - if h.iceListener != nil { + if h.iceListener != nil && h.RemoteICESupported() { h.iceListener(&remoteOfferAnswer) } case <-ctx.Done(): @@ -183,15 +203,18 @@ func (h *Handshaker) sendAnswer() error { } func (h *Handshaker) buildOfferAnswer() OfferAnswer { - uFrag, pwd := h.ice.GetLocalUserCredentials() - sid := h.ice.SessionID() answer := OfferAnswer{ - IceCredentials: IceCredentials{uFrag, pwd}, WgListenPort: h.config.LocalWgPort, Version: version.NetbirdVersion(), RosenpassPubKey: h.config.RosenpassConfig.PubKey, RosenpassAddr: h.config.RosenpassConfig.Addr, - SessionID: &sid, + } + + if h.ice != nil && h.RemoteICESupported() { + uFrag, pwd := h.ice.GetLocalUserCredentials() + sid := h.ice.SessionID() + answer.IceCredentials = IceCredentials{uFrag, pwd} + answer.SessionID = &sid } if addr, err := h.relay.RelayInstanceAddress(); err == nil { @@ -200,3 +223,18 @@ func (h *Handshaker) buildOfferAnswer() OfferAnswer { return answer } + +func (h *Handshaker) updateRemoteICEState(offer *OfferAnswer) { + hasICE := offer.hasICECredentials() + prev := h.remoteICESupported.Swap(hasICE) + if prev != hasICE { + if hasICE { + h.log.Infof("remote peer started sending ICE credentials") + } else { + h.log.Infof("remote peer stopped sending ICE credentials") + if h.ice != nil { + h.ice.Close() + } + } + } +} diff --git a/client/internal/peer/signaler.go b/client/internal/peer/signaler.go index b28906625..f6eb87cca 100644 --- a/client/internal/peer/signaler.go +++ b/client/internal/peer/signaler.go @@ -46,9 +46,13 @@ func (s *Signaler) Ready() bool { // SignalOfferAnswer signals either an offer or an answer to remote peer func (s *Signaler) signalOfferAnswer(offerAnswer OfferAnswer, remoteKey string, bodyType sProto.Body_Type) error { - sessionIDBytes, err := offerAnswer.SessionID.Bytes() - if err != nil { - log.Warnf("failed to get session ID bytes: %v", err) + var sessionIDBytes []byte + if offerAnswer.SessionID != nil { + var err error + sessionIDBytes, err = offerAnswer.SessionID.Bytes() + if err != nil { + log.Warnf("failed to get session ID bytes: %v", err) + } } msg, err := signal.MarshalCredential( s.wgPrivateKey, diff --git a/client/internal/routemanager/systemops/systemops_bsd_other.go b/client/internal/routemanager/systemops/systemops_bsd_other.go new file mode 100644 index 000000000..3f09219aa --- /dev/null +++ b/client/internal/routemanager/systemops/systemops_bsd_other.go @@ -0,0 +1,10 @@ +//go:build (dragonfly || freebsd || netbsd || openbsd) && !darwin + +package systemops + +// Non-darwin BSDs don't support the IP_BOUND_IF + scoped default model. They +// always fall through to the ref-counter exclusion-route path; these stubs +// exist only so systemops_unix.go compiles. +func (r *SysOps) setupAdvancedRouting() error { return nil } +func (r *SysOps) cleanupAdvancedRouting() error { return nil } +func (r *SysOps) flushPlatformExtras() error { return nil } diff --git a/client/internal/routemanager/systemops/systemops_darwin.go b/client/internal/routemanager/systemops/systemops_darwin.go new file mode 100644 index 000000000..d6875ff95 --- /dev/null +++ b/client/internal/routemanager/systemops/systemops_darwin.go @@ -0,0 +1,241 @@ +//go:build darwin && !ios + +package systemops + +import ( + "errors" + "fmt" + "net/netip" + "os" + "time" + + "github.com/hashicorp/go-multierror" + log "github.com/sirupsen/logrus" + "golang.org/x/net/route" + "golang.org/x/sys/unix" + + nberrors "github.com/netbirdio/netbird/client/errors" + "github.com/netbirdio/netbird/client/internal/routemanager/vars" + nbnet "github.com/netbirdio/netbird/client/net" +) + +// scopedRouteBudget bounds retries for the scoped default route. Installing or +// deleting it matters enough that we're willing to spend longer waiting for the +// kernel reply than for per-prefix exclusion routes. +const scopedRouteBudget = 5 * time.Second + +// setupAdvancedRouting installs an RTF_IFSCOPE default route per address family +// pinned to the current physical egress, so IP_BOUND_IF scoped lookups can +// resolve gateway'd destinations while the VPN's split default owns the +// unscoped table. +// +// Timing note: this runs during routeManager.Init, which happens before the +// VPN interface is created and before any peer routes propagate. The initial +// mgmt / signal / relay TCP dials always fire before this runs, so those +// sockets miss the IP_BOUND_IF binding and rely on the kernel's normal route +// lookup, which at that point correctly picks the physical default. Those +// already-established TCP flows keep their originally-selected interface for +// their lifetime on Darwin because the kernel caches the egress route +// per-socket at connect time; adding the VPN's 0/1 + 128/1 split default +// afterwards does not migrate them since the original en0 default stays in +// the table. Any subsequent reconnect via nbnet.NewDialer picks up the +// populated bound-iface cache and gets IP_BOUND_IF set cleanly. +func (r *SysOps) setupAdvancedRouting() error { + // Drop any previously-cached egress interface before reinstalling. On a + // refresh, a family that no longer resolves would otherwise keep the stale + // binding, causing new sockets to scope to an interface without a matching + // scoped default. + nbnet.ClearBoundInterfaces() + + if err := r.flushScopedDefaults(); err != nil { + log.Warnf("flush residual scoped defaults: %v", err) + } + + var merr *multierror.Error + installed := 0 + + for _, unspec := range []netip.Addr{netip.IPv4Unspecified(), netip.IPv6Unspecified()} { + ok, err := r.installScopedDefaultFor(unspec) + if err != nil { + merr = multierror.Append(merr, err) + continue + } + if ok { + installed++ + } + } + + if installed == 0 && merr != nil { + return nberrors.FormatErrorOrNil(merr) + } + if merr != nil { + log.Warnf("advanced routing setup partially succeeded: %v", nberrors.FormatErrorOrNil(merr)) + } + return nil +} + +// installScopedDefaultFor resolves the physical default nexthop for the given +// address family, installs a scoped default via it, and caches the iface for +// subsequent IP_BOUND_IF / IPV6_BOUND_IF socket binds. +func (r *SysOps) installScopedDefaultFor(unspec netip.Addr) (bool, error) { + nexthop, err := GetNextHop(unspec) + if err != nil { + if errors.Is(err, vars.ErrRouteNotFound) { + return false, nil + } + return false, fmt.Errorf("get default nexthop for %s: %w", unspec, err) + } + if nexthop.Intf == nil { + return false, fmt.Errorf("unusable default nexthop for %s (no interface)", unspec) + } + + if err := r.addScopedDefault(unspec, nexthop); err != nil { + return false, fmt.Errorf("add scoped default on %s: %w", nexthop.Intf.Name, err) + } + + af := unix.AF_INET + if unspec.Is6() { + af = unix.AF_INET6 + } + nbnet.SetBoundInterface(af, nexthop.Intf) + via := "point-to-point" + if nexthop.IP.IsValid() { + via = nexthop.IP.String() + } + log.Infof("installed scoped default route via %s on %s for %s", via, nexthop.Intf.Name, afOf(unspec)) + return true, nil +} + +func (r *SysOps) cleanupAdvancedRouting() error { + nbnet.ClearBoundInterfaces() + return r.flushScopedDefaults() +} + +// flushPlatformExtras runs darwin-specific residual cleanup hooked into the +// generic FlushMarkedRoutes path, so a crashed daemon's scoped defaults get +// removed on the next boot regardless of whether a profile is brought up. +func (r *SysOps) flushPlatformExtras() error { + return r.flushScopedDefaults() +} + +// flushScopedDefaults removes any scoped default routes tagged with routeProtoFlag. +// Safe to call at startup to clear residual entries from a prior session. +func (r *SysOps) flushScopedDefaults() error { + rib, err := retryFetchRIB() + if err != nil { + return fmt.Errorf("fetch routing table: %w", err) + } + + msgs, err := route.ParseRIB(route.RIBTypeRoute, rib) + if err != nil { + return fmt.Errorf("parse routing table: %w", err) + } + + var merr *multierror.Error + removed := 0 + + for _, msg := range msgs { + rtMsg, ok := msg.(*route.RouteMessage) + if !ok { + continue + } + if rtMsg.Flags&routeProtoFlag == 0 { + continue + } + if rtMsg.Flags&unix.RTF_IFSCOPE == 0 { + continue + } + + info, err := MsgToRoute(rtMsg) + if err != nil { + log.Debugf("skip scoped flush: %v", err) + continue + } + if !info.Dst.IsValid() || info.Dst.Bits() != 0 { + continue + } + + if err := r.deleteScopedRoute(rtMsg); err != nil { + merr = multierror.Append(merr, fmt.Errorf("delete scoped default %s on index %d: %w", + info.Dst, rtMsg.Index, err)) + continue + } + removed++ + log.Debugf("flushed residual scoped default %s on index %d", info.Dst, rtMsg.Index) + } + + if removed > 0 { + log.Infof("flushed %d residual scoped default route(s)", removed) + } + return nberrors.FormatErrorOrNil(merr) +} + +func (r *SysOps) addScopedDefault(unspec netip.Addr, nexthop Nexthop) error { + return r.scopedRouteSocket(unix.RTM_ADD, unspec, nexthop) +} + +func (r *SysOps) deleteScopedRoute(rtMsg *route.RouteMessage) error { + // Preserve identifying flags from the stored route (including RTF_GATEWAY + // only if present); kernel-set bits like RTF_DONE don't belong on RTM_DELETE. + keep := unix.RTF_UP | unix.RTF_STATIC | unix.RTF_GATEWAY | unix.RTF_IFSCOPE | routeProtoFlag + del := &route.RouteMessage{ + Type: unix.RTM_DELETE, + Flags: rtMsg.Flags & keep, + Version: unix.RTM_VERSION, + Seq: r.getSeq(), + Index: rtMsg.Index, + Addrs: rtMsg.Addrs, + } + return r.writeRouteMessage(del, scopedRouteBudget) +} + +func (r *SysOps) scopedRouteSocket(action int, unspec netip.Addr, nexthop Nexthop) error { + flags := unix.RTF_UP | unix.RTF_STATIC | unix.RTF_IFSCOPE | routeProtoFlag + + msg := &route.RouteMessage{ + Type: action, + Flags: flags, + Version: unix.RTM_VERSION, + ID: uintptr(os.Getpid()), + Seq: r.getSeq(), + Index: nexthop.Intf.Index, + } + + const numAddrs = unix.RTAX_NETMASK + 1 + addrs := make([]route.Addr, numAddrs) + + dst, err := addrToRouteAddr(unspec) + if err != nil { + return fmt.Errorf("build destination: %w", err) + } + mask, err := prefixToRouteNetmask(netip.PrefixFrom(unspec, 0)) + if err != nil { + return fmt.Errorf("build netmask: %w", err) + } + addrs[unix.RTAX_DST] = dst + addrs[unix.RTAX_NETMASK] = mask + + if nexthop.IP.IsValid() { + msg.Flags |= unix.RTF_GATEWAY + gw, err := addrToRouteAddr(nexthop.IP.Unmap()) + if err != nil { + return fmt.Errorf("build gateway: %w", err) + } + addrs[unix.RTAX_GATEWAY] = gw + } else { + addrs[unix.RTAX_GATEWAY] = &route.LinkAddr{ + Index: nexthop.Intf.Index, + Name: nexthop.Intf.Name, + } + } + msg.Addrs = addrs + + return r.writeRouteMessage(msg, scopedRouteBudget) +} + +func afOf(a netip.Addr) string { + if a.Is4() { + return "IPv4" + } + return "IPv6" +} diff --git a/client/internal/routemanager/systemops/systemops_generic.go b/client/internal/routemanager/systemops/systemops_generic.go index ec219c7fe..4211eb057 100644 --- a/client/internal/routemanager/systemops/systemops_generic.go +++ b/client/internal/routemanager/systemops/systemops_generic.go @@ -21,6 +21,7 @@ import ( "github.com/netbirdio/netbird/client/internal/routemanager/util" "github.com/netbirdio/netbird/client/internal/routemanager/vars" "github.com/netbirdio/netbird/client/internal/statemanager" + nbnet "github.com/netbirdio/netbird/client/net" "github.com/netbirdio/netbird/client/net/hooks" ) @@ -31,8 +32,6 @@ var splitDefaultv4_2 = netip.PrefixFrom(netip.AddrFrom4([4]byte{128}), 1) var splitDefaultv6_1 = netip.PrefixFrom(netip.IPv6Unspecified(), 1) var splitDefaultv6_2 = netip.PrefixFrom(netip.AddrFrom16([16]byte{0x80}), 1) -var ErrRoutingIsSeparate = errors.New("routing is separate") - func (r *SysOps) setupRefCounter(initAddresses []net.IP, stateManager *statemanager.Manager) error { stateManager.RegisterState(&ShutdownState{}) @@ -397,12 +396,16 @@ func ipToAddr(ip net.IP, intf *net.Interface) (netip.Addr, error) { } // IsAddrRouted checks if the candidate address would route to the vpn, in which case it returns true and the matched prefix. +// When advanced routing is active the WG socket is bound to the physical interface (fwmark on linux, +// IP_UNICAST_IF on windows, IP_BOUND_IF on darwin) and bypasses the main routing table, so the check is skipped. func IsAddrRouted(addr netip.Addr, vpnRoutes []netip.Prefix) (bool, netip.Prefix) { - localRoutes, err := hasSeparateRouting() + if nbnet.AdvancedRouting() { + return false, netip.Prefix{} + } + + localRoutes, err := GetRoutesFromTable() if err != nil { - if !errors.Is(err, ErrRoutingIsSeparate) { - log.Errorf("Failed to get routes: %v", err) - } + log.Errorf("Failed to get routes: %v", err) return false, netip.Prefix{} } diff --git a/client/internal/routemanager/systemops/systemops_js.go b/client/internal/routemanager/systemops/systemops_js.go index 808507fc9..242571b3d 100644 --- a/client/internal/routemanager/systemops/systemops_js.go +++ b/client/internal/routemanager/systemops/systemops_js.go @@ -22,10 +22,6 @@ func GetRoutesFromTable() ([]netip.Prefix, error) { return []netip.Prefix{}, nil } -func hasSeparateRouting() ([]netip.Prefix, error) { - return []netip.Prefix{}, nil -} - // GetDetailedRoutesFromTable returns empty routes for WASM. func GetDetailedRoutesFromTable() ([]DetailedRoute, error) { return []DetailedRoute{}, nil diff --git a/client/internal/routemanager/systemops/systemops_linux.go b/client/internal/routemanager/systemops/systemops_linux.go index bd10f131f..39a9fd978 100644 --- a/client/internal/routemanager/systemops/systemops_linux.go +++ b/client/internal/routemanager/systemops/systemops_linux.go @@ -894,13 +894,6 @@ func getAddressFamily(prefix netip.Prefix) int { return netlink.FAMILY_V6 } -func hasSeparateRouting() ([]netip.Prefix, error) { - if !nbnet.AdvancedRouting() { - return GetRoutesFromTable() - } - return nil, ErrRoutingIsSeparate -} - func isOpErr(err error) bool { // EAFTNOSUPPORT when ipv6 is disabled via sysctl, EOPNOTSUPP when disabled in boot options or otherwise not supported if errors.Is(err, syscall.EAFNOSUPPORT) || errors.Is(err, syscall.EOPNOTSUPP) { diff --git a/client/internal/routemanager/systemops/systemops_nonlinux.go b/client/internal/routemanager/systemops/systemops_nonlinux.go index 905a7bc12..016a62ebd 100644 --- a/client/internal/routemanager/systemops/systemops_nonlinux.go +++ b/client/internal/routemanager/systemops/systemops_nonlinux.go @@ -48,10 +48,6 @@ func EnableIPForwarding() error { return nil } -func hasSeparateRouting() ([]netip.Prefix, error) { - return GetRoutesFromTable() -} - // GetIPRules returns IP rules for debugging (not supported on non-Linux platforms) func GetIPRules() ([]IPRule, error) { log.Infof("IP rules collection is not supported on %s", runtime.GOOS) diff --git a/client/internal/routemanager/systemops/systemops_unix.go b/client/internal/routemanager/systemops/systemops_unix.go index 7089178fb..2d3f9b69a 100644 --- a/client/internal/routemanager/systemops/systemops_unix.go +++ b/client/internal/routemanager/systemops/systemops_unix.go @@ -25,6 +25,9 @@ import ( const ( envRouteProtoFlag = "NB_ROUTE_PROTO_FLAG" + + // routeBudget bounds retries for per-prefix exclusion route programming. + routeBudget = 1 * time.Second ) var routeProtoFlag int @@ -41,26 +44,42 @@ func init() { } func (r *SysOps) SetupRouting(initAddresses []net.IP, stateManager *statemanager.Manager, advancedRouting bool) error { + if advancedRouting { + return r.setupAdvancedRouting() + } + + log.Infof("Using legacy routing setup with ref counters") return r.setupRefCounter(initAddresses, stateManager) } func (r *SysOps) CleanupRouting(stateManager *statemanager.Manager, advancedRouting bool) error { + if advancedRouting { + return r.cleanupAdvancedRouting() + } + return r.cleanupRefCounter(stateManager) } // FlushMarkedRoutes removes single IP exclusion routes marked with the configured RTF_PROTO flag. +// On darwin it also flushes residual RTF_IFSCOPE scoped default routes so a +// crashed prior session can't leave crud in the table. func (r *SysOps) FlushMarkedRoutes() error { + var merr *multierror.Error + + if err := r.flushPlatformExtras(); err != nil { + merr = multierror.Append(merr, fmt.Errorf("flush platform extras: %w", err)) + } + rib, err := retryFetchRIB() if err != nil { - return fmt.Errorf("fetch routing table: %w", err) + return nberrors.FormatErrorOrNil(multierror.Append(merr, fmt.Errorf("fetch routing table: %w", err))) } msgs, err := route.ParseRIB(route.RIBTypeRoute, rib) if err != nil { - return fmt.Errorf("parse routing table: %w", err) + return nberrors.FormatErrorOrNil(multierror.Append(merr, fmt.Errorf("parse routing table: %w", err))) } - var merr *multierror.Error flushedCount := 0 for _, msg := range msgs { @@ -117,12 +136,12 @@ func (r *SysOps) routeSocket(action int, prefix netip.Prefix, nexthop Nexthop) e return fmt.Errorf("invalid prefix: %s", prefix) } - expBackOff := backoff.NewExponentialBackOff() - expBackOff.InitialInterval = 50 * time.Millisecond - expBackOff.MaxInterval = 500 * time.Millisecond - expBackOff.MaxElapsedTime = 1 * time.Second + msg, err := r.buildRouteMessage(action, prefix, nexthop) + if err != nil { + return fmt.Errorf("build route message: %w", err) + } - if err := backoff.Retry(r.routeOp(action, prefix, nexthop), expBackOff); err != nil { + if err := r.writeRouteMessage(msg, routeBudget); err != nil { a := "add" if action == unix.RTM_DELETE { a = "remove" @@ -132,50 +151,91 @@ func (r *SysOps) routeSocket(action int, prefix netip.Prefix, nexthop Nexthop) e return nil } -func (r *SysOps) routeOp(action int, prefix netip.Prefix, nexthop Nexthop) func() error { - operation := func() error { - fd, err := unix.Socket(syscall.AF_ROUTE, syscall.SOCK_RAW, syscall.AF_UNSPEC) - if err != nil { - return fmt.Errorf("open routing socket: %w", err) +// writeRouteMessage sends a route message over AF_ROUTE and waits for the +// kernel's matching reply, retrying transient failures until budget elapses. +// Callers do not need to manage sockets or seq numbers themselves. +func (r *SysOps) writeRouteMessage(msg *route.RouteMessage, budget time.Duration) error { + expBackOff := backoff.NewExponentialBackOff() + expBackOff.InitialInterval = 50 * time.Millisecond + expBackOff.MaxInterval = 500 * time.Millisecond + expBackOff.MaxElapsedTime = budget + + return backoff.Retry(func() error { return routeMessageRoundtrip(msg) }, expBackOff) +} + +func routeMessageRoundtrip(msg *route.RouteMessage) error { + fd, err := unix.Socket(syscall.AF_ROUTE, syscall.SOCK_RAW, syscall.AF_UNSPEC) + if err != nil { + return fmt.Errorf("open routing socket: %w", err) + } + defer func() { + if err := unix.Close(fd); err != nil && !errors.Is(err, unix.EBADF) { + log.Warnf("close routing socket: %v", err) } - defer func() { - if err := unix.Close(fd); err != nil && !errors.Is(err, unix.EBADF) { - log.Warnf("failed to close routing socket: %v", err) + }() + + tv := unix.Timeval{Sec: 1} + if err := unix.SetsockoptTimeval(fd, unix.SOL_SOCKET, unix.SO_RCVTIMEO, &tv); err != nil { + return backoff.Permanent(fmt.Errorf("set recv timeout: %w", err)) + } + + // AF_ROUTE is a broadcast channel: every route socket on the host sees + // every RTM_* event. With concurrent route programming the default + // per-socket queue overflows and our own reply gets dropped. + if err := unix.SetsockoptInt(fd, unix.SOL_SOCKET, unix.SO_RCVBUF, 1<<20); err != nil { + log.Debugf("set SO_RCVBUF on route socket: %v", err) + } + + bytes, err := msg.Marshal() + if err != nil { + return backoff.Permanent(fmt.Errorf("marshal: %w", err)) + } + + if _, err = unix.Write(fd, bytes); err != nil { + if errors.Is(err, unix.ENOBUFS) || errors.Is(err, unix.EAGAIN) { + return fmt.Errorf("write: %w", err) + } + return backoff.Permanent(fmt.Errorf("write: %w", err)) + } + return readRouteResponse(fd, msg.Type, msg.Seq) +} + +// readRouteResponse reads from the AF_ROUTE socket until it sees a reply +// matching our write (same type, seq, and pid). AF_ROUTE SOCK_RAW is a +// broadcast channel: interface up/down, third-party route changes and neighbor +// discovery events can all land between our write and read, so we must filter. +func readRouteResponse(fd, wantType, wantSeq int) error { + pid := int32(os.Getpid()) + resp := make([]byte, 2048) + deadline := time.Now().Add(time.Second) + for { + if time.Now().After(deadline) { + // Transient: under concurrent pressure the kernel can drop our reply + // from the socket buffer. Let backoff.Retry re-send with a fresh seq. + return fmt.Errorf("read: timeout waiting for route reply type=%d seq=%d", wantType, wantSeq) + } + n, err := unix.Read(fd, resp) + if err != nil { + if errors.Is(err, unix.EAGAIN) || errors.Is(err, unix.EWOULDBLOCK) { + // SO_RCVTIMEO fired while waiting; loop to re-check the absolute deadline. + continue } - }() - - msg, err := r.buildRouteMessage(action, prefix, nexthop) - if err != nil { - return backoff.Permanent(fmt.Errorf("build route message: %w", err)) + return backoff.Permanent(fmt.Errorf("read: %w", err)) } - - msgBytes, err := msg.Marshal() - if err != nil { - return backoff.Permanent(fmt.Errorf("marshal route message: %w", err)) + if n < int(unsafe.Sizeof(unix.RtMsghdr{})) { + continue } - - if _, err = unix.Write(fd, msgBytes); err != nil { - if errors.Is(err, unix.ENOBUFS) || errors.Is(err, unix.EAGAIN) { - return fmt.Errorf("write: %w", err) - } - return backoff.Permanent(fmt.Errorf("write: %w", err)) + hdr := (*unix.RtMsghdr)(unsafe.Pointer(&resp[0])) + // Darwin reflects the sender's pid on replies; matching (Type, Seq, Pid) + // uniquely identifies our own reply among broadcast traffic. + if int(hdr.Type) != wantType || int(hdr.Seq) != wantSeq || hdr.Pid != pid { + continue } - - respBuf := make([]byte, 2048) - n, err := unix.Read(fd, respBuf) - if err != nil { - return backoff.Permanent(fmt.Errorf("read route response: %w", err)) + if hdr.Errno != 0 { + return backoff.Permanent(fmt.Errorf("kernel: %w", syscall.Errno(hdr.Errno))) } - - if n > 0 { - if err := r.parseRouteResponse(respBuf[:n]); err != nil { - return backoff.Permanent(err) - } - } - return nil } - return operation } func (r *SysOps) buildRouteMessage(action int, prefix netip.Prefix, nexthop Nexthop) (msg *route.RouteMessage, err error) { @@ -183,6 +243,7 @@ func (r *SysOps) buildRouteMessage(action int, prefix netip.Prefix, nexthop Next Type: action, Flags: unix.RTF_UP | routeProtoFlag, Version: unix.RTM_VERSION, + ID: uintptr(os.Getpid()), Seq: r.getSeq(), } @@ -221,19 +282,6 @@ func (r *SysOps) buildRouteMessage(action int, prefix netip.Prefix, nexthop Next return msg, nil } -func (r *SysOps) parseRouteResponse(buf []byte) error { - if len(buf) < int(unsafe.Sizeof(unix.RtMsghdr{})) { - return nil - } - - rtMsg := (*unix.RtMsghdr)(unsafe.Pointer(&buf[0])) - if rtMsg.Errno != 0 { - return fmt.Errorf("parse: %d", rtMsg.Errno) - } - - return nil -} - // addrToRouteAddr converts a netip.Addr to the appropriate route.Addr (*route.Inet4Addr or *route.Inet6Addr). func addrToRouteAddr(addr netip.Addr) (route.Addr, error) { if addr.Is4() { diff --git a/client/net/dialer_init_darwin.go b/client/net/dialer_init_darwin.go new file mode 100644 index 000000000..e18909ff7 --- /dev/null +++ b/client/net/dialer_init_darwin.go @@ -0,0 +1,5 @@ +package net + +func (d *Dialer) init() { + d.Dialer.Control = applyBoundIfToSocket +} diff --git a/client/net/dialer_init_generic.go b/client/net/dialer_init_generic.go index 18ebc6ad1..78973b47d 100644 --- a/client/net/dialer_init_generic.go +++ b/client/net/dialer_init_generic.go @@ -1,4 +1,4 @@ -//go:build !linux && !windows +//go:build !linux && !windows && !darwin package net diff --git a/client/net/env_android.go b/client/net/env_android.go deleted file mode 100644 index 9d89951a1..000000000 --- a/client/net/env_android.go +++ /dev/null @@ -1,24 +0,0 @@ -//go:build android - -package net - -// Init initializes the network environment for Android -func Init() { - // No initialization needed on Android -} - -// AdvancedRouting reports whether routing loops can be avoided without using exclusion routes. -// Always returns true on Android since we cannot handle routes dynamically. -func AdvancedRouting() bool { - return true -} - -// SetVPNInterfaceName is a no-op on Android -func SetVPNInterfaceName(name string) { - // No-op on Android - not needed for Android VPN service -} - -// GetVPNInterfaceName returns empty string on Android -func GetVPNInterfaceName() string { - return "" -} diff --git a/client/net/env_windows.go b/client/net/env_bound_iface.go similarity index 71% rename from client/net/env_windows.go rename to client/net/env_bound_iface.go index 7e8868ba5..593988c2c 100644 --- a/client/net/env_windows.go +++ b/client/net/env_bound_iface.go @@ -1,4 +1,4 @@ -//go:build windows +//go:build (darwin && !ios) || windows package net @@ -24,17 +24,22 @@ func Init() { } func checkAdvancedRoutingSupport() bool { - var err error - var legacyRouting bool + legacyRouting := false if val := os.Getenv(envUseLegacyRouting); val != "" { - legacyRouting, err = strconv.ParseBool(val) + parsed, err := strconv.ParseBool(val) if err != nil { - log.Warnf("failed to parse %s: %v", envUseLegacyRouting, err) + log.Warnf("ignoring unparsable %s=%q: %v", envUseLegacyRouting, val, err) + } else { + legacyRouting = parsed } } - if legacyRouting || netstack.IsEnabled() { - log.Info("advanced routing has been requested to be disabled") + if legacyRouting { + log.Infof("advanced routing disabled: legacy routing requested via %s", envUseLegacyRouting) + return false + } + if netstack.IsEnabled() { + log.Info("advanced routing disabled: netstack mode is enabled") return false } diff --git a/client/net/env_generic.go b/client/net/env_generic.go index f467930c3..18c10bb78 100644 --- a/client/net/env_generic.go +++ b/client/net/env_generic.go @@ -1,4 +1,4 @@ -//go:build !linux && !windows && !android +//go:build !linux && !windows && !darwin package net diff --git a/client/net/env_mobile.go b/client/net/env_mobile.go new file mode 100644 index 000000000..80b0fad8d --- /dev/null +++ b/client/net/env_mobile.go @@ -0,0 +1,25 @@ +//go:build ios || android + +package net + +// Init initializes the network environment for mobile platforms. +func Init() { + // no-op on mobile: routing scope is owned by the VPN extension. +} + +// AdvancedRouting reports whether routing loops can be avoided without using exclusion routes. +// Always returns true on mobile since routes cannot be handled dynamically and the VPN extension +// owns the routing scope. +func AdvancedRouting() bool { + return true +} + +// SetVPNInterfaceName is a no-op on mobile. +func SetVPNInterfaceName(string) { + // no-op on mobile: the VPN extension manages the interface. +} + +// GetVPNInterfaceName returns an empty string on mobile. +func GetVPNInterfaceName() string { + return "" +} diff --git a/client/net/listener_init_darwin.go b/client/net/listener_init_darwin.go new file mode 100644 index 000000000..f2fcc80ed --- /dev/null +++ b/client/net/listener_init_darwin.go @@ -0,0 +1,5 @@ +package net + +func (l *ListenerConfig) init() { + l.ListenConfig.Control = applyBoundIfToSocket +} diff --git a/client/net/listener_init_generic.go b/client/net/listener_init_generic.go index 4f8f17ab2..65a785222 100644 --- a/client/net/listener_init_generic.go +++ b/client/net/listener_init_generic.go @@ -1,4 +1,4 @@ -//go:build !linux && !windows +//go:build !linux && !windows && !darwin package net diff --git a/client/net/net_darwin.go b/client/net/net_darwin.go new file mode 100644 index 000000000..00d858a6a --- /dev/null +++ b/client/net/net_darwin.go @@ -0,0 +1,160 @@ +package net + +import ( + "fmt" + "net" + "net/netip" + "strconv" + "strings" + "sync" + "syscall" + + log "github.com/sirupsen/logrus" + "golang.org/x/sys/unix" +) + +// On darwin IPV6_BOUND_IF also scopes v4-mapped egress from dual-stack +// (IPV6_V6ONLY=0) AF_INET6 sockets, so a single setsockopt on "udp6"/"tcp6" +// covers both families. Setting IP_BOUND_IF on an AF_INET6 socket returns +// EINVAL regardless of V6ONLY because the IPPROTO_IP ctloutput path is +// dispatched by socket domain (AF_INET only) not by inp_vflag. + +// boundIface holds the physical interface chosen at routing setup time. Sockets +// created via nbnet.NewDialer / nbnet.NewListener bind to it via IP_BOUND_IF +// (IPv4) or IPV6_BOUND_IF (IPv6 / dual-stack) so their scoped route lookup +// hits the RTF_IFSCOPE default installed by the routemanager, rather than +// following the VPN's split default. +var ( + boundIfaceMu sync.RWMutex + boundIface4 *net.Interface + boundIface6 *net.Interface +) + +// SetBoundInterface records the egress interface for an address family. Called +// by the routemanager after a scoped default route has been installed. +// af must be unix.AF_INET or unix.AF_INET6; other values are ignored. +// nil iface is rejected — use ClearBoundInterfaces to clear all slots. +func SetBoundInterface(af int, iface *net.Interface) { + if iface == nil { + log.Warnf("SetBoundInterface: nil iface for AF %d, ignored", af) + return + } + boundIfaceMu.Lock() + defer boundIfaceMu.Unlock() + switch af { + case unix.AF_INET: + boundIface4 = iface + case unix.AF_INET6: + boundIface6 = iface + default: + log.Warnf("SetBoundInterface: unsupported address family %d", af) + } +} + +// ClearBoundInterfaces resets the cached egress interfaces. Called by the +// routemanager during cleanup. +func ClearBoundInterfaces() { + boundIfaceMu.Lock() + defer boundIfaceMu.Unlock() + boundIface4 = nil + boundIface6 = nil +} + +// boundInterfaceFor returns the cached egress interface for a socket's address +// family, falling back to the other family if the preferred slot is empty. +// The kernel stores both IP_BOUND_IF and IPV6_BOUND_IF in inp_boundifp, so +// either setsockopt scopes the socket; preferring same-family still matters +// when v4 and v6 defaults egress different NICs. +func boundInterfaceFor(network, address string) *net.Interface { + if iface := zoneInterface(address); iface != nil { + return iface + } + + boundIfaceMu.RLock() + defer boundIfaceMu.RUnlock() + + primary, secondary := boundIface4, boundIface6 + if isV6Network(network) { + primary, secondary = boundIface6, boundIface4 + } + if primary != nil { + return primary + } + return secondary +} + +func isV6Network(network string) bool { + return strings.HasSuffix(network, "6") +} + +// zoneInterface extracts an explicit interface from an IPv6 link-local zone (e.g. fe80::1%en0). +func zoneInterface(address string) *net.Interface { + if address == "" { + return nil + } + addr, err := netip.ParseAddrPort(address) + if err != nil { + a, err := netip.ParseAddr(address) + if err != nil { + return nil + } + addr = netip.AddrPortFrom(a, 0) + } + zone := addr.Addr().Zone() + if zone == "" { + return nil + } + if iface, err := net.InterfaceByName(zone); err == nil { + return iface + } + if idx, err := strconv.Atoi(zone); err == nil { + if iface, err := net.InterfaceByIndex(idx); err == nil { + return iface + } + } + return nil +} + +func setIPv4BoundIf(fd uintptr, iface *net.Interface) error { + if err := unix.SetsockoptInt(int(fd), unix.IPPROTO_IP, unix.IP_BOUND_IF, iface.Index); err != nil { + return fmt.Errorf("set IP_BOUND_IF: %w (interface: %s, index: %d)", err, iface.Name, iface.Index) + } + return nil +} + +func setIPv6BoundIf(fd uintptr, iface *net.Interface) error { + if err := unix.SetsockoptInt(int(fd), unix.IPPROTO_IPV6, unix.IPV6_BOUND_IF, iface.Index); err != nil { + return fmt.Errorf("set IPV6_BOUND_IF: %w (interface: %s, index: %d)", err, iface.Name, iface.Index) + } + return nil +} + +// applyBoundIfToSocket binds the socket to the cached physical egress interface +// so scoped route lookup avoids the VPN utun and egresses the underlay directly. +func applyBoundIfToSocket(network, address string, c syscall.RawConn) error { + if !AdvancedRouting() { + return nil + } + + iface := boundInterfaceFor(network, address) + if iface == nil { + log.Debugf("no bound iface cached for %s to %s, skipping BOUND_IF", network, address) + return nil + } + + isV6 := isV6Network(network) + var controlErr error + if err := c.Control(func(fd uintptr) { + if isV6 { + controlErr = setIPv6BoundIf(fd, iface) + } else { + controlErr = setIPv4BoundIf(fd, iface) + } + if controlErr == nil { + log.Debugf("set BOUND_IF=%d on %s for %s to %s", iface.Index, iface.Name, network, address) + } + }); err != nil { + return fmt.Errorf("control: %w", err) + } + return controlErr +} diff --git a/client/server/state.go b/client/server/state.go index 8dca6bde1..f2d823465 100644 --- a/client/server/state.go +++ b/client/server/state.go @@ -12,7 +12,6 @@ import ( "github.com/netbirdio/netbird/client/internal" "github.com/netbirdio/netbird/client/internal/routemanager/systemops" "github.com/netbirdio/netbird/client/internal/statemanager" - nbnet "github.com/netbirdio/netbird/client/net" "github.com/netbirdio/netbird/client/proto" ) @@ -138,10 +137,8 @@ func restoreResidualState(ctx context.Context, statePath string) error { } // clean up any remaining routes independently of the state file - if !nbnet.AdvancedRouting() { - if err := systemops.New(nil, nil).FlushMarkedRoutes(); err != nil { - merr = multierror.Append(merr, fmt.Errorf("flush marked routes: %w", err)) - } + if err := systemops.New(nil, nil).FlushMarkedRoutes(); err != nil { + merr = multierror.Append(merr, fmt.Errorf("flush marked routes: %w", err)) } return nberrors.FormatErrorOrNil(merr) diff --git a/client/ssh/config/manager.go b/client/ssh/config/manager.go index cc47fd2d2..6e584b2c3 100644 --- a/client/ssh/config/manager.go +++ b/client/ssh/config/manager.go @@ -187,24 +187,23 @@ func (m *Manager) buildPeerConfig(allHostPatterns []string) (string, error) { return "", fmt.Errorf("get NetBird executable path: %w", err) } - hostLine := strings.Join(deduplicatedPatterns, " ") - config := fmt.Sprintf("Host %s\n", hostLine) - config += fmt.Sprintf(" Match exec \"%s ssh detect %%h %%p\"\n", execPath) - config += " PreferredAuthentications password,publickey,keyboard-interactive\n" - config += " PasswordAuthentication yes\n" - config += " PubkeyAuthentication yes\n" - config += " BatchMode no\n" - config += fmt.Sprintf(" ProxyCommand %s ssh proxy %%h %%p\n", execPath) - config += " StrictHostKeyChecking no\n" + hostList := strings.Join(deduplicatedPatterns, ",") + config := fmt.Sprintf("Match host \"%s\" exec \"%s ssh detect %%h %%p\"\n", hostList, execPath) + config += " PreferredAuthentications password,publickey,keyboard-interactive\n" + config += " PasswordAuthentication yes\n" + config += " PubkeyAuthentication yes\n" + config += " BatchMode no\n" + config += fmt.Sprintf(" ProxyCommand %s ssh proxy %%h %%p\n", execPath) + config += " StrictHostKeyChecking no\n" if runtime.GOOS == "windows" { - config += " UserKnownHostsFile NUL\n" + config += " UserKnownHostsFile NUL\n" } else { - config += " UserKnownHostsFile /dev/null\n" + config += " UserKnownHostsFile /dev/null\n" } - config += " CheckHostIP no\n" - config += " LogLevel ERROR\n\n" + config += " CheckHostIP no\n" + config += " LogLevel ERROR\n\n" return config, nil } diff --git a/client/ssh/config/manager_test.go b/client/ssh/config/manager_test.go index dc3ad95b3..e7380c7f2 100644 --- a/client/ssh/config/manager_test.go +++ b/client/ssh/config/manager_test.go @@ -116,6 +116,37 @@ func TestManager_PeerLimit(t *testing.T) { assert.True(t, os.IsNotExist(err), "SSH config should not be created with too many peers") } +func TestManager_MatchHostFormat(t *testing.T) { + tempDir, err := os.MkdirTemp("", "netbird-ssh-config-test") + require.NoError(t, err) + defer func() { assert.NoError(t, os.RemoveAll(tempDir)) }() + + manager := &Manager{ + sshConfigDir: filepath.Join(tempDir, "ssh_config.d"), + sshConfigFile: "99-netbird.conf", + } + + peers := []PeerSSHInfo{ + {Hostname: "peer1", IP: "100.125.1.1", FQDN: "peer1.nb.internal"}, + {Hostname: "peer2", IP: "100.125.1.2", FQDN: "peer2.nb.internal"}, + } + + err = manager.SetupSSHClientConfig(peers) + require.NoError(t, err) + + configPath := filepath.Join(manager.sshConfigDir, manager.sshConfigFile) + content, err := os.ReadFile(configPath) + require.NoError(t, err) + configStr := string(content) + + // Must use "Match host" with comma-separated patterns, not a bare "Host" directive. + // A bare "Host" followed by "Match exec" is incorrect per ssh_config(5): the Host block + // ends at the next Match keyword, making it a no-op and leaving the Match exec unscoped. + assert.NotContains(t, configStr, "\nHost ", "should not use bare Host directive") + assert.Contains(t, configStr, "Match host \"100.125.1.1,peer1.nb.internal,peer1,100.125.1.2,peer2.nb.internal,peer2\"", + "should use Match host with comma-separated patterns") +} + func TestManager_ForcedSSHConfig(t *testing.T) { // Set force environment variable t.Setenv(EnvForceSSHConfig, "true") diff --git a/client/system/info.go b/client/system/info.go index f2546cfe6..175d1f07f 100644 --- a/client/system/info.go +++ b/client/system/info.go @@ -2,7 +2,6 @@ package system import ( "context" - "net" "net/netip" "strings" @@ -145,59 +144,6 @@ func extractDeviceName(ctx context.Context, defaultName string) string { return v } -func networkAddresses() ([]NetworkAddress, error) { - interfaces, err := net.Interfaces() - if err != nil { - return nil, err - } - - var netAddresses []NetworkAddress - for _, iface := range interfaces { - if iface.Flags&net.FlagUp == 0 { - continue - } - if iface.HardwareAddr.String() == "" { - continue - } - addrs, err := iface.Addrs() - if err != nil { - continue - } - - for _, address := range addrs { - ipNet, ok := address.(*net.IPNet) - if !ok { - continue - } - - if ipNet.IP.IsLoopback() { - continue - } - - netAddr := NetworkAddress{ - NetIP: netip.MustParsePrefix(ipNet.String()), - Mac: iface.HardwareAddr.String(), - } - - if isDuplicated(netAddresses, netAddr) { - continue - } - - netAddresses = append(netAddresses, netAddr) - } - } - return netAddresses, nil -} - -func isDuplicated(addresses []NetworkAddress, addr NetworkAddress) bool { - for _, duplicated := range addresses { - if duplicated.NetIP == addr.NetIP { - return true - } - } - return false -} - // GetInfoWithChecks retrieves and parses the system information with applied checks. func GetInfoWithChecks(ctx context.Context, checks []*proto.Checks) (*Info, error) { log.Debugf("gathering system information with checks: %d", len(checks)) diff --git a/client/system/info_ios.go b/client/system/info_ios.go index 81936cf1d..ad42b1edf 100644 --- a/client/system/info_ios.go +++ b/client/system/info_ios.go @@ -2,6 +2,8 @@ package system import ( "context" + "net" + "net/netip" "runtime" log "github.com/sirupsen/logrus" @@ -42,6 +44,66 @@ func GetInfo(ctx context.Context) *Info { return gio } +// networkAddresses returns the list of network addresses on iOS. +// On iOS, hardware (MAC) addresses are not available due to Apple's privacy +// restrictions (iOS returns a fixed 02:00:00:00:00:00 placeholder), so we +// leave Mac empty to match Android's behavior. We also skip the HardwareAddr +// check that other platforms use and filter out link-local addresses as they +// are not useful for posture checks. +func networkAddresses() ([]NetworkAddress, error) { + interfaces, err := net.Interfaces() + if err != nil { + return nil, err + } + + var netAddresses []NetworkAddress + for _, iface := range interfaces { + if iface.Flags&net.FlagUp == 0 { + continue + } + addrs, err := iface.Addrs() + if err != nil { + continue + } + + for _, address := range addrs { + netAddr, ok := toNetworkAddress(address) + if !ok { + continue + } + if isDuplicated(netAddresses, netAddr) { + continue + } + netAddresses = append(netAddresses, netAddr) + } + } + return netAddresses, nil +} + +func toNetworkAddress(address net.Addr) (NetworkAddress, bool) { + ipNet, ok := address.(*net.IPNet) + if !ok { + return NetworkAddress{}, false + } + if ipNet.IP.IsLoopback() || ipNet.IP.IsLinkLocalUnicast() || ipNet.IP.IsMulticast() { + return NetworkAddress{}, false + } + prefix, err := netip.ParsePrefix(ipNet.String()) + if err != nil { + return NetworkAddress{}, false + } + return NetworkAddress{NetIP: prefix, Mac: ""}, true +} + +func isDuplicated(addresses []NetworkAddress, addr NetworkAddress) bool { + for _, duplicated := range addresses { + if duplicated.NetIP == addr.NetIP { + return true + } + } + return false +} + // checkFileAndProcess checks if the file path exists and if a process is running at that path. func checkFileAndProcess(paths []string) ([]File, error) { return []File{}, nil diff --git a/client/system/network_addr.go b/client/system/network_addr.go new file mode 100644 index 000000000..5423cf8ad --- /dev/null +++ b/client/system/network_addr.go @@ -0,0 +1,66 @@ +//go:build !ios + +package system + +import ( + "net" + "net/netip" +) + +func networkAddresses() ([]NetworkAddress, error) { + interfaces, err := net.Interfaces() + if err != nil { + return nil, err + } + + var netAddresses []NetworkAddress + for _, iface := range interfaces { + if iface.Flags&net.FlagUp == 0 { + continue + } + if iface.HardwareAddr.String() == "" { + continue + } + addrs, err := iface.Addrs() + if err != nil { + continue + } + + mac := iface.HardwareAddr.String() + for _, address := range addrs { + netAddr, ok := toNetworkAddress(address, mac) + if !ok { + continue + } + if isDuplicated(netAddresses, netAddr) { + continue + } + netAddresses = append(netAddresses, netAddr) + } + } + return netAddresses, nil +} + +func toNetworkAddress(address net.Addr, mac string) (NetworkAddress, bool) { + ipNet, ok := address.(*net.IPNet) + if !ok { + return NetworkAddress{}, false + } + if ipNet.IP.IsLoopback() { + return NetworkAddress{}, false + } + prefix, err := netip.ParsePrefix(ipNet.String()) + if err != nil { + return NetworkAddress{}, false + } + return NetworkAddress{NetIP: prefix, Mac: mac}, true +} + +func isDuplicated(addresses []NetworkAddress, addr NetworkAddress) bool { + for _, duplicated := range addresses { + if duplicated.NetIP == addr.NetIP { + return true + } + } + return false +} diff --git a/combined/config.yaml.example b/combined/config.yaml.example index d0c4088cf..b944895a1 100644 --- a/combined/config.yaml.example +++ b/combined/config.yaml.example @@ -126,6 +126,8 @@ server: # Reverse proxy settings (optional) # reverseProxy: - # trustedHTTPProxies: [] - # trustedHTTPProxiesCount: 0 - # trustedPeers: [] + # trustedHTTPProxies: [] # CIDRs of trusted reverse proxies (e.g. ["10.0.0.0/8"]) + # trustedHTTPProxiesCount: 0 # Number of trusted proxies in front of the server (alternative to trustedHTTPProxies) + # trustedPeers: [] # CIDRs of trusted peer networks (e.g. ["100.64.0.0/10"]) + # accessLogRetentionDays: 7 # Days to retain HTTP access logs. 0 (or unset) defaults to 7. Negative values disable cleanup (logs kept indefinitely). + # accessLogCleanupIntervalHours: 24 # How often (in hours) to run the access-log cleanup job. 0 (or unset) is treated as "not set" and defaults to 24 hours; cleanup remains enabled. To disable cleanup, set accessLogRetentionDays to a negative value. diff --git a/flow/client/client_test.go b/flow/client/client_test.go index 55157acbc..c8f5f4af4 100644 --- a/flow/client/client_test.go +++ b/flow/client/client_test.go @@ -457,6 +457,18 @@ func TestReceive_ProtocolErrorStreamReconnect(t *testing.T) { client, err := flow.NewClient("http://"+server.addr, "test-payload", "test-signature", 1*time.Second) require.NoError(t, err) + + // Cleanups run LIFO: the goroutine-drain registered here runs after Close below, + // which is when Receive has actually returned. Without this, the Receive goroutine + // can outlive the test and call t.Logf after teardown, panicking. + receiveDone := make(chan struct{}) + t.Cleanup(func() { + select { + case <-receiveDone: + case <-time.After(2 * time.Second): + t.Error("Receive goroutine did not exit after Close") + } + }) t.Cleanup(func() { err := client.Close() assert.NoError(t, err, "failed to close flow") @@ -468,6 +480,7 @@ func TestReceive_ProtocolErrorStreamReconnect(t *testing.T) { receivedAfterReconnect := make(chan struct{}) go func() { + defer close(receiveDone) err := client.Receive(ctx, 1*time.Second, func(msg *proto.FlowEventAck) error { if msg.IsInitiator || len(msg.EventId) == 0 { return nil diff --git a/go.mod b/go.mod index 32ff5f89f..3199f483b 100644 --- a/go.mod +++ b/go.mod @@ -343,3 +343,5 @@ replace github.com/libp2p/go-netroute => github.com/netbirdio/go-netroute v0.0.0 replace github.com/dexidp/dex => github.com/netbirdio/dex v0.244.1-0.20260415145816-a0c6b40ff9f2 replace github.com/dexidp/dex/api/v2 => github.com/netbirdio/dex/api/v2 v2.0.0-20260415145816-a0c6b40ff9f2 + +replace github.com/mailru/easyjson => github.com/netbirdio/easyjson v0.9.0 diff --git a/go.sum b/go.sum index a925038cc..c7dddb377 100644 --- a/go.sum +++ b/go.sum @@ -430,8 +430,6 @@ github.com/lufia/plan9stats v0.0.0-20240513124658-fba389f38bae h1:dIZY4ULFcto4tA github.com/lufia/plan9stats v0.0.0-20240513124658-fba389f38bae/go.mod h1:ilwx/Dta8jXAgpFYFvSWEMwxmbWXyiUHkd5FwyKhb5k= github.com/magiconair/properties v1.8.10 h1:s31yESBquKXCV9a/ScB3ESkOjUYYv+X0rg8SYxI99mE= github.com/magiconair/properties v1.8.10/go.mod h1:Dhd985XPs7jluiymwWYZ0G4Z61jb3vdS329zhj2hYo0= -github.com/mailru/easyjson v0.9.0 h1:PrnmzHw7262yW8sTBwxi1PdJA3Iw/EKBa8psRf7d9a4= -github.com/mailru/easyjson v0.9.0/go.mod h1:1+xMtQp2MRNVL/V1bOzuP3aP8VNwRW55fQUto+XFtTU= github.com/mattermost/xml-roundtrip-validator v0.1.0 h1:RXbVD2UAl7A7nOTR4u7E3ILa4IbtvKBHw64LDsmu9hU= github.com/mattermost/xml-roundtrip-validator v0.1.0/go.mod h1:qccnGMcpgwcNaBnxqpJpWWUiPNr5H3O8eDgGV9gT5To= github.com/mattn/go-colorable v0.1.14 h1:9A9LHSqF/7dyVVX6g0U9cwm9pG3kP9gSzcuIPHPsaIE= @@ -485,6 +483,8 @@ github.com/netbirdio/dex v0.244.1-0.20260415145816-a0c6b40ff9f2 h1:AP7OM/JnTogod github.com/netbirdio/dex v0.244.1-0.20260415145816-a0c6b40ff9f2/go.mod h1:+trSlzHNmdJGvz0oLEyyiuaPstUeD7YO6B3Fx9nyziY= github.com/netbirdio/dex/api/v2 v2.0.0-20260415145816-a0c6b40ff9f2 h1:HEEGJPsVw7/p7SEL3HWP4vaInxHo8OJSEaOkHpUAk+M= github.com/netbirdio/dex/api/v2 v2.0.0-20260415145816-a0c6b40ff9f2/go.mod h1:awuTyT29CYALpEyET0S307EgNlPWrc7fFKRAyhsO45M= +github.com/netbirdio/easyjson v0.9.0 h1:6Nw2lghSVuy8RSkAYDhDv1thBVEmfVbKZnV7T7Z6Aus= +github.com/netbirdio/easyjson v0.9.0/go.mod h1:1+xMtQp2MRNVL/V1bOzuP3aP8VNwRW55fQUto+XFtTU= github.com/netbirdio/go-netroute v0.0.0-20240611143515-f59b0e1d3944 h1:TDtJKmM6Sf8uYFx/dMeqNOL90KUoRscdfpFZ3Im89uk= github.com/netbirdio/go-netroute v0.0.0-20240611143515-f59b0e1d3944/go.mod h1:sHA6TRxjQ6RLbnI+3R4DZo2Eseg/iKiPRfNmcuNySVQ= github.com/netbirdio/ice/v4 v4.0.0-20250908184934-6202be846b51 h1:Ov4qdafATOgGMB1wbSuh+0aAHcwz9hdvB6VZjh1mVMI= diff --git a/infrastructure_files/getting-started.sh b/infrastructure_files/getting-started.sh index 08da48264..2a3f840b4 100755 --- a/infrastructure_files/getting-started.sh +++ b/infrastructure_files/getting-started.sh @@ -472,7 +472,7 @@ start_services_and_show_instructions() { if [[ "$ENABLE_CROWDSEC" == "true" ]]; then echo "Registering CrowdSec bouncer..." local cs_retries=0 - while ! $DOCKER_COMPOSE_COMMAND exec -T crowdsec cscli capi status >/dev/null 2>&1; do + while ! $DOCKER_COMPOSE_COMMAND exec -T crowdsec cscli lapi status >/dev/null 2>&1; do cs_retries=$((cs_retries + 1)) if [[ $cs_retries -ge 30 ]]; then echo "WARNING: CrowdSec did not become ready. Skipping CrowdSec setup." > /dev/stderr diff --git a/management/internals/controllers/network_map/controller/controller.go b/management/internals/controllers/network_map/controller/controller.go index 4b414df6f..4b47ecaa0 100644 --- a/management/internals/controllers/network_map/controller/controller.go +++ b/management/internals/controllers/network_map/controller/controller.go @@ -7,7 +7,6 @@ import ( "os" "slices" "strconv" - "strings" "sync" "sync/atomic" "time" @@ -16,11 +15,9 @@ import ( "golang.org/x/exp/maps" "golang.org/x/mod/semver" - nbdns "github.com/netbirdio/netbird/dns" "github.com/netbirdio/netbird/management/internals/controllers/network_map" "github.com/netbirdio/netbird/management/internals/controllers/network_map/controller/cache" "github.com/netbirdio/netbird/management/internals/modules/peers/ephemeral" - "github.com/netbirdio/netbird/management/internals/modules/zones" "github.com/netbirdio/netbird/management/internals/server/config" "github.com/netbirdio/netbird/management/internals/shared/grpc" "github.com/netbirdio/netbird/management/server/account" @@ -58,13 +55,6 @@ type Controller struct { proxyController port_forwarding.Controller integratedPeerValidator integrated_validator.IntegratedValidator - - holder *types.Holder - - expNewNetworkMap bool - expNewNetworkMapAIDs map[string]struct{} - - compactedNetworkMap bool } type bufferUpdate struct { @@ -81,29 +71,6 @@ func NewController(ctx context.Context, store store.Store, metrics telemetry.App log.Fatal(fmt.Errorf("error creating metrics: %w", err)) } - newNetworkMapBuilder, err := strconv.ParseBool(os.Getenv(network_map.EnvNewNetworkMapBuilder)) - if err != nil { - log.WithContext(ctx).Warnf("failed to parse %s, using default value false: %v", network_map.EnvNewNetworkMapBuilder, err) - newNetworkMapBuilder = false - } - - compactedNetworkMap := true - compactedEnv := os.Getenv(types.EnvNewNetworkMapCompacted) - parsedCompactedNmap, err := strconv.ParseBool(compactedEnv) - if err != nil && len(compactedEnv) > 0 { - log.WithContext(ctx).Warnf("failed to parse %s, using default value true: %v", types.EnvNewNetworkMapCompacted, err) - } - if err == nil && !parsedCompactedNmap { - log.WithContext(ctx).Info("disabling compacted mode") - compactedNetworkMap = false - } - - ids := strings.Split(os.Getenv(network_map.EnvNewNetworkMapAccounts), ",") - expIDs := make(map[string]struct{}, len(ids)) - for _, id := range ids { - expIDs[id] = struct{}{} - } - return &Controller{ repo: newRepository(store), metrics: nMetrics, @@ -117,12 +84,6 @@ func NewController(ctx context.Context, store store.Store, metrics telemetry.App proxyController: proxyController, EphemeralPeersManager: ephemeralPeersManager, - - holder: types.NewHolder(), - expNewNetworkMap: newNetworkMapBuilder, - expNewNetworkMapAIDs: expIDs, - - compactedNetworkMap: compactedNetworkMap, } } @@ -153,17 +114,9 @@ func (c *Controller) CountStreams() int { func (c *Controller) sendUpdateAccountPeers(ctx context.Context, accountID string) error { log.WithContext(ctx).Tracef("updating peers for account %s from %s", accountID, util.GetCallerName()) - var ( - account *types.Account - err error - ) - if c.experimentalNetworkMap(accountID) { - account = c.getAccountFromHolderOrInit(ctx, accountID) - } else { - account, err = c.requestBuffer.GetAccountWithBackpressure(ctx, accountID) - if err != nil { - return fmt.Errorf("failed to get account: %v", err) - } + account, err := c.requestBuffer.GetAccountWithBackpressure(ctx, accountID) + if err != nil { + return fmt.Errorf("failed to get account: %v", err) } globalStart := time.Now() @@ -197,10 +150,6 @@ func (c *Controller) sendUpdateAccountPeers(ctx context.Context, accountID strin routers := account.GetResourceRoutersMap() groupIDToUserIDs := account.GetActiveGroupUsers() - if c.experimentalNetworkMap(accountID) { - c.initNetworkMapBuilderIfNeeded(account, approvedPeersMap) - } - proxyNetworkMaps, err := c.proxyController.GetProxyNetworkMapsAll(ctx, accountID, account.Peers) if err != nil { log.WithContext(ctx).Errorf("failed to get proxy network maps: %v", err) @@ -243,16 +192,7 @@ func (c *Controller) sendUpdateAccountPeers(ctx context.Context, accountID strin c.metrics.CountCalcPostureChecksDuration(time.Since(start)) start = time.Now() - var remotePeerNetworkMap *types.NetworkMap - - switch { - case c.experimentalNetworkMap(accountID): - remotePeerNetworkMap = c.getPeerNetworkMapExp(ctx, p.AccountID, p.ID, approvedPeersMap, peersCustomZone, accountZones, c.accountManagerMetrics) - case c.compactedNetworkMap: - remotePeerNetworkMap = account.GetPeerNetworkMapFromComponents(ctx, p.ID, peersCustomZone, accountZones, approvedPeersMap, resourcePolicies, routers, c.accountManagerMetrics, groupIDToUserIDs) - default: - remotePeerNetworkMap = account.GetPeerNetworkMap(ctx, p.ID, peersCustomZone, accountZones, approvedPeersMap, resourcePolicies, routers, c.accountManagerMetrics, groupIDToUserIDs) - } + remotePeerNetworkMap := account.GetPeerNetworkMapFromComponents(ctx, p.ID, peersCustomZone, accountZones, approvedPeersMap, resourcePolicies, routers, c.accountManagerMetrics, groupIDToUserIDs) c.metrics.CountCalcPeerNetworkMapDuration(time.Since(start)) @@ -318,10 +258,6 @@ func (c *Controller) bufferSendUpdateAccountPeers(ctx context.Context, accountID // UpdatePeers updates all peers that belong to an account. // Should be called when changes have to be synced to peers. func (c *Controller) UpdateAccountPeers(ctx context.Context, accountID string) error { - if err := c.RecalculateNetworkMapCache(ctx, accountID); err != nil { - return fmt.Errorf("recalculate network map cache: %v", err) - } - return c.sendUpdateAccountPeers(ctx, accountID) } @@ -371,16 +307,7 @@ func (c *Controller) UpdateAccountPeer(ctx context.Context, accountId string, pe return err } - var remotePeerNetworkMap *types.NetworkMap - - switch { - case c.experimentalNetworkMap(accountId): - remotePeerNetworkMap = c.getPeerNetworkMapExp(ctx, peer.AccountID, peer.ID, approvedPeersMap, peersCustomZone, accountZones, c.accountManagerMetrics) - case c.compactedNetworkMap: - remotePeerNetworkMap = account.GetPeerNetworkMapFromComponents(ctx, peerId, peersCustomZone, accountZones, approvedPeersMap, resourcePolicies, routers, c.accountManagerMetrics, groupIDToUserIDs) - default: - remotePeerNetworkMap = account.GetPeerNetworkMap(ctx, peerId, peersCustomZone, accountZones, approvedPeersMap, resourcePolicies, routers, c.accountManagerMetrics, groupIDToUserIDs) - } + remotePeerNetworkMap := account.GetPeerNetworkMapFromComponents(ctx, peerId, peersCustomZone, accountZones, approvedPeersMap, resourcePolicies, routers, c.accountManagerMetrics, groupIDToUserIDs) proxyNetworkMap, ok := proxyNetworkMaps[peer.ID] if ok { @@ -451,17 +378,9 @@ func (c *Controller) GetValidatedPeerWithMap(ctx context.Context, isRequiresAppr return peer, emptyMap, nil, 0, nil } - var ( - account *types.Account - err error - ) - if c.experimentalNetworkMap(accountID) { - account = c.getAccountFromHolderOrInit(ctx, accountID) - } else { - account, err = c.requestBuffer.GetAccountWithBackpressure(ctx, accountID) - if err != nil { - return nil, nil, nil, 0, err - } + account, err := c.requestBuffer.GetAccountWithBackpressure(ctx, accountID) + if err != nil { + return nil, nil, nil, 0, err } account.InjectProxyPolicies(ctx) @@ -493,20 +412,10 @@ func (c *Controller) GetValidatedPeerWithMap(ctx context.Context, isRequiresAppr return nil, nil, nil, 0, err } - var networkMap *types.NetworkMap - - if c.experimentalNetworkMap(accountID) { - networkMap = c.getPeerNetworkMapExp(ctx, peer.AccountID, peer.ID, approvedPeersMap, peersCustomZone, accountZones, c.accountManagerMetrics) - } else { - resourcePolicies := account.GetResourcePoliciesMap() - routers := account.GetResourceRoutersMap() - groupIDToUserIDs := account.GetActiveGroupUsers() - if c.compactedNetworkMap { - networkMap = account.GetPeerNetworkMapFromComponents(ctx, peer.ID, peersCustomZone, accountZones, approvedPeersMap, resourcePolicies, routers, c.accountManagerMetrics, groupIDToUserIDs) - } else { - networkMap = account.GetPeerNetworkMap(ctx, peer.ID, peersCustomZone, accountZones, approvedPeersMap, resourcePolicies, routers, c.accountManagerMetrics, groupIDToUserIDs) - } - } + resourcePolicies := account.GetResourcePoliciesMap() + routers := account.GetResourceRoutersMap() + groupIDToUserIDs := account.GetActiveGroupUsers() + networkMap := account.GetPeerNetworkMapFromComponents(ctx, peer.ID, peersCustomZone, accountZones, approvedPeersMap, resourcePolicies, routers, c.accountManagerMetrics, groupIDToUserIDs) proxyNetworkMap, ok := proxyNetworkMaps[peer.ID] if ok { @@ -518,108 +427,6 @@ func (c *Controller) GetValidatedPeerWithMap(ctx context.Context, isRequiresAppr return peer, networkMap, postureChecks, dnsFwdPort, nil } -func (c *Controller) initNetworkMapBuilderIfNeeded(account *types.Account, validatedPeers map[string]struct{}) { - c.enrichAccountFromHolder(account) - account.InitNetworkMapBuilderIfNeeded(validatedPeers) -} - -func (c *Controller) getPeerNetworkMapExp( - ctx context.Context, - accountId string, - peerId string, - validatedPeers map[string]struct{}, - peersCustomZone nbdns.CustomZone, - accountZones []*zones.Zone, - metrics *telemetry.AccountManagerMetrics, -) *types.NetworkMap { - account := c.getAccountFromHolderOrInit(ctx, accountId) - if account == nil { - log.WithContext(ctx).Warnf("account %s not found in holder when getting peer network map", accountId) - return &types.NetworkMap{ - Network: &types.Network{}, - } - } - - return account.GetPeerNetworkMapExp(ctx, peerId, peersCustomZone, accountZones, validatedPeers, metrics) -} - -func (c *Controller) onPeersAddedUpdNetworkMapCache(account *types.Account, peerIds ...string) { - c.enrichAccountFromHolder(account) - account.OnPeersAddedUpdNetworkMapCache(peerIds...) -} - -func (c *Controller) onPeerDeletedUpdNetworkMapCache(account *types.Account, peerId string) error { - c.enrichAccountFromHolder(account) - return account.OnPeerDeletedUpdNetworkMapCache(peerId) -} - -func (c *Controller) UpdatePeerInNetworkMapCache(accountId string, peer *nbpeer.Peer) { - account := c.getAccountFromHolder(accountId) - if account == nil { - return - } - account.UpdatePeerInNetworkMapCache(peer) -} - -func (c *Controller) recalculateNetworkMapCache(account *types.Account, validatedPeers map[string]struct{}) { - account.RecalculateNetworkMapCache(validatedPeers) - c.updateAccountInHolder(account) -} - -func (c *Controller) RecalculateNetworkMapCache(ctx context.Context, accountId string) error { - if c.experimentalNetworkMap(accountId) { - account, err := c.requestBuffer.GetAccountWithBackpressure(ctx, accountId) - if err != nil { - return err - } - validatedPeers, err := c.integratedPeerValidator.GetValidatedPeers(ctx, account.Id, maps.Values(account.Groups), maps.Values(account.Peers), account.Settings.Extra) - if err != nil { - log.WithContext(ctx).Errorf("failed to get validate peers: %v", err) - return err - } - c.recalculateNetworkMapCache(account, validatedPeers) - } - return nil -} - -func (c *Controller) experimentalNetworkMap(accountId string) bool { - _, ok := c.expNewNetworkMapAIDs[accountId] - return c.expNewNetworkMap || ok -} - -func (c *Controller) enrichAccountFromHolder(account *types.Account) { - a := c.holder.GetAccount(account.Id) - if a == nil { - c.holder.AddAccount(account) - return - } - account.NetworkMapCache = a.NetworkMapCache - if account.NetworkMapCache == nil { - return - } - c.holder.AddAccount(account) -} - -func (c *Controller) getAccountFromHolder(accountID string) *types.Account { - return c.holder.GetAccount(accountID) -} - -func (c *Controller) getAccountFromHolderOrInit(ctx context.Context, accountID string) *types.Account { - a := c.holder.GetAccount(accountID) - if a != nil { - return a - } - account, err := c.holder.LoadOrStoreFunc(ctx, accountID, c.requestBuffer.GetAccountWithBackpressure) - if err != nil { - return nil - } - return account -} - -func (c *Controller) updateAccountInHolder(account *types.Account) { - c.holder.AddAccount(account) -} - // GetDNSDomain returns the configured dnsDomain func (c *Controller) GetDNSDomain(settings *types.Settings) string { if settings == nil { @@ -756,16 +563,7 @@ func isPeerInPolicySourceGroups(account *types.Account, peerID string, policy *t } func (c *Controller) OnPeersUpdated(ctx context.Context, accountID string, peerIDs []string) error { - peers, err := c.repo.GetPeersByIDs(ctx, accountID, peerIDs) - if err != nil { - return fmt.Errorf("failed to get peers by ids: %w", err) - } - - for _, peer := range peers { - c.UpdatePeerInNetworkMapCache(accountID, peer) - } - - err = c.bufferSendUpdateAccountPeers(ctx, accountID) + err := c.bufferSendUpdateAccountPeers(ctx, accountID) if err != nil { log.WithContext(ctx).Errorf("failed to buffer update account peers for peer update in account %s: %v", accountID, err) } @@ -775,14 +573,6 @@ func (c *Controller) OnPeersUpdated(ctx context.Context, accountID string, peerI func (c *Controller) OnPeersAdded(ctx context.Context, accountID string, peerIDs []string) error { log.WithContext(ctx).Debugf("OnPeersAdded call to add peers: %v", peerIDs) - if c.experimentalNetworkMap(accountID) { - account, err := c.requestBuffer.GetAccountWithBackpressure(ctx, accountID) - if err != nil { - return err - } - log.WithContext(ctx).Debugf("peers are ready to be added to networkmap cache: %v", peerIDs) - c.onPeersAddedUpdNetworkMapCache(account, peerIDs...) - } return c.bufferSendUpdateAccountPeers(ctx, accountID) } @@ -817,19 +607,6 @@ func (c *Controller) OnPeersDeleted(ctx context.Context, accountID string, peerI MessageType: network_map.MessageTypeNetworkMap, }) c.peersUpdateManager.CloseChannel(ctx, peerID) - - if c.experimentalNetworkMap(accountID) { - account, err := c.requestBuffer.GetAccountWithBackpressure(ctx, accountID) - if err != nil { - log.WithContext(ctx).Errorf("failed to get account %s: %v", accountID, err) - continue - } - err = c.onPeerDeletedUpdNetworkMapCache(account, peerID) - if err != nil { - log.WithContext(ctx).Errorf("failed to update network map cache for deleted peer %s in account %s: %v", peerID, accountID, err) - continue - } - } } return c.bufferSendUpdateAccountPeers(ctx, accountID) @@ -872,21 +649,11 @@ func (c *Controller) GetNetworkMap(ctx context.Context, peerID string) (*types.N return nil, err } - var networkMap *types.NetworkMap - - if c.experimentalNetworkMap(peer.AccountID) { - networkMap = c.getPeerNetworkMapExp(ctx, peer.AccountID, peerID, validatedPeers, peersCustomZone, accountZones, nil) - } else { - account.InjectProxyPolicies(ctx) - resourcePolicies := account.GetResourcePoliciesMap() - routers := account.GetResourceRoutersMap() - groupIDToUserIDs := account.GetActiveGroupUsers() - if c.compactedNetworkMap { - networkMap = account.GetPeerNetworkMapFromComponents(ctx, peer.ID, peersCustomZone, accountZones, validatedPeers, resourcePolicies, routers, nil, groupIDToUserIDs) - } else { - networkMap = account.GetPeerNetworkMap(ctx, peer.ID, peersCustomZone, accountZones, validatedPeers, resourcePolicies, routers, nil, groupIDToUserIDs) - } - } + account.InjectProxyPolicies(ctx) + resourcePolicies := account.GetResourcePoliciesMap() + routers := account.GetResourceRoutersMap() + groupIDToUserIDs := account.GetActiveGroupUsers() + networkMap := account.GetPeerNetworkMapFromComponents(ctx, peer.ID, peersCustomZone, accountZones, validatedPeers, resourcePolicies, routers, nil, groupIDToUserIDs) proxyNetworkMap, ok := proxyNetworkMaps[peer.ID] if ok { diff --git a/management/internals/controllers/network_map/interface.go b/management/internals/controllers/network_map/interface.go index 64caac861..cfea2d3de 100644 --- a/management/internals/controllers/network_map/interface.go +++ b/management/internals/controllers/network_map/interface.go @@ -12,9 +12,6 @@ import ( ) const ( - EnvNewNetworkMapBuilder = "NB_EXPERIMENT_NETWORK_MAP" - EnvNewNetworkMapAccounts = "NB_EXPERIMENT_NETWORK_MAP_ACCOUNTS" - DnsForwarderPort = nbdns.ForwarderServerPort OldForwarderPort = nbdns.ForwarderClientPort DnsForwarderPortMinVersion = "v0.59.0" diff --git a/management/internals/server/boot.go b/management/internals/server/boot.go index 24dfb641b..2b40c0aad 100644 --- a/management/internals/server/boot.go +++ b/management/internals/server/boot.go @@ -30,6 +30,7 @@ import ( nbcache "github.com/netbirdio/netbird/management/server/cache" nbContext "github.com/netbirdio/netbird/management/server/context" nbhttp "github.com/netbirdio/netbird/management/server/http" + "github.com/netbirdio/netbird/management/server/http/middleware" "github.com/netbirdio/netbird/management/server/store" "github.com/netbirdio/netbird/management/server/telemetry" mgmtProto "github.com/netbirdio/netbird/shared/management/proto" @@ -109,7 +110,7 @@ func (s *BaseServer) EventStore() activity.Store { func (s *BaseServer) APIHandler() http.Handler { return Create(s, func() http.Handler { - httpAPIHandler, err := nbhttp.NewAPIHandler(context.Background(), s.AccountManager(), s.NetworksManager(), s.ResourcesManager(), s.RoutesManager(), s.GroupsManager(), s.GeoLocationManager(), s.AuthManager(), s.Metrics(), s.IntegratedValidator(), s.ProxyController(), s.PermissionsManager(), s.PeersManager(), s.SettingsManager(), s.ZonesManager(), s.RecordsManager(), s.NetworkMapController(), s.IdpManager(), s.ServiceManager(), s.ReverseProxyDomainManager(), s.AccessLogsManager(), s.ReverseProxyGRPCServer(), s.Config.ReverseProxy.TrustedHTTPProxies) + httpAPIHandler, err := nbhttp.NewAPIHandler(context.Background(), s.AccountManager(), s.NetworksManager(), s.ResourcesManager(), s.RoutesManager(), s.GroupsManager(), s.GeoLocationManager(), s.AuthManager(), s.Metrics(), s.IntegratedValidator(), s.ProxyController(), s.PermissionsManager(), s.PeersManager(), s.SettingsManager(), s.ZonesManager(), s.RecordsManager(), s.NetworkMapController(), s.IdpManager(), s.ServiceManager(), s.ReverseProxyDomainManager(), s.AccessLogsManager(), s.ReverseProxyGRPCServer(), s.Config.ReverseProxy.TrustedHTTPProxies, s.RateLimiter()) if err != nil { log.Fatalf("failed to create API handler: %v", err) } @@ -117,6 +118,15 @@ func (s *BaseServer) APIHandler() http.Handler { }) } +func (s *BaseServer) RateLimiter() *middleware.APIRateLimiter { + return Create(s, func() *middleware.APIRateLimiter { + cfg, enabled := middleware.RateLimiterConfigFromEnv() + limiter := middleware.NewAPIRateLimiter(cfg) + limiter.SetEnabled(enabled) + return limiter + }) +} + func (s *BaseServer) GRPCServer() *grpc.Server { return Create(s, func() *grpc.Server { trustedPeers := s.Config.ReverseProxy.TrustedPeers diff --git a/management/server/account_test.go b/management/server/account_test.go index 4453d064e..756c42421 100644 --- a/management/server/account_test.go +++ b/management/server/account_test.go @@ -408,7 +408,7 @@ func TestAccount_GetPeerNetworkMap(t *testing.T) { } customZone := account.GetPeersCustomZone(context.Background(), "netbird.io") - networkMap := account.GetPeerNetworkMap(context.Background(), testCase.peerID, customZone, nil, validatedPeers, account.GetResourcePoliciesMap(), account.GetResourceRoutersMap(), nil, account.GetActiveGroupUsers()) + networkMap := account.GetPeerNetworkMapFromComponents(context.Background(), testCase.peerID, customZone, nil, validatedPeers, account.GetResourcePoliciesMap(), account.GetResourceRoutersMap(), nil, account.GetActiveGroupUsers()) assert.Len(t, networkMap.Peers, len(testCase.expectedPeers)) assert.Len(t, networkMap.OfflinePeers, len(testCase.expectedOfflinePeers)) } @@ -1171,11 +1171,6 @@ func TestAccountManager_AddPeerWithUserID(t *testing.T) { assert.Equal(t, peer.IP.String(), fmt.Sprint(ev.Meta["ip"])) } -func TestAccountManager_NetworkUpdates_SaveGroup_Experimental(t *testing.T) { - t.Setenv(network_map.EnvNewNetworkMapBuilder, "true") - testAccountManager_NetworkUpdates_SaveGroup(t) -} - func TestAccountManager_NetworkUpdates_SaveGroup(t *testing.T) { testAccountManager_NetworkUpdates_SaveGroup(t) } @@ -1231,11 +1226,6 @@ func testAccountManager_NetworkUpdates_SaveGroup(t *testing.T) { wg.Wait() } -func TestAccountManager_NetworkUpdates_DeletePolicy_Experimental(t *testing.T) { - t.Setenv(network_map.EnvNewNetworkMapBuilder, "true") - testAccountManager_NetworkUpdates_DeletePolicy(t) -} - func TestAccountManager_NetworkUpdates_DeletePolicy(t *testing.T) { testAccountManager_NetworkUpdates_DeletePolicy(t) } @@ -1274,11 +1264,6 @@ func testAccountManager_NetworkUpdates_DeletePolicy(t *testing.T) { wg.Wait() } -func TestAccountManager_NetworkUpdates_SavePolicy_Experimental(t *testing.T) { - t.Setenv(network_map.EnvNewNetworkMapBuilder, "true") - testAccountManager_NetworkUpdates_SavePolicy(t) -} - func TestAccountManager_NetworkUpdates_SavePolicy(t *testing.T) { testAccountManager_NetworkUpdates_SavePolicy(t) } @@ -1332,11 +1317,6 @@ func testAccountManager_NetworkUpdates_SavePolicy(t *testing.T) { wg.Wait() } -func TestAccountManager_NetworkUpdates_DeletePeer_Experimental(t *testing.T) { - t.Setenv(network_map.EnvNewNetworkMapBuilder, "true") - testAccountManager_NetworkUpdates_DeletePeer(t) -} - func TestAccountManager_NetworkUpdates_DeletePeer(t *testing.T) { testAccountManager_NetworkUpdates_DeletePeer(t) } @@ -1397,11 +1377,6 @@ func testAccountManager_NetworkUpdates_DeletePeer(t *testing.T) { wg.Wait() } -func TestAccountManager_NetworkUpdates_DeleteGroup_Experimental(t *testing.T) { - t.Setenv(network_map.EnvNewNetworkMapBuilder, "true") - testAccountManager_NetworkUpdates_DeleteGroup(t) -} - func TestAccountManager_NetworkUpdates_DeleteGroup(t *testing.T) { testAccountManager_NetworkUpdates_DeleteGroup(t) } @@ -1633,75 +1608,6 @@ func TestFileStore_GetRoutesByPrefix(t *testing.T) { assert.Contains(t, routeIDs, route.ID("route-2")) } -func TestAccount_GetRoutesToSync(t *testing.T) { - _, prefix, err := route.ParseNetwork("192.168.64.0/24") - if err != nil { - t.Fatal(err) - } - _, prefix2, err := route.ParseNetwork("192.168.0.0/24") - if err != nil { - t.Fatal(err) - } - account := &types.Account{ - Peers: map[string]*nbpeer.Peer{ - "peer-1": {Key: "peer-1", Meta: nbpeer.PeerSystemMeta{GoOS: "linux"}}, "peer-2": {Key: "peer-2", Meta: nbpeer.PeerSystemMeta{GoOS: "linux"}}, "peer-3": {Key: "peer-1", Meta: nbpeer.PeerSystemMeta{GoOS: "linux"}}, - }, - Groups: map[string]*types.Group{"group1": {ID: "group1", Peers: []string{"peer-1", "peer-2"}}}, - Routes: map[route.ID]*route.Route{ - "route-1": { - ID: "route-1", - Network: prefix, - NetID: "network-1", - Description: "network-1", - Peer: "peer-1", - NetworkType: 0, - Masquerade: false, - Metric: 999, - Enabled: true, - Groups: []string{"group1"}, - }, - "route-2": { - ID: "route-2", - Network: prefix2, - NetID: "network-2", - Description: "network-2", - Peer: "peer-2", - NetworkType: 0, - Masquerade: false, - Metric: 999, - Enabled: true, - Groups: []string{"group1"}, - }, - "route-3": { - ID: "route-3", - Network: prefix, - NetID: "network-1", - Description: "network-1", - Peer: "peer-2", - NetworkType: 0, - Masquerade: false, - Metric: 999, - Enabled: true, - Groups: []string{"group1"}, - }, - }, - } - - routes := account.GetRoutesToSync(context.Background(), "peer-2", []*nbpeer.Peer{{Key: "peer-1"}, {Key: "peer-3"}}, account.GetPeerGroups("peer-2")) - - assert.Len(t, routes, 2) - routeIDs := make(map[route.ID]struct{}, 2) - for _, r := range routes { - routeIDs[r.ID] = struct{}{} - } - assert.Contains(t, routeIDs, route.ID("route-2")) - assert.Contains(t, routeIDs, route.ID("route-3")) - - emptyRoutes := account.GetRoutesToSync(context.Background(), "peer-3", []*nbpeer.Peer{{Key: "peer-1"}, {Key: "peer-2"}}, account.GetPeerGroups("peer-3")) - - assert.Len(t, emptyRoutes, 0) -} - func TestAccount_Copy(t *testing.T) { account := &types.Account{ Id: "account1", @@ -1824,9 +1730,7 @@ func TestAccount_Copy(t *testing.T) { AccountID: "account1", }, }, - NetworkMapCache: &types.NetworkMapBuilder{}, } - account.InitOnce() err := hasNilField(account) if err != nil { t.Fatal(err) @@ -2311,6 +2215,29 @@ func TestAccount_GetExpiredPeers(t *testing.T) { } } +func TestGetExpiredPeers_SkipsAlreadyExpired(t *testing.T) { + ctx := context.Background() + + testStore, cleanUp, err := store.NewTestStoreFromSQL(ctx, "testdata/store_with_expired_peers.sql", t.TempDir()) + t.Cleanup(cleanUp) + require.NoError(t, err) + + accountID := "bf1c8084-ba50-4ce7-9439-34653001fc3b" + + // Verify the already-expired peer is excluded at the store level + peers, err := testStore.GetAccountPeersWithExpiration(ctx, store.LockingStrengthNone, accountID) + require.NoError(t, err) + + for _, peer := range peers { + assert.NotEqual(t, "cg05lnblo1hkg2j514p0", peer.ID, "already expired peer should be excluded by the store query") + assert.False(t, peer.Status.LoginExpired, "returned peers should not already be marked as login expired") + } + + // Only the non-expired peer with expiration enabled should be returned + require.Len(t, peers, 1) + assert.Equal(t, "notexpired01", peers[0].ID) +} + func TestAccount_GetInactivePeers(t *testing.T) { type test struct { name string @@ -3230,6 +3157,13 @@ func setupNetworkMapTest(t *testing.T) (*DefaultAccountManager, *update_channel. return manager, updateManager, account, peer1, peer2, peer3 } +// peerUpdateTimeout bounds how long peerShouldReceiveUpdate and its outer +// wrappers wait for an expected update message. Sized for slow CI runners +// (MySQL, FreeBSD, loaded sqlite) where the channel publish can take +// seconds. Only runs down on failure; passing tests return immediately +// when the channel delivers. +const peerUpdateTimeout = 5 * time.Second + func peerShouldNotReceiveUpdate(t *testing.T, updateMessage <-chan *network_map.UpdateMessage) { t.Helper() select { @@ -3248,7 +3182,7 @@ func peerShouldReceiveUpdate(t *testing.T, updateMessage <-chan *network_map.Upd if msg == nil { t.Errorf("Received nil update message, expected valid message") } - case <-time.After(500 * time.Millisecond): + case <-time.After(peerUpdateTimeout): t.Error("Timed out waiting for update message") } } diff --git a/management/server/dns_test.go b/management/server/dns_test.go index 0e37a3b22..c443223c6 100644 --- a/management/server/dns_test.go +++ b/management/server/dns_test.go @@ -458,7 +458,7 @@ func TestDNSAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) @@ -478,7 +478,7 @@ func TestDNSAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) @@ -518,7 +518,7 @@ func TestDNSAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) diff --git a/management/server/group_test.go b/management/server/group_test.go index fa818e532..5821b90a3 100644 --- a/management/server/group_test.go +++ b/management/server/group_test.go @@ -620,7 +620,7 @@ func TestGroupAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) @@ -638,7 +638,7 @@ func TestGroupAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) @@ -656,7 +656,7 @@ func TestGroupAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) @@ -689,7 +689,7 @@ func TestGroupAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) @@ -730,7 +730,7 @@ func TestGroupAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) @@ -757,7 +757,7 @@ func TestGroupAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) @@ -804,7 +804,7 @@ func TestGroupAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) diff --git a/management/server/http/handler.go b/management/server/http/handler.go index ad36b9d46..56b2d8203 100644 --- a/management/server/http/handler.go +++ b/management/server/http/handler.go @@ -5,9 +5,6 @@ import ( "fmt" "net/http" "net/netip" - "os" - "strconv" - "time" "github.com/gorilla/mux" "github.com/rs/cors" @@ -66,14 +63,11 @@ import ( ) const ( - apiPrefix = "/api" - rateLimitingEnabledKey = "NB_API_RATE_LIMITING_ENABLED" - rateLimitingBurstKey = "NB_API_RATE_LIMITING_BURST" - rateLimitingRPMKey = "NB_API_RATE_LIMITING_RPM" + apiPrefix = "/api" ) // NewAPIHandler creates the Management service HTTP API handler registering all the available endpoints. -func NewAPIHandler(ctx context.Context, accountManager account.Manager, networksManager nbnetworks.Manager, resourceManager resources.Manager, routerManager routers.Manager, groupsManager nbgroups.Manager, LocationManager geolocation.Geolocation, authManager auth.Manager, appMetrics telemetry.AppMetrics, integratedValidator integrated_validator.IntegratedValidator, proxyController port_forwarding.Controller, permissionsManager permissions.Manager, peersManager nbpeers.Manager, settingsManager settings.Manager, zManager zones.Manager, rManager records.Manager, networkMapController network_map.Controller, idpManager idpmanager.Manager, serviceManager service.Manager, reverseProxyDomainManager *manager.Manager, reverseProxyAccessLogsManager accesslogs.Manager, proxyGRPCServer *nbgrpc.ProxyServiceServer, trustedHTTPProxies []netip.Prefix) (http.Handler, error) { +func NewAPIHandler(ctx context.Context, accountManager account.Manager, networksManager nbnetworks.Manager, resourceManager resources.Manager, routerManager routers.Manager, groupsManager nbgroups.Manager, LocationManager geolocation.Geolocation, authManager auth.Manager, appMetrics telemetry.AppMetrics, integratedValidator integrated_validator.IntegratedValidator, proxyController port_forwarding.Controller, permissionsManager permissions.Manager, peersManager nbpeers.Manager, settingsManager settings.Manager, zManager zones.Manager, rManager records.Manager, networkMapController network_map.Controller, idpManager idpmanager.Manager, serviceManager service.Manager, reverseProxyDomainManager *manager.Manager, reverseProxyAccessLogsManager accesslogs.Manager, proxyGRPCServer *nbgrpc.ProxyServiceServer, trustedHTTPProxies []netip.Prefix, rateLimiter *middleware.APIRateLimiter) (http.Handler, error) { // Register bypass paths for unauthenticated endpoints if err := bypass.AddBypassPath("/api/instance"); err != nil { @@ -94,34 +88,10 @@ func NewAPIHandler(ctx context.Context, accountManager account.Manager, networks return nil, fmt.Errorf("failed to add bypass path: %w", err) } - var rateLimitingConfig *middleware.RateLimiterConfig - if os.Getenv(rateLimitingEnabledKey) == "true" { - rpm := 6 - if v := os.Getenv(rateLimitingRPMKey); v != "" { - value, err := strconv.Atoi(v) - if err != nil { - log.Warnf("parsing %s env var: %v, using default %d", rateLimitingRPMKey, err, rpm) - } else { - rpm = value - } - } - - burst := 500 - if v := os.Getenv(rateLimitingBurstKey); v != "" { - value, err := strconv.Atoi(v) - if err != nil { - log.Warnf("parsing %s env var: %v, using default %d", rateLimitingBurstKey, err, burst) - } else { - burst = value - } - } - - rateLimitingConfig = &middleware.RateLimiterConfig{ - RequestsPerMinute: float64(rpm), - Burst: burst, - CleanupInterval: 6 * time.Hour, - LimiterTTL: 24 * time.Hour, - } + if rateLimiter == nil { + log.Warn("NewAPIHandler: nil rate limiter, rate limiting disabled") + rateLimiter = middleware.NewAPIRateLimiter(nil) + rateLimiter.SetEnabled(false) } authMiddleware := middleware.NewAuthMiddleware( @@ -129,7 +99,7 @@ func NewAPIHandler(ctx context.Context, accountManager account.Manager, networks accountManager.GetAccountIDFromUserAuth, accountManager.SyncUserJWTGroups, accountManager.GetUserFromUserAuth, - rateLimitingConfig, + rateLimiter, appMetrics.GetMeter(), ) diff --git a/management/server/http/handlers/peers/peers_handler.go b/management/server/http/handlers/peers/peers_handler.go index 6b9a69f04..bf6937a49 100644 --- a/management/server/http/handlers/peers/peers_handler.go +++ b/management/server/http/handlers/peers/peers_handler.go @@ -417,7 +417,7 @@ func (h *Handler) GetAccessiblePeers(w http.ResponseWriter, r *http.Request) { dnsDomain := h.networkMapController.GetDNSDomain(account.Settings) - netMap := account.GetPeerNetworkMap(r.Context(), peerID, dns.CustomZone{}, nil, validPeers, account.GetResourcePoliciesMap(), account.GetResourceRoutersMap(), nil, account.GetActiveGroupUsers()) + netMap := account.GetPeerNetworkMapFromComponents(r.Context(), peerID, dns.CustomZone{}, nil, validPeers, account.GetResourcePoliciesMap(), account.GetResourceRoutersMap(), nil, account.GetActiveGroupUsers()) util.WriteJSONObject(r.Context(), w, toAccessiblePeers(netMap, dnsDomain)) } diff --git a/management/server/http/middleware/auth_middleware.go b/management/server/http/middleware/auth_middleware.go index 63be672e6..6d075d9c2 100644 --- a/management/server/http/middleware/auth_middleware.go +++ b/management/server/http/middleware/auth_middleware.go @@ -12,6 +12,7 @@ import ( "go.opentelemetry.io/otel/metric" "github.com/netbirdio/management-integrations/integrations" + serverauth "github.com/netbirdio/netbird/management/server/auth" nbcontext "github.com/netbirdio/netbird/management/server/context" "github.com/netbirdio/netbird/management/server/http/middleware/bypass" @@ -42,14 +43,9 @@ func NewAuthMiddleware( ensureAccount EnsureAccountFunc, syncUserJWTGroups SyncUserJWTGroupsFunc, getUserFromUserAuth GetUserFromUserAuthFunc, - rateLimiterConfig *RateLimiterConfig, + rateLimiter *APIRateLimiter, meter metric.Meter, ) *AuthMiddleware { - var rateLimiter *APIRateLimiter - if rateLimiterConfig != nil { - rateLimiter = NewAPIRateLimiter(rateLimiterConfig) - } - var patUsageTracker *PATUsageTracker if meter != nil { var err error @@ -87,17 +83,14 @@ func (m *AuthMiddleware) Handler(h http.Handler) http.Handler { switch authType { case "bearer": - request, err := m.checkJWTFromRequest(r, authHeader) - if err != nil { + if err := m.checkJWTFromRequest(r, authHeader); err != nil { log.WithContext(r.Context()).Errorf("Error when validating JWT: %s", err.Error()) util.WriteError(r.Context(), status.Errorf(status.Unauthorized, "token invalid"), w) return } - - h.ServeHTTP(w, request) + h.ServeHTTP(w, r) case "token": - request, err := m.checkPATFromRequest(r, authHeader) - if err != nil { + if err := m.checkPATFromRequest(r, authHeader); err != nil { log.WithContext(r.Context()).Debugf("Error when validating PAT: %s", err.Error()) // Check if it's a status error, otherwise default to Unauthorized if _, ok := status.FromError(err); !ok { @@ -106,7 +99,7 @@ func (m *AuthMiddleware) Handler(h http.Handler) http.Handler { util.WriteError(r.Context(), err, w) return } - h.ServeHTTP(w, request) + h.ServeHTTP(w, r) default: util.WriteError(r.Context(), status.Errorf(status.Unauthorized, "no valid authentication provided"), w) return @@ -115,19 +108,19 @@ func (m *AuthMiddleware) Handler(h http.Handler) http.Handler { } // CheckJWTFromRequest checks if the JWT is valid -func (m *AuthMiddleware) checkJWTFromRequest(r *http.Request, authHeaderParts []string) (*http.Request, error) { +func (m *AuthMiddleware) checkJWTFromRequest(r *http.Request, authHeaderParts []string) error { token, err := getTokenFromJWTRequest(authHeaderParts) // If an error occurs, call the error handler and return an error if err != nil { - return r, fmt.Errorf("error extracting token: %w", err) + return fmt.Errorf("error extracting token: %w", err) } ctx := r.Context() userAuth, validatedToken, err := m.authManager.ValidateAndParseToken(ctx, token) if err != nil { - return r, err + return err } if impersonate, ok := r.URL.Query()["account"]; ok && len(impersonate) == 1 { @@ -143,7 +136,7 @@ func (m *AuthMiddleware) checkJWTFromRequest(r *http.Request, authHeaderParts [] // we need to call this method because if user is new, we will automatically add it to existing or create a new account accountId, _, err := m.ensureAccount(ctx, userAuth) if err != nil { - return r, err + return err } if userAuth.AccountId != accountId { @@ -153,7 +146,7 @@ func (m *AuthMiddleware) checkJWTFromRequest(r *http.Request, authHeaderParts [] userAuth, err = m.authManager.EnsureUserAccessByJWTGroups(ctx, userAuth, validatedToken) if err != nil { - return r, err + return err } err = m.syncUserJWTGroups(ctx, userAuth) @@ -164,41 +157,41 @@ func (m *AuthMiddleware) checkJWTFromRequest(r *http.Request, authHeaderParts [] _, err = m.getUserFromUserAuth(ctx, userAuth) if err != nil { log.WithContext(ctx).Errorf("HTTP server failed to update user from user auth: %s", err) - return r, err + return err } - return nbcontext.SetUserAuthInRequest(r, userAuth), nil + // propagates ctx change to upstream middleware + *r = *nbcontext.SetUserAuthInRequest(r, userAuth) + return nil } // CheckPATFromRequest checks if the PAT is valid -func (m *AuthMiddleware) checkPATFromRequest(r *http.Request, authHeaderParts []string) (*http.Request, error) { +func (m *AuthMiddleware) checkPATFromRequest(r *http.Request, authHeaderParts []string) error { token, err := getTokenFromPATRequest(authHeaderParts) if err != nil { - return r, fmt.Errorf("error extracting token: %w", err) + return fmt.Errorf("error extracting token: %w", err) } if m.patUsageTracker != nil { m.patUsageTracker.IncrementUsage(token) } - if m.rateLimiter != nil && !isTerraformRequest(r) { - if !m.rateLimiter.Allow(token) { - return r, status.Errorf(status.TooManyRequests, "too many requests") - } + if !isTerraformRequest(r) && !m.rateLimiter.Allow(token) { + return status.Errorf(status.TooManyRequests, "too many requests") } ctx := r.Context() user, pat, accDomain, accCategory, err := m.authManager.GetPATInfo(ctx, token) if err != nil { - return r, fmt.Errorf("invalid Token: %w", err) + return fmt.Errorf("invalid Token: %w", err) } if time.Now().After(pat.GetExpirationDate()) { - return r, fmt.Errorf("token expired") + return fmt.Errorf("token expired") } err = m.authManager.MarkPATUsed(ctx, pat.ID) if err != nil { - return r, err + return err } userAuth := auth.UserAuth{ @@ -216,7 +209,9 @@ func (m *AuthMiddleware) checkPATFromRequest(r *http.Request, authHeaderParts [] } } - return nbcontext.SetUserAuthInRequest(r, userAuth), nil + // propagates ctx change to upstream middleware + *r = *nbcontext.SetUserAuthInRequest(r, userAuth) + return nil } func isTerraformRequest(r *http.Request) bool { diff --git a/management/server/http/middleware/auth_middleware_test.go b/management/server/http/middleware/auth_middleware_test.go index f397c63a4..8f736fbfd 100644 --- a/management/server/http/middleware/auth_middleware_test.go +++ b/management/server/http/middleware/auth_middleware_test.go @@ -196,6 +196,8 @@ func TestAuthMiddleware_Handler(t *testing.T) { GetPATInfoFunc: mockGetAccountInfoFromPAT, } + disabledLimiter := NewAPIRateLimiter(nil) + disabledLimiter.SetEnabled(false) authMiddleware := NewAuthMiddleware( mockAuth, func(ctx context.Context, userAuth nbauth.UserAuth) (string, string, error) { @@ -207,7 +209,7 @@ func TestAuthMiddleware_Handler(t *testing.T) { func(ctx context.Context, userAuth nbauth.UserAuth) (*types.User, error) { return &types.User{}, nil }, - nil, + disabledLimiter, nil, ) @@ -266,7 +268,7 @@ func TestAuthMiddleware_RateLimiting(t *testing.T) { func(ctx context.Context, userAuth nbauth.UserAuth) (*types.User, error) { return &types.User{}, nil }, - rateLimitConfig, + NewAPIRateLimiter(rateLimitConfig), nil, ) @@ -318,7 +320,7 @@ func TestAuthMiddleware_RateLimiting(t *testing.T) { func(ctx context.Context, userAuth nbauth.UserAuth) (*types.User, error) { return &types.User{}, nil }, - rateLimitConfig, + NewAPIRateLimiter(rateLimitConfig), nil, ) @@ -361,7 +363,7 @@ func TestAuthMiddleware_RateLimiting(t *testing.T) { func(ctx context.Context, userAuth nbauth.UserAuth) (*types.User, error) { return &types.User{}, nil }, - rateLimitConfig, + NewAPIRateLimiter(rateLimitConfig), nil, ) @@ -405,7 +407,7 @@ func TestAuthMiddleware_RateLimiting(t *testing.T) { func(ctx context.Context, userAuth nbauth.UserAuth) (*types.User, error) { return &types.User{}, nil }, - rateLimitConfig, + NewAPIRateLimiter(rateLimitConfig), nil, ) @@ -469,7 +471,7 @@ func TestAuthMiddleware_RateLimiting(t *testing.T) { func(ctx context.Context, userAuth nbauth.UserAuth) (*types.User, error) { return &types.User{}, nil }, - rateLimitConfig, + NewAPIRateLimiter(rateLimitConfig), nil, ) @@ -528,7 +530,7 @@ func TestAuthMiddleware_RateLimiting(t *testing.T) { func(ctx context.Context, userAuth nbauth.UserAuth) (*types.User, error) { return &types.User{}, nil }, - rateLimitConfig, + NewAPIRateLimiter(rateLimitConfig), nil, ) @@ -583,7 +585,7 @@ func TestAuthMiddleware_RateLimiting(t *testing.T) { func(ctx context.Context, userAuth nbauth.UserAuth) (*types.User, error) { return &types.User{}, nil }, - rateLimitConfig, + NewAPIRateLimiter(rateLimitConfig), nil, ) @@ -670,6 +672,8 @@ func TestAuthMiddleware_Handler_Child(t *testing.T) { GetPATInfoFunc: mockGetAccountInfoFromPAT, } + disabledLimiter := NewAPIRateLimiter(nil) + disabledLimiter.SetEnabled(false) authMiddleware := NewAuthMiddleware( mockAuth, func(ctx context.Context, userAuth nbauth.UserAuth) (string, string, error) { @@ -681,7 +685,7 @@ func TestAuthMiddleware_Handler_Child(t *testing.T) { func(ctx context.Context, userAuth nbauth.UserAuth) (*types.User, error) { return &types.User{}, nil }, - nil, + disabledLimiter, nil, ) diff --git a/management/server/http/middleware/rate_limiter.go b/management/server/http/middleware/rate_limiter.go index 936b34319..bfd44afee 100644 --- a/management/server/http/middleware/rate_limiter.go +++ b/management/server/http/middleware/rate_limiter.go @@ -4,14 +4,27 @@ import ( "context" "net" "net/http" + "os" + "strconv" "sync" + "sync/atomic" "time" + log "github.com/sirupsen/logrus" "golang.org/x/time/rate" "github.com/netbirdio/netbird/shared/management/http/util" ) +const ( + RateLimitingEnabledEnv = "NB_API_RATE_LIMITING_ENABLED" + RateLimitingBurstEnv = "NB_API_RATE_LIMITING_BURST" + RateLimitingRPMEnv = "NB_API_RATE_LIMITING_RPM" + + defaultAPIRPM = 6 + defaultAPIBurst = 500 +) + // RateLimiterConfig holds configuration for the API rate limiter type RateLimiterConfig struct { // RequestsPerMinute defines the rate at which tokens are replenished @@ -34,6 +47,43 @@ func DefaultRateLimiterConfig() *RateLimiterConfig { } } +func RateLimiterConfigFromEnv() (cfg *RateLimiterConfig, enabled bool) { + rpm := defaultAPIRPM + if v := os.Getenv(RateLimitingRPMEnv); v != "" { + value, err := strconv.Atoi(v) + if err != nil { + log.Warnf("parsing %s env var: %v, using default %d", RateLimitingRPMEnv, err, rpm) + } else { + rpm = value + } + } + if rpm <= 0 { + log.Warnf("%s=%d is non-positive, using default %d", RateLimitingRPMEnv, rpm, defaultAPIRPM) + rpm = defaultAPIRPM + } + + burst := defaultAPIBurst + if v := os.Getenv(RateLimitingBurstEnv); v != "" { + value, err := strconv.Atoi(v) + if err != nil { + log.Warnf("parsing %s env var: %v, using default %d", RateLimitingBurstEnv, err, burst) + } else { + burst = value + } + } + if burst <= 0 { + log.Warnf("%s=%d is non-positive, using default %d", RateLimitingBurstEnv, burst, defaultAPIBurst) + burst = defaultAPIBurst + } + + return &RateLimiterConfig{ + RequestsPerMinute: float64(rpm), + Burst: burst, + CleanupInterval: 6 * time.Hour, + LimiterTTL: 24 * time.Hour, + }, os.Getenv(RateLimitingEnabledEnv) == "true" +} + // limiterEntry holds a rate limiter and its last access time type limiterEntry struct { limiter *rate.Limiter @@ -46,6 +96,7 @@ type APIRateLimiter struct { limiters map[string]*limiterEntry mu sync.RWMutex stopChan chan struct{} + enabled atomic.Bool } // NewAPIRateLimiter creates a new API rate limiter with the given configuration @@ -59,14 +110,53 @@ func NewAPIRateLimiter(config *RateLimiterConfig) *APIRateLimiter { limiters: make(map[string]*limiterEntry), stopChan: make(chan struct{}), } + rl.enabled.Store(true) go rl.cleanupLoop() return rl } +func (rl *APIRateLimiter) SetEnabled(enabled bool) { + rl.enabled.Store(enabled) +} + +func (rl *APIRateLimiter) Enabled() bool { + return rl.enabled.Load() +} + +func (rl *APIRateLimiter) UpdateConfig(config *RateLimiterConfig) { + if config == nil { + return + } + if config.RequestsPerMinute <= 0 || config.Burst <= 0 { + log.Warnf("UpdateConfig: ignoring invalid rpm=%v burst=%d", config.RequestsPerMinute, config.Burst) + return + } + + newRPS := rate.Limit(config.RequestsPerMinute / 60.0) + newBurst := config.Burst + + rl.mu.Lock() + rl.config.RequestsPerMinute = config.RequestsPerMinute + rl.config.Burst = newBurst + snapshot := make([]*rate.Limiter, 0, len(rl.limiters)) + for _, entry := range rl.limiters { + snapshot = append(snapshot, entry.limiter) + } + rl.mu.Unlock() + + for _, l := range snapshot { + l.SetLimit(newRPS) + l.SetBurst(newBurst) + } +} + // Allow checks if a request for the given key (token) is allowed func (rl *APIRateLimiter) Allow(key string) bool { + if !rl.enabled.Load() { + return true + } limiter := rl.getLimiter(key) return limiter.Allow() } @@ -74,6 +164,9 @@ func (rl *APIRateLimiter) Allow(key string) bool { // Wait blocks until the rate limiter allows another request for the given key // Returns an error if the context is canceled func (rl *APIRateLimiter) Wait(ctx context.Context, key string) error { + if !rl.enabled.Load() { + return nil + } limiter := rl.getLimiter(key) return limiter.Wait(ctx) } @@ -153,6 +246,10 @@ func (rl *APIRateLimiter) Reset(key string) { // Returns 429 Too Many Requests if the rate limit is exceeded. func (rl *APIRateLimiter) Middleware(next http.Handler) http.Handler { return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if !rl.enabled.Load() { + next.ServeHTTP(w, r) + return + } clientIP := getClientIP(r) if !rl.Allow(clientIP) { util.WriteErrorResponse("rate limit exceeded, please try again later", http.StatusTooManyRequests, w) diff --git a/management/server/http/middleware/rate_limiter_test.go b/management/server/http/middleware/rate_limiter_test.go index 68f804e57..4b97d1874 100644 --- a/management/server/http/middleware/rate_limiter_test.go +++ b/management/server/http/middleware/rate_limiter_test.go @@ -1,8 +1,10 @@ package middleware import ( + "fmt" "net/http" "net/http/httptest" + "sync" "testing" "time" @@ -156,3 +158,172 @@ func TestAPIRateLimiter_Reset(t *testing.T) { // Should be allowed again assert.True(t, rl.Allow("test-key")) } + +func TestAPIRateLimiter_SetEnabled(t *testing.T) { + rl := NewAPIRateLimiter(&RateLimiterConfig{ + RequestsPerMinute: 60, + Burst: 1, + CleanupInterval: time.Minute, + LimiterTTL: time.Minute, + }) + defer rl.Stop() + + assert.True(t, rl.Allow("key")) + assert.False(t, rl.Allow("key"), "burst exhausted while enabled") + + rl.SetEnabled(false) + assert.False(t, rl.Enabled()) + for i := 0; i < 5; i++ { + assert.True(t, rl.Allow("key"), "disabled limiter must always allow") + } + + rl.SetEnabled(true) + assert.True(t, rl.Enabled()) + assert.False(t, rl.Allow("key"), "re-enabled limiter retains prior bucket state") +} + +func TestAPIRateLimiter_UpdateConfig(t *testing.T) { + rl := NewAPIRateLimiter(&RateLimiterConfig{ + RequestsPerMinute: 60, + Burst: 2, + CleanupInterval: time.Minute, + LimiterTTL: time.Minute, + }) + defer rl.Stop() + + assert.True(t, rl.Allow("k1")) + assert.True(t, rl.Allow("k1")) + assert.False(t, rl.Allow("k1"), "burst=2 exhausted") + + rl.UpdateConfig(&RateLimiterConfig{ + RequestsPerMinute: 60, + Burst: 10, + CleanupInterval: time.Minute, + LimiterTTL: time.Minute, + }) + + // New burst applies to existing keys in place; bucket refills up to new burst over time, + // but importantly newly-added keys use the updated config immediately. + assert.True(t, rl.Allow("k2")) + for i := 0; i < 9; i++ { + assert.True(t, rl.Allow("k2")) + } + assert.False(t, rl.Allow("k2"), "new burst=10 exhausted") +} + +func TestAPIRateLimiter_UpdateConfig_NilIgnored(t *testing.T) { + rl := NewAPIRateLimiter(&RateLimiterConfig{ + RequestsPerMinute: 60, + Burst: 1, + CleanupInterval: time.Minute, + LimiterTTL: time.Minute, + }) + defer rl.Stop() + + rl.UpdateConfig(nil) // must not panic or zero the config + + assert.True(t, rl.Allow("k")) + assert.False(t, rl.Allow("k")) +} + +func TestAPIRateLimiter_UpdateConfig_NonPositiveIgnored(t *testing.T) { + rl := NewAPIRateLimiter(&RateLimiterConfig{ + RequestsPerMinute: 60, + Burst: 1, + CleanupInterval: time.Minute, + LimiterTTL: time.Minute, + }) + defer rl.Stop() + + assert.True(t, rl.Allow("k")) + assert.False(t, rl.Allow("k")) + + rl.UpdateConfig(&RateLimiterConfig{RequestsPerMinute: 0, Burst: 0, CleanupInterval: time.Minute, LimiterTTL: time.Minute}) + rl.UpdateConfig(&RateLimiterConfig{RequestsPerMinute: -1, Burst: 5, CleanupInterval: time.Minute, LimiterTTL: time.Minute}) + rl.UpdateConfig(&RateLimiterConfig{RequestsPerMinute: 60, Burst: -1, CleanupInterval: time.Minute, LimiterTTL: time.Minute}) + + rl.Reset("k") + assert.True(t, rl.Allow("k")) + assert.False(t, rl.Allow("k"), "burst should still be 1 — invalid UpdateConfig calls were ignored") +} + +func TestAPIRateLimiter_ConcurrentAllowAndUpdate(t *testing.T) { + rl := NewAPIRateLimiter(&RateLimiterConfig{ + RequestsPerMinute: 600, + Burst: 10, + CleanupInterval: time.Minute, + LimiterTTL: time.Minute, + }) + defer rl.Stop() + + var wg sync.WaitGroup + stop := make(chan struct{}) + + for i := 0; i < 8; i++ { + wg.Add(1) + go func(id int) { + defer wg.Done() + key := fmt.Sprintf("k%d", id) + for { + select { + case <-stop: + return + default: + rl.Allow(key) + } + } + }(i) + } + + wg.Add(1) + go func() { + defer wg.Done() + for i := 0; i < 200; i++ { + select { + case <-stop: + return + default: + rl.UpdateConfig(&RateLimiterConfig{ + RequestsPerMinute: float64(30 + (i % 90)), + Burst: 1 + (i % 20), + CleanupInterval: time.Minute, + LimiterTTL: time.Minute, + }) + rl.SetEnabled(i%2 == 0) + } + } + }() + + time.Sleep(100 * time.Millisecond) + close(stop) + wg.Wait() +} + +func TestRateLimiterConfigFromEnv(t *testing.T) { + t.Setenv(RateLimitingEnabledEnv, "true") + t.Setenv(RateLimitingRPMEnv, "42") + t.Setenv(RateLimitingBurstEnv, "7") + + cfg, enabled := RateLimiterConfigFromEnv() + assert.True(t, enabled) + assert.Equal(t, float64(42), cfg.RequestsPerMinute) + assert.Equal(t, 7, cfg.Burst) + + t.Setenv(RateLimitingEnabledEnv, "false") + _, enabled = RateLimiterConfigFromEnv() + assert.False(t, enabled) + + t.Setenv(RateLimitingEnabledEnv, "") + t.Setenv(RateLimitingRPMEnv, "") + t.Setenv(RateLimitingBurstEnv, "") + cfg, enabled = RateLimiterConfigFromEnv() + assert.False(t, enabled) + assert.Equal(t, float64(defaultAPIRPM), cfg.RequestsPerMinute) + assert.Equal(t, defaultAPIBurst, cfg.Burst) + + t.Setenv(RateLimitingRPMEnv, "0") + t.Setenv(RateLimitingBurstEnv, "-5") + cfg, _ = RateLimiterConfigFromEnv() + assert.Equal(t, float64(defaultAPIRPM), cfg.RequestsPerMinute, "non-positive rpm must fall back to default") + assert.Equal(t, defaultAPIBurst, cfg.Burst, "non-positive burst must fall back to default") +} diff --git a/management/server/http/testing/testing_tools/channel/channel.go b/management/server/http/testing/testing_tools/channel/channel.go index 0203d6177..1a8b83c7e 100644 --- a/management/server/http/testing/testing_tools/channel/channel.go +++ b/management/server/http/testing/testing_tools/channel/channel.go @@ -135,7 +135,7 @@ func BuildApiBlackBoxWithDBState(t testing_tools.TB, sqlFile string, expectedPee customZonesManager := zonesManager.NewManager(store, am, permissionsManager, "") zoneRecordsManager := recordsManager.NewManager(store, am, permissionsManager) - apiHandler, err := http2.NewAPIHandler(context.Background(), am, networksManager, resourcesManager, routersManager, groupsManager, geoMock, authManagerMock, metrics, validatorMock, proxyController, permissionsManager, peersManager, settingsManager, customZonesManager, zoneRecordsManager, networkMapController, nil, serviceManager, nil, nil, nil, nil) + apiHandler, err := http2.NewAPIHandler(context.Background(), am, networksManager, resourcesManager, routersManager, groupsManager, geoMock, authManagerMock, metrics, validatorMock, proxyController, permissionsManager, peersManager, settingsManager, customZonesManager, zoneRecordsManager, networkMapController, nil, serviceManager, nil, nil, nil, nil, nil) if err != nil { t.Fatalf("Failed to create API handler: %v", err) } @@ -264,7 +264,7 @@ func BuildApiBlackBoxWithDBStateAndPeerChannel(t testing_tools.TB, sqlFile strin customZonesManager := zonesManager.NewManager(store, am, permissionsManager, "") zoneRecordsManager := recordsManager.NewManager(store, am, permissionsManager) - apiHandler, err := http2.NewAPIHandler(context.Background(), am, networksManager, resourcesManager, routersManager, groupsManager, geoMock, authManagerMock, metrics, validatorMock, proxyController, permissionsManager, peersManager, settingsManager, customZonesManager, zoneRecordsManager, networkMapController, nil, serviceManager, nil, nil, nil, nil) + apiHandler, err := http2.NewAPIHandler(context.Background(), am, networksManager, resourcesManager, routersManager, groupsManager, geoMock, authManagerMock, metrics, validatorMock, proxyController, permissionsManager, peersManager, settingsManager, customZonesManager, zoneRecordsManager, networkMapController, nil, serviceManager, nil, nil, nil, nil, nil) if err != nil { t.Fatalf("Failed to create API handler: %v", err) } diff --git a/management/server/management_proto_test.go b/management/server/management_proto_test.go index 4e6eb0a33..18d85315d 100644 --- a/management/server/management_proto_test.go +++ b/management/server/management_proto_test.go @@ -267,8 +267,8 @@ func Test_SyncProtocol(t *testing.T) { } // expired peers come separately. - if len(networkMap.GetOfflinePeers()) != 1 { - t.Fatal("expecting SyncResponse to have NetworkMap with 1 offline peer") + if len(networkMap.GetOfflinePeers()) != 2 { + t.Fatal("expecting SyncResponse to have NetworkMap with 2 offline peer") } expiredPeerPubKey := "RlSy2vzoG2HyMBTUImXOiVhCBiiBa5qD5xzMxkiFDW4=" diff --git a/management/server/nameserver_test.go b/management/server/nameserver_test.go index d10d4464f..b2c8300d6 100644 --- a/management/server/nameserver_test.go +++ b/management/server/nameserver_test.go @@ -1087,7 +1087,7 @@ func TestNameServerAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) @@ -1105,7 +1105,7 @@ func TestNameServerAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) diff --git a/management/server/peer.go b/management/server/peer.go index a02e34e0d..a95ae17a3 100644 --- a/management/server/peer.go +++ b/management/server/peer.go @@ -1405,6 +1405,10 @@ func (am *DefaultAccountManager) getExpiredPeers(ctx context.Context, accountID var peers []*nbpeer.Peer for _, peer := range peersWithExpiry { + if peer.Status.LoginExpired { + continue + } + expired, _ := peer.LoginExpired(settings.PeerLoginExpiration) if expired { peers = append(peers, peer) diff --git a/management/server/peer_test.go b/management/server/peer_test.go index 6f8d924fd..17202597a 100644 --- a/management/server/peer_test.go +++ b/management/server/peer_test.go @@ -179,11 +179,6 @@ func TestAccountManager_GetNetworkMap(t *testing.T) { testGetNetworkMapGeneral(t) } -func TestAccountManager_GetNetworkMap_Experimental(t *testing.T) { - t.Setenv(network_map.EnvNewNetworkMapBuilder, "true") - testGetNetworkMapGeneral(t) -} - func testGetNetworkMapGeneral(t *testing.T) { manager, _, err := createManager(t) if err != nil { @@ -1016,11 +1011,6 @@ func BenchmarkUpdateAccountPeers(b *testing.B) { } } -func TestUpdateAccountPeers_Experimental(t *testing.T) { - t.Setenv(network_map.EnvNewNetworkMapBuilder, "true") - testUpdateAccountPeers(t) -} - func TestUpdateAccountPeers(t *testing.T) { testUpdateAccountPeers(t) } @@ -1600,7 +1590,6 @@ func Test_RegisterPeerRollbackOnFailure(t *testing.T) { } func Test_LoginPeer(t *testing.T) { - t.Setenv(network_map.EnvNewNetworkMapBuilder, "true") if runtime.GOOS == "windows" { t.Skip("The SQLite store is not properly supported by Windows yet") } @@ -1907,7 +1896,7 @@ func TestPeerAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) @@ -1929,7 +1918,7 @@ func TestPeerAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) @@ -1994,7 +1983,7 @@ func TestPeerAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) @@ -2012,7 +2001,7 @@ func TestPeerAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) @@ -2058,7 +2047,7 @@ func TestPeerAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) @@ -2076,7 +2065,7 @@ func TestPeerAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) @@ -2113,7 +2102,7 @@ func TestPeerAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) @@ -2131,7 +2120,7 @@ func TestPeerAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) diff --git a/management/server/policy.go b/management/server/policy.go index 3e84c3d10..48297ca11 100644 --- a/management/server/policy.go +++ b/management/server/policy.go @@ -5,6 +5,7 @@ import ( _ "embed" "github.com/rs/xid" + "github.com/sirupsen/logrus" "github.com/netbirdio/netbird/management/server/permissions/modules" "github.com/netbirdio/netbird/management/server/permissions/operations" @@ -46,25 +47,40 @@ func (am *DefaultAccountManager) SavePolicy(ctx context.Context, accountID, user var isUpdate = policy.ID != "" var updateAccountPeers bool var action = activity.PolicyAdded + var unchanged bool err = am.Store.ExecuteInTransaction(ctx, func(transaction store.Store) error { - if err = validatePolicy(ctx, transaction, accountID, policy); err != nil { - return err - } - - updateAccountPeers, err = arePolicyChangesAffectPeers(ctx, transaction, accountID, policy, isUpdate) + existingPolicy, err := validatePolicy(ctx, transaction, accountID, policy) if err != nil { return err } - saveFunc := transaction.CreatePolicy if isUpdate { - action = activity.PolicyUpdated - saveFunc = transaction.SavePolicy - } + if policy.Equal(existingPolicy) { + logrus.WithContext(ctx).Tracef("policy update skipped because equal to stored one - policy id %s", policy.ID) + unchanged = true + return nil + } - if err = saveFunc(ctx, policy); err != nil { - return err + action = activity.PolicyUpdated + + updateAccountPeers, err = arePolicyChangesAffectPeersWithExisting(ctx, transaction, policy, existingPolicy) + if err != nil { + return err + } + + if err = transaction.SavePolicy(ctx, policy); err != nil { + return err + } + } else { + updateAccountPeers, err = arePolicyChangesAffectPeers(ctx, transaction, policy) + if err != nil { + return err + } + + if err = transaction.CreatePolicy(ctx, policy); err != nil { + return err + } } return transaction.IncrementNetworkSerial(ctx, accountID) @@ -73,6 +89,10 @@ func (am *DefaultAccountManager) SavePolicy(ctx context.Context, accountID, user return nil, err } + if unchanged { + return policy, nil + } + am.StoreEvent(ctx, userID, policy.ID, accountID, action, policy.EventMeta()) if updateAccountPeers { @@ -101,7 +121,7 @@ func (am *DefaultAccountManager) DeletePolicy(ctx context.Context, accountID, po return err } - updateAccountPeers, err = arePolicyChangesAffectPeers(ctx, transaction, accountID, policy, false) + updateAccountPeers, err = arePolicyChangesAffectPeers(ctx, transaction, policy) if err != nil { return err } @@ -138,34 +158,37 @@ func (am *DefaultAccountManager) ListPolicies(ctx context.Context, accountID, us return am.Store.GetAccountPolicies(ctx, store.LockingStrengthNone, accountID) } -// arePolicyChangesAffectPeers checks if changes to a policy will affect any associated peers. -func arePolicyChangesAffectPeers(ctx context.Context, transaction store.Store, accountID string, policy *types.Policy, isUpdate bool) (bool, error) { - if isUpdate { - existingPolicy, err := transaction.GetPolicyByID(ctx, store.LockingStrengthNone, accountID, policy.ID) - if err != nil { - return false, err - } - - if !policy.Enabled && !existingPolicy.Enabled { - return false, nil - } - - for _, rule := range existingPolicy.Rules { - if rule.SourceResource.Type != "" || rule.DestinationResource.Type != "" { - return true, nil - } - } - - hasPeers, err := anyGroupHasPeersOrResources(ctx, transaction, policy.AccountID, existingPolicy.RuleGroups()) - if err != nil { - return false, err - } - - if hasPeers { +// arePolicyChangesAffectPeers checks if a policy (being created or deleted) will affect any associated peers. +func arePolicyChangesAffectPeers(ctx context.Context, transaction store.Store, policy *types.Policy) (bool, error) { + for _, rule := range policy.Rules { + if rule.SourceResource.Type != "" || rule.DestinationResource.Type != "" { return true, nil } } + return anyGroupHasPeersOrResources(ctx, transaction, policy.AccountID, policy.RuleGroups()) +} + +func arePolicyChangesAffectPeersWithExisting(ctx context.Context, transaction store.Store, policy *types.Policy, existingPolicy *types.Policy) (bool, error) { + if !policy.Enabled && !existingPolicy.Enabled { + return false, nil + } + + for _, rule := range existingPolicy.Rules { + if rule.SourceResource.Type != "" || rule.DestinationResource.Type != "" { + return true, nil + } + } + + hasPeers, err := anyGroupHasPeersOrResources(ctx, transaction, policy.AccountID, existingPolicy.RuleGroups()) + if err != nil { + return false, err + } + + if hasPeers { + return true, nil + } + for _, rule := range policy.Rules { if rule.SourceResource.Type != "" || rule.DestinationResource.Type != "" { return true, nil @@ -175,12 +198,15 @@ func arePolicyChangesAffectPeers(ctx context.Context, transaction store.Store, a return anyGroupHasPeersOrResources(ctx, transaction, policy.AccountID, policy.RuleGroups()) } -// validatePolicy validates the policy and its rules. -func validatePolicy(ctx context.Context, transaction store.Store, accountID string, policy *types.Policy) error { +// validatePolicy validates the policy and its rules. For updates it returns +// the existing policy loaded from the store so callers can avoid a second read. +func validatePolicy(ctx context.Context, transaction store.Store, accountID string, policy *types.Policy) (*types.Policy, error) { + var existingPolicy *types.Policy if policy.ID != "" { - existingPolicy, err := transaction.GetPolicyByID(ctx, store.LockingStrengthNone, accountID, policy.ID) + var err error + existingPolicy, err = transaction.GetPolicyByID(ctx, store.LockingStrengthNone, accountID, policy.ID) if err != nil { - return err + return nil, err } // TODO: Refactor to support multiple rules per policy @@ -191,7 +217,7 @@ func validatePolicy(ctx context.Context, transaction store.Store, accountID stri for _, rule := range policy.Rules { if rule.ID != "" && !existingRuleIDs[rule.ID] { - return status.Errorf(status.InvalidArgument, "invalid rule ID: %s", rule.ID) + return nil, status.Errorf(status.InvalidArgument, "invalid rule ID: %s", rule.ID) } } } else { @@ -201,12 +227,12 @@ func validatePolicy(ctx context.Context, transaction store.Store, accountID stri groups, err := transaction.GetGroupsByIDs(ctx, store.LockingStrengthNone, accountID, policy.RuleGroups()) if err != nil { - return err + return nil, err } postureChecks, err := transaction.GetPostureChecksByIDs(ctx, store.LockingStrengthNone, accountID, policy.SourcePostureChecks) if err != nil { - return err + return nil, err } for i, rule := range policy.Rules { @@ -225,7 +251,7 @@ func validatePolicy(ctx context.Context, transaction store.Store, accountID stri policy.SourcePostureChecks = getValidPostureCheckIDs(postureChecks, policy.SourcePostureChecks) } - return nil + return existingPolicy, nil } // getValidPostureCheckIDs filters and returns only the valid posture check IDs from the provided list. diff --git a/management/server/policy_test.go b/management/server/policy_test.go index a3f987732..a553b7d05 100644 --- a/management/server/policy_test.go +++ b/management/server/policy_test.go @@ -1231,7 +1231,7 @@ func TestPolicyAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) @@ -1263,7 +1263,7 @@ func TestPolicyAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) @@ -1294,7 +1294,7 @@ func TestPolicyAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) @@ -1314,7 +1314,7 @@ func TestPolicyAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) @@ -1355,7 +1355,7 @@ func TestPolicyAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) @@ -1373,7 +1373,7 @@ func TestPolicyAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } @@ -1393,7 +1393,7 @@ func TestPolicyAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) diff --git a/management/server/posture_checks_test.go b/management/server/posture_checks_test.go index 7f0a48dc7..394f0d896 100644 --- a/management/server/posture_checks_test.go +++ b/management/server/posture_checks_test.go @@ -244,7 +244,7 @@ func TestPostureCheckAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) @@ -273,7 +273,7 @@ func TestPostureCheckAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) @@ -292,7 +292,7 @@ func TestPostureCheckAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) @@ -395,7 +395,7 @@ func TestPostureCheckAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) @@ -438,7 +438,7 @@ func TestPostureCheckAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) diff --git a/management/server/route_test.go b/management/server/route_test.go index 91b2cf982..d0caf4b9b 100644 --- a/management/server/route_test.go +++ b/management/server/route_test.go @@ -2,10 +2,8 @@ package server import ( "context" - "fmt" "net" "net/netip" - "sort" "testing" "time" @@ -1840,11 +1838,6 @@ func TestAccount_getPeersRoutesFirewall(t *testing.T) { }, } - validatedPeers := make(map[string]struct{}) - for p := range account.Peers { - validatedPeers[p] = struct{}{} - } - t.Run("check applied policies for the route", func(t *testing.T) { route1 := account.Routes["route1"] policies := types.GetAllRoutePoliciesFromGroups(account, route1.AccessControlGroups) @@ -1858,116 +1851,6 @@ func TestAccount_getPeersRoutesFirewall(t *testing.T) { policies = types.GetAllRoutePoliciesFromGroups(account, route3.AccessControlGroups) assert.Len(t, policies, 0) }) - - t.Run("check peer routes firewall rules", func(t *testing.T) { - routesFirewallRules := account.GetPeerRoutesFirewallRules(context.Background(), "peerA", validatedPeers) - assert.Len(t, routesFirewallRules, 4) - - expectedRoutesFirewallRules := []*types.RouteFirewallRule{ - { - SourceRanges: []string{ - fmt.Sprintf(types.AllowedIPsFormat, peerCIp), - fmt.Sprintf(types.AllowedIPsFormat, peerHIp), - fmt.Sprintf(types.AllowedIPsFormat, peerBIp), - }, - Action: "accept", - Destination: "192.168.0.0/16", - Protocol: "all", - Port: 80, - RouteID: "route1:peerA", - }, - { - SourceRanges: []string{ - fmt.Sprintf(types.AllowedIPsFormat, peerCIp), - fmt.Sprintf(types.AllowedIPsFormat, peerHIp), - fmt.Sprintf(types.AllowedIPsFormat, peerBIp), - }, - Action: "accept", - Destination: "192.168.0.0/16", - Protocol: "all", - Port: 320, - RouteID: "route1:peerA", - }, - } - additionalFirewallRule := []*types.RouteFirewallRule{ - { - SourceRanges: []string{ - fmt.Sprintf(types.AllowedIPsFormat, peerJIp), - }, - Action: "accept", - Destination: "192.168.10.0/16", - Protocol: "tcp", - Port: 80, - RouteID: "route4:peerA", - }, - { - SourceRanges: []string{ - fmt.Sprintf(types.AllowedIPsFormat, peerKIp), - }, - Action: "accept", - Destination: "192.168.10.0/16", - Protocol: "all", - RouteID: "route4:peerA", - }, - } - - assert.ElementsMatch(t, orderRuleSourceRanges(routesFirewallRules), orderRuleSourceRanges(append(expectedRoutesFirewallRules, additionalFirewallRule...))) - - // peerD is also the routing peer for route1, should contain same routes firewall rules as peerA - routesFirewallRules = account.GetPeerRoutesFirewallRules(context.Background(), "peerD", validatedPeers) - assert.Len(t, routesFirewallRules, 2) - for _, rule := range expectedRoutesFirewallRules { - rule.RouteID = "route1:peerD" - } - assert.ElementsMatch(t, orderRuleSourceRanges(routesFirewallRules), orderRuleSourceRanges(expectedRoutesFirewallRules)) - - // peerE is a single routing peer for route 2 and route 3 - routesFirewallRules = account.GetPeerRoutesFirewallRules(context.Background(), "peerE", validatedPeers) - assert.Len(t, routesFirewallRules, 3) - - expectedRoutesFirewallRules = []*types.RouteFirewallRule{ - { - SourceRanges: []string{"100.65.250.202/32", "100.65.13.186/32"}, - Action: "accept", - Destination: existingNetwork.String(), - Protocol: "tcp", - PortRange: types.RulePortRange{Start: 80, End: 350}, - RouteID: "route2", - }, - { - SourceRanges: []string{"0.0.0.0/0"}, - Action: "accept", - Destination: "192.0.2.0/32", - Protocol: "all", - Domains: domain.List{"example.com"}, - IsDynamic: true, - RouteID: "route3", - }, - { - SourceRanges: []string{"::/0"}, - Action: "accept", - Destination: "192.0.2.0/32", - Protocol: "all", - Domains: domain.List{"example.com"}, - IsDynamic: true, - RouteID: "route3", - }, - } - assert.ElementsMatch(t, orderRuleSourceRanges(routesFirewallRules), orderRuleSourceRanges(expectedRoutesFirewallRules)) - - // peerC is part of route1 distribution groups but should not receive the routes firewall rules - routesFirewallRules = account.GetPeerRoutesFirewallRules(context.Background(), "peerC", validatedPeers) - assert.Len(t, routesFirewallRules, 0) - }) - -} - -// orderList is a helper function to sort a list of strings -func orderRuleSourceRanges(ruleList []*types.RouteFirewallRule) []*types.RouteFirewallRule { - for _, rule := range ruleList { - sort.Strings(rule.SourceRanges) - } - return ruleList } func TestRouteAccountPeersUpdate(t *testing.T) { @@ -2070,7 +1953,7 @@ func TestRouteAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } @@ -2107,7 +1990,7 @@ func TestRouteAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) @@ -2127,7 +2010,7 @@ func TestRouteAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) @@ -2145,7 +2028,7 @@ func TestRouteAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) @@ -2185,7 +2068,7 @@ func TestRouteAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) @@ -2225,7 +2108,7 @@ func TestRouteAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) @@ -2665,11 +2548,6 @@ func TestAccount_GetPeerNetworkResourceFirewallRules(t *testing.T) { }, } - validatedPeers := make(map[string]struct{}) - for p := range account.Peers { - validatedPeers[p] = struct{}{} - } - t.Run("validate applied policies for different network resources", func(t *testing.T) { // Test case: Resource1 is directly applied to the policy (policyResource1) policies := account.GetPoliciesForNetworkResource("resource1") @@ -2693,127 +2571,4 @@ func TestAccount_GetPeerNetworkResourceFirewallRules(t *testing.T) { policies = account.GetPoliciesForNetworkResource("resource6") assert.Len(t, policies, 1, "resource6 should have exactly 1 policy applied via access control groups") }) - - t.Run("validate routing peer firewall rules for network resources", func(t *testing.T) { - resourcePoliciesMap := account.GetResourcePoliciesMap() - resourceRoutersMap := account.GetResourceRoutersMap() - _, routes, sourcePeers := account.GetNetworkResourcesRoutesToSync(context.Background(), "peerA", resourcePoliciesMap, resourceRoutersMap) - firewallRules := account.GetPeerNetworkResourceFirewallRules(context.Background(), account.Peers["peerA"], validatedPeers, routes, resourcePoliciesMap) - assert.Len(t, firewallRules, 4) - assert.Len(t, sourcePeers, 5) - - expectedFirewallRules := []*types.RouteFirewallRule{ - { - SourceRanges: []string{ - fmt.Sprintf(types.AllowedIPsFormat, peerCIp), - fmt.Sprintf(types.AllowedIPsFormat, peerHIp), - fmt.Sprintf(types.AllowedIPsFormat, peerBIp), - }, - Action: "accept", - Destination: "192.168.0.0/16", - Protocol: "all", - Port: 80, - RouteID: "resource2:peerA", - }, - { - SourceRanges: []string{ - fmt.Sprintf(types.AllowedIPsFormat, peerCIp), - fmt.Sprintf(types.AllowedIPsFormat, peerHIp), - fmt.Sprintf(types.AllowedIPsFormat, peerBIp), - }, - Action: "accept", - Destination: "192.168.0.0/16", - Protocol: "all", - Port: 320, - RouteID: "resource2:peerA", - }, - } - - additionalFirewallRules := []*types.RouteFirewallRule{ - { - SourceRanges: []string{ - fmt.Sprintf(types.AllowedIPsFormat, peerJIp), - }, - Action: "accept", - Destination: "192.0.2.0/32", - Protocol: "tcp", - Port: 80, - Domains: domain.List{"example.com"}, - IsDynamic: true, - RouteID: "resource4:peerA", - }, - { - SourceRanges: []string{ - fmt.Sprintf(types.AllowedIPsFormat, peerKIp), - }, - Action: "accept", - Destination: "192.0.2.0/32", - Protocol: "all", - Domains: domain.List{"example.com"}, - IsDynamic: true, - RouteID: "resource4:peerA", - }, - } - assert.ElementsMatch(t, orderRuleSourceRanges(firewallRules), orderRuleSourceRanges(append(expectedFirewallRules, additionalFirewallRules...))) - - // peerD is also the routing peer for resource2 - _, routes, sourcePeers = account.GetNetworkResourcesRoutesToSync(context.Background(), "peerD", resourcePoliciesMap, resourceRoutersMap) - firewallRules = account.GetPeerNetworkResourceFirewallRules(context.Background(), account.Peers["peerD"], validatedPeers, routes, resourcePoliciesMap) - assert.Len(t, firewallRules, 2) - for _, rule := range expectedFirewallRules { - rule.RouteID = "resource2:peerD" - } - assert.ElementsMatch(t, orderRuleSourceRanges(firewallRules), orderRuleSourceRanges(expectedFirewallRules)) - assert.Len(t, sourcePeers, 3) - - // peerE is a single routing peer for resource1 and resource3 - // PeerE should only receive rules for resource1 since resource3 has no applied policy - _, routes, sourcePeers = account.GetNetworkResourcesRoutesToSync(context.Background(), "peerE", resourcePoliciesMap, resourceRoutersMap) - firewallRules = account.GetPeerNetworkResourceFirewallRules(context.Background(), account.Peers["peerE"], validatedPeers, routes, resourcePoliciesMap) - assert.Len(t, firewallRules, 1) - assert.Len(t, sourcePeers, 2) - - expectedFirewallRules = []*types.RouteFirewallRule{ - { - SourceRanges: []string{"100.65.250.202/32", "100.65.13.186/32"}, - Action: "accept", - Destination: "10.10.10.0/24", - Protocol: "tcp", - PortRange: types.RulePortRange{Start: 80, End: 350}, - RouteID: "resource1:peerE", - }, - } - assert.ElementsMatch(t, orderRuleSourceRanges(firewallRules), orderRuleSourceRanges(expectedFirewallRules)) - - // peerC is part of distribution groups for resource2 but should not receive the firewall rules - firewallRules = account.GetPeerRoutesFirewallRules(context.Background(), "peerC", validatedPeers) - assert.Len(t, firewallRules, 0) - - // peerL is the single routing peer for resource5 - _, routes, sourcePeers = account.GetNetworkResourcesRoutesToSync(context.Background(), "peerL", resourcePoliciesMap, resourceRoutersMap) - assert.Len(t, routes, 1) - firewallRules = account.GetPeerNetworkResourceFirewallRules(context.Background(), account.Peers["peerL"], validatedPeers, routes, resourcePoliciesMap) - assert.Len(t, firewallRules, 1) - assert.Len(t, sourcePeers, 1) - - expectedFirewallRules = []*types.RouteFirewallRule{ - { - SourceRanges: []string{"100.65.29.67/32"}, - Action: "accept", - Destination: "10.12.12.1/32", - Protocol: "tcp", - Port: 8080, - RouteID: "resource5:peerL", - }, - } - assert.ElementsMatch(t, orderRuleSourceRanges(firewallRules), orderRuleSourceRanges(expectedFirewallRules)) - - _, routes, sourcePeers = account.GetNetworkResourcesRoutesToSync(context.Background(), "peerM", resourcePoliciesMap, resourceRoutersMap) - assert.Len(t, routes, 1) - assert.Len(t, sourcePeers, 0) - - _, routes, sourcePeers = account.GetNetworkResourcesRoutesToSync(context.Background(), "peerN", resourcePoliciesMap, resourceRoutersMap) - assert.Len(t, routes, 1) - assert.Len(t, sourcePeers, 2) - }) } diff --git a/management/server/store/sql_store.go b/management/server/store/sql_store.go index 8189548b7..0a716d08d 100644 --- a/management/server/store/sql_store.go +++ b/management/server/store/sql_store.go @@ -1196,7 +1196,6 @@ func (s *SqlStore) getAccountGorm(ctx context.Context, accountID string) (*types account.NameServerGroups[ns.ID] = &ns } account.NameServerGroupsG = nil - account.InitOnce() return &account, nil } @@ -1635,7 +1634,6 @@ func (s *SqlStore) getAccount(ctx context.Context, accountID string) (*types.Acc if sExtraIntegratedValidatorGroups.Valid { _ = json.Unmarshal([]byte(sExtraIntegratedValidatorGroups.String), &account.Settings.Extra.IntegratedValidatorGroups) } - account.InitOnce() return &account, nil } @@ -3310,7 +3308,7 @@ func (s *SqlStore) GetAccountPeersWithExpiration(ctx context.Context, lockStreng var peers []*nbpeer.Peer result := tx. - Where("login_expiration_enabled = ? AND user_id IS NOT NULL AND user_id != ''", true). + Where("login_expiration_enabled = ? AND peer_status_login_expired != ? AND user_id IS NOT NULL AND user_id != ''", true, true). Find(&peers, accountIDCondition, accountID) if err := result.Error; err != nil { log.WithContext(ctx).Errorf("failed to get peers with expiration from the store: %s", result.Error) diff --git a/management/server/store/sql_store_test.go b/management/server/store/sql_store_test.go index 8ea6c2ae5..5a5616abc 100644 --- a/management/server/store/sql_store_test.go +++ b/management/server/store/sql_store_test.go @@ -2729,7 +2729,7 @@ func TestSqlStore_GetAccountPeers(t *testing.T) { { name: "should retrieve peers for an existing account ID", accountID: "bf1c8084-ba50-4ce7-9439-34653001fc3b", - expectedCount: 4, + expectedCount: 5, }, { name: "should return no peers for a non-existing account ID", @@ -2751,7 +2751,7 @@ func TestSqlStore_GetAccountPeers(t *testing.T) { name: "should filter peers by partial name", accountID: "bf1c8084-ba50-4ce7-9439-34653001fc3b", nameFilter: "host", - expectedCount: 3, + expectedCount: 4, }, { name: "should filter peers by ip", @@ -2777,14 +2777,16 @@ func TestSqlStore_GetAccountPeersWithExpiration(t *testing.T) { require.NoError(t, err) tests := []struct { - name string - accountID string - expectedCount int + name string + accountID string + expectedCount int + expectedPeerIDs []string }{ { - name: "should retrieve peers with expiration for an existing account ID", - accountID: "bf1c8084-ba50-4ce7-9439-34653001fc3b", - expectedCount: 1, + name: "should retrieve only non-expired peers with expiration enabled", + accountID: "bf1c8084-ba50-4ce7-9439-34653001fc3b", + expectedCount: 1, + expectedPeerIDs: []string{"notexpired01"}, }, { name: "should return no peers with expiration for a non-existing account ID", @@ -2803,10 +2805,30 @@ func TestSqlStore_GetAccountPeersWithExpiration(t *testing.T) { peers, err := store.GetAccountPeersWithExpiration(context.Background(), LockingStrengthNone, tt.accountID) require.NoError(t, err) require.Len(t, peers, tt.expectedCount) + for i, peer := range peers { + assert.Equal(t, tt.expectedPeerIDs[i], peer.ID) + } }) } } +func TestSqlStore_GetAccountPeersWithExpiration_ExcludesAlreadyExpired(t *testing.T) { + store, cleanup, err := NewTestStoreFromSQL(context.Background(), "../testdata/store_with_expired_peers.sql", t.TempDir()) + t.Cleanup(cleanup) + require.NoError(t, err) + + accountID := "bf1c8084-ba50-4ce7-9439-34653001fc3b" + + peers, err := store.GetAccountPeersWithExpiration(context.Background(), LockingStrengthNone, accountID) + require.NoError(t, err) + + // Verify the already-expired peer (cg05lnblo1hkg2j514p0) is not returned + for _, peer := range peers { + assert.NotEqual(t, "cg05lnblo1hkg2j514p0", peer.ID, "already expired peer should not be returned") + assert.False(t, peer.Status.LoginExpired, "returned peers should not have LoginExpired set") + } +} + func TestSqlStore_GetAccountPeersWithInactivity(t *testing.T) { store, cleanup, err := NewTestStoreFromSQL(context.Background(), "../testdata/store_with_expired_peers.sql", t.TempDir()) t.Cleanup(cleanup) @@ -2887,7 +2909,7 @@ func TestSqlStore_GetUserPeers(t *testing.T) { name: "should retrieve peers for another valid account ID and user ID", accountID: "bf1c8084-ba50-4ce7-9439-34653001fc3b", userID: "edafee4e-63fb-11ec-90d6-0242ac120003", - expectedCount: 2, + expectedCount: 3, }, { name: "should return no peers for existing account ID with empty user ID", diff --git a/management/server/telemetry/http_api_metrics.go b/management/server/telemetry/http_api_metrics.go index 28e8457e2..e48e6d64a 100644 --- a/management/server/telemetry/http_api_metrics.go +++ b/management/server/telemetry/http_api_metrics.go @@ -193,20 +193,12 @@ func (m *HTTPMiddleware) Handler(h http.Handler) http.Handler { } }) - h.ServeHTTP(w, r.WithContext(ctx)) + // Hold on to req so auth's in-place ctx update is visible after ServeHTTP. + req := r.WithContext(ctx) + h.ServeHTTP(w, req) close(handlerDone) - userAuth, err := nbContext.GetUserAuthFromContext(r.Context()) - if err == nil { - if userAuth.AccountId != "" { - //nolint - ctx = context.WithValue(ctx, nbContext.AccountIDKey, userAuth.AccountId) - } - if userAuth.UserId != "" { - //nolint - ctx = context.WithValue(ctx, nbContext.UserIDKey, userAuth.UserId) - } - } + ctx = req.Context() if w.Status() > 399 { log.WithContext(ctx).Errorf("HTTP response %v: %v %v status %v", reqID, r.Method, r.URL, w.Status()) diff --git a/management/server/testdata/store_with_expired_peers.sql b/management/server/testdata/store_with_expired_peers.sql index dfcaeee6f..189bd1262 100644 --- a/management/server/testdata/store_with_expired_peers.sql +++ b/management/server/testdata/store_with_expired_peers.sql @@ -31,6 +31,7 @@ INSERT INTO peers VALUES('cfvprsrlo1hqoo49ohog','bf1c8084-ba50-4ce7-9439-3465300 INSERT INTO peers VALUES('cg05lnblo1hkg2j514p0','bf1c8084-ba50-4ce7-9439-34653001fc3b','RlSy2vzoG2HyMBTUImXOiVhCBiiBa5qD5xzMxkiFDW4=','','"100.64.39.54"','expiredhost','linux','Linux','22.04','x86_64','Ubuntu','','development','','',NULL,'','','','{"Cloud":"","Platform":""}',NULL,'expiredhost','expiredhost','2023-03-02 09:19:57.276717255+01:00',0,1,0,'edafee4e-63fb-11ec-90d6-0242ac120003','ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIMbK5ZXJsGOOWoBT4OmkPtgdPZe2Q7bDuS/zjn2CZxhK',0,1,0,'2023-03-02 09:14:21.791679181+01:00','2024-10-02 17:00:32.527947+02:00',0,'""','','',0); INSERT INTO peers VALUES('cg3161rlo1hs9cq94gdg','bf1c8084-ba50-4ce7-9439-34653001fc3b','mVABSKj28gv+JRsf7e0NEGKgSOGTfU/nPB2cpuG56HU=','','"100.64.117.96"','testhost','linux','Linux','22.04','x86_64','Ubuntu','','development','','',NULL,'','','','{"Cloud":"","Platform":""}',NULL,'testhost','testhost','2023-03-06 18:21:27.252010027+01:00',0,0,0,'edafee4e-63fb-11ec-90d6-0242ac120003','ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAINWvvUkFFcrj48CWTkNUb/do/n52i1L5dH4DhGu+4ZuM',0,0,0,'2023-03-07 09:02:47.442857106+01:00','2024-10-02 17:00:32.527947+02:00',0,'""','','',0); INSERT INTO peers VALUES('csrnkiq7qv9d8aitqd50','bf1c8084-ba50-4ce7-9439-34653001fc3b','nVABSKj28gv+JRsf7e0NEGKgSOGTfU/nPB2cpuG56HX=','','"100.64.117.97"','testhost','linux','Linux','22.04','x86_64','Ubuntu','','development','','',NULL,'','','','{"Cloud":"","Platform":""}',NULL,'testhost','testhost-1','2023-03-06 18:21:27.252010027+01:00',0,0,0,'f4f6d672-63fb-11ec-90d6-0242ac120003','ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAINWvvUkFFcrj48CWTkNUb/do/n52i1L5dH4DhGu+4ZuM',0,0,1,'2023-03-07 09:02:47.442857106+01:00','2024-10-02 17:00:32.527947+02:00',0,'""','','',0); +INSERT INTO peers VALUES('notexpired01','bf1c8084-ba50-4ce7-9439-34653001fc3b','oVABSKj28gv+JRsf7e0NEGKgSOGTfU/nPB2cpuG56HY=','','"100.64.117.98"','activehost','linux','Linux','22.04','x86_64','Ubuntu','','development','','',NULL,'','','','{"Cloud":"","Platform":""}',NULL,'activehost','activehost','2023-03-06 18:21:27.252010027+01:00',0,0,0,'edafee4e-63fb-11ec-90d6-0242ac120003','ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAINWvvUkFFcrj48CWTkNUb/do/n52i1L5dH4DhGu+4ZuM',0,1,0,'2023-03-07 09:02:47.442857106+01:00','2024-10-02 17:00:32.527947+02:00',0,'""','','',0); INSERT INTO users VALUES('f4f6d672-63fb-11ec-90d6-0242ac120003','bf1c8084-ba50-4ce7-9439-34653001fc3b','user',0,0,'','[]',0,NULL,'2024-10-02 17:00:32.528196+02:00','api',0,''); INSERT INTO users VALUES('edafee4e-63fb-11ec-90d6-0242ac120003','bf1c8084-ba50-4ce7-9439-34653001fc3b','admin',0,0,'','[]',0,NULL,'2024-10-02 17:00:32.528196+02:00','api',0,''); INSERT INTO installations VALUES(1,''); diff --git a/management/server/types/account.go b/management/server/types/account.go index c448813db..e7c1e2dce 100644 --- a/management/server/types/account.go +++ b/management/server/types/account.go @@ -8,7 +8,6 @@ import ( "slices" "strconv" "strings" - "sync" "time" "github.com/hashicorp/go-multierror" @@ -27,7 +26,6 @@ import ( networkTypes "github.com/netbirdio/netbird/management/server/networks/types" nbpeer "github.com/netbirdio/netbird/management/server/peer" "github.com/netbirdio/netbird/management/server/posture" - "github.com/netbirdio/netbird/management/server/telemetry" "github.com/netbirdio/netbird/management/server/util" "github.com/netbirdio/netbird/route" "github.com/netbirdio/netbird/shared/management/domain" @@ -110,16 +108,9 @@ type Account struct { NetworkResources []*resourceTypes.NetworkResource `gorm:"foreignKey:AccountID;references:id"` Onboarding AccountOnboarding `gorm:"foreignKey:AccountID;references:id;constraint:OnDelete:CASCADE"` - NetworkMapCache *NetworkMapBuilder `gorm:"-"` - nmapInitOnce *sync.Once `gorm:"-"` - ReverseProxyFreeDomainNonce string } -func (a *Account) InitOnce() { - a.nmapInitOnce = &sync.Once{} -} - // this class is used by gorm only type PrimaryAccountInfo struct { IsDomainPrimaryAccount bool @@ -155,108 +146,6 @@ func (o AccountOnboarding) IsEqual(onboarding AccountOnboarding) bool { o.SignupFormPending == onboarding.SignupFormPending } -// GetRoutesToSync returns the enabled routes for the peer ID and the routes -// from the ACL peers that have distribution groups associated with the peer ID. -// Please mind, that the returned route.Route objects will contain Peer.Key instead of Peer.ID. -func (a *Account) GetRoutesToSync(ctx context.Context, peerID string, aclPeers []*nbpeer.Peer, peerGroups LookupMap) []*route.Route { - routes, peerDisabledRoutes := a.getRoutingPeerRoutes(ctx, peerID) - peerRoutesMembership := make(LookupMap) - for _, r := range append(routes, peerDisabledRoutes...) { - peerRoutesMembership[string(r.GetHAUniqueID())] = struct{}{} - } - - for _, peer := range aclPeers { - activeRoutes, _ := a.getRoutingPeerRoutes(ctx, peer.ID) - groupFilteredRoutes := a.filterRoutesByGroups(activeRoutes, peerGroups) - filteredRoutes := a.filterRoutesFromPeersOfSameHAGroup(groupFilteredRoutes, peerRoutesMembership) - routes = append(routes, filteredRoutes...) - } - - return routes -} - -// filterRoutesFromPeersOfSameHAGroup filters and returns a list of routes that don't share the same HA route membership -func (a *Account) filterRoutesFromPeersOfSameHAGroup(routes []*route.Route, peerMemberships LookupMap) []*route.Route { - var filteredRoutes []*route.Route - for _, r := range routes { - _, found := peerMemberships[string(r.GetHAUniqueID())] - if !found { - filteredRoutes = append(filteredRoutes, r) - } - } - return filteredRoutes -} - -// filterRoutesByGroups returns a list with routes that have distribution groups in the group's map -func (a *Account) filterRoutesByGroups(routes []*route.Route, groupListMap LookupMap) []*route.Route { - var filteredRoutes []*route.Route - for _, r := range routes { - for _, groupID := range r.Groups { - _, found := groupListMap[groupID] - if found { - filteredRoutes = append(filteredRoutes, r) - break - } - } - } - return filteredRoutes -} - -// getRoutingPeerRoutes returns the enabled and disabled lists of routes that the given routing peer serves -// Please mind, that the returned route.Route objects will contain Peer.Key instead of Peer.ID. -// If the given is not a routing peer, then the lists are empty. -func (a *Account) getRoutingPeerRoutes(ctx context.Context, peerID string) (enabledRoutes []*route.Route, disabledRoutes []*route.Route) { - - peer := a.GetPeer(peerID) - if peer == nil { - log.WithContext(ctx).Errorf("peer %s that doesn't exist under account %s", peerID, a.Id) - return enabledRoutes, disabledRoutes - } - - seenRoute := make(map[route.ID]struct{}) - - takeRoute := func(r *route.Route, id string) { - if _, ok := seenRoute[r.ID]; ok { - return - } - seenRoute[r.ID] = struct{}{} - - if r.Enabled { - r.Peer = peer.Key - enabledRoutes = append(enabledRoutes, r) - return - } - disabledRoutes = append(disabledRoutes, r) - } - - for _, r := range a.Routes { - for _, groupID := range r.PeerGroups { - group := a.GetGroup(groupID) - if group == nil { - log.WithContext(ctx).Errorf("route %s has peers group %s that doesn't exist under account %s", r.ID, groupID, a.Id) - continue - } - for _, id := range group.Peers { - if id != peerID { - continue - } - - newPeerRoute := r.Copy() - newPeerRoute.Peer = id - newPeerRoute.PeerGroups = nil - newPeerRoute.ID = route.ID(string(r.ID) + ":" + id) // we have to provide unique route id when distribute network map - takeRoute(newPeerRoute, id) - break - } - } - if r.Peer == peerID { - takeRoute(r.Copy(), peerID) - } - } - - return enabledRoutes, disabledRoutes -} - // GetRoutesByPrefixOrDomains return list of routes by account and route prefix func (a *Account) GetRoutesByPrefixOrDomains(prefix netip.Prefix, domains domain.List) []*route.Route { var routes []*route.Route @@ -276,106 +165,6 @@ func (a *Account) GetGroup(groupID string) *Group { return a.Groups[groupID] } -// GetPeerNetworkMap returns the networkmap for the given peer ID. -func (a *Account) GetPeerNetworkMap( - ctx context.Context, - peerID string, - peersCustomZone nbdns.CustomZone, - accountZones []*zones.Zone, - validatedPeersMap map[string]struct{}, - resourcePolicies map[string][]*Policy, - routers map[string]map[string]*routerTypes.NetworkRouter, - metrics *telemetry.AccountManagerMetrics, - groupIDToUserIDs map[string][]string, -) *NetworkMap { - start := time.Now() - peer := a.Peers[peerID] - if peer == nil { - return &NetworkMap{ - Network: a.Network.Copy(), - } - } - - if _, ok := validatedPeersMap[peerID]; !ok { - return &NetworkMap{ - Network: a.Network.Copy(), - } - } - - peerGroups := a.GetPeerGroups(peerID) - - aclPeers, firewallRules, authorizedUsers, enableSSH := a.GetPeerConnectionResources(ctx, peer, validatedPeersMap, groupIDToUserIDs) - // exclude expired peers - var peersToConnect []*nbpeer.Peer - var expiredPeers []*nbpeer.Peer - for _, p := range aclPeers { - expired, _ := p.LoginExpired(a.Settings.PeerLoginExpiration) - if a.Settings.PeerLoginExpirationEnabled && expired { - expiredPeers = append(expiredPeers, p) - continue - } - peersToConnect = append(peersToConnect, p) - } - - routesUpdate := a.GetRoutesToSync(ctx, peerID, peersToConnect, peerGroups) - routesFirewallRules := a.GetPeerRoutesFirewallRules(ctx, peerID, validatedPeersMap) - isRouter, networkResourcesRoutes, sourcePeers := a.GetNetworkResourcesRoutesToSync(ctx, peerID, resourcePolicies, routers) - var networkResourcesFirewallRules []*RouteFirewallRule - if isRouter { - networkResourcesFirewallRules = a.GetPeerNetworkResourceFirewallRules(ctx, peer, validatedPeersMap, networkResourcesRoutes, resourcePolicies) - } - peersToConnectIncludingRouters := a.addNetworksRoutingPeers(networkResourcesRoutes, peer, peersToConnect, expiredPeers, isRouter, sourcePeers) - - dnsManagementStatus := a.getPeerDNSManagementStatus(peerID) - dnsUpdate := nbdns.Config{ - ServiceEnable: dnsManagementStatus, - } - - if dnsManagementStatus { - var zones []nbdns.CustomZone - - if peersCustomZone.Domain != "" { - records := filterZoneRecordsForPeers(peer, peersCustomZone, peersToConnectIncludingRouters, expiredPeers) - zones = append(zones, nbdns.CustomZone{ - Domain: peersCustomZone.Domain, - Records: records, - }) - } - - filteredAccountZones := filterPeerAppliedZones(ctx, accountZones, peerGroups) - zones = append(zones, filteredAccountZones...) - - dnsUpdate.CustomZones = zones - dnsUpdate.NameServerGroups = getPeerNSGroups(a, peerID) - } - - nm := &NetworkMap{ - Peers: peersToConnectIncludingRouters, - Network: a.Network.Copy(), - Routes: slices.Concat(networkResourcesRoutes, routesUpdate), - DNSConfig: dnsUpdate, - OfflinePeers: expiredPeers, - FirewallRules: firewallRules, - RoutesFirewallRules: slices.Concat(networkResourcesFirewallRules, routesFirewallRules), - AuthorizedUsers: authorizedUsers, - EnableSSH: enableSSH, - } - - if metrics != nil { - objectCount := int64(len(peersToConnectIncludingRouters) + len(expiredPeers) + len(routesUpdate) + len(networkResourcesRoutes) + len(firewallRules) + +len(networkResourcesFirewallRules) + len(routesFirewallRules)) - metrics.CountNetworkMapObjects(objectCount) - metrics.CountGetPeerNetworkMapDuration(time.Since(start)) - - if objectCount > 5000 { - log.WithContext(ctx).Tracef("account: %s has a total resource count of %d objects, "+ - "peers to connect: %d, expired peers: %d, routes: %d, firewall rules: %d, network resources routes: %d, network resources firewall rules: %d, routes firewall rules: %d", - a.Id, objectCount, len(peersToConnectIncludingRouters), len(expiredPeers), len(routesUpdate), len(firewallRules), len(networkResourcesRoutes), len(networkResourcesFirewallRules), len(routesFirewallRules)) - } - } - - return nm -} - func (a *Account) addNetworksRoutingPeers( networkResourcesRoutes []*route.Route, peer *nbpeer.Peer, @@ -421,39 +210,6 @@ func (a *Account) addNetworksRoutingPeers( return peersToConnect } -func getPeerNSGroups(account *Account, peerID string) []*nbdns.NameServerGroup { - groupList := account.GetPeerGroups(peerID) - - var peerNSGroups []*nbdns.NameServerGroup - - for _, nsGroup := range account.NameServerGroups { - if !nsGroup.Enabled { - continue - } - for _, gID := range nsGroup.Groups { - _, found := groupList[gID] - if found { - if !peerIsNameserver(account.GetPeer(peerID), nsGroup) { - peerNSGroups = append(peerNSGroups, nsGroup.Copy()) - break - } - } - } - } - - return peerNSGroups -} - -// peerIsNameserver returns true if the peer is a nameserver for a nsGroup -func peerIsNameserver(peer *nbpeer.Peer, nsGroup *nbdns.NameServerGroup) bool { - for _, ns := range nsGroup.NameServers { - if peer.IP.Equal(ns.IP.AsSlice()) { - return true - } - } - return false -} - func AddPeerLabelsToAccount(ctx context.Context, account *Account, peerLabels LookupMap) { for _, peer := range account.Peers { label, err := GetPeerHostLabel(peer.Name, peerLabels) @@ -800,19 +556,6 @@ func (a *Account) GetPeerGroupsList(peerID string) []string { return grps } -func (a *Account) getPeerDNSManagementStatus(peerID string) bool { - peerGroups := a.GetPeerGroups(peerID) - enabled := true - for _, groupID := range a.DNSSettings.DisabledManagementGroups { - _, found := peerGroups[groupID] - if found { - enabled = false - break - } - } - return enabled -} - func (a *Account) GetPeerGroups(peerID string) LookupMap { groupList := make(LookupMap) for groupID, group := range a.Groups { @@ -941,8 +684,6 @@ func (a *Account) Copy() *Account { NetworkResources: networkResources, Services: services, Onboarding: a.Onboarding, - NetworkMapCache: a.NetworkMapCache, - nmapInitOnce: a.nmapInitOnce, Domains: domains, } } @@ -1304,31 +1045,6 @@ func (a *Account) GetPostureChecks(postureChecksID string) *posture.Checks { return nil } -// GetPeerRoutesFirewallRules gets the routes firewall rules associated with a routing peer ID for the account. -func (a *Account) GetPeerRoutesFirewallRules(ctx context.Context, peerID string, validatedPeersMap map[string]struct{}) []*RouteFirewallRule { - routesFirewallRules := make([]*RouteFirewallRule, 0, len(a.Routes)) - - enabledRoutes, _ := a.getRoutingPeerRoutes(ctx, peerID) - for _, route := range enabledRoutes { - // If no access control groups are specified, accept all traffic. - if len(route.AccessControlGroups) == 0 { - defaultPermit := getDefaultPermit(route) - routesFirewallRules = append(routesFirewallRules, defaultPermit...) - continue - } - - distributionPeers := a.getDistributionGroupsPeers(route) - - for _, accessGroup := range route.AccessControlGroups { - policies := GetAllRoutePoliciesFromGroups(a, []string{accessGroup}) - rules := a.getRouteFirewallRules(ctx, peerID, policies, route, validatedPeersMap, distributionPeers) - routesFirewallRules = append(routesFirewallRules, rules...) - } - } - - return routesFirewallRules -} - func (a *Account) getRouteFirewallRules(ctx context.Context, peerID string, policies []*Policy, route *route.Route, validatedPeersMap map[string]struct{}, distributionPeers map[string]struct{}) []*RouteFirewallRule { var fwRules []*RouteFirewallRule for _, policy := range policies { @@ -1387,50 +1103,6 @@ func (a *Account) getRulePeers(rule *PolicyRule, postureChecks []string, peerID return distributionGroupPeers } -func (a *Account) getDistributionGroupsPeers(route *route.Route) map[string]struct{} { - distPeers := make(map[string]struct{}) - for _, id := range route.Groups { - group := a.Groups[id] - if group == nil { - continue - } - - for _, pID := range group.Peers { - distPeers[pID] = struct{}{} - } - } - return distPeers -} - -func getDefaultPermit(route *route.Route) []*RouteFirewallRule { - var rules []*RouteFirewallRule - - sources := []string{"0.0.0.0/0"} - if route.Network.Addr().Is6() { - sources = []string{"::/0"} - } - rule := RouteFirewallRule{ - SourceRanges: sources, - Action: string(PolicyTrafficActionAccept), - Destination: route.Network.String(), - Protocol: string(PolicyRuleProtocolALL), - Domains: route.Domains, - IsDynamic: route.IsDynamic(), - RouteID: route.ID, - } - - rules = append(rules, &rule) - - // dynamic routes always contain an IPv4 placeholder as destination, hence we must add IPv6 rules additionally - if route.IsDynamic() { - ruleV6 := rule - ruleV6.SourceRanges = []string{"::/0"} - rules = append(rules, &ruleV6) - } - - return rules -} - // GetAllRoutePoliciesFromGroups retrieves route policies associated with the specified access control groups // and returns a list of policies that have rules with destinations matching the specified groups. func GetAllRoutePoliciesFromGroups(account *Account, accessControlGroups []string) []*Policy { @@ -1508,65 +1180,6 @@ func (a *Account) GetResourcePoliciesMap() map[string][]*Policy { return resourcePolicies } -// GetNetworkResourcesRoutesToSync returns network routes for syncing with a specific peer and its ACL peers. -func (a *Account) GetNetworkResourcesRoutesToSync(ctx context.Context, peerID string, resourcePolicies map[string][]*Policy, routers map[string]map[string]*routerTypes.NetworkRouter) (bool, []*route.Route, map[string]struct{}) { - var isRoutingPeer bool - var routes []*route.Route - allSourcePeers := make(map[string]struct{}, len(a.Peers)) - - for _, resource := range a.NetworkResources { - if !resource.Enabled { - continue - } - - var addSourcePeers bool - - networkRoutingPeers, exists := routers[resource.NetworkID] - if exists { - if router, ok := networkRoutingPeers[peerID]; ok { - isRoutingPeer, addSourcePeers = true, true - routes = append(routes, a.getNetworkResourcesRoutes(resource, peerID, router, resourcePolicies)...) - } - } - - addedResourceRoute := false - for _, policy := range resourcePolicies[resource.ID] { - var peers []string - if policy.Rules[0].SourceResource.Type == ResourceTypePeer && policy.Rules[0].SourceResource.ID != "" { - peers = []string{policy.Rules[0].SourceResource.ID} - } else { - peers = a.getUniquePeerIDsFromGroupsIDs(ctx, policy.SourceGroups()) - } - if addSourcePeers { - for _, pID := range a.getPostureValidPeers(peers, policy.SourcePostureChecks) { - allSourcePeers[pID] = struct{}{} - } - } else if slices.Contains(peers, peerID) && a.validatePostureChecksOnPeer(ctx, policy.SourcePostureChecks, peerID) { - // add routes for the resource if the peer is in the distribution group - for peerId, router := range networkRoutingPeers { - routes = append(routes, a.getNetworkResourcesRoutes(resource, peerId, router, resourcePolicies)...) - } - addedResourceRoute = true - } - if addedResourceRoute { - break - } - } - } - - return isRoutingPeer, routes, allSourcePeers -} - -func (a *Account) getPostureValidPeers(inputPeers []string, postureChecksIDs []string) []string { - var dest []string - for _, peerID := range inputPeers { - if a.validatePostureChecksOnPeer(context.Background(), postureChecksIDs, peerID) { - dest = append(dest, peerID) - } - } - return dest -} - func (a *Account) getUniquePeerIDsFromGroupsIDs(ctx context.Context, groups []string) []string { peerIDs := make(map[string]struct{}, len(groups)) // we expect at least one peer per group as initial capacity for _, groupID := range groups { @@ -1658,22 +1271,6 @@ func (a *Account) GetPoliciesAppliedInNetwork(networkID string) []string { return result } -// getNetworkResourcesRoutes convert the network resources list to routes list. -func (a *Account) getNetworkResourcesRoutes(resource *resourceTypes.NetworkResource, peerId string, router *routerTypes.NetworkRouter, resourcePolicies map[string][]*Policy) []*route.Route { - resourceAppliedPolicies := resourcePolicies[resource.ID] - - var routes []*route.Route - // distribute the resource routes only if there is policy applied to it - if len(resourceAppliedPolicies) > 0 { - peer := a.GetPeer(peerId) - if peer != nil { - routes = append(routes, resource.ToRoute(peer, router)) - } - } - - return routes -} - func (a *Account) GetResourceRoutersMap() map[string]map[string]*routerTypes.NetworkRouter { routers := make(map[string]map[string]*routerTypes.NetworkRouter) diff --git a/management/server/types/account_test.go b/management/server/types/account_test.go index 00ba29b7f..9b1c9e31d 100644 --- a/management/server/types/account_test.go +++ b/management/server/types/account_test.go @@ -4,8 +4,6 @@ import ( "context" "fmt" "net" - "net/netip" - "slices" "testing" "github.com/miekg/dns" @@ -19,7 +17,6 @@ import ( routerTypes "github.com/netbirdio/netbird/management/server/networks/routers/types" networkTypes "github.com/netbirdio/netbird/management/server/networks/types" nbpeer "github.com/netbirdio/netbird/management/server/peer" - "github.com/netbirdio/netbird/management/server/posture" "github.com/netbirdio/netbird/route" ) @@ -451,402 +448,6 @@ func Test_AddNetworksRoutingPeersHandlesNoMissingPeers(t *testing.T) { require.Len(t, result, 0) } -const ( - accID = "accountID" - network1ID = "network1ID" - group1ID = "group1" - accNetResourcePeer1ID = "peer1" - accNetResourcePeer2ID = "peer2" - accNetResourceRouter1ID = "router1" - accNetResource1ID = "resource1ID" - accNetResourceRestrictPostureCheckID = "restrictPostureCheck" - accNetResourceRelaxedPostureCheckID = "relaxedPostureCheck" - accNetResourceLockedPostureCheckID = "lockedPostureCheck" - accNetResourceLinuxPostureCheckID = "linuxPostureCheck" -) - -var ( - accNetResourcePeer1IP = net.IP{192, 168, 1, 1} - accNetResourcePeer2IP = net.IP{192, 168, 1, 2} - accNetResourceRouter1IP = net.IP{192, 168, 1, 3} - accNetResourceValidPeers = map[string]struct{}{accNetResourcePeer1ID: {}, accNetResourcePeer2ID: {}} -) - -func getBasicAccountsWithResource() *Account { - return &Account{ - Id: accID, - Peers: map[string]*nbpeer.Peer{ - accNetResourcePeer1ID: { - ID: accNetResourcePeer1ID, - AccountID: accID, - Key: "peer1Key", - IP: accNetResourcePeer1IP, - Meta: nbpeer.PeerSystemMeta{ - GoOS: "linux", - WtVersion: "0.35.1", - KernelVersion: "4.4.0", - }, - }, - accNetResourcePeer2ID: { - ID: accNetResourcePeer2ID, - AccountID: accID, - Key: "peer2Key", - IP: accNetResourcePeer2IP, - Meta: nbpeer.PeerSystemMeta{ - GoOS: "windows", - WtVersion: "0.34.1", - KernelVersion: "4.4.0", - }, - }, - accNetResourceRouter1ID: { - ID: accNetResourceRouter1ID, - AccountID: accID, - Key: "router1Key", - IP: accNetResourceRouter1IP, - Meta: nbpeer.PeerSystemMeta{ - GoOS: "linux", - WtVersion: "0.35.1", - KernelVersion: "4.4.0", - }, - }, - }, - Groups: map[string]*Group{ - group1ID: { - ID: group1ID, - Peers: []string{accNetResourcePeer1ID, accNetResourcePeer2ID}, - }, - }, - Networks: []*networkTypes.Network{ - { - ID: network1ID, - AccountID: accID, - Name: "network1", - }, - }, - NetworkRouters: []*routerTypes.NetworkRouter{ - { - ID: accNetResourceRouter1ID, - NetworkID: network1ID, - AccountID: accID, - Peer: accNetResourceRouter1ID, - PeerGroups: []string{}, - Masquerade: false, - Metric: 100, - Enabled: true, - }, - }, - NetworkResources: []*resourceTypes.NetworkResource{ - { - ID: accNetResource1ID, - AccountID: accID, - NetworkID: network1ID, - Address: "10.10.10.0/24", - Prefix: netip.MustParsePrefix("10.10.10.0/24"), - Type: resourceTypes.NetworkResourceType("subnet"), - Enabled: true, - }, - }, - Policies: []*Policy{ - { - ID: "policy1ID", - AccountID: accID, - Enabled: true, - Rules: []*PolicyRule{ - { - ID: "rule1ID", - Enabled: true, - Sources: []string{group1ID}, - DestinationResource: Resource{ - ID: accNetResource1ID, - Type: "Host", - }, - Protocol: PolicyRuleProtocolTCP, - Ports: []string{"80"}, - Action: PolicyTrafficActionAccept, - }, - }, - SourcePostureChecks: nil, - }, - }, - PostureChecks: []*posture.Checks{ - { - ID: accNetResourceRestrictPostureCheckID, - Name: accNetResourceRestrictPostureCheckID, - Checks: posture.ChecksDefinition{ - NBVersionCheck: &posture.NBVersionCheck{ - MinVersion: "0.35.0", - }, - }, - }, - { - ID: accNetResourceRelaxedPostureCheckID, - Name: accNetResourceRelaxedPostureCheckID, - Checks: posture.ChecksDefinition{ - NBVersionCheck: &posture.NBVersionCheck{ - MinVersion: "0.0.1", - }, - }, - }, - { - ID: accNetResourceLockedPostureCheckID, - Name: accNetResourceLockedPostureCheckID, - Checks: posture.ChecksDefinition{ - NBVersionCheck: &posture.NBVersionCheck{ - MinVersion: "7.7.7", - }, - }, - }, - { - ID: accNetResourceLinuxPostureCheckID, - Name: accNetResourceLinuxPostureCheckID, - Checks: posture.ChecksDefinition{ - OSVersionCheck: &posture.OSVersionCheck{ - Linux: &posture.MinKernelVersionCheck{ - MinKernelVersion: "0.0.0"}, - }, - }, - }, - }, - } -} - -func Test_NetworksNetMapGenWithNoPostureChecks(t *testing.T) { - account := getBasicAccountsWithResource() - - // all peers should match the policy - - // validate for peer1 - isRouter, networkResourcesRoutes, sourcePeers := account.GetNetworkResourcesRoutesToSync(context.Background(), accNetResourcePeer1ID, account.GetResourcePoliciesMap(), account.GetResourceRoutersMap()) - assert.False(t, isRouter, "expected router status") - assert.Len(t, networkResourcesRoutes, 1, "expected network resource route don't match") - assert.Len(t, sourcePeers, 0, "expected source peers don't match") - - // validate for peer2 - isRouter, networkResourcesRoutes, sourcePeers = account.GetNetworkResourcesRoutesToSync(context.Background(), accNetResourcePeer2ID, account.GetResourcePoliciesMap(), account.GetResourceRoutersMap()) - assert.False(t, isRouter, "expected router status") - assert.Len(t, networkResourcesRoutes, 1, "expected network resource route don't match") - assert.Len(t, sourcePeers, 0, "expected source peers don't match") - - // validate routes for router1 - isRouter, networkResourcesRoutes, sourcePeers = account.GetNetworkResourcesRoutesToSync(context.Background(), accNetResourceRouter1ID, account.GetResourcePoliciesMap(), account.GetResourceRoutersMap()) - assert.True(t, isRouter, "should be router") - assert.Len(t, networkResourcesRoutes, 1, "expected network resource route don't match") - assert.Len(t, sourcePeers, 2, "expected source peers don't match") - assert.NotNil(t, sourcePeers[accNetResourcePeer1ID], "expected source peers don't match") - assert.NotNil(t, sourcePeers[accNetResourcePeer2ID], "expected source peers don't match") - - // validate rules for router1 - rules := account.GetPeerNetworkResourceFirewallRules(context.Background(), account.Peers[accNetResourceRouter1ID], accNetResourceValidPeers, networkResourcesRoutes, account.GetResourcePoliciesMap()) - assert.Len(t, rules, 1, "expected rules count don't match") - assert.Equal(t, uint16(80), rules[0].Port, "should have port 80") - assert.Equal(t, "tcp", rules[0].Protocol, "should have protocol tcp") - if !slices.Contains(rules[0].SourceRanges, accNetResourcePeer1IP.String()+"/32") { - t.Errorf("%s should have source range of peer1 %s", rules[0].SourceRanges, accNetResourcePeer1IP.String()) - } - if !slices.Contains(rules[0].SourceRanges, accNetResourcePeer2IP.String()+"/32") { - t.Errorf("%s should have source range of peer2 %s", rules[0].SourceRanges, accNetResourcePeer2IP.String()) - } -} - -func Test_NetworksNetMapGenWithPostureChecks(t *testing.T) { - account := getBasicAccountsWithResource() - - // should allow peer1 to match the policy - policy := account.Policies[0] - policy.SourcePostureChecks = []string{accNetResourceRestrictPostureCheckID} - - // validate for peer1 - isRouter, networkResourcesRoutes, sourcePeers := account.GetNetworkResourcesRoutesToSync(context.Background(), accNetResourcePeer1ID, account.GetResourcePoliciesMap(), account.GetResourceRoutersMap()) - assert.False(t, isRouter, "expected router status") - assert.Len(t, networkResourcesRoutes, 1, "expected network resource route don't match") - assert.Len(t, sourcePeers, 0, "expected source peers don't match") - - // validate for peer2 - isRouter, networkResourcesRoutes, sourcePeers = account.GetNetworkResourcesRoutesToSync(context.Background(), accNetResourcePeer2ID, account.GetResourcePoliciesMap(), account.GetResourceRoutersMap()) - assert.False(t, isRouter, "expected router status") - assert.Len(t, networkResourcesRoutes, 0, "expected network resource route don't match") - assert.Len(t, sourcePeers, 0, "expected source peers don't match") - - // validate routes for router1 - isRouter, networkResourcesRoutes, sourcePeers = account.GetNetworkResourcesRoutesToSync(context.Background(), accNetResourceRouter1ID, account.GetResourcePoliciesMap(), account.GetResourceRoutersMap()) - assert.True(t, isRouter, "should be router") - assert.Len(t, networkResourcesRoutes, 1, "expected network resource route don't match") - assert.Len(t, sourcePeers, 1, "expected source peers don't match") - assert.NotNil(t, sourcePeers[accNetResourcePeer1ID], "expected source peers don't match") - - // validate rules for router1 - rules := account.GetPeerNetworkResourceFirewallRules(context.Background(), account.Peers[accNetResourceRouter1ID], accNetResourceValidPeers, networkResourcesRoutes, account.GetResourcePoliciesMap()) - assert.Len(t, rules, 1, "expected rules count don't match") - assert.Equal(t, uint16(80), rules[0].Port, "should have port 80") - assert.Equal(t, "tcp", rules[0].Protocol, "should have protocol tcp") - if !slices.Contains(rules[0].SourceRanges, accNetResourcePeer1IP.String()+"/32") { - t.Errorf("%s should have source range of peer1 %s", rules[0].SourceRanges, accNetResourcePeer1IP.String()) - } - if slices.Contains(rules[0].SourceRanges, accNetResourcePeer2IP.String()+"/32") { - t.Errorf("%s should not have source range of peer2 %s", rules[0].SourceRanges, accNetResourcePeer2IP.String()) - } -} - -func Test_NetworksNetMapGenWithNoMatchedPostureChecks(t *testing.T) { - account := getBasicAccountsWithResource() - - // should not match any peer - policy := account.Policies[0] - policy.SourcePostureChecks = []string{accNetResourceLockedPostureCheckID} - - // validate for peer1 - isRouter, networkResourcesRoutes, sourcePeers := account.GetNetworkResourcesRoutesToSync(context.Background(), accNetResourcePeer1ID, account.GetResourcePoliciesMap(), account.GetResourceRoutersMap()) - assert.False(t, isRouter, "expected router status") - assert.Len(t, networkResourcesRoutes, 0, "expected network resource route don't match") - assert.Len(t, sourcePeers, 0, "expected source peers don't match") - - // validate for peer2 - isRouter, networkResourcesRoutes, sourcePeers = account.GetNetworkResourcesRoutesToSync(context.Background(), accNetResourcePeer2ID, account.GetResourcePoliciesMap(), account.GetResourceRoutersMap()) - assert.False(t, isRouter, "expected router status") - assert.Len(t, networkResourcesRoutes, 0, "expected network resource route don't match") - assert.Len(t, sourcePeers, 0, "expected source peers don't match") - - // validate routes for router1 - isRouter, networkResourcesRoutes, sourcePeers = account.GetNetworkResourcesRoutesToSync(context.Background(), accNetResourceRouter1ID, account.GetResourcePoliciesMap(), account.GetResourceRoutersMap()) - assert.True(t, isRouter, "should be router") - assert.Len(t, networkResourcesRoutes, 1, "expected network resource route don't match") - assert.Len(t, sourcePeers, 0, "expected source peers don't match") - - // validate rules for router1 - rules := account.GetPeerNetworkResourceFirewallRules(context.Background(), account.Peers[accNetResourceRouter1ID], accNetResourceValidPeers, networkResourcesRoutes, account.GetResourcePoliciesMap()) - assert.Len(t, rules, 0, "expected rules count don't match") -} - -func Test_NetworksNetMapGenWithTwoPoliciesAndPostureChecks(t *testing.T) { - account := getBasicAccountsWithResource() - - // should allow peer1 to match the policy - policy := account.Policies[0] - policy.SourcePostureChecks = []string{accNetResourceRestrictPostureCheckID} - - // should allow peer1 and peer2 to match the policy - newPolicy := &Policy{ - ID: "policy2ID", - AccountID: accID, - Enabled: true, - Rules: []*PolicyRule{ - { - ID: "policy2ID", - Enabled: true, - Sources: []string{group1ID}, - DestinationResource: Resource{ - ID: accNetResource1ID, - Type: "Host", - }, - Protocol: PolicyRuleProtocolTCP, - Ports: []string{"22"}, - Action: PolicyTrafficActionAccept, - }, - }, - SourcePostureChecks: []string{accNetResourceRelaxedPostureCheckID}, - } - - account.Policies = append(account.Policies, newPolicy) - - // validate for peer1 - isRouter, networkResourcesRoutes, sourcePeers := account.GetNetworkResourcesRoutesToSync(context.Background(), accNetResourcePeer1ID, account.GetResourcePoliciesMap(), account.GetResourceRoutersMap()) - assert.False(t, isRouter, "expected router status") - assert.Len(t, networkResourcesRoutes, 1, "expected network resource route don't match") - assert.Len(t, sourcePeers, 0, "expected source peers don't match") - - // validate for peer2 - isRouter, networkResourcesRoutes, sourcePeers = account.GetNetworkResourcesRoutesToSync(context.Background(), accNetResourcePeer2ID, account.GetResourcePoliciesMap(), account.GetResourceRoutersMap()) - assert.False(t, isRouter, "expected router status") - assert.Len(t, networkResourcesRoutes, 1, "expected network resource route don't match") - assert.Len(t, sourcePeers, 0, "expected source peers don't match") - - // validate routes for router1 - isRouter, networkResourcesRoutes, sourcePeers = account.GetNetworkResourcesRoutesToSync(context.Background(), accNetResourceRouter1ID, account.GetResourcePoliciesMap(), account.GetResourceRoutersMap()) - assert.True(t, isRouter, "should be router") - assert.Len(t, networkResourcesRoutes, 1, "expected network resource route don't match") - assert.Len(t, sourcePeers, 2, "expected source peers don't match") - assert.NotNil(t, sourcePeers[accNetResourcePeer1ID], "expected source peers don't match") - assert.NotNil(t, sourcePeers[accNetResourcePeer2ID], "expected source peers don't match") - - // validate rules for router1 - rules := account.GetPeerNetworkResourceFirewallRules(context.Background(), account.Peers[accNetResourceRouter1ID], accNetResourceValidPeers, networkResourcesRoutes, account.GetResourcePoliciesMap()) - assert.Len(t, rules, 2, "expected rules count don't match") - assert.Equal(t, uint16(80), rules[0].Port, "should have port 80") - assert.Equal(t, "tcp", rules[0].Protocol, "should have protocol tcp") - if !slices.Contains(rules[0].SourceRanges, accNetResourcePeer1IP.String()+"/32") { - t.Errorf("%s should have source range of peer1 %s", rules[0].SourceRanges, accNetResourcePeer1IP.String()) - } - if slices.Contains(rules[0].SourceRanges, accNetResourcePeer2IP.String()+"/32") { - t.Errorf("%s should not have source range of peer2 %s", rules[0].SourceRanges, accNetResourcePeer2IP.String()) - } - - assert.Equal(t, uint16(22), rules[1].Port, "should have port 22") - assert.Equal(t, "tcp", rules[1].Protocol, "should have protocol tcp") - if !slices.Contains(rules[1].SourceRanges, accNetResourcePeer1IP.String()+"/32") { - t.Errorf("%s should have source range of peer1 %s", rules[1].SourceRanges, accNetResourcePeer1IP.String()) - } - if !slices.Contains(rules[1].SourceRanges, accNetResourcePeer2IP.String()+"/32") { - t.Errorf("%s should have source range of peer2 %s", rules[1].SourceRanges, accNetResourcePeer2IP.String()) - } -} - -func Test_NetworksNetMapGenWithTwoPostureChecks(t *testing.T) { - account := getBasicAccountsWithResource() - - // two posture checks should match only the peers that match both checks - policy := account.Policies[0] - policy.SourcePostureChecks = []string{accNetResourceRelaxedPostureCheckID, accNetResourceLinuxPostureCheckID} - - // validate for peer1 - isRouter, networkResourcesRoutes, sourcePeers := account.GetNetworkResourcesRoutesToSync(context.Background(), accNetResourcePeer1ID, account.GetResourcePoliciesMap(), account.GetResourceRoutersMap()) - assert.False(t, isRouter, "expected router status") - assert.Len(t, networkResourcesRoutes, 1, "expected network resource route don't match") - assert.Len(t, sourcePeers, 0, "expected source peers don't match") - - // validate for peer2 - isRouter, networkResourcesRoutes, sourcePeers = account.GetNetworkResourcesRoutesToSync(context.Background(), accNetResourcePeer2ID, account.GetResourcePoliciesMap(), account.GetResourceRoutersMap()) - assert.False(t, isRouter, "expected router status") - assert.Len(t, networkResourcesRoutes, 0, "expected network resource route don't match") - assert.Len(t, sourcePeers, 0, "expected source peers don't match") - - // validate routes for router1 - isRouter, networkResourcesRoutes, sourcePeers = account.GetNetworkResourcesRoutesToSync(context.Background(), accNetResourceRouter1ID, account.GetResourcePoliciesMap(), account.GetResourceRoutersMap()) - assert.True(t, isRouter, "should be router") - assert.Len(t, networkResourcesRoutes, 1, "expected network resource route don't match") - assert.Len(t, sourcePeers, 1, "expected source peers don't match") - assert.NotNil(t, sourcePeers[accNetResourcePeer1ID], "expected source peers don't match") - - // validate rules for router1 - rules := account.GetPeerNetworkResourceFirewallRules(context.Background(), account.Peers[accNetResourceRouter1ID], accNetResourceValidPeers, networkResourcesRoutes, account.GetResourcePoliciesMap()) - assert.Len(t, rules, 1, "expected rules count don't match") - assert.Equal(t, uint16(80), rules[0].Port, "should have port 80") - assert.Equal(t, "tcp", rules[0].Protocol, "should have protocol tcp") - if !slices.Contains(rules[0].SourceRanges, accNetResourcePeer1IP.String()+"/32") { - t.Errorf("%s should have source range of peer1 %s", rules[0].SourceRanges, accNetResourcePeer1IP.String()) - } - if slices.Contains(rules[0].SourceRanges, accNetResourcePeer2IP.String()+"/32") { - t.Errorf("%s should not have source range of peer2 %s", rules[0].SourceRanges, accNetResourcePeer2IP.String()) - } -} - -func Test_NetworksNetMapGenShouldExcludeOtherRouters(t *testing.T) { - account := getBasicAccountsWithResource() - - account.Peers["router2Id"] = &nbpeer.Peer{Key: "router2Key", ID: "router2Id", AccountID: accID, IP: net.IP{192, 168, 1, 4}} - account.NetworkRouters = append(account.NetworkRouters, &routerTypes.NetworkRouter{ - ID: "router2Id", - NetworkID: network1ID, - AccountID: accID, - Peer: "router2Id", - }) - - // validate routes for router1 - isRouter, networkResourcesRoutes, sourcePeers := account.GetNetworkResourcesRoutesToSync(context.Background(), accNetResourceRouter1ID, account.GetResourcePoliciesMap(), account.GetResourceRoutersMap()) - assert.True(t, isRouter, "should be router") - assert.Len(t, networkResourcesRoutes, 1, "expected network resource route don't match") - assert.Len(t, sourcePeers, 2, "expected source peers don't match") -} - func Test_ExpandPortsAndRanges_SSHRuleExpansion(t *testing.T) { tests := []struct { name string diff --git a/management/server/types/holder.go b/management/server/types/holder.go deleted file mode 100644 index de8ac8110..000000000 --- a/management/server/types/holder.go +++ /dev/null @@ -1,47 +0,0 @@ -package types - -import ( - "context" - "sync" -) - -type Holder struct { - mu sync.RWMutex - accounts map[string]*Account -} - -func NewHolder() *Holder { - return &Holder{ - accounts: make(map[string]*Account), - } -} - -func (h *Holder) GetAccount(id string) *Account { - h.mu.RLock() - defer h.mu.RUnlock() - return h.accounts[id] -} - -func (h *Holder) AddAccount(account *Account) { - h.mu.Lock() - defer h.mu.Unlock() - a := h.accounts[account.Id] - if a != nil && a.Network.CurrentSerial() >= account.Network.CurrentSerial() { - return - } - h.accounts[account.Id] = account -} - -func (h *Holder) LoadOrStoreFunc(ctx context.Context, id string, accGetter func(context.Context, string) (*Account, error)) (*Account, error) { - h.mu.Lock() - defer h.mu.Unlock() - if acc, ok := h.accounts[id]; ok { - return acc, nil - } - account, err := accGetter(ctx, id) - if err != nil { - return nil, err - } - h.accounts[id] = account - return account, nil -} diff --git a/management/server/types/networkmap.go b/management/server/types/networkmap.go deleted file mode 100644 index 68c988a93..000000000 --- a/management/server/types/networkmap.go +++ /dev/null @@ -1,67 +0,0 @@ -package types - -import ( - "context" - - nbdns "github.com/netbirdio/netbird/dns" - "github.com/netbirdio/netbird/management/internals/modules/zones" - nbpeer "github.com/netbirdio/netbird/management/server/peer" - "github.com/netbirdio/netbird/management/server/telemetry" -) - -func (a *Account) initNetworkMapBuilder(validatedPeers map[string]struct{}) { - if a.NetworkMapCache != nil { - return - } - a.nmapInitOnce.Do(func() { - a.NetworkMapCache = NewNetworkMapBuilder(a, validatedPeers) - }) -} - -func (a *Account) InitNetworkMapBuilderIfNeeded(validatedPeers map[string]struct{}) { - a.initNetworkMapBuilder(validatedPeers) -} - -func (a *Account) GetPeerNetworkMapExp( - ctx context.Context, - peerID string, - peersCustomZone nbdns.CustomZone, - accountZones []*zones.Zone, - validatedPeers map[string]struct{}, - metrics *telemetry.AccountManagerMetrics, -) *NetworkMap { - a.initNetworkMapBuilder(validatedPeers) - return a.NetworkMapCache.GetPeerNetworkMap(ctx, peerID, peersCustomZone, accountZones, validatedPeers, metrics) -} - -func (a *Account) OnPeerAddedUpdNetworkMapCache(peerId string) error { - if a.NetworkMapCache == nil { - return nil - } - return a.NetworkMapCache.OnPeerAddedIncremental(a, peerId) -} - -func (a *Account) OnPeersAddedUpdNetworkMapCache(peerIds ...string) { - if a.NetworkMapCache == nil { - return - } - a.NetworkMapCache.EnqueuePeersForIncrementalAdd(a, peerIds...) -} - -func (a *Account) OnPeerDeletedUpdNetworkMapCache(peerId string) error { - if a.NetworkMapCache == nil { - return nil - } - return a.NetworkMapCache.OnPeerDeleted(a, peerId) -} - -func (a *Account) UpdatePeerInNetworkMapCache(peer *nbpeer.Peer) { - if a.NetworkMapCache == nil { - return - } - a.NetworkMapCache.UpdatePeer(peer) -} - -func (a *Account) RecalculateNetworkMapCache(validatedPeers map[string]struct{}) { - a.initNetworkMapBuilder(validatedPeers) -} diff --git a/management/server/types/networkmap_comparison_test.go b/management/server/types/networkmap_comparison_test.go deleted file mode 100644 index c5844cca0..000000000 --- a/management/server/types/networkmap_comparison_test.go +++ /dev/null @@ -1,592 +0,0 @@ -package types - -import ( - "context" - "encoding/json" - "fmt" - "net" - "net/netip" - "os" - "path/filepath" - "sort" - "testing" - "time" - - "github.com/stretchr/testify/require" - - nbdns "github.com/netbirdio/netbird/dns" - resourceTypes "github.com/netbirdio/netbird/management/server/networks/resources/types" - routerTypes "github.com/netbirdio/netbird/management/server/networks/routers/types" - networkTypes "github.com/netbirdio/netbird/management/server/networks/types" - nbpeer "github.com/netbirdio/netbird/management/server/peer" - "github.com/netbirdio/netbird/management/server/posture" - "github.com/netbirdio/netbird/route" -) - -func TestNetworkMapComponents_CompareWithLegacy(t *testing.T) { - account := createTestAccount() - ctx := context.Background() - - peerID := testingPeerID - validatedPeersMap := make(map[string]struct{}) - for i := range numPeers { - pid := fmt.Sprintf("peer-%d", i) - if pid == offlinePeerID { - continue - } - validatedPeersMap[pid] = struct{}{} - } - - peersCustomZone := nbdns.CustomZone{} - resourcePolicies := account.GetResourcePoliciesMap() - routers := account.GetResourceRoutersMap() - groupIDToUserIDs := account.GetActiveGroupUsers() - - legacyNetworkMap := account.GetPeerNetworkMap( - ctx, - peerID, - peersCustomZone, - nil, - validatedPeersMap, - resourcePolicies, - routers, - nil, - groupIDToUserIDs, - ) - - components := account.GetPeerNetworkMapComponents( - ctx, - peerID, - peersCustomZone, - nil, - validatedPeersMap, - resourcePolicies, - routers, - groupIDToUserIDs, - ) - - if components == nil { - t.Fatal("GetPeerNetworkMapComponents returned nil") - } - - newNetworkMap := CalculateNetworkMapFromComponents(ctx, components) - - if newNetworkMap == nil { - t.Fatal("CalculateNetworkMapFromComponents returned nil") - } - - compareNetworkMaps(t, legacyNetworkMap, newNetworkMap) -} - -func TestNetworkMapComponents_GoldenFileComparison(t *testing.T) { - account := createTestAccount() - ctx := context.Background() - - peerID := testingPeerID - validatedPeersMap := make(map[string]struct{}) - for i := range numPeers { - pid := fmt.Sprintf("peer-%d", i) - if pid == offlinePeerID { - continue - } - validatedPeersMap[pid] = struct{}{} - } - - peersCustomZone := nbdns.CustomZone{} - resourcePolicies := account.GetResourcePoliciesMap() - routers := account.GetResourceRoutersMap() - groupIDToUserIDs := account.GetActiveGroupUsers() - - legacyNetworkMap := account.GetPeerNetworkMap( - ctx, - peerID, - peersCustomZone, - nil, - validatedPeersMap, - resourcePolicies, - routers, - nil, - groupIDToUserIDs, - ) - - components := account.GetPeerNetworkMapComponents( - ctx, - peerID, - peersCustomZone, - nil, - validatedPeersMap, - resourcePolicies, - routers, - groupIDToUserIDs, - ) - - require.NotNil(t, components, "GetPeerNetworkMapComponents returned nil") - - newNetworkMap := CalculateNetworkMapFromComponents(ctx, components) - require.NotNil(t, newNetworkMap, "CalculateNetworkMapFromComponents returned nil") - - normalizeAndSortNetworkMap(legacyNetworkMap) - normalizeAndSortNetworkMap(newNetworkMap) - - componentsJSON, err := json.MarshalIndent(components, "", " ") - require.NoError(t, err, "error marshaling components to JSON") - - legacyJSON, err := json.MarshalIndent(legacyNetworkMap, "", " ") - require.NoError(t, err, "error marshaling legacy network map to JSON") - - newJSON, err := json.MarshalIndent(newNetworkMap, "", " ") - require.NoError(t, err, "error marshaling new network map to JSON") - - goldenDir := filepath.Join("testdata", "comparison") - err = os.MkdirAll(goldenDir, 0755) - require.NoError(t, err) - - legacyGoldenPath := filepath.Join(goldenDir, "legacy_networkmap.json") - err = os.WriteFile(legacyGoldenPath, legacyJSON, 0644) - require.NoError(t, err, "error writing legacy golden file") - - newGoldenPath := filepath.Join(goldenDir, "components_networkmap.json") - err = os.WriteFile(newGoldenPath, newJSON, 0644) - require.NoError(t, err, "error writing components golden file") - - componentsPath := filepath.Join(goldenDir, "components.json") - err = os.WriteFile(componentsPath, componentsJSON, 0644) - require.NoError(t, err, "error writing components golden file") - - require.JSONEq(t, string(legacyJSON), string(newJSON), - "NetworkMaps from legacy and components approaches do not match.\n"+ - "Legacy JSON saved to: %s\n"+ - "Components JSON saved to: %s", - legacyGoldenPath, newGoldenPath) - - t.Logf("✅ NetworkMaps are identical") - t.Logf(" Legacy NetworkMap: %s", legacyGoldenPath) - t.Logf(" Components NetworkMap: %s", newGoldenPath) -} - -func normalizeAndSortNetworkMap(nm *NetworkMap) { - if nm == nil { - return - } - - sort.Slice(nm.Peers, func(i, j int) bool { - return nm.Peers[i].ID < nm.Peers[j].ID - }) - - sort.Slice(nm.OfflinePeers, func(i, j int) bool { - return nm.OfflinePeers[i].ID < nm.OfflinePeers[j].ID - }) - - sort.Slice(nm.Routes, func(i, j int) bool { - return string(nm.Routes[i].ID) < string(nm.Routes[j].ID) - }) - - sort.Slice(nm.FirewallRules, func(i, j int) bool { - if nm.FirewallRules[i].PeerIP != nm.FirewallRules[j].PeerIP { - return nm.FirewallRules[i].PeerIP < nm.FirewallRules[j].PeerIP - } - if nm.FirewallRules[i].Direction != nm.FirewallRules[j].Direction { - return nm.FirewallRules[i].Direction < nm.FirewallRules[j].Direction - } - if nm.FirewallRules[i].Protocol != nm.FirewallRules[j].Protocol { - return nm.FirewallRules[i].Protocol < nm.FirewallRules[j].Protocol - } - if nm.FirewallRules[i].Port != nm.FirewallRules[j].Port { - return nm.FirewallRules[i].Port < nm.FirewallRules[j].Port - } - return nm.FirewallRules[i].PolicyID < nm.FirewallRules[j].PolicyID - }) - - for i := range nm.RoutesFirewallRules { - sort.Strings(nm.RoutesFirewallRules[i].SourceRanges) - } - - sort.Slice(nm.RoutesFirewallRules, func(i, j int) bool { - if nm.RoutesFirewallRules[i].Destination != nm.RoutesFirewallRules[j].Destination { - return nm.RoutesFirewallRules[i].Destination < nm.RoutesFirewallRules[j].Destination - } - - minLen := len(nm.RoutesFirewallRules[i].SourceRanges) - if len(nm.RoutesFirewallRules[j].SourceRanges) < minLen { - minLen = len(nm.RoutesFirewallRules[j].SourceRanges) - } - for k := 0; k < minLen; k++ { - if nm.RoutesFirewallRules[i].SourceRanges[k] != nm.RoutesFirewallRules[j].SourceRanges[k] { - return nm.RoutesFirewallRules[i].SourceRanges[k] < nm.RoutesFirewallRules[j].SourceRanges[k] - } - } - if len(nm.RoutesFirewallRules[i].SourceRanges) != len(nm.RoutesFirewallRules[j].SourceRanges) { - return len(nm.RoutesFirewallRules[i].SourceRanges) < len(nm.RoutesFirewallRules[j].SourceRanges) - } - - if string(nm.RoutesFirewallRules[i].RouteID) != string(nm.RoutesFirewallRules[j].RouteID) { - return string(nm.RoutesFirewallRules[i].RouteID) < string(nm.RoutesFirewallRules[j].RouteID) - } - - if nm.RoutesFirewallRules[i].PolicyID != nm.RoutesFirewallRules[j].PolicyID { - return nm.RoutesFirewallRules[i].PolicyID < nm.RoutesFirewallRules[j].PolicyID - } - - if nm.RoutesFirewallRules[i].Port != nm.RoutesFirewallRules[j].Port { - return nm.RoutesFirewallRules[i].Port < nm.RoutesFirewallRules[j].Port - } - - return nm.RoutesFirewallRules[i].Protocol < nm.RoutesFirewallRules[j].Protocol - }) - - if nm.DNSConfig.CustomZones != nil { - for i := range nm.DNSConfig.CustomZones { - sort.Slice(nm.DNSConfig.CustomZones[i].Records, func(a, b int) bool { - return nm.DNSConfig.CustomZones[i].Records[a].Name < nm.DNSConfig.CustomZones[i].Records[b].Name - }) - } - } - - if len(nm.DNSConfig.NameServerGroups) != 0 { - sort.Slice(nm.DNSConfig.NameServerGroups, func(a, b int) bool { - return nm.DNSConfig.NameServerGroups[a].Name < nm.DNSConfig.NameServerGroups[b].Name - }) - } -} - -func compareNetworkMaps(t *testing.T, legacy, current *NetworkMap) { - t.Helper() - - if legacy.Network.Serial != current.Network.Serial { - t.Errorf("Network Serial mismatch: legacy=%d, current=%d", legacy.Network.Serial, current.Network.Serial) - } - - if len(legacy.Peers) != len(current.Peers) { - t.Errorf("Peers count mismatch: legacy=%d, current=%d", len(legacy.Peers), len(current.Peers)) - } - - legacyPeerIDs := make(map[string]bool) - for _, p := range legacy.Peers { - legacyPeerIDs[p.ID] = true - } - - for _, p := range current.Peers { - if !legacyPeerIDs[p.ID] { - t.Errorf("Current NetworkMap contains peer %s not in legacy", p.ID) - } - } - - if len(legacy.OfflinePeers) != len(current.OfflinePeers) { - t.Errorf("OfflinePeers count mismatch: legacy=%d, current=%d", len(legacy.OfflinePeers), len(current.OfflinePeers)) - } - - if len(legacy.FirewallRules) != len(current.FirewallRules) { - t.Logf("FirewallRules count mismatch: legacy=%d, current=%d", len(legacy.FirewallRules), len(current.FirewallRules)) - } - - if len(legacy.Routes) != len(current.Routes) { - t.Logf("Routes count mismatch: legacy=%d, current=%d", len(legacy.Routes), len(current.Routes)) - } - - if len(legacy.RoutesFirewallRules) != len(current.RoutesFirewallRules) { - t.Logf("RoutesFirewallRules count mismatch: legacy=%d, current=%d", len(legacy.RoutesFirewallRules), len(current.RoutesFirewallRules)) - } - - if legacy.DNSConfig.ServiceEnable != current.DNSConfig.ServiceEnable { - t.Errorf("DNSConfig.ServiceEnable mismatch: legacy=%v, current=%v", legacy.DNSConfig.ServiceEnable, current.DNSConfig.ServiceEnable) - } -} - -const ( - numPeers = 100 - devGroupID = "group-dev" - opsGroupID = "group-ops" - allGroupID = "group-all" - routeID = route.ID("route-main") - routeHA1ID = route.ID("route-ha-1") - routeHA2ID = route.ID("route-ha-2") - policyIDDevOps = "policy-dev-ops" - policyIDAll = "policy-all" - policyIDPosture = "policy-posture" - policyIDDrop = "policy-drop" - postureCheckID = "posture-check-ver" - networkResourceID = "res-database" - networkID = "net-database" - networkRouterID = "router-database" - nameserverGroupID = "ns-group-main" - testingPeerID = "peer-60" - expiredPeerID = "peer-98" - offlinePeerID = "peer-99" - routingPeerID = "peer-95" - testAccountID = "account-comparison-test" -) - -func createTestAccount() *Account { - peers := make(map[string]*nbpeer.Peer) - devGroupPeers, opsGroupPeers, allGroupPeers := []string{}, []string{}, []string{} - - for i := range numPeers { - peerID := fmt.Sprintf("peer-%d", i) - ip := net.IP{100, 64, 0, byte(i + 1)} - wtVersion := "0.25.0" - if i%2 == 0 { - wtVersion = "0.40.0" - } - - p := &nbpeer.Peer{ - ID: peerID, IP: ip, Key: fmt.Sprintf("key-%s", peerID), DNSLabel: fmt.Sprintf("peer%d", i+1), - Status: &nbpeer.PeerStatus{Connected: true, LastSeen: time.Now()}, - UserID: "user-admin", Meta: nbpeer.PeerSystemMeta{WtVersion: wtVersion, GoOS: "linux"}, - } - - if peerID == expiredPeerID { - p.LoginExpirationEnabled = true - pastTimestamp := time.Now().Add(-2 * time.Hour) - p.LastLogin = &pastTimestamp - } - - peers[peerID] = p - allGroupPeers = append(allGroupPeers, peerID) - if i < numPeers/2 { - devGroupPeers = append(devGroupPeers, peerID) - } else { - opsGroupPeers = append(opsGroupPeers, peerID) - } - } - - groups := map[string]*Group{ - allGroupID: {ID: allGroupID, Name: "All", Peers: allGroupPeers}, - devGroupID: {ID: devGroupID, Name: "Developers", Peers: devGroupPeers}, - opsGroupID: {ID: opsGroupID, Name: "Operations", Peers: opsGroupPeers}, - } - - policies := []*Policy{ - { - ID: policyIDAll, Name: "Default-Allow", Enabled: true, - Rules: []*PolicyRule{{ - ID: policyIDAll, Name: "Allow All", Enabled: true, Action: PolicyTrafficActionAccept, - Protocol: PolicyRuleProtocolALL, Bidirectional: true, - Sources: []string{allGroupID}, Destinations: []string{allGroupID}, - }}, - }, - { - ID: policyIDDevOps, Name: "Dev to Ops Web Access", Enabled: true, - Rules: []*PolicyRule{{ - ID: policyIDDevOps, Name: "Dev -> Ops (HTTP Range)", Enabled: true, Action: PolicyTrafficActionAccept, - Protocol: PolicyRuleProtocolTCP, Bidirectional: false, - PortRanges: []RulePortRange{{Start: 8080, End: 8090}}, - Sources: []string{devGroupID}, Destinations: []string{opsGroupID}, - }}, - }, - { - ID: policyIDDrop, Name: "Drop DB traffic", Enabled: true, - Rules: []*PolicyRule{{ - ID: policyIDDrop, Name: "Drop DB", Enabled: true, Action: PolicyTrafficActionDrop, - Protocol: PolicyRuleProtocolTCP, Ports: []string{"5432"}, Bidirectional: true, - Sources: []string{devGroupID}, Destinations: []string{opsGroupID}, - }}, - }, - { - ID: policyIDPosture, Name: "Posture Check for DB Resource", Enabled: true, - SourcePostureChecks: []string{postureCheckID}, - Rules: []*PolicyRule{{ - ID: policyIDPosture, Name: "Allow DB Access", Enabled: true, Action: PolicyTrafficActionAccept, - Protocol: PolicyRuleProtocolALL, Bidirectional: true, - Sources: []string{opsGroupID}, DestinationResource: Resource{ID: networkResourceID}, - }}, - }, - } - - routes := map[route.ID]*route.Route{ - routeID: { - ID: routeID, Network: netip.MustParsePrefix("192.168.10.0/24"), - Peer: peers["peer-75"].Key, - PeerID: "peer-75", - Description: "Route to internal resource", Enabled: true, - PeerGroups: []string{devGroupID, opsGroupID}, - Groups: []string{devGroupID, opsGroupID}, - AccessControlGroups: []string{devGroupID}, - }, - routeHA1ID: { - ID: routeHA1ID, Network: netip.MustParsePrefix("10.10.0.0/16"), - Peer: peers["peer-80"].Key, - PeerID: "peer-80", - Description: "HA Route 1", Enabled: true, Metric: 1000, - PeerGroups: []string{allGroupID}, - Groups: []string{allGroupID}, - AccessControlGroups: []string{allGroupID}, - }, - routeHA2ID: { - ID: routeHA2ID, Network: netip.MustParsePrefix("10.10.0.0/16"), - Peer: peers["peer-90"].Key, - PeerID: "peer-90", - Description: "HA Route 2", Enabled: true, Metric: 900, - PeerGroups: []string{devGroupID, opsGroupID}, - Groups: []string{devGroupID, opsGroupID}, - AccessControlGroups: []string{allGroupID}, - }, - } - - account := &Account{ - Id: testAccountID, Peers: peers, Groups: groups, Policies: policies, Routes: routes, - Network: &Network{ - Identifier: "net-comparison-test", Net: net.IPNet{IP: net.IP{100, 64, 0, 0}, Mask: net.CIDRMask(16, 32)}, Serial: 1, - }, - DNSSettings: DNSSettings{DisabledManagementGroups: []string{opsGroupID}}, - NameServerGroups: map[string]*nbdns.NameServerGroup{ - nameserverGroupID: { - ID: nameserverGroupID, Name: "Main NS", Enabled: true, Groups: []string{devGroupID}, - NameServers: []nbdns.NameServer{{IP: netip.MustParseAddr("8.8.8.8"), NSType: nbdns.UDPNameServerType, Port: 53}}, - }, - }, - PostureChecks: []*posture.Checks{ - {ID: postureCheckID, Name: "Check version", Checks: posture.ChecksDefinition{ - NBVersionCheck: &posture.NBVersionCheck{MinVersion: "0.26.0"}, - }}, - }, - NetworkResources: []*resourceTypes.NetworkResource{ - {ID: networkResourceID, NetworkID: networkID, AccountID: testAccountID, Enabled: true, Address: "db.netbird.cloud"}, - }, - Networks: []*networkTypes.Network{{ID: networkID, Name: "DB Network", AccountID: testAccountID}}, - NetworkRouters: []*routerTypes.NetworkRouter{ - {ID: networkRouterID, NetworkID: networkID, Peer: routingPeerID, Enabled: true, AccountID: testAccountID}, - }, - Settings: &Settings{PeerLoginExpirationEnabled: true, PeerLoginExpiration: 1 * time.Hour}, - } - - for _, p := range account.Policies { - p.AccountID = account.Id - } - for _, r := range account.Routes { - r.AccountID = account.Id - } - - return account -} - -func BenchmarkLegacyNetworkMap(b *testing.B) { - account := createTestAccount() - ctx := context.Background() - peerID := testingPeerID - validatedPeersMap := make(map[string]struct{}) - for i := range numPeers { - pid := fmt.Sprintf("peer-%d", i) - if pid != offlinePeerID { - validatedPeersMap[pid] = struct{}{} - } - } - - peersCustomZone := nbdns.CustomZone{} - resourcePolicies := account.GetResourcePoliciesMap() - routers := account.GetResourceRoutersMap() - groupIDToUserIDs := account.GetActiveGroupUsers() - - b.ResetTimer() - for i := 0; i < b.N; i++ { - _ = account.GetPeerNetworkMap( - ctx, - peerID, - peersCustomZone, - nil, - validatedPeersMap, - resourcePolicies, - routers, - nil, - groupIDToUserIDs, - ) - } -} - -func BenchmarkComponentsNetworkMap(b *testing.B) { - account := createTestAccount() - ctx := context.Background() - peerID := testingPeerID - validatedPeersMap := make(map[string]struct{}) - for i := range numPeers { - pid := fmt.Sprintf("peer-%d", i) - if pid != offlinePeerID { - validatedPeersMap[pid] = struct{}{} - } - } - - peersCustomZone := nbdns.CustomZone{} - resourcePolicies := account.GetResourcePoliciesMap() - routers := account.GetResourceRoutersMap() - groupIDToUserIDs := account.GetActiveGroupUsers() - - b.ResetTimer() - for i := 0; i < b.N; i++ { - components := account.GetPeerNetworkMapComponents( - ctx, - peerID, - peersCustomZone, - nil, - validatedPeersMap, - resourcePolicies, - routers, - groupIDToUserIDs, - ) - _ = CalculateNetworkMapFromComponents(ctx, components) - } -} - -func BenchmarkComponentsCreation(b *testing.B) { - account := createTestAccount() - ctx := context.Background() - peerID := testingPeerID - validatedPeersMap := make(map[string]struct{}) - for i := range numPeers { - pid := fmt.Sprintf("peer-%d", i) - if pid != offlinePeerID { - validatedPeersMap[pid] = struct{}{} - } - } - - peersCustomZone := nbdns.CustomZone{} - resourcePolicies := account.GetResourcePoliciesMap() - routers := account.GetResourceRoutersMap() - groupIDToUserIDs := account.GetActiveGroupUsers() - - b.ResetTimer() - for i := 0; i < b.N; i++ { - _ = account.GetPeerNetworkMapComponents( - ctx, - peerID, - peersCustomZone, - nil, - validatedPeersMap, - resourcePolicies, - routers, - groupIDToUserIDs, - ) - } -} - -func BenchmarkCalculationFromComponents(b *testing.B) { - account := createTestAccount() - ctx := context.Background() - peerID := testingPeerID - validatedPeersMap := make(map[string]struct{}) - for i := range numPeers { - pid := fmt.Sprintf("peer-%d", i) - if pid != offlinePeerID { - validatedPeersMap[pid] = struct{}{} - } - } - - peersCustomZone := nbdns.CustomZone{} - resourcePolicies := account.GetResourcePoliciesMap() - routers := account.GetResourceRoutersMap() - groupIDToUserIDs := account.GetActiveGroupUsers() - - components := account.GetPeerNetworkMapComponents( - ctx, - peerID, - peersCustomZone, - nil, - validatedPeersMap, - resourcePolicies, - routers, - groupIDToUserIDs, - ) - - b.ResetTimer() - for i := 0; i < b.N; i++ { - _ = CalculateNetworkMapFromComponents(ctx, components) - } -} diff --git a/management/server/types/networkmap_components.go b/management/server/types/networkmap_components.go index 23d84a994..6f84c8d30 100644 --- a/management/server/types/networkmap_components.go +++ b/management/server/types/networkmap_components.go @@ -19,8 +19,6 @@ import ( "github.com/netbirdio/netbird/shared/management/domain" ) -const EnvNewNetworkMapCompacted = "NB_NETWORK_MAP_COMPACTED" - type NetworkMapComponents struct { PeerID string diff --git a/management/server/types/networkmap_components_test.go b/management/server/types/networkmap_components_test.go new file mode 100644 index 000000000..dde639ccb --- /dev/null +++ b/management/server/types/networkmap_components_test.go @@ -0,0 +1,787 @@ +package types_test + +import ( + "context" + "net" + "net/netip" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + nbdns "github.com/netbirdio/netbird/dns" + resourceTypes "github.com/netbirdio/netbird/management/server/networks/resources/types" + routerTypes "github.com/netbirdio/netbird/management/server/networks/routers/types" + networkTypes "github.com/netbirdio/netbird/management/server/networks/types" + nbpeer "github.com/netbirdio/netbird/management/server/peer" + "github.com/netbirdio/netbird/management/server/posture" + "github.com/netbirdio/netbird/management/server/types" + "github.com/netbirdio/netbird/route" +) + +func networkMapFromComponents(t *testing.T, account *types.Account, peerID string, validatedPeers map[string]struct{}) *types.NetworkMap { + t.Helper() + return account.GetPeerNetworkMapFromComponents( + context.Background(), + peerID, + account.GetPeersCustomZone(context.Background(), "netbird.io"), + nil, + validatedPeers, + account.GetResourcePoliciesMap(), + account.GetResourceRoutersMap(), + nil, + account.GetActiveGroupUsers(), + ) +} + +func allPeersValidated(account *types.Account, excludePeerIDs ...string) map[string]struct{} { + excludeSet := make(map[string]struct{}, len(excludePeerIDs)) + for _, id := range excludePeerIDs { + excludeSet[id] = struct{}{} + } + validated := make(map[string]struct{}, len(account.Peers)) + for id := range account.Peers { + if _, excluded := excludeSet[id]; !excluded { + validated[id] = struct{}{} + } + } + return validated +} + +func peerIDs(peers []*nbpeer.Peer) []string { + ids := make([]string, len(peers)) + for i, p := range peers { + ids[i] = p.ID + } + return ids +} + +func TestNetworkMapComponents_RegularPeerConnectivity(t *testing.T) { + account := createComponentTestAccount() + validated := allPeersValidated(account) + + nm := networkMapFromComponents(t, account, "peer-src-1", validated) + + assert.NotNil(t, nm) + assert.Contains(t, peerIDs(nm.Peers), "peer-dst-1", "should see peer from destination group via bidirectional policy") + assert.Contains(t, peerIDs(nm.Peers), "peer-router-1", "should see router peer via resource policy") + assert.NotContains(t, peerIDs(nm.Peers), "peer-src-1", "should not see itself") + assert.Empty(t, nm.OfflinePeers, "no expired peers expected") +} + +func TestNetworkMapComponents_IntraGroupConnectivity(t *testing.T) { + account := createComponentTestAccount() + account.Policies = append(account.Policies, &types.Policy{ + ID: "policy-intra-src", Name: "Intra-source connectivity", Enabled: true, AccountID: account.Id, + Rules: []*types.PolicyRule{{ + ID: "rule-intra-src", Name: "src <-> src", Enabled: true, + Action: types.PolicyTrafficActionAccept, Protocol: types.PolicyRuleProtocolALL, + Bidirectional: true, + Sources: []string{"group-src"}, Destinations: []string{"group-src"}, + }}, + }) + validated := allPeersValidated(account) + + nm := networkMapFromComponents(t, account, "peer-src-1", validated) + assert.Contains(t, peerIDs(nm.Peers), "peer-src-2", "should see peer from same group with intra-group policy") +} + +func TestNetworkMapComponents_FirewallRules(t *testing.T) { + account := createComponentTestAccount() + validated := allPeersValidated(account) + + nm := networkMapFromComponents(t, account, "peer-src-1", validated) + + require.NotEmpty(t, nm.FirewallRules, "firewall rules should be generated") + + var hasAcceptAll bool + for _, rule := range nm.FirewallRules { + if rule.Protocol == string(types.PolicyRuleProtocolALL) && rule.Action == string(types.PolicyTrafficActionAccept) { + hasAcceptAll = true + } + } + assert.True(t, hasAcceptAll, "should have an accept-all firewall rule from the base policy") +} + +func TestNetworkMapComponents_LoginExpiration(t *testing.T) { + account := createComponentTestAccount() + account.Settings.PeerLoginExpirationEnabled = true + account.Settings.PeerLoginExpiration = 1 * time.Hour + + expiredTime := time.Now().Add(-2 * time.Hour) + account.Peers["peer-dst-1"].LoginExpirationEnabled = true + account.Peers["peer-dst-1"].LastLogin = &expiredTime + + validated := allPeersValidated(account) + nm := networkMapFromComponents(t, account, "peer-src-1", validated) + + assert.Contains(t, peerIDs(nm.OfflinePeers), "peer-dst-1", "expired peer should be in OfflinePeers") + assert.NotContains(t, peerIDs(nm.Peers), "peer-dst-1", "expired peer should NOT be in active Peers") +} + +func TestNetworkMapComponents_InvalidatedPeerExcluded(t *testing.T) { + account := createComponentTestAccount() + validated := allPeersValidated(account, "peer-dst-1") + + nm := networkMapFromComponents(t, account, "peer-src-1", validated) + + assert.NotContains(t, peerIDs(nm.Peers), "peer-dst-1", "non-validated peer should be excluded") + assert.NotContains(t, peerIDs(nm.OfflinePeers), "peer-dst-1", "non-validated peer should not be in offline peers either") +} + +func TestNetworkMapComponents_NonValidatedTargetPeer(t *testing.T) { + account := createComponentTestAccount() + validated := allPeersValidated(account, "peer-src-1") + + nm := networkMapFromComponents(t, account, "peer-src-1", validated) + + assert.Empty(t, nm.Peers, "non-validated target peer should get empty network map") + assert.Empty(t, nm.FirewallRules) +} + +func TestNetworkMapComponents_NetworkResourceRoutes_SourcePeer(t *testing.T) { + account := createComponentTestAccount() + validated := allPeersValidated(account) + + nm := networkMapFromComponents(t, account, "peer-src-1", validated) + + var hasResourceRoute bool + for _, r := range nm.Routes { + if r.Network.String() == "10.200.0.1/32" { + hasResourceRoute = true + break + } + } + assert.True(t, hasResourceRoute, "source peer should receive route to network resource via router") + assert.Contains(t, peerIDs(nm.Peers), "peer-router-1", "source peer should see the routing peer") +} + +func TestNetworkMapComponents_NetworkResourceRoutes_RouterPeer(t *testing.T) { + account := createComponentTestAccount() + validated := allPeersValidated(account) + + nm := networkMapFromComponents(t, account, "peer-router-1", validated) + + var hasResourceRoute bool + for _, r := range nm.Routes { + if r.Network.String() == "10.200.0.1/32" { + hasResourceRoute = true + break + } + } + assert.True(t, hasResourceRoute, "router peer should receive network resource route") + assert.NotEmpty(t, nm.RoutesFirewallRules, "router peer should have route firewall rules for the resource") +} + +func TestNetworkMapComponents_NetworkResourceRoutes_UnrelatedPeer(t *testing.T) { + account := createComponentTestAccount() + validated := allPeersValidated(account) + + nm := networkMapFromComponents(t, account, "peer-dst-1", validated) + + for _, r := range nm.Routes { + assert.NotEqual(t, "10.200.0.1/32", r.Network.String(), "unrelated peer should not receive network resource route") + } +} + +func TestNetworkMapComponents_NetworkResource_WithPostureCheck(t *testing.T) { + account := createComponentTestAccount() + validated := allPeersValidated(account) + + account.PostureChecks = []*posture.Checks{ + {ID: "pc-version", Name: "Version check", Checks: posture.ChecksDefinition{ + NBVersionCheck: &posture.NBVersionCheck{MinVersion: "0.30.0"}, + }}, + } + account.Policies = append(account.Policies, &types.Policy{ + ID: "policy-posture-resource", Name: "Posture resource access", Enabled: true, AccountID: account.Id, + SourcePostureChecks: []string{"pc-version"}, + Rules: []*types.PolicyRule{{ + ID: "rule-posture-resource", Name: "Posture -> Resource", Enabled: true, + Action: types.PolicyTrafficActionAccept, Protocol: types.PolicyRuleProtocolALL, + Sources: []string{"group-src"}, + DestinationResource: types.Resource{ID: "resource-guarded"}, + }}, + }) + + account.NetworkResources = append(account.NetworkResources, &resourceTypes.NetworkResource{ + ID: "resource-guarded", NetworkID: "net-guarded", AccountID: account.Id, Enabled: true, + Type: resourceTypes.Host, Prefix: netip.MustParsePrefix("10.200.1.1/32"), Address: "10.200.1.1/32", + }) + account.Networks = append(account.Networks, &networkTypes.Network{ + ID: "net-guarded", Name: "Guarded Net", AccountID: account.Id, + }) + account.NetworkRouters = append(account.NetworkRouters, &routerTypes.NetworkRouter{ + ID: "router-guarded", NetworkID: "net-guarded", Peer: "peer-router-1", Enabled: true, AccountID: account.Id, + }) + + t.Run("peer passes posture check", func(t *testing.T) { + account.Peers["peer-src-1"].Meta.WtVersion = "0.35.0" + nm := networkMapFromComponents(t, account, "peer-src-1", validated) + + var hasGuardedRoute bool + for _, r := range nm.Routes { + if r.Network.String() == "10.200.1.1/32" { + hasGuardedRoute = true + } + } + assert.True(t, hasGuardedRoute, "peer passing posture check should get guarded resource route") + }) + + t.Run("peer fails posture check", func(t *testing.T) { + account.Peers["peer-src-1"].Meta.WtVersion = "0.20.0" + nm := networkMapFromComponents(t, account, "peer-src-1", validated) + + for _, r := range nm.Routes { + assert.NotEqual(t, "10.200.1.1/32", r.Network.String(), "peer failing posture check should NOT get guarded resource route") + } + }) +} + +func TestNetworkMapComponents_NetworkResource_MultiplePostureChecks(t *testing.T) { + account := createComponentTestAccount() + validated := allPeersValidated(account) + + account.PostureChecks = []*posture.Checks{ + {ID: "pc-version", Name: "Version", Checks: posture.ChecksDefinition{ + NBVersionCheck: &posture.NBVersionCheck{MinVersion: "0.30.0"}, + }}, + {ID: "pc-os", Name: "OS check", Checks: posture.ChecksDefinition{ + OSVersionCheck: &posture.OSVersionCheck{Linux: &posture.MinKernelVersionCheck{MinKernelVersion: "5.0"}}, + }}, + } + + account.Policies = append(account.Policies, &types.Policy{ + ID: "policy-multi-posture", Name: "Multi posture", Enabled: true, AccountID: account.Id, + SourcePostureChecks: []string{"pc-version", "pc-os"}, + Rules: []*types.PolicyRule{{ + ID: "rule-multi-posture", Name: "Multi posture rule", Enabled: true, + Action: types.PolicyTrafficActionAccept, Protocol: types.PolicyRuleProtocolALL, + Sources: []string{"group-src"}, + DestinationResource: types.Resource{ID: "resource-strict"}, + }}, + }) + + account.NetworkResources = append(account.NetworkResources, &resourceTypes.NetworkResource{ + ID: "resource-strict", NetworkID: "net-strict", AccountID: account.Id, Enabled: true, + Type: resourceTypes.Host, Prefix: netip.MustParsePrefix("10.200.2.1/32"), Address: "10.200.2.1/32", + }) + account.Networks = append(account.Networks, &networkTypes.Network{ + ID: "net-strict", Name: "Strict Net", AccountID: account.Id, + }) + account.NetworkRouters = append(account.NetworkRouters, &routerTypes.NetworkRouter{ + ID: "router-strict", NetworkID: "net-strict", Peer: "peer-router-1", Enabled: true, AccountID: account.Id, + }) + + t.Run("passes both posture checks", func(t *testing.T) { + account.Peers["peer-src-1"].Meta.WtVersion = "0.35.0" + account.Peers["peer-src-1"].Meta.GoOS = "linux" + account.Peers["peer-src-1"].Meta.KernelVersion = "6.1.0" + nm := networkMapFromComponents(t, account, "peer-src-1", validated) + + var found bool + for _, r := range nm.Routes { + if r.Network.String() == "10.200.2.1/32" { + found = true + } + } + assert.True(t, found, "peer passing both checks should get resource route") + }) + + t.Run("fails version posture check", func(t *testing.T) { + account.Peers["peer-src-1"].Meta.WtVersion = "0.20.0" + account.Peers["peer-src-1"].Meta.KernelVersion = "6.1.0" + nm := networkMapFromComponents(t, account, "peer-src-1", validated) + + for _, r := range nm.Routes { + assert.NotEqual(t, "10.200.2.1/32", r.Network.String(), "peer failing version check should NOT get resource route") + } + }) + + t.Run("fails OS posture check", func(t *testing.T) { + account.Peers["peer-src-1"].Meta.WtVersion = "0.35.0" + account.Peers["peer-src-1"].Meta.KernelVersion = "4.0.0" + nm := networkMapFromComponents(t, account, "peer-src-1", validated) + + for _, r := range nm.Routes { + assert.NotEqual(t, "10.200.2.1/32", r.Network.String(), "peer failing OS check should NOT get resource route") + } + }) +} + +func TestNetworkMapComponents_RouterPeerFirewallRules(t *testing.T) { + account := createComponentTestAccount() + validated := allPeersValidated(account) + + nm := networkMapFromComponents(t, account, "peer-router-1", validated) + + var resourceFWRules []*types.RouteFirewallRule + for _, rule := range nm.RoutesFirewallRules { + if rule.Destination == "10.200.0.1/32" { + resourceFWRules = append(resourceFWRules, rule) + } + } + assert.NotEmpty(t, resourceFWRules, "router should have firewall rules for the network resource") + + var hasSourcePeerIP bool + for _, rule := range resourceFWRules { + for _, sr := range rule.SourceRanges { + if sr == account.Peers["peer-src-1"].IP.String()+"/32" || sr == account.Peers["peer-src-2"].IP.String()+"/32" { + hasSourcePeerIP = true + } + } + } + assert.True(t, hasSourcePeerIP, "resource firewall rules should include source peer IPs") +} + +func TestNetworkMapComponents_DNSManagement(t *testing.T) { + account := createComponentTestAccount() + validated := allPeersValidated(account) + + t.Run("peer in DNS-enabled group", func(t *testing.T) { + nm := networkMapFromComponents(t, account, "peer-src-1", validated) + assert.True(t, nm.DNSConfig.ServiceEnable, "peer in non-disabled group should have DNS enabled") + }) + + t.Run("peer in DNS-disabled group", func(t *testing.T) { + nm := networkMapFromComponents(t, account, "peer-dst-1", validated) + assert.False(t, nm.DNSConfig.ServiceEnable, "peer in DNS-disabled group should have DNS disabled") + }) +} + +func TestNetworkMapComponents_NameServerGroups(t *testing.T) { + account := createComponentTestAccount() + validated := allPeersValidated(account) + + nm := networkMapFromComponents(t, account, "peer-src-1", validated) + assert.True(t, nm.DNSConfig.ServiceEnable) + + var hasNSGroup bool + for _, ns := range nm.DNSConfig.NameServerGroups { + if ns.ID == "ns-main" { + hasNSGroup = true + } + } + assert.True(t, hasNSGroup, "peer in NS group should receive nameserver configuration") +} + +func TestNetworkMapComponents_RoutesWithHADeduplication(t *testing.T) { + account := createComponentTestAccount() + validated := allPeersValidated(account) + + account.Routes["route-ha-1"] = &route.Route{ + ID: "route-ha-1", Network: netip.MustParsePrefix("172.16.0.0/16"), + Peer: account.Peers["peer-dst-1"].Key, PeerID: "peer-dst-1", + Enabled: true, Metric: 100, AccountID: account.Id, + Groups: []string{"group-src", "group-dst"}, PeerGroups: []string{"group-dst"}, + } + account.Routes["route-ha-2"] = &route.Route{ + ID: "route-ha-2", Network: netip.MustParsePrefix("172.16.0.0/16"), + Peer: account.Peers["peer-src-1"].Key, PeerID: "peer-src-1", + Enabled: true, Metric: 200, AccountID: account.Id, + Groups: []string{"group-src", "group-dst"}, PeerGroups: []string{"group-src"}, + } + + nm := networkMapFromComponents(t, account, "peer-src-1", validated) + + haCount := 0 + for _, r := range nm.Routes { + if r.Network.String() == "172.16.0.0/16" { + haCount++ + } + } + assert.Equal(t, 1, haCount, "peer should only receive one route from HA group (not both, since it's a member of one)") +} + +func TestNetworkMapComponents_RoutesFirewallRulesForAccessControl(t *testing.T) { + account := createComponentTestAccount() + validated := allPeersValidated(account) + + account.Routes["route-acl"] = &route.Route{ + ID: "route-acl", Network: netip.MustParsePrefix("192.168.100.0/24"), + Peer: account.Peers["peer-src-1"].Key, PeerID: "peer-src-1", + Enabled: true, Metric: 100, AccountID: account.Id, + Groups: []string{"group-dst"}, + PeerGroups: []string{"group-src"}, + AccessControlGroups: []string{"group-dst"}, + } + + nm := networkMapFromComponents(t, account, "peer-src-1", validated) + + var hasFWRule bool + for _, rule := range nm.RoutesFirewallRules { + if rule.Destination == "192.168.100.0/24" { + hasFWRule = true + } + } + assert.True(t, hasFWRule, "routing peer should have firewall rules for route with access control groups") +} + +func TestNetworkMapComponents_RoutesDefaultPermit(t *testing.T) { + account := createComponentTestAccount() + validated := allPeersValidated(account) + + account.Routes["route-open"] = &route.Route{ + ID: "route-open", Network: netip.MustParsePrefix("10.99.0.0/16"), + Peer: account.Peers["peer-src-1"].Key, PeerID: "peer-src-1", + Enabled: true, Metric: 100, AccountID: account.Id, + Groups: []string{"group-src"}, + PeerGroups: []string{"group-src"}, + } + + nm := networkMapFromComponents(t, account, "peer-src-1", validated) + + var hasFWRule bool + for _, rule := range nm.RoutesFirewallRules { + if rule.Destination == "10.99.0.0/16" { + hasFWRule = true + } + } + assert.True(t, hasFWRule, "route without access control groups should have default permit firewall rules") +} + +func TestNetworkMapComponents_SSHAuthorizedUsers(t *testing.T) { + account := createComponentTestAccount() + validated := allPeersValidated(account) + + account.Peers["peer-dst-1"].SSHEnabled = true + + account.Policies = append(account.Policies, &types.Policy{ + ID: "policy-ssh", Name: "SSH Access", Enabled: true, AccountID: account.Id, + Rules: []*types.PolicyRule{{ + ID: "rule-ssh", Name: "SSH to dst", Enabled: true, + Action: types.PolicyTrafficActionAccept, Protocol: types.PolicyRuleProtocolALL, + Bidirectional: true, + Sources: []string{"group-src"}, Destinations: []string{"group-dst"}, + }}, + }) + + nm := networkMapFromComponents(t, account, "peer-dst-1", validated) + assert.True(t, nm.EnableSSH, "SSH-enabled peer with matching policy should have EnableSSH") +} + +func TestNetworkMapComponents_DisabledPolicyIgnored(t *testing.T) { + account := createComponentTestAccount() + validated := allPeersValidated(account) + + for _, p := range account.Policies { + p.Enabled = false + } + + nm := networkMapFromComponents(t, account, "peer-src-1", validated) + assert.Empty(t, nm.Peers, "with all policies disabled, peer should see no other peers") + assert.Empty(t, nm.FirewallRules) +} + +func TestNetworkMapComponents_DisabledRouteIgnored(t *testing.T) { + account := createComponentTestAccount() + validated := allPeersValidated(account) + + for _, r := range account.Routes { + r.Enabled = false + } + for _, r := range account.NetworkResources { + r.Enabled = false + } + + nm := networkMapFromComponents(t, account, "peer-src-1", validated) + assert.Empty(t, nm.Routes, "disabled routes should not appear in network map") +} + +func TestNetworkMapComponents_DisabledNetworkResourceIgnored(t *testing.T) { + account := createComponentTestAccount() + validated := allPeersValidated(account) + + for _, r := range account.NetworkResources { + r.Enabled = false + } + + nm := networkMapFromComponents(t, account, "peer-router-1", validated) + + for _, r := range nm.Routes { + assert.NotEqual(t, "10.200.0.1/32", r.Network.String(), "disabled resource should not generate routes") + } +} + +func TestNetworkMapComponents_BidirectionalPolicy(t *testing.T) { + account := createComponentTestAccount() + validated := allPeersValidated(account) + + nmSrc := networkMapFromComponents(t, account, "peer-src-1", validated) + nmDst := networkMapFromComponents(t, account, "peer-dst-1", validated) + + assert.Contains(t, peerIDs(nmSrc.Peers), "peer-dst-1", "src should see dst via bidirectional policy") + assert.Contains(t, peerIDs(nmDst.Peers), "peer-src-1", "dst should see src via bidirectional policy") +} + +func TestNetworkMapComponents_DropPolicy(t *testing.T) { + account := createComponentTestAccount() + validated := allPeersValidated(account) + + account.Policies = append(account.Policies, &types.Policy{ + ID: "policy-drop", Name: "Drop traffic", Enabled: true, AccountID: account.Id, + Rules: []*types.PolicyRule{{ + ID: "rule-drop", Name: "Drop src->dst", Enabled: true, + Action: types.PolicyTrafficActionDrop, Protocol: types.PolicyRuleProtocolTCP, + Ports: []string{"5432"}, + Sources: []string{"group-src"}, Destinations: []string{"group-dst"}, + }}, + }) + + nm := networkMapFromComponents(t, account, "peer-src-1", validated) + + var hasDropRule bool + for _, rule := range nm.FirewallRules { + if rule.Action == string(types.PolicyTrafficActionDrop) && rule.Port == "5432" { + hasDropRule = true + } + } + assert.True(t, hasDropRule, "drop policy should generate drop firewall rule") +} + +func TestNetworkMapComponents_PortRangePolicy(t *testing.T) { + account := createComponentTestAccount() + validated := allPeersValidated(account) + + account.Peers["peer-src-1"].Meta.WtVersion = "0.50.0" + + account.Policies = append(account.Policies, &types.Policy{ + ID: "policy-range", Name: "Port range", Enabled: true, AccountID: account.Id, + Rules: []*types.PolicyRule{{ + ID: "rule-range", Name: "Range rule", Enabled: true, + Action: types.PolicyTrafficActionAccept, Protocol: types.PolicyRuleProtocolTCP, + PortRanges: []types.RulePortRange{{Start: 8080, End: 8090}}, + Sources: []string{"group-src"}, Destinations: []string{"group-dst"}, + }}, + }) + + nm := networkMapFromComponents(t, account, "peer-src-1", validated) + + var hasRangeRule bool + for _, rule := range nm.FirewallRules { + if rule.PortRange.Start == 8080 && rule.PortRange.End == 8090 { + hasRangeRule = true + } + } + assert.True(t, hasRangeRule, "port range policy should generate corresponding firewall rule") +} + +func TestNetworkMapComponents_MultipleNetworkResources(t *testing.T) { + account := createComponentTestAccount() + validated := allPeersValidated(account) + + account.NetworkResources = append(account.NetworkResources, &resourceTypes.NetworkResource{ + ID: "resource-2", NetworkID: "net-1", AccountID: account.Id, Enabled: true, + Type: resourceTypes.Host, Prefix: netip.MustParsePrefix("10.200.0.2/32"), Address: "10.200.0.2/32", + }) + account.Groups["group-res2"] = &types.Group{ID: "group-res2", Name: "Resource 2 Group", Peers: []string{"peer-src-1", "peer-src-2"}, + Resources: []types.Resource{{ID: "resource-2"}}, + } + account.Policies = append(account.Policies, &types.Policy{ + ID: "policy-res2", Name: "Resource 2 Policy", Enabled: true, AccountID: account.Id, + Rules: []*types.PolicyRule{{ + ID: "rule-res2", Name: "Access Resource 2", Enabled: true, + Action: types.PolicyTrafficActionAccept, Protocol: types.PolicyRuleProtocolALL, + Sources: []string{"group-src"}, + DestinationResource: types.Resource{ID: "resource-2"}, + }}, + }) + + nm := networkMapFromComponents(t, account, "peer-router-1", validated) + + resourceRouteCount := 0 + for _, r := range nm.Routes { + if r.Network.String() == "10.200.0.1/32" || r.Network.String() == "10.200.0.2/32" { + resourceRouteCount++ + } + } + assert.Equal(t, 2, resourceRouteCount, "router should have routes for both network resources") +} + +func TestNetworkMapComponents_DomainNetworkResource(t *testing.T) { + account := createComponentTestAccount() + validated := allPeersValidated(account) + + account.NetworkResources = append(account.NetworkResources, &resourceTypes.NetworkResource{ + ID: "resource-domain", NetworkID: "net-1", AccountID: account.Id, Enabled: true, + Type: resourceTypes.Domain, Domain: "api.example.com", Address: "api.example.com", + }) + account.Groups["group-res-domain"] = &types.Group{ + ID: "group-res-domain", Name: "Domain Resource Group", + Resources: []types.Resource{{ID: "resource-domain"}}, + } + account.Policies = append(account.Policies, &types.Policy{ + ID: "policy-domain", Name: "Domain resource policy", Enabled: true, AccountID: account.Id, + Rules: []*types.PolicyRule{{ + ID: "rule-domain", Name: "Access domain resource", Enabled: true, + Action: types.PolicyTrafficActionAccept, Protocol: types.PolicyRuleProtocolALL, + Sources: []string{"group-src"}, + DestinationResource: types.Resource{ID: "resource-domain"}, + }}, + }) + + nm := networkMapFromComponents(t, account, "peer-src-1", validated) + + var hasDomainRoute bool + for _, r := range nm.Routes { + if r.NetworkType == route.DomainNetwork && len(r.Domains) > 0 && r.Domains[0].SafeString() == "api.example.com" { + hasDomainRoute = true + } + } + assert.True(t, hasDomainRoute, "source peer should receive domain route for domain network resource") +} + +func TestNetworkMapComponents_NetworkEmpty(t *testing.T) { + account := createComponentTestAccount() + validated := allPeersValidated(account) + + nm := networkMapFromComponents(t, account, "nonexistent-peer", validated) + + assert.NotNil(t, nm) + assert.Empty(t, nm.Peers) + assert.Empty(t, nm.FirewallRules) + assert.NotNil(t, nm.Network) +} + +func TestNetworkMapComponents_RouterExcludesOtherNetworkRoutes(t *testing.T) { + account := createComponentTestAccount() + validated := allPeersValidated(account) + + account.NetworkResources = append(account.NetworkResources, &resourceTypes.NetworkResource{ + ID: "resource-other", NetworkID: "net-other", AccountID: account.Id, Enabled: true, + Type: resourceTypes.Host, Prefix: netip.MustParsePrefix("10.200.99.1/32"), Address: "10.200.99.1/32", + }) + account.Networks = append(account.Networks, &networkTypes.Network{ + ID: "net-other", Name: "Other Net", AccountID: account.Id, + }) + account.NetworkRouters = append(account.NetworkRouters, &routerTypes.NetworkRouter{ + ID: "router-other", NetworkID: "net-other", Peer: "peer-dst-1", Enabled: true, AccountID: account.Id, + }) + account.Groups["group-res-other"] = &types.Group{ID: "group-res-other", Name: "Other resource group", + Resources: []types.Resource{{ID: "resource-other"}}, + } + account.Policies = append(account.Policies, &types.Policy{ + ID: "policy-other-resource", Name: "Other resource policy", Enabled: true, AccountID: account.Id, + Rules: []*types.PolicyRule{{ + ID: "rule-other", Name: "Other resource access", Enabled: true, + Action: types.PolicyTrafficActionAccept, Protocol: types.PolicyRuleProtocolALL, + Sources: []string{"group-src"}, + DestinationResource: types.Resource{ID: "resource-other"}, + }}, + }) + + nm := networkMapFromComponents(t, account, "peer-router-1", validated) + + for _, r := range nm.Routes { + assert.NotEqual(t, "10.200.99.1/32", r.Network.String(), "router-1 should NOT get routes for other network's resources") + } +} + +func createComponentTestAccount() *types.Account { + peers := map[string]*nbpeer.Peer{ + "peer-src-1": { + ID: "peer-src-1", IP: net.IP{100, 64, 0, 1}, Key: "key-src-1", DNSLabel: "src1", + Status: &nbpeer.PeerStatus{Connected: true, LastSeen: time.Now()}, UserID: "user-1", + Meta: nbpeer.PeerSystemMeta{WtVersion: "0.35.0", GoOS: "linux"}, + }, + "peer-src-2": { + ID: "peer-src-2", IP: net.IP{100, 64, 0, 2}, Key: "key-src-2", DNSLabel: "src2", + Status: &nbpeer.PeerStatus{Connected: true, LastSeen: time.Now()}, UserID: "user-1", + Meta: nbpeer.PeerSystemMeta{WtVersion: "0.35.0", GoOS: "linux"}, + }, + "peer-dst-1": { + ID: "peer-dst-1", IP: net.IP{100, 64, 0, 3}, Key: "key-dst-1", DNSLabel: "dst1", + Status: &nbpeer.PeerStatus{Connected: true, LastSeen: time.Now()}, UserID: "user-2", + Meta: nbpeer.PeerSystemMeta{WtVersion: "0.35.0", GoOS: "linux"}, + }, + "peer-router-1": { + ID: "peer-router-1", IP: net.IP{100, 64, 0, 10}, Key: "key-router-1", DNSLabel: "router1", + Status: &nbpeer.PeerStatus{Connected: true, LastSeen: time.Now()}, UserID: "user-1", + Meta: nbpeer.PeerSystemMeta{WtVersion: "0.35.0", GoOS: "linux"}, + }, + } + + groups := map[string]*types.Group{ + "group-src": {ID: "group-src", Name: "Sources", Peers: []string{"peer-src-1", "peer-src-2"}}, + "group-dst": {ID: "group-dst", Name: "Destinations", Peers: []string{"peer-dst-1"}}, + "group-all": {ID: "group-all", Name: "All", Peers: []string{"peer-src-1", "peer-src-2", "peer-dst-1", "peer-router-1"}}, + "group-res": { + ID: "group-res", Name: "Resource Group", + Resources: []types.Resource{{ID: "resource-1"}}, + }, + } + + policies := []*types.Policy{ + { + ID: "policy-base", Name: "Base connectivity", Enabled: true, + Rules: []*types.PolicyRule{{ + ID: "rule-base", Name: "Allow src <-> dst", Enabled: true, + Action: types.PolicyTrafficActionAccept, Protocol: types.PolicyRuleProtocolALL, + Bidirectional: true, + Sources: []string{"group-src"}, Destinations: []string{"group-dst"}, + }}, + }, + { + ID: "policy-resource", Name: "Network resource access", Enabled: true, + Rules: []*types.PolicyRule{{ + ID: "rule-resource", Name: "Source -> Resource", Enabled: true, + Action: types.PolicyTrafficActionAccept, Protocol: types.PolicyRuleProtocolALL, + Sources: []string{"group-src"}, + DestinationResource: types.Resource{ID: "resource-1"}, + }}, + }, + } + + routes := map[route.ID]*route.Route{ + "route-main": { + ID: "route-main", Network: netip.MustParsePrefix("192.168.10.0/24"), + Peer: peers["peer-dst-1"].Key, PeerID: "peer-dst-1", + Enabled: true, Metric: 100, + Groups: []string{"group-src", "group-dst"}, PeerGroups: []string{"group-dst"}, + }, + } + + users := map[string]*types.User{ + "user-1": {Id: "user-1", Role: types.UserRoleAdmin, IsServiceUser: false, AutoGroups: []string{"group-all"}}, + "user-2": {Id: "user-2", Role: types.UserRoleUser, IsServiceUser: false, AutoGroups: []string{"group-all"}}, + } + + account := &types.Account{ + Id: "account-components-test", Peers: peers, Groups: groups, Policies: policies, Routes: routes, + Users: users, + Network: &types.Network{ + Identifier: "net-test", Net: net.IPNet{IP: net.IP{100, 64, 0, 0}, Mask: net.CIDRMask(16, 32)}, Serial: 1, + }, + DNSSettings: types.DNSSettings{DisabledManagementGroups: []string{"group-dst"}}, + NameServerGroups: map[string]*nbdns.NameServerGroup{ + "ns-main": { + ID: "ns-main", Name: "Main NS", Enabled: true, Groups: []string{"group-src"}, + NameServers: []nbdns.NameServer{{IP: netip.MustParseAddr("8.8.8.8"), NSType: nbdns.UDPNameServerType, Port: 53}}, + }, + }, + PostureChecks: []*posture.Checks{}, + NetworkResources: []*resourceTypes.NetworkResource{ + { + ID: "resource-1", NetworkID: "net-1", AccountID: "account-components-test", Enabled: true, + Type: resourceTypes.Host, Prefix: netip.MustParsePrefix("10.200.0.1/32"), Address: "10.200.0.1/32", + }, + }, + Networks: []*networkTypes.Network{ + {ID: "net-1", Name: "Resource Net", AccountID: "account-components-test"}, + }, + NetworkRouters: []*routerTypes.NetworkRouter{ + {ID: "router-1", NetworkID: "net-1", Peer: "peer-router-1", Enabled: true, AccountID: "account-components-test"}, + }, + Settings: &types.Settings{PeerLoginExpirationEnabled: false, PeerLoginExpiration: 24 * time.Hour}, + } + + for _, p := range account.Policies { + p.AccountID = account.Id + } + for _, r := range account.Routes { + r.AccountID = account.Id + } + + return account +} diff --git a/management/server/types/networkmap_golden_test.go b/management/server/types/networkmap_golden_test.go deleted file mode 100644 index 53261f22d..000000000 --- a/management/server/types/networkmap_golden_test.go +++ /dev/null @@ -1,967 +0,0 @@ -package types_test - -import ( - "context" - "encoding/json" - "fmt" - "net" - "net/netip" - "os" - "path/filepath" - "slices" - "sort" - "testing" - "time" - - "github.com/stretchr/testify/require" - - "github.com/netbirdio/netbird/dns" - "github.com/netbirdio/netbird/management/internals/modules/zones" - resourceTypes "github.com/netbirdio/netbird/management/server/networks/resources/types" - routerTypes "github.com/netbirdio/netbird/management/server/networks/routers/types" - networkTypes "github.com/netbirdio/netbird/management/server/networks/types" - nbpeer "github.com/netbirdio/netbird/management/server/peer" - "github.com/netbirdio/netbird/management/server/posture" - "github.com/netbirdio/netbird/management/server/types" - "github.com/netbirdio/netbird/route" -) - -const ( - numPeers = 100 - devGroupID = "group-dev" - opsGroupID = "group-ops" - allGroupID = "group-all" - sshUsersGroupID = "group-ssh-users" - routeID = route.ID("route-main") - routeHA1ID = route.ID("route-ha-1") - routeHA2ID = route.ID("route-ha-2") - policyIDDevOps = "policy-dev-ops" - policyIDAll = "policy-all" - policyIDPosture = "policy-posture" - policyIDDrop = "policy-drop" - policyIDSSH = "policy-ssh" - postureCheckID = "posture-check-ver" - networkResourceID = "res-database" - networkID = "net-database" - networkRouterID = "router-database" - nameserverGroupID = "ns-group-main" - testingPeerID = "peer-60" // A peer from the "dev" group, should receive the most detailed map. - expiredPeerID = "peer-98" // This peer will be online but with an expired session. - offlinePeerID = "peer-99" // This peer will be completely offline. - routingPeerID = "peer-95" // This peer is used for routing, it has a route to the network. - testAccountID = "account-golden-test" - userAdminID = "user-admin" - userDevID = "user-dev" - userOpsID = "user-ops" -) - -func TestGetPeerNetworkMap_Golden(t *testing.T) { - account := createTestAccountWithEntities() - - ctx := context.Background() - validatedPeersMap := make(map[string]struct{}) - for i := range numPeers { - peerID := fmt.Sprintf("peer-%d", i) - if peerID == offlinePeerID { - continue - } - validatedPeersMap[peerID] = struct{}{} - } - - resourcePolicies := account.GetResourcePoliciesMap() - routers := account.GetResourceRoutersMap() - - legacyNetworkMap := account.GetPeerNetworkMap(ctx, testingPeerID, dns.CustomZone{}, []*zones.Zone{}, validatedPeersMap, resourcePolicies, routers, nil, account.GetActiveGroupUsers()) - normalizeAndSortNetworkMap(legacyNetworkMap) - legacyJSON, err := json.MarshalIndent(toNetworkMapJSON(legacyNetworkMap), "", " ") - require.NoError(t, err, "error marshaling legacy network map to JSON") - - builder := types.NewNetworkMapBuilder(account, validatedPeersMap) - newNetworkMap := builder.GetPeerNetworkMap(ctx, testingPeerID, dns.CustomZone{}, nil, validatedPeersMap, nil) - normalizeAndSortNetworkMap(newNetworkMap) - newJSON, err := json.MarshalIndent(toNetworkMapJSON(newNetworkMap), "", " ") - require.NoError(t, err, "error marshaling new network map to JSON") - - if string(legacyJSON) != string(newJSON) { - legacyFilePath := filepath.Join("testdata", "networkmap_golden.json") - newFilePath := filepath.Join("testdata", "networkmap_golden_new.json") - - err = os.MkdirAll(filepath.Dir(legacyFilePath), 0755) - require.NoError(t, err) - - err = os.WriteFile(legacyFilePath, legacyJSON, 0644) - require.NoError(t, err) - t.Logf("Saved legacy network map to %s", legacyFilePath) - - err = os.WriteFile(newFilePath, newJSON, 0644) - require.NoError(t, err) - t.Logf("Saved new network map to %s", newFilePath) - - require.JSONEq(t, string(legacyJSON), string(newJSON), "network maps from legacy and new builder do not match") - } -} - -func BenchmarkGetPeerNetworkMap(b *testing.B) { - account := createTestAccountWithEntities() - ctx := context.Background() - validatedPeersMap := make(map[string]struct{}) - var peerIDs []string - for i := range numPeers { - peerID := fmt.Sprintf("peer-%d", i) - validatedPeersMap[peerID] = struct{}{} - peerIDs = append(peerIDs, peerID) - } - - b.ResetTimer() - b.Run("old builder", func(b *testing.B) { - for range b.N { - for _, peerID := range peerIDs { - _ = account.GetPeerNetworkMap(ctx, peerID, dns.CustomZone{}, []*zones.Zone{}, validatedPeersMap, nil, nil, nil, account.GetActiveGroupUsers()) - } - } - }) - b.ResetTimer() - b.Run("new builder", func(b *testing.B) { - for range b.N { - builder := types.NewNetworkMapBuilder(account, validatedPeersMap) - for _, peerID := range peerIDs { - _ = builder.GetPeerNetworkMap(ctx, peerID, dns.CustomZone{}, nil, validatedPeersMap, nil) - } - } - }) -} - -func TestGetPeerNetworkMap_Golden_WithNewPeer(t *testing.T) { - account := createTestAccountWithEntities() - - ctx := context.Background() - validatedPeersMap := make(map[string]struct{}) - for i := range numPeers { - peerID := fmt.Sprintf("peer-%d", i) - if peerID == offlinePeerID { - continue - } - validatedPeersMap[peerID] = struct{}{} - } - - builder := types.NewNetworkMapBuilder(account, validatedPeersMap) - - newPeerID := "peer-new-101" - newPeerIP := net.IP{100, 64, 1, 1} - newPeer := &nbpeer.Peer{ - ID: newPeerID, - IP: newPeerIP, - Key: fmt.Sprintf("key-%s", newPeerID), - DNSLabel: "peernew101", - Status: &nbpeer.PeerStatus{Connected: true, LastSeen: time.Now()}, - UserID: "user-admin", - Meta: nbpeer.PeerSystemMeta{WtVersion: "0.26.0", GoOS: "linux"}, - LastLogin: func() *time.Time { t := time.Now(); return &t }(), - } - - account.Peers[newPeerID] = newPeer - - if devGroup, exists := account.Groups[devGroupID]; exists { - devGroup.Peers = append(devGroup.Peers, newPeerID) - } - - if allGroup, exists := account.Groups[allGroupID]; exists { - allGroup.Peers = append(allGroup.Peers, newPeerID) - } - - validatedPeersMap[newPeerID] = struct{}{} - - if account.Network != nil { - account.Network.Serial++ - } - - resourcePolicies := account.GetResourcePoliciesMap() - routers := account.GetResourceRoutersMap() - - legacyNetworkMap := account.GetPeerNetworkMap(ctx, testingPeerID, dns.CustomZone{}, []*zones.Zone{}, validatedPeersMap, resourcePolicies, routers, nil, account.GetActiveGroupUsers()) - normalizeAndSortNetworkMap(legacyNetworkMap) - legacyJSON, err := json.MarshalIndent(toNetworkMapJSON(legacyNetworkMap), "", " ") - require.NoError(t, err, "error marshaling legacy network map to JSON") - - err = builder.OnPeerAddedIncremental(account, newPeerID) - require.NoError(t, err, "error adding peer to cache") - - newNetworkMap := builder.GetPeerNetworkMap(ctx, testingPeerID, dns.CustomZone{}, nil, validatedPeersMap, nil) - normalizeAndSortNetworkMap(newNetworkMap) - newJSON, err := json.MarshalIndent(toNetworkMapJSON(newNetworkMap), "", " ") - require.NoError(t, err, "error marshaling new network map to JSON") - - if string(legacyJSON) != string(newJSON) { - legacyFilePath := filepath.Join("testdata", "networkmap_golden_with_new_peer.json") - newFilePath := filepath.Join("testdata", "networkmap_golden_new_with_onpeeradded.json") - - err = os.MkdirAll(filepath.Dir(legacyFilePath), 0755) - require.NoError(t, err) - - err = os.WriteFile(legacyFilePath, legacyJSON, 0644) - require.NoError(t, err) - t.Logf("Saved legacy network map to %s", legacyFilePath) - - err = os.WriteFile(newFilePath, newJSON, 0644) - require.NoError(t, err) - t.Logf("Saved new network map to %s", newFilePath) - - require.JSONEq(t, string(legacyJSON), string(newJSON), "network maps with new peer from legacy and new builder do not match") - } -} - -func BenchmarkGetPeerNetworkMap_AfterPeerAdded(b *testing.B) { - account := createTestAccountWithEntities() - ctx := context.Background() - validatedPeersMap := make(map[string]struct{}) - var peerIDs []string - for i := range numPeers { - peerID := fmt.Sprintf("peer-%d", i) - validatedPeersMap[peerID] = struct{}{} - peerIDs = append(peerIDs, peerID) - } - builder := types.NewNetworkMapBuilder(account, validatedPeersMap) - newPeerID := "peer-new-101" - newPeer := &nbpeer.Peer{ - ID: newPeerID, - IP: net.IP{100, 64, 1, 1}, - Key: fmt.Sprintf("key-%s", newPeerID), - DNSLabel: "peernew101", - Status: &nbpeer.PeerStatus{Connected: true, LastSeen: time.Now()}, - UserID: "user-admin", - Meta: nbpeer.PeerSystemMeta{WtVersion: "0.26.0", GoOS: "linux"}, - } - - account.Peers[newPeerID] = newPeer - account.Groups[devGroupID].Peers = append(account.Groups[devGroupID].Peers, newPeerID) - account.Groups[allGroupID].Peers = append(account.Groups[allGroupID].Peers, newPeerID) - validatedPeersMap[newPeerID] = struct{}{} - - b.ResetTimer() - b.Run("old builder after add", func(b *testing.B) { - for i := 0; i < b.N; i++ { - for _, testingPeerID := range peerIDs { - _ = account.GetPeerNetworkMap(ctx, testingPeerID, dns.CustomZone{}, []*zones.Zone{}, validatedPeersMap, nil, nil, nil, account.GetActiveGroupUsers()) - } - } - }) - - b.ResetTimer() - b.Run("new builder after add", func(b *testing.B) { - for i := 0; i < b.N; i++ { - _ = builder.OnPeerAddedIncremental(account, newPeerID) - for _, testingPeerID := range peerIDs { - _ = builder.GetPeerNetworkMap(ctx, testingPeerID, dns.CustomZone{}, nil, validatedPeersMap, nil) - } - } - }) -} - -func TestGetPeerNetworkMap_Golden_WithNewRoutingPeer(t *testing.T) { - account := createTestAccountWithEntities() - - ctx := context.Background() - validatedPeersMap := make(map[string]struct{}) - for i := range numPeers { - peerID := fmt.Sprintf("peer-%d", i) - if peerID == offlinePeerID { - continue - } - validatedPeersMap[peerID] = struct{}{} - } - - builder := types.NewNetworkMapBuilder(account, validatedPeersMap) - - newRouterID := "peer-new-router-102" - newRouterIP := net.IP{100, 64, 1, 2} - newRouter := &nbpeer.Peer{ - ID: newRouterID, - IP: newRouterIP, - Key: fmt.Sprintf("key-%s", newRouterID), - DNSLabel: "newrouter102", - Status: &nbpeer.PeerStatus{Connected: true, LastSeen: time.Now()}, - UserID: "user-admin", - Meta: nbpeer.PeerSystemMeta{WtVersion: "0.26.0", GoOS: "linux"}, - LastLogin: func() *time.Time { t := time.Now(); return &t }(), - } - - account.Peers[newRouterID] = newRouter - - if opsGroup, exists := account.Groups[opsGroupID]; exists { - opsGroup.Peers = append(opsGroup.Peers, newRouterID) - } - - if allGroup, exists := account.Groups[allGroupID]; exists { - allGroup.Peers = append(allGroup.Peers, newRouterID) - } - - newRoute := &route.Route{ - ID: route.ID("route-new-router"), - Network: netip.MustParsePrefix("172.16.0.0/24"), - Peer: newRouter.Key, - PeerID: newRouterID, - Description: "Route from new router", - Enabled: true, - PeerGroups: []string{opsGroupID}, - Groups: []string{devGroupID, opsGroupID}, - AccessControlGroups: []string{devGroupID}, - AccountID: account.Id, - } - account.Routes[newRoute.ID] = newRoute - - validatedPeersMap[newRouterID] = struct{}{} - - if account.Network != nil { - account.Network.Serial++ - } - - resourcePolicies := account.GetResourcePoliciesMap() - routers := account.GetResourceRoutersMap() - - legacyNetworkMap := account.GetPeerNetworkMap(ctx, testingPeerID, dns.CustomZone{}, []*zones.Zone{}, validatedPeersMap, resourcePolicies, routers, nil, account.GetActiveGroupUsers()) - normalizeAndSortNetworkMap(legacyNetworkMap) - legacyJSON, err := json.MarshalIndent(toNetworkMapJSON(legacyNetworkMap), "", " ") - require.NoError(t, err, "error marshaling legacy network map to JSON") - - err = builder.OnPeerAddedIncremental(account, newRouterID) - require.NoError(t, err, "error adding router to cache") - - newNetworkMap := builder.GetPeerNetworkMap(ctx, testingPeerID, dns.CustomZone{}, nil, validatedPeersMap, nil) - normalizeAndSortNetworkMap(newNetworkMap) - newJSON, err := json.MarshalIndent(toNetworkMapJSON(newNetworkMap), "", " ") - require.NoError(t, err, "error marshaling new network map to JSON") - - if string(legacyJSON) != string(newJSON) { - legacyFilePath := filepath.Join("testdata", "networkmap_golden_with_new_router.json") - newFilePath := filepath.Join("testdata", "networkmap_golden_new_with_onpeeradded_router.json") - - err = os.MkdirAll(filepath.Dir(legacyFilePath), 0755) - require.NoError(t, err) - - err = os.WriteFile(legacyFilePath, legacyJSON, 0644) - require.NoError(t, err) - t.Logf("Saved legacy network map to %s", legacyFilePath) - - err = os.WriteFile(newFilePath, newJSON, 0644) - require.NoError(t, err) - t.Logf("Saved new network map to %s", newFilePath) - - require.JSONEq(t, string(legacyJSON), string(newJSON), "network maps with new router from legacy and new builder do not match") - } -} - -func BenchmarkGetPeerNetworkMap_AfterRouterPeerAdded(b *testing.B) { - account := createTestAccountWithEntities() - ctx := context.Background() - validatedPeersMap := make(map[string]struct{}) - var peerIDs []string - for i := range numPeers { - peerID := fmt.Sprintf("peer-%d", i) - validatedPeersMap[peerID] = struct{}{} - peerIDs = append(peerIDs, peerID) - } - builder := types.NewNetworkMapBuilder(account, validatedPeersMap) - newRouterID := "peer-new-router-102" - newRouterIP := net.IP{100, 64, 1, 2} - newRouter := &nbpeer.Peer{ - ID: newRouterID, - IP: newRouterIP, - Key: fmt.Sprintf("key-%s", newRouterID), - DNSLabel: "newrouter102", - Status: &nbpeer.PeerStatus{Connected: true, LastSeen: time.Now()}, - UserID: "user-admin", - Meta: nbpeer.PeerSystemMeta{WtVersion: "0.26.0", GoOS: "linux"}, - LastLogin: func() *time.Time { t := time.Now(); return &t }(), - } - - account.Peers[newRouterID] = newRouter - - if opsGroup, exists := account.Groups[opsGroupID]; exists { - opsGroup.Peers = append(opsGroup.Peers, newRouterID) - } - if allGroup, exists := account.Groups[allGroupID]; exists { - allGroup.Peers = append(allGroup.Peers, newRouterID) - } - - newRoute := &route.Route{ - ID: route.ID("route-new-router"), - Network: netip.MustParsePrefix("172.16.0.0/24"), - Peer: newRouter.Key, - PeerID: newRouterID, - Description: "Route from new router", - Enabled: true, - PeerGroups: []string{opsGroupID}, - Groups: []string{devGroupID, opsGroupID}, - AccessControlGroups: []string{devGroupID}, - AccountID: account.Id, - } - account.Routes[newRoute.ID] = newRoute - - validatedPeersMap[newRouterID] = struct{}{} - - b.ResetTimer() - b.Run("old builder after add", func(b *testing.B) { - for i := 0; i < b.N; i++ { - for _, testingPeerID := range peerIDs { - _ = account.GetPeerNetworkMap(ctx, testingPeerID, dns.CustomZone{}, []*zones.Zone{}, validatedPeersMap, nil, nil, nil, account.GetActiveGroupUsers()) - } - } - }) - - b.ResetTimer() - b.Run("new builder after add", func(b *testing.B) { - for i := 0; i < b.N; i++ { - _ = builder.OnPeerAddedIncremental(account, newRouterID) - for _, testingPeerID := range peerIDs { - _ = builder.GetPeerNetworkMap(ctx, testingPeerID, dns.CustomZone{}, nil, validatedPeersMap, nil) - } - } - }) -} - -func TestGetPeerNetworkMap_Golden_WithDeletedPeer(t *testing.T) { - account := createTestAccountWithEntities() - - ctx := context.Background() - validatedPeersMap := make(map[string]struct{}) - for i := range numPeers { - peerID := fmt.Sprintf("peer-%d", i) - if peerID == offlinePeerID { - continue - } - validatedPeersMap[peerID] = struct{}{} - } - - builder := types.NewNetworkMapBuilder(account, validatedPeersMap) - - deletedPeerID := "peer-25" - - delete(account.Peers, deletedPeerID) - - if devGroup, exists := account.Groups[devGroupID]; exists { - devGroup.Peers = slices.DeleteFunc(devGroup.Peers, func(id string) bool { - return id == deletedPeerID - }) - } - - if allGroup, exists := account.Groups[allGroupID]; exists { - allGroup.Peers = slices.DeleteFunc(allGroup.Peers, func(id string) bool { - return id == deletedPeerID - }) - } - - delete(validatedPeersMap, deletedPeerID) - - if account.Network != nil { - account.Network.Serial++ - } - - resourcePolicies := account.GetResourcePoliciesMap() - routers := account.GetResourceRoutersMap() - - legacyNetworkMap := account.GetPeerNetworkMap(ctx, testingPeerID, dns.CustomZone{}, []*zones.Zone{}, validatedPeersMap, resourcePolicies, routers, nil, account.GetActiveGroupUsers()) - normalizeAndSortNetworkMap(legacyNetworkMap) - legacyJSON, err := json.MarshalIndent(toNetworkMapJSON(legacyNetworkMap), "", " ") - require.NoError(t, err, "error marshaling legacy network map to JSON") - - err = builder.OnPeerDeleted(account, deletedPeerID) - require.NoError(t, err, "error deleting peer from cache") - - newNetworkMap := builder.GetPeerNetworkMap(ctx, testingPeerID, dns.CustomZone{}, nil, validatedPeersMap, nil) - normalizeAndSortNetworkMap(newNetworkMap) - newJSON, err := json.MarshalIndent(toNetworkMapJSON(newNetworkMap), "", " ") - require.NoError(t, err, "error marshaling new network map to JSON") - - if string(legacyJSON) != string(newJSON) { - legacyFilePath := filepath.Join("testdata", "networkmap_golden_with_deleted_peer.json") - newFilePath := filepath.Join("testdata", "networkmap_golden_new_with_onpeerdeleted.json") - - err = os.MkdirAll(filepath.Dir(legacyFilePath), 0755) - require.NoError(t, err) - - err = os.WriteFile(legacyFilePath, legacyJSON, 0644) - require.NoError(t, err) - t.Logf("Saved legacy network map to %s", legacyFilePath) - - err = os.WriteFile(newFilePath, newJSON, 0644) - require.NoError(t, err) - t.Logf("Saved new network map to %s", newFilePath) - - require.JSONEq(t, string(legacyJSON), string(newJSON), "network maps with deleted peer from legacy and new builder do not match") - } -} - -func TestGetPeerNetworkMap_Golden_WithDeletedRouterPeer(t *testing.T) { - account := createTestAccountWithEntities() - - ctx := context.Background() - validatedPeersMap := make(map[string]struct{}) - for i := range numPeers { - peerID := fmt.Sprintf("peer-%d", i) - if peerID == offlinePeerID { - continue - } - validatedPeersMap[peerID] = struct{}{} - } - - builder := types.NewNetworkMapBuilder(account, validatedPeersMap) - - deletedRouterID := "peer-75" - - var affectedRoute *route.Route - for _, r := range account.Routes { - if r.PeerID == deletedRouterID { - affectedRoute = r - break - } - } - require.NotNil(t, affectedRoute, "Router peer should have a route") - - for _, group := range account.Groups { - group.Peers = slices.DeleteFunc(group.Peers, func(id string) bool { - return id == deletedRouterID - }) - } - - for routeID, r := range account.Routes { - if r.Peer == account.Peers[deletedRouterID].Key || r.PeerID == deletedRouterID { - delete(account.Routes, routeID) - } - } - delete(account.Peers, deletedRouterID) - delete(validatedPeersMap, deletedRouterID) - - if account.Network != nil { - account.Network.Serial++ - } - - resourcePolicies := account.GetResourcePoliciesMap() - routers := account.GetResourceRoutersMap() - - legacyNetworkMap := account.GetPeerNetworkMap(ctx, testingPeerID, dns.CustomZone{}, []*zones.Zone{}, validatedPeersMap, resourcePolicies, routers, nil, account.GetActiveGroupUsers()) - normalizeAndSortNetworkMap(legacyNetworkMap) - legacyJSON, err := json.MarshalIndent(toNetworkMapJSON(legacyNetworkMap), "", " ") - require.NoError(t, err, "error marshaling legacy network map to JSON") - - err = builder.OnPeerDeleted(account, deletedRouterID) - require.NoError(t, err, "error deleting routing peer from cache") - - newNetworkMap := builder.GetPeerNetworkMap(ctx, testingPeerID, dns.CustomZone{}, nil, validatedPeersMap, nil) - normalizeAndSortNetworkMap(newNetworkMap) - newJSON, err := json.MarshalIndent(toNetworkMapJSON(newNetworkMap), "", " ") - require.NoError(t, err, "error marshaling new network map to JSON") - - if string(legacyJSON) != string(newJSON) { - legacyFilePath := filepath.Join("testdata", "networkmap_golden_with_deleted_router_peer.json") - newFilePath := filepath.Join("testdata", "networkmap_golden_new_with_deleted_router.json") - - err = os.MkdirAll(filepath.Dir(legacyFilePath), 0755) - require.NoError(t, err) - - err = os.WriteFile(legacyFilePath, legacyJSON, 0644) - require.NoError(t, err) - t.Logf("Saved legacy network map to %s", legacyFilePath) - - err = os.WriteFile(newFilePath, newJSON, 0644) - require.NoError(t, err) - t.Logf("Saved new network map to %s", newFilePath) - - require.JSONEq(t, string(legacyJSON), string(newJSON), "network maps with deleted router from legacy and new builder do not match") - } -} - -func BenchmarkGetPeerNetworkMap_AfterPeerDeleted(b *testing.B) { - account := createTestAccountWithEntities() - ctx := context.Background() - validatedPeersMap := make(map[string]struct{}) - var peerIDs []string - for i := range numPeers { - peerID := fmt.Sprintf("peer-%d", i) - validatedPeersMap[peerID] = struct{}{} - peerIDs = append(peerIDs, peerID) - } - - deletedPeerID := "peer-25" - - delete(account.Peers, deletedPeerID) - account.Groups[devGroupID].Peers = slices.DeleteFunc(account.Groups[devGroupID].Peers, func(id string) bool { - return id == deletedPeerID - }) - account.Groups[allGroupID].Peers = slices.DeleteFunc(account.Groups[allGroupID].Peers, func(id string) bool { - return id == deletedPeerID - }) - delete(validatedPeersMap, deletedPeerID) - - builder := types.NewNetworkMapBuilder(account, validatedPeersMap) - - b.ResetTimer() - b.Run("old builder after delete", func(b *testing.B) { - for i := 0; i < b.N; i++ { - for _, testingPeerID := range peerIDs { - _ = account.GetPeerNetworkMap(ctx, testingPeerID, dns.CustomZone{}, []*zones.Zone{}, validatedPeersMap, nil, nil, nil, account.GetActiveGroupUsers()) - } - } - }) - - b.ResetTimer() - b.Run("new builder after delete", func(b *testing.B) { - for i := 0; i < b.N; i++ { - _ = builder.OnPeerDeleted(account, deletedPeerID) - for _, testingPeerID := range peerIDs { - _ = builder.GetPeerNetworkMap(ctx, testingPeerID, dns.CustomZone{}, nil, validatedPeersMap, nil) - } - } - }) -} - -func normalizeAndSortNetworkMap(networkMap *types.NetworkMap) { - for _, peer := range networkMap.Peers { - if peer.Status != nil { - peer.Status.LastSeen = time.Time{} - } - peer.LastLogin = &time.Time{} - } - for _, peer := range networkMap.OfflinePeers { - if peer.Status != nil { - peer.Status.LastSeen = time.Time{} - } - peer.LastLogin = &time.Time{} - } - - sort.Slice(networkMap.Peers, func(i, j int) bool { return networkMap.Peers[i].ID < networkMap.Peers[j].ID }) - sort.Slice(networkMap.OfflinePeers, func(i, j int) bool { return networkMap.OfflinePeers[i].ID < networkMap.OfflinePeers[j].ID }) - sort.Slice(networkMap.Routes, func(i, j int) bool { return networkMap.Routes[i].ID < networkMap.Routes[j].ID }) - - sort.Slice(networkMap.FirewallRules, func(i, j int) bool { - r1, r2 := networkMap.FirewallRules[i], networkMap.FirewallRules[j] - if r1.PeerIP != r2.PeerIP { - return r1.PeerIP < r2.PeerIP - } - if r1.Protocol != r2.Protocol { - return r1.Protocol < r2.Protocol - } - if r1.Direction != r2.Direction { - return r1.Direction < r2.Direction - } - if r1.Action != r2.Action { - return r1.Action < r2.Action - } - return r1.Port < r2.Port - }) - - sort.Slice(networkMap.RoutesFirewallRules, func(i, j int) bool { - r1, r2 := networkMap.RoutesFirewallRules[i], networkMap.RoutesFirewallRules[j] - if r1.RouteID != r2.RouteID { - return r1.RouteID < r2.RouteID - } - if r1.Action != r2.Action { - return r1.Action < r2.Action - } - if r1.Destination != r2.Destination { - return r1.Destination < r2.Destination - } - if len(r1.SourceRanges) > 0 && len(r2.SourceRanges) > 0 { - if r1.SourceRanges[0] != r2.SourceRanges[0] { - return r1.SourceRanges[0] < r2.SourceRanges[0] - } - } - return r1.Port < r2.Port - }) - - for _, ranges := range networkMap.RoutesFirewallRules { - sort.Slice(ranges.SourceRanges, func(i, j int) bool { - return ranges.SourceRanges[i] < ranges.SourceRanges[j] - }) - } -} - -type networkMapJSON struct { - Peers []*nbpeer.Peer `json:"Peers"` - Network *types.Network `json:"Network"` - Routes []*route.Route `json:"Routes"` - DNSConfig dns.Config `json:"DNSConfig"` - OfflinePeers []*nbpeer.Peer `json:"OfflinePeers"` - FirewallRules []*types.FirewallRule `json:"FirewallRules"` - RoutesFirewallRules []*types.RouteFirewallRule `json:"RoutesFirewallRules"` - ForwardingRules []*types.ForwardingRule `json:"ForwardingRules"` - AuthorizedUsers map[string][]string `json:"AuthorizedUsers,omitempty"` - EnableSSH bool `json:"EnableSSH"` -} - -func toNetworkMapJSON(nm *types.NetworkMap) *networkMapJSON { - result := &networkMapJSON{ - Peers: nm.Peers, - Network: nm.Network, - Routes: nm.Routes, - DNSConfig: nm.DNSConfig, - OfflinePeers: nm.OfflinePeers, - FirewallRules: nm.FirewallRules, - RoutesFirewallRules: nm.RoutesFirewallRules, - ForwardingRules: nm.ForwardingRules, - EnableSSH: nm.EnableSSH, - } - - if len(nm.AuthorizedUsers) > 0 { - result.AuthorizedUsers = make(map[string][]string) - localUsers := make([]string, 0, len(nm.AuthorizedUsers)) - for localUser := range nm.AuthorizedUsers { - localUsers = append(localUsers, localUser) - } - sort.Strings(localUsers) - - for _, localUser := range localUsers { - userIDs := nm.AuthorizedUsers[localUser] - sortedUserIDs := make([]string, 0, len(userIDs)) - for userID := range userIDs { - sortedUserIDs = append(sortedUserIDs, userID) - } - sort.Strings(sortedUserIDs) - result.AuthorizedUsers[localUser] = sortedUserIDs - } - } - - return result -} - -func createTestAccountWithEntities() *types.Account { - peers := make(map[string]*nbpeer.Peer) - devGroupPeers, opsGroupPeers, allGroupPeers := []string{}, []string{}, []string{} - - for i := range numPeers { - peerID := fmt.Sprintf("peer-%d", i) - ip := net.IP{100, 64, 0, byte(i + 1)} - wtVersion := "0.25.0" - if i%2 == 0 { - wtVersion = "0.40.0" - } - - p := &nbpeer.Peer{ - ID: peerID, IP: ip, Key: fmt.Sprintf("key-%s", peerID), DNSLabel: fmt.Sprintf("peer%d", i+1), - Status: &nbpeer.PeerStatus{Connected: true, LastSeen: time.Now()}, - UserID: "user-admin", Meta: nbpeer.PeerSystemMeta{WtVersion: wtVersion, GoOS: "linux"}, - } - - if peerID == expiredPeerID { - p.LoginExpirationEnabled = true - pastTimestamp := time.Now().Add(-2 * time.Hour) - p.LastLogin = &pastTimestamp - } - - peers[peerID] = p - allGroupPeers = append(allGroupPeers, peerID) - if i < numPeers/2 { - devGroupPeers = append(devGroupPeers, peerID) - } else { - opsGroupPeers = append(opsGroupPeers, peerID) - } - - } - - groups := map[string]*types.Group{ - allGroupID: {ID: allGroupID, Name: "All", Peers: allGroupPeers}, - devGroupID: {ID: devGroupID, Name: "Developers", Peers: devGroupPeers}, - opsGroupID: {ID: opsGroupID, Name: "Operations", Peers: opsGroupPeers}, - sshUsersGroupID: {ID: sshUsersGroupID, Name: "SSH Users", Peers: []string{}}, - } - - policies := []*types.Policy{ - { - ID: policyIDAll, Name: "Default-Allow", Enabled: true, - Rules: []*types.PolicyRule{{ - ID: policyIDAll, Name: "Allow All", Enabled: true, Action: types.PolicyTrafficActionAccept, - Protocol: types.PolicyRuleProtocolALL, Bidirectional: true, - Sources: []string{allGroupID}, Destinations: []string{allGroupID}, - }}, - }, - { - ID: policyIDDevOps, Name: "Dev to Ops Web Access", Enabled: true, - Rules: []*types.PolicyRule{{ - ID: policyIDDevOps, Name: "Dev -> Ops (HTTP Range)", Enabled: true, Action: types.PolicyTrafficActionAccept, - Protocol: types.PolicyRuleProtocolTCP, Bidirectional: false, - PortRanges: []types.RulePortRange{{Start: 8080, End: 8090}}, - Sources: []string{devGroupID}, Destinations: []string{opsGroupID}, - }}, - }, - { - ID: policyIDDrop, Name: "Drop DB traffic", Enabled: true, - Rules: []*types.PolicyRule{{ - ID: policyIDDrop, Name: "Drop DB", Enabled: true, Action: types.PolicyTrafficActionDrop, - Protocol: types.PolicyRuleProtocolTCP, Ports: []string{"5432"}, Bidirectional: true, - Sources: []string{devGroupID}, Destinations: []string{opsGroupID}, - }}, - }, - { - ID: policyIDPosture, Name: "Posture Check for DB Resource", Enabled: true, - SourcePostureChecks: []string{postureCheckID}, - Rules: []*types.PolicyRule{{ - ID: policyIDPosture, Name: "Allow DB Access", Enabled: true, Action: types.PolicyTrafficActionAccept, - Protocol: types.PolicyRuleProtocolALL, Bidirectional: true, - Sources: []string{opsGroupID}, DestinationResource: types.Resource{ID: networkResourceID}, - }}, - }, - { - ID: policyIDSSH, Name: "SSH Access Policy", Enabled: true, - Rules: []*types.PolicyRule{{ - ID: policyIDSSH, Name: "Allow SSH to Ops", Enabled: true, Action: types.PolicyTrafficActionAccept, - Protocol: types.PolicyRuleProtocolNetbirdSSH, Bidirectional: false, - Sources: []string{devGroupID}, Destinations: []string{opsGroupID}, - AuthorizedGroups: map[string][]string{sshUsersGroupID: {"root", "admin"}}, - }}, - }, - } - - routes := map[route.ID]*route.Route{ - routeID: { - ID: routeID, Network: netip.MustParsePrefix("192.168.10.0/24"), - Peer: peers["peer-75"].Key, - PeerID: "peer-75", - Description: "Route to internal resource", Enabled: true, - PeerGroups: []string{devGroupID, opsGroupID}, - Groups: []string{devGroupID, opsGroupID}, - AccessControlGroups: []string{devGroupID}, - }, - routeHA1ID: { - ID: routeHA1ID, Network: netip.MustParsePrefix("10.10.0.0/16"), - Peer: peers["peer-80"].Key, - PeerID: "peer-80", - Description: "HA Route 1", Enabled: true, Metric: 1000, - PeerGroups: []string{allGroupID}, - Groups: []string{allGroupID}, - AccessControlGroups: []string{allGroupID}, - }, - routeHA2ID: { - ID: routeHA2ID, Network: netip.MustParsePrefix("10.10.0.0/16"), - Peer: peers["peer-90"].Key, - PeerID: "peer-90", - Description: "HA Route 2", Enabled: true, Metric: 900, - PeerGroups: []string{devGroupID, opsGroupID}, - Groups: []string{devGroupID, opsGroupID}, - AccessControlGroups: []string{allGroupID}, - }, - } - - users := map[string]*types.User{ - userAdminID: {Id: userAdminID, Role: types.UserRoleAdmin, IsServiceUser: false, AccountID: testAccountID, AutoGroups: []string{allGroupID}}, - userDevID: {Id: userDevID, Role: types.UserRoleUser, IsServiceUser: false, AccountID: testAccountID, AutoGroups: []string{sshUsersGroupID, devGroupID}}, - userOpsID: {Id: userOpsID, Role: types.UserRoleUser, IsServiceUser: false, AccountID: testAccountID, AutoGroups: []string{sshUsersGroupID, opsGroupID}}, - } - - account := &types.Account{ - Id: testAccountID, Peers: peers, Groups: groups, Policies: policies, Routes: routes, - Users: users, - Network: &types.Network{ - Identifier: "net-golden-test", Net: net.IPNet{IP: net.IP{100, 64, 0, 0}, Mask: net.CIDRMask(16, 32)}, Serial: 1, - }, - DNSSettings: types.DNSSettings{DisabledManagementGroups: []string{opsGroupID}}, - NameServerGroups: map[string]*dns.NameServerGroup{ - nameserverGroupID: { - ID: nameserverGroupID, Name: "Main NS", Enabled: true, Groups: []string{devGroupID}, - NameServers: []dns.NameServer{{IP: netip.MustParseAddr("8.8.8.8"), NSType: dns.UDPNameServerType, Port: 53}}, - }, - }, - PostureChecks: []*posture.Checks{ - {ID: postureCheckID, Name: "Check version", Checks: posture.ChecksDefinition{ - NBVersionCheck: &posture.NBVersionCheck{MinVersion: "0.26.0"}, - }}, - }, - NetworkResources: []*resourceTypes.NetworkResource{ - {ID: networkResourceID, NetworkID: networkID, AccountID: testAccountID, Enabled: true, Address: "db.netbird.cloud"}, - }, - Networks: []*networkTypes.Network{{ID: networkID, Name: "DB Network", AccountID: testAccountID}}, - NetworkRouters: []*routerTypes.NetworkRouter{ - {ID: networkRouterID, NetworkID: networkID, Peer: routingPeerID, Enabled: true, AccountID: testAccountID}, - }, - Settings: &types.Settings{PeerLoginExpirationEnabled: true, PeerLoginExpiration: 1 * time.Hour}, - } - - for _, p := range account.Policies { - p.AccountID = account.Id - } - for _, r := range account.Routes { - r.AccountID = account.Id - } - - return account -} - -func TestGetPeerNetworkMap_Golden_New_WithOnPeerAddedRouter_Batched(t *testing.T) { - account := createTestAccountWithEntities() - - ctx := context.Background() - validatedPeersMap := make(map[string]struct{}) - for i := range numPeers { - peerID := fmt.Sprintf("peer-%d", i) - if peerID == offlinePeerID { - continue - } - validatedPeersMap[peerID] = struct{}{} - } - - builder := types.NewNetworkMapBuilder(account, validatedPeersMap) - - newRouterID := "peer-new-router-102" - newRouterIP := net.IP{100, 64, 1, 2} - newRouter := &nbpeer.Peer{ - ID: newRouterID, - IP: newRouterIP, - Key: fmt.Sprintf("key-%s", newRouterID), - DNSLabel: "newrouter102", - Status: &nbpeer.PeerStatus{Connected: true, LastSeen: time.Now()}, - UserID: "user-admin", - Meta: nbpeer.PeerSystemMeta{WtVersion: "0.26.0", GoOS: "linux"}, - LastLogin: func() *time.Time { t := time.Now(); return &t }(), - } - - account.Peers[newRouterID] = newRouter - - if opsGroup, exists := account.Groups[opsGroupID]; exists { - opsGroup.Peers = append(opsGroup.Peers, newRouterID) - } - if allGroup, exists := account.Groups[allGroupID]; exists { - allGroup.Peers = append(allGroup.Peers, newRouterID) - } - - newRoute := &route.Route{ - ID: route.ID("route-new-router"), - Network: netip.MustParsePrefix("172.16.0.0/24"), - Peer: newRouter.Key, - PeerID: newRouterID, - Description: "Route from new router", - Enabled: true, - PeerGroups: []string{opsGroupID}, - Groups: []string{devGroupID, opsGroupID}, - AccessControlGroups: []string{devGroupID}, - AccountID: account.Id, - } - account.Routes[newRoute.ID] = newRoute - - validatedPeersMap[newRouterID] = struct{}{} - - if account.Network != nil { - account.Network.Serial++ - } - - builder.EnqueuePeersForIncrementalAdd(account, newRouterID) - - time.Sleep(100 * time.Millisecond) - - networkMap := builder.GetPeerNetworkMap(ctx, testingPeerID, dns.CustomZone{}, nil, validatedPeersMap, nil) - - normalizeAndSortNetworkMap(networkMap) - - jsonData, err := json.MarshalIndent(networkMap, "", " ") - require.NoError(t, err, "error marshaling network map to JSON") - - goldenFilePath := filepath.Join("testdata", "networkmap_golden_new_with_onpeeradded_router.json") - - t.Log("Update golden file with OnPeerAdded router...") - err = os.MkdirAll(filepath.Dir(goldenFilePath), 0755) - require.NoError(t, err) - err = os.WriteFile(goldenFilePath, jsonData, 0644) - require.NoError(t, err) - - expectedJSON, err := os.ReadFile(goldenFilePath) - require.NoError(t, err, "error reading golden file") - - require.JSONEq(t, string(expectedJSON), string(jsonData), "network map from NEW builder with OnPeerAdded router does not match golden file") -} diff --git a/management/server/types/networkmapbuilder.go b/management/server/types/networkmapbuilder.go deleted file mode 100644 index 6448b8403..000000000 --- a/management/server/types/networkmapbuilder.go +++ /dev/null @@ -1,2317 +0,0 @@ -package types - -import ( - "context" - "fmt" - "slices" - "strconv" - "strings" - "sync" - "time" - - log "github.com/sirupsen/logrus" - "golang.org/x/exp/maps" - - "github.com/netbirdio/netbird/client/ssh/auth" - nbdns "github.com/netbirdio/netbird/dns" - "github.com/netbirdio/netbird/management/internals/modules/zones" - resourceTypes "github.com/netbirdio/netbird/management/server/networks/resources/types" - routerTypes "github.com/netbirdio/netbird/management/server/networks/routers/types" - nbpeer "github.com/netbirdio/netbird/management/server/peer" - "github.com/netbirdio/netbird/management/server/telemetry" - "github.com/netbirdio/netbird/route" -) - -const ( - allPeers = "0.0.0.0" - allWildcard = "0.0.0.0/0" - v6AllWildcard = "::/0" - fw = "fw:" - rfw = "route-fw:" - - szAddPeerBatch = 10 - maxPeerAddRetries = 20 -) - -type NetworkMapCache struct { - globalRoutes map[route.ID]*route.Route - globalRules map[string]*FirewallRule //ruleId - globalRouteRules map[string]*RouteFirewallRule //ruleId - globalPeers map[string]*nbpeer.Peer - - groupToPeers map[string][]string - peerToGroups map[string][]string - policyToRules map[string][]*PolicyRule //policyId - groupToPolicies map[string][]*Policy - groupToRoutes map[string][]*route.Route - peerToRoutes map[string][]*route.Route - - peerACLs map[string]*PeerACLView - peerRoutes map[string]*PeerRoutesView - peerDNS map[string]*nbdns.Config - peerSSH map[string]*PeerSSHView - - groupIDToUserIDs map[string][]string - allowedUserIDs map[string]struct{} - - resourceRouters map[string]map[string]*routerTypes.NetworkRouter - resourcePolicies map[string][]*Policy - - globalResources map[string]*resourceTypes.NetworkResource // resourceId - - acgToRoutes map[string]map[route.ID]*RouteOwnerInfo // routeID -> owner info - noACGRoutes map[route.ID]*RouteOwnerInfo - - mu sync.RWMutex -} - -type RouteOwnerInfo struct { - PeerID string - RouteID route.ID -} - -type PeerACLView struct { - ConnectedPeerIDs []string - FirewallRuleIDs []string -} - -type PeerRoutesView struct { - OwnRouteIDs []route.ID - NetworkResourceIDs []route.ID - InheritedRouteIDs []route.ID - RouteFirewallRuleIDs []string -} - -type PeerSSHView struct { - EnableSSH bool - AuthorizedUsers map[string]map[string]struct{} -} - -type NetworkMapBuilder struct { - account *Account - cache *NetworkMapCache - validatedPeers map[string]struct{} - - apb addPeerBatch -} - -type addPeerBatch struct { - mu sync.Mutex - sg *sync.Cond - ids []string - la *Account - retryCount map[string]int -} - -func NewNetworkMapBuilder(account *Account, validatedPeers map[string]struct{}) *NetworkMapBuilder { - builder := &NetworkMapBuilder{ - cache: &NetworkMapCache{ - globalRoutes: make(map[route.ID]*route.Route), - globalRules: make(map[string]*FirewallRule), - globalRouteRules: make(map[string]*RouteFirewallRule), - globalPeers: make(map[string]*nbpeer.Peer), - groupToPeers: make(map[string][]string), - peerToGroups: make(map[string][]string), - policyToRules: make(map[string][]*PolicyRule), - groupToPolicies: make(map[string][]*Policy), - groupToRoutes: make(map[string][]*route.Route), - peerToRoutes: make(map[string][]*route.Route), - peerACLs: make(map[string]*PeerACLView), - peerRoutes: make(map[string]*PeerRoutesView), - peerDNS: make(map[string]*nbdns.Config), - peerSSH: make(map[string]*PeerSSHView), - groupIDToUserIDs: make(map[string][]string), - allowedUserIDs: make(map[string]struct{}), - globalResources: make(map[string]*resourceTypes.NetworkResource), - acgToRoutes: make(map[string]map[route.ID]*RouteOwnerInfo), - noACGRoutes: make(map[route.ID]*RouteOwnerInfo), - }, - validatedPeers: make(map[string]struct{}), - } - builder.apb.sg = sync.NewCond(&builder.apb.mu) - builder.apb.ids = make([]string, 0, szAddPeerBatch) - builder.apb.la = account - builder.apb.retryCount = make(map[string]int) - - maps.Copy(builder.validatedPeers, validatedPeers) - - builder.initialBuild(account) - - go builder.incAddPeerLoop() - return builder -} - -func (b *NetworkMapBuilder) initialBuild(account *Account) { - b.cache.mu.Lock() - defer b.cache.mu.Unlock() - - b.account = account - - start := time.Now() - - b.buildGlobalIndexes(account) - - resourceRouters := account.GetResourceRoutersMap() - resourcePolicies := account.GetResourcePoliciesMap() - b.cache.resourceRouters = resourceRouters - b.cache.resourcePolicies = resourcePolicies - - for peerID := range account.Peers { - b.buildPeerACLView(account, peerID) - b.buildPeerRoutesView(account, peerID) - b.buildPeerDNSView(account, peerID) - } - - log.Debugf("NetworkMapBuilder: Initial build completed in %v for account %s", time.Since(start), account.Id) -} - -func (b *NetworkMapBuilder) buildGlobalIndexes(account *Account) { - clear(b.cache.globalPeers) - clear(b.cache.groupToPeers) - clear(b.cache.peerToGroups) - clear(b.cache.policyToRules) - clear(b.cache.groupToPolicies) - clear(b.cache.globalRoutes) - clear(b.cache.globalRules) - clear(b.cache.globalRouteRules) - clear(b.cache.globalResources) - clear(b.cache.groupToRoutes) - clear(b.cache.peerToRoutes) - clear(b.cache.acgToRoutes) - clear(b.cache.noACGRoutes) - clear(b.cache.groupIDToUserIDs) - clear(b.cache.allowedUserIDs) - clear(b.cache.peerSSH) - - maps.Copy(b.cache.globalPeers, account.Peers) - - b.cache.groupIDToUserIDs = account.GetActiveGroupUsers() - b.cache.allowedUserIDs = b.buildAllowedUserIDs(account) - - for groupID, group := range account.Groups { - peersCopy := make([]string, len(group.Peers)) - copy(peersCopy, group.Peers) - b.cache.groupToPeers[groupID] = peersCopy - - for _, peerID := range group.Peers { - b.cache.peerToGroups[peerID] = append(b.cache.peerToGroups[peerID], groupID) - } - } - - for _, policy := range account.Policies { - if !policy.Enabled { - continue - } - - b.cache.policyToRules[policy.ID] = policy.Rules - - affectedGroups := make(map[string]struct{}) - for _, rule := range policy.Rules { - if !rule.Enabled { - continue - } - - for _, groupID := range rule.Sources { - affectedGroups[groupID] = struct{}{} - } - for _, groupID := range rule.Destinations { - affectedGroups[groupID] = struct{}{} - } - if rule.SourceResource.Type == ResourceTypePeer && rule.SourceResource.ID != "" { - groupId := rule.SourceResource.ID - affectedGroups[groupId] = struct{}{} - b.cache.peerToGroups[rule.SourceResource.ID] = append(b.cache.peerToGroups[rule.SourceResource.ID], groupId) - } - if rule.DestinationResource.Type == ResourceTypePeer && rule.DestinationResource.ID != "" { - groupId := rule.DestinationResource.ID - affectedGroups[groupId] = struct{}{} - b.cache.peerToGroups[rule.DestinationResource.ID] = append(b.cache.peerToGroups[rule.DestinationResource.ID], groupId) - } - } - - for groupID := range affectedGroups { - b.cache.groupToPolicies[groupID] = append(b.cache.groupToPolicies[groupID], policy) - } - } - - for _, resource := range account.NetworkResources { - if !resource.Enabled { - continue - } - b.cache.globalResources[resource.ID] = resource - } - - for _, r := range account.Routes { - if !r.Enabled { - continue - } - for _, groupID := range r.PeerGroups { - b.cache.groupToRoutes[groupID] = append(b.cache.groupToRoutes[groupID], r) - } - if r.Peer != "" { - if peer, ok := b.cache.globalPeers[r.Peer]; ok { - b.cache.peerToRoutes[peer.ID] = append(b.cache.peerToRoutes[peer.ID], r) - } - } - } -} - -func (b *NetworkMapBuilder) buildPeerACLView(account *Account, peerID string) { - peer := account.GetPeer(peerID) - if peer == nil { - return - } - - allPotentialPeers, firewallRules, authorizedUsers, sshEnabled := b.getPeerConnectionResources(account, peer, b.validatedPeers) - - isRouter, networkResourcesRoutes, sourcePeers := b.getNetworkResourcesForPeer(account, peer) - - var emptyExpiredPeers []*nbpeer.Peer - finalAllPeers := b.addNetworksRoutingPeers( - networkResourcesRoutes, - peer, - allPotentialPeers, - emptyExpiredPeers, - isRouter, - sourcePeers, - ) - - view := &PeerACLView{ - ConnectedPeerIDs: make([]string, 0, len(finalAllPeers)), - FirewallRuleIDs: make([]string, 0, len(firewallRules)), - } - - for _, p := range finalAllPeers { - view.ConnectedPeerIDs = append(view.ConnectedPeerIDs, p.ID) - } - - for _, rule := range firewallRules { - ruleID := b.generateFirewallRuleID(rule) - view.FirewallRuleIDs = append(view.FirewallRuleIDs, ruleID) - b.cache.globalRules[ruleID] = rule - } - - b.cache.peerACLs[peerID] = view - b.cache.peerSSH[peerID] = &PeerSSHView{ - EnableSSH: sshEnabled, - AuthorizedUsers: authorizedUsers, - } -} - -func (b *NetworkMapBuilder) getPeerConnectionResources(account *Account, peer *nbpeer.Peer, - validatedPeersMap map[string]struct{}, -) ([]*nbpeer.Peer, []*FirewallRule, map[string]map[string]struct{}, bool) { - peerID := peer.ID - ctx := context.Background() - - peerGroups := b.cache.peerToGroups[peerID] - peerGroupsMap := make(map[string]struct{}, len(peerGroups)) - for _, groupID := range peerGroups { - peerGroupsMap[groupID] = struct{}{} - } - - rulesExists := make(map[string]struct{}) - peersExists := make(map[string]struct{}) - fwRules := make([]*FirewallRule, 0) - peers := make([]*nbpeer.Peer, 0) - - authorizedUsers := make(map[string]map[string]struct{}) - sshEnabled := false - - for _, group := range peerGroups { - policies := b.cache.groupToPolicies[group] - for _, policy := range policies { - if isValid := account.validatePostureChecksOnPeer(ctx, policy.SourcePostureChecks, peerID); !isValid { - continue - } - rules := b.cache.policyToRules[policy.ID] - for _, rule := range rules { - var sourcePeers, destinationPeers []*nbpeer.Peer - var peerInSources, peerInDestinations bool - - if rule.SourceResource.Type == ResourceTypePeer && rule.SourceResource.ID != "" { - peerInSources = rule.SourceResource.ID == peerID - } else { - peerInSources = b.isPeerInGroupscached(rule.Sources, peerGroupsMap) - } - - if rule.DestinationResource.Type == ResourceTypePeer && rule.DestinationResource.ID != "" { - peerInDestinations = rule.DestinationResource.ID == peerID - } else { - peerInDestinations = b.isPeerInGroupscached(rule.Destinations, peerGroupsMap) - } - - if !peerInSources && !peerInDestinations { - continue - } - - if rule.SourceResource.Type == ResourceTypePeer && rule.SourceResource.ID != "" { - peer := account.GetPeer(rule.SourceResource.ID) - if peer != nil { - sourcePeers = []*nbpeer.Peer{peer} - } - } else { - sourcePeers = b.getPeersFromGroupscached(account, rule.Sources, peerID, policy.SourcePostureChecks, validatedPeersMap) - } - - if rule.DestinationResource.Type == ResourceTypePeer && rule.DestinationResource.ID != "" { - peer := account.GetPeer(rule.DestinationResource.ID) - if peer != nil { - destinationPeers = []*nbpeer.Peer{peer} - } - } else { - destinationPeers = b.getPeersFromGroupscached(account, rule.Destinations, peerID, nil, validatedPeersMap) - } - - if rule.Bidirectional { - if peerInSources { - b.generateResourcescached( - rule, destinationPeers, FirewallRuleDirectionIN, - peer, &peers, &fwRules, peersExists, rulesExists, - ) - } - if peerInDestinations { - b.generateResourcescached( - rule, sourcePeers, FirewallRuleDirectionOUT, - peer, &peers, &fwRules, peersExists, rulesExists, - ) - } - } - - if peerInSources { - b.generateResourcescached( - rule, destinationPeers, FirewallRuleDirectionOUT, - peer, &peers, &fwRules, peersExists, rulesExists, - ) - } - - if peerInDestinations { - b.generateResourcescached( - rule, sourcePeers, FirewallRuleDirectionIN, - peer, &peers, &fwRules, peersExists, rulesExists, - ) - - if rule.Protocol == PolicyRuleProtocolNetbirdSSH { - sshEnabled = true - switch { - case len(rule.AuthorizedGroups) > 0: - for groupID, localUsers := range rule.AuthorizedGroups { - userIDs, ok := b.cache.groupIDToUserIDs[groupID] - if !ok { - continue - } - - if len(localUsers) == 0 { - localUsers = []string{auth.Wildcard} - } - - for _, localUser := range localUsers { - if authorizedUsers[localUser] == nil { - authorizedUsers[localUser] = make(map[string]struct{}) - } - for _, userID := range userIDs { - authorizedUsers[localUser][userID] = struct{}{} - } - } - } - case rule.AuthorizedUser != "": - if authorizedUsers[auth.Wildcard] == nil { - authorizedUsers[auth.Wildcard] = make(map[string]struct{}) - } - authorizedUsers[auth.Wildcard][rule.AuthorizedUser] = struct{}{} - default: - authorizedUsers[auth.Wildcard] = maps.Clone(b.cache.allowedUserIDs) - } - } else if policyRuleImpliesLegacySSH(rule) && peer.SSHEnabled { - sshEnabled = true - authorizedUsers[auth.Wildcard] = maps.Clone(b.cache.allowedUserIDs) - } - } - } - } - } - - return peers, fwRules, authorizedUsers, sshEnabled -} - -func (b *NetworkMapBuilder) isPeerInGroupscached(groupIDs []string, peerGroupsMap map[string]struct{}) bool { - for _, groupID := range groupIDs { - if _, exists := peerGroupsMap[groupID]; exists { - return true - } - } - return false -} - -func (b *NetworkMapBuilder) getPeersFromGroupscached(account *Account, groupIDs []string, - excludePeerID string, postureChecksIDs []string, validatedPeersMap map[string]struct{}, -) []*nbpeer.Peer { - ctx := context.Background() - uniquePeers := make(map[string]*nbpeer.Peer) - - for _, groupID := range groupIDs { - peerIDs := b.cache.groupToPeers[groupID] - for _, peerID := range peerIDs { - if peerID == excludePeerID { - continue - } - - if _, ok := validatedPeersMap[peerID]; !ok { - continue - } - - peer := b.cache.globalPeers[peerID] - if peer == nil { - continue - } - - if len(postureChecksIDs) > 0 { - if !account.validatePostureChecksOnPeer(ctx, postureChecksIDs, peerID) { - continue - } - } - - uniquePeers[peerID] = peer - } - } - - result := make([]*nbpeer.Peer, 0, len(uniquePeers)) - for _, peer := range uniquePeers { - result = append(result, peer) - } - - return result -} - -func (b *NetworkMapBuilder) generateResourcescached( - rule *PolicyRule, groupPeers []*nbpeer.Peer, direction int, targetPeer *nbpeer.Peer, - peers *[]*nbpeer.Peer, rules *[]*FirewallRule, peersExists map[string]struct{}, rulesExists map[string]struct{}, -) { - for _, peer := range groupPeers { - if peer == nil { - continue - } - if _, ok := peersExists[peer.ID]; !ok { - *peers = append(*peers, peer) - peersExists[peer.ID] = struct{}{} - } - - fr := FirewallRule{ - PolicyID: rule.ID, - PeerIP: peer.IP.String(), - Direction: direction, - Action: string(rule.Action), - Protocol: firewallRuleProtocol(rule.Protocol), - } - - var s strings.Builder - s.WriteString(rule.ID) - s.WriteString(fr.PeerIP) - s.WriteString(strconv.Itoa(direction)) - s.WriteString(fr.Protocol) - s.WriteString(fr.Action) - s.WriteString(strings.Join(rule.Ports, ",")) - - ruleID := s.String() - - if _, ok := rulesExists[ruleID]; ok { - continue - } - rulesExists[ruleID] = struct{}{} - - if len(rule.Ports) == 0 && len(rule.PortRanges) == 0 { - *rules = append(*rules, &fr) - continue - } - - *rules = append(*rules, expandPortsAndRanges(fr, rule, targetPeer)...) - } -} - -func (b *NetworkMapBuilder) getNetworkResourcesForPeer(account *Account, peer *nbpeer.Peer) (bool, []*route.Route, map[string]struct{}) { - ctx := context.Background() - peerID := peer.ID - - var isRoutingPeer bool - var routes []*route.Route - allSourcePeers := make(map[string]struct{}) - - peerGroups := b.cache.peerToGroups[peerID] - peerGroupsMap := make(map[string]struct{}, len(peerGroups)) - for _, groupID := range peerGroups { - peerGroupsMap[groupID] = struct{}{} - } - - for _, resource := range b.cache.globalResources { - - networkRoutingPeers := b.cache.resourceRouters[resource.NetworkID] - resourcePolicies := b.cache.resourcePolicies[resource.ID] - if len(resourcePolicies) == 0 { - continue - } - - isRouterForThisResource := false - - if networkRoutingPeers != nil { - if router, ok := networkRoutingPeers[peerID]; ok && router.Enabled { - isRoutingPeer = true - isRouterForThisResource = true - if rt := b.createNetworkResourceRoutes(resource, peerID, router, resourcePolicies); rt != nil { - routes = append(routes, rt) - } - } - } - - hasAccessAsClient := false - if !isRouterForThisResource { - for _, policy := range resourcePolicies { - if b.isPeerInGroupscached(policy.SourceGroups(), peerGroupsMap) { - if account.validatePostureChecksOnPeer(ctx, policy.SourcePostureChecks, peerID) { - hasAccessAsClient = true - break - } - } - } - } - - if hasAccessAsClient && networkRoutingPeers != nil { - for routerPeerID, router := range networkRoutingPeers { - if router.Enabled { - if rt := b.createNetworkResourceRoutes(resource, routerPeerID, router, resourcePolicies); rt != nil { - routes = append(routes, rt) - } - } - } - } - - if isRouterForThisResource { - for _, policy := range resourcePolicies { - var peersWithAccess []*nbpeer.Peer - if policy.Rules[0].SourceResource.Type == ResourceTypePeer && policy.Rules[0].SourceResource.ID != "" { - peersWithAccess = []*nbpeer.Peer{peer} - } else { - peersWithAccess = b.getPeersFromGroupscached(account, policy.SourceGroups(), "", policy.SourcePostureChecks, b.validatedPeers) - } - for _, p := range peersWithAccess { - allSourcePeers[p.ID] = struct{}{} - } - } - } - } - - return isRoutingPeer, routes, allSourcePeers -} - -func (b *NetworkMapBuilder) createNetworkResourceRoutes( - resource *resourceTypes.NetworkResource, routerPeerID string, - router *routerTypes.NetworkRouter, resourcePolicies []*Policy, -) *route.Route { - if len(resourcePolicies) > 0 { - peer := b.cache.globalPeers[routerPeerID] - if peer != nil { - return resource.ToRoute(peer, router) - } - } - return nil -} - -func (b *NetworkMapBuilder) addNetworksRoutingPeers( - networkResourcesRoutes []*route.Route, peer *nbpeer.Peer, peersToConnect []*nbpeer.Peer, - expiredPeers []*nbpeer.Peer, isRouter bool, sourcePeers map[string]struct{}, -) []*nbpeer.Peer { - - networkRoutesPeers := make(map[string]struct{}, len(networkResourcesRoutes)) - for _, r := range networkResourcesRoutes { - networkRoutesPeers[r.PeerID] = struct{}{} - } - - delete(sourcePeers, peer.ID) - delete(networkRoutesPeers, peer.ID) - - for _, existingPeer := range peersToConnect { - delete(sourcePeers, existingPeer.ID) - delete(networkRoutesPeers, existingPeer.ID) - } - for _, expPeer := range expiredPeers { - delete(sourcePeers, expPeer.ID) - delete(networkRoutesPeers, expPeer.ID) - } - - missingPeers := make(map[string]struct{}, len(sourcePeers)+len(networkRoutesPeers)) - if isRouter { - for p := range sourcePeers { - missingPeers[p] = struct{}{} - } - } - for p := range networkRoutesPeers { - missingPeers[p] = struct{}{} - } - - for p := range missingPeers { - if missingPeer := b.cache.globalPeers[p]; missingPeer != nil { - peersToConnect = append(peersToConnect, missingPeer) - } - } - - return peersToConnect -} - -func (b *NetworkMapBuilder) buildPeerRoutesView(account *Account, peerID string) { - ctx := context.Background() - peer := account.GetPeer(peerID) - if peer == nil { - return - } - resourcePolicies := b.cache.resourcePolicies - - view := &PeerRoutesView{ - OwnRouteIDs: make([]route.ID, 0), - NetworkResourceIDs: make([]route.ID, 0), - RouteFirewallRuleIDs: make([]string, 0), - } - - enabledRoutes, disabledRoutes := b.getRoutingPeerRoutes(peerID) - for _, rt := range enabledRoutes { - if rt.PeerID != "" && rt.PeerID != peerID { - if b.cache.globalPeers[rt.PeerID] == nil { - continue - } - } - - view.OwnRouteIDs = append(view.OwnRouteIDs, rt.ID) - b.cache.globalRoutes[rt.ID] = rt - } - - aclView := b.cache.peerACLs[peerID] - if aclView != nil { - peerRoutesMembership := make(LookupMap) - for _, r := range append(enabledRoutes, disabledRoutes...) { - peerRoutesMembership[string(r.GetHAUniqueID())] = struct{}{} - } - - peerGroups := b.cache.peerToGroups[peerID] - peerGroupsMap := make(LookupMap) - for _, groupID := range peerGroups { - peerGroupsMap[groupID] = struct{}{} - } - - for _, aclPeerID := range aclView.ConnectedPeerIDs { - if aclPeerID == peerID { - continue - } - activeRoutes, _ := b.getRoutingPeerRoutes(aclPeerID) - groupFilteredRoutes := account.filterRoutesByGroups(activeRoutes, peerGroupsMap) - haFilteredRoutes := account.filterRoutesFromPeersOfSameHAGroup(groupFilteredRoutes, peerRoutesMembership) - - for _, inheritedRoute := range haFilteredRoutes { - view.InheritedRouteIDs = append(view.InheritedRouteIDs, inheritedRoute.ID) - b.cache.globalRoutes[inheritedRoute.ID] = inheritedRoute - } - } - } - - _, networkResourcesRoutes, _ := b.getNetworkResourcesForPeer(account, peer) - - for _, rt := range networkResourcesRoutes { - view.NetworkResourceIDs = append(view.NetworkResourceIDs, rt.ID) - b.cache.globalRoutes[rt.ID] = rt - } - - allRoutes := slices.Concat(enabledRoutes, networkResourcesRoutes) - b.updateACGIndexForPeer(peerID, allRoutes) - - routeFirewallRules := b.getPeerRoutesFirewallRules(account, peerID, b.validatedPeers) - for _, rule := range routeFirewallRules { - ruleID := b.generateRouteFirewallRuleID(rule) - view.RouteFirewallRuleIDs = append(view.RouteFirewallRuleIDs, ruleID) - b.cache.globalRouteRules[ruleID] = rule - } - - if len(networkResourcesRoutes) > 0 { - networkResourceFirewallRules := account.GetPeerNetworkResourceFirewallRules(ctx, peer, b.validatedPeers, networkResourcesRoutes, resourcePolicies) - for _, rule := range networkResourceFirewallRules { - ruleID := b.generateRouteFirewallRuleID(rule) - view.RouteFirewallRuleIDs = append(view.RouteFirewallRuleIDs, ruleID) - b.cache.globalRouteRules[ruleID] = rule - } - } - - b.cache.peerRoutes[peerID] = view -} - -func (b *NetworkMapBuilder) updateACGIndexForPeer(peerID string, routes []*route.Route) { - for acg, routeMap := range b.cache.acgToRoutes { - for routeID, info := range routeMap { - if info.PeerID == peerID { - delete(routeMap, routeID) - } - } - if len(routeMap) == 0 { - delete(b.cache.acgToRoutes, acg) - } - } - - for routeID, info := range b.cache.noACGRoutes { - if info.PeerID == peerID { - delete(b.cache.noACGRoutes, routeID) - } - } - - for _, rt := range routes { - if !rt.Enabled { - continue - } - - if len(rt.AccessControlGroups) == 0 { - b.cache.noACGRoutes[rt.ID] = &RouteOwnerInfo{ - PeerID: peerID, - RouteID: rt.ID, - } - } else { - for _, acg := range rt.AccessControlGroups { - if b.cache.acgToRoutes[acg] == nil { - b.cache.acgToRoutes[acg] = make(map[route.ID]*RouteOwnerInfo) - } - - b.cache.acgToRoutes[acg][rt.ID] = &RouteOwnerInfo{ - PeerID: peerID, - RouteID: rt.ID, - } - } - } - } -} - -func (b *NetworkMapBuilder) getRoutingPeerRoutes(peerID string) (enabledRoutes []*route.Route, disabledRoutes []*route.Route) { - peer := b.cache.globalPeers[peerID] - if peer == nil { - return enabledRoutes, disabledRoutes - } - - seenRoute := make(map[route.ID]struct{}) - - takeRoute := func(r *route.Route, id string) { - if _, ok := seenRoute[r.ID]; ok { - return - } - seenRoute[r.ID] = struct{}{} - - if r.Enabled { - // maybe here is some mess - here we store peer key (see comment below) - r.Peer = peer.Key - enabledRoutes = append(enabledRoutes, r) - return - } - disabledRoutes = append(disabledRoutes, r) - } - - peerGroups := b.cache.peerToGroups[peerID] - for _, groupID := range peerGroups { - groupRoutes := b.cache.groupToRoutes[groupID] - for _, r := range groupRoutes { - newPeerRoute := r.Copy() - // and here we store peer ID - this logic is taken from original account.getRoutingPeerRoutes - newPeerRoute.Peer = peerID - newPeerRoute.PeerGroups = nil - newPeerRoute.ID = route.ID(string(r.ID) + ":" + peerID) - takeRoute(newPeerRoute, peerID) - } - } - for _, r := range b.cache.peerToRoutes[peerID] { - takeRoute(r.Copy(), peerID) - } - return enabledRoutes, disabledRoutes -} - -func (b *NetworkMapBuilder) getPeerRoutesFirewallRules(account *Account, peerID string, validatedPeersMap map[string]struct{}) []*RouteFirewallRule { - routesFirewallRules := make([]*RouteFirewallRule, 0) - - enabledRoutes, _ := b.getRoutingPeerRoutes(peerID) - for _, route := range enabledRoutes { - if len(route.AccessControlGroups) == 0 { - defaultPermit := getDefaultPermit(route) - routesFirewallRules = append(routesFirewallRules, defaultPermit...) - continue - } - - distributionPeers := b.getDistributionGroupsPeers(route) - - for _, accessGroup := range route.AccessControlGroups { - policies := b.getAllRoutePoliciesFromGroups([]string{accessGroup}) - - rules := b.getRouteFirewallRules(peerID, policies, route, validatedPeersMap, distributionPeers, account) - routesFirewallRules = append(routesFirewallRules, rules...) - } - } - - return routesFirewallRules -} - -func (b *NetworkMapBuilder) getDistributionGroupsPeers(route *route.Route) map[string]struct{} { - distPeers := make(map[string]struct{}) - for _, id := range route.Groups { - groupPeers := b.cache.groupToPeers[id] - if groupPeers == nil { - continue - } - - for _, pID := range groupPeers { - distPeers[pID] = struct{}{} - } - } - return distPeers -} - -func (b *NetworkMapBuilder) getAllRoutePoliciesFromGroups(accessControlGroups []string) []*Policy { - routePolicies := make(map[string]*Policy) - - for _, groupID := range accessControlGroups { - candidatePolicies := b.cache.groupToPolicies[groupID] - - for _, policy := range candidatePolicies { - if _, found := routePolicies[policy.ID]; found { - continue - } - policyRules := b.cache.policyToRules[policy.ID] - for _, rule := range policyRules { - if slices.Contains(rule.Destinations, groupID) { - routePolicies[policy.ID] = policy - break - } - } - } - } - - return maps.Values(routePolicies) -} - -func (b *NetworkMapBuilder) getRouteFirewallRules( - peerID string, policies []*Policy, route *route.Route, validatedPeersMap map[string]struct{}, - distributionPeers map[string]struct{}, account *Account, -) []*RouteFirewallRule { - ctx := context.Background() - var fwRules []*RouteFirewallRule - for _, policy := range policies { - if !policy.Enabled { - continue - } - - for _, rule := range policy.Rules { - if !rule.Enabled { - continue - } - - rulePeers := b.getRulePeers(rule, policy.SourcePostureChecks, peerID, distributionPeers, validatedPeersMap, account) - - rules := generateRouteFirewallRules(ctx, route, rule, rulePeers, FirewallRuleDirectionIN) - fwRules = append(fwRules, rules...) - } - } - return fwRules -} - -func (b *NetworkMapBuilder) getRulePeers( - rule *PolicyRule, postureChecks []string, peerID string, distributionPeers map[string]struct{}, - validatedPeersMap map[string]struct{}, account *Account, -) []*nbpeer.Peer { - distPeersWithPolicy := make(map[string]struct{}) - - for _, id := range rule.Sources { - groupPeers := b.cache.groupToPeers[id] - if groupPeers == nil { - continue - } - - for _, pID := range groupPeers { - if pID == peerID { - continue - } - _, distPeer := distributionPeers[pID] - _, valid := validatedPeersMap[pID] - - if distPeer && valid && account.validatePostureChecksOnPeer(context.Background(), postureChecks, pID) { - distPeersWithPolicy[pID] = struct{}{} - } - } - } - - if rule.SourceResource.Type == ResourceTypePeer && rule.SourceResource.ID != "" { - _, distPeer := distributionPeers[rule.SourceResource.ID] - _, valid := validatedPeersMap[rule.SourceResource.ID] - if distPeer && valid && account.validatePostureChecksOnPeer(context.Background(), postureChecks, rule.SourceResource.ID) { - distPeersWithPolicy[rule.SourceResource.ID] = struct{}{} - } - } - - distributionGroupPeers := make([]*nbpeer.Peer, 0, len(distPeersWithPolicy)) - for pID := range distPeersWithPolicy { - peer := b.cache.globalPeers[pID] - if peer == nil { - continue - } - distributionGroupPeers = append(distributionGroupPeers, peer) - } - return distributionGroupPeers -} - -func (b *NetworkMapBuilder) buildPeerDNSView(account *Account, peerID string) { - peerGroups := b.cache.peerToGroups[peerID] - checkGroups := make(map[string]struct{}, len(peerGroups)) - for _, groupID := range peerGroups { - checkGroups[groupID] = struct{}{} - } - - dnsManagementStatus := b.getPeerDNSManagementStatus(account, checkGroups) - dnsConfig := &nbdns.Config{ - ServiceEnable: dnsManagementStatus, - } - - if dnsManagementStatus { - dnsConfig.NameServerGroups = b.getPeerNSGroups(account, peerID, checkGroups) - } - - b.cache.peerDNS[peerID] = dnsConfig -} - -func (b *NetworkMapBuilder) getPeerDNSManagementStatus(account *Account, checkGroups map[string]struct{}) bool { - - enabled := true - for _, groupID := range account.DNSSettings.DisabledManagementGroups { - _, found := checkGroups[groupID] - if found { - enabled = false - break - } - } - return enabled -} - -func (b *NetworkMapBuilder) getPeerNSGroups(account *Account, peerID string, checkGroups map[string]struct{}) []*nbdns.NameServerGroup { - var peerNSGroups []*nbdns.NameServerGroup - - for _, nsGroup := range account.NameServerGroups { - if !nsGroup.Enabled { - continue - } - for _, gID := range nsGroup.Groups { - _, found := checkGroups[gID] - if found { - peer := b.cache.globalPeers[peerID] - if !peerIsNameserver(peer, nsGroup) { - peerNSGroups = append(peerNSGroups, nsGroup.Copy()) - break - } - } - } - } - - return peerNSGroups -} - -func (b *NetworkMapBuilder) buildAllowedUserIDs(account *Account) map[string]struct{} { - users := make(map[string]struct{}) - for _, nbUser := range account.Users { - if !nbUser.IsBlocked() && !nbUser.IsServiceUser { - users[nbUser.Id] = struct{}{} - } - } - return users -} - -func firewallRuleProtocol(protocol PolicyRuleProtocolType) string { - if protocol == PolicyRuleProtocolNetbirdSSH { - return string(PolicyRuleProtocolTCP) - } - return string(protocol) -} - -// lock should be held -func (b *NetworkMapBuilder) updateAccountLocked(account *Account) *Account { - if account.Network.CurrentSerial() > b.account.Network.CurrentSerial() { - b.account = account - } - return b.account -} - -func (b *NetworkMapBuilder) GetPeerNetworkMap( - ctx context.Context, peerID string, peersCustomZone nbdns.CustomZone, accountZones []*zones.Zone, - validatedPeers map[string]struct{}, metrics *telemetry.AccountManagerMetrics, -) *NetworkMap { - start := time.Now() - - b.cache.mu.RLock() - defer b.cache.mu.RUnlock() - - account := b.account - - peer := account.GetPeer(peerID) - if peer == nil { - return &NetworkMap{Network: account.Network.Copy()} - } - - aclView := b.cache.peerACLs[peerID] - routesView := b.cache.peerRoutes[peerID] - dnsConfig := b.cache.peerDNS[peerID] - sshView := b.cache.peerSSH[peerID] - - if aclView == nil || routesView == nil || dnsConfig == nil { - return &NetworkMap{Network: account.Network.Copy()} - } - - nm := b.assembleNetworkMap(ctx, account, peer, aclView, routesView, dnsConfig, sshView, peersCustomZone, accountZones, validatedPeers) - - if metrics != nil { - objectCount := int64(len(nm.Peers) + len(nm.OfflinePeers) + len(nm.Routes) + len(nm.FirewallRules) + len(nm.RoutesFirewallRules)) - metrics.CountNetworkMapObjects(objectCount) - metrics.CountGetPeerNetworkMapDuration(time.Since(start)) - - if objectCount > 5000 { - log.WithContext(ctx).Tracef("account: %s has a total resource count of %d objects from cache", - account.Id, objectCount) - } - } - - return nm -} - -func (b *NetworkMapBuilder) assembleNetworkMap( - ctx context.Context, account *Account, peer *nbpeer.Peer, aclView *PeerACLView, routesView *PeerRoutesView, - dnsConfig *nbdns.Config, sshView *PeerSSHView, peersCustomZone nbdns.CustomZone, accountZones []*zones.Zone, validatedPeers map[string]struct{}, -) *NetworkMap { - - var peersToConnect []*nbpeer.Peer - var expiredPeers []*nbpeer.Peer - - for _, peerID := range aclView.ConnectedPeerIDs { - if _, ok := validatedPeers[peerID]; !ok { - continue - } - - peer := b.cache.globalPeers[peerID] - if peer == nil { - continue - } - - expired, _ := peer.LoginExpired(account.Settings.PeerLoginExpiration) - if account.Settings.PeerLoginExpirationEnabled && expired { - expiredPeers = append(expiredPeers, peer) - } else { - peersToConnect = append(peersToConnect, peer) - } - } - - var routes []*route.Route - allRouteIDs := slices.Concat(routesView.OwnRouteIDs, routesView.NetworkResourceIDs, routesView.InheritedRouteIDs) - - for _, routeID := range allRouteIDs { - if route := b.cache.globalRoutes[routeID]; route != nil { - routes = append(routes, route) - } - } - - var firewallRules []*FirewallRule - for _, ruleID := range aclView.FirewallRuleIDs { - if rule := b.cache.globalRules[ruleID]; rule != nil { - firewallRules = append(firewallRules, rule) - } else { - log.Debugf("NetworkMapBuilder: peer %s assembling network map has no fwrule %s in globalRules", peer.ID, ruleID) - } - } - - var routesFirewallRules []*RouteFirewallRule - for _, ruleID := range routesView.RouteFirewallRuleIDs { - if rule := b.cache.globalRouteRules[ruleID]; rule != nil { - routesFirewallRules = append(routesFirewallRules, rule) - } - } - - finalDNSConfig := *dnsConfig - if finalDNSConfig.ServiceEnable { - var zones []nbdns.CustomZone - - peerGroupsSlice := b.cache.peerToGroups[peer.ID] - peerGroups := make(LookupMap, len(peerGroupsSlice)) - for _, groupID := range peerGroupsSlice { - peerGroups[groupID] = struct{}{} - } - - if peersCustomZone.Domain != "" { - records := filterZoneRecordsForPeers(peer, peersCustomZone, peersToConnect, expiredPeers) - zones = append(zones, nbdns.CustomZone{ - Domain: peersCustomZone.Domain, - Records: records, - }) - } - - filteredAccountZones := filterPeerAppliedZones(ctx, accountZones, peerGroups) - zones = append(zones, filteredAccountZones...) - - finalDNSConfig.CustomZones = zones - } - - nm := &NetworkMap{ - Peers: peersToConnect, - Network: account.Network.Copy(), - Routes: routes, - DNSConfig: finalDNSConfig, - OfflinePeers: expiredPeers, - FirewallRules: firewallRules, - RoutesFirewallRules: routesFirewallRules, - } - - if sshView != nil { - nm.EnableSSH = sshView.EnableSSH - nm.AuthorizedUsers = sshView.AuthorizedUsers - } - - return nm -} - -func (b *NetworkMapBuilder) generateFirewallRuleID(rule *FirewallRule) string { - var s strings.Builder - s.WriteString(fw) - s.WriteString(rule.PolicyID) - s.WriteRune(':') - s.WriteString(rule.PeerIP) - s.WriteRune(':') - s.WriteString(strconv.Itoa(rule.Direction)) - s.WriteRune(':') - s.WriteString(rule.Protocol) - s.WriteRune(':') - s.WriteString(rule.Action) - s.WriteRune(':') - s.WriteString(rule.Port) - s.WriteRune(':') - s.WriteString(strconv.Itoa(int(rule.PortRange.Start))) - s.WriteRune('-') - s.WriteString(strconv.Itoa(int(rule.PortRange.End))) - return s.String() -} - -func (b *NetworkMapBuilder) generateRouteFirewallRuleID(rule *RouteFirewallRule) string { - var s strings.Builder - s.WriteString(rfw) - s.WriteString(string(rule.RouteID)) - s.WriteRune(':') - s.WriteString(rule.Destination) - s.WriteRune(':') - s.WriteString(rule.Action) - s.WriteRune(':') - s.WriteString(strings.Join(rule.SourceRanges, ",")) - s.WriteRune(':') - s.WriteString(rule.Protocol) - s.WriteRune(':') - s.WriteString(strconv.Itoa(int(rule.Port))) - return s.String() -} - -func (b *NetworkMapBuilder) isPeerInGroups(groupIDs []string, peerGroups []string) bool { - for _, groupID := range groupIDs { - if slices.Contains(peerGroups, groupID) { - return true - } - } - return false -} - -func (b *NetworkMapBuilder) isPeerRouter(account *Account, peerID string) bool { - for _, r := range account.Routes { - if !r.Enabled { - continue - } - - if r.PeerID == peerID { - return true - } - - if peer := b.cache.globalPeers[peerID]; peer != nil { - if r.Peer == peer.Key && r.PeerID == "" { - return true - } - } - } - - routers := account.GetResourceRoutersMap() - for _, networkRouters := range routers { - if router, exists := networkRouters[peerID]; exists && router.Enabled { - return true - } - } - - return false -} - -func (b *NetworkMapBuilder) incAddPeerLoop() { - for { - b.apb.mu.Lock() - if len(b.apb.ids) == 0 { - b.apb.sg.Wait() - } - b.addPeersIncrementally() - b.apb.mu.Unlock() - } -} - -// lock on b.apb level should be held -func (b *NetworkMapBuilder) addPeersIncrementally() { - peers := slices.Clone(b.apb.ids) - clear(b.apb.ids) - b.apb.ids = b.apb.ids[:0] - latestAcc := b.apb.la - b.apb.mu.Unlock() - - tt := time.Now() - b.cache.mu.Lock() - defer b.cache.mu.Unlock() - - account := b.updateAccountLocked(latestAcc) - - log.Debugf("NetworkMapBuilder: Starting incremental add of %d peers", len(peers)) - - allUpdates := make(map[string]*PeerUpdateDelta) - - for _, peerID := range peers { - peer := account.GetPeer(peerID) - if peer == nil { - b.apb.mu.Lock() - retries := b.apb.retryCount[peerID] - b.apb.mu.Unlock() - - if retries >= maxPeerAddRetries { - log.Errorf("NetworkMapBuilder: peer %s not found in account %s after %d retries, giving up", peerID, account.Id, retries) - b.apb.mu.Lock() - delete(b.apb.retryCount, peerID) - b.apb.mu.Unlock() - continue - } - - log.Warnf("NetworkMapBuilder: peer %s not found in account %s, retry %d/%d", peerID, account.Id, retries+1, maxPeerAddRetries) - b.apb.mu.Lock() - b.apb.retryCount[peerID] = retries + 1 - b.apb.mu.Unlock() - b.enqueuePeersForIncrementalAdd(latestAcc, peerID) - continue - } - - b.apb.mu.Lock() - delete(b.apb.retryCount, peerID) - b.apb.mu.Unlock() - - b.validatedPeers[peerID] = struct{}{} - b.cache.globalPeers[peerID] = peer - - peerGroups := b.updateIndexesForNewPeer(account, peerID) - b.buildPeerACLView(account, peerID) - b.buildPeerRoutesView(account, peerID) - b.buildPeerDNSView(account, peerID) - - peerDeltas := b.collectDeltasForNewPeer(account, peerID, peerGroups) - for affectedPeerID, delta := range peerDeltas { - if existing, ok := allUpdates[affectedPeerID]; ok { - existing.mergeFrom(delta) - continue - } - allUpdates[affectedPeerID] = delta - } - } - - for affectedPeerID, delta := range allUpdates { - b.applyDeltaToPeer(account, affectedPeerID, delta) - } - - log.Debugf("NetworkMapBuilder: Added %d peers to cache, affected %d peers, took %s", len(peers), len(allUpdates), time.Since(tt)) - - b.apb.mu.Lock() - if len(b.apb.ids) > 0 { - b.apb.sg.Signal() - } -} - -func (b *NetworkMapBuilder) enqueuePeersForIncrementalAdd(acc *Account, peerIDs ...string) { - b.apb.mu.Lock() - b.apb.ids = append(b.apb.ids, peerIDs...) - if b.apb.la != nil && acc.Network.CurrentSerial() > b.apb.la.Network.CurrentSerial() { - b.apb.la = acc - } - b.apb.sg.Signal() - b.apb.mu.Unlock() -} - -func (b *NetworkMapBuilder) EnqueuePeersForIncrementalAdd(acc *Account, peerIDs ...string) { - b.enqueuePeersForIncrementalAdd(acc, peerIDs...) -} - -type ViewDelta struct { - AddedPeerIDs []string - RemovedPeerIDs []string - AddedRuleIDs []string - RemovedRuleIDs []string -} - -func (b *NetworkMapBuilder) OnPeerAddedIncremental(acc *Account, peerID string) error { - tt := time.Now() - peer := acc.GetPeer(peerID) - if peer == nil { - return fmt.Errorf("NetworkMapBuilder: peer %s not found in account", peerID) - } - - b.cache.mu.Lock() - defer b.cache.mu.Unlock() - - account := b.updateAccountLocked(acc) - - log.Debugf("NetworkMapBuilder: Adding peer %s (IP: %s) to cache", peerID, peer.IP.String()) - - b.validatedPeers[peerID] = struct{}{} - - b.cache.globalPeers[peerID] = peer - - peerGroups := b.updateIndexesForNewPeer(account, peerID) - - b.buildPeerACLView(account, peerID) - b.buildPeerRoutesView(account, peerID) - b.buildPeerDNSView(account, peerID) - - log.Debugf("NetworkMapBuilder: Adding peer %s to cache, views took %s", peerID, time.Since(tt)) - - b.incrementalUpdateAffectedPeers(account, peerID, peerGroups) - - log.Debugf("NetworkMapBuilder: Added peer %s to cache, took %s", peerID, time.Since(tt)) - - return nil -} - -func (b *NetworkMapBuilder) updateIndexesForNewPeer(account *Account, peerID string) []string { - peerGroups := make([]string, 0) - - for groupID, group := range account.Groups { - if slices.Contains(group.Peers, peerID) { - if !slices.Contains(b.cache.groupToPeers[groupID], peerID) { - b.cache.groupToPeers[groupID] = append(b.cache.groupToPeers[groupID], peerID) - } - peerGroups = append(peerGroups, groupID) - } - } - - b.cache.peerToGroups[peerID] = peerGroups - - for _, r := range account.Routes { - if !r.Enabled || b.cache.globalRoutes[r.ID] != nil { - continue - } - for _, groupID := range r.PeerGroups { - if !slices.Contains(b.cache.groupToRoutes[groupID], r) { - b.cache.groupToRoutes[groupID] = append(b.cache.groupToRoutes[groupID], r) - } - } - if r.Peer != "" { - if peer, ok := b.cache.globalPeers[r.Peer]; ok { - if !slices.Contains(b.cache.peerToRoutes[peer.ID], r) { - b.cache.peerToRoutes[peer.ID] = append(b.cache.peerToRoutes[peer.ID], r) - } - } - } - b.cache.globalRoutes[r.ID] = r - } - - return peerGroups -} - -func (b *NetworkMapBuilder) incrementalUpdateAffectedPeers(account *Account, newPeerID string, peerGroups []string) { - updates := b.collectDeltasForNewPeer(account, newPeerID, peerGroups) - for affectedPeerID, delta := range updates { - b.applyDeltaToPeer(account, affectedPeerID, delta) - } -} - -func (b *NetworkMapBuilder) collectDeltasForNewPeer(account *Account, newPeerID string, peerGroups []string) map[string]*PeerUpdateDelta { - updates := b.calculateIncrementalUpdates(account, newPeerID, peerGroups) - - if b.isPeerRouter(account, newPeerID) { - affectedByRoutes := b.findPeersAffectedByNewRouter(account, newPeerID, peerGroups) - for affectedPeerID := range affectedByRoutes { - if affectedPeerID == newPeerID { - continue - } - if _, exists := updates[affectedPeerID]; !exists { - updates[affectedPeerID] = &PeerUpdateDelta{ - PeerID: affectedPeerID, - RebuildRoutesView: true, - } - } else { - updates[affectedPeerID].RebuildRoutesView = true - } - } - } - - return updates -} - -func (b *NetworkMapBuilder) findPeersAffectedByNewRouter(account *Account, newRouterID string, routerGroups []string) map[string]struct{} { - affected := make(map[string]struct{}) - enabledRoutes, _ := b.getRoutingPeerRoutes(newRouterID) - - for _, route := range enabledRoutes { - for _, distGroupID := range route.Groups { - if peers := b.cache.groupToPeers[distGroupID]; peers != nil { - for _, peerID := range peers { - if peerID != newRouterID { - affected[peerID] = struct{}{} - } - } - } - } - - for _, peerGroupID := range route.PeerGroups { - if peers := b.cache.groupToPeers[peerGroupID]; peers != nil { - for _, peerID := range peers { - if peerID != newRouterID { - affected[peerID] = struct{}{} - } - } - } - } - } - - for _, route := range account.Routes { - if !route.Enabled { - continue - } - - routerInPeerGroups := false - for _, peerGroupID := range route.PeerGroups { - if slices.Contains(routerGroups, peerGroupID) { - routerInPeerGroups = true - break - } - } - - if routerInPeerGroups { - for _, distGroupID := range route.Groups { - if peers := b.cache.groupToPeers[distGroupID]; peers != nil { - for _, peerID := range peers { - affected[peerID] = struct{}{} - } - } - } - } - } - - return affected -} - -func (b *NetworkMapBuilder) calculateIncrementalUpdates(account *Account, newPeerID string, peerGroups []string) map[string]*PeerUpdateDelta { - updates := make(map[string]*PeerUpdateDelta) - ctx := context.Background() - - groupAllLn := 0 - if allGroup, err := account.GetGroupAll(); err == nil { - groupAllLn = len(allGroup.Peers) - 1 - } - - newPeer := b.cache.globalPeers[newPeerID] - if newPeer == nil { - return updates - } - - for _, policy := range account.Policies { - if !policy.Enabled { - continue - } - - for _, rule := range policy.Rules { - if !rule.Enabled { - continue - } - var peerInSources, peerInDestinations bool - - if rule.SourceResource.Type == ResourceTypePeer && rule.SourceResource.ID == newPeerID { - peerInSources = true - } else { - peerInSources = b.isPeerInGroups(rule.Sources, peerGroups) - } - - if rule.DestinationResource.Type == ResourceTypePeer && rule.DestinationResource.ID == newPeerID { - peerInDestinations = true - } else { - peerInDestinations = b.isPeerInGroups(rule.Destinations, peerGroups) - } - - if peerInSources { - if len(rule.Destinations) > 0 { - b.addUpdateForPeersInGroups(updates, rule.Destinations, newPeerID, rule, FirewallRuleDirectionIN, groupAllLn) - } - if rule.DestinationResource.Type == ResourceTypePeer && rule.DestinationResource.ID != "" { - b.addUpdateForDirectPeerResource(updates, rule.DestinationResource.ID, newPeerID, rule, FirewallRuleDirectionIN) - } - } - - if peerInDestinations { - if len(rule.Sources) > 0 { - b.addUpdateForPeersInGroups(updates, rule.Sources, newPeerID, rule, FirewallRuleDirectionOUT, groupAllLn) - } - if rule.SourceResource.Type == ResourceTypePeer && rule.SourceResource.ID != "" { - b.addUpdateForDirectPeerResource(updates, rule.SourceResource.ID, newPeerID, rule, FirewallRuleDirectionOUT) - } - } - - if rule.Bidirectional { - if peerInSources { - if len(rule.Destinations) > 0 { - b.addUpdateForPeersInGroups(updates, rule.Destinations, newPeerID, rule, FirewallRuleDirectionOUT, groupAllLn) - } - if rule.DestinationResource.Type == ResourceTypePeer && rule.DestinationResource.ID != "" { - b.addUpdateForDirectPeerResource(updates, rule.DestinationResource.ID, newPeerID, rule, FirewallRuleDirectionOUT) - } - } - if peerInDestinations { - if len(rule.Sources) > 0 { - b.addUpdateForPeersInGroups(updates, rule.Sources, newPeerID, rule, FirewallRuleDirectionIN, groupAllLn) - } - if rule.SourceResource.Type == ResourceTypePeer && rule.SourceResource.ID != "" { - b.addUpdateForDirectPeerResource(updates, rule.SourceResource.ID, newPeerID, rule, FirewallRuleDirectionIN) - } - } - } - } - } - - b.calculateRouteFirewallUpdates(newPeerID, newPeer, peerGroups, updates) - - b.calculateNetworkResourceFirewallUpdates(ctx, account, newPeerID, newPeer, peerGroups, updates) - - b.calculateNewRouterNetworkResourceUpdates(ctx, account, newPeerID, updates) - - return updates -} - -func (b *NetworkMapBuilder) calculateNewRouterNetworkResourceUpdates( - ctx context.Context, account *Account, newPeerID string, - updates map[string]*PeerUpdateDelta, -) { - resourceRouters := b.cache.resourceRouters - - for networkID, routers := range resourceRouters { - router, isRouter := routers[newPeerID] - if !isRouter || !router.Enabled { - continue - } - - for _, resource := range b.cache.globalResources { - if resource.NetworkID != networkID { - continue - } - - policies := b.cache.resourcePolicies[resource.ID] - if len(policies) == 0 { - continue - } - - peersWithAccess := make(map[string]struct{}) - - for _, policy := range policies { - if !policy.Enabled { - continue - } - - sourceGroups := policy.SourceGroups() - for _, sourceGroup := range sourceGroups { - groupPeers := b.cache.groupToPeers[sourceGroup] - for _, peerID := range groupPeers { - if peerID == newPeerID { - continue - } - - if account.validatePostureChecksOnPeer(ctx, policy.SourcePostureChecks, peerID) { - peersWithAccess[peerID] = struct{}{} - } - } - } - } - - for peerID := range peersWithAccess { - delta := updates[peerID] - if delta == nil { - delta = &PeerUpdateDelta{ - PeerID: peerID, - } - updates[peerID] = delta - } - - if !slices.Contains(delta.AddConnectedPeers, newPeerID) { - delta.AddConnectedPeers = append(delta.AddConnectedPeers, newPeerID) - } - - delta.RebuildRoutesView = true - } - } - } -} - -func (b *NetworkMapBuilder) calculateRouteFirewallUpdates( - newPeerID string, newPeer *nbpeer.Peer, - peerGroups []string, updates map[string]*PeerUpdateDelta, -) { - processedPeerRoutes := make(map[string]map[route.ID]struct{}) - - for routeID, info := range b.cache.noACGRoutes { - if info.PeerID == newPeerID { - continue - } - - b.addRouteFirewallUpdate(updates, info.PeerID, string(routeID), newPeer.IP.String()) - - if processedPeerRoutes[info.PeerID] == nil { - processedPeerRoutes[info.PeerID] = make(map[route.ID]struct{}) - } - processedPeerRoutes[info.PeerID][routeID] = struct{}{} - } - - for _, acg := range peerGroups { - routeInfos := b.cache.acgToRoutes[acg] - if routeInfos == nil { - continue - } - - for routeID, info := range routeInfos { - if info.PeerID == newPeerID { - continue - } - - if processedRoutes, exists := processedPeerRoutes[info.PeerID]; exists { - if _, processed := processedRoutes[routeID]; processed { - continue - } - } - - b.addRouteFirewallUpdate(updates, info.PeerID, string(routeID), newPeer.IP.String()) - - if processedPeerRoutes[info.PeerID] == nil { - processedPeerRoutes[info.PeerID] = make(map[route.ID]struct{}) - } - processedPeerRoutes[info.PeerID][routeID] = struct{}{} - } - } -} - -func (b *NetworkMapBuilder) addRouteFirewallUpdate( - updates map[string]*PeerUpdateDelta, peerID string, - routeID string, sourceIP string, -) { - delta := updates[peerID] - if delta == nil { - delta = &PeerUpdateDelta{ - PeerID: peerID, - UpdateRouteFirewallRules: make([]*RouteFirewallRuleUpdate, 0), - } - updates[peerID] = delta - } - - for _, existing := range delta.UpdateRouteFirewallRules { - if existing.RuleID == routeID && existing.AddSourceIP == sourceIP { - return - } - } - - delta.UpdateRouteFirewallRules = append(delta.UpdateRouteFirewallRules, &RouteFirewallRuleUpdate{ - RuleID: routeID, - AddSourceIP: sourceIP, - }) -} - -func (b *NetworkMapBuilder) calculateNetworkResourceFirewallUpdates( - ctx context.Context, account *Account, newPeerID string, - newPeer *nbpeer.Peer, peerGroups []string, updates map[string]*PeerUpdateDelta, -) { - for _, resource := range b.cache.globalResources { - resourcePolicies := b.cache.resourcePolicies - resourceRouters := b.cache.resourceRouters - - policies := resourcePolicies[resource.ID] - peerHasAccess := false - - for _, policy := range policies { - if !policy.Enabled { - continue - } - - sourceGroups := policy.SourceGroups() - for _, sourceGroup := range sourceGroups { - if slices.Contains(peerGroups, sourceGroup) { - if account.validatePostureChecksOnPeer(ctx, policy.SourcePostureChecks, newPeerID) { - peerHasAccess = true - break - } - } - } - - if peerHasAccess { - break - } - } - - if !peerHasAccess { - continue - } - - networkRouters := resourceRouters[resource.NetworkID] - for routerPeerID, router := range networkRouters { - if !router.Enabled || routerPeerID == newPeerID { - continue - } - - delta := updates[routerPeerID] - if delta == nil { - delta = &PeerUpdateDelta{ - PeerID: routerPeerID, - } - updates[routerPeerID] = delta - } - - if !slices.Contains(delta.AddConnectedPeers, newPeerID) { - delta.AddConnectedPeers = append(delta.AddConnectedPeers, newPeerID) - } - - delta.RebuildRoutesView = true - } - } -} - -type PeerUpdateDelta struct { - PeerID string - AddConnectedPeers []string - AddFirewallRules []*FirewallRuleDelta - AddRoutes []route.ID - UpdateRouteFirewallRules []*RouteFirewallRuleUpdate - UpdateDNS bool - RebuildRoutesView bool -} - -func (d *PeerUpdateDelta) mergeFrom(other *PeerUpdateDelta) { - for _, peerID := range other.AddConnectedPeers { - if !slices.Contains(d.AddConnectedPeers, peerID) { - d.AddConnectedPeers = append(d.AddConnectedPeers, peerID) - } - } - - existingRuleIDs := make(map[string]struct{}, len(d.AddFirewallRules)) - for _, rule := range d.AddFirewallRules { - existingRuleIDs[rule.RuleID] = struct{}{} - } - for _, rule := range other.AddFirewallRules { - if _, exists := existingRuleIDs[rule.RuleID]; !exists { - d.AddFirewallRules = append(d.AddFirewallRules, rule) - existingRuleIDs[rule.RuleID] = struct{}{} - } - } - - for _, routeID := range other.AddRoutes { - if !slices.Contains(d.AddRoutes, routeID) { - d.AddRoutes = append(d.AddRoutes, routeID) - } - } - - existingRouteUpdates := make(map[string]map[string]struct{}) - for _, update := range d.UpdateRouteFirewallRules { - if existingRouteUpdates[update.RuleID] == nil { - existingRouteUpdates[update.RuleID] = make(map[string]struct{}) - } - existingRouteUpdates[update.RuleID][update.AddSourceIP] = struct{}{} - } - for _, update := range other.UpdateRouteFirewallRules { - if existingRouteUpdates[update.RuleID] == nil { - existingRouteUpdates[update.RuleID] = make(map[string]struct{}) - } - if _, exists := existingRouteUpdates[update.RuleID][update.AddSourceIP]; !exists { - d.UpdateRouteFirewallRules = append(d.UpdateRouteFirewallRules, update) - existingRouteUpdates[update.RuleID][update.AddSourceIP] = struct{}{} - } - } - - if other.UpdateDNS { - d.UpdateDNS = true - } - if other.RebuildRoutesView { - d.RebuildRoutesView = true - } -} - -type FirewallRuleDelta struct { - Rule *FirewallRule - RuleID string - Direction int -} - -type RouteFirewallRuleUpdate struct { - RuleID string - AddSourceIP string -} - -func (b *NetworkMapBuilder) addUpdateForPeersInGroups( - updates map[string]*PeerUpdateDelta, groupIDs []string, newPeerID string, - rule *PolicyRule, direction int, allGroupLn int, -) { - for _, groupID := range groupIDs { - peers := b.cache.groupToPeers[groupID] - cnt := 0 - for _, peerID := range peers { - if peerID == newPeerID { - continue - } - if _, ok := b.validatedPeers[peerID]; !ok { - continue - } - cnt++ - } - all := false - if allGroupLn > 0 && cnt == allGroupLn { - all = true - } - newPeer := b.cache.globalPeers[newPeerID] - fr := &FirewallRule{ - PolicyID: rule.ID, - PeerIP: newPeer.IP.String(), - Direction: direction, - Action: string(rule.Action), - Protocol: firewallRuleProtocol(rule.Protocol), - } - for _, peerID := range peers { - if peerID == newPeerID { - continue - } - if _, ok := b.validatedPeers[peerID]; !ok { - continue - } - targetPeer := b.cache.globalPeers[peerID] - if targetPeer == nil { - continue - } - - peerIPForRule := fr.PeerIP - if all { - peerIPForRule = allPeers - } - - b.addOrUpdateFirewallRuleInDelta(updates, peerID, newPeerID, rule, direction, fr, peerIPForRule, targetPeer) - } - } -} - -func (b *NetworkMapBuilder) addUpdateForDirectPeerResource( - updates map[string]*PeerUpdateDelta, targetPeerID string, newPeerID string, - rule *PolicyRule, direction int, -) { - if targetPeerID == newPeerID { - return - } - - if _, ok := b.validatedPeers[targetPeerID]; !ok { - return - } - - newPeer := b.cache.globalPeers[newPeerID] - if newPeer == nil { - return - } - - targetPeer := b.cache.globalPeers[targetPeerID] - if targetPeer == nil { - return - } - - fr := &FirewallRule{ - PolicyID: rule.ID, - PeerIP: newPeer.IP.String(), - Direction: direction, - Action: string(rule.Action), - Protocol: firewallRuleProtocol(rule.Protocol), - } - - b.addOrUpdateFirewallRuleInDelta(updates, targetPeerID, newPeerID, rule, direction, fr, fr.PeerIP, targetPeer) -} - -func (b *NetworkMapBuilder) addOrUpdateFirewallRuleInDelta( - updates map[string]*PeerUpdateDelta, targetPeerID string, newPeerID string, - rule *PolicyRule, direction int, baseRule *FirewallRule, peerIP string, targetPeer *nbpeer.Peer, -) { - delta := updates[targetPeerID] - if delta == nil { - delta = &PeerUpdateDelta{ - PeerID: targetPeerID, - AddConnectedPeers: []string{newPeerID}, - AddFirewallRules: make([]*FirewallRuleDelta, 0), - } - updates[targetPeerID] = delta - } else if !slices.Contains(delta.AddConnectedPeers, newPeerID) { - delta.AddConnectedPeers = append(delta.AddConnectedPeers, newPeerID) - } - - baseRule.PeerIP = peerIP - - if len(rule.Ports) > 0 || len(rule.PortRanges) > 0 { - expandedRules := expandPortsAndRanges(*baseRule, rule, targetPeer) - for _, expandedRule := range expandedRules { - ruleID := b.generateFirewallRuleID(expandedRule) - delta.AddFirewallRules = append(delta.AddFirewallRules, &FirewallRuleDelta{ - Rule: expandedRule, - RuleID: ruleID, - Direction: direction, - }) - } - } else { - ruleID := b.generateFirewallRuleID(baseRule) - delta.AddFirewallRules = append(delta.AddFirewallRules, &FirewallRuleDelta{ - Rule: baseRule, - RuleID: ruleID, - Direction: direction, - }) - } -} - -func (b *NetworkMapBuilder) applyDeltaToPeer(account *Account, peerID string, delta *PeerUpdateDelta) { - if len(delta.AddConnectedPeers) > 0 || len(delta.AddFirewallRules) > 0 { - if aclView := b.cache.peerACLs[peerID]; aclView != nil { - for _, connectedPeerID := range delta.AddConnectedPeers { - if !slices.Contains(aclView.ConnectedPeerIDs, connectedPeerID) { - aclView.ConnectedPeerIDs = append(aclView.ConnectedPeerIDs, connectedPeerID) - } - } - - for _, ruleDelta := range delta.AddFirewallRules { - b.cache.globalRules[ruleDelta.RuleID] = ruleDelta.Rule - - if !slices.Contains(aclView.FirewallRuleIDs, ruleDelta.RuleID) { - aclView.FirewallRuleIDs = append(aclView.FirewallRuleIDs, ruleDelta.RuleID) - } - } - } - } - - if delta.RebuildRoutesView { - b.buildPeerRoutesView(account, peerID) - } else if len(delta.UpdateRouteFirewallRules) > 0 { - if routesView := b.cache.peerRoutes[peerID]; routesView != nil { - b.updateRouteFirewallRules(routesView, delta.UpdateRouteFirewallRules) - } - } - - if delta.UpdateDNS { - b.buildPeerDNSView(account, peerID) - } -} - -func (b *NetworkMapBuilder) updateRouteFirewallRules(routesView *PeerRoutesView, updates []*RouteFirewallRuleUpdate) { - for _, update := range updates { - for _, ruleID := range routesView.RouteFirewallRuleIDs { - rule := b.cache.globalRouteRules[ruleID] - if rule == nil { - continue - } - - if string(rule.RouteID) == update.RuleID { - if hasWildcard := slices.Contains(rule.SourceRanges, allWildcard) || slices.Contains(rule.SourceRanges, v6AllWildcard); hasWildcard { - break - } - - sourceIP := update.AddSourceIP - - if strings.Contains(sourceIP, ":") { - sourceIP += "/128" // IPv6 - } else { - sourceIP += "/32" // IPv4 - } - - if !slices.Contains(rule.SourceRanges, sourceIP) { - rule.SourceRanges = append(rule.SourceRanges, sourceIP) - } - break - } - } - } -} - -func (b *NetworkMapBuilder) OnPeerDeleted(acc *Account, peerID string) error { - b.cache.mu.Lock() - defer b.cache.mu.Unlock() - - account := b.updateAccountLocked(acc) - - deletedPeer := b.cache.globalPeers[peerID] - if deletedPeer == nil { - return fmt.Errorf("peer %s not found in cache", peerID) - } - - deletedPeerKey := deletedPeer.Key - peerGroups := b.cache.peerToGroups[peerID] - peerIP := deletedPeer.IP.String() - - log.Debugf("NetworkMapBuilder: Deleting peer %s (IP: %s) from cache", peerID, peerIP) - - delete(b.validatedPeers, peerID) - - routesToDelete := []route.ID{} - - for routeID, r := range account.Routes { - if r.Peer != deletedPeerKey && r.PeerID != peerID { - continue - } - if len(r.PeerGroups) == 0 { - routesToDelete = append(routesToDelete, routeID) - continue - } - newPeerAssigned := false - for _, groupID := range r.PeerGroups { - candidatePeerIDs := b.cache.groupToPeers[groupID] - for _, candidatePeerID := range candidatePeerIDs { - if candidatePeerID == peerID { - continue - } - if candidatePeer := b.cache.globalPeers[candidatePeerID]; candidatePeer != nil { - r.Peer = candidatePeer.Key - r.PeerID = candidatePeerID - newPeerAssigned = true - break - } - } - if newPeerAssigned { - break - } - } - - if !newPeerAssigned { - routesToDelete = append(routesToDelete, routeID) - } - } - - for _, routeID := range routesToDelete { - delete(account.Routes, routeID) - } - - delete(b.cache.peerACLs, peerID) - delete(b.cache.peerRoutes, peerID) - delete(b.cache.peerDNS, peerID) - delete(b.cache.peerSSH, peerID) - - delete(b.cache.globalPeers, peerID) - - for acg, routeMap := range b.cache.acgToRoutes { - for routeID, info := range routeMap { - if info.PeerID == peerID { - delete(routeMap, routeID) - } - } - if len(routeMap) == 0 { - delete(b.cache.acgToRoutes, acg) - } - } - - for _, groupID := range peerGroups { - if peers := b.cache.groupToPeers[groupID]; peers != nil { - b.cache.groupToPeers[groupID] = slices.DeleteFunc(peers, func(id string) bool { - return id == peerID - }) - } - } - delete(b.cache.peerToGroups, peerID) - - affectedPeers := make(map[string]struct{}) - - for _, r := range account.Routes { - for _, groupID := range r.Groups { - if peers := b.cache.groupToPeers[groupID]; peers != nil { - for _, p := range peers { - affectedPeers[p] = struct{}{} - } - } - } - - for _, groupID := range r.PeerGroups { - if peers := b.cache.groupToPeers[groupID]; peers != nil { - for _, p := range peers { - affectedPeers[p] = struct{}{} - } - } - } - } - - for affectedPeerID := range affectedPeers { - if affectedPeerID == peerID { - continue - } - b.buildPeerRoutesView(account, affectedPeerID) - } - - peersToRebuildACL := make(map[string]struct{}) - peerDeletionUpdates := b.findPeersAffectedByDeletedPeerACL(peerID, peerIP, peerGroups, peersToRebuildACL) - for affectedPeerID, updates := range peerDeletionUpdates { - b.applyDeletionUpdates(affectedPeerID, updates) - } - - for affectedPeerID := range peersToRebuildACL { - b.buildPeerACLView(account, affectedPeerID) - } - - b.cleanupUnusedRules() - - log.Debugf("NetworkMapBuilder: Deleted peer %s, affected %d other peers", peerID, len(affectedPeers)) - - return nil -} - -func (b *NetworkMapBuilder) findPeersAffectedByDeletedPeerACL( - deletedPeerID string, - peerIP string, - peerGroups []string, - peersToRebuildACL map[string]struct{}, -) map[string]*PeerDeletionUpdate { - - affected := make(map[string]*PeerDeletionUpdate) - - for peerID, aclView := range b.cache.peerACLs { - if peerID == deletedPeerID { - continue - } - - if slices.Contains(aclView.ConnectedPeerIDs, deletedPeerID) { - peersToRebuildACL[peerID] = struct{}{} - if affected[peerID] == nil { - affected[peerID] = &PeerDeletionUpdate{ - RemovePeerID: deletedPeerID, - PeerIP: peerIP, - } - } - } - } - - affectedRouteOwners := make(map[string]struct{}) - - for _, groupID := range peerGroups { - if routeMap, ok := b.cache.acgToRoutes[groupID]; ok { - for _, info := range routeMap { - if info.PeerID != deletedPeerID { - affectedRouteOwners[info.PeerID] = struct{}{} - } - } - } - } - - for _, info := range b.cache.noACGRoutes { - if info.PeerID != deletedPeerID { - affectedRouteOwners[info.PeerID] = struct{}{} - } - } - - for ownerPeerID := range affectedRouteOwners { - if affected[ownerPeerID] == nil { - affected[ownerPeerID] = &PeerDeletionUpdate{ - RemovePeerID: deletedPeerID, - PeerIP: peerIP, - RemoveFromSourceRanges: true, - } - } else { - affected[ownerPeerID].RemoveFromSourceRanges = true - } - } - - return affected -} - -type PeerDeletionUpdate struct { - RemovePeerID string - RemoveFirewallRuleIDs []string - RemoveRouteIDs []route.ID - RemoveFromSourceRanges bool - PeerIP string -} - -func (b *NetworkMapBuilder) applyDeletionUpdates(peerID string, updates *PeerDeletionUpdate) { - if routesView := b.cache.peerRoutes[peerID]; routesView != nil { - if len(updates.RemoveRouteIDs) > 0 { - routesView.NetworkResourceIDs = slices.DeleteFunc(routesView.NetworkResourceIDs, func(routeID route.ID) bool { - return slices.Contains(updates.RemoveRouteIDs, routeID) - }) - } - - if updates.RemoveFromSourceRanges { - b.removeIPFromRouteFirewallRules(routesView, updates.PeerIP) - } - } -} - -func (b *NetworkMapBuilder) removeIPFromRouteFirewallRules(routesView *PeerRoutesView, peerIP string) { - sourceIPv4 := peerIP + "/32" - sourceIPv6 := peerIP + "/128" - - rulesToRemove := []string{} - - for _, ruleID := range routesView.RouteFirewallRuleIDs { - if rule := b.cache.globalRouteRules[ruleID]; rule != nil { - rule.SourceRanges = slices.DeleteFunc(rule.SourceRanges, func(source string) bool { - return source == sourceIPv4 || source == sourceIPv6 || source == peerIP - }) - - if len(rule.SourceRanges) == 0 { - rulesToRemove = append(rulesToRemove, ruleID) - } - } - } - - if len(rulesToRemove) > 0 { - routesView.RouteFirewallRuleIDs = slices.DeleteFunc(routesView.RouteFirewallRuleIDs, func(ruleID string) bool { - return slices.Contains(rulesToRemove, ruleID) - }) - } -} - -func (b *NetworkMapBuilder) cleanupUnusedRules() { - usedFirewallRules := make(map[string]struct{}) - usedRouteRules := make(map[string]struct{}) - usedRoutes := make(map[route.ID]struct{}) - - for _, aclView := range b.cache.peerACLs { - for _, ruleID := range aclView.FirewallRuleIDs { - usedFirewallRules[ruleID] = struct{}{} - } - } - - for _, routesView := range b.cache.peerRoutes { - for _, ruleID := range routesView.RouteFirewallRuleIDs { - usedRouteRules[ruleID] = struct{}{} - } - - for _, routeID := range routesView.OwnRouteIDs { - usedRoutes[routeID] = struct{}{} - } - for _, routeID := range routesView.NetworkResourceIDs { - usedRoutes[routeID] = struct{}{} - } - } - - for ruleID := range b.cache.globalRules { - if _, used := usedFirewallRules[ruleID]; !used { - delete(b.cache.globalRules, ruleID) - } - } - - for ruleID := range b.cache.globalRouteRules { - if _, used := usedRouteRules[ruleID]; !used { - delete(b.cache.globalRouteRules, ruleID) - } - } - - for routeID := range b.cache.globalRoutes { - if _, used := usedRoutes[routeID]; !used { - delete(b.cache.globalRoutes, routeID) - } - } -} - -func (b *NetworkMapBuilder) UpdatePeer(peer *nbpeer.Peer) { - b.cache.mu.Lock() - defer b.cache.mu.Unlock() - peerStored, ok := b.cache.globalPeers[peer.ID] - if !ok { - return - } - *peerStored = *peer -} diff --git a/management/server/types/policy.go b/management/server/types/policy.go index d4e1a8816..d410aec8d 100644 --- a/management/server/types/policy.go +++ b/management/server/types/policy.go @@ -93,6 +93,44 @@ func (p *Policy) Copy() *Policy { return c } +func (p *Policy) Equal(other *Policy) bool { + if p == nil || other == nil { + return p == other + } + + if p.ID != other.ID || + p.AccountID != other.AccountID || + p.Name != other.Name || + p.Description != other.Description || + p.Enabled != other.Enabled { + return false + } + + if !stringSlicesEqualUnordered(p.SourcePostureChecks, other.SourcePostureChecks) { + return false + } + + if len(p.Rules) != len(other.Rules) { + return false + } + + otherRules := make(map[string]*PolicyRule, len(other.Rules)) + for _, r := range other.Rules { + otherRules[r.ID] = r + } + for _, r := range p.Rules { + otherRule, ok := otherRules[r.ID] + if !ok { + return false + } + if !r.Equal(otherRule) { + return false + } + } + + return true +} + // EventMeta returns activity event meta related to this policy func (p *Policy) EventMeta() map[string]any { return map[string]any{"name": p.Name} diff --git a/management/server/types/policy_test.go b/management/server/types/policy_test.go new file mode 100644 index 000000000..b1d7aabc2 --- /dev/null +++ b/management/server/types/policy_test.go @@ -0,0 +1,193 @@ +package types + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestPolicyEqual_SameRulesDifferentOrder(t *testing.T) { + a := &Policy{ + ID: "pol1", + AccountID: "acc1", + Name: "test", + Enabled: true, + Rules: []*PolicyRule{ + {ID: "r1", PolicyID: "pol1", Ports: []string{"80"}}, + {ID: "r2", PolicyID: "pol1", Ports: []string{"443"}}, + }, + } + b := &Policy{ + ID: "pol1", + AccountID: "acc1", + Name: "test", + Enabled: true, + Rules: []*PolicyRule{ + {ID: "r2", PolicyID: "pol1", Ports: []string{"443"}}, + {ID: "r1", PolicyID: "pol1", Ports: []string{"80"}}, + }, + } + assert.True(t, a.Equal(b)) +} + +func TestPolicyEqual_DifferentRules(t *testing.T) { + a := &Policy{ + ID: "pol1", + Enabled: true, + Rules: []*PolicyRule{ + {ID: "r1", PolicyID: "pol1", Ports: []string{"80"}}, + }, + } + b := &Policy{ + ID: "pol1", + Enabled: true, + Rules: []*PolicyRule{ + {ID: "r1", PolicyID: "pol1", Ports: []string{"443"}}, + }, + } + assert.False(t, a.Equal(b)) +} + +func TestPolicyEqual_DifferentRuleCount(t *testing.T) { + a := &Policy{ + ID: "pol1", + Rules: []*PolicyRule{ + {ID: "r1", PolicyID: "pol1"}, + }, + } + b := &Policy{ + ID: "pol1", + Rules: []*PolicyRule{ + {ID: "r1", PolicyID: "pol1"}, + {ID: "r2", PolicyID: "pol1"}, + }, + } + assert.False(t, a.Equal(b)) +} + +func TestPolicyEqual_PostureChecksDifferentOrder(t *testing.T) { + a := &Policy{ + ID: "pol1", + SourcePostureChecks: []string{"pc3", "pc1", "pc2"}, + } + b := &Policy{ + ID: "pol1", + SourcePostureChecks: []string{"pc1", "pc2", "pc3"}, + } + assert.True(t, a.Equal(b)) +} + +func TestPolicyEqual_DifferentPostureChecks(t *testing.T) { + a := &Policy{ + ID: "pol1", + SourcePostureChecks: []string{"pc1", "pc2"}, + } + b := &Policy{ + ID: "pol1", + SourcePostureChecks: []string{"pc1", "pc3"}, + } + assert.False(t, a.Equal(b)) +} + +func TestPolicyEqual_DifferentScalarFields(t *testing.T) { + base := Policy{ + ID: "pol1", + AccountID: "acc1", + Name: "test", + Description: "desc", + Enabled: true, + } + + other := base + other.Name = "changed" + assert.False(t, base.Equal(&other)) + + other = base + other.Enabled = false + assert.False(t, base.Equal(&other)) + + other = base + other.Description = "changed" + assert.False(t, base.Equal(&other)) +} + +func TestPolicyEqual_NilCases(t *testing.T) { + var a *Policy + var b *Policy + assert.True(t, a.Equal(b)) + + a = &Policy{ID: "pol1"} + assert.False(t, a.Equal(nil)) +} + +func TestPolicyEqual_RulesMismatchByID(t *testing.T) { + a := &Policy{ + ID: "pol1", + Rules: []*PolicyRule{ + {ID: "r1", PolicyID: "pol1"}, + }, + } + b := &Policy{ + ID: "pol1", + Rules: []*PolicyRule{ + {ID: "r2", PolicyID: "pol1"}, + }, + } + assert.False(t, a.Equal(b)) +} + +func TestPolicyEqual_FullScenario(t *testing.T) { + a := &Policy{ + ID: "pol1", + AccountID: "acc1", + Name: "Web Access", + Description: "Allow web access", + Enabled: true, + SourcePostureChecks: []string{"pc2", "pc1"}, + Rules: []*PolicyRule{ + { + ID: "r1", + PolicyID: "pol1", + Name: "HTTP", + Enabled: true, + Action: PolicyTrafficActionAccept, + Protocol: PolicyRuleProtocolTCP, + Bidirectional: true, + Sources: []string{"g2", "g1"}, + Destinations: []string{"g4", "g3"}, + Ports: []string{"443", "80", "8080"}, + PortRanges: []RulePortRange{ + {Start: 8000, End: 9000}, + {Start: 80, End: 80}, + }, + }, + }, + } + b := &Policy{ + ID: "pol1", + AccountID: "acc1", + Name: "Web Access", + Description: "Allow web access", + Enabled: true, + SourcePostureChecks: []string{"pc1", "pc2"}, + Rules: []*PolicyRule{ + { + ID: "r1", + PolicyID: "pol1", + Name: "HTTP", + Enabled: true, + Action: PolicyTrafficActionAccept, + Protocol: PolicyRuleProtocolTCP, + Bidirectional: true, + Sources: []string{"g1", "g2"}, + Destinations: []string{"g3", "g4"}, + Ports: []string{"80", "8080", "443"}, + PortRanges: []RulePortRange{ + {Start: 80, End: 80}, + {Start: 8000, End: 9000}, + }, + }, + }, + } + assert.True(t, a.Equal(b)) +} diff --git a/management/server/types/policyrule.go b/management/server/types/policyrule.go index bb75dd555..52c494a6a 100644 --- a/management/server/types/policyrule.go +++ b/management/server/types/policyrule.go @@ -1,6 +1,8 @@ package types import ( + "slices" + "github.com/netbirdio/netbird/shared/management/proto" ) @@ -118,3 +120,106 @@ func (pm *PolicyRule) Copy() *PolicyRule { } return rule } + +func (pm *PolicyRule) Equal(other *PolicyRule) bool { + if pm == nil || other == nil { + return pm == other + } + + if pm.ID != other.ID || + pm.PolicyID != other.PolicyID || + pm.Name != other.Name || + pm.Description != other.Description || + pm.Enabled != other.Enabled || + pm.Action != other.Action || + pm.Bidirectional != other.Bidirectional || + pm.Protocol != other.Protocol || + pm.SourceResource != other.SourceResource || + pm.DestinationResource != other.DestinationResource || + pm.AuthorizedUser != other.AuthorizedUser { + return false + } + + if !stringSlicesEqualUnordered(pm.Sources, other.Sources) { + return false + } + if !stringSlicesEqualUnordered(pm.Destinations, other.Destinations) { + return false + } + if !stringSlicesEqualUnordered(pm.Ports, other.Ports) { + return false + } + if !portRangeSlicesEqualUnordered(pm.PortRanges, other.PortRanges) { + return false + } + if !authorizedGroupsEqual(pm.AuthorizedGroups, other.AuthorizedGroups) { + return false + } + + return true +} + +func stringSlicesEqualUnordered(a, b []string) bool { + if len(a) != len(b) { + return false + } + if len(a) == 0 { + return true + } + sorted1 := make([]string, len(a)) + sorted2 := make([]string, len(b)) + copy(sorted1, a) + copy(sorted2, b) + slices.Sort(sorted1) + slices.Sort(sorted2) + return slices.Equal(sorted1, sorted2) +} + +func portRangeSlicesEqualUnordered(a, b []RulePortRange) bool { + if len(a) != len(b) { + return false + } + if len(a) == 0 { + return true + } + cmp := func(x, y RulePortRange) int { + if x.Start != y.Start { + if x.Start < y.Start { + return -1 + } + return 1 + } + if x.End != y.End { + if x.End < y.End { + return -1 + } + return 1 + } + return 0 + } + sorted1 := make([]RulePortRange, len(a)) + sorted2 := make([]RulePortRange, len(b)) + copy(sorted1, a) + copy(sorted2, b) + slices.SortFunc(sorted1, cmp) + slices.SortFunc(sorted2, cmp) + return slices.EqualFunc(sorted1, sorted2, func(x, y RulePortRange) bool { + return x.Start == y.Start && x.End == y.End + }) +} + +func authorizedGroupsEqual(a, b map[string][]string) bool { + if len(a) != len(b) { + return false + } + for k, va := range a { + vb, ok := b[k] + if !ok { + return false + } + if !stringSlicesEqualUnordered(va, vb) { + return false + } + } + return true +} diff --git a/management/server/types/policyrule_test.go b/management/server/types/policyrule_test.go new file mode 100644 index 000000000..816e72abb --- /dev/null +++ b/management/server/types/policyrule_test.go @@ -0,0 +1,194 @@ +package types + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestPolicyRuleEqual_SamePortsDifferentOrder(t *testing.T) { + a := &PolicyRule{ + ID: "rule1", + PolicyID: "pol1", + Ports: []string{"443", "80", "22"}, + } + b := &PolicyRule{ + ID: "rule1", + PolicyID: "pol1", + Ports: []string{"22", "443", "80"}, + } + assert.True(t, a.Equal(b)) +} + +func TestPolicyRuleEqual_DifferentPorts(t *testing.T) { + a := &PolicyRule{ + ID: "rule1", + PolicyID: "pol1", + Ports: []string{"443", "80"}, + } + b := &PolicyRule{ + ID: "rule1", + PolicyID: "pol1", + Ports: []string{"443", "22"}, + } + assert.False(t, a.Equal(b)) +} + +func TestPolicyRuleEqual_SourcesDestinationsDifferentOrder(t *testing.T) { + a := &PolicyRule{ + ID: "rule1", + PolicyID: "pol1", + Sources: []string{"g1", "g2", "g3"}, + Destinations: []string{"g4", "g5"}, + } + b := &PolicyRule{ + ID: "rule1", + PolicyID: "pol1", + Sources: []string{"g3", "g1", "g2"}, + Destinations: []string{"g5", "g4"}, + } + assert.True(t, a.Equal(b)) +} + +func TestPolicyRuleEqual_DifferentSources(t *testing.T) { + a := &PolicyRule{ + ID: "rule1", + PolicyID: "pol1", + Sources: []string{"g1", "g2"}, + } + b := &PolicyRule{ + ID: "rule1", + PolicyID: "pol1", + Sources: []string{"g1", "g3"}, + } + assert.False(t, a.Equal(b)) +} + +func TestPolicyRuleEqual_PortRangesDifferentOrder(t *testing.T) { + a := &PolicyRule{ + ID: "rule1", + PolicyID: "pol1", + PortRanges: []RulePortRange{ + {Start: 8000, End: 9000}, + {Start: 80, End: 80}, + }, + } + b := &PolicyRule{ + ID: "rule1", + PolicyID: "pol1", + PortRanges: []RulePortRange{ + {Start: 80, End: 80}, + {Start: 8000, End: 9000}, + }, + } + assert.True(t, a.Equal(b)) +} + +func TestPolicyRuleEqual_DifferentPortRanges(t *testing.T) { + a := &PolicyRule{ + ID: "rule1", + PolicyID: "pol1", + PortRanges: []RulePortRange{ + {Start: 80, End: 80}, + }, + } + b := &PolicyRule{ + ID: "rule1", + PolicyID: "pol1", + PortRanges: []RulePortRange{ + {Start: 80, End: 443}, + }, + } + assert.False(t, a.Equal(b)) +} + +func TestPolicyRuleEqual_AuthorizedGroupsDifferentValueOrder(t *testing.T) { + a := &PolicyRule{ + ID: "rule1", + PolicyID: "pol1", + AuthorizedGroups: map[string][]string{ + "g1": {"u1", "u2", "u3"}, + }, + } + b := &PolicyRule{ + ID: "rule1", + PolicyID: "pol1", + AuthorizedGroups: map[string][]string{ + "g1": {"u3", "u1", "u2"}, + }, + } + assert.True(t, a.Equal(b)) +} + +func TestPolicyRuleEqual_DifferentAuthorizedGroups(t *testing.T) { + a := &PolicyRule{ + ID: "rule1", + PolicyID: "pol1", + AuthorizedGroups: map[string][]string{ + "g1": {"u1"}, + }, + } + b := &PolicyRule{ + ID: "rule1", + PolicyID: "pol1", + AuthorizedGroups: map[string][]string{ + "g2": {"u1"}, + }, + } + assert.False(t, a.Equal(b)) +} + +func TestPolicyRuleEqual_DifferentScalarFields(t *testing.T) { + base := PolicyRule{ + ID: "rule1", + PolicyID: "pol1", + Name: "test", + Description: "desc", + Enabled: true, + Action: PolicyTrafficActionAccept, + Bidirectional: true, + Protocol: PolicyRuleProtocolTCP, + } + + other := base + other.Name = "changed" + assert.False(t, base.Equal(&other)) + + other = base + other.Enabled = false + assert.False(t, base.Equal(&other)) + + other = base + other.Action = PolicyTrafficActionDrop + assert.False(t, base.Equal(&other)) + + other = base + other.Protocol = PolicyRuleProtocolUDP + assert.False(t, base.Equal(&other)) +} + +func TestPolicyRuleEqual_NilCases(t *testing.T) { + var a *PolicyRule + var b *PolicyRule + assert.True(t, a.Equal(b)) + + a = &PolicyRule{ID: "rule1"} + assert.False(t, a.Equal(nil)) +} + +func TestPolicyRuleEqual_EmptySlices(t *testing.T) { + a := &PolicyRule{ + ID: "rule1", + PolicyID: "pol1", + Ports: []string{}, + Sources: nil, + } + b := &PolicyRule{ + ID: "rule1", + PolicyID: "pol1", + Ports: nil, + Sources: []string{}, + } + assert.True(t, a.Equal(b)) +} + diff --git a/management/server/user_test.go b/management/server/user_test.go index 8fdfbd633..c77ea53d1 100644 --- a/management/server/user_test.go +++ b/management/server/user_test.go @@ -1586,7 +1586,7 @@ func TestUserAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) @@ -1609,7 +1609,7 @@ func TestUserAccountPeersUpdate(t *testing.T) { select { case <-done: - case <-time.After(time.Second): + case <-time.After(peerUpdateTimeout): t.Error("timeout waiting for peerShouldReceiveUpdate") } }) diff --git a/proxy/internal/auth/middleware.go b/proxy/internal/auth/middleware.go index 055e4510f..3b383f8b4 100644 --- a/proxy/internal/auth/middleware.go +++ b/proxy/internal/auth/middleware.go @@ -433,6 +433,7 @@ func setSessionCookie(w http.ResponseWriter, token string, expiration time.Durat http.SetCookie(w, &http.Cookie{ Name: auth.SessionCookieName, Value: token, + Path: "/", HttpOnly: true, Secure: true, SameSite: http.SameSiteLaxMode, diff --git a/proxy/internal/auth/middleware_test.go b/proxy/internal/auth/middleware_test.go index 16d09800c..2c93d7912 100644 --- a/proxy/internal/auth/middleware_test.go +++ b/proxy/internal/auth/middleware_test.go @@ -391,6 +391,15 @@ func TestProtect_SchemeAuthRedirectsWithCookie(t *testing.T) { assert.Equal(t, http.SameSiteLaxMode, sessionCookie.SameSite) } +func TestSetSessionCookieHasRootPath(t *testing.T) { + w := httptest.NewRecorder() + setSessionCookie(w, "test-token", time.Hour) + + cookies := w.Result().Cookies() + require.Len(t, cookies, 1) + assert.Equal(t, "/", cookies[0].Path, "session cookie must be scoped to root so it applies to all paths") +} + func TestProtect_FailedAuthDoesNotSetCookie(t *testing.T) { mw := NewMiddleware(log.StandardLogger(), nil, nil) kp := generateTestKeyPair(t) diff --git a/shared/management/client/grpc.go b/shared/management/client/grpc.go index a01e51abc..e9bea7ffb 100644 --- a/shared/management/client/grpc.go +++ b/shared/management/client/grpc.go @@ -30,6 +30,8 @@ import ( const ConnectTimeout = 10 * time.Second +const healthCheckTimeout = 5 * time.Second + const ( // EnvMaxRecvMsgSize overrides the default gRPC max receive message size (4 MB) // for the management client connection. Value is in bytes. @@ -532,7 +534,7 @@ func (c *GrpcClient) IsHealthy() bool { case connectivity.Ready: } - ctx, cancel := context.WithTimeout(c.ctx, 1*time.Second) + ctx, cancel := context.WithTimeout(c.ctx, healthCheckTimeout) defer cancel() _, err := c.realClient.GetServerKey(ctx, &proto.Empty{}) diff --git a/shared/signal/client/grpc.go b/shared/signal/client/grpc.go index 5368b57a2..d0f598dd7 100644 --- a/shared/signal/client/grpc.go +++ b/shared/signal/client/grpc.go @@ -23,6 +23,8 @@ import ( "github.com/netbirdio/netbird/util/wsproxy" ) +const healthCheckTimeout = 5 * time.Second + // ConnStateNotifier is a wrapper interface of the status recorder type ConnStateNotifier interface { MarkSignalDisconnected(error) @@ -263,7 +265,7 @@ func (c *GrpcClient) IsHealthy() bool { case connectivity.Ready: } - ctx, cancel := context.WithTimeout(c.ctx, 1*time.Second) + ctx, cancel := context.WithTimeout(c.ctx, healthCheckTimeout) defer cancel() _, err := c.realClient.Send(ctx, &proto.EncryptedMessage{ Key: c.key.PublicKey().String(),