From f538e6e9ae215d1e4baf48f93bbe6fbcb0f1e303 Mon Sep 17 00:00:00 2001 From: Viktor Liu <17948409+lixmal@users.noreply.github.com> Date: Fri, 5 Dec 2025 03:29:27 +0100 Subject: [PATCH 01/13] [client] Use setsid to avoid the parent process from being killed via HUP by login (#4900) --- client/ssh/server/command_execution_js.go | 5 +++ client/ssh/server/command_execution_unix.go | 26 ++++++++++++- .../ssh/server/command_execution_windows.go | 5 +++ client/ssh/server/server.go | 4 +- client/ssh/server/userswitching_unix.go | 39 ++++++++++++++++--- 5 files changed, 71 insertions(+), 8 deletions(-) diff --git a/client/ssh/server/command_execution_js.go b/client/ssh/server/command_execution_js.go index 6473f8273..01759a337 100644 --- a/client/ssh/server/command_execution_js.go +++ b/client/ssh/server/command_execution_js.go @@ -42,6 +42,11 @@ func (s *Server) detectSuPtySupport(context.Context) bool { return false } +// detectUtilLinuxLogin always returns false on JS/WASM +func (s *Server) detectUtilLinuxLogin(context.Context) bool { + return false +} + // executeCommandWithPty is not supported on JS/WASM func (s *Server) executeCommandWithPty(logger *log.Entry, session ssh.Session, execCmd *exec.Cmd, privilegeResult PrivilegeCheckResult, ptyReq ssh.Pty, winCh <-chan ssh.Window) bool { logger.Errorf("PTY command execution not supported on JS/WASM") diff --git a/client/ssh/server/command_execution_unix.go b/client/ssh/server/command_execution_unix.go index da059fed9..db1a9bcfe 100644 --- a/client/ssh/server/command_execution_unix.go +++ b/client/ssh/server/command_execution_unix.go @@ -10,6 +10,7 @@ import ( "os" "os/exec" "os/user" + "runtime" "strings" "sync" "syscall" @@ -75,6 +76,29 @@ func (s *Server) detectSuPtySupport(ctx context.Context) bool { return supported } +// detectUtilLinuxLogin checks if login is from util-linux (vs shadow-utils). +// util-linux login uses vhangup() which requires setsid wrapper to avoid killing parent. +// See https://bugs.debian.org/1078023 for details. +func (s *Server) detectUtilLinuxLogin(ctx context.Context) bool { + if runtime.GOOS != "linux" { + return false + } + + ctx, cancel := context.WithTimeout(ctx, 500*time.Millisecond) + defer cancel() + + cmd := exec.CommandContext(ctx, "login", "--version") + output, err := cmd.CombinedOutput() + if err != nil { + log.Debugf("login --version failed (likely shadow-utils): %v", err) + return false + } + + isUtilLinux := strings.Contains(string(output), "util-linux") + log.Debugf("util-linux login detected: %v", isUtilLinux) + return isUtilLinux +} + // createSuCommand creates a command using su -l -c for privilege switching func (s *Server) createSuCommand(session ssh.Session, localUser *user.User, hasPty bool) (*exec.Cmd, error) { suPath, err := exec.LookPath("su") @@ -144,7 +168,7 @@ func (s *Server) handlePty(logger *log.Entry, session ssh.Session, privilegeResu return false } - logger.Infof("starting interactive shell: %s", execCmd.Path) + logger.Infof("starting interactive shell: %s", strings.Join(execCmd.Args, " ")) return s.runPtyCommand(logger, session, execCmd, ptyReq, winCh) } diff --git a/client/ssh/server/command_execution_windows.go b/client/ssh/server/command_execution_windows.go index 37b3ae0ee..998796871 100644 --- a/client/ssh/server/command_execution_windows.go +++ b/client/ssh/server/command_execution_windows.go @@ -383,6 +383,11 @@ func (s *Server) detectSuPtySupport(context.Context) bool { return false } +// detectUtilLinuxLogin always returns false on Windows +func (s *Server) detectUtilLinuxLogin(context.Context) bool { + return false +} + // executeCommandWithPty executes a command with PTY allocation on Windows using ConPty func (s *Server) executeCommandWithPty(logger *log.Entry, session ssh.Session, execCmd *exec.Cmd, privilegeResult PrivilegeCheckResult, ptyReq ssh.Pty, winCh <-chan ssh.Window) bool { command := session.RawCommand() diff --git a/client/ssh/server/server.go b/client/ssh/server/server.go index 44612532b..37763ee0e 100644 --- a/client/ssh/server/server.go +++ b/client/ssh/server/server.go @@ -138,7 +138,8 @@ type Server struct { jwtExtractor *jwt.ClaimsExtractor jwtConfig *JWTConfig - suSupportsPty bool + suSupportsPty bool + loginIsUtilLinux bool } type JWTConfig struct { @@ -193,6 +194,7 @@ func (s *Server) Start(ctx context.Context, addr netip.AddrPort) error { } s.suSupportsPty = s.detectSuPtySupport(ctx) + s.loginIsUtilLinux = s.detectUtilLinuxLogin(ctx) ln, addrDesc, err := s.createListener(ctx, addr) if err != nil { diff --git a/client/ssh/server/userswitching_unix.go b/client/ssh/server/userswitching_unix.go index 06fefabd7..bc1557419 100644 --- a/client/ssh/server/userswitching_unix.go +++ b/client/ssh/server/userswitching_unix.go @@ -87,11 +87,8 @@ func (s *Server) getLoginCmd(username string, remoteAddr net.Addr) (string, []st switch runtime.GOOS { case "linux": - // Special handling for Arch Linux without /etc/pam.d/remote - if s.fileExists("/etc/arch-release") && !s.fileExists("/etc/pam.d/remote") { - return loginPath, []string{"-f", username, "-p"}, nil - } - return loginPath, []string{"-f", username, "-h", addrPort.Addr().String(), "-p"}, nil + p, a := s.getLinuxLoginCmd(loginPath, username, addrPort.Addr().String()) + return p, a, nil case "darwin", "freebsd", "openbsd", "netbsd", "dragonfly": return loginPath, []string{"-fp", "-h", addrPort.Addr().String(), username}, nil default: @@ -99,7 +96,37 @@ func (s *Server) getLoginCmd(username string, remoteAddr net.Addr) (string, []st } } -// fileExists checks if a file exists (helper for login command logic) +// getLinuxLoginCmd returns the login command for Linux systems. +// Handles differences between util-linux and shadow-utils login implementations. +func (s *Server) getLinuxLoginCmd(loginPath, username, remoteIP string) (string, []string) { + // Special handling for Arch Linux without /etc/pam.d/remote + var loginArgs []string + if s.fileExists("/etc/arch-release") && !s.fileExists("/etc/pam.d/remote") { + loginArgs = []string{"-f", username, "-p"} + } else { + loginArgs = []string{"-f", username, "-h", remoteIP, "-p"} + } + + // util-linux login requires setsid -c to create a new session and set the + // controlling terminal. Without this, vhangup() kills the parent process. + // See https://bugs.debian.org/1078023 for details. + // TODO: handle this via the executor using syscall.Setsid() + TIOCSCTTY + syscall.Exec() + // to avoid external setsid dependency. + if !s.loginIsUtilLinux { + return loginPath, loginArgs + } + + setsidPath, err := exec.LookPath("setsid") + if err != nil { + log.Warnf("setsid not available but util-linux login detected, login may fail: %v", err) + return loginPath, loginArgs + } + + args := append([]string{"-w", "-c", loginPath}, loginArgs...) + return setsidPath, args +} + +// fileExists checks if a file exists func (s *Server) fileExists(path string) bool { _, err := os.Stat(path) return err == nil From 3f4f825ec14c45dd149a743eb80e98e7bb5e9ec5 Mon Sep 17 00:00:00 2001 From: Viktor Liu <17948409+lixmal@users.noreply.github.com> Date: Fri, 5 Dec 2025 17:42:49 +0100 Subject: [PATCH 02/13] [client] Fix DNS forwarder returning broken records on 4 to 6 mapped IP addresses (#4887) --- client/internal/dnsfwd/forwarder.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/client/internal/dnsfwd/forwarder.go b/client/internal/dnsfwd/forwarder.go index aef16a8cf..6b8042ccb 100644 --- a/client/internal/dnsfwd/forwarder.go +++ b/client/internal/dnsfwd/forwarder.go @@ -234,6 +234,11 @@ func (f *DNSForwarder) handleDNSQuery(w dns.ResponseWriter, query *dns.Msg) *dns return nil } + // Unmap IPv4-mapped IPv6 addresses that some resolvers may return + for i, ip := range ips { + ips[i] = ip.Unmap() + } + f.updateInternalState(ips, mostSpecificResId, matchingEntries) f.addIPsToResponse(resp, domain, ips) f.cache.set(domain, question.Qtype, ips) From 44851e06fbaaf02f2cde642c40ff6cecea0a3c3f Mon Sep 17 00:00:00 2001 From: Pascal Fischer <32096965+pascal-fischer@users.noreply.github.com> Date: Wed, 10 Dec 2025 19:26:51 +0100 Subject: [PATCH 03/13] [management] cleanup logs (#4933) --- management/internals/shared/grpc/server.go | 29 +-------- management/internals/shared/grpc/token_mgr.go | 6 +- .../server/http/middleware/auth_middleware.go | 2 +- management/server/peer.go | 5 +- management/server/posture/os_version.go | 4 +- management/server/settings/manager.go | 8 --- management/server/store/sql_store.go | 63 +------------------ 7 files changed, 9 insertions(+), 108 deletions(-) diff --git a/management/internals/shared/grpc/server.go b/management/internals/shared/grpc/server.go index 62dc215d8..16950db5e 100644 --- a/management/internals/shared/grpc/server.go +++ b/management/internals/shared/grpc/server.go @@ -134,10 +134,6 @@ func (s *Server) GetServerKey(ctx context.Context, req *proto.Empty) (*proto.Ser } log.WithContext(ctx).Tracef("GetServerKey request from %s", ip) - start := time.Now() - defer func() { - log.WithContext(ctx).Tracef("GetServerKey from %s took %v", ip, time.Since(start)) - }() // todo introduce something more meaningful with the key expiration/rotation if s.appMetrics != nil { @@ -222,8 +218,6 @@ func (s *Server) Sync(req *proto.EncryptedMessage, srv proto.ManagementService_S return err } - log.WithContext(ctx).Debugf("Sync: GetAccountIDForPeerKey since start %v", time.Since(reqStart)) - // nolint:staticcheck ctx = context.WithValue(ctx, nbContext.AccountIDKey, accountID) @@ -235,7 +229,6 @@ func (s *Server) Sync(req *proto.EncryptedMessage, srv proto.ManagementService_S } }() log.WithContext(ctx).Tracef("acquired peer lock for peer %s took %v", peerKey.String(), time.Since(start)) - log.WithContext(ctx).Debugf("Sync: acquirePeerLockByUID since start %v", time.Since(reqStart)) log.WithContext(ctx).Debugf("Sync request from peer [%s] [%s]", req.WgPubKey, sRealIP) @@ -352,7 +345,7 @@ func (s *Server) cancelPeerRoutines(ctx context.Context, accountID string, peer s.networkMapController.OnPeerDisconnected(ctx, accountID, peer.ID) s.secretsManager.CancelRefresh(peer.ID) - log.WithContext(ctx).Tracef("peer %s has been disconnected", peer.Key) + log.WithContext(ctx).Debugf("peer %s has been disconnected", peer.Key) } func (s *Server) validateToken(ctx context.Context, jwtToken string) (string, error) { @@ -561,16 +554,10 @@ func (s *Server) Login(ctx context.Context, req *proto.EncryptedMessage) (*proto //nolint ctx = context.WithValue(ctx, nbContext.AccountIDKey, accountID) - log.WithContext(ctx).Debugf("Login: GetAccountIDForPeerKey since start %v", time.Since(reqStart)) - defer func() { if s.appMetrics != nil { s.appMetrics.GRPCMetrics().CountLoginRequestDuration(time.Since(reqStart), accountID) } - took := time.Since(reqStart) - if took > 7*time.Second { - log.WithContext(ctx).Debugf("Login: took %v", time.Since(reqStart)) - } }() if loginReq.GetMeta() == nil { @@ -604,16 +591,12 @@ func (s *Server) Login(ctx context.Context, req *proto.EncryptedMessage) (*proto return nil, mapError(ctx, err) } - log.WithContext(ctx).Debugf("Login: LoginPeer since start %v", time.Since(reqStart)) - loginResp, err := s.prepareLoginResponse(ctx, peer, netMap, postureChecks) if err != nil { log.WithContext(ctx).Warnf("failed preparing login response for peer %s: %s", peerKey, err) return nil, status.Errorf(codes.Internal, "failed logging in peer") } - log.WithContext(ctx).Debugf("Login: prepareLoginResponse since start %v", time.Since(reqStart)) - key, err := s.secretsManager.GetWGKey() if err != nil { log.WithContext(ctx).Warnf("failed getting server's WireGuard private key: %s", err) @@ -730,12 +713,10 @@ func (s *Server) sendInitialSync(ctx context.Context, peerKey wgtypes.Key, peer return status.Errorf(codes.Internal, "error handling request") } - sendStart := time.Now() err = srv.Send(&proto.EncryptedMessage{ WgPubKey: key.PublicKey().String(), Body: encryptedResp, }) - log.WithContext(ctx).Debugf("sendInitialSync: sending response took %s", time.Since(sendStart)) if err != nil { log.WithContext(ctx).Errorf("failed sending SyncResponse %v", err) @@ -750,10 +731,6 @@ func (s *Server) sendInitialSync(ctx context.Context, peerKey wgtypes.Key, peer // which will be used by our clients to Login func (s *Server) GetDeviceAuthorizationFlow(ctx context.Context, req *proto.EncryptedMessage) (*proto.EncryptedMessage, error) { log.WithContext(ctx).Tracef("GetDeviceAuthorizationFlow request for pubKey: %s", req.WgPubKey) - start := time.Now() - defer func() { - log.WithContext(ctx).Tracef("GetDeviceAuthorizationFlow for pubKey: %s took %v", req.WgPubKey, time.Since(start)) - }() peerKey, err := wgtypes.ParseKey(req.GetWgPubKey()) if err != nil { @@ -813,10 +790,6 @@ func (s *Server) GetDeviceAuthorizationFlow(ctx context.Context, req *proto.Encr // which will be used by our clients to Login func (s *Server) GetPKCEAuthorizationFlow(ctx context.Context, req *proto.EncryptedMessage) (*proto.EncryptedMessage, error) { log.WithContext(ctx).Tracef("GetPKCEAuthorizationFlow request for pubKey: %s", req.WgPubKey) - start := time.Now() - defer func() { - log.WithContext(ctx).Tracef("GetPKCEAuthorizationFlow for pubKey %s took %v", req.WgPubKey, time.Since(start)) - }() peerKey, err := wgtypes.ParseKey(req.GetWgPubKey()) if err != nil { diff --git a/management/internals/shared/grpc/token_mgr.go b/management/internals/shared/grpc/token_mgr.go index 0f893ae3a..ccb32202f 100644 --- a/management/internals/shared/grpc/token_mgr.go +++ b/management/internals/shared/grpc/token_mgr.go @@ -167,7 +167,7 @@ func (m *TimeBasedAuthSecretsManager) SetupRefresh(ctx context.Context, accountI relayCancel := make(chan struct{}, 1) m.relayCancelMap[peerID] = relayCancel go m.refreshRelayTokens(ctx, accountID, peerID, relayCancel) - log.WithContext(ctx).Debugf("starting relay refresh for %s", peerID) + log.WithContext(ctx).Tracef("starting relay refresh for %s", peerID) } } @@ -178,7 +178,7 @@ func (m *TimeBasedAuthSecretsManager) refreshTURNTokens(ctx context.Context, acc for { select { case <-cancel: - log.WithContext(ctx).Debugf("stopping TURN refresh for %s", peerID) + log.WithContext(ctx).Tracef("stopping TURN refresh for %s", peerID) return case <-ticker.C: m.pushNewTURNAndRelayTokens(ctx, accountID, peerID) @@ -193,7 +193,7 @@ func (m *TimeBasedAuthSecretsManager) refreshRelayTokens(ctx context.Context, ac for { select { case <-cancel: - log.WithContext(ctx).Debugf("stopping relay refresh for %s", peerID) + log.WithContext(ctx).Tracef("stopping relay refresh for %s", peerID) return case <-ticker.C: m.pushNewRelayTokens(ctx, accountID, peerID) diff --git a/management/server/http/middleware/auth_middleware.go b/management/server/http/middleware/auth_middleware.go index ffd7e0b93..38cf0c290 100644 --- a/management/server/http/middleware/auth_middleware.go +++ b/management/server/http/middleware/auth_middleware.go @@ -141,7 +141,7 @@ func (m *AuthMiddleware) checkJWTFromRequest(r *http.Request, authHeaderParts [] } if userAuth.AccountId != accountId { - log.WithContext(ctx).Debugf("Auth middleware sets accountId from ensure, before %s, now %s", userAuth.AccountId, accountId) + log.WithContext(ctx).Tracef("Auth middleware sets accountId from ensure, before %s, now %s", userAuth.AccountId, accountId) userAuth.AccountId = accountId } diff --git a/management/server/peer.go b/management/server/peer.go index f2de05f15..49f5bf2a5 100644 --- a/management/server/peer.go +++ b/management/server/peer.go @@ -172,7 +172,7 @@ func updatePeerStatusAndLocation(ctx context.Context, geo geolocation.Geolocatio } } - log.WithContext(ctx).Tracef("saving peer status for peer %s is connected: %t", peer.ID, connected) + log.WithContext(ctx).Debugf("saving peer status for peer %s is connected: %t", peer.ID, connected) err := transaction.SavePeerStatus(ctx, accountID, peer.ID, *newStatus) if err != nil { @@ -783,7 +783,6 @@ func (am *DefaultAccountManager) LoginPeer(ctx context.Context, login types.Peer return nil, nil, nil, err } - startTransaction := time.Now() err = am.Store.ExecuteInTransaction(ctx, func(transaction store.Store) error { peer, err = transaction.GetPeerByPeerPubKey(ctx, store.LockingStrengthUpdate, login.WireGuardPubKey) if err != nil { @@ -853,8 +852,6 @@ func (am *DefaultAccountManager) LoginPeer(ctx context.Context, login types.Peer return nil, nil, nil, err } - log.WithContext(ctx).Debugf("LoginPeer: transaction took %v", time.Since(startTransaction)) - if updateRemotePeers || isStatusChanged || (isPeerUpdated && len(postureChecks) > 0) { err = am.networkMapController.OnPeersUpdated(ctx, accountID, []string{peer.ID}) if err != nil { diff --git a/management/server/posture/os_version.go b/management/server/posture/os_version.go index 411f4c2c6..2ef97a066 100644 --- a/management/server/posture/os_version.go +++ b/management/server/posture/os_version.go @@ -82,7 +82,7 @@ func (c *OSVersionCheck) Validate() error { func checkMinVersion(ctx context.Context, peerGoOS, peerVersion string, check *MinVersionCheck) (bool, error) { if check == nil { - log.WithContext(ctx).Debugf("peer %s OS is not allowed in the check", peerGoOS) + log.WithContext(ctx).Tracef("peer %s OS is not allowed in the check", peerGoOS) return false, nil } @@ -107,7 +107,7 @@ func checkMinVersion(ctx context.Context, peerGoOS, peerVersion string, check *M func checkMinKernelVersion(ctx context.Context, peerGoOS, peerVersion string, check *MinKernelVersionCheck) (bool, error) { if check == nil { - log.WithContext(ctx).Debugf("peer %s OS is not allowed in the check", peerGoOS) + log.WithContext(ctx).Tracef("peer %s OS is not allowed in the check", peerGoOS) return false, nil } diff --git a/management/server/settings/manager.go b/management/server/settings/manager.go index f16b609f8..2b2896572 100644 --- a/management/server/settings/manager.go +++ b/management/server/settings/manager.go @@ -5,9 +5,6 @@ package settings import ( "context" "fmt" - "time" - - log "github.com/sirupsen/logrus" "github.com/netbirdio/netbird/management/server/activity" "github.com/netbirdio/netbird/management/server/integrations/extra_settings" @@ -48,11 +45,6 @@ func (m *managerImpl) GetExtraSettingsManager() extra_settings.Manager { } func (m *managerImpl) GetSettings(ctx context.Context, accountID, userID string) (*types.Settings, error) { - start := time.Now() - defer func() { - log.WithContext(ctx).Debugf("GetSettings took %s", time.Since(start)) - }() - if userID != activity.SystemInitiator { ok, err := m.permissionsManager.ValidateUserPermissions(ctx, accountID, userID, modules.Settings, operations.Read) if err != nil { diff --git a/management/server/store/sql_store.go b/management/server/store/sql_store.go index 94b7fc1cc..4aa3de499 100644 --- a/management/server/store/sql_store.go +++ b/management/server/store/sql_store.go @@ -27,7 +27,6 @@ import ( "gorm.io/gorm/logger" nbdns "github.com/netbirdio/netbird/dns" - nbcontext "github.com/netbirdio/netbird/management/server/context" resourceTypes "github.com/netbirdio/netbird/management/server/networks/resources/types" routerTypes "github.com/netbirdio/netbird/management/server/networks/routers/types" networkTypes "github.com/netbirdio/netbird/management/server/networks/types" @@ -288,7 +287,7 @@ func (s *SqlStore) DeleteAccount(ctx context.Context, account *types.Account) er if s.metrics != nil { s.metrics.StoreMetrics().CountPersistenceDuration(took) } - log.WithContext(ctx).Debugf("took %d ms to delete an account to the store", took.Milliseconds()) + log.WithContext(ctx).Tracef("took %d ms to delete an account to the store", took.Milliseconds()) return err } @@ -583,9 +582,6 @@ func (s *SqlStore) GetUserByPATID(ctx context.Context, lockStrength LockingStren } func (s *SqlStore) GetUserByUserID(ctx context.Context, lockStrength LockingStrength, userID string) (*types.User, error) { - ctx, cancel := getDebuggingCtx(ctx) - defer cancel() - tx := s.db if lockStrength != LockingStrengthNone { tx = tx.Clauses(clause.Locking{Strength: string(lockStrength)}) @@ -2152,9 +2148,6 @@ func (s *SqlStore) GetPeerLabelsInAccount(ctx context.Context, lockStrength Lock } func (s *SqlStore) GetAccountNetwork(ctx context.Context, lockStrength LockingStrength, accountID string) (*types.Network, error) { - ctx, cancel := getDebuggingCtx(ctx) - defer cancel() - tx := s.db if lockStrength != LockingStrengthNone { tx = tx.Clauses(clause.Locking{Strength: string(lockStrength)}) @@ -2171,9 +2164,6 @@ func (s *SqlStore) GetAccountNetwork(ctx context.Context, lockStrength LockingSt } func (s *SqlStore) GetPeerByPeerPubKey(ctx context.Context, lockStrength LockingStrength, peerKey string) (*nbpeer.Peer, error) { - ctx, cancel := getDebuggingCtx(ctx) - defer cancel() - tx := s.db if lockStrength != LockingStrengthNone { tx = tx.Clauses(clause.Locking{Strength: string(lockStrength)}) @@ -2229,9 +2219,6 @@ func (s *SqlStore) GetAccountCreatedBy(ctx context.Context, lockStrength Locking // SaveUserLastLogin stores the last login time for a user in DB. func (s *SqlStore) SaveUserLastLogin(ctx context.Context, accountID, userID string, lastLogin time.Time) error { - ctx, cancel := getDebuggingCtx(ctx) - defer cancel() - var user types.User result := s.db.WithContext(ctx).Take(&user, accountAndIDQueryCondition, accountID, userID) if result.Error != nil { @@ -2491,9 +2478,6 @@ func NewMysqlStoreFromSqlStore(ctx context.Context, sqliteStore *SqlStore, dsn s } func (s *SqlStore) GetSetupKeyBySecret(ctx context.Context, lockStrength LockingStrength, key string) (*types.SetupKey, error) { - ctx, cancel := getDebuggingCtx(ctx) - defer cancel() - tx := s.db if lockStrength != LockingStrengthNone { tx = tx.Clauses(clause.Locking{Strength: string(lockStrength)}) @@ -2514,9 +2498,6 @@ func (s *SqlStore) GetSetupKeyBySecret(ctx context.Context, lockStrength Locking } func (s *SqlStore) IncrementSetupKeyUsage(ctx context.Context, setupKeyID string) error { - ctx, cancel := getDebuggingCtx(ctx) - defer cancel() - result := s.db.WithContext(ctx).Model(&types.SetupKey{}). Where(idQueryCondition, setupKeyID). Updates(map[string]interface{}{ @@ -2537,9 +2518,6 @@ func (s *SqlStore) IncrementSetupKeyUsage(ctx context.Context, setupKeyID string // AddPeerToAllGroup adds a peer to the 'All' group. Method always needs to run in a transaction func (s *SqlStore) AddPeerToAllGroup(ctx context.Context, accountID string, peerID string) error { - ctx, cancel := getDebuggingCtx(ctx) - defer cancel() - var groupID string _ = s.db.WithContext(ctx).Model(types.Group{}). Select("id"). @@ -2569,9 +2547,6 @@ func (s *SqlStore) AddPeerToAllGroup(ctx context.Context, accountID string, peer // AddPeerToGroup adds a peer to a group func (s *SqlStore) AddPeerToGroup(ctx context.Context, accountID, peerID, groupID string) error { - ctx, cancel := getDebuggingCtx(ctx) - defer cancel() - peer := &types.GroupPeer{ AccountID: accountID, GroupID: groupID, @@ -2768,9 +2743,6 @@ func (s *SqlStore) GetUserPeers(ctx context.Context, lockStrength LockingStrengt } func (s *SqlStore) AddPeerToAccount(ctx context.Context, peer *nbpeer.Peer) error { - ctx, cancel := getDebuggingCtx(ctx) - defer cancel() - if err := s.db.WithContext(ctx).Create(peer).Error; err != nil { return status.Errorf(status.Internal, "issue adding peer to account: %s", err) } @@ -2897,9 +2869,6 @@ func (s *SqlStore) DeletePeer(ctx context.Context, accountID string, peerID stri } func (s *SqlStore) IncrementNetworkSerial(ctx context.Context, accountId string) error { - ctx, cancel := getDebuggingCtx(ctx) - defer cancel() - result := s.db.WithContext(ctx).Model(&types.Account{}).Where(idQueryCondition, accountId).Update("network_serial", gorm.Expr("network_serial + 1")) if result.Error != nil { log.WithContext(ctx).Errorf("failed to increment network serial count in store: %v", result.Error) @@ -4022,36 +3991,6 @@ func (s *SqlStore) GetAccountGroupPeers(ctx context.Context, lockStrength Lockin return groupPeers, nil } -func getDebuggingCtx(grpcCtx context.Context) (context.Context, context.CancelFunc) { - ctx, cancel := context.WithTimeout(context.Background(), time.Minute) - userID, ok := grpcCtx.Value(nbcontext.UserIDKey).(string) - if ok { - //nolint - ctx = context.WithValue(ctx, nbcontext.UserIDKey, userID) - } - - requestID, ok := grpcCtx.Value(nbcontext.RequestIDKey).(string) - if ok { - //nolint - ctx = context.WithValue(ctx, nbcontext.RequestIDKey, requestID) - } - - accountID, ok := grpcCtx.Value(nbcontext.AccountIDKey).(string) - if ok { - //nolint - ctx = context.WithValue(ctx, nbcontext.AccountIDKey, accountID) - } - - go func() { - select { - case <-ctx.Done(): - case <-grpcCtx.Done(): - log.WithContext(grpcCtx).Warnf("grpc context ended early, error: %v", grpcCtx.Err()) - } - }() - return ctx, cancel -} - func (s *SqlStore) IsPrimaryAccount(ctx context.Context, accountID string) (bool, string, error) { var info types.PrimaryAccountInfo result := s.db.Model(&types.Account{}). From 94d34dc0c5cc9ea353438adfbe4fd613566be2b2 Mon Sep 17 00:00:00 2001 From: Pascal Fischer <32096965+pascal-fischer@users.noreply.github.com> Date: Thu, 11 Dec 2025 18:29:15 +0100 Subject: [PATCH 04/13] [management] monitoring updates (#4937) --- management/internals/shared/grpc/server.go | 13 ++++--------- management/server/account.go | 7 +++++++ management/server/telemetry/grpc_metrics.go | 18 ------------------ .../server/telemetry/http_api_metrics.go | 12 ++++++++++++ 4 files changed, 23 insertions(+), 27 deletions(-) diff --git a/management/internals/shared/grpc/server.go b/management/internals/shared/grpc/server.go index 16950db5e..6029dc8bf 100644 --- a/management/internals/shared/grpc/server.go +++ b/management/internals/shared/grpc/server.go @@ -171,8 +171,6 @@ func (s *Server) Sync(req *proto.EncryptedMessage, srv proto.ManagementService_S } s.syncSem.Add(1) - reqStart := time.Now() - ctx := srv.Context() syncReq := &proto.SyncRequest{} @@ -190,7 +188,7 @@ func (s *Server) Sync(req *proto.EncryptedMessage, srv proto.ManagementService_S s.appMetrics.GRPCMetrics().CountSyncRequestBlocked() } if s.logBlockedPeers { - log.WithContext(ctx).Warnf("peer %s with meta hash %d is blocked from syncing", peerKey.String(), metahashed) + log.WithContext(ctx).Tracef("peer %s with meta hash %d is blocked from syncing", peerKey.String(), metahashed) } if s.blockPeersWithSameConfig { s.syncSem.Add(-1) @@ -263,10 +261,6 @@ func (s *Server) Sync(req *proto.EncryptedMessage, srv proto.ManagementService_S s.secretsManager.SetupRefresh(ctx, accountID, peer.ID) - if s.appMetrics != nil { - s.appMetrics.GRPCMetrics().CountSyncRequestDuration(time.Since(reqStart), accountID) - } - unlock() unlock = nil @@ -518,7 +512,6 @@ func (s *Server) Login(ctx context.Context, req *proto.EncryptedMessage) (*proto reqStart := time.Now() realIP := getRealIP(ctx) sRealIP := realIP.String() - log.WithContext(ctx).Debugf("Login request from peer [%s] [%s]", req.WgPubKey, sRealIP) loginReq := &proto.LoginRequest{} peerKey, err := s.parseRequest(ctx, req, loginReq) @@ -530,7 +523,7 @@ func (s *Server) Login(ctx context.Context, req *proto.EncryptedMessage) (*proto metahashed := metaHash(peerMeta, sRealIP) if !s.loginFilter.allowLogin(peerKey.String(), metahashed) { if s.logBlockedPeers { - log.WithContext(ctx).Warnf("peer %s with meta hash %d is blocked from login", peerKey.String(), metahashed) + log.WithContext(ctx).Tracef("peer %s with meta hash %d is blocked from login", peerKey.String(), metahashed) } if s.appMetrics != nil { s.appMetrics.GRPCMetrics().CountLoginRequestBlocked() @@ -554,6 +547,8 @@ func (s *Server) Login(ctx context.Context, req *proto.EncryptedMessage) (*proto //nolint ctx = context.WithValue(ctx, nbContext.AccountIDKey, accountID) + log.WithContext(ctx).Debugf("Login request from peer [%s] [%s]", req.WgPubKey, sRealIP) + defer func() { if s.appMetrics != nil { s.appMetrics.GRPCMetrics().CountLoginRequestDuration(time.Since(reqStart), accountID) diff --git a/management/server/account.go b/management/server/account.go index dac040db0..8a2155cb4 100644 --- a/management/server/account.go +++ b/management/server/account.go @@ -787,6 +787,13 @@ func (am *DefaultAccountManager) loadAccount(ctx context.Context, accountID any) log.WithContext(ctx).Debugf("account %s not found in cache, reloading", accountID) accountIDString := fmt.Sprintf("%v", accountID) + if ctx == nil { + ctx = context.Background() + } + + // nolint:staticcheck + ctx = context.WithValue(ctx, nbcontext.AccountIDKey, accountID) + accountUsers, err := am.Store.GetAccountUsers(ctx, store.LockingStrengthNone, accountIDString) if err != nil { return nil, nil, err diff --git a/management/server/telemetry/grpc_metrics.go b/management/server/telemetry/grpc_metrics.go index d4301802f..4ba1ee14e 100644 --- a/management/server/telemetry/grpc_metrics.go +++ b/management/server/telemetry/grpc_metrics.go @@ -16,7 +16,6 @@ type GRPCMetrics struct { meter metric.Meter syncRequestsCounter metric.Int64Counter syncRequestsBlockedCounter metric.Int64Counter - syncRequestHighLatencyCounter metric.Int64Counter loginRequestsCounter metric.Int64Counter loginRequestsBlockedCounter metric.Int64Counter loginRequestHighLatencyCounter metric.Int64Counter @@ -46,14 +45,6 @@ func NewGRPCMetrics(ctx context.Context, meter metric.Meter) (*GRPCMetrics, erro return nil, err } - syncRequestHighLatencyCounter, err := meter.Int64Counter("management.grpc.sync.request.high.latency.counter", - metric.WithUnit("1"), - metric.WithDescription("Number of sync gRPC requests from the peers that took longer than the threshold to establish a connection and receive network map updates (update channel)"), - ) - if err != nil { - return nil, err - } - loginRequestsCounter, err := meter.Int64Counter("management.grpc.login.request.counter", metric.WithUnit("1"), metric.WithDescription("Number of login gRPC requests from the peers to authenticate and receive initial configuration and relay credentials"), @@ -126,7 +117,6 @@ func NewGRPCMetrics(ctx context.Context, meter metric.Meter) (*GRPCMetrics, erro meter: meter, syncRequestsCounter: syncRequestsCounter, syncRequestsBlockedCounter: syncRequestsBlockedCounter, - syncRequestHighLatencyCounter: syncRequestHighLatencyCounter, loginRequestsCounter: loginRequestsCounter, loginRequestsBlockedCounter: loginRequestsBlockedCounter, loginRequestHighLatencyCounter: loginRequestHighLatencyCounter, @@ -172,14 +162,6 @@ func (grpcMetrics *GRPCMetrics) CountLoginRequestDuration(duration time.Duration } } -// CountSyncRequestDuration counts the duration of the sync gRPC requests -func (grpcMetrics *GRPCMetrics) CountSyncRequestDuration(duration time.Duration, accountID string) { - grpcMetrics.syncRequestDuration.Record(grpcMetrics.ctx, duration.Milliseconds()) - if duration > HighLatencyThreshold { - grpcMetrics.syncRequestHighLatencyCounter.Add(grpcMetrics.ctx, 1, metric.WithAttributes(attribute.String(AccountIDLabel, accountID))) - } -} - // RegisterConnectedStreams registers a function that collects number of active streams and feeds it to the metrics gauge. func (grpcMetrics *GRPCMetrics) RegisterConnectedStreams(producer func() int64) error { _, err := grpcMetrics.meter.RegisterCallback( diff --git a/management/server/telemetry/http_api_metrics.go b/management/server/telemetry/http_api_metrics.go index ae27466d9..0b6f8beb6 100644 --- a/management/server/telemetry/http_api_metrics.go +++ b/management/server/telemetry/http_api_metrics.go @@ -185,6 +185,18 @@ func (m *HTTPMiddleware) Handler(h http.Handler) http.Handler { h.ServeHTTP(w, r.WithContext(ctx)) + userAuth, err := nbContext.GetUserAuthFromContext(r.Context()) + if err == nil { + if userAuth.AccountId != "" { + //nolint + ctx = context.WithValue(ctx, nbContext.AccountIDKey, userAuth.AccountId) + } + if userAuth.UserId != "" { + //nolint + ctx = context.WithValue(ctx, nbContext.UserIDKey, userAuth.UserId) + } + } + if w.Status() > 399 { log.WithContext(ctx).Errorf("HTTP response %v: %v %v status %v", reqID, r.Method, r.URL, w.Status()) } else { From 90e3b8009fb5228a8643143d775eb141f4abba91 Mon Sep 17 00:00:00 2001 From: Pascal Fischer <32096965+pascal-fischer@users.noreply.github.com> Date: Thu, 11 Dec 2025 20:11:12 +0100 Subject: [PATCH 05/13] [management] Fix sync metrics (#4939) --- management/internals/shared/grpc/server.go | 6 ++++++ management/server/telemetry/grpc_metrics.go | 5 +++++ 2 files changed, 11 insertions(+) diff --git a/management/internals/shared/grpc/server.go b/management/internals/shared/grpc/server.go index 6029dc8bf..462e2e6eb 100644 --- a/management/internals/shared/grpc/server.go +++ b/management/internals/shared/grpc/server.go @@ -171,6 +171,8 @@ func (s *Server) Sync(req *proto.EncryptedMessage, srv proto.ManagementService_S } s.syncSem.Add(1) + reqStart := time.Now() + ctx := srv.Context() syncReq := &proto.SyncRequest{} @@ -261,6 +263,10 @@ func (s *Server) Sync(req *proto.EncryptedMessage, srv proto.ManagementService_S s.secretsManager.SetupRefresh(ctx, accountID, peer.ID) + if s.appMetrics != nil { + s.appMetrics.GRPCMetrics().CountSyncRequestDuration(time.Since(reqStart), accountID) + } + unlock() unlock = nil diff --git a/management/server/telemetry/grpc_metrics.go b/management/server/telemetry/grpc_metrics.go index 4ba1ee14e..bd7fbc235 100644 --- a/management/server/telemetry/grpc_metrics.go +++ b/management/server/telemetry/grpc_metrics.go @@ -162,6 +162,11 @@ func (grpcMetrics *GRPCMetrics) CountLoginRequestDuration(duration time.Duration } } +// CountSyncRequestDuration counts the duration of the sync gRPC requests +func (grpcMetrics *GRPCMetrics) CountSyncRequestDuration(duration time.Duration, accountID string) { + grpcMetrics.syncRequestDuration.Record(grpcMetrics.ctx, duration.Milliseconds()) +} + // RegisterConnectedStreams registers a function that collects number of active streams and feeds it to the metrics gauge. func (grpcMetrics *GRPCMetrics) RegisterConnectedStreams(producer func() int64) error { _, err := grpcMetrics.meter.RegisterCallback( From abcbde26f9294f5bd649386530c21d816bca02be Mon Sep 17 00:00:00 2001 From: Pascal Fischer <32096965+pascal-fischer@users.noreply.github.com> Date: Thu, 11 Dec 2025 21:45:47 +0100 Subject: [PATCH 06/13] [management] remove context from store methods (#4940) --- management/server/store/sql_store.go | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/management/server/store/sql_store.go b/management/server/store/sql_store.go index 4aa3de499..74b03ce48 100644 --- a/management/server/store/sql_store.go +++ b/management/server/store/sql_store.go @@ -588,7 +588,7 @@ func (s *SqlStore) GetUserByUserID(ctx context.Context, lockStrength LockingStre } var user types.User - result := tx.WithContext(ctx).Take(&user, idQueryCondition, userID) + result := tx.Take(&user, idQueryCondition, userID) if result.Error != nil { if errors.Is(result.Error, gorm.ErrRecordNotFound) { return nil, status.NewUserNotFoundError(userID) @@ -2154,7 +2154,7 @@ func (s *SqlStore) GetAccountNetwork(ctx context.Context, lockStrength LockingSt } var accountNetwork types.AccountNetwork - if err := tx.WithContext(ctx).Model(&types.Account{}).Where(idQueryCondition, accountID).Take(&accountNetwork).Error; err != nil { + if err := tx.Model(&types.Account{}).Where(idQueryCondition, accountID).Take(&accountNetwork).Error; err != nil { if errors.Is(err, gorm.ErrRecordNotFound) { return nil, status.NewAccountNotFoundError(accountID) } @@ -2170,7 +2170,7 @@ func (s *SqlStore) GetPeerByPeerPubKey(ctx context.Context, lockStrength Locking } var peer nbpeer.Peer - result := tx.WithContext(ctx).Take(&peer, GetKeyQueryCondition(s), peerKey) + result := tx.Take(&peer, GetKeyQueryCondition(s), peerKey) if result.Error != nil { if errors.Is(result.Error, gorm.ErrRecordNotFound) { @@ -2220,7 +2220,7 @@ func (s *SqlStore) GetAccountCreatedBy(ctx context.Context, lockStrength Locking // SaveUserLastLogin stores the last login time for a user in DB. func (s *SqlStore) SaveUserLastLogin(ctx context.Context, accountID, userID string, lastLogin time.Time) error { var user types.User - result := s.db.WithContext(ctx).Take(&user, accountAndIDQueryCondition, accountID, userID) + result := s.db.Take(&user, accountAndIDQueryCondition, accountID, userID) if result.Error != nil { if errors.Is(result.Error, gorm.ErrRecordNotFound) { return status.NewUserNotFoundError(userID) @@ -2484,7 +2484,7 @@ func (s *SqlStore) GetSetupKeyBySecret(ctx context.Context, lockStrength Locking } var setupKey types.SetupKey - result := tx.WithContext(ctx). + result := tx. Take(&setupKey, GetKeyQueryCondition(s), key) if result.Error != nil { @@ -2498,7 +2498,7 @@ func (s *SqlStore) GetSetupKeyBySecret(ctx context.Context, lockStrength Locking } func (s *SqlStore) IncrementSetupKeyUsage(ctx context.Context, setupKeyID string) error { - result := s.db.WithContext(ctx).Model(&types.SetupKey{}). + result := s.db.Model(&types.SetupKey{}). Where(idQueryCondition, setupKeyID). Updates(map[string]interface{}{ "used_times": gorm.Expr("used_times + 1"), @@ -2519,7 +2519,7 @@ func (s *SqlStore) IncrementSetupKeyUsage(ctx context.Context, setupKeyID string // AddPeerToAllGroup adds a peer to the 'All' group. Method always needs to run in a transaction func (s *SqlStore) AddPeerToAllGroup(ctx context.Context, accountID string, peerID string) error { var groupID string - _ = s.db.WithContext(ctx).Model(types.Group{}). + _ = s.db.Model(types.Group{}). Select("id"). Where("account_id = ? AND name = ?", accountID, "All"). Limit(1). @@ -2743,7 +2743,7 @@ func (s *SqlStore) GetUserPeers(ctx context.Context, lockStrength LockingStrengt } func (s *SqlStore) AddPeerToAccount(ctx context.Context, peer *nbpeer.Peer) error { - if err := s.db.WithContext(ctx).Create(peer).Error; err != nil { + if err := s.db.Create(peer).Error; err != nil { return status.Errorf(status.Internal, "issue adding peer to account: %s", err) } @@ -2869,7 +2869,7 @@ func (s *SqlStore) DeletePeer(ctx context.Context, accountID string, peerID stri } func (s *SqlStore) IncrementNetworkSerial(ctx context.Context, accountId string) error { - result := s.db.WithContext(ctx).Model(&types.Account{}).Where(idQueryCondition, accountId).Update("network_serial", gorm.Expr("network_serial + 1")) + result := s.db.Model(&types.Account{}).Where(idQueryCondition, accountId).Update("network_serial", gorm.Expr("network_serial + 1")) if result.Error != nil { log.WithContext(ctx).Errorf("failed to increment network serial count in store: %v", result.Error) return status.Errorf(status.Internal, "failed to increment network serial count in store") @@ -4030,7 +4030,7 @@ func (s *SqlStore) UpdateAccountNetwork(ctx context.Context, accountID string, i Network: &types.Network{Net: ipNet}, } - result := s.db.WithContext(ctx). + result := s.db. Model(&types.Account{}). Where(idQueryCondition, accountID). Updates(&patch) From 932c02eaabbcc3d314013969d85cd1aee353af7b Mon Sep 17 00:00:00 2001 From: Bethuel Mmbaga Date: Fri, 12 Dec 2025 18:49:57 +0300 Subject: [PATCH 07/13] [management] Approve all pending peers when peer approval is disabled (#4806) --- go.mod | 2 +- go.sum | 4 +- management/server/account.go | 29 +++---- management/server/account_test.go | 37 +++++++++ management/server/integrated_validator.go | 2 +- .../integrated_validator/interface.go | 2 +- management/server/store/sql_store.go | 12 +++ management/server/store/sql_store_test.go | 77 +++++++++++++++++++ management/server/store/store.go | 1 + 9 files changed, 148 insertions(+), 18 deletions(-) diff --git a/go.mod b/go.mod index 870118c88..8f4ec530b 100644 --- a/go.mod +++ b/go.mod @@ -64,7 +64,7 @@ require ( github.com/mdlayher/socket v0.5.1 github.com/miekg/dns v1.1.59 github.com/mitchellh/hashstructure/v2 v2.0.2 - github.com/netbirdio/management-integrations/integrations v0.0.0-20251202114414-534cf891e0ba + github.com/netbirdio/management-integrations/integrations v0.0.0-20251203183432-d5400f030847 github.com/netbirdio/signal-dispatcher/dispatcher v0.0.0-20250805121659-6b4ac470ca45 github.com/okta/okta-sdk-golang/v2 v2.18.0 github.com/oschwald/maxminddb-golang v1.12.0 diff --git a/go.sum b/go.sum index 96a303f79..f10e1e6da 100644 --- a/go.sum +++ b/go.sum @@ -368,8 +368,8 @@ github.com/netbirdio/go-netroute v0.0.0-20240611143515-f59b0e1d3944 h1:TDtJKmM6S github.com/netbirdio/go-netroute v0.0.0-20240611143515-f59b0e1d3944/go.mod h1:sHA6TRxjQ6RLbnI+3R4DZo2Eseg/iKiPRfNmcuNySVQ= github.com/netbirdio/ice/v4 v4.0.0-20250908184934-6202be846b51 h1:Ov4qdafATOgGMB1wbSuh+0aAHcwz9hdvB6VZjh1mVMI= github.com/netbirdio/ice/v4 v4.0.0-20250908184934-6202be846b51/go.mod h1:ZSIbPdBn5hePO8CpF1PekH2SfpTxg1PDhEwtbqZS7R8= -github.com/netbirdio/management-integrations/integrations v0.0.0-20251202114414-534cf891e0ba h1:pD6eygRJ5EYAlgzeNskPU3WqszMz6/HhPuc6/Bc/580= -github.com/netbirdio/management-integrations/integrations v0.0.0-20251202114414-534cf891e0ba/go.mod h1:qzLCKeR253jtsWhfZTt4fyegI5zei32jKZykV+oSQOo= +github.com/netbirdio/management-integrations/integrations v0.0.0-20251203183432-d5400f030847 h1:V0zsYYMU5d2UN1m9zOLPEZCGWpnhtkYcxQVi9Rrx3bY= +github.com/netbirdio/management-integrations/integrations v0.0.0-20251203183432-d5400f030847/go.mod h1:qzLCKeR253jtsWhfZTt4fyegI5zei32jKZykV+oSQOo= github.com/netbirdio/service v0.0.0-20240911161631-f62744f42502 h1:3tHlFmhTdX9axERMVN63dqyFqnvuD+EMJHzM7mNGON8= github.com/netbirdio/service v0.0.0-20240911161631-f62744f42502/go.mod h1:CIMRFEJVL+0DS1a3Nx06NaMn4Dz63Ng6O7dl0qH0zVM= github.com/netbirdio/signal-dispatcher/dispatcher v0.0.0-20250805121659-6b4ac470ca45 h1:ujgviVYmx243Ksy7NdSwrdGPSRNE3pb8kEDSpH0QuAQ= diff --git a/management/server/account.go b/management/server/account.go index 8a2155cb4..a9becc4b6 100644 --- a/management/server/account.go +++ b/management/server/account.go @@ -295,10 +295,23 @@ func (am *DefaultAccountManager) UpdateAccountSettings(ctx context.Context, acco return err } - if err = am.validateSettingsUpdate(ctx, transaction, newSettings, oldSettings, userID, accountID); err != nil { + if err = am.validateSettingsUpdate(ctx, newSettings, oldSettings, userID, accountID); err != nil { return err } + if oldSettings.Extra != nil && newSettings.Extra != nil && + oldSettings.Extra.PeerApprovalEnabled && !newSettings.Extra.PeerApprovalEnabled { + approvedCount, err := transaction.ApproveAccountPeers(ctx, accountID) + if err != nil { + return fmt.Errorf("failed to approve pending peers: %w", err) + } + + if approvedCount > 0 { + log.WithContext(ctx).Debugf("approved %d pending peers in account %s", approvedCount, accountID) + updateAccountPeers = true + } + } + if oldSettings.NetworkRange != newSettings.NetworkRange { if err = am.reallocateAccountPeerIPs(ctx, transaction, accountID, newSettings.NetworkRange); err != nil { return err @@ -372,7 +385,7 @@ func (am *DefaultAccountManager) UpdateAccountSettings(ctx context.Context, acco return newSettings, nil } -func (am *DefaultAccountManager) validateSettingsUpdate(ctx context.Context, transaction store.Store, newSettings, oldSettings *types.Settings, userID, accountID string) error { +func (am *DefaultAccountManager) validateSettingsUpdate(ctx context.Context, newSettings, oldSettings *types.Settings, userID, accountID string) error { halfYearLimit := 180 * 24 * time.Hour if newSettings.PeerLoginExpiration > halfYearLimit { return status.Errorf(status.InvalidArgument, "peer login expiration can't be larger than 180 days") @@ -386,17 +399,7 @@ func (am *DefaultAccountManager) validateSettingsUpdate(ctx context.Context, tra return status.Errorf(status.InvalidArgument, "invalid domain \"%s\" provided for DNS domain", newSettings.DNSDomain) } - peers, err := transaction.GetAccountPeers(ctx, store.LockingStrengthNone, accountID, "", "") - if err != nil { - return err - } - - peersMap := make(map[string]*nbpeer.Peer, len(peers)) - for _, peer := range peers { - peersMap[peer.ID] = peer - } - - return am.integratedPeerValidator.ValidateExtraSettings(ctx, newSettings.Extra, oldSettings.Extra, peersMap, userID, accountID) + return am.integratedPeerValidator.ValidateExtraSettings(ctx, newSettings.Extra, oldSettings.Extra, userID, accountID) } func (am *DefaultAccountManager) handleRoutingPeerDNSResolutionSettings(ctx context.Context, oldSettings, newSettings *types.Settings, userID, accountID string) { diff --git a/management/server/account_test.go b/management/server/account_test.go index 8569f1b2f..7f125e3a0 100644 --- a/management/server/account_test.go +++ b/management/server/account_test.go @@ -2058,6 +2058,43 @@ func TestDefaultAccountManager_UpdateAccountSettings(t *testing.T) { require.Error(t, err, "expecting to fail when providing PeerLoginExpiration more than 180 days") } +func TestDefaultAccountManager_UpdateAccountSettings_PeerApproval(t *testing.T) { + manager, _, account, peer1, peer2, peer3 := setupNetworkMapTest(t) + + accountID := account.Id + userID := account.Users[account.CreatedBy].Id + ctx := context.Background() + + newSettings := account.Settings.Copy() + newSettings.Extra = &types.ExtraSettings{ + PeerApprovalEnabled: true, + } + _, err := manager.UpdateAccountSettings(ctx, accountID, userID, newSettings) + require.NoError(t, err) + + peer1.Status.RequiresApproval = true + peer2.Status.RequiresApproval = true + peer3.Status.RequiresApproval = false + + require.NoError(t, manager.Store.SavePeer(ctx, accountID, peer1)) + require.NoError(t, manager.Store.SavePeer(ctx, accountID, peer2)) + require.NoError(t, manager.Store.SavePeer(ctx, accountID, peer3)) + + newSettings = account.Settings.Copy() + newSettings.Extra = &types.ExtraSettings{ + PeerApprovalEnabled: false, + } + _, err = manager.UpdateAccountSettings(ctx, accountID, userID, newSettings) + require.NoError(t, err) + + accountPeers, err := manager.Store.GetAccountPeers(ctx, store.LockingStrengthNone, accountID, "", "") + require.NoError(t, err) + + for _, peer := range accountPeers { + assert.False(t, peer.Status.RequiresApproval, "peer %s should not require approval after disabling peer approval", peer.ID) + } +} + func TestAccount_GetExpiredPeers(t *testing.T) { type test struct { name string diff --git a/management/server/integrated_validator.go b/management/server/integrated_validator.go index e9a1c8701..69ea668ad 100644 --- a/management/server/integrated_validator.go +++ b/management/server/integrated_validator.go @@ -127,7 +127,7 @@ type MockIntegratedValidator struct { ValidatePeerFunc func(_ context.Context, update *nbpeer.Peer, peer *nbpeer.Peer, userID string, accountID string, dnsDomain string, peersGroup []string, extraSettings *types.ExtraSettings) (*nbpeer.Peer, bool, error) } -func (a MockIntegratedValidator) ValidateExtraSettings(_ context.Context, newExtraSettings *types.ExtraSettings, oldExtraSettings *types.ExtraSettings, peers map[string]*nbpeer.Peer, userID string, accountID string) error { +func (a MockIntegratedValidator) ValidateExtraSettings(_ context.Context, newExtraSettings *types.ExtraSettings, oldExtraSettings *types.ExtraSettings, userID string, accountID string) error { return nil } diff --git a/management/server/integrations/integrated_validator/interface.go b/management/server/integrations/integrated_validator/interface.go index 26c338cb6..326fbfaf0 100644 --- a/management/server/integrations/integrated_validator/interface.go +++ b/management/server/integrations/integrated_validator/interface.go @@ -10,7 +10,7 @@ import ( // IntegratedValidator interface exists to avoid the circle dependencies type IntegratedValidator interface { - ValidateExtraSettings(ctx context.Context, newExtraSettings *types.ExtraSettings, oldExtraSettings *types.ExtraSettings, peers map[string]*nbpeer.Peer, userID string, accountID string) error + ValidateExtraSettings(ctx context.Context, newExtraSettings *types.ExtraSettings, oldExtraSettings *types.ExtraSettings, userID string, accountID string) error ValidatePeer(ctx context.Context, update *nbpeer.Peer, peer *nbpeer.Peer, userID string, accountID string, dnsDomain string, peersGroup []string, extraSettings *types.ExtraSettings) (*nbpeer.Peer, bool, error) PreparePeer(ctx context.Context, accountID string, peer *nbpeer.Peer, peersGroup []string, extraSettings *types.ExtraSettings, temporary bool) *nbpeer.Peer IsNotValidPeer(ctx context.Context, accountID string, peer *nbpeer.Peer, peersGroup []string, extraSettings *types.ExtraSettings) (bool, bool, error) diff --git a/management/server/store/sql_store.go b/management/server/store/sql_store.go index 74b03ce48..2b8981b97 100644 --- a/management/server/store/sql_store.go +++ b/management/server/store/sql_store.go @@ -412,6 +412,18 @@ func (s *SqlStore) SavePeerLocation(ctx context.Context, accountID string, peerW return nil } +// ApproveAccountPeers marks all peers that currently require approval in the given account as approved. +func (s *SqlStore) ApproveAccountPeers(ctx context.Context, accountID string) (int, error) { + result := s.db.Model(&nbpeer.Peer{}). + Where("account_id = ? AND peer_status_requires_approval = ?", accountID, true). + Update("peer_status_requires_approval", false) + if result.Error != nil { + return 0, status.Errorf(status.Internal, "failed to approve pending account peers: %v", result.Error) + } + + return int(result.RowsAffected), nil +} + // SaveUsers saves the given list of users to the database. func (s *SqlStore) SaveUsers(ctx context.Context, users []*types.User) error { if len(users) == 0 { diff --git a/management/server/store/sql_store_test.go b/management/server/store/sql_store_test.go index d40c4664c..2e2623910 100644 --- a/management/server/store/sql_store_test.go +++ b/management/server/store/sql_store_test.go @@ -3717,3 +3717,80 @@ func TestSqlStore_GetPeersByGroupIDs(t *testing.T) { }) } } + +func TestSqlStore_ApproveAccountPeers(t *testing.T) { + runTestForAllEngines(t, "", func(t *testing.T, store Store) { + accountID := "test-account" + ctx := context.Background() + + account := newAccountWithId(ctx, accountID, "testuser", "example.com") + err := store.SaveAccount(ctx, account) + require.NoError(t, err) + + peers := []*nbpeer.Peer{ + { + ID: "peer1", + AccountID: accountID, + DNSLabel: "peer1.netbird.cloud", + Key: "peer1-key", + IP: net.ParseIP("100.64.0.1"), + Status: &nbpeer.PeerStatus{ + RequiresApproval: true, + LastSeen: time.Now().UTC(), + }, + }, + { + ID: "peer2", + AccountID: accountID, + DNSLabel: "peer2.netbird.cloud", + Key: "peer2-key", + IP: net.ParseIP("100.64.0.2"), + Status: &nbpeer.PeerStatus{ + RequiresApproval: true, + LastSeen: time.Now().UTC(), + }, + }, + { + ID: "peer3", + AccountID: accountID, + DNSLabel: "peer3.netbird.cloud", + Key: "peer3-key", + IP: net.ParseIP("100.64.0.3"), + Status: &nbpeer.PeerStatus{ + RequiresApproval: false, + LastSeen: time.Now().UTC(), + }, + }, + } + + for _, peer := range peers { + err = store.AddPeerToAccount(ctx, peer) + require.NoError(t, err) + } + + t.Run("approve all pending peers", func(t *testing.T) { + count, err := store.ApproveAccountPeers(ctx, accountID) + require.NoError(t, err) + assert.Equal(t, 2, count) + + allPeers, err := store.GetAccountPeers(ctx, LockingStrengthNone, accountID, "", "") + require.NoError(t, err) + + for _, peer := range allPeers { + assert.False(t, peer.Status.RequiresApproval, "peer %s should not require approval", peer.ID) + } + }) + + t.Run("no peers to approve", func(t *testing.T) { + count, err := store.ApproveAccountPeers(ctx, accountID) + require.NoError(t, err) + assert.Equal(t, 0, count) + }) + + t.Run("non-existent account", func(t *testing.T) { + count, err := store.ApproveAccountPeers(ctx, "non-existent") + require.NoError(t, err) + assert.Equal(t, 0, count) + }) + }) +} diff --git a/management/server/store/store.go b/management/server/store/store.go index 007e2b739..0ec7949f9 100644 --- a/management/server/store/store.go +++ b/management/server/store/store.go @@ -143,6 +143,7 @@ type Store interface { SavePeer(ctx context.Context, accountID string, peer *nbpeer.Peer) error SavePeerStatus(ctx context.Context, accountID, peerID string, status nbpeer.PeerStatus) error SavePeerLocation(ctx context.Context, accountID string, peer *nbpeer.Peer) error + ApproveAccountPeers(ctx context.Context, accountID string) (int, error) DeletePeer(ctx context.Context, accountID string, peerID string) error GetSetupKeyBySecret(ctx context.Context, lockStrength LockingStrength, key string) (*types.SetupKey, error) From 08f31fbcb3198f309fb52a70a48d99d47ca35b83 Mon Sep 17 00:00:00 2001 From: Diego Romar Date: Fri, 12 Dec 2025 14:29:58 -0300 Subject: [PATCH 08/13] [iOS] Add force relay connection on iOS (#4928) * [ios] Add a bogus test to check iOS behavior when setting environment variables * [ios] Revert "Add a bogus test to check iOS behavior when setting environment variables" This reverts commit 90ca01105a6b0f4471aac07a63fc95e5d4eaef9b. * [ios] Add EnvList struct to export and import environment variables * [ios] Add envList parameter to the iOS Client Run method * [ios] Add some debug logging to exportEnvVarList * Add "//go:build ios" to client/ios/NetBirdSDK files --- client/ios/NetBirdSDK/client.go | 22 ++++++++++++++- client/ios/NetBirdSDK/env_list.go | 34 +++++++++++++++++++++++ client/ios/NetBirdSDK/gomobile.go | 2 ++ client/ios/NetBirdSDK/logger.go | 2 ++ client/ios/NetBirdSDK/login.go | 2 ++ client/ios/NetBirdSDK/peer_notifier.go | 2 ++ client/ios/NetBirdSDK/preferences.go | 2 ++ client/ios/NetBirdSDK/preferences_test.go | 2 ++ client/ios/NetBirdSDK/routes.go | 2 ++ 9 files changed, 69 insertions(+), 1 deletion(-) create mode 100644 client/ios/NetBirdSDK/env_list.go diff --git a/client/ios/NetBirdSDK/client.go b/client/ios/NetBirdSDK/client.go index 6d969bb12..463c93d57 100644 --- a/client/ios/NetBirdSDK/client.go +++ b/client/ios/NetBirdSDK/client.go @@ -1,9 +1,12 @@ +//go:build ios + package NetBirdSDK import ( "context" "fmt" "net/netip" + "os" "sort" "strings" "sync" @@ -90,7 +93,8 @@ func NewClient(cfgFile, stateFile, deviceName string, osVersion string, osName s } // Run start the internal client. It is a blocker function -func (c *Client) Run(fd int32, interfaceName string) error { +func (c *Client) Run(fd int32, interfaceName string, envList *EnvList) error { + exportEnvList(envList) log.Infof("Starting NetBird client") log.Debugf("Tunnel uses interface: %s", interfaceName) cfg, err := profilemanager.UpdateOrCreateConfig(profilemanager.ConfigInput{ @@ -433,3 +437,19 @@ func toNetIDs(routes []string) []route.NetID { } return netIDs } + +func exportEnvList(list *EnvList) { + if list == nil { + return + } + for k, v := range list.AllItems() { + log.Debugf("Env variable %s's value is currently: %s", k, os.Getenv(k)) + log.Debugf("Setting env variable %s: %s", k, v) + + if err := os.Setenv(k, v); err != nil { + log.Errorf("could not set env variable %s: %v", k, err) + } else { + log.Debugf("Env variable %s was set successfully", k) + } + } +} diff --git a/client/ios/NetBirdSDK/env_list.go b/client/ios/NetBirdSDK/env_list.go new file mode 100644 index 000000000..4800803d7 --- /dev/null +++ b/client/ios/NetBirdSDK/env_list.go @@ -0,0 +1,34 @@ +//go:build ios + +package NetBirdSDK + +import "github.com/netbirdio/netbird/client/internal/peer" + +// EnvList is an exported struct to be bound by gomobile +type EnvList struct { + data map[string]string +} + +// NewEnvList creates a new EnvList +func NewEnvList() *EnvList { + return &EnvList{data: make(map[string]string)} +} + +// Put adds a key-value pair +func (el *EnvList) Put(key, value string) { + el.data[key] = value +} + +// Get retrieves a value by key +func (el *EnvList) Get(key string) string { + return el.data[key] +} + +func (el *EnvList) AllItems() map[string]string { + return el.data +} + +// GetEnvKeyNBForceRelay Exports the environment variable for the iOS client +func GetEnvKeyNBForceRelay() string { + return peer.EnvKeyNBForceRelay +} diff --git a/client/ios/NetBirdSDK/gomobile.go b/client/ios/NetBirdSDK/gomobile.go index 9eadd6a7f..79bf0c2ac 100644 --- a/client/ios/NetBirdSDK/gomobile.go +++ b/client/ios/NetBirdSDK/gomobile.go @@ -1,3 +1,5 @@ +//go:build ios + package NetBirdSDK import _ "golang.org/x/mobile/bind" diff --git a/client/ios/NetBirdSDK/logger.go b/client/ios/NetBirdSDK/logger.go index f1ad1b9f6..531d0ba89 100644 --- a/client/ios/NetBirdSDK/logger.go +++ b/client/ios/NetBirdSDK/logger.go @@ -1,3 +1,5 @@ +//go:build ios + package NetBirdSDK import ( diff --git a/client/ios/NetBirdSDK/login.go b/client/ios/NetBirdSDK/login.go index 570c44f80..1c2b38a61 100644 --- a/client/ios/NetBirdSDK/login.go +++ b/client/ios/NetBirdSDK/login.go @@ -1,3 +1,5 @@ +//go:build ios + package NetBirdSDK import ( diff --git a/client/ios/NetBirdSDK/peer_notifier.go b/client/ios/NetBirdSDK/peer_notifier.go index 16c5039eb..9b00568be 100644 --- a/client/ios/NetBirdSDK/peer_notifier.go +++ b/client/ios/NetBirdSDK/peer_notifier.go @@ -1,3 +1,5 @@ +//go:build ios + package NetBirdSDK // PeerInfo describe information about the peers. It designed for the UI usage diff --git a/client/ios/NetBirdSDK/preferences.go b/client/ios/NetBirdSDK/preferences.go index 5e7050465..39ae06538 100644 --- a/client/ios/NetBirdSDK/preferences.go +++ b/client/ios/NetBirdSDK/preferences.go @@ -1,3 +1,5 @@ +//go:build ios + package NetBirdSDK import ( diff --git a/client/ios/NetBirdSDK/preferences_test.go b/client/ios/NetBirdSDK/preferences_test.go index 780443a7b..5f75e7c9a 100644 --- a/client/ios/NetBirdSDK/preferences_test.go +++ b/client/ios/NetBirdSDK/preferences_test.go @@ -1,3 +1,5 @@ +//go:build ios + package NetBirdSDK import ( diff --git a/client/ios/NetBirdSDK/routes.go b/client/ios/NetBirdSDK/routes.go index 30d0d0d0a..7b84d6e1c 100644 --- a/client/ios/NetBirdSDK/routes.go +++ b/client/ios/NetBirdSDK/routes.go @@ -1,3 +1,5 @@ +//go:build ios + package NetBirdSDK // RoutesSelectionInfoCollection made for Java layer to get non default types as collection From 5748bdd64edf0a0e77dab2311ceb700d049730c8 Mon Sep 17 00:00:00 2001 From: Zoltan Papp Date: Mon, 15 Dec 2025 10:28:25 +0100 Subject: [PATCH 09/13] Add health-check agent recognition to avoid error logs (#4917) Health-check connections now send a properly formatted auth message with a well-known peer ID instead of immediately closing. The server recognizes this peer ID and handles the connection gracefully with a debug log instead of error logs. --- relay/healthcheck/peerid/peerid.go | 31 ++++++++++++++++++++++++++++++ relay/healthcheck/ws.go | 15 ++++++++++++++- relay/server/handshake.go | 4 ++-- relay/server/relay.go | 7 ++++++- 4 files changed, 53 insertions(+), 4 deletions(-) create mode 100644 relay/healthcheck/peerid/peerid.go diff --git a/relay/healthcheck/peerid/peerid.go b/relay/healthcheck/peerid/peerid.go new file mode 100644 index 000000000..cd8696817 --- /dev/null +++ b/relay/healthcheck/peerid/peerid.go @@ -0,0 +1,31 @@ +package peerid + +import ( + "crypto/sha256" + + v2 "github.com/netbirdio/netbird/shared/relay/auth/hmac/v2" + "github.com/netbirdio/netbird/shared/relay/messages" +) + +var ( + // HealthCheckPeerID is the hashed peer ID for health check connections + HealthCheckPeerID = messages.HashID("healthcheck-agent") + + // DummyAuthToken is a structurally valid auth token for health check. + // The signature is not valid but the format is correct (1 byte algo + 32 bytes signature + payload). + DummyAuthToken = createDummyToken() +) + +func createDummyToken() []byte { + token := v2.Token{ + AuthAlgo: v2.AuthAlgoHMACSHA256, + Signature: make([]byte, sha256.Size), + Payload: []byte("healthcheck"), + } + return token.Marshal() +} + +// IsHealthCheck checks if the given peer ID is the health check agent +func IsHealthCheck(peerID *messages.PeerID) bool { + return peerID != nil && *peerID == HealthCheckPeerID +} diff --git a/relay/healthcheck/ws.go b/relay/healthcheck/ws.go index db61ed802..9267096f5 100644 --- a/relay/healthcheck/ws.go +++ b/relay/healthcheck/ws.go @@ -7,8 +7,10 @@ import ( "github.com/coder/websocket" + "github.com/netbirdio/netbird/relay/healthcheck/peerid" "github.com/netbirdio/netbird/relay/server" "github.com/netbirdio/netbird/shared/relay" + "github.com/netbirdio/netbird/shared/relay/messages" ) func dialWS(ctx context.Context, address url.URL) error { @@ -30,7 +32,18 @@ func dialWS(ctx context.Context, address url.URL) error { if err != nil { return fmt.Errorf("failed to connect to websocket: %w", err) } + defer func() { + _ = conn.CloseNow() + }() + + authMsg, err := messages.MarshalAuthMsg(peerid.HealthCheckPeerID, peerid.DummyAuthToken) + if err != nil { + return fmt.Errorf("failed to marshal auth message: %w", err) + } + + if err := conn.Write(ctx, websocket.MessageBinary, authMsg); err != nil { + return fmt.Errorf("failed to write auth message: %w", err) + } - _ = conn.Close(websocket.StatusNormalClosure, "availability check complete") return nil } diff --git a/relay/server/handshake.go b/relay/server/handshake.go index 922369798..8c3ee1899 100644 --- a/relay/server/handshake.go +++ b/relay/server/handshake.go @@ -97,7 +97,7 @@ func (h *handshake) handshakeReceive() (*messages.PeerID, error) { return nil, fmt.Errorf("invalid message type %d from %s", msgType, h.conn.RemoteAddr()) } if err != nil { - return nil, err + return peerID, err } h.peerID = peerID return peerID, nil @@ -147,7 +147,7 @@ func (h *handshake) handleAuthMsg(buf []byte) (*messages.PeerID, error) { } if err := h.validator.Validate(authPayload); err != nil { - return nil, fmt.Errorf("validate %s (%s): %w", rawPeerID.String(), h.conn.RemoteAddr(), err) + return rawPeerID, fmt.Errorf("validate %s (%s): %w", rawPeerID.String(), h.conn.RemoteAddr(), err) } return rawPeerID, nil diff --git a/relay/server/relay.go b/relay/server/relay.go index c1cfa13fd..bb355f58f 100644 --- a/relay/server/relay.go +++ b/relay/server/relay.go @@ -12,6 +12,7 @@ import ( "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/metric" + "github.com/netbirdio/netbird/relay/healthcheck/peerid" //nolint:staticcheck "github.com/netbirdio/netbird/relay/metrics" "github.com/netbirdio/netbird/relay/server/store" @@ -123,7 +124,11 @@ func (r *Relay) Accept(conn net.Conn) { } peerID, err := h.handshakeReceive() if err != nil { - log.Errorf("failed to handshake: %s", err) + if peerid.IsHealthCheck(peerID) { + log.Debugf("health check connection from %s", conn.RemoteAddr()) + } else { + log.Errorf("failed to handshake: %s", err) + } if cErr := conn.Close(); cErr != nil { log.Errorf("failed to close connection, %s: %s", conn.RemoteAddr(), cErr) } From 447cd287f5ef17c12242125ed154ddfa1dd82acf Mon Sep 17 00:00:00 2001 From: Zoltan Papp Date: Mon, 15 Dec 2025 10:34:48 +0100 Subject: [PATCH 10/13] [ci] Add local lint setup with pre-push hook to catch issues early (#4925) * Add local lint setup with pre-push hook to catch issues early Developers can now catch lint issues before pushing, reducing CI failures and iteration time. The setup uses golangci-lint locally with the same configuration as CI. Setup: - Run `make setup-hooks` once after cloning - Pre-push hook automatically lints changed files (~90s) - Use `make lint` to manually check changed files - Use `make lint-all` to run full CI-equivalent lint The Makefile auto-installs golangci-lint to ./bin/ using go install to match the Go version in go.mod, avoiding version compatibility issues. --------- Co-authored-by: mlsmaycon --- .githooks/pre-push | 11 +++++++++++ CONTRIBUTING.md | 8 ++++++++ Makefile | 27 +++++++++++++++++++++++++++ 3 files changed, 46 insertions(+) create mode 100755 .githooks/pre-push create mode 100644 Makefile diff --git a/.githooks/pre-push b/.githooks/pre-push new file mode 100755 index 000000000..31898182e --- /dev/null +++ b/.githooks/pre-push @@ -0,0 +1,11 @@ +#!/bin/bash + +echo "Running pre-push hook..." +if ! make lint; then + echo "" + echo "Hint: To push without verification, run:" + echo " git push --no-verify" + exit 1 +fi + +echo "All checks passed!" diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index c82cfc763..efc7d9460 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -136,6 +136,14 @@ checked out and set up: go mod tidy ``` +6. Configure Git hooks for automatic linting: + + ```bash + make setup-hooks + ``` + + This will configure Git to run linting automatically before each push, helping catch issues early. + ### Dev Container Support If you prefer using a dev container for development, NetBird now includes support for dev containers. diff --git a/Makefile b/Makefile new file mode 100644 index 000000000..43379e115 --- /dev/null +++ b/Makefile @@ -0,0 +1,27 @@ +.PHONY: lint lint-all lint-install setup-hooks +GOLANGCI_LINT := $(shell pwd)/bin/golangci-lint + +# Install golangci-lint locally if needed +$(GOLANGCI_LINT): + @echo "Installing golangci-lint..." + @mkdir -p ./bin + @GOBIN=$(shell pwd)/bin go install github.com/golangci/golangci-lint/cmd/golangci-lint@latest + +# Lint only changed files (fast, for pre-push) +lint: $(GOLANGCI_LINT) + @echo "Running lint on changed files..." + @$(GOLANGCI_LINT) run --new-from-rev=origin/main --timeout=2m + +# Lint entire codebase (slow, matches CI) +lint-all: $(GOLANGCI_LINT) + @echo "Running lint on all files..." + @$(GOLANGCI_LINT) run --timeout=12m + +# Just install the linter +lint-install: $(GOLANGCI_LINT) + +# Setup git hooks for all developers +setup-hooks: + @git config core.hooksPath .githooks + @chmod +x .githooks/pre-push + @echo "✅ Git hooks configured! Pre-push will now run 'make lint'" From c29bb1a289d6e8688e3d994ddfc8d40f25a8babd Mon Sep 17 00:00:00 2001 From: Pascal Fischer <32096965+pascal-fischer@users.noreply.github.com> Date: Tue, 16 Dec 2025 14:02:37 +0100 Subject: [PATCH 11/13] [management] use xid as request id for logging (#4955) --- formatter/hook/hook.go | 35 ++----------------- management/internals/server/boot.go | 6 ++-- .../server/telemetry/http_api_metrics.go | 4 +-- 3 files changed, 7 insertions(+), 38 deletions(-) diff --git a/formatter/hook/hook.go b/formatter/hook/hook.go index c0d8c4eba..f0ee509f8 100644 --- a/formatter/hook/hook.go +++ b/formatter/hook/hook.go @@ -60,14 +60,7 @@ func (hook ContextHook) Fire(entry *logrus.Entry) error { entry.Data["context"] = source - switch source { - case HTTPSource: - addHTTPFields(entry) - case GRPCSource: - addGRPCFields(entry) - case SystemSource: - addSystemFields(entry) - } + addFields(entry) return nil } @@ -99,7 +92,7 @@ func (hook ContextHook) parseSrc(filePath string) string { return fmt.Sprintf("%s/%s", pkg, file) } -func addHTTPFields(entry *logrus.Entry) { +func addFields(entry *logrus.Entry) { if ctxReqID, ok := entry.Context.Value(context.RequestIDKey).(string); ok { entry.Data[context.RequestIDKey] = ctxReqID } @@ -109,30 +102,6 @@ func addHTTPFields(entry *logrus.Entry) { if ctxInitiatorID, ok := entry.Context.Value(context.UserIDKey).(string); ok { entry.Data[context.UserIDKey] = ctxInitiatorID } -} - -func addGRPCFields(entry *logrus.Entry) { - if ctxReqID, ok := entry.Context.Value(context.RequestIDKey).(string); ok { - entry.Data[context.RequestIDKey] = ctxReqID - } - if ctxAccountID, ok := entry.Context.Value(context.AccountIDKey).(string); ok { - entry.Data[context.AccountIDKey] = ctxAccountID - } - if ctxDeviceID, ok := entry.Context.Value(context.PeerIDKey).(string); ok { - entry.Data[context.PeerIDKey] = ctxDeviceID - } -} - -func addSystemFields(entry *logrus.Entry) { - if ctxReqID, ok := entry.Context.Value(context.RequestIDKey).(string); ok { - entry.Data[context.RequestIDKey] = ctxReqID - } - if ctxInitiatorID, ok := entry.Context.Value(context.UserIDKey).(string); ok { - entry.Data[context.UserIDKey] = ctxInitiatorID - } - if ctxAccountID, ok := entry.Context.Value(context.AccountIDKey).(string); ok { - entry.Data[context.AccountIDKey] = ctxAccountID - } if ctxDeviceID, ok := entry.Context.Value(context.PeerIDKey).(string); ok { entry.Data[context.PeerIDKey] = ctxDeviceID } diff --git a/management/internals/server/boot.go b/management/internals/server/boot.go index 37788e80e..57b3fac78 100644 --- a/management/internals/server/boot.go +++ b/management/internals/server/boot.go @@ -10,9 +10,9 @@ import ( "slices" "time" - "github.com/google/uuid" grpcMiddleware "github.com/grpc-ecosystem/go-grpc-middleware/v2" "github.com/grpc-ecosystem/go-grpc-middleware/v2/interceptors/realip" + "github.com/rs/xid" log "github.com/sirupsen/logrus" "google.golang.org/grpc" "google.golang.org/grpc/credentials" @@ -180,7 +180,7 @@ func unaryInterceptor( info *grpc.UnaryServerInfo, handler grpc.UnaryHandler, ) (interface{}, error) { - reqID := uuid.New().String() + reqID := xid.New().String() //nolint ctx = context.WithValue(ctx, hook.ExecutionContextKey, hook.GRPCSource) //nolint @@ -194,7 +194,7 @@ func streamInterceptor( info *grpc.StreamServerInfo, handler grpc.StreamHandler, ) error { - reqID := uuid.New().String() + reqID := xid.New().String() wrapped := grpcMiddleware.WrapServerStream(ss) //nolint ctx := context.WithValue(ss.Context(), hook.ExecutionContextKey, hook.GRPCSource) diff --git a/management/server/telemetry/http_api_metrics.go b/management/server/telemetry/http_api_metrics.go index 0b6f8beb6..c50ed1e51 100644 --- a/management/server/telemetry/http_api_metrics.go +++ b/management/server/telemetry/http_api_metrics.go @@ -7,8 +7,8 @@ import ( "strings" "time" - "github.com/google/uuid" "github.com/gorilla/mux" + "github.com/rs/xid" log "github.com/sirupsen/logrus" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/metric" @@ -169,7 +169,7 @@ func (m *HTTPMiddleware) Handler(h http.Handler) http.Handler { //nolint ctx := context.WithValue(r.Context(), hook.ExecutionContextKey, hook.HTTPSource) - reqID := uuid.New().String() + reqID := xid.New().String() //nolint ctx = context.WithValue(ctx, nbContext.RequestIDKey, reqID) From a9c28ef723c790792124209b02a5131087dda2b4 Mon Sep 17 00:00:00 2001 From: Zoltan Papp Date: Wed, 17 Dec 2025 13:49:02 +0100 Subject: [PATCH 12/13] Add stack trace for bundle (#4957) --- client/internal/debug/debug.go | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/client/internal/debug/debug.go b/client/internal/debug/debug.go index 58977b884..3c201ecfc 100644 --- a/client/internal/debug/debug.go +++ b/client/internal/debug/debug.go @@ -56,6 +56,7 @@ block.prof: Block profiling information. heap.prof: Heap profiling information (snapshot of memory allocations). allocs.prof: Allocations profiling information. threadcreate.prof: Thread creation profiling information. +stack_trace.txt: Complete stack traces of all goroutines at the time of bundle creation. Anonymization Process @@ -109,6 +110,9 @@ go tool pprof -http=:8088 heap.prof This will open a web browser tab with the profiling information. +Stack Trace +The stack_trace.txt file contains a complete snapshot of all goroutine stack traces at the time the debug bundle was created. + Routes The routes.txt file contains detailed routing table information in a tabular format: @@ -327,6 +331,10 @@ func (g *BundleGenerator) createArchive() error { log.Errorf("failed to add profiles to debug bundle: %v", err) } + if err := g.addStackTrace(); err != nil { + log.Errorf("failed to add stack trace to debug bundle: %v", err) + } + if err := g.addSyncResponse(); err != nil { return fmt.Errorf("add sync response: %w", err) } @@ -522,6 +530,18 @@ func (g *BundleGenerator) addProf() (err error) { return nil } +func (g *BundleGenerator) addStackTrace() error { + buf := make([]byte, 5242880) // 5 MB buffer + n := runtime.Stack(buf, true) + + stackTrace := bytes.NewReader(buf[:n]) + if err := g.addFileToZip(stackTrace, "stack_trace.txt"); err != nil { + return fmt.Errorf("add stack trace file to zip: %w", err) + } + + return nil +} + func (g *BundleGenerator) addInterfaces() error { interfaces, err := net.Interfaces() if err != nil { From 537151e0f365bfd9c94e8fa5b9c88b7cb1b78a02 Mon Sep 17 00:00:00 2001 From: Zoltan Papp Date: Wed, 17 Dec 2025 13:55:33 +0100 Subject: [PATCH 13/13] Remove redundant lock in peer update logic to avoid deadlock with exported functions (#4953) --- client/internal/peer/endpoint.go | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/client/internal/peer/endpoint.go b/client/internal/peer/endpoint.go index 39cb95591..52d66159c 100644 --- a/client/internal/peer/endpoint.go +++ b/client/internal/peer/endpoint.go @@ -20,7 +20,7 @@ type EndpointUpdater struct { wgConfig WgConfig initiator bool - // mu protects updateWireGuardPeer and cancelFunc + // mu protects cancelFunc mu sync.Mutex cancelFunc func() updateWg sync.WaitGroup @@ -86,11 +86,9 @@ func (e *EndpointUpdater) scheduleDelayedUpdate(ctx context.Context, addr *net.U case <-ctx.Done(): return case <-t.C: - e.mu.Lock() if err := e.updateWireGuardPeer(addr, presharedKey); err != nil { e.log.Errorf("failed to update WireGuard peer, address: %s, error: %v", addr, err) } - e.mu.Unlock() } }