mirror of
https://github.com/netbirdio/netbird.git
synced 2026-05-20 07:39:56 +00:00
* [management] Add metrics for peer status updates and ephemeral cleanup
The session-fenced MarkPeerConnected / MarkPeerDisconnected path and
the ephemeral peer cleanup loop both run silently today: when fencing
rejects a stale stream, when a cleanup tick deletes peers, or when a
batch delete fails, we have no operational signal beyond log lines.
Add OpenTelemetry counters and a histogram so the same SLO-style
dashboards that already exist for the network-map controller can cover
peer connect/disconnect and ephemeral cleanup too.
All new attributes are bounded enums: operation in {connect,disconnect}
and outcome in {applied,stale,error,peer_not_found}. No account, peer,
or user ID is ever written as a metric label — total cardinality is
fixed at compile time (8 counter series, 2 histogram series, 4 unlabeled
ephemeral series).
Metric methods are nil-receiver safe so test composition that doesn't
wire telemetry (the bulk of the existing tests) works unchanged. The
ephemeral manager exposes a SetMetrics setter rather than taking the
collector through its constructor, keeping the constructor signature
stable across all test call sites.
* [management] Add OpenTelemetry metrics for ephemeral peer cleanup
Introduce counters for tracking ephemeral peer cleanup, including peers pending deletion, cleanup runs, successful deletions, and failed batches. Metrics are nil-receiver safe to ensure compatibility with test setups without telemetry.
148 lines
5.1 KiB
Go
148 lines
5.1 KiB
Go
package server
|
|
|
|
import (
|
|
"context"
|
|
|
|
log "github.com/sirupsen/logrus"
|
|
|
|
"github.com/netbirdio/management-integrations/integrations"
|
|
|
|
"github.com/netbirdio/netbird/management/internals/modules/reverseproxy/proxy"
|
|
proxymanager "github.com/netbirdio/netbird/management/internals/modules/reverseproxy/proxy/manager"
|
|
|
|
"github.com/netbirdio/netbird/management/internals/controllers/network_map"
|
|
nmapcontroller "github.com/netbirdio/netbird/management/internals/controllers/network_map/controller"
|
|
"github.com/netbirdio/netbird/management/internals/controllers/network_map/update_channel"
|
|
"github.com/netbirdio/netbird/management/internals/modules/peers/ephemeral"
|
|
"github.com/netbirdio/netbird/management/internals/modules/peers/ephemeral/manager"
|
|
"github.com/netbirdio/netbird/management/internals/shared/grpc"
|
|
"github.com/netbirdio/netbird/management/server"
|
|
"github.com/netbirdio/netbird/management/server/auth"
|
|
"github.com/netbirdio/netbird/management/server/integrations/integrated_validator"
|
|
"github.com/netbirdio/netbird/management/server/integrations/port_forwarding"
|
|
"github.com/netbirdio/netbird/management/server/job"
|
|
nbjwt "github.com/netbirdio/netbird/shared/auth/jwt"
|
|
)
|
|
|
|
func (s *BaseServer) PeersUpdateManager() network_map.PeersUpdateManager {
|
|
return Create(s, func() network_map.PeersUpdateManager {
|
|
return update_channel.NewPeersUpdateManager(s.Metrics())
|
|
})
|
|
}
|
|
|
|
func (s *BaseServer) JobManager() *job.Manager {
|
|
return Create(s, func() *job.Manager {
|
|
return job.NewJobManager(s.Metrics(), s.Store(), s.PeersManager())
|
|
})
|
|
}
|
|
|
|
func (s *BaseServer) IntegratedValidator() integrated_validator.IntegratedValidator {
|
|
return Create(s, func() integrated_validator.IntegratedValidator {
|
|
integratedPeerValidator, err := integrations.NewIntegratedValidator(
|
|
context.Background(),
|
|
s.PeersManager(),
|
|
s.SettingsManager(),
|
|
s.EventStore(),
|
|
s.CacheStore())
|
|
if err != nil {
|
|
log.Errorf("failed to create integrated peer validator: %v", err)
|
|
}
|
|
return integratedPeerValidator
|
|
})
|
|
}
|
|
|
|
func (s *BaseServer) ProxyController() port_forwarding.Controller {
|
|
return Create(s, func() port_forwarding.Controller {
|
|
return integrations.NewController(s.Store())
|
|
})
|
|
}
|
|
|
|
func (s *BaseServer) SecretsManager() grpc.SecretsManager {
|
|
return Create(s, func() grpc.SecretsManager {
|
|
secretsManager, err := grpc.NewTimeBasedAuthSecretsManager(s.PeersUpdateManager(), s.Config.TURNConfig, s.Config.Relay, s.SettingsManager(), s.GroupsManager())
|
|
if err != nil {
|
|
log.Fatalf("failed to create secrets manager: %v", err)
|
|
}
|
|
return secretsManager
|
|
})
|
|
}
|
|
|
|
func (s *BaseServer) SessionStore() *auth.SessionStore {
|
|
return Create(s, func() *auth.SessionStore {
|
|
return auth.NewSessionStore(s.CacheStore())
|
|
})
|
|
}
|
|
|
|
func (s *BaseServer) AuthManager() auth.Manager {
|
|
audiences := s.Config.GetAuthAudiences()
|
|
audience := s.Config.HttpConfig.AuthAudience
|
|
keysLocation := s.Config.HttpConfig.AuthKeysLocation
|
|
signingKeyRefreshEnabled := s.Config.HttpConfig.IdpSignKeyRefreshEnabled
|
|
issuer := s.Config.HttpConfig.AuthIssuer
|
|
userIDClaim := s.Config.HttpConfig.AuthUserIDClaim
|
|
var keyFetcher nbjwt.KeyFetcher
|
|
|
|
// Use embedded IdP configuration if available
|
|
if oauthProvider := s.OAuthConfigProvider(); oauthProvider != nil {
|
|
audiences = oauthProvider.GetClientIDs()
|
|
if len(audiences) > 0 {
|
|
audience = audiences[0] // Use the first client ID as the primary audience
|
|
}
|
|
keyFetcher = oauthProvider.GetKeyFetcher()
|
|
// Fall back to default keys location if direct key fetching is not available
|
|
if keyFetcher == nil {
|
|
keysLocation = oauthProvider.GetLocalKeysLocation()
|
|
}
|
|
signingKeyRefreshEnabled = true
|
|
issuer = oauthProvider.GetIssuer()
|
|
userIDClaim = oauthProvider.GetUserIDClaim()
|
|
}
|
|
|
|
return Create(s, func() auth.Manager {
|
|
return auth.NewManager(s.Store(),
|
|
issuer,
|
|
audience,
|
|
keysLocation,
|
|
userIDClaim,
|
|
audiences,
|
|
signingKeyRefreshEnabled,
|
|
keyFetcher)
|
|
})
|
|
}
|
|
|
|
func (s *BaseServer) EphemeralManager() ephemeral.Manager {
|
|
return Create(s, func() ephemeral.Manager {
|
|
em := manager.NewEphemeralManager(s.Store(), s.PeersManager())
|
|
if metrics := s.Metrics(); metrics != nil {
|
|
em.SetMetrics(metrics.EphemeralPeersMetrics())
|
|
}
|
|
return em
|
|
})
|
|
}
|
|
|
|
func (s *BaseServer) NetworkMapController() network_map.Controller {
|
|
return Create(s, func() network_map.Controller {
|
|
return nmapcontroller.NewController(context.Background(), s.Store(), s.Metrics(), s.PeersUpdateManager(), s.AccountRequestBuffer(), s.IntegratedValidator(), s.SettingsManager(), s.DNSDomain(), s.ProxyController(), s.EphemeralManager(), s.Config)
|
|
})
|
|
}
|
|
|
|
func (s *BaseServer) ServiceProxyController() proxy.Controller {
|
|
return Create(s, func() proxy.Controller {
|
|
controller, err := proxymanager.NewGRPCController(s.ReverseProxyGRPCServer(), s.Metrics().GetMeter())
|
|
if err != nil {
|
|
log.Fatalf("failed to create service proxy controller: %v", err)
|
|
}
|
|
return controller
|
|
})
|
|
}
|
|
|
|
func (s *BaseServer) AccountRequestBuffer() *server.AccountRequestBuffer {
|
|
return Create(s, func() *server.AccountRequestBuffer {
|
|
return server.NewAccountRequestBuffer(context.Background(), s.Store())
|
|
})
|
|
}
|
|
|
|
func (s *BaseServer) DNSDomain() string {
|
|
return s.dnsDomain
|
|
}
|