Add client metrics system with OpenTelemetry and VictoriaMetrics support

Implements a client metrics system that tracks the stages and performance
of peer connections. The system supports multiple backend implementations
(OpenTelemetry, VictoriaMetrics, and no-op) and records the duration of
each connection stage, from creation through the WireGuard handshake.

Key changes:
- Add metrics package with pluggable backend implementations
- Implement OpenTelemetry metrics backend
- Implement VictoriaMetrics metrics backend
- Add no-op metrics implementation for use when metrics are disabled
- Track connection stages: creation, semaphore, signaling, connection ready, and WireGuard handshake
- Move WireGuard watcher functionality to conn.go
- Refactor engine to integrate metrics tracking
- Add metrics export endpoint in debug server
This commit is contained in:
Zoltán Papp
2026-01-15 22:16:38 +01:00
parent c5eb5ba1c6
commit e3a5c44d37
16 changed files with 694 additions and 80 deletions

View File

@@ -32,12 +32,20 @@ func (s *Server) DebugBundle(_ context.Context, req *proto.DebugBundleRequest) (
log.Warnf("failed to get latest sync response: %v", err)
}
var clientMetrics debug.MetricsExporter
if s.connectClient != nil {
if engine := s.connectClient.Engine(); engine != nil {
clientMetrics = engine.GetClientMetrics()
}
}
bundleGenerator := debug.NewBundleGenerator(
debug.GeneratorDependencies{
InternalConfig: s.config,
StatusRecorder: s.statusRecorder,
SyncResponse: syncResponse,
LogFile: s.logFile,
ClientMetrics: clientMetrics,
},
debug.BundleConfig{
Anonymize: req.GetAnonymize(),

View File

@@ -24,7 +24,6 @@ import (
"google.golang.org/protobuf/types/known/timestamppb"
"github.com/netbirdio/netbird/client/internal/auth"
"github.com/netbirdio/netbird/client/internal/metrics"
"github.com/netbirdio/netbird/client/internal/profilemanager"
"github.com/netbirdio/netbird/client/system"
mgm "github.com/netbirdio/netbird/shared/management/client"
@@ -77,7 +76,6 @@ type Server struct {
statusRecorder *peer.Status
sessionWatcher *internal.SessionWatcher
clientMetrics *metrics.ClientMetrics
lastProbe time.Time
persistSyncResponse bool
@@ -111,7 +109,6 @@ func New(ctx context.Context, logFile string, configFile string, profilesDisable
profilesDisabled: profilesDisabled,
updateSettingsDisabled: updateSettingsDisabled,
jwtCache: newJWTCache(),
clientMetrics: metrics.NewClientMetrics(),
}
}
@@ -1527,7 +1524,7 @@ func (s *Server) GetFeatures(ctx context.Context, msg *proto.GetFeaturesRequest)
func (s *Server) connect(ctx context.Context, config *profilemanager.Config, statusRecorder *peer.Status, doInitialAutoUpdate bool, runningChan chan struct{}) error {
log.Tracef("running client connection")
s.connectClient = internal.NewConnectClient(ctx, config, statusRecorder, doInitialAutoUpdate, s.clientMetrics)
s.connectClient = internal.NewConnectClient(ctx, config, statusRecorder, doInitialAutoUpdate)
s.connectClient.SetSyncResponsePersistence(s.persistSyncResponse)
if err := s.connectClient.Run(runningChan); err != nil {
return err