diff --git a/client/internal/connect.go b/client/internal/connect.go index 017c8bf10..7ca3d8f49 100644 --- a/client/internal/connect.go +++ b/client/internal/connect.go @@ -22,6 +22,7 @@ import ( "github.com/netbirdio/netbird/client/iface/device" "github.com/netbirdio/netbird/client/internal/dns" "github.com/netbirdio/netbird/client/internal/listener" + "github.com/netbirdio/netbird/client/internal/metrics" "github.com/netbirdio/netbird/client/internal/peer" "github.com/netbirdio/netbird/client/internal/profilemanager" "github.com/netbirdio/netbird/client/internal/statemanager" @@ -52,6 +53,7 @@ type ConnectClient struct { engineMutex sync.Mutex persistSyncResponse bool + clientMetrics *metrics.ClientMetrics } func NewConnectClient( @@ -59,7 +61,7 @@ func NewConnectClient( config *profilemanager.Config, statusRecorder *peer.Status, doInitalAutoUpdate bool, - + clientMetrics *metrics.ClientMetrics, ) *ConnectClient { return &ConnectClient{ ctx: ctx, @@ -67,6 +69,7 @@ func NewConnectClient( statusRecorder: statusRecorder, doInitialAutoUpdate: doInitalAutoUpdate, engineMutex: sync.Mutex{}, + clientMetrics: clientMetrics, } } @@ -308,7 +311,7 @@ func (c *ConnectClient) run(mobileDependency MobileDependency, runningChan chan checks := loginResp.GetChecks() c.engineMutex.Lock() - engine := NewEngine(engineCtx, cancel, signalClient, mgmClient, relayManager, engineConfig, mobileDependency, c.statusRecorder, checks, stateManager) + engine := NewEngine(engineCtx, cancel, signalClient, mgmClient, relayManager, engineConfig, mobileDependency, c.statusRecorder, checks, stateManager, c.clientMetrics) engine.SetSyncResponsePersistence(c.persistSyncResponse) c.engine = engine c.engineMutex.Unlock() diff --git a/client/internal/debug/debug.go b/client/internal/debug/debug.go index 01a0377a5..25390fe65 100644 --- a/client/internal/debug/debug.go +++ b/client/internal/debug/debug.go @@ -51,6 +51,7 @@ resolved_domains.txt: Anonymized resolved domain IP addresses from the status re config.txt: Anonymized configuration information of the NetBird client. network_map.json: Anonymized sync response containing peer configurations, routes, DNS settings, and firewall rules. state.json: Anonymized client state dump containing netbird states for the active profile. +metrics.txt: Client metrics in Prometheus format including connection statistics, reliability metrics, and performance indicators. mutex.prof: Mutex profiling information. goroutine.prof: Goroutine profiling information. block.prof: Block profiling information. @@ -216,6 +217,11 @@ const ( darwinStdoutLogPath = "/var/log/netbird.err.log" ) +// MetricsExporter is an interface for exporting metrics +type MetricsExporter interface { + Export(w io.Writer) error +} + type BundleGenerator struct { anonymizer *anonymize.Anonymizer @@ -224,6 +230,7 @@ type BundleGenerator struct { statusRecorder *peer.Status syncResponse *mgmProto.SyncResponse logFile string + clientMetrics MetricsExporter anonymize bool clientStatus string @@ -245,6 +252,7 @@ type GeneratorDependencies struct { StatusRecorder *peer.Status SyncResponse *mgmProto.SyncResponse LogFile string + ClientMetrics MetricsExporter } func NewBundleGenerator(deps GeneratorDependencies, cfg BundleConfig) *BundleGenerator { @@ -261,6 +269,7 @@ func NewBundleGenerator(deps GeneratorDependencies, cfg BundleConfig) *BundleGen statusRecorder: deps.StatusRecorder, syncResponse: deps.SyncResponse, logFile: deps.LogFile, + clientMetrics: deps.ClientMetrics, anonymize: cfg.Anonymize, clientStatus: cfg.ClientStatus, @@ -348,6 +357,10 @@ func (g *BundleGenerator) createArchive() error { log.Errorf("failed to add corrupted state files to debug bundle: %v", err) } + if err := g.addMetrics(); err != nil { + log.Errorf("failed to add metrics to debug bundle: %v", err) + } + if err := g.addWgShow(); err != nil { log.Errorf("failed to add wg show output: %v", err) } @@ -655,6 +668,25 @@ func (g *BundleGenerator) addStateFile() error { return nil } +func (g *BundleGenerator) addMetrics() error { + if g.clientMetrics == nil { + log.Debugf("skipping metrics in debug bundle: no metrics collector") + return nil + } + + var buf bytes.Buffer + if err := g.clientMetrics.Export(&buf); err != nil { + return fmt.Errorf("export metrics: %w", err) + } + + if err := g.addFileToZip(&buf, "metrics.txt"); err != nil { + return fmt.Errorf("add metrics file to zip: %w", err) + } + + log.Debugf("added metrics to debug bundle") + return nil +} + func (g *BundleGenerator) addUpdateLogs() error { inst := installer.New() logFiles := inst.LogFiles() diff --git a/client/internal/engine.go b/client/internal/engine.go index 4f18c3bc8..439058afc 100644 --- a/client/internal/engine.go +++ b/client/internal/engine.go @@ -35,6 +35,7 @@ import ( dnsconfig "github.com/netbirdio/netbird/client/internal/dns/config" "github.com/netbirdio/netbird/client/internal/dnsfwd" "github.com/netbirdio/netbird/client/internal/ingressgw" + "github.com/netbirdio/netbird/client/internal/metrics" "github.com/netbirdio/netbird/client/internal/netflow" nftypes "github.com/netbirdio/netbird/client/internal/netflow/types" "github.com/netbirdio/netbird/client/internal/networkmonitor" @@ -211,6 +212,9 @@ type Engine struct { shutdownWg sync.WaitGroup probeStunTurn *relay.StunTurnProbe + + // clientMetrics collects and pushes metrics + clientMetrics *metrics.ClientMetrics } // Peer is an instance of the Connection Peer @@ -224,7 +228,7 @@ type localIpUpdater interface { } // NewEngine creates a new Connection Engine with probes attached -func NewEngine(clientCtx context.Context, clientCancel context.CancelFunc, signalClient signal.Client, mgmClient mgm.Client, relayManager *relayClient.Manager, config *EngineConfig, mobileDep MobileDependency, statusRecorder *peer.Status, checks []*mgmProto.Checks, stateManager *statemanager.Manager) *Engine { +func NewEngine(clientCtx context.Context, clientCancel context.CancelFunc, signalClient signal.Client, mgmClient mgm.Client, relayManager *relayClient.Manager, config *EngineConfig, mobileDep MobileDependency, statusRecorder *peer.Status, checks []*mgmProto.Checks, stateManager *statemanager.Manager, clientMetrics *metrics.ClientMetrics) *Engine { engine := &Engine{ clientCtx: clientCtx, clientCancel: clientCancel, @@ -244,6 +248,7 @@ func NewEngine(clientCtx context.Context, clientCancel context.CancelFunc, signa checks: checks, connSemaphore: semaphoregroup.NewSemaphoreGroup(connInitLimit), probeStunTurn: relay.NewStunTurnProbe(relay.DefaultCacheTTL), + clientMetrics: clientMetrics, } log.Infof("I am: %s", config.WgPrivateKey.PublicKey().String()) @@ -289,6 +294,11 @@ func (e *Engine) Stop() error { e.updateManager.Stop() } + // Update metrics engine status + if e.clientMetrics != nil { + e.clientMetrics.SetEngineStatus(0) // 0=stopped + } + log.Info("cleaning up status recorder states") e.statusRecorder.ReplaceOfflinePeers([]peer.State{}) e.statusRecorder.UpdateDNSStates([]peer.NSGroupState{}) @@ -519,6 +529,11 @@ func (e *Engine) Start(netbirdConfig *mgmProto.NetbirdConfig, mgmtURL *url.URL) } }() + // Update metrics engine status + if e.clientMetrics != nil { + e.clientMetrics.SetEngineStatus(1) // 1=running + } + return nil } diff --git a/client/internal/metrics/metrics.go b/client/internal/metrics/metrics.go new file mode 100644 index 000000000..b1322c532 --- /dev/null +++ b/client/internal/metrics/metrics.go @@ -0,0 +1,32 @@ +package metrics + +import ( + "io" + + "github.com/VictoriaMetrics/metrics" +) + +// ClientMetrics holds all client-side metrics +type ClientMetrics struct { + // ICE negotiation metrics + iceNegotiationDuration *metrics.Histogram +} + +// NewClientMetrics creates a new ClientMetrics instance +func NewClientMetrics() *ClientMetrics { + return &ClientMetrics{ + // ICE negotiation metrics + iceNegotiationDuration: metrics.NewHistogram(`netbird_client_ice_negotiation_duration_seconds`), + } +} + +// RecordICENegotiationDuration records the time taken for ICE negotiation +func (m *ClientMetrics) RecordICENegotiationDuration(seconds float64) { + m.iceNegotiationDuration.Update(seconds) +} + +// Export writes all metrics in Prometheus format to the provided writer +func (m *ClientMetrics) Export(w io.Writer) error { + metrics.WritePrometheus(w, true) + return nil +} diff --git a/client/server/server.go b/client/server/server.go index 7b6c4e98c..66dca5cd2 100644 --- a/client/server/server.go +++ b/client/server/server.go @@ -24,6 +24,7 @@ import ( "google.golang.org/protobuf/types/known/timestamppb" "github.com/netbirdio/netbird/client/internal/auth" + "github.com/netbirdio/netbird/client/internal/metrics" "github.com/netbirdio/netbird/client/internal/profilemanager" "github.com/netbirdio/netbird/client/system" mgm "github.com/netbirdio/netbird/shared/management/client" @@ -76,6 +77,7 @@ type Server struct { statusRecorder *peer.Status sessionWatcher *internal.SessionWatcher + clientMetrics *metrics.ClientMetrics lastProbe time.Time persistSyncResponse bool @@ -109,6 +111,7 @@ func New(ctx context.Context, logFile string, configFile string, profilesDisable profilesDisabled: profilesDisabled, updateSettingsDisabled: updateSettingsDisabled, jwtCache: newJWTCache(), + clientMetrics: metrics.NewClientMetrics(), } } @@ -1524,7 +1527,7 @@ func (s *Server) GetFeatures(ctx context.Context, msg *proto.GetFeaturesRequest) func (s *Server) connect(ctx context.Context, config *profilemanager.Config, statusRecorder *peer.Status, doInitialAutoUpdate bool, runningChan chan struct{}) error { log.Tracef("running client connection") - s.connectClient = internal.NewConnectClient(ctx, config, statusRecorder, doInitialAutoUpdate) + s.connectClient = internal.NewConnectClient(ctx, config, statusRecorder, doInitialAutoUpdate, s.clientMetrics) s.connectClient.SetSyncResponsePersistence(s.persistSyncResponse) if err := s.connectClient.Run(runningChan); err != nil { return err diff --git a/go.mod b/go.mod index cf55b9260..5c1bd1e8a 100644 --- a/go.mod +++ b/go.mod @@ -140,6 +140,7 @@ require ( github.com/Masterminds/sprig/v3 v3.3.0 // indirect github.com/Microsoft/go-winio v0.6.2 // indirect github.com/Microsoft/hcsshim v0.12.3 // indirect + github.com/VictoriaMetrics/metrics v1.40.2 // indirect github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be // indirect github.com/awnumar/memcall v0.4.0 // indirect github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.10 // indirect @@ -261,6 +262,8 @@ require ( github.com/stretchr/objx v0.5.2 // indirect github.com/tklauser/go-sysconf v0.3.14 // indirect github.com/tklauser/numcpus v0.8.0 // indirect + github.com/valyala/fastrand v1.1.0 // indirect + github.com/valyala/histogram v1.2.0 // indirect github.com/vishvananda/netns v0.0.5 // indirect github.com/vmihailenco/tagparser/v2 v2.0.0 // indirect github.com/wlynxg/anet v0.0.3 // indirect diff --git a/go.sum b/go.sum index e89e0ef12..0b1377e2b 100644 --- a/go.sum +++ b/go.sum @@ -37,6 +37,8 @@ github.com/Microsoft/hcsshim v0.12.3 h1:LS9NXqXhMoqNCplK1ApmVSfB4UnVLRDWRapB6EIl github.com/Microsoft/hcsshim v0.12.3/go.mod h1:Iyl1WVpZzr+UkzjekHZbV8o5Z9ZkxNGx6CtY2Qg/JVQ= github.com/TheJumpCloud/jcapi-go v3.0.0+incompatible h1:hqcTK6ZISdip65SR792lwYJTa/axESA0889D3UlZbLo= github.com/TheJumpCloud/jcapi-go v3.0.0+incompatible/go.mod h1:6B1nuc1MUs6c62ODZDl7hVE5Pv7O2XGSkgg2olnq34I= +github.com/VictoriaMetrics/metrics v1.40.2 h1:OVSjKcQEx6JAwGeu8/KQm9Su5qJ72TMEW4xYn5vw3Ac= +github.com/VictoriaMetrics/metrics v1.40.2/go.mod h1:XE4uudAAIRaJE614Tl5HMrtoEU6+GDZO4QTnNSsZRuA= github.com/alexbrainman/sspi v0.0.0-20250919150558-7d374ff0d59e h1:4dAU9FXIyQktpoUAgOJK3OTFc/xug0PCXYCqU0FgDKI= github.com/alexbrainman/sspi v0.0.0-20250919150558-7d374ff0d59e/go.mod h1:cEWa1LVoE5KvSD9ONXsZrj0z6KqySlCCNKHlLzbqAt4= github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be h1:9AeTilPcZAjCFIImctFaOjnTIavg87rW78vTPkQqLI8= @@ -566,6 +568,10 @@ github.com/tklauser/numcpus v0.8.0 h1:Mx4Wwe/FjZLeQsK/6kt2EOepwwSl7SmJrK5bV/dXYg github.com/tklauser/numcpus v0.8.0/go.mod h1:ZJZlAY+dmR4eut8epnzf0u/VwodKmryxR8txiloSqBE= github.com/ugorji/go v1.1.7/go.mod h1:kZn38zHttfInRq0xu/PH0az30d+z6vm202qpg1oXVMw= github.com/ugorji/go/codec v1.1.7/go.mod h1:Ax+UKWsSmolVDwsd+7N3ZtXu+yMGCf907BLYF3GoBXY= +github.com/valyala/fastrand v1.1.0 h1:f+5HkLW4rsgzdNoleUOB69hyT9IlD2ZQh9GyDMfb5G8= +github.com/valyala/fastrand v1.1.0/go.mod h1:HWqCzkrkg6QXT8V2EXWvXCoow7vLwOFN002oeRzjapQ= +github.com/valyala/histogram v1.2.0 h1:wyYGAZZt3CpwUiIb9AU/Zbllg1llXyrtApRS815OLoQ= +github.com/valyala/histogram v1.2.0/go.mod h1:Hb4kBwb4UxsaNbbbh+RRz8ZR6pdodR57tzWUS3BUzXY= github.com/vishvananda/netlink v1.3.1 h1:3AEMt62VKqz90r0tmNhog0r/PpWKmrEShJU0wJW6bV0= github.com/vishvananda/netlink v1.3.1/go.mod h1:ARtKouGSTGchR8aMwmkzC0qiNPrrWO5JS/XMVl45+b4= github.com/vishvananda/netns v0.0.5 h1:DfiHV+j8bA32MFM7bfEunvT8IAqQ/NzSJHtcmW5zdEY=