add metrics

This commit is contained in:
pascal
2026-05-18 19:23:18 +02:00
parent ddc4c20a31
commit 420802374b
5 changed files with 102 additions and 11 deletions

View File

@@ -25,6 +25,11 @@ type Metrics struct {
backendDuration metric.Int64Histogram
certificateIssueDuration metric.Int64Histogram
// Management sync metrics.
snapshotSyncDuration metric.Int64Histogram
snapshotBatchDuration metric.Int64Histogram
addPeerDuration metric.Int64Histogram
// L4 service-level metrics.
l4Services metric.Int64UpDownCounter
@@ -54,6 +59,9 @@ func New(ctx context.Context, meter metric.Meter) (*Metrics, error) {
if err := m.initHTTPMetrics(meter); err != nil {
return nil, err
}
if err := m.initSyncMetrics(meter); err != nil {
return nil, err
}
if err := m.initL4Metrics(meter); err != nil {
return nil, err
}
@@ -126,6 +134,57 @@ func (m *Metrics) initHTTPMetrics(meter metric.Meter) error {
return err
}
// initSyncMetrics registers the histograms that time management sync work:
// the whole initial snapshot sync, each mapping batch processed during it,
// and the creation of a peer for an account. Registration stops at the first
// error reported by the meter, and that error is returned unchanged.
func (m *Metrics) initSyncMetrics(meter metric.Meter) error {
	var err error

	if m.snapshotSyncDuration, err = meter.Int64Histogram(
		"proxy.sync.snapshot.duration.ms",
		metric.WithUnit("milliseconds"),
		metric.WithDescription("Duration from management connect until the initial snapshot sync is complete"),
	); err != nil {
		return err
	}

	if m.snapshotBatchDuration, err = meter.Int64Histogram(
		"proxy.sync.batch.duration.ms",
		metric.WithUnit("milliseconds"),
		metric.WithDescription("Duration to process a single mapping batch during initial snapshot sync"),
	); err != nil {
		return err
	}

	// Only the peer-add histogram passes explicit bucket boundaries
	// (10 through 10000); the two histograms above pass none.
	m.addPeerDuration, err = meter.Int64Histogram(
		"proxy.peer.add.duration.ms",
		metric.WithUnit("milliseconds"),
		metric.WithDescription("Duration to add a peer for an account (keygen + gRPC CreateProxyPeer + embed.New)"),
		metric.WithExplicitBucketBoundaries(10, 25, 50, 100, 250, 500, 1000, 2500, 5000, 10000),
	)
	return err
}
// RecordSnapshotSyncDuration reports how long the initial snapshot sync took,
// measured from connect to sync-complete. The duration is recorded on the
// snapshot sync histogram as whole milliseconds.
func (m *Metrics) RecordSnapshotSyncDuration(d time.Duration) {
	elapsedMs := d.Milliseconds()
	m.snapshotSyncDuration.Record(m.ctx, elapsedMs)
}
// RecordSnapshotBatchDuration reports how long a single mapping batch took to
// process during the initial sync. The duration is recorded on the batch
// histogram as whole milliseconds.
func (m *Metrics) RecordSnapshotBatchDuration(d time.Duration) {
	elapsedMs := d.Milliseconds()
	m.snapshotBatchDuration.Record(m.ctx, elapsedMs)
}
// RecordAddPeerDuration reports how long it took to create a new peer for an
// account. The duration is recorded as whole milliseconds and tagged with a
// "result" attribute: "success" when err is nil, "error" otherwise.
func (m *Metrics) RecordAddPeerDuration(d time.Duration, err error) {
	outcome := attribute.String("result", "success")
	if err != nil {
		outcome = attribute.String("result", "error")
	}

	m.addPeerDuration.Record(m.ctx, d.Milliseconds(), metric.WithAttributes(outcome))
}
func (m *Metrics) initL4Metrics(meter metric.Meter) error {
var err error

View File

@@ -142,6 +142,11 @@ type NetBird struct {
clients map[types.AccountID]*clientEntry
initLogOnce sync.Once
statusNotifier statusNotifier
// OnAddPeer, when set, is called from AddPeer as soon as the attempt to create
// a client for a new account finishes, whether it succeeded or failed (err is
// non-nil on failure). It is not called when an existing client is reused.
// The duration covers keygen + gRPC CreateProxyPeer + embed.New.
OnAddPeer func(d time.Duration, err error)
}
// ClientDebugInfo contains debug information about a client.
@@ -215,7 +220,11 @@ func (n *NetBird) AddPeer(ctx context.Context, accountID types.AccountID, key Se
n.clients[accountID] = entry
n.clientsMux.Unlock()
createStart := time.Now()
created, err := n.createClientEntry(ctx, accountID, key, authToken, si)
if n.OnAddPeer != nil {
n.OnAddPeer(time.Since(createStart), err)
}
if err != nil {
entry.initErr = err
close(entry.ready)