mirror of
https://github.com/fosrl/newt.git
synced 2026-03-26 12:36:45 +00:00
234 lines
7.1 KiB
Go
234 lines
7.1 KiB
Go
package telemetry
|
|
|
|
import (
|
|
"context"
|
|
"sync"
|
|
|
|
"go.opentelemetry.io/otel"
|
|
"go.opentelemetry.io/otel/attribute"
|
|
"go.opentelemetry.io/otel/metric"
|
|
)
|
|
|
|
// Instruments and helpers for Newt metrics following the naming, units, and
|
|
// low-cardinality label guidance from the issue description.
|
|
//
|
|
// Counters end with _total, durations are in seconds, sizes in bytes.
|
|
// Only low-cardinality stable labels are supported: tunnel_id,
|
|
// transport, direction, result, reason, error_type.
|
|
var (
|
|
initOnce sync.Once
|
|
|
|
meter metric.Meter
|
|
|
|
// Site / Registration
|
|
mSiteRegistrations metric.Int64Counter
|
|
mSiteOnline metric.Int64ObservableGauge
|
|
mSiteLastHeartbeat metric.Float64ObservableGauge
|
|
|
|
// Tunnel / Sessions
|
|
mTunnelSessions metric.Int64ObservableGauge
|
|
mTunnelBytes metric.Int64Counter
|
|
mTunnelLatency metric.Float64Histogram
|
|
mReconnects metric.Int64Counter
|
|
|
|
// Connection / NAT
|
|
mConnAttempts metric.Int64Counter
|
|
mConnErrors metric.Int64Counter
|
|
|
|
// Config/Restart
|
|
mConfigReloads metric.Int64Counter
|
|
mRestartCount metric.Int64Counter
|
|
|
|
// Build info
|
|
mBuildInfo metric.Int64ObservableGauge
|
|
|
|
buildVersion string
|
|
buildCommit string
|
|
)
|
|
|
|
// attrsWithSite appends global site/region labels when present.
|
|
func attrsWithSite(extra ...attribute.KeyValue) []attribute.KeyValue {
|
|
attrs := make([]attribute.KeyValue, 0, len(extra)+2)
|
|
attrs = append(attrs, extra...)
|
|
attrs = append(attrs, siteAttrs()...)
|
|
return attrs
|
|
}
|
|
|
|
func registerInstruments() error {
|
|
var err error
|
|
initOnce.Do(func() {
|
|
meter = otel.Meter("newt")
|
|
|
|
// Site / Registration
|
|
mSiteRegistrations, err = meter.Int64Counter("newt_site_registrations_total",
|
|
metric.WithDescription("Total site registration attempts"))
|
|
if err != nil {
|
|
return
|
|
}
|
|
mSiteOnline, err = meter.Int64ObservableGauge("newt_site_online",
|
|
metric.WithDescription("Site online (0/1)"))
|
|
if err != nil {
|
|
return
|
|
}
|
|
mSiteLastHeartbeat, err = meter.Float64ObservableGauge("newt_site_last_heartbeat_seconds",
|
|
metric.WithDescription("Seconds since last site heartbeat"))
|
|
if err != nil {
|
|
return
|
|
}
|
|
|
|
// Tunnel / Sessions
|
|
mTunnelSessions, err = meter.Int64ObservableGauge("newt_tunnel_sessions",
|
|
metric.WithDescription("Active tunnel sessions"))
|
|
if err != nil {
|
|
return
|
|
}
|
|
mTunnelBytes, err = meter.Int64Counter("newt_tunnel_bytes_total",
|
|
metric.WithDescription("Tunnel bytes in/out"))
|
|
if err != nil {
|
|
return
|
|
}
|
|
mTunnelLatency, err = meter.Float64Histogram("newt_tunnel_latency_seconds",
|
|
metric.WithDescription("Per-tunnel latency in seconds"))
|
|
if err != nil {
|
|
return
|
|
}
|
|
mReconnects, err = meter.Int64Counter("newt_tunnel_reconnects_total",
|
|
metric.WithDescription("Tunnel reconnect events"))
|
|
if err != nil {
|
|
return
|
|
}
|
|
|
|
// Connection / NAT
|
|
mConnAttempts, err = meter.Int64Counter("newt_connection_attempts_total",
|
|
metric.WithDescription("Connection attempts"))
|
|
if err != nil {
|
|
return
|
|
}
|
|
mConnErrors, err = meter.Int64Counter("newt_connection_errors_total",
|
|
metric.WithDescription("Connection errors by type"))
|
|
if err != nil {
|
|
return
|
|
}
|
|
|
|
// Config/Restart
|
|
mConfigReloads, _ = meter.Int64Counter("newt_config_reloads_total",
|
|
metric.WithDescription("Configuration reloads"))
|
|
mRestartCount, _ = meter.Int64Counter("newt_restart_count_total",
|
|
metric.WithDescription("Process restart count (incremented on start)"))
|
|
|
|
// Build info gauge (value 1 with version/commit attributes)
|
|
mBuildInfo, _ = meter.Int64ObservableGauge("newt_build_info",
|
|
metric.WithDescription("Newt build information (value is always 1)"))
|
|
|
|
// Register a default callback for build info if version/commit set
|
|
meter.RegisterCallback(func(ctx context.Context, o metric.Observer) error {
|
|
if buildVersion == "" && buildCommit == "" {
|
|
return nil
|
|
}
|
|
attrs := []attribute.KeyValue{}
|
|
if buildVersion != "" {
|
|
attrs = append(attrs, attribute.String("version", buildVersion))
|
|
}
|
|
if buildCommit != "" {
|
|
attrs = append(attrs, attribute.String("commit", buildCommit))
|
|
}
|
|
attrs = append(attrs, siteAttrs()...)
|
|
o.ObserveInt64(mBuildInfo, 1, metric.WithAttributes(attrs...))
|
|
return nil
|
|
}, mBuildInfo)
|
|
})
|
|
return err
|
|
}
|
|
|
|
// Observable registration: Newt can register a callback to report gauges.
// Call SetObservableCallback once to start observing online status, last
// heartbeat seconds, and active sessions.

// obsOnce makes SetObservableCallback register its callback at most once.
var obsOnce sync.Once

// obsStopper is assigned by SetObservableCallback when it registers the
// callback; it stays nil until then.
var obsStopper func()
|
|
|
|
// SetObservableCallback registers a single callback that will be invoked
|
|
// on collection. Use the provided observer to emit values for the observable
|
|
// gauges defined here.
|
|
//
|
|
// Example inside your code (where you have access to current state):
|
|
//
|
|
// telemetry.SetObservableCallback(func(ctx context.Context, o metric.Observer) error {
|
|
// o.ObserveInt64(mSiteOnline, 1)
|
|
// o.ObserveFloat64(mSiteLastHeartbeat, time.Since(lastHB).Seconds())
|
|
// o.ObserveInt64(mTunnelSessions, int64(len(activeSessions)))
|
|
// return nil
|
|
// })
|
|
func SetObservableCallback(cb func(context.Context, metric.Observer) error) {
|
|
obsOnce.Do(func() {
|
|
meter.RegisterCallback(cb, mSiteOnline, mSiteLastHeartbeat, mTunnelSessions)
|
|
obsStopper = func() { /* no-op; otel callbacks are unregistered when provider shuts down */ }
|
|
})
|
|
}
|
|
|
|
// Build info registration
|
|
func RegisterBuildInfo(version, commit string) {
|
|
buildVersion = version
|
|
buildCommit = commit
|
|
// Increment restart count on boot
|
|
mRestartCount.Add(context.Background(), 1)
|
|
}
|
|
|
|
// Config reloads
|
|
func IncConfigReload(ctx context.Context, result string) {
|
|
mConfigReloads.Add(ctx, 1, metric.WithAttributes(attrsWithSite(
|
|
attribute.String("result", result),
|
|
)...))
|
|
}
|
|
|
|
// Helpers for counters/histograms
|
|
|
|
func IncSiteRegistration(ctx context.Context, result string) {
|
|
attrs := []attribute.KeyValue{
|
|
attribute.String("result", result),
|
|
}
|
|
mSiteRegistrations.Add(ctx, 1, metric.WithAttributes(attrsWithSite(attrs...)...))
|
|
}
|
|
|
|
func AddTunnelBytes(ctx context.Context, tunnelID, direction string, n int64) {
|
|
mTunnelBytes.Add(ctx, n, metric.WithAttributes(attrsWithSite(
|
|
attribute.String("tunnel_id", tunnelID),
|
|
attribute.String("direction", direction),
|
|
)...))
|
|
}
|
|
|
|
// AddTunnelBytesSet adds bytes using a pre-built attribute.Set to avoid per-call allocations.
|
|
func AddTunnelBytesSet(ctx context.Context, n int64, attrs attribute.Set) {
|
|
mTunnelBytes.Add(ctx, n, metric.WithAttributeSet(attrs))
|
|
}
|
|
|
|
func ObserveTunnelLatency(ctx context.Context, tunnelID, transport string, seconds float64) {
|
|
mTunnelLatency.Record(ctx, seconds, metric.WithAttributes(attrsWithSite(
|
|
attribute.String("tunnel_id", tunnelID),
|
|
attribute.String("transport", transport),
|
|
)...))
|
|
}
|
|
|
|
func IncReconnect(ctx context.Context, tunnelID, reason string) {
|
|
mReconnects.Add(ctx, 1, metric.WithAttributes(attrsWithSite(
|
|
attribute.String("tunnel_id", tunnelID),
|
|
attribute.String("reason", reason),
|
|
)...))
|
|
}
|
|
|
|
func IncConnAttempt(ctx context.Context, transport, result string) {
|
|
mConnAttempts.Add(ctx, 1, metric.WithAttributes(attrsWithSite(
|
|
attribute.String("transport", transport),
|
|
attribute.String("result", result),
|
|
)...))
|
|
}
|
|
|
|
func IncConnError(ctx context.Context, transport, typ string) {
|
|
mConnErrors.Add(ctx, 1, metric.WithAttributes(attrsWithSite(
|
|
attribute.String("transport", transport),
|
|
attribute.String("error_type", typ),
|
|
)...))
|
|
}
|