mirror of
https://github.com/fosrl/newt.git
synced 2026-03-26 20:46:41 +00:00
refactor: Simplify telemetry metrics by removing site_id and enhancing tunnel_id usage
This commit is contained in:
@@ -13,8 +13,8 @@ import (
|
||||
// low-cardinality label guidance from the issue description.
|
||||
//
|
||||
// Counters end with _total, durations are in seconds, sizes in bytes.
|
||||
// Only low-cardinality stable labels are supported: site_id, tunnel_id,
|
||||
// transport, direction, result, reason, error_type, region.
|
||||
// Only low-cardinality stable labels are supported: tunnel_id,
|
||||
// transport, direction, result, reason, error_type.
|
||||
var (
|
||||
initOnce sync.Once
|
||||
|
||||
@@ -147,9 +147,9 @@ var (
|
||||
// Example inside your code (where you have access to current state):
|
||||
//
|
||||
// telemetry.SetObservableCallback(func(ctx context.Context, o metric.Observer) error {
|
||||
// o.ObserveInt64(mSiteOnline, 1, attribute.String("site_id", siteID))
|
||||
// o.ObserveFloat64(mSiteLastHeartbeat, time.Since(lastHB).Seconds(), attribute.String("site_id", siteID))
|
||||
// o.ObserveInt64(mTunnelSessions, int64(len(activeSessions)), attribute.String("site_id", siteID))
|
||||
// o.ObserveInt64(mSiteOnline, 1)
|
||||
// o.ObserveFloat64(mSiteLastHeartbeat, time.Since(lastHB).Seconds())
|
||||
// o.ObserveInt64(mTunnelSessions, int64(len(activeSessions)))
|
||||
// return nil
|
||||
// })
|
||||
func SetObservableCallback(cb func(context.Context, metric.Observer) error) {
|
||||
@@ -174,20 +174,15 @@ func IncConfigReload(ctx context.Context, result string) {
|
||||
|
||||
// Helpers for counters/histograms
|
||||
|
||||
func IncSiteRegistration(ctx context.Context, siteID, region, result string) {
|
||||
func IncSiteRegistration(ctx context.Context, result string) {
|
||||
attrs := []attribute.KeyValue{
|
||||
attribute.String("site_id", siteID),
|
||||
attribute.String("result", result),
|
||||
}
|
||||
if region != "" {
|
||||
attrs = append(attrs, attribute.String("region", region))
|
||||
}
|
||||
mSiteRegistrations.Add(ctx, 1, metric.WithAttributes(attrs...))
|
||||
}
|
||||
|
||||
func AddTunnelBytes(ctx context.Context, siteID, tunnelID, direction string, n int64) {
|
||||
func AddTunnelBytes(ctx context.Context, tunnelID, direction string, n int64) {
|
||||
mTunnelBytes.Add(ctx, n, metric.WithAttributes(
|
||||
attribute.String("site_id", siteID),
|
||||
attribute.String("tunnel_id", tunnelID),
|
||||
attribute.String("direction", direction),
|
||||
))
|
||||
@@ -198,33 +193,29 @@ func AddTunnelBytesSet(ctx context.Context, n int64, attrs attribute.Set) {
|
||||
mTunnelBytes.Add(ctx, n, metric.WithAttributeSet(attrs))
|
||||
}
|
||||
|
||||
func ObserveTunnelLatency(ctx context.Context, siteID, tunnelID, transport string, seconds float64) {
|
||||
func ObserveTunnelLatency(ctx context.Context, tunnelID, transport string, seconds float64) {
|
||||
mTunnelLatency.Record(ctx, seconds, metric.WithAttributes(
|
||||
attribute.String("site_id", siteID),
|
||||
attribute.String("tunnel_id", tunnelID),
|
||||
attribute.String("transport", transport),
|
||||
))
|
||||
}
|
||||
|
||||
func IncReconnect(ctx context.Context, siteID, tunnelID, reason string) {
|
||||
func IncReconnect(ctx context.Context, tunnelID, reason string) {
|
||||
mReconnects.Add(ctx, 1, metric.WithAttributes(
|
||||
attribute.String("site_id", siteID),
|
||||
attribute.String("tunnel_id", tunnelID),
|
||||
attribute.String("reason", reason),
|
||||
))
|
||||
}
|
||||
|
||||
func IncConnAttempt(ctx context.Context, siteID, transport, result string) {
|
||||
func IncConnAttempt(ctx context.Context, transport, result string) {
|
||||
mConnAttempts.Add(ctx, 1, metric.WithAttributes(
|
||||
attribute.String("site_id", siteID),
|
||||
attribute.String("transport", transport),
|
||||
attribute.String("result", result),
|
||||
))
|
||||
}
|
||||
|
||||
func IncConnError(ctx context.Context, siteID, transport, typ string) {
|
||||
func IncConnError(ctx context.Context, transport, typ string) {
|
||||
mConnErrors.Add(ctx, 1, metric.WithAttributes(
|
||||
attribute.String("site_id", siteID),
|
||||
attribute.String("transport", transport),
|
||||
attribute.String("error_type", typ),
|
||||
))
|
||||
|
||||
@@ -42,16 +42,19 @@ func RegisterStateView(v StateView) {
|
||||
if online {
|
||||
val = 1
|
||||
}
|
||||
o.ObserveInt64(mSiteOnline, val, metric.WithAttributes(attribute.String("site_id", siteID)))
|
||||
o.ObserveInt64(mSiteOnline, val)
|
||||
}
|
||||
if t, ok := sv.LastHeartbeat(siteID); ok {
|
||||
secs := time.Since(t).Seconds()
|
||||
o.ObserveFloat64(mSiteLastHeartbeat, secs, metric.WithAttributes(attribute.String("site_id", siteID)))
|
||||
o.ObserveFloat64(mSiteLastHeartbeat, secs)
|
||||
}
|
||||
// If the view supports per-tunnel sessions, report them labeled by tunnel_id.
|
||||
if tm, ok := any.(interface{ SessionsByTunnel() map[string]int64 }); ok {
|
||||
for tid, n := range tm.SessionsByTunnel() {
|
||||
o.ObserveInt64(mTunnelSessions, n, metric.WithAttributes(attribute.String("tunnel_id", tid)))
|
||||
o.ObserveInt64(mTunnelSessions, n, metric.WithAttributes(
|
||||
attribute.String("tunnel_id", tid),
|
||||
attribute.String("transport", "tcp"),
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -93,16 +93,21 @@ type Setup struct {
|
||||
// installs recommended histogram views for *_latency_seconds, and returns a Setup with
|
||||
// a Shutdown method to flush exporters.
|
||||
func Init(ctx context.Context, cfg Config) (*Setup, error) {
|
||||
// Build resource with required attributes and only include optional ones when non-empty
|
||||
attrs := []attribute.KeyValue{
|
||||
semconv.ServiceName(cfg.ServiceName),
|
||||
semconv.ServiceVersion(cfg.ServiceVersion),
|
||||
}
|
||||
if cfg.SiteID != "" {
|
||||
attrs = append(attrs, attribute.String("site_id", cfg.SiteID))
|
||||
}
|
||||
if cfg.Region != "" {
|
||||
attrs = append(attrs, attribute.String("region", cfg.Region))
|
||||
}
|
||||
res, _ := resource.New(ctx,
|
||||
resource.WithFromEnv(),
|
||||
resource.WithHost(),
|
||||
resource.WithAttributes(
|
||||
semconv.ServiceName(cfg.ServiceName),
|
||||
semconv.ServiceVersion(cfg.ServiceVersion),
|
||||
// Optional resource attributes
|
||||
attribute.String("site_id", cfg.SiteID),
|
||||
attribute.String("region", cfg.Region),
|
||||
),
|
||||
resource.WithAttributes(attrs...),
|
||||
)
|
||||
|
||||
s := &Setup{}
|
||||
@@ -168,7 +173,7 @@ func Init(ctx context.Context, cfg Config) (*Setup, error) {
|
||||
AttributeFilter: func(kv attribute.KeyValue) bool {
|
||||
k := string(kv.Key)
|
||||
switch k {
|
||||
case "tunnel_id", "transport", "direction", "protocol", "result", "reason", "error_type":
|
||||
case "tunnel_id", "transport", "direction", "protocol", "result", "reason", "error_type", "version", "commit":
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
|
||||
@@ -25,7 +25,7 @@ cfg := Config{ServiceName: "newt", PromEnabled: true, AdminAddr: "127.0.0.1:0",
|
||||
defer ts.Close()
|
||||
|
||||
// Trigger a counter
|
||||
IncConnAttempt(ctx, "ignored", "websocket", "success")
|
||||
IncConnAttempt(ctx, "websocket", "success")
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
|
||||
resp, err := http.Get(ts.URL)
|
||||
|
||||
@@ -36,7 +36,7 @@ func TestMetricsSmoke(t *testing.T) {
|
||||
defer ts.Close()
|
||||
|
||||
// Record a simple metric and then fetch /metrics
|
||||
IncConnAttempt(ctx, "site-1", "websocket", "success")
|
||||
IncConnAttempt(ctx, "websocket", "success")
|
||||
// Give the exporter a tick to collect
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user