mirror of
https://github.com/fosrl/newt.git
synced 2026-03-27 04:56:41 +00:00
Add telemetry metrics and constants for improved observability
This commit is contained in:
265
internal/telemetry/telemetry.go
Normal file
265
internal/telemetry/telemetry.go
Normal file
@@ -0,0 +1,265 @@
|
||||
package telemetry
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"net/http"
|
||||
"os"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
promclient "github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/client_golang/prometheus/promhttp"
|
||||
"go.opentelemetry.io/contrib/instrumentation/runtime"
|
||||
"go.opentelemetry.io/otel"
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc"
|
||||
"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc"
|
||||
"go.opentelemetry.io/otel/exporters/prometheus"
|
||||
sdkmetric "go.opentelemetry.io/otel/sdk/metric"
|
||||
"go.opentelemetry.io/otel/sdk/resource"
|
||||
sdktrace "go.opentelemetry.io/otel/sdk/trace"
|
||||
semconv "go.opentelemetry.io/otel/semconv/v1.26.0"
|
||||
"google.golang.org/grpc/credentials"
|
||||
)
|
||||
|
||||
// Config controls telemetry initialization via env flags.
|
||||
//
|
||||
// Defaults align with the issue requirements:
|
||||
// - Prometheus exporter enabled by default (/metrics)
|
||||
// - OTLP exporter disabled by default
|
||||
// - Durations in seconds, bytes in raw bytes
|
||||
// - Admin HTTP server address configurable (for mounting /metrics)
|
||||
type Config struct {
|
||||
ServiceName string
|
||||
ServiceVersion string
|
||||
|
||||
// Optional resource attributes
|
||||
SiteID string
|
||||
Region string
|
||||
|
||||
PromEnabled bool
|
||||
OTLPEnabled bool
|
||||
|
||||
OTLPEndpoint string // host:port
|
||||
OTLPInsecure bool
|
||||
|
||||
MetricExportInterval time.Duration
|
||||
AdminAddr string // e.g.: ":2112"
|
||||
|
||||
// Optional build info for newt_build_info metric
|
||||
BuildVersion string
|
||||
BuildCommit string
|
||||
}
|
||||
|
||||
// FromEnv reads configuration from environment variables.
|
||||
//
|
||||
// NEWT_METRICS_PROMETHEUS_ENABLED (default: true)
|
||||
// NEWT_METRICS_OTLP_ENABLED (default: false)
|
||||
// OTEL_EXPORTER_OTLP_ENDPOINT (default: "localhost:4317")
|
||||
// OTEL_EXPORTER_OTLP_INSECURE (default: true)
|
||||
// OTEL_METRIC_EXPORT_INTERVAL (default: 15s)
|
||||
// OTEL_SERVICE_NAME (default: "newt")
|
||||
// OTEL_SERVICE_VERSION (default: "")
|
||||
// NEWT_ADMIN_ADDR (default: ":2112")
|
||||
func FromEnv() Config {
|
||||
return Config{
|
||||
ServiceName: getenv("OTEL_SERVICE_NAME", "newt"),
|
||||
ServiceVersion: os.Getenv("OTEL_SERVICE_VERSION"),
|
||||
Region: os.Getenv("NEWT_REGION"),
|
||||
PromEnabled: getenv("NEWT_METRICS_PROMETHEUS_ENABLED", "true") == "true",
|
||||
OTLPEnabled: getenv("NEWT_METRICS_OTLP_ENABLED", "false") == "true",
|
||||
OTLPEndpoint: getenv("OTEL_EXPORTER_OTLP_ENDPOINT", "localhost:4317"),
|
||||
OTLPInsecure: getenv("OTEL_EXPORTER_OTLP_INSECURE", "true") == "true",
|
||||
MetricExportInterval: getdur("OTEL_METRIC_EXPORT_INTERVAL", 15*time.Second),
|
||||
AdminAddr: getenv("NEWT_ADMIN_ADDR", "127.0.0.1:2112"),
|
||||
}
|
||||
}
|
||||
|
||||
// Setup holds initialized telemetry providers and (optionally) a /metrics handler.
|
||||
// Call Shutdown when the process terminates to flush exporters.
|
||||
type Setup struct {
|
||||
MeterProvider *sdkmetric.MeterProvider
|
||||
TracerProvider *sdktrace.TracerProvider
|
||||
|
||||
PrometheusHandler http.Handler // nil if Prometheus exporter disabled
|
||||
|
||||
shutdowns []func(context.Context) error
|
||||
}
|
||||
|
||||
// Init configures OpenTelemetry metrics and (optionally) tracing.
|
||||
//
|
||||
// It sets a global MeterProvider and TracerProvider, registers runtime instrumentation,
|
||||
// installs recommended histogram views for *_latency_seconds, and returns a Setup with
|
||||
// a Shutdown method to flush exporters.
|
||||
func Init(ctx context.Context, cfg Config) (*Setup, error) {
|
||||
res, _ := resource.New(ctx,
|
||||
resource.WithFromEnv(),
|
||||
resource.WithHost(),
|
||||
resource.WithAttributes(
|
||||
semconv.ServiceName(cfg.ServiceName),
|
||||
semconv.ServiceVersion(cfg.ServiceVersion),
|
||||
// Optional resource attributes
|
||||
attribute.String("site_id", cfg.SiteID),
|
||||
attribute.String("region", cfg.Region),
|
||||
),
|
||||
)
|
||||
|
||||
s := &Setup{}
|
||||
|
||||
// Build metric readers/exporters
|
||||
var readers []sdkmetric.Reader
|
||||
|
||||
// Prometheus exporter exposes a native /metrics handler for scraping
|
||||
if cfg.PromEnabled {
|
||||
reg := promclient.NewRegistry()
|
||||
exp, err := prometheus.New(prometheus.WithRegisterer(reg))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
readers = append(readers, exp)
|
||||
s.PrometheusHandler = promhttp.HandlerFor(reg, promhttp.HandlerOpts{})
|
||||
}
|
||||
|
||||
// Optional OTLP metric exporter (gRPC)
|
||||
if cfg.OTLPEnabled {
|
||||
mopts := []otlpmetricgrpc.Option{otlpmetricgrpc.WithEndpoint(cfg.OTLPEndpoint)}
|
||||
// Headers support via OTEL_EXPORTER_OTLP_HEADERS (k=v,k2=v2)
|
||||
if hdrs := parseOTLPHeaders(os.Getenv("OTEL_EXPORTER_OTLP_HEADERS")); len(hdrs) > 0 {
|
||||
mopts = append(mopts, otlpmetricgrpc.WithHeaders(hdrs))
|
||||
}
|
||||
if cfg.OTLPInsecure {
|
||||
mopts = append(mopts, otlpmetricgrpc.WithInsecure())
|
||||
} else if certFile := os.Getenv("OTEL_EXPORTER_OTLP_CERTIFICATE"); certFile != "" {
|
||||
creds, cerr := credentials.NewClientTLSFromFile(certFile, "")
|
||||
if cerr == nil {
|
||||
mopts = append(mopts, otlpmetricgrpc.WithTLSCredentials(creds))
|
||||
}
|
||||
}
|
||||
mexp, err := otlpmetricgrpc.New(ctx, mopts...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
readers = append(readers, sdkmetric.NewPeriodicReader(mexp, sdkmetric.WithInterval(cfg.MetricExportInterval)))
|
||||
s.shutdowns = append(s.shutdowns, mexp.Shutdown)
|
||||
}
|
||||
|
||||
// Build provider options iteratively (WithReader is not variadic)
|
||||
var mpOpts []sdkmetric.Option
|
||||
mpOpts = append(mpOpts, sdkmetric.WithResource(res))
|
||||
for _, r := range readers {
|
||||
mpOpts = append(mpOpts, sdkmetric.WithReader(r))
|
||||
}
|
||||
// Default view for latency histograms in seconds.
|
||||
mpOpts = append(mpOpts, sdkmetric.WithView(sdkmetric.NewView(
|
||||
sdkmetric.Instrument{
|
||||
Name: "newt_*_latency_seconds",
|
||||
},
|
||||
sdkmetric.Stream{
|
||||
Aggregation: sdkmetric.AggregationExplicitBucketHistogram{
|
||||
Boundaries: []float64{0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30},
|
||||
},
|
||||
},
|
||||
)))
|
||||
// Attribute whitelist: only allow expected low-cardinality keys on newt_* instruments.
|
||||
mpOpts = append(mpOpts, sdkmetric.WithView(sdkmetric.NewView(
|
||||
sdkmetric.Instrument{Name: "newt_*"},
|
||||
sdkmetric.Stream{
|
||||
AttributeFilter: func(kv attribute.KeyValue) bool {
|
||||
k := string(kv.Key)
|
||||
switch k {
|
||||
case "tunnel_id", "transport", "direction", "protocol", "result", "reason", "error_type":
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
},
|
||||
},
|
||||
)))
|
||||
mp := sdkmetric.NewMeterProvider(mpOpts...)
|
||||
otel.SetMeterProvider(mp)
|
||||
s.MeterProvider = mp
|
||||
s.shutdowns = append(s.shutdowns, mp.Shutdown)
|
||||
|
||||
// Optional tracing (OTLP over gRPC)
|
||||
if cfg.OTLPEnabled {
|
||||
topts := []otlptracegrpc.Option{otlptracegrpc.WithEndpoint(cfg.OTLPEndpoint)}
|
||||
if hdrs := parseOTLPHeaders(os.Getenv("OTEL_EXPORTER_OTLP_HEADERS")); len(hdrs) > 0 {
|
||||
topts = append(topts, otlptracegrpc.WithHeaders(hdrs))
|
||||
}
|
||||
if cfg.OTLPInsecure {
|
||||
topts = append(topts, otlptracegrpc.WithInsecure())
|
||||
} else if certFile := os.Getenv("OTEL_EXPORTER_OTLP_CERTIFICATE"); certFile != "" {
|
||||
creds, cerr := credentials.NewClientTLSFromFile(certFile, "")
|
||||
if cerr == nil {
|
||||
topts = append(topts, otlptracegrpc.WithTLSCredentials(creds))
|
||||
}
|
||||
}
|
||||
exp, err := otlptracegrpc.New(ctx, topts...)
|
||||
if err == nil {
|
||||
tp := sdktrace.NewTracerProvider(
|
||||
sdktrace.WithBatcher(exp),
|
||||
sdktrace.WithResource(res),
|
||||
)
|
||||
otel.SetTracerProvider(tp)
|
||||
s.TracerProvider = tp
|
||||
s.shutdowns = append(s.shutdowns, func(ctx context.Context) error {
|
||||
return errors.Join(exp.Shutdown(ctx), tp.Shutdown(ctx))
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// Export Go runtime metrics (goroutines, GC, mem, etc.)
|
||||
_ = runtime.Start(runtime.WithMeterProvider(mp))
|
||||
|
||||
// Register instruments after provider is set
|
||||
if err := registerInstruments(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// Optional build info metric
|
||||
if cfg.BuildVersion != "" || cfg.BuildCommit != "" {
|
||||
RegisterBuildInfo(cfg.BuildVersion, cfg.BuildCommit)
|
||||
}
|
||||
|
||||
return s, nil
|
||||
}
|
||||
|
||||
// Shutdown flushes exporters and providers in reverse init order.
|
||||
func (s *Setup) Shutdown(ctx context.Context) error {
|
||||
var err error
|
||||
for i := len(s.shutdowns) - 1; i >= 0; i-- {
|
||||
err = errors.Join(err, s.shutdowns[i](ctx))
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func parseOTLPHeaders(h string) map[string]string {
|
||||
m := map[string]string{}
|
||||
if h == "" {
|
||||
return m
|
||||
}
|
||||
pairs := strings.Split(h, ",")
|
||||
for _, p := range pairs {
|
||||
kv := strings.SplitN(strings.TrimSpace(p), "=", 2)
|
||||
if len(kv) == 2 {
|
||||
m[strings.TrimSpace(kv[0])] = strings.TrimSpace(kv[1])
|
||||
}
|
||||
}
|
||||
return m
|
||||
}
|
||||
|
||||
func getenv(k, d string) string {
|
||||
if v := os.Getenv(k); v != "" {
|
||||
return v
|
||||
}
|
||||
return d
|
||||
}
|
||||
|
||||
func getdur(k string, d time.Duration) time.Duration {
|
||||
if v := os.Getenv(k); v != "" {
|
||||
if p, e := time.ParseDuration(v); e == nil {
|
||||
return p
|
||||
}
|
||||
}
|
||||
return d
|
||||
}
|
||||
Reference in New Issue
Block a user