feat(observability): unify backend APIs and harden OTel handling

This commit is contained in:
Marc Schäfer
2026-05-04 00:12:23 +02:00
parent bcb5cc4746
commit 73d4d4d37c
8 changed files with 360 additions and 105 deletions

View File

@@ -9,7 +9,10 @@ package otel
import (
"context"
"fmt"
"log"
"net/http"
"regexp"
"strings"
"time"
"go.opentelemetry.io/otel/attribute"
@@ -17,6 +20,8 @@ import (
sdkmetric "go.opentelemetry.io/otel/sdk/metric"
)
var metricLabelNameRE = regexp.MustCompile(`^[a-zA-Z_][a-zA-Z0-9_]*$`)
// Config holds OTel backend configuration.
type Config struct {
// Protocol is "grpc" (default) or "http".
@@ -31,6 +36,9 @@ type Config struct {
// ExportInterval is the period between pushes to the collector.
ExportInterval time.Duration
// Timeout bounds exporter construction calls.
Timeout time.Duration
ServiceName string
ServiceVersion string
DeploymentEnvironment string
@@ -57,9 +65,15 @@ func New(cfg Config) (*Backend, error) {
if cfg.Protocol == "" {
cfg.Protocol = "grpc"
}
if strings.TrimSpace(cfg.Endpoint) == "" {
return nil, fmt.Errorf("otel backend: empty cfg.Endpoint")
}
if cfg.ExportInterval <= 0 {
cfg.ExportInterval = 60 * time.Second
}
if cfg.Timeout <= 0 {
cfg.Timeout = 10 * time.Second
}
if cfg.ServiceName == "" {
cfg.ServiceName = "gerbil"
}
@@ -100,111 +114,196 @@ func (b *Backend) Shutdown(ctx context.Context) error {
}
// NewCounter creates an OTel Int64Counter.
func (b *Backend) NewCounter(name, desc string, _ ...string) *Counter {
func (b *Backend) NewCounter(name, desc string, labelNames ...string) (*Counter, error) {
normalizedLabelNames, err := validateLabelNames(labelNames)
if err != nil {
return nil, fmt.Errorf("otel: create counter %q: %w", name, err)
}
c, err := b.meter.Int64Counter(name, metric.WithDescription(desc))
if err != nil {
panic(fmt.Sprintf("otel: create counter %q: %v", name, err))
return nil, fmt.Errorf("otel: create counter %q: %w", name, err)
}
return &Counter{c: c}
return &Counter{c: c, labelNames: normalizedLabelNames}, nil
}
// NewUpDownCounter creates an OTel Int64UpDownCounter.
func (b *Backend) NewUpDownCounter(name, desc string, _ ...string) *UpDownCounter {
func (b *Backend) NewUpDownCounter(name, desc string, labelNames ...string) (*UpDownCounter, error) {
normalizedLabelNames, err := validateLabelNames(labelNames)
if err != nil {
return nil, fmt.Errorf("otel: create up-down counter %q: %w", name, err)
}
c, err := b.meter.Int64UpDownCounter(name, metric.WithDescription(desc))
if err != nil {
panic(fmt.Sprintf("otel: create up-down counter %q: %v", name, err))
return nil, fmt.Errorf("otel: create up-down counter %q: %w", name, err)
}
return &UpDownCounter{c: c}
return &UpDownCounter{c: c, labelNames: normalizedLabelNames}, nil
}
// NewInt64Gauge creates an OTel Int64Gauge.
func (b *Backend) NewInt64Gauge(name, desc string, _ ...string) *Int64Gauge {
func (b *Backend) NewInt64Gauge(name, desc string, labelNames ...string) (*Int64Gauge, error) {
normalizedLabelNames, err := validateLabelNames(labelNames)
if err != nil {
return nil, fmt.Errorf("otel: create int64 gauge %q: %w", name, err)
}
g, err := b.meter.Int64Gauge(name, metric.WithDescription(desc))
if err != nil {
panic(fmt.Sprintf("otel: create int64 gauge %q: %v", name, err))
return nil, fmt.Errorf("otel: create int64 gauge %q: %w", name, err)
}
return &Int64Gauge{g: g}
return &Int64Gauge{g: g, labelNames: normalizedLabelNames}, nil
}
// NewFloat64Gauge creates an OTel Float64Gauge.
func (b *Backend) NewFloat64Gauge(name, desc string, _ ...string) *Float64Gauge {
func (b *Backend) NewFloat64Gauge(name, desc string, labelNames ...string) (*Float64Gauge, error) {
normalizedLabelNames, err := validateLabelNames(labelNames)
if err != nil {
return nil, fmt.Errorf("otel: create float64 gauge %q: %w", name, err)
}
g, err := b.meter.Float64Gauge(name, metric.WithDescription(desc))
if err != nil {
panic(fmt.Sprintf("otel: create float64 gauge %q: %v", name, err))
return nil, fmt.Errorf("otel: create float64 gauge %q: %w", name, err)
}
return &Float64Gauge{g: g}
return &Float64Gauge{g: g, labelNames: normalizedLabelNames}, nil
}
// NewHistogram creates an OTel Float64Histogram with explicit bucket boundaries.
func (b *Backend) NewHistogram(name, desc string, buckets []float64, _ ...string) *Histogram {
func (b *Backend) NewHistogram(name, desc string, buckets []float64, labelNames ...string) (*Histogram, error) {
normalizedLabelNames, err := validateLabelNames(labelNames)
if err != nil {
return nil, fmt.Errorf("otel: create histogram %q: %w", name, err)
}
h, err := b.meter.Float64Histogram(name,
metric.WithDescription(desc),
metric.WithExplicitBucketBoundaries(buckets...),
)
if err != nil {
panic(fmt.Sprintf("otel: create histogram %q: %v", name, err))
return nil, fmt.Errorf("otel: create histogram %q: %w", name, err)
}
return &Histogram{h: h}
return &Histogram{h: h, labelNames: normalizedLabelNames}, nil
}
// labelsToAttrs converts a Labels map to OTel attribute key-value pairs.
func labelsToAttrs(labels map[string]string) []attribute.KeyValue {
if len(labels) == 0 {
return nil
func validateLabelNames(labelNames []string) ([]string, error) {
if len(labelNames) == 0 {
return nil, nil
}
attrs := make([]attribute.KeyValue, 0, len(labels))
for k, v := range labels {
attrs = append(attrs, attribute.String(k, v))
normalized := make([]string, len(labelNames))
seen := make(map[string]struct{}, len(labelNames))
for i, name := range labelNames {
if !metricLabelNameRE.MatchString(name) {
return nil, fmt.Errorf("invalid label name %q", name)
}
if _, exists := seen[name]; exists {
return nil, fmt.Errorf("duplicate label name %q", name)
}
seen[name] = struct{}{}
normalized[i] = name
}
return normalized, nil
}
// labelsToAttrs builds the OTel attributes for a single sample from the
// instrument's declared labelNames and the caller-supplied labels.
//
// The result holds one string attribute per declared name, in declaration
// order; a declared name that is absent from labels is emitted with the empty
// string. If labels contains a key that was not declared, a warning is logged
// and nil is returned. When no names are declared and no labels are supplied,
// a non-nil empty slice is returned so it stays distinguishable from the nil
// "rejected" result.
func labelsToAttrs(labelNames []string, labels map[string]string) []attribute.KeyValue {
	if len(labelNames) == 0 {
		if len(labels) == 0 {
			return []attribute.KeyValue{}
		}
		log.Printf("WARN: dropping otel metric sample due to unexpected labels: got=%v expected=none", labels)
		return nil
	}

	// Reject the sample if any supplied key is not one of the declared names.
	for key := range labels {
		declared := false
		for i := 0; i < len(labelNames) && !declared; i++ {
			declared = labelNames[i] == key
		}
		if !declared {
			log.Printf("WARN: dropping otel metric sample due to unexpected label key %q (expected=%v)", key, labelNames)
			return nil
		}
	}

	// Missing keys read as "" from the map, so every declared name is present.
	attrs := make([]attribute.KeyValue, len(labelNames))
	for i, name := range labelNames {
		attrs[i] = attribute.String(name, labels[name])
	}
	return attrs
}
// Counter wraps an OTel Int64Counter.
type Counter struct {
c metric.Int64Counter
c metric.Int64Counter
labelNames []string
}
// Add increments the counter by value.
func (c *Counter) Add(ctx context.Context, value int64, labels map[string]string) {
c.c.Add(ctx, value, metric.WithAttributes(labelsToAttrs(labels)...))
attrs := labelsToAttrs(c.labelNames, labels)
if attrs == nil {
return
}
c.c.Add(ctx, value, metric.WithAttributes(attrs...))
}
// UpDownCounter wraps an OTel Int64UpDownCounter.
type UpDownCounter struct {
c metric.Int64UpDownCounter
c metric.Int64UpDownCounter
labelNames []string
}
// Add adjusts the up-down counter by value.
func (u *UpDownCounter) Add(ctx context.Context, value int64, labels map[string]string) {
u.c.Add(ctx, value, metric.WithAttributes(labelsToAttrs(labels)...))
attrs := labelsToAttrs(u.labelNames, labels)
if attrs == nil {
return
}
u.c.Add(ctx, value, metric.WithAttributes(attrs...))
}
// Int64Gauge wraps an OTel Int64Gauge.
type Int64Gauge struct {
g metric.Int64Gauge
g metric.Int64Gauge
labelNames []string
}
// Record sets the gauge to value.
func (g *Int64Gauge) Record(ctx context.Context, value int64, labels map[string]string) {
g.g.Record(ctx, value, metric.WithAttributes(labelsToAttrs(labels)...))
attrs := labelsToAttrs(g.labelNames, labels)
if attrs == nil {
return
}
g.g.Record(ctx, value, metric.WithAttributes(attrs...))
}
// Float64Gauge wraps an OTel Float64Gauge.
type Float64Gauge struct {
g metric.Float64Gauge
g metric.Float64Gauge
labelNames []string
}
// Record sets the gauge to value.
func (g *Float64Gauge) Record(ctx context.Context, value float64, labels map[string]string) {
g.g.Record(ctx, value, metric.WithAttributes(labelsToAttrs(labels)...))
attrs := labelsToAttrs(g.labelNames, labels)
if attrs == nil {
return
}
g.g.Record(ctx, value, metric.WithAttributes(attrs...))
}
// Histogram wraps an OTel Float64Histogram.
type Histogram struct {
h metric.Float64Histogram
h metric.Float64Histogram
labelNames []string
}
// Record observes value in the histogram.
func (h *Histogram) Record(ctx context.Context, value float64, labels map[string]string) {
h.h.Record(ctx, value, metric.WithAttributes(labelsToAttrs(labels)...))
attrs := labelsToAttrs(h.labelNames, labels)
if attrs == nil {
return
}
h.h.Record(ctx, value, metric.WithAttributes(attrs...))
}

View File

@@ -55,7 +55,10 @@ func TestOtelBackendCounter(t *testing.T) {
b := newInMemoryBackend(t)
defer b.Shutdown(context.Background()) //nolint:errcheck
c := b.NewCounter("gerbil_test_counter_total", "test counter", "result")
c, err := b.NewCounter("gerbil_test_counter_total", "test counter", "result")
if err != nil {
t.Fatalf("NewCounter returned error: %v", err)
}
// Should not panic
c.Add(context.Background(), 1, map[string]string{"result": "ok"})
c.Add(context.Background(), 5, nil)
@@ -65,7 +68,10 @@ func TestOtelBackendUpDownCounter(t *testing.T) {
b := newInMemoryBackend(t)
defer b.Shutdown(context.Background()) //nolint:errcheck
u := b.NewUpDownCounter("gerbil_test_updown", "test updown", "state")
u, err := b.NewUpDownCounter("gerbil_test_updown", "test updown", "state")
if err != nil {
t.Fatalf("NewUpDownCounter returned error: %v", err)
}
u.Add(context.Background(), 3, map[string]string{"state": "active"})
u.Add(context.Background(), -1, map[string]string{"state": "active"})
}
@@ -74,7 +80,10 @@ func TestOtelBackendInt64Gauge(t *testing.T) {
b := newInMemoryBackend(t)
defer b.Shutdown(context.Background()) //nolint:errcheck
g := b.NewInt64Gauge("gerbil_test_int_gauge", "test gauge")
g, err := b.NewInt64Gauge("gerbil_test_int_gauge", "test gauge")
if err != nil {
t.Fatalf("NewInt64Gauge returned error: %v", err)
}
g.Record(context.Background(), 42, nil)
}
@@ -82,7 +91,10 @@ func TestOtelBackendFloat64Gauge(t *testing.T) {
b := newInMemoryBackend(t)
defer b.Shutdown(context.Background()) //nolint:errcheck
g := b.NewFloat64Gauge("gerbil_test_float_gauge", "test float gauge")
g, err := b.NewFloat64Gauge("gerbil_test_float_gauge", "test float gauge")
if err != nil {
t.Fatalf("NewFloat64Gauge returned error: %v", err)
}
g.Record(context.Background(), 3.14, nil)
}
@@ -90,8 +102,11 @@ func TestOtelBackendHistogram(t *testing.T) {
b := newInMemoryBackend(t)
defer b.Shutdown(context.Background()) //nolint:errcheck
h := b.NewHistogram("gerbil_test_duration_seconds", "test histogram",
h, err := b.NewHistogram("gerbil_test_duration_seconds", "test histogram",
[]float64{0.1, 0.5, 1.0}, "method")
if err != nil {
t.Fatalf("NewHistogram returned error: %v", err)
}
h.Record(context.Background(), 0.3, map[string]string{"method": "GET"})
}
@@ -139,3 +154,22 @@ func TestOtelBackendDeploymentEnvironment(t *testing.T) {
}
defer b.Shutdown(context.Background()) //nolint:errcheck
}
// TestOtelBackendRejectsInvalidLabelNames checks that instrument constructors
// return an error, rather than an instrument, when given a repeated label name
// or a label name containing a character such as '-'.
func TestOtelBackendRejectsInvalidLabelNames(t *testing.T) {
	backend := newInMemoryBackend(t)
	defer backend.Shutdown(context.Background()) //nolint:errcheck

	cases := []struct {
		name    string
		create  func() error
		failMsg string
	}{
		{
			name: "duplicate labels",
			create: func() error {
				_, err := backend.NewCounter("gerbil_test_invalid_labels_total", "test counter", "result", "result")
				return err
			},
			failMsg: "expected error for duplicate label names",
		},
		{
			name: "invalid label name",
			create: func() error {
				_, err := backend.NewHistogram("gerbil_test_invalid_histogram", "test histogram", []float64{0.1, 1.0}, "status-code")
				return err
			},
			failMsg: "expected error for invalid label name",
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			if err := tc.create(); err == nil {
				t.Fatal(tc.failMsg)
			}
		})
	}
}

View File

@@ -3,6 +3,8 @@ package otel
import (
"context"
"fmt"
"net/url"
"strings"
"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc"
"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp"
@@ -11,6 +13,10 @@ import (
// newExporter creates the appropriate OTLP exporter based on cfg.Protocol.
func newExporter(ctx context.Context, cfg Config) (sdkmetric.Exporter, error) {
if strings.TrimSpace(cfg.Endpoint) == "" {
return nil, fmt.Errorf("otel: cfg.Endpoint is empty")
}
switch cfg.Protocol {
case "grpc", "":
return newGRPCExporter(ctx, cfg)
@@ -36,8 +42,20 @@ func newGRPCExporter(ctx context.Context, cfg Config) (sdkmetric.Exporter, error
}
func newHTTPExporter(ctx context.Context, cfg Config) (sdkmetric.Exporter, error) {
opts := []otlpmetrichttp.Option{
otlpmetrichttp.WithEndpoint(cfg.Endpoint),
endpoint := strings.TrimSpace(cfg.Endpoint)
opts := make([]otlpmetrichttp.Option, 0, 3)
if strings.Contains(endpoint, "://") {
parsed, err := url.Parse(endpoint)
if err != nil {
return nil, fmt.Errorf("otlp http exporter: parse endpoint URL %q: %w", endpoint, err)
}
opts = append(opts, otlpmetrichttp.WithEndpointURL(parsed.String()))
} else {
opts = append(opts,
otlpmetrichttp.WithEndpoint(endpoint),
otlpmetrichttp.WithURLPath("/v1/metrics"),
)
}
if cfg.Insecure {
opts = append(opts, otlpmetrichttp.WithInsecure())