mirror of
https://github.com/fosrl/gerbil.git
synced 2026-05-17 21:59:54 +00:00
Add OpenTelemetry and Prometheus metrics infrastructure
This commit is contained in:
506
internal/metrics/metrics.go
Normal file
506
internal/metrics/metrics.go
Normal file
@@ -0,0 +1,506 @@
|
||||
// Package metrics provides the application-level metrics facade for Gerbil.
|
||||
//
|
||||
// Application code (main, relay, proxy) uses only the Record* functions in this
|
||||
// package. The actual recording is delegated to the backend selected in
|
||||
// internal/observability. Neither Prometheus nor OTel packages are imported here.
|
||||
package metrics
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net/http"
|
||||
|
||||
"github.com/fosrl/gerbil/internal/observability"
|
||||
)
|
||||
|
||||
// Config is the metrics configuration type. It is an alias for
|
||||
// observability.MetricsConfig so callers do not need to import observability.
|
||||
type Config = observability.MetricsConfig
|
||||
|
||||
// PrometheusConfig is re-exported for convenience.
|
||||
type PrometheusConfig = observability.PrometheusConfig
|
||||
|
||||
// OTelConfig is re-exported for convenience.
|
||||
type OTelConfig = observability.OTelConfig
|
||||
|
||||
var (
|
||||
backend observability.Backend
|
||||
|
||||
// Interface and peer metrics
|
||||
wgInterfaceUp observability.Int64Gauge
|
||||
wgPeersTotal observability.UpDownCounter
|
||||
wgPeerConnected observability.Int64Gauge
|
||||
wgHandshakesTotal observability.Counter
|
||||
wgHandshakeLatency observability.Histogram
|
||||
wgPeerRTT observability.Histogram
|
||||
wgBytesReceived observability.Counter
|
||||
wgBytesTransmitted observability.Counter
|
||||
allowedIPsCount observability.UpDownCounter
|
||||
keyRotationTotal observability.Counter
|
||||
|
||||
// System and proxy metrics
|
||||
netlinkEventsTotal observability.Counter
|
||||
netlinkErrorsTotal observability.Counter
|
||||
syncDuration observability.Histogram
|
||||
workqueueDepth observability.UpDownCounter
|
||||
kernelModuleLoads observability.Counter
|
||||
firewallRulesApplied observability.Counter
|
||||
activeSessions observability.UpDownCounter
|
||||
activeProxyConnections observability.UpDownCounter
|
||||
proxyRouteLookups observability.Counter
|
||||
proxyTLSHandshake observability.Histogram
|
||||
proxyBytesTransmitted observability.Counter
|
||||
|
||||
// UDP Relay / Proxy Metrics
|
||||
udpPacketsTotal observability.Counter
|
||||
udpPacketSizeBytes observability.Histogram
|
||||
holePunchEventsTotal observability.Counter
|
||||
proxyMappingActive observability.UpDownCounter
|
||||
sessionActive observability.UpDownCounter
|
||||
sessionRebuiltTotal observability.Counter
|
||||
commPatternActive observability.UpDownCounter
|
||||
proxyCleanupRemovedTotal observability.Counter
|
||||
proxyConnectionErrorsTotal observability.Counter
|
||||
proxyInitialMappingsTotal observability.Int64Gauge
|
||||
proxyMappingUpdatesTotal observability.Counter
|
||||
proxyIdleCleanupDuration observability.Histogram
|
||||
|
||||
// SNI Proxy Metrics
|
||||
sniConnectionsTotal observability.Counter
|
||||
sniConnectionDuration observability.Histogram
|
||||
sniActiveConnections observability.UpDownCounter
|
||||
sniRouteCacheHitsTotal observability.Counter
|
||||
sniRouteAPIRequestsTotal observability.Counter
|
||||
sniRouteAPILatency observability.Histogram
|
||||
sniLocalOverrideTotal observability.Counter
|
||||
sniTrustedProxyEventsTotal observability.Counter
|
||||
sniProxyProtocolParseErrorsTotal observability.Counter
|
||||
sniDataBytesTotal observability.Counter
|
||||
sniTunnelTerminationsTotal observability.Counter
|
||||
|
||||
// HTTP API & Peer Management Metrics
|
||||
httpRequestsTotal observability.Counter
|
||||
httpRequestDuration observability.Histogram
|
||||
peerOperationsTotal observability.Counter
|
||||
proxyMappingUpdateRequestsTotal observability.Counter
|
||||
destinationsUpdateRequestsTotal observability.Counter
|
||||
|
||||
// Remote Configuration, Reporting & Housekeeping
|
||||
remoteConfigFetchesTotal observability.Counter
|
||||
bandwidthReportsTotal observability.Counter
|
||||
peerBandwidthBytesTotal observability.Counter
|
||||
memorySpikeTotal observability.Counter
|
||||
heapProfilesWrittenTotal observability.Counter
|
||||
|
||||
// Operational metrics
|
||||
configReloadsTotal observability.Counter
|
||||
restartTotal observability.Counter
|
||||
authFailuresTotal observability.Counter
|
||||
aclDeniedTotal observability.Counter
|
||||
certificateExpiryDays observability.Float64Gauge
|
||||
)
|
||||
|
||||
// DefaultConfig returns a default metrics configuration.
|
||||
func DefaultConfig() Config {
|
||||
return observability.DefaultMetricsConfig()
|
||||
}
|
||||
|
||||
// Initialize sets up the metrics system using the selected backend.
|
||||
// It returns the /metrics HTTP handler (non-nil only for Prometheus backend).
|
||||
func Initialize(cfg Config) (http.Handler, error) {
|
||||
b, err := observability.New(cfg)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
backend = b
|
||||
|
||||
if err := createInstruments(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return backend.HTTPHandler(), nil
|
||||
}
|
||||
|
||||
// Shutdown gracefully shuts down the metrics backend.
|
||||
func Shutdown(ctx context.Context) error {
|
||||
if backend != nil {
|
||||
return backend.Shutdown(ctx)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func createInstruments() error {
|
||||
durationBuckets := []float64{0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30}
|
||||
sizeBuckets := []float64{512, 1024, 4096, 16384, 65536, 262144, 1048576}
|
||||
sniDurationBuckets := []float64{0.1, 0.5, 1, 2.5, 5, 10, 30, 60, 120}
|
||||
|
||||
b := backend
|
||||
|
||||
wgInterfaceUp = b.NewInt64Gauge("gerbil_wg_interface_up",
|
||||
"Operational state of a WireGuard interface (1=up, 0=down)", "ifname", "instance")
|
||||
wgPeersTotal = b.NewUpDownCounter("gerbil_wg_peers_total",
|
||||
"Total number of configured peers per interface", "ifname")
|
||||
wgPeerConnected = b.NewInt64Gauge("gerbil_wg_peer_connected",
|
||||
"Whether a specific peer is connected (1=connected, 0=disconnected)", "ifname", "peer")
|
||||
allowedIPsCount = b.NewUpDownCounter("gerbil_allowed_ips_count",
|
||||
"Number of allowed IPs configured per peer", "ifname", "peer")
|
||||
keyRotationTotal = b.NewCounter("gerbil_key_rotation_total",
|
||||
"Key rotation events", "ifname", "reason")
|
||||
wgHandshakesTotal = b.NewCounter("gerbil_wg_handshakes_total",
|
||||
"Count of handshake attempts with their result status", "ifname", "peer", "result")
|
||||
wgHandshakeLatency = b.NewHistogram("gerbil_wg_handshake_latency_seconds",
|
||||
"Distribution of handshake latencies in seconds", durationBuckets, "ifname", "peer")
|
||||
wgPeerRTT = b.NewHistogram("gerbil_wg_peer_rtt_seconds",
|
||||
"Observed round-trip time to a peer in seconds", durationBuckets, "ifname", "peer")
|
||||
wgBytesReceived = b.NewCounter("gerbil_wg_bytes_received_total",
|
||||
"Number of bytes received from a peer", "ifname", "peer")
|
||||
wgBytesTransmitted = b.NewCounter("gerbil_wg_bytes_transmitted_total",
|
||||
"Number of bytes transmitted to a peer", "ifname", "peer")
|
||||
netlinkEventsTotal = b.NewCounter("gerbil_netlink_events_total",
|
||||
"Number of netlink events processed", "event_type")
|
||||
netlinkErrorsTotal = b.NewCounter("gerbil_netlink_errors_total",
|
||||
"Count of netlink or kernel errors", "component", "error_type")
|
||||
syncDuration = b.NewHistogram("gerbil_sync_duration_seconds",
|
||||
"Duration of reconciliation/sync loops in seconds", durationBuckets, "component")
|
||||
workqueueDepth = b.NewUpDownCounter("gerbil_workqueue_depth",
|
||||
"Current length of internal work queues", "queue")
|
||||
kernelModuleLoads = b.NewCounter("gerbil_kernel_module_loads_total",
|
||||
"Count of kernel module load attempts", "result")
|
||||
firewallRulesApplied = b.NewCounter("gerbil_firewall_rules_applied_total",
|
||||
"IPTables/NFT rules applied", "result", "chain")
|
||||
activeSessions = b.NewUpDownCounter("gerbil_active_sessions",
|
||||
"Number of active UDP relay sessions", "ifname")
|
||||
activeProxyConnections = b.NewUpDownCounter("gerbil_active_proxy_connections",
|
||||
"Active SNI proxy connections")
|
||||
proxyRouteLookups = b.NewCounter("gerbil_proxy_route_lookups_total",
|
||||
"Number of route lookups", "result")
|
||||
proxyTLSHandshake = b.NewHistogram("gerbil_proxy_tls_handshake_seconds",
|
||||
"TLS handshake duration for SNI proxy in seconds", durationBuckets)
|
||||
proxyBytesTransmitted = b.NewCounter("gerbil_proxy_bytes_transmitted_total",
|
||||
"Bytes sent/received by the SNI proxy", "direction")
|
||||
configReloadsTotal = b.NewCounter("gerbil_config_reloads_total",
|
||||
"Number of configuration reloads", "result")
|
||||
restartTotal = b.NewCounter("gerbil_restart_total",
|
||||
"Process restart count")
|
||||
authFailuresTotal = b.NewCounter("gerbil_auth_failures_total",
|
||||
"Count of authentication or peer validation failures", "peer", "reason")
|
||||
aclDeniedTotal = b.NewCounter("gerbil_acl_denied_total",
|
||||
"Access control denied events", "ifname", "peer", "policy")
|
||||
certificateExpiryDays = b.NewFloat64Gauge("gerbil_certificate_expiry_days",
|
||||
"Days until certificate expiry", "cert_name", "ifname")
|
||||
udpPacketsTotal = b.NewCounter("gerbil_udp_packets_total",
|
||||
"Count of UDP packets processed by relay workers", "ifname", "type", "direction")
|
||||
udpPacketSizeBytes = b.NewHistogram("gerbil_udp_packet_size_bytes",
|
||||
"Size distribution of packets forwarded through relay", sizeBuckets, "ifname", "type")
|
||||
holePunchEventsTotal = b.NewCounter("gerbil_hole_punch_events_total",
|
||||
"Count of hole punch messages processed", "ifname", "result")
|
||||
proxyMappingActive = b.NewUpDownCounter("gerbil_proxy_mapping_active",
|
||||
"Number of active proxy mappings", "ifname")
|
||||
sessionActive = b.NewUpDownCounter("gerbil_session_active",
|
||||
"Number of active WireGuard sessions", "ifname")
|
||||
sessionRebuiltTotal = b.NewCounter("gerbil_session_rebuilt_total",
|
||||
"Count of sessions rebuilt from communication patterns", "ifname")
|
||||
commPatternActive = b.NewUpDownCounter("gerbil_comm_pattern_active",
|
||||
"Number of active communication patterns", "ifname")
|
||||
proxyCleanupRemovedTotal = b.NewCounter("gerbil_proxy_cleanup_removed_total",
|
||||
"Count of items removed during cleanup routines", "ifname", "component")
|
||||
proxyConnectionErrorsTotal = b.NewCounter("gerbil_proxy_connection_errors_total",
|
||||
"Count of connection errors in proxy operations", "ifname", "error_type")
|
||||
proxyInitialMappingsTotal = b.NewInt64Gauge("gerbil_proxy_initial_mappings",
|
||||
"Number of initial proxy mappings loaded", "ifname")
|
||||
proxyMappingUpdatesTotal = b.NewCounter("gerbil_proxy_mapping_updates_total",
|
||||
"Count of proxy mapping updates", "ifname")
|
||||
proxyIdleCleanupDuration = b.NewHistogram("gerbil_proxy_idle_cleanup_duration_seconds",
|
||||
"Duration of cleanup cycles", durationBuckets, "ifname", "component")
|
||||
sniConnectionsTotal = b.NewCounter("gerbil_sni_connections_total",
|
||||
"Count of connections processed by SNI proxy", "result")
|
||||
sniConnectionDuration = b.NewHistogram("gerbil_sni_connection_duration_seconds",
|
||||
"Lifetime distribution of proxied TLS connections", sniDurationBuckets)
|
||||
sniActiveConnections = b.NewUpDownCounter("gerbil_sni_active_connections",
|
||||
"Number of active SNI tunnels")
|
||||
sniRouteCacheHitsTotal = b.NewCounter("gerbil_sni_route_cache_hits_total",
|
||||
"Count of route cache hits and misses", "result")
|
||||
sniRouteAPIRequestsTotal = b.NewCounter("gerbil_sni_route_api_requests_total",
|
||||
"Count of route API requests", "result")
|
||||
sniRouteAPILatency = b.NewHistogram("gerbil_sni_route_api_latency_seconds",
|
||||
"Distribution of route API call latencies", durationBuckets)
|
||||
sniLocalOverrideTotal = b.NewCounter("gerbil_sni_local_override_total",
|
||||
"Count of routes using local overrides", "hit")
|
||||
sniTrustedProxyEventsTotal = b.NewCounter("gerbil_sni_trusted_proxy_events_total",
|
||||
"Count of PROXY protocol events", "event")
|
||||
sniProxyProtocolParseErrorsTotal = b.NewCounter("gerbil_sni_proxy_protocol_parse_errors_total",
|
||||
"Count of PROXY protocol parse failures")
|
||||
sniDataBytesTotal = b.NewCounter("gerbil_sni_data_bytes_total",
|
||||
"Count of bytes proxied through SNI tunnels", "direction")
|
||||
sniTunnelTerminationsTotal = b.NewCounter("gerbil_sni_tunnel_terminations_total",
|
||||
"Count of tunnel terminations by reason", "reason")
|
||||
httpRequestsTotal = b.NewCounter("gerbil_http_requests_total",
|
||||
"Count of HTTP requests to management API", "endpoint", "method", "status_code")
|
||||
httpRequestDuration = b.NewHistogram("gerbil_http_request_duration_seconds",
|
||||
"Distribution of HTTP request handling time", durationBuckets, "endpoint", "method")
|
||||
peerOperationsTotal = b.NewCounter("gerbil_peer_operations_total",
|
||||
"Count of peer lifecycle operations", "operation", "result")
|
||||
proxyMappingUpdateRequestsTotal = b.NewCounter("gerbil_proxy_mapping_update_requests_total",
|
||||
"Count of proxy mapping update API calls", "result")
|
||||
destinationsUpdateRequestsTotal = b.NewCounter("gerbil_destinations_update_requests_total",
|
||||
"Count of destinations update API calls", "result")
|
||||
remoteConfigFetchesTotal = b.NewCounter("gerbil_remote_config_fetches_total",
|
||||
"Count of remote configuration fetch attempts", "result")
|
||||
bandwidthReportsTotal = b.NewCounter("gerbil_bandwidth_reports_total",
|
||||
"Count of bandwidth report transmissions", "result")
|
||||
peerBandwidthBytesTotal = b.NewCounter("gerbil_peer_bandwidth_bytes_total",
|
||||
"Bytes per peer tracked by bandwidth calculation", "peer", "direction")
|
||||
memorySpikeTotal = b.NewCounter("gerbil_memory_spike_total",
|
||||
"Count of memory spikes detected", "severity")
|
||||
heapProfilesWrittenTotal = b.NewCounter("gerbil_heap_profiles_written_total",
|
||||
"Count of heap profile files generated")
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func RecordInterfaceUp(ifname, instance string, up bool) {
|
||||
value := int64(0)
|
||||
if up {
|
||||
value = 1
|
||||
}
|
||||
wgInterfaceUp.Record(context.Background(), value, observability.Labels{"ifname": ifname, "instance": instance})
|
||||
}
|
||||
|
||||
func RecordPeersTotal(ifname string, delta int64) {
|
||||
wgPeersTotal.Add(context.Background(), delta, observability.Labels{"ifname": ifname})
|
||||
}
|
||||
|
||||
func RecordPeerConnected(ifname, peer string, connected bool) {
|
||||
value := int64(0)
|
||||
if connected {
|
||||
value = 1
|
||||
}
|
||||
wgPeerConnected.Record(context.Background(), value, observability.Labels{"ifname": ifname, "peer": peer})
|
||||
}
|
||||
|
||||
func RecordHandshake(ifname, peer, result string) {
|
||||
wgHandshakesTotal.Add(context.Background(), 1, observability.Labels{"ifname": ifname, "peer": peer, "result": result})
|
||||
}
|
||||
|
||||
func RecordHandshakeLatency(ifname, peer string, seconds float64) {
|
||||
wgHandshakeLatency.Record(context.Background(), seconds, observability.Labels{"ifname": ifname, "peer": peer})
|
||||
}
|
||||
|
||||
func RecordPeerRTT(ifname, peer string, seconds float64) {
|
||||
wgPeerRTT.Record(context.Background(), seconds, observability.Labels{"ifname": ifname, "peer": peer})
|
||||
}
|
||||
|
||||
func RecordBytesReceived(ifname, peer string, bytes int64) {
|
||||
wgBytesReceived.Add(context.Background(), bytes, observability.Labels{"ifname": ifname, "peer": peer})
|
||||
}
|
||||
|
||||
func RecordBytesTransmitted(ifname, peer string, bytes int64) {
|
||||
wgBytesTransmitted.Add(context.Background(), bytes, observability.Labels{"ifname": ifname, "peer": peer})
|
||||
}
|
||||
|
||||
func RecordAllowedIPsCount(ifname, peer string, delta int64) {
|
||||
allowedIPsCount.Add(context.Background(), delta, observability.Labels{"ifname": ifname, "peer": peer})
|
||||
}
|
||||
|
||||
func RecordKeyRotation(ifname, reason string) {
|
||||
keyRotationTotal.Add(context.Background(), 1, observability.Labels{"ifname": ifname, "reason": reason})
|
||||
}
|
||||
|
||||
func RecordNetlinkEvent(eventType string) {
|
||||
netlinkEventsTotal.Add(context.Background(), 1, observability.Labels{"event_type": eventType})
|
||||
}
|
||||
|
||||
func RecordNetlinkError(component, errorType string) {
|
||||
netlinkErrorsTotal.Add(context.Background(), 1, observability.Labels{"component": component, "error_type": errorType})
|
||||
}
|
||||
|
||||
func RecordSyncDuration(component string, seconds float64) {
|
||||
syncDuration.Record(context.Background(), seconds, observability.Labels{"component": component})
|
||||
}
|
||||
|
||||
func RecordWorkqueueDepth(queue string, delta int64) {
|
||||
workqueueDepth.Add(context.Background(), delta, observability.Labels{"queue": queue})
|
||||
}
|
||||
|
||||
func RecordKernelModuleLoad(result string) {
|
||||
kernelModuleLoads.Add(context.Background(), 1, observability.Labels{"result": result})
|
||||
}
|
||||
|
||||
func RecordFirewallRuleApplied(result, chain string) {
|
||||
firewallRulesApplied.Add(context.Background(), 1, observability.Labels{"result": result, "chain": chain})
|
||||
}
|
||||
|
||||
func RecordActiveSession(ifname string, delta int64) {
|
||||
activeSessions.Add(context.Background(), delta, observability.Labels{"ifname": ifname})
|
||||
}
|
||||
|
||||
func RecordActiveProxyConnection(hostname string, delta int64) {
|
||||
_ = hostname
|
||||
activeProxyConnections.Add(context.Background(), delta, nil)
|
||||
}
|
||||
|
||||
func RecordProxyRouteLookup(result, hostname string) {
|
||||
_ = hostname
|
||||
proxyRouteLookups.Add(context.Background(), 1, observability.Labels{"result": result})
|
||||
}
|
||||
|
||||
func RecordProxyTLSHandshake(hostname string, seconds float64) {
|
||||
_ = hostname
|
||||
proxyTLSHandshake.Record(context.Background(), seconds, nil)
|
||||
}
|
||||
|
||||
func RecordProxyBytesTransmitted(hostname, direction string, bytes int64) {
|
||||
_ = hostname
|
||||
proxyBytesTransmitted.Add(context.Background(), bytes, observability.Labels{"direction": direction})
|
||||
}
|
||||
|
||||
func RecordConfigReload(result string) {
|
||||
configReloadsTotal.Add(context.Background(), 1, observability.Labels{"result": result})
|
||||
}
|
||||
|
||||
func RecordRestart() {
|
||||
restartTotal.Add(context.Background(), 1, nil)
|
||||
}
|
||||
|
||||
func RecordAuthFailure(peer, reason string) {
|
||||
authFailuresTotal.Add(context.Background(), 1, observability.Labels{"peer": peer, "reason": reason})
|
||||
}
|
||||
|
||||
func RecordACLDenied(ifname, peer, policy string) {
|
||||
aclDeniedTotal.Add(context.Background(), 1, observability.Labels{"ifname": ifname, "peer": peer, "policy": policy})
|
||||
}
|
||||
|
||||
func RecordCertificateExpiry(certName, ifname string, days float64) {
|
||||
certificateExpiryDays.Record(context.Background(), days, observability.Labels{"cert_name": certName, "ifname": ifname})
|
||||
}
|
||||
|
||||
func RecordUDPPacket(ifname, packetType, direction string) {
|
||||
udpPacketsTotal.Add(context.Background(), 1, observability.Labels{"ifname": ifname, "type": packetType, "direction": direction})
|
||||
}
|
||||
|
||||
func RecordUDPPacketSize(ifname, packetType string, bytes float64) {
|
||||
udpPacketSizeBytes.Record(context.Background(), bytes, observability.Labels{"ifname": ifname, "type": packetType})
|
||||
}
|
||||
|
||||
func RecordHolePunchEvent(ifname, result string) {
|
||||
holePunchEventsTotal.Add(context.Background(), 1, observability.Labels{"ifname": ifname, "result": result})
|
||||
}
|
||||
|
||||
func RecordProxyMapping(ifname string, delta int64) {
|
||||
proxyMappingActive.Add(context.Background(), delta, observability.Labels{"ifname": ifname})
|
||||
}
|
||||
|
||||
func RecordSession(ifname string, delta int64) {
|
||||
sessionActive.Add(context.Background(), delta, observability.Labels{"ifname": ifname})
|
||||
}
|
||||
|
||||
func RecordSessionRebuilt(ifname string) {
|
||||
sessionRebuiltTotal.Add(context.Background(), 1, observability.Labels{"ifname": ifname})
|
||||
}
|
||||
|
||||
func RecordCommPattern(ifname string, delta int64) {
|
||||
commPatternActive.Add(context.Background(), delta, observability.Labels{"ifname": ifname})
|
||||
}
|
||||
|
||||
func RecordProxyCleanupRemoved(ifname, component string, count int64) {
|
||||
proxyCleanupRemovedTotal.Add(context.Background(), count, observability.Labels{"ifname": ifname, "component": component})
|
||||
}
|
||||
|
||||
func RecordProxyConnectionError(ifname, errorType string) {
|
||||
proxyConnectionErrorsTotal.Add(context.Background(), 1, observability.Labels{"ifname": ifname, "error_type": errorType})
|
||||
}
|
||||
|
||||
func RecordProxyInitialMappings(ifname string, count int64) {
|
||||
proxyInitialMappingsTotal.Record(context.Background(), count, observability.Labels{"ifname": ifname})
|
||||
}
|
||||
|
||||
func RecordProxyMappingUpdate(ifname string) {
|
||||
proxyMappingUpdatesTotal.Add(context.Background(), 1, observability.Labels{"ifname": ifname})
|
||||
}
|
||||
|
||||
func RecordProxyIdleCleanupDuration(ifname, component string, seconds float64) {
|
||||
proxyIdleCleanupDuration.Record(context.Background(), seconds, observability.Labels{"ifname": ifname, "component": component})
|
||||
}
|
||||
|
||||
func RecordSNIConnection(result string) {
|
||||
sniConnectionsTotal.Add(context.Background(), 1, observability.Labels{"result": result})
|
||||
}
|
||||
|
||||
func RecordSNIConnectionDuration(seconds float64) {
|
||||
sniConnectionDuration.Record(context.Background(), seconds, nil)
|
||||
}
|
||||
|
||||
func RecordSNIActiveConnection(delta int64) {
|
||||
sniActiveConnections.Add(context.Background(), delta, nil)
|
||||
}
|
||||
|
||||
func RecordSNIRouteCacheHit(result string) {
|
||||
sniRouteCacheHitsTotal.Add(context.Background(), 1, observability.Labels{"result": result})
|
||||
}
|
||||
|
||||
func RecordSNIRouteAPIRequest(result string) {
|
||||
sniRouteAPIRequestsTotal.Add(context.Background(), 1, observability.Labels{"result": result})
|
||||
}
|
||||
|
||||
func RecordSNIRouteAPILatency(seconds float64) {
|
||||
sniRouteAPILatency.Record(context.Background(), seconds, nil)
|
||||
}
|
||||
|
||||
func RecordSNILocalOverride(hit string) {
|
||||
sniLocalOverrideTotal.Add(context.Background(), 1, observability.Labels{"hit": hit})
|
||||
}
|
||||
|
||||
func RecordSNITrustedProxyEvent(event string) {
|
||||
sniTrustedProxyEventsTotal.Add(context.Background(), 1, observability.Labels{"event": event})
|
||||
}
|
||||
|
||||
func RecordSNIProxyProtocolParseError() {
|
||||
sniProxyProtocolParseErrorsTotal.Add(context.Background(), 1, nil)
|
||||
}
|
||||
|
||||
func RecordSNIDataBytes(direction string, bytes int64) {
|
||||
sniDataBytesTotal.Add(context.Background(), bytes, observability.Labels{"direction": direction})
|
||||
}
|
||||
|
||||
func RecordSNITunnelTermination(reason string) {
|
||||
sniTunnelTerminationsTotal.Add(context.Background(), 1, observability.Labels{"reason": reason})
|
||||
}
|
||||
|
||||
func RecordHTTPRequest(endpoint, method, statusCode string) {
|
||||
httpRequestsTotal.Add(context.Background(), 1, observability.Labels{"endpoint": endpoint, "method": method, "status_code": statusCode})
|
||||
}
|
||||
|
||||
func RecordHTTPRequestDuration(endpoint, method string, seconds float64) {
|
||||
httpRequestDuration.Record(context.Background(), seconds, observability.Labels{"endpoint": endpoint, "method": method})
|
||||
}
|
||||
|
||||
func RecordPeerOperation(operation, result string) {
|
||||
peerOperationsTotal.Add(context.Background(), 1, observability.Labels{"operation": operation, "result": result})
|
||||
}
|
||||
|
||||
func RecordProxyMappingUpdateRequest(result string) {
|
||||
proxyMappingUpdateRequestsTotal.Add(context.Background(), 1, observability.Labels{"result": result})
|
||||
}
|
||||
|
||||
func RecordDestinationsUpdateRequest(result string) {
|
||||
destinationsUpdateRequestsTotal.Add(context.Background(), 1, observability.Labels{"result": result})
|
||||
}
|
||||
|
||||
func RecordRemoteConfigFetch(result string) {
|
||||
remoteConfigFetchesTotal.Add(context.Background(), 1, observability.Labels{"result": result})
|
||||
}
|
||||
|
||||
func RecordBandwidthReport(result string) {
|
||||
bandwidthReportsTotal.Add(context.Background(), 1, observability.Labels{"result": result})
|
||||
}
|
||||
|
||||
func RecordPeerBandwidthBytes(peer, direction string, bytes int64) {
|
||||
peerBandwidthBytesTotal.Add(context.Background(), bytes, observability.Labels{"peer": peer, "direction": direction})
|
||||
}
|
||||
|
||||
func RecordMemorySpike(severity string) {
|
||||
memorySpikeTotal.Add(context.Background(), 1, observability.Labels{"severity": severity})
|
||||
}
|
||||
|
||||
func RecordHeapProfileWritten() {
|
||||
heapProfilesWrittenTotal.Add(context.Background(), 1, nil)
|
||||
}
|
||||
258
internal/metrics/metrics_test.go
Normal file
258
internal/metrics/metrics_test.go
Normal file
@@ -0,0 +1,258 @@
|
||||
package metrics_test
|
||||
|
||||
import (
|
||||
"context"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/fosrl/gerbil/internal/metrics"
|
||||
"github.com/fosrl/gerbil/internal/observability"
|
||||
)
|
||||
|
||||
const exampleHostname = "example.com"
|
||||
|
||||
func initPrometheus(t *testing.T) http.Handler {
|
||||
t.Helper()
|
||||
cfg := metrics.DefaultConfig()
|
||||
cfg.Enabled = true
|
||||
cfg.Backend = "prometheus"
|
||||
cfg.Prometheus.Path = "/metrics"
|
||||
|
||||
h, err := metrics.Initialize(cfg)
|
||||
if err != nil {
|
||||
t.Fatalf("Initialize failed: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
metrics.Shutdown(context.Background()) //nolint:errcheck
|
||||
})
|
||||
return h
|
||||
}
|
||||
|
||||
func initNoop(t *testing.T) {
|
||||
t.Helper()
|
||||
cfg := metrics.DefaultConfig()
|
||||
cfg.Enabled = false
|
||||
_, err := metrics.Initialize(cfg)
|
||||
if err != nil {
|
||||
t.Fatalf("Initialize noop failed: %v", err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
metrics.Shutdown(context.Background()) //nolint:errcheck
|
||||
})
|
||||
}
|
||||
|
||||
func scrape(t *testing.T, h http.Handler) string {
|
||||
t.Helper()
|
||||
req := httptest.NewRequest(http.MethodGet, "/metrics", http.NoBody)
|
||||
rr := httptest.NewRecorder()
|
||||
h.ServeHTTP(rr, req)
|
||||
if rr.Code != http.StatusOK {
|
||||
t.Fatalf("scrape returned %d", rr.Code)
|
||||
}
|
||||
b, _ := io.ReadAll(rr.Body)
|
||||
return string(b)
|
||||
}
|
||||
|
||||
func assertContains(t *testing.T, body, substr string) {
|
||||
t.Helper()
|
||||
if !strings.Contains(body, substr) {
|
||||
t.Errorf("expected %q in output\nbody:\n%s", substr, body)
|
||||
}
|
||||
}
|
||||
|
||||
// --- Tests ---
|
||||
|
||||
func TestInitializePrometheus(t *testing.T) {
|
||||
h := initPrometheus(t)
|
||||
if h == nil {
|
||||
t.Error("expected non-nil HTTP handler for prometheus backend")
|
||||
}
|
||||
}
|
||||
|
||||
func TestInitializeNoop(t *testing.T) {
|
||||
initNoop(t)
|
||||
// All Record* functions must not panic when noop backend is active.
|
||||
metrics.RecordRestart()
|
||||
metrics.RecordHTTPRequest("/test", "GET", "200")
|
||||
metrics.RecordSNIConnection("accepted")
|
||||
metrics.RecordPeersTotal("wg0", 1)
|
||||
}
|
||||
|
||||
func TestDefaultConfig(t *testing.T) {
|
||||
cfg := metrics.DefaultConfig()
|
||||
if cfg.Backend != "prometheus" {
|
||||
t.Errorf("expected prometheus default backend, got %q", cfg.Backend)
|
||||
}
|
||||
}
|
||||
|
||||
func TestShutdownNoInit(t *testing.T) {
|
||||
// Shutdown without Initialize should not panic or error.
|
||||
if err := metrics.Shutdown(context.Background()); err != nil {
|
||||
t.Errorf("unexpected error: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRecordHTTPRequest(t *testing.T) {
|
||||
h := initPrometheus(t)
|
||||
metrics.RecordHTTPRequest("/peers", "POST", "201")
|
||||
body := scrape(t, h)
|
||||
assertContains(t, body, "gerbil_http_requests_total")
|
||||
}
|
||||
|
||||
func TestRecordHTTPRequestDuration(t *testing.T) {
|
||||
h := initPrometheus(t)
|
||||
metrics.RecordHTTPRequestDuration("/peers", "POST", 0.05)
|
||||
body := scrape(t, h)
|
||||
assertContains(t, body, "gerbil_http_request_duration_seconds")
|
||||
}
|
||||
|
||||
func TestRecordInterfaceUp(t *testing.T) {
|
||||
h := initPrometheus(t)
|
||||
metrics.RecordInterfaceUp("wg0", "host1", true)
|
||||
metrics.RecordInterfaceUp("wg0", "host1", false)
|
||||
body := scrape(t, h)
|
||||
assertContains(t, body, "gerbil_wg_interface_up")
|
||||
}
|
||||
|
||||
func TestRecordPeersTotal(t *testing.T) {
|
||||
h := initPrometheus(t)
|
||||
metrics.RecordPeersTotal("wg0", 3)
|
||||
body := scrape(t, h)
|
||||
assertContains(t, body, "gerbil_wg_peers_total")
|
||||
}
|
||||
|
||||
func TestRecordBytesReceivedTransmitted(t *testing.T) {
|
||||
h := initPrometheus(t)
|
||||
metrics.RecordBytesReceived("wg0", "peer1", 1024)
|
||||
metrics.RecordBytesTransmitted("wg0", "peer1", 512)
|
||||
body := scrape(t, h)
|
||||
assertContains(t, body, "gerbil_wg_bytes_received_total")
|
||||
assertContains(t, body, "gerbil_wg_bytes_transmitted_total")
|
||||
}
|
||||
|
||||
func TestRecordSNI(t *testing.T) {
|
||||
h := initPrometheus(t)
|
||||
metrics.RecordSNIConnection("accepted")
|
||||
metrics.RecordSNIActiveConnection(1)
|
||||
metrics.RecordSNIConnectionDuration(1.5)
|
||||
metrics.RecordSNIRouteCacheHit("hit")
|
||||
metrics.RecordSNIRouteAPIRequest("success")
|
||||
metrics.RecordSNIRouteAPILatency(0.01)
|
||||
metrics.RecordSNILocalOverride("yes")
|
||||
metrics.RecordSNITrustedProxyEvent("proxy_protocol_parsed")
|
||||
metrics.RecordSNIProxyProtocolParseError()
|
||||
metrics.RecordSNIDataBytes("client_to_target", 2048)
|
||||
metrics.RecordSNITunnelTermination("eof")
|
||||
body := scrape(t, h)
|
||||
assertContains(t, body, "gerbil_sni_connections_total")
|
||||
assertContains(t, body, "gerbil_sni_active_connections")
|
||||
}
|
||||
|
||||
func TestRecordRelay(t *testing.T) {
|
||||
h := initPrometheus(t)
|
||||
metrics.RecordUDPPacket("relay", "data", "in")
|
||||
metrics.RecordUDPPacketSize("relay", "data", 256)
|
||||
metrics.RecordHolePunchEvent("relay", "success")
|
||||
metrics.RecordProxyMapping("relay", 1)
|
||||
metrics.RecordSession("relay", 1)
|
||||
metrics.RecordSessionRebuilt("relay")
|
||||
metrics.RecordCommPattern("relay", 1)
|
||||
metrics.RecordProxyCleanupRemoved("relay", "session", 2)
|
||||
metrics.RecordProxyConnectionError("relay", "dial_udp")
|
||||
metrics.RecordProxyInitialMappings("relay", 5)
|
||||
metrics.RecordProxyMappingUpdate("relay")
|
||||
metrics.RecordProxyIdleCleanupDuration("relay", "conn", 0.1)
|
||||
body := scrape(t, h)
|
||||
assertContains(t, body, "gerbil_udp_packets_total")
|
||||
assertContains(t, body, "gerbil_proxy_mapping_active")
|
||||
}
|
||||
|
||||
func TestRecordWireGuard(t *testing.T) {
|
||||
h := initPrometheus(t)
|
||||
metrics.RecordHandshake("wg0", "peer1", "success")
|
||||
metrics.RecordHandshakeLatency("wg0", "peer1", 0.02)
|
||||
metrics.RecordPeerRTT("wg0", "peer1", 0.005)
|
||||
metrics.RecordPeerConnected("wg0", "peer1", true)
|
||||
metrics.RecordAllowedIPsCount("wg0", "peer1", 2)
|
||||
metrics.RecordKeyRotation("wg0", "scheduled")
|
||||
body := scrape(t, h)
|
||||
assertContains(t, body, "gerbil_wg_handshakes_total")
|
||||
assertContains(t, body, "gerbil_wg_peer_connected")
|
||||
}
|
||||
|
||||
func TestRecordHousekeeping(t *testing.T) {
|
||||
h := initPrometheus(t)
|
||||
metrics.RecordRemoteConfigFetch("success")
|
||||
metrics.RecordBandwidthReport("success")
|
||||
metrics.RecordPeerBandwidthBytes("peer1", "rx", 512)
|
||||
metrics.RecordMemorySpike("warning")
|
||||
metrics.RecordHeapProfileWritten()
|
||||
body := scrape(t, h)
|
||||
assertContains(t, body, "gerbil_remote_config_fetches_total")
|
||||
assertContains(t, body, "gerbil_memory_spike_total")
|
||||
}
|
||||
|
||||
func TestRecordOperational(t *testing.T) {
|
||||
h := initPrometheus(t)
|
||||
metrics.RecordConfigReload("success")
|
||||
metrics.RecordRestart()
|
||||
metrics.RecordAuthFailure("peer1", "bad_key")
|
||||
metrics.RecordACLDenied("wg0", "peer1", "default-deny")
|
||||
metrics.RecordCertificateExpiry(exampleHostname, "wg0", 90.0)
|
||||
body := scrape(t, h)
|
||||
assertContains(t, body, "gerbil_config_reloads_total")
|
||||
assertContains(t, body, "gerbil_restart_total")
|
||||
}
|
||||
|
||||
func TestRecordNetlink(t *testing.T) {
|
||||
h := initPrometheus(t)
|
||||
metrics.RecordNetlinkEvent("link_up")
|
||||
metrics.RecordNetlinkError("wg", "timeout")
|
||||
metrics.RecordSyncDuration("config", 0.1)
|
||||
metrics.RecordWorkqueueDepth("main", 3)
|
||||
metrics.RecordKernelModuleLoad("success")
|
||||
metrics.RecordFirewallRuleApplied("success", "INPUT")
|
||||
metrics.RecordActiveSession("wg0", 1)
|
||||
metrics.RecordActiveProxyConnection(exampleHostname, 1)
|
||||
metrics.RecordProxyRouteLookup("hit", exampleHostname)
|
||||
metrics.RecordProxyTLSHandshake(exampleHostname, 0.05)
|
||||
metrics.RecordProxyBytesTransmitted(exampleHostname, "tx", 1024)
|
||||
body := scrape(t, h)
|
||||
assertContains(t, body, "gerbil_netlink_events_total")
|
||||
assertContains(t, body, "gerbil_active_sessions")
|
||||
}
|
||||
|
||||
func TestRecordPeerOperation(t *testing.T) {
|
||||
h := initPrometheus(t)
|
||||
metrics.RecordPeerOperation("add", "success")
|
||||
metrics.RecordProxyMappingUpdateRequest("success")
|
||||
metrics.RecordDestinationsUpdateRequest("success")
|
||||
body := scrape(t, h)
|
||||
assertContains(t, body, "gerbil_peer_operations_total")
|
||||
}
|
||||
|
||||
func TestInitializeInvalidBackend(t *testing.T) {
|
||||
cfg := observability.MetricsConfig{Enabled: true, Backend: "invalid"}
|
||||
_, err := metrics.Initialize(cfg)
|
||||
if err == nil {
|
||||
t.Error("expected error for invalid backend")
|
||||
}
|
||||
}
|
||||
|
||||
func TestInitializeBackendNone(t *testing.T) {
|
||||
cfg := metrics.DefaultConfig()
|
||||
cfg.Backend = "none"
|
||||
h, err := metrics.Initialize(cfg)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if h != nil {
|
||||
t.Error("none backend should return nil handler")
|
||||
}
|
||||
// All Record* calls should be noop
|
||||
metrics.RecordRestart()
|
||||
metrics.Shutdown(context.Background()) //nolint:errcheck
|
||||
}
|
||||
119
internal/observability/config.go
Normal file
119
internal/observability/config.go
Normal file
@@ -0,0 +1,119 @@
|
||||
// Package observability provides a backend-neutral metrics abstraction for Gerbil.
|
||||
//
|
||||
// Exactly one metrics backend may be enabled at runtime:
|
||||
// - "prometheus" – native Prometheus client; exposes /metrics (no OTel SDK required)
|
||||
// - "otel" – OpenTelemetry metrics pushed via OTLP (gRPC or HTTP)
|
||||
// - "none" – metrics disabled; a safe noop implementation is used
|
||||
//
|
||||
// Future OTel tracing and logging can be added to this package alongside the
|
||||
// existing otel sub-package without touching the Prometheus-native path.
|
||||
package observability
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"time"
|
||||
)
|
||||
|
||||
// MetricsConfig is the top-level metrics configuration.
|
||||
type MetricsConfig struct {
|
||||
// Enabled controls whether any metrics backend is started.
|
||||
// When false the noop backend is used regardless of Backend.
|
||||
Enabled bool
|
||||
|
||||
// Backend selects the active backend: "prometheus", "otel", or "none".
|
||||
Backend string
|
||||
|
||||
// Prometheus holds settings used only by the Prometheus-native backend.
|
||||
Prometheus PrometheusConfig
|
||||
|
||||
// OTel holds settings used only by the OTel backend.
|
||||
OTel OTelConfig
|
||||
|
||||
// ServiceName is propagated to OTel resource attributes.
|
||||
ServiceName string
|
||||
|
||||
// ServiceVersion is propagated to OTel resource attributes.
|
||||
ServiceVersion string
|
||||
|
||||
// DeploymentEnvironment is an optional OTel resource attribute.
|
||||
DeploymentEnvironment string
|
||||
}
|
||||
|
||||
// PrometheusConfig holds Prometheus-native backend settings.
|
||||
type PrometheusConfig struct {
|
||||
// Path is the HTTP path to expose the /metrics endpoint.
|
||||
// Defaults to "/metrics".
|
||||
Path string
|
||||
}
|
||||
|
||||
// OTelConfig holds OpenTelemetry backend settings.
|
||||
type OTelConfig struct {
|
||||
// Protocol is the OTLP transport: "grpc" (default) or "http".
|
||||
Protocol string
|
||||
|
||||
// Endpoint is the OTLP collector address (e.g. "localhost:4317").
|
||||
Endpoint string
|
||||
|
||||
// Insecure disables TLS for the OTLP connection.
|
||||
Insecure bool
|
||||
|
||||
// ExportInterval is how often metrics are pushed to the collector.
|
||||
// Defaults to 60 s.
|
||||
ExportInterval time.Duration
|
||||
}
|
||||
|
||||
// DefaultMetricsConfig returns a MetricsConfig with sensible defaults.
|
||||
func DefaultMetricsConfig() MetricsConfig {
|
||||
return MetricsConfig{
|
||||
Enabled: true,
|
||||
Backend: "prometheus",
|
||||
Prometheus: PrometheusConfig{
|
||||
Path: "/metrics",
|
||||
},
|
||||
OTel: OTelConfig{
|
||||
Protocol: "grpc",
|
||||
Endpoint: "localhost:4317",
|
||||
Insecure: true,
|
||||
ExportInterval: 60 * time.Second,
|
||||
},
|
||||
ServiceName: "gerbil",
|
||||
ServiceVersion: "1.0.0",
|
||||
}
|
||||
}
|
||||
|
||||
// Validate checks the configuration for logical errors.
|
||||
func (c *MetricsConfig) Validate() error {
|
||||
if !c.Enabled {
|
||||
return nil
|
||||
}
|
||||
|
||||
switch c.Backend {
|
||||
case "prometheus", "none", "":
|
||||
// valid
|
||||
case "otel":
|
||||
if c.OTel.Endpoint == "" {
|
||||
return fmt.Errorf("metrics: backend=otel requires a non-empty OTel endpoint")
|
||||
}
|
||||
if c.OTel.Protocol != "grpc" && c.OTel.Protocol != "http" {
|
||||
return fmt.Errorf("metrics: otel protocol must be \"grpc\" or \"http\", got %q", c.OTel.Protocol)
|
||||
}
|
||||
if c.OTel.ExportInterval <= 0 {
|
||||
return fmt.Errorf("metrics: otel export interval must be positive")
|
||||
}
|
||||
default:
|
||||
return fmt.Errorf("metrics: unknown backend %q (must be \"prometheus\", \"otel\", or \"none\")", c.Backend)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// effectiveBackend resolves the backend string, treating "" and "none" as noop.
|
||||
func (c *MetricsConfig) effectiveBackend() string {
|
||||
if !c.Enabled {
|
||||
return "none"
|
||||
}
|
||||
if c.Backend == "" {
|
||||
return "none"
|
||||
}
|
||||
return c.Backend
|
||||
}
|
||||
152
internal/observability/metrics.go
Normal file
152
internal/observability/metrics.go
Normal file
@@ -0,0 +1,152 @@
|
||||
package observability
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"net/http"
|
||||
|
||||
obsotel "github.com/fosrl/gerbil/internal/observability/otel"
|
||||
obsprom "github.com/fosrl/gerbil/internal/observability/prometheus"
|
||||
)
|
||||
|
||||
// Labels is a set of key-value pairs attached to a metric observation.
|
||||
// Use only stable, bounded-cardinality label values.
|
||||
type Labels = map[string]string
|
||||
|
||||
// Counter is a monotonically increasing instrument.
|
||||
type Counter interface {
|
||||
Add(ctx context.Context, value int64, labels Labels)
|
||||
}
|
||||
|
||||
// UpDownCounter is a bidirectional integer instrument (can go up or down).
|
||||
type UpDownCounter interface {
|
||||
Add(ctx context.Context, value int64, labels Labels)
|
||||
}
|
||||
|
||||
// Int64Gauge records a snapshot integer value.
|
||||
type Int64Gauge interface {
|
||||
Record(ctx context.Context, value int64, labels Labels)
|
||||
}
|
||||
|
||||
// Float64Gauge records a snapshot float value.
|
||||
type Float64Gauge interface {
|
||||
Record(ctx context.Context, value float64, labels Labels)
|
||||
}
|
||||
|
||||
// Histogram records a distribution of values.
|
||||
type Histogram interface {
|
||||
Record(ctx context.Context, value float64, labels Labels)
|
||||
}
|
||||
|
||||
// Backend is the single interface that each metrics implementation must satisfy.
|
||||
// Application code must not import backend-specific packages (prometheus, otel).
|
||||
type Backend interface {
|
||||
// NewCounter creates a counter metric.
|
||||
// labelNames declares the set of label keys that will be passed at observation time.
|
||||
NewCounter(name, desc string, labelNames ...string) Counter
|
||||
|
||||
// NewUpDownCounter creates an up-down counter metric.
|
||||
NewUpDownCounter(name, desc string, labelNames ...string) UpDownCounter
|
||||
|
||||
// NewInt64Gauge creates an integer gauge metric.
|
||||
NewInt64Gauge(name, desc string, labelNames ...string) Int64Gauge
|
||||
|
||||
// NewFloat64Gauge creates a float gauge metric.
|
||||
NewFloat64Gauge(name, desc string, labelNames ...string) Float64Gauge
|
||||
|
||||
// NewHistogram creates a histogram metric.
|
||||
// buckets are the explicit upper-bound bucket boundaries.
|
||||
NewHistogram(name, desc string, buckets []float64, labelNames ...string) Histogram
|
||||
|
||||
// HTTPHandler returns the /metrics HTTP handler.
|
||||
// Implementations that do not expose an HTTP endpoint return nil.
|
||||
HTTPHandler() http.Handler
|
||||
|
||||
// Shutdown performs a graceful flush / shutdown of the backend.
|
||||
Shutdown(ctx context.Context) error
|
||||
}
|
||||
|
||||
// New creates the backend selected by cfg and returns it.
|
||||
// Exactly one backend is created; the selection is mutually exclusive.
|
||||
func New(cfg MetricsConfig) (Backend, error) {
|
||||
if err := cfg.Validate(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
switch cfg.effectiveBackend() {
|
||||
case "prometheus":
|
||||
b, err := obsprom.New(obsprom.Config{
|
||||
Path: cfg.Prometheus.Path,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &promAdapter{b: b}, nil
|
||||
case "otel":
|
||||
b, err := obsotel.New(obsotel.Config{
|
||||
Protocol: cfg.OTel.Protocol,
|
||||
Endpoint: cfg.OTel.Endpoint,
|
||||
Insecure: cfg.OTel.Insecure,
|
||||
ExportInterval: cfg.OTel.ExportInterval,
|
||||
ServiceName: cfg.ServiceName,
|
||||
ServiceVersion: cfg.ServiceVersion,
|
||||
DeploymentEnvironment: cfg.DeploymentEnvironment,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &otelAdapter{b: b}, nil
|
||||
case "none":
|
||||
return &NoopBackend{}, nil
|
||||
default:
|
||||
return nil, fmt.Errorf("observability: unknown backend %q", cfg.effectiveBackend())
|
||||
}
|
||||
}
|
||||
|
||||
// promAdapter wraps obsprom.Backend to implement the observability.Backend interface.
|
||||
// The concrete instrument types from the prometheus sub-package satisfy the instrument
|
||||
// interfaces via Go's structural (duck) typing without importing this package.
|
||||
type promAdapter struct {
|
||||
b *obsprom.Backend
|
||||
}
|
||||
|
||||
func (a *promAdapter) NewCounter(name, desc string, labelNames ...string) Counter {
|
||||
return a.b.NewCounter(name, desc, labelNames...)
|
||||
}
|
||||
func (a *promAdapter) NewUpDownCounter(name, desc string, labelNames ...string) UpDownCounter {
|
||||
return a.b.NewUpDownCounter(name, desc, labelNames...)
|
||||
}
|
||||
func (a *promAdapter) NewInt64Gauge(name, desc string, labelNames ...string) Int64Gauge {
|
||||
return a.b.NewInt64Gauge(name, desc, labelNames...)
|
||||
}
|
||||
func (a *promAdapter) NewFloat64Gauge(name, desc string, labelNames ...string) Float64Gauge {
|
||||
return a.b.NewFloat64Gauge(name, desc, labelNames...)
|
||||
}
|
||||
func (a *promAdapter) NewHistogram(name, desc string, buckets []float64, labelNames ...string) Histogram {
|
||||
return a.b.NewHistogram(name, desc, buckets, labelNames...)
|
||||
}
|
||||
func (a *promAdapter) HTTPHandler() http.Handler { return a.b.HTTPHandler() }
|
||||
func (a *promAdapter) Shutdown(ctx context.Context) error { return a.b.Shutdown(ctx) }
|
||||
|
||||
// otelAdapter wraps obsotel.Backend to implement the observability.Backend interface.
|
||||
type otelAdapter struct {
|
||||
b *obsotel.Backend
|
||||
}
|
||||
|
||||
func (a *otelAdapter) NewCounter(name, desc string, labelNames ...string) Counter {
|
||||
return a.b.NewCounter(name, desc, labelNames...)
|
||||
}
|
||||
func (a *otelAdapter) NewUpDownCounter(name, desc string, labelNames ...string) UpDownCounter {
|
||||
return a.b.NewUpDownCounter(name, desc, labelNames...)
|
||||
}
|
||||
func (a *otelAdapter) NewInt64Gauge(name, desc string, labelNames ...string) Int64Gauge {
|
||||
return a.b.NewInt64Gauge(name, desc, labelNames...)
|
||||
}
|
||||
func (a *otelAdapter) NewFloat64Gauge(name, desc string, labelNames ...string) Float64Gauge {
|
||||
return a.b.NewFloat64Gauge(name, desc, labelNames...)
|
||||
}
|
||||
func (a *otelAdapter) NewHistogram(name, desc string, buckets []float64, labelNames ...string) Histogram {
|
||||
return a.b.NewHistogram(name, desc, buckets, labelNames...)
|
||||
}
|
||||
func (a *otelAdapter) HTTPHandler() http.Handler { return a.b.HTTPHandler() }
|
||||
func (a *otelAdapter) Shutdown(ctx context.Context) error { return a.b.Shutdown(ctx) }
|
||||
198
internal/observability/metrics_test.go
Normal file
198
internal/observability/metrics_test.go
Normal file
@@ -0,0 +1,198 @@
|
||||
package observability_test
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/fosrl/gerbil/internal/observability"
|
||||
)
|
||||
|
||||
const (
|
||||
defaultMetricsPath = "/metrics"
|
||||
otelGRPCEndpoint = "localhost:4317"
|
||||
errUnexpectedFmt = "unexpected error: %v"
|
||||
)
|
||||
|
||||
func TestDefaultMetricsConfig(t *testing.T) {
|
||||
cfg := observability.DefaultMetricsConfig()
|
||||
if !cfg.Enabled {
|
||||
t.Error("default config should have Enabled=true")
|
||||
}
|
||||
if cfg.Backend != "prometheus" {
|
||||
t.Errorf("default backend should be prometheus, got %q", cfg.Backend)
|
||||
}
|
||||
if cfg.Prometheus.Path != defaultMetricsPath {
|
||||
t.Errorf("default prometheus path should be %s, got %q", defaultMetricsPath, cfg.Prometheus.Path)
|
||||
}
|
||||
if cfg.OTel.Protocol != "grpc" {
|
||||
t.Errorf("default otel protocol should be grpc, got %q", cfg.OTel.Protocol)
|
||||
}
|
||||
if cfg.OTel.ExportInterval != 60*time.Second {
|
||||
t.Errorf("default otel export interval should be 60s, got %v", cfg.OTel.ExportInterval)
|
||||
}
|
||||
}
|
||||
func TestValidateValidConfigs(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
cfg observability.MetricsConfig
|
||||
}{
|
||||
{name: "disabled", cfg: observability.MetricsConfig{Enabled: false}},
|
||||
{name: "backend none", cfg: observability.MetricsConfig{Enabled: true, Backend: "none"}},
|
||||
{name: "backend empty", cfg: observability.MetricsConfig{Enabled: true, Backend: ""}},
|
||||
{name: "prometheus", cfg: observability.MetricsConfig{Enabled: true, Backend: "prometheus"}},
|
||||
{
|
||||
name: "otel grpc",
|
||||
cfg: observability.MetricsConfig{
|
||||
Enabled: true, Backend: "otel",
|
||||
OTel: observability.OTelConfig{Protocol: "grpc", Endpoint: otelGRPCEndpoint, ExportInterval: 10 * time.Second},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "otel http",
|
||||
cfg: observability.MetricsConfig{
|
||||
Enabled: true, Backend: "otel",
|
||||
OTel: observability.OTelConfig{Protocol: "http", Endpoint: "localhost:4318", ExportInterval: 30 * time.Second},
|
||||
},
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if err := tt.cfg.Validate(); err != nil {
|
||||
t.Errorf("unexpected validation error: %v", err)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestValidateInvalidConfigs(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
cfg observability.MetricsConfig
|
||||
}{
|
||||
{name: "unknown backend", cfg: observability.MetricsConfig{Enabled: true, Backend: "datadog"}},
|
||||
{
|
||||
name: "otel missing endpoint",
|
||||
cfg: observability.MetricsConfig{
|
||||
Enabled: true, Backend: "otel",
|
||||
OTel: observability.OTelConfig{Protocol: "grpc", Endpoint: "", ExportInterval: 10 * time.Second},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "otel invalid protocol",
|
||||
cfg: observability.MetricsConfig{
|
||||
Enabled: true, Backend: "otel",
|
||||
OTel: observability.OTelConfig{Protocol: "tcp", Endpoint: otelGRPCEndpoint, ExportInterval: 10 * time.Second},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "otel zero interval",
|
||||
cfg: observability.MetricsConfig{
|
||||
Enabled: true, Backend: "otel",
|
||||
OTel: observability.OTelConfig{Protocol: "grpc", Endpoint: otelGRPCEndpoint, ExportInterval: 0},
|
||||
},
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if err := tt.cfg.Validate(); err == nil {
|
||||
t.Error("expected validation error but got nil")
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestNewNoopBackend(t *testing.T) {
|
||||
b, err := observability.New(observability.MetricsConfig{Enabled: false})
|
||||
if err != nil {
|
||||
t.Fatalf(errUnexpectedFmt, err)
|
||||
}
|
||||
if b.HTTPHandler() != nil {
|
||||
t.Error("noop backend HTTPHandler should return nil")
|
||||
}
|
||||
}
|
||||
|
||||
func TestNewNoneBackend(t *testing.T) {
|
||||
b, err := observability.New(observability.MetricsConfig{Enabled: true, Backend: "none"})
|
||||
if err != nil {
|
||||
t.Fatalf(errUnexpectedFmt, err)
|
||||
}
|
||||
if b.HTTPHandler() != nil {
|
||||
t.Error("none backend HTTPHandler should return nil")
|
||||
}
|
||||
}
|
||||
|
||||
func TestNewPrometheusBackend(t *testing.T) {
|
||||
cfg := observability.MetricsConfig{
|
||||
Enabled: true, Backend: "prometheus",
|
||||
Prometheus: observability.PrometheusConfig{Path: defaultMetricsPath},
|
||||
}
|
||||
b, err := observability.New(cfg)
|
||||
if err != nil {
|
||||
t.Fatalf(errUnexpectedFmt, err)
|
||||
}
|
||||
if b.HTTPHandler() == nil {
|
||||
t.Error("prometheus backend HTTPHandler should not be nil")
|
||||
}
|
||||
if err := b.Shutdown(context.Background()); err != nil {
|
||||
t.Errorf("prometheus shutdown error: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestNewInvalidBackend(t *testing.T) {
|
||||
_, err := observability.New(observability.MetricsConfig{Enabled: true, Backend: "invalid"})
|
||||
if err == nil {
|
||||
t.Error("expected error for invalid backend")
|
||||
}
|
||||
}
|
||||
|
||||
func TestPrometheusAdapterAllInstruments(t *testing.T) {
|
||||
b, err := observability.New(observability.MetricsConfig{
|
||||
Enabled: true, Backend: "prometheus",
|
||||
Prometheus: observability.PrometheusConfig{Path: defaultMetricsPath},
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create backend: %v", err)
|
||||
}
|
||||
ctx := context.Background()
|
||||
labels := observability.Labels{"k": "v"}
|
||||
|
||||
b.NewCounter("prom_adapter_counter_total", "desc", "k").Add(ctx, 1, labels)
|
||||
b.NewUpDownCounter("prom_adapter_updown", "desc", "k").Add(ctx, 2, labels)
|
||||
b.NewInt64Gauge("prom_adapter_int_gauge", "desc", "k").Record(ctx, 99, labels)
|
||||
b.NewFloat64Gauge("prom_adapter_float_gauge", "desc", "k").Record(ctx, 1.23, labels)
|
||||
b.NewHistogram("prom_adapter_histogram", "desc", []float64{0.1, 1.0}, "k").Record(ctx, 0.5, labels)
|
||||
|
||||
if b.HTTPHandler() == nil {
|
||||
t.Error("prometheus adapter HTTPHandler should not be nil")
|
||||
}
|
||||
if err := b.Shutdown(ctx); err != nil {
|
||||
t.Errorf("Shutdown error: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestOtelAdapterAllInstruments(t *testing.T) {
|
||||
b, err := observability.New(observability.MetricsConfig{
|
||||
Enabled: true, Backend: "otel",
|
||||
OTel: observability.OTelConfig{Protocol: "grpc", Endpoint: otelGRPCEndpoint, Insecure: true, ExportInterval: 100 * time.Millisecond},
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create otel backend: %v", err)
|
||||
}
|
||||
ctx := context.Background()
|
||||
labels := observability.Labels{"k": "v"}
|
||||
|
||||
b.NewCounter("otel_adapter_counter_total", "desc", "k").Add(ctx, 1, labels)
|
||||
b.NewUpDownCounter("otel_adapter_updown", "desc", "k").Add(ctx, 2, labels)
|
||||
b.NewInt64Gauge("otel_adapter_int_gauge", "desc", "k").Record(ctx, 99, labels)
|
||||
b.NewFloat64Gauge("otel_adapter_float_gauge", "desc", "k").Record(ctx, 1.23, labels)
|
||||
b.NewHistogram("otel_adapter_histogram", "desc", []float64{0.1, 1.0}, "k").Record(ctx, 0.5, labels)
|
||||
|
||||
if b.HTTPHandler() != nil {
|
||||
t.Error("OTel adapter HTTPHandler should be nil")
|
||||
}
|
||||
|
||||
shutdownCtx, cancel := context.WithTimeout(ctx, 2*time.Second)
|
||||
defer cancel()
|
||||
b.Shutdown(shutdownCtx) //nolint:errcheck
|
||||
}
|
||||
71
internal/observability/noop.go
Normal file
71
internal/observability/noop.go
Normal file
@@ -0,0 +1,71 @@
|
||||
package observability
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net/http"
|
||||
)
|
||||
|
||||
// NoopBackend is a Backend that discards all observations.
|
||||
// It is used when metrics are disabled (Enabled=false or Backend="none").
|
||||
// All methods are safe to call concurrently.
|
||||
type NoopBackend struct{}
|
||||
|
||||
// Compile-time interface check.
|
||||
var _ Backend = (*NoopBackend)(nil)
|
||||
|
||||
func (n *NoopBackend) NewCounter(_ string, _ string, _ ...string) Counter {
|
||||
_ = n
|
||||
return noopCounter{}
|
||||
}
|
||||
|
||||
func (n *NoopBackend) NewUpDownCounter(_ string, _ string, _ ...string) UpDownCounter {
|
||||
_ = n
|
||||
return noopUpDownCounter{}
|
||||
}
|
||||
|
||||
func (n *NoopBackend) NewInt64Gauge(_ string, _ string, _ ...string) Int64Gauge {
|
||||
_ = n
|
||||
return noopInt64Gauge{}
|
||||
}
|
||||
|
||||
func (n *NoopBackend) NewFloat64Gauge(_ string, _ string, _ ...string) Float64Gauge {
|
||||
_ = n
|
||||
return noopFloat64Gauge{}
|
||||
}
|
||||
|
||||
func (n *NoopBackend) NewHistogram(_ string, _ string, _ []float64, _ ...string) Histogram {
|
||||
_ = n
|
||||
return noopHistogram{}
|
||||
}
|
||||
|
||||
func (n *NoopBackend) HTTPHandler() http.Handler {
|
||||
_ = n
|
||||
return nil
|
||||
}
|
||||
|
||||
func (n *NoopBackend) Shutdown(_ context.Context) error {
|
||||
_ = n
|
||||
return nil
|
||||
}
|
||||
|
||||
// --- noop instrument types ---
|
||||
|
||||
type noopCounter struct{}
|
||||
|
||||
func (noopCounter) Add(_ context.Context, _ int64, _ Labels) { /* intentionally no-op */ }
|
||||
|
||||
type noopUpDownCounter struct{}
|
||||
|
||||
func (noopUpDownCounter) Add(_ context.Context, _ int64, _ Labels) { /* intentionally no-op */ }
|
||||
|
||||
type noopInt64Gauge struct{}
|
||||
|
||||
func (noopInt64Gauge) Record(_ context.Context, _ int64, _ Labels) { /* intentionally no-op */ }
|
||||
|
||||
type noopFloat64Gauge struct{}
|
||||
|
||||
func (noopFloat64Gauge) Record(_ context.Context, _ float64, _ Labels) { /* intentionally no-op */ }
|
||||
|
||||
type noopHistogram struct{}
|
||||
|
||||
func (noopHistogram) Record(_ context.Context, _ float64, _ Labels) { /* intentionally no-op */ }
|
||||
67
internal/observability/noop_test.go
Normal file
67
internal/observability/noop_test.go
Normal file
@@ -0,0 +1,67 @@
|
||||
package observability_test
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
|
||||
"github.com/fosrl/gerbil/internal/observability"
|
||||
)
|
||||
|
||||
func TestNoopBackendAllInstruments(t *testing.T) {
|
||||
n := &observability.NoopBackend{}
|
||||
|
||||
ctx := context.Background()
|
||||
labels := observability.Labels{"k": "v"}
|
||||
|
||||
t.Run("Counter", func(_ *testing.T) {
|
||||
c := n.NewCounter("test_counter", "desc")
|
||||
c.Add(ctx, 1, labels)
|
||||
c.Add(ctx, 0, nil)
|
||||
})
|
||||
|
||||
t.Run("UpDownCounter", func(_ *testing.T) {
|
||||
u := n.NewUpDownCounter("test_updown", "desc")
|
||||
u.Add(ctx, 1, labels)
|
||||
u.Add(ctx, -1, nil)
|
||||
})
|
||||
|
||||
t.Run("Int64Gauge", func(_ *testing.T) {
|
||||
g := n.NewInt64Gauge("test_int64gauge", "desc")
|
||||
g.Record(ctx, 42, labels)
|
||||
g.Record(ctx, 0, nil)
|
||||
})
|
||||
|
||||
t.Run("Float64Gauge", func(_ *testing.T) {
|
||||
g := n.NewFloat64Gauge("test_float64gauge", "desc")
|
||||
g.Record(ctx, 3.14, labels)
|
||||
g.Record(ctx, 0, nil)
|
||||
})
|
||||
|
||||
t.Run("Histogram", func(_ *testing.T) {
|
||||
h := n.NewHistogram("test_histogram", "desc", []float64{1, 5, 10})
|
||||
h.Record(ctx, 2.5, labels)
|
||||
h.Record(ctx, 0, nil)
|
||||
})
|
||||
|
||||
t.Run("HTTPHandler", func(t *testing.T) {
|
||||
if n.HTTPHandler() != nil {
|
||||
t.Error("noop HTTPHandler should be nil")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("Shutdown", func(t *testing.T) {
|
||||
if err := n.Shutdown(ctx); err != nil {
|
||||
t.Errorf("noop Shutdown should not error: %v", err)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func TestNoopBackendLabelNames(_ *testing.T) {
|
||||
// Verify that label names passed at creation time are accepted without panic.
|
||||
n := &observability.NoopBackend{}
|
||||
n.NewCounter("c", "d", "label1", "label2")
|
||||
n.NewUpDownCounter("u", "d", "l1")
|
||||
n.NewInt64Gauge("g1", "d", "l1", "l2", "l3")
|
||||
n.NewFloat64Gauge("g2", "d")
|
||||
n.NewHistogram("h", "d", []float64{0.1, 1.0}, "l1")
|
||||
}
|
||||
210
internal/observability/otel/backend.go
Normal file
210
internal/observability/otel/backend.go
Normal file
@@ -0,0 +1,210 @@
|
||||
// Package otel implements the OpenTelemetry metrics backend for Gerbil.
|
||||
//
|
||||
// Metrics are exported via OTLP (gRPC or HTTP) to an external collector.
|
||||
// No Prometheus /metrics endpoint is exposed in this mode.
|
||||
// Future OTel tracing and logging can be added alongside this package
|
||||
// without touching the Prometheus-native path.
|
||||
package otel
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"time"
|
||||
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
"go.opentelemetry.io/otel/metric"
|
||||
sdkmetric "go.opentelemetry.io/otel/sdk/metric"
|
||||
)
|
||||
|
||||
// Config holds OTel backend configuration.
|
||||
type Config struct {
|
||||
// Protocol is "grpc" (default) or "http".
|
||||
Protocol string
|
||||
|
||||
// Endpoint is the OTLP collector address.
|
||||
Endpoint string
|
||||
|
||||
// Insecure disables TLS.
|
||||
Insecure bool
|
||||
|
||||
// ExportInterval is the period between pushes to the collector.
|
||||
ExportInterval time.Duration
|
||||
|
||||
ServiceName string
|
||||
ServiceVersion string
|
||||
DeploymentEnvironment string
|
||||
}
|
||||
|
||||
// Backend is the OTel metrics backend.
|
||||
type Backend struct {
|
||||
cfg Config
|
||||
provider *sdkmetric.MeterProvider
|
||||
meter metric.Meter
|
||||
}
|
||||
|
||||
// New creates and initialises an OTel backend.
|
||||
//
|
||||
// cfg.Protocol must be "grpc" (default) or "http".
|
||||
// cfg.Endpoint is the OTLP collector address (e.g. "localhost:4317").
|
||||
// cfg.ExportInterval sets the push period (defaults to 60 s if ≤ 0).
|
||||
// cfg.Insecure disables TLS on the OTLP connection.
|
||||
//
|
||||
// Connection to the collector is established lazily; New only validates cfg
|
||||
// and creates the SDK components. It returns an error only if the OTel resource
|
||||
// or exporter cannot be constructed.
|
||||
func New(cfg Config) (*Backend, error) {
|
||||
if cfg.Protocol == "" {
|
||||
cfg.Protocol = "grpc"
|
||||
}
|
||||
if cfg.ExportInterval <= 0 {
|
||||
cfg.ExportInterval = 60 * time.Second
|
||||
}
|
||||
if cfg.ServiceName == "" {
|
||||
cfg.ServiceName = "gerbil"
|
||||
}
|
||||
|
||||
res, err := newResource(cfg.ServiceName, cfg.ServiceVersion, cfg.DeploymentEnvironment)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("otel backend: build resource: %w", err)
|
||||
}
|
||||
|
||||
exp, err := newExporter(context.Background(), cfg)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("otel backend: create exporter: %w", err)
|
||||
}
|
||||
|
||||
reader := sdkmetric.NewPeriodicReader(exp,
|
||||
sdkmetric.WithInterval(cfg.ExportInterval),
|
||||
)
|
||||
|
||||
provider := sdkmetric.NewMeterProvider(
|
||||
sdkmetric.WithResource(res),
|
||||
sdkmetric.WithReader(reader),
|
||||
)
|
||||
|
||||
meter := provider.Meter("github.com/fosrl/gerbil")
|
||||
|
||||
return &Backend{cfg: cfg, provider: provider, meter: meter}, nil
|
||||
}
|
||||
|
||||
// HTTPHandler returns nil – the OTel backend does not expose an HTTP endpoint.
|
||||
func (b *Backend) HTTPHandler() http.Handler {
|
||||
_ = b
|
||||
return nil
|
||||
}
|
||||
|
||||
// Shutdown flushes pending metrics and shuts down the MeterProvider.
|
||||
func (b *Backend) Shutdown(ctx context.Context) error {
|
||||
return b.provider.Shutdown(ctx)
|
||||
}
|
||||
|
||||
// NewCounter creates an OTel Int64Counter.
|
||||
func (b *Backend) NewCounter(name, desc string, _ ...string) *Counter {
|
||||
c, err := b.meter.Int64Counter(name, metric.WithDescription(desc))
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("otel: create counter %q: %v", name, err))
|
||||
}
|
||||
return &Counter{c: c}
|
||||
}
|
||||
|
||||
// NewUpDownCounter creates an OTel Int64UpDownCounter.
|
||||
func (b *Backend) NewUpDownCounter(name, desc string, _ ...string) *UpDownCounter {
|
||||
c, err := b.meter.Int64UpDownCounter(name, metric.WithDescription(desc))
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("otel: create up-down counter %q: %v", name, err))
|
||||
}
|
||||
return &UpDownCounter{c: c}
|
||||
}
|
||||
|
||||
// NewInt64Gauge creates an OTel Int64Gauge.
|
||||
func (b *Backend) NewInt64Gauge(name, desc string, _ ...string) *Int64Gauge {
|
||||
g, err := b.meter.Int64Gauge(name, metric.WithDescription(desc))
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("otel: create int64 gauge %q: %v", name, err))
|
||||
}
|
||||
return &Int64Gauge{g: g}
|
||||
}
|
||||
|
||||
// NewFloat64Gauge creates an OTel Float64Gauge.
|
||||
func (b *Backend) NewFloat64Gauge(name, desc string, _ ...string) *Float64Gauge {
|
||||
g, err := b.meter.Float64Gauge(name, metric.WithDescription(desc))
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("otel: create float64 gauge %q: %v", name, err))
|
||||
}
|
||||
return &Float64Gauge{g: g}
|
||||
}
|
||||
|
||||
// NewHistogram creates an OTel Float64Histogram with explicit bucket boundaries.
|
||||
func (b *Backend) NewHistogram(name, desc string, buckets []float64, _ ...string) *Histogram {
|
||||
h, err := b.meter.Float64Histogram(name,
|
||||
metric.WithDescription(desc),
|
||||
metric.WithExplicitBucketBoundaries(buckets...),
|
||||
)
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("otel: create histogram %q: %v", name, err))
|
||||
}
|
||||
return &Histogram{h: h}
|
||||
}
|
||||
|
||||
// labelsToAttrs converts a Labels map to OTel attribute key-value pairs.
|
||||
func labelsToAttrs(labels map[string]string) []attribute.KeyValue {
|
||||
if len(labels) == 0 {
|
||||
return nil
|
||||
}
|
||||
attrs := make([]attribute.KeyValue, 0, len(labels))
|
||||
for k, v := range labels {
|
||||
attrs = append(attrs, attribute.String(k, v))
|
||||
}
|
||||
return attrs
|
||||
}
|
||||
|
||||
// Counter wraps an OTel Int64Counter.
|
||||
type Counter struct {
|
||||
c metric.Int64Counter
|
||||
}
|
||||
|
||||
// Add increments the counter by value.
|
||||
func (c *Counter) Add(ctx context.Context, value int64, labels map[string]string) {
|
||||
c.c.Add(ctx, value, metric.WithAttributes(labelsToAttrs(labels)...))
|
||||
}
|
||||
|
||||
// UpDownCounter wraps an OTel Int64UpDownCounter.
|
||||
type UpDownCounter struct {
|
||||
c metric.Int64UpDownCounter
|
||||
}
|
||||
|
||||
// Add adjusts the up-down counter by value.
|
||||
func (u *UpDownCounter) Add(ctx context.Context, value int64, labels map[string]string) {
|
||||
u.c.Add(ctx, value, metric.WithAttributes(labelsToAttrs(labels)...))
|
||||
}
|
||||
|
||||
// Int64Gauge wraps an OTel Int64Gauge.
|
||||
type Int64Gauge struct {
|
||||
g metric.Int64Gauge
|
||||
}
|
||||
|
||||
// Record sets the gauge to value.
|
||||
func (g *Int64Gauge) Record(ctx context.Context, value int64, labels map[string]string) {
|
||||
g.g.Record(ctx, value, metric.WithAttributes(labelsToAttrs(labels)...))
|
||||
}
|
||||
|
||||
// Float64Gauge wraps an OTel Float64Gauge.
|
||||
type Float64Gauge struct {
|
||||
g metric.Float64Gauge
|
||||
}
|
||||
|
||||
// Record sets the gauge to value.
|
||||
func (g *Float64Gauge) Record(ctx context.Context, value float64, labels map[string]string) {
|
||||
g.g.Record(ctx, value, metric.WithAttributes(labelsToAttrs(labels)...))
|
||||
}
|
||||
|
||||
// Histogram wraps an OTel Float64Histogram.
|
||||
type Histogram struct {
|
||||
h metric.Float64Histogram
|
||||
}
|
||||
|
||||
// Record observes value in the histogram.
|
||||
func (h *Histogram) Record(ctx context.Context, value float64, labels map[string]string) {
|
||||
h.h.Record(ctx, value, metric.WithAttributes(labelsToAttrs(labels)...))
|
||||
}
|
||||
141
internal/observability/otel/backend_test.go
Normal file
141
internal/observability/otel/backend_test.go
Normal file
@@ -0,0 +1,141 @@
|
||||
package otel_test
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
obsotel "github.com/fosrl/gerbil/internal/observability/otel"
|
||||
)
|
||||
|
||||
const (
|
||||
defaultGRPCEndpoint = "localhost:4317"
|
||||
defaultServiceName = "gerbil-test"
|
||||
)
|
||||
|
||||
func newInMemoryBackend(t *testing.T) *obsotel.Backend {
|
||||
t.Helper()
|
||||
// Use a very short export interval; an in-process collector (noop exporter)
|
||||
// is used by pointing to a non-existent endpoint with insecure mode.
|
||||
// The backend itself should initialise without error since connection is lazy.
|
||||
b, err := obsotel.New(obsotel.Config{
|
||||
Protocol: "grpc",
|
||||
Endpoint: defaultGRPCEndpoint,
|
||||
Insecure: true,
|
||||
ExportInterval: 100 * time.Millisecond,
|
||||
ServiceName: defaultServiceName,
|
||||
ServiceVersion: "0.0.1",
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create otel backend: %v", err)
|
||||
}
|
||||
return b
|
||||
}
|
||||
|
||||
func TestOtelBackendHTTPHandlerIsNil(t *testing.T) {
|
||||
b := newInMemoryBackend(t)
|
||||
defer b.Shutdown(context.Background()) //nolint:errcheck
|
||||
if b.HTTPHandler() != nil {
|
||||
t.Error("OTel backend HTTPHandler should return nil")
|
||||
}
|
||||
}
|
||||
|
||||
func TestOtelBackendShutdown(t *testing.T) {
|
||||
b := newInMemoryBackend(t)
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
|
||||
defer cancel()
|
||||
if err := b.Shutdown(ctx); err != nil {
|
||||
// Shutdown with unreachable collector may fail to flush; that's acceptable.
|
||||
// What matters is that Shutdown does not panic.
|
||||
t.Logf("Shutdown returned (expected with no collector): %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestOtelBackendCounter(t *testing.T) {
|
||||
b := newInMemoryBackend(t)
|
||||
defer b.Shutdown(context.Background()) //nolint:errcheck
|
||||
|
||||
c := b.NewCounter("gerbil_test_counter_total", "test counter", "result")
|
||||
// Should not panic
|
||||
c.Add(context.Background(), 1, map[string]string{"result": "ok"})
|
||||
c.Add(context.Background(), 5, nil)
|
||||
}
|
||||
|
||||
func TestOtelBackendUpDownCounter(t *testing.T) {
|
||||
b := newInMemoryBackend(t)
|
||||
defer b.Shutdown(context.Background()) //nolint:errcheck
|
||||
|
||||
u := b.NewUpDownCounter("gerbil_test_updown", "test updown", "state")
|
||||
u.Add(context.Background(), 3, map[string]string{"state": "active"})
|
||||
u.Add(context.Background(), -1, map[string]string{"state": "active"})
|
||||
}
|
||||
|
||||
func TestOtelBackendInt64Gauge(t *testing.T) {
|
||||
b := newInMemoryBackend(t)
|
||||
defer b.Shutdown(context.Background()) //nolint:errcheck
|
||||
|
||||
g := b.NewInt64Gauge("gerbil_test_int_gauge", "test gauge")
|
||||
g.Record(context.Background(), 42, nil)
|
||||
}
|
||||
|
||||
func TestOtelBackendFloat64Gauge(t *testing.T) {
|
||||
b := newInMemoryBackend(t)
|
||||
defer b.Shutdown(context.Background()) //nolint:errcheck
|
||||
|
||||
g := b.NewFloat64Gauge("gerbil_test_float_gauge", "test float gauge")
|
||||
g.Record(context.Background(), 3.14, nil)
|
||||
}
|
||||
|
||||
func TestOtelBackendHistogram(t *testing.T) {
|
||||
b := newInMemoryBackend(t)
|
||||
defer b.Shutdown(context.Background()) //nolint:errcheck
|
||||
|
||||
h := b.NewHistogram("gerbil_test_duration_seconds", "test histogram",
|
||||
[]float64{0.1, 0.5, 1.0}, "method")
|
||||
h.Record(context.Background(), 0.3, map[string]string{"method": "GET"})
|
||||
}
|
||||
|
||||
func TestOtelBackendHTTPProtocol(t *testing.T) {
|
||||
b, err := obsotel.New(obsotel.Config{
|
||||
Protocol: "http",
|
||||
Endpoint: "localhost:4318",
|
||||
Insecure: true,
|
||||
ExportInterval: 100 * time.Millisecond,
|
||||
ServiceName: defaultServiceName,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create otel http backend: %v", err)
|
||||
}
|
||||
defer b.Shutdown(context.Background()) //nolint:errcheck
|
||||
|
||||
if b.HTTPHandler() != nil {
|
||||
t.Error("OTel HTTP backend should not expose a /metrics endpoint")
|
||||
}
|
||||
}
|
||||
|
||||
func TestOtelBackendInvalidProtocol(t *testing.T) {
|
||||
_, err := obsotel.New(obsotel.Config{
|
||||
Protocol: "tcp",
|
||||
Endpoint: defaultGRPCEndpoint,
|
||||
ExportInterval: 10 * time.Second,
|
||||
})
|
||||
if err == nil {
|
||||
t.Error("expected error for invalid protocol")
|
||||
}
|
||||
}
|
||||
|
||||
func TestOtelBackendDeploymentEnvironment(t *testing.T) {
|
||||
b, err := obsotel.New(obsotel.Config{
|
||||
Protocol: "grpc",
|
||||
Endpoint: defaultGRPCEndpoint,
|
||||
Insecure: true,
|
||||
ExportInterval: 100 * time.Millisecond,
|
||||
ServiceName: defaultServiceName,
|
||||
ServiceVersion: "1.2.3",
|
||||
DeploymentEnvironment: "staging",
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
defer b.Shutdown(context.Background()) //nolint:errcheck
|
||||
}
|
||||
50
internal/observability/otel/exporter.go
Normal file
50
internal/observability/otel/exporter.go
Normal file
@@ -0,0 +1,50 @@
|
||||
package otel
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc"
|
||||
"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp"
|
||||
sdkmetric "go.opentelemetry.io/otel/sdk/metric"
|
||||
)
|
||||
|
||||
// newExporter creates the appropriate OTLP exporter based on cfg.Protocol.
|
||||
func newExporter(ctx context.Context, cfg Config) (sdkmetric.Exporter, error) {
|
||||
switch cfg.Protocol {
|
||||
case "grpc", "":
|
||||
return newGRPCExporter(ctx, cfg)
|
||||
case "http":
|
||||
return newHTTPExporter(ctx, cfg)
|
||||
default:
|
||||
return nil, fmt.Errorf("otel: unknown protocol %q (must be \"grpc\" or \"http\")", cfg.Protocol)
|
||||
}
|
||||
}
|
||||
|
||||
func newGRPCExporter(ctx context.Context, cfg Config) (sdkmetric.Exporter, error) {
|
||||
opts := []otlpmetricgrpc.Option{
|
||||
otlpmetricgrpc.WithEndpoint(cfg.Endpoint),
|
||||
}
|
||||
if cfg.Insecure {
|
||||
opts = append(opts, otlpmetricgrpc.WithInsecure())
|
||||
}
|
||||
exp, err := otlpmetricgrpc.New(ctx, opts...)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("otlp grpc exporter: %w", err)
|
||||
}
|
||||
return exp, nil
|
||||
}
|
||||
|
||||
func newHTTPExporter(ctx context.Context, cfg Config) (sdkmetric.Exporter, error) {
|
||||
opts := []otlpmetrichttp.Option{
|
||||
otlpmetrichttp.WithEndpoint(cfg.Endpoint),
|
||||
}
|
||||
if cfg.Insecure {
|
||||
opts = append(opts, otlpmetrichttp.WithInsecure())
|
||||
}
|
||||
exp, err := otlpmetrichttp.New(ctx, opts...)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("otlp http exporter: %w", err)
|
||||
}
|
||||
return exp, nil
|
||||
}
|
||||
25
internal/observability/otel/resource.go
Normal file
25
internal/observability/otel/resource.go
Normal file
@@ -0,0 +1,25 @@
|
||||
package otel
|
||||
|
||||
import (
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
"go.opentelemetry.io/otel/sdk/resource"
|
||||
semconv "go.opentelemetry.io/otel/semconv/v1.40.0"
|
||||
)
|
||||
|
||||
// newResource builds an OTel resource for the Gerbil service.
|
||||
func newResource(serviceName, serviceVersion, deploymentEnv string) (*resource.Resource, error) {
|
||||
attrs := []attribute.KeyValue{
|
||||
semconv.ServiceName(serviceName),
|
||||
}
|
||||
if serviceVersion != "" {
|
||||
attrs = append(attrs, semconv.ServiceVersion(serviceVersion))
|
||||
}
|
||||
if deploymentEnv != "" {
|
||||
attrs = append(attrs, semconv.DeploymentEnvironmentName(deploymentEnv))
|
||||
}
|
||||
|
||||
return resource.Merge(
|
||||
resource.Default(),
|
||||
resource.NewWithAttributes(semconv.SchemaURL, attrs...),
|
||||
)
|
||||
}
|
||||
185
internal/observability/prometheus/backend.go
Normal file
185
internal/observability/prometheus/backend.go
Normal file
@@ -0,0 +1,185 @@
|
||||
// Package prometheus implements the native Prometheus metrics backend for Gerbil.
|
||||
//
|
||||
// This backend uses the Prometheus Go client directly; it does NOT depend on the
|
||||
// OpenTelemetry SDK. A dedicated Prometheus registry is used so that default
|
||||
// Go/process metrics are not unintentionally included unless the caller opts in.
|
||||
package prometheus
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net/http"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/client_golang/prometheus/collectors"
|
||||
"github.com/prometheus/client_golang/prometheus/promhttp"
|
||||
)
|
||||
|
||||
// Config holds Prometheus-backend configuration.
|
||||
type Config struct {
|
||||
// Path is the HTTP endpoint path (e.g. "/metrics").
|
||||
Path string
|
||||
|
||||
// IncludeGoMetrics controls whether the standard Go runtime and process
|
||||
// collectors are registered on the dedicated registry.
|
||||
// Defaults to true if not explicitly set.
|
||||
IncludeGoMetrics *bool
|
||||
}
|
||||
|
||||
// Backend is the native Prometheus metrics backend.
|
||||
// Metric instruments are created via the New* family of methods and stored
|
||||
// in the backend-specific instrument types that implement the observability
|
||||
// instrument interfaces.
|
||||
type Backend struct {
|
||||
cfg Config
|
||||
registry *prometheus.Registry
|
||||
handler http.Handler
|
||||
}
|
||||
|
||||
// New creates and initialises a Prometheus backend.
|
||||
//
|
||||
// cfg.Path sets the HTTP endpoint path (defaults to "/metrics" if empty).
|
||||
// cfg.IncludeGoMetrics controls whether standard Go runtime and process metrics
|
||||
// are included; defaults to true when nil.
|
||||
//
|
||||
// Returns an error if the registry cannot be created.
|
||||
func New(cfg Config) (*Backend, error) {
|
||||
if cfg.Path == "" {
|
||||
cfg.Path = "/metrics"
|
||||
}
|
||||
|
||||
registry := prometheus.NewRegistry()
|
||||
|
||||
// Include Go and process metrics by default.
|
||||
includeGo := cfg.IncludeGoMetrics == nil || *cfg.IncludeGoMetrics
|
||||
if includeGo {
|
||||
registry.MustRegister(
|
||||
collectors.NewGoCollector(),
|
||||
collectors.NewProcessCollector(collectors.ProcessCollectorOpts{}),
|
||||
)
|
||||
}
|
||||
|
||||
handler := promhttp.HandlerFor(registry, promhttp.HandlerOpts{
|
||||
EnableOpenMetrics: false,
|
||||
})
|
||||
|
||||
return &Backend{cfg: cfg, registry: registry, handler: handler}, nil
|
||||
}
|
||||
|
||||
// HTTPHandler returns the Prometheus /metrics HTTP handler.
|
||||
func (b *Backend) HTTPHandler() http.Handler {
|
||||
return b.handler
|
||||
}
|
||||
|
||||
// Shutdown is a no-op for the Prometheus backend.
|
||||
// The registry does not maintain background goroutines.
|
||||
func (b *Backend) Shutdown(_ context.Context) error {
|
||||
_ = b
|
||||
return nil
|
||||
}
|
||||
|
||||
// NewCounter creates a Prometheus CounterVec registered on the backend's registry.
|
||||
func (b *Backend) NewCounter(name, desc string, labelNames ...string) *Counter {
|
||||
vec := prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||
Name: name,
|
||||
Help: desc,
|
||||
}, labelNames)
|
||||
b.registry.MustRegister(vec)
|
||||
return &Counter{vec: vec}
|
||||
}
|
||||
|
||||
// NewUpDownCounter creates a Prometheus GaugeVec (Prometheus gauges are
|
||||
// bidirectional) registered on the backend's registry.
|
||||
func (b *Backend) NewUpDownCounter(name, desc string, labelNames ...string) *UpDownCounter {
|
||||
vec := prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||
Name: name,
|
||||
Help: desc,
|
||||
}, labelNames)
|
||||
b.registry.MustRegister(vec)
|
||||
return &UpDownCounter{vec: vec}
|
||||
}
|
||||
|
||||
// NewInt64Gauge creates a Prometheus GaugeVec registered on the backend's registry.
|
||||
func (b *Backend) NewInt64Gauge(name, desc string, labelNames ...string) *Int64Gauge {
|
||||
vec := prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||
Name: name,
|
||||
Help: desc,
|
||||
}, labelNames)
|
||||
b.registry.MustRegister(vec)
|
||||
return &Int64Gauge{vec: vec}
|
||||
}
|
||||
|
||||
// NewFloat64Gauge creates a Prometheus GaugeVec registered on the backend's registry.
|
||||
func (b *Backend) NewFloat64Gauge(name, desc string, labelNames ...string) *Float64Gauge {
|
||||
vec := prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||
Name: name,
|
||||
Help: desc,
|
||||
}, labelNames)
|
||||
b.registry.MustRegister(vec)
|
||||
return &Float64Gauge{vec: vec}
|
||||
}
|
||||
|
||||
// NewHistogram creates a Prometheus HistogramVec registered on the backend's registry.
|
||||
func (b *Backend) NewHistogram(name, desc string, buckets []float64, labelNames ...string) *Histogram {
|
||||
vec := prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
||||
Name: name,
|
||||
Help: desc,
|
||||
Buckets: buckets,
|
||||
}, labelNames)
|
||||
b.registry.MustRegister(vec)
|
||||
return &Histogram{vec: vec}
|
||||
}
|
||||
|
||||
// Counter is a native Prometheus counter instrument.
|
||||
type Counter struct {
|
||||
vec *prometheus.CounterVec
|
||||
}
|
||||
|
||||
// Add increments the counter by value for the given labels.
|
||||
//
|
||||
// value must be non-negative. Negative values are ignored.
|
||||
func (c *Counter) Add(_ context.Context, value int64, labels map[string]string) {
|
||||
if value < 0 {
|
||||
return
|
||||
}
|
||||
c.vec.With(prometheus.Labels(labels)).Add(float64(value))
|
||||
}
|
||||
|
||||
// UpDownCounter is a native Prometheus gauge used as a bidirectional counter.
|
||||
type UpDownCounter struct {
|
||||
vec *prometheus.GaugeVec
|
||||
}
|
||||
|
||||
// Add adjusts the gauge by value for the given labels.
|
||||
func (u *UpDownCounter) Add(_ context.Context, value int64, labels map[string]string) {
|
||||
u.vec.With(prometheus.Labels(labels)).Add(float64(value))
|
||||
}
|
||||
|
||||
// Int64Gauge is a native Prometheus gauge recording integer snapshot values.
|
||||
type Int64Gauge struct {
|
||||
vec *prometheus.GaugeVec
|
||||
}
|
||||
|
||||
// Record sets the gauge to value for the given labels.
|
||||
func (g *Int64Gauge) Record(_ context.Context, value int64, labels map[string]string) {
|
||||
g.vec.With(prometheus.Labels(labels)).Set(float64(value))
|
||||
}
|
||||
|
||||
// Float64Gauge is a native Prometheus gauge recording float snapshot values.
|
||||
type Float64Gauge struct {
|
||||
vec *prometheus.GaugeVec
|
||||
}
|
||||
|
||||
// Record sets the gauge to value for the given labels.
|
||||
func (g *Float64Gauge) Record(_ context.Context, value float64, labels map[string]string) {
|
||||
g.vec.With(prometheus.Labels(labels)).Set(value)
|
||||
}
|
||||
|
||||
// Histogram is a native Prometheus histogram instrument.
|
||||
type Histogram struct {
|
||||
vec *prometheus.HistogramVec
|
||||
}
|
||||
|
||||
// Record observes value for the given labels.
|
||||
func (h *Histogram) Record(_ context.Context, value float64, labels map[string]string) {
|
||||
h.vec.With(prometheus.Labels(labels)).Observe(value)
|
||||
}
|
||||
173
internal/observability/prometheus/backend_test.go
Normal file
173
internal/observability/prometheus/backend_test.go
Normal file
@@ -0,0 +1,173 @@
|
||||
package prometheus_test
|
||||
|
||||
import (
|
||||
"context"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
obsprom "github.com/fosrl/gerbil/internal/observability/prometheus"
|
||||
)
|
||||
|
||||
func newTestBackend(t *testing.T) *obsprom.Backend {
|
||||
t.Helper()
|
||||
b, err := obsprom.New(obsprom.Config{Path: "/metrics"})
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create prometheus backend: %v", err)
|
||||
}
|
||||
return b
|
||||
}
|
||||
|
||||
func TestPrometheusBackendHTTPHandler(t *testing.T) {
|
||||
b := newTestBackend(t)
|
||||
if b.HTTPHandler() == nil {
|
||||
t.Error("HTTPHandler should not be nil")
|
||||
}
|
||||
}
|
||||
|
||||
func TestPrometheusBackendShutdown(t *testing.T) {
|
||||
b := newTestBackend(t)
|
||||
if err := b.Shutdown(context.Background()); err != nil {
|
||||
t.Errorf("Shutdown returned error: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPrometheusBackendCounter(t *testing.T) {
|
||||
b := newTestBackend(t)
|
||||
c := b.NewCounter("test_counter_total", "A test counter", "result")
|
||||
c.Add(context.Background(), 3, map[string]string{"result": "ok"})
|
||||
|
||||
body := scrapeMetrics(t, b)
|
||||
assertMetricPresent(t, body, `test_counter_total{result="ok"} 3`)
|
||||
}
|
||||
|
||||
func TestPrometheusBackendUpDownCounter(t *testing.T) {
|
||||
b := newTestBackend(t)
|
||||
u := b.NewUpDownCounter("test_gauge_total", "A test up-down counter", "state")
|
||||
u.Add(context.Background(), 5, map[string]string{"state": "active"})
|
||||
u.Add(context.Background(), -2, map[string]string{"state": "active"})
|
||||
|
||||
body := scrapeMetrics(t, b)
|
||||
assertMetricPresent(t, body, `test_gauge_total{state="active"} 3`)
|
||||
}
|
||||
|
||||
func TestPrometheusBackendInt64Gauge(t *testing.T) {
|
||||
b := newTestBackend(t)
|
||||
g := b.NewInt64Gauge("test_int_gauge", "An integer gauge", "ifname")
|
||||
g.Record(context.Background(), 42, map[string]string{"ifname": "wg0"})
|
||||
|
||||
body := scrapeMetrics(t, b)
|
||||
assertMetricPresent(t, body, `test_int_gauge{ifname="wg0"} 42`)
|
||||
}
|
||||
|
||||
func TestPrometheusBackendFloat64Gauge(t *testing.T) {
|
||||
b := newTestBackend(t)
|
||||
g := b.NewFloat64Gauge("test_float_gauge", "A float gauge", "cert")
|
||||
g.Record(context.Background(), 7.5, map[string]string{"cert": "example.com"})
|
||||
|
||||
body := scrapeMetrics(t, b)
|
||||
assertMetricPresent(t, body, `test_float_gauge{cert="example.com"} 7.5`)
|
||||
}
|
||||
|
||||
func TestPrometheusBackendHistogram(t *testing.T) {
|
||||
b := newTestBackend(t)
|
||||
buckets := []float64{0.1, 0.5, 1.0, 5.0}
|
||||
h := b.NewHistogram("test_duration_seconds", "A test histogram", buckets, "method")
|
||||
h.Record(context.Background(), 0.3, map[string]string{"method": "GET"})
|
||||
|
||||
body := scrapeMetrics(t, b)
|
||||
if !strings.Contains(body, "test_duration_seconds") {
|
||||
t.Errorf("expected histogram metric in output, body:\n%s", body)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPrometheusBackendMultipleLabels(t *testing.T) {
|
||||
b := newTestBackend(t)
|
||||
c := b.NewCounter("multi_label_total", "Multi-label counter", "method", "route", "status_code")
|
||||
c.Add(context.Background(), 1, map[string]string{
|
||||
"method": "POST",
|
||||
"route": "/api/peers",
|
||||
"status_code": "200",
|
||||
})
|
||||
|
||||
body := scrapeMetrics(t, b)
|
||||
if !strings.Contains(body, "multi_label_total") {
|
||||
t.Errorf("expected multi_label_total in output, body:\n%s", body)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPrometheusBackendGoMetrics(t *testing.T) {
|
||||
b := newTestBackend(t)
|
||||
body := scrapeMetrics(t, b)
|
||||
// Default backend includes Go runtime metrics.
|
||||
if !strings.Contains(body, "go_goroutines") {
|
||||
t.Error("expected go_goroutines in default backend output")
|
||||
}
|
||||
}
|
||||
|
||||
func TestPrometheusBackendNoGoMetrics(t *testing.T) {
|
||||
f := false
|
||||
b, err := obsprom.New(obsprom.Config{IncludeGoMetrics: &f})
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
body := scrapeMetrics(t, b)
|
||||
if strings.Contains(body, "go_goroutines") {
|
||||
t.Error("expected no go_goroutines when IncludeGoMetrics=false")
|
||||
}
|
||||
}
|
||||
|
||||
func TestPrometheusBackendNilLabels(t *testing.T) {
|
||||
// Adding with nil labels should not panic (treated as empty map).
|
||||
b := newTestBackend(t)
|
||||
c := b.NewCounter("nil_labels_total", "counter with no labels")
|
||||
// nil labels with no label names declared should be safe
|
||||
c.Add(context.Background(), 1, nil)
|
||||
}
|
||||
|
||||
func TestPrometheusBackendConcurrentAdd(t *testing.T) {
|
||||
b := newTestBackend(t)
|
||||
c := b.NewCounter("concurrent_total", "concurrent counter", "worker")
|
||||
|
||||
done := make(chan struct{})
|
||||
for i := 0; i < 10; i++ {
|
||||
go func(_ int) {
|
||||
for j := 0; j < 100; j++ {
|
||||
c.Add(context.Background(), 1, map[string]string{"worker": "w"})
|
||||
}
|
||||
done <- struct{}{}
|
||||
}(i)
|
||||
}
|
||||
for i := 0; i < 10; i++ {
|
||||
<-done
|
||||
}
|
||||
|
||||
body := scrapeMetrics(t, b)
|
||||
assertMetricPresent(t, body, `concurrent_total{worker="w"} 1000`)
|
||||
}
|
||||
|
||||
// --- helpers ---
|
||||
|
||||
func scrapeMetrics(t *testing.T, b *obsprom.Backend) string {
|
||||
t.Helper()
|
||||
req := httptest.NewRequest(http.MethodGet, "/metrics", http.NoBody)
|
||||
rr := httptest.NewRecorder()
|
||||
b.HTTPHandler().ServeHTTP(rr, req)
|
||||
if rr.Code != http.StatusOK {
|
||||
t.Fatalf("metrics handler returned %d", rr.Code)
|
||||
}
|
||||
body, err := io.ReadAll(rr.Body)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to read response body: %v", err)
|
||||
}
|
||||
return string(body)
|
||||
}
|
||||
|
||||
func assertMetricPresent(t *testing.T, body, expected string) {
|
||||
t.Helper()
|
||||
if !strings.Contains(body, expected) {
|
||||
t.Errorf("expected %q in metrics output\nbody:\n%s", expected, body)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user