mirror of
https://github.com/fosrl/gerbil.git
synced 2026-05-12 11:20:01 +00:00
Merge pull request #79 from fosrl/observability-update
Enhance observability with OTLP timeout and API unification
This commit is contained in:
@@ -138,7 +138,7 @@ make
|
||||
|
||||
### Binary
|
||||
|
||||
Make sure to have Go 1.23.1 installed.
|
||||
Make sure to have Go 1.26 installed.
|
||||
|
||||
```bash
|
||||
make local
|
||||
|
||||
@@ -83,6 +83,7 @@ type OTelConfig struct {
|
||||
Endpoint string // default: "localhost:4317"
|
||||
Insecure bool // default: true
|
||||
ExportInterval time.Duration // default: 60s
|
||||
Timeout time.Duration // default: 10s
|
||||
}
|
||||
```
|
||||
|
||||
@@ -97,6 +98,7 @@ type OTelConfig struct {
|
||||
| `OTEL_METRICS_ENDPOINT` | `localhost:4317` | OTLP collector address |
|
||||
| `OTEL_METRICS_INSECURE` | `true` | Disable TLS for OTLP |
|
||||
| `OTEL_METRICS_EXPORT_INTERVAL` | `60s` | Push interval (e.g. `10s`, `1m`) |
|
||||
| `OTEL_METRICS_TIMEOUT` | `10s` | Timeout for OTLP exporter connection setup |
|
||||
| `DEPLOYMENT_ENVIRONMENT` | _(unset)_ | OTel deployment.environment attribute |
|
||||
|
||||
### CLI flags
|
||||
@@ -108,7 +110,8 @@ type OTelConfig struct {
|
||||
--otel-metrics-protocol string (default: grpc)
|
||||
--otel-metrics-endpoint string (default: localhost:4317)
|
||||
--otel-metrics-insecure bool (default: true)
|
||||
--otel-metrics-export-interval duration (default: 1m0s)
|
||||
--otel-metrics-export-interval duration (default: 60s)
|
||||
--otel-metrics-timeout duration (default: 10s)
|
||||
```
|
||||
|
||||
---
|
||||
@@ -164,6 +167,7 @@ export OTEL_METRICS_PROTOCOL=grpc
|
||||
export OTEL_METRICS_ENDPOINT=otel-collector:4317
|
||||
export OTEL_METRICS_INSECURE=true
|
||||
export OTEL_METRICS_EXPORT_INTERVAL=10s
|
||||
export OTEL_METRICS_TIMEOUT=10s
|
||||
export DEPLOYMENT_ENVIRONMENT=production
|
||||
```
|
||||
|
||||
@@ -176,6 +180,7 @@ export DEPLOYMENT_ENVIRONMENT=production
|
||||
--otel-metrics-endpoint=otel-collector:4317 \
|
||||
--otel-metrics-insecure \
|
||||
--otel-metrics-export-interval=10s \
|
||||
--otel-metrics-timeout=10s \
|
||||
--config=/etc/gerbil/config.json
|
||||
```
|
||||
|
||||
@@ -225,7 +230,6 @@ All metrics use the prefix `gerbil_<component>_<name>`.
|
||||
| Metric | Type | Labels |
|
||||
|--------|------|--------|
|
||||
| `gerbil_proxy_mapping_active` | UpDownCounter | `ifname` |
|
||||
| `gerbil_session_active` | UpDownCounter | `ifname` |
|
||||
| `gerbil_active_sessions` | UpDownCounter | `ifname` |
|
||||
| `gerbil_udp_packets_total` | Counter | `ifname`, `type`, `direction` |
|
||||
| `gerbil_hole_punch_events_total` | Counter | `ifname`, `result` |
|
||||
@@ -256,7 +260,7 @@ The `docker-compose.metrics.yml` provides a complete observability stack.
|
||||
**Prometheus mode:**
|
||||
|
||||
```bash
|
||||
METRICS_BACKEND=prometheus docker-compose -f docker-compose.metrics.yml up -d
|
||||
METRICS_BACKEND=prometheus docker-compose -f docker compose.metrics.yml up -d
|
||||
# Scrape at http://localhost:3003/metrics
|
||||
# Grafana at http://localhost:3000 (admin/admin)
|
||||
```
|
||||
@@ -265,5 +269,5 @@ METRICS_BACKEND=prometheus docker-compose -f docker-compose.metrics.yml up -d
|
||||
|
||||
```bash
|
||||
METRICS_BACKEND=otel OTEL_METRICS_ENDPOINT=otel-collector:4317 \
|
||||
docker-compose -f docker-compose.metrics.yml up -d
|
||||
docker compose -f docker-compose.metrics.yml up -d
|
||||
```
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
file_format: '1.0'
|
||||
receivers:
|
||||
otlp:
|
||||
protocols:
|
||||
@@ -43,4 +44,4 @@ service:
|
||||
metrics:
|
||||
receivers: [otlp]
|
||||
processors: [batch, resource]
|
||||
exporters: [prometheus, prometheusremotewrite, debug]
|
||||
exporters: [prometheus, prometheusremotewrite, debug]
|
||||
|
||||
24
go.mod
24
go.mod
@@ -6,12 +6,12 @@ require (
|
||||
github.com/patrickmn/go-cache v2.1.0+incompatible
|
||||
github.com/prometheus/client_golang v1.20.5
|
||||
github.com/vishvananda/netlink v1.3.1
|
||||
go.opentelemetry.io/otel v1.42.0
|
||||
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.42.0
|
||||
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.42.0
|
||||
go.opentelemetry.io/otel/metric v1.42.0
|
||||
go.opentelemetry.io/otel/sdk v1.42.0
|
||||
go.opentelemetry.io/otel/sdk/metric v1.42.0
|
||||
go.opentelemetry.io/otel v1.43.0
|
||||
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.43.0
|
||||
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.43.0
|
||||
go.opentelemetry.io/otel/metric v1.43.0
|
||||
go.opentelemetry.io/otel/sdk v1.43.0
|
||||
go.opentelemetry.io/otel/sdk/metric v1.43.0
|
||||
golang.org/x/crypto v0.49.0
|
||||
golang.org/x/sync v0.20.0
|
||||
golang.zx2c4.com/wireguard/wgctrl v0.0.0-20230429144221-925a1e7659e6
|
||||
@@ -37,14 +37,14 @@ require (
|
||||
github.com/prometheus/procfs v0.15.1 // indirect
|
||||
github.com/vishvananda/netns v0.0.5 // indirect
|
||||
go.opentelemetry.io/auto/sdk v1.2.1 // indirect
|
||||
go.opentelemetry.io/otel/trace v1.42.0 // indirect
|
||||
go.opentelemetry.io/proto/otlp v1.9.0 // indirect
|
||||
golang.org/x/net v0.51.0 // indirect
|
||||
go.opentelemetry.io/otel/trace v1.43.0 // indirect
|
||||
go.opentelemetry.io/proto/otlp v1.10.0 // indirect
|
||||
golang.org/x/net v0.52.0 // indirect
|
||||
golang.org/x/sys v0.42.0 // indirect
|
||||
golang.org/x/text v0.35.0 // indirect
|
||||
golang.zx2c4.com/wireguard v0.0.0-20230325221338-052af4a8072b // indirect
|
||||
google.golang.org/genproto/googleapis/api v0.0.0-20260209200024-4cfbd4190f57 // indirect
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20260209200024-4cfbd4190f57 // indirect
|
||||
google.golang.org/grpc v1.79.3 // indirect
|
||||
google.golang.org/genproto/googleapis/api v0.0.0-20260401024825-9d38bb4040a9 // indirect
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20260401024825-9d38bb4040a9 // indirect
|
||||
google.golang.org/grpc v1.80.0 // indirect
|
||||
google.golang.org/protobuf v1.36.11 // indirect
|
||||
)
|
||||
|
||||
52
go.sum
52
go.sum
@@ -55,26 +55,26 @@ github.com/vishvananda/netns v0.0.5 h1:DfiHV+j8bA32MFM7bfEunvT8IAqQ/NzSJHtcmW5zd
|
||||
github.com/vishvananda/netns v0.0.5/go.mod h1:SpkAiCQRtJ6TvvxPnOSyH3BMl6unz3xZlaprSwhNNJM=
|
||||
go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64=
|
||||
go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y=
|
||||
go.opentelemetry.io/otel v1.42.0 h1:lSQGzTgVR3+sgJDAU/7/ZMjN9Z+vUip7leaqBKy4sho=
|
||||
go.opentelemetry.io/otel v1.42.0/go.mod h1:lJNsdRMxCUIWuMlVJWzecSMuNjE7dOYyWlqOXWkdqCc=
|
||||
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.42.0 h1:MdKucPl/HbzckWWEisiNqMPhRrAOQX8r4jTuGr636gk=
|
||||
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.42.0/go.mod h1:RolT8tWtfHcjajEH5wFIZ4Dgh5jpPdFXYV9pTAk/qjc=
|
||||
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.42.0 h1:H7O6RlGOMTizyl3R08Kn5pdM06bnH8oscSj7o11tmLA=
|
||||
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.42.0/go.mod h1:mBFWu/WOVDkWWsR7Tx7h6EpQB8wsv7P0Yrh0Pb7othc=
|
||||
go.opentelemetry.io/otel/metric v1.42.0 h1:2jXG+3oZLNXEPfNmnpxKDeZsFI5o4J+nz6xUlaFdF/4=
|
||||
go.opentelemetry.io/otel/metric v1.42.0/go.mod h1:RlUN/7vTU7Ao/diDkEpQpnz3/92J9ko05BIwxYa2SSI=
|
||||
go.opentelemetry.io/otel/sdk v1.42.0 h1:LyC8+jqk6UJwdrI/8VydAq/hvkFKNHZVIWuslJXYsDo=
|
||||
go.opentelemetry.io/otel/sdk v1.42.0/go.mod h1:rGHCAxd9DAph0joO4W6OPwxjNTYWghRWmkHuGbayMts=
|
||||
go.opentelemetry.io/otel/sdk/metric v1.42.0 h1:D/1QR46Clz6ajyZ3G8SgNlTJKBdGp84q9RKCAZ3YGuA=
|
||||
go.opentelemetry.io/otel/sdk/metric v1.42.0/go.mod h1:Ua6AAlDKdZ7tdvaQKfSmnFTdHx37+J4ba8MwVCYM5hc=
|
||||
go.opentelemetry.io/otel/trace v1.42.0 h1:OUCgIPt+mzOnaUTpOQcBiM/PLQ/Op7oq6g4LenLmOYY=
|
||||
go.opentelemetry.io/otel/trace v1.42.0/go.mod h1:f3K9S+IFqnumBkKhRJMeaZeNk9epyhnCmQh/EysQCdc=
|
||||
go.opentelemetry.io/proto/otlp v1.9.0 h1:l706jCMITVouPOqEnii2fIAuO3IVGBRPV5ICjceRb/A=
|
||||
go.opentelemetry.io/proto/otlp v1.9.0/go.mod h1:xE+Cx5E/eEHw+ISFkwPLwCZefwVjY+pqKg1qcK03+/4=
|
||||
go.opentelemetry.io/otel v1.43.0 h1:mYIM03dnh5zfN7HautFE4ieIig9amkNANT+xcVxAj9I=
|
||||
go.opentelemetry.io/otel v1.43.0/go.mod h1:JuG+u74mvjvcm8vj8pI5XiHy1zDeoCS2LB1spIq7Ay0=
|
||||
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.43.0 h1:8UQVDcZxOJLtX6gxtDt3vY2WTgvZqMQRzjsqiIHQdkc=
|
||||
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.43.0/go.mod h1:2lmweYCiHYpEjQ/lSJBYhj9jP1zvCvQW4BqL9dnT7FQ=
|
||||
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.43.0 h1:w1K+pCJoPpQifuVpsKamUdn9U0zM3xUziVOqsGksUrY=
|
||||
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.43.0/go.mod h1:HBy4BjzgVE8139ieRI75oXm3EcDN+6GhD88JT1Kjvxg=
|
||||
go.opentelemetry.io/otel/metric v1.43.0 h1:d7638QeInOnuwOONPp4JAOGfbCEpYb+K6DVWvdxGzgM=
|
||||
go.opentelemetry.io/otel/metric v1.43.0/go.mod h1:RDnPtIxvqlgO8GRW18W6Z/4P462ldprJtfxHxyKd2PY=
|
||||
go.opentelemetry.io/otel/sdk v1.43.0 h1:pi5mE86i5rTeLXqoF/hhiBtUNcrAGHLKQdhg4h4V9Dg=
|
||||
go.opentelemetry.io/otel/sdk v1.43.0/go.mod h1:P+IkVU3iWukmiit/Yf9AWvpyRDlUeBaRg6Y+C58QHzg=
|
||||
go.opentelemetry.io/otel/sdk/metric v1.43.0 h1:S88dyqXjJkuBNLeMcVPRFXpRw2fuwdvfCGLEo89fDkw=
|
||||
go.opentelemetry.io/otel/sdk/metric v1.43.0/go.mod h1:C/RJtwSEJ5hzTiUz5pXF1kILHStzb9zFlIEe85bhj6A=
|
||||
go.opentelemetry.io/otel/trace v1.43.0 h1:BkNrHpup+4k4w+ZZ86CZoHHEkohws8AY+WTX09nk+3A=
|
||||
go.opentelemetry.io/otel/trace v1.43.0/go.mod h1:/QJhyVBUUswCphDVxq+8mld+AvhXZLhe+8WVFxiFff0=
|
||||
go.opentelemetry.io/proto/otlp v1.10.0 h1:IQRWgT5srOCYfiWnpqUYz9CVmbO8bFmKcwYxpuCSL2g=
|
||||
go.opentelemetry.io/proto/otlp v1.10.0/go.mod h1:/CV4QoCR/S9yaPj8utp3lvQPoqMtxXdzn7ozvvozVqk=
|
||||
golang.org/x/crypto v0.49.0 h1:+Ng2ULVvLHnJ/ZFEq4KdcDd/cfjrrjjNSXNzxg0Y4U4=
|
||||
golang.org/x/crypto v0.49.0/go.mod h1:ErX4dUh2UM+CFYiXZRTcMpEcN8b/1gxEuv3nODoYtCA=
|
||||
golang.org/x/net v0.51.0 h1:94R/GTO7mt3/4wIKpcR5gkGmRLOuE/2hNGeWq/GBIFo=
|
||||
golang.org/x/net v0.51.0/go.mod h1:aamm+2QF5ogm02fjy5Bb7CQ0WMt1/WVM7FtyaTLlA9Y=
|
||||
golang.org/x/net v0.52.0 h1:He/TN1l0e4mmR3QqHMT2Xab3Aj3L9qjbhRm78/6jrW0=
|
||||
golang.org/x/net v0.52.0/go.mod h1:R1MAz7uMZxVMualyPXb+VaqGSa3LIaUqk0eEt3w36Sw=
|
||||
golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4=
|
||||
golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0=
|
||||
golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
@@ -87,14 +87,14 @@ golang.zx2c4.com/wireguard v0.0.0-20230325221338-052af4a8072b h1:J1CaxgLerRR5lgx
|
||||
golang.zx2c4.com/wireguard v0.0.0-20230325221338-052af4a8072b/go.mod h1:tqur9LnfstdR9ep2LaJT4lFUl0EjlHtge+gAjmsHUG4=
|
||||
golang.zx2c4.com/wireguard/wgctrl v0.0.0-20230429144221-925a1e7659e6 h1:CawjfCvYQH2OU3/TnxLx97WDSUDRABfT18pCOYwc2GE=
|
||||
golang.zx2c4.com/wireguard/wgctrl v0.0.0-20230429144221-925a1e7659e6/go.mod h1:3rxYc4HtVcSG9gVaTs2GEBdehh+sYPOwKtyUWEOTb80=
|
||||
gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk=
|
||||
gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E=
|
||||
google.golang.org/genproto/googleapis/api v0.0.0-20260209200024-4cfbd4190f57 h1:JLQynH/LBHfCTSbDWl+py8C+Rg/k1OVH3xfcaiANuF0=
|
||||
google.golang.org/genproto/googleapis/api v0.0.0-20260209200024-4cfbd4190f57/go.mod h1:kSJwQxqmFXeo79zOmbrALdflXQeAYcUbgS7PbpMknCY=
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20260209200024-4cfbd4190f57 h1:mWPCjDEyshlQYzBpMNHaEof6UX1PmHcaUODUywQ0uac=
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20260209200024-4cfbd4190f57/go.mod h1:j9x/tPzZkyxcgEFkiKEEGxfvyumM01BEtsW8xzOahRQ=
|
||||
google.golang.org/grpc v1.79.3 h1:sybAEdRIEtvcD68Gx7dmnwjZKlyfuc61Dyo9pGXXkKE=
|
||||
google.golang.org/grpc v1.79.3/go.mod h1:KmT0Kjez+0dde/v2j9vzwoAScgEPx/Bw1CYChhHLrHQ=
|
||||
gonum.org/v1/gonum v0.17.0 h1:VbpOemQlsSMrYmn7T2OUvQ4dqxQXU+ouZFQsZOx50z4=
|
||||
gonum.org/v1/gonum v0.17.0/go.mod h1:El3tOrEuMpv2UdMrbNlKEh9vd86bmQ6vqIcDwxEOc1E=
|
||||
google.golang.org/genproto/googleapis/api v0.0.0-20260401024825-9d38bb4040a9 h1:VPWxll4HlMw1Vs/qXtN7BvhZqsS9cdAittCNvVENElA=
|
||||
google.golang.org/genproto/googleapis/api v0.0.0-20260401024825-9d38bb4040a9/go.mod h1:7QBABkRtR8z+TEnmXTqIqwJLlzrZKVfAUm7tY3yGv0M=
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20260401024825-9d38bb4040a9 h1:m8qni9SQFH0tJc1X0vmnpw/0t+AImlSvp30sEupozUg=
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20260401024825-9d38bb4040a9/go.mod h1:4Hqkh8ycfw05ld/3BWL7rJOSfebL2Q+DVDeRgYgxUU8=
|
||||
google.golang.org/grpc v1.80.0 h1:Xr6m2WmWZLETvUNvIUmeD5OAagMw3FiKmMlTdViWsHM=
|
||||
google.golang.org/grpc v1.80.0/go.mod h1:ho/dLnxwi3EDJA4Zghp7k2Ec1+c2jqup0bFkw07bwF4=
|
||||
google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE=
|
||||
google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco=
|
||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||
|
||||
@@ -7,7 +7,9 @@ package metrics
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"sync"
|
||||
|
||||
"github.com/fosrl/gerbil/internal/observability"
|
||||
)
|
||||
@@ -24,6 +26,7 @@ type OTelConfig = observability.OTelConfig
|
||||
|
||||
var (
|
||||
backend observability.Backend
|
||||
initMu sync.Mutex
|
||||
|
||||
// Interface and peer metrics
|
||||
wgInterfaceUp observability.Int64Gauge
|
||||
@@ -55,7 +58,6 @@ var (
|
||||
udpPacketSizeBytes observability.Histogram
|
||||
holePunchEventsTotal observability.Counter
|
||||
proxyMappingActive observability.UpDownCounter
|
||||
sessionActive observability.UpDownCounter
|
||||
sessionRebuiltTotal observability.Counter
|
||||
commPatternActive observability.UpDownCounter
|
||||
proxyCleanupRemovedTotal observability.Counter
|
||||
@@ -107,6 +109,13 @@ func DefaultConfig() Config {
|
||||
// Initialize sets up the metrics system using the selected backend.
|
||||
// It returns the /metrics HTTP handler (non-nil only for Prometheus backend).
|
||||
func Initialize(cfg Config) (http.Handler, error) {
|
||||
initMu.Lock()
|
||||
defer initMu.Unlock()
|
||||
|
||||
if backend != nil {
|
||||
return backend.HTTPHandler(), nil
|
||||
}
|
||||
|
||||
b, err := observability.New(cfg)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@@ -114,6 +123,7 @@ func Initialize(cfg Config) (http.Handler, error) {
|
||||
backend = b
|
||||
|
||||
if err := createInstruments(); err != nil {
|
||||
backend = nil
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@@ -122,8 +132,13 @@ func Initialize(cfg Config) (http.Handler, error) {
|
||||
|
||||
// Shutdown gracefully shuts down the metrics backend.
|
||||
func Shutdown(ctx context.Context) error {
|
||||
if backend != nil {
|
||||
return backend.Shutdown(ctx)
|
||||
initMu.Lock()
|
||||
b := backend
|
||||
backend = nil
|
||||
initMu.Unlock()
|
||||
|
||||
if b != nil {
|
||||
return b.Shutdown(ctx)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -135,129 +150,346 @@ func createInstruments() error {
|
||||
|
||||
b := backend
|
||||
|
||||
wgInterfaceUp = b.NewInt64Gauge("gerbil_wg_interface_up",
|
||||
newCounter := func(name, desc string, labelNames ...string) (observability.Counter, error) {
|
||||
c, err := b.NewCounter(name, desc, labelNames...)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("create counter %q: %w", name, err)
|
||||
}
|
||||
return c, nil
|
||||
}
|
||||
|
||||
newUpDownCounter := func(name, desc string, labelNames ...string) (observability.UpDownCounter, error) {
|
||||
c, err := b.NewUpDownCounter(name, desc, labelNames...)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("create updown counter %q: %w", name, err)
|
||||
}
|
||||
return c, nil
|
||||
}
|
||||
|
||||
newInt64Gauge := func(name, desc string, labelNames ...string) (observability.Int64Gauge, error) {
|
||||
g, err := b.NewInt64Gauge(name, desc, labelNames...)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("create int64 gauge %q: %w", name, err)
|
||||
}
|
||||
return g, nil
|
||||
}
|
||||
|
||||
newFloat64Gauge := func(name, desc string, labelNames ...string) (observability.Float64Gauge, error) {
|
||||
g, err := b.NewFloat64Gauge(name, desc, labelNames...)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("create float64 gauge %q: %w", name, err)
|
||||
}
|
||||
return g, nil
|
||||
}
|
||||
|
||||
newHistogram := func(name, desc string, buckets []float64, labelNames ...string) (observability.Histogram, error) {
|
||||
h, err := b.NewHistogram(name, desc, buckets, labelNames...)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("create histogram %q: %w", name, err)
|
||||
}
|
||||
return h, nil
|
||||
}
|
||||
|
||||
var err error
|
||||
|
||||
wgInterfaceUp, err = newInt64Gauge("gerbil_wg_interface_up",
|
||||
"Operational state of a WireGuard interface (1=up, 0=down)", "ifname", "instance")
|
||||
wgPeersTotal = b.NewUpDownCounter("gerbil_wg_peers_total",
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
wgPeersTotal, err = newUpDownCounter("gerbil_wg_peers_total",
|
||||
"Total number of configured peers per interface", "ifname")
|
||||
wgPeerConnected = b.NewInt64Gauge("gerbil_wg_peer_connected",
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
wgPeerConnected, err = newInt64Gauge("gerbil_wg_peer_connected",
|
||||
"Whether a specific peer is connected (1=connected, 0=disconnected)", "ifname", "peer")
|
||||
allowedIPsCount = b.NewUpDownCounter("gerbil_allowed_ips_count",
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
allowedIPsCount, err = newUpDownCounter("gerbil_allowed_ips_count",
|
||||
"Number of allowed IPs configured per peer", "ifname", "peer")
|
||||
keyRotationTotal = b.NewCounter("gerbil_key_rotation_total",
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
keyRotationTotal, err = newCounter("gerbil_key_rotation_total",
|
||||
"Key rotation events", "ifname", "reason")
|
||||
wgHandshakesTotal = b.NewCounter("gerbil_wg_handshakes_total",
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
wgHandshakesTotal, err = newCounter("gerbil_wg_handshakes_total",
|
||||
"Count of handshake attempts with their result status", "ifname", "peer", "result")
|
||||
wgHandshakeLatency = b.NewHistogram("gerbil_wg_handshake_latency_seconds",
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
wgHandshakeLatency, err = newHistogram("gerbil_wg_handshake_latency_seconds",
|
||||
"Distribution of handshake latencies in seconds", durationBuckets, "ifname", "peer")
|
||||
wgPeerRTT = b.NewHistogram("gerbil_wg_peer_rtt_seconds",
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
wgPeerRTT, err = newHistogram("gerbil_wg_peer_rtt_seconds",
|
||||
"Observed round-trip time to a peer in seconds", durationBuckets, "ifname", "peer")
|
||||
wgBytesReceived = b.NewCounter("gerbil_wg_bytes_received_total",
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
wgBytesReceived, err = newCounter("gerbil_wg_bytes_received_total",
|
||||
"Number of bytes received from a peer", "ifname", "peer")
|
||||
wgBytesTransmitted = b.NewCounter("gerbil_wg_bytes_transmitted_total",
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
wgBytesTransmitted, err = newCounter("gerbil_wg_bytes_transmitted_total",
|
||||
"Number of bytes transmitted to a peer", "ifname", "peer")
|
||||
netlinkEventsTotal = b.NewCounter("gerbil_netlink_events_total",
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
netlinkEventsTotal, err = newCounter("gerbil_netlink_events_total",
|
||||
"Number of netlink events processed", "event_type")
|
||||
netlinkErrorsTotal = b.NewCounter("gerbil_netlink_errors_total",
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
netlinkErrorsTotal, err = newCounter("gerbil_netlink_errors_total",
|
||||
"Count of netlink or kernel errors", "component", "error_type")
|
||||
syncDuration = b.NewHistogram("gerbil_sync_duration_seconds",
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
syncDuration, err = newHistogram("gerbil_sync_duration_seconds",
|
||||
"Duration of reconciliation/sync loops in seconds", durationBuckets, "component")
|
||||
workqueueDepth = b.NewUpDownCounter("gerbil_workqueue_depth",
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
workqueueDepth, err = newUpDownCounter("gerbil_workqueue_depth",
|
||||
"Current length of internal work queues", "queue")
|
||||
kernelModuleLoads = b.NewCounter("gerbil_kernel_module_loads_total",
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
kernelModuleLoads, err = newCounter("gerbil_kernel_module_loads_total",
|
||||
"Count of kernel module load attempts", "result")
|
||||
firewallRulesApplied = b.NewCounter("gerbil_firewall_rules_applied_total",
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
firewallRulesApplied, err = newCounter("gerbil_firewall_rules_applied_total",
|
||||
"IPTables/NFT rules applied", "result", "chain")
|
||||
activeSessions = b.NewUpDownCounter("gerbil_active_sessions",
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
activeSessions, err = newUpDownCounter("gerbil_active_sessions",
|
||||
"Number of active UDP relay sessions", "ifname")
|
||||
activeProxyConnections = b.NewUpDownCounter("gerbil_active_proxy_connections",
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
activeProxyConnections, err = newUpDownCounter("gerbil_active_proxy_connections",
|
||||
"Active SNI proxy connections")
|
||||
proxyRouteLookups = b.NewCounter("gerbil_proxy_route_lookups_total",
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
proxyRouteLookups, err = newCounter("gerbil_proxy_route_lookups_total",
|
||||
"Number of route lookups", "result")
|
||||
proxyTLSHandshake = b.NewHistogram("gerbil_proxy_tls_handshake_seconds",
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
proxyTLSHandshake, err = newHistogram("gerbil_proxy_tls_handshake_seconds",
|
||||
"TLS handshake duration for SNI proxy in seconds", durationBuckets)
|
||||
proxyBytesTransmitted = b.NewCounter("gerbil_proxy_bytes_transmitted_total",
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
proxyBytesTransmitted, err = newCounter("gerbil_proxy_bytes_transmitted_total",
|
||||
"Bytes sent/received by the SNI proxy", "direction")
|
||||
configReloadsTotal = b.NewCounter("gerbil_config_reloads_total",
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
configReloadsTotal, err = newCounter("gerbil_config_reloads_total",
|
||||
"Number of configuration reloads", "result")
|
||||
restartTotal = b.NewCounter("gerbil_restart_total",
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
restartTotal, err = newCounter("gerbil_restart_total",
|
||||
"Process restart count")
|
||||
authFailuresTotal = b.NewCounter("gerbil_auth_failures_total",
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
authFailuresTotal, err = newCounter("gerbil_auth_failures_total",
|
||||
"Count of authentication or peer validation failures", "peer", "reason")
|
||||
aclDeniedTotal = b.NewCounter("gerbil_acl_denied_total",
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
aclDeniedTotal, err = newCounter("gerbil_acl_denied_total",
|
||||
"Access control denied events", "ifname", "peer", "policy")
|
||||
certificateExpiryDays = b.NewFloat64Gauge("gerbil_certificate_expiry_days",
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
certificateExpiryDays, err = newFloat64Gauge("gerbil_certificate_expiry_days",
|
||||
"Days until certificate expiry", "cert_name", "ifname")
|
||||
udpPacketsTotal = b.NewCounter("gerbil_udp_packets_total",
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
udpPacketsTotal, err = newCounter("gerbil_udp_packets_total",
|
||||
"Count of UDP packets processed by relay workers", "ifname", "type", "direction")
|
||||
udpPacketSizeBytes = b.NewHistogram("gerbil_udp_packet_size_bytes",
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
udpPacketSizeBytes, err = newHistogram("gerbil_udp_packet_size_bytes",
|
||||
"Size distribution of packets forwarded through relay", sizeBuckets, "ifname", "type")
|
||||
holePunchEventsTotal = b.NewCounter("gerbil_hole_punch_events_total",
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
holePunchEventsTotal, err = newCounter("gerbil_hole_punch_events_total",
|
||||
"Count of hole punch messages processed", "ifname", "result")
|
||||
proxyMappingActive = b.NewUpDownCounter("gerbil_proxy_mapping_active",
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
proxyMappingActive, err = newUpDownCounter("gerbil_proxy_mapping_active",
|
||||
"Number of active proxy mappings", "ifname")
|
||||
sessionActive = b.NewUpDownCounter("gerbil_session_active",
|
||||
"Number of active WireGuard sessions", "ifname")
|
||||
sessionRebuiltTotal = b.NewCounter("gerbil_session_rebuilt_total",
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
sessionRebuiltTotal, err = newCounter("gerbil_session_rebuilt_total",
|
||||
"Count of sessions rebuilt from communication patterns", "ifname")
|
||||
commPatternActive = b.NewUpDownCounter("gerbil_comm_pattern_active",
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
commPatternActive, err = newUpDownCounter("gerbil_comm_pattern_active",
|
||||
"Number of active communication patterns", "ifname")
|
||||
proxyCleanupRemovedTotal = b.NewCounter("gerbil_proxy_cleanup_removed_total",
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
proxyCleanupRemovedTotal, err = newCounter("gerbil_proxy_cleanup_removed_total",
|
||||
"Count of items removed during cleanup routines", "ifname", "component")
|
||||
proxyConnectionErrorsTotal = b.NewCounter("gerbil_proxy_connection_errors_total",
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
proxyConnectionErrorsTotal, err = newCounter("gerbil_proxy_connection_errors_total",
|
||||
"Count of connection errors in proxy operations", "ifname", "error_type")
|
||||
proxyInitialMappingsTotal = b.NewInt64Gauge("gerbil_proxy_initial_mappings",
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
proxyInitialMappingsTotal, err = newInt64Gauge("gerbil_proxy_initial_mappings",
|
||||
"Number of initial proxy mappings loaded", "ifname")
|
||||
proxyMappingUpdatesTotal = b.NewCounter("gerbil_proxy_mapping_updates_total",
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
proxyMappingUpdatesTotal, err = newCounter("gerbil_proxy_mapping_updates_total",
|
||||
"Count of proxy mapping updates", "ifname")
|
||||
proxyIdleCleanupDuration = b.NewHistogram("gerbil_proxy_idle_cleanup_duration_seconds",
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
proxyIdleCleanupDuration, err = newHistogram("gerbil_proxy_idle_cleanup_duration_seconds",
|
||||
"Duration of cleanup cycles", durationBuckets, "ifname", "component")
|
||||
sniConnectionsTotal = b.NewCounter("gerbil_sni_connections_total",
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
sniConnectionsTotal, err = newCounter("gerbil_sni_connections_total",
|
||||
"Count of connections processed by SNI proxy", "result")
|
||||
sniConnectionDuration = b.NewHistogram("gerbil_sni_connection_duration_seconds",
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
sniConnectionDuration, err = newHistogram("gerbil_sni_connection_duration_seconds",
|
||||
"Lifetime distribution of proxied TLS connections", sniDurationBuckets)
|
||||
sniActiveConnections = b.NewUpDownCounter("gerbil_sni_active_connections",
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
sniActiveConnections, err = newUpDownCounter("gerbil_sni_active_connections",
|
||||
"Number of active SNI tunnels")
|
||||
sniRouteCacheHitsTotal = b.NewCounter("gerbil_sni_route_cache_hits_total",
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
sniRouteCacheHitsTotal, err = newCounter("gerbil_sni_route_cache_hits_total",
|
||||
"Count of route cache hits and misses", "result")
|
||||
sniRouteAPIRequestsTotal = b.NewCounter("gerbil_sni_route_api_requests_total",
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
sniRouteAPIRequestsTotal, err = newCounter("gerbil_sni_route_api_requests_total",
|
||||
"Count of route API requests", "result")
|
||||
sniRouteAPILatency = b.NewHistogram("gerbil_sni_route_api_latency_seconds",
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
sniRouteAPILatency, err = newHistogram("gerbil_sni_route_api_latency_seconds",
|
||||
"Distribution of route API call latencies", durationBuckets)
|
||||
sniLocalOverrideTotal = b.NewCounter("gerbil_sni_local_override_total",
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
sniLocalOverrideTotal, err = newCounter("gerbil_sni_local_override_total",
|
||||
"Count of routes using local overrides", "hit")
|
||||
sniTrustedProxyEventsTotal = b.NewCounter("gerbil_sni_trusted_proxy_events_total",
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
sniTrustedProxyEventsTotal, err = newCounter("gerbil_sni_trusted_proxy_events_total",
|
||||
"Count of PROXY protocol events", "event")
|
||||
sniProxyProtocolParseErrorsTotal = b.NewCounter("gerbil_sni_proxy_protocol_parse_errors_total",
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
sniProxyProtocolParseErrorsTotal, err = newCounter("gerbil_sni_proxy_protocol_parse_errors_total",
|
||||
"Count of PROXY protocol parse failures")
|
||||
sniDataBytesTotal = b.NewCounter("gerbil_sni_data_bytes_total",
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
sniDataBytesTotal, err = newCounter("gerbil_sni_data_bytes_total",
|
||||
"Count of bytes proxied through SNI tunnels", "direction")
|
||||
sniTunnelTerminationsTotal = b.NewCounter("gerbil_sni_tunnel_terminations_total",
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
sniTunnelTerminationsTotal, err = newCounter("gerbil_sni_tunnel_terminations_total",
|
||||
"Count of tunnel terminations by reason", "reason")
|
||||
httpRequestsTotal = b.NewCounter("gerbil_http_requests_total",
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
httpRequestsTotal, err = newCounter("gerbil_http_requests_total",
|
||||
"Count of HTTP requests to management API", "endpoint", "method", "status_code")
|
||||
httpRequestDuration = b.NewHistogram("gerbil_http_request_duration_seconds",
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
httpRequestDuration, err = newHistogram("gerbil_http_request_duration_seconds",
|
||||
"Distribution of HTTP request handling time", durationBuckets, "endpoint", "method")
|
||||
peerOperationsTotal = b.NewCounter("gerbil_peer_operations_total",
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
peerOperationsTotal, err = newCounter("gerbil_peer_operations_total",
|
||||
"Count of peer lifecycle operations", "operation", "result")
|
||||
proxyMappingUpdateRequestsTotal = b.NewCounter("gerbil_proxy_mapping_update_requests_total",
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
proxyMappingUpdateRequestsTotal, err = newCounter("gerbil_proxy_mapping_update_requests_total",
|
||||
"Count of proxy mapping update API calls", "result")
|
||||
destinationsUpdateRequestsTotal = b.NewCounter("gerbil_destinations_update_requests_total",
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
destinationsUpdateRequestsTotal, err = newCounter("gerbil_destinations_update_requests_total",
|
||||
"Count of destinations update API calls", "result")
|
||||
remoteConfigFetchesTotal = b.NewCounter("gerbil_remote_config_fetches_total",
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
remoteConfigFetchesTotal, err = newCounter("gerbil_remote_config_fetches_total",
|
||||
"Count of remote configuration fetch attempts", "result")
|
||||
bandwidthReportsTotal = b.NewCounter("gerbil_bandwidth_reports_total",
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
bandwidthReportsTotal, err = newCounter("gerbil_bandwidth_reports_total",
|
||||
"Count of bandwidth report transmissions", "result")
|
||||
peerBandwidthBytesTotal = b.NewCounter("gerbil_peer_bandwidth_bytes_total",
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
peerBandwidthBytesTotal, err = newCounter("gerbil_peer_bandwidth_bytes_total",
|
||||
"Bytes per peer tracked by bandwidth calculation", "peer", "direction")
|
||||
memorySpikeTotal = b.NewCounter("gerbil_memory_spike_total",
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
memorySpikeTotal, err = newCounter("gerbil_memory_spike_total",
|
||||
"Count of memory spikes detected", "severity")
|
||||
heapProfilesWrittenTotal = b.NewCounter("gerbil_heap_profiles_written_total",
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
heapProfilesWrittenTotal, err = newCounter("gerbil_heap_profiles_written_total",
|
||||
"Count of heap profile files generated")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func RecordInterfaceUp(ifname, instance string, up bool) {
|
||||
if wgInterfaceUp == nil {
|
||||
return
|
||||
}
|
||||
value := int64(0)
|
||||
if up {
|
||||
value = 1
|
||||
@@ -266,10 +498,16 @@ func RecordInterfaceUp(ifname, instance string, up bool) {
|
||||
}
|
||||
|
||||
func RecordPeersTotal(ifname string, delta int64) {
|
||||
if wgPeersTotal == nil {
|
||||
return
|
||||
}
|
||||
wgPeersTotal.Add(context.Background(), delta, observability.Labels{"ifname": ifname})
|
||||
}
|
||||
|
||||
func RecordPeerConnected(ifname, peer string, connected bool) {
|
||||
if wgPeerConnected == nil {
|
||||
return
|
||||
}
|
||||
value := int64(0)
|
||||
if connected {
|
||||
value = 1
|
||||
@@ -278,229 +516,393 @@ func RecordPeerConnected(ifname, peer string, connected bool) {
|
||||
}
|
||||
|
||||
func RecordHandshake(ifname, peer, result string) {
|
||||
if wgHandshakesTotal == nil {
|
||||
return
|
||||
}
|
||||
wgHandshakesTotal.Add(context.Background(), 1, observability.Labels{"ifname": ifname, "peer": peer, "result": result})
|
||||
}
|
||||
|
||||
func RecordHandshakeLatency(ifname, peer string, seconds float64) {
|
||||
if wgHandshakeLatency == nil {
|
||||
return
|
||||
}
|
||||
wgHandshakeLatency.Record(context.Background(), seconds, observability.Labels{"ifname": ifname, "peer": peer})
|
||||
}
|
||||
|
||||
func RecordPeerRTT(ifname, peer string, seconds float64) {
|
||||
if wgPeerRTT == nil {
|
||||
return
|
||||
}
|
||||
wgPeerRTT.Record(context.Background(), seconds, observability.Labels{"ifname": ifname, "peer": peer})
|
||||
}
|
||||
|
||||
func RecordBytesReceived(ifname, peer string, bytes int64) {
|
||||
if wgBytesReceived == nil {
|
||||
return
|
||||
}
|
||||
wgBytesReceived.Add(context.Background(), bytes, observability.Labels{"ifname": ifname, "peer": peer})
|
||||
}
|
||||
|
||||
func RecordBytesTransmitted(ifname, peer string, bytes int64) {
|
||||
if wgBytesTransmitted == nil {
|
||||
return
|
||||
}
|
||||
wgBytesTransmitted.Add(context.Background(), bytes, observability.Labels{"ifname": ifname, "peer": peer})
|
||||
}
|
||||
|
||||
func RecordAllowedIPsCount(ifname, peer string, delta int64) {
|
||||
if allowedIPsCount == nil {
|
||||
return
|
||||
}
|
||||
allowedIPsCount.Add(context.Background(), delta, observability.Labels{"ifname": ifname, "peer": peer})
|
||||
}
|
||||
|
||||
func RecordKeyRotation(ifname, reason string) {
|
||||
if keyRotationTotal == nil {
|
||||
return
|
||||
}
|
||||
keyRotationTotal.Add(context.Background(), 1, observability.Labels{"ifname": ifname, "reason": reason})
|
||||
}
|
||||
|
||||
func RecordNetlinkEvent(eventType string) {
|
||||
if netlinkEventsTotal == nil {
|
||||
return
|
||||
}
|
||||
netlinkEventsTotal.Add(context.Background(), 1, observability.Labels{"event_type": eventType})
|
||||
}
|
||||
|
||||
func RecordNetlinkError(component, errorType string) {
|
||||
if netlinkErrorsTotal == nil {
|
||||
return
|
||||
}
|
||||
netlinkErrorsTotal.Add(context.Background(), 1, observability.Labels{"component": component, "error_type": errorType})
|
||||
}
|
||||
|
||||
func RecordSyncDuration(component string, seconds float64) {
|
||||
if syncDuration == nil {
|
||||
return
|
||||
}
|
||||
syncDuration.Record(context.Background(), seconds, observability.Labels{"component": component})
|
||||
}
|
||||
|
||||
func RecordWorkqueueDepth(queue string, delta int64) {
|
||||
if workqueueDepth == nil {
|
||||
return
|
||||
}
|
||||
workqueueDepth.Add(context.Background(), delta, observability.Labels{"queue": queue})
|
||||
}
|
||||
|
||||
func RecordKernelModuleLoad(result string) {
|
||||
if kernelModuleLoads == nil {
|
||||
return
|
||||
}
|
||||
kernelModuleLoads.Add(context.Background(), 1, observability.Labels{"result": result})
|
||||
}
|
||||
|
||||
func RecordFirewallRuleApplied(result, chain string) {
|
||||
if firewallRulesApplied == nil {
|
||||
return
|
||||
}
|
||||
firewallRulesApplied.Add(context.Background(), 1, observability.Labels{"result": result, "chain": chain})
|
||||
}
|
||||
|
||||
func RecordActiveSession(ifname string, delta int64) {
|
||||
if activeSessions == nil {
|
||||
return
|
||||
}
|
||||
activeSessions.Add(context.Background(), delta, observability.Labels{"ifname": ifname})
|
||||
}
|
||||
|
||||
func RecordActiveProxyConnection(hostname string, delta int64) {
|
||||
_ = hostname
|
||||
func RecordActiveProxyConnection(delta int64) {
|
||||
if activeProxyConnections == nil {
|
||||
return
|
||||
}
|
||||
activeProxyConnections.Add(context.Background(), delta, nil)
|
||||
}
|
||||
|
||||
func RecordProxyRouteLookup(result, hostname string) {
|
||||
_ = hostname
|
||||
func RecordProxyRouteLookup(result string) {
|
||||
if proxyRouteLookups == nil {
|
||||
return
|
||||
}
|
||||
proxyRouteLookups.Add(context.Background(), 1, observability.Labels{"result": result})
|
||||
}
|
||||
|
||||
func RecordProxyTLSHandshake(hostname string, seconds float64) {
|
||||
_ = hostname
|
||||
func RecordProxyTLSHandshake(seconds float64) {
|
||||
if proxyTLSHandshake == nil {
|
||||
return
|
||||
}
|
||||
proxyTLSHandshake.Record(context.Background(), seconds, nil)
|
||||
}
|
||||
|
||||
func RecordProxyBytesTransmitted(hostname, direction string, bytes int64) {
|
||||
_ = hostname
|
||||
func RecordProxyBytesTransmitted(direction string, bytes int64) {
|
||||
if proxyBytesTransmitted == nil {
|
||||
return
|
||||
}
|
||||
proxyBytesTransmitted.Add(context.Background(), bytes, observability.Labels{"direction": direction})
|
||||
}
|
||||
|
||||
func RecordConfigReload(result string) {
|
||||
if configReloadsTotal == nil {
|
||||
return
|
||||
}
|
||||
configReloadsTotal.Add(context.Background(), 1, observability.Labels{"result": result})
|
||||
}
|
||||
|
||||
func RecordRestart() {
|
||||
if restartTotal == nil {
|
||||
return
|
||||
}
|
||||
restartTotal.Add(context.Background(), 1, nil)
|
||||
}
|
||||
|
||||
func RecordAuthFailure(peer, reason string) {
|
||||
if authFailuresTotal == nil {
|
||||
return
|
||||
}
|
||||
authFailuresTotal.Add(context.Background(), 1, observability.Labels{"peer": peer, "reason": reason})
|
||||
}
|
||||
|
||||
func RecordACLDenied(ifname, peer, policy string) {
|
||||
if aclDeniedTotal == nil {
|
||||
return
|
||||
}
|
||||
aclDeniedTotal.Add(context.Background(), 1, observability.Labels{"ifname": ifname, "peer": peer, "policy": policy})
|
||||
}
|
||||
|
||||
func RecordCertificateExpiry(certName, ifname string, days float64) {
|
||||
if certificateExpiryDays == nil {
|
||||
return
|
||||
}
|
||||
certificateExpiryDays.Record(context.Background(), days, observability.Labels{"cert_name": certName, "ifname": ifname})
|
||||
}
|
||||
|
||||
func RecordUDPPacket(ifname, packetType, direction string) {
|
||||
if udpPacketsTotal == nil {
|
||||
return
|
||||
}
|
||||
udpPacketsTotal.Add(context.Background(), 1, observability.Labels{"ifname": ifname, "type": packetType, "direction": direction})
|
||||
}
|
||||
|
||||
func RecordUDPPacketSize(ifname, packetType string, bytes float64) {
|
||||
if udpPacketSizeBytes == nil {
|
||||
return
|
||||
}
|
||||
udpPacketSizeBytes.Record(context.Background(), bytes, observability.Labels{"ifname": ifname, "type": packetType})
|
||||
}
|
||||
|
||||
func RecordHolePunchEvent(ifname, result string) {
|
||||
if holePunchEventsTotal == nil {
|
||||
return
|
||||
}
|
||||
holePunchEventsTotal.Add(context.Background(), 1, observability.Labels{"ifname": ifname, "result": result})
|
||||
}
|
||||
|
||||
func RecordProxyMapping(ifname string, delta int64) {
|
||||
if proxyMappingActive == nil {
|
||||
return
|
||||
}
|
||||
proxyMappingActive.Add(context.Background(), delta, observability.Labels{"ifname": ifname})
|
||||
}
|
||||
|
||||
func RecordSession(ifname string, delta int64) {
|
||||
sessionActive.Add(context.Background(), delta, observability.Labels{"ifname": ifname})
|
||||
if activeSessions == nil {
|
||||
return
|
||||
}
|
||||
activeSessions.Add(context.Background(), delta, observability.Labels{"ifname": ifname})
|
||||
}
|
||||
|
||||
func RecordSessionRebuilt(ifname string) {
|
||||
if sessionRebuiltTotal == nil {
|
||||
return
|
||||
}
|
||||
sessionRebuiltTotal.Add(context.Background(), 1, observability.Labels{"ifname": ifname})
|
||||
}
|
||||
|
||||
func RecordCommPattern(ifname string, delta int64) {
|
||||
if commPatternActive == nil {
|
||||
return
|
||||
}
|
||||
commPatternActive.Add(context.Background(), delta, observability.Labels{"ifname": ifname})
|
||||
}
|
||||
|
||||
func RecordProxyCleanupRemoved(ifname, component string, count int64) {
|
||||
if proxyCleanupRemovedTotal == nil {
|
||||
return
|
||||
}
|
||||
proxyCleanupRemovedTotal.Add(context.Background(), count, observability.Labels{"ifname": ifname, "component": component})
|
||||
}
|
||||
|
||||
func RecordProxyConnectionError(ifname, errorType string) {
|
||||
if proxyConnectionErrorsTotal == nil {
|
||||
return
|
||||
}
|
||||
proxyConnectionErrorsTotal.Add(context.Background(), 1, observability.Labels{"ifname": ifname, "error_type": errorType})
|
||||
}
|
||||
|
||||
func RecordProxyInitialMappings(ifname string, count int64) {
|
||||
if proxyInitialMappingsTotal == nil {
|
||||
return
|
||||
}
|
||||
proxyInitialMappingsTotal.Record(context.Background(), count, observability.Labels{"ifname": ifname})
|
||||
}
|
||||
|
||||
func RecordProxyMappingUpdate(ifname string) {
|
||||
if proxyMappingUpdatesTotal == nil {
|
||||
return
|
||||
}
|
||||
proxyMappingUpdatesTotal.Add(context.Background(), 1, observability.Labels{"ifname": ifname})
|
||||
}
|
||||
|
||||
func RecordProxyIdleCleanupDuration(ifname, component string, seconds float64) {
|
||||
if proxyIdleCleanupDuration == nil {
|
||||
return
|
||||
}
|
||||
proxyIdleCleanupDuration.Record(context.Background(), seconds, observability.Labels{"ifname": ifname, "component": component})
|
||||
}
|
||||
|
||||
func RecordSNIConnection(result string) {
|
||||
if sniConnectionsTotal == nil {
|
||||
return
|
||||
}
|
||||
sniConnectionsTotal.Add(context.Background(), 1, observability.Labels{"result": result})
|
||||
}
|
||||
|
||||
func RecordSNIConnectionDuration(seconds float64) {
|
||||
if sniConnectionDuration == nil {
|
||||
return
|
||||
}
|
||||
sniConnectionDuration.Record(context.Background(), seconds, nil)
|
||||
}
|
||||
|
||||
func RecordSNIActiveConnection(delta int64) {
|
||||
if sniActiveConnections == nil {
|
||||
return
|
||||
}
|
||||
sniActiveConnections.Add(context.Background(), delta, nil)
|
||||
}
|
||||
|
||||
func RecordSNIRouteCacheHit(result string) {
|
||||
if sniRouteCacheHitsTotal == nil {
|
||||
return
|
||||
}
|
||||
sniRouteCacheHitsTotal.Add(context.Background(), 1, observability.Labels{"result": result})
|
||||
}
|
||||
|
||||
func RecordSNIRouteAPIRequest(result string) {
|
||||
if sniRouteAPIRequestsTotal == nil {
|
||||
return
|
||||
}
|
||||
sniRouteAPIRequestsTotal.Add(context.Background(), 1, observability.Labels{"result": result})
|
||||
}
|
||||
|
||||
func RecordSNIRouteAPILatency(seconds float64) {
|
||||
if sniRouteAPILatency == nil {
|
||||
return
|
||||
}
|
||||
sniRouteAPILatency.Record(context.Background(), seconds, nil)
|
||||
}
|
||||
|
||||
func RecordSNILocalOverride(hit string) {
|
||||
if sniLocalOverrideTotal == nil {
|
||||
return
|
||||
}
|
||||
sniLocalOverrideTotal.Add(context.Background(), 1, observability.Labels{"hit": hit})
|
||||
}
|
||||
|
||||
func RecordSNITrustedProxyEvent(event string) {
|
||||
if sniTrustedProxyEventsTotal == nil {
|
||||
return
|
||||
}
|
||||
sniTrustedProxyEventsTotal.Add(context.Background(), 1, observability.Labels{"event": event})
|
||||
}
|
||||
|
||||
func RecordSNIProxyProtocolParseError() {
|
||||
if sniProxyProtocolParseErrorsTotal == nil {
|
||||
return
|
||||
}
|
||||
sniProxyProtocolParseErrorsTotal.Add(context.Background(), 1, nil)
|
||||
}
|
||||
|
||||
func RecordSNIDataBytes(direction string, bytes int64) {
|
||||
if sniDataBytesTotal == nil {
|
||||
return
|
||||
}
|
||||
sniDataBytesTotal.Add(context.Background(), bytes, observability.Labels{"direction": direction})
|
||||
}
|
||||
|
||||
func RecordSNITunnelTermination(reason string) {
|
||||
if sniTunnelTerminationsTotal == nil {
|
||||
return
|
||||
}
|
||||
sniTunnelTerminationsTotal.Add(context.Background(), 1, observability.Labels{"reason": reason})
|
||||
}
|
||||
|
||||
func RecordHTTPRequest(endpoint, method, statusCode string) {
|
||||
if httpRequestsTotal == nil {
|
||||
return
|
||||
}
|
||||
httpRequestsTotal.Add(context.Background(), 1, observability.Labels{"endpoint": endpoint, "method": method, "status_code": statusCode})
|
||||
}
|
||||
|
||||
func RecordHTTPRequestDuration(endpoint, method string, seconds float64) {
|
||||
if httpRequestDuration == nil {
|
||||
return
|
||||
}
|
||||
httpRequestDuration.Record(context.Background(), seconds, observability.Labels{"endpoint": endpoint, "method": method})
|
||||
}
|
||||
|
||||
func RecordPeerOperation(operation, result string) {
|
||||
if peerOperationsTotal == nil {
|
||||
return
|
||||
}
|
||||
peerOperationsTotal.Add(context.Background(), 1, observability.Labels{"operation": operation, "result": result})
|
||||
}
|
||||
|
||||
func RecordProxyMappingUpdateRequest(result string) {
|
||||
if proxyMappingUpdateRequestsTotal == nil {
|
||||
return
|
||||
}
|
||||
proxyMappingUpdateRequestsTotal.Add(context.Background(), 1, observability.Labels{"result": result})
|
||||
}
|
||||
|
||||
func RecordDestinationsUpdateRequest(result string) {
|
||||
if destinationsUpdateRequestsTotal == nil {
|
||||
return
|
||||
}
|
||||
destinationsUpdateRequestsTotal.Add(context.Background(), 1, observability.Labels{"result": result})
|
||||
}
|
||||
|
||||
func RecordRemoteConfigFetch(result string) {
|
||||
if remoteConfigFetchesTotal == nil {
|
||||
return
|
||||
}
|
||||
remoteConfigFetchesTotal.Add(context.Background(), 1, observability.Labels{"result": result})
|
||||
}
|
||||
|
||||
func RecordBandwidthReport(result string) {
|
||||
if bandwidthReportsTotal == nil {
|
||||
return
|
||||
}
|
||||
bandwidthReportsTotal.Add(context.Background(), 1, observability.Labels{"result": result})
|
||||
}
|
||||
|
||||
func RecordPeerBandwidthBytes(peer, direction string, bytes int64) {
|
||||
if peerBandwidthBytesTotal == nil {
|
||||
return
|
||||
}
|
||||
peerBandwidthBytesTotal.Add(context.Background(), bytes, observability.Labels{"peer": peer, "direction": direction})
|
||||
}
|
||||
|
||||
func RecordMemorySpike(severity string) {
|
||||
if memorySpikeTotal == nil {
|
||||
return
|
||||
}
|
||||
memorySpikeTotal.Add(context.Background(), 1, observability.Labels{"severity": severity})
|
||||
}
|
||||
|
||||
func RecordHeapProfileWritten() {
|
||||
if heapProfilesWrittenTotal == nil {
|
||||
return
|
||||
}
|
||||
heapProfilesWrittenTotal.Add(context.Background(), 1, nil)
|
||||
}
|
||||
|
||||
@@ -89,6 +89,9 @@ func TestDefaultConfig(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestShutdownNoInit(t *testing.T) {
|
||||
// Ensure a known clean global state before testing no-init shutdown behavior.
|
||||
_ = metrics.Shutdown(context.Background())
|
||||
|
||||
// Shutdown without Initialize should not panic or error.
|
||||
if err := metrics.Shutdown(context.Background()); err != nil {
|
||||
t.Errorf("unexpected error: %v", err)
|
||||
@@ -168,6 +171,7 @@ func TestRecordRelay(t *testing.T) {
|
||||
body := scrape(t, h)
|
||||
assertContains(t, body, "gerbil_udp_packets_total")
|
||||
assertContains(t, body, "gerbil_proxy_mapping_active")
|
||||
assertContains(t, body, "gerbil_active_sessions")
|
||||
}
|
||||
|
||||
func TestRecordWireGuard(t *testing.T) {
|
||||
@@ -216,10 +220,10 @@ func TestRecordNetlink(t *testing.T) {
|
||||
metrics.RecordKernelModuleLoad("success")
|
||||
metrics.RecordFirewallRuleApplied("success", "INPUT")
|
||||
metrics.RecordActiveSession("wg0", 1)
|
||||
metrics.RecordActiveProxyConnection(exampleHostname, 1)
|
||||
metrics.RecordProxyRouteLookup("hit", exampleHostname)
|
||||
metrics.RecordProxyTLSHandshake(exampleHostname, 0.05)
|
||||
metrics.RecordProxyBytesTransmitted(exampleHostname, "tx", 1024)
|
||||
metrics.RecordActiveProxyConnection(1)
|
||||
metrics.RecordProxyRouteLookup("hit")
|
||||
metrics.RecordProxyTLSHandshake(0.05)
|
||||
metrics.RecordProxyBytesTransmitted("tx", 1024)
|
||||
body := scrape(t, h)
|
||||
assertContains(t, body, "gerbil_netlink_events_total")
|
||||
assertContains(t, body, "gerbil_active_sessions")
|
||||
|
||||
@@ -60,6 +60,10 @@ type OTelConfig struct {
|
||||
// ExportInterval is how often metrics are pushed to the collector.
|
||||
// Defaults to 60 s.
|
||||
ExportInterval time.Duration
|
||||
|
||||
// Timeout bounds OTLP exporter construction calls.
|
||||
// Defaults to 10 s.
|
||||
Timeout time.Duration
|
||||
}
|
||||
|
||||
// DefaultMetricsConfig returns a MetricsConfig with sensible defaults.
|
||||
@@ -75,6 +79,7 @@ func DefaultMetricsConfig() MetricsConfig {
|
||||
Endpoint: "localhost:4317",
|
||||
Insecure: true,
|
||||
ExportInterval: 60 * time.Second,
|
||||
Timeout: 10 * time.Second,
|
||||
},
|
||||
ServiceName: "gerbil",
|
||||
ServiceVersion: "1.0.0",
|
||||
@@ -88,8 +93,10 @@ func (c *MetricsConfig) Validate() error {
|
||||
}
|
||||
|
||||
switch c.Backend {
|
||||
case "prometheus", "none", "":
|
||||
case "prometheus", "none":
|
||||
// valid
|
||||
case "":
|
||||
return fmt.Errorf("metrics: enabled requires a non-empty backend")
|
||||
case "otel":
|
||||
if c.OTel.Endpoint == "" {
|
||||
return fmt.Errorf("metrics: backend=otel requires a non-empty OTel endpoint")
|
||||
@@ -100,6 +107,9 @@ func (c *MetricsConfig) Validate() error {
|
||||
if c.OTel.ExportInterval <= 0 {
|
||||
return fmt.Errorf("metrics: otel export interval must be positive")
|
||||
}
|
||||
if c.OTel.Timeout <= 0 {
|
||||
return fmt.Errorf("metrics: otel timeout must be positive")
|
||||
}
|
||||
default:
|
||||
return fmt.Errorf("metrics: unknown backend %q (must be \"prometheus\", \"otel\", or \"none\")", c.Backend)
|
||||
}
|
||||
|
||||
@@ -43,20 +43,20 @@ type Histogram interface {
|
||||
type Backend interface {
|
||||
// NewCounter creates a counter metric.
|
||||
// labelNames declares the set of label keys that will be passed at observation time.
|
||||
NewCounter(name, desc string, labelNames ...string) Counter
|
||||
NewCounter(name, desc string, labelNames ...string) (Counter, error)
|
||||
|
||||
// NewUpDownCounter creates an up-down counter metric.
|
||||
NewUpDownCounter(name, desc string, labelNames ...string) UpDownCounter
|
||||
NewUpDownCounter(name, desc string, labelNames ...string) (UpDownCounter, error)
|
||||
|
||||
// NewInt64Gauge creates an integer gauge metric.
|
||||
NewInt64Gauge(name, desc string, labelNames ...string) Int64Gauge
|
||||
NewInt64Gauge(name, desc string, labelNames ...string) (Int64Gauge, error)
|
||||
|
||||
// NewFloat64Gauge creates a float gauge metric.
|
||||
NewFloat64Gauge(name, desc string, labelNames ...string) Float64Gauge
|
||||
NewFloat64Gauge(name, desc string, labelNames ...string) (Float64Gauge, error)
|
||||
|
||||
// NewHistogram creates a histogram metric.
|
||||
// buckets are the explicit upper-bound bucket boundaries.
|
||||
NewHistogram(name, desc string, buckets []float64, labelNames ...string) Histogram
|
||||
NewHistogram(name, desc string, buckets []float64, labelNames ...string) (Histogram, error)
|
||||
|
||||
// HTTPHandler returns the /metrics HTTP handler.
|
||||
// Implementations that do not expose an HTTP endpoint return nil.
|
||||
@@ -88,6 +88,7 @@ func New(cfg MetricsConfig) (Backend, error) {
|
||||
Endpoint: cfg.OTel.Endpoint,
|
||||
Insecure: cfg.OTel.Insecure,
|
||||
ExportInterval: cfg.OTel.ExportInterval,
|
||||
Timeout: cfg.OTel.Timeout,
|
||||
ServiceName: cfg.ServiceName,
|
||||
ServiceVersion: cfg.ServiceVersion,
|
||||
DeploymentEnvironment: cfg.DeploymentEnvironment,
|
||||
@@ -110,19 +111,19 @@ type promAdapter struct {
|
||||
b *obsprom.Backend
|
||||
}
|
||||
|
||||
func (a *promAdapter) NewCounter(name, desc string, labelNames ...string) Counter {
|
||||
func (a *promAdapter) NewCounter(name, desc string, labelNames ...string) (Counter, error) {
|
||||
return a.b.NewCounter(name, desc, labelNames...)
|
||||
}
|
||||
func (a *promAdapter) NewUpDownCounter(name, desc string, labelNames ...string) UpDownCounter {
|
||||
func (a *promAdapter) NewUpDownCounter(name, desc string, labelNames ...string) (UpDownCounter, error) {
|
||||
return a.b.NewUpDownCounter(name, desc, labelNames...)
|
||||
}
|
||||
func (a *promAdapter) NewInt64Gauge(name, desc string, labelNames ...string) Int64Gauge {
|
||||
func (a *promAdapter) NewInt64Gauge(name, desc string, labelNames ...string) (Int64Gauge, error) {
|
||||
return a.b.NewInt64Gauge(name, desc, labelNames...)
|
||||
}
|
||||
func (a *promAdapter) NewFloat64Gauge(name, desc string, labelNames ...string) Float64Gauge {
|
||||
func (a *promAdapter) NewFloat64Gauge(name, desc string, labelNames ...string) (Float64Gauge, error) {
|
||||
return a.b.NewFloat64Gauge(name, desc, labelNames...)
|
||||
}
|
||||
func (a *promAdapter) NewHistogram(name, desc string, buckets []float64, labelNames ...string) Histogram {
|
||||
func (a *promAdapter) NewHistogram(name, desc string, buckets []float64, labelNames ...string) (Histogram, error) {
|
||||
return a.b.NewHistogram(name, desc, buckets, labelNames...)
|
||||
}
|
||||
func (a *promAdapter) HTTPHandler() http.Handler { return a.b.HTTPHandler() }
|
||||
@@ -133,19 +134,19 @@ type otelAdapter struct {
|
||||
b *obsotel.Backend
|
||||
}
|
||||
|
||||
func (a *otelAdapter) NewCounter(name, desc string, labelNames ...string) Counter {
|
||||
func (a *otelAdapter) NewCounter(name, desc string, labelNames ...string) (Counter, error) {
|
||||
return a.b.NewCounter(name, desc, labelNames...)
|
||||
}
|
||||
func (a *otelAdapter) NewUpDownCounter(name, desc string, labelNames ...string) UpDownCounter {
|
||||
func (a *otelAdapter) NewUpDownCounter(name, desc string, labelNames ...string) (UpDownCounter, error) {
|
||||
return a.b.NewUpDownCounter(name, desc, labelNames...)
|
||||
}
|
||||
func (a *otelAdapter) NewInt64Gauge(name, desc string, labelNames ...string) Int64Gauge {
|
||||
func (a *otelAdapter) NewInt64Gauge(name, desc string, labelNames ...string) (Int64Gauge, error) {
|
||||
return a.b.NewInt64Gauge(name, desc, labelNames...)
|
||||
}
|
||||
func (a *otelAdapter) NewFloat64Gauge(name, desc string, labelNames ...string) Float64Gauge {
|
||||
func (a *otelAdapter) NewFloat64Gauge(name, desc string, labelNames ...string) (Float64Gauge, error) {
|
||||
return a.b.NewFloat64Gauge(name, desc, labelNames...)
|
||||
}
|
||||
func (a *otelAdapter) NewHistogram(name, desc string, buckets []float64, labelNames ...string) Histogram {
|
||||
func (a *otelAdapter) NewHistogram(name, desc string, buckets []float64, labelNames ...string) (Histogram, error) {
|
||||
return a.b.NewHistogram(name, desc, buckets, labelNames...)
|
||||
}
|
||||
func (a *otelAdapter) HTTPHandler() http.Handler { return a.b.HTTPHandler() }
|
||||
|
||||
@@ -2,6 +2,8 @@ package observability_test
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net"
|
||||
"os"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
@@ -39,20 +41,19 @@ func TestValidateValidConfigs(t *testing.T) {
|
||||
}{
|
||||
{name: "disabled", cfg: observability.MetricsConfig{Enabled: false}},
|
||||
{name: "backend none", cfg: observability.MetricsConfig{Enabled: true, Backend: "none"}},
|
||||
{name: "backend empty", cfg: observability.MetricsConfig{Enabled: true, Backend: ""}},
|
||||
{name: "prometheus", cfg: observability.MetricsConfig{Enabled: true, Backend: "prometheus"}},
|
||||
{
|
||||
name: "otel grpc",
|
||||
cfg: observability.MetricsConfig{
|
||||
Enabled: true, Backend: "otel",
|
||||
OTel: observability.OTelConfig{Protocol: "grpc", Endpoint: otelGRPCEndpoint, ExportInterval: 10 * time.Second},
|
||||
OTel: observability.OTelConfig{Protocol: "grpc", Endpoint: otelGRPCEndpoint, ExportInterval: 10 * time.Second, Timeout: 2 * time.Second},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "otel http",
|
||||
cfg: observability.MetricsConfig{
|
||||
Enabled: true, Backend: "otel",
|
||||
OTel: observability.OTelConfig{Protocol: "http", Endpoint: "localhost:4318", ExportInterval: 30 * time.Second},
|
||||
OTel: observability.OTelConfig{Protocol: "http", Endpoint: "localhost:4318", ExportInterval: 30 * time.Second, Timeout: 2 * time.Second},
|
||||
},
|
||||
},
|
||||
}
|
||||
@@ -71,25 +72,36 @@ func TestValidateInvalidConfigs(t *testing.T) {
|
||||
cfg observability.MetricsConfig
|
||||
}{
|
||||
{name: "unknown backend", cfg: observability.MetricsConfig{Enabled: true, Backend: "datadog"}},
|
||||
{
|
||||
name: "backend empty while enabled",
|
||||
cfg: observability.MetricsConfig{Enabled: true, Backend: ""},
|
||||
},
|
||||
{
|
||||
name: "otel missing endpoint",
|
||||
cfg: observability.MetricsConfig{
|
||||
Enabled: true, Backend: "otel",
|
||||
OTel: observability.OTelConfig{Protocol: "grpc", Endpoint: "", ExportInterval: 10 * time.Second},
|
||||
OTel: observability.OTelConfig{Protocol: "grpc", Endpoint: "", ExportInterval: 10 * time.Second, Timeout: 2 * time.Second},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "otel invalid protocol",
|
||||
cfg: observability.MetricsConfig{
|
||||
Enabled: true, Backend: "otel",
|
||||
OTel: observability.OTelConfig{Protocol: "tcp", Endpoint: otelGRPCEndpoint, ExportInterval: 10 * time.Second},
|
||||
OTel: observability.OTelConfig{Protocol: "tcp", Endpoint: otelGRPCEndpoint, ExportInterval: 10 * time.Second, Timeout: 2 * time.Second},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "otel zero interval",
|
||||
cfg: observability.MetricsConfig{
|
||||
Enabled: true, Backend: "otel",
|
||||
OTel: observability.OTelConfig{Protocol: "grpc", Endpoint: otelGRPCEndpoint, ExportInterval: 0},
|
||||
OTel: observability.OTelConfig{Protocol: "grpc", Endpoint: otelGRPCEndpoint, ExportInterval: 0, Timeout: 2 * time.Second},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "otel zero timeout",
|
||||
cfg: observability.MetricsConfig{
|
||||
Enabled: true, Backend: "otel",
|
||||
OTel: observability.OTelConfig{Protocol: "grpc", Endpoint: otelGRPCEndpoint, ExportInterval: 10 * time.Second, Timeout: 0},
|
||||
},
|
||||
},
|
||||
}
|
||||
@@ -157,11 +169,32 @@ func TestPrometheusAdapterAllInstruments(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
labels := observability.Labels{"k": "v"}
|
||||
|
||||
b.NewCounter("prom_adapter_counter_total", "desc", "k").Add(ctx, 1, labels)
|
||||
b.NewUpDownCounter("prom_adapter_updown", "desc", "k").Add(ctx, 2, labels)
|
||||
b.NewInt64Gauge("prom_adapter_int_gauge", "desc", "k").Record(ctx, 99, labels)
|
||||
b.NewFloat64Gauge("prom_adapter_float_gauge", "desc", "k").Record(ctx, 1.23, labels)
|
||||
b.NewHistogram("prom_adapter_histogram", "desc", []float64{0.1, 1.0}, "k").Record(ctx, 0.5, labels)
|
||||
c, err := b.NewCounter("prom_adapter_counter_total", "desc", "k")
|
||||
if err != nil {
|
||||
t.Fatalf("NewCounter error: %v", err)
|
||||
}
|
||||
u, err := b.NewUpDownCounter("prom_adapter_updown", "desc", "k")
|
||||
if err != nil {
|
||||
t.Fatalf("NewUpDownCounter error: %v", err)
|
||||
}
|
||||
ig, err := b.NewInt64Gauge("prom_adapter_int_gauge", "desc", "k")
|
||||
if err != nil {
|
||||
t.Fatalf("NewInt64Gauge error: %v", err)
|
||||
}
|
||||
fg, err := b.NewFloat64Gauge("prom_adapter_float_gauge", "desc", "k")
|
||||
if err != nil {
|
||||
t.Fatalf("NewFloat64Gauge error: %v", err)
|
||||
}
|
||||
h, err := b.NewHistogram("prom_adapter_histogram", "desc", []float64{0.1, 1.0}, "k")
|
||||
if err != nil {
|
||||
t.Fatalf("NewHistogram error: %v", err)
|
||||
}
|
||||
|
||||
c.Add(ctx, 1, labels)
|
||||
u.Add(ctx, 2, labels)
|
||||
ig.Record(ctx, 99, labels)
|
||||
fg.Record(ctx, 1.23, labels)
|
||||
h.Record(ctx, 0.5, labels)
|
||||
|
||||
if b.HTTPHandler() == nil {
|
||||
t.Error("prometheus adapter HTTPHandler should not be nil")
|
||||
@@ -172,9 +205,20 @@ func TestPrometheusAdapterAllInstruments(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestOtelAdapterAllInstruments(t *testing.T) {
|
||||
if os.Getenv("SKIP_OTEL_INTEGRATION") != "" {
|
||||
t.Skip("skipping OTel integration test because SKIP_OTEL_INTEGRATION is set")
|
||||
}
|
||||
|
||||
dialTimeout := 300 * time.Millisecond
|
||||
conn, err := net.DialTimeout("tcp", otelGRPCEndpoint, dialTimeout)
|
||||
if err != nil {
|
||||
t.Skipf("skipping OTel integration test; collector %s not reachable: %v", otelGRPCEndpoint, err)
|
||||
}
|
||||
_ = conn.Close()
|
||||
|
||||
b, err := observability.New(observability.MetricsConfig{
|
||||
Enabled: true, Backend: "otel",
|
||||
OTel: observability.OTelConfig{Protocol: "grpc", Endpoint: otelGRPCEndpoint, Insecure: true, ExportInterval: 100 * time.Millisecond},
|
||||
OTel: observability.OTelConfig{Protocol: "grpc", Endpoint: otelGRPCEndpoint, Insecure: true, ExportInterval: 100 * time.Millisecond, Timeout: 2 * time.Second},
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create otel backend: %v", err)
|
||||
@@ -182,11 +226,32 @@ func TestOtelAdapterAllInstruments(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
labels := observability.Labels{"k": "v"}
|
||||
|
||||
b.NewCounter("otel_adapter_counter_total", "desc", "k").Add(ctx, 1, labels)
|
||||
b.NewUpDownCounter("otel_adapter_updown", "desc", "k").Add(ctx, 2, labels)
|
||||
b.NewInt64Gauge("otel_adapter_int_gauge", "desc", "k").Record(ctx, 99, labels)
|
||||
b.NewFloat64Gauge("otel_adapter_float_gauge", "desc", "k").Record(ctx, 1.23, labels)
|
||||
b.NewHistogram("otel_adapter_histogram", "desc", []float64{0.1, 1.0}, "k").Record(ctx, 0.5, labels)
|
||||
c, err := b.NewCounter("otel_adapter_counter_total", "desc", "k")
|
||||
if err != nil {
|
||||
t.Fatalf("NewCounter error: %v", err)
|
||||
}
|
||||
u, err := b.NewUpDownCounter("otel_adapter_updown", "desc", "k")
|
||||
if err != nil {
|
||||
t.Fatalf("NewUpDownCounter error: %v", err)
|
||||
}
|
||||
ig, err := b.NewInt64Gauge("otel_adapter_int_gauge", "desc", "k")
|
||||
if err != nil {
|
||||
t.Fatalf("NewInt64Gauge error: %v", err)
|
||||
}
|
||||
fg, err := b.NewFloat64Gauge("otel_adapter_float_gauge", "desc", "k")
|
||||
if err != nil {
|
||||
t.Fatalf("NewFloat64Gauge error: %v", err)
|
||||
}
|
||||
h, err := b.NewHistogram("otel_adapter_histogram", "desc", []float64{0.1, 1.0}, "k")
|
||||
if err != nil {
|
||||
t.Fatalf("NewHistogram error: %v", err)
|
||||
}
|
||||
|
||||
c.Add(ctx, 1, labels)
|
||||
u.Add(ctx, 2, labels)
|
||||
ig.Record(ctx, 99, labels)
|
||||
fg.Record(ctx, 1.23, labels)
|
||||
h.Record(ctx, 0.5, labels)
|
||||
|
||||
if b.HTTPHandler() != nil {
|
||||
t.Error("OTel adapter HTTPHandler should be nil")
|
||||
|
||||
@@ -13,38 +13,31 @@ type NoopBackend struct{}
|
||||
// Compile-time interface check.
|
||||
var _ Backend = (*NoopBackend)(nil)
|
||||
|
||||
func (n *NoopBackend) NewCounter(_ string, _ string, _ ...string) Counter {
|
||||
_ = n
|
||||
return noopCounter{}
|
||||
func (n *NoopBackend) NewCounter(_ string, _ string, _ ...string) (Counter, error) {
|
||||
return noopCounter{}, nil
|
||||
}
|
||||
|
||||
func (n *NoopBackend) NewUpDownCounter(_ string, _ string, _ ...string) UpDownCounter {
|
||||
_ = n
|
||||
return noopUpDownCounter{}
|
||||
func (n *NoopBackend) NewUpDownCounter(_ string, _ string, _ ...string) (UpDownCounter, error) {
|
||||
return noopUpDownCounter{}, nil
|
||||
}
|
||||
|
||||
func (n *NoopBackend) NewInt64Gauge(_ string, _ string, _ ...string) Int64Gauge {
|
||||
_ = n
|
||||
return noopInt64Gauge{}
|
||||
func (n *NoopBackend) NewInt64Gauge(_ string, _ string, _ ...string) (Int64Gauge, error) {
|
||||
return noopInt64Gauge{}, nil
|
||||
}
|
||||
|
||||
func (n *NoopBackend) NewFloat64Gauge(_ string, _ string, _ ...string) Float64Gauge {
|
||||
_ = n
|
||||
return noopFloat64Gauge{}
|
||||
func (n *NoopBackend) NewFloat64Gauge(_ string, _ string, _ ...string) (Float64Gauge, error) {
|
||||
return noopFloat64Gauge{}, nil
|
||||
}
|
||||
|
||||
func (n *NoopBackend) NewHistogram(_ string, _ string, _ []float64, _ ...string) Histogram {
|
||||
_ = n
|
||||
return noopHistogram{}
|
||||
func (n *NoopBackend) NewHistogram(_ string, _ string, _ []float64, _ ...string) (Histogram, error) {
|
||||
return noopHistogram{}, nil
|
||||
}
|
||||
|
||||
func (n *NoopBackend) HTTPHandler() http.Handler {
|
||||
_ = n
|
||||
return nil
|
||||
}
|
||||
|
||||
func (n *NoopBackend) Shutdown(_ context.Context) error {
|
||||
_ = n
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
@@ -13,32 +13,32 @@ func TestNoopBackendAllInstruments(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
labels := observability.Labels{"k": "v"}
|
||||
|
||||
t.Run("Counter", func(_ *testing.T) {
|
||||
c := n.NewCounter("test_counter", "desc")
|
||||
t.Run("Counter", func(t *testing.T) {
|
||||
c, _ := n.NewCounter("test_counter", "desc")
|
||||
c.Add(ctx, 1, labels)
|
||||
c.Add(ctx, 0, nil)
|
||||
})
|
||||
|
||||
t.Run("UpDownCounter", func(_ *testing.T) {
|
||||
u := n.NewUpDownCounter("test_updown", "desc")
|
||||
t.Run("UpDownCounter", func(t *testing.T) {
|
||||
u, _ := n.NewUpDownCounter("test_updown", "desc")
|
||||
u.Add(ctx, 1, labels)
|
||||
u.Add(ctx, -1, nil)
|
||||
})
|
||||
|
||||
t.Run("Int64Gauge", func(_ *testing.T) {
|
||||
g := n.NewInt64Gauge("test_int64gauge", "desc")
|
||||
t.Run("Int64Gauge", func(t *testing.T) {
|
||||
g, _ := n.NewInt64Gauge("test_int64gauge", "desc")
|
||||
g.Record(ctx, 42, labels)
|
||||
g.Record(ctx, 0, nil)
|
||||
})
|
||||
|
||||
t.Run("Float64Gauge", func(_ *testing.T) {
|
||||
g := n.NewFloat64Gauge("test_float64gauge", "desc")
|
||||
t.Run("Float64Gauge", func(t *testing.T) {
|
||||
g, _ := n.NewFloat64Gauge("test_float64gauge", "desc")
|
||||
g.Record(ctx, 3.14, labels)
|
||||
g.Record(ctx, 0, nil)
|
||||
})
|
||||
|
||||
t.Run("Histogram", func(_ *testing.T) {
|
||||
h := n.NewHistogram("test_histogram", "desc", []float64{1, 5, 10})
|
||||
t.Run("Histogram", func(t *testing.T) {
|
||||
h, _ := n.NewHistogram("test_histogram", "desc", []float64{1, 5, 10})
|
||||
h.Record(ctx, 2.5, labels)
|
||||
h.Record(ctx, 0, nil)
|
||||
})
|
||||
@@ -56,12 +56,47 @@ func TestNoopBackendAllInstruments(t *testing.T) {
|
||||
})
|
||||
}
|
||||
|
||||
func TestNoopBackendLabelNames(_ *testing.T) {
|
||||
func TestNoopBackendLabelNames(t *testing.T) {
|
||||
// Verify that label names passed at creation time are accepted without panic.
|
||||
n := &observability.NoopBackend{}
|
||||
n.NewCounter("c", "d", "label1", "label2")
|
||||
n.NewUpDownCounter("u", "d", "l1")
|
||||
n.NewInt64Gauge("g1", "d", "l1", "l2", "l3")
|
||||
n.NewFloat64Gauge("g2", "d")
|
||||
n.NewHistogram("h", "d", []float64{0.1, 1.0}, "l1")
|
||||
|
||||
assertNoPanic := func(t *testing.T, constructor string, fn func()) {
|
||||
t.Helper()
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
t.Fatalf("%s panicked: %v", constructor, r)
|
||||
}
|
||||
}()
|
||||
fn()
|
||||
}
|
||||
|
||||
t.Run("NewCounter", func(t *testing.T) {
|
||||
assertNoPanic(t, "NewCounter", func() {
|
||||
_, _ = n.NewCounter("c", "d", "label1", "label2")
|
||||
})
|
||||
})
|
||||
|
||||
t.Run("NewUpDownCounter", func(t *testing.T) {
|
||||
assertNoPanic(t, "NewUpDownCounter", func() {
|
||||
_, _ = n.NewUpDownCounter("u", "d", "l1")
|
||||
})
|
||||
})
|
||||
|
||||
t.Run("NewInt64Gauge", func(t *testing.T) {
|
||||
assertNoPanic(t, "NewInt64Gauge", func() {
|
||||
_, _ = n.NewInt64Gauge("g1", "d", "l1", "l2", "l3")
|
||||
})
|
||||
})
|
||||
|
||||
t.Run("NewFloat64Gauge", func(t *testing.T) {
|
||||
assertNoPanic(t, "NewFloat64Gauge", func() {
|
||||
_, _ = n.NewFloat64Gauge("g2", "d")
|
||||
})
|
||||
})
|
||||
|
||||
t.Run("NewHistogram", func(t *testing.T) {
|
||||
assertNoPanic(t, "NewHistogram", func() {
|
||||
_, _ = n.NewHistogram("h", "d", []float64{0.1, 1.0}, "l1")
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
@@ -9,7 +9,10 @@ package otel
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"log"
|
||||
"net/http"
|
||||
"regexp"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
@@ -17,6 +20,8 @@ import (
|
||||
sdkmetric "go.opentelemetry.io/otel/sdk/metric"
|
||||
)
|
||||
|
||||
var metricLabelNameRE = regexp.MustCompile(`^[a-zA-Z_][a-zA-Z0-9_]*$`)
|
||||
|
||||
// Config holds OTel backend configuration.
|
||||
type Config struct {
|
||||
// Protocol is "grpc" (default) or "http".
|
||||
@@ -31,6 +36,9 @@ type Config struct {
|
||||
// ExportInterval is the period between pushes to the collector.
|
||||
ExportInterval time.Duration
|
||||
|
||||
// Timeout bounds exporter construction calls.
|
||||
Timeout time.Duration
|
||||
|
||||
ServiceName string
|
||||
ServiceVersion string
|
||||
DeploymentEnvironment string
|
||||
@@ -57,9 +65,15 @@ func New(cfg Config) (*Backend, error) {
|
||||
if cfg.Protocol == "" {
|
||||
cfg.Protocol = "grpc"
|
||||
}
|
||||
if strings.TrimSpace(cfg.Endpoint) == "" {
|
||||
return nil, fmt.Errorf("otel backend: empty cfg.Endpoint")
|
||||
}
|
||||
if cfg.ExportInterval <= 0 {
|
||||
cfg.ExportInterval = 60 * time.Second
|
||||
}
|
||||
if cfg.Timeout <= 0 {
|
||||
cfg.Timeout = 10 * time.Second
|
||||
}
|
||||
if cfg.ServiceName == "" {
|
||||
cfg.ServiceName = "gerbil"
|
||||
}
|
||||
@@ -100,111 +114,196 @@ func (b *Backend) Shutdown(ctx context.Context) error {
|
||||
}
|
||||
|
||||
// NewCounter creates an OTel Int64Counter.
|
||||
func (b *Backend) NewCounter(name, desc string, _ ...string) *Counter {
|
||||
func (b *Backend) NewCounter(name, desc string, labelNames ...string) (*Counter, error) {
|
||||
normalizedLabelNames, err := validateLabelNames(labelNames)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("otel: create counter %q: %w", name, err)
|
||||
}
|
||||
c, err := b.meter.Int64Counter(name, metric.WithDescription(desc))
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("otel: create counter %q: %v", name, err))
|
||||
return nil, fmt.Errorf("otel: create counter %q: %w", name, err)
|
||||
}
|
||||
return &Counter{c: c}
|
||||
return &Counter{c: c, labelNames: normalizedLabelNames}, nil
|
||||
}
|
||||
|
||||
// NewUpDownCounter creates an OTel Int64UpDownCounter.
|
||||
func (b *Backend) NewUpDownCounter(name, desc string, _ ...string) *UpDownCounter {
|
||||
func (b *Backend) NewUpDownCounter(name, desc string, labelNames ...string) (*UpDownCounter, error) {
|
||||
normalizedLabelNames, err := validateLabelNames(labelNames)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("otel: create up-down counter %q: %w", name, err)
|
||||
}
|
||||
c, err := b.meter.Int64UpDownCounter(name, metric.WithDescription(desc))
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("otel: create up-down counter %q: %v", name, err))
|
||||
return nil, fmt.Errorf("otel: create up-down counter %q: %w", name, err)
|
||||
}
|
||||
return &UpDownCounter{c: c}
|
||||
return &UpDownCounter{c: c, labelNames: normalizedLabelNames}, nil
|
||||
}
|
||||
|
||||
// NewInt64Gauge creates an OTel Int64Gauge.
|
||||
func (b *Backend) NewInt64Gauge(name, desc string, _ ...string) *Int64Gauge {
|
||||
func (b *Backend) NewInt64Gauge(name, desc string, labelNames ...string) (*Int64Gauge, error) {
|
||||
normalizedLabelNames, err := validateLabelNames(labelNames)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("otel: create int64 gauge %q: %w", name, err)
|
||||
}
|
||||
g, err := b.meter.Int64Gauge(name, metric.WithDescription(desc))
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("otel: create int64 gauge %q: %v", name, err))
|
||||
return nil, fmt.Errorf("otel: create int64 gauge %q: %w", name, err)
|
||||
}
|
||||
return &Int64Gauge{g: g}
|
||||
return &Int64Gauge{g: g, labelNames: normalizedLabelNames}, nil
|
||||
}
|
||||
|
||||
// NewFloat64Gauge creates an OTel Float64Gauge.
|
||||
func (b *Backend) NewFloat64Gauge(name, desc string, _ ...string) *Float64Gauge {
|
||||
func (b *Backend) NewFloat64Gauge(name, desc string, labelNames ...string) (*Float64Gauge, error) {
|
||||
normalizedLabelNames, err := validateLabelNames(labelNames)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("otel: create float64 gauge %q: %w", name, err)
|
||||
}
|
||||
g, err := b.meter.Float64Gauge(name, metric.WithDescription(desc))
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("otel: create float64 gauge %q: %v", name, err))
|
||||
return nil, fmt.Errorf("otel: create float64 gauge %q: %w", name, err)
|
||||
}
|
||||
return &Float64Gauge{g: g}
|
||||
return &Float64Gauge{g: g, labelNames: normalizedLabelNames}, nil
|
||||
}
|
||||
|
||||
// NewHistogram creates an OTel Float64Histogram with explicit bucket boundaries.
|
||||
func (b *Backend) NewHistogram(name, desc string, buckets []float64, _ ...string) *Histogram {
|
||||
func (b *Backend) NewHistogram(name, desc string, buckets []float64, labelNames ...string) (*Histogram, error) {
|
||||
normalizedLabelNames, err := validateLabelNames(labelNames)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("otel: create histogram %q: %w", name, err)
|
||||
}
|
||||
h, err := b.meter.Float64Histogram(name,
|
||||
metric.WithDescription(desc),
|
||||
metric.WithExplicitBucketBoundaries(buckets...),
|
||||
)
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("otel: create histogram %q: %v", name, err))
|
||||
return nil, fmt.Errorf("otel: create histogram %q: %w", name, err)
|
||||
}
|
||||
return &Histogram{h: h}
|
||||
return &Histogram{h: h, labelNames: normalizedLabelNames}, nil
|
||||
}
|
||||
|
||||
// labelsToAttrs converts a Labels map to OTel attribute key-value pairs.
|
||||
func labelsToAttrs(labels map[string]string) []attribute.KeyValue {
|
||||
if len(labels) == 0 {
|
||||
return nil
|
||||
func validateLabelNames(labelNames []string) ([]string, error) {
|
||||
if len(labelNames) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
attrs := make([]attribute.KeyValue, 0, len(labels))
|
||||
for k, v := range labels {
|
||||
attrs = append(attrs, attribute.String(k, v))
|
||||
|
||||
normalized := make([]string, len(labelNames))
|
||||
seen := make(map[string]struct{}, len(labelNames))
|
||||
for i, name := range labelNames {
|
||||
if !metricLabelNameRE.MatchString(name) {
|
||||
return nil, fmt.Errorf("invalid label name %q", name)
|
||||
}
|
||||
if _, exists := seen[name]; exists {
|
||||
return nil, fmt.Errorf("duplicate label name %q", name)
|
||||
}
|
||||
seen[name] = struct{}{}
|
||||
normalized[i] = name
|
||||
}
|
||||
|
||||
return normalized, nil
|
||||
}
|
||||
|
||||
func labelsToAttrs(labelNames []string, labels map[string]string) []attribute.KeyValue {
|
||||
if len(labelNames) == 0 {
|
||||
if len(labels) > 0 {
|
||||
log.Printf("WARN: dropping otel metric sample due to unexpected labels: got=%v expected=none", labels)
|
||||
return nil
|
||||
}
|
||||
return []attribute.KeyValue{}
|
||||
}
|
||||
|
||||
attrs := make([]attribute.KeyValue, 0, len(labelNames))
|
||||
for _, labelName := range labelNames {
|
||||
attrs = append(attrs, attribute.String(labelName, labels[labelName]))
|
||||
}
|
||||
|
||||
for got := range labels {
|
||||
found := false
|
||||
for _, expected := range labelNames {
|
||||
if got == expected {
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
log.Printf("WARN: dropping otel metric sample due to unexpected label key %q (expected=%v)", got, labelNames)
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
return attrs
|
||||
}
|
||||
|
||||
// Counter wraps an OTel Int64Counter.
|
||||
type Counter struct {
|
||||
c metric.Int64Counter
|
||||
c metric.Int64Counter
|
||||
labelNames []string
|
||||
}
|
||||
|
||||
// Add increments the counter by value.
|
||||
func (c *Counter) Add(ctx context.Context, value int64, labels map[string]string) {
|
||||
c.c.Add(ctx, value, metric.WithAttributes(labelsToAttrs(labels)...))
|
||||
attrs := labelsToAttrs(c.labelNames, labels)
|
||||
if attrs == nil {
|
||||
return
|
||||
}
|
||||
c.c.Add(ctx, value, metric.WithAttributes(attrs...))
|
||||
}
|
||||
|
||||
// UpDownCounter wraps an OTel Int64UpDownCounter.
|
||||
type UpDownCounter struct {
|
||||
c metric.Int64UpDownCounter
|
||||
c metric.Int64UpDownCounter
|
||||
labelNames []string
|
||||
}
|
||||
|
||||
// Add adjusts the up-down counter by value.
|
||||
func (u *UpDownCounter) Add(ctx context.Context, value int64, labels map[string]string) {
|
||||
u.c.Add(ctx, value, metric.WithAttributes(labelsToAttrs(labels)...))
|
||||
attrs := labelsToAttrs(u.labelNames, labels)
|
||||
if attrs == nil {
|
||||
return
|
||||
}
|
||||
u.c.Add(ctx, value, metric.WithAttributes(attrs...))
|
||||
}
|
||||
|
||||
// Int64Gauge wraps an OTel Int64Gauge.
|
||||
type Int64Gauge struct {
|
||||
g metric.Int64Gauge
|
||||
g metric.Int64Gauge
|
||||
labelNames []string
|
||||
}
|
||||
|
||||
// Record sets the gauge to value.
|
||||
func (g *Int64Gauge) Record(ctx context.Context, value int64, labels map[string]string) {
|
||||
g.g.Record(ctx, value, metric.WithAttributes(labelsToAttrs(labels)...))
|
||||
attrs := labelsToAttrs(g.labelNames, labels)
|
||||
if attrs == nil {
|
||||
return
|
||||
}
|
||||
g.g.Record(ctx, value, metric.WithAttributes(attrs...))
|
||||
}
|
||||
|
||||
// Float64Gauge wraps an OTel Float64Gauge.
|
||||
type Float64Gauge struct {
|
||||
g metric.Float64Gauge
|
||||
g metric.Float64Gauge
|
||||
labelNames []string
|
||||
}
|
||||
|
||||
// Record sets the gauge to value.
|
||||
func (g *Float64Gauge) Record(ctx context.Context, value float64, labels map[string]string) {
|
||||
g.g.Record(ctx, value, metric.WithAttributes(labelsToAttrs(labels)...))
|
||||
attrs := labelsToAttrs(g.labelNames, labels)
|
||||
if attrs == nil {
|
||||
return
|
||||
}
|
||||
g.g.Record(ctx, value, metric.WithAttributes(attrs...))
|
||||
}
|
||||
|
||||
// Histogram wraps an OTel Float64Histogram.
|
||||
type Histogram struct {
|
||||
h metric.Float64Histogram
|
||||
h metric.Float64Histogram
|
||||
labelNames []string
|
||||
}
|
||||
|
||||
// Record observes value in the histogram.
|
||||
func (h *Histogram) Record(ctx context.Context, value float64, labels map[string]string) {
|
||||
h.h.Record(ctx, value, metric.WithAttributes(labelsToAttrs(labels)...))
|
||||
attrs := labelsToAttrs(h.labelNames, labels)
|
||||
if attrs == nil {
|
||||
return
|
||||
}
|
||||
h.h.Record(ctx, value, metric.WithAttributes(attrs...))
|
||||
}
|
||||
|
||||
@@ -55,7 +55,10 @@ func TestOtelBackendCounter(t *testing.T) {
|
||||
b := newInMemoryBackend(t)
|
||||
defer b.Shutdown(context.Background()) //nolint:errcheck
|
||||
|
||||
c := b.NewCounter("gerbil_test_counter_total", "test counter", "result")
|
||||
c, err := b.NewCounter("gerbil_test_counter_total", "test counter", "result")
|
||||
if err != nil {
|
||||
t.Fatalf("NewCounter returned error: %v", err)
|
||||
}
|
||||
// Should not panic
|
||||
c.Add(context.Background(), 1, map[string]string{"result": "ok"})
|
||||
c.Add(context.Background(), 5, nil)
|
||||
@@ -65,7 +68,10 @@ func TestOtelBackendUpDownCounter(t *testing.T) {
|
||||
b := newInMemoryBackend(t)
|
||||
defer b.Shutdown(context.Background()) //nolint:errcheck
|
||||
|
||||
u := b.NewUpDownCounter("gerbil_test_updown", "test updown", "state")
|
||||
u, err := b.NewUpDownCounter("gerbil_test_updown", "test updown", "state")
|
||||
if err != nil {
|
||||
t.Fatalf("NewUpDownCounter returned error: %v", err)
|
||||
}
|
||||
u.Add(context.Background(), 3, map[string]string{"state": "active"})
|
||||
u.Add(context.Background(), -1, map[string]string{"state": "active"})
|
||||
}
|
||||
@@ -74,7 +80,10 @@ func TestOtelBackendInt64Gauge(t *testing.T) {
|
||||
b := newInMemoryBackend(t)
|
||||
defer b.Shutdown(context.Background()) //nolint:errcheck
|
||||
|
||||
g := b.NewInt64Gauge("gerbil_test_int_gauge", "test gauge")
|
||||
g, err := b.NewInt64Gauge("gerbil_test_int_gauge", "test gauge")
|
||||
if err != nil {
|
||||
t.Fatalf("NewInt64Gauge returned error: %v", err)
|
||||
}
|
||||
g.Record(context.Background(), 42, nil)
|
||||
}
|
||||
|
||||
@@ -82,7 +91,10 @@ func TestOtelBackendFloat64Gauge(t *testing.T) {
|
||||
b := newInMemoryBackend(t)
|
||||
defer b.Shutdown(context.Background()) //nolint:errcheck
|
||||
|
||||
g := b.NewFloat64Gauge("gerbil_test_float_gauge", "test float gauge")
|
||||
g, err := b.NewFloat64Gauge("gerbil_test_float_gauge", "test float gauge")
|
||||
if err != nil {
|
||||
t.Fatalf("NewFloat64Gauge returned error: %v", err)
|
||||
}
|
||||
g.Record(context.Background(), 3.14, nil)
|
||||
}
|
||||
|
||||
@@ -90,8 +102,11 @@ func TestOtelBackendHistogram(t *testing.T) {
|
||||
b := newInMemoryBackend(t)
|
||||
defer b.Shutdown(context.Background()) //nolint:errcheck
|
||||
|
||||
h := b.NewHistogram("gerbil_test_duration_seconds", "test histogram",
|
||||
h, err := b.NewHistogram("gerbil_test_duration_seconds", "test histogram",
|
||||
[]float64{0.1, 0.5, 1.0}, "method")
|
||||
if err != nil {
|
||||
t.Fatalf("NewHistogram returned error: %v", err)
|
||||
}
|
||||
h.Record(context.Background(), 0.3, map[string]string{"method": "GET"})
|
||||
}
|
||||
|
||||
@@ -139,3 +154,22 @@ func TestOtelBackendDeploymentEnvironment(t *testing.T) {
|
||||
}
|
||||
defer b.Shutdown(context.Background()) //nolint:errcheck
|
||||
}
|
||||
|
||||
func TestOtelBackendRejectsInvalidLabelNames(t *testing.T) {
|
||||
b := newInMemoryBackend(t)
|
||||
defer b.Shutdown(context.Background()) //nolint:errcheck
|
||||
|
||||
t.Run("duplicate labels", func(t *testing.T) {
|
||||
_, err := b.NewCounter("gerbil_test_invalid_labels_total", "test counter", "result", "result")
|
||||
if err == nil {
|
||||
t.Fatal("expected error for duplicate label names")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("invalid label name", func(t *testing.T) {
|
||||
_, err := b.NewHistogram("gerbil_test_invalid_histogram", "test histogram", []float64{0.1, 1.0}, "status-code")
|
||||
if err == nil {
|
||||
t.Fatal("expected error for invalid label name")
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
@@ -3,6 +3,8 @@ package otel
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"net/url"
|
||||
"strings"
|
||||
|
||||
"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc"
|
||||
"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp"
|
||||
@@ -11,6 +13,10 @@ import (
|
||||
|
||||
// newExporter creates the appropriate OTLP exporter based on cfg.Protocol.
|
||||
func newExporter(ctx context.Context, cfg Config) (sdkmetric.Exporter, error) {
|
||||
if strings.TrimSpace(cfg.Endpoint) == "" {
|
||||
return nil, fmt.Errorf("otel: cfg.Endpoint is empty")
|
||||
}
|
||||
|
||||
switch cfg.Protocol {
|
||||
case "grpc", "":
|
||||
return newGRPCExporter(ctx, cfg)
|
||||
@@ -36,8 +42,20 @@ func newGRPCExporter(ctx context.Context, cfg Config) (sdkmetric.Exporter, error
|
||||
}
|
||||
|
||||
func newHTTPExporter(ctx context.Context, cfg Config) (sdkmetric.Exporter, error) {
|
||||
opts := []otlpmetrichttp.Option{
|
||||
otlpmetrichttp.WithEndpoint(cfg.Endpoint),
|
||||
endpoint := strings.TrimSpace(cfg.Endpoint)
|
||||
|
||||
opts := make([]otlpmetrichttp.Option, 0, 3)
|
||||
if strings.Contains(endpoint, "://") {
|
||||
parsed, err := url.Parse(endpoint)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("otlp http exporter: parse endpoint URL %q: %w", endpoint, err)
|
||||
}
|
||||
opts = append(opts, otlpmetrichttp.WithEndpointURL(parsed.String()))
|
||||
} else {
|
||||
opts = append(opts,
|
||||
otlpmetrichttp.WithEndpoint(endpoint),
|
||||
otlpmetrichttp.WithURLPath("/v1/metrics"),
|
||||
)
|
||||
}
|
||||
if cfg.Insecure {
|
||||
opts = append(opts, otlpmetrichttp.WithInsecure())
|
||||
|
||||
@@ -3,7 +3,7 @@ package otel
|
||||
import (
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
"go.opentelemetry.io/otel/sdk/resource"
|
||||
semconv "go.opentelemetry.io/otel/semconv/v1.40.0"
|
||||
semconv "go.opentelemetry.io/otel/semconv/v1.18.0"
|
||||
)
|
||||
|
||||
// newResource builds an OTel resource for the Gerbil service.
|
||||
@@ -15,11 +15,11 @@ func newResource(serviceName, serviceVersion, deploymentEnv string) (*resource.R
|
||||
attrs = append(attrs, semconv.ServiceVersion(serviceVersion))
|
||||
}
|
||||
if deploymentEnv != "" {
|
||||
attrs = append(attrs, semconv.DeploymentEnvironmentName(deploymentEnv))
|
||||
attrs = append(attrs, semconv.DeploymentEnvironment(deploymentEnv))
|
||||
}
|
||||
|
||||
return resource.Merge(
|
||||
resource.Default(),
|
||||
resource.NewWithAttributes(semconv.SchemaURL, attrs...),
|
||||
resource.NewSchemaless(attrs...),
|
||||
)
|
||||
}
|
||||
|
||||
@@ -7,6 +7,7 @@ package prometheus
|
||||
|
||||
import (
|
||||
"context"
|
||||
"log"
|
||||
"net/http"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
@@ -30,9 +31,10 @@ type Config struct {
|
||||
// in the backend-specific instrument types that implement the observability
|
||||
// instrument interfaces.
|
||||
type Backend struct {
|
||||
cfg Config
|
||||
registry *prometheus.Registry
|
||||
handler http.Handler
|
||||
cfg Config
|
||||
registry *prometheus.Registry
|
||||
handler http.Handler
|
||||
droppedSamplesCounter prometheus.Counter
|
||||
}
|
||||
|
||||
// New creates and initialises a Prometheus backend.
|
||||
@@ -48,6 +50,11 @@ func New(cfg Config) (*Backend, error) {
|
||||
}
|
||||
|
||||
registry := prometheus.NewRegistry()
|
||||
droppedSamplesCounter := prometheus.NewCounter(prometheus.CounterOpts{
|
||||
Name: "gerbil_dropped_metric_samples_total",
|
||||
Help: "Total number of metric samples dropped due to invalid labels or unsupported label sets",
|
||||
})
|
||||
registry.MustRegister(droppedSamplesCounter)
|
||||
|
||||
// Include Go and process metrics by default.
|
||||
includeGo := cfg.IncludeGoMetrics == nil || *cfg.IncludeGoMetrics
|
||||
@@ -62,7 +69,7 @@ func New(cfg Config) (*Backend, error) {
|
||||
EnableOpenMetrics: false,
|
||||
})
|
||||
|
||||
return &Backend{cfg: cfg, registry: registry, handler: handler}, nil
|
||||
return &Backend{cfg: cfg, registry: registry, handler: handler, droppedSamplesCounter: droppedSamplesCounter}, nil
|
||||
}
|
||||
|
||||
// HTTPHandler returns the Prometheus /metrics HTTP handler.
|
||||
@@ -78,60 +85,107 @@ func (b *Backend) Shutdown(_ context.Context) error {
|
||||
}
|
||||
|
||||
// NewCounter creates a Prometheus CounterVec registered on the backend's registry.
|
||||
func (b *Backend) NewCounter(name, desc string, labelNames ...string) *Counter {
|
||||
func (b *Backend) NewCounter(name, desc string, labelNames ...string) (*Counter, error) {
|
||||
vec := prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||
Name: name,
|
||||
Help: desc,
|
||||
}, labelNames)
|
||||
b.registry.MustRegister(vec)
|
||||
return &Counter{vec: vec}
|
||||
if err := b.registry.Register(vec); err != nil {
|
||||
if are, ok := err.(prometheus.AlreadyRegisteredError); ok {
|
||||
existing, ok := are.ExistingCollector.(*prometheus.CounterVec)
|
||||
if !ok {
|
||||
return nil, err
|
||||
}
|
||||
return &Counter{vec: existing, labelNames: append([]string(nil), labelNames...), droppedSamplesCounter: b.droppedSamplesCounter}, nil
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
return &Counter{vec: vec, labelNames: append([]string(nil), labelNames...), droppedSamplesCounter: b.droppedSamplesCounter}, nil
|
||||
}
|
||||
|
||||
// NewUpDownCounter creates a Prometheus GaugeVec (Prometheus gauges are
|
||||
// bidirectional) registered on the backend's registry.
|
||||
func (b *Backend) NewUpDownCounter(name, desc string, labelNames ...string) *UpDownCounter {
|
||||
func (b *Backend) NewUpDownCounter(name, desc string, labelNames ...string) (*UpDownCounter, error) {
|
||||
vec := prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||
Name: name,
|
||||
Help: desc,
|
||||
}, labelNames)
|
||||
b.registry.MustRegister(vec)
|
||||
return &UpDownCounter{vec: vec}
|
||||
if err := b.registry.Register(vec); err != nil {
|
||||
if are, ok := err.(prometheus.AlreadyRegisteredError); ok {
|
||||
existing, ok := are.ExistingCollector.(*prometheus.GaugeVec)
|
||||
if !ok {
|
||||
return nil, err
|
||||
}
|
||||
return &UpDownCounter{vec: existing, labelNames: append([]string(nil), labelNames...), droppedSamplesCounter: b.droppedSamplesCounter}, nil
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
return &UpDownCounter{vec: vec, labelNames: append([]string(nil), labelNames...), droppedSamplesCounter: b.droppedSamplesCounter}, nil
|
||||
}
|
||||
|
||||
// NewInt64Gauge creates a Prometheus GaugeVec registered on the backend's registry.
|
||||
func (b *Backend) NewInt64Gauge(name, desc string, labelNames ...string) *Int64Gauge {
|
||||
func (b *Backend) NewInt64Gauge(name, desc string, labelNames ...string) (*Int64Gauge, error) {
|
||||
vec := prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||
Name: name,
|
||||
Help: desc,
|
||||
}, labelNames)
|
||||
b.registry.MustRegister(vec)
|
||||
return &Int64Gauge{vec: vec}
|
||||
if err := b.registry.Register(vec); err != nil {
|
||||
if are, ok := err.(prometheus.AlreadyRegisteredError); ok {
|
||||
existing, ok := are.ExistingCollector.(*prometheus.GaugeVec)
|
||||
if !ok {
|
||||
return nil, err
|
||||
}
|
||||
return &Int64Gauge{vec: existing, labelNames: append([]string(nil), labelNames...), droppedSamplesCounter: b.droppedSamplesCounter}, nil
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
return &Int64Gauge{vec: vec, labelNames: append([]string(nil), labelNames...), droppedSamplesCounter: b.droppedSamplesCounter}, nil
|
||||
}
|
||||
|
||||
// NewFloat64Gauge creates a Prometheus GaugeVec registered on the backend's registry.
|
||||
func (b *Backend) NewFloat64Gauge(name, desc string, labelNames ...string) *Float64Gauge {
|
||||
func (b *Backend) NewFloat64Gauge(name, desc string, labelNames ...string) (*Float64Gauge, error) {
|
||||
vec := prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||
Name: name,
|
||||
Help: desc,
|
||||
}, labelNames)
|
||||
b.registry.MustRegister(vec)
|
||||
return &Float64Gauge{vec: vec}
|
||||
if err := b.registry.Register(vec); err != nil {
|
||||
if are, ok := err.(prometheus.AlreadyRegisteredError); ok {
|
||||
existing, ok := are.ExistingCollector.(*prometheus.GaugeVec)
|
||||
if !ok {
|
||||
return nil, err
|
||||
}
|
||||
return &Float64Gauge{vec: existing, labelNames: append([]string(nil), labelNames...), droppedSamplesCounter: b.droppedSamplesCounter}, nil
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
return &Float64Gauge{vec: vec, labelNames: append([]string(nil), labelNames...), droppedSamplesCounter: b.droppedSamplesCounter}, nil
|
||||
}
|
||||
|
||||
// NewHistogram creates a Prometheus HistogramVec registered on the backend's registry.
|
||||
func (b *Backend) NewHistogram(name, desc string, buckets []float64, labelNames ...string) *Histogram {
|
||||
func (b *Backend) NewHistogram(name, desc string, buckets []float64, labelNames ...string) (*Histogram, error) {
|
||||
vec := prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
||||
Name: name,
|
||||
Help: desc,
|
||||
Buckets: buckets,
|
||||
}, labelNames)
|
||||
b.registry.MustRegister(vec)
|
||||
return &Histogram{vec: vec}
|
||||
if err := b.registry.Register(vec); err != nil {
|
||||
if are, ok := err.(prometheus.AlreadyRegisteredError); ok {
|
||||
existing, ok := are.ExistingCollector.(*prometheus.HistogramVec)
|
||||
if !ok {
|
||||
return nil, err
|
||||
}
|
||||
return &Histogram{vec: existing, labelNames: append([]string(nil), labelNames...), droppedSamplesCounter: b.droppedSamplesCounter}, nil
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
return &Histogram{vec: vec, labelNames: append([]string(nil), labelNames...), droppedSamplesCounter: b.droppedSamplesCounter}, nil
|
||||
}
|
||||
|
||||
// Counter is a native Prometheus counter instrument.
|
||||
type Counter struct {
|
||||
vec *prometheus.CounterVec
|
||||
vec *prometheus.CounterVec
|
||||
labelNames []string
|
||||
droppedSamplesCounter prometheus.Counter
|
||||
}
|
||||
|
||||
// Add increments the counter by value for the given labels.
|
||||
@@ -139,47 +193,118 @@ type Counter struct {
|
||||
// value must be non-negative. Negative values are ignored.
|
||||
func (c *Counter) Add(_ context.Context, value int64, labels map[string]string) {
|
||||
if value < 0 {
|
||||
log.Printf("WARN: counter add called with negative value=%d labels=%v expected_labels=%v", value, labels, c.labelNames)
|
||||
return
|
||||
}
|
||||
c.vec.With(prometheus.Labels(labels)).Add(float64(value))
|
||||
normalized, ok := normalizeLabels(c.labelNames, labels, c.droppedSamplesCounter)
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
defer guardMetricPanic("counter", c.labelNames, labels)
|
||||
c.vec.With(normalized).Add(float64(value))
|
||||
}
|
||||
|
||||
// UpDownCounter is a native Prometheus gauge used as a bidirectional counter.
|
||||
type UpDownCounter struct {
|
||||
vec *prometheus.GaugeVec
|
||||
vec *prometheus.GaugeVec
|
||||
labelNames []string
|
||||
droppedSamplesCounter prometheus.Counter
|
||||
}
|
||||
|
||||
// Add adjusts the gauge by value for the given labels.
|
||||
func (u *UpDownCounter) Add(_ context.Context, value int64, labels map[string]string) {
|
||||
u.vec.With(prometheus.Labels(labels)).Add(float64(value))
|
||||
normalized, ok := normalizeLabels(u.labelNames, labels, u.droppedSamplesCounter)
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
defer guardMetricPanic("updown", u.labelNames, labels)
|
||||
u.vec.With(normalized).Add(float64(value))
|
||||
}
|
||||
|
||||
// Int64Gauge is a native Prometheus gauge recording integer snapshot values.
|
||||
type Int64Gauge struct {
|
||||
vec *prometheus.GaugeVec
|
||||
vec *prometheus.GaugeVec
|
||||
labelNames []string
|
||||
droppedSamplesCounter prometheus.Counter
|
||||
}
|
||||
|
||||
// Record sets the gauge to value for the given labels.
|
||||
func (g *Int64Gauge) Record(_ context.Context, value int64, labels map[string]string) {
|
||||
g.vec.With(prometheus.Labels(labels)).Set(float64(value))
|
||||
normalized, ok := normalizeLabels(g.labelNames, labels, g.droppedSamplesCounter)
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
defer guardMetricPanic("int64-gauge", g.labelNames, labels)
|
||||
g.vec.With(normalized).Set(float64(value))
|
||||
}
|
||||
|
||||
// Float64Gauge is a native Prometheus gauge recording float snapshot values.
|
||||
type Float64Gauge struct {
|
||||
vec *prometheus.GaugeVec
|
||||
vec *prometheus.GaugeVec
|
||||
labelNames []string
|
||||
droppedSamplesCounter prometheus.Counter
|
||||
}
|
||||
|
||||
// Record sets the gauge to value for the given labels.
|
||||
func (g *Float64Gauge) Record(_ context.Context, value float64, labels map[string]string) {
|
||||
g.vec.With(prometheus.Labels(labels)).Set(value)
|
||||
normalized, ok := normalizeLabels(g.labelNames, labels, g.droppedSamplesCounter)
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
defer guardMetricPanic("float64-gauge", g.labelNames, labels)
|
||||
g.vec.With(normalized).Set(value)
|
||||
}
|
||||
|
||||
// Histogram is a native Prometheus histogram instrument.
|
||||
type Histogram struct {
|
||||
vec *prometheus.HistogramVec
|
||||
vec *prometheus.HistogramVec
|
||||
labelNames []string
|
||||
droppedSamplesCounter prometheus.Counter
|
||||
}
|
||||
|
||||
// Record observes value for the given labels.
|
||||
func (h *Histogram) Record(_ context.Context, value float64, labels map[string]string) {
|
||||
h.vec.With(prometheus.Labels(labels)).Observe(value)
|
||||
normalized, ok := normalizeLabels(h.labelNames, labels, h.droppedSamplesCounter)
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
defer guardMetricPanic("histogram", h.labelNames, labels)
|
||||
h.vec.With(normalized).Observe(value)
|
||||
}
|
||||
|
||||
func normalizeLabels(labelNames []string, labels map[string]string, droppedSamplesCounter prometheus.Counter) (prometheus.Labels, bool) {
|
||||
if len(labelNames) == 0 {
|
||||
if len(labels) > 0 {
|
||||
if droppedSamplesCounter != nil {
|
||||
droppedSamplesCounter.Inc()
|
||||
}
|
||||
log.Printf("WARN: dropping metric sample due to unexpected labels: got=%v expected=none", labels)
|
||||
return nil, false
|
||||
}
|
||||
return nil, true
|
||||
}
|
||||
|
||||
normalized := make(prometheus.Labels, len(labelNames))
|
||||
for _, name := range labelNames {
|
||||
normalized[name] = ""
|
||||
}
|
||||
|
||||
for k, v := range labels {
|
||||
if _, ok := normalized[k]; !ok {
|
||||
if droppedSamplesCounter != nil {
|
||||
droppedSamplesCounter.Inc()
|
||||
}
|
||||
log.Printf("WARN: dropping metric sample due to unexpected label key %q (expected=%v)", k, labelNames)
|
||||
return nil, false
|
||||
}
|
||||
normalized[k] = v
|
||||
}
|
||||
|
||||
return normalized, true
|
||||
}
|
||||
|
||||
// guardMetricPanic is installed with defer around a metric write. If the
// write panicked (e.g. a label-cardinality mismatch inside a Prometheus
// vec), the panic is swallowed and logged so one bad sample cannot crash
// the process. With no active panic it is a no-op.
func guardMetricPanic(kind string, expected []string, labels map[string]string) {
	recovered := recover()
	if recovered == nil {
		return
	}
	log.Printf("WARN: dropped %s metric sample due to label panic: expected=%v got=%v err=%v", kind, expected, labels, recovered)
}
|
||||
|
||||
@@ -36,7 +36,10 @@ func TestPrometheusBackendShutdown(t *testing.T) {
|
||||
|
||||
func TestPrometheusBackendCounter(t *testing.T) {
|
||||
b := newTestBackend(t)
|
||||
c := b.NewCounter("test_counter_total", "A test counter", "result")
|
||||
c, err := b.NewCounter("test_counter_total", "A test counter", "result")
|
||||
if err != nil {
|
||||
t.Fatalf("NewCounter returned error: %v", err)
|
||||
}
|
||||
c.Add(context.Background(), 3, map[string]string{"result": "ok"})
|
||||
|
||||
body := scrapeMetrics(t, b)
|
||||
@@ -45,7 +48,10 @@ func TestPrometheusBackendCounter(t *testing.T) {
|
||||
|
||||
func TestPrometheusBackendUpDownCounter(t *testing.T) {
|
||||
b := newTestBackend(t)
|
||||
u := b.NewUpDownCounter("test_gauge_total", "A test up-down counter", "state")
|
||||
u, err := b.NewUpDownCounter("test_gauge_total", "A test up-down counter", "state")
|
||||
if err != nil {
|
||||
t.Fatalf("NewUpDownCounter returned error: %v", err)
|
||||
}
|
||||
u.Add(context.Background(), 5, map[string]string{"state": "active"})
|
||||
u.Add(context.Background(), -2, map[string]string{"state": "active"})
|
||||
|
||||
@@ -55,7 +61,10 @@ func TestPrometheusBackendUpDownCounter(t *testing.T) {
|
||||
|
||||
func TestPrometheusBackendInt64Gauge(t *testing.T) {
|
||||
b := newTestBackend(t)
|
||||
g := b.NewInt64Gauge("test_int_gauge", "An integer gauge", "ifname")
|
||||
g, err := b.NewInt64Gauge("test_int_gauge", "An integer gauge", "ifname")
|
||||
if err != nil {
|
||||
t.Fatalf("NewInt64Gauge returned error: %v", err)
|
||||
}
|
||||
g.Record(context.Background(), 42, map[string]string{"ifname": "wg0"})
|
||||
|
||||
body := scrapeMetrics(t, b)
|
||||
@@ -64,7 +73,10 @@ func TestPrometheusBackendInt64Gauge(t *testing.T) {
|
||||
|
||||
func TestPrometheusBackendFloat64Gauge(t *testing.T) {
|
||||
b := newTestBackend(t)
|
||||
g := b.NewFloat64Gauge("test_float_gauge", "A float gauge", "cert")
|
||||
g, err := b.NewFloat64Gauge("test_float_gauge", "A float gauge", "cert")
|
||||
if err != nil {
|
||||
t.Fatalf("NewFloat64Gauge returned error: %v", err)
|
||||
}
|
||||
g.Record(context.Background(), 7.5, map[string]string{"cert": "example.com"})
|
||||
|
||||
body := scrapeMetrics(t, b)
|
||||
@@ -74,7 +86,10 @@ func TestPrometheusBackendFloat64Gauge(t *testing.T) {
|
||||
func TestPrometheusBackendHistogram(t *testing.T) {
|
||||
b := newTestBackend(t)
|
||||
buckets := []float64{0.1, 0.5, 1.0, 5.0}
|
||||
h := b.NewHistogram("test_duration_seconds", "A test histogram", buckets, "method")
|
||||
h, err := b.NewHistogram("test_duration_seconds", "A test histogram", buckets, "method")
|
||||
if err != nil {
|
||||
t.Fatalf("NewHistogram returned error: %v", err)
|
||||
}
|
||||
h.Record(context.Background(), 0.3, map[string]string{"method": "GET"})
|
||||
|
||||
body := scrapeMetrics(t, b)
|
||||
@@ -85,7 +100,10 @@ func TestPrometheusBackendHistogram(t *testing.T) {
|
||||
|
||||
func TestPrometheusBackendMultipleLabels(t *testing.T) {
|
||||
b := newTestBackend(t)
|
||||
c := b.NewCounter("multi_label_total", "Multi-label counter", "method", "route", "status_code")
|
||||
c, err := b.NewCounter("multi_label_total", "Multi-label counter", "method", "route", "status_code")
|
||||
if err != nil {
|
||||
t.Fatalf("NewCounter returned error: %v", err)
|
||||
}
|
||||
c.Add(context.Background(), 1, map[string]string{
|
||||
"method": "POST",
|
||||
"route": "/api/peers",
|
||||
@@ -122,23 +140,29 @@ func TestPrometheusBackendNoGoMetrics(t *testing.T) {
|
||||
func TestPrometheusBackendNilLabels(t *testing.T) {
|
||||
// Adding with nil labels should not panic (treated as empty map).
|
||||
b := newTestBackend(t)
|
||||
c := b.NewCounter("nil_labels_total", "counter with no labels")
|
||||
c, err := b.NewCounter("nil_labels_total", "counter with no labels")
|
||||
if err != nil {
|
||||
t.Fatalf("NewCounter returned error: %v", err)
|
||||
}
|
||||
// nil labels with no label names declared should be safe
|
||||
c.Add(context.Background(), 1, nil)
|
||||
}
|
||||
|
||||
func TestPrometheusBackendConcurrentAdd(t *testing.T) {
|
||||
b := newTestBackend(t)
|
||||
c := b.NewCounter("concurrent_total", "concurrent counter", "worker")
|
||||
c, err := b.NewCounter("concurrent_total", "concurrent counter", "worker")
|
||||
if err != nil {
|
||||
t.Fatalf("NewCounter returned error: %v", err)
|
||||
}
|
||||
|
||||
done := make(chan struct{})
|
||||
for i := 0; i < 10; i++ {
|
||||
go func(_ int) {
|
||||
go func() {
|
||||
for j := 0; j < 100; j++ {
|
||||
c.Add(context.Background(), 1, map[string]string{"worker": "w"})
|
||||
}
|
||||
done <- struct{}{}
|
||||
}(i)
|
||||
}()
|
||||
}
|
||||
for i := 0; i < 10; i++ {
|
||||
<-done
|
||||
@@ -148,6 +172,40 @@ func TestPrometheusBackendConcurrentAdd(t *testing.T) {
|
||||
assertMetricPresent(t, body, `concurrent_total{worker="w"} 1000`)
|
||||
}
|
||||
|
||||
func TestPrometheusBackendAlreadyRegisteredCounter(t *testing.T) {
|
||||
b := newTestBackend(t)
|
||||
c1, err := b.NewCounter("dupe_counter_total", "duplicate counter", "result")
|
||||
if err != nil {
|
||||
t.Fatalf("first NewCounter returned error: %v", err)
|
||||
}
|
||||
c2, err := b.NewCounter("dupe_counter_total", "duplicate counter", "result")
|
||||
if err != nil {
|
||||
t.Fatalf("second NewCounter returned error: %v", err)
|
||||
}
|
||||
|
||||
c1.Add(context.Background(), 1, map[string]string{"result": "ok"})
|
||||
c2.Add(context.Background(), 2, map[string]string{"result": "ok"})
|
||||
|
||||
body := scrapeMetrics(t, b)
|
||||
assertMetricPresent(t, body, `dupe_counter_total{result="ok"} 3`)
|
||||
}
|
||||
|
||||
func TestPrometheusBackendInvalidLabelsNoPanic(t *testing.T) {
|
||||
b := newTestBackend(t)
|
||||
c, err := b.NewCounter("invalid_labels_total", "invalid labels test", "result")
|
||||
if err != nil {
|
||||
t.Fatalf("NewCounter returned error: %v", err)
|
||||
}
|
||||
|
||||
// Extra label key should be dropped and must not panic.
|
||||
c.Add(context.Background(), 5, map[string]string{"result": "ok", "unexpected": "x"})
|
||||
|
||||
body := scrapeMetrics(t, b)
|
||||
if strings.Contains(body, `invalid_labels_total{result="ok"}`) {
|
||||
t.Error("invalid label sample should have been dropped")
|
||||
}
|
||||
}
|
||||
|
||||
// --- helpers ---
|
||||
|
||||
func scrapeMetrics(t *testing.T, b *obsprom.Backend) string {
|
||||
|
||||
17
main.go
17
main.go
@@ -175,6 +175,7 @@ func main() {
|
||||
otelMetricsEndpoint string
|
||||
otelMetricsInsecure bool
|
||||
otelMetricsExportInterval time.Duration
|
||||
otelMetricsTimeout time.Duration
|
||||
)
|
||||
|
||||
interfaceName = os.Getenv("INTERFACE")
|
||||
@@ -229,6 +230,14 @@ func main() {
|
||||
log.Printf("WARN: invalid OTEL_METRICS_EXPORT_INTERVAL=%q: %v", v, err2)
|
||||
}
|
||||
}
|
||||
otelMetricsTimeout = 10 * time.Second // default
|
||||
if v := os.Getenv("OTEL_METRICS_TIMEOUT"); v != "" {
|
||||
if d, err2 := time.ParseDuration(v); err2 == nil {
|
||||
otelMetricsTimeout = d
|
||||
} else {
|
||||
log.Printf("WARN: invalid OTEL_METRICS_TIMEOUT=%q: %v", v, err2)
|
||||
}
|
||||
}
|
||||
|
||||
if interfaceName == "" {
|
||||
flag.StringVar(&interfaceName, "interface", "wg0", "Name of the WireGuard interface")
|
||||
@@ -322,6 +331,7 @@ func main() {
|
||||
flag.StringVar(&otelMetricsEndpoint, "otel-metrics-endpoint", otelMetricsEndpoint, "OTLP collector endpoint (e.g. localhost:4317)")
|
||||
flag.BoolVar(&otelMetricsInsecure, "otel-metrics-insecure", otelMetricsInsecure, "Disable TLS for OTLP connection")
|
||||
flag.DurationVar(&otelMetricsExportInterval, "otel-metrics-export-interval", otelMetricsExportInterval, "Interval between OTLP metric pushes")
|
||||
flag.DurationVar(&otelMetricsTimeout, "otel-metrics-timeout", otelMetricsTimeout, "Timeout for OTLP exporter setup")
|
||||
|
||||
flag.Parse()
|
||||
|
||||
@@ -347,6 +357,7 @@ func main() {
|
||||
Endpoint: otelMetricsEndpoint,
|
||||
Insecure: otelMetricsInsecure,
|
||||
ExportInterval: otelMetricsExportInterval,
|
||||
Timeout: otelMetricsTimeout,
|
||||
},
|
||||
ServiceName: "gerbil",
|
||||
ServiceVersion: "1.0.0",
|
||||
@@ -543,6 +554,8 @@ func main() {
|
||||
|
||||
// Register metrics endpoint only for Prometheus backend.
|
||||
// OTel backend pushes to a collector; no /metrics endpoint needed.
|
||||
// Note: metricsPath is registered directly without httpMetricsMiddleware to prevent infinite recursion.
|
||||
// The metricsHandler must not be wrapped by the middleware, as it would observe its own observation calls.
|
||||
if metricsHandler != nil {
|
||||
http.Handle(metricsPath, metricsHandler)
|
||||
logger.Info("Metrics endpoint enabled at %s", metricsPath)
|
||||
@@ -1162,10 +1175,12 @@ func removePeerInternal(publicKey string) error {
|
||||
|
||||
// Get current peer info before removing to clear relay connections and bandwidth limits
|
||||
var wgIPs []string
|
||||
allowedIPsCount := 0
|
||||
device, err := wgClient.Device(interfaceName)
|
||||
if err == nil {
|
||||
for _, peer := range device.Peers {
|
||||
if peer.PublicKey.String() == publicKey {
|
||||
allowedIPsCount = len(peer.AllowedIPs)
|
||||
// Extract WireGuard IPs from this peer's allowed IPs
|
||||
for _, allowedIP := range peer.AllowedIPs {
|
||||
wgIPs = append(wgIPs, allowedIP.IP.String())
|
||||
@@ -1208,7 +1223,7 @@ func removePeerInternal(publicKey string) error {
|
||||
|
||||
// Record metrics
|
||||
metrics.RecordPeersTotal(interfaceName, -1)
|
||||
metrics.RecordAllowedIPsCount(interfaceName, publicKey, -int64(len(wgIPs)))
|
||||
metrics.RecordAllowedIPsCount(interfaceName, publicKey, -int64(allowedIPsCount))
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -548,7 +548,7 @@ func (p *SNIProxy) handleConnection(clientConn net.Conn) {
|
||||
logger.Debug("SNI extraction failed: %v", err)
|
||||
return
|
||||
}
|
||||
metrics.RecordProxyTLSHandshake(hostname, time.Since(clientHelloStart).Seconds())
|
||||
metrics.RecordProxyTLSHandshake(time.Since(clientHelloStart).Seconds())
|
||||
|
||||
if hostname == "" {
|
||||
log.Println("No SNI hostname found")
|
||||
@@ -596,8 +596,8 @@ func (p *SNIProxy) handleConnection(clientConn net.Conn) {
|
||||
defer targetConn.Close()
|
||||
|
||||
logger.Debug("Connected to target: %s:%d", route.TargetHost, route.TargetPort)
|
||||
metrics.RecordActiveProxyConnection(hostname, 1)
|
||||
defer metrics.RecordActiveProxyConnection(hostname, -1)
|
||||
metrics.RecordActiveProxyConnection(1)
|
||||
defer metrics.RecordActiveProxyConnection(-1)
|
||||
|
||||
// Send PROXY protocol header if enabled
|
||||
if p.proxyProtocol {
|
||||
@@ -655,7 +655,7 @@ func (p *SNIProxy) getRoute(hostname, clientAddr string) (*RouteRecord, error) {
|
||||
// Check local overrides first
|
||||
if _, isOverride := p.localOverrides[hostname]; isOverride {
|
||||
logger.Debug("Local override matched for hostname: %s", hostname)
|
||||
metrics.RecordProxyRouteLookup("local_override", hostname)
|
||||
metrics.RecordProxyRouteLookup("local_override")
|
||||
return &RouteRecord{
|
||||
Hostname: hostname,
|
||||
TargetHost: p.localProxyAddr,
|
||||
@@ -668,7 +668,7 @@ func (p *SNIProxy) getRoute(hostname, clientAddr string) (*RouteRecord, error) {
|
||||
_, isLocal := p.localSNIs[hostname]
|
||||
p.localSNIsLock.RUnlock()
|
||||
if isLocal {
|
||||
metrics.RecordProxyRouteLookup("local", hostname)
|
||||
metrics.RecordProxyRouteLookup("local")
|
||||
return &RouteRecord{
|
||||
Hostname: hostname,
|
||||
TargetHost: p.localProxyAddr,
|
||||
@@ -679,16 +679,16 @@ func (p *SNIProxy) getRoute(hostname, clientAddr string) (*RouteRecord, error) {
|
||||
// Check cache first
|
||||
if cached, found := p.cache.Get(hostname); found {
|
||||
if cached == nil {
|
||||
metrics.RecordProxyRouteLookup("cached_not_found", hostname)
|
||||
metrics.RecordProxyRouteLookup("cached_not_found")
|
||||
return nil, nil // Cached negative result
|
||||
}
|
||||
logger.Debug("Cache hit for hostname: %s", hostname)
|
||||
metrics.RecordProxyRouteLookup("cache_hit", hostname)
|
||||
metrics.RecordProxyRouteLookup("cache_hit")
|
||||
return cached.(*RouteRecord), nil
|
||||
}
|
||||
|
||||
logger.Debug("Cache miss for hostname: %s, querying API", hostname)
|
||||
metrics.RecordProxyRouteLookup("cache_miss", hostname)
|
||||
metrics.RecordProxyRouteLookup("cache_miss")
|
||||
|
||||
// Query API with timeout
|
||||
ctx, cancel := context.WithTimeout(p.ctx, 5*time.Second)
|
||||
@@ -822,7 +822,7 @@ func (p *SNIProxy) pipe(hostname string, clientConn, targetConn net.Conn, client
|
||||
}()
|
||||
|
||||
bytesCopied, err := io.CopyBuffer(targetConn, clientReader, *bufPtr)
|
||||
metrics.RecordProxyBytesTransmitted(hostname, "client_to_target", bytesCopied)
|
||||
metrics.RecordProxyBytesTransmitted("client_to_target", bytesCopied)
|
||||
if err != nil && err != io.EOF {
|
||||
logger.Debug("Copy client->target error: %v", err)
|
||||
}
|
||||
@@ -842,7 +842,7 @@ func (p *SNIProxy) pipe(hostname string, clientConn, targetConn net.Conn, client
|
||||
}()
|
||||
|
||||
bytesCopied, err := io.CopyBuffer(clientConn, targetConn, *bufPtr)
|
||||
metrics.RecordProxyBytesTransmitted(hostname, "target_to_client", bytesCopied)
|
||||
metrics.RecordProxyBytesTransmitted("target_to_client", bytesCopied)
|
||||
if err != nil && err != io.EOF {
|
||||
logger.Debug("Copy target->client error: %v", err)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user