mirror of
https://github.com/netbirdio/netbird.git
synced 2026-04-16 07:16:38 +00:00
switch proxy to use opentelemetry
This commit is contained in:
@@ -1,64 +1,90 @@
|
||||
package metrics
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net/http"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/client_golang/prometheus/promauto"
|
||||
"go.opentelemetry.io/otel/metric"
|
||||
|
||||
"github.com/netbirdio/netbird/proxy/internal/proxy"
|
||||
"github.com/netbirdio/netbird/proxy/internal/responsewriter"
|
||||
)
|
||||
|
||||
type Metrics struct {
|
||||
requestsTotal prometheus.Counter
|
||||
activeRequests prometheus.Gauge
|
||||
configuredDomains prometheus.Gauge
|
||||
pathsPerDomain *prometheus.GaugeVec
|
||||
requestDuration *prometheus.HistogramVec
|
||||
backendDuration *prometheus.HistogramVec
|
||||
ctx context.Context
|
||||
requestsTotal metric.Int64Counter
|
||||
activeRequests metric.Int64UpDownCounter
|
||||
configuredDomains metric.Int64UpDownCounter
|
||||
totalPaths metric.Int64UpDownCounter
|
||||
requestDuration metric.Int64Histogram
|
||||
backendDuration metric.Int64Histogram
|
||||
}
|
||||
|
||||
func New(reg prometheus.Registerer) *Metrics {
|
||||
promFactory := promauto.With(reg)
|
||||
return &Metrics{
|
||||
requestsTotal: promFactory.NewCounter(prometheus.CounterOpts{
|
||||
Name: "netbird_proxy_requests_total",
|
||||
Help: "Total number of requests made to the netbird proxy",
|
||||
}),
|
||||
activeRequests: promFactory.NewGauge(prometheus.GaugeOpts{
|
||||
Name: "netbird_proxy_active_requests_count",
|
||||
Help: "Current in-flight requests handled by the netbird proxy",
|
||||
}),
|
||||
configuredDomains: promFactory.NewGauge(prometheus.GaugeOpts{
|
||||
Name: "netbird_proxy_domains_count",
|
||||
Help: "Current number of domains configured on the netbird proxy",
|
||||
}),
|
||||
pathsPerDomain: promFactory.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Name: "netbird_proxy_paths_count",
|
||||
Help: "Current number of paths configured on the netbird proxy labelled by domain",
|
||||
},
|
||||
[]string{"domain"},
|
||||
),
|
||||
requestDuration: promFactory.NewHistogramVec(
|
||||
prometheus.HistogramOpts{
|
||||
Name: "netbird_proxy_request_duration_seconds",
|
||||
Help: "Duration of requests made to the netbird proxy",
|
||||
Buckets: []float64{0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10},
|
||||
},
|
||||
[]string{"status", "size", "method", "host", "path"},
|
||||
),
|
||||
backendDuration: promFactory.NewHistogramVec(prometheus.HistogramOpts{
|
||||
Name: "netbird_proxy_backend_duration_seconds",
|
||||
Help: "Duration of peer round trip time from the netbird proxy",
|
||||
Buckets: []float64{0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10},
|
||||
},
|
||||
[]string{"status", "size", "method", "host", "path"},
|
||||
),
|
||||
func New(ctx context.Context, meter metric.Meter) (*Metrics, error) {
|
||||
requestsTotal, err := meter.Int64Counter(
|
||||
"proxy.http.request.counter",
|
||||
metric.WithUnit("1"),
|
||||
metric.WithDescription("Total number of requests made to the netbird proxy"),
|
||||
)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
activeRequests, err := meter.Int64UpDownCounter(
|
||||
"proxy.http.active_requests",
|
||||
metric.WithUnit("1"),
|
||||
metric.WithDescription("Current in-flight requests handled by the netbird proxy"),
|
||||
)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
configuredDomains, err := meter.Int64UpDownCounter(
|
||||
"proxy.domains.count",
|
||||
metric.WithUnit("1"),
|
||||
metric.WithDescription("Current number of domains configured on the netbird proxy"),
|
||||
)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
totalPaths, err := meter.Int64UpDownCounter(
|
||||
"proxy.paths.count",
|
||||
metric.WithUnit("1"),
|
||||
metric.WithDescription("Total number of paths configured on the netbird proxy"),
|
||||
)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
requestDuration, err := meter.Int64Histogram(
|
||||
"proxy.http.request.duration.ms",
|
||||
metric.WithUnit("milliseconds"),
|
||||
metric.WithDescription("Duration of requests made to the netbird proxy"),
|
||||
)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
backendDuration, err := meter.Int64Histogram(
|
||||
"proxy.backend.duration.ms",
|
||||
metric.WithUnit("milliseconds"),
|
||||
metric.WithDescription("Duration of peer round trip time from the netbird proxy"),
|
||||
)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &Metrics{
|
||||
ctx: ctx,
|
||||
requestsTotal: requestsTotal,
|
||||
activeRequests: activeRequests,
|
||||
configuredDomains: configuredDomains,
|
||||
totalPaths: totalPaths,
|
||||
requestDuration: requestDuration,
|
||||
backendDuration: backendDuration,
|
||||
}, nil
|
||||
}
|
||||
|
||||
type responseInterceptor struct {
|
||||
@@ -80,8 +106,8 @@ func (w *responseInterceptor) Write(b []byte) (int, error) {
|
||||
|
||||
func (m *Metrics) Middleware(next http.Handler) http.Handler {
|
||||
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
m.requestsTotal.Inc()
|
||||
m.activeRequests.Inc()
|
||||
m.requestsTotal.Add(m.ctx, 1)
|
||||
m.activeRequests.Add(m.ctx, 1)
|
||||
|
||||
interceptor := &responseInterceptor{PassthroughWriter: responsewriter.New(w)}
|
||||
|
||||
@@ -89,14 +115,8 @@ func (m *Metrics) Middleware(next http.Handler) http.Handler {
|
||||
next.ServeHTTP(interceptor, r)
|
||||
duration := time.Since(start)
|
||||
|
||||
m.activeRequests.Desc()
|
||||
m.requestDuration.With(prometheus.Labels{
|
||||
"status": strconv.Itoa(interceptor.status),
|
||||
"size": strconv.Itoa(interceptor.size),
|
||||
"method": r.Method,
|
||||
"host": r.Host,
|
||||
"path": r.URL.Path,
|
||||
}).Observe(duration.Seconds())
|
||||
m.activeRequests.Add(m.ctx, -1)
|
||||
m.requestDuration.Record(m.ctx, duration.Milliseconds())
|
||||
})
|
||||
}
|
||||
|
||||
@@ -108,44 +128,22 @@ func (f roundTripperFunc) RoundTrip(r *http.Request) (*http.Response, error) {
|
||||
|
||||
func (m *Metrics) RoundTripper(next http.RoundTripper) http.RoundTripper {
|
||||
return roundTripperFunc(func(req *http.Request) (*http.Response, error) {
|
||||
labels := prometheus.Labels{
|
||||
"method": req.Method,
|
||||
"host": req.Host,
|
||||
// Fill potentially empty labels with default values to avoid cardinality issues.
|
||||
"path": "/",
|
||||
"status": "0",
|
||||
"size": "0",
|
||||
}
|
||||
if req.URL != nil {
|
||||
labels["path"] = req.URL.Path
|
||||
}
|
||||
|
||||
start := time.Now()
|
||||
res, err := next.RoundTrip(req)
|
||||
duration := time.Since(start)
|
||||
|
||||
// Not all labels will be available if there was an error.
|
||||
if res != nil {
|
||||
labels["status"] = strconv.Itoa(res.StatusCode)
|
||||
labels["size"] = strconv.Itoa(int(res.ContentLength))
|
||||
}
|
||||
|
||||
m.backendDuration.With(labels).Observe(duration.Seconds())
|
||||
m.backendDuration.Record(m.ctx, duration.Milliseconds())
|
||||
|
||||
return res, err
|
||||
})
|
||||
}
|
||||
|
||||
func (m *Metrics) AddMapping(mapping proxy.Mapping) {
|
||||
m.configuredDomains.Inc()
|
||||
m.pathsPerDomain.With(prometheus.Labels{
|
||||
"domain": mapping.Host,
|
||||
}).Set(float64(len(mapping.Paths)))
|
||||
m.configuredDomains.Add(m.ctx, 1)
|
||||
m.totalPaths.Add(m.ctx, int64(len(mapping.Paths)))
|
||||
}
|
||||
|
||||
func (m *Metrics) RemoveMapping(mapping proxy.Mapping) {
|
||||
m.configuredDomains.Dec()
|
||||
m.pathsPerDomain.With(prometheus.Labels{
|
||||
"domain": mapping.Host,
|
||||
}).Set(0)
|
||||
m.configuredDomains.Add(m.ctx, -1)
|
||||
m.totalPaths.Add(m.ctx, -int64(len(mapping.Paths)))
|
||||
}
|
||||
|
||||
@@ -1,13 +1,17 @@
|
||||
package metrics_test
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/google/go-cmp/cmp"
|
||||
"go.opentelemetry.io/otel/exporters/prometheus"
|
||||
"go.opentelemetry.io/otel/sdk/metric"
|
||||
|
||||
"github.com/netbirdio/netbird/proxy/internal/metrics"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
type testRoundTripper struct {
|
||||
@@ -47,7 +51,19 @@ func TestMetrics_RoundTripper(t *testing.T) {
|
||||
},
|
||||
}
|
||||
|
||||
m := metrics.New(prometheus.NewRegistry())
|
||||
exporter, err := prometheus.New()
|
||||
if err != nil {
|
||||
t.Fatalf("create prometheus exporter: %v", err)
|
||||
}
|
||||
|
||||
provider := metric.NewMeterProvider(metric.WithReader(exporter))
|
||||
pkg := reflect.TypeOf(metrics.Metrics{}).PkgPath()
|
||||
meter := provider.Meter(pkg)
|
||||
|
||||
m, err := metrics.New(context.Background(), meter)
|
||||
if err != nil {
|
||||
t.Fatalf("create metrics: %v", err)
|
||||
}
|
||||
|
||||
for name, test := range tests {
|
||||
t.Run(name, func(t *testing.T) {
|
||||
|
||||
@@ -19,14 +19,17 @@ import (
|
||||
"net/netip"
|
||||
"net/url"
|
||||
"path/filepath"
|
||||
"reflect"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/cenkalti/backoff/v4"
|
||||
"github.com/pires/go-proxyproto"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
prometheus2 "github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/client_golang/prometheus/promhttp"
|
||||
log "github.com/sirupsen/logrus"
|
||||
"go.opentelemetry.io/otel/exporters/prometheus"
|
||||
"go.opentelemetry.io/otel/sdk/metric"
|
||||
"google.golang.org/grpc"
|
||||
"google.golang.org/grpc/credentials"
|
||||
"google.golang.org/grpc/credentials/insecure"
|
||||
@@ -42,7 +45,7 @@ import (
|
||||
proxygrpc "github.com/netbirdio/netbird/proxy/internal/grpc"
|
||||
"github.com/netbirdio/netbird/proxy/internal/health"
|
||||
"github.com/netbirdio/netbird/proxy/internal/k8s"
|
||||
"github.com/netbirdio/netbird/proxy/internal/metrics"
|
||||
proxymetrics "github.com/netbirdio/netbird/proxy/internal/metrics"
|
||||
"github.com/netbirdio/netbird/proxy/internal/proxy"
|
||||
"github.com/netbirdio/netbird/proxy/internal/roundtrip"
|
||||
"github.com/netbirdio/netbird/proxy/internal/types"
|
||||
@@ -63,7 +66,7 @@ type Server struct {
|
||||
debug *http.Server
|
||||
healthServer *health.Server
|
||||
healthChecker *health.Checker
|
||||
meter *metrics.Metrics
|
||||
meter *proxymetrics.Metrics
|
||||
|
||||
// hijackTracker tracks hijacked connections (e.g. WebSocket upgrades)
|
||||
// so they can be closed during graceful shutdown, since http.Server.Shutdown
|
||||
@@ -152,8 +155,19 @@ func (s *Server) NotifyCertificateIssued(ctx context.Context, accountID, service
|
||||
func (s *Server) ListenAndServe(ctx context.Context, addr string) (err error) {
|
||||
s.initDefaults()
|
||||
|
||||
reg := prometheus.NewRegistry()
|
||||
s.meter = metrics.New(reg)
|
||||
exporter, err := prometheus.New()
|
||||
if err != nil {
|
||||
return fmt.Errorf("create prometheus exporter: %w", err)
|
||||
}
|
||||
|
||||
provider := metric.NewMeterProvider(metric.WithReader(exporter))
|
||||
pkg := reflect.TypeOf(Server{}).PkgPath()
|
||||
meter := provider.Meter(pkg)
|
||||
|
||||
s.meter, err = proxymetrics.New(ctx, meter)
|
||||
if err != nil {
|
||||
return fmt.Errorf("create metrics: %w", err)
|
||||
}
|
||||
|
||||
mgmtConn, err := s.dialManagement()
|
||||
if err != nil {
|
||||
@@ -193,7 +207,7 @@ func (s *Server) ListenAndServe(ctx context.Context, addr string) (err error) {
|
||||
|
||||
s.startDebugEndpoint()
|
||||
|
||||
if err := s.startHealthServer(reg); err != nil {
|
||||
if err := s.startHealthServer(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -284,12 +298,12 @@ func (s *Server) startDebugEndpoint() {
|
||||
}
|
||||
|
||||
// startHealthServer launches the health probe and metrics server.
|
||||
func (s *Server) startHealthServer(reg *prometheus.Registry) error {
|
||||
func (s *Server) startHealthServer() error {
|
||||
healthAddr := s.HealthAddress
|
||||
if healthAddr == "" {
|
||||
healthAddr = defaultHealthAddr
|
||||
}
|
||||
s.healthServer = health.NewServer(healthAddr, s.healthChecker, s.Logger, promhttp.HandlerFor(reg, promhttp.HandlerOpts{}))
|
||||
s.healthServer = health.NewServer(healthAddr, s.healthChecker, s.Logger, promhttp.HandlerFor(prometheus2.DefaultGatherer, promhttp.HandlerOpts{EnableOpenMetrics: true}))
|
||||
healthListener, err := net.Listen("tcp", healthAddr)
|
||||
if err != nil {
|
||||
return fmt.Errorf("health probe server listen on %s: %w", healthAddr, err)
|
||||
|
||||
Reference in New Issue
Block a user