From f3493ee0420253129b1ffa07d7349fc406e91682 Mon Sep 17 00:00:00 2001
From: Alisdair MacLeod
Date: Wed, 11 Feb 2026 14:56:39 +0000
Subject: [PATCH] add basic metrics for stress testing

Expose Prometheus metrics from the proxy:

- count and time every request served by the HTTPS handler and track
  how many requests are in flight;
- time each backend round trip made by the NetBird round tripper;
- serve the registry at /metrics on the health probe port.

The in-flight figure is a gauge that is decremented in a defer, so it
goes back down when a request finishes, including when the wrapped
handler panics.
---
 proxy/internal/health/health.go          | 14 ++++-
 proxy/internal/metrics/metrics.go        | 67 ++++++++++++++++++++++++
 proxy/internal/roundtrip/netbird.go      | 29 ++++++----
 proxy/internal/roundtrip/netbird_test.go |  2 +-
 proxy/server.go                          | 13 +++--
 5 files changed, 110 insertions(+), 15 deletions(-)
 create mode 100644 proxy/internal/metrics/metrics.go

diff --git a/proxy/internal/health/health.go b/proxy/internal/health/health.go
index bef968d27..d8c882687 100644
--- a/proxy/internal/health/health.go
+++ b/proxy/internal/health/health.go
@@ -300,14 +300,24 @@ func NewChecker(logger *log.Logger, provider clientProvider) *Checker {
 }
 
 // NewServer creates a new health probe server.
-func NewServer(addr string, checker *Checker, logger *log.Logger) *Server {
+// If metricsHandler is non-nil, it is mounted at /metrics on the same port.
+func NewServer(addr string, checker *Checker, logger *log.Logger, metricsHandler http.Handler) *Server {
 	if logger == nil {
 		logger = log.StandardLogger()
 	}
+
+	handler := checker.Handler()
+	if metricsHandler != nil {
+		mux := http.NewServeMux()
+		mux.Handle("/metrics", metricsHandler)
+		mux.Handle("/", handler)
+		handler = mux
+	}
+
 	return &Server{
 		server: &http.Server{
 			Addr:         addr,
-			Handler:      checker.Handler(),
+			Handler:      handler,
 			ReadTimeout:  5 * time.Second,
 			WriteTimeout: 5 * time.Second,
 		},
diff --git a/proxy/internal/metrics/metrics.go b/proxy/internal/metrics/metrics.go
new file mode 100644
index 000000000..8e6d46054
--- /dev/null
+++ b/proxy/internal/metrics/metrics.go
@@ -0,0 +1,67 @@
+// Package metrics provides Prometheus instrumentation for the netbird proxy.
+package metrics
+
+import (
+	"net/http"
+	"time"
+
+	"github.com/prometheus/client_golang/prometheus"
+	"github.com/prometheus/client_golang/prometheus/promauto"
+)
+
+// Metrics holds the Prometheus collectors recorded by the netbird proxy.
+type Metrics struct {
+	requestsTotal   prometheus.Counter
+	requestDuration prometheus.Histogram
+	activeRequests  prometheus.Gauge
+	backendDuration prometheus.Histogram
+}
+
+// New creates the proxy collectors and registers them with reg.
+// promauto panics if a collector cannot be registered (e.g. on a duplicate name).
+func New(reg prometheus.Registerer) *Metrics {
+	return &Metrics{
+		requestsTotal: promauto.With(reg).NewCounter(prometheus.CounterOpts{
+			Name: "netbird_proxy_requests_total",
+			Help: "Total number of requests made to the netbird proxy",
+		}),
+		requestDuration: promauto.With(reg).NewHistogram(prometheus.HistogramOpts{
+			Name:    "netbird_proxy_request_duration_seconds",
+			Help:    "Duration of requests made to the netbird proxy",
+			Buckets: []float64{0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10},
+		}),
+		activeRequests: promauto.With(reg).NewGauge(prometheus.GaugeOpts{
+			Name: "netbird_proxy_active_requests",
+			Help: "Current in-flight requests handled by the netbird proxy",
+		}),
+		backendDuration: promauto.With(reg).NewHistogram(prometheus.HistogramOpts{
+			Name:    "netbird_proxy_backend_duration_seconds",
+			Help:    "Duration of peer round trip time from the netbird proxy",
+			Buckets: []float64{0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10},
+		}),
+	}
+}
+
+// Middleware wraps next, counting every request, tracking the number of
+// in-flight requests and observing how long next takes to serve each one.
+func (m *Metrics) Middleware(next http.Handler) http.Handler {
+	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		m.requestsTotal.Inc()
+		m.activeRequests.Inc()
+
+		start := time.Now()
+		// Deferred so the in-flight gauge is still decremented when next
+		// panics (e.g. with http.ErrAbortHandler to abort a response).
+		defer func() {
+			m.activeRequests.Dec()
+			m.requestDuration.Observe(time.Since(start).Seconds())
+		}()
+
+		next.ServeHTTP(w, r)
+	})
+}
+
+// CompleteRoundTrip records t as the duration of one backend round trip.
+func (m *Metrics) CompleteRoundTrip(t time.Duration) {
+	m.backendDuration.Observe(t.Seconds())
+}
diff --git a/proxy/internal/roundtrip/netbird.go b/proxy/internal/roundtrip/netbird.go
index 02297dd3f..76f3cc8a7 100644
--- a/proxy/internal/roundtrip/netbird.go
+++ b/proxy/internal/roundtrip/netbird.go
@@ -55,6 +55,9 @@ type managementClient interface {
 	CreateProxyPeer(ctx context.Context, req *proto.CreateProxyPeerRequest, opts ...grpc.CallOption) (*proto.CreateProxyPeerResponse, error)
 }
 
+// backendMetricRecorder is called with the duration of a backend round trip.
+type backendMetricRecorder func(duration time.Duration)
+
 // NetBird provides an http.RoundTripper implementation
 // backed by underlying NetBird connections.
 // Clients are keyed by AccountID, allowing multiple domains to share the same connection.
@@ -70,6 +73,8 @@ type NetBird struct {
 	clients        map[types.AccountID]*clientEntry
 	initLogOnce    sync.Once
 	statusNotifier statusNotifier
+
+	recordBackendDuration backendMetricRecorder
 }
 
 // ClientDebugInfo contains debug information about a client.
@@ -375,6 +380,10 @@ func (n *NetBird) RoundTrip(req *http.Request) (*http.Response, error) {
 	resp, err := transport.RoundTrip(req)
 	duration := time.Since(start)
 
+	if n.recordBackendDuration != nil {
+		n.recordBackendDuration(duration)
+	}
+
 	if err != nil {
 		n.logger.Debugf("roundtrip: method=%s host=%s url=%s account=%s duration=%s err=%v",
 			req.Method, req.Host, req.URL.String(), accountID, duration.Truncate(time.Millisecond), err)
@@ -483,19 +492,21 @@ func (n *NetBird) ListClientsForStartup() map[types.AccountID]*embed.Client {
 // NewNetBird creates a new NetBird transport. Set wgPort to 0 for a random
 // OS-assigned port. A fixed port only works with single-account deployments;
 // multiple accounts will fail to bind the same port.
-func NewNetBird(mgmtAddr, proxyID, proxyAddr string, wgPort int, logger *log.Logger, notifier statusNotifier, mgmtClient managementClient) *NetBird {
+// If metric is non-nil, RoundTrip calls it with the duration of each backend round trip.
+func NewNetBird(mgmtAddr, proxyID, proxyAddr string, wgPort int, logger *log.Logger, notifier statusNotifier, mgmtClient managementClient, metric backendMetricRecorder) *NetBird {
 	if logger == nil {
 		logger = log.StandardLogger()
 	}
 	return &NetBird{
-		mgmtAddr:       mgmtAddr,
-		proxyID:        proxyID,
-		proxyAddr:      proxyAddr,
-		wgPort:         wgPort,
-		logger:         logger,
-		clients:        make(map[types.AccountID]*clientEntry),
-		statusNotifier: notifier,
-		mgmtClient:     mgmtClient,
+		mgmtAddr:              mgmtAddr,
+		proxyID:               proxyID,
+		proxyAddr:             proxyAddr,
+		wgPort:                wgPort,
+		logger:                logger,
+		clients:               make(map[types.AccountID]*clientEntry),
+		statusNotifier:        notifier,
+		mgmtClient:            mgmtClient,
+		recordBackendDuration: metric,
 	}
 }
 
diff --git a/proxy/internal/roundtrip/netbird_test.go b/proxy/internal/roundtrip/netbird_test.go
index b1cdc7ab2..39d04be20 100644
--- a/proxy/internal/roundtrip/netbird_test.go
+++ b/proxy/internal/roundtrip/netbird_test.go
@@ -23,7 +23,7 @@ func (m *mockMgmtClient) CreateProxyPeer(_ context.Context, _ *proto.CreateProxy
 // mockNetBird creates a NetBird instance for testing without actually connecting.
 // It uses an invalid management URL to prevent real connections.
 func mockNetBird() *NetBird {
-	return NewNetBird("http://invalid.test:9999", "test-proxy", 0, nil, nil, &mockMgmtClient{})
+	return NewNetBird("http://invalid.test:9999", "test-proxy", "localhost", 0, nil, nil, &mockMgmtClient{}, nil)
 }
 
 func TestNetBird_AddPeer_CreatesClientForNewAccount(t *testing.T) {
diff --git a/proxy/server.go b/proxy/server.go
index 4bbb8ac79..a817b729e 100644
--- a/proxy/server.go
+++ b/proxy/server.go
@@ -23,6 +23,8 @@ import (
 	"time"
 
 	"github.com/cenkalti/backoff/v4"
+	"github.com/prometheus/client_golang/prometheus"
+	"github.com/prometheus/client_golang/prometheus/promhttp"
 	log "github.com/sirupsen/logrus"
 	"google.golang.org/grpc"
 	"google.golang.org/grpc/credentials"
@@ -38,6 +40,7 @@ import (
 	proxygrpc "github.com/netbirdio/netbird/proxy/internal/grpc"
 	"github.com/netbirdio/netbird/proxy/internal/health"
 	"github.com/netbirdio/netbird/proxy/internal/k8s"
+	"github.com/netbirdio/netbird/proxy/internal/metrics"
 	"github.com/netbirdio/netbird/proxy/internal/proxy"
 	"github.com/netbirdio/netbird/proxy/internal/roundtrip"
 	"github.com/netbirdio/netbird/proxy/internal/types"
@@ -146,6 +149,10 @@ func (s *Server) ListenAndServe(ctx context.Context, addr string) (err error) {
 		s.Logger = log.StandardLogger()
 	}
 
+	// Start up metrics gathering
+	reg := prometheus.NewRegistry()
+	meter := metrics.New(reg)
+
 	// The very first thing to do should be to connect to the Management server.
 	// Without this connection, the Proxy cannot do anything.
 	mgmtURL, err := url.Parse(s.ManagementAddress)
@@ -192,7 +199,7 @@ func (s *Server) ListenAndServe(ctx context.Context, addr string) (err error) {
 
 	// Initialize the netbird client, this is required to build peer connections
 	// to proxy over.
-	s.netbird = roundtrip.NewNetBird(s.ManagementAddress, s.ID, s.ProxyURL, s.WireguardPort, s.Logger, s, s.mgmtClient)
+	s.netbird = roundtrip.NewNetBird(s.ManagementAddress, s.ID, s.ProxyURL, s.WireguardPort, s.Logger, s, s.mgmtClient, meter.CompleteRoundTrip)
 
 	// When generating ACME certificates, start a challenge server.
 	tlsConfig := &tls.Config{}
@@ -271,7 +278,7 @@ func (s *Server) ListenAndServe(ctx context.Context, addr string) (err error) {
 	if healthAddr == "" {
 		healthAddr = "localhost:8080"
 	}
-	s.healthServer = health.NewServer(healthAddr, s.healthChecker, s.Logger)
+	s.healthServer = health.NewServer(healthAddr, s.healthChecker, s.Logger, promhttp.HandlerFor(reg, promhttp.HandlerOpts{}))
 	healthListener, err := net.Listen("tcp", healthAddr)
 	if err != nil {
 		return fmt.Errorf("health probe server listen on %s: %w", healthAddr, err)
@@ -285,7 +292,7 @@ func (s *Server) ListenAndServe(ctx context.Context, addr string) (err error) {
 	// Start the reverse proxy HTTPS server.
 	s.https = &http.Server{
 		Addr:      addr,
-		Handler:   accessLog.Middleware(web.AssetHandler(s.auth.Protect(s.proxy))),
+		Handler:   meter.Middleware(accessLog.Middleware(web.AssetHandler(s.auth.Protect(s.proxy)))),
 		TLSConfig: tlsConfig,
 	}
 