diff --git a/client/cmd/testutil.go b/client/cmd/testutil.go index bbd4c0cb9..988ef8cc0 100644 --- a/client/cmd/testutil.go +++ b/client/cmd/testutil.go @@ -2,21 +2,23 @@ package cmd import ( "context" - "github.com/netbirdio/netbird/management/server/activity" "net" "path/filepath" "testing" "time" + "github.com/netbirdio/netbird/management/server/activity" + "github.com/netbirdio/netbird/util" + "google.golang.org/grpc" + clientProto "github.com/netbirdio/netbird/client/proto" client "github.com/netbirdio/netbird/client/server" mgmtProto "github.com/netbirdio/netbird/management/proto" mgmt "github.com/netbirdio/netbird/management/server" sigProto "github.com/netbirdio/netbird/signal/proto" sig "github.com/netbirdio/netbird/signal/server" - "google.golang.org/grpc" ) func startTestingServices(t *testing.T) string { @@ -63,7 +65,7 @@ func startManagement(t *testing.T, config *mgmt.Config) (*grpc.Server, net.Liste t.Fatal(err) } s := grpc.NewServer() - store, err := mgmt.NewFileStore(config.Datadir) + store, err := mgmt.NewFileStore(config.Datadir, nil) if err != nil { t.Fatal(err) } diff --git a/client/internal/engine_test.go b/client/internal/engine_test.go index 85413e049..9ccfd5f7c 100644 --- a/client/internal/engine_test.go +++ b/client/internal/engine_test.go @@ -3,8 +3,6 @@ package internal import ( "context" "fmt" - "github.com/netbirdio/netbird/iface/bind" - "github.com/pion/transport/v2/stdnet" "net" "net/netip" "os" @@ -15,6 +13,10 @@ import ( "testing" "time" + "github.com/pion/transport/v2/stdnet" + + "github.com/netbirdio/netbird/iface/bind" + log "github.com/sirupsen/logrus" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -1039,7 +1041,7 @@ func startManagement(dataDir string) (*grpc.Server, string, error) { return nil, "", err } s := grpc.NewServer(grpc.KeepaliveEnforcementPolicy(kaep), grpc.KeepaliveParams(kasp)) - store, err := server.NewFileStore(config.Datadir) + store, err := server.NewFileStore(config.Datadir, nil) if err != nil { log.Fatalf("failed creating a store: %s: %v", config.Datadir, err) } diff --git a/management/client/client_test.go b/management/client/client_test.go index 92edd7551..41c5a7257 100644 --- a/management/client/client_test.go +++ b/management/client/client_test.go @@ -2,28 +2,31 @@ package client import ( "context" - "github.com/netbirdio/netbird/management/server/activity" "net" "path/filepath" "sync" "testing" "time" + "github.com/netbirdio/netbird/management/server/activity" + "github.com/netbirdio/netbird/client/system" + log "github.com/sirupsen/logrus" + "github.com/stretchr/testify/assert" + "github.com/netbirdio/netbird/encryption" "github.com/netbirdio/netbird/management/proto" mgmtProto "github.com/netbirdio/netbird/management/proto" mgmt "github.com/netbirdio/netbird/management/server" "github.com/netbirdio/netbird/management/server/mock_server" - log "github.com/sirupsen/logrus" - "github.com/stretchr/testify/assert" - "github.com/netbirdio/netbird/util" "golang.zx2c4.com/wireguard/wgctrl/wgtypes" "google.golang.org/grpc" "google.golang.org/grpc/codes" "google.golang.org/grpc/status" + + "github.com/netbirdio/netbird/util" ) const ValidKey = "A2C8E62B-38F5-4553-B31E-DD66C696CEBB" @@ -50,7 +53,7 @@ func startManagement(t *testing.T) (*grpc.Server, net.Listener) { t.Fatal(err) } s := grpc.NewServer() - store, err := mgmt.NewFileStore(config.Datadir) + store, err := mgmt.NewFileStore(config.Datadir, nil) if err != nil { t.Fatal(err) } diff --git a/management/cmd/management.go b/management/cmd/management.go index 383991928..2efef7093 100644 --- a/management/cmd/management.go +++ b/management/cmd/management.go @@ -121,13 +121,6 @@ var ( return fmt.Errorf("failed creating datadir: %s: %v", config.Datadir, err) } } - - store, err := server.NewFileStore(config.Datadir) - if err != nil { - return fmt.Errorf("failed creating Store: %s: %v", config.Datadir, err) - } - peersUpdateManager := server.NewPeersUpdateManager() - appMetrics, err := telemetry.NewDefaultAppMetrics(cmd.Context()) if err != nil { return err @@ -136,6 +129,11 @@ var ( if err != nil { return err } + store, err := server.NewFileStore(config.Datadir, appMetrics) + if err != nil { + return fmt.Errorf("failed creating Store: %s: %v", config.Datadir, err) + } + peersUpdateManager := server.NewPeersUpdateManager() var idpManager idp.Manager if config.IdpManagerConfig != nil { diff --git a/management/server/account_test.go b/management/server/account_test.go index a8a1806a7..716fde187 100644 --- a/management/server/account_test.go +++ b/management/server/account_test.go @@ -1869,7 +1869,7 @@ func createManager(t *testing.T) (*DefaultAccountManager, error) { func createStore(t *testing.T) (Store, error) { dataDir := t.TempDir() - store, err := NewFileStore(dataDir) + store, err := NewFileStore(dataDir, nil) if err != nil { return nil, err } diff --git a/management/server/dns_test.go b/management/server/dns_test.go index 69a3bf2d9..d3ef464e6 100644 --- a/management/server/dns_test.go +++ b/management/server/dns_test.go @@ -1,10 +1,12 @@ package server import ( + "testing" + + "github.com/stretchr/testify/require" + "github.com/netbirdio/netbird/management/server/activity" "github.com/netbirdio/netbird/management/server/status" - "github.com/stretchr/testify/require" - "testing" ) const ( @@ -190,7 +192,7 @@ func createDNSManager(t *testing.T) (*DefaultAccountManager, error) { func createDNSStore(t *testing.T) (Store, error) { dataDir := t.TempDir() - store, err := NewFileStore(dataDir) + store, err := NewFileStore(dataDir, nil) if err != nil { return nil, err } diff --git a/management/server/file_store.go b/management/server/file_store.go index 1f20ba4fa..e13a3746a 100644 --- a/management/server/file_store.go +++ b/management/server/file_store.go @@ -11,6 +11,7 @@ import ( log "github.com/sirupsen/logrus" "github.com/netbirdio/netbird/management/server/status" + "github.com/netbirdio/netbird/management/server/telemetry" "github.com/netbirdio/netbird/util" ) @@ -37,13 +38,20 @@ type FileStore struct { // sync.Mutex indexed by accountID accountLocks sync.Map `json:"-"` globalAccountLock sync.Mutex `json:"-"` + + metrics telemetry.AppMetrics `json:"-"` } type StoredAccount struct{} // NewFileStore restores a store from the file located in the datadir -func NewFileStore(dataDir string) (*FileStore, error) { - return restore(filepath.Join(dataDir, storeFileName)) +func NewFileStore(dataDir string, metrics telemetry.AppMetrics) (*FileStore, error) { + fs, err := restore(filepath.Join(dataDir, storeFileName)) + if err != nil { + return nil, err + } + fs.metrics = metrics + return fs, nil } // restore the state of the store from the file. @@ -221,7 +229,17 @@ func restore(file string) (*FileStore, error) { // persist account data to a file // It is recommended to call it with locking FileStore.mux func (s *FileStore) persist(file string) error { - return util.WriteJson(file, s) + start := time.Now() + err := util.WriteJson(file, s) + if err != nil { + return err + } + took := time.Since(start) + if s.metrics != nil { + s.metrics.StoreMetrics().CountPersistenceDuration(took) + } + log.Debugf("took %d ms to persist the FileStore", took.Milliseconds()) + return nil } // AcquireGlobalLock acquires global lock across all the accounts and returns a function that releases the lock @@ -235,6 +253,12 @@ func (s *FileStore) AcquireGlobalLock() (unlock func()) { log.Debugf("released global lock in %v", time.Since(start)) } + took := time.Since(start) + log.Debugf("took %v to acquire global lock", took) + if s.metrics != nil { + s.metrics.StoreMetrics().CountGlobalLockAcquisitionDuration(took) + } + return unlock } diff --git a/management/server/file_store_test.go b/management/server/file_store_test.go index 484ee987b..90c5d6dea 100644 --- a/management/server/file_store_test.go +++ b/management/server/file_store_test.go @@ -25,7 +25,7 @@ func TestStalePeerIndices(t *testing.T) { t.Fatal(err) } - store, err := NewFileStore(storeDir) + store, err := NewFileStore(storeDir, nil) if err != nil { return } @@ -172,7 +172,7 @@ func TestStore(t *testing.T) { return } - restored, err := NewFileStore(store.storeFile) + restored, err := NewFileStore(store.storeFile, nil) if err != nil { return } @@ -232,7 +232,7 @@ func TestRestore(t *testing.T) { t.Fatal(err) } - store, err := NewFileStore(storeDir) + store, err := NewFileStore(storeDir, nil) if err != nil { return } @@ -270,7 +270,7 @@ func TestRestorePolicies_Migration(t *testing.T) { t.Fatal(err) } - store, err := NewFileStore(storeDir) + store, err := NewFileStore(storeDir, nil) if err != nil { return } @@ -307,7 +307,7 @@ func TestGetAccountByPrivateDomain(t *testing.T) { t.Fatal(err) } - store, err := NewFileStore(storeDir) + store, err := NewFileStore(storeDir, nil) if err != nil { return } @@ -336,7 +336,7 @@ func TestFileStore_GetAccount(t *testing.T) { t.Fatal(err) } - store, err := NewFileStore(storeDir) + store, err := NewFileStore(storeDir, nil) if err != nil { t.Fatal(err) } @@ -378,7 +378,7 @@ func TestFileStore_GetTokenIDByHashedToken(t *testing.T) { t.Fatal(err) } - store, err := NewFileStore(storeDir) + store, err := NewFileStore(storeDir, nil) if err != nil { t.Fatal(err) } @@ -431,7 +431,7 @@ func TestFileStore_GetTokenIDByHashedToken_Failure(t *testing.T) { t.Fatal(err) } - store, err := NewFileStore(storeDir) + store, err := NewFileStore(storeDir, nil) if err != nil { t.Fatal(err) } @@ -456,7 +456,7 @@ func TestFileStore_GetUserByTokenID(t *testing.T) { t.Fatal(err) } - store, err := NewFileStore(storeDir) + store, err := NewFileStore(storeDir, nil) if err != nil { t.Fatal(err) } @@ -484,7 +484,7 @@ func TestFileStore_GetUserByTokenID_Failure(t *testing.T) { t.Fatal(err) } - store, err := NewFileStore(storeDir) + store, err := NewFileStore(storeDir, nil) if err != nil { t.Fatal(err) } @@ -503,7 +503,7 @@ func TestFileStore_SavePeerStatus(t *testing.T) { t.Fatal(err) } - store, err := NewFileStore(storeDir) + store, err := NewFileStore(storeDir, nil) if err != nil { return } @@ -548,7 +548,7 @@ func TestFileStore_SavePeerStatus(t *testing.T) { } func newStore(t *testing.T) *FileStore { - store, err := NewFileStore(t.TempDir()) + store, err := NewFileStore(t.TempDir(), nil) if err != nil { t.Errorf("failed creating a new store") } diff --git a/management/server/grpcserver.go b/management/server/grpcserver.go index d27a73a8c..9c501c345 100644 --- a/management/server/grpcserver.go +++ b/management/server/grpcserver.go @@ -114,6 +114,7 @@ func (s *GRPCServer) GetServerKey(ctx context.Context, req *proto.Empty) (*proto // Sync validates the existence of a connecting peer, sends an initial state (all available for the connecting peers) and // notifies the connected peer of any updates (e.g. new peers under the same account) func (s *GRPCServer) Sync(req *proto.EncryptedMessage, srv proto.ManagementService_SyncServer) error { + reqStart := time.Now() if s.appMetrics != nil { s.appMetrics.GRPCMetrics().CountSyncRequest() } @@ -148,6 +149,11 @@ func (s *GRPCServer) Sync(req *proto.EncryptedMessage, srv proto.ManagementServi if s.config.TURNConfig.TimeBasedCredentials { s.turnCredentialsManager.SetupRefresh(peer.ID) } + + if s.appMetrics != nil { + s.appMetrics.GRPCMetrics().CountSyncRequestDuration(time.Since(reqStart)) + } + // keep a connection to the peer and send updates when available for { select { @@ -262,6 +268,12 @@ func (s *GRPCServer) parseRequest(req *proto.EncryptedMessage, parsed pb.Message // In case it isn't, the endpoint checks whether setup key is provided within the request and tries to register a peer. // In case of the successful registration login is also successful func (s *GRPCServer) Login(ctx context.Context, req *proto.EncryptedMessage) (*proto.EncryptedMessage, error) { + reqStart := time.Now() + defer func() { + if s.appMetrics != nil { + s.appMetrics.GRPCMetrics().CountLoginRequestDuration(time.Since(reqStart)) + } + }() if s.appMetrics != nil { s.appMetrics.GRPCMetrics().CountLoginRequest() } diff --git a/management/server/management_proto_test.go b/management/server/management_proto_test.go index f6ef54544..6855c84bd 100644 --- a/management/server/management_proto_test.go +++ b/management/server/management_proto_test.go @@ -2,7 +2,6 @@ package server import ( "context" - "github.com/netbirdio/netbird/management/server/activity" "net" "os" "path/filepath" @@ -10,14 +9,17 @@ import ( "testing" "time" - "github.com/netbirdio/netbird/encryption" - mgmtProto "github.com/netbirdio/netbird/management/proto" - "github.com/netbirdio/netbird/util" + "github.com/netbirdio/netbird/management/server/activity" + "github.com/stretchr/testify/require" "golang.zx2c4.com/wireguard/wgctrl/wgtypes" "google.golang.org/grpc" "google.golang.org/grpc/credentials/insecure" "google.golang.org/grpc/keepalive" + + "github.com/netbirdio/netbird/encryption" + mgmtProto "github.com/netbirdio/netbird/management/proto" + "github.com/netbirdio/netbird/util" ) var ( @@ -408,7 +410,7 @@ func startManagement(t *testing.T, config *Config) (*grpc.Server, string, error) return nil, "", err } s := grpc.NewServer(grpc.KeepaliveEnforcementPolicy(kaep), grpc.KeepaliveParams(kasp)) - store, err := NewFileStore(config.Datadir) + store, err := NewFileStore(config.Datadir, nil) if err != nil { return nil, "", err } diff --git a/management/server/management_test.go b/management/server/management_test.go index d74b8e24d..7af2535f8 100644 --- a/management/server/management_test.go +++ b/management/server/management_test.go @@ -496,7 +496,7 @@ func startServer(config *server.Config) (*grpc.Server, net.Listener) { Expect(err).NotTo(HaveOccurred()) s := grpc.NewServer() - store, err := server.NewFileStore(config.Datadir) + store, err := server.NewFileStore(config.Datadir, nil) if err != nil { log.Fatalf("failed creating a store: %s: %v", config.Datadir, err) } diff --git a/management/server/nameserver_test.go b/management/server/nameserver_test.go index fde144b26..3a5c34431 100644 --- a/management/server/nameserver_test.go +++ b/management/server/nameserver_test.go @@ -1,11 +1,13 @@ package server import ( - nbdns "github.com/netbirdio/netbird/dns" - "github.com/netbirdio/netbird/management/server/activity" - "github.com/stretchr/testify/require" "net/netip" "testing" + + "github.com/stretchr/testify/require" + + nbdns "github.com/netbirdio/netbird/dns" + "github.com/netbirdio/netbird/management/server/activity" ) const ( @@ -1064,7 +1066,7 @@ func createNSManager(t *testing.T) (*DefaultAccountManager, error) { func createNSStore(t *testing.T) (Store, error) { dataDir := t.TempDir() - store, err := NewFileStore(dataDir) + store, err := NewFileStore(dataDir, nil) if err != nil { return nil, err } diff --git a/management/server/route_test.go b/management/server/route_test.go index 5145fadeb..a6cd3035e 100644 --- a/management/server/route_test.go +++ b/management/server/route_test.go @@ -4,10 +4,11 @@ import ( "net/netip" "testing" - "github.com/netbirdio/netbird/management/server/activity" - "github.com/netbirdio/netbird/route" "github.com/rs/xid" "github.com/stretchr/testify/require" + + "github.com/netbirdio/netbird/management/server/activity" + "github.com/netbirdio/netbird/route" ) const ( @@ -946,7 +947,7 @@ func createRouterManager(t *testing.T) (*DefaultAccountManager, error) { func createRouterStore(t *testing.T) (Store, error) { dataDir := t.TempDir() - store, err := NewFileStore(dataDir) + store, err := NewFileStore(dataDir, nil) if err != nil { return nil, err } diff --git a/management/server/telemetry/app_metrics.go b/management/server/telemetry/app_metrics.go index 8686a0b3d..de5d278b9 100644 --- a/management/server/telemetry/app_metrics.go +++ b/management/server/telemetry/app_metrics.go @@ -3,6 +3,10 @@ package telemetry import ( "context" "fmt" + "net" + "net/http" + "reflect" + "github.com/gorilla/mux" prometheus2 "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promhttp" @@ -10,9 +14,6 @@ import ( "go.opentelemetry.io/otel/exporters/prometheus" metric2 "go.opentelemetry.io/otel/metric" "go.opentelemetry.io/otel/sdk/metric" - "net" - "net/http" - "reflect" ) const defaultEndpoint = "/metrics" @@ -25,6 +26,7 @@ type MockAppMetrics struct { IDPMetricsFunc func() *IDPMetrics HTTPMiddlewareFunc func() *HTTPMiddleware GRPCMetricsFunc func() *GRPCMetrics + StoreMetricsFunc func() *StoreMetrics } // GetMeter mocks the GetMeter function of the AppMetrics interface @@ -75,6 +77,14 @@ func (mock *MockAppMetrics) GRPCMetrics() *GRPCMetrics { return nil } +// StoreMetrics mocks the MockAppMetrics function of the StoreMetrics interface +func (mock *MockAppMetrics) StoreMetrics() *StoreMetrics { + if mock.StoreMetricsFunc != nil { + return mock.StoreMetricsFunc() + } + return nil +} + // AppMetrics is metrics interface type AppMetrics interface { GetMeter() metric2.Meter @@ -83,6 +93,7 @@ type AppMetrics interface { IDPMetrics() *IDPMetrics HTTPMiddleware() *HTTPMiddleware GRPCMetrics() *GRPCMetrics + StoreMetrics() *StoreMetrics } // defaultAppMetrics are core application metrics based on OpenTelemetry https://opentelemetry.io/ @@ -94,6 +105,7 @@ type defaultAppMetrics struct { idpMetrics *IDPMetrics httpMiddleware *HTTPMiddleware grpcMetrics *GRPCMetrics + storeMetrics *StoreMetrics } // IDPMetrics returns metrics for the idp package @@ -111,6 +123,11 @@ func (appMetrics *defaultAppMetrics) GRPCMetrics() *GRPCMetrics { return appMetrics.grpcMetrics } +// StoreMetrics returns metrics for the store +func (appMetrics *defaultAppMetrics) StoreMetrics() *StoreMetrics { + return appMetrics.storeMetrics +} + // Close stop application metrics HTTP handler and closes listener. func (appMetrics *defaultAppMetrics) Close() error { if appMetrics.listener == nil { @@ -171,11 +188,17 @@ func NewDefaultAppMetrics(ctx context.Context) (AppMetrics, error) { if err != nil { return nil, err } + grpcMetrics, err := NewGRPCMetrics(ctx, meter) if err != nil { return nil, err } + storeMetrics, err := NewStoreMetrics(ctx, meter) + if err != nil { + return nil, err + } + return &defaultAppMetrics{Meter: meter, ctx: ctx, idpMetrics: idpMetrics, httpMiddleware: middleware, - grpcMetrics: grpcMetrics}, nil + grpcMetrics: grpcMetrics, storeMetrics: storeMetrics}, nil } diff --git a/management/server/telemetry/grpc_metrics.go b/management/server/telemetry/grpc_metrics.go index f4495ce4e..4ca592179 100644 --- a/management/server/telemetry/grpc_metrics.go +++ b/management/server/telemetry/grpc_metrics.go @@ -2,6 +2,8 @@ package telemetry import ( "context" + "time" + "go.opentelemetry.io/otel/metric" "go.opentelemetry.io/otel/metric/instrument" "go.opentelemetry.io/otel/metric/instrument/asyncint64" @@ -15,6 +17,8 @@ type GRPCMetrics struct { loginRequestsCounter syncint64.Counter getKeyRequestsCounter syncint64.Counter activeStreamsGauge asyncint64.Gauge + syncRequestDuration syncint64.Histogram + loginRequestDuration syncint64.Histogram ctx context.Context } @@ -38,12 +42,24 @@ func NewGRPCMetrics(ctx context.Context, meter metric.Meter) (*GRPCMetrics, erro return nil, err } + syncRequestDuration, err := meter.SyncInt64().Histogram("management.grpc.sync.request.duration.ms", instrument.WithUnit("milliseconds")) + if err != nil { + return nil, err + } + + loginRequestDuration, err := meter.SyncInt64().Histogram("management.grpc.login.request.duration.ms", instrument.WithUnit("milliseconds")) + if err != nil { + return nil, err + } + return &GRPCMetrics{ meter: meter, syncRequestsCounter: syncRequestsCounter, loginRequestsCounter: loginRequestsCounter, getKeyRequestsCounter: getKeyRequestsCounter, activeStreamsGauge: activeStreamsGauge, + syncRequestDuration: syncRequestDuration, + loginRequestDuration: loginRequestDuration, ctx: ctx, }, err } @@ -63,6 +79,16 @@ func (grpcMetrics *GRPCMetrics) CountLoginRequest() { grpcMetrics.loginRequestsCounter.Add(grpcMetrics.ctx, 1) } +// CountLoginRequestDuration counts the duration of the login gRPC requests +func (grpcMetrics *GRPCMetrics) CountLoginRequestDuration(duration time.Duration) { + grpcMetrics.loginRequestDuration.Record(grpcMetrics.ctx, duration.Milliseconds()) +} + +// CountSyncRequestDuration counts the duration of the sync gRPC requests +func (grpcMetrics *GRPCMetrics) CountSyncRequestDuration(duration time.Duration) { + grpcMetrics.syncRequestDuration.Record(grpcMetrics.ctx, duration.Milliseconds()) +} + // RegisterConnectedStreams registers a function that collects number of active streams and feeds it to the metrics gauge. func (grpcMetrics *GRPCMetrics) RegisterConnectedStreams(producer func() int64) error { return grpcMetrics.meter.RegisterCallback( diff --git a/management/server/telemetry/http_api_metrics.go b/management/server/telemetry/http_api_metrics.go index 652ac92a9..e40b8c3a2 100644 --- a/management/server/telemetry/http_api_metrics.go +++ b/management/server/telemetry/http_api_metrics.go @@ -3,18 +3,21 @@ package telemetry import ( "context" "fmt" + "hash/fnv" + "net/http" + "strings" + time "time" + log "github.com/sirupsen/logrus" "go.opentelemetry.io/otel/metric" "go.opentelemetry.io/otel/metric/instrument" "go.opentelemetry.io/otel/metric/instrument/syncint64" - "hash/fnv" - "net/http" - "strings" ) const ( httpRequestCounterPrefix = "management.http.request.counter" httpResponseCounterPrefix = "management.http.response.counter" + httpRequestDurationPrefix = "management.http.request.duration.ms" ) // WrappedResponseWriter is a wrapper for http.ResponseWriter that allows the @@ -51,9 +54,9 @@ func (rw *WrappedResponseWriter) WriteHeader(code int) { type HTTPMiddleware struct { meter metric.Meter ctx context.Context - // defaultEndpoint & method + // all HTTP requests by endpoint & method httpRequestCounters map[string]syncint64.Counter - // defaultEndpoint & method & status code + // all HTTP responses by endpoint & method & status code httpResponseCounters map[string]syncint64.Counter // all HTTP requests totalHTTPRequestsCounter syncint64.Counter @@ -61,6 +64,48 @@ type HTTPMiddleware struct { totalHTTPResponseCounter syncint64.Counter // all HTTP responses by status code totalHTTPResponseCodeCounters map[int]syncint64.Counter + // all HTTP requests durations by endpoint and method + httpRequestDurations map[string]syncint64.Histogram + // all HTTP requests durations + totalHTTPRequestDuration syncint64.Histogram +} + +// NewMetricsMiddleware creates a new HTTPMiddleware +func NewMetricsMiddleware(ctx context.Context, meter metric.Meter) (*HTTPMiddleware, error) { + + totalHTTPRequestsCounter, err := meter.SyncInt64().Counter( + fmt.Sprintf("%s_total", httpRequestCounterPrefix), + instrument.WithUnit("1")) + if err != nil { + return nil, err + } + totalHTTPResponseCounter, err := meter.SyncInt64().Counter( + fmt.Sprintf("%s_total", httpResponseCounterPrefix), + instrument.WithUnit("1")) + if err != nil { + return nil, err + } + + totalHTTPRequestDuration, err := meter.SyncInt64().Histogram( + fmt.Sprintf("%s_total", httpRequestDurationPrefix), + instrument.WithUnit("milliseconds")) + + if err != nil { + return nil, err + } + + return &HTTPMiddleware{ + ctx: ctx, + httpRequestCounters: map[string]syncint64.Counter{}, + httpResponseCounters: map[string]syncint64.Counter{}, + httpRequestDurations: map[string]syncint64.Histogram{}, + totalHTTPResponseCodeCounters: map[int]syncint64.Counter{}, + meter: meter, + totalHTTPRequestsCounter: totalHTTPRequestsCounter, + totalHTTPResponseCounter: totalHTTPResponseCounter, + totalHTTPRequestDuration: totalHTTPRequestDuration, + }, + nil } // AddHTTPRequestResponseCounter adds a new meter for an HTTP defaultEndpoint and Method (GET, POST, etc) @@ -72,6 +117,12 @@ func (m *HTTPMiddleware) AddHTTPRequestResponseCounter(endpoint string, method s return err } m.httpRequestCounters[meterKey] = httpReqCounter + durationKey := getRequestDurationKey(endpoint, method) + requestDuration, err := m.meter.SyncInt64().Histogram(durationKey, instrument.WithUnit("milliseconds")) + if err != nil { + return err + } + m.httpRequestDurations[durationKey] = requestDuration respCodes := []int{200, 204, 400, 401, 403, 404, 500, 502, 503} for _, code := range respCodes { meterKey = getResponseCounterKey(endpoint, method, code) @@ -92,38 +143,16 @@ func (m *HTTPMiddleware) AddHTTPRequestResponseCounter(endpoint string, method s return nil } -// NewMetricsMiddleware creates a new HTTPMiddleware -func NewMetricsMiddleware(ctx context.Context, meter metric.Meter) (*HTTPMiddleware, error) { - - totalHTTPRequestsCounter, err := meter.SyncInt64().Counter( - fmt.Sprintf("%s_total", httpRequestCounterPrefix), - instrument.WithUnit("1")) - if err != nil { - return nil, err - } - totalHTTPResponseCounter, err := meter.SyncInt64().Counter( - fmt.Sprintf("%s_total", httpResponseCounterPrefix), - instrument.WithUnit("1")) - if err != nil { - return nil, err - } - return &HTTPMiddleware{ - ctx: ctx, - httpRequestCounters: map[string]syncint64.Counter{}, - httpResponseCounters: map[string]syncint64.Counter{}, - totalHTTPResponseCodeCounters: map[int]syncint64.Counter{}, - meter: meter, - totalHTTPRequestsCounter: totalHTTPRequestsCounter, - totalHTTPResponseCounter: totalHTTPResponseCounter, - }, - nil -} - func getRequestCounterKey(endpoint, method string) string { return fmt.Sprintf("%s%s_%s", httpRequestCounterPrefix, strings.ReplaceAll(endpoint, "/", "_"), method) } +func getRequestDurationKey(endpoint, method string) string { + return fmt.Sprintf("%s%s_%s", httpRequestDurationPrefix, + strings.ReplaceAll(endpoint, "/", "_"), method) +} + func getResponseCounterKey(endpoint, method string, status int) string { return fmt.Sprintf("%s%s_%s_%d", httpResponseCounterPrefix, strings.ReplaceAll(endpoint, "/", "_"), method, status) @@ -132,6 +161,10 @@ func getResponseCounterKey(endpoint, method string, status int) string { // Handler logs every request and response and adds the, to metrics. func (m *HTTPMiddleware) Handler(h http.Handler) http.Handler { fn := func(rw http.ResponseWriter, r *http.Request) { + reqStart := time.Now() + defer func() { + m.totalHTTPRequestDuration.Record(m.ctx, time.Since(reqStart).Milliseconds()) + }() traceID := hash(fmt.Sprintf("%v", r)) log.Tracef("HTTP request %v: %v %v", traceID, r.Method, r.URL) @@ -161,6 +194,14 @@ func (m *HTTPMiddleware) Handler(h http.Handler) http.Handler { if c, ok := m.totalHTTPResponseCodeCounters[w.Status()]; ok { c.Add(m.ctx, 1) } + + durationKey := getRequestDurationKey(r.URL.Path, r.Method) + reqTook := time.Since(reqStart) + if c, ok := m.httpRequestDurations[durationKey]; ok { + c.Record(m.ctx, reqTook.Milliseconds()) + } + log.Debugf("request %s %s took %d ms", r.Method, r.URL.Path, reqTook.Milliseconds()) + } return http.HandlerFunc(fn) diff --git a/management/server/telemetry/store_metrics.go b/management/server/telemetry/store_metrics.go new file mode 100644 index 000000000..704ef65d4 --- /dev/null +++ b/management/server/telemetry/store_metrics.go @@ -0,0 +1,47 @@ +package telemetry + +import ( + "context" + "time" + + "go.opentelemetry.io/otel/metric" + "go.opentelemetry.io/otel/metric/instrument" + "go.opentelemetry.io/otel/metric/instrument/syncint64" +) + +// StoreMetrics represents all metrics related to the FileStore +type StoreMetrics struct { + globalLockAcquisitionDuration syncint64.Histogram + persistenceDuration syncint64.Histogram + ctx context.Context +} + +// NewStoreMetrics creates an instance of StoreMetrics +func NewStoreMetrics(ctx context.Context, meter metric.Meter) (*StoreMetrics, error) { + globalLockAcquisitionDuration, err := meter.SyncInt64().Histogram("management.store.global.lock.acquisition.duration.micro", + instrument.WithUnit("microseconds")) + if err != nil { + return nil, err + } + persistenceDuration, err := meter.SyncInt64().Histogram("management.store.persistence.duration.micro", + instrument.WithUnit("microseconds")) + if err != nil { + return nil, err + } + + return &StoreMetrics{ + globalLockAcquisitionDuration: globalLockAcquisitionDuration, + persistenceDuration: persistenceDuration, + ctx: ctx, + }, nil +} + +// CountGlobalLockAcquisitionDuration counts the duration of the global lock acquisition +func (metrics *StoreMetrics) CountGlobalLockAcquisitionDuration(duration time.Duration) { + metrics.globalLockAcquisitionDuration.Record(metrics.ctx, duration.Microseconds()) +} + +// CountPersistenceDuration counts the duration of a store persistence operation +func (metrics *StoreMetrics) CountPersistenceDuration(duration time.Duration) { + metrics.persistenceDuration.Record(metrics.ctx, duration.Microseconds()) +}