diff --git a/management/server/telemetry/grpc_metrics.go b/management/server/telemetry/grpc_metrics.go index 16ad5c61c..9840b4b32 100644 --- a/management/server/telemetry/grpc_metrics.go +++ b/management/server/telemetry/grpc_metrics.go @@ -9,18 +9,21 @@ import ( ) const AccountIDLabel = "account_id" +const HighLatencyThreshold = time.Second * 7 // GRPCMetrics are gRPC server metrics type GRPCMetrics struct { - meter metric.Meter - syncRequestsCounter metric.Int64Counter - loginRequestsCounter metric.Int64Counter - getKeyRequestsCounter metric.Int64Counter - activeStreamsGauge metric.Int64ObservableGauge - syncRequestDuration metric.Int64Histogram - loginRequestDuration metric.Int64Histogram - channelQueueLength metric.Int64Histogram - ctx context.Context + meter metric.Meter + syncRequestsCounter metric.Int64Counter + syncRequestHighLatencyCounter metric.Int64Counter + loginRequestsCounter metric.Int64Counter + loginRequestHighLatencyCounter metric.Int64Counter + getKeyRequestsCounter metric.Int64Counter + activeStreamsGauge metric.Int64ObservableGauge + syncRequestDuration metric.Int64Histogram + loginRequestDuration metric.Int64Histogram + channelQueueLength metric.Int64Histogram + ctx context.Context } // NewGRPCMetrics creates new GRPCMetrics struct and registers common metrics of the gRPC server @@ -33,6 +36,14 @@ func NewGRPCMetrics(ctx context.Context, meter metric.Meter) (*GRPCMetrics, erro return nil, err } + syncRequestHighLatencyCounter, err := meter.Int64Counter("management.grpc.sync.request.high.latency.counter", + metric.WithUnit("1"), + metric.WithDescription("Number of sync gRPC requests from the peers that took longer than the threshold to establish a connection and receive network map updates (update channel)"), + ) + if err != nil { + return nil, err + } + loginRequestsCounter, err := meter.Int64Counter("management.grpc.login.request.counter", metric.WithUnit("1"), metric.WithDescription("Number of login gRPC requests from the peers to authenticate and receive initial configuration and relay credentials"), @@ -41,6 +52,14 @@ func NewGRPCMetrics(ctx context.Context, meter metric.Meter) (*GRPCMetrics, erro return nil, err } + loginRequestHighLatencyCounter, err := meter.Int64Counter("management.grpc.login.request.high.latency.counter", + metric.WithUnit("1"), + metric.WithDescription("Number of login gRPC requests from the peers that took longer than the threshold to authenticate and receive initial configuration and relay credentials"), + ) + if err != nil { + return nil, err + } + getKeyRequestsCounter, err := meter.Int64Counter("management.grpc.key.request.counter", metric.WithUnit("1"), metric.WithDescription("Number of key gRPC requests from the peers to get the server's public WireGuard key"), @@ -86,15 +105,17 @@ func NewGRPCMetrics(ctx context.Context, meter metric.Meter) (*GRPCMetrics, erro } return &GRPCMetrics{ - meter: meter, - syncRequestsCounter: syncRequestsCounter, - loginRequestsCounter: loginRequestsCounter, - getKeyRequestsCounter: getKeyRequestsCounter, - activeStreamsGauge: activeStreamsGauge, - syncRequestDuration: syncRequestDuration, - loginRequestDuration: loginRequestDuration, - channelQueueLength: channelQueue, - ctx: ctx, + meter: meter, + syncRequestsCounter: syncRequestsCounter, + syncRequestHighLatencyCounter: syncRequestHighLatencyCounter, + loginRequestsCounter: loginRequestsCounter, + loginRequestHighLatencyCounter: loginRequestHighLatencyCounter, + getKeyRequestsCounter: getKeyRequestsCounter, + activeStreamsGauge: activeStreamsGauge, + syncRequestDuration: syncRequestDuration, + loginRequestDuration: loginRequestDuration, + channelQueueLength: channelQueue, + ctx: ctx, }, err } @@ -115,12 +136,18 @@ func (grpcMetrics *GRPCMetrics) CountLoginRequest() { // CountLoginRequestDuration counts the duration of the login gRPC requests func (grpcMetrics *GRPCMetrics) CountLoginRequestDuration(duration time.Duration, accountID string) { - grpcMetrics.loginRequestDuration.Record(grpcMetrics.ctx, duration.Milliseconds(), metric.WithAttributes(attribute.String(AccountIDLabel, accountID))) + grpcMetrics.loginRequestDuration.Record(grpcMetrics.ctx, duration.Milliseconds()) + if duration > HighLatencyThreshold { + grpcMetrics.loginRequestHighLatencyCounter.Add(grpcMetrics.ctx, 1, metric.WithAttributes(attribute.String(AccountIDLabel, accountID))) + } } // CountSyncRequestDuration counts the duration of the sync gRPC requests func (grpcMetrics *GRPCMetrics) CountSyncRequestDuration(duration time.Duration, accountID string) { - grpcMetrics.syncRequestDuration.Record(grpcMetrics.ctx, duration.Milliseconds(), metric.WithAttributes(attribute.String(AccountIDLabel, accountID))) + grpcMetrics.syncRequestDuration.Record(grpcMetrics.ctx, duration.Milliseconds()) + if duration > HighLatencyThreshold { + grpcMetrics.syncRequestHighLatencyCounter.Add(grpcMetrics.ctx, 1, metric.WithAttributes(attribute.String(AccountIDLabel, accountID))) + } } // RegisterConnectedStreams registers a function that collects number of active streams and feeds it to the metrics gauge.