Add telemetry to measure app durations (#878)

This commit is contained in:
Misha Bragin
2023-05-19 11:42:25 +02:00
committed by GitHub
parent 8b78209ae5
commit 03a42de5a0
17 changed files with 271 additions and 86 deletions

View File

@@ -3,6 +3,10 @@ package telemetry
import (
"context"
"fmt"
"net"
"net/http"
"reflect"
"github.com/gorilla/mux"
prometheus2 "github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
@@ -10,9 +14,6 @@ import (
"go.opentelemetry.io/otel/exporters/prometheus"
metric2 "go.opentelemetry.io/otel/metric"
"go.opentelemetry.io/otel/sdk/metric"
"net"
"net/http"
"reflect"
)
const defaultEndpoint = "/metrics"
@@ -25,6 +26,7 @@ type MockAppMetrics struct {
IDPMetricsFunc func() *IDPMetrics
HTTPMiddlewareFunc func() *HTTPMiddleware
GRPCMetricsFunc func() *GRPCMetrics
StoreMetricsFunc func() *StoreMetrics
}
// GetMeter mocks the GetMeter function of the AppMetrics interface
@@ -75,6 +77,14 @@ func (mock *MockAppMetrics) GRPCMetrics() *GRPCMetrics {
return nil
}
// StoreMetrics mocks the MockAppMetrics function of the StoreMetrics interface
func (mock *MockAppMetrics) StoreMetrics() *StoreMetrics {
if mock.StoreMetricsFunc != nil {
return mock.StoreMetricsFunc()
}
return nil
}
// AppMetrics is metrics interface
type AppMetrics interface {
GetMeter() metric2.Meter
@@ -83,6 +93,7 @@ type AppMetrics interface {
IDPMetrics() *IDPMetrics
HTTPMiddleware() *HTTPMiddleware
GRPCMetrics() *GRPCMetrics
StoreMetrics() *StoreMetrics
}
// defaultAppMetrics are core application metrics based on OpenTelemetry https://opentelemetry.io/
@@ -94,6 +105,7 @@ type defaultAppMetrics struct {
idpMetrics *IDPMetrics
httpMiddleware *HTTPMiddleware
grpcMetrics *GRPCMetrics
storeMetrics *StoreMetrics
}
// IDPMetrics returns metrics for the idp package
@@ -111,6 +123,11 @@ func (appMetrics *defaultAppMetrics) GRPCMetrics() *GRPCMetrics {
return appMetrics.grpcMetrics
}
// StoreMetrics returns metrics for the store
func (appMetrics *defaultAppMetrics) StoreMetrics() *StoreMetrics {
return appMetrics.storeMetrics
}
// Close stop application metrics HTTP handler and closes listener.
func (appMetrics *defaultAppMetrics) Close() error {
if appMetrics.listener == nil {
@@ -171,11 +188,17 @@ func NewDefaultAppMetrics(ctx context.Context) (AppMetrics, error) {
if err != nil {
return nil, err
}
grpcMetrics, err := NewGRPCMetrics(ctx, meter)
if err != nil {
return nil, err
}
storeMetrics, err := NewStoreMetrics(ctx, meter)
if err != nil {
return nil, err
}
return &defaultAppMetrics{Meter: meter, ctx: ctx, idpMetrics: idpMetrics, httpMiddleware: middleware,
grpcMetrics: grpcMetrics}, nil
grpcMetrics: grpcMetrics, storeMetrics: storeMetrics}, nil
}

View File

@@ -2,6 +2,8 @@ package telemetry
import (
"context"
"time"
"go.opentelemetry.io/otel/metric"
"go.opentelemetry.io/otel/metric/instrument"
"go.opentelemetry.io/otel/metric/instrument/asyncint64"
@@ -15,6 +17,8 @@ type GRPCMetrics struct {
loginRequestsCounter syncint64.Counter
getKeyRequestsCounter syncint64.Counter
activeStreamsGauge asyncint64.Gauge
syncRequestDuration syncint64.Histogram
loginRequestDuration syncint64.Histogram
ctx context.Context
}
@@ -38,12 +42,24 @@ func NewGRPCMetrics(ctx context.Context, meter metric.Meter) (*GRPCMetrics, erro
return nil, err
}
syncRequestDuration, err := meter.SyncInt64().Histogram("management.grpc.sync.request.duration.ms", instrument.WithUnit("milliseconds"))
if err != nil {
return nil, err
}
loginRequestDuration, err := meter.SyncInt64().Histogram("management.grpc.login.request.duration.ms", instrument.WithUnit("milliseconds"))
if err != nil {
return nil, err
}
return &GRPCMetrics{
meter: meter,
syncRequestsCounter: syncRequestsCounter,
loginRequestsCounter: loginRequestsCounter,
getKeyRequestsCounter: getKeyRequestsCounter,
activeStreamsGauge: activeStreamsGauge,
syncRequestDuration: syncRequestDuration,
loginRequestDuration: loginRequestDuration,
ctx: ctx,
}, err
}
@@ -63,6 +79,16 @@ func (grpcMetrics *GRPCMetrics) CountLoginRequest() {
grpcMetrics.loginRequestsCounter.Add(grpcMetrics.ctx, 1)
}
// CountLoginRequestDuration counts the duration of the login gRPC requests
func (grpcMetrics *GRPCMetrics) CountLoginRequestDuration(duration time.Duration) {
grpcMetrics.loginRequestDuration.Record(grpcMetrics.ctx, duration.Milliseconds())
}
// CountSyncRequestDuration counts the duration of the sync gRPC requests
func (grpcMetrics *GRPCMetrics) CountSyncRequestDuration(duration time.Duration) {
grpcMetrics.syncRequestDuration.Record(grpcMetrics.ctx, duration.Milliseconds())
}
// RegisterConnectedStreams registers a function that collects number of active streams and feeds it to the metrics gauge.
func (grpcMetrics *GRPCMetrics) RegisterConnectedStreams(producer func() int64) error {
return grpcMetrics.meter.RegisterCallback(

View File

@@ -3,18 +3,21 @@ package telemetry
import (
"context"
"fmt"
"hash/fnv"
"net/http"
"strings"
time "time"
log "github.com/sirupsen/logrus"
"go.opentelemetry.io/otel/metric"
"go.opentelemetry.io/otel/metric/instrument"
"go.opentelemetry.io/otel/metric/instrument/syncint64"
"hash/fnv"
"net/http"
"strings"
)
const (
httpRequestCounterPrefix = "management.http.request.counter"
httpResponseCounterPrefix = "management.http.response.counter"
httpRequestDurationPrefix = "management.http.request.duration.ms"
)
// WrappedResponseWriter is a wrapper for http.ResponseWriter that allows the
@@ -51,9 +54,9 @@ func (rw *WrappedResponseWriter) WriteHeader(code int) {
type HTTPMiddleware struct {
meter metric.Meter
ctx context.Context
// defaultEndpoint & method
// all HTTP requests by endpoint & method
httpRequestCounters map[string]syncint64.Counter
// defaultEndpoint & method & status code
// all HTTP responses by endpoint & method & status code
httpResponseCounters map[string]syncint64.Counter
// all HTTP requests
totalHTTPRequestsCounter syncint64.Counter
@@ -61,6 +64,48 @@ type HTTPMiddleware struct {
totalHTTPResponseCounter syncint64.Counter
// all HTTP responses by status code
totalHTTPResponseCodeCounters map[int]syncint64.Counter
// all HTTP requests durations by endpoint and method
httpRequestDurations map[string]syncint64.Histogram
// all HTTP requests durations
totalHTTPRequestDuration syncint64.Histogram
}
// NewMetricsMiddleware creates a new HTTPMiddleware
func NewMetricsMiddleware(ctx context.Context, meter metric.Meter) (*HTTPMiddleware, error) {
totalHTTPRequestsCounter, err := meter.SyncInt64().Counter(
fmt.Sprintf("%s_total", httpRequestCounterPrefix),
instrument.WithUnit("1"))
if err != nil {
return nil, err
}
totalHTTPResponseCounter, err := meter.SyncInt64().Counter(
fmt.Sprintf("%s_total", httpResponseCounterPrefix),
instrument.WithUnit("1"))
if err != nil {
return nil, err
}
totalHTTPRequestDuration, err := meter.SyncInt64().Histogram(
fmt.Sprintf("%s_total", httpRequestDurationPrefix),
instrument.WithUnit("milliseconds"))
if err != nil {
return nil, err
}
return &HTTPMiddleware{
ctx: ctx,
httpRequestCounters: map[string]syncint64.Counter{},
httpResponseCounters: map[string]syncint64.Counter{},
httpRequestDurations: map[string]syncint64.Histogram{},
totalHTTPResponseCodeCounters: map[int]syncint64.Counter{},
meter: meter,
totalHTTPRequestsCounter: totalHTTPRequestsCounter,
totalHTTPResponseCounter: totalHTTPResponseCounter,
totalHTTPRequestDuration: totalHTTPRequestDuration,
},
nil
}
// AddHTTPRequestResponseCounter adds a new meter for an HTTP defaultEndpoint and Method (GET, POST, etc)
@@ -72,6 +117,12 @@ func (m *HTTPMiddleware) AddHTTPRequestResponseCounter(endpoint string, method s
return err
}
m.httpRequestCounters[meterKey] = httpReqCounter
durationKey := getRequestDurationKey(endpoint, method)
requestDuration, err := m.meter.SyncInt64().Histogram(durationKey, instrument.WithUnit("milliseconds"))
if err != nil {
return err
}
m.httpRequestDurations[durationKey] = requestDuration
respCodes := []int{200, 204, 400, 401, 403, 404, 500, 502, 503}
for _, code := range respCodes {
meterKey = getResponseCounterKey(endpoint, method, code)
@@ -92,38 +143,16 @@ func (m *HTTPMiddleware) AddHTTPRequestResponseCounter(endpoint string, method s
return nil
}
// NewMetricsMiddleware creates a new HTTPMiddleware
func NewMetricsMiddleware(ctx context.Context, meter metric.Meter) (*HTTPMiddleware, error) {
totalHTTPRequestsCounter, err := meter.SyncInt64().Counter(
fmt.Sprintf("%s_total", httpRequestCounterPrefix),
instrument.WithUnit("1"))
if err != nil {
return nil, err
}
totalHTTPResponseCounter, err := meter.SyncInt64().Counter(
fmt.Sprintf("%s_total", httpResponseCounterPrefix),
instrument.WithUnit("1"))
if err != nil {
return nil, err
}
return &HTTPMiddleware{
ctx: ctx,
httpRequestCounters: map[string]syncint64.Counter{},
httpResponseCounters: map[string]syncint64.Counter{},
totalHTTPResponseCodeCounters: map[int]syncint64.Counter{},
meter: meter,
totalHTTPRequestsCounter: totalHTTPRequestsCounter,
totalHTTPResponseCounter: totalHTTPResponseCounter,
},
nil
}
func getRequestCounterKey(endpoint, method string) string {
return fmt.Sprintf("%s%s_%s", httpRequestCounterPrefix,
strings.ReplaceAll(endpoint, "/", "_"), method)
}
func getRequestDurationKey(endpoint, method string) string {
return fmt.Sprintf("%s%s_%s", httpRequestDurationPrefix,
strings.ReplaceAll(endpoint, "/", "_"), method)
}
func getResponseCounterKey(endpoint, method string, status int) string {
return fmt.Sprintf("%s%s_%s_%d", httpResponseCounterPrefix,
strings.ReplaceAll(endpoint, "/", "_"), method, status)
@@ -132,6 +161,10 @@ func getResponseCounterKey(endpoint, method string, status int) string {
// Handler logs every request and response and adds the, to metrics.
func (m *HTTPMiddleware) Handler(h http.Handler) http.Handler {
fn := func(rw http.ResponseWriter, r *http.Request) {
reqStart := time.Now()
defer func() {
m.totalHTTPRequestDuration.Record(m.ctx, time.Since(reqStart).Milliseconds())
}()
traceID := hash(fmt.Sprintf("%v", r))
log.Tracef("HTTP request %v: %v %v", traceID, r.Method, r.URL)
@@ -161,6 +194,14 @@ func (m *HTTPMiddleware) Handler(h http.Handler) http.Handler {
if c, ok := m.totalHTTPResponseCodeCounters[w.Status()]; ok {
c.Add(m.ctx, 1)
}
durationKey := getRequestDurationKey(r.URL.Path, r.Method)
reqTook := time.Since(reqStart)
if c, ok := m.httpRequestDurations[durationKey]; ok {
c.Record(m.ctx, reqTook.Milliseconds())
}
log.Debugf("request %s %s took %d ms", r.Method, r.URL.Path, reqTook.Milliseconds())
}
return http.HandlerFunc(fn)

View File

@@ -0,0 +1,47 @@
package telemetry
import (
"context"
"time"
"go.opentelemetry.io/otel/metric"
"go.opentelemetry.io/otel/metric/instrument"
"go.opentelemetry.io/otel/metric/instrument/syncint64"
)
// StoreMetrics represents all metrics related to the FileStore
type StoreMetrics struct {
globalLockAcquisitionDuration syncint64.Histogram
persistenceDuration syncint64.Histogram
ctx context.Context
}
// NewStoreMetrics creates an instance of StoreMetrics
func NewStoreMetrics(ctx context.Context, meter metric.Meter) (*StoreMetrics, error) {
globalLockAcquisitionDuration, err := meter.SyncInt64().Histogram("management.store.global.lock.acquisition.duration.micro",
instrument.WithUnit("microseconds"))
if err != nil {
return nil, err
}
persistenceDuration, err := meter.SyncInt64().Histogram("management.store.persistence.duration.micro",
instrument.WithUnit("microseconds"))
if err != nil {
return nil, err
}
return &StoreMetrics{
globalLockAcquisitionDuration: globalLockAcquisitionDuration,
persistenceDuration: persistenceDuration,
ctx: ctx,
}, nil
}
// CountGlobalLockAcquisitionDuration counts the duration of the global lock acquisition
func (metrics *StoreMetrics) CountGlobalLockAcquisitionDuration(duration time.Duration) {
metrics.globalLockAcquisitionDuration.Record(metrics.ctx, duration.Microseconds())
}
// CountPersistenceDuration counts the duration of a store persistence operation
func (metrics *StoreMetrics) CountPersistenceDuration(duration time.Duration) {
metrics.persistenceDuration.Record(metrics.ctx, duration.Microseconds())
}