feat: OpenTelemetry tracing and metrics (#262) (#495)

Co-authored-by: ItalyPaleAle <43508+ItalyPaleAle@users.noreply.github.com>
This commit is contained in:
Daenney
2025-05-05 15:59:44 +02:00
committed by GitHub
parent 9efab5f3e8
commit 6f54ee5d66
11 changed files with 293 additions and 43 deletions

View File

@@ -8,6 +8,7 @@ import (
_ "github.com/golang-migrate/migrate/v4/source/file"
"github.com/pocket-id/pocket-id/backend/internal/common"
"github.com/pocket-id/pocket-id/backend/internal/job"
"github.com/pocket-id/pocket-id/backend/internal/utils"
"github.com/pocket-id/pocket-id/backend/internal/utils/signals"
@@ -23,11 +24,17 @@ func Bootstrap() error {
migrateConfigDBConnstring()
migrateKey()
// Initialize the tracer and metrics exporter
shutdownFns, httpClient, err := initOtel(ctx, common.EnvConfig.MetricsEnabled, common.EnvConfig.TracingEnabled)
if err != nil {
return fmt.Errorf("failed to initialize OpenTelemetry: %w", err)
}
// Connect to the database
db := newDatabase()
// Create all services
svc, err := initServices(ctx, db)
svc, err := initServices(ctx, db, httpClient)
if err != nil {
return fmt.Errorf("failed to initialize services: %w", err)
}
@@ -59,8 +66,7 @@ func Bootstrap() error {
shutdownCtx, shutdownCancel := context.WithTimeout(context.Background(), 5*time.Second)
defer shutdownCancel()
err = utils.
// TODO: Add shutdown services here
NewServiceRunner().
NewServiceRunner(shutdownFns...).
Run(shutdownCtx)
if err != nil {
log.Printf("Error shutting down services: %v", err)

View File

@@ -0,0 +1,107 @@
package bootstrap
import (
"context"
"fmt"
"net/http"
"time"
"github.com/pocket-id/pocket-id/backend/internal/common"
"github.com/pocket-id/pocket-id/backend/internal/utils"
"go.opentelemetry.io/contrib/exporters/autoexport"
"go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp"
"go.opentelemetry.io/otel"
metricnoop "go.opentelemetry.io/otel/metric/noop"
"go.opentelemetry.io/otel/propagation"
"go.opentelemetry.io/otel/sdk/metric"
"go.opentelemetry.io/otel/sdk/resource"
sdktrace "go.opentelemetry.io/otel/sdk/trace"
semconv "go.opentelemetry.io/otel/semconv/v1.30.0"
tracenoop "go.opentelemetry.io/otel/trace/noop"
)
// defaultResource builds the OpenTelemetry resource that identifies this service.
//
// The SDK's default resource is merged with a schemaless resource carrying the
// service name ("pocket-id-backend") and the service version read from
// common.Version. The result of resource.Merge, including any error, is
// returned as-is.
func defaultResource() (*resource.Resource, error) {
	base := resource.Default()
	serviceAttrs := resource.NewSchemaless(
		semconv.ServiceName("pocket-id-backend"),
		semconv.ServiceVersion(common.Version),
	)

	return resource.Merge(base, serviceAttrs)
}
// initOtel installs the global OpenTelemetry tracer and meter providers and
// builds the HTTP client that should be used for outbound requests.
//
// Parameters:
//   - ctx is passed to autoexport when creating the span exporter and the
//     metric reader.
//   - metrics enables the SDK meter provider; when false a no-op meter
//     provider is installed instead.
//   - traces enables the SDK tracer provider (with a batching span processor),
//     installs the TraceContext and Baggage propagators, and wraps the
//     returned HTTP client's transport with otelhttp; when false a no-op
//     tracer provider is installed instead.
//
// Returns:
//   - shutdownFns: one function per provider that was started (at most two),
//     each shutting its provider down with a 10 second timeout.
//   - httpClient: a client backed by a clone of http.DefaultTransport,
//     instrumented with otelhttp when traces is true.
//   - err: non-nil if the resource, span exporter or metric reader could not
//     be created. In that case the other results are nil and any provider that
//     had already been started is shut down before returning.
//
// The function panics if http.DefaultTransport is not a *http.Transport.
func initOtel(ctx context.Context, metrics, traces bool) (shutdownFns []utils.Service, httpClient *http.Client, err error) {
	// Named "res" so the imported "resource" package is not shadowed
	res, err := defaultResource()
	if err != nil {
		return nil, nil, fmt.Errorf("failed to create OpenTelemetry resource: %w", err)
	}

	shutdownFns = make([]utils.Service, 0, 2)

	// Give the client its own transport instance, cloned from the default one
	httpClient = &http.Client{}
	defaultTransport, ok := http.DefaultTransport.(*http.Transport)
	if !ok {
		// Indicates a development-time error
		panic("Default transport is not of type *http.Transport")
	}
	httpClient.Transport = defaultTransport.Clone()

	if traces {
		exporter, exporterErr := autoexport.NewSpanExporter(ctx)
		if exporterErr != nil {
			return nil, nil, fmt.Errorf("failed to initialize OpenTelemetry span exporter: %w", exporterErr)
		}

		tp := sdktrace.NewTracerProvider(
			sdktrace.WithResource(res),
			sdktrace.WithBatcher(exporter),
		)

		otel.SetTracerProvider(tp)
		otel.SetTextMapPropagator(
			propagation.NewCompositeTextMapPropagator(
				propagation.TraceContext{},
				propagation.Baggage{},
			),
		)

		shutdownFns = append(shutdownFns, func(shutdownCtx context.Context) error { //nolint:contextcheck
			tpCtx, tpCancel := context.WithTimeout(shutdownCtx, 10*time.Second)
			defer tpCancel()
			shutdownErr := tp.Shutdown(tpCtx)
			if shutdownErr != nil {
				return fmt.Errorf("failed to gracefully shut down traces exporter: %w", shutdownErr)
			}
			return nil
		})

		// Instrument outbound requests made through the returned client
		httpClient.Transport = otelhttp.NewTransport(httpClient.Transport)
	} else {
		otel.SetTracerProvider(tracenoop.NewTracerProvider())
	}

	if metrics {
		mr, readerErr := autoexport.NewMetricReader(ctx)
		if readerErr != nil {
			initErr := fmt.Errorf("failed to initialize OpenTelemetry metric reader: %w", readerErr)

			// The tracer provider may already be running at this point: shut down
			// whatever was started so it is not leaked when the caller aborts.
			for _, shutdown := range shutdownFns {
				if cleanupErr := shutdown(ctx); cleanupErr != nil {
					initErr = fmt.Errorf("%w (cleanup also failed: %v)", initErr, cleanupErr)
				}
			}
			return nil, nil, initErr
		}

		mp := metric.NewMeterProvider(
			metric.WithResource(res),
			metric.WithReader(mr),
		)

		otel.SetMeterProvider(mp)

		shutdownFns = append(shutdownFns, func(shutdownCtx context.Context) error { //nolint:contextcheck
			mpCtx, mpCancel := context.WithTimeout(shutdownCtx, 10*time.Second)
			defer mpCancel()
			shutdownErr := mp.Shutdown(mpCtx)
			if shutdownErr != nil {
				return fmt.Errorf("failed to gracefully shut down metrics exporter: %w", shutdownErr)
			}
			return nil
		})
	} else {
		otel.SetMeterProvider(metricnoop.NewMeterProvider())
	}

	return shutdownFns, httpClient, nil
}

View File

@@ -9,6 +9,7 @@ import (
"time"
"github.com/gin-gonic/gin"
"go.opentelemetry.io/contrib/instrumentation/github.com/gin-gonic/gin/otelgin"
"golang.org/x/time/rate"
"gorm.io/gorm"
@@ -44,6 +45,10 @@ func initRouterInternal(db *gorm.DB, svc *services) (utils.Service, error) {
r := gin.Default()
r.Use(gin.Logger())
if common.EnvConfig.TracingEnabled {
r.Use(otelgin.Middleware("pocket-id-backend"))
}
rateLimitMiddleware := middleware.NewRateLimitMiddleware()
// Setup global middleware

View File

@@ -3,6 +3,7 @@ package bootstrap
import (
"context"
"fmt"
"net/http"
"gorm.io/gorm"
@@ -26,7 +27,7 @@ type services struct {
// Initializes all services
// The context should be used by services only for initialization, and not for running
func initServices(initCtx context.Context, db *gorm.DB) (svc *services, err error) {
func initServices(initCtx context.Context, db *gorm.DB, httpClient *http.Client) (svc *services, err error) {
svc = &services{}
svc.appConfigService = service.NewAppConfigService(initCtx, db)
@@ -36,14 +37,14 @@ func initServices(initCtx context.Context, db *gorm.DB) (svc *services, err erro
return nil, fmt.Errorf("unable to create email service: %w", err)
}
svc.geoLiteService = service.NewGeoLiteService()
svc.geoLiteService = service.NewGeoLiteService(httpClient)
svc.auditLogService = service.NewAuditLogService(db, svc.appConfigService, svc.emailService, svc.geoLiteService)
svc.jwtService = service.NewJwtService(svc.appConfigService)
svc.userService = service.NewUserService(db, svc.jwtService, svc.auditLogService, svc.emailService, svc.appConfigService)
svc.customClaimService = service.NewCustomClaimService(db)
svc.oidcService = service.NewOidcService(db, svc.jwtService, svc.appConfigService, svc.auditLogService, svc.customClaimService)
svc.userGroupService = service.NewUserGroupService(db, svc.appConfigService)
svc.ldapService = service.NewLdapService(db, svc.appConfigService, svc.userService, svc.userGroupService)
svc.ldapService = service.NewLdapService(db, httpClient, svc.appConfigService, svc.userService, svc.userGroupService)
svc.apiKeyService = service.NewApiKeyService(db, svc.emailService)
svc.webauthnService = service.NewWebAuthnService(db, svc.jwtService, svc.auditLogService, svc.appConfigService)

View File

@@ -10,6 +10,13 @@ import (
type DbProvider string
const (
// TracerName is the instrumentation name to use when creating custom spans.
// Pass it to otel.Tracer, or to the Tracer method of the provider obtained
// through trace.SpanFromContext(ctx).TracerProvider().
TracerName = "github.com/pocket-id/pocket-id/backend/tracing"
// MeterName is the instrumentation name to use when creating custom metrics.
// Pass it to otel.Meter.
MeterName = "github.com/pocket-id/pocket-id/backend/metrics"
)
const (
DbProviderSqlite DbProvider = "sqlite"
DbProviderPostgres DbProvider = "postgres"
@@ -31,6 +38,8 @@ type EnvConfigSchema struct {
GeoLiteDBPath string `env:"GEOLITE_DB_PATH"`
GeoLiteDBUrl string `env:"GEOLITE_DB_URL"`
UiConfigDisabled bool `env:"PUBLIC_UI_CONFIG_DISABLED"`
MetricsEnabled bool `env:"METRICS_ENABLED"`
TracingEnabled bool `env:"TRACING_ENABLED"`
}
var EnvConfig = &EnvConfigSchema{
@@ -48,6 +57,8 @@ var EnvConfig = &EnvConfigSchema{
GeoLiteDBPath: "data/GeoLite2-City.mmdb",
GeoLiteDBUrl: MaxMindGeoLiteCityUrl,
UiConfigDisabled: false,
MetricsEnabled: false,
TracingEnabled: false,
}
func init() {

View File

@@ -0,0 +1,6 @@
package common
// Version contains the Pocket ID version.
//
// It defaults to "unknown" and can be set at build time using -ldflags,
// for example:
//
//	go build -ldflags "-X github.com/pocket-id/pocket-id/backend/internal/common.Version=1.2.3"
var Version = "unknown"

View File

@@ -22,6 +22,7 @@ import (
)
// GeoLiteService manages the GeoLite2 City database and its updates.
type GeoLiteService struct {
// httpClient is the client used to download the database in UpdateDatabase.
httpClient *http.Client
// disableUpdater indicates that the periodic database updater is disabled.
disableUpdater bool
// NOTE(review): presumably guards access to the database file while it is
// being replaced — confirm against the methods that lock it.
mutex sync.RWMutex
}
@@ -42,8 +43,10 @@ var tailscaleIPNets = []*net.IPNet{
}
// NewGeoLiteService initializes a new GeoLiteService instance and starts a goroutine to update the GeoLite2 City database.
func NewGeoLiteService() *GeoLiteService {
service := &GeoLiteService{}
func NewGeoLiteService(httpClient *http.Client) *GeoLiteService {
service := &GeoLiteService{
httpClient: httpClient,
}
if common.EnvConfig.MaxMindLicenseKey == "" && common.EnvConfig.GeoLiteDBUrl == common.MaxMindGeoLiteCityUrl {
// Warn the user, and disable the periodic updater
@@ -129,7 +132,7 @@ func (s *GeoLiteService) UpdateDatabase(parentCtx context.Context) error {
return fmt.Errorf("failed to create request: %w", err)
}
resp, err := http.DefaultClient.Do(req)
resp, err := s.httpClient.Do(req)
if err != nil {
return fmt.Errorf("failed to download database: %w", err)
}

View File

@@ -23,14 +23,16 @@ import (
// LdapService holds the dependencies used by the LDAP-related operations.
type LdapService struct {
db *gorm.DB
// httpClient is the client used to download profile pictures referenced by URL.
httpClient *http.Client
appConfigService *AppConfigService
userService *UserService
groupService *UserGroupService
}
func NewLdapService(db *gorm.DB, appConfigService *AppConfigService, userService *UserService, groupService *UserGroupService) *LdapService {
func NewLdapService(db *gorm.DB, httpClient *http.Client, appConfigService *AppConfigService, userService *UserService, groupService *UserGroupService) *LdapService {
return &LdapService{
db: db,
httpClient: httpClient,
appConfigService: appConfigService,
userService: userService,
groupService: groupService,
@@ -393,7 +395,7 @@ func (s *LdapService) saveProfilePicture(parentCtx context.Context, userId strin
_, err := url.ParseRequestURI(pictureString)
if err == nil {
ctx, cancel := context.WithTimeout(parentCtx, 5*time.Second)
ctx, cancel := context.WithTimeout(parentCtx, 15*time.Second)
defer cancel()
var req *http.Request
@@ -403,7 +405,7 @@ func (s *LdapService) saveProfilePicture(parentCtx context.Context, userId strin
}
var res *http.Response
res, err = http.DefaultClient.Do(req)
res, err = s.httpClient.Do(req)
if err != nil {
return fmt.Errorf("failed to download profile picture: %w", err)
}