collector: refactor metrics handler (#1773)

Signed-off-by: Jan-Otto Kröpke <mail@jkroepke.de>
This commit is contained in:
Jan-Otto Kröpke
2024-11-26 21:13:47 +01:00
committed by GitHub
parent c8eeb595c0
commit fd55ac4894
5 changed files with 154 additions and 134 deletions

View File

@@ -36,7 +36,7 @@ var _ http.Handler = (*MetricsHTTPHandler)(nil)
const defaultScrapeTimeout = 10.0 const defaultScrapeTimeout = 10.0
type MetricsHTTPHandler struct { type MetricsHTTPHandler struct {
metricCollectors *collector.MetricCollectors metricCollectors *collector.Collection
// exporterMetricsRegistry is a separate registry for the metrics about // exporterMetricsRegistry is a separate registry for the metrics about
// the exporter itself. // the exporter itself.
exporterMetricsRegistry *prometheus.Registry exporterMetricsRegistry *prometheus.Registry
@@ -51,7 +51,7 @@ type Options struct {
TimeoutMargin float64 TimeoutMargin float64
} }
func New(logger *slog.Logger, metricCollectors *collector.MetricCollectors, options *Options) *MetricsHTTPHandler { func New(logger *slog.Logger, metricCollectors *collector.Collection, options *Options) *MetricsHTTPHandler {
if options == nil { if options == nil {
options = &Options{ options = &Options{
DisableExporterMetrics: false, DisableExporterMetrics: false,
@@ -126,28 +126,20 @@ func (c *MetricsHTTPHandler) getScrapeTimeout(logger *slog.Logger, r *http.Reque
func (c *MetricsHTTPHandler) handlerFactory(logger *slog.Logger, scrapeTimeout time.Duration, requestedCollectors []string) (http.Handler, error) { func (c *MetricsHTTPHandler) handlerFactory(logger *slog.Logger, scrapeTimeout time.Duration, requestedCollectors []string) (http.Handler, error) {
reg := prometheus.NewRegistry() reg := prometheus.NewRegistry()
var metricCollectors *collector.MetricCollectors
if len(requestedCollectors) == 0 {
metricCollectors = c.metricCollectors
} else {
var err error
metricCollectors, err = c.metricCollectors.CloneWithCollectors(requestedCollectors)
if err != nil {
return nil, fmt.Errorf("couldn't clone metric collectors: %w", err)
}
}
reg.MustRegister(version.NewCollector("windows_exporter")) reg.MustRegister(version.NewCollector("windows_exporter"))
if err := reg.Register(metricCollectors.NewPrometheusCollector(scrapeTimeout, c.logger)); err != nil { collectionHandler, err := c.metricCollectors.NewHandler(scrapeTimeout, c.logger, requestedCollectors)
if err != nil {
return nil, fmt.Errorf("couldn't create collector handler: %w", err)
}
if err := reg.Register(collectionHandler); err != nil {
return nil, fmt.Errorf("couldn't register Prometheus collector: %w", err) return nil, fmt.Errorf("couldn't register Prometheus collector: %w", err)
} }
var handler http.Handler var regHandler http.Handler
if c.exporterMetricsRegistry != nil { if c.exporterMetricsRegistry != nil {
handler = promhttp.HandlerFor( regHandler = promhttp.HandlerFor(
prometheus.Gatherers{c.exporterMetricsRegistry, reg}, prometheus.Gatherers{c.exporterMetricsRegistry, reg},
promhttp.HandlerOpts{ promhttp.HandlerOpts{
ErrorLog: slog.NewLogLogger(logger.Handler(), slog.LevelError), ErrorLog: slog.NewLogLogger(logger.Handler(), slog.LevelError),
@@ -161,11 +153,11 @@ func (c *MetricsHTTPHandler) handlerFactory(logger *slog.Logger, scrapeTimeout t
// Note that we have to use h.exporterMetricsRegistry here to // Note that we have to use h.exporterMetricsRegistry here to
// use the same promhttp metrics for all expositions. // use the same promhttp metrics for all expositions.
handler = promhttp.InstrumentMetricHandler( regHandler = promhttp.InstrumentMetricHandler(
c.exporterMetricsRegistry, handler, c.exporterMetricsRegistry, regHandler,
) )
} else { } else {
handler = promhttp.HandlerFor( regHandler = promhttp.HandlerFor(
reg, reg,
promhttp.HandlerOpts{ promhttp.HandlerOpts{
ErrorLog: slog.NewLogLogger(logger.Handler(), slog.LevelError), ErrorLog: slog.NewLogLogger(logger.Handler(), slog.LevelError),
@@ -177,21 +169,5 @@ func (c *MetricsHTTPHandler) handlerFactory(logger *slog.Logger, scrapeTimeout t
) )
} }
return c.withConcurrencyLimit(handler.ServeHTTP), nil return regHandler, nil
}
func (c *MetricsHTTPHandler) withConcurrencyLimit(next http.HandlerFunc) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
select {
case c.concurrencyCh <- struct{}{}:
defer func() { <-c.concurrencyCh }()
default:
w.WriteHeader(http.StatusServiceUnavailable)
_, _ = w.Write([]byte("Too many concurrent requests"))
return
}
next(w, r)
}
} }

View File

@@ -11,8 +11,6 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
//go:build windows
package collector package collector
import ( import (
@@ -31,22 +29,6 @@ import (
"github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus"
) )
// Interface guard.
var _ prometheus.Collector = (*Prometheus)(nil)
// Prometheus implements prometheus.Collector for a set of Windows MetricCollectors.
type Prometheus struct {
maxScrapeDuration time.Duration
logger *slog.Logger
metricCollectors *MetricCollectors
// Base metrics returned by Prometheus
scrapeDurationDesc *prometheus.Desc
collectorScrapeDurationDesc *prometheus.Desc
collectorScrapeSuccessDesc *prometheus.Desc
collectorScrapeTimeoutDesc *prometheus.Desc
}
type collectorStatus struct { type collectorStatus struct {
name string name string
statusCode collectorStatusCode statusCode collectorStatusCode
@@ -60,64 +42,26 @@ const (
failed failed
) )
// NewPrometheusCollector returns a new Prometheus where the set of MetricCollectors must func (c *Collection) collectAll(ch chan<- prometheus.Metric, logger *slog.Logger, maxScrapeDuration time.Duration) {
// return metrics within the given timeout.
func (c *MetricCollectors) NewPrometheusCollector(timeout time.Duration, logger *slog.Logger) *Prometheus {
return &Prometheus{
maxScrapeDuration: timeout,
metricCollectors: c,
logger: logger,
scrapeDurationDesc: prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, "exporter", "scrape_duration_seconds"),
"windows_exporter: Total scrape duration.",
nil,
nil,
),
collectorScrapeDurationDesc: prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, "exporter", "collector_duration_seconds"),
"windows_exporter: Duration of a collection.",
[]string{"collector"},
nil,
),
collectorScrapeSuccessDesc: prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, "exporter", "collector_success"),
"windows_exporter: Whether the collector was successful.",
[]string{"collector"},
nil,
),
collectorScrapeTimeoutDesc: prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, "exporter", "collector_timeout"),
"windows_exporter: Whether the collector timed out.",
[]string{"collector"},
nil,
),
}
}
func (p *Prometheus) Describe(_ chan<- *prometheus.Desc) {}
// Collect sends the collected metrics from each of the MetricCollectors to
// prometheus.
func (p *Prometheus) Collect(ch chan<- prometheus.Metric) {
collectorStartTime := time.Now() collectorStartTime := time.Now()
// WaitGroup to wait for all collectors to finish // WaitGroup to wait for all collectors to finish
wg := sync.WaitGroup{} wg := sync.WaitGroup{}
wg.Add(len(p.metricCollectors.collectors)) wg.Add(len(c.collectors))
// Using a channel to collect the status of each collector // Using a channel to collect the status of each collector
// A channel is safe to use concurrently while a map is not // A channel is safe to use concurrently while a map is not
collectorStatusCh := make(chan collectorStatus, len(p.metricCollectors.collectors)) collectorStatusCh := make(chan collectorStatus, len(c.collectors))
// Execute all collectors concurrently // Execute all collectors concurrently
// timeout handling is done in the execute function // timeout handling is done in the execute function
for name, metricsCollector := range p.metricCollectors.collectors { for name, metricsCollector := range c.collectors {
go func(name string, metricsCollector Collector) { go func(name string, metricsCollector Collector) {
defer wg.Done() defer wg.Done()
collectorStatusCh <- collectorStatus{ collectorStatusCh <- collectorStatus{
name: name, name: name,
statusCode: p.execute(name, metricsCollector, ch), statusCode: c.collectCollector(ch, logger, name, metricsCollector, maxScrapeDuration),
} }
}(name, metricsCollector) }(name, metricsCollector)
} }
@@ -139,14 +83,14 @@ func (p *Prometheus) Collect(ch chan<- prometheus.Metric) {
} }
ch <- prometheus.MustNewConstMetric( ch <- prometheus.MustNewConstMetric(
p.collectorScrapeSuccessDesc, c.collectorScrapeSuccessDesc,
prometheus.GaugeValue, prometheus.GaugeValue,
successValue, successValue,
status.name, status.name,
) )
ch <- prometheus.MustNewConstMetric( ch <- prometheus.MustNewConstMetric(
p.collectorScrapeTimeoutDesc, c.collectorScrapeTimeoutDesc,
prometheus.GaugeValue, prometheus.GaugeValue,
timeoutValue, timeoutValue,
status.name, status.name,
@@ -154,13 +98,13 @@ func (p *Prometheus) Collect(ch chan<- prometheus.Metric) {
} }
ch <- prometheus.MustNewConstMetric( ch <- prometheus.MustNewConstMetric(
p.scrapeDurationDesc, c.scrapeDurationDesc,
prometheus.GaugeValue, prometheus.GaugeValue,
time.Since(collectorStartTime).Seconds(), time.Since(collectorStartTime).Seconds(),
) )
} }
func (p *Prometheus) execute(name string, c Collector, ch chan<- prometheus.Metric) collectorStatusCode { func (c *Collection) collectCollector(ch chan<- prometheus.Metric, logger *slog.Logger, name string, collector Collector, maxScrapeDuration time.Duration) collectorStatusCode {
var ( var (
err error err error
numMetrics int numMetrics int
@@ -173,10 +117,10 @@ func (p *Prometheus) execute(name string, c Collector, ch chan<- prometheus.Metr
bufCh := make(chan prometheus.Metric, 1000) bufCh := make(chan prometheus.Metric, 1000)
errCh := make(chan error, 1) errCh := make(chan error, 1)
ctx, cancel := context.WithTimeout(context.Background(), p.maxScrapeDuration) ctx, cancel := context.WithTimeout(context.Background(), maxScrapeDuration)
defer cancel() defer cancel()
// Execute the collector // execute the collector
go func() { go func() {
defer func() { defer func() {
if r := recover(); r != nil { if r := recover(); r != nil {
@@ -188,7 +132,7 @@ func (p *Prometheus) execute(name string, c Collector, ch chan<- prometheus.Metr
close(bufCh) close(bufCh)
}() }()
errCh <- c.Collect(bufCh) errCh <- collector.Collect(bufCh)
}() }()
wg := sync.WaitGroup{} wg := sync.WaitGroup{}
@@ -232,7 +176,7 @@ func (p *Prometheus) execute(name string, c Collector, ch chan<- prometheus.Metr
duration = time.Since(t) duration = time.Since(t)
ch <- prometheus.MustNewConstMetric( ch <- prometheus.MustNewConstMetric(
p.collectorScrapeDurationDesc, c.collectorScrapeDurationDesc,
prometheus.GaugeValue, prometheus.GaugeValue,
duration.Seconds(), duration.Seconds(),
name, name,
@@ -242,13 +186,13 @@ func (p *Prometheus) execute(name string, c Collector, ch chan<- prometheus.Metr
duration = time.Since(t) duration = time.Since(t)
ch <- prometheus.MustNewConstMetric( ch <- prometheus.MustNewConstMetric(
p.collectorScrapeDurationDesc, c.collectorScrapeDurationDesc,
prometheus.GaugeValue, prometheus.GaugeValue,
duration.Seconds(), duration.Seconds(),
name, name,
) )
p.logger.Warn(fmt.Sprintf("collector %s timeouted after %s, resulting in %d metrics", name, p.maxScrapeDuration, numMetrics)) logger.Warn(fmt.Sprintf("collector %s timeouted after %s, resulting in %d metrics", name, maxScrapeDuration, numMetrics))
go func() { go func() {
// Drain channel in case of premature return to not leak a goroutine. // Drain channel in case of premature return to not leak a goroutine.
@@ -261,12 +205,12 @@ func (p *Prometheus) execute(name string, c Collector, ch chan<- prometheus.Metr
} }
if err != nil { if err != nil {
loggerFn := p.logger.Warn loggerFn := logger.Warn
if errors.Is(err, types.ErrNoData) || if errors.Is(err, types.ErrNoData) ||
errors.Is(err, perfdata.ErrNoData) || errors.Is(err, perfdata.ErrNoData) ||
errors.Is(err, perfdata.ErrPerformanceCounterNotInitialized) || errors.Is(err, perfdata.ErrPerformanceCounterNotInitialized) ||
errors.Is(err, mi.MI_RESULT_INVALID_NAMESPACE) { errors.Is(err, mi.MI_RESULT_INVALID_NAMESPACE) {
loggerFn = p.logger.Debug loggerFn = logger.Debug
} }
loggerFn(fmt.Sprintf("collector %s failed after %s, resulting in %d metrics", name, duration, numMetrics), loggerFn(fmt.Sprintf("collector %s failed after %s, resulting in %d metrics", name, duration, numMetrics),
@@ -276,7 +220,7 @@ func (p *Prometheus) execute(name string, c Collector, ch chan<- prometheus.Metr
return failed return failed
} }
p.logger.Debug(fmt.Sprintf("collector %s succeeded after %s, resulting in %d metrics", name, duration, numMetrics)) logger.Debug(fmt.Sprintf("collector %s succeeded after %s, resulting in %d metrics", name, duration, numMetrics))
return success return success
} }

View File

@@ -22,7 +22,7 @@ import (
"maps" "maps"
"slices" "slices"
"sync" "sync"
stdtime "time" gotime "time"
"github.com/alecthomas/kingpin/v2" "github.com/alecthomas/kingpin/v2"
"github.com/prometheus-community/windows_exporter/internal/collector/ad" "github.com/prometheus-community/windows_exporter/internal/collector/ad"
@@ -74,10 +74,12 @@ import (
"github.com/prometheus-community/windows_exporter/internal/collector/update" "github.com/prometheus-community/windows_exporter/internal/collector/update"
"github.com/prometheus-community/windows_exporter/internal/collector/vmware" "github.com/prometheus-community/windows_exporter/internal/collector/vmware"
"github.com/prometheus-community/windows_exporter/internal/mi" "github.com/prometheus-community/windows_exporter/internal/mi"
"github.com/prometheus-community/windows_exporter/internal/types"
"github.com/prometheus/client_golang/prometheus"
) )
// NewWithFlags To be called by the exporter for collector initialization before running kingpin.Parse. // NewWithFlags To be called by the exporter for collector initialization before running kingpin.Parse.
func NewWithFlags(app *kingpin.Application) *MetricCollectors { func NewWithFlags(app *kingpin.Application) *Collection {
collectors := map[string]Collector{} collectors := map[string]Collector{}
for name, builder := range BuildersWithFlags { for name, builder := range BuildersWithFlags {
@@ -90,7 +92,7 @@ func NewWithFlags(app *kingpin.Application) *MetricCollectors {
// NewWithConfig To be called by the external libraries for collector initialization without running [kingpin.Parse]. // NewWithConfig To be called by the external libraries for collector initialization without running [kingpin.Parse].
// //
//goland:noinspection GoUnusedExportedFunction //goland:noinspection GoUnusedExportedFunction
func NewWithConfig(config Config) *MetricCollectors { func NewWithConfig(config Config) *Collection {
collectors := Map{} collectors := Map{}
collectors[ad.Name] = ad.New(&config.AD) collectors[ad.Name] = ad.New(&config.AD)
collectors[adcs.Name] = adcs.New(&config.ADCS) collectors[adcs.Name] = adcs.New(&config.ADCS)
@@ -145,14 +147,39 @@ func NewWithConfig(config Config) *MetricCollectors {
} }
// New To be called by the external libraries for collector initialization. // New To be called by the external libraries for collector initialization.
func New(collectors Map) *MetricCollectors { func New(collectors Map) *Collection {
return &MetricCollectors{ return &Collection{
collectors: collectors, collectors: collectors,
concurrencyCh: make(chan struct{}, 1),
scrapeDurationDesc: prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, "exporter", "scrape_duration_seconds"),
"windows_exporter: Total scrape duration.",
nil,
nil,
),
collectorScrapeDurationDesc: prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, "exporter", "collector_duration_seconds"),
"windows_exporter: Duration of a collection.",
[]string{"collector"},
nil,
),
collectorScrapeSuccessDesc: prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, "exporter", "collector_success"),
"windows_exporter: Whether the collector was successful.",
[]string{"collector"},
nil,
),
collectorScrapeTimeoutDesc: prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, "exporter", "collector_timeout"),
"windows_exporter: Whether the collector timed out.",
[]string{"collector"},
nil,
),
} }
} }
// Enable removes all collectors that not enabledCollectors. // Enable removes all collectors that not enabledCollectors.
func (c *MetricCollectors) Enable(enabledCollectors []string) error { func (c *Collection) Enable(enabledCollectors []string) error {
for _, name := range enabledCollectors { for _, name := range enabledCollectors {
if _, ok := c.collectors[name]; !ok { if _, ok := c.collectors[name]; !ok {
return fmt.Errorf("unknown collector %s", name) return fmt.Errorf("unknown collector %s", name)
@@ -169,8 +196,8 @@ func (c *MetricCollectors) Enable(enabledCollectors []string) error {
} }
// Build To be called by the exporter for collector initialization. // Build To be called by the exporter for collector initialization.
func (c *MetricCollectors) Build(logger *slog.Logger) error { func (c *Collection) Build(logger *slog.Logger) error {
c.startTime = stdtime.Now() c.startTime = gotime.Now()
err := c.initMI() err := c.initMI()
if err != nil { if err != nil {
@@ -205,7 +232,7 @@ func (c *MetricCollectors) Build(logger *slog.Logger) error {
} }
// Close To be called by the exporter for collector cleanup. // Close To be called by the exporter for collector cleanup.
func (c *MetricCollectors) Close() error { func (c *Collection) Close() error {
errs := make([]error, 0, len(c.collectors)) errs := make([]error, 0, len(c.collectors))
for _, collector := range c.collectors { for _, collector := range c.collectors {
@@ -231,7 +258,7 @@ func (c *MetricCollectors) Close() error {
} }
// initMI To be called by the exporter for collector initialization. // initMI To be called by the exporter for collector initialization.
func (c *MetricCollectors) initMI() error { func (c *Collection) initMI() error {
app, err := mi.Application_Initialize() app, err := mi.Application_Initialize()
if err != nil { if err != nil {
return fmt.Errorf("error from initialize MI application: %w", err) return fmt.Errorf("error from initialize MI application: %w", err)
@@ -254,12 +281,17 @@ func (c *MetricCollectors) initMI() error {
return nil return nil
} }
// CloneWithCollectors To be called by the exporter for collector initialization. // WithCollectors To be called by the exporter for collector initialization.
func (c *MetricCollectors) CloneWithCollectors(collectors []string) (*MetricCollectors, error) { func (c *Collection) WithCollectors(collectors []string) (*Collection, error) {
metricCollectors := &MetricCollectors{ metricCollectors := &Collection{
collectors: maps.Clone(c.collectors), miSession: c.miSession,
miSession: c.miSession, startTime: c.startTime,
startTime: c.startTime, concurrencyCh: c.concurrencyCh,
scrapeDurationDesc: c.scrapeDurationDesc,
collectorScrapeDurationDesc: c.collectorScrapeDurationDesc,
collectorScrapeSuccessDesc: c.collectorScrapeSuccessDesc,
collectorScrapeTimeoutDesc: c.collectorScrapeTimeoutDesc,
collectors: maps.Clone(c.collectors),
} }
if err := metricCollectors.Enable(collectors); err != nil { if err := metricCollectors.Enable(collectors); err != nil {
@@ -269,6 +301,6 @@ func (c *MetricCollectors) CloneWithCollectors(collectors []string) (*MetricColl
return metricCollectors, nil return metricCollectors, nil
} }
func (c *MetricCollectors) GetStartTime() stdtime.Time { func (c *Collection) GetStartTime() gotime.Time {
return c.startTime return c.startTime
} }

62
pkg/collector/handler.go Normal file
View File

@@ -0,0 +1,62 @@
// Copyright 2024 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build windows
package collector
import (
"fmt"
"log/slog"
"time"
"github.com/prometheus/client_golang/prometheus"
)
// Compile-time assertion that *Handler satisfies [prometheus.Collector].
var _ prometheus.Collector = (*Handler)(nil)

// Handler is the [prometheus.Collector] implementation backed by a Collection.
// It stores the settings that are handed to the Collection on every Collect
// call.
type Handler struct {
	// collection is the Collection whose collectors are gathered on Collect.
	collection *Collection
	// logger is passed to the Collection when collecting.
	logger *slog.Logger
	// maxScrapeDuration is passed to the Collection as the scrape time limit.
	maxScrapeDuration time.Duration
}
// NewHandler builds a Handler that exposes a Collection as a
// [prometheus.Collector].
//
// When collectors is empty the Handler is bound to c itself. Otherwise it is
// bound to the Collection returned by c.WithCollectors(collectors); an error
// from that call is wrapped and returned together with a nil Handler.
func (c *Collection) NewHandler(maxScrapeDuration time.Duration, logger *slog.Logger, collectors []string) (*Handler, error) {
	handler := &Handler{
		maxScrapeDuration: maxScrapeDuration,
		logger:            logger,
		collection:        c,
	}

	// No explicit selection requested: serve the receiver's collection as is.
	if len(collectors) == 0 {
		return handler, nil
	}

	selected, err := c.WithCollectors(collectors)
	if err != nil {
		return nil, fmt.Errorf("failed to create handler with collectors: %w", err)
	}

	handler.collection = selected

	return handler, nil
}
// Describe implements [prometheus.Collector]. It sends no descriptors on the
// given channel.
func (p *Handler) Describe(chan<- *prometheus.Desc) {
}
// Collect implements [prometheus.Collector]. It delegates to the underlying
// Collection's collectAll, handing over the metric channel together with the
// logger and maximum scrape duration stored on the Handler.
func (p *Handler) Collect(ch chan<- prometheus.Metric) {
	logger, timeout := p.logger, p.maxScrapeDuration

	p.collection.collectAll(ch, logger, timeout)
}

View File

@@ -26,10 +26,16 @@ import (
const DefaultCollectors = "cpu,cs,memory,logical_disk,physical_disk,net,os,service,system" const DefaultCollectors = "cpu,cs,memory,logical_disk,physical_disk,net,os,service,system"
type MetricCollectors struct { type Collection struct {
collectors Map collectors Map
miSession *mi.Session miSession *mi.Session
startTime time.Time startTime time.Time
concurrencyCh chan struct{}
scrapeDurationDesc *prometheus.Desc
collectorScrapeDurationDesc *prometheus.Desc
collectorScrapeSuccessDesc *prometheus.Desc
collectorScrapeTimeoutDesc *prometheus.Desc
} }
type ( type (