collector: refactor metrics handler (#1773)

Signed-off-by: Jan-Otto Kröpke <mail@jkroepke.de>
This commit is contained in:
Jan-Otto Kröpke
2024-11-26 21:13:47 +01:00
committed by GitHub
parent c8eeb595c0
commit fd55ac4894
5 changed files with 154 additions and 134 deletions

View File

@@ -36,7 +36,7 @@ var _ http.Handler = (*MetricsHTTPHandler)(nil)
const defaultScrapeTimeout = 10.0
type MetricsHTTPHandler struct {
metricCollectors *collector.MetricCollectors
metricCollectors *collector.Collection
// exporterMetricsRegistry is a separate registry for the metrics about
// the exporter itself.
exporterMetricsRegistry *prometheus.Registry
@@ -51,7 +51,7 @@ type Options struct {
TimeoutMargin float64
}
func New(logger *slog.Logger, metricCollectors *collector.MetricCollectors, options *Options) *MetricsHTTPHandler {
func New(logger *slog.Logger, metricCollectors *collector.Collection, options *Options) *MetricsHTTPHandler {
if options == nil {
options = &Options{
DisableExporterMetrics: false,
@@ -126,28 +126,20 @@ func (c *MetricsHTTPHandler) getScrapeTimeout(logger *slog.Logger, r *http.Reque
func (c *MetricsHTTPHandler) handlerFactory(logger *slog.Logger, scrapeTimeout time.Duration, requestedCollectors []string) (http.Handler, error) {
reg := prometheus.NewRegistry()
var metricCollectors *collector.MetricCollectors
if len(requestedCollectors) == 0 {
metricCollectors = c.metricCollectors
} else {
var err error
metricCollectors, err = c.metricCollectors.CloneWithCollectors(requestedCollectors)
if err != nil {
return nil, fmt.Errorf("couldn't clone metric collectors: %w", err)
}
}
reg.MustRegister(version.NewCollector("windows_exporter"))
if err := reg.Register(metricCollectors.NewPrometheusCollector(scrapeTimeout, c.logger)); err != nil {
collectionHandler, err := c.metricCollectors.NewHandler(scrapeTimeout, c.logger, requestedCollectors)
if err != nil {
return nil, fmt.Errorf("couldn't create collector handler: %w", err)
}
if err := reg.Register(collectionHandler); err != nil {
return nil, fmt.Errorf("couldn't register Prometheus collector: %w", err)
}
var handler http.Handler
var regHandler http.Handler
if c.exporterMetricsRegistry != nil {
handler = promhttp.HandlerFor(
regHandler = promhttp.HandlerFor(
prometheus.Gatherers{c.exporterMetricsRegistry, reg},
promhttp.HandlerOpts{
ErrorLog: slog.NewLogLogger(logger.Handler(), slog.LevelError),
@@ -161,11 +153,11 @@ func (c *MetricsHTTPHandler) handlerFactory(logger *slog.Logger, scrapeTimeout t
// Note that we have to use h.exporterMetricsRegistry here to
// use the same promhttp metrics for all expositions.
handler = promhttp.InstrumentMetricHandler(
c.exporterMetricsRegistry, handler,
regHandler = promhttp.InstrumentMetricHandler(
c.exporterMetricsRegistry, regHandler,
)
} else {
handler = promhttp.HandlerFor(
regHandler = promhttp.HandlerFor(
reg,
promhttp.HandlerOpts{
ErrorLog: slog.NewLogLogger(logger.Handler(), slog.LevelError),
@@ -177,21 +169,5 @@ func (c *MetricsHTTPHandler) handlerFactory(logger *slog.Logger, scrapeTimeout t
)
}
return c.withConcurrencyLimit(handler.ServeHTTP), nil
}
func (c *MetricsHTTPHandler) withConcurrencyLimit(next http.HandlerFunc) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
select {
case c.concurrencyCh <- struct{}{}:
defer func() { <-c.concurrencyCh }()
default:
w.WriteHeader(http.StatusServiceUnavailable)
_, _ = w.Write([]byte("Too many concurrent requests"))
return
}
next(w, r)
}
return regHandler, nil
}

View File

@@ -11,8 +11,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build windows
package collector
import (
@@ -31,22 +29,6 @@ import (
"github.com/prometheus/client_golang/prometheus"
)
// Interface guard.
var _ prometheus.Collector = (*Prometheus)(nil)
// Prometheus implements prometheus.Collector for a set of Windows MetricCollectors.
type Prometheus struct {
maxScrapeDuration time.Duration
logger *slog.Logger
metricCollectors *MetricCollectors
// Base metrics returned by Prometheus
scrapeDurationDesc *prometheus.Desc
collectorScrapeDurationDesc *prometheus.Desc
collectorScrapeSuccessDesc *prometheus.Desc
collectorScrapeTimeoutDesc *prometheus.Desc
}
type collectorStatus struct {
name string
statusCode collectorStatusCode
@@ -60,64 +42,26 @@ const (
failed
)
// NewPrometheusCollector returns a new Prometheus where the set of MetricCollectors must
// return metrics within the given timeout.
func (c *MetricCollectors) NewPrometheusCollector(timeout time.Duration, logger *slog.Logger) *Prometheus {
return &Prometheus{
maxScrapeDuration: timeout,
metricCollectors: c,
logger: logger,
scrapeDurationDesc: prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, "exporter", "scrape_duration_seconds"),
"windows_exporter: Total scrape duration.",
nil,
nil,
),
collectorScrapeDurationDesc: prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, "exporter", "collector_duration_seconds"),
"windows_exporter: Duration of a collection.",
[]string{"collector"},
nil,
),
collectorScrapeSuccessDesc: prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, "exporter", "collector_success"),
"windows_exporter: Whether the collector was successful.",
[]string{"collector"},
nil,
),
collectorScrapeTimeoutDesc: prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, "exporter", "collector_timeout"),
"windows_exporter: Whether the collector timed out.",
[]string{"collector"},
nil,
),
}
}
func (p *Prometheus) Describe(_ chan<- *prometheus.Desc) {}
// Collect sends the collected metrics from each of the MetricCollectors to
// prometheus.
func (p *Prometheus) Collect(ch chan<- prometheus.Metric) {
func (c *Collection) collectAll(ch chan<- prometheus.Metric, logger *slog.Logger, maxScrapeDuration time.Duration) {
collectorStartTime := time.Now()
// WaitGroup to wait for all collectors to finish
wg := sync.WaitGroup{}
wg.Add(len(p.metricCollectors.collectors))
wg.Add(len(c.collectors))
// Using a channel to collect the status of each collector
// A channel is safe to use concurrently while a map is not
collectorStatusCh := make(chan collectorStatus, len(p.metricCollectors.collectors))
collectorStatusCh := make(chan collectorStatus, len(c.collectors))
// Execute all collectors concurrently
// timeout handling is done in the execute function
for name, metricsCollector := range p.metricCollectors.collectors {
for name, metricsCollector := range c.collectors {
go func(name string, metricsCollector Collector) {
defer wg.Done()
collectorStatusCh <- collectorStatus{
name: name,
statusCode: p.execute(name, metricsCollector, ch),
statusCode: c.collectCollector(ch, logger, name, metricsCollector, maxScrapeDuration),
}
}(name, metricsCollector)
}
@@ -139,14 +83,14 @@ func (p *Prometheus) Collect(ch chan<- prometheus.Metric) {
}
ch <- prometheus.MustNewConstMetric(
p.collectorScrapeSuccessDesc,
c.collectorScrapeSuccessDesc,
prometheus.GaugeValue,
successValue,
status.name,
)
ch <- prometheus.MustNewConstMetric(
p.collectorScrapeTimeoutDesc,
c.collectorScrapeTimeoutDesc,
prometheus.GaugeValue,
timeoutValue,
status.name,
@@ -154,13 +98,13 @@ func (p *Prometheus) Collect(ch chan<- prometheus.Metric) {
}
ch <- prometheus.MustNewConstMetric(
p.scrapeDurationDesc,
c.scrapeDurationDesc,
prometheus.GaugeValue,
time.Since(collectorStartTime).Seconds(),
)
}
func (p *Prometheus) execute(name string, c Collector, ch chan<- prometheus.Metric) collectorStatusCode {
func (c *Collection) collectCollector(ch chan<- prometheus.Metric, logger *slog.Logger, name string, collector Collector, maxScrapeDuration time.Duration) collectorStatusCode {
var (
err error
numMetrics int
@@ -173,10 +117,10 @@ func (p *Prometheus) execute(name string, c Collector, ch chan<- prometheus.Metr
bufCh := make(chan prometheus.Metric, 1000)
errCh := make(chan error, 1)
ctx, cancel := context.WithTimeout(context.Background(), p.maxScrapeDuration)
ctx, cancel := context.WithTimeout(context.Background(), maxScrapeDuration)
defer cancel()
// Execute the collector
// execute the collector
go func() {
defer func() {
if r := recover(); r != nil {
@@ -188,7 +132,7 @@ func (p *Prometheus) execute(name string, c Collector, ch chan<- prometheus.Metr
close(bufCh)
}()
errCh <- c.Collect(bufCh)
errCh <- collector.Collect(bufCh)
}()
wg := sync.WaitGroup{}
@@ -232,7 +176,7 @@ func (p *Prometheus) execute(name string, c Collector, ch chan<- prometheus.Metr
duration = time.Since(t)
ch <- prometheus.MustNewConstMetric(
p.collectorScrapeDurationDesc,
c.collectorScrapeDurationDesc,
prometheus.GaugeValue,
duration.Seconds(),
name,
@@ -242,13 +186,13 @@ func (p *Prometheus) execute(name string, c Collector, ch chan<- prometheus.Metr
duration = time.Since(t)
ch <- prometheus.MustNewConstMetric(
p.collectorScrapeDurationDesc,
c.collectorScrapeDurationDesc,
prometheus.GaugeValue,
duration.Seconds(),
name,
)
p.logger.Warn(fmt.Sprintf("collector %s timeouted after %s, resulting in %d metrics", name, p.maxScrapeDuration, numMetrics))
logger.Warn(fmt.Sprintf("collector %s timeouted after %s, resulting in %d metrics", name, maxScrapeDuration, numMetrics))
go func() {
// Drain channel in case of premature return to not leak a goroutine.
@@ -261,12 +205,12 @@ func (p *Prometheus) execute(name string, c Collector, ch chan<- prometheus.Metr
}
if err != nil {
loggerFn := p.logger.Warn
loggerFn := logger.Warn
if errors.Is(err, types.ErrNoData) ||
errors.Is(err, perfdata.ErrNoData) ||
errors.Is(err, perfdata.ErrPerformanceCounterNotInitialized) ||
errors.Is(err, mi.MI_RESULT_INVALID_NAMESPACE) {
loggerFn = p.logger.Debug
loggerFn = logger.Debug
}
loggerFn(fmt.Sprintf("collector %s failed after %s, resulting in %d metrics", name, duration, numMetrics),
@@ -276,7 +220,7 @@ func (p *Prometheus) execute(name string, c Collector, ch chan<- prometheus.Metr
return failed
}
p.logger.Debug(fmt.Sprintf("collector %s succeeded after %s, resulting in %d metrics", name, duration, numMetrics))
logger.Debug(fmt.Sprintf("collector %s succeeded after %s, resulting in %d metrics", name, duration, numMetrics))
return success
}

View File

@@ -22,7 +22,7 @@ import (
"maps"
"slices"
"sync"
stdtime "time"
gotime "time"
"github.com/alecthomas/kingpin/v2"
"github.com/prometheus-community/windows_exporter/internal/collector/ad"
@@ -74,10 +74,12 @@ import (
"github.com/prometheus-community/windows_exporter/internal/collector/update"
"github.com/prometheus-community/windows_exporter/internal/collector/vmware"
"github.com/prometheus-community/windows_exporter/internal/mi"
"github.com/prometheus-community/windows_exporter/internal/types"
"github.com/prometheus/client_golang/prometheus"
)
// NewWithFlags To be called by the exporter for collector initialization before running kingpin.Parse.
func NewWithFlags(app *kingpin.Application) *MetricCollectors {
func NewWithFlags(app *kingpin.Application) *Collection {
collectors := map[string]Collector{}
for name, builder := range BuildersWithFlags {
@@ -90,7 +92,7 @@ func NewWithFlags(app *kingpin.Application) *MetricCollectors {
// NewWithConfig To be called by the external libraries for collector initialization without running [kingpin.Parse].
//
//goland:noinspection GoUnusedExportedFunction
func NewWithConfig(config Config) *MetricCollectors {
func NewWithConfig(config Config) *Collection {
collectors := Map{}
collectors[ad.Name] = ad.New(&config.AD)
collectors[adcs.Name] = adcs.New(&config.ADCS)
@@ -145,14 +147,39 @@ func NewWithConfig(config Config) *MetricCollectors {
}
// New To be called by the external libraries for collector initialization.
func New(collectors Map) *MetricCollectors {
return &MetricCollectors{
collectors: collectors,
func New(collectors Map) *Collection {
return &Collection{
collectors: collectors,
concurrencyCh: make(chan struct{}, 1),
scrapeDurationDesc: prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, "exporter", "scrape_duration_seconds"),
"windows_exporter: Total scrape duration.",
nil,
nil,
),
collectorScrapeDurationDesc: prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, "exporter", "collector_duration_seconds"),
"windows_exporter: Duration of a collection.",
[]string{"collector"},
nil,
),
collectorScrapeSuccessDesc: prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, "exporter", "collector_success"),
"windows_exporter: Whether the collector was successful.",
[]string{"collector"},
nil,
),
collectorScrapeTimeoutDesc: prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, "exporter", "collector_timeout"),
"windows_exporter: Whether the collector timed out.",
[]string{"collector"},
nil,
),
}
}
// Enable removes all collectors that not enabledCollectors.
func (c *MetricCollectors) Enable(enabledCollectors []string) error {
func (c *Collection) Enable(enabledCollectors []string) error {
for _, name := range enabledCollectors {
if _, ok := c.collectors[name]; !ok {
return fmt.Errorf("unknown collector %s", name)
@@ -169,8 +196,8 @@ func (c *MetricCollectors) Enable(enabledCollectors []string) error {
}
// Build To be called by the exporter for collector initialization.
func (c *MetricCollectors) Build(logger *slog.Logger) error {
c.startTime = stdtime.Now()
func (c *Collection) Build(logger *slog.Logger) error {
c.startTime = gotime.Now()
err := c.initMI()
if err != nil {
@@ -205,7 +232,7 @@ func (c *MetricCollectors) Build(logger *slog.Logger) error {
}
// Close To be called by the exporter for collector cleanup.
func (c *MetricCollectors) Close() error {
func (c *Collection) Close() error {
errs := make([]error, 0, len(c.collectors))
for _, collector := range c.collectors {
@@ -231,7 +258,7 @@ func (c *MetricCollectors) Close() error {
}
// initMI To be called by the exporter for collector initialization.
func (c *MetricCollectors) initMI() error {
func (c *Collection) initMI() error {
app, err := mi.Application_Initialize()
if err != nil {
return fmt.Errorf("error from initialize MI application: %w", err)
@@ -254,12 +281,17 @@ func (c *MetricCollectors) initMI() error {
return nil
}
// CloneWithCollectors To be called by the exporter for collector initialization.
func (c *MetricCollectors) CloneWithCollectors(collectors []string) (*MetricCollectors, error) {
metricCollectors := &MetricCollectors{
collectors: maps.Clone(c.collectors),
miSession: c.miSession,
startTime: c.startTime,
// WithCollectors To be called by the exporter for collector initialization.
func (c *Collection) WithCollectors(collectors []string) (*Collection, error) {
metricCollectors := &Collection{
miSession: c.miSession,
startTime: c.startTime,
concurrencyCh: c.concurrencyCh,
scrapeDurationDesc: c.scrapeDurationDesc,
collectorScrapeDurationDesc: c.collectorScrapeDurationDesc,
collectorScrapeSuccessDesc: c.collectorScrapeSuccessDesc,
collectorScrapeTimeoutDesc: c.collectorScrapeTimeoutDesc,
collectors: maps.Clone(c.collectors),
}
if err := metricCollectors.Enable(collectors); err != nil {
@@ -269,6 +301,6 @@ func (c *MetricCollectors) CloneWithCollectors(collectors []string) (*MetricColl
return metricCollectors, nil
}
func (c *MetricCollectors) GetStartTime() stdtime.Time {
func (c *Collection) GetStartTime() gotime.Time {
return c.startTime
}

62
pkg/collector/handler.go Normal file
View File

@@ -0,0 +1,62 @@
// Copyright 2024 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build windows
package collector
import (
"fmt"
"log/slog"
"time"
"github.com/prometheus/client_golang/prometheus"
)
// Interface guard.
var _ prometheus.Collector = (*Handler)(nil)
// Handler implements [prometheus.Collector] for a set of Windows Collection.
type Handler struct {
maxScrapeDuration time.Duration
logger *slog.Logger
collection *Collection
}
// NewHandler returns a new Handler that implements a [prometheus.Collector] for the given metrics Collection.
func (c *Collection) NewHandler(maxScrapeDuration time.Duration, logger *slog.Logger, collectors []string) (*Handler, error) {
collection := c
if len(collectors) != 0 {
var err error
collection, err = c.WithCollectors(collectors)
if err != nil {
return nil, fmt.Errorf("failed to create handler with collectors: %w", err)
}
}
return &Handler{
maxScrapeDuration: maxScrapeDuration,
collection: collection,
logger: logger,
}, nil
}
func (p *Handler) Describe(_ chan<- *prometheus.Desc) {}
// Collect sends the collected metrics from each of the Collection to
// prometheus.
func (p *Handler) Collect(ch chan<- prometheus.Metric) {
p.collection.collectAll(ch, p.logger, p.maxScrapeDuration)
}

View File

@@ -26,10 +26,16 @@ import (
const DefaultCollectors = "cpu,cs,memory,logical_disk,physical_disk,net,os,service,system"
type MetricCollectors struct {
collectors Map
miSession *mi.Session
startTime time.Time
type Collection struct {
collectors Map
miSession *mi.Session
startTime time.Time
concurrencyCh chan struct{}
scrapeDurationDesc *prometheus.Desc
collectorScrapeDurationDesc *prometheus.Desc
collectorScrapeSuccessDesc *prometheus.Desc
collectorScrapeTimeoutDesc *prometheus.Desc
}
type (