From 48e0e1106353f3e9ac1aeb739399b674657227cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan-Otto=20Kr=C3=B6pke?= Date: Tue, 1 Oct 2024 10:53:13 +0200 Subject: [PATCH] cpu: add workaround for counter resets related to `% Processor Utility` metric (#1637) --- pkg/collector/cpu/cpu.go | 63 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 61 insertions(+), 2 deletions(-) diff --git a/pkg/collector/cpu/cpu.go b/pkg/collector/cpu/cpu.go index a3482ff3..6805cc0d 100644 --- a/pkg/collector/cpu/cpu.go +++ b/pkg/collector/cpu/cpu.go @@ -27,6 +27,9 @@ type Collector struct { perfDataCollector *perfdata.Collector + processorRTCValues map[string]cpuCounter + processorMPerfValues map[string]cpuCounter + logicalProcessors *prometheus.Desc cStateSecondsTotal *prometheus.Desc timeTotal *prometheus.Desc @@ -43,6 +46,11 @@ type Collector struct { processorPrivilegedUtility *prometheus.Desc } +type cpuCounter struct { + lastValue uint32 + totalValue float64 +} + func New(config *Config) *Collector { if config == nil { config = &ConfigDefaults @@ -221,6 +229,9 @@ func (c *Collector) Build(_ *slog.Logger, _ *wmi.Client) error { nil, ) + c.processorRTCValues = map[string]cpuCounter{} + c.processorMPerfValues = map[string]cpuCounter{} + return nil } @@ -251,6 +262,30 @@ func (c *Collector) collectFull(ctx *types.ScrapeContext, logger *slog.Logger, c core := cpu.Name + if val, ok := c.processorRTCValues[core]; ok { + c.processorRTCValues[core] = cpuCounter{ + uint32(cpu.ProcessorRTC), + val.totalValue + float64(uint32(cpu.ProcessorRTC)-val.lastValue), + } + } else { + c.processorRTCValues[core] = cpuCounter{ + uint32(cpu.ProcessorRTC), + 0, + } + } + + if val, ok := c.processorMPerfValues[core]; ok { + c.processorMPerfValues[core] = cpuCounter{ + uint32(cpu.ProcessorMPerf), + val.totalValue + float64(uint32(cpu.ProcessorMPerf)-val.lastValue), + } + } else { + c.processorMPerfValues[core] = cpuCounter{ + uint32(cpu.ProcessorMPerf), + 0, + } + } + coreCount++ ch <- prometheus.MustNewConstMetric( @@ -350,13 +385,13 @@ func (c *Collector) collectFull(ctx *types.ScrapeContext, logger *slog.Logger, c ch <- prometheus.MustNewConstMetric( c.processorMPerf, prometheus.CounterValue, - cpu.ProcessorMPerf, + c.processorMPerfValues[core].totalValue, core, ) ch <- prometheus.MustNewConstMetric( c.processorRTC, prometheus.CounterValue, - cpu.ProcessorRTC, + c.processorRTCValues[core].totalValue, core, ) ch <- prometheus.MustNewConstMetric( @@ -393,6 +428,30 @@ func (c *Collector) collectPDH(ch chan<- prometheus.Metric) error { for core, coreData := range data { coreCount++ + if val, ok := c.processorRTCValues[core]; ok { + c.processorRTCValues[core] = cpuCounter{ + uint32(coreData[privilegedUtilitySeconds].SecondValue), + val.totalValue + float64(uint32(coreData[privilegedUtilitySeconds].SecondValue)-val.lastValue), + } + } else { + c.processorRTCValues[core] = cpuCounter{ + uint32(coreData[privilegedUtilitySeconds].SecondValue), + 0, + } + } + + if val, ok := c.processorMPerfValues[core]; ok { + c.processorMPerfValues[core] = cpuCounter{ + uint32(coreData[processorPerformance].SecondValue), + val.totalValue + float64(uint32(coreData[processorPerformance].SecondValue)-val.lastValue), + } + } else { + c.processorMPerfValues[core] = cpuCounter{ + uint32(coreData[processorPerformance].SecondValue), + 0, + } + } + ch <- prometheus.MustNewConstMetric( c.cStateSecondsTotal, prometheus.CounterValue,