process: add collector.process.counter-version CLI parameter (#2064)

This commit is contained in:
Jan-Otto Kröpke
2025-05-31 08:46:30 +02:00
committed by GitHub
parent 298d820bd6
commit e673f192d2
7 changed files with 93 additions and 400 deletions

1
.idea/go.imports.xml generated
View File

@@ -4,6 +4,7 @@
<option name="excludedPackages">
<array>
<option value="github.com/pkg/errors" />
<option value="golang.org/x/net/context" />
</array>
</option>
</component>

View File

@@ -37,6 +37,11 @@ Enables IIS process name queries. IIS process names are combined with their app
Disabled by default, and can be enabled with `--collector.process.iis`. NOTE: Pass the flag on its own, without a `true` value.
### `--collector.process.counter-version`
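Version of the process performance counter object to use: `1` for Process V1, `2` for Process V2.
Defaults to `0`, which uses the latest version available (Process V2, falling back to Process V1 if Process V2 does not exist on the host).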
### Example
To match all firefox processes: `--collector.process.include="firefox.*"`.

View File

@@ -18,6 +18,7 @@
package process
import (
"context"
"errors"
"fmt"
"log/slog"
@@ -31,6 +32,7 @@ import (
"github.com/prometheus-community/windows_exporter/internal/mi"
"github.com/prometheus-community/windows_exporter/internal/pdh"
"github.com/prometheus-community/windows_exporter/internal/pdh/registry"
pdhtypes "github.com/prometheus-community/windows_exporter/internal/pdh/types"
"github.com/prometheus-community/windows_exporter/internal/types"
"github.com/prometheus/client_golang/prometheus"
"golang.org/x/sys/windows"
@@ -42,6 +44,7 @@ type Config struct {
ProcessInclude *regexp.Regexp `yaml:"include"`
ProcessExclude *regexp.Regexp `yaml:"exclude"`
EnableWorkerProcess bool `yaml:"iis"`
CounterVersion uint8 `yaml:"counter-version"`
}
//nolint:gochecknoglobals
@@ -49,6 +52,7 @@ var ConfigDefaults = Config{
ProcessInclude: types.RegExpAny,
ProcessExclude: types.RegExpEmpty,
EnableWorkerProcess: false,
CounterVersion: 0,
}
type Collector struct {
@@ -59,10 +63,9 @@ type Collector struct {
miSession *mi.Session
workerProcessMIQueryQuery mi.Query
collectorVersion int
collectorV1
collectorV2
perfDataCollector pdhtypes.Collector
perfDataObject []perfDataCounterValues
workerCh chan processWorkerRequest
lookupCache sync.Map
@@ -130,6 +133,11 @@ func NewWithFlags(app *kingpin.Application) *Collector {
"Enable IIS collectWorker process name queries. May cause the collector to leak memory.",
).Default(strconv.FormatBool(c.config.EnableWorkerProcess)).BoolVar(&c.config.EnableWorkerProcess)
app.Flag(
"collector.process.counter-version",
"Version of the process collector to use. 1 for Process V1, 2 for Process V2. Defaults to 0 which will use the latest version available.",
).Default(strconv.FormatUint(uint64(c.config.CounterVersion), 10)).Uint8Var(&c.config.CounterVersion)
app.Action(func(*kingpin.ParseContext) error {
var err error
@@ -157,8 +165,12 @@ func (c *Collector) Close() error {
c.mu.Lock()
defer c.mu.Unlock()
c.closeV1()
c.closeV2()
c.perfDataCollector.Close()
if c.workerCh != nil {
close(c.workerCh)
c.workerCh = nil
}
return nil
}
@@ -166,6 +178,9 @@ func (c *Collector) Close() error {
func (c *Collector) Build(logger *slog.Logger, miSession *mi.Session) error {
c.logger = logger.With(slog.String("collector", Name))
var err error
if c.config.EnableWorkerProcess {
if miSession == nil {
return errors.New("miSession is nil")
}
@@ -177,31 +192,33 @@ func (c *Collector) Build(logger *slog.Logger, miSession *mi.Session) error {
c.workerProcessMIQueryQuery = miQuery
c.miSession = miSession
}
c.collectorVersion = 2
c.perfDataCollectorV2, err = pdh.NewCollector[perfDataCounterValuesV2](pdh.CounterTypeRaw, "Process V2", pdh.InstancesAll)
switch c.config.CounterVersion {
case 2:
c.perfDataCollector, err = pdh.NewCollector[perfDataCounterValues](pdh.CounterTypeRaw, "Process V2", pdh.InstancesAll)
case 1:
c.perfDataCollector, err = registry.NewCollector[perfDataCounterValues]("Process", pdh.InstancesAll)
default:
c.perfDataCollector, err = pdh.NewCollector[perfDataCounterValues](pdh.CounterTypeRaw, "Process V2", pdh.InstancesAll)
c.config.CounterVersion = 2
if errors.Is(err, pdh.NewPdhError(pdh.CstatusNoObject)) {
c.collectorVersion = 1
c.perfDataCollectorV1, err = registry.NewCollector[perfDataCounterValuesV1]("Process", pdh.InstancesAll)
c.perfDataCollector, err = registry.NewCollector[perfDataCounterValues]("Process", pdh.InstancesAll)
c.config.CounterVersion = 1
}
c.logger.LogAttrs(context.Background(), slog.LevelDebug, fmt.Sprintf("Using process collector V%d", c.config.CounterVersion))
}
if err != nil {
return fmt.Errorf("failed to create Process collector: %w", err)
return fmt.Errorf("failed to create Process V%d collector: %w", c.config.CounterVersion, err)
}
if c.collectorVersion == 1 {
c.workerChV1 = make(chan processWorkerRequestV1, 32)
c.workerCh = make(chan processWorkerRequest, 32)
for range 4 {
go c.collectWorkerV1()
}
} else {
c.workerChV2 = make(chan processWorkerRequestV2, 32)
for range 4 {
go c.collectWorkerV2()
}
go c.collectWorker()
}
c.mu = sync.RWMutex{}
@@ -320,18 +337,7 @@ func (c *Collector) Build(logger *slog.Logger, miSession *mi.Session) error {
}
func (c *Collector) Collect(ch chan<- prometheus.Metric) error {
var workerProcesses []WorkerProcess
if c.config.EnableWorkerProcess {
if err := c.miSession.Query(&workerProcesses, mi.NamespaceRootWebAdministration, c.workerProcessMIQueryQuery); err != nil {
return fmt.Errorf("WMI query failed: %w", err)
}
}
if c.collectorVersion == 1 {
return c.collectV1(ch, workerProcesses)
}
return c.collectV2(ch, workerProcesses)
return c.collect(ch)
}
// ref: https://github.com/microsoft/hcsshim/blob/8beabacfc2d21767a07c20f8dd5f9f3932dbf305/internal/uvm/stats.go#L25

View File

@@ -1,294 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
//
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build windows
package process
import (
"context"
"fmt"
"log/slog"
"runtime/debug"
"strconv"
"strings"
"sync"
"github.com/prometheus-community/windows_exporter/internal/pdh/registry"
"github.com/prometheus/client_golang/prometheus"
)
// collectorV1 groups the state used by the V1 code path of the process
// collector (closeV1, collectV1 and collectWorkerV1).
type collectorV1 struct {
// perfDataCollectorV1 is the registry-based collector that collectV1 reads from.
perfDataCollectorV1 *registry.Collector
// perfDataObjectV1 is the destination slice handed to perfDataCollectorV1.Collect
// on each collectV1 call.
perfDataObjectV1 []perfDataCounterValuesV1
// workerChV1 carries one request per process from collectV1 to the
// collectWorkerV1 goroutines; closeV1 closes it and resets it to nil.
workerChV1 chan processWorkerRequestV1
}
// processWorkerRequestV1 is the unit of work collectV1 sends to a
// collectWorkerV1 goroutine: the counter values of a single process plus
// everything the worker needs to emit its metrics.
type processWorkerRequestV1 struct {
// ch is the metric channel the worker writes the resulting metrics to.
ch chan<- prometheus.Metric
// name is the process name as prepared by collectV1 (instance suffix cut off).
name string
// performanceCounterValues holds the counter values read for this process.
performanceCounterValues perfDataCounterValuesV1
// waitGroup is marked Done by the worker once the request has been handled.
waitGroup *sync.WaitGroup
// workerProcesses is searched by PID to append the app pool name to the
// process name when the EnableWorkerProcess option is set.
workerProcesses []WorkerProcess
}
// closeV1 releases the resources of the V1 code path: it closes the V1
// performance data collector and, if the worker channel exists, closes it and
// resets it to nil so a later call does not close the same channel twice.
func (c *Collector) closeV1() {
	c.perfDataCollectorV1.Close()

	// Nothing left to do when the worker channel was never created or has
	// already been closed.
	if c.workerChV1 == nil {
		return
	}

	close(c.workerChV1)
	c.workerChV1 = nil
}
// collectV1 reads the V1 process performance counters and fans the
// per-process work out to the collectWorkerV1 goroutines.
//
// Every process whose name passes the exclude/include filters is sent to
// c.workerChV1 as a processWorkerRequestV1 carrying ch and workerProcesses.
// The call blocks until all submitted requests have been marked done on the
// shared WaitGroup. An error is returned only when reading the performance
// counters fails.
func (c *Collector) collectV1(ch chan<- prometheus.Metric, workerProcesses []WorkerProcess) error {
	if err := c.perfDataCollectorV1.Collect(&c.perfDataObjectV1); err != nil {
		return fmt.Errorf("failed to collect metrics: %w", err)
	}

	wg := &sync.WaitGroup{}

	for _, process := range c.perfDataObjectV1 {
		// Drop anything after a ":" in the instance name.
		name, _, _ := strings.Cut(process.Name, ":")
		// Duplicate processes are suffixed #, and an index number. Remove those.
		name, _, _ = strings.Cut(name, "#") // Process V1

		if c.config.ProcessExclude.MatchString(name) || !c.config.ProcessInclude.MatchString(name) {
			continue
		}

		wg.Add(1)

		c.workerChV1 <- processWorkerRequestV1{
			ch:                       ch,
			name:                     name,
			performanceCounterValues: process,
			workerProcesses:          workerProcesses,
			waitGroup:                wg,
		}
	}

	// Wait for the workers to finish emitting metrics for this scrape.
	wg.Wait()

	return nil
}
// collectWorkerV1 is the worker loop of the V1 code path.
//
// It receives requests from c.workerChV1 until that channel is closed and, for
// each request, writes the info metric and the per-process metrics to the
// request's metric channel. If handling a request panics, the panic is logged
// together with a stack trace and a replacement worker goroutine is started.
func (c *Collector) collectWorkerV1() {
	defer func() {
		if r := recover(); r != nil {
			c.logger.Error("Worker panic",
				slog.Any("panic", r),
				slog.String("stack", string(debug.Stack())),
			)

			// Restart the collectWorker
			go c.collectWorkerV1()
		}
	}()

	for req := range c.workerChV1 {
		(func() {
			// Deferred so the WaitGroup is released even if this request panics.
			defer req.waitGroup.Done()

			ch := req.ch
			name := req.name
			data := req.performanceCounterValues

			pid := uint64(data.IdProcess)
			parentPID := strconv.FormatUint(uint64(data.CreatingProcessID), 10)

			// With the IIS option enabled, append the app pool name of the
			// matching worker process to the process name.
			if c.config.EnableWorkerProcess {
				for _, wp := range req.workerProcesses {
					if wp.ProcessId == pid {
						name = strings.Join([]string{name, wp.AppPoolName}, "_")

						break
					}
				}
			}

			cmdLine, processOwner, processGroupID, err := c.getProcessInformation(uint32(pid))
			if err != nil {
				// Not fatal: the metrics are still emitted with whatever
				// values were returned. Logged through the collector's own
				// logger, like the panic handler above.
				c.logger.LogAttrs(context.Background(), slog.LevelDebug, "Failed to get process information",
					slog.Uint64("pid", pid),
					slog.Any("err", err),
				)
			}

			pidString := strconv.FormatUint(pid, 10)

			ch <- prometheus.MustNewConstMetric(
				c.info,
				prometheus.GaugeValue,
				1.0,
				name, pidString, parentPID, strconv.Itoa(int(processGroupID)), processOwner, cmdLine,
			)

			// startTime and startTimeOld are both fed from the same counter value.
			ch <- prometheus.MustNewConstMetric(
				c.startTime,
				prometheus.GaugeValue,
				data.ElapsedTime,
				name, pidString,
			)

			ch <- prometheus.MustNewConstMetric(
				c.startTimeOld,
				prometheus.GaugeValue,
				data.ElapsedTime,
				name, pidString,
			)

			ch <- prometheus.MustNewConstMetric(
				c.handleCount,
				prometheus.GaugeValue,
				data.HandleCount,
				name, pidString,
			)

			ch <- prometheus.MustNewConstMetric(
				c.cpuTimeTotal,
				prometheus.CounterValue,
				data.PercentPrivilegedTime,
				name, pidString, "privileged",
			)

			ch <- prometheus.MustNewConstMetric(
				c.cpuTimeTotal,
				prometheus.CounterValue,
				data.PercentUserTime,
				name, pidString, "user",
			)

			ch <- prometheus.MustNewConstMetric(
				c.ioBytesTotal,
				prometheus.CounterValue,
				data.IoOtherBytesPerSec,
				name, pidString, "other",
			)

			ch <- prometheus.MustNewConstMetric(
				c.ioOperationsTotal,
				prometheus.CounterValue,
				data.IoOtherOperationsPerSec,
				name, pidString, "other",
			)

			ch <- prometheus.MustNewConstMetric(
				c.ioBytesTotal,
				prometheus.CounterValue,
				data.IoReadBytesPerSec,
				name, pidString, "read",
			)

			ch <- prometheus.MustNewConstMetric(
				c.ioOperationsTotal,
				prometheus.CounterValue,
				data.IoReadOperationsPerSec,
				name, pidString, "read",
			)

			ch <- prometheus.MustNewConstMetric(
				c.ioBytesTotal,
				prometheus.CounterValue,
				data.IoWriteBytesPerSec,
				name, pidString, "write",
			)

			ch <- prometheus.MustNewConstMetric(
				c.ioOperationsTotal,
				prometheus.CounterValue,
				data.IoWriteOperationsPerSec,
				name, pidString, "write",
			)

			ch <- prometheus.MustNewConstMetric(
				c.pageFaultsTotal,
				prometheus.CounterValue,
				data.PageFaultsPerSec,
				name, pidString,
			)

			ch <- prometheus.MustNewConstMetric(
				c.pageFileBytes,
				prometheus.GaugeValue,
				data.PageFileBytes,
				name, pidString,
			)

			ch <- prometheus.MustNewConstMetric(
				c.poolBytes,
				prometheus.GaugeValue,
				data.PoolNonPagedBytes,
				name, pidString, "nonpaged",
			)

			ch <- prometheus.MustNewConstMetric(
				c.poolBytes,
				prometheus.GaugeValue,
				data.PoolPagedBytes,
				name, pidString, "paged",
			)

			ch <- prometheus.MustNewConstMetric(
				c.priorityBase,
				prometheus.GaugeValue,
				data.PriorityBase,
				name, pidString,
			)

			ch <- prometheus.MustNewConstMetric(
				c.privateBytes,
				prometheus.GaugeValue,
				data.PrivateBytes,
				name, pidString,
			)

			ch <- prometheus.MustNewConstMetric(
				c.threadCount,
				prometheus.GaugeValue,
				data.ThreadCount,
				name, pidString,
			)

			ch <- prometheus.MustNewConstMetric(
				c.virtualBytes,
				prometheus.GaugeValue,
				data.VirtualBytes,
				name, pidString,
			)

			ch <- prometheus.MustNewConstMetric(
				c.workingSetPrivate,
				prometheus.GaugeValue,
				data.WorkingSetPrivate,
				name, pidString,
			)

			ch <- prometheus.MustNewConstMetric(
				c.workingSetPeak,
				prometheus.GaugeValue,
				data.WorkingSetPeak,
				name, pidString,
			)

			ch <- prometheus.MustNewConstMetric(
				c.workingSet,
				prometheus.GaugeValue,
				data.WorkingSet,
				name, pidString,
			)
		})()
	}
}

View File

@@ -27,52 +27,57 @@ import (
"sync"
"time"
"github.com/prometheus-community/windows_exporter/internal/pdh"
"github.com/prometheus-community/windows_exporter/internal/mi"
"github.com/prometheus/client_golang/prometheus"
)
type collectorV2 struct {
perfDataCollectorV2 *pdh.Collector
perfDataObjectV2 []perfDataCounterValuesV2
workerChV2 chan processWorkerRequestV2
}
type processWorkerRequestV2 struct {
type processWorkerRequest struct {
ch chan<- prometheus.Metric
name string
performanceCounterValues perfDataCounterValuesV2
performanceCounterValues perfDataCounterValues
waitGroup *sync.WaitGroup
workerProcesses []WorkerProcess
}
func (c *Collector) closeV2() {
c.perfDataCollectorV2.Close()
if c.workerChV2 != nil {
close(c.workerChV2)
c.workerChV2 = nil
}
}
func (c *Collector) collectV2(ch chan<- prometheus.Metric, workerProcesses []WorkerProcess) error {
err := c.perfDataCollectorV2.Collect(&c.perfDataObjectV2)
func (c *Collector) collect(ch chan<- prometheus.Metric) error {
err := c.perfDataCollector.Collect(&c.perfDataObject)
if err != nil {
return fmt.Errorf("failed to collect metrics: %w", err)
}
var workerProcesses []WorkerProcess
if c.config.EnableWorkerProcess {
if err := c.miSession.Query(&workerProcesses, mi.NamespaceRootWebAdministration, c.workerProcessMIQueryQuery); err != nil {
return fmt.Errorf("WMI query failed: %w", err)
}
}
wg := &sync.WaitGroup{}
for _, process := range c.perfDataObjectV2 {
for _, process := range c.perfDataObject {
// Duplicate processes are suffixed #, and an index number. Remove those.
name, _, _ := strings.Cut(process.Name, ":") // Process V2
// Duplicate processes are suffixed #, and an index number. Remove those.
name, _, _ = strings.Cut(name, "#") // Process V1
if c.config.ProcessExclude.MatchString(name) || !c.config.ProcessInclude.MatchString(name) {
continue
}
if process.ProcessID == 0 && name != "Idle" {
c.logger.LogAttrs(context.Background(), slog.LevelDebug, "Skipping process with PID 0",
slog.String("name", name),
slog.String("process_name", process.Name),
slog.Any("process", fmt.Sprintf("%+v", process)),
)
continue
}
wg.Add(1)
c.workerChV2 <- processWorkerRequestV2{
c.workerCh <- processWorkerRequest{
ch: ch,
name: name,
performanceCounterValues: process,
@@ -86,7 +91,7 @@ func (c *Collector) collectV2(ch chan<- prometheus.Metric, workerProcesses []Wor
return nil
}
func (c *Collector) collectWorkerV2() {
func (c *Collector) collectWorker() {
defer func() {
if r := recover(); r != nil {
c.logger.Error("Worker panic",
@@ -95,11 +100,11 @@ func (c *Collector) collectWorkerV2() {
)
// Restart the collectWorker
go c.collectWorkerV2()
go c.collectWorker()
}
}()
for req := range c.workerChV2 {
for req := range c.workerCh {
(func() {
defer req.waitGroup.Done()

View File

@@ -22,7 +22,7 @@ type WorkerProcess struct {
ProcessId uint64 `mi:"ProcessId"`
}
type perfDataCounterValuesV1 struct {
type perfDataCounterValues struct {
Name string
PercentProcessorTime float64 `perfdata:"% Processor Time"`
@@ -52,38 +52,5 @@ type perfDataCounterValuesV1 struct {
WorkingSetPrivate float64 `perfdata:"Working Set - Private"`
WorkingSetPeak float64 `perfdata:"Working Set Peak"`
WorkingSet float64 `perfdata:"Working Set"`
IdProcess float64 `perfdata:"ID Process"`
}
type perfDataCounterValuesV2 struct {
Name string
PercentProcessorTime float64 `perfdata:"% Processor Time"`
PercentPrivilegedTime float64 `perfdata:"% Privileged Time"`
PercentUserTime float64 `perfdata:"% User Time"`
CreatingProcessID float64 `perfdata:"Creating Process ID"`
ElapsedTime float64 `perfdata:"Elapsed Time"`
HandleCount float64 `perfdata:"Handle Count"`
IoDataBytesPerSec float64 `perfdata:"IO Data Bytes/sec"`
IoDataOperationsPerSec float64 `perfdata:"IO Data Operations/sec"`
IoOtherBytesPerSec float64 `perfdata:"IO Other Bytes/sec"`
IoOtherOperationsPerSec float64 `perfdata:"IO Other Operations/sec"`
IoReadBytesPerSec float64 `perfdata:"IO Read Bytes/sec"`
IoReadOperationsPerSec float64 `perfdata:"IO Read Operations/sec"`
IoWriteBytesPerSec float64 `perfdata:"IO Write Bytes/sec"`
IoWriteOperationsPerSec float64 `perfdata:"IO Write Operations/sec"`
PageFaultsPerSec float64 `perfdata:"Page Faults/sec"`
PageFileBytesPeak float64 `perfdata:"Page File Bytes Peak"`
PageFileBytes float64 `perfdata:"Page File Bytes"`
PoolNonPagedBytes float64 `perfdata:"Pool Nonpaged Bytes"`
PoolPagedBytes float64 `perfdata:"Pool Paged Bytes"`
PriorityBase float64 `perfdata:"Priority Base"`
PrivateBytes float64 `perfdata:"Private Bytes"`
ThreadCount float64 `perfdata:"Thread Count"`
VirtualBytesPeak float64 `perfdata:"Virtual Bytes Peak"`
VirtualBytes float64 `perfdata:"Virtual Bytes"`
WorkingSetPrivate float64 `perfdata:"Working Set - Private"`
WorkingSetPeak float64 `perfdata:"Working Set Peak"`
WorkingSet float64 `perfdata:"Working Set"`
ProcessID float64 `perfdata:"Process ID"`
ProcessID float64 `perfdata:"Process ID" perfdata_v1:"ID Process"`
}

View File

@@ -63,10 +63,13 @@ func NewCollector[T any](object string, _ []string) (*Collector, error) {
}
for _, f := range reflect.VisibleFields(valueType) {
counterName, ok := f.Tag.Lookup("perfdata")
counterName, ok := f.Tag.Lookup("perfdata_v1")
if !ok {
counterName, ok = f.Tag.Lookup("perfdata")
if !ok {
continue
}
}
var counter Counter
if counter, ok = collector.counters[counterName]; !ok {