process: add collector.process.counter-version CLI parameter (#2064)

Author: Jan-Otto Kröpke
Date: 2025-05-31 08:46:30 +02:00
Committed by: GitHub
Parent: 298d820bd6
Commit: e673f192d2
7 changed files with 93 additions and 400 deletions

.idea/go.imports.xml (generated)

@@ -4,6 +4,7 @@
     <option name="excludedPackages">
       <array>
         <option value="github.com/pkg/errors" />
+        <option value="golang.org/x/net/context" />
       </array>
     </option>
   </component>


@@ -37,6 +37,11 @@ Enables IIS process name queries. IIS process names are combined with their app
 Disabled by default, and can be enabled with `--collector.process.iis`. NOTE: Just plain parameter without `true`.
 
+### `--collector.process.counter-version`
+
+Version of the process collector to use. 1 for Process V1, 2 for Process V2.
+Defaults to 0 which will use the latest version available.
+
 ### Example
 
 To match all firefox processes: `--collector.process.include="firefox.*"`.
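
A usage example for the new flag, in the same style as the docs above: to pin the collector to the registry-based Process V1 counters, start the exporter with `--collector.process.counter-version=1`; omit the flag (or pass `0`) to let it probe for the Process V2 object first and fall back to V1.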


@@ -18,6 +18,7 @@
 package process
 
 import (
+	"context"
 	"errors"
 	"fmt"
 	"log/slog"
@@ -31,6 +32,7 @@ import (
 	"github.com/prometheus-community/windows_exporter/internal/mi"
 	"github.com/prometheus-community/windows_exporter/internal/pdh"
 	"github.com/prometheus-community/windows_exporter/internal/pdh/registry"
+	pdhtypes "github.com/prometheus-community/windows_exporter/internal/pdh/types"
 	"github.com/prometheus-community/windows_exporter/internal/types"
 	"github.com/prometheus/client_golang/prometheus"
 	"golang.org/x/sys/windows"
@@ -42,6 +44,7 @@ type Config struct {
 	ProcessInclude      *regexp.Regexp `yaml:"include"`
 	ProcessExclude      *regexp.Regexp `yaml:"exclude"`
 	EnableWorkerProcess bool           `yaml:"iis"`
+	CounterVersion      uint8          `yaml:"counter-version"`
 }
 
 //nolint:gochecknoglobals
@@ -49,6 +52,7 @@ var ConfigDefaults = Config{
 	ProcessInclude:      types.RegExpAny,
 	ProcessExclude:      types.RegExpEmpty,
 	EnableWorkerProcess: false,
+	CounterVersion:      0,
 }
 
 type Collector struct {
@@ -59,10 +63,9 @@ type Collector struct {
 	miSession                 *mi.Session
 	workerProcessMIQueryQuery mi.Query
 
-	collectorVersion int
+	perfDataCollector pdhtypes.Collector
+	perfDataObject    []perfDataCounterValues
 
-	collectorV1
-	collectorV2
+	workerCh chan processWorkerRequest
 
 	lookupCache sync.Map
@@ -130,6 +133,11 @@ func NewWithFlags(app *kingpin.Application) *Collector {
 		"Enable IIS collectWorker process name queries. May cause the collector to leak memory.",
 	).Default(strconv.FormatBool(c.config.EnableWorkerProcess)).BoolVar(&c.config.EnableWorkerProcess)
 
+	app.Flag(
+		"collector.process.counter-version",
+		"Version of the process collector to use. 1 for Process V1, 2 for Process V2. Defaults to 0 which will use the latest version available.",
+	).Default(strconv.FormatUint(uint64(c.config.CounterVersion), 10)).Uint8Var(&c.config.CounterVersion)
+
 	app.Action(func(*kingpin.ParseContext) error {
 		var err error
@@ -157,8 +165,12 @@ func (c *Collector) Close() error {
 	c.mu.Lock()
 	defer c.mu.Unlock()
 
-	c.closeV1()
-	c.closeV2()
+	c.perfDataCollector.Close()
+
+	if c.workerCh != nil {
+		close(c.workerCh)
+		c.workerCh = nil
+	}
 
 	return nil
 }
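
Close now tears down the shared worker channel directly. In Go, closing a nil or already-closed channel panics, so the nil-check-then-clear under the collector's mutex is what makes Close safe to call more than once. A minimal sketch of that idiom (the Closer type is illustrative, not the exporter's API):

package main

import "sync"

// Closer owns a work channel and makes shutdown idempotent: the first
// Close closes the channel and clears the field, later calls are no-ops.
type Closer struct {
	mu sync.Mutex
	ch chan int
}

func (c *Closer) Close() {
	c.mu.Lock()
	defer c.mu.Unlock()

	if c.ch != nil {
		close(c.ch) // close exactly once; closing twice would panic
		c.ch = nil
	}
}

func main() {
	c := &Closer{ch: make(chan int)}
	c.Close()
	c.Close() // safe: the second call sees a nil channel and returns
}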
@@ -166,42 +178,47 @@ func (c *Collector) Close() error {
 func (c *Collector) Build(logger *slog.Logger, miSession *mi.Session) error {
 	c.logger = logger.With(slog.String("collector", Name))
 
-	if miSession == nil {
-		return errors.New("miSession is nil")
-	}
+	var err error
 
-	miQuery, err := mi.NewQuery("SELECT AppPoolName, ProcessId FROM WorkerProcess")
-	if err != nil {
-		return fmt.Errorf("failed to create WMI query: %w", err)
-	}
+	if c.config.EnableWorkerProcess {
+		if miSession == nil {
+			return errors.New("miSession is nil")
+		}
 
-	c.workerProcessMIQueryQuery = miQuery
-	c.miSession = miSession
+		miQuery, err := mi.NewQuery("SELECT AppPoolName, ProcessId FROM WorkerProcess")
+		if err != nil {
+			return fmt.Errorf("failed to create WMI query: %w", err)
+		}
 
-	c.collectorVersion = 2
-	c.perfDataCollectorV2, err = pdh.NewCollector[perfDataCounterValuesV2](pdh.CounterTypeRaw, "Process V2", pdh.InstancesAll)
-	if errors.Is(err, pdh.NewPdhError(pdh.CstatusNoObject)) {
-		c.collectorVersion = 1
-		c.perfDataCollectorV1, err = registry.NewCollector[perfDataCounterValuesV1]("Process", pdh.InstancesAll)
+		c.workerProcessMIQueryQuery = miQuery
+		c.miSession = miSession
+	}
+
+	switch c.config.CounterVersion {
+	case 2:
+		c.perfDataCollector, err = pdh.NewCollector[perfDataCounterValues](pdh.CounterTypeRaw, "Process V2", pdh.InstancesAll)
+	case 1:
+		c.perfDataCollector, err = registry.NewCollector[perfDataCounterValues]("Process", pdh.InstancesAll)
+	default:
+		c.perfDataCollector, err = pdh.NewCollector[perfDataCounterValues](pdh.CounterTypeRaw, "Process V2", pdh.InstancesAll)
+		c.config.CounterVersion = 2
+
+		if errors.Is(err, pdh.NewPdhError(pdh.CstatusNoObject)) {
+			c.perfDataCollector, err = registry.NewCollector[perfDataCounterValues]("Process", pdh.InstancesAll)
+			c.config.CounterVersion = 1
+		}
+
+		c.logger.LogAttrs(context.Background(), slog.LevelDebug, fmt.Sprintf("Using process collector V%d", c.config.CounterVersion))
 	}
 
 	if err != nil {
-		return fmt.Errorf("failed to create Process collector: %w", err)
+		return fmt.Errorf("failed to create Process V%d collector: %w", c.config.CounterVersion, err)
 	}
 
-	if c.collectorVersion == 1 {
-		c.workerChV1 = make(chan processWorkerRequestV1, 32)
-		for range 4 {
-			go c.collectWorkerV1()
-		}
-	} else {
-		c.workerChV2 = make(chan processWorkerRequestV2, 32)
-		for range 4 {
-			go c.collectWorkerV2()
-		}
+	c.workerCh = make(chan processWorkerRequest, 32)
+	for range 4 {
+		go c.collectWorker()
 	}
 
 	c.mu = sync.RWMutex{}
@@ -320,18 +337,7 @@ func (c *Collector) Build(logger *slog.Logger, miSession *mi.Session) error {
 }
 
 func (c *Collector) Collect(ch chan<- prometheus.Metric) error {
-	var workerProcesses []WorkerProcess
-
-	if c.config.EnableWorkerProcess {
-		if err := c.miSession.Query(&workerProcesses, mi.NamespaceRootWebAdministration, c.workerProcessMIQueryQuery); err != nil {
-			return fmt.Errorf("WMI query failed: %w", err)
-		}
-	}
-
-	if c.collectorVersion == 1 {
-		return c.collectV1(ch, workerProcesses)
-	}
-
-	return c.collectV2(ch, workerProcesses)
+	return c.collect(ch)
 }
 
 // ref: https://github.com/microsoft/hcsshim/blob/8beabacfc2d21767a07c20f8dd5f9f3932dbf305/internal/uvm/stats.go#L25
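
The selection logic in Build is easiest to see in isolation. Below is a minimal, runnable sketch of the same switch-plus-fallback flow; newPdhCollector, newRegistryCollector, errNoObject, and hasProcessV2 are hypothetical stand-ins for the internal pdh/registry packages and the PDH "no such object" status, not the exporter's real API.

package main

import (
	"errors"
	"fmt"
)

// errNoObject stands in for pdh.NewPdhError(pdh.CstatusNoObject):
// the requested performance object does not exist on this host.
var errNoObject = errors.New("pdh: the specified object was not found on the computer")

// hasProcessV2 simulates whether this host exposes the "Process V2"
// performance object (missing on older Windows versions).
var hasProcessV2 = false

type collector struct{ source, object string }

func newPdhCollector(object string) (collector, error) {
	if object == "Process V2" && !hasProcessV2 {
		return collector{}, errNoObject
	}

	return collector{source: "pdh", object: object}, nil
}

func newRegistryCollector(object string) (collector, error) {
	return collector{source: "registry", object: object}, nil
}

// newProcessCollector mirrors the switch added to Build: 1 and 2 pin a
// counter source, while 0 prefers "Process V2" and falls back to the
// registry-based V1 source when the object is missing.
func newProcessCollector(version uint8) (collector, uint8, error) {
	switch version {
	case 2:
		c, err := newPdhCollector("Process V2")
		return c, 2, err
	case 1:
		c, err := newRegistryCollector("Process")
		return c, 1, err
	default:
		c, err := newPdhCollector("Process V2")
		if errors.Is(err, errNoObject) {
			c, err = newRegistryCollector("Process")
			return c, 1, err
		}

		return c, 2, err
	}
}

func main() {
	c, version, err := newProcessCollector(0)
	fmt.Printf("source=%s object=%q version=%d err=%v\n", c.source, c.object, version, err)
}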


@@ -1,294 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-//
-// Copyright The Prometheus Authors
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-//go:build windows
-
-package process
-
-import (
-	"context"
-	"fmt"
-	"log/slog"
-	"runtime/debug"
-	"strconv"
-	"strings"
-	"sync"
-
-	"github.com/prometheus-community/windows_exporter/internal/pdh/registry"
-	"github.com/prometheus/client_golang/prometheus"
-)
-
-type collectorV1 struct {
-	perfDataCollectorV1 *registry.Collector
-	perfDataObjectV1    []perfDataCounterValuesV1
-
-	workerChV1 chan processWorkerRequestV1
-}
-
-type processWorkerRequestV1 struct {
-	ch                       chan<- prometheus.Metric
-	name                     string
-	performanceCounterValues perfDataCounterValuesV1
-	waitGroup                *sync.WaitGroup
-	workerProcesses          []WorkerProcess
-}
-
-func (c *Collector) closeV1() {
-	c.perfDataCollectorV1.Close()
-
-	if c.workerChV1 != nil {
-		close(c.workerChV1)
-		c.workerChV1 = nil
-	}
-}
-
-func (c *Collector) collectV1(ch chan<- prometheus.Metric, workerProcesses []WorkerProcess) error {
-	err := c.perfDataCollectorV1.Collect(&c.perfDataObjectV1)
-	if err != nil {
-		return fmt.Errorf("failed to collect metrics: %w", err)
-	}
-
-	wg := &sync.WaitGroup{}
-
-	for _, process := range c.perfDataObjectV1 {
-		// Duplicate processes are suffixed #, and an index number. Remove those.
-		name, _, _ := strings.Cut(process.Name, ":") // Process V1
-
-		if c.config.ProcessExclude.MatchString(name) || !c.config.ProcessInclude.MatchString(name) {
-			continue
-		}
-
-		wg.Add(1)
-
-		c.workerChV1 <- processWorkerRequestV1{
-			ch:                       ch,
-			name:                     name,
-			performanceCounterValues: process,
-			workerProcesses:          workerProcesses,
-			waitGroup:                wg,
-		}
-	}
-
-	wg.Wait()
-
-	return nil
-}
-
-func (c *Collector) collectWorkerV1() {
-	defer func() {
-		if r := recover(); r != nil {
-			c.logger.Error("Worker panic",
-				slog.Any("panic", r),
-				slog.String("stack", string(debug.Stack())),
-			)
-
-			// Restart the collectWorker
-			go c.collectWorkerV1()
-		}
-	}()
-
-	for req := range c.workerChV1 {
-		(func() {
-			defer req.waitGroup.Done()
-
-			ch := req.ch
-			name := req.name
-			data := req.performanceCounterValues
-
-			pid := uint64(data.IdProcess)
-			parentPID := strconv.FormatUint(uint64(data.CreatingProcessID), 10)
-
-			if c.config.EnableWorkerProcess {
-				for _, wp := range req.workerProcesses {
-					if wp.ProcessId == pid {
-						name = strings.Join([]string{name, wp.AppPoolName}, "_")
-
-						break
-					}
-				}
-			}
-
-			cmdLine, processOwner, processGroupID, err := c.getProcessInformation(uint32(pid))
-			if err != nil {
-				slog.LogAttrs(context.Background(), slog.LevelDebug, "Failed to get process information",
-					slog.Uint64("pid", pid),
-					slog.Any("err", err),
-				)
-			}
-
-			pidString := strconv.FormatUint(pid, 10)
-
-			ch <- prometheus.MustNewConstMetric(
-				c.info,
-				prometheus.GaugeValue,
-				1.0,
-				name, pidString, parentPID, strconv.Itoa(int(processGroupID)), processOwner, cmdLine,
-			)
-
-			ch <- prometheus.MustNewConstMetric(
-				c.startTime,
-				prometheus.GaugeValue,
-				data.ElapsedTime,
-				name, pidString,
-			)
-
-			ch <- prometheus.MustNewConstMetric(
-				c.startTimeOld,
-				prometheus.GaugeValue,
-				data.ElapsedTime,
-				name, pidString,
-			)
-
-			ch <- prometheus.MustNewConstMetric(
-				c.handleCount,
-				prometheus.GaugeValue,
-				data.HandleCount,
-				name, pidString,
-			)
-
-			ch <- prometheus.MustNewConstMetric(
-				c.cpuTimeTotal,
-				prometheus.CounterValue,
-				data.PercentPrivilegedTime,
-				name, pidString, "privileged",
-			)
-
-			ch <- prometheus.MustNewConstMetric(
-				c.cpuTimeTotal,
-				prometheus.CounterValue,
-				data.PercentUserTime,
-				name, pidString, "user",
-			)
-
-			ch <- prometheus.MustNewConstMetric(
-				c.ioBytesTotal,
-				prometheus.CounterValue,
-				data.IoOtherBytesPerSec,
-				name, pidString, "other",
-			)
-
-			ch <- prometheus.MustNewConstMetric(
-				c.ioOperationsTotal,
-				prometheus.CounterValue,
-				data.IoOtherOperationsPerSec,
-				name, pidString, "other",
-			)
-
-			ch <- prometheus.MustNewConstMetric(
-				c.ioBytesTotal,
-				prometheus.CounterValue,
-				data.IoReadBytesPerSec,
-				name, pidString, "read",
-			)
-
-			ch <- prometheus.MustNewConstMetric(
-				c.ioOperationsTotal,
-				prometheus.CounterValue,
-				data.IoReadOperationsPerSec,
-				name, pidString, "read",
-			)
-
-			ch <- prometheus.MustNewConstMetric(
-				c.ioBytesTotal,
-				prometheus.CounterValue,
-				data.IoWriteBytesPerSec,
-				name, pidString, "write",
-			)
-
-			ch <- prometheus.MustNewConstMetric(
-				c.ioOperationsTotal,
-				prometheus.CounterValue,
-				data.IoWriteOperationsPerSec,
-				name, pidString, "write",
-			)
-
-			ch <- prometheus.MustNewConstMetric(
-				c.pageFaultsTotal,
-				prometheus.CounterValue,
-				data.PageFaultsPerSec,
-				name, pidString,
-			)
-
-			ch <- prometheus.MustNewConstMetric(
-				c.pageFileBytes,
-				prometheus.GaugeValue,
-				data.PageFileBytes,
-				name, pidString,
-			)
-
-			ch <- prometheus.MustNewConstMetric(
-				c.poolBytes,
-				prometheus.GaugeValue,
-				data.PoolNonPagedBytes,
-				name, pidString, "nonpaged",
-			)
-
-			ch <- prometheus.MustNewConstMetric(
-				c.poolBytes,
-				prometheus.GaugeValue,
-				data.PoolPagedBytes,
-				name, pidString, "paged",
-			)
-
-			ch <- prometheus.MustNewConstMetric(
-				c.priorityBase,
-				prometheus.GaugeValue,
-				data.PriorityBase,
-				name, pidString,
-			)
-
-			ch <- prometheus.MustNewConstMetric(
-				c.privateBytes,
-				prometheus.GaugeValue,
-				data.PrivateBytes,
-				name, pidString,
-			)
-
-			ch <- prometheus.MustNewConstMetric(
-				c.threadCount,
-				prometheus.GaugeValue,
-				data.ThreadCount,
-				name, pidString,
-			)
-
-			ch <- prometheus.MustNewConstMetric(
-				c.virtualBytes,
-				prometheus.GaugeValue,
-				data.VirtualBytes,
-				name, pidString,
-			)
-
-			ch <- prometheus.MustNewConstMetric(
-				c.workingSetPrivate,
-				prometheus.GaugeValue,
-				data.WorkingSetPrivate,
-				name, pidString,
-			)
-
-			ch <- prometheus.MustNewConstMetric(
-				c.workingSetPeak,
-				prometheus.GaugeValue,
-				data.WorkingSetPeak,
-				name, pidString,
-			)
-
-			ch <- prometheus.MustNewConstMetric(
-				c.workingSet,
-				prometheus.GaugeValue,
-				data.WorkingSet,
-				name, pidString,
-			)
-		})()
-	}
-}


@@ -27,52 +27,57 @@ import (
 	"sync"
 	"time"
 
-	"github.com/prometheus-community/windows_exporter/internal/pdh"
+	"github.com/prometheus-community/windows_exporter/internal/mi"
 	"github.com/prometheus/client_golang/prometheus"
 )
 
-type collectorV2 struct {
-	perfDataCollectorV2 *pdh.Collector
-	perfDataObjectV2    []perfDataCounterValuesV2
-
-	workerChV2 chan processWorkerRequestV2
-}
-
-type processWorkerRequestV2 struct {
+type processWorkerRequest struct {
 	ch                       chan<- prometheus.Metric
 	name                     string
-	performanceCounterValues perfDataCounterValuesV2
+	performanceCounterValues perfDataCounterValues
 	waitGroup                *sync.WaitGroup
 	workerProcesses          []WorkerProcess
 }
 
-func (c *Collector) closeV2() {
-	c.perfDataCollectorV2.Close()
-
-	if c.workerChV2 != nil {
-		close(c.workerChV2)
-		c.workerChV2 = nil
-	}
-}
-
-func (c *Collector) collectV2(ch chan<- prometheus.Metric, workerProcesses []WorkerProcess) error {
-	err := c.perfDataCollectorV2.Collect(&c.perfDataObjectV2)
+func (c *Collector) collect(ch chan<- prometheus.Metric) error {
+	err := c.perfDataCollector.Collect(&c.perfDataObject)
 	if err != nil {
 		return fmt.Errorf("failed to collect metrics: %w", err)
 	}
 
+	var workerProcesses []WorkerProcess
+
+	if c.config.EnableWorkerProcess {
+		if err := c.miSession.Query(&workerProcesses, mi.NamespaceRootWebAdministration, c.workerProcessMIQueryQuery); err != nil {
+			return fmt.Errorf("WMI query failed: %w", err)
+		}
+	}
+
 	wg := &sync.WaitGroup{}
 
-	for _, process := range c.perfDataObjectV2 {
+	for _, process := range c.perfDataObject {
 		// Duplicate processes are suffixed #, and an index number. Remove those.
 		name, _, _ := strings.Cut(process.Name, ":") // Process V2
+		// Duplicate processes are suffixed #, and an index number. Remove those.
+		name, _, _ = strings.Cut(name, "#") // Process V1
 
 		if c.config.ProcessExclude.MatchString(name) || !c.config.ProcessInclude.MatchString(name) {
 			continue
 		}
 
+		if process.ProcessID == 0 && name != "Idle" {
+			c.logger.LogAttrs(context.Background(), slog.LevelDebug, "Skipping process with PID 0",
+				slog.String("name", name),
+				slog.String("process_name", process.Name),
+				slog.Any("process", fmt.Sprintf("%+v", process)),
+			)
+
+			continue
+		}
+
 		wg.Add(1)
 
-		c.workerChV2 <- processWorkerRequestV2{
+		c.workerCh <- processWorkerRequest{
 			ch:                       ch,
 			name:                     name,
 			performanceCounterValues: process,
@@ -86,7 +91,7 @@ func (c *Collector) collectV2(ch chan<- prometheus.Metric, workerProcesses []Wor
 	return nil
 }
 
-func (c *Collector) collectWorkerV2() {
+func (c *Collector) collectWorker() {
 	defer func() {
 		if r := recover(); r != nil {
 			c.logger.Error("Worker panic",
@@ -95,11 +100,11 @@ func (c *Collector) collectWorkerV2() {
 			)
 
 			// Restart the collectWorker
-			go c.collectWorkerV2()
+			go c.collectWorker()
 		}
 	}()
 
-	for req := range c.workerChV2 {
+	for req := range c.workerCh {
 		(func() {
 			defer req.waitGroup.Done()
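
With the V1 path gone, a single worker pool serves both counter versions. The pattern — a buffered channel, a fixed number of goroutines, a per-scrape WaitGroup, and a panic handler that respawns the worker — is sketched standalone below; the request type and metric emission are simplified stand-ins, not the exporter's types.

package main

import (
	"fmt"
	"log/slog"
	"runtime/debug"
	"sync"
)

// request stands in for processWorkerRequest: one process's counter
// values plus the WaitGroup used to signal completion back to collect.
type request struct {
	name string
	wg   *sync.WaitGroup
}

type pool struct {
	logger *slog.Logger
	ch     chan request
}

// worker drains the channel; a recovered panic logs the stack and
// respawns the goroutine so one bad entry cannot shrink the pool.
func (p *pool) worker() {
	defer func() {
		if r := recover(); r != nil {
			p.logger.Error("Worker panic",
				slog.Any("panic", r),
				slog.String("stack", string(debug.Stack())),
			)

			go p.worker() // restart, as the collector does
		}
	}()

	for req := range p.ch {
		func() {
			defer req.wg.Done()

			fmt.Println("emitting metrics for", req.name)
		}()
	}
}

func main() {
	p := &pool{logger: slog.Default(), ch: make(chan request, 32)}
	for range 4 { // same fan-out as the collector
		go p.worker()
	}

	wg := &sync.WaitGroup{}
	for _, name := range []string{"firefox", "svchost"} {
		wg.Add(1)
		p.ch <- request{name: name, wg: wg}
	}

	wg.Wait()
	close(p.ch)
}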


@@ -22,7 +22,7 @@ type WorkerProcess struct {
 	ProcessId   uint64 `mi:"ProcessId"`
 }
 
-type perfDataCounterValuesV1 struct {
+type perfDataCounterValues struct {
 	Name string
 
 	PercentProcessorTime float64 `perfdata:"% Processor Time"`
@@ -52,38 +52,5 @@ type perfDataCounterValuesV1 struct {
 	WorkingSetPrivate       float64 `perfdata:"Working Set - Private"`
 	WorkingSetPeak          float64 `perfdata:"Working Set Peak"`
 	WorkingSet              float64 `perfdata:"Working Set"`
-	IdProcess               float64 `perfdata:"ID Process"`
-}
-
-type perfDataCounterValuesV2 struct {
-	Name string
-
-	PercentProcessorTime    float64 `perfdata:"% Processor Time"`
-	PercentPrivilegedTime   float64 `perfdata:"% Privileged Time"`
-	PercentUserTime         float64 `perfdata:"% User Time"`
-	CreatingProcessID       float64 `perfdata:"Creating Process ID"`
-	ElapsedTime             float64 `perfdata:"Elapsed Time"`
-	HandleCount             float64 `perfdata:"Handle Count"`
-	IoDataBytesPerSec       float64 `perfdata:"IO Data Bytes/sec"`
-	IoDataOperationsPerSec  float64 `perfdata:"IO Data Operations/sec"`
-	IoOtherBytesPerSec      float64 `perfdata:"IO Other Bytes/sec"`
-	IoOtherOperationsPerSec float64 `perfdata:"IO Other Operations/sec"`
-	IoReadBytesPerSec       float64 `perfdata:"IO Read Bytes/sec"`
-	IoReadOperationsPerSec  float64 `perfdata:"IO Read Operations/sec"`
-	IoWriteBytesPerSec      float64 `perfdata:"IO Write Bytes/sec"`
-	IoWriteOperationsPerSec float64 `perfdata:"IO Write Operations/sec"`
-	PageFaultsPerSec        float64 `perfdata:"Page Faults/sec"`
-	PageFileBytesPeak       float64 `perfdata:"Page File Bytes Peak"`
-	PageFileBytes           float64 `perfdata:"Page File Bytes"`
-	PoolNonPagedBytes       float64 `perfdata:"Pool Nonpaged Bytes"`
-	PoolPagedBytes          float64 `perfdata:"Pool Paged Bytes"`
-	PriorityBase            float64 `perfdata:"Priority Base"`
-	PrivateBytes            float64 `perfdata:"Private Bytes"`
-	ThreadCount             float64 `perfdata:"Thread Count"`
-	VirtualBytesPeak        float64 `perfdata:"Virtual Bytes Peak"`
-	VirtualBytes            float64 `perfdata:"Virtual Bytes"`
-	WorkingSetPrivate       float64 `perfdata:"Working Set - Private"`
-	WorkingSetPeak          float64 `perfdata:"Working Set Peak"`
-	WorkingSet              float64 `perfdata:"Working Set"`
-	ProcessID               float64 `perfdata:"Process ID"`
+	ProcessID               float64 `perfdata:"Process ID" perfdata_v1:"ID Process"`
 }


@@ -63,9 +63,12 @@ func NewCollector[T any](object string, _ []string) (*Collector, error) {
 	}
 
 	for _, f := range reflect.VisibleFields(valueType) {
-		counterName, ok := f.Tag.Lookup("perfdata")
+		counterName, ok := f.Tag.Lookup("perfdata_v1")
 		if !ok {
-			continue
+			counterName, ok = f.Tag.Lookup("perfdata")
+			if !ok {
+				continue
+			}
 		}
 
 		var counter Counter
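
The registry collector now prefers a `perfdata_v1` struct tag and falls back to the shared `perfdata` tag, which is what lets a single value struct serve both counter versions. A small self-contained sketch of that lookup (the struct here is trimmed to two fields for illustration):

package main

import (
	"fmt"
	"reflect"
)

// Trimmed stand-in for perfDataCounterValues: most counters share a
// name across versions; ProcessID carries a V1-specific override.
type perfDataCounterValues struct {
	HandleCount float64 `perfdata:"Handle Count"`
	ProcessID   float64 `perfdata:"Process ID" perfdata_v1:"ID Process"`
}

// v1CounterName resolves the name the registry (V1) collector uses:
// perfdata_v1 when present, otherwise the shared perfdata tag.
func v1CounterName(f reflect.StructField) (string, bool) {
	if name, ok := f.Tag.Lookup("perfdata_v1"); ok {
		return name, true
	}

	return f.Tag.Lookup("perfdata")
}

func main() {
	t := reflect.TypeOf(perfDataCounterValues{})
	for _, f := range reflect.VisibleFields(t) {
		if name, ok := v1CounterName(f); ok {
			fmt.Printf("%s -> %q\n", f.Name, name) // e.g. ProcessID -> "ID Process"
		}
	}
}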