Add label user to process collector (#1472)

Co-authored-by: Jan-Otto Kröpke <github@jkroepke.de>
Co-authored-by: Jan-Otto Kröpke <mail@jkroepke.de>
This commit is contained in:
Andrey Burtasov
2024-06-17 12:05:52 +03:00
committed by GitHub
parent a4a5ac464a
commit 4efb502aab
2 changed files with 147 additions and 112 deletions

View File

@@ -3,10 +3,13 @@
package process
import (
"errors"
"fmt"
"golang.org/x/sys/windows"
"regexp"
"strconv"
"strings"
"syscall"
"github.com/alecthomas/kingpin/v2"
"github.com/go-kit/log"
@@ -23,18 +26,21 @@ const (
FlagProcessExclude = "collector.process.exclude"
FlagProcessInclude = "collector.process.include"
FlagEnableWorkerProcess = "collector.process.iis"
FlagEnableReportOwner = "collector.process.report-owner"
)
// Config holds the settings of the process collector.
// The yaml tag on each field names the key used for it in YAML configuration.
type Config struct {
// ProcessInclude is the include filter for processes.
// NOTE(review): presumably a regular expression matched against process names — confirm against Build/Collect.
ProcessInclude string `yaml:"process_include"`
// ProcessExclude is the exclude filter for processes.
// NOTE(review): presumably a regular expression matched against process names — confirm against Build/Collect.
ProcessExclude string `yaml:"process_exclude"`
// EnableWorkerProcess toggles the IIS worker process option
// (yaml key enable_iis_worker_process).
EnableWorkerProcess bool `yaml:"enable_iis_worker_process"`
// EnableReportOwner toggles reporting of the process owner
// (yaml key enable_report_owner).
EnableReportOwner bool `yaml:"enable_report_owner"`
}
// ConfigDefaults is the default Config: the include filter is ".+", the
// exclude filter is empty, and both optional features are switched off.
var ConfigDefaults = Config{
ProcessInclude: ".+",
ProcessExclude: "",
EnableWorkerProcess: false,
EnableReportOwner: false,
}
type collector struct {
@@ -44,6 +50,7 @@ type collector struct {
processExclude *string
enableWorkerProcess *bool
enableReportOwner *bool
StartTime *prometheus.Desc
CPUTimeTotal *prometheus.Desc
@@ -63,6 +70,8 @@ type collector struct {
processIncludePattern *regexp.Regexp
processExcludePattern *regexp.Regexp
lookupCache map[string]string
}
func New(logger log.Logger, config *Config) types.Collector {
@@ -95,6 +104,11 @@ func NewWithFlags(app *kingpin.Application) types.Collector {
FlagEnableWorkerProcess,
"Enable IIS worker process name queries. May cause the collector to leak memory.",
).Default("false").Bool(),
enableReportOwner: app.Flag(
FlagEnableReportOwner,
"Enable reporting of process owner.",
).Default("false").Bool(),
}
return c
}
@@ -116,97 +130,104 @@ func (c *collector) Build() error {
_ = level.Warn(c.logger).Log("msg", "No filters specified for process collector. This will generate a very large number of metrics!")
}
commonLabels := make([]string, 0)
if *c.enableReportOwner {
commonLabels = []string{"owner"}
}
c.StartTime = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "start_time"),
"Time of process start.",
[]string{"process", "process_id", "creating_process_id"},
append(commonLabels, "process", "process_id", "creating_process_id"),
nil,
)
c.CPUTimeTotal = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "cpu_time_total"),
"Returns elapsed time that all of the threads of this process used the processor to execute instructions by mode (privileged, user).",
[]string{"process", "process_id", "creating_process_id", "mode"},
append(commonLabels, "process", "process_id", "creating_process_id", "mode"),
nil,
)
c.HandleCount = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "handles"),
"Total number of handles the process has open. This number is the sum of the handles currently open by each thread in the process.",
[]string{"process", "process_id", "creating_process_id"},
append(commonLabels, "process", "process_id", "creating_process_id"),
nil,
)
c.IOBytesTotal = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "io_bytes_total"),
"Bytes issued to I/O operations in different modes (read, write, other).",
[]string{"process", "process_id", "creating_process_id", "mode"},
append(commonLabels, "process", "process_id", "creating_process_id", "mode"),
nil,
)
c.IOOperationsTotal = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "io_operations_total"),
"I/O operations issued in different modes (read, write, other).",
[]string{"process", "process_id", "creating_process_id", "mode"},
append(commonLabels, "process", "process_id", "creating_process_id", "mode"),
nil,
)
c.PageFaultsTotal = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "page_faults_total"),
"Page faults by the threads executing in this process.",
[]string{"process", "process_id", "creating_process_id"},
append(commonLabels, "process", "process_id", "creating_process_id"),
nil,
)
c.PageFileBytes = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "page_file_bytes"),
"Current number of bytes this process has used in the paging file(s).",
[]string{"process", "process_id", "creating_process_id"},
append(commonLabels, "process", "process_id", "creating_process_id"),
nil,
)
c.PoolBytes = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "pool_bytes"),
"Pool Bytes is the last observed number of bytes in the paged or nonpaged pool.",
[]string{"process", "process_id", "creating_process_id", "pool"},
append(commonLabels, "process", "process_id", "creating_process_id", "pool"),
nil,
)
c.PriorityBase = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "priority_base"),
"Current base priority of this process. Threads within a process can raise and lower their own base priority relative to the process base priority of the process.",
[]string{"process", "process_id", "creating_process_id"},
append(commonLabels, "process", "process_id", "creating_process_id"),
nil,
)
c.PrivateBytes = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "private_bytes"),
"Current number of bytes this process has allocated that cannot be shared with other processes.",
[]string{"process", "process_id", "creating_process_id"},
append(commonLabels, "process", "process_id", "creating_process_id"),
nil,
)
c.ThreadCount = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "threads"),
"Number of threads currently active in this process.",
[]string{"process", "process_id", "creating_process_id"},
append(commonLabels, "process", "process_id", "creating_process_id"),
nil,
)
c.VirtualBytes = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "virtual_bytes"),
"Current size, in bytes, of the virtual address space that the process is using.",
[]string{"process", "process_id", "creating_process_id"},
append(commonLabels, "process", "process_id", "creating_process_id"),
nil,
)
c.WorkingSetPrivate = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "working_set_private_bytes"),
"Size of the working set, in bytes, that is use for this process only and not shared nor shareable by other processes.",
[]string{"process", "process_id", "creating_process_id"},
append(commonLabels, "process", "process_id", "creating_process_id"),
nil,
)
c.WorkingSetPeak = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "working_set_peak_bytes"),
"Maximum size, in bytes, of the Working Set of this process at any point in time. The Working Set is the set of memory pages touched recently by the threads in the process.",
[]string{"process", "process_id", "creating_process_id"},
append(commonLabels, "process", "process_id", "creating_process_id"),
nil,
)
c.WorkingSet = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "working_set_bytes"),
"Maximum number of bytes in the working set of this process at any point in time. The working set is the set of memory pages touched recently by the threads in the process.",
[]string{"process", "process_id", "creating_process_id"},
append(commonLabels, "process", "process_id", "creating_process_id"),
nil,
)
c.lookupCache = make(map[string]string)
var err error
c.processIncludePattern, err = regexp.Compile(fmt.Sprintf("^(?:%s)$", *c.processInclude))
@@ -274,6 +295,8 @@ func (c *collector) Collect(ctx *types.ScrapeContext, ch chan<- prometheus.Metri
}
}
var owner string
for _, process := range data {
if process.Name == "_Total" ||
c.processExcludePattern.MatchString(process.Name) ||
@@ -294,205 +317,204 @@ func (c *collector) Collect(ctx *types.ScrapeContext, ch chan<- prometheus.Metri
}
}
labels := make([]string, 0, 4)
if *c.enableReportOwner {
owner, err = c.getProcessOwner(int(process.IDProcess))
if err != nil {
owner = "unknown"
}
labels = []string{owner}
}
labels = append(labels, processName, pid, cpid)
ch <- prometheus.MustNewConstMetric(
c.StartTime,
prometheus.GaugeValue,
process.ElapsedTime,
processName,
pid,
cpid,
labels...,
)
ch <- prometheus.MustNewConstMetric(
c.HandleCount,
prometheus.GaugeValue,
process.HandleCount,
processName,
pid,
cpid,
labels...,
)
ch <- prometheus.MustNewConstMetric(
c.CPUTimeTotal,
prometheus.CounterValue,
process.PercentPrivilegedTime,
processName,
pid,
cpid,
"privileged",
append(labels, "privileged")...,
)
ch <- prometheus.MustNewConstMetric(
c.CPUTimeTotal,
prometheus.CounterValue,
process.PercentUserTime,
processName,
pid,
cpid,
"user",
append(labels, "user")...,
)
ch <- prometheus.MustNewConstMetric(
c.IOBytesTotal,
prometheus.CounterValue,
process.IOOtherBytesPerSec,
processName,
pid,
cpid,
"other",
append(labels, "other")...,
)
ch <- prometheus.MustNewConstMetric(
c.IOOperationsTotal,
prometheus.CounterValue,
process.IOOtherOperationsPerSec,
processName,
pid,
cpid,
"other",
append(labels, "other")...,
)
ch <- prometheus.MustNewConstMetric(
c.IOBytesTotal,
prometheus.CounterValue,
process.IOReadBytesPerSec,
processName,
pid,
cpid,
"read",
append(labels, "read")...,
)
ch <- prometheus.MustNewConstMetric(
c.IOOperationsTotal,
prometheus.CounterValue,
process.IOReadOperationsPerSec,
processName,
pid,
cpid,
"read",
append(labels, "read")...,
)
ch <- prometheus.MustNewConstMetric(
c.IOBytesTotal,
prometheus.CounterValue,
process.IOWriteBytesPerSec,
processName,
pid,
cpid,
"write",
append(labels, "write")...,
)
ch <- prometheus.MustNewConstMetric(
c.IOOperationsTotal,
prometheus.CounterValue,
process.IOWriteOperationsPerSec,
processName,
pid,
cpid,
"write",
append(labels, "write")...,
)
ch <- prometheus.MustNewConstMetric(
c.PageFaultsTotal,
prometheus.CounterValue,
process.PageFaultsPerSec,
processName,
pid,
cpid,
labels...,
)
ch <- prometheus.MustNewConstMetric(
c.PageFileBytes,
prometheus.GaugeValue,
process.PageFileBytes,
processName,
pid,
cpid,
labels...,
)
ch <- prometheus.MustNewConstMetric(
c.PoolBytes,
prometheus.GaugeValue,
process.PoolNonpagedBytes,
processName,
pid,
cpid,
"nonpaged",
append(labels, "nonpaged")...,
)
ch <- prometheus.MustNewConstMetric(
c.PoolBytes,
prometheus.GaugeValue,
process.PoolPagedBytes,
processName,
pid,
cpid,
"paged",
append(labels, "paged")...,
)
ch <- prometheus.MustNewConstMetric(
c.PriorityBase,
prometheus.GaugeValue,
process.PriorityBase,
processName,
pid,
cpid,
labels...,
)
ch <- prometheus.MustNewConstMetric(
c.PrivateBytes,
prometheus.GaugeValue,
process.PrivateBytes,
processName,
pid,
cpid,
labels...,
)
ch <- prometheus.MustNewConstMetric(
c.ThreadCount,
prometheus.GaugeValue,
process.ThreadCount,
processName,
pid,
cpid,
labels...,
)
ch <- prometheus.MustNewConstMetric(
c.VirtualBytes,
prometheus.GaugeValue,
process.VirtualBytes,
processName,
pid,
cpid,
labels...,
)
ch <- prometheus.MustNewConstMetric(
c.WorkingSetPrivate,
prometheus.GaugeValue,
process.WorkingSetPrivate,
processName,
pid,
cpid,
labels...,
)
ch <- prometheus.MustNewConstMetric(
c.WorkingSetPeak,
prometheus.GaugeValue,
process.WorkingSetPeak,
processName,
pid,
cpid,
labels...,
)
ch <- prometheus.MustNewConstMetric(
c.WorkingSet,
prometheus.GaugeValue,
process.WorkingSet,
processName,
pid,
cpid,
labels...,
)
}
return nil
}
// getProcessOwner resolves the account that owns the process identified by pid.
//
// It opens the process with PROCESS_QUERY_LIMITED_INFORMATION, reads the user
// SID from the process token and translates that SID into an account name with
// LookupAccount. Results are memoized in c.lookupCache, keyed by the SID
// string. When the SID cannot be translated, the SID string itself is cached
// and returned, so a failing lookup is not retried.
//
// An error is returned when the process does not exist, when it cannot be
// opened, or when its token cannot be opened or queried. The returned string is
// empty in every error case.
//
// NOTE(review): c.lookupCache is read and written without synchronization;
// confirm that Collect is never executed concurrently on the same collector.
//
// ref: https://github.com/microsoft/hcsshim/blob/8beabacfc2d21767a07c20f8dd5f9f3932dbf305/internal/uvm/stats.go#L25
func (c *collector) getProcessOwner(pid int) (string, error) {
	p, err := windows.OpenProcess(windows.PROCESS_QUERY_LIMITED_INFORMATION, false, uint32(pid))
	if errors.Is(err, syscall.Errno(0x57)) { // invalid parameter, for PIDs that don't exist
		return "", errors.New("process not found")
	}
	if err != nil {
		return "", fmt.Errorf("OpenProcess: %T %w", err, err)
	}
	defer windows.Close(p)

	var tok windows.Token
	if err = windows.OpenProcessToken(p, windows.TOKEN_QUERY, &tok); err != nil {
		return "", fmt.Errorf("OpenProcessToken: %w", err)
	}
	// The token is a handle of its own, distinct from the process handle.
	// Without this close every call leaks one handle, i.e. one per process per
	// scrape while owner reporting is enabled.
	defer tok.Close()

	tokenUser, err := tok.GetTokenUser()
	if err != nil {
		return "", fmt.Errorf("GetTokenUser: %w", err)
	}

	sid := tokenUser.User.Sid.String()
	if owner, ok := c.lookupCache[sid]; ok {
		return owner, nil
	}

	// Fall back to the raw SID when it cannot be translated to an account.
	owner := sid
	if account, domain, _, err := tokenUser.User.Sid.LookupAccount(""); err == nil {
		// NOTE(review): this renders as account\domain, the reverse of the
		// usual DOMAIN\account notation. Kept unchanged because the value is
		// emitted as a metric label.
		owner = fmt.Sprintf(`%s\%s`, account, domain)
	}
	c.lookupCache[sid] = owner

	return owner, nil
}