Service V2 collector (#1497)

This commit is contained in:
Jan-Otto Kröpke
2024-06-14 08:59:11 +02:00
committed by GitHub
parent 1b438cdb82
commit a4a5ac464a
2 changed files with 188 additions and 20 deletions

View File

@@ -3,10 +3,12 @@
package service
import (
"errors"
"fmt"
"strconv"
"strings"
"syscall"
"unsafe"
"github.com/alecthomas/kingpin/v2"
"github.com/go-kit/log"
@@ -23,16 +25,19 @@ const (
Name = "service"
FlagServiceWhereClause = "collector.service.services-where"
FlagServiceUseAPI = "collector.service.use-api"
FlagServiceCollectorV2 = "collector.service.v2"
)
type Config struct {
ServiceWhereClause string `yaml:"service_where_clause"`
UseAPI bool `yaml:"use_api"`
V2 bool `yaml:"v2"`
}
var ConfigDefaults = Config{
ServiceWhereClause: "",
UseAPI: false,
V2: false,
}
// A collector is a Prometheus collector for WMI Win32_Service metrics
@@ -41,13 +46,13 @@ type collector struct {
serviceWhereClause *string
useAPI *bool
v2 *bool
Information *prometheus.Desc
State *prometheus.Desc
StartMode *prometheus.Desc
Status *prometheus.Desc
queryWhereClause string
StateV2 *prometheus.Desc
}
func New(logger log.Logger, config *Config) types.Collector {
@@ -73,6 +78,10 @@ func NewWithFlags(app *kingpin.Application) types.Collector {
FlagServiceUseAPI,
"Use API calls to collect service data instead of WMI. Flag 'collector.service.services-where' won't be effective.",
).Default(strconv.FormatBool(ConfigDefaults.UseAPI)).Bool(),
v2: app.Flag(
FlagServiceCollectorV2,
"Enable V2 service collector. This collector can services state much more efficiently, can't provide general service information.",
).Default(strconv.FormatBool(ConfigDefaults.V2)).Bool(),
}
}
@@ -120,25 +129,37 @@ func (c *collector) Build() error {
[]string{"name", "status"},
nil,
)
c.queryWhereClause = *c.serviceWhereClause
c.StateV2 = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "state"),
"The state of the service (State)",
[]string{"name", "display_name", "status"},
nil,
)
return nil
}
// Collect sends the metric values for each metric
// to the provided prometheus Metric channel.
func (c *collector) Collect(_ *types.ScrapeContext, ch chan<- prometheus.Metric) error {
if *c.useAPI {
if err := c.collectAPI(ch); err != nil {
var err error
switch {
case *c.useAPI:
if err = c.collectAPI(ch); err != nil {
_ = level.Error(c.logger).Log("msg", "failed collecting API service metrics:", "err", err)
return err
}
} else {
if err := c.collectWMI(ch); err != nil {
case *c.v2:
if err = c.collectAPIV2(ch); err != nil {
_ = level.Error(c.logger).Log("msg", "failed collecting API service metrics:", "err", err)
}
default:
if err = c.collectWMI(ch); err != nil {
_ = level.Error(c.logger).Log("msg", "failed collecting WMI service metrics:", "err", err)
return err
}
}
return nil
return err
}
// Win32_Service docs:
@@ -164,7 +185,7 @@ var (
"paused",
"unknown",
}
apiStateValues = map[uint]string{
apiStateValues = map[uint32]string{
windows.SERVICE_CONTINUE_PENDING: "continue pending",
windows.SERVICE_PAUSE_PENDING: "pause pending",
windows.SERVICE_PAUSED: "paused",
@@ -205,7 +226,7 @@ var (
func (c *collector) collectWMI(ch chan<- prometheus.Metric) error {
var dst []Win32_Service
q := wmi.QueryAllWhere(&dst, c.queryWhereClause, c.logger)
q := wmi.QueryAllWhere(&dst, *c.serviceWhereClause, c.logger)
if err := wmi.Query(q, &dst); err != nil {
return err
}
@@ -333,7 +354,7 @@ func (c *collector) collectAPI(ch chan<- prometheus.Metric) error {
for _, state := range apiStateValues {
isCurrentState := 0.0
if state == apiStateValues[uint(serviceStatus.State)] {
if state == apiStateValues[uint32(serviceStatus.State)] {
isCurrentState = 1.0
}
ch <- prometheus.MustNewConstMetric(
@@ -362,3 +383,92 @@ func (c *collector) collectAPI(ch chan<- prometheus.Metric) error {
}
return nil
}
func (c *collector) collectAPIV2(ch chan<- prometheus.Metric) error {
services, err := c.queryAllServiceStates()
if err != nil {
_ = level.Warn(c.logger).Log("msg", "Failed to query services", "err", err)
return err
}
if services == nil {
_ = level.Warn(c.logger).Log("msg", "No services queried")
return nil
}
var isCurrentState float64
for _, svc := range services {
for state, stateValue := range apiStateValues {
isCurrentState = 0.0
if state == svc.ServiceStatusProcess.CurrentState {
isCurrentState = 1.0
}
ch <- prometheus.MustNewConstMetric(
c.StateV2,
prometheus.GaugeValue,
isCurrentState,
windows.UTF16PtrToString(svc.ServiceName),
windows.UTF16PtrToString(svc.DisplayName),
stateValue,
)
}
}
return nil
}
// queryAllServiceStates returns all service states of the current Windows system
// This is realized by ask Service Manager directly.
//
// Unless explicitly stated otherwise all files in this repository are licensed
// under the Apache License Version 2.0.
// This product includes software developed at Datadog (https://www.datadoghq.com/).
// Copyright 2016-present Datadog, Inc.
//
// Source: https://github.com/DataDog/datadog-agent/blob/afbd8b6c87939c92610c654cb07fdfd439e4fb27/pkg/util/winutil/scmmonitor.go#L61-L96
func (c *collector) queryAllServiceStates() ([]windows.ENUM_SERVICE_STATUS_PROCESS, error) {
// EnumServiceStatusEx requires only SC_MANAGER_ENUM_SERVICE.
h, err := windows.OpenSCManager(nil, nil, windows.SC_MANAGER_ENUMERATE_SERVICE)
if err != nil {
return nil, fmt.Errorf("failed to open scm: %w", err)
}
m := &mgr.Mgr{Handle: h}
defer func() {
if err := m.Disconnect(); err != nil {
_ = level.Warn(c.logger).Log("msg", "Failed to disconnect from scm", "err", err)
}
}()
var bytesNeeded, servicesReturned uint32
var buf []byte
for {
var p *byte
if len(buf) > 0 {
p = &buf[0]
}
err = windows.EnumServicesStatusEx(m.Handle, windows.SC_ENUM_PROCESS_INFO,
windows.SERVICE_WIN32, windows.SERVICE_STATE_ALL,
p, uint32(len(buf)), &bytesNeeded, &servicesReturned, nil, nil)
if err == nil {
break
}
if !errors.Is(err, windows.ERROR_MORE_DATA) {
return nil, fmt.Errorf("failed to enum services %w", err)
}
if bytesNeeded <= uint32(len(buf)) {
return nil, err
}
buf = make([]byte, bytesNeeded)
}
if servicesReturned == 0 {
return nil, nil
}
services := unsafe.Slice((*windows.ENUM_SERVICE_STATUS_PROCESS)(unsafe.Pointer(&buf[0])), servicesReturned)
return services, nil
}