gpu: add info metric about devices (#2070)

This commit is contained in:
Jan-Otto Kröpke
2025-06-04 22:49:59 +02:00
committed by GitHub
parent dcf85032ca
commit 55c877f536
7 changed files with 299 additions and 15 deletions

View File

@@ -21,8 +21,10 @@ import (
"errors"
"fmt"
"log/slog"
"strconv"
"github.com/alecthomas/kingpin/v2"
"github.com/prometheus-community/windows_exporter/internal/headers/setupapi"
"github.com/prometheus-community/windows_exporter/internal/mi"
"github.com/prometheus-community/windows_exporter/internal/pdh"
"github.com/prometheus-community/windows_exporter/internal/types"
@@ -43,6 +45,7 @@ type Collector struct {
gpuEnginePerfDataCollector *pdh.Collector
gpuEnginePerfDataObject []gpuEnginePerfDataCounterValues
gpuInfo *prometheus.Desc
gpuEngineRunningTime *prometheus.Desc
// GPU Adapter Memory
@@ -109,6 +112,13 @@ func (c *Collector) Close() error {
func (c *Collector) Build(_ *slog.Logger, _ *mi.Session) error {
var err error
c.gpuInfo = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "info"),
"A metric with a constant '1' value labeled with gpu device information.",
[]string{"phys", "physical_device_object_name", "hardware_id", "friendly_name", "description"},
nil,
)
c.gpuEngineRunningTime = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "engine_time_seconds"),
"Total running time of the GPU in seconds.",
@@ -213,6 +223,10 @@ func (c *Collector) Build(_ *slog.Logger, _ *mi.Session) error {
func (c *Collector) Collect(ch chan<- prometheus.Metric) error {
errs := make([]error, 0)
if err := c.collectGpuInfo(ch); err != nil {
errs = append(errs, err)
}
if err := c.collectGpuEngineMetrics(ch); err != nil {
errs = append(errs, err)
}
@@ -236,6 +250,28 @@ func (c *Collector) Collect(ch chan<- prometheus.Metric) error {
return errors.Join(errs...)
}
func (c *Collector) collectGpuInfo(ch chan<- prometheus.Metric) error {
gpus, err := setupapi.GetGPUDevices()
if err != nil {
return fmt.Errorf("failed to get GPU devices: %w", err)
}
for i, gpu := range gpus {
ch <- prometheus.MustNewConstMetric(
c.gpuInfo,
prometheus.GaugeValue,
1.0,
strconv.Itoa(i),
gpu.PhysicalDeviceObjectName,
gpu.HardwareID,
gpu.FriendlyName,
gpu.DeviceDesc,
)
}
return nil
}
func (c *Collector) collectGpuEngineMetrics(ch chan<- prometheus.Metric) error {
// Collect the GPU Engine perf data.
if err := c.gpuEnginePerfDataCollector.Collect(&c.gpuEnginePerfDataObject); err != nil {