diff --git a/.idea/dictionaries/project.xml b/.idea/dictionaries/project.xml
index ecd06d91..74d07f8a 100644
--- a/.idea/dictionaries/project.xml
+++ b/.idea/dictionaries/project.xml
@@ -4,6 +4,7 @@
containerd
endpointstats
gochecknoglobals
+ lpwstr
luid
operationoptions
setupapi
diff --git a/docs/collector.gpu.md b/docs/collector.gpu.md
index a0f79190..3da22978 100644
--- a/docs/collector.gpu.md
+++ b/docs/collector.gpu.md
@@ -20,28 +20,28 @@ These metrics are available on supported versions of Windows with compatible GPU
### Adapter-level Metrics
-| Name | Description | Type | Labels |
-|--------------------------------------------------|------------------------------------------------------------------------------------|-------|---------------|
-| `windows_gpu_info` | A metric with a constant '1' value labeled with gpu device information. | gauge | `luid`,`name`,`bus_number`,`phys`,`function_number` |
-| `windows_gpu_dedicated_system_memory_size_bytes` | The size, in bytes, of memory that is dedicated from system memory. | gauge | `luid` |
-| `windows_gpu_dedicated_video_memory_size_bytes` | The size, in bytes, of memory that is dedicated from video memory. | gauge | `luid` |
-| `windows_gpu_shared_system_memory_size_bytes` | The size, in bytes, of memory from system memory that can be shared by many users. | gauge | `luid` |
-| `windows_gpu_adapter_memory_committed_bytes` | Total committed GPU memory in bytes per physical GPU | gauge | `luid`,`phys` |
-| `windows_gpu_adapter_memory_dedicated_bytes` | Dedicated GPU memory usage in bytes per physical GPU | gauge | `luid`,`phys` |
-| `windows_gpu_adapter_memory_shared_bytes` | Shared GPU memory usage in bytes per physical GPU | gauge | `luid`,`phys` |
-| `windows_gpu_local_adapter_memory_bytes` | Local adapter memory usage in bytes per physical GPU | gauge | `luid`,`phys` |
-| `windows_gpu_non_local_adapter_memory_bytes` | Non-local adapter memory usage in bytes per physical GPU | gauge | `luid`,`phys` |
+| Name | Description | Type | Labels |
+|--------------------------------------------------|------------------------------------------------------------------------------------|-------|-----------------------------------------------------------------|
+| `windows_gpu_info` | A metric with a constant '1' value labeled with gpu device information. | gauge | `bus_number`,`device_id`,`function_number`,`luid`,`name`,`phys` |
+| `windows_gpu_dedicated_system_memory_size_bytes` | The size, in bytes, of memory that is dedicated from system memory. | gauge | `device_id`,`luid` |
+| `windows_gpu_dedicated_video_memory_size_bytes` | The size, in bytes, of memory that is dedicated from video memory. | gauge | `device_id`,`luid` |
+| `windows_gpu_shared_system_memory_size_bytes` | The size, in bytes, of memory from system memory that can be shared by many users. | gauge | `device_id`,`luid` |
+| `windows_gpu_adapter_memory_committed_bytes` | Total committed GPU memory in bytes per physical GPU | gauge | `device_id`,`luid`,`phys` |
+| `windows_gpu_adapter_memory_dedicated_bytes` | Dedicated GPU memory usage in bytes per physical GPU | gauge | `device_id`,`luid`,`phys` |
+| `windows_gpu_adapter_memory_shared_bytes` | Shared GPU memory usage in bytes per physical GPU | gauge | `device_id`,`luid`,`phys` |
+| `windows_gpu_local_adapter_memory_bytes` | Local adapter memory usage in bytes per physical GPU partition | gauge | `device_id`,`luid`,`phys`,`part` |
+| `windows_gpu_non_local_adapter_memory_bytes` | Non-local adapter memory usage in bytes per physical GPU partition | gauge | `device_id`,`luid`,`phys`,`part` |
### Per-process Metrics
-| Name | Description | Type | Labels |
-|----------------------------------------------|-------------------------------------------------|---------|-----------------------------------------------|
-| `windows_gpu_engine_time_seconds` | Total running time of the GPU engine in seconds | counter | `luid`,`phys`, `eng`, `engtype`, `process_id` |
-| `windows_gpu_process_memory_committed_bytes` | Total committed GPU memory in bytes per process | gauge | `luid`,`phys`,`process_id` |
-| `windows_gpu_process_memory_dedicated_bytes` | Dedicated GPU memory usage in bytes per process | gauge | `luid`,`phys`,`process_id` |
-| `windows_gpu_process_memory_local_bytes` | Local GPU memory usage in bytes per process | gauge | `luid`,`phys`,`process_id` |
-| `windows_gpu_process_memory_non_local_bytes` | Non-local GPU memory usage in bytes per process | gauge | `luid`,`phys`,`process_id` |
-| `windows_gpu_process_memory_shared_bytes` | Shared GPU memory usage in bytes per process | gauge | `luid`,`phys`,`process_id` |
+| Name | Description | Type | Labels |
+|----------------------------------------------|-------------------------------------------------|---------|-----------------------------------------------------------|
+| `windows_gpu_engine_time_seconds` | Total running time of the GPU engine in seconds | counter | `device_id`,`luid`,`phys`,`eng`,`engtype`,`process_id` |
+| `windows_gpu_process_memory_committed_bytes` | Total committed GPU memory in bytes per process | gauge | `device_id`,`luid`,`phys`,`process_id` |
+| `windows_gpu_process_memory_dedicated_bytes` | Dedicated GPU memory usage in bytes per process | gauge | `device_id`,`luid`,`phys`,`process_id` |
+| `windows_gpu_process_memory_local_bytes` | Local GPU memory usage in bytes per process | gauge | `device_id`,`luid`,`phys`,`process_id` |
+| `windows_gpu_process_memory_non_local_bytes` | Non-local GPU memory usage in bytes per process | gauge | `device_id`,`luid`,`phys`,`process_id` |
+| `windows_gpu_process_memory_shared_bytes` | Shared GPU memory usage in bytes per process | gauge | `device_id`,`luid`,`phys`,`process_id` |
## Metric Labels
@@ -57,7 +57,7 @@ These are basic queries to help you get started with GPU monitoring on Windows u
**Show GPU information for a specific physical GPU (0):**
```promql
-windows_gpu_info{description="NVIDIA GeForce GTX 1070",friendly_name="",hardware_id="PCI\\VEN_10DE&DEV_1B81&SUBSYS_61733842&REV_A1",phys="0",physical_device_object_name="\\Device\\NTPNP_PCI0027"} 1
+windows_gpu_info{bus_number="8",device_id="PCI\\VEN_10DE&DEV_1B81&SUBSYS_61733842&REV_A1",function_number="0",luid="0x00000000_0x00010F8A",name="NVIDIA GeForce GTX 1070",phys="0"} 1
```
**Show total dedicated GPU memory (in bytes) usage on GPU 0:**
diff --git a/internal/collector/gpu/gpu.go b/internal/collector/gpu/gpu.go
index 0916c9ca..df30f61e 100644
--- a/internal/collector/gpu/gpu.go
+++ b/internal/collector/gpu/gpu.go
@@ -21,9 +21,9 @@ import (
"errors"
"fmt"
"log/slog"
- "strconv"
"github.com/alecthomas/kingpin/v2"
+ "github.com/prometheus-community/windows_exporter/internal/headers/cfgmgr32"
"github.com/prometheus-community/windows_exporter/internal/headers/gdi32"
"github.com/prometheus-community/windows_exporter/internal/mi"
"github.com/prometheus-community/windows_exporter/internal/pdh"
@@ -41,7 +41,7 @@ var ConfigDefaults = Config{}
type Collector struct {
config Config
- gpuDeviceCache map[string]gdi32.GPUDevice
+ gpuDeviceCache map[string]gpuDevice
// GPU Engine
gpuEnginePerfDataCollector *pdh.Collector
@@ -85,6 +85,12 @@ type Collector struct {
gpuProcessMemoryTotalCommitted *prometheus.Desc
}
+type gpuDevice struct { // per-adapter data cached in Collector.gpuDeviceCache
+	gdi32    gdi32.GPUDevice // adapter data as enumerated through gdi32
+	cfgmgr32 cfgmgr32.Device // cfgmgr32 device matched on bus/device/function number; zero value if none matched
+	ID       string          // emitted as the device_id label: the matched InstanceID, else gdi32.DeviceID
+}
+
func New(config *Config) *Collector {
if config == nil {
config = &ConfigDefaults
@@ -121,97 +127,97 @@ func (c *Collector) Build(_ *slog.Logger, _ *mi.Session) error {
c.gpuInfo = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "info"),
"A metric with a constant '1' value labeled with gpu device information.",
- []string{"luid", "name", "bus_number", "phys", "function_number"},
+ []string{"luid", "device_id", "name", "bus_number", "phys", "function_number"},
nil,
)
c.gpuSharedSystemMemorySize = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "shared_system_memory_size_bytes"),
"The size, in bytes, of memory from system memory that can be shared by many users.",
- []string{"luid"},
+ []string{"luid", "device_id"},
nil,
)
c.gpuDedicatedSystemMemorySize = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "dedicated_system_memory_size_bytes"),
"The size, in bytes, of memory that is dedicated from system memory.",
- []string{"luid"},
+ []string{"luid", "device_id"},
nil,
)
c.gpuDedicatedVideoMemorySize = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "dedicated_video_memory_size_bytes"),
"The size, in bytes, of memory that is dedicated from video memory.",
- []string{"luid"},
+ []string{"luid", "device_id"},
nil,
)
c.gpuEngineRunningTime = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "engine_time_seconds"),
"Total running time of the GPU in seconds.",
- []string{"process_id", "luid", "phys", "eng", "engtype"},
+ []string{"process_id", "luid", "device_id", "phys", "eng", "engtype"},
nil,
)
c.gpuAdapterMemoryDedicatedUsage = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "adapter_memory_dedicated_bytes"),
"Dedicated GPU memory usage in bytes.",
- []string{"luid", "phys"},
+ []string{"luid", "device_id", "phys"},
nil,
)
c.gpuAdapterMemorySharedUsage = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "adapter_memory_shared_bytes"),
"Shared GPU memory usage in bytes.",
- []string{"luid", "phys"},
+ []string{"luid", "device_id", "phys"},
nil,
)
c.gpuAdapterMemoryTotalCommitted = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "adapter_memory_committed_bytes"),
"Total committed GPU memory in bytes.",
- []string{"luid", "phys"},
+ []string{"luid", "device_id", "phys"},
nil,
)
c.gpuLocalAdapterMemoryUsage = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "local_adapter_memory_bytes"),
"Local adapter memory usage in bytes.",
- []string{"luid", "phys"},
+ []string{"luid", "device_id", "phys", "part"},
nil,
)
c.gpuNonLocalAdapterMemoryUsage = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "non_local_adapter_memory_bytes"),
"Non-local adapter memory usage in bytes.",
- []string{"luid", "phys"},
+ []string{"luid", "device_id", "phys", "part"},
nil,
)
c.gpuProcessMemoryDedicatedUsage = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "process_memory_dedicated_bytes"),
"Dedicated process memory usage in bytes.",
- []string{"process_id", "luid", "phys"},
+ []string{"process_id", "luid", "device_id", "phys"},
nil,
)
c.gpuProcessMemoryLocalUsage = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "process_memory_local_bytes"),
"Local process memory usage in bytes.",
- []string{"process_id", "luid", "phys"},
+ []string{"process_id", "luid", "device_id", "phys"},
nil,
)
c.gpuProcessMemoryNonLocalUsage = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "process_memory_non_local_bytes"),
"Non-local process memory usage in bytes.",
- []string{"process_id", "luid", "phys"},
+ []string{"process_id", "luid", "device_id", "phys"},
nil,
)
c.gpuProcessMemorySharedUsage = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "process_memory_shared_bytes"),
"Shared process memory usage in bytes.",
- []string{"process_id", "luid", "phys"},
+ []string{"process_id", "luid", "device_id", "phys"},
nil,
)
c.gpuProcessMemoryTotalCommitted = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "process_memory_committed_bytes"),
"Total committed process memory in bytes.",
- []string{"process_id", "luid", "phys"},
+ []string{"process_id", "luid", "device_id", "phys"},
nil,
)
@@ -253,11 +259,39 @@ func (c *Collector) Build(_ *slog.Logger, _ *mi.Session) error {
}
if c.gpuDeviceCache == nil {
- c.gpuDeviceCache = make(map[string]gdi32.GPUDevice)
+ c.gpuDeviceCache = make(map[string]gpuDevice)
}
luidKey := fmt.Sprintf("0x%08X_0x%08X", gpu.LUID.HighPart, gpu.LUID.LowPart)
- c.gpuDeviceCache[luidKey] = gpu
+
+ deviceID := gpu.DeviceID
+
+ cfgmgr32Devs, err := cfgmgr32.GetDevicesInstanceIDs(gpu.DeviceID)
+ if err != nil {
+ errs = append(errs, fmt.Errorf("failed to get device instance IDs for device ID %s: %w", gpu.DeviceID, err))
+ }
+
+ var cfgmgr32Dev cfgmgr32.Device
+
+ for _, dev := range cfgmgr32Devs {
+ if dev.BusNumber == gpu.BusNumber && dev.DeviceNumber == gpu.DeviceNumber && dev.FunctionNumber == gpu.FunctionNumber {
+ cfgmgr32Dev = dev
+
+ break
+ }
+ }
+
+ if cfgmgr32Dev.InstanceID == "" {
+ errs = append(errs, fmt.Errorf("failed to find matching device for device ID %s", gpu.DeviceID))
+ } else {
+ deviceID = cfgmgr32Dev.InstanceID
+ }
+
+ c.gpuDeviceCache[luidKey] = gpuDevice{
+ gdi32: gpu,
+ cfgmgr32: cfgmgr32Dev,
+ ID: deviceID,
+ }
}
return errors.Join(errs...)
@@ -298,31 +332,32 @@ func (c *Collector) collectGpuInfo(ch chan<- prometheus.Metric) {
prometheus.GaugeValue,
1.0,
luid,
- gpu.AdapterString,
- strconv.FormatInt(int64(gpu.BusNumber), 10),
- strconv.FormatInt(int64(gpu.DeviceNumber), 10),
- strconv.FormatInt(int64(gpu.FunctionNumber), 10),
+ gpu.ID,
+ gpu.gdi32.AdapterString,
+ gpu.gdi32.BusNumber.String(),
+ gpu.gdi32.DeviceNumber.String(),
+ gpu.gdi32.FunctionNumber.String(),
)
ch <- prometheus.MustNewConstMetric(
c.gpuSharedSystemMemorySize,
prometheus.GaugeValue,
- float64(gpu.SharedSystemMemorySize),
- luid,
+ float64(gpu.gdi32.SharedSystemMemorySize),
+ luid, gpu.ID,
)
ch <- prometheus.MustNewConstMetric(
c.gpuDedicatedSystemMemorySize,
prometheus.GaugeValue,
- float64(gpu.DedicatedSystemMemorySize),
- luid,
+ float64(gpu.gdi32.DedicatedSystemMemorySize),
+ luid, gpu.ID,
)
ch <- prometheus.MustNewConstMetric(
c.gpuDedicatedVideoMemorySize,
prometheus.GaugeValue,
- float64(gpu.DedicatedVideoMemorySize),
- luid,
+ float64(gpu.gdi32.DedicatedVideoMemorySize),
+ luid, gpu.ID,
)
}
}
@@ -333,31 +368,20 @@ func (c *Collector) collectGpuEngineMetrics(ch chan<- prometheus.Metric) error {
return fmt.Errorf("failed to collect GPU Engine perf data: %w", err)
}
- runningTimeMap := make(map[PidPhysEngEngType]float64)
// Iterate over the GPU Engine perf data and aggregate the values.
for _, data := range c.gpuEnginePerfDataObject {
instance := parseGPUCounterInstanceString(data.Name)
- if _, ok := c.gpuDeviceCache[instance.Luid]; !ok {
+ device, ok := c.gpuDeviceCache[instance.Luid]
+ if !ok {
continue
}
- key := PidPhysEngEngType{
- Pid: instance.Pid,
- Phys: instance.Phys,
- Luid: instance.Luid,
- Eng: instance.Eng,
- Engtype: instance.Engtype,
- }
- runningTimeMap[key] += data.RunningTime / 10_000_000 // RunningTime is in 100ns units, convert to seconds.
- }
-
- for key, runningTime := range runningTimeMap {
ch <- prometheus.MustNewConstMetric(
c.gpuEngineRunningTime,
prometheus.CounterValue,
- runningTime,
- key.Pid, key.Luid, key.Phys, key.Eng, key.Engtype,
+ data.RunningTime/10_000_000,
+ instance.Pid, instance.Luid, device.ID, instance.Phys, instance.Eng, instance.Engtype,
)
}
@@ -370,49 +394,33 @@ func (c *Collector) collectGpuAdapterMemoryMetrics(ch chan<- prometheus.Metric)
return fmt.Errorf("failed to collect GPU Adapter Memory perf data: %w", err)
}
- dedicatedUsageMap := make(map[PidPhysEngEngType]float64)
- sharedUsageMap := make(map[PidPhysEngEngType]float64)
- totalCommittedMap := make(map[PidPhysEngEngType]float64)
-
for _, data := range c.gpuAdapterMemoryPerfDataObject {
instance := parseGPUCounterInstanceString(data.Name)
- if _, ok := c.gpuDeviceCache[instance.Luid]; !ok {
+ device, ok := c.gpuDeviceCache[instance.Luid]
+ if !ok {
continue
}
- key := PidPhysEngEngType{
- Pid: instance.Pid,
- Luid: instance.Luid,
- Phys: instance.Phys,
- Eng: instance.Eng,
- Engtype: instance.Engtype,
- }
- dedicatedUsageMap[key] += data.DedicatedUsage
- sharedUsageMap[key] += data.SharedUsage
- totalCommittedMap[key] += data.TotalCommitted
- }
-
- for key, dedicatedUsage := range dedicatedUsageMap {
ch <- prometheus.MustNewConstMetric(
c.gpuAdapterMemoryDedicatedUsage,
prometheus.GaugeValue,
- dedicatedUsage,
- key.Luid, key.Phys,
+ data.DedicatedUsage,
+ instance.Luid, device.ID, instance.Phys,
)
ch <- prometheus.MustNewConstMetric(
c.gpuAdapterMemorySharedUsage,
prometheus.GaugeValue,
- sharedUsageMap[key],
- key.Luid, key.Phys,
+ data.SharedUsage,
+ instance.Luid, device.ID, instance.Phys,
)
ch <- prometheus.MustNewConstMetric(
c.gpuAdapterMemoryTotalCommitted,
prometheus.GaugeValue,
- totalCommittedMap[key],
- key.Luid, key.Phys,
+ data.TotalCommitted,
+ instance.Luid, device.ID, instance.Phys,
)
}
@@ -425,29 +433,19 @@ func (c *Collector) collectGpuLocalAdapterMemoryMetrics(ch chan<- prometheus.Met
return fmt.Errorf("failed to collect GPU Local Adapter Memory perf data: %w", err)
}
- localAdapterMemoryMap := make(map[PidPhysEngEngType]float64)
-
for _, data := range c.gpuLocalAdapterMemoryPerfDataObject {
instance := parseGPUCounterInstanceString(data.Name)
- if _, ok := c.gpuDeviceCache[instance.Luid]; !ok {
+ device, ok := c.gpuDeviceCache[instance.Luid]
+ if !ok {
continue
}
- key := PidPhysEngEngType{
- Luid: instance.Luid,
- Phys: instance.Phys,
- }
-
- localAdapterMemoryMap[key] += data.LocalUsage
- }
-
- for key, localUsage := range localAdapterMemoryMap {
ch <- prometheus.MustNewConstMetric(
c.gpuLocalAdapterMemoryUsage,
prometheus.GaugeValue,
- localUsage,
- key.Luid, key.Phys,
+ data.LocalUsage,
+ instance.Luid, device.ID, instance.Phys, instance.Part,
)
}
@@ -460,28 +458,19 @@ func (c *Collector) collectGpuNonLocalAdapterMemoryMetrics(ch chan<- prometheus.
return fmt.Errorf("failed to collect GPU Non Local Adapter Memory perf data: %w", err)
}
- nonLocalAdapterMemoryMap := make(map[PidPhysEngEngType]float64)
-
for _, data := range c.gpuNonLocalAdapterMemoryPerfDataObject {
instance := parseGPUCounterInstanceString(data.Name)
- if _, ok := c.gpuDeviceCache[instance.Luid]; !ok {
+ device, ok := c.gpuDeviceCache[instance.Luid]
+ if !ok {
continue
}
- key := PidPhysEngEngType{
- Luid: instance.Luid,
- Phys: instance.Phys,
- }
- nonLocalAdapterMemoryMap[key] += data.NonLocalUsage
- }
-
- for key, nonLocalUsage := range nonLocalAdapterMemoryMap {
ch <- prometheus.MustNewConstMetric(
c.gpuNonLocalAdapterMemoryUsage,
prometheus.GaugeValue,
- nonLocalUsage,
- key.Luid, key.Phys,
+ data.NonLocalUsage,
+ instance.Luid, device.ID, instance.Phys, instance.Part,
)
}
@@ -494,65 +483,47 @@ func (c *Collector) collectGpuProcessMemoryMetrics(ch chan<- prometheus.Metric)
return fmt.Errorf("failed to collect GPU Process Memory perf data: %w", err)
}
- processDedicatedUsageMap := make(map[PidPhys]float64)
- processLocalUsageMap := make(map[PidPhys]float64)
- processNonLocalUsageMap := make(map[PidPhys]float64)
- processSharedUsageMap := make(map[PidPhys]float64)
- processTotalCommittedMap := make(map[PidPhys]float64)
-
for _, data := range c.gpuProcessMemoryPerfDataObject {
instance := parseGPUCounterInstanceString(data.Name)
- if _, ok := c.gpuDeviceCache[instance.Luid]; !ok {
+ device, ok := c.gpuDeviceCache[instance.Luid]
+ if !ok {
continue
}
- key := PidPhys{
- Pid: instance.Pid,
- Luid: instance.Luid,
- Phys: instance.Phys,
- }
- processDedicatedUsageMap[key] += data.DedicatedUsage
- processLocalUsageMap[key] += data.LocalUsage
- processNonLocalUsageMap[key] += data.NonLocalUsage
- processSharedUsageMap[key] += data.SharedUsage
- processTotalCommittedMap[key] += data.TotalCommitted
- }
-
- for key, dedicatedUsage := range processDedicatedUsageMap {
ch <- prometheus.MustNewConstMetric(
c.gpuProcessMemoryDedicatedUsage,
prometheus.GaugeValue,
- dedicatedUsage,
- key.Pid, key.Luid, key.Phys,
+ data.DedicatedUsage,
+ instance.Pid, instance.Luid, device.ID, instance.Phys,
)
ch <- prometheus.MustNewConstMetric(
c.gpuProcessMemoryLocalUsage,
prometheus.GaugeValue,
- processLocalUsageMap[key],
- key.Pid, key.Luid, key.Phys,
+ data.LocalUsage,
+ instance.Pid, instance.Luid, device.ID, instance.Phys,
)
ch <- prometheus.MustNewConstMetric(
c.gpuProcessMemoryNonLocalUsage,
prometheus.GaugeValue,
- processNonLocalUsageMap[key],
- key.Pid, key.Luid, key.Phys,
+ data.NonLocalUsage,
+ instance.Pid, instance.Luid, device.ID, instance.Phys,
)
ch <- prometheus.MustNewConstMetric(
c.gpuProcessMemorySharedUsage,
prometheus.GaugeValue,
- processSharedUsageMap[key],
- key.Pid, key.Luid, key.Phys,
+ data.SharedUsage,
+ instance.Pid, instance.Luid, device.ID, instance.Phys,
)
ch <- prometheus.MustNewConstMetric(
c.gpuProcessMemoryTotalCommitted,
prometheus.GaugeValue,
- processTotalCommittedMap[key],
- key.Pid, key.Luid, key.Phys,
+ data.TotalCommitted,
+ instance.Pid, instance.Luid, device.ID, instance.Phys,
)
}
diff --git a/internal/collector/gpu/utils.go b/internal/collector/gpu/utils.go
index c8b57295..902c8a0a 100644
--- a/internal/collector/gpu/utils.go
+++ b/internal/collector/gpu/utils.go
@@ -23,26 +23,29 @@ import (
)
type Instance struct {
- Pid string
- Luid string
- Phys string
- Eng string
- Engtype string
- Part string
+ Pid string
+ Luid string
+ DeviceID string
+ Phys string
+ Eng string
+ Engtype string
+ Part string
}
type PidPhys struct {
- Pid string
- Luid string
- Phys string
+ Pid string
+ Luid string
+ DeviceID string
+ Phys string
}
type PidPhysEngEngType struct {
- Pid string
- Luid string
- Phys string
- Eng string
- Engtype string
+ Pid string
+ Luid string
+ DeviceID string
+ Phys string
+ Eng string
+ Engtype string
}
func parseGPUCounterInstanceString(s string) Instance {
diff --git a/internal/headers/cfgmgr32/cfgmgr32.go b/internal/headers/cfgmgr32/cfgmgr32.go
new file mode 100644
index 00000000..e0b2e180
--- /dev/null
+++ b/internal/headers/cfgmgr32/cfgmgr32.go
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: Apache-2.0
+//
+// Copyright The Prometheus Authors
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cfgmgr32
+
+import (
+	"fmt"
+	"unsafe"
+
+	"github.com/prometheus-community/windows_exporter/internal/headers/win32"
+	"golang.org/x/sys/windows"
+)
+
+// GetDevicesInstanceIDs enumerates the devices selected by deviceID, which is
+// handed to the Configuration Manager as the device ID list filter, and
+// resolves the PCI location of every device instance found.
+//
+// For each instance the bus number and device address properties are read; the
+// upper 16 bits of the address become DeviceNumber and the lower 16 bits
+// FunctionNumber. The first failing call aborts the enumeration, and its error
+// is returned wrapped with the operation and device instance it belongs to.
+func GetDevicesInstanceIDs(deviceID string) ([]Device, error) {
+	filter := win32.NewLPWSTR(deviceID)
+
+	var listSize uint32
+
+	if err := CMGetDeviceIDListSize(filter, &listSize); err != nil {
+		return nil, fmt.Errorf("device ID list size for %q: %w", deviceID, err)
+	}
+
+	if listSize == 0 {
+		// An empty buffer cannot hold a device list; report "no devices" without querying.
+		return []Device{}, nil
+	}
+
+	listBuffer := make([]uint16, listSize)
+
+	if err := CMGetDeviceIDList(filter, listBuffer); err != nil {
+		return nil, fmt.Errorf("device ID list for %q: %w", deviceID, err)
+	}
+
+	instanceIDs := win32.ParseMultiSz(listBuffer)
+	devices := make([]Device, 0, len(instanceIDs))
+
+	for _, rawID := range instanceIDs {
+		instanceID := windows.UTF16ToString(rawID)
+
+		// rawID aliases listBuffer, where its null terminator directly follows
+		// it; CMLocateDevNode only receives the address of rawID[0].
+		var devNode *windows.Handle
+
+		if err := CMLocateDevNode(&devNode, rawID); err != nil {
+			return nil, fmt.Errorf("locate device node %q: %w", instanceID, err)
+		}
+
+		busNumber, err := devNodeUint32(devNode, DEVPKEYDeviceBusNumber)
+		if err != nil {
+			return nil, fmt.Errorf("bus number of %q: %w", instanceID, err)
+		}
+
+		deviceAddress, err := devNodeUint32(devNode, DEVPKEYDeviceAddress)
+		if err != nil {
+			return nil, fmt.Errorf("device address of %q: %w", instanceID, err)
+		}
+
+		devices = append(devices, Device{
+			InstanceID:     instanceID,
+			BusNumber:      win32.UINT(busNumber),
+			DeviceNumber:   win32.UINT(deviceAddress >> 16),
+			FunctionNumber: win32.UINT(deviceAddress & 0xFFFF),
+		})
+	}
+
+	return devices, nil
+}
+
+// devNodeUint32 reads the device node property identified by key and returns
+// its value, failing if the property is not reported as DEVPROP_TYPE_UINT32.
+func devNodeUint32(devNode *windows.Handle, key *DEVPROPKEY) (uint32, error) {
+	var value, propType uint32
+
+	// Sized afresh for every read instead of reusing a length that a previous
+	// call was given a pointer to and may have overwritten.
+	propLen := uint32(unsafe.Sizeof(value))
+
+	err := CMGetDevNodeProperty(devNode, key, &propType, unsafe.Pointer(&value), &propLen)
+	if err != nil {
+		return 0, err
+	}
+
+	if propType != DEVPROP_TYPE_UINT32 {
+		return 0, fmt.Errorf("unexpected property type: 0x%08X", propType)
+	}
+
+	return value, nil
+}
diff --git a/internal/headers/cfgmgr32/syscall.go b/internal/headers/cfgmgr32/syscall.go
new file mode 100644
index 00000000..837c964d
--- /dev/null
+++ b/internal/headers/cfgmgr32/syscall.go
@@ -0,0 +1,133 @@
+// SPDX-License-Identifier: Apache-2.0
+//
+// Copyright The Prometheus Authors
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cfgmgr32
+
+import (
+	"errors"
+	"fmt"
+	"runtime"
+	"unsafe"
+
+	"github.com/prometheus-community/windows_exporter/internal/headers/win32"
+	"golang.org/x/sys/windows"
+)
+
+// Lazily resolved cfgmgr32.dll entry points wrapped by the functions below.
+//
+//nolint:gochecknoglobals
+var (
+	cfgmgr32 = windows.NewLazySystemDLL("cfgmgr32.dll")
+
+	procCMGetDeviceIDListW    = cfgmgr32.NewProc("CM_Get_Device_ID_ListW")
+	procCMGetDeviceIDListSize = cfgmgr32.NewProc("CM_Get_Device_ID_List_SizeW")
+	procCMGetDevNodePropertyW = cfgmgr32.NewProc("CM_Get_DevNode_PropertyW")
+	procCMLocateDevNodeW      = cfgmgr32.NewProc("CM_Locate_DevNodeW")
+)
+
+// CMGetDeviceIDListSize calls CM_Get_Device_ID_List_SizeW and stores in size the
+// length, in UTF-16 code units, of the buffer CMGetDeviceIDList needs for the
+// same filter. filter is passed with the enumerator and present-device flags.
+// Any return code other than CR_SUCCESS is reported as an error.
+func CMGetDeviceIDListSize(filter *win32.LPWSTR, size *uint32) error {
+	ret, _, _ := procCMGetDeviceIDListSize.Call(
+		uintptr(unsafe.Pointer(size)),
+		filter.Pointer(),
+		uintptr(CM_GETIDLIST_FILTER_PRESENT|CM_GETIDLIST_FILTER_ENUMERATOR),
+	)
+
+	// filter.Pointer() yields a bare uintptr, which does not keep the string
+	// buffer reachable; hold filter until the call has returned.
+	runtime.KeepAlive(filter)
+
+	if ret != CR_SUCCESS {
+		return fmt.Errorf("CMGetDeviceIDListSize failed: 0x%02X", ret)
+	}
+
+	return nil
+}
+
+// CMGetDeviceIDList calls CM_Get_Device_ID_ListW to fill buf with the device
+// instance IDs selected by filter, using the same flags as
+// CMGetDeviceIDListSize. An empty buf is rejected, as is any return code other
+// than CR_SUCCESS.
+func CMGetDeviceIDList(filter *win32.LPWSTR, buf []uint16) error {
+	if len(buf) == 0 {
+		return errors.New("CMGetDeviceIDList failed: empty buffer")
+	}
+
+	ret, _, _ := procCMGetDeviceIDListW.Call(
+		filter.Pointer(),
+		uintptr(unsafe.Pointer(&buf[0])),
+		uintptr(len(buf)),
+		uintptr(CM_GETIDLIST_FILTER_PRESENT|CM_GETIDLIST_FILTER_ENUMERATOR),
+	)
+
+	// See CMGetDeviceIDListSize: keep the filter string alive across the call.
+	runtime.KeepAlive(filter)
+
+	if ret != CR_SUCCESS {
+		return fmt.Errorf("CMGetDeviceIDList failed: 0x%02X", ret)
+	}
+
+	return nil
+}
+
+// CMLocateDevNode calls CM_Locate_DevNodeW for the device instance ID starting
+// at deviceID[0]. Only that address is passed on, so the ID's NUL terminator
+// must follow the slice in memory. An empty deviceID is rejected.
+//
+// NOTE(review): the API stores the device instance handle in the pointer slot
+// *devInst itself, and CMGetDevNodeProperty passes that slot's value on
+// unchanged. The resulting *windows.Handle is an opaque token, not a real
+// pointer, and must never be dereferenced.
+func CMLocateDevNode(devInst **windows.Handle, deviceID []uint16) error {
+	if len(deviceID) == 0 {
+		return errors.New("CMLocateDevNode failed: empty device ID")
+	}
+
+	ret, _, _ := procCMLocateDevNodeW.Call(
+		uintptr(unsafe.Pointer(devInst)),
+		uintptr(unsafe.Pointer(&deviceID[0])),
+		0,
+	)
+
+	if ret != CR_SUCCESS {
+		return fmt.Errorf("CMLocateDevNode failed: 0x%02X", ret)
+	}
+
+	return nil
+}
+
+// CMGetDevNodeProperty calls CM_Get_DevNode_PropertyW to read property propKey
+// of the device node devInst obtained from CMLocateDevNode. buf and bufLen
+// describe the caller's value buffer and propType receives the property type.
+// Any return code other than CR_SUCCESS is reported as an error.
+func CMGetDevNodeProperty(devInst *windows.Handle, propKey *DEVPROPKEY, propType *uint32, buf unsafe.Pointer, bufLen *uint32) error {
+	ret, _, _ := procCMGetDevNodePropertyW.Call(
+		uintptr(unsafe.Pointer(devInst)),
+		uintptr(unsafe.Pointer(propKey)),
+		uintptr(unsafe.Pointer(propType)),
+		uintptr(buf),
+		uintptr(unsafe.Pointer(bufLen)),
+		0,
+	)
+
+	if ret != CR_SUCCESS {
+		return fmt.Errorf("CMGetDevNodeProperty failed: 0x%02X", ret)
+	}
+
+	return nil
+}
diff --git a/internal/headers/cfgmgr32/types.go b/internal/headers/cfgmgr32/types.go
new file mode 100644
index 00000000..72573e98
--- /dev/null
+++ b/internal/headers/cfgmgr32/types.go
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: Apache-2.0
+//
+// Copyright The Prometheus Authors
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cfgmgr32
+
+import (
+ "github.com/go-ole/go-ole"
+ "github.com/prometheus-community/windows_exporter/internal/headers/win32"
+)
+
+const (
+	// CR_SUCCESS is the only return code the CM* wrappers in this package accept as success.
+	CR_SUCCESS = 0x00
+
+	// Flags that CMGetDeviceIDListSize and CMGetDeviceIDList OR together for their list queries.
+	CM_GETIDLIST_FILTER_ENUMERATOR = 0x00000001
+	CM_GETIDLIST_FILTER_PRESENT    = 0x00000100
+
+	DEVPROP_TYPE_UINT32 uint32 = 0x00000007 // property type GetDevicesInstanceIDs requires for the values it reads
+)
+
+// DEVPROPKEY identifies a device property by GUID plus property ID; CMGetDevNodeProperty takes a pointer to one.
+type DEVPROPKEY struct {
+	FmtID ole.GUID // GUID half of the key
+	PID   uint32   // property ID half of the key
+}
+
+type Device struct { // one device instance, as returned by GetDevicesInstanceIDs
+	InstanceID     string     // device instance ID, decoded from UTF-16
+	BusNumber      win32.UINT // value of the DEVPKEYDeviceBusNumber property
+	DeviceNumber   win32.UINT // upper 16 bits of the DEVPKEYDeviceAddress property
+	FunctionNumber win32.UINT // lower 16 bits of the DEVPKEYDeviceAddress property
+}
+
+//nolint:gochecknoglobals
+var ( // property keys GetDevicesInstanceIDs reads from each device node
+	// Values taken from https://github.com/Infinidat/infi.devicemanager/blob/8be9ead6b04ff45c63d9e3bc70d82cceafb75c47/src/infi/devicemanager/setupapi/properties.py#L138C1-L143C34
+	DEVPKEYDeviceBusNumber = &DEVPROPKEY{
+		FmtID: ole.GUID{
+			Data1: 0xa45c254e,
+			Data2: 0xdf1c,
+			Data3: 0x4efd,
+			Data4: [8]byte{0x80, 0x20, 0x67, 0xd1, 0x46, 0xa8, 0x50, 0xe0},
+		},
+		PID: 23, // property ID; the value is expected to be a DEVPROP_TYPE_UINT32
+	}
+
+	// Values taken from https://github.com/Infinidat/infi.devicemanager/blob/8be9ead6b04ff45c63d9e3bc70d82cceafb75c47/src/infi/devicemanager/setupapi/properties.py#L187-L192
+	DEVPKEYDeviceAddress = &DEVPROPKEY{
+		FmtID: ole.GUID{
+			Data1: 0xa45c254e,
+			Data2: 0xdf1c,
+			Data3: 0x4efd,
+			Data4: [8]byte{0x80, 0x20, 0x67, 0xd1, 0x46, 0xa8, 0x50, 0xe0},
+		},
+		PID: 30, // property ID; the value is expected to be a DEVPROP_TYPE_UINT32
+	}
+)
diff --git a/internal/headers/gdi32/gdi32.go b/internal/headers/gdi32/gdi32.go
index 1816db70..c6b12a10 100644
--- a/internal/headers/gdi32/gdi32.go
+++ b/internal/headers/gdi32/gdi32.go
@@ -34,41 +34,12 @@ const (
KMTQAITYPE_ADAPTERADDRESS = 6
// KMTQAITYPE_ADAPTERREGISTRYINFO pPrivateDriverData points to a D3DKMT_ADAPTERREGISTRYINFO structure that contains registry information about the graphics adapter.
KMTQAITYPE_ADAPTERREGISTRYINFO = 8
+ // KMTQAITYPE_PHYSICALADAPTERDEVICEIDS pPrivateDriverData points to a D3DKMT_QUERY_DEVICE_IDS structure that specifies the device ID(s) of the physical adapters. Supported starting with Windows 10 (WDDM 2.0).
+ KMTQAITYPE_PHYSICALADAPTERDEVICEIDS = 31
)
var ErrNoGPUDevices = errors.New("no GPU devices found")
-func GetGPUDeviceByLUID(adapterLUID windows.LUID) (GPUDevice, error) {
- open := D3DKMT_OPENADAPTERFROMLUID{
- AdapterLUID: adapterLUID,
- }
-
- if err := D3DKMTOpenAdapterFromLuid(&open); err != nil {
- return GPUDevice{}, fmt.Errorf("D3DKMTOpenAdapterFromLuid failed: %w", err)
- }
-
- errs := make([]error, 0)
-
- gpuDevice, err := GetGPUDevice(open.HAdapter)
- if err != nil {
- errs = append(errs, fmt.Errorf("GetGPUDevice failed: %w", err))
- }
-
- if err := D3DKMTCloseAdapter(&D3DKMT_CLOSEADAPTER{
- HAdapter: open.HAdapter,
- }); err != nil {
- errs = append(errs, fmt.Errorf("D3DKMTCloseAdapter failed: %w", err))
- }
-
- if len(errs) > 0 {
- return gpuDevice, fmt.Errorf("errors occurred while getting GPU device: %w", errors.Join(errs...))
- }
-
- gpuDevice.LUID = adapterLUID
-
- return gpuDevice, nil
-}
-
func GetGPUDevice(hAdapter D3DKMT_HANDLE) (GPUDevice, error) {
var gpuDevice GPUDevice
@@ -118,6 +89,18 @@ func GetGPUDevice(hAdapter D3DKMT_HANDLE) (GPUDevice, error) {
gpuDevice.AdapterString = windows.UTF16ToString(info.AdapterString[:])
+ var deviceIDs D3DKMT_QUERY_DEVICE_IDS
+
+ query.queryType = KMTQAITYPE_PHYSICALADAPTERDEVICEIDS
+ query.pPrivateDriverData = unsafe.Pointer(&deviceIDs)
+ query.privateDriverDataSize = uint32(unsafe.Sizeof(deviceIDs))
+
+ if err := D3DKMTQueryAdapterInfo(&query); err != nil && !errors.Is(err, windows.ERROR_FILE_NOT_FOUND) {
+ return gpuDevice, fmt.Errorf("D3DKMTQueryAdapterInfo (Device IDs) failed: %w", err)
+ }
+
+ gpuDevice.DeviceID = formatPNPDeviceID(deviceIDs)
+
return gpuDevice, nil
}
@@ -151,7 +134,7 @@ func GetGPUDevices() ([]GPUDevice, error) {
// Process each adapter
for i := range enumAdapters.NumAdapters {
adapter := pAdapters[i]
- // Validate handle before using it
+ // Validate the handle before using it.
if adapter.HAdapter == 0 {
errs = append(errs, fmt.Errorf("adapter %d has null handle", i))
@@ -190,3 +173,13 @@ func GetGPUDevices() ([]GPUDevice, error) {
return gpuDevices, nil
}
+
+// formatPNPDeviceID renders the adapter's IDs as PCI\VEN_xxxx&DEV_xxxx&SUBSYS_xxxxxxxx&REV_xx.
+func formatPNPDeviceID(deviceIDs D3DKMT_QUERY_DEVICE_IDS) string {
+	ids := deviceIDs.DeviceIds
+
+	// Each field is truncated to the width its %04X / %02X slot displays.
+	return fmt.Sprintf("PCI\\VEN_%04X&DEV_%04X&SUBSYS_%04X%04X&REV_%02X",
+		uint16(ids.VendorID), uint16(ids.DeviceID),
+		uint16(ids.SubSystemID), uint16(ids.SubVendorID), uint8(ids.RevisionID))
+}
diff --git a/internal/headers/gdi32/types.go b/internal/headers/gdi32/types.go
index f1c62596..ef73534d 100644
--- a/internal/headers/gdi32/types.go
+++ b/internal/headers/gdi32/types.go
@@ -73,9 +73,22 @@ type D3DKMT_ADAPTERADDRESS struct {
FunctionNumber win32.UINT
}
+type D3DKMT_QUERY_DEVICE_IDS struct { // filled by KMTQAITYPE_PHYSICALADAPTERDEVICEIDS queries
+	PhysicalAdapterIndex win32.UINT // GetGPUDevice leaves this at zero
+	DeviceIds            struct {
+		VendorID    win32.UINT // VEN_ field of the formatted device ID
+		DeviceID    win32.UINT // DEV_ field
+		SubVendorID win32.UINT // second half of the SUBSYS_ field
+		SubSystemID win32.UINT // first half of the SUBSYS_ field
+		RevisionID  win32.UINT // REV_ field
+		BusType     win32.UINT // not read by formatPNPDeviceID
+	}
+}
+
type GPUDevice struct {
AdapterString string
LUID windows.LUID
+ DeviceID string
DedicatedVideoMemorySize uint64
DedicatedSystemMemorySize uint64
SharedSystemMemorySize uint64
diff --git a/internal/headers/win32/types.go b/internal/headers/win32/types.go
index 08147db9..edcfe16c 100644
--- a/internal/headers/win32/types.go
+++ b/internal/headers/win32/types.go
@@ -18,6 +18,7 @@
package win32
import (
+ "strconv"
"unsafe"
"golang.org/x/sys/windows"
@@ -32,8 +33,8 @@ type (
LPWSTR struct {
*uint16
}
- ULONG = uint32 // ULONG is a 32-bit unsigned int in Win32
- UINT = uint32 // UINT is a 32-bit unsigned int in Win32
+ ULONG uint32 // ULONG is a 32-bit unsigned int in Win32
+ UINT uint32 // UINT is a 32-bit unsigned int in Win32
)
// NewLPWSTR creates a new LPWSTR from a string.
@@ -60,3 +61,7 @@ func (s *LPWSTR) Pointer() uintptr {
func (s *LPWSTR) String() string {
return windows.UTF16PtrToString(s.uint16)
}
+
+func (u *UINT) String() string { // decimal text of the value; dereferences u, so u must not be nil
+	return strconv.FormatUint(uint64(*u), 10)
+}
diff --git a/internal/headers/win32/utils.go b/internal/headers/win32/utils.go
new file mode 100644
index 00000000..2d713148
--- /dev/null
+++ b/internal/headers/win32/utils.go
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: Apache-2.0
+//
+// Copyright The Prometheus Authors
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package win32
+
+// ParseMultiSz splits a UTF-16 encoded MULTI_SZ buffer (Windows style) into
+// its individual strings.
+//
+// A MULTI_SZ buffer is a sequence of UTF-16 strings, each ended by a single
+// null (0x0000), with one extra null after the last string, e.g.
+//
+//	"foo\0bar\0baz\0\0"
+//
+// Parsing stops at the first empty string; trailing code units that are not
+// followed by a null are ignored. Every returned slice therefore excludes its
+// null terminator but is directly followed by it in buf.
+//
+// The returned slices reference buf (no copying). Their capacity equals their
+// length, so appending to one reallocates instead of overwriting the
+// terminator and the strings stored after it.
+func ParseMultiSz(buf []uint16) [][]uint16 {
+	var (
+		result [][]uint16
+		start  int
+	)
+
+	for i, unit := range buf {
+		if unit != 0 {
+			continue
+		}
+
+		if i == start {
+			// An empty string marks the end of the list.
+			break
+		}
+
+		// Three-index slice: cap == len keeps appends out of the rest of buf.
+		result = append(result, buf[start:i:i])
+		start = i + 1
+	}
+
+	return result
+}