gpu: fix windows_gpu_info metric (#2130)

This commit is contained in:
Jan-Otto Kröpke
2025-07-13 01:05:59 +02:00
committed by GitHub
parent 6b8c895a68
commit 524fea08c4
12 changed files with 534 additions and 255 deletions

View File

@@ -2,6 +2,7 @@
<dictionary name="project">
<words>
<w>containerd</w>
<w>luid</w>
<w>setupapi</w>
<w>spdx</w>
</words>

View File

@@ -21,28 +21,31 @@ These metrics are available on supported versions of Windows with compatible GPU
### Adapter-level Metrics
| Name | Description | Type | Labels |
|----------------------------------------------|-------------------------------------------------------------------------|-------|--------------------------------------------------------------------------------------|
| `windows_gpu_adapter_memory_committed_bytes` | Total committed GPU memory in bytes per physical GPU | gauge | `phys` |
| `windows_gpu_adapter_memory_dedicated_bytes` | Dedicated GPU memory usage in bytes per physical GPU | gauge | `phys` |
| `windows_gpu_adapter_memory_shared_bytes` | Shared GPU memory usage in bytes per physical GPU | gauge | `phys` |
| `windows_gpu_info` | A metric with a constant '1' value labeled with gpu device information. | gauge | `phys`, `physical_device_object_name`, `hardware_id`, `friendly_name`, `description` |
| `windows_gpu_local_adapter_memory_bytes` | Local adapter memory usage in bytes per physical GPU | gauge | `phys` |
| `windows_gpu_non_local_adapter_memory_bytes` | Non-local adapter memory usage in bytes per physical GPU | gauge | `phys` |
|--------------------------------------------------|------------------------------------------------------------------------------------|-------|---------------|
| `windows_gpu_info` | A metric with a constant '1' value labeled with gpu device information. | gauge | `luid`,`name`,`bus_number`,`phys`,`function_number` |
| `windows_gpu_dedicated_system_memory_size_bytes` | The size, in bytes, of memory that is dedicated from system memory. | gauge | `luid` |
| `windows_gpu_dedicated_video_memory_size_bytes` | The size, in bytes, of memory that is dedicated from video memory. | gauge | `luid` |
| `windows_gpu_shared_system_memory_size_bytes` | The size, in bytes, of memory from system memory that can be shared by many users. | gauge | `luid` |
| `windows_gpu_adapter_memory_committed_bytes` | Total committed GPU memory in bytes per physical GPU | gauge | `luid`,`phys` |
| `windows_gpu_adapter_memory_dedicated_bytes` | Dedicated GPU memory usage in bytes per physical GPU | gauge | `luid`,`phys` |
| `windows_gpu_adapter_memory_shared_bytes` | Shared GPU memory usage in bytes per physical GPU | gauge | `luid`,`phys` |
| `windows_gpu_local_adapter_memory_bytes` | Local adapter memory usage in bytes per physical GPU | gauge | `luid`,`phys` |
| `windows_gpu_non_local_adapter_memory_bytes` | Non-local adapter memory usage in bytes per physical GPU | gauge | `luid`,`phys` |
### Per-process Metrics
| Name | Description | Type | Labels |
|----------------------------------------------|-------------------------------------------------------------------------|---------|--------------------------------------------------------------------------------------|
| `windows_gpu_engine_time_seconds` | Total running time of the GPU engine in seconds | counter | `phys`, `eng`, `engtype`, `process_id` |
| `windows_gpu_process_memory_committed_bytes` | Total committed GPU memory in bytes per process | gauge | `phys`,`process_id` |
| `windows_gpu_process_memory_dedicated_bytes` | Dedicated GPU memory usage in bytes per process | gauge | `phys`,`process_id` |
| `windows_gpu_process_memory_local_bytes` | Local GPU memory usage in bytes per process | gauge | `phys`,`process_id` |
| `windows_gpu_process_memory_non_local_bytes` | Non-local GPU memory usage in bytes per process | gauge | `phys`,`process_id` |
| `windows_gpu_process_memory_shared_bytes` | Shared GPU memory usage in bytes per process | gauge | `phys`,`process_id` |
|----------------------------------------------|-------------------------------------------------|---------|-----------------------------------------------|
| `windows_gpu_engine_time_seconds` | Total running time of the GPU engine in seconds | counter | `luid`,`phys`, `eng`, `engtype`, `process_id` |
| `windows_gpu_process_memory_committed_bytes` | Total committed GPU memory in bytes per process | gauge | `luid`,`phys`,`process_id` |
| `windows_gpu_process_memory_dedicated_bytes` | Dedicated GPU memory usage in bytes per process | gauge | `luid`,`phys`,`process_id` |
| `windows_gpu_process_memory_local_bytes` | Local GPU memory usage in bytes per process | gauge | `luid`,`phys`,`process_id` |
| `windows_gpu_process_memory_non_local_bytes` | Non-local GPU memory usage in bytes per process | gauge | `luid`,`phys`,`process_id` |
| `windows_gpu_process_memory_shared_bytes` | Shared GPU memory usage in bytes per process | gauge | `luid`,`phys`,`process_id` |
## Metric Labels
* `phys`: Physical GPU index (e.g., "0")
* `luid`, `phys`: GPU adapter LUID (formatted like "0x00000000_0x0000D3B8") and physical GPU index (e.g., "0")
* `eng`: GPU engine index (e.g., "0", "1", ...)
* `engtype`: GPU engine type (e.g., "3D", "Copy", "VideoDecode", etc.)
* `process_id`: Process ID

View File

@@ -24,7 +24,7 @@ import (
"strconv"
"github.com/alecthomas/kingpin/v2"
"github.com/prometheus-community/windows_exporter/internal/headers/setupapi"
"github.com/prometheus-community/windows_exporter/internal/headers/gdi32"
"github.com/prometheus-community/windows_exporter/internal/mi"
"github.com/prometheus-community/windows_exporter/internal/pdh"
"github.com/prometheus-community/windows_exporter/internal/types"
@@ -41,6 +41,8 @@ var ConfigDefaults = Config{}
type Collector struct {
config Config
gpuDeviceCache map[string]gdi32.GPUDevice
// GPU Engine
gpuEnginePerfDataCollector *pdh.Collector
gpuEnginePerfDataObject []gpuEnginePerfDataCounterValues
@@ -48,6 +50,10 @@ type Collector struct {
gpuInfo *prometheus.Desc
gpuEngineRunningTime *prometheus.Desc
gpuSharedSystemMemorySize *prometheus.Desc
gpuDedicatedSystemMemorySize *prometheus.Desc
gpuDedicatedVideoMemorySize *prometheus.Desc
// GPU Adapter Memory
gpuAdapterMemoryPerfDataCollector *pdh.Collector
gpuAdapterMemoryPerfDataObject []gpuAdapterMemoryPerfDataCounterValues
@@ -115,78 +121,97 @@ func (c *Collector) Build(_ *slog.Logger, _ *mi.Session) error {
c.gpuInfo = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "info"),
"A metric with a constant '1' value labeled with gpu device information.",
[]string{"phys", "physical_device_object_name", "hardware_id", "friendly_name", "description"},
[]string{"luid", "name", "bus_number", "phys", "function_number"},
nil,
)
c.gpuSharedSystemMemorySize = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "shared_system_memory_size_bytes"),
"The size, in bytes, of memory from system memory that can be shared by many users.",
[]string{"luid"},
nil,
)
c.gpuDedicatedSystemMemorySize = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "dedicated_system_memory_size_bytes"),
"The size, in bytes, of memory that is dedicated from system memory.",
[]string{"luid"},
nil,
)
c.gpuDedicatedVideoMemorySize = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "dedicated_video_memory_size_bytes"),
"The size, in bytes, of memory that is dedicated from video memory.",
[]string{"luid"},
nil,
)
c.gpuEngineRunningTime = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "engine_time_seconds"),
"Total running time of the GPU in seconds.",
[]string{"process_id", "phys", "eng", "engtype"},
[]string{"process_id", "luid", "phys", "eng", "engtype"},
nil,
)
c.gpuAdapterMemoryDedicatedUsage = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "adapter_memory_dedicated_bytes"),
"Dedicated GPU memory usage in bytes.",
[]string{"phys"},
[]string{"luid", "phys"},
nil,
)
c.gpuAdapterMemorySharedUsage = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "adapter_memory_shared_bytes"),
"Shared GPU memory usage in bytes.",
[]string{"phys"},
[]string{"luid", "phys"},
nil,
)
c.gpuAdapterMemoryTotalCommitted = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "adapter_memory_committed_bytes"),
"Total committed GPU memory in bytes.",
[]string{"phys"},
[]string{"luid", "phys"},
nil,
)
c.gpuLocalAdapterMemoryUsage = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "local_adapter_memory_bytes"),
"Local adapter memory usage in bytes.",
[]string{"phys"},
[]string{"luid", "phys"},
nil,
)
c.gpuNonLocalAdapterMemoryUsage = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "non_local_adapter_memory_bytes"),
"Non-local adapter memory usage in bytes.",
[]string{"phys"},
[]string{"luid", "phys"},
nil,
)
c.gpuProcessMemoryDedicatedUsage = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "process_memory_dedicated_bytes"),
"Dedicated process memory usage in bytes.",
[]string{"process_id", "phys"},
[]string{"process_id", "luid", "phys"},
nil,
)
c.gpuProcessMemoryLocalUsage = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "process_memory_local_bytes"),
"Local process memory usage in bytes.",
[]string{"process_id", "phys"},
[]string{"process_id", "luid", "phys"},
nil,
)
c.gpuProcessMemoryNonLocalUsage = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "process_memory_non_local_bytes"),
"Non-local process memory usage in bytes.",
[]string{"process_id", "phys"},
[]string{"process_id", "luid", "phys"},
nil,
)
c.gpuProcessMemorySharedUsage = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "process_memory_shared_bytes"),
"Shared process memory usage in bytes.",
[]string{"process_id", "phys"},
[]string{"process_id", "luid", "phys"},
nil,
)
c.gpuProcessMemoryTotalCommitted = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "process_memory_committed_bytes"),
"Total committed process memory in bytes.",
[]string{"process_id", "phys"},
[]string{"process_id", "luid", "phys"},
nil,
)
@@ -217,15 +242,31 @@ func (c *Collector) Build(_ *slog.Logger, _ *mi.Session) error {
errs = append(errs, fmt.Errorf("failed to create GPU Process Memory perf data collector: %w", err))
}
gpus, err := gdi32.GetGPUDevices()
if err != nil {
errs = append(errs, fmt.Errorf("failed to get GPU devices: %w", err))
}
for _, gpu := range gpus {
if gpu.AdapterString == "" {
continue
}
if c.gpuDeviceCache == nil {
c.gpuDeviceCache = make(map[string]gdi32.GPUDevice)
}
luidKey := fmt.Sprintf("0x%08X_0x%08X", gpu.LUID.HighPart, gpu.LUID.LowPart)
c.gpuDeviceCache[luidKey] = gpu
}
return errors.Join(errs...)
}
func (c *Collector) Collect(ch chan<- prometheus.Metric) error {
errs := make([]error, 0)
if err := c.collectGpuInfo(ch); err != nil {
errs = append(errs, err)
}
c.collectGpuInfo(ch)
if err := c.collectGpuEngineMetrics(ch); err != nil {
errs = append(errs, err)
@@ -250,26 +291,40 @@ func (c *Collector) Collect(ch chan<- prometheus.Metric) error {
return errors.Join(errs...)
}
func (c *Collector) collectGpuInfo(ch chan<- prometheus.Metric) error {
gpus, err := setupapi.GetGPUDevices()
if err != nil {
return fmt.Errorf("failed to get GPU devices: %w", err)
}
for i, gpu := range gpus {
func (c *Collector) collectGpuInfo(ch chan<- prometheus.Metric) {
for luid, gpu := range c.gpuDeviceCache {
ch <- prometheus.MustNewConstMetric(
c.gpuInfo,
prometheus.GaugeValue,
1.0,
strconv.Itoa(i),
gpu.PhysicalDeviceObjectName,
gpu.HardwareID,
gpu.FriendlyName,
gpu.DeviceDesc,
luid,
gpu.AdapterString,
strconv.FormatInt(int64(gpu.BusNumber), 10),
strconv.FormatInt(int64(gpu.DeviceNumber), 10),
strconv.FormatInt(int64(gpu.FunctionNumber), 10),
)
ch <- prometheus.MustNewConstMetric(
c.gpuSharedSystemMemorySize,
prometheus.GaugeValue,
float64(gpu.SharedSystemMemorySize),
luid,
)
ch <- prometheus.MustNewConstMetric(
c.gpuDedicatedSystemMemorySize,
prometheus.GaugeValue,
float64(gpu.DedicatedSystemMemorySize),
luid,
)
ch <- prometheus.MustNewConstMetric(
c.gpuDedicatedVideoMemorySize,
prometheus.GaugeValue,
float64(gpu.DedicatedVideoMemorySize),
luid,
)
}
return nil
}
func (c *Collector) collectGpuEngineMetrics(ch chan<- prometheus.Metric) error {
@@ -283,9 +338,14 @@ func (c *Collector) collectGpuEngineMetrics(ch chan<- prometheus.Metric) error {
for _, data := range c.gpuEnginePerfDataObject {
instance := parseGPUCounterInstanceString(data.Name)
if _, ok := c.gpuDeviceCache[instance.Luid]; !ok {
continue
}
key := PidPhysEngEngType{
Pid: instance.Pid,
Phys: instance.Phys,
Luid: instance.Luid,
Eng: instance.Eng,
Engtype: instance.Engtype,
}
@@ -297,7 +357,7 @@ func (c *Collector) collectGpuEngineMetrics(ch chan<- prometheus.Metric) error {
c.gpuEngineRunningTime,
prometheus.CounterValue,
runningTime,
key.Pid, key.Phys, key.Eng, key.Engtype,
key.Pid, key.Luid, key.Phys, key.Eng, key.Engtype,
)
}
@@ -317,8 +377,13 @@ func (c *Collector) collectGpuAdapterMemoryMetrics(ch chan<- prometheus.Metric)
for _, data := range c.gpuAdapterMemoryPerfDataObject {
instance := parseGPUCounterInstanceString(data.Name)
if _, ok := c.gpuDeviceCache[instance.Luid]; !ok {
continue
}
key := PidPhysEngEngType{
Pid: instance.Pid,
Luid: instance.Luid,
Phys: instance.Phys,
Eng: instance.Eng,
Engtype: instance.Engtype,
@@ -333,21 +398,21 @@ func (c *Collector) collectGpuAdapterMemoryMetrics(ch chan<- prometheus.Metric)
c.gpuAdapterMemoryDedicatedUsage,
prometheus.GaugeValue,
dedicatedUsage,
key.Phys,
key.Luid, key.Phys,
)
ch <- prometheus.MustNewConstMetric(
c.gpuAdapterMemorySharedUsage,
prometheus.GaugeValue,
sharedUsageMap[key],
key.Phys,
key.Luid, key.Phys,
)
ch <- prometheus.MustNewConstMetric(
c.gpuAdapterMemoryTotalCommitted,
prometheus.GaugeValue,
totalCommittedMap[key],
key.Phys,
key.Luid, key.Phys,
)
}
@@ -360,20 +425,29 @@ func (c *Collector) collectGpuLocalAdapterMemoryMetrics(ch chan<- prometheus.Met
return fmt.Errorf("failed to collect GPU Local Adapter Memory perf data: %w", err)
}
localAdapterMemoryMap := make(map[string]float64)
localAdapterMemoryMap := make(map[PidPhysEngEngType]float64)
for _, data := range c.gpuLocalAdapterMemoryPerfDataObject {
instance := parseGPUCounterInstanceString(data.Name)
localAdapterMemoryMap[instance.Phys] += data.LocalUsage
if _, ok := c.gpuDeviceCache[instance.Luid]; !ok {
continue
}
for phys, localUsage := range localAdapterMemoryMap {
key := PidPhysEngEngType{
Luid: instance.Luid,
Phys: instance.Phys,
}
localAdapterMemoryMap[key] += data.LocalUsage
}
for key, localUsage := range localAdapterMemoryMap {
ch <- prometheus.MustNewConstMetric(
c.gpuLocalAdapterMemoryUsage,
prometheus.GaugeValue,
localUsage,
phys,
key.Luid, key.Phys,
)
}
@@ -386,20 +460,28 @@ func (c *Collector) collectGpuNonLocalAdapterMemoryMetrics(ch chan<- prometheus.
return fmt.Errorf("failed to collect GPU Non Local Adapter Memory perf data: %w", err)
}
nonLocalAdapterMemoryMap := make(map[string]float64)
nonLocalAdapterMemoryMap := make(map[PidPhysEngEngType]float64)
for _, data := range c.gpuNonLocalAdapterMemoryPerfDataObject {
instance := parseGPUCounterInstanceString(data.Name)
nonLocalAdapterMemoryMap[instance.Phys] += data.NonLocalUsage
if _, ok := c.gpuDeviceCache[instance.Luid]; !ok {
continue
}
for phys, nonLocalUsage := range nonLocalAdapterMemoryMap {
key := PidPhysEngEngType{
Luid: instance.Luid,
Phys: instance.Phys,
}
nonLocalAdapterMemoryMap[key] += data.NonLocalUsage
}
for key, nonLocalUsage := range nonLocalAdapterMemoryMap {
ch <- prometheus.MustNewConstMetric(
c.gpuNonLocalAdapterMemoryUsage,
prometheus.GaugeValue,
nonLocalUsage,
phys,
key.Luid, key.Phys,
)
}
@@ -421,8 +503,13 @@ func (c *Collector) collectGpuProcessMemoryMetrics(ch chan<- prometheus.Metric)
for _, data := range c.gpuProcessMemoryPerfDataObject {
instance := parseGPUCounterInstanceString(data.Name)
if _, ok := c.gpuDeviceCache[instance.Luid]; !ok {
continue
}
key := PidPhys{
Pid: instance.Pid,
Luid: instance.Luid,
Phys: instance.Phys,
}
processDedicatedUsageMap[key] += data.DedicatedUsage
@@ -437,35 +524,35 @@ func (c *Collector) collectGpuProcessMemoryMetrics(ch chan<- prometheus.Metric)
c.gpuProcessMemoryDedicatedUsage,
prometheus.GaugeValue,
dedicatedUsage,
key.Pid, key.Phys,
key.Pid, key.Luid, key.Phys,
)
ch <- prometheus.MustNewConstMetric(
c.gpuProcessMemoryLocalUsage,
prometheus.GaugeValue,
processLocalUsageMap[key],
key.Pid, key.Phys,
key.Pid, key.Luid, key.Phys,
)
ch <- prometheus.MustNewConstMetric(
c.gpuProcessMemoryNonLocalUsage,
prometheus.GaugeValue,
processNonLocalUsageMap[key],
key.Pid, key.Phys,
key.Pid, key.Luid, key.Phys,
)
ch <- prometheus.MustNewConstMetric(
c.gpuProcessMemorySharedUsage,
prometheus.GaugeValue,
processSharedUsageMap[key],
key.Pid, key.Phys,
key.Pid, key.Luid, key.Phys,
)
ch <- prometheus.MustNewConstMetric(
c.gpuProcessMemoryTotalCommitted,
prometheus.GaugeValue,
processTotalCommittedMap[key],
key.Pid, key.Phys,
key.Pid, key.Luid, key.Phys,
)
}

View File

@@ -18,12 +18,13 @@
package gpu
import (
"fmt"
"strings"
)
type Instance struct {
Pid string
Luid [2]string
Luid string
Phys string
Eng string
Engtype string
@@ -32,11 +33,13 @@ type Instance struct {
type PidPhys struct {
Pid string
Luid string
Phys string
}
type PidPhysEngEngType struct {
Pid string
Luid string
Phys string
Eng string
Engtype string
@@ -58,8 +61,7 @@ func parseGPUCounterInstanceString(s string) Instance {
}
case "luid":
if i+2 < len(parts) {
instance.Luid[0] = parts[i+1]
instance.Luid[1] = parts[i+2]
instance.Luid = fmt.Sprintf("%s_%s", parts[i+1], parts[i+2])
}
case "phys":
if i+1 < len(parts) {

View File

@@ -0,0 +1,192 @@
// SPDX-License-Identifier: Apache-2.0
//
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build windows
package gdi32
import (
"errors"
"fmt"
"unsafe"
"golang.org/x/sys/windows"
)
// https://learn.microsoft.com/en-us/windows-hardware/drivers/ddi/d3dkmthk/ne-d3dkmthk-_kmtqueryadapterinfotype
// https://github.com/nalilord/AMDPlugin/blob/bb405b6d58ea543ff630f3488384473bee79f447/Common/d3dkmthk.pas#L54
// Query type values stored in D3DKMT_QUERYADAPTERINFO.queryType. Each value
// selects which structure pPrivateDriverData must point to when calling
// D3DKMTQueryAdapterInfo.
const (
// KMTQAITYPE_GETSEGMENTSIZE pPrivateDriverData points to a D3DKMT_SEGMENTSIZEINFO structure that contains information about the size of memory and aperture segments.
KMTQAITYPE_GETSEGMENTSIZE = 3
// KMTQAITYPE_ADAPTERADDRESS pPrivateDriverData points to a D3DKMT_ADAPTERADDRESS structure that contains information about the physical location on the PCI bus of the adapter.
KMTQAITYPE_ADAPTERADDRESS = 6
// KMTQAITYPE_ADAPTERREGISTRYINFO pPrivateDriverData points to a D3DKMT_ADAPTERREGISTRYINFO structure that contains registry information about the graphics adapter.
KMTQAITYPE_ADAPTERREGISTRYINFO = 8
)
// ErrNoGPUDevices is returned by GetGPUDevices when the adapter enumeration
// reports zero adapters, or when it finishes without errors but yields no devices.
var ErrNoGPUDevices = errors.New("no GPU devices found")
// GetGPUDeviceByLUID opens the adapter identified by adapterLUID, reads its
// properties through GetGPUDevice and closes the adapter handle again.
//
// If opening the adapter fails, an empty GPUDevice is returned. Failures from
// the query and from closing the handle are collected and returned together;
// in that case the returned GPUDevice is whatever GetGPUDevice produced and its
// LUID field is left unset. On success the LUID field is set to adapterLUID.
func GetGPUDeviceByLUID(adapterLUID windows.LUID) (GPUDevice, error) {
	request := D3DKMT_OPENADAPTERFROMLUID{AdapterLUID: adapterLUID}
	if err := D3DKMTOpenAdapterFromLuid(&request); err != nil {
		return GPUDevice{}, fmt.Errorf("D3DKMTOpenAdapterFromLuid failed: %w", err)
	}

	var failures []error

	device, queryErr := GetGPUDevice(request.HAdapter)
	if queryErr != nil {
		failures = append(failures, fmt.Errorf("GetGPUDevice failed: %w", queryErr))
	}

	// Always release the handle, even when the query above failed.
	closeRequest := D3DKMT_CLOSEADAPTER{HAdapter: request.HAdapter}
	if closeErr := D3DKMTCloseAdapter(&closeRequest); closeErr != nil {
		failures = append(failures, fmt.Errorf("D3DKMTCloseAdapter failed: %w", closeErr))
	}

	if len(failures) != 0 {
		return device, fmt.Errorf("errors occurred while getting GPU device: %w", errors.Join(failures...))
	}

	device.LUID = adapterLUID

	return device, nil
}
// GetGPUDevice queries an already opened adapter handle for its memory segment
// sizes, its adapter address (bus/device/function numbers) and its adapter
// name, and returns them as a GPUDevice.
//
// The handle is neither opened nor closed here, and the LUID field of the
// result is not populated; callers fill it in themselves. On error the
// GPUDevice returned contains whatever was gathered before the failing query.
func GetGPUDevice(hAdapter D3DKMT_HANDLE) (GPUDevice, error) {
var gpuDevice GPUDevice
// Query 1: segment sizes (dedicated video / dedicated system / shared system memory).
var size D3DKMT_SEGMENTSIZEINFO
query := D3DKMT_QUERYADAPTERINFO{
hAdapter: hAdapter,
queryType: KMTQAITYPE_GETSEGMENTSIZE,
pPrivateDriverData: unsafe.Pointer(&size),
privateDriverDataSize: uint32(unsafe.Sizeof(size)),
}
if err := D3DKMTQueryAdapterInfo(&query); err != nil {
return gpuDevice, fmt.Errorf("D3DKMTQueryAdapterInfo (segment size) failed: %w", err)
}
gpuDevice.DedicatedVideoMemorySize = size.DedicatedVideoMemorySize
gpuDevice.DedicatedSystemMemorySize = size.DedicatedSystemMemorySize
gpuDevice.SharedSystemMemorySize = size.SharedSystemMemorySize
// Query 2: adapter address. The same query struct is reused; only the type,
// output pointer and output size are swapped.
var address D3DKMT_ADAPTERADDRESS
query.queryType = KMTQAITYPE_ADAPTERADDRESS
query.pPrivateDriverData = unsafe.Pointer(&address)
query.privateDriverDataSize = uint32(unsafe.Sizeof(address))
if err := D3DKMTQueryAdapterInfo(&query); err != nil {
return gpuDevice, fmt.Errorf("D3DKMTQueryAdapterInfo (adapter address) failed: %w", err)
}
gpuDevice.BusNumber = address.BusNumber
gpuDevice.DeviceNumber = address.DeviceNumber
gpuDevice.FunctionNumber = address.FunctionNumber
// Query 3: adapter registry info (source of the adapter name).
var info D3DKMT_ADAPTERREGISTRYINFO
query.queryType = KMTQAITYPE_ADAPTERREGISTRYINFO
query.pPrivateDriverData = unsafe.Pointer(&info)
query.privateDriverDataSize = uint32(unsafe.Sizeof(info))
// ERROR_FILE_NOT_FOUND is tolerated here: the function carries on and derives
// AdapterString from the (zero-initialised) info buffer.
if err := D3DKMTQueryAdapterInfo(&query); err != nil && !errors.Is(err, windows.ERROR_FILE_NOT_FOUND) {
return gpuDevice, fmt.Errorf("D3DKMTQueryAdapterInfo (info) failed: %w", err)
}
gpuDevice.AdapterString = windows.UTF16ToString(info.AdapterString[:])
return gpuDevice, nil
}
// GetGPUDevices enumerates the graphics adapters via D3DKMTEnumAdapters2 and
// converts each one into a GPUDevice using GetGPUDevice.
//
// Every adapter handle handed out by the enumeration is closed again before
// the function returns. Per-adapter failures (null handle, query error, close
// error) are collected; if any occurred they are returned joined together,
// along with the devices that could still be read. ErrNoGPUDevices is returned
// when the enumeration reports no adapters or no device was produced.
func GetGPUDevices() ([]GPUDevice, error) {
	devices := make([]GPUDevice, 0, 2)

	// Probe call with an empty request (zero count, nil buffer) to get the
	// number of adapters.
	var request D3DKMT_ENUMADAPTERS2
	if err := D3DKMTEnumAdapters2(&request); err != nil {
		return devices, fmt.Errorf("D3DKMTEnumAdapters2 (get count) failed: %w", err)
	}

	if request.NumAdapters == 0 {
		return devices, ErrNoGPUDevices
	}

	// Real call: supply a buffer sized from the probe result.
	adapters := make([]D3DKMT_ADAPTERINFO, request.NumAdapters)
	request.PAdapters = &adapters[0]

	if err := D3DKMTEnumAdapters2(&request); err != nil {
		return devices, fmt.Errorf("D3DKMTEnumAdapters2 (get adapters) failed: %w", err)
	}

	var failures []error

	for idx := uint32(0); idx < request.NumAdapters; idx++ {
		current := adapters[idx]

		// A zero handle cannot be queried or closed; record it and move on.
		if current.HAdapter == 0 {
			failures = append(failures, fmt.Errorf("adapter %d has null handle", idx))

			continue
		}

		// The closure scopes the deferred close to this adapter, so the handle is
		// released at the end of each iteration rather than at function return.
		func() {
			defer func() {
				closeRequest := D3DKMT_CLOSEADAPTER{HAdapter: current.HAdapter}
				if closeErr := D3DKMTCloseAdapter(&closeRequest); closeErr != nil {
					failures = append(failures, fmt.Errorf("failed to close adapter %v: %w", current.AdapterLUID, closeErr))
				}
			}()

			device, err := GetGPUDevice(current.HAdapter)
			if err != nil {
				failures = append(failures, fmt.Errorf("failed to get GPU device for adapter %v: %w", current.AdapterLUID, err))

				return
			}

			device.LUID = current.AdapterLUID
			devices = append(devices, device)
		}()
	}

	switch {
	case len(failures) > 0:
		return devices, errors.Join(failures...)
	case len(devices) == 0:
		return devices, ErrNoGPUDevices
	default:
		return devices, nil
	}
}

View File

@@ -13,19 +13,17 @@
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build windows
package setupapi_test
package gdi32_test
import (
"testing"
"github.com/prometheus-community/windows_exporter/internal/headers/setupapi"
"github.com/prometheus-community/windows_exporter/internal/headers/gdi32"
"github.com/stretchr/testify/require"
)
func TestGetGPUDevices(t *testing.T) {
devices, err := setupapi.GetGPUDevices()
devices, err := gdi32.GetGPUDevices()
require.NoError(t, err, "Failed to get GPU devices")
require.NotNil(t, devices)

View File

@@ -0,0 +1,77 @@
// SPDX-License-Identifier: Apache-2.0
//
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package gdi32
import (
"fmt"
"unsafe"
"github.com/prometheus-community/windows_exporter/internal/headers/ntdll"
"golang.org/x/sys/windows"
)
// Handles to gdi32.dll and the D3DKMT entry points wrapped by this package.
// They are created with windows.NewLazySystemDLL / NewProc and invoked through
// Call by the wrapper functions in this package.
//nolint:gochecknoglobals
var (
modGdi32 = windows.NewLazySystemDLL("gdi32.dll")
procD3DKMTOpenAdapterFromLuid = modGdi32.NewProc("D3DKMTOpenAdapterFromLuid")
procD3DKMTQueryAdapterInfo = modGdi32.NewProc("D3DKMTQueryAdapterInfo")
procD3DKMTCloseAdapter = modGdi32.NewProc("D3DKMTCloseAdapter")
procD3DKMTEnumAdapters2 = modGdi32.NewProc("D3DKMTEnumAdapters2")
)
// D3DKMTOpenAdapterFromLuid invokes the gdi32 export of the same name with ptr
// as its only argument.
//
// A zero return value means success. Any other value is reported as an error
// containing the raw status in hex and wrapping the result of
// ntdll.RtlNtStatusToDosError for that status.
func D3DKMTOpenAdapterFromLuid(ptr *D3DKMT_OPENADAPTERFROMLUID) error {
	status, _, _ := procD3DKMTOpenAdapterFromLuid.Call(uintptr(unsafe.Pointer(ptr)))
	if status == 0 {
		return nil
	}

	return fmt.Errorf("D3DKMTOpenAdapterFromLuid failed: 0x%X: %w", status, ntdll.RtlNtStatusToDosError(status))
}
// D3DKMTEnumAdapters2 invokes the gdi32 export of the same name with ptr as
// its only argument.
//
// A zero return value means success. Any other value is reported as an error
// containing the raw status in hex and wrapping the result of
// ntdll.RtlNtStatusToDosError for that status.
func D3DKMTEnumAdapters2(ptr *D3DKMT_ENUMADAPTERS2) error {
	status, _, _ := procD3DKMTEnumAdapters2.Call(uintptr(unsafe.Pointer(ptr)))
	if status == 0 {
		return nil
	}

	return fmt.Errorf("D3DKMTEnumAdapters2 failed: 0x%X: %w", status, ntdll.RtlNtStatusToDosError(status))
}
// D3DKMTQueryAdapterInfo invokes the gdi32 export of the same name with query
// as its only argument. The caller selects what is queried and where the
// result is written through the fields of query.
//
// A zero return value means success. Any other value is reported as an error
// containing the raw status in hex and wrapping the result of
// ntdll.RtlNtStatusToDosError for that status.
func D3DKMTQueryAdapterInfo(query *D3DKMT_QUERYADAPTERINFO) error {
	status, _, _ := procD3DKMTQueryAdapterInfo.Call(uintptr(unsafe.Pointer(query)))
	if status == 0 {
		return nil
	}

	return fmt.Errorf("D3DKMTQueryAdapterInfo failed: 0x%X: %w", status, ntdll.RtlNtStatusToDosError(status))
}
// D3DKMTCloseAdapter invokes the gdi32 export of the same name with ptr as its
// only argument, releasing the adapter handle stored in ptr.HAdapter.
//
// A zero return value means success. Any other value is reported as an error
// containing the raw status in hex and wrapping the result of
// ntdll.RtlNtStatusToDosError for that status.
func D3DKMTCloseAdapter(ptr *D3DKMT_CLOSEADAPTER) error {
	status, _, _ := procD3DKMTCloseAdapter.Call(uintptr(unsafe.Pointer(ptr)))
	if status == 0 {
		return nil
	}

	return fmt.Errorf("D3DKMTCloseAdapter failed: 0x%X: %w", status, ntdll.RtlNtStatusToDosError(status))
}

View File

@@ -0,0 +1,85 @@
// SPDX-License-Identifier: Apache-2.0
//
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build windows
package gdi32
import (
"unsafe"
"github.com/prometheus-community/windows_exporter/internal/headers/win32"
"golang.org/x/sys/windows"
)
// D3DKMT_HANDLE is the handle type used for adapters by the D3DKMT wrappers in
// this package. It is an alias of win32.UINT.
type D3DKMT_HANDLE = win32.UINT
// D3DKMT_OPENADAPTERFROMLUID is the argument structure passed to
// D3DKMTOpenAdapterFromLuid.
type D3DKMT_OPENADAPTERFROMLUID struct {
// AdapterLUID identifies the adapter to open; set by the caller.
AdapterLUID windows.LUID
// HAdapter is read by callers after a successful call to obtain the opened handle.
HAdapter D3DKMT_HANDLE
}
// D3DKMT_CLOSEADAPTER is the argument structure passed to D3DKMTCloseAdapter.
type D3DKMT_CLOSEADAPTER struct {
// HAdapter is the adapter handle to close.
HAdapter D3DKMT_HANDLE
}
// D3DKMT_QUERYADAPTERINFO is the argument structure passed to
// D3DKMTQueryAdapterInfo. Its fields are unexported, so values are built
// inside this package (see GetGPUDevice).
type D3DKMT_QUERYADAPTERINFO struct {
// hAdapter is the handle of the adapter being queried.
hAdapter D3DKMT_HANDLE
// queryType is one of the KMTQAITYPE_* constants.
queryType int32
// pPrivateDriverData points to the output structure matching queryType.
pPrivateDriverData unsafe.Pointer
// privateDriverDataSize is the size in bytes of that output structure.
privateDriverDataSize uint32
}
// D3DKMT_ENUMADAPTERS2 is the argument structure passed to D3DKMTEnumAdapters2.
// GetGPUDevices calls it twice: first with a zero count and nil buffer to
// obtain the adapter count, then with a buffer of that many entries.
type D3DKMT_ENUMADAPTERS2 struct {
// NumAdapters is the adapter count read back after the call.
NumAdapters uint32
// PAdapters points to the first element of the caller-allocated result buffer, or is nil.
PAdapters *D3DKMT_ADAPTERINFO
}
// D3DKMT_ADAPTERINFO is one entry of the buffer that D3DKMT_ENUMADAPTERS2.PAdapters
// points to. Within this package only HAdapter and AdapterLUID are read.
type D3DKMT_ADAPTERINFO struct {
// HAdapter is the adapter handle; GetGPUDevices closes it after use.
HAdapter D3DKMT_HANDLE
// AdapterLUID is copied into GPUDevice.LUID.
AdapterLUID windows.LUID
NumOfSources win32.ULONG
Present win32.BOOL
}
// D3DKMT_ADAPTERREGISTRYINFO is the output structure for the
// KMTQAITYPE_ADAPTERREGISTRYINFO query. Every field is a fixed-size UTF-16
// buffer of win32.MAX_PATH code units; GetGPUDevice reads only AdapterString.
type D3DKMT_ADAPTERREGISTRYINFO struct {
AdapterString [win32.MAX_PATH]uint16
BiosString [win32.MAX_PATH]uint16
DacType [win32.MAX_PATH]uint16
ChipType [win32.MAX_PATH]uint16
}
// D3DKMT_SEGMENTSIZEINFO is the output structure for the
// KMTQAITYPE_GETSEGMENTSIZE query. GetGPUDevice copies all three values into
// the GPUDevice fields of the same name.
type D3DKMT_SEGMENTSIZEINFO struct {
DedicatedVideoMemorySize uint64
DedicatedSystemMemorySize uint64
SharedSystemMemorySize uint64
}
// D3DKMT_ADAPTERADDRESS is the output structure for the
// KMTQAITYPE_ADAPTERADDRESS query. GetGPUDevice copies all three values into
// the GPUDevice fields of the same name.
type D3DKMT_ADAPTERADDRESS struct {
BusNumber win32.UINT
DeviceNumber win32.UINT
FunctionNumber win32.UINT
}
// GPUDevice aggregates the per-adapter information gathered by GetGPUDevice,
// plus the adapter LUID that GetGPUDevices / GetGPUDeviceByLUID fill in.
type GPUDevice struct {
// AdapterString is the adapter name decoded from D3DKMT_ADAPTERREGISTRYINFO.AdapterString.
AdapterString string
// LUID is the adapter's LUID; it is set by the callers of GetGPUDevice, not by GetGPUDevice itself.
LUID windows.LUID
// Memory sizes copied from D3DKMT_SEGMENTSIZEINFO.
DedicatedVideoMemorySize uint64
DedicatedSystemMemorySize uint64
SharedSystemMemorySize uint64
// Adapter address copied from D3DKMT_ADAPTERADDRESS.
BusNumber win32.UINT
DeviceNumber win32.UINT
FunctionNumber win32.UINT
}

View File

@@ -15,7 +15,7 @@
//go:build windows
package setupapi
package ntdll
import (
"golang.org/x/sys/windows"
@@ -23,9 +23,15 @@ import (
//nolint:gochecknoglobals
var (
modSetupAPI = windows.NewLazySystemDLL("setupapi.dll")
procSetupDiGetClassDevsW = modSetupAPI.NewProc("SetupDiGetClassDevsW")
procSetupDiEnumDeviceInfo = modSetupAPI.NewProc("SetupDiEnumDeviceInfo")
procSetupDiGetDeviceRegistryPropertyW = modSetupAPI.NewProc("SetupDiGetDeviceRegistryPropertyW")
procSetupDiDestroyDeviceInfoList = modSetupAPI.NewProc("SetupDiDestroyDeviceInfoList")
modNtdll = windows.NewLazySystemDLL("ntdll.dll")
procRtlNtStatusToDosError = modNtdll.NewProc("RtlNtStatusToDosError")
)
// RtlNtStatusToDosError passes status to ntdll's RtlNtStatusToDosError and
// returns the resulting code as a windows.Errno. A result of zero is returned
// as a nil error.
func RtlNtStatusToDosError(status uintptr) error {
	code, _, _ := procRtlNtStatusToDosError.Call(status)
	if code != 0 {
		return windows.Errno(code)
	}

	return nil
}

View File

@@ -1,135 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
//
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build windows
package setupapi
import (
"sync"
"unsafe"
"golang.org/x/sys/windows"
)
// GUID_DISPLAY_ADAPTER returns a pointer to the GUID
// {4d36e968-e325-11ce-bfc1-08002be10318}, which GetGPUDevices passes to
// SetupDiGetClassDevsW as the device class to enumerate. The value is built
// once via sync.OnceValue, and every call returns the same pointer.
//nolint:gochecknoglobals
var GUID_DISPLAY_ADAPTER = sync.OnceValue(func() *windows.GUID {
return &windows.GUID{
Data1: 0x4d36e968,
Data2: 0xe325,
Data3: 0x11ce,
Data4: [8]byte{0xbf, 0xc1, 0x08, 0x00, 0x2b, 0xe1, 0x03, 0x18},
}
})
// GetGPUDevices enumerates the devices of the GUID_DISPLAY_ADAPTER class that
// SetupDiGetClassDevsW reports with the DIGCF_PRESENT flag and returns one
// GPUDevice per enumerated device.
//
// If the device information set cannot be obtained, the error reported by the
// call is returned. Individual properties that cannot be read do not abort the
// enumeration: DeviceDesc and FriendlyName fall back to "", HardwareID and
// PhysicalDeviceObjectName fall back to "unknown".
func GetGPUDevices() ([]GPUDevice, error) {
	hDevInfo, _, err := procSetupDiGetClassDevsW.Call(
		uintptr(unsafe.Pointer(GUID_DISPLAY_ADAPTER())),
		0,
		0,
		DIGCF_PRESENT,
	)
	if windows.Handle(hDevInfo) == windows.InvalidHandle {
		return nil, err
	}

	// Release the device information set on every exit path. The result of the
	// destroy call is deliberately ignored: nothing useful can be done with it.
	defer func() {
		_, _, _ = procSetupDiDestroyDeviceInfoList.Call(hDevInfo)
	}()

	var (
		devices    []GPUDevice
		deviceData SP_DEVINFO_DATA
	)

	deviceData.CbSize = uint32(unsafe.Sizeof(deviceData))

	for i := 0; ; i++ {
		ret, _, _ := procSetupDiEnumDeviceInfo.Call(hDevInfo, uintptr(i), uintptr(unsafe.Pointer(&deviceData)))
		if ret == 0 {
			break // No more devices
		}

		devices = append(devices, GPUDevice{
			DeviceDesc:               getDeviceRegistryString(hDevInfo, &deviceData, SPDRP_DEVICEDESC, ""),
			FriendlyName:             getDeviceRegistryString(hDevInfo, &deviceData, SPDRP_FRIENDLYNAME, ""),
			HardwareID:               getDeviceRegistryString(hDevInfo, &deviceData, SPDRP_HARDWAREID, "unknown"),
			PhysicalDeviceObjectName: getDeviceRegistryString(hDevInfo, &deviceData, SPDRP_PHYSICAL_DEVICE_OBJECT_NAME, "unknown"),
		})
	}

	return devices, nil
}

// getDeviceRegistryString reads one registry property of the given device via
// SetupDiGetDeviceRegistryPropertyW and decodes it as a NUL-terminated UTF-16
// string; it returns fallback when the call reports failure.
func getDeviceRegistryString(hDevInfo uintptr, deviceData *SP_DEVINFO_DATA, property uintptr, fallback string) string {
	var propertyBuffer [256]uint16

	ret, _, _ := procSetupDiGetDeviceRegistryPropertyW.Call(
		hDevInfo,
		uintptr(unsafe.Pointer(deviceData)),
		property,
		0,
		uintptr(unsafe.Pointer(&propertyBuffer[0])),
		uintptr(len(propertyBuffer)*2), // buffer size in bytes (2 bytes per UTF-16 unit)
		0,
	)
	if ret == 0 {
		return fallback
	}

	return windows.UTF16ToString(propertyBuffer[:])
}

View File

@@ -1,42 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
//
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build windows
package setupapi
import "golang.org/x/sys/windows"
// Constants passed to the setupapi calls made by GetGPUDevices.
const (
// DIGCF_PRESENT is passed as the flags argument of SetupDiGetClassDevsW.
DIGCF_PRESENT = 0x00000002
// SPDRP_* select which property SetupDiGetDeviceRegistryPropertyW reads; each
// one feeds the GPUDevice field of the corresponding name.
SPDRP_DEVICEDESC = 0x00000000
SPDRP_FRIENDLYNAME = 0x0000000C
SPDRP_HARDWAREID = 0x00000001
SPDRP_PHYSICAL_DEVICE_OBJECT_NAME = 0x0000000E
)
// SP_DEVINFO_DATA is the per-device structure that GetGPUDevices passes to
// SetupDiEnumDeviceInfo and SetupDiGetDeviceRegistryPropertyW.
type SP_DEVINFO_DATA struct {
// CbSize must hold the size of this structure; GetGPUDevices sets it with unsafe.Sizeof before use.
CbSize uint32
ClassGuid windows.GUID
DevInst uint32
_ uintptr // Reserved
}
// GPUDevice holds the string properties that GetGPUDevices reads for each
// enumerated display adapter.
type GPUDevice struct {
// DeviceDesc is the SPDRP_DEVICEDESC property, or "" if it could not be read.
DeviceDesc string
// FriendlyName is the SPDRP_FRIENDLYNAME property, or "" if it could not be read.
FriendlyName string
// HardwareID is the SPDRP_HARDWAREID property, or "unknown" if it could not be read.
HardwareID string
// PhysicalDeviceObjectName is the SPDRP_PHYSICAL_DEVICE_OBJECT_NAME property, or "unknown" if it could not be read.
PhysicalDeviceObjectName string
}

View File

@@ -23,12 +23,17 @@ import (
"golang.org/x/sys/windows"
)
// MAX_PATH mirrors the Win32 MAX_PATH constant (260). It is used as the element
// count of fixed-size UTF-16 buffers such as those in gdi32.D3DKMT_ADAPTERREGISTRYINFO.
const MAX_PATH = 260
type (
BOOL = int32 // BOOL is a 32-bit signed int in Win32
DATE_TIME = windows.Filetime
DWORD = uint32
LPWSTR struct {
*uint16
}
ULONG = uint32 // ULONG is a 32-bit unsigned int in Win32
UINT = uint32 // UINT is a 32-bit unsigned int in Win32
)
// NewLPWSTR creates a new LPWSTR from a string.