mirror of
https://github.com/prometheus-community/windows_exporter.git
synced 2026-02-08 05:56:37 +00:00
gpu: add info metric about devices (#2070)
This commit is contained in:
1
.idea/dictionaries/project.xml
generated
1
.idea/dictionaries/project.xml
generated
@@ -2,6 +2,7 @@
|
||||
<dictionary name="project">
|
||||
<words>
|
||||
<w>containerd</w>
|
||||
<w>setupapi</w>
|
||||
<w>spdx</w>
|
||||
</words>
|
||||
</dictionary>
|
||||
|
||||
@@ -20,24 +20,25 @@ These metrics are available on supported versions of Windows with compatible GPU
|
||||
|
||||
### Adapter-level Metrics
|
||||
|
||||
| Name | Description | Type | Labels |
|
||||
|----------------------------------------------|----------------------------------------------------------|-------|--------|
|
||||
| `windows_gpu_adapter_memory_committed_bytes` | Total committed GPU memory in bytes per physical GPU | gauge | `phys` |
|
||||
| `windows_gpu_adapter_memory_dedicated_bytes` | Dedicated GPU memory usage in bytes per physical GPU | gauge | `phys` |
|
||||
| `windows_gpu_adapter_memory_shared_bytes` | Shared GPU memory usage in bytes per physical GPU | gauge | `phys` |
|
||||
| `windows_gpu_local_adapter_memory_bytes` | Local adapter memory usage in bytes per physical GPU | gauge | `phys` |
|
||||
| `windows_gpu_non_local_adapter_memory_bytes` | Non-local adapter memory usage in bytes per physical GPU | gauge | `phys` |
|
||||
| Name | Description | Type | Labels |
|
||||
|----------------------------------------------|-------------------------------------------------------------------------|-------|--------------------------------------------------------------------------------------|
|
||||
| `windows_gpu_adapter_memory_committed_bytes` | Total committed GPU memory in bytes per physical GPU | gauge | `phys` |
|
||||
| `windows_gpu_adapter_memory_dedicated_bytes` | Dedicated GPU memory usage in bytes per physical GPU | gauge | `phys` |
|
||||
| `windows_gpu_adapter_memory_shared_bytes` | Shared GPU memory usage in bytes per physical GPU | gauge | `phys` |
|
||||
| `windows_gpu_info` | A metric with a constant '1' value labeled with gpu device information. | gauge | `phys`, `physical_device_object_name`, `hardware_id`, `friendly_name`, `description` |
|
||||
| `windows_gpu_local_adapter_memory_bytes` | Local adapter memory usage in bytes per physical GPU | gauge | `phys` |
|
||||
| `windows_gpu_non_local_adapter_memory_bytes` | Non-local adapter memory usage in bytes per physical GPU | gauge | `phys` |
|
||||
|
||||
### Per-process Metrics
|
||||
|
||||
| Name | Description | Type | Labels |
|
||||
|----------------------------------------------|-------------------------------------------------|---------|----------------------------------------|
|
||||
| `windows_gpu_engine_time_seconds` | Total running time of the GPU engine in seconds | counter | `phys`, `eng`, `engtype`, `process_id` |
|
||||
| `windows_gpu_process_memory_committed_bytes` | Total committed GPU memory in bytes per process | gauge | `phys`,`process_id` |
|
||||
| `windows_gpu_process_memory_dedicated_bytes` | Dedicated GPU memory usage in bytes per process | gauge | `phys`,`process_id` |
|
||||
| `windows_gpu_process_memory_local_bytes` | Local GPU memory usage in bytes per process | gauge | `phys`,`process_id` |
|
||||
| `windows_gpu_process_memory_non_local_bytes` | Non-local GPU memory usage in bytes per process | gauge | `phys`,`process_id` |
|
||||
| `windows_gpu_process_memory_shared_bytes` | Shared GPU memory usage in bytes per process | gauge | `phys`,`process_id` |
|
||||
| Name | Description | Type | Labels |
|
||||
|----------------------------------------------|-------------------------------------------------------------------------|---------|--------------------------------------------------------------------------------------|
|
||||
| `windows_gpu_engine_time_seconds` | Total running time of the GPU engine in seconds | counter | `phys`, `eng`, `engtype`, `process_id` |
|
||||
| `windows_gpu_process_memory_committed_bytes` | Total committed GPU memory in bytes per process | gauge | `phys`,`process_id` |
|
||||
| `windows_gpu_process_memory_dedicated_bytes` | Dedicated GPU memory usage in bytes per process | gauge | `phys`,`process_id` |
|
||||
| `windows_gpu_process_memory_local_bytes` | Local GPU memory usage in bytes per process | gauge | `phys`,`process_id` |
|
||||
| `windows_gpu_process_memory_non_local_bytes` | Non-local GPU memory usage in bytes per process | gauge | `phys`,`process_id` |
|
||||
| `windows_gpu_process_memory_shared_bytes` | Shared GPU memory usage in bytes per process | gauge | `phys`,`process_id` |
|
||||
|
||||
## Metric Labels
|
||||
|
||||
@@ -50,6 +51,12 @@ These metrics are available on supported versions of Windows with compatible GPU
|
||||
|
||||
These are basic queries to help you get started with GPU monitoring on Windows using Prometheus.
|
||||
|
||||
**Show GPU information for a specific physical GPU (0):**
|
||||
|
||||
```promql
|
||||
windows_gpu_info{description="NVIDIA GeForce GTX 1070",friendly_name="",hardware_id="PCI\\VEN_10DE&DEV_1B81&SUBSYS_61733842&REV_A1",phys="0",physical_device_object_name="\\Device\\NTPNP_PCI0027"} 1
|
||||
```
|
||||
|
||||
**Show total dedicated GPU memory (in bytes) usage on GPU 0:**
|
||||
|
||||
```promql
|
||||
|
||||
@@ -21,8 +21,10 @@ import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"strconv"
|
||||
|
||||
"github.com/alecthomas/kingpin/v2"
|
||||
"github.com/prometheus-community/windows_exporter/internal/headers/setupapi"
|
||||
"github.com/prometheus-community/windows_exporter/internal/mi"
|
||||
"github.com/prometheus-community/windows_exporter/internal/pdh"
|
||||
"github.com/prometheus-community/windows_exporter/internal/types"
|
||||
@@ -43,6 +45,7 @@ type Collector struct {
|
||||
gpuEnginePerfDataCollector *pdh.Collector
|
||||
gpuEnginePerfDataObject []gpuEnginePerfDataCounterValues
|
||||
|
||||
gpuInfo *prometheus.Desc
|
||||
gpuEngineRunningTime *prometheus.Desc
|
||||
|
||||
// GPU Adapter Memory
|
||||
@@ -109,6 +112,13 @@ func (c *Collector) Close() error {
|
||||
func (c *Collector) Build(_ *slog.Logger, _ *mi.Session) error {
|
||||
var err error
|
||||
|
||||
c.gpuInfo = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(types.Namespace, Name, "info"),
|
||||
"A metric with a constant '1' value labeled with gpu device information.",
|
||||
[]string{"phys", "physical_device_object_name", "hardware_id", "friendly_name", "description"},
|
||||
nil,
|
||||
)
|
||||
|
||||
c.gpuEngineRunningTime = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(types.Namespace, Name, "engine_time_seconds"),
|
||||
"Total running time of the GPU in seconds.",
|
||||
@@ -213,6 +223,10 @@ func (c *Collector) Build(_ *slog.Logger, _ *mi.Session) error {
|
||||
func (c *Collector) Collect(ch chan<- prometheus.Metric) error {
|
||||
errs := make([]error, 0)
|
||||
|
||||
if err := c.collectGpuInfo(ch); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
|
||||
if err := c.collectGpuEngineMetrics(ch); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
@@ -236,6 +250,28 @@ func (c *Collector) Collect(ch chan<- prometheus.Metric) error {
|
||||
return errors.Join(errs...)
|
||||
}
|
||||
|
||||
func (c *Collector) collectGpuInfo(ch chan<- prometheus.Metric) error {
|
||||
gpus, err := setupapi.GetGPUDevices()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to get GPU devices: %w", err)
|
||||
}
|
||||
|
||||
for i, gpu := range gpus {
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
c.gpuInfo,
|
||||
prometheus.GaugeValue,
|
||||
1.0,
|
||||
strconv.Itoa(i),
|
||||
gpu.PhysicalDeviceObjectName,
|
||||
gpu.HardwareID,
|
||||
gpu.FriendlyName,
|
||||
gpu.DeviceDesc,
|
||||
)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *Collector) collectGpuEngineMetrics(ch chan<- prometheus.Metric) error {
|
||||
// Collect the GPU Engine perf data.
|
||||
if err := c.gpuEnginePerfDataCollector.Collect(&c.gpuEnginePerfDataObject); err != nil {
|
||||
|
||||
135
internal/headers/setupapi/gpu.go
Normal file
135
internal/headers/setupapi/gpu.go
Normal file
@@ -0,0 +1,135 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
// Copyright The Prometheus Authors
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//go:build windows
|
||||
|
||||
package setupapi
|
||||
|
||||
import (
|
||||
"sync"
|
||||
"unsafe"
|
||||
|
||||
"golang.org/x/sys/windows"
|
||||
)
|
||||
|
||||
//nolint:gochecknoglobals
|
||||
var GUID_DISPLAY_ADAPTER = sync.OnceValue(func() *windows.GUID {
|
||||
return &windows.GUID{
|
||||
Data1: 0x4d36e968,
|
||||
Data2: 0xe325,
|
||||
Data3: 0x11ce,
|
||||
Data4: [8]byte{0xbf, 0xc1, 0x08, 0x00, 0x2b, 0xe1, 0x03, 0x18},
|
||||
}
|
||||
})
|
||||
|
||||
func GetGPUDevices() ([]GPUDevice, error) {
|
||||
hDevInfo, _, err := procSetupDiGetClassDevsW.Call(
|
||||
uintptr(unsafe.Pointer(GUID_DISPLAY_ADAPTER())),
|
||||
0,
|
||||
0,
|
||||
DIGCF_PRESENT,
|
||||
)
|
||||
|
||||
if windows.Handle(hDevInfo) == windows.InvalidHandle {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var (
|
||||
devices []GPUDevice
|
||||
deviceData SP_DEVINFO_DATA
|
||||
propertyBuffer [256]uint16
|
||||
)
|
||||
|
||||
deviceData.CbSize = uint32(unsafe.Sizeof(deviceData))
|
||||
|
||||
for i := 0; ; i++ {
|
||||
ret, _, _ := procSetupDiEnumDeviceInfo.Call(hDevInfo, uintptr(i), uintptr(unsafe.Pointer(&deviceData)))
|
||||
if ret == 0 {
|
||||
break // No more devices
|
||||
}
|
||||
|
||||
ret, _, _ = procSetupDiGetDeviceRegistryPropertyW.Call(
|
||||
hDevInfo,
|
||||
uintptr(unsafe.Pointer(&deviceData)),
|
||||
uintptr(SPDRP_DEVICEDESC),
|
||||
0,
|
||||
uintptr(unsafe.Pointer(&propertyBuffer[0])),
|
||||
uintptr(len(propertyBuffer)*2),
|
||||
0,
|
||||
)
|
||||
|
||||
gpuDevice := GPUDevice{}
|
||||
|
||||
if ret == 0 {
|
||||
gpuDevice.DeviceDesc = ""
|
||||
} else {
|
||||
gpuDevice.DeviceDesc = windows.UTF16ToString(propertyBuffer[:])
|
||||
}
|
||||
|
||||
ret, _, _ = procSetupDiGetDeviceRegistryPropertyW.Call(
|
||||
hDevInfo,
|
||||
uintptr(unsafe.Pointer(&deviceData)),
|
||||
uintptr(SPDRP_FRIENDLYNAME),
|
||||
0,
|
||||
uintptr(unsafe.Pointer(&propertyBuffer[0])),
|
||||
uintptr(len(propertyBuffer)*2),
|
||||
0,
|
||||
)
|
||||
|
||||
if ret == 0 {
|
||||
gpuDevice.FriendlyName = ""
|
||||
} else {
|
||||
gpuDevice.FriendlyName = windows.UTF16ToString(propertyBuffer[:])
|
||||
}
|
||||
|
||||
ret, _, _ = procSetupDiGetDeviceRegistryPropertyW.Call(
|
||||
hDevInfo,
|
||||
uintptr(unsafe.Pointer(&deviceData)),
|
||||
uintptr(SPDRP_HARDWAREID),
|
||||
0,
|
||||
uintptr(unsafe.Pointer(&propertyBuffer[0])),
|
||||
uintptr(len(propertyBuffer)*2),
|
||||
0,
|
||||
)
|
||||
|
||||
if ret == 0 {
|
||||
gpuDevice.HardwareID = "unknown"
|
||||
} else {
|
||||
gpuDevice.HardwareID = windows.UTF16ToString(propertyBuffer[:])
|
||||
}
|
||||
|
||||
ret, _, _ = procSetupDiGetDeviceRegistryPropertyW.Call(
|
||||
hDevInfo,
|
||||
uintptr(unsafe.Pointer(&deviceData)),
|
||||
uintptr(SPDRP_PHYSICAL_DEVICE_OBJECT_NAME),
|
||||
0,
|
||||
uintptr(unsafe.Pointer(&propertyBuffer[0])),
|
||||
uintptr(len(propertyBuffer)*2),
|
||||
0,
|
||||
)
|
||||
|
||||
if ret == 0 {
|
||||
gpuDevice.PhysicalDeviceObjectName = "unknown"
|
||||
} else {
|
||||
gpuDevice.PhysicalDeviceObjectName = windows.UTF16ToString(propertyBuffer[:])
|
||||
}
|
||||
|
||||
devices = append(devices, gpuDevice)
|
||||
}
|
||||
|
||||
_, _, _ = procSetupDiDestroyDeviceInfoList.Call(hDevInfo)
|
||||
|
||||
return devices, nil
|
||||
}
|
||||
32
internal/headers/setupapi/gpu_test.go
Normal file
32
internal/headers/setupapi/gpu_test.go
Normal file
@@ -0,0 +1,32 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
// Copyright The Prometheus Authors
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//go:build windows
|
||||
|
||||
package setupapi_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/prometheus-community/windows_exporter/internal/headers/setupapi"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestGetGPUDevices(t *testing.T) {
|
||||
devices, err := setupapi.GetGPUDevices()
|
||||
require.NoError(t, err, "Failed to get GPU devices")
|
||||
|
||||
require.NotNil(t, devices)
|
||||
}
|
||||
31
internal/headers/setupapi/setupapi.go
Normal file
31
internal/headers/setupapi/setupapi.go
Normal file
@@ -0,0 +1,31 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
// Copyright The Prometheus Authors
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//go:build windows
|
||||
|
||||
package setupapi
|
||||
|
||||
import (
|
||||
"golang.org/x/sys/windows"
|
||||
)
|
||||
|
||||
//nolint:gochecknoglobals
|
||||
var (
|
||||
modSetupAPI = windows.NewLazySystemDLL("setupapi.dll")
|
||||
procSetupDiGetClassDevsW = modSetupAPI.NewProc("SetupDiGetClassDevsW")
|
||||
procSetupDiEnumDeviceInfo = modSetupAPI.NewProc("SetupDiEnumDeviceInfo")
|
||||
procSetupDiGetDeviceRegistryPropertyW = modSetupAPI.NewProc("SetupDiGetDeviceRegistryPropertyW")
|
||||
procSetupDiDestroyDeviceInfoList = modSetupAPI.NewProc("SetupDiDestroyDeviceInfoList")
|
||||
)
|
||||
42
internal/headers/setupapi/types.go
Normal file
42
internal/headers/setupapi/types.go
Normal file
@@ -0,0 +1,42 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
// Copyright The Prometheus Authors
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//go:build windows
|
||||
|
||||
package setupapi
|
||||
|
||||
import "golang.org/x/sys/windows"
|
||||
|
||||
// DIGCF_PRESENT is the SetupDiGetClassDevsW flag that restricts the device
// information set to devices currently present in the system.
const DIGCF_PRESENT = 0x2

// SPDRP_* values select which device registry property
// SetupDiGetDeviceRegistryPropertyW reads. They are listed in ascending
// numeric order and are kept untyped so they can be passed as uintptr
// arguments without conversion at every call site.
const (
	SPDRP_DEVICEDESC                  = 0x0
	SPDRP_HARDWAREID                  = 0x1
	SPDRP_FRIENDLYNAME                = 0xC
	SPDRP_PHYSICAL_DEVICE_OBJECT_NAME = 0xE
)
|
||||
|
||||
type SP_DEVINFO_DATA struct {
|
||||
CbSize uint32
|
||||
ClassGuid windows.GUID
|
||||
DevInst uint32
|
||||
_ uintptr // Reserved
|
||||
}
|
||||
|
||||
// GPUDevice carries the device registry properties that GetGPUDevices reads
// for one display adapter.
type GPUDevice struct {
	// DeviceDesc is the SPDRP_DEVICEDESC property; empty when it cannot be read.
	DeviceDesc string

	// FriendlyName is the SPDRP_FRIENDLYNAME property; empty when it cannot be
	// read.
	FriendlyName string

	// HardwareID is the SPDRP_HARDWAREID property; "unknown" when it cannot be
	// read.
	HardwareID string

	// PhysicalDeviceObjectName is the SPDRP_PHYSICAL_DEVICE_OBJECT_NAME
	// property; "unknown" when it cannot be read.
	PhysicalDeviceObjectName string
}
|
||||
Reference in New Issue
Block a user