diff --git a/.idea/dictionaries/project.xml b/.idea/dictionaries/project.xml index 486bc5cc..3554b8d7 100644 --- a/.idea/dictionaries/project.xml +++ b/.idea/dictionaries/project.xml @@ -2,6 +2,7 @@ containerd + setupapi spdx diff --git a/docs/collector.gpu.md b/docs/collector.gpu.md index d9e3b9cc..a69d5e54 100644 --- a/docs/collector.gpu.md +++ b/docs/collector.gpu.md @@ -20,24 +20,25 @@ These metrics are available on supported versions of Windows with compatible GPU ### Adapter-level Metrics -| Name | Description | Type | Labels | -|----------------------------------------------|----------------------------------------------------------|-------|--------| -| `windows_gpu_adapter_memory_committed_bytes` | Total committed GPU memory in bytes per physical GPU | gauge | `phys` | -| `windows_gpu_adapter_memory_dedicated_bytes` | Dedicated GPU memory usage in bytes per physical GPU | gauge | `phys` | -| `windows_gpu_adapter_memory_shared_bytes` | Shared GPU memory usage in bytes per physical GPU | gauge | `phys` | -| `windows_gpu_local_adapter_memory_bytes` | Local adapter memory usage in bytes per physical GPU | gauge | `phys` | -| `windows_gpu_non_local_adapter_memory_bytes` | Non-local adapter memory usage in bytes per physical GPU | gauge | `phys` | +| Name | Description | Type | Labels | +|----------------------------------------------|-------------------------------------------------------------------------|-------|--------------------------------------------------------------------------------------| +| `windows_gpu_adapter_memory_committed_bytes` | Total committed GPU memory in bytes per physical GPU | gauge | `phys` | +| `windows_gpu_adapter_memory_dedicated_bytes` | Dedicated GPU memory usage in bytes per physical GPU | gauge | `phys` | +| `windows_gpu_adapter_memory_shared_bytes` | Shared GPU memory usage in bytes per physical GPU | gauge | `phys` | +| `windows_gpu_info` | A metric with a constant '1' value labeled with gpu device information. 
| gauge | `phys`, `physical_device_object_name`, `hardware_id`, `friendly_name`, `description` | +| `windows_gpu_local_adapter_memory_bytes` | Local adapter memory usage in bytes per physical GPU | gauge | `phys` | +| `windows_gpu_non_local_adapter_memory_bytes` | Non-local adapter memory usage in bytes per physical GPU | gauge | `phys` | ### Per-process Metrics -| Name | Description | Type | Labels | -|----------------------------------------------|-------------------------------------------------|---------|----------------------------------------| -| `windows_gpu_engine_time_seconds` | Total running time of the GPU engine in seconds | counter | `phys`, `eng`, `engtype`, `process_id` | -| `windows_gpu_process_memory_committed_bytes` | Total committed GPU memory in bytes per process | gauge | `phys`,`process_id` | -| `windows_gpu_process_memory_dedicated_bytes` | Dedicated GPU memory usage in bytes per process | gauge | `phys`,`process_id` | -| `windows_gpu_process_memory_local_bytes` | Local GPU memory usage in bytes per process | gauge | `phys`,`process_id` | -| `windows_gpu_process_memory_non_local_bytes` | Non-local GPU memory usage in bytes per process | gauge | `phys`,`process_id` | -| `windows_gpu_process_memory_shared_bytes` | Shared GPU memory usage in bytes per process | gauge | `phys`,`process_id` | +| Name | Description | Type | Labels | +|----------------------------------------------|-------------------------------------------------------------------------|---------|--------------------------------------------------------------------------------------| +| `windows_gpu_engine_time_seconds` | Total running time of the GPU engine in seconds | counter | `phys`, `eng`, `engtype`, `process_id` | +| `windows_gpu_process_memory_committed_bytes` | Total committed GPU memory in bytes per process | gauge | `phys`,`process_id` | +| `windows_gpu_process_memory_dedicated_bytes` | Dedicated GPU memory usage in bytes per process | gauge | `phys`,`process_id` | +| 
`windows_gpu_process_memory_local_bytes` | Local GPU memory usage in bytes per process | gauge | `phys`,`process_id` | +| `windows_gpu_process_memory_non_local_bytes` | Non-local GPU memory usage in bytes per process | gauge | `phys`,`process_id` | +| `windows_gpu_process_memory_shared_bytes` | Shared GPU memory usage in bytes per process | gauge | `phys`,`process_id` | ## Metric Labels @@ -50,6 +51,12 @@ These metrics are available on supported versions of Windows with compatible GPU These are basic queries to help you get started with GPU monitoring on Windows using Prometheus. +**Show GPU information for a specific physical GPU (0):** + +```promql +windows_gpu_info{description="NVIDIA GeForce GTX 1070",friendly_name="",hardware_id="PCI\\VEN_10DE&DEV_1B81&SUBSYS_61733842&REV_A1",phys="0",physical_device_object_name="\\Device\\NTPNP_PCI0027"} +``` + **Show total dedicated GPU memory (in bytes) usage on GPU 0:** ```promql diff --git a/internal/collector/gpu/gpu.go b/internal/collector/gpu/gpu.go index 0c84cd8f..c059f62f 100644 --- a/internal/collector/gpu/gpu.go +++ b/internal/collector/gpu/gpu.go @@ -21,8 +21,10 @@ import ( "errors" "fmt" "log/slog" + "strconv" "github.com/alecthomas/kingpin/v2" + "github.com/prometheus-community/windows_exporter/internal/headers/setupapi" "github.com/prometheus-community/windows_exporter/internal/mi" "github.com/prometheus-community/windows_exporter/internal/pdh" "github.com/prometheus-community/windows_exporter/internal/types" @@ -43,6 +45,7 @@ type Collector struct { gpuEnginePerfDataCollector *pdh.Collector gpuEnginePerfDataObject []gpuEnginePerfDataCounterValues + gpuInfo *prometheus.Desc gpuEngineRunningTime *prometheus.Desc // GPU Adapter Memory @@ -109,6 +112,13 @@ func (c *Collector) Close() error { func (c *Collector) Build(_ *slog.Logger, _ *mi.Session) error { var err error + c.gpuInfo = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, Name, "info"), + "A metric with a constant '1' value labeled with gpu 
device information.", + []string{"phys", "physical_device_object_name", "hardware_id", "friendly_name", "description"}, + nil, + ) + c.gpuEngineRunningTime = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "engine_time_seconds"), "Total running time of the GPU in seconds.", @@ -213,6 +223,10 @@ func (c *Collector) Build(_ *slog.Logger, _ *mi.Session) error { func (c *Collector) Collect(ch chan<- prometheus.Metric) error { errs := make([]error, 0) + if err := c.collectGpuInfo(ch); err != nil { + errs = append(errs, err) + } + if err := c.collectGpuEngineMetrics(ch); err != nil { errs = append(errs, err) } @@ -236,6 +250,28 @@ func (c *Collector) Collect(ch chan<- prometheus.Metric) error { return errors.Join(errs...) } +func (c *Collector) collectGpuInfo(ch chan<- prometheus.Metric) error { + gpus, err := setupapi.GetGPUDevices() + if err != nil { + return fmt.Errorf("failed to get GPU devices: %w", err) + } + + for i, gpu := range gpus { + ch <- prometheus.MustNewConstMetric( + c.gpuInfo, + prometheus.GaugeValue, + 1.0, + strconv.Itoa(i), + gpu.PhysicalDeviceObjectName, + gpu.HardwareID, + gpu.FriendlyName, + gpu.DeviceDesc, + ) + } + + return nil +} + func (c *Collector) collectGpuEngineMetrics(ch chan<- prometheus.Metric) error { // Collect the GPU Engine perf data. if err := c.gpuEnginePerfDataCollector.Collect(&c.gpuEnginePerfDataObject); err != nil { diff --git a/internal/headers/setupapi/gpu.go b/internal/headers/setupapi/gpu.go new file mode 100644 index 00000000..53cb6caf --- /dev/null +++ b/internal/headers/setupapi/gpu.go @@ -0,0 +1,135 @@ +// SPDX-License-Identifier: Apache-2.0 +// +// Copyright The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build windows + +package setupapi + +import ( + "sync" + "unsafe" + + "golang.org/x/sys/windows" +) + +//nolint:gochecknoglobals +var GUID_DISPLAY_ADAPTER = sync.OnceValue(func() *windows.GUID { + return &windows.GUID{ + Data1: 0x4d36e968, + Data2: 0xe325, + Data3: 0x11ce, + Data4: [8]byte{0xbf, 0xc1, 0x08, 0x00, 0x2b, 0xe1, 0x03, 0x18}, + } +}) + +func GetGPUDevices() ([]GPUDevice, error) { + hDevInfo, _, err := procSetupDiGetClassDevsW.Call( + uintptr(unsafe.Pointer(GUID_DISPLAY_ADAPTER())), + 0, + 0, + DIGCF_PRESENT, + ) + + if windows.Handle(hDevInfo) == windows.InvalidHandle { + return nil, err + } + + var ( + devices []GPUDevice + deviceData SP_DEVINFO_DATA + propertyBuffer [256]uint16 + ) + + deviceData.CbSize = uint32(unsafe.Sizeof(deviceData)) + + for i := 0; ; i++ { + ret, _, _ := procSetupDiEnumDeviceInfo.Call(hDevInfo, uintptr(i), uintptr(unsafe.Pointer(&deviceData))) + if ret == 0 { + break // No more devices + } + + ret, _, _ = procSetupDiGetDeviceRegistryPropertyW.Call( + hDevInfo, + uintptr(unsafe.Pointer(&deviceData)), + uintptr(SPDRP_DEVICEDESC), + 0, + uintptr(unsafe.Pointer(&propertyBuffer[0])), + uintptr(len(propertyBuffer)*2), + 0, + ) + + gpuDevice := GPUDevice{} + + if ret == 0 { + gpuDevice.DeviceDesc = "" + } else { + gpuDevice.DeviceDesc = windows.UTF16ToString(propertyBuffer[:]) + } + + ret, _, _ = procSetupDiGetDeviceRegistryPropertyW.Call( + hDevInfo, + uintptr(unsafe.Pointer(&deviceData)), + uintptr(SPDRP_FRIENDLYNAME), + 0, + uintptr(unsafe.Pointer(&propertyBuffer[0])), + uintptr(len(propertyBuffer)*2), + 0, 
+ ) + + if ret == 0 { + gpuDevice.FriendlyName = "" + } else { + gpuDevice.FriendlyName = windows.UTF16ToString(propertyBuffer[:]) + } + + ret, _, _ = procSetupDiGetDeviceRegistryPropertyW.Call( + hDevInfo, + uintptr(unsafe.Pointer(&deviceData)), + uintptr(SPDRP_HARDWAREID), + 0, + uintptr(unsafe.Pointer(&propertyBuffer[0])), + uintptr(len(propertyBuffer)*2), + 0, + ) + + if ret == 0 { + gpuDevice.HardwareID = "unknown" + } else { + gpuDevice.HardwareID = windows.UTF16ToString(propertyBuffer[:]) + } + + ret, _, _ = procSetupDiGetDeviceRegistryPropertyW.Call( + hDevInfo, + uintptr(unsafe.Pointer(&deviceData)), + uintptr(SPDRP_PHYSICAL_DEVICE_OBJECT_NAME), + 0, + uintptr(unsafe.Pointer(&propertyBuffer[0])), + uintptr(len(propertyBuffer)*2), + 0, + ) + + if ret == 0 { + gpuDevice.PhysicalDeviceObjectName = "unknown" + } else { + gpuDevice.PhysicalDeviceObjectName = windows.UTF16ToString(propertyBuffer[:]) + } + + devices = append(devices, gpuDevice) + } + + _, _, _ = procSetupDiDestroyDeviceInfoList.Call(hDevInfo) + + return devices, nil +} diff --git a/internal/headers/setupapi/gpu_test.go b/internal/headers/setupapi/gpu_test.go new file mode 100644 index 00000000..3b79dd4b --- /dev/null +++ b/internal/headers/setupapi/gpu_test.go @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: Apache-2.0 +// +// Copyright The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +//go:build windows + +package setupapi_test + +import ( + "testing" + + "github.com/prometheus-community/windows_exporter/internal/headers/setupapi" + "github.com/stretchr/testify/require" +) + +func TestGetGPUDevices(t *testing.T) { + devices, err := setupapi.GetGPUDevices() + require.NoError(t, err, "Failed to get GPU devices") + + require.NotNil(t, devices) +} diff --git a/internal/headers/setupapi/setupapi.go b/internal/headers/setupapi/setupapi.go new file mode 100644 index 00000000..75e8bd89 --- /dev/null +++ b/internal/headers/setupapi/setupapi.go @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: Apache-2.0 +// +// Copyright The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
// Lazily resolved entry points of setupapi.dll used by this package.
//
//nolint:gochecknoglobals
var (
	modSetupAPI                           = windows.NewLazySystemDLL("setupapi.dll")
	procSetupDiGetClassDevsW              = modSetupAPI.NewProc("SetupDiGetClassDevsW")
	procSetupDiEnumDeviceInfo             = modSetupAPI.NewProc("SetupDiEnumDeviceInfo")
	procSetupDiGetDeviceRegistryPropertyW = modSetupAPI.NewProc("SetupDiGetDeviceRegistryPropertyW")
	procSetupDiDestroyDeviceInfoList      = modSetupAPI.NewProc("SetupDiDestroyDeviceInfoList")
)

// Numeric arguments for the SetupDi* calls. The names mirror the identifiers
// of the Windows SDK headers rather than Go's MixedCaps convention.
const (
	// DIGCF_PRESENT is the flags argument GetGPUDevices passes to
	// SetupDiGetClassDevsW.
	DIGCF_PRESENT = 0x00000002
	// The SPDRP_* values select which property
	// SetupDiGetDeviceRegistryPropertyW is asked to return.
	SPDRP_DEVICEDESC                  = 0x00000000
	SPDRP_FRIENDLYNAME                = 0x0000000C
	SPDRP_HARDWAREID                  = 0x00000001
	SPDRP_PHYSICAL_DEVICE_OBJECT_NAME = 0x0000000E
)

// SP_DEVINFO_DATA is the per-device structure that SetupDiEnumDeviceInfo fills
// in and SetupDiGetDeviceRegistryPropertyW takes as input.
// NOTE(review): the field layout is assumed to match the SP_DEVINFO_DATA
// structure of the Windows SDK — confirm against setupapi.h.
type SP_DEVINFO_DATA struct {
	// CbSize must hold the size of this structure; GetGPUDevices sets it via
	// unsafe.Sizeof before enumerating.
	CbSize    uint32
	ClassGuid windows.GUID
	DevInst   uint32
	_         uintptr // Reserved
}

// GPUDevice holds the string properties GetGPUDevices reads for one device.
type GPUDevice struct {
	// DeviceDesc is read via SPDRP_DEVICEDESC; "" when the read fails.
	DeviceDesc string
	// FriendlyName is read via SPDRP_FRIENDLYNAME; "" when the read fails.
	FriendlyName string
	// HardwareID is read via SPDRP_HARDWAREID; "unknown" when the read fails.
	HardwareID string
	// PhysicalDeviceObjectName is read via SPDRP_PHYSICAL_DEVICE_OBJECT_NAME;
	// "unknown" when the read fails.
	PhysicalDeviceObjectName string
}