diff --git a/.idea/dictionaries/project.xml b/.idea/dictionaries/project.xml index 3554b8d7..e191b893 100644 --- a/.idea/dictionaries/project.xml +++ b/.idea/dictionaries/project.xml @@ -2,6 +2,7 @@ containerd + luid setupapi spdx diff --git a/docs/collector.gpu.md b/docs/collector.gpu.md index a69d5e54..a0f79190 100644 --- a/docs/collector.gpu.md +++ b/docs/collector.gpu.md @@ -20,29 +20,32 @@ These metrics are available on supported versions of Windows with compatible GPU ### Adapter-level Metrics -| Name | Description | Type | Labels | -|----------------------------------------------|-------------------------------------------------------------------------|-------|--------------------------------------------------------------------------------------| -| `windows_gpu_adapter_memory_committed_bytes` | Total committed GPU memory in bytes per physical GPU | gauge | `phys` | -| `windows_gpu_adapter_memory_dedicated_bytes` | Dedicated GPU memory usage in bytes per physical GPU | gauge | `phys` | -| `windows_gpu_adapter_memory_shared_bytes` | Shared GPU memory usage in bytes per physical GPU | gauge | `phys` | -| `windows_gpu_info` | A metric with a constant '1' value labeled with gpu device information. | gauge | `phys`, `physical_device_object_name`, `hardware_id`, `friendly_name`, `description` | -| `windows_gpu_local_adapter_memory_bytes` | Local adapter memory usage in bytes per physical GPU | gauge | `phys` | -| `windows_gpu_non_local_adapter_memory_bytes` | Non-local adapter memory usage in bytes per physical GPU | gauge | `phys` | +| Name | Description | Type | Labels | +|--------------------------------------------------|------------------------------------------------------------------------------------|-------|---------------| +| `windows_gpu_info` | A metric with a constant '1' value labeled with gpu device information. 
| gauge | `luid`,`name`,`bus_number`,`phys`,`function_number` | +| `windows_gpu_dedicated_system_memory_size_bytes` | The size, in bytes, of memory that is dedicated from system memory. | gauge | `luid` | +| `windows_gpu_dedicated_video_memory_size_bytes` | The size, in bytes, of memory that is dedicated from video memory. | gauge | `luid` | +| `windows_gpu_shared_system_memory_size_bytes` | The size, in bytes, of memory from system memory that can be shared by many users. | gauge | `luid` | +| `windows_gpu_adapter_memory_committed_bytes` | Total committed GPU memory in bytes per physical GPU | gauge | `luid`,`phys` | +| `windows_gpu_adapter_memory_dedicated_bytes` | Dedicated GPU memory usage in bytes per physical GPU | gauge | `luid`,`phys` | +| `windows_gpu_adapter_memory_shared_bytes` | Shared GPU memory usage in bytes per physical GPU | gauge | `luid`,`phys` | +| `windows_gpu_local_adapter_memory_bytes` | Local adapter memory usage in bytes per physical GPU | gauge | `luid`,`phys` | +| `windows_gpu_non_local_adapter_memory_bytes` | Non-local adapter memory usage in bytes per physical GPU | gauge | `luid`,`phys` | ### Per-process Metrics -| Name | Description | Type | Labels | -|----------------------------------------------|-------------------------------------------------------------------------|---------|--------------------------------------------------------------------------------------| -| `windows_gpu_engine_time_seconds` | Total running time of the GPU engine in seconds | counter | `phys`, `eng`, `engtype`, `process_id` | -| `windows_gpu_process_memory_committed_bytes` | Total committed GPU memory in bytes per process | gauge | `phys`,`process_id` | -| `windows_gpu_process_memory_dedicated_bytes` | Dedicated GPU memory usage in bytes per process | gauge | `phys`,`process_id` | -| `windows_gpu_process_memory_local_bytes` | Local GPU memory usage in bytes per process | gauge | `phys`,`process_id` | -| `windows_gpu_process_memory_non_local_bytes` | 
Non-local GPU memory usage in bytes per process | gauge | `phys`,`process_id` | -| `windows_gpu_process_memory_shared_bytes` | Shared GPU memory usage in bytes per process | gauge | `phys`,`process_id` | +| Name | Description | Type | Labels | +|----------------------------------------------|-------------------------------------------------|---------|-----------------------------------------------| +| `windows_gpu_engine_time_seconds` | Total running time of the GPU engine in seconds | counter | `luid`,`phys`, `eng`, `engtype`, `process_id` | +| `windows_gpu_process_memory_committed_bytes` | Total committed GPU memory in bytes per process | gauge | `luid`,`phys`,`process_id` | +| `windows_gpu_process_memory_dedicated_bytes` | Dedicated GPU memory usage in bytes per process | gauge | `luid`,`phys`,`process_id` | +| `windows_gpu_process_memory_local_bytes` | Local GPU memory usage in bytes per process | gauge | `luid`,`phys`,`process_id` | +| `windows_gpu_process_memory_non_local_bytes` | Non-local GPU memory usage in bytes per process | gauge | `luid`,`phys`,`process_id` | +| `windows_gpu_process_memory_shared_bytes` | Shared GPU memory usage in bytes per process | gauge | `luid`,`phys`,`process_id` | ## Metric Labels -* `phys`: Physical GPU index (e.g., "0") +* `luid`,`phys`: Adapter LUID (e.g., "0x00000000_0x0000C4CF") and physical GPU index (e.g., "0") * `eng`: GPU engine index (e.g., "0", "1", ...) * `engtype`: GPU engine type (e.g., "3D", "Copy", "VideoDecode", etc.) 
* `process_id`: Process ID diff --git a/internal/collector/gpu/gpu.go b/internal/collector/gpu/gpu.go index a91e051f..0916c9ca 100644 --- a/internal/collector/gpu/gpu.go +++ b/internal/collector/gpu/gpu.go @@ -24,7 +24,7 @@ import ( "strconv" "github.com/alecthomas/kingpin/v2" - "github.com/prometheus-community/windows_exporter/internal/headers/setupapi" + "github.com/prometheus-community/windows_exporter/internal/headers/gdi32" "github.com/prometheus-community/windows_exporter/internal/mi" "github.com/prometheus-community/windows_exporter/internal/pdh" "github.com/prometheus-community/windows_exporter/internal/types" @@ -41,6 +41,8 @@ var ConfigDefaults = Config{} type Collector struct { config Config + gpuDeviceCache map[string]gdi32.GPUDevice + // GPU Engine gpuEnginePerfDataCollector *pdh.Collector gpuEnginePerfDataObject []gpuEnginePerfDataCounterValues @@ -48,6 +50,10 @@ type Collector struct { gpuInfo *prometheus.Desc gpuEngineRunningTime *prometheus.Desc + gpuSharedSystemMemorySize *prometheus.Desc + gpuDedicatedSystemMemorySize *prometheus.Desc + gpuDedicatedVideoMemorySize *prometheus.Desc + // GPU Adapter Memory gpuAdapterMemoryPerfDataCollector *pdh.Collector gpuAdapterMemoryPerfDataObject []gpuAdapterMemoryPerfDataCounterValues @@ -115,78 +121,97 @@ func (c *Collector) Build(_ *slog.Logger, _ *mi.Session) error { c.gpuInfo = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "info"), "A metric with a constant '1' value labeled with gpu device information.", - []string{"phys", "physical_device_object_name", "hardware_id", "friendly_name", "description"}, + []string{"luid", "name", "bus_number", "phys", "function_number"}, + nil, + ) + + c.gpuSharedSystemMemorySize = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, Name, "shared_system_memory_size_bytes"), + "The size, in bytes, of memory from system memory that can be shared by many users.", + []string{"luid"}, + nil, + ) + c.gpuDedicatedSystemMemorySize = prometheus.NewDesc( 
+ prometheus.BuildFQName(types.Namespace, Name, "dedicated_system_memory_size_bytes"), + "The size, in bytes, of memory that is dedicated from system memory.", + []string{"luid"}, + nil, + ) + c.gpuDedicatedVideoMemorySize = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, Name, "dedicated_video_memory_size_bytes"), + "The size, in bytes, of memory that is dedicated from video memory.", + []string{"luid"}, nil, ) c.gpuEngineRunningTime = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "engine_time_seconds"), "Total running time of the GPU in seconds.", - []string{"process_id", "phys", "eng", "engtype"}, + []string{"process_id", "luid", "phys", "eng", "engtype"}, nil, ) c.gpuAdapterMemoryDedicatedUsage = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "adapter_memory_dedicated_bytes"), "Dedicated GPU memory usage in bytes.", - []string{"phys"}, + []string{"luid", "phys"}, nil, ) c.gpuAdapterMemorySharedUsage = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "adapter_memory_shared_bytes"), "Shared GPU memory usage in bytes.", - []string{"phys"}, + []string{"luid", "phys"}, nil, ) c.gpuAdapterMemoryTotalCommitted = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "adapter_memory_committed_bytes"), "Total committed GPU memory in bytes.", - []string{"phys"}, + []string{"luid", "phys"}, nil, ) c.gpuLocalAdapterMemoryUsage = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "local_adapter_memory_bytes"), "Local adapter memory usage in bytes.", - []string{"phys"}, + []string{"luid", "phys"}, nil, ) c.gpuNonLocalAdapterMemoryUsage = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "non_local_adapter_memory_bytes"), "Non-local adapter memory usage in bytes.", - []string{"phys"}, + []string{"luid", "phys"}, nil, ) c.gpuProcessMemoryDedicatedUsage = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "process_memory_dedicated_bytes"), "Dedicated 
process memory usage in bytes.", - []string{"process_id", "phys"}, + []string{"process_id", "luid", "phys"}, nil, ) c.gpuProcessMemoryLocalUsage = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "process_memory_local_bytes"), "Local process memory usage in bytes.", - []string{"process_id", "phys"}, + []string{"process_id", "luid", "phys"}, nil, ) c.gpuProcessMemoryNonLocalUsage = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "process_memory_non_local_bytes"), "Non-local process memory usage in bytes.", - []string{"process_id", "phys"}, + []string{"process_id", "luid", "phys"}, nil, ) c.gpuProcessMemorySharedUsage = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "process_memory_shared_bytes"), "Shared process memory usage in bytes.", - []string{"process_id", "phys"}, + []string{"process_id", "luid", "phys"}, nil, ) c.gpuProcessMemoryTotalCommitted = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "process_memory_committed_bytes"), "Total committed process memory in bytes.", - []string{"process_id", "phys"}, + []string{"process_id", "luid", "phys"}, nil, ) @@ -217,15 +242,31 @@ func (c *Collector) Build(_ *slog.Logger, _ *mi.Session) error { errs = append(errs, fmt.Errorf("failed to create GPU Process Memory perf data collector: %w", err)) } + gpus, err := gdi32.GetGPUDevices() + if err != nil { + errs = append(errs, fmt.Errorf("failed to get GPU devices: %w", err)) + } + + for _, gpu := range gpus { + if gpu.AdapterString == "" { + continue + } + + if c.gpuDeviceCache == nil { + c.gpuDeviceCache = make(map[string]gdi32.GPUDevice) + } + + luidKey := fmt.Sprintf("0x%08X_0x%08X", gpu.LUID.HighPart, gpu.LUID.LowPart) + c.gpuDeviceCache[luidKey] = gpu + } + return errors.Join(errs...) 
} func (c *Collector) Collect(ch chan<- prometheus.Metric) error { errs := make([]error, 0) - if err := c.collectGpuInfo(ch); err != nil { - errs = append(errs, err) - } + c.collectGpuInfo(ch) if err := c.collectGpuEngineMetrics(ch); err != nil { errs = append(errs, err) @@ -250,26 +291,40 @@ func (c *Collector) Collect(ch chan<- prometheus.Metric) error { return errors.Join(errs...) } -func (c *Collector) collectGpuInfo(ch chan<- prometheus.Metric) error { - gpus, err := setupapi.GetGPUDevices() - if err != nil { - return fmt.Errorf("failed to get GPU devices: %w", err) - } - - for i, gpu := range gpus { +func (c *Collector) collectGpuInfo(ch chan<- prometheus.Metric) { + for luid, gpu := range c.gpuDeviceCache { ch <- prometheus.MustNewConstMetric( c.gpuInfo, prometheus.GaugeValue, 1.0, - strconv.Itoa(i), - gpu.PhysicalDeviceObjectName, - gpu.HardwareID, - gpu.FriendlyName, - gpu.DeviceDesc, + luid, + gpu.AdapterString, + strconv.FormatInt(int64(gpu.BusNumber), 10), + strconv.FormatInt(int64(gpu.DeviceNumber), 10), + strconv.FormatInt(int64(gpu.FunctionNumber), 10), + ) + + ch <- prometheus.MustNewConstMetric( + c.gpuSharedSystemMemorySize, + prometheus.GaugeValue, + float64(gpu.SharedSystemMemorySize), + luid, + ) + + ch <- prometheus.MustNewConstMetric( + c.gpuDedicatedSystemMemorySize, + prometheus.GaugeValue, + float64(gpu.DedicatedSystemMemorySize), + luid, + ) + + ch <- prometheus.MustNewConstMetric( + c.gpuDedicatedVideoMemorySize, + prometheus.GaugeValue, + float64(gpu.DedicatedVideoMemorySize), + luid, ) } - - return nil } func (c *Collector) collectGpuEngineMetrics(ch chan<- prometheus.Metric) error { @@ -283,9 +338,14 @@ func (c *Collector) collectGpuEngineMetrics(ch chan<- prometheus.Metric) error { for _, data := range c.gpuEnginePerfDataObject { instance := parseGPUCounterInstanceString(data.Name) + if _, ok := c.gpuDeviceCache[instance.Luid]; !ok { + continue + } + key := PidPhysEngEngType{ Pid: instance.Pid, Phys: instance.Phys, + Luid: 
instance.Luid, Eng: instance.Eng, Engtype: instance.Engtype, } @@ -297,7 +357,7 @@ func (c *Collector) collectGpuEngineMetrics(ch chan<- prometheus.Metric) error { c.gpuEngineRunningTime, prometheus.CounterValue, runningTime, - key.Pid, key.Phys, key.Eng, key.Engtype, + key.Pid, key.Luid, key.Phys, key.Eng, key.Engtype, ) } @@ -317,8 +377,13 @@ func (c *Collector) collectGpuAdapterMemoryMetrics(ch chan<- prometheus.Metric) for _, data := range c.gpuAdapterMemoryPerfDataObject { instance := parseGPUCounterInstanceString(data.Name) + if _, ok := c.gpuDeviceCache[instance.Luid]; !ok { + continue + } + key := PidPhysEngEngType{ Pid: instance.Pid, + Luid: instance.Luid, Phys: instance.Phys, Eng: instance.Eng, Engtype: instance.Engtype, @@ -333,21 +398,21 @@ func (c *Collector) collectGpuAdapterMemoryMetrics(ch chan<- prometheus.Metric) c.gpuAdapterMemoryDedicatedUsage, prometheus.GaugeValue, dedicatedUsage, - key.Phys, + key.Luid, key.Phys, ) ch <- prometheus.MustNewConstMetric( c.gpuAdapterMemorySharedUsage, prometheus.GaugeValue, sharedUsageMap[key], - key.Phys, + key.Luid, key.Phys, ) ch <- prometheus.MustNewConstMetric( c.gpuAdapterMemoryTotalCommitted, prometheus.GaugeValue, totalCommittedMap[key], - key.Phys, + key.Luid, key.Phys, ) } @@ -360,20 +425,29 @@ func (c *Collector) collectGpuLocalAdapterMemoryMetrics(ch chan<- prometheus.Met return fmt.Errorf("failed to collect GPU Local Adapter Memory perf data: %w", err) } - localAdapterMemoryMap := make(map[string]float64) + localAdapterMemoryMap := make(map[PidPhysEngEngType]float64) for _, data := range c.gpuLocalAdapterMemoryPerfDataObject { instance := parseGPUCounterInstanceString(data.Name) - localAdapterMemoryMap[instance.Phys] += data.LocalUsage + if _, ok := c.gpuDeviceCache[instance.Luid]; !ok { + continue + } + + key := PidPhysEngEngType{ + Luid: instance.Luid, + Phys: instance.Phys, + } + + localAdapterMemoryMap[key] += data.LocalUsage } - for phys, localUsage := range localAdapterMemoryMap { + for key, 
localUsage := range localAdapterMemoryMap { ch <- prometheus.MustNewConstMetric( c.gpuLocalAdapterMemoryUsage, prometheus.GaugeValue, localUsage, - phys, + key.Luid, key.Phys, ) } @@ -386,20 +460,28 @@ func (c *Collector) collectGpuNonLocalAdapterMemoryMetrics(ch chan<- prometheus. return fmt.Errorf("failed to collect GPU Non Local Adapter Memory perf data: %w", err) } - nonLocalAdapterMemoryMap := make(map[string]float64) + nonLocalAdapterMemoryMap := make(map[PidPhysEngEngType]float64) for _, data := range c.gpuNonLocalAdapterMemoryPerfDataObject { instance := parseGPUCounterInstanceString(data.Name) - nonLocalAdapterMemoryMap[instance.Phys] += data.NonLocalUsage + if _, ok := c.gpuDeviceCache[instance.Luid]; !ok { + continue + } + + key := PidPhysEngEngType{ + Luid: instance.Luid, + Phys: instance.Phys, + } + nonLocalAdapterMemoryMap[key] += data.NonLocalUsage } - for phys, nonLocalUsage := range nonLocalAdapterMemoryMap { + for key, nonLocalUsage := range nonLocalAdapterMemoryMap { ch <- prometheus.MustNewConstMetric( c.gpuNonLocalAdapterMemoryUsage, prometheus.GaugeValue, nonLocalUsage, - phys, + key.Luid, key.Phys, ) } @@ -421,8 +503,13 @@ func (c *Collector) collectGpuProcessMemoryMetrics(ch chan<- prometheus.Metric) for _, data := range c.gpuProcessMemoryPerfDataObject { instance := parseGPUCounterInstanceString(data.Name) + if _, ok := c.gpuDeviceCache[instance.Luid]; !ok { + continue + } + key := PidPhys{ Pid: instance.Pid, + Luid: instance.Luid, Phys: instance.Phys, } processDedicatedUsageMap[key] += data.DedicatedUsage @@ -437,35 +524,35 @@ func (c *Collector) collectGpuProcessMemoryMetrics(ch chan<- prometheus.Metric) c.gpuProcessMemoryDedicatedUsage, prometheus.GaugeValue, dedicatedUsage, - key.Pid, key.Phys, + key.Pid, key.Luid, key.Phys, ) ch <- prometheus.MustNewConstMetric( c.gpuProcessMemoryLocalUsage, prometheus.GaugeValue, processLocalUsageMap[key], - key.Pid, key.Phys, + key.Pid, key.Luid, key.Phys, ) ch <- prometheus.MustNewConstMetric( 
c.gpuProcessMemoryNonLocalUsage, prometheus.GaugeValue, processNonLocalUsageMap[key], - key.Pid, key.Phys, + key.Pid, key.Luid, key.Phys, ) ch <- prometheus.MustNewConstMetric( c.gpuProcessMemorySharedUsage, prometheus.GaugeValue, processSharedUsageMap[key], - key.Pid, key.Phys, + key.Pid, key.Luid, key.Phys, ) ch <- prometheus.MustNewConstMetric( c.gpuProcessMemoryTotalCommitted, prometheus.GaugeValue, processTotalCommittedMap[key], - key.Pid, key.Phys, + key.Pid, key.Luid, key.Phys, ) } diff --git a/internal/collector/gpu/utils.go b/internal/collector/gpu/utils.go index cf177e3a..c8b57295 100644 --- a/internal/collector/gpu/utils.go +++ b/internal/collector/gpu/utils.go @@ -18,12 +18,13 @@ package gpu import ( + "fmt" "strings" ) type Instance struct { Pid string - Luid [2]string + Luid string Phys string Eng string Engtype string @@ -32,11 +33,13 @@ type Instance struct { type PidPhys struct { Pid string + Luid string Phys string } type PidPhysEngEngType struct { Pid string + Luid string Phys string Eng string Engtype string @@ -58,8 +61,7 @@ func parseGPUCounterInstanceString(s string) Instance { } case "luid": if i+2 < len(parts) { - instance.Luid[0] = parts[i+1] - instance.Luid[1] = parts[i+2] + instance.Luid = fmt.Sprintf("%s_%s", parts[i+1], parts[i+2]) } case "phys": if i+1 < len(parts) { diff --git a/internal/headers/gdi32/gdi32.go b/internal/headers/gdi32/gdi32.go new file mode 100644 index 00000000..1816db70 --- /dev/null +++ b/internal/headers/gdi32/gdi32.go @@ -0,0 +1,192 @@ +// SPDX-License-Identifier: Apache-2.0 +// +// Copyright The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build windows + +package gdi32 + +import ( + "errors" + "fmt" + "unsafe" + + "golang.org/x/sys/windows" +) + +// https://learn.microsoft.com/en-us/windows-hardware/drivers/ddi/d3dkmthk/ne-d3dkmthk-_kmtqueryadapterinfotype +// https://github.com/nalilord/AMDPlugin/blob/bb405b6d58ea543ff630f3488384473bee79f447/Common/d3dkmthk.pas#L54 +const ( + // KMTQAITYPE_GETSEGMENTSIZE pPrivateDriverData points to a D3DKMT_SEGMENTSIZEINFO structure that contains information about the size of memory and aperture segments. + KMTQAITYPE_GETSEGMENTSIZE = 3 + // KMTQAITYPE_ADAPTERADDRESS pPrivateDriverData points to a D3DKMT_ADAPTERADDRESS structure that contains information about the physical location on the PCI bus of the adapter. + KMTQAITYPE_ADAPTERADDRESS = 6 + // KMTQAITYPE_ADAPTERREGISTRYINFO pPrivateDriverData points to a D3DKMT_ADAPTERREGISTRYINFO structure that contains registry information about the graphics adapter. 
+ KMTQAITYPE_ADAPTERREGISTRYINFO = 8 +) + +var ErrNoGPUDevices = errors.New("no GPU devices found") + +func GetGPUDeviceByLUID(adapterLUID windows.LUID) (GPUDevice, error) { + open := D3DKMT_OPENADAPTERFROMLUID{ + AdapterLUID: adapterLUID, + } + + if err := D3DKMTOpenAdapterFromLuid(&open); err != nil { + return GPUDevice{}, fmt.Errorf("D3DKMTOpenAdapterFromLuid failed: %w", err) + } + + errs := make([]error, 0) + + gpuDevice, err := GetGPUDevice(open.HAdapter) + if err != nil { + errs = append(errs, fmt.Errorf("GetGPUDevice failed: %w", err)) + } + + if err := D3DKMTCloseAdapter(&D3DKMT_CLOSEADAPTER{ + HAdapter: open.HAdapter, + }); err != nil { + errs = append(errs, fmt.Errorf("D3DKMTCloseAdapter failed: %w", err)) + } + + if len(errs) > 0 { + return gpuDevice, fmt.Errorf("errors occurred while getting GPU device: %w", errors.Join(errs...)) + } + + gpuDevice.LUID = adapterLUID + + return gpuDevice, nil +} + +func GetGPUDevice(hAdapter D3DKMT_HANDLE) (GPUDevice, error) { + var gpuDevice GPUDevice + + // Try segment size first + var size D3DKMT_SEGMENTSIZEINFO + + query := D3DKMT_QUERYADAPTERINFO{ + hAdapter: hAdapter, + queryType: KMTQAITYPE_GETSEGMENTSIZE, + pPrivateDriverData: unsafe.Pointer(&size), + privateDriverDataSize: uint32(unsafe.Sizeof(size)), + } + + if err := D3DKMTQueryAdapterInfo(&query); err != nil { + return gpuDevice, fmt.Errorf("D3DKMTQueryAdapterInfo (segment size) failed: %w", err) + } + + gpuDevice.DedicatedVideoMemorySize = size.DedicatedVideoMemorySize + gpuDevice.DedicatedSystemMemorySize = size.DedicatedSystemMemorySize + gpuDevice.SharedSystemMemorySize = size.SharedSystemMemorySize + + // Now try adapter address + var address D3DKMT_ADAPTERADDRESS + + query.queryType = KMTQAITYPE_ADAPTERADDRESS + query.pPrivateDriverData = unsafe.Pointer(&address) + query.privateDriverDataSize = uint32(unsafe.Sizeof(address)) + + if err := D3DKMTQueryAdapterInfo(&query); err != nil { + return gpuDevice, fmt.Errorf("D3DKMTQueryAdapterInfo (adapter address) 
failed: %w", err) + } + + gpuDevice.BusNumber = address.BusNumber + gpuDevice.DeviceNumber = address.DeviceNumber + gpuDevice.FunctionNumber = address.FunctionNumber + + // Now try registry info + var info D3DKMT_ADAPTERREGISTRYINFO + + query.queryType = KMTQAITYPE_ADAPTERREGISTRYINFO + query.pPrivateDriverData = unsafe.Pointer(&info) + query.privateDriverDataSize = uint32(unsafe.Sizeof(info)) + + if err := D3DKMTQueryAdapterInfo(&query); err != nil && !errors.Is(err, windows.ERROR_FILE_NOT_FOUND) { + return gpuDevice, fmt.Errorf("D3DKMTQueryAdapterInfo (info) failed: %w", err) + } + + gpuDevice.AdapterString = windows.UTF16ToString(info.AdapterString[:]) + + return gpuDevice, nil +} + +func GetGPUDevices() ([]GPUDevice, error) { + gpuDevices := make([]GPUDevice, 0, 2) + + // First call: Get the number of adapters + enumAdapters := D3DKMT_ENUMADAPTERS2{ + NumAdapters: 0, + PAdapters: nil, + } + + if err := D3DKMTEnumAdapters2(&enumAdapters); err != nil { + return gpuDevices, fmt.Errorf("D3DKMTEnumAdapters2 (get count) failed: %w", err) + } + + if enumAdapters.NumAdapters == 0 { + return gpuDevices, ErrNoGPUDevices + } + + // Second call: Get the actual adapter information + pAdapters := make([]D3DKMT_ADAPTERINFO, enumAdapters.NumAdapters) + enumAdapters.PAdapters = &pAdapters[0] + + if err := D3DKMTEnumAdapters2(&enumAdapters); err != nil { + return gpuDevices, fmt.Errorf("D3DKMTEnumAdapters2 (get adapters) failed: %w", err) + } + + var errs []error + + // Process each adapter + for i := range enumAdapters.NumAdapters { + adapter := pAdapters[i] + // Validate handle before using it + if adapter.HAdapter == 0 { + errs = append(errs, fmt.Errorf("adapter %d has null handle", i)) + + continue + } + + func() { + defer func() { + if closeErr := D3DKMTCloseAdapter(&D3DKMT_CLOSEADAPTER{ + HAdapter: adapter.HAdapter, + }); closeErr != nil { + errs = append(errs, fmt.Errorf("failed to close adapter %v: %w", adapter.AdapterLUID, closeErr)) + } + }() + + gpuDevice, err := 
GetGPUDevice(adapter.HAdapter) + if err != nil { + errs = append(errs, fmt.Errorf("failed to get GPU device for adapter %v: %w", adapter.AdapterLUID, err)) + + return + } + + gpuDevice.LUID = adapter.AdapterLUID + + gpuDevices = append(gpuDevices, gpuDevice) + }() + } + + if len(errs) > 0 { + return gpuDevices, errors.Join(errs...) + } + + if len(gpuDevices) == 0 { + return gpuDevices, ErrNoGPUDevices + } + + return gpuDevices, nil +} diff --git a/internal/headers/setupapi/gpu_test.go b/internal/headers/gdi32/gdi32_test.go similarity index 90% rename from internal/headers/setupapi/gpu_test.go rename to internal/headers/gdi32/gdi32_test.go index 3b79dd4b..b11c7ddb 100644 --- a/internal/headers/setupapi/gpu_test.go +++ b/internal/headers/gdi32/gdi32_test.go @@ -13,19 +13,17 @@ // See the License for the specific language governing permissions and // limitations under the License. -//go:build windows - -package setupapi_test +package gdi32_test import ( "testing" - "github.com/prometheus-community/windows_exporter/internal/headers/setupapi" + "github.com/prometheus-community/windows_exporter/internal/headers/gdi32" "github.com/stretchr/testify/require" ) func TestGetGPUDevices(t *testing.T) { - devices, err := setupapi.GetGPUDevices() + devices, err := gdi32.GetGPUDevices() require.NoError(t, err, "Failed to get GPU devices") require.NotNil(t, devices) diff --git a/internal/headers/gdi32/syscall.go b/internal/headers/gdi32/syscall.go new file mode 100644 index 00000000..5becb299 --- /dev/null +++ b/internal/headers/gdi32/syscall.go @@ -0,0 +1,77 @@ +// SPDX-License-Identifier: Apache-2.0 +// +// Copyright The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package gdi32 + +import ( + "fmt" + "unsafe" + + "github.com/prometheus-community/windows_exporter/internal/headers/ntdll" + "golang.org/x/sys/windows" +) + +//nolint:gochecknoglobals +var ( + modGdi32 = windows.NewLazySystemDLL("gdi32.dll") + procD3DKMTOpenAdapterFromLuid = modGdi32.NewProc("D3DKMTOpenAdapterFromLuid") + procD3DKMTQueryAdapterInfo = modGdi32.NewProc("D3DKMTQueryAdapterInfo") + procD3DKMTCloseAdapter = modGdi32.NewProc("D3DKMTCloseAdapter") + procD3DKMTEnumAdapters2 = modGdi32.NewProc("D3DKMTEnumAdapters2") +) + +func D3DKMTOpenAdapterFromLuid(ptr *D3DKMT_OPENADAPTERFROMLUID) error { + ret, _, _ := procD3DKMTOpenAdapterFromLuid.Call( + uintptr(unsafe.Pointer(ptr)), + ) + if ret != 0 { + return fmt.Errorf("D3DKMTOpenAdapterFromLuid failed: 0x%X: %w", ret, ntdll.RtlNtStatusToDosError(ret)) + } + + return nil +} + +func D3DKMTEnumAdapters2(ptr *D3DKMT_ENUMADAPTERS2) error { + ret, _, _ := procD3DKMTEnumAdapters2.Call( + uintptr(unsafe.Pointer(ptr)), + ) + if ret != 0 { + return fmt.Errorf("D3DKMTEnumAdapters2 failed: 0x%X: %w", ret, ntdll.RtlNtStatusToDosError(ret)) + } + + return nil +} + +func D3DKMTQueryAdapterInfo(query *D3DKMT_QUERYADAPTERINFO) error { + ret, _, _ := procD3DKMTQueryAdapterInfo.Call( + uintptr(unsafe.Pointer(query)), + ) + if ret != 0 { + return fmt.Errorf("D3DKMTQueryAdapterInfo failed: 0x%X: %w", ret, ntdll.RtlNtStatusToDosError(ret)) + } + + return nil +} + +func D3DKMTCloseAdapter(ptr *D3DKMT_CLOSEADAPTER) error { + ret, _, _ := procD3DKMTCloseAdapter.Call( + uintptr(unsafe.Pointer(ptr)), + ) + if ret != 0 
{ + return fmt.Errorf("D3DKMTCloseAdapter failed: 0x%X: %w", ret, ntdll.RtlNtStatusToDosError(ret)) + } + + return nil +} diff --git a/internal/headers/gdi32/types.go b/internal/headers/gdi32/types.go new file mode 100644 index 00000000..f1c62596 --- /dev/null +++ b/internal/headers/gdi32/types.go @@ -0,0 +1,85 @@ +// SPDX-License-Identifier: Apache-2.0 +// +// Copyright The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build windows + +package gdi32 + +import ( + "unsafe" + + "github.com/prometheus-community/windows_exporter/internal/headers/win32" + "golang.org/x/sys/windows" +) + +type D3DKMT_HANDLE = win32.UINT + +type D3DKMT_OPENADAPTERFROMLUID struct { + AdapterLUID windows.LUID + HAdapter D3DKMT_HANDLE +} + +type D3DKMT_CLOSEADAPTER struct { + HAdapter D3DKMT_HANDLE +} + +type D3DKMT_QUERYADAPTERINFO struct { + hAdapter D3DKMT_HANDLE + queryType int32 + pPrivateDriverData unsafe.Pointer + privateDriverDataSize uint32 +} + +type D3DKMT_ENUMADAPTERS2 struct { + NumAdapters uint32 + PAdapters *D3DKMT_ADAPTERINFO +} + +type D3DKMT_ADAPTERINFO struct { + HAdapter D3DKMT_HANDLE + AdapterLUID windows.LUID + NumOfSources win32.ULONG + Present win32.BOOL +} + +type D3DKMT_ADAPTERREGISTRYINFO struct { + AdapterString [win32.MAX_PATH]uint16 + BiosString [win32.MAX_PATH]uint16 + DacType [win32.MAX_PATH]uint16 + ChipType [win32.MAX_PATH]uint16 +} + +type D3DKMT_SEGMENTSIZEINFO struct { + DedicatedVideoMemorySize uint64 + 
DedicatedSystemMemorySize uint64 + SharedSystemMemorySize uint64 +} + +type D3DKMT_ADAPTERADDRESS struct { + BusNumber win32.UINT + DeviceNumber win32.UINT + FunctionNumber win32.UINT +} + +type GPUDevice struct { + AdapterString string + LUID windows.LUID + DedicatedVideoMemorySize uint64 + DedicatedSystemMemorySize uint64 + SharedSystemMemorySize uint64 + BusNumber win32.UINT + DeviceNumber win32.UINT + FunctionNumber win32.UINT +} diff --git a/internal/headers/setupapi/setupapi.go b/internal/headers/ntdll/ntdll.go similarity index 61% rename from internal/headers/setupapi/setupapi.go rename to internal/headers/ntdll/ntdll.go index 75e8bd89..2a7c72d0 100644 --- a/internal/headers/setupapi/setupapi.go +++ b/internal/headers/ntdll/ntdll.go @@ -15,7 +15,7 @@ //go:build windows -package setupapi +package ntdll import ( "golang.org/x/sys/windows" @@ -23,9 +23,15 @@ import ( //nolint:gochecknoglobals var ( - modSetupAPI = windows.NewLazySystemDLL("setupapi.dll") - procSetupDiGetClassDevsW = modSetupAPI.NewProc("SetupDiGetClassDevsW") - procSetupDiEnumDeviceInfo = modSetupAPI.NewProc("SetupDiEnumDeviceInfo") - procSetupDiGetDeviceRegistryPropertyW = modSetupAPI.NewProc("SetupDiGetDeviceRegistryPropertyW") - procSetupDiDestroyDeviceInfoList = modSetupAPI.NewProc("SetupDiDestroyDeviceInfoList") + modNtdll = windows.NewLazySystemDLL("ntdll.dll") + procRtlNtStatusToDosError = modNtdll.NewProc("RtlNtStatusToDosError") ) + +func RtlNtStatusToDosError(status uintptr) error { + ret, _, _ := procRtlNtStatusToDosError.Call(status) + if ret == 0 { + return nil + } + + return windows.Errno(ret) +} diff --git a/internal/headers/setupapi/gpu.go b/internal/headers/setupapi/gpu.go deleted file mode 100644 index 53cb6caf..00000000 --- a/internal/headers/setupapi/gpu.go +++ /dev/null @@ -1,135 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// -// Copyright The Prometheus Authors -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in 
compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build windows - -package setupapi - -import ( - "sync" - "unsafe" - - "golang.org/x/sys/windows" -) - -//nolint:gochecknoglobals -var GUID_DISPLAY_ADAPTER = sync.OnceValue(func() *windows.GUID { - return &windows.GUID{ - Data1: 0x4d36e968, - Data2: 0xe325, - Data3: 0x11ce, - Data4: [8]byte{0xbf, 0xc1, 0x08, 0x00, 0x2b, 0xe1, 0x03, 0x18}, - } -}) - -func GetGPUDevices() ([]GPUDevice, error) { - hDevInfo, _, err := procSetupDiGetClassDevsW.Call( - uintptr(unsafe.Pointer(GUID_DISPLAY_ADAPTER())), - 0, - 0, - DIGCF_PRESENT, - ) - - if windows.Handle(hDevInfo) == windows.InvalidHandle { - return nil, err - } - - var ( - devices []GPUDevice - deviceData SP_DEVINFO_DATA - propertyBuffer [256]uint16 - ) - - deviceData.CbSize = uint32(unsafe.Sizeof(deviceData)) - - for i := 0; ; i++ { - ret, _, _ := procSetupDiEnumDeviceInfo.Call(hDevInfo, uintptr(i), uintptr(unsafe.Pointer(&deviceData))) - if ret == 0 { - break // No more devices - } - - ret, _, _ = procSetupDiGetDeviceRegistryPropertyW.Call( - hDevInfo, - uintptr(unsafe.Pointer(&deviceData)), - uintptr(SPDRP_DEVICEDESC), - 0, - uintptr(unsafe.Pointer(&propertyBuffer[0])), - uintptr(len(propertyBuffer)*2), - 0, - ) - - gpuDevice := GPUDevice{} - - if ret == 0 { - gpuDevice.DeviceDesc = "" - } else { - gpuDevice.DeviceDesc = windows.UTF16ToString(propertyBuffer[:]) - } - - ret, _, _ = procSetupDiGetDeviceRegistryPropertyW.Call( - hDevInfo, - uintptr(unsafe.Pointer(&deviceData)), - uintptr(SPDRP_FRIENDLYNAME), - 0, - uintptr(unsafe.Pointer(&propertyBuffer[0])), - 
uintptr(len(propertyBuffer)*2), - 0, - ) - - if ret == 0 { - gpuDevice.FriendlyName = "" - } else { - gpuDevice.FriendlyName = windows.UTF16ToString(propertyBuffer[:]) - } - - ret, _, _ = procSetupDiGetDeviceRegistryPropertyW.Call( - hDevInfo, - uintptr(unsafe.Pointer(&deviceData)), - uintptr(SPDRP_HARDWAREID), - 0, - uintptr(unsafe.Pointer(&propertyBuffer[0])), - uintptr(len(propertyBuffer)*2), - 0, - ) - - if ret == 0 { - gpuDevice.HardwareID = "unknown" - } else { - gpuDevice.HardwareID = windows.UTF16ToString(propertyBuffer[:]) - } - - ret, _, _ = procSetupDiGetDeviceRegistryPropertyW.Call( - hDevInfo, - uintptr(unsafe.Pointer(&deviceData)), - uintptr(SPDRP_PHYSICAL_DEVICE_OBJECT_NAME), - 0, - uintptr(unsafe.Pointer(&propertyBuffer[0])), - uintptr(len(propertyBuffer)*2), - 0, - ) - - if ret == 0 { - gpuDevice.PhysicalDeviceObjectName = "unknown" - } else { - gpuDevice.PhysicalDeviceObjectName = windows.UTF16ToString(propertyBuffer[:]) - } - - devices = append(devices, gpuDevice) - } - - _, _, _ = procSetupDiDestroyDeviceInfoList.Call(hDevInfo) - - return devices, nil -} diff --git a/internal/headers/setupapi/types.go b/internal/headers/setupapi/types.go deleted file mode 100644 index ad1c1e64..00000000 --- a/internal/headers/setupapi/types.go +++ /dev/null @@ -1,42 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// -// Copyright The Prometheus Authors -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -//go:build windows - -package setupapi - -import "golang.org/x/sys/windows" - -const ( - DIGCF_PRESENT = 0x00000002 - SPDRP_DEVICEDESC = 0x00000000 - SPDRP_FRIENDLYNAME = 0x0000000C - SPDRP_HARDWAREID = 0x00000001 - SPDRP_PHYSICAL_DEVICE_OBJECT_NAME = 0x0000000E -) - -type SP_DEVINFO_DATA struct { - CbSize uint32 - ClassGuid windows.GUID - DevInst uint32 - _ uintptr // Reserved -} - -type GPUDevice struct { - DeviceDesc string - FriendlyName string - HardwareID string - PhysicalDeviceObjectName string -} diff --git a/internal/headers/win32/types.go b/internal/headers/win32/types.go index a6069aff..08147db9 100644 --- a/internal/headers/win32/types.go +++ b/internal/headers/win32/types.go @@ -23,12 +23,17 @@ import ( "golang.org/x/sys/windows" ) +const MAX_PATH = 260 + type ( + BOOL = int32 // BOOL is a 32-bit signed int in Win32 DATE_TIME = windows.Filetime DWORD = uint32 LPWSTR struct { *uint16 } + ULONG = uint32 // ULONG is a 32-bit unsigned int in Win32 + UINT = uint32 // UINT is a 32-bit unsigned int in Win32 ) // NewLPWSTR creates a new LPWSTR from a string.