container: support hostprocess containers and expose kubernetes labels (#1911)

This commit is contained in:
Jan-Otto Kröpke
2025-05-18 09:39:52 +02:00
committed by GitHub
parent 6b87441729
commit 898e16bcb1
43 changed files with 1800 additions and 296 deletions

View File

@@ -18,25 +18,48 @@
package container
import (
"encoding/json"
"errors"
"fmt"
"io/fs"
"log/slog"
"os"
"slices"
"strings"
"unsafe"
"github.com/Microsoft/hcsshim"
"github.com/alecthomas/kingpin/v2"
"github.com/prometheus-community/windows_exporter/internal/headers/guid"
"github.com/prometheus-community/windows_exporter/internal/headers/hcn"
"github.com/prometheus-community/windows_exporter/internal/headers/hcs"
"github.com/prometheus-community/windows_exporter/internal/headers/iphlpapi"
"github.com/prometheus-community/windows_exporter/internal/headers/kernel32"
"github.com/prometheus-community/windows_exporter/internal/mi"
"github.com/prometheus-community/windows_exporter/internal/pdh"
"github.com/prometheus-community/windows_exporter/internal/types"
"github.com/prometheus/client_golang/prometheus"
"golang.org/x/sys/windows"
)
const Name = "container"
const (
Name = "container"
type Config struct{}
subCollectorHCS = "hcs"
subCollectorHostprocess = "hostprocess"
containerDStateDir = `C:\ProgramData\containerd\state\io.containerd.runtime.v2.task\k8s.io\`
)
type Config struct {
CollectorsEnabled []string `yaml:"collectors_enabled"`
}
//nolint:gochecknoglobals
var ConfigDefaults = Config{}
var ConfigDefaults = Config{
CollectorsEnabled: []string{
subCollectorHCS,
subCollectorHostprocess,
},
}
// A Collector is a Prometheus Collector for containers metrics.
type Collector struct {
@@ -44,6 +67,9 @@ type Collector struct {
logger *slog.Logger
annotationsCacheHCS map[string]containerInfo
annotationsCacheJob map[string]containerInfo
// Presence
containerAvailable *prometheus.Desc
@@ -75,12 +101,27 @@ type Collector struct {
writeSizeBytes *prometheus.Desc
}
// containerInfo holds the label values attached to every metric of a single
// container. Entries are cached per container ID in the collector's
// annotation caches.
type containerInfo struct {
// id is the container ID including its runtime prefix, e.g. "containerd://<id>".
id string
// namespace is taken from the "io.kubernetes.cri.sandbox-namespace" annotation; empty if unavailable.
namespace string
// pod is taken from the "io.kubernetes.cri.sandbox-name" annotation; empty if unavailable.
pod string
// container is taken from the "io.kubernetes.cri.container-name" annotation; empty if unavailable.
container string
}
// ociSpec is the subset of a container's config.json that is decoded by
// getContainerAnnotations; only the annotations are read.
type ociSpec struct {
// Annotations maps annotation keys (e.g. "io.kubernetes.cri.container-name") to their values.
Annotations map[string]string `json:"annotations"`
}
// New constructs a new Collector.
func New(config *Config) *Collector {
if config == nil {
config = &ConfigDefaults
}
if config.CollectorsEnabled == nil {
config.CollectorsEnabled = ConfigDefaults.CollectorsEnabled
}
c := &Collector{
config: *config,
}
@@ -88,8 +129,26 @@ func New(config *Config) *Collector {
return c
}
func NewWithFlags(_ *kingpin.Application) *Collector {
return &Collector{}
// NewWithFlags constructs a Collector whose list of enabled sub-collectors is
// configured through the "collector.container.enabled" command line flag.
//
// The flag value is a comma-separated list and defaults to every entry of
// ConfigDefaults.CollectorsEnabled. The list is only written into the
// collector configuration once kingpin runs the registered action, i.e. after
// the command line has been parsed; until then it is empty.
func NewWithFlags(app *kingpin.Application) *Collector {
	collector := &Collector{config: ConfigDefaults}
	collector.config.CollectorsEnabled = []string{}

	// Raw flag value, filled in by kingpin during parsing.
	var rawList string

	defaultList := strings.Join(ConfigDefaults.CollectorsEnabled, ",")

	enabledFlag := app.Flag(
		"collector.container.enabled",
		"Comma-separated list of collectors to use. Defaults to all, if not specified.",
	)
	enabledFlag.Default(defaultList).StringVar(&rawList)

	app.Action(func(*kingpin.ParseContext) error {
		collector.config.CollectorsEnabled = strings.Split(rawList, ",")

		return nil
	})

	return collector
}
func (c *Collector) GetName() string {
@@ -103,10 +162,16 @@ func (c *Collector) Close() error {
func (c *Collector) Build(logger *slog.Logger, _ *mi.Session) error {
c.logger = logger.With(slog.String("collector", Name))
for _, collector := range c.config.CollectorsEnabled {
if !slices.Contains([]string{subCollectorHCS, subCollectorHostprocess}, collector) {
return fmt.Errorf("unknown collector: %s", collector)
}
}
c.containerAvailable = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "available"),
"Available",
[]string{"container_id"},
[]string{"container_id", "namespace", "pod", "container"},
nil,
)
c.containersCount = prometheus.NewDesc(
@@ -118,97 +183,97 @@ func (c *Collector) Build(logger *slog.Logger, _ *mi.Session) error {
c.usageCommitBytes = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "memory_usage_commit_bytes"),
"Memory Usage Commit Bytes",
[]string{"container_id"},
[]string{"container_id", "namespace", "pod", "container"},
nil,
)
c.usageCommitPeakBytes = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "memory_usage_commit_peak_bytes"),
"Memory Usage Commit Peak Bytes",
[]string{"container_id"},
[]string{"container_id", "namespace", "pod", "container"},
nil,
)
c.usagePrivateWorkingSetBytes = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "memory_usage_private_working_set_bytes"),
"Memory Usage Private Working Set Bytes",
[]string{"container_id"},
[]string{"container_id", "namespace", "pod", "container"},
nil,
)
c.runtimeTotal = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "cpu_usage_seconds_total"),
"Total Run time in Seconds",
[]string{"container_id"},
[]string{"container_id", "namespace", "pod", "container"},
nil,
)
c.runtimeUser = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "cpu_usage_seconds_usermode"),
"Run Time in User mode in Seconds",
[]string{"container_id"},
[]string{"container_id", "namespace", "pod", "container"},
nil,
)
c.runtimeKernel = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "cpu_usage_seconds_kernelmode"),
"Run time in Kernel mode in Seconds",
[]string{"container_id"},
[]string{"container_id", "namespace", "pod", "container"},
nil,
)
c.bytesReceived = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "network_receive_bytes_total"),
"Bytes Received on Interface",
[]string{"container_id", "interface"},
[]string{"container_id", "namespace", "pod", "container", "interface"},
nil,
)
c.bytesSent = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "network_transmit_bytes_total"),
"Bytes Sent on Interface",
[]string{"container_id", "interface"},
[]string{"container_id", "namespace", "pod", "container", "interface"},
nil,
)
c.packetsReceived = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "network_receive_packets_total"),
"Packets Received on Interface",
[]string{"container_id", "interface"},
[]string{"container_id", "namespace", "pod", "container", "interface"},
nil,
)
c.packetsSent = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "network_transmit_packets_total"),
"Packets Sent on Interface",
[]string{"container_id", "interface"},
[]string{"container_id", "namespace", "pod", "container", "interface"},
nil,
)
c.droppedPacketsIncoming = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "network_receive_packets_dropped_total"),
"Dropped Incoming Packets on Interface",
[]string{"container_id", "interface"},
[]string{"container_id", "namespace", "pod", "container", "interface"},
nil,
)
c.droppedPacketsOutgoing = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "network_transmit_packets_dropped_total"),
"Dropped Outgoing Packets on Interface",
[]string{"container_id", "interface"},
[]string{"container_id", "namespace", "pod", "container", "interface"},
nil,
)
c.readCountNormalized = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "storage_read_count_normalized_total"),
"Read Count Normalized",
[]string{"container_id"},
[]string{"container_id", "namespace", "pod", "container"},
nil,
)
c.readSizeBytes = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "storage_read_size_bytes_total"),
"Read Size Bytes",
[]string{"container_id"},
[]string{"container_id", "namespace", "pod", "container"},
nil,
)
c.writeCountNormalized = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "storage_write_count_normalized_total"),
"Write Count Normalized",
[]string{"container_id"},
[]string{"container_id", "namespace", "pod", "container"},
nil,
)
c.writeSizeBytes = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "storage_write_size_bytes_total"),
"Write Size Bytes",
[]string{"container_id"},
[]string{"container_id", "namespace", "pod", "container"},
nil,
)
@@ -218,39 +283,85 @@ func (c *Collector) Build(logger *slog.Logger, _ *mi.Session) error {
// Collect runs every enabled sub-collector and sends the resulting metrics
// to the provided prometheus Metric channel.
//
// A failing sub-collector does not stop the remaining one from running; all
// failures are gathered and returned as a single joined error (nil if none).
func (c *Collector) Collect(ch chan<- prometheus.Metric) error {
	// Sub-collectors in the order in which they are executed.
	subCollectors := []struct {
		name    string
		collect func(chan<- prometheus.Metric) error
	}{
		{name: subCollectorHCS, collect: c.collectHCS},
		{name: subCollectorHostprocess, collect: c.collectJobContainers},
	}

	failures := make([]error, 0, len(subCollectors))

	for _, sub := range subCollectors {
		if !slices.Contains(c.config.CollectorsEnabled, sub.name) {
			continue
		}

		if err := sub.collect(ch); err != nil {
			failures = append(failures, err)
		}
	}

	return errors.Join(failures...)
}
func (c *Collector) collectHCS(ch chan<- prometheus.Metric) error {
// Types Container is passed to get the containers compute systems only
containers, err := hcsshim.GetContainers(hcsshim.ComputeSystemQuery{Types: []string{"Container"}})
containers, err := hcs.GetContainers()
if err != nil {
return fmt.Errorf("error in fetching containers: %w", err)
}
count := len(containers)
ch <- prometheus.MustNewConstMetric(
c.containersCount,
prometheus.GaugeValue,
float64(count),
)
if count == 0 {
ch <- prometheus.MustNewConstMetric(
c.containersCount,
prometheus.GaugeValue,
0,
)
return nil
}
containerPrefixes := make(map[string]string)
var countersCount float64
containerIDs := make([]string, 0, len(containers))
collectErrors := make([]error, 0, len(containers))
for _, containerDetails := range containers {
containerIdWithPrefix := getContainerIdWithPrefix(containerDetails)
for _, container := range containers {
if container.State != "Running" {
continue
}
if err = c.collectContainer(ch, containerDetails, containerIdWithPrefix); err != nil {
if hcsshim.IsNotExist(err) {
containerIDs = append(containerIDs, container.ID)
countersCount++
var (
namespace string
podName string
containerName string
)
if _, ok := c.annotationsCacheHCS[container.ID]; !ok {
if spec, err := getContainerAnnotations(container.ID); err == nil {
namespace = spec.Annotations["io.kubernetes.cri.sandbox-namespace"]
podName = spec.Annotations["io.kubernetes.cri.sandbox-name"]
containerName = spec.Annotations["io.kubernetes.cri.container-name"]
}
c.annotationsCacheHCS[container.ID] = containerInfo{
id: getContainerIdWithPrefix(container),
namespace: namespace,
pod: podName,
container: containerName,
}
}
if err = c.collectHCSContainer(ch, container, c.annotationsCacheHCS[container.ID]); err != nil {
if errors.Is(err, hcs.ErrIDNotFound) {
c.logger.Debug("err in fetching container statistics",
slog.String("container_id", containerDetails.ID),
slog.String("container_id", container.ID),
slog.Any("err", err),
)
} else {
c.logger.Error("err in fetching container statistics",
slog.String("container_id", containerDetails.ID),
slog.String("container_id", container.ID),
slog.Any("err", err),
)
@@ -259,14 +370,25 @@ func (c *Collector) Collect(ch chan<- prometheus.Metric) error {
continue
}
containerPrefixes[containerDetails.ID] = containerIdWithPrefix
}
if err = c.collectNetworkMetrics(ch, containerPrefixes); err != nil {
ch <- prometheus.MustNewConstMetric(
c.containersCount,
prometheus.GaugeValue,
countersCount,
)
if err = c.collectNetworkMetrics(ch); err != nil {
return fmt.Errorf("error in fetching container network statistics: %w", err)
}
// Remove containers that are no longer running
for _, containerID := range c.annotationsCacheHCS {
if !slices.Contains(containerIDs, containerID.id) {
delete(c.annotationsCacheHCS, containerID.id)
}
}
if len(collectErrors) > 0 {
return fmt.Errorf("errors while fetching container statistics: %w", errors.Join(collectErrors...))
}
@@ -274,94 +396,87 @@ func (c *Collector) Collect(ch chan<- prometheus.Metric) error {
return nil
}
func (c *Collector) collectContainer(ch chan<- prometheus.Metric, containerDetails hcsshim.ContainerProperties, containerIdWithPrefix string) error {
container, err := hcsshim.OpenContainer(containerDetails.ID)
func (c *Collector) collectHCSContainer(ch chan<- prometheus.Metric, containerDetails hcs.Properties, containerInfo containerInfo) error {
containerStats, err := hcs.GetContainerStatistics(containerDetails.ID)
if err != nil {
return fmt.Errorf("error in opening container: %w", err)
}
defer func() {
if container == nil {
return
}
if err := container.Close(); err != nil {
c.logger.Error("error in closing container",
slog.Any("err", err),
)
}
}()
containerStats, err := container.Statistics()
if err != nil {
return fmt.Errorf("error in fetching container statistics: %w", err)
return fmt.Errorf("error fetching container statistics: %w", err)
}
ch <- prometheus.MustNewConstMetric(
c.containerAvailable,
prometheus.CounterValue,
1,
containerIdWithPrefix,
containerInfo.id, containerInfo.namespace, containerInfo.pod, containerInfo.container,
)
ch <- prometheus.MustNewConstMetric(
c.usageCommitBytes,
prometheus.GaugeValue,
float64(containerStats.Memory.UsageCommitBytes),
containerIdWithPrefix,
float64(containerStats.Memory.MemoryUsageCommitBytes),
containerInfo.id, containerInfo.namespace, containerInfo.pod, containerInfo.container,
)
ch <- prometheus.MustNewConstMetric(
c.usageCommitPeakBytes,
prometheus.GaugeValue,
float64(containerStats.Memory.UsageCommitPeakBytes),
containerIdWithPrefix,
float64(containerStats.Memory.MemoryUsageCommitPeakBytes),
containerInfo.id, containerInfo.namespace, containerInfo.pod, containerInfo.container,
)
ch <- prometheus.MustNewConstMetric(
c.usagePrivateWorkingSetBytes,
prometheus.GaugeValue,
float64(containerStats.Memory.UsagePrivateWorkingSetBytes),
containerIdWithPrefix,
float64(containerStats.Memory.MemoryUsagePrivateWorkingSetBytes),
containerInfo.id, containerInfo.namespace, containerInfo.pod, containerInfo.container,
)
ch <- prometheus.MustNewConstMetric(
c.runtimeTotal,
prometheus.CounterValue,
float64(containerStats.Processor.TotalRuntime100ns)*pdh.TicksToSecondScaleFactor,
containerIdWithPrefix,
containerInfo.id, containerInfo.namespace, containerInfo.pod, containerInfo.container,
)
ch <- prometheus.MustNewConstMetric(
c.runtimeUser,
prometheus.CounterValue,
float64(containerStats.Processor.RuntimeUser100ns)*pdh.TicksToSecondScaleFactor,
containerIdWithPrefix,
containerInfo.id, containerInfo.namespace, containerInfo.pod, containerInfo.container,
)
ch <- prometheus.MustNewConstMetric(
c.runtimeKernel,
prometheus.CounterValue,
float64(containerStats.Processor.RuntimeKernel100ns)*pdh.TicksToSecondScaleFactor,
containerIdWithPrefix,
containerInfo.id, containerInfo.namespace, containerInfo.pod, containerInfo.container,
)
ch <- prometheus.MustNewConstMetric(
c.readCountNormalized,
prometheus.CounterValue,
float64(containerStats.Storage.ReadCountNormalized),
containerIdWithPrefix,
containerInfo.id, containerInfo.namespace, containerInfo.pod, containerInfo.container,
)
ch <- prometheus.MustNewConstMetric(
c.readSizeBytes,
prometheus.CounterValue,
float64(containerStats.Storage.ReadSizeBytes),
containerIdWithPrefix,
containerInfo.id, containerInfo.namespace, containerInfo.pod, containerInfo.container,
)
ch <- prometheus.MustNewConstMetric(
c.writeCountNormalized,
prometheus.CounterValue,
float64(containerStats.Storage.WriteCountNormalized),
containerIdWithPrefix,
containerInfo.id, containerInfo.namespace, containerInfo.pod, containerInfo.container,
)
ch <- prometheus.MustNewConstMetric(
c.writeSizeBytes,
prometheus.CounterValue,
float64(containerStats.Storage.WriteSizeBytes),
containerIdWithPrefix,
containerInfo.id, containerInfo.namespace, containerInfo.pod, containerInfo.container,
)
return nil
@@ -371,73 +486,105 @@ func (c *Collector) collectContainer(ch chan<- prometheus.Metric, containerDetai
// With HNSv2, the network stats must be collected from hcsshim.HNSListEndpointRequest.
// Network statistics from container.Statistics() provide data only if HNSv1 is used.
// Ref: https://github.com/prometheus-community/windows_exporter/pull/1218
func (c *Collector) collectNetworkMetrics(ch chan<- prometheus.Metric, containerPrefixes map[string]string) error {
hnsEndpoints, err := hcsshim.HNSListEndpointRequest()
func (c *Collector) collectNetworkMetrics(ch chan<- prometheus.Metric) error {
endpoints, err := hcn.EnumerateEndpoints()
if err != nil {
return fmt.Errorf("error in fetching HNS endpoints: %w", err)
return fmt.Errorf("error in fetching HCN endpoints: %w", err)
}
if len(hnsEndpoints) == 0 {
return errors.New("no network stats for containers to collect")
if len(endpoints) == 0 {
return nil
}
for _, endpoint := range hnsEndpoints {
endpointStats, err := hcsshim.GetHNSEndpointStats(endpoint.Id)
for _, endpoint := range endpoints {
properties, err := hcn.GetEndpointProperties(endpoint)
if err != nil {
c.logger.Warn("Failed to collect network stats for interface "+endpoint.Id,
c.logger.Warn("Failed to collect properties for interface "+endpoint.String(),
slog.Any("err", err),
)
continue
}
for _, containerId := range endpoint.SharedContainers {
containerIdWithPrefix, ok := containerPrefixes[containerId]
var nicGUID *guid.GUID
for _, allocator := range properties.Resources.Allocators {
if allocator.AdapterNetCfgInstanceId != nil {
nicGUID = allocator.AdapterNetCfgInstanceId
break
}
}
if nicGUID == nil {
c.logger.Warn("Failed to get nic GUID for endpoint " + endpoint.String())
continue
}
luid, err := iphlpapi.ConvertInterfaceGUIDToLUID(*nicGUID)
if err != nil {
return fmt.Errorf("error in converting interface GUID to LUID: %w", err)
}
var endpointStats iphlpapi.MIB_IF_ROW2
endpointStats.InterfaceLuid = luid
if err := iphlpapi.GetIfEntry2Ex(&endpointStats); err != nil {
c.logger.Warn("Failed to get interface entry for endpoint "+endpoint.String(),
slog.Any("err", err),
)
continue
}
for _, containerId := range properties.SharedContainers {
containerInfo, ok := c.annotationsCacheHCS[containerId]
if !ok {
c.logger.Debug("Failed to collect network stats for container " + containerId)
c.logger.Debug("Unknown container " + containerId + " for endpoint " + endpoint.String())
continue
}
endpointId := strings.ToUpper(endpoint.Id)
endpointId := strings.ToUpper(endpoint.String())
ch <- prometheus.MustNewConstMetric(
c.bytesReceived,
prometheus.CounterValue,
float64(endpointStats.BytesReceived),
containerIdWithPrefix, endpointId,
float64(endpointStats.InOctets),
containerInfo.id, containerInfo.namespace, containerInfo.pod, containerInfo.container, endpointId,
)
ch <- prometheus.MustNewConstMetric(
c.bytesSent,
prometheus.CounterValue,
float64(endpointStats.BytesSent),
containerIdWithPrefix, endpointId,
float64(endpointStats.OutOctets),
containerInfo.id, containerInfo.namespace, containerInfo.pod, containerInfo.container, endpointId,
)
ch <- prometheus.MustNewConstMetric(
c.packetsReceived,
prometheus.CounterValue,
float64(endpointStats.PacketsReceived),
containerIdWithPrefix, endpointId,
float64(endpointStats.InUcastPkts+endpointStats.InNUcastPkts),
containerInfo.id, containerInfo.namespace, containerInfo.pod, containerInfo.container, endpointId,
)
ch <- prometheus.MustNewConstMetric(
c.packetsSent,
prometheus.CounterValue,
float64(endpointStats.PacketsSent),
containerIdWithPrefix, endpointId,
float64(endpointStats.OutUcastPkts+endpointStats.OutNUcastPkts),
containerInfo.id, containerInfo.namespace, containerInfo.pod, containerInfo.container, endpointId,
)
ch <- prometheus.MustNewConstMetric(
c.droppedPacketsIncoming,
prometheus.CounterValue,
float64(endpointStats.DroppedPacketsIncoming),
containerIdWithPrefix, endpointId,
float64(endpointStats.InDiscards+endpointStats.InErrors),
containerInfo.id, containerInfo.namespace, containerInfo.pod, containerInfo.container, endpointId,
)
ch <- prometheus.MustNewConstMetric(
c.droppedPacketsOutgoing,
prometheus.CounterValue,
float64(endpointStats.DroppedPacketsOutgoing),
containerIdWithPrefix, endpointId,
float64(endpointStats.OutDiscards+endpointStats.OutErrors),
containerInfo.id, containerInfo.namespace, containerInfo.pod, containerInfo.container, endpointId,
)
}
}
@@ -445,12 +592,286 @@ func (c *Collector) collectNetworkMetrics(ch chan<- prometheus.Metric, container
return nil
}
func getContainerIdWithPrefix(containerDetails hcsshim.ContainerProperties) string {
switch containerDetails.Owner {
// collectJobContainers collects container metrics for job containers.
// Job containers are based on Win32 job objects:
// https://learn.microsoft.com/en-us/windows/win32/procthread/job-objects
//
// Job containers are containers that aren't managed by HCS, e.g. host process containers.
//
// Directories directly below containerDStateDir are taken as candidate
// container IDs and handed to collectJobContainer one by one. Errors of the
// individual containers are joined and returned. A missing state directory is
// only logged as a warning. Cache entries of containers that were not
// collected in this run are evicted afterwards.
func (c *Collector) collectJobContainers(ch chan<- prometheus.Metric) error {
	containerDStateFS := os.DirFS(containerDStateDir)

	allContainerIDs := make([]string, 0, len(c.annotationsCacheJob)+len(c.annotationsCacheHCS))

	if err := fs.WalkDir(containerDStateFS, ".", func(path string, d fs.DirEntry, err error) error {
		if err != nil {
			if errors.Is(err, fs.ErrNotExist) {
				c.logger.Warn("containerd state directory does not exist",
					slog.String("path", containerDStateDir),
					slog.Any("err", err),
				)

				return nil
			}

			return err
		}

		// The walk root itself is reported first. Returning fs.SkipDir for it
		// would end the walk before any container directory is visited, so
		// descend into it instead.
		if path == "." {
			return nil
		}

		if !d.IsDir() {
			return nil
		}

		// NOTE(review): path is relative to containerDStateDir, so this os.Stat
		// is resolved against the process working directory, and the directory
		// is recorded when the lookup fails. Confirm whether the intent was to
		// require a config.json inside the state directory.
		if _, err := os.Stat(path + "\\config.json"); err != nil {
			containerID := strings.TrimPrefix(strings.Replace(path, containerDStateDir, "", 1), `\`)

			allContainerIDs = append(allContainerIDs, containerID)
		}

		// Skip the directory content
		return fs.SkipDir
	}); err != nil {
		return fmt.Errorf("error in walking containerd state directory: %w", err)
	}

	errs := make([]error, 0)
	jobContainerIDs := make([]string, 0, len(allContainerIDs))

	for _, containerID := range allContainerIDs {
		if err := c.collectJobContainer(ch, containerID); err != nil {
			errs = append(errs, err)
		} else {
			jobContainerIDs = append(jobContainerIDs, containerID)
		}
	}

	// Remove containers that are no longer running.
	// The cache is keyed by the raw container ID (the same form stored in
	// jobContainerIDs), not by the prefixed containerInfo.id, so the lookup
	// and the delete must use the map key.
	for cachedID := range c.annotationsCacheJob {
		if !slices.Contains(jobContainerIDs, cachedID) {
			delete(c.annotationsCacheJob, cachedID)
		}
	}

	return errors.Join(errs...)
}
// collectJobContainer emits the metrics of a single job container.
//
// The container is looked up through its job object named
// "JobContainer_<containerID>". If no such job object exists, nothing is
// emitted and nil is returned. The Kubernetes labels of the container are
// read from its annotations on first use and cached in annotationsCacheJob.
//
// A failure to compute the private working set is only logged at debug level;
// the metric is then reported with the value returned alongside the error.
func (c *Collector) collectJobContainer(ch chan<- prometheus.Metric, containerID string) error {
	jobObjectHandle, err := kernel32.OpenJobObject("JobContainer_" + containerID)

	switch {
	case err == nil:
		// Job object found, continue below.
	case errors.Is(err, windows.ERROR_FILE_NOT_FOUND):
		return nil
	default:
		return fmt.Errorf("error in opening job object: %w", err)
	}

	defer func() {
		_ = windows.Close(jobObjectHandle)
	}()

	// Resolve the label values once per container and remember them.
	info, cached := c.annotationsCacheJob[containerID]
	if !cached {
		info = containerInfo{id: "containerd://" + containerID}

		if spec, specErr := getContainerAnnotations(containerID); specErr == nil {
			info.namespace = spec.Annotations["io.kubernetes.cri.sandbox-namespace"]
			info.pod = spec.Annotations["io.kubernetes.cri.sandbox-name"]
			info.container = spec.Annotations["io.kubernetes.cri.container-name"]
		}

		c.annotationsCacheJob[containerID] = info
	}

	var jobInfo kernel32.JobObjectExtendedLimitInformation

	jobInfoSize := uint32(unsafe.Sizeof(jobInfo))

	if err := windows.QueryInformationJobObject(
		jobObjectHandle,
		windows.JobObjectExtendedLimitInformation,
		uintptr(unsafe.Pointer(&jobInfo)),
		jobInfoSize,
		&jobInfoSize,
	); err != nil {
		return err
	}

	privateWorkingSetBytes, err := calculatePrivateWorkingSetBytes(jobObjectHandle)
	if err != nil {
		c.logger.Debug("error in calculating private working set bytes", slog.Any("err", err))
	}

	kernelTime := float64(jobInfo.BasicInfo.ThisPeriodTotalKernelTime)
	userTime := float64(jobInfo.BasicInfo.ThisPeriodTotalUserTime)

	// Samples in the order in which they are sent to the channel.
	samples := []struct {
		desc      *prometheus.Desc
		valueType prometheus.ValueType
		value     float64
	}{
		{c.containerAvailable, prometheus.CounterValue, 1},
		{c.usageCommitBytes, prometheus.GaugeValue, float64(jobInfo.JobMemoryLimit)},
		{c.usageCommitPeakBytes, prometheus.GaugeValue, float64(jobInfo.PeakProcessMemoryUsed)},
		{c.usagePrivateWorkingSetBytes, prometheus.GaugeValue, float64(privateWorkingSetBytes)},
		{c.runtimeTotal, prometheus.CounterValue, (kernelTime + userTime) * pdh.TicksToSecondScaleFactor},
		{c.runtimeUser, prometheus.CounterValue, userTime * pdh.TicksToSecondScaleFactor},
		{c.runtimeKernel, prometheus.CounterValue, kernelTime * pdh.TicksToSecondScaleFactor},
		{c.readCountNormalized, prometheus.CounterValue, float64(jobInfo.IoInfo.ReadOperationCount)},
		{c.readSizeBytes, prometheus.CounterValue, float64(jobInfo.IoInfo.ReadTransferCount)},
		{c.writeCountNormalized, prometheus.CounterValue, float64(jobInfo.IoInfo.WriteOperationCount)},
		{c.writeSizeBytes, prometheus.CounterValue, float64(jobInfo.IoInfo.WriteTransferCount)},
	}

	for _, sample := range samples {
		ch <- prometheus.MustNewConstMetric(
			sample.desc,
			sample.valueType,
			sample.value,
			info.id, info.namespace, info.pod, info.container,
		)
	}

	return nil
}
func getContainerIdWithPrefix(container hcs.Properties) string {
switch container.Owner {
case "containerd-shim-runhcs-v1.exe":
return "containerd://" + containerDetails.ID
return "containerd://" + container.ID
default:
// default to docker or if owner is not set
return "docker://" + containerDetails.ID
return "docker://" + container.ID
}
}
// getContainerAnnotations reads the config.json stored for containerID below
// containerDStateDir and returns the decoded spec, of which only the
// annotations are populated.
//
// An error is returned if the file cannot be opened or does not contain valid
// JSON; the returned spec is empty in that case.
func getContainerAnnotations(containerID string) (ociSpec, error) {
	configJSON, err := os.OpenFile(containerDStateDir+containerID+`\config.json`, os.O_RDONLY, 0)
	if err != nil {
		return ociSpec{}, fmt.Errorf("error in opening config.json file: %w", err)
	}

	// Release the file handle again; without this, one handle leaks for every
	// container whose annotations are looked up.
	defer func() {
		// The file is opened read-only, so a close error cannot lose data.
		_ = configJSON.Close()
	}()

	var annotations ociSpec

	if err = json.NewDecoder(configJSON).Decode(&annotations); err != nil {
		return ociSpec{}, fmt.Errorf("error in decoding config.json file: %w", err)
	}

	return annotations, nil
}
// calculatePrivateWorkingSetBytes returns the sum of the private working set
// sizes of the processes listed for the given job object.
//
// The process IDs are queried from the job object; each process is then
// opened, checked for membership in the job and queried for its VM counters.
// If any of these steps fails for a process, the whole calculation is aborted
// and 0 is returned together with the error.
func calculatePrivateWorkingSetBytes(jobObjectHandle windows.Handle) (uint64, error) {
var pidList kernel32.JobObjectBasicProcessIDList
// retLen carries the buffer size into the call and is also passed as the return-length pointer.
retLen := uint32(unsafe.Sizeof(pidList))
if err := windows.QueryInformationJobObject(
jobObjectHandle,
windows.JobObjectBasicProcessIdList,
uintptr(unsafe.Pointer(&pidList)),
retLen, &retLen); err != nil {
return 0, err
}
var (
privateWorkingSetBytes uint64
vmCounters kernel32.PROCESS_VM_COUNTERS
)
// Reuse retLen for the VM counters buffer; both variables are shared by all calls of the closure below.
retLen = uint32(unsafe.Sizeof(vmCounters))
// getPrivateWorkingSetBytes returns the private working set size of a single
// process, or 0 if the process is not part of the job object.
getPrivateWorkingSetBytes := func(pid uint32) (uint64, error) {
processHandle, err := windows.OpenProcess(windows.PROCESS_QUERY_LIMITED_INFORMATION, false, pid)
if err != nil {
return 0, fmt.Errorf("error in opening process: %w", err)
}
// Close the process handle when the closure returns; a close error is ignored.
defer func(fd windows.Handle) {
_ = windows.Close(fd)
}(processHandle)
var isInJob bool
if err := kernel32.IsProcessInJob(processHandle, jobObjectHandle, &isInJob); err != nil {
return 0, fmt.Errorf("error in checking if process is in job: %w", err)
}
// Processes that are not members of this job do not contribute to the sum.
if !isInJob {
return 0, nil
}
if err := windows.NtQueryInformationProcess(
processHandle,
windows.ProcessVmCounters,
unsafe.Pointer(&vmCounters),
retLen,
&retLen,
); err != nil {
return 0, fmt.Errorf("error in querying process information: %w", err)
}
return uint64(vmCounters.PrivateWorkingSetSize), nil
}
// Sum up the per-process values; the first failure aborts the calculation.
for _, pid := range pidList.PIDs() {
privateWorkingSetSize, err := getPrivateWorkingSetBytes(pid)
if err != nil {
return 0, fmt.Errorf("error in getting private working set bytes: %w", err)
}
privateWorkingSetBytes += privateWorkingSetSize
}
return privateWorkingSetBytes, nil
}

View File

@@ -25,9 +25,9 @@ import (
"strings"
"sync"
"github.com/Microsoft/hcsshim/osversion"
"github.com/alecthomas/kingpin/v2"
"github.com/prometheus-community/windows_exporter/internal/mi"
"github.com/prometheus-community/windows_exporter/internal/osversion"
"github.com/prometheus/client_golang/prometheus"
)

View File

@@ -20,7 +20,7 @@ package hyperv
import (
"fmt"
"github.com/Microsoft/hcsshim/osversion"
"github.com/prometheus-community/windows_exporter/internal/osversion"
"github.com/prometheus-community/windows_exporter/internal/pdh"
"github.com/prometheus-community/windows_exporter/internal/types"
"github.com/prometheus-community/windows_exporter/internal/utils"

View File

@@ -20,7 +20,7 @@ package hyperv
import (
"fmt"
"github.com/Microsoft/hcsshim/osversion"
"github.com/prometheus-community/windows_exporter/internal/osversion"
"github.com/prometheus-community/windows_exporter/internal/pdh"
"github.com/prometheus-community/windows_exporter/internal/types"
"github.com/prometheus-community/windows_exporter/internal/utils"

View File

@@ -20,8 +20,8 @@ package mscluster
import (
"fmt"
"github.com/Microsoft/hcsshim/osversion"
"github.com/prometheus-community/windows_exporter/internal/mi"
"github.com/prometheus-community/windows_exporter/internal/osversion"
"github.com/prometheus-community/windows_exporter/internal/types"
"github.com/prometheus/client_golang/prometheus"
)

View File

@@ -20,8 +20,8 @@ package mscluster
import (
"fmt"
"github.com/Microsoft/hcsshim/osversion"
"github.com/prometheus-community/windows_exporter/internal/mi"
"github.com/prometheus-community/windows_exporter/internal/osversion"
"github.com/prometheus-community/windows_exporter/internal/types"
"github.com/prometheus/client_golang/prometheus"
)

View File

@@ -30,6 +30,7 @@ import (
"github.com/prometheus-community/windows_exporter/internal/headers/netapi32"
"github.com/prometheus-community/windows_exporter/internal/headers/sysinfoapi"
"github.com/prometheus-community/windows_exporter/internal/mi"
"github.com/prometheus-community/windows_exporter/internal/osversion"
"github.com/prometheus-community/windows_exporter/internal/types"
"github.com/prometheus/client_golang/prometheus"
"golang.org/x/sys/windows"
@@ -118,10 +119,10 @@ func (c *Collector) Build(logger *slog.Logger, _ *mi.Session) error {
return fmt.Errorf("failed to get Windows version: %w", err)
}
version := windows.RtlGetVersion()
version := osversion.Get()
// Microsoft has decided to keep the major version as "10" for Windows 11, including the product name.
if version.BuildNumber >= 22000 {
if version.Build >= osversion.V21H2Win11 {
productName = strings.Replace(productName, " 10 ", " 11 ", 1)
}
@@ -131,10 +132,10 @@ func (c *Collector) Build(logger *slog.Logger, _ *mi.Session) error {
nil,
prometheus.Labels{
"product": productName,
"version": fmt.Sprintf("%d.%d.%d", version.MajorVersion, version.MinorVersion, version.BuildNumber),
"version": version.String(),
"major_version": strconv.FormatUint(uint64(version.MajorVersion), 10),
"minor_version": strconv.FormatUint(uint64(version.MinorVersion), 10),
"build_number": strconv.FormatUint(uint64(version.BuildNumber), 10),
"build_number": strconv.FormatUint(uint64(version.Build), 10),
"revision": revision,
},
)
@@ -365,7 +366,9 @@ func (c *Collector) getWindowsVersion() (string, string, error) {
return "", "", fmt.Errorf("failed to open registry key: %w", err)
}
defer ntKey.Close()
defer func(ntKey registry.Key) {
_ = ntKey.Close()
}(ntKey)
productName, _, err := ntKey.GetStringValue("ProductName")
if err != nil {

View File

@@ -30,7 +30,6 @@ import (
"github.com/prometheus-community/windows_exporter/internal/pdh"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
@@ -208,9 +207,9 @@ windows_performancecounter_processor_information_processor_time\{core="0,0",stat
promhttp.HandlerFor(registry, promhttp.HandlerOpts{ErrorHandling: promhttp.ContinueOnError}).ServeHTTP(rw, &http.Request{})
got := rw.Body.String()
assert.NotEmpty(t, got)
require.NotEmpty(t, got)
require.NotEmpty(t, tc.expectedMetrics)
assert.Regexp(t, tc.expectedMetrics, got)
require.Regexp(t, tc.expectedMetrics, got)
})
}
}

View File

@@ -28,7 +28,7 @@ type Object struct {
Type pdh.CounterType `json:"type" yaml:"type"`
Instances []string `json:"instances" yaml:"instances"`
Counters []Counter `json:"counters" yaml:"counters"`
InstanceLabel string `json:"instance_label" yaml:"instance_label"` //nolint:tagliatelle
InstanceLabel string `json:"instance_label" yaml:"instance_label"`
collector *pdh.Collector
perfDataObject any

View File

@@ -28,7 +28,6 @@ import (
"github.com/prometheus-community/windows_exporter/pkg/collector"
"github.com/prometheus/client_golang/prometheus"
dto "github.com/prometheus/client_model/go"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
@@ -70,7 +69,7 @@ func TestMultipleDirectories(t *testing.T) {
require.NoError(t, <-errCh)
for _, f := range []string{"dir1", "dir2", "dir3", "dir3sub"} {
assert.Contains(t, got, f)
require.Contains(t, got, f)
}
}
@@ -106,6 +105,6 @@ func TestDuplicateFileName(t *testing.T) {
require.ErrorContains(t, <-errCh, "duplicate filename detected")
assert.Contains(t, got, "file")
assert.NotContains(t, got, "sub_file")
require.Contains(t, got, "file")
require.NotContains(t, got, "sub_file")
}

View File

@@ -25,10 +25,10 @@ import (
"strings"
"time"
"github.com/Microsoft/hcsshim/osversion"
"github.com/alecthomas/kingpin/v2"
"github.com/prometheus-community/windows_exporter/internal/headers/kernel32"
"github.com/prometheus-community/windows_exporter/internal/mi"
"github.com/prometheus-community/windows_exporter/internal/osversion"
"github.com/prometheus-community/windows_exporter/internal/pdh"
"github.com/prometheus-community/windows_exporter/internal/types"
"github.com/prometheus/client_golang/prometheus"