From 6ffe504f7e46025902488c8909dde2a023abb893 Mon Sep 17 00:00:00 2001 From: Jody Treffers Date: Mon, 17 Jun 2019 11:56:26 +0200 Subject: [PATCH 01/12] Changed conversion from microseconds to seconds to match the description of the metric --- collector/mssql.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/collector/mssql.go b/collector/mssql.go index b322f7d7..93a55f64 100644 --- a/collector/mssql.go +++ b/collector/mssql.go @@ -2575,7 +2575,7 @@ func (c *MSSQLCollector) collectDatabaseReplica(ch chan<- prometheus.Metric, sql ch <- prometheus.MustNewConstMetric( c.DBReplicaTransactionDelay, prometheus.GaugeValue, - float64(v.TransactionDelay)*1000.0, + float64(v.TransactionDelay)/1000.0, sqlInstance, replicaName, ) } From c73f52338d9c17e329397b5f1207fe1ecf4b7568 Mon Sep 17 00:00:00 2001 From: Stewart Thomson Date: Fri, 12 Jul 2019 14:54:20 -0400 Subject: [PATCH 02/12] Added thermal zone information and documentation --- README.md | 1 + collector/thermalzone.go | 105 ++++++++++++++++++++++++++++++++++ docs/collector.thermalzone.md | 31 ++++++++++ 3 files changed, 137 insertions(+) create mode 100644 collector/thermalzone.go create mode 100644 docs/collector.thermalzone.md diff --git a/README.md b/README.md index 827db57b..0f73b644 100644 --- a/README.md +++ b/README.md @@ -33,6 +33,7 @@ Name | Description | Enabled by default [service](docs/collector.service.md) | Service state metrics | ✓ [system](docs/collector.system.md) | System calls | ✓ [tcp](docs/collector.tcp.md) | TCP connections | +[thermalzone](docs/collector.thermalzone.md) | Thermal information [textfile](docs/collector.textfile.md) | Read prometheus metrics from a text file | ✓ [vmware](docs/collector.vmware.md) | Performance counters installed by the Vmware Guest agent | diff --git a/collector/thermalzone.go b/collector/thermalzone.go new file mode 100644 index 00000000..bb90a940 --- /dev/null +++ b/collector/thermalzone.go @@ -0,0 +1,105 @@ +package collector + +import ( + "github.com/StackExchange/wmi" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/common/log" +) + +func init() { + Factories["thermalzone"] = NewThermalZoneCollector +} + +// A ThermalZoneCollector is a Prometheus collector for WMI Win32_PerfRawData_Counters_ThermalZoneInformation metrics +type ThermalZoneCollector struct { + HighPrecisionTemperature *prometheus.Desc + PercentPassiveLimit *prometheus.Desc + Temperature *prometheus.Desc + ThrottleReasons *prometheus.Desc +} + +// NewThermalZoneCollector ... +func NewThermalZoneCollector() (Collector, error) { + const subsystem = "thermalzone" + return &ThermalZoneCollector{ + HighPrecisionTemperature: prometheus.NewDesc( + prometheus.BuildFQName(Namespace, subsystem, "high_precision_temperature"), + "(HighPrecisionTemperature)", + nil, + nil, + ), + PercentPassiveLimit: prometheus.NewDesc( + prometheus.BuildFQName(Namespace, subsystem, "percent_passive_limit"), + "(PercentPassiveLimit)", + nil, + nil, + ), + Temperature: prometheus.NewDesc( + prometheus.BuildFQName(Namespace, subsystem, "temperature"), + "(Temperature)", + nil, + nil, + ), + ThrottleReasons: prometheus.NewDesc( + prometheus.BuildFQName(Namespace, subsystem, "throttle_reasons"), + "(ThrottleReasons)", + nil, + nil, + ), + }, nil +} + +// Collect sends the metric values for each metric +// to the provided prometheus Metric channel. +func (c *ThermalZoneCollector) Collect(ch chan<- prometheus.Metric) error { + if desc, err := c.collect(ch); err != nil { + log.Error("failed collecting thermalzone metrics:", desc, err) + return err + } + return nil +} + +// Win32_PerfRawData_Counters_ThermalZoneInformation docs: +// - +type Win32_PerfRawData_Counters_ThermalZoneInformation struct { + Name string + + HighPrecisionTemperature uint32 + PercentPassiveLimit uint32 + Temperature uint32 + ThrottleReasons uint32 +} + +func (c *ThermalZoneCollector) collect(ch chan<- prometheus.Metric) (*prometheus.Desc, error) { + var dst []Win32_PerfRawData_Counters_ThermalZoneInformation + q := queryAll(&dst) + if err := wmi.Query(q, &dst); err != nil { + return nil, err + } + + ch <- prometheus.MustNewConstMetric( + c.HighPrecisionTemperature, + prometheus.GaugeValue, + float64(dst[0].HighPrecisionTemperature), + ) + + ch <- prometheus.MustNewConstMetric( + c.PercentPassiveLimit, + prometheus.GaugeValue, + float64(dst[0].PercentPassiveLimit), + ) + + ch <- prometheus.MustNewConstMetric( + c.Temperature, + prometheus.GaugeValue, + float64(dst[0].Temperature), + ) + + ch <- prometheus.MustNewConstMetric( + c.ThrottleReasons, + prometheus.GaugeValue, + float64(dst[0].ThrottleReasons), + ) + + return nil, nil +} diff --git a/docs/collector.thermalzone.md b/docs/collector.thermalzone.md new file mode 100644 index 00000000..5a054ffd --- /dev/null +++ b/docs/collector.thermalzone.md @@ -0,0 +1,31 @@ +# tcp collector + +The thermalzone collector exposes metrics about system temps. Note that temperature is given in Kelvin + +||| +-|- +Metric name prefix | `thermalzone` +Classes | [`Win32_PerfRawData_Counters_ThermalZoneInformation`](https://msdn.microsoft.com/en-us/library/aa394341(v=vs.85).aspx) +Enabled by default? | No + +## Flags + +None + +## Metrics + +Name | Description | Type | Labels +-----|-------------|------|------- +`wmi_thermalzone_high_precision_temperature` | _Not yet documented_ | gauge | None +`wmi_thermalzone_percent_passive_limit` | _Not yet documented_ | gauge | None +`wmi_thermalzone_temperature ` | _Not yet documented_ | gauge | None +`wmi_thermalzone_throttle_reasons ` | _Not yet documented_ | gauge | None + +### Example metric +_This collector does not yet have explained examples, we would appreciate your help adding them!_ + +## Useful queries +_This collector does not yet have any useful queries added, we would appreciate your help adding them!_ + +## Alerting examples +_This collector does not yet have alerting examples, we would appreciate your help adding them!_ From 1a67ca54b6ecfb68840242d55ffe4be626213712 Mon Sep 17 00:00:00 2001 From: Stewart Thomson Date: Sat, 13 Jul 2019 08:55:10 -0400 Subject: [PATCH 03/12] Update collector.thermalzone.md Removed references to tcp in collector.thermalzone.md --- docs/collector.thermalzone.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/collector.thermalzone.md b/docs/collector.thermalzone.md index 5a054ffd..96ceac34 100644 --- a/docs/collector.thermalzone.md +++ b/docs/collector.thermalzone.md @@ -1,11 +1,11 @@ -# tcp collector +# thermalzone collector The thermalzone collector exposes metrics about system temps. Note that temperature is given in Kelvin ||| -|- Metric name prefix | `thermalzone` -Classes | [`Win32_PerfRawData_Counters_ThermalZoneInformation`](https://msdn.microsoft.com/en-us/library/aa394341(v=vs.85).aspx) +Classes | [`Win32_PerfRawData_Counters_ThermalZoneInformation`](https://wutils.com/wmi/root/cimv2/win32_perfrawdata_counters_thermalzoneinformation/#temperature_properties) Enabled by default? | No ## Flags From 8fc47669beeb7eb33ce9d3e60ddb6f9a20bb8dfe Mon Sep 17 00:00:00 2001 From: chrisbloemker Date: Sat, 13 Jul 2019 12:39:50 -0400 Subject: [PATCH 04/12] adding windows service example alerts/queries --- docs/collector.service.md | 38 +++++++++++++++++++++++++++++++++++--- 1 file changed, 35 insertions(+), 3 deletions(-) diff --git a/docs/collector.service.md b/docs/collector.service.md index 0af8c381..4000032e 100644 --- a/docs/collector.service.md +++ b/docs/collector.service.md @@ -66,10 +66,42 @@ A service can have any of the following statuses: Note that there is some overlap with service state. ### Example metric -_This collector does not yet have explained examples, we would appreciate your help adding them!_ +Lists the services that have a 'disabled' start mode. +``` +wmi_service_start_mode{exported_name=~"(mssqlserver|sqlserveragent)",start_mode="disabled"} +``` ## Useful queries -_This collector does not yet have any useful queries added, we would appreciate your help adding them!_ +Counts the number of Microsoft SQL Server/Agent Processes +``` +count(wmi_service_state{exported_name=~"(sqlserveragent|mssqlserver)",state="running"}) +``` ## Alerting examples -_This collector does not yet have alerting examples, we would appreciate your help adding them!_ +**prometheus.rules** +``` +groups: +- name: Microsoft SQL Server Alerts + rules: + + # Sends an alert when the 'sqlserveragent' service is not in the running state for 3 minutes. + - alert: SQL Server Agent DOWN + expr: wmi_service_state{name="SQL",exported_name="sqlserveragent",state="running"} == 0 + for: 3m + labels: + severity: high + annotations: + summary: "Service {{ $labels.exported_name }} down" + description: "Service {{ $labels.exported_name }} on instance {{ $labels.name }} has been down for more than 3 minutes." + + # Sends an alert when the 'mssqlserver' service is not in the running state for 3 minutes. + - alert: SQL Server DOWN + expr: wmi_service_state{name="SQL",exported_name="mssqlserver",state="running"} == 0 + for: 3m + labels: + severity: high + annotations: + summary: "Service {{ $labels.exported_name }} down" + description: "Service {{ $labels.exported_name }} on instance {{ $labels.name }} has been down for more than 3 minutes." +``` +In this example, `name` is the target label of the host. So each alert will be processed per host, which is then used in the alert description. From 47656b16bd681ca0d9d2adf8d9d38eb4b9bf6083 Mon Sep 17 00:00:00 2001 From: Stewart Thomson Date: Mon, 15 Jul 2019 09:50:02 -0400 Subject: [PATCH 05/12] - Removed HighPrecisionTemperature property and just mapped it to Temperature - Converted decikelvin to Celsius - Added a loop to get the values from each zone - Added documentation for percent passive limit and throttle reasons --- collector/thermalzone.go | 76 +++++++++++++++++------------------ docs/collector.thermalzone.md | 9 +++-- 2 files changed, 42 insertions(+), 43 deletions(-) diff --git a/collector/thermalzone.go b/collector/thermalzone.go index bb90a940..99693a55 100644 --- a/collector/thermalzone.go +++ b/collector/thermalzone.go @@ -12,38 +12,37 @@ func init() { // A ThermalZoneCollector is a Prometheus collector for WMI Win32_PerfRawData_Counters_ThermalZoneInformation metrics type ThermalZoneCollector struct { - HighPrecisionTemperature *prometheus.Desc - PercentPassiveLimit *prometheus.Desc - Temperature *prometheus.Desc - ThrottleReasons *prometheus.Desc + PercentPassiveLimit *prometheus.Desc + Temperature *prometheus.Desc + ThrottleReasons *prometheus.Desc } // NewThermalZoneCollector ... func NewThermalZoneCollector() (Collector, error) { const subsystem = "thermalzone" return &ThermalZoneCollector{ - HighPrecisionTemperature: prometheus.NewDesc( - prometheus.BuildFQName(Namespace, subsystem, "high_precision_temperature"), - "(HighPrecisionTemperature)", - nil, + Temperature: prometheus.NewDesc( + prometheus.BuildFQName(Namespace, subsystem, "temperature_celsius"), + "(Temperature)", + []string{ + "Name", + }, nil, ), PercentPassiveLimit: prometheus.NewDesc( prometheus.BuildFQName(Namespace, subsystem, "percent_passive_limit"), "(PercentPassiveLimit)", - nil, - nil, - ), - Temperature: prometheus.NewDesc( - prometheus.BuildFQName(Namespace, subsystem, "temperature"), - "(Temperature)", - nil, + []string{ + "Name", + }, nil, ), ThrottleReasons: prometheus.NewDesc( prometheus.BuildFQName(Namespace, subsystem, "throttle_reasons"), "(ThrottleReasons)", - nil, + []string{ + "Name", + }, nil, ), }, nil @@ -60,13 +59,12 @@ func (c *ThermalZoneCollector) Collect(ch chan<- prometheus.Metric) error { } // Win32_PerfRawData_Counters_ThermalZoneInformation docs: -// - +// https://wutils.com/wmi/root/cimv2/win32_perfrawdata_counters_thermalzoneinformation/ type Win32_PerfRawData_Counters_ThermalZoneInformation struct { Name string HighPrecisionTemperature uint32 PercentPassiveLimit uint32 - Temperature uint32 ThrottleReasons uint32 } @@ -77,29 +75,29 @@ func (c *ThermalZoneCollector) collect(ch chan<- prometheus.Metric) (*prometheus return nil, err } - ch <- prometheus.MustNewConstMetric( - c.HighPrecisionTemperature, - prometheus.GaugeValue, - float64(dst[0].HighPrecisionTemperature), - ) + for _, info := range dst { + //Divide by 10 and subtract 273.15 to convert decikelvin to celsius + ch <- prometheus.MustNewConstMetric( + c.Temperature, + prometheus.GaugeValue, + (float64(info.HighPrecisionTemperature)/10.0)-273.15, + info.Name, + ) - ch <- prometheus.MustNewConstMetric( - c.PercentPassiveLimit, - prometheus.GaugeValue, - float64(dst[0].PercentPassiveLimit), - ) + ch <- prometheus.MustNewConstMetric( + c.PercentPassiveLimit, + prometheus.GaugeValue, + float64(info.PercentPassiveLimit), + info.Name, + ) - ch <- prometheus.MustNewConstMetric( - c.Temperature, - prometheus.GaugeValue, - float64(dst[0].Temperature), - ) - - ch <- prometheus.MustNewConstMetric( - c.ThrottleReasons, - prometheus.GaugeValue, - float64(dst[0].ThrottleReasons), - ) + ch <- prometheus.MustNewConstMetric( + c.ThrottleReasons, + prometheus.GaugeValue, + float64(info.ThrottleReasons), + info.Name, + ) + } return nil, nil } diff --git a/docs/collector.thermalzone.md b/docs/collector.thermalzone.md index 96ceac34..71f36c75 100644 --- a/docs/collector.thermalzone.md +++ b/docs/collector.thermalzone.md @@ -16,10 +16,11 @@ None Name | Description | Type | Labels -----|-------------|------|------- -`wmi_thermalzone_high_precision_temperature` | _Not yet documented_ | gauge | None -`wmi_thermalzone_percent_passive_limit` | _Not yet documented_ | gauge | None -`wmi_thermalzone_temperature ` | _Not yet documented_ | gauge | None -`wmi_thermalzone_throttle_reasons ` | _Not yet documented_ | gauge | None +`wmi_thermalzone_percent_passive_limit` | % Passive Limit is the current limit this thermal zone is placing on the devices it controls. A limit of 100% indicates the devices are unconstrained. A limit of 0% indicates the devices are fully constrained. | gauge | None +`wmi_thermalzone_temperature_celsius ` | Temperature of the thermal zone, in degrees Celsius. | gauge | None +`wmi_thermalzone_throttle_reasons ` | Throttle Reasons indicate reasons why the thermal zone is limiting performance of the devices it controls. 0x0 – The zone is not throttled. 0x1 – The zone is throttled for thermal reasons. 0x2 – The zone is throttled to limit electrical current. | gauge | None + +[`Throttle reasons` source](https://docs.microsoft.com/en-us/windows-hardware/design/device-experiences/examples--requirements-and-diagnostics) ### Example metric _This collector does not yet have explained examples, we would appreciate your help adding them!_ From ce1005add8f9352aced450c2fe2ab98cf312f86e Mon Sep 17 00:00:00 2001 From: Calle Pettersson Date: Mon, 15 Jul 2019 19:54:23 +0200 Subject: [PATCH 06/12] Lower-case label name --- collector/thermalzone.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/collector/thermalzone.go b/collector/thermalzone.go index 99693a55..634de333 100644 --- a/collector/thermalzone.go +++ b/collector/thermalzone.go @@ -25,7 +25,7 @@ func NewThermalZoneCollector() (Collector, error) { prometheus.BuildFQName(Namespace, subsystem, "temperature_celsius"), "(Temperature)", []string{ - "Name", + "name", }, nil, ), @@ -33,7 +33,7 @@ func NewThermalZoneCollector() (Collector, error) { prometheus.BuildFQName(Namespace, subsystem, "percent_passive_limit"), "(PercentPassiveLimit)", []string{ - "Name", + "name", }, nil, ), @@ -41,7 +41,7 @@ func NewThermalZoneCollector() (Collector, error) { prometheus.BuildFQName(Namespace, subsystem, "throttle_reasons"), "(ThrottleReasons)", []string{ - "Name", + "name", }, nil, ), From 31a30474f12b2747a5a18bab27e98313466c0f32 Mon Sep 17 00:00:00 2001 From: chrisbloemker Date: Mon, 15 Jul 2019 23:27:15 -0400 Subject: [PATCH 07/12] changing label to instance to match common examples --- docs/collector.service.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/collector.service.md b/docs/collector.service.md index 4000032e..15aaa3ce 100644 --- a/docs/collector.service.md +++ b/docs/collector.service.md @@ -86,22 +86,22 @@ groups: # Sends an alert when the 'sqlserveragent' service is not in the running state for 3 minutes. - alert: SQL Server Agent DOWN - expr: wmi_service_state{name="SQL",exported_name="sqlserveragent",state="running"} == 0 + expr: wmi_service_state{instance="SQL",exported_name="sqlserveragent",state="running"} == 0 for: 3m labels: severity: high annotations: summary: "Service {{ $labels.exported_name }} down" - description: "Service {{ $labels.exported_name }} on instance {{ $labels.name }} has been down for more than 3 minutes." + description: "Service {{ $labels.exported_name }} on instance {{ $labels.instance }} has been down for more than 3 minutes." # Sends an alert when the 'mssqlserver' service is not in the running state for 3 minutes. - alert: SQL Server DOWN - expr: wmi_service_state{name="SQL",exported_name="mssqlserver",state="running"} == 0 + expr: wmi_service_state{instance="SQL",exported_name="mssqlserver",state="running"} == 0 for: 3m labels: severity: high annotations: summary: "Service {{ $labels.exported_name }} down" - description: "Service {{ $labels.exported_name }} on instance {{ $labels.name }} has been down for more than 3 minutes." + description: "Service {{ $labels.exported_name }} on instance {{ $labels.instance }} has been down for more than 3 minutes." ``` -In this example, `name` is the target label of the host. So each alert will be processed per host, which is then used in the alert description. +In this example, `instance` is the target label of the host. So each alert will be processed per host, which is then used in the alert description. From b348c245e8467cabb5e40c8b599220ba6879a652 Mon Sep 17 00:00:00 2001 From: Drew McMinn Date: Sat, 20 Jul 2019 11:18:34 -0500 Subject: [PATCH 08/12] add readme link to container collector doc --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 0f73b644..7714ef13 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,7 @@ Name | Description | Enabled by default [ad](docs/collector.ad.md) | Active Directory Domain Services | [cpu](docs/collector.cpu.md) | CPU usage | ✓ [cs](docs/collector.cs.md) | "Computer System" metrics (system properties, num cpus/total memory) | ✓ +[container](docs/collector.container.md) | Container metrics | ✓ [dns](docs/collector.dns.md) | DNS Server | [hyperv](docs/collector.hyperv.md) | Hyper-V hosts | [iis](docs/collector.iis.md) | IIS sites and applications | From 2c5e30d920690225d341a1ad56ab31d7a96efec1 Mon Sep 17 00:00:00 2001 From: Drew McMinn Date: Sat, 20 Jul 2019 11:19:40 -0500 Subject: [PATCH 09/12] remove enabled by default for container collector in readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 7714ef13..863b527e 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ Name | Description | Enabled by default [ad](docs/collector.ad.md) | Active Directory Domain Services | [cpu](docs/collector.cpu.md) | CPU usage | ✓ [cs](docs/collector.cs.md) | "Computer System" metrics (system properties, num cpus/total memory) | ✓ -[container](docs/collector.container.md) | Container metrics | ✓ +[container](docs/collector.container.md) | Container metrics | [dns](docs/collector.dns.md) | DNS Server | [hyperv](docs/collector.hyperv.md) | Hyper-V hosts | [iis](docs/collector.iis.md) | IIS sites and applications | From 3900504504b7a5392d8b291ffd1ed4714f0898aa Mon Sep 17 00:00:00 2001 From: Drewster727 Date: Sat, 20 Jul 2019 13:00:23 -0500 Subject: [PATCH 10/12] add container name --- collector/container.go | 53 +++++++++++++++++++++--------------------- 1 file changed, 27 insertions(+), 26 deletions(-) diff --git a/collector/container.go b/collector/container.go index 87e02e06..a997054b 100644 --- a/collector/container.go +++ b/collector/container.go @@ -45,7 +45,7 @@ func NewContainerMetricsCollector() (Collector, error) { ContainerAvailable: prometheus.NewDesc( prometheus.BuildFQName(Namespace, subsystem, "available"), "Available", - []string{"container_id"}, + []string{"container_id", "container_name"}, nil, ), ContainersCount: prometheus.NewDesc( @@ -57,73 +57,73 @@ func NewContainerMetricsCollector() (Collector, error) { UsageCommitBytes: prometheus.NewDesc( prometheus.BuildFQName(Namespace, subsystem, "memory_usage_commit_bytes"), "Memory Usage Commit Bytes", - []string{"container_id"}, + []string{"container_id", "container_name"}, nil, ), UsageCommitPeakBytes: prometheus.NewDesc( prometheus.BuildFQName(Namespace, subsystem, "memory_usage_commit_peak_bytes"), "Memory Usage Commit Peak Bytes", - []string{"container_id"}, + []string{"container_id", "container_name"}, nil, ), UsagePrivateWorkingSetBytes: prometheus.NewDesc( prometheus.BuildFQName(Namespace, subsystem, "memory_usage_private_working_set_bytes"), "Memory Usage Private Working Set Bytes", - []string{"container_id"}, + []string{"container_id", "container_name"}, nil, ), RuntimeTotal: prometheus.NewDesc( prometheus.BuildFQName(Namespace, subsystem, "cpu_usage_seconds_total"), "Total Run time in Seconds", - []string{"container_id"}, + []string{"container_id", "container_name"}, nil, ), RuntimeUser: prometheus.NewDesc( prometheus.BuildFQName(Namespace, subsystem, "cpu_usage_seconds_usermode"), "Run Time in User mode in Seconds", - []string{"container_id"}, + []string{"container_id", "container_name"}, nil, ), RuntimeKernel: prometheus.NewDesc( prometheus.BuildFQName(Namespace, subsystem, "cpu_usage_seconds_kernelmode"), "Run time in Kernel mode in Seconds", - []string{"container_id"}, + []string{"container_id", "container_name"}, nil, ), BytesReceived: prometheus.NewDesc( prometheus.BuildFQName(Namespace, subsystem, "network_receive_bytes_total"), "Bytes Received on Interface", - []string{"container_id", "interface"}, + []string{"container_id", "container_name", "interface"}, nil, ), BytesSent: prometheus.NewDesc( prometheus.BuildFQName(Namespace, subsystem, "network_transmit_bytes_total"), "Bytes Sent on Interface", - []string{"container_id", "interface"}, + []string{"container_id", "container_name", "interface"}, nil, ), PacketsReceived: prometheus.NewDesc( prometheus.BuildFQName(Namespace, subsystem, "network_receive_packets_total"), "Packets Received on Interface", - []string{"container_id", "interface"}, + []string{"container_id", "container_name", "interface"}, nil, ), PacketsSent: prometheus.NewDesc( prometheus.BuildFQName(Namespace, subsystem, "network_transmit_packets_total"), "Packets Sent on Interface", - []string{"container_id", "interface"}, + []string{"container_id", "container_name", "interface"}, nil, ), DroppedPacketsIncoming: prometheus.NewDesc( prometheus.BuildFQName(Namespace, subsystem, "network_receive_packets_dropped_total"), "Dropped Incoming Packets on Interface", - []string{"container_id", "interface"}, + []string{"container_id", "container_name", "interface"}, nil, ), DroppedPacketsOutgoing: prometheus.NewDesc( prometheus.BuildFQName(Namespace, subsystem, "network_transmit_packets_dropped_total"), "Dropped Outgoing Packets on Interface", - []string{"container_id", "interface"}, + []string{"container_id", "container_name", "interface"}, nil, ), }, nil @@ -169,6 +169,7 @@ func (c *ContainerMetricsCollector) collect(ch chan<- prometheus.Metric) (*prome for _, containerDetails := range containers { containerId := containerDetails.ID + containerName := containerDetails.Name container, err := hcsshim.OpenContainer(containerId) if container != nil { @@ -191,43 +192,43 @@ func (c *ContainerMetricsCollector) collect(ch chan<- prometheus.Metric) (*prome c.ContainerAvailable, prometheus.CounterValue, 1, - containerId, + containerId, containerName, ) ch <- prometheus.MustNewConstMetric( c.UsageCommitBytes, prometheus.GaugeValue, float64(cstats.Memory.UsageCommitBytes), - containerId, + containerId, containerName, ) ch <- prometheus.MustNewConstMetric( c.UsageCommitPeakBytes, prometheus.GaugeValue, float64(cstats.Memory.UsageCommitPeakBytes), - containerId, + containerId, containerName, ) ch <- prometheus.MustNewConstMetric( c.UsagePrivateWorkingSetBytes, prometheus.GaugeValue, float64(cstats.Memory.UsagePrivateWorkingSetBytes), - containerId, + containerId, containerName, ) ch <- prometheus.MustNewConstMetric( c.RuntimeTotal, prometheus.CounterValue, float64(cstats.Processor.TotalRuntime100ns)*ticksToSecondsScaleFactor, - containerId, + containerId, containerName, ) ch <- prometheus.MustNewConstMetric( c.RuntimeUser, prometheus.CounterValue, float64(cstats.Processor.RuntimeUser100ns)*ticksToSecondsScaleFactor, - containerId, + containerId, containerName, ) ch <- prometheus.MustNewConstMetric( c.RuntimeKernel, prometheus.CounterValue, float64(cstats.Processor.RuntimeKernel100ns)*ticksToSecondsScaleFactor, - containerId, + containerId, containerName, ) if len(cstats.Network) == 0 { @@ -242,37 +243,37 @@ func (c *ContainerMetricsCollector) collect(ch chan<- prometheus.Metric) (*prome c.BytesReceived, prometheus.CounterValue, float64(networkInterface.BytesReceived), - containerId, networkInterface.EndpointId, + containerId, containerName, networkInterface.EndpointId, ) ch <- prometheus.MustNewConstMetric( c.BytesSent, prometheus.CounterValue, float64(networkInterface.BytesSent), - containerId, networkInterface.EndpointId, + containerId, containerName, networkInterface.EndpointId, ) ch <- prometheus.MustNewConstMetric( c.PacketsReceived, prometheus.CounterValue, float64(networkInterface.PacketsReceived), - containerId, networkInterface.EndpointId, + containerId, containerName, networkInterface.EndpointId, ) ch <- prometheus.MustNewConstMetric( c.PacketsSent, prometheus.CounterValue, float64(networkInterface.PacketsSent), - containerId, networkInterface.EndpointId, + containerId, containerName, networkInterface.EndpointId, ) ch <- prometheus.MustNewConstMetric( c.DroppedPacketsIncoming, prometheus.CounterValue, float64(networkInterface.DroppedPacketsIncoming), - containerId, networkInterface.EndpointId, + containerId, containerName, networkInterface.EndpointId, ) ch <- prometheus.MustNewConstMetric( c.DroppedPacketsOutgoing, prometheus.CounterValue, float64(networkInterface.DroppedPacketsOutgoing), - containerId, networkInterface.EndpointId, + containerId, containerName, networkInterface.EndpointId, ) break } From 72de1995287935c494a646328ca5e433249e0d95 Mon Sep 17 00:00:00 2001 From: Calle Pettersson Date: Sat, 3 Aug 2019 11:10:35 +0200 Subject: [PATCH 11/12] Revert "Add container_name label to container collector" --- README.md | 1 - collector/container.go | 53 +++++++++++++++++++++--------------------- 2 files changed, 26 insertions(+), 28 deletions(-) diff --git a/README.md b/README.md index 863b527e..0f73b644 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,6 @@ Name | Description | Enabled by default [ad](docs/collector.ad.md) | Active Directory Domain Services | [cpu](docs/collector.cpu.md) | CPU usage | ✓ [cs](docs/collector.cs.md) | "Computer System" metrics (system properties, num cpus/total memory) | ✓ -[container](docs/collector.container.md) | Container metrics | [dns](docs/collector.dns.md) | DNS Server | [hyperv](docs/collector.hyperv.md) | Hyper-V hosts | [iis](docs/collector.iis.md) | IIS sites and applications | diff --git a/collector/container.go b/collector/container.go index a997054b..87e02e06 100644 --- a/collector/container.go +++ b/collector/container.go @@ -45,7 +45,7 @@ func NewContainerMetricsCollector() (Collector, error) { ContainerAvailable: prometheus.NewDesc( prometheus.BuildFQName(Namespace, subsystem, "available"), "Available", - []string{"container_id", "container_name"}, + []string{"container_id"}, nil, ), ContainersCount: prometheus.NewDesc( @@ -57,73 +57,73 @@ func NewContainerMetricsCollector() (Collector, error) { UsageCommitBytes: prometheus.NewDesc( prometheus.BuildFQName(Namespace, subsystem, "memory_usage_commit_bytes"), "Memory Usage Commit Bytes", - []string{"container_id", "container_name"}, + []string{"container_id"}, nil, ), UsageCommitPeakBytes: prometheus.NewDesc( prometheus.BuildFQName(Namespace, subsystem, "memory_usage_commit_peak_bytes"), "Memory Usage Commit Peak Bytes", - []string{"container_id", "container_name"}, + []string{"container_id"}, nil, ), UsagePrivateWorkingSetBytes: prometheus.NewDesc( prometheus.BuildFQName(Namespace, subsystem, "memory_usage_private_working_set_bytes"), "Memory Usage Private Working Set Bytes", - []string{"container_id", "container_name"}, + []string{"container_id"}, nil, ), RuntimeTotal: prometheus.NewDesc( prometheus.BuildFQName(Namespace, subsystem, "cpu_usage_seconds_total"), "Total Run time in Seconds", - []string{"container_id", "container_name"}, + []string{"container_id"}, nil, ), RuntimeUser: prometheus.NewDesc( prometheus.BuildFQName(Namespace, subsystem, "cpu_usage_seconds_usermode"), "Run Time in User mode in Seconds", - []string{"container_id", "container_name"}, + []string{"container_id"}, nil, ), RuntimeKernel: prometheus.NewDesc( prometheus.BuildFQName(Namespace, subsystem, "cpu_usage_seconds_kernelmode"), "Run time in Kernel mode in Seconds", - []string{"container_id", "container_name"}, + []string{"container_id"}, nil, ), BytesReceived: prometheus.NewDesc( prometheus.BuildFQName(Namespace, subsystem, "network_receive_bytes_total"), "Bytes Received on Interface", - []string{"container_id", "container_name", "interface"}, + []string{"container_id", "interface"}, nil, ), BytesSent: prometheus.NewDesc( prometheus.BuildFQName(Namespace, subsystem, "network_transmit_bytes_total"), "Bytes Sent on Interface", - []string{"container_id", "container_name", "interface"}, + []string{"container_id", "interface"}, nil, ), PacketsReceived: prometheus.NewDesc( prometheus.BuildFQName(Namespace, subsystem, "network_receive_packets_total"), "Packets Received on Interface", - []string{"container_id", "container_name", "interface"}, + []string{"container_id", "interface"}, nil, ), PacketsSent: prometheus.NewDesc( prometheus.BuildFQName(Namespace, subsystem, "network_transmit_packets_total"), "Packets Sent on Interface", - []string{"container_id", "container_name", "interface"}, + []string{"container_id", "interface"}, nil, ), DroppedPacketsIncoming: prometheus.NewDesc( prometheus.BuildFQName(Namespace, subsystem, "network_receive_packets_dropped_total"), "Dropped Incoming Packets on Interface", - []string{"container_id", "container_name", "interface"}, + []string{"container_id", "interface"}, nil, ), DroppedPacketsOutgoing: prometheus.NewDesc( prometheus.BuildFQName(Namespace, subsystem, "network_transmit_packets_dropped_total"), "Dropped Outgoing Packets on Interface", - []string{"container_id", "container_name", "interface"}, + []string{"container_id", "interface"}, nil, ), }, nil @@ -169,7 +169,6 @@ func (c *ContainerMetricsCollector) collect(ch chan<- prometheus.Metric) (*prome for _, containerDetails := range containers { containerId := containerDetails.ID - containerName := containerDetails.Name container, err := hcsshim.OpenContainer(containerId) if container != nil { @@ -192,43 +191,43 @@ func (c *ContainerMetricsCollector) collect(ch chan<- prometheus.Metric) (*prome c.ContainerAvailable, prometheus.CounterValue, 1, - containerId, containerName, + containerId, ) ch <- prometheus.MustNewConstMetric( c.UsageCommitBytes, prometheus.GaugeValue, float64(cstats.Memory.UsageCommitBytes), - containerId, containerName, + containerId, ) ch <- prometheus.MustNewConstMetric( c.UsageCommitPeakBytes, prometheus.GaugeValue, float64(cstats.Memory.UsageCommitPeakBytes), - containerId, containerName, + containerId, ) ch <- prometheus.MustNewConstMetric( c.UsagePrivateWorkingSetBytes, prometheus.GaugeValue, float64(cstats.Memory.UsagePrivateWorkingSetBytes), - containerId, containerName, + containerId, ) ch <- prometheus.MustNewConstMetric( c.RuntimeTotal, prometheus.CounterValue, float64(cstats.Processor.TotalRuntime100ns)*ticksToSecondsScaleFactor, - containerId, containerName, + containerId, ) ch <- prometheus.MustNewConstMetric( c.RuntimeUser, prometheus.CounterValue, float64(cstats.Processor.RuntimeUser100ns)*ticksToSecondsScaleFactor, - containerId, containerName, + containerId, ) ch <- prometheus.MustNewConstMetric( c.RuntimeKernel, prometheus.CounterValue, float64(cstats.Processor.RuntimeKernel100ns)*ticksToSecondsScaleFactor, - containerId, containerName, + containerId, ) if len(cstats.Network) == 0 { @@ -243,37 +242,37 @@ func (c *ContainerMetricsCollector) collect(ch chan<- prometheus.Metric) (*prome c.BytesReceived, prometheus.CounterValue, float64(networkInterface.BytesReceived), - containerId, containerName, networkInterface.EndpointId, + containerId, networkInterface.EndpointId, ) ch <- prometheus.MustNewConstMetric( c.BytesSent, prometheus.CounterValue, float64(networkInterface.BytesSent), - containerId, containerName, networkInterface.EndpointId, + containerId, networkInterface.EndpointId, ) ch <- prometheus.MustNewConstMetric( c.PacketsReceived, prometheus.CounterValue, float64(networkInterface.PacketsReceived), - containerId, containerName, networkInterface.EndpointId, + containerId, networkInterface.EndpointId, ) ch <- prometheus.MustNewConstMetric( c.PacketsSent, prometheus.CounterValue, float64(networkInterface.PacketsSent), - containerId, containerName, networkInterface.EndpointId, + containerId, networkInterface.EndpointId, ) ch <- prometheus.MustNewConstMetric( c.DroppedPacketsIncoming, prometheus.CounterValue, float64(networkInterface.DroppedPacketsIncoming), - containerId, containerName, networkInterface.EndpointId, + containerId, networkInterface.EndpointId, ) ch <- prometheus.MustNewConstMetric( c.DroppedPacketsOutgoing, prometheus.CounterValue, float64(networkInterface.DroppedPacketsOutgoing), - containerId, containerName, networkInterface.EndpointId, + containerId, networkInterface.EndpointId, ) break } From f4ab322e5b8c320e28adcd1dff679b97b47f63ad Mon Sep 17 00:00:00 2001 From: Calle Pettersson Date: Sat, 3 Aug 2019 11:12:56 +0200 Subject: [PATCH 12/12] Keep the docs link --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 0f73b644..863b527e 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,7 @@ Name | Description | Enabled by default [ad](docs/collector.ad.md) | Active Directory Domain Services | [cpu](docs/collector.cpu.md) | CPU usage | ✓ [cs](docs/collector.cs.md) | "Computer System" metrics (system properties, num cpus/total memory) | ✓ +[container](docs/collector.container.md) | Container metrics | [dns](docs/collector.dns.md) | DNS Server | [hyperv](docs/collector.hyperv.md) | Hyper-V hosts | [iis](docs/collector.iis.md) | IIS sites and applications |