Compare commits

..

87 Commits

Author SHA1 Message Date
Calle Pettersson
012b938b54 Merge pull request #402 from breed808/perf_mem
Use perflib for memory collector
2019-10-09 21:16:46 +02:00
Calle Pettersson
a0e5baa171 Merge pull request #403 from breed808/perf_net
Use perflib for net collector
2019-10-09 21:15:55 +02:00
Calle Pettersson
7611e33bc7 Merge pull request #405 from breed808/perf_system
Use perflib for system collector
2019-10-09 21:15:06 +02:00
Ben Reedy
2aafa9ebf3 Use perflib for system collector 2019-10-08 20:59:50 +10:00
Ben Reedy
f9f27b0b97 Use perflib for net collector 2019-10-08 20:57:09 +10:00
Ben Reedy
18128f48f5 Use perflib for memory collector 2019-10-08 20:52:44 +10:00
Calle Pettersson
2688847c2e Merge pull request #401 from breed808/adfs_loop
ADFS: explicitly use first perflib result
2019-10-07 17:47:51 +02:00
Calle Pettersson
1c605adb5e Merge pull request #400 from breed808/perf
Use Perflib for logical_disk exporter
2019-10-07 17:47:05 +02:00
Ben Reedy
d0877d0dc0 Update logical_disk docs to Perflib counter 2019-10-04 21:05:17 +10:00
Ben Reedy
2cd630fb2f Use ticks to seconds scale for latency metrics
Latency metrics were previously exposing as ticks
2019-10-04 21:05:17 +10:00
Ben Reedy
b210986181 Use perflib for logical_disk collector 2019-10-04 21:05:17 +10:00
Calle Pettersson
abd5a53045 Merge pull request #406 from floptical/master
Another msiexec install example
2019-10-01 17:53:51 +02:00
Calle Pettersson
aa394d1d8e Merge pull request #411 from breed808/established_gauge
Set tcp_connections_established to gauge type
2019-10-01 17:53:13 +02:00
Calle Pettersson
bdcc7b0913 Merge pull request #412 from Schlump/patch-1
Update collector.hyperv.md
2019-10-01 17:52:17 +02:00
Schlump
d7a908e6c0 Update collector.hyperv.md
Typo in metrics label.
Metrics exposed for Hyper-V collector are actually named "wmi_hyperv" and not "wmi_hyper".
2019-10-01 11:16:46 +02:00
Ben Reedy
c23a98ae90 Set tcp_connections_established to gauge type
While the ConnectionsEstablished property in the
Win32_PerfRawData_Tcpip_TCP class is listed as a counter, real-world
metric values have been shown to increase *and* decrease.

Documentation for the property states "Number of TCP connections for
which the *current* state is either ESTABLISHED or CLOSE-WAIT" which
would imply the metric is a gauge.
2019-09-27 19:48:16 +10:00
floptical
f8a7c99092 Another msiexec install example
msiexec install example for older windows versions
2019-09-25 15:48:36 -04:00
Ben Reedy
29b020999d Explicitly use first ADFS result
Perflib ADFS only returns a single data result, so looping over data is
unnecessary
2019-09-23 19:24:48 +10:00
Calle Pettersson
2f0a57898f Merge pull request #399 from breed808/adfs
Add adfs collector
2019-09-17 18:41:21 +02:00
Ben Reedy
1ad20d6eb8 Add adfs collector
Perflib is used to collect base AD FS performance counters.
A subset of the total performance counters has been added, but more will
likely be added in the future.

Documentation for the AD FS counters is poor. As such, some counters
have been omitted until their nature can be interpreted.
2019-09-17 21:45:53 +10:00
Calle Pettersson
de000b74c8 Merge pull request #396 from charlesmorin/patch-1
Added a required detail for the .prom file to work properly
2019-09-04 14:46:45 +02:00
Charles Morin
d860d92dc8 Added a required detail for the .prom file to work properly
After adding the `role.prom` file on Windows on around 15 virtual machines, we discovered that if omit to insert an empty line feed at the end of the file, Prometheus won't get metrics from the virtual machine. Adding a new line fixes the issue and immediately start gathering metrics for the virtual machine.
2019-09-04 08:44:26 -04:00
Calle Pettersson
3a19fe4e7d Merge pull request #393 from breed808/net_gauge
Set current_bandwidth to gauge type
2019-08-28 22:18:47 +02:00
Calle Pettersson
26a468f17a Merge pull request #392 from breed808/doc
Additional collector documentation
2019-08-28 22:18:17 +02:00
Calle Pettersson
a6f3b33928 Merge pull request #394 from martinlindhe/fix-cgo-build-tag
Remove cgo build tag from container collector
2019-08-28 22:13:23 +02:00
Calle Pettersson
8ef215cc7e Remove cgo build tag from container collector 2019-08-28 22:06:20 +02:00
Ben Reedy
2c155a12bd Set current_bandwidth to gauge type
Bandwidth estimate for an interface may decrease or increase
2019-08-28 21:12:14 +10:00
Ben Reedy
e1141c3ec0 Add documentation for tcp collector 2019-08-28 21:06:23 +10:00
Ben Reedy
b635ecc6c1 Add documentation for os collector 2019-08-28 20:57:34 +10:00
Calle Pettersson
a7b5cf7aa6 Merge pull request #389 from breed808/doc
Add collector documentation
2019-08-27 07:57:45 +02:00
Ben Reedy
719ccd4f7f Add documentation for system collector 2019-08-26 21:07:55 +10:00
Ben Reedy
7ab8c7dde4 Add documentation for net collector 2019-08-26 21:01:09 +10:00
Ben Reedy
eb002eb667 Add documentation for memory collector 2019-08-22 22:39:15 +10:00
Ben Reedy
a1638cdf4c Add query examples to cpu collector documentation 2019-08-22 22:06:34 +10:00
Ben Reedy
091406877a Add documentation for logical_disk collector 2019-08-22 21:53:44 +10:00
Ben Reedy
84970ac086 Add logon entry to collectors README 2019-08-22 21:53:41 +10:00
Calle Pettersson
d86f318010 Merge pull request #387 from breed808/logical-disk-new-counters
Add logical_disk latency metrics
2019-08-22 08:53:42 +02:00
Ben Reedy
853d615673 Add logical_disk latency metrics 2019-08-22 03:30:40 +00:00
Calle Pettersson
cd9a740e2b Merge pull request #384 from breed808/logon
Export logon sessions metric
2019-08-21 11:05:25 +02:00
Ben Reedy
c70e7674a5 Add logon collector documentation 2019-08-21 18:35:10 +10:00
Ben Reedy
d3e3835c29 Export user logon sessions
Use Win32_LogonSession class to provide user logon sessions by type.
2019-08-20 22:27:03 +10:00
Calle Pettersson
592c8a8d69 Merge pull request #376 from martinlindhe/fix-goroutine-leak
Fix goroutine leak
2019-08-09 10:24:36 +02:00
Calle Pettersson
6f6a479535 Fix goroutine leak 2019-08-08 21:09:21 +02:00
Calle Pettersson
d01c66986c Merge pull request #372 from callvirtual/features/sql-transactions-collector
Add support for MSSQL Transactions counters
2019-08-04 17:12:44 +02:00
Поляков Игорь
823ffb7597 Merge branch 'features/sql-transactions-collector' of https://github.com/callvirtual/wmi_exporter into features/sql-transactions-collector 2019-08-04 17:13:46 +03:00
Поляков Игорь
a90f9cda0f Change transaction_conflict_ratio to total 2019-08-04 17:13:41 +03:00
Igor Polyakov
31d4c28124 Merge branch 'master' into features/sql-transactions-collector 2019-08-04 15:50:20 +03:00
Calle Pettersson
e880889f07 Merge pull request #335 from martinlindhe/perflib-cpu
Use perflib for cpu collector
2019-08-04 14:30:46 +02:00
Поляков Игорь
a283608812 Fix review comments 2019-08-04 13:47:00 +03:00
Поляков Игорь
8251ddd176 Docs for mssql transactions 2019-08-04 03:04:00 +03:00
Поляков Игорь
4f0a3a89ab Counter struct field names now are strict equal to WMI names 2019-08-04 02:46:33 +03:00
Поляков Игорь
27cc1072fe Initial transactions collector implementation 2019-08-04 00:55:28 +03:00
Calle Pettersson
eb9cf56dee Merge pull request #370 from callvirtual/sql-errors-collector
Add support for SQL Errors counter
2019-08-03 22:23:15 +02:00
Calle Pettersson
3c20887433 Refactor timeout management 2019-08-03 21:34:19 +02:00
Поляков Игорь
37d1c4e958 Errors counter renamed 2019-08-03 20:24:02 +03:00
Calle Pettersson
33b6e17b2d Merge pull request #369 from callvirtual/fix-new-collector
Fix collector creation issue
2019-08-03 15:51:41 +02:00
Calle Pettersson
1a9d4afdd6 Update generator template 2019-08-03 15:10:27 +02:00
Calle Pettersson
9e198c55a4 Adapt thermalzone to new signature 2019-08-03 15:09:00 +02:00
Calle Pettersson
b309a05bde Merge remote-tracking branch 'origin/master' into perflib-cpu 2019-08-03 15:06:30 +02:00
Calle Pettersson
123a055242 Simple test of Perflib unmarshalling 2019-08-03 14:39:28 +02:00
Calle Pettersson
9308108284 Don't fail when a perflib counter isn't found 2019-08-03 13:15:45 +02:00
Calle Pettersson
0ecf3cd792 Merge pull request #340 from martinlindhe/timeout-scrapes
Abort scrapes after configurable timeout
2019-08-03 13:02:52 +02:00
Calle Pettersson
801444b35b Merge pull request #371 from martinlindhe/revert-364-master
Revert "Add container_name label to container collector"
2019-08-03 11:13:15 +02:00
Calle Pettersson
f4ab322e5b Keep the docs link 2019-08-03 11:12:56 +02:00
Calle Pettersson
72de199528 Revert "Add container_name label to container collector" 2019-08-03 11:10:35 +02:00
Поляков Игорь
304972580d Fix parenthesis 2019-08-01 18:01:33 +03:00
Поляков Игорь
6322bb124f Undo ps1 changes 2019-08-01 18:00:42 +03:00
Поляков Игорь
cb6a91b705 Typo fix 2019-08-01 17:58:44 +03:00
Поляков Игорь
4d9fb1be72 Added missing comments 2019-08-01 17:57:45 +03:00
Поляков Игорь
27e26037e3 Added collector for SQLErrors counter 2019-08-01 17:55:44 +03:00
Поляков Игорь
e09497116f Fix new-collector wmi members initialization 2019-08-01 14:44:07 +03:00
Calle Pettersson
3099e10555 Merge pull request #364 from Drewster727/master
Add container collector doc link to readme
2019-07-21 20:49:24 +02:00
Drewster727
3900504504 add container name 2019-07-20 13:00:23 -05:00
Drew McMinn
2c5e30d920 remove enabled by default for container collector in readme 2019-07-20 11:19:40 -05:00
Drew McMinn
b348c245e8 add readme link to container collector doc 2019-07-20 11:18:34 -05:00
Calle Pettersson
578bcc4959 Merge pull request #360 from chrisbloemker/master
adding windows service example alerts/queries
2019-07-16 13:14:54 +02:00
chrisbloemker
31a30474f1 changing label to instance to match common examples 2019-07-15 23:27:15 -04:00
Calle Pettersson
ce1005add8 Lower-case label name 2019-07-15 19:54:23 +02:00
Calle Pettersson
6107a59306 Merge pull request #359 from StewartThomson/thermalzone
Added thermal zone information and documentation
2019-07-15 19:52:36 +02:00
Stewart Thomson
47656b16bd - Removed HighPrecisionTemperature property and just mapped it to Temperature
- Converted decikelvin to Celsius
- Added a loop to get the values from each zone
- Added documentation for percent passive limit and throttle reasons
2019-07-15 09:50:02 -04:00
chrisbloemker
8fc47669be adding windows service example alerts/queries 2019-07-13 12:39:50 -04:00
Stewart Thomson
1a67ca54b6 Update collector.thermalzone.md
Removed references to tcp in collector.thermalzone.md
2019-07-13 08:55:10 -04:00
Stewart Thomson
c73f52338d Added thermal zone information and documentation 2019-07-12 14:54:20 -04:00
Calle Pettersson
c5f23b4e64 Fix float-format 2019-06-24 21:55:33 +02:00
Calle Pettersson
411954cf9d Create custom metrics endpoint to read timeout from request header 2019-06-23 22:01:43 +02:00
Calle Pettersson
56be7c63d5 Merge pull request #351 from sqlkabouter/master
Changed conversion from microseconds to seconds to match the descript…
2019-06-17 19:16:00 +02:00
Jody Treffers
6ffe504f7e Changed conversion from microseconds to seconds to match the description of the metric 2019-06-17 11:56:26 +02:00
34 changed files with 1811 additions and 550 deletions

2
.gitignore vendored
View File

@@ -3,3 +3,5 @@ VERSION
*.swp
*.un~
output/
.vscode
.idea

View File

@@ -10,12 +10,15 @@ Prometheus exporter for Windows machines, using the WMI (Windows Management Inst
Name | Description | Enabled by default
---------|-------------|--------------------
[ad](docs/collector.ad.md) | Active Directory Domain Services |
[adfs](docs/collector.adfs.md) | Active Directory Federation Services |
[cpu](docs/collector.cpu.md) | CPU usage | ✓
[cs](docs/collector.cs.md) | "Computer System" metrics (system properties, num cpus/total memory) | ✓
[container](docs/collector.container.md) | Container metrics |
[dns](docs/collector.dns.md) | DNS Server |
[hyperv](docs/collector.hyperv.md) | Hyper-V hosts |
[iis](docs/collector.iis.md) | IIS sites and applications |
[logical_disk](docs/collector.logical_disk.md) | Logical disks, disk I/O | ✓
[logon](docs/collector.logon.md) | User logon sessions |
[memory](docs/collector.memory.md) | Memory usage metrics |
[msmq](docs/collector.msmq.md) | MSMQ queues |
[mssql](docs/collector.mssql.md) | [SQL Server Performance Objects](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/use-sql-server-objects#SQLServerPOs) metrics |
@@ -33,6 +36,7 @@ Name | Description | Enabled by default
[service](docs/collector.service.md) | Service state metrics | ✓
[system](docs/collector.system.md) | System calls | ✓
[tcp](docs/collector.tcp.md) | TCP connections |
[thermalzone](docs/collector.thermalzone.md) | Thermal information
[textfile](docs/collector.textfile.md) | Read prometheus metrics from a text file | ✓
[vmware](docs/collector.vmware.md) | Performance counters installed by the Vmware Guest agent |
@@ -65,6 +69,11 @@ Example service collector with a custom query.
msiexec /i <path-to-msi-file> ENABLED_COLLECTORS=os,service --% EXTRA_FLAGS="--collector.service.services-where ""Name LIKE 'sql%'"""
```
On some older versions of Windows you may need to surround parameter values with double quotes to get the install command parsing properly:
```powershell
msiexec /i C:\Users\Administrator\Downloads\wmi_exporter.msi ENABLED_COLLECTORS="ad,iis,logon,memory,process,tcp,thermalzone" TEXTFILE_DIR="C:\custom_metrics\"
```
## Roadmap
See [open issues](https://github.com/martinlindhe/wmi_exporter/issues)

188
collector/adfs.go Normal file
View File

@@ -0,0 +1,188 @@
// +build windows
package collector
import (
"github.com/prometheus/client_golang/prometheus"
)
func init() {
Factories["adfs"] = newADFSCollector
}
type adfsCollector struct {
adLoginConnectionFailures *prometheus.Desc
certificateAuthentications *prometheus.Desc
deviceAuthentications *prometheus.Desc
extranetAccountLockouts *prometheus.Desc
federatedAuthentications *prometheus.Desc
passportAuthentications *prometheus.Desc
passiveRequests *prometheus.Desc
passwordChangeFailed *prometheus.Desc
passwordChangeSucceeded *prometheus.Desc
tokenRequests *prometheus.Desc
windowsIntegratedAuthentications *prometheus.Desc
}
// newADFSCollector constructs a new adfsCollector
func newADFSCollector() (Collector, error) {
const subsystem = "adfs"
return &adfsCollector{
adLoginConnectionFailures: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "ad_login_connection_failures"),
"Total number of connection failures to an Active Directory domain controller",
nil,
nil,
),
certificateAuthentications: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "certificate_authentications"),
"Total number of User Certificate authentications",
nil,
nil,
),
deviceAuthentications: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "device_authentications"),
"Total number of Device authentications",
nil,
nil,
),
extranetAccountLockouts: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "extranet_account_lockouts"),
"Total number of Extranet Account Lockouts",
nil,
nil,
),
federatedAuthentications: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "federated_authentications"),
"Total number of authentications from a federated source",
nil,
nil,
),
passportAuthentications: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "passport_authentications"),
"Total number of Microsoft Passport SSO authentications",
nil,
nil,
),
passiveRequests: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "passive_requests"),
"Total number of passive (browser-based) requests",
nil,
nil,
),
passwordChangeFailed: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "password_change_failed"),
"Total number of failed password changes",
nil,
nil,
),
passwordChangeSucceeded: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "password_change_succeeded"),
"Total number of successful password changes",
nil,
nil,
),
tokenRequests: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "token_requests"),
"Total number of token requests",
nil,
nil,
),
windowsIntegratedAuthentications: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "windows_integrated_authentications"),
"Total number of Windows integrated authentications (Kerberos/NTLM)",
nil,
nil,
),
}, nil
}
type perflibADFS struct {
AdLoginConnectionFailures float64 `perflib:"AD login Connection Failures"`
CertificateAuthentications float64 `perflib:"Certificate Authentications"`
DeviceAuthentications float64 `perflib:"Device Authentications"`
ExtranetAccountLockouts float64 `perflib:"Extranet Account Lockouts"`
FederatedAuthentications float64 `perflib:"Federated Authentications"`
PassportAuthentications float64 `perflib:"Microsoft Passport Authentications"`
PassiveRequests float64 `perflib:"Passive Requests"`
PasswordChangeFailed float64 `perflib:"Password Change Failed Requests"`
PasswordChangeSucceeded float64 `perflib:"Password Change Successful Requests"`
TokenRequests float64 `perflib:"Token Requests"`
WindowsIntegratedAuthentications float64 `perflib:"Windows Integrated Authentications"`
}
func (c *adfsCollector) Collect(ctx *ScrapeContext, ch chan<- prometheus.Metric) error {
var adfsData []perflibADFS
err := unmarshalObject(ctx.perfObjects["AD FS"], &adfsData)
if err != nil {
return err
}
ch <- prometheus.MustNewConstMetric(
c.adLoginConnectionFailures,
prometheus.CounterValue,
adfsData[0].AdLoginConnectionFailures,
)
ch <- prometheus.MustNewConstMetric(
c.certificateAuthentications,
prometheus.CounterValue,
adfsData[0].CertificateAuthentications,
)
ch <- prometheus.MustNewConstMetric(
c.deviceAuthentications,
prometheus.CounterValue,
adfsData[0].DeviceAuthentications,
)
ch <- prometheus.MustNewConstMetric(
c.extranetAccountLockouts,
prometheus.CounterValue,
adfsData[0].ExtranetAccountLockouts,
)
ch <- prometheus.MustNewConstMetric(
c.federatedAuthentications,
prometheus.CounterValue,
adfsData[0].FederatedAuthentications,
)
ch <- prometheus.MustNewConstMetric(
c.passportAuthentications,
prometheus.CounterValue,
adfsData[0].PassportAuthentications,
)
ch <- prometheus.MustNewConstMetric(
c.passiveRequests,
prometheus.CounterValue,
adfsData[0].PassiveRequests,
)
ch <- prometheus.MustNewConstMetric(
c.passwordChangeFailed,
prometheus.CounterValue,
adfsData[0].PasswordChangeFailed,
)
ch <- prometheus.MustNewConstMetric(
c.passwordChangeSucceeded,
prometheus.CounterValue,
adfsData[0].PasswordChangeSucceeded,
)
ch <- prometheus.MustNewConstMetric(
c.tokenRequests,
prometheus.CounterValue,
adfsData[0].TokenRequests,
)
ch <- prometheus.MustNewConstMetric(
c.windowsIntegratedAuthentications,
prometheus.CounterValue,
adfsData[0].WindowsIntegratedAuthentications,
)
return nil
}

View File

@@ -1,8 +1,12 @@
package collector
import (
"strconv"
"github.com/leoluk/perflib_exporter/perflib"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/log"
"golang.org/x/sys/windows/registry"
)
// ...
@@ -15,6 +19,34 @@ const (
windowsEpoch = 116444736000000000
)
// getWindowsVersion reads the version number of the OS from the Registry
// See https://docs.microsoft.com/en-us/windows/desktop/sysinfo/operating-system-version
func getWindowsVersion() float64 {
k, err := registry.OpenKey(registry.LOCAL_MACHINE, `SOFTWARE\Microsoft\Windows NT\CurrentVersion`, registry.QUERY_VALUE)
if err != nil {
log.Warn("Couldn't open registry", err)
return 0
}
defer func() {
err = k.Close()
if err != nil {
log.Warnf("Failed to close registry key: %v", err)
}
}()
currentv, _, err := k.GetStringValue("CurrentVersion")
if err != nil {
log.Warn("Couldn't open registry to determine current Windows version:", err)
return 0
}
currentv_flt, err := strconv.ParseFloat(currentv, 64)
log.Debugf("Detected Windows version %f\n", currentv_flt)
return currentv_flt
}
// Factories ...
var Factories = make(map[string]func() (Collector, error))

View File

@@ -1,4 +1,4 @@
// +build windows,cgo
// +build windows
package collector

View File

@@ -3,46 +3,15 @@
package collector
import (
"strconv"
"strings"
"golang.org/x/sys/windows/registry"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/log"
)
func init() {
Factories["cpu"] = newCPUCollector
}
// A function to get Windows version from registry
func getWindowsVersion() float64 {
k, err := registry.OpenKey(registry.LOCAL_MACHINE, `SOFTWARE\Microsoft\Windows NT\CurrentVersion`, registry.QUERY_VALUE)
if err != nil {
log.Warn("Couldn't open registry", err)
return 0
}
defer func() {
err = k.Close()
if err != nil {
log.Warnf("Failed to close registry key: %v", err)
}
}()
currentv, _, err := k.GetStringValue("CurrentVersion")
if err != nil {
log.Warn("Couldn't open registry to determine current Windows version:", err)
return 0
}
currentv_flt, err := strconv.ParseFloat(currentv, 64)
log.Debugf("Detected Windows version %f\n", currentv_flt)
return currentv_flt
}
type cpuCollectorBasic struct {
CStateSecondsTotal *prometheus.Desc
TimeTotal *prometheus.Desc
@@ -67,10 +36,12 @@ func newCPUCollector() (Collector, error) {
const subsystem = "cpu"
version := getWindowsVersion()
// Windows version by number https://docs.microsoft.com/en-us/windows/desktop/sysinfo/operating-system-version
// For Windows 2008 or earlier Windows version is 6.0 or lower, where we only have the older "Processor" counters
// For Windows 2008 R2 or later Windows version is 6.1 or higher, so we can use "ProcessorInformation" counters
// Value 6.05 was selected just to split between Windows versions
// For Windows 2008 (version 6.0) or earlier we only have the "Processor"
// class. As of Windows 2008 R2 (version 6.1) the more detailed
// "ProcessorInformation" set is available (although some of the counters
// are added in later versions, so we aren't guaranteed to get all of
// them).
// Value 6.05 was selected to split between Windows versions.
if version < 6.05 {
return &cpuCollectorBasic{
CStateSecondsTotal: prometheus.NewDesc(

View File

@@ -6,7 +6,6 @@ import (
"fmt"
"regexp"
"github.com/StackExchange/wmi"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/log"
"gopkg.in/alecthomas/kingpin.v2"
@@ -27,19 +26,22 @@ var (
).Default("").String()
)
// A LogicalDiskCollector is a Prometheus collector for WMI Win32_PerfRawData_PerfDisk_LogicalDisk metrics
// A LogicalDiskCollector is a Prometheus collector for perflib logicalDisk metrics
type LogicalDiskCollector struct {
RequestsQueued *prometheus.Desc
ReadBytesTotal *prometheus.Desc
ReadsTotal *prometheus.Desc
WriteBytesTotal *prometheus.Desc
WritesTotal *prometheus.Desc
ReadTime *prometheus.Desc
WriteTime *prometheus.Desc
TotalSpace *prometheus.Desc
FreeSpace *prometheus.Desc
IdleTime *prometheus.Desc
SplitIOs *prometheus.Desc
RequestsQueued *prometheus.Desc
ReadBytesTotal *prometheus.Desc
ReadsTotal *prometheus.Desc
WriteBytesTotal *prometheus.Desc
WritesTotal *prometheus.Desc
ReadTime *prometheus.Desc
WriteTime *prometheus.Desc
TotalSpace *prometheus.Desc
FreeSpace *prometheus.Desc
IdleTime *prometheus.Desc
SplitIOs *prometheus.Desc
ReadLatency *prometheus.Desc
WriteLatency *prometheus.Desc
ReadWriteLatency *prometheus.Desc
volumeWhitelistPattern *regexp.Regexp
volumeBlacklistPattern *regexp.Regexp
@@ -127,6 +129,27 @@ func NewLogicalDiskCollector() (Collector, error) {
nil,
),
ReadLatency: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "read_latency_seconds_total"),
"Shows the average time, in seconds, of a read operation from the disk (LogicalDisk.AvgDiskSecPerRead)",
[]string{"volume"},
nil,
),
WriteLatency: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "write_latency_seconds_total"),
"Shows the average time, in seconds, of a write operation to the disk (LogicalDisk.AvgDiskSecPerWrite)",
[]string{"volume"},
nil,
),
ReadWriteLatency: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "read_write_latency_seconds_total"),
"Shows the time, in seconds, of the average disk transfer (LogicalDisk.AvgDiskSecPerTransfer)",
[]string{"volume"},
nil,
),
volumeWhitelistPattern: regexp.MustCompile(fmt.Sprintf("^(?:%s)$", *volumeWhitelist)),
volumeBlacklistPattern: regexp.MustCompile(fmt.Sprintf("^(?:%s)$", *volumeBlacklist)),
}, nil
@@ -135,7 +158,7 @@ func NewLogicalDiskCollector() (Collector, error) {
// Collect sends the metric values for each metric
// to the provided prometheus Metric channel.
func (c *LogicalDiskCollector) Collect(ctx *ScrapeContext, ch chan<- prometheus.Metric) error {
if desc, err := c.collect(ch); err != nil {
if desc, err := c.collect(ctx, ch); err != nil {
log.Error("failed collecting logical_disk metrics:", desc, err)
return err
}
@@ -145,25 +168,27 @@ func (c *LogicalDiskCollector) Collect(ctx *ScrapeContext, ch chan<- prometheus.
// Win32_PerfRawData_PerfDisk_LogicalDisk docs:
// - https://msdn.microsoft.com/en-us/windows/hardware/aa394307(v=vs.71) - Win32_PerfRawData_PerfDisk_LogicalDisk class
// - https://msdn.microsoft.com/en-us/library/ms803973.aspx - LogicalDisk object reference
type Win32_PerfRawData_PerfDisk_LogicalDisk struct {
type logicalDisk struct {
Name string
CurrentDiskQueueLength uint32
DiskReadBytesPerSec uint64
DiskReadsPerSec uint32
DiskWriteBytesPerSec uint64
DiskWritesPerSec uint32
PercentDiskReadTime uint64
PercentDiskWriteTime uint64
PercentFreeSpace uint32
PercentFreeSpace_Base uint32
PercentIdleTime uint64
SplitIOPerSec uint32
CurrentDiskQueueLength float64 `perflib:"Current Disk Queue Length"`
DiskReadBytesPerSec float64 `perflib:"Disk Read Bytes/sec"`
DiskReadsPerSec float64 `perflib:"Disk Reads/sec"`
DiskWriteBytesPerSec float64 `perflib:"Disk Write Bytes/sec"`
DiskWritesPerSec float64 `perflib:"Disk Writes/sec"`
PercentDiskReadTime float64 `perflib:"% Disk Read Time"`
PercentDiskWriteTime float64 `perflib:"% Disk Write Time"`
PercentFreeSpace float64 `perflib:"% Free Space_Base"`
PercentFreeSpace_Base float64 `perflib:"Free Megabytes"`
PercentIdleTime float64 `perflib:"% Idle Time"`
SplitIOPerSec float64 `perflib:"Split IO/Sec"`
AvgDiskSecPerRead float64 `perflib:"Avg. Disk sec/Read"`
AvgDiskSecPerWrite float64 `perflib:"Avg. Disk sec/Write"`
AvgDiskSecPerTransfer float64 `perflib:"Avg. Disk sec/Transfer"`
}
func (c *LogicalDiskCollector) collect(ch chan<- prometheus.Metric) (*prometheus.Desc, error) {
var dst []Win32_PerfRawData_PerfDisk_LogicalDisk
q := queryAll(&dst)
if err := wmi.Query(q, &dst); err != nil {
func (c *LogicalDiskCollector) collect(ctx *ScrapeContext, ch chan<- prometheus.Metric) (*prometheus.Desc, error) {
var dst []logicalDisk
if err := unmarshalObject(ctx.perfObjects["LogicalDisk"], &dst); err != nil {
return nil, err
}
@@ -177,77 +202,98 @@ func (c *LogicalDiskCollector) collect(ch chan<- prometheus.Metric) (*prometheus
ch <- prometheus.MustNewConstMetric(
c.RequestsQueued,
prometheus.GaugeValue,
float64(volume.CurrentDiskQueueLength),
volume.CurrentDiskQueueLength,
volume.Name,
)
ch <- prometheus.MustNewConstMetric(
c.ReadBytesTotal,
prometheus.CounterValue,
float64(volume.DiskReadBytesPerSec),
volume.DiskReadBytesPerSec,
volume.Name,
)
ch <- prometheus.MustNewConstMetric(
c.ReadsTotal,
prometheus.CounterValue,
float64(volume.DiskReadsPerSec),
volume.DiskReadsPerSec,
volume.Name,
)
ch <- prometheus.MustNewConstMetric(
c.WriteBytesTotal,
prometheus.CounterValue,
float64(volume.DiskWriteBytesPerSec),
volume.DiskWriteBytesPerSec,
volume.Name,
)
ch <- prometheus.MustNewConstMetric(
c.WritesTotal,
prometheus.CounterValue,
float64(volume.DiskWritesPerSec),
volume.DiskWritesPerSec,
volume.Name,
)
ch <- prometheus.MustNewConstMetric(
c.ReadTime,
prometheus.CounterValue,
float64(volume.PercentDiskReadTime)*ticksToSecondsScaleFactor,
volume.PercentDiskReadTime,
volume.Name,
)
ch <- prometheus.MustNewConstMetric(
c.WriteTime,
prometheus.CounterValue,
float64(volume.PercentDiskWriteTime)*ticksToSecondsScaleFactor,
volume.PercentDiskWriteTime,
volume.Name,
)
ch <- prometheus.MustNewConstMetric(
c.FreeSpace,
prometheus.GaugeValue,
float64(volume.PercentFreeSpace)*1024*1024,
volume.PercentFreeSpace_Base*1024*1024,
volume.Name,
)
ch <- prometheus.MustNewConstMetric(
c.TotalSpace,
prometheus.GaugeValue,
float64(volume.PercentFreeSpace_Base)*1024*1024,
volume.PercentFreeSpace*1024*1024,
volume.Name,
)
ch <- prometheus.MustNewConstMetric(
c.IdleTime,
prometheus.CounterValue,
float64(volume.PercentIdleTime)*ticksToSecondsScaleFactor,
volume.PercentIdleTime,
volume.Name,
)
ch <- prometheus.MustNewConstMetric(
c.SplitIOs,
prometheus.CounterValue,
float64(volume.SplitIOPerSec),
volume.SplitIOPerSec,
volume.Name,
)
ch <- prometheus.MustNewConstMetric(
c.ReadLatency,
prometheus.CounterValue,
volume.AvgDiskSecPerRead*ticksToSecondsScaleFactor,
volume.Name,
)
ch <- prometheus.MustNewConstMetric(
c.WriteLatency,
prometheus.CounterValue,
volume.AvgDiskSecPerWrite*ticksToSecondsScaleFactor,
volume.Name,
)
ch <- prometheus.MustNewConstMetric(
c.ReadWriteLatency,
prometheus.CounterValue,
volume.AvgDiskSecPerTransfer*ticksToSecondsScaleFactor,
volume.Name,
)
}

199
collector/logon.go Normal file
View File

@@ -0,0 +1,199 @@
// +build windows
package collector
import (
"errors"
"github.com/StackExchange/wmi"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/log"
)
func init() {
Factories["logon"] = NewLogonCollector
}
// A LogonCollector is a Prometheus collector for WMI metrics
type LogonCollector struct {
LogonType *prometheus.Desc
}
// NewLogonCollector ...
func NewLogonCollector() (Collector, error) {
const subsystem = "logon"
return &LogonCollector{
LogonType: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "logon_type"),
"Number of active logon sessions (LogonSession.LogonType)",
[]string{"status"},
nil,
),
}, nil
}
// Collect sends the metric values for each metric
// to the provided prometheus Metric channel.
func (c *LogonCollector) Collect(ctx *ScrapeContext, ch chan<- prometheus.Metric) error {
if desc, err := c.collect(ch); err != nil {
log.Error("failed collecting user metrics:", desc, err)
return err
}
return nil
}
// Win32_LogonSession docs:
// - https://docs.microsoft.com/en-us/windows/win32/cimwin32prov/win32-logonsession
type Win32_LogonSession struct {
LogonType uint32
}
func (c *LogonCollector) collect(ch chan<- prometheus.Metric) (*prometheus.Desc, error) {
var dst []Win32_LogonSession
q := queryAll(&dst)
if err := wmi.Query(q, &dst); err != nil {
return nil, err
}
if len(dst) == 0 {
return nil, errors.New("WMI query returned empty result set")
}
// Init counters
system := 0
interactive := 0
network := 0
batch := 0
service := 0
proxy := 0
unlock := 0
networkcleartext := 0
newcredentials := 0
remoteinteractive := 0
cachedinteractive := 0
cachedremoteinteractive := 0
cachedunlock := 0
for _, entry := range dst {
switch entry.LogonType {
case 0:
system++
case 2:
interactive++
case 3:
network++
case 4:
batch++
case 5:
service++
case 6:
proxy++
case 7:
unlock++
case 8:
networkcleartext++
case 9:
newcredentials++
case 10:
remoteinteractive++
case 11:
cachedinteractive++
case 12:
cachedremoteinteractive++
case 13:
cachedunlock++
}
}
ch <- prometheus.MustNewConstMetric(
c.LogonType,
prometheus.GaugeValue,
float64(system),
"system",
)
ch <- prometheus.MustNewConstMetric(
c.LogonType,
prometheus.GaugeValue,
float64(interactive),
"interactive",
)
ch <- prometheus.MustNewConstMetric(
c.LogonType,
prometheus.GaugeValue,
float64(network),
"network",
)
ch <- prometheus.MustNewConstMetric(
c.LogonType,
prometheus.GaugeValue,
float64(batch),
"batch",
)
ch <- prometheus.MustNewConstMetric(
c.LogonType,
prometheus.GaugeValue,
float64(service),
"service",
)
ch <- prometheus.MustNewConstMetric(
c.LogonType,
prometheus.GaugeValue,
float64(proxy),
"proxy",
)
ch <- prometheus.MustNewConstMetric(
c.LogonType,
prometheus.GaugeValue,
float64(unlock),
"unlock",
)
ch <- prometheus.MustNewConstMetric(
c.LogonType,
prometheus.GaugeValue,
float64(networkcleartext),
"network_clear_text",
)
ch <- prometheus.MustNewConstMetric(
c.LogonType,
prometheus.GaugeValue,
float64(newcredentials),
"new_credentials",
)
ch <- prometheus.MustNewConstMetric(
c.LogonType,
prometheus.GaugeValue,
float64(remoteinteractive),
"remote_interactive",
)
ch <- prometheus.MustNewConstMetric(
c.LogonType,
prometheus.GaugeValue,
float64(cachedinteractive),
"cached_interactive",
)
ch <- prometheus.MustNewConstMetric(
c.LogonType,
prometheus.GaugeValue,
float64(remoteinteractive),
"cached_remote_interactive",
)
ch <- prometheus.MustNewConstMetric(
c.LogonType,
prometheus.GaugeValue,
float64(cachedunlock),
"cached_unlock",
)
return nil, nil
}

View File

@@ -6,7 +6,6 @@
package collector
import (
"github.com/StackExchange/wmi"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/log"
)
@@ -15,7 +14,7 @@ func init() {
Factories["memory"] = NewMemoryCollector
}
// A MemoryCollector is a Prometheus collector for WMI Win32_PerfRawData_PerfOS_Memory metrics
// A MemoryCollector is a Prometheus collector for perflib Memory metrics
type MemoryCollector struct {
AvailableBytes *prometheus.Desc
CacheBytes *prometheus.Desc
@@ -257,247 +256,246 @@ func NewMemoryCollector() (Collector, error) {
// Collect sends the metric values for each metric
// to the provided prometheus Metric channel.
func (c *MemoryCollector) Collect(ctx *ScrapeContext, ch chan<- prometheus.Metric) error {
if desc, err := c.collect(ch); err != nil {
if desc, err := c.collect(ctx, ch); err != nil {
log.Error("failed collecting memory metrics:", desc, err)
return err
}
return nil
}
type Win32_PerfRawData_PerfOS_Memory struct {
AvailableBytes uint64
AvailableKBytes uint64
AvailableMBytes uint64
CacheBytes uint64
CacheBytesPeak uint64
CacheFaultsPersec uint32
CommitLimit uint64
CommittedBytes uint64
DemandZeroFaultsPersec uint32
FreeAndZeroPageListBytes uint64
FreeSystemPageTableEntries uint32
ModifiedPageListBytes uint64
PageFaultsPersec uint32
PageReadsPersec uint32
PagesInputPersec uint32
PagesOutputPersec uint32
PagesPersec uint32
PageWritesPersec uint32
PoolNonpagedAllocs uint32
PoolNonpagedBytes uint64
PoolPagedAllocs uint32
PoolPagedBytes uint64
PoolPagedResidentBytes uint64
StandbyCacheCoreBytes uint64
StandbyCacheNormalPriorityBytes uint64
StandbyCacheReserveBytes uint64
SystemCacheResidentBytes uint64
SystemCodeResidentBytes uint64
SystemCodeTotalBytes uint64
SystemDriverResidentBytes uint64
SystemDriverTotalBytes uint64
TransitionFaultsPersec uint32
TransitionPagesRePurposedPersec uint32
WriteCopiesPersec uint32
type memory struct {
AvailableBytes float64 `perflib:"Available Bytes"`
AvailableKBytes float64 `perflib:"Available KBytes"`
AvailableMBytes float64 `perflib:"Available MBytes"`
CacheBytes float64 `perflib:"Cache Bytes"`
CacheBytesPeak float64 `perflib:"Cache Bytes Peak"`
CacheFaultsPersec float64 `perflib:"Cache Faults/sec"`
CommitLimit float64 `perflib:"Commit Limit"`
CommittedBytes float64 `perflib:"Committed Bytes"`
DemandZeroFaultsPersec float64 `perflib:"Demand Zero Faults/sec"`
FreeAndZeroPageListBytes float64 `perflib:"Free & Zero Page List Bytes"`
FreeSystemPageTableEntries float64 `perflib:"Free System Page Table Entries"`
ModifiedPageListBytes float64 `perflib:"Modified Page List Bytes"`
PageFaultsPersec float64 `perflib:"Page Faults/sec"`
PageReadsPersec float64 `perflib:"Page Reads/sec"`
PagesInputPersec float64 `perflib:"Pages Input/sec"`
PagesOutputPersec float64 `perflib:"Pages Output/sec"`
PagesPersec float64 `perflib:"Pages/sec"`
PageWritesPersec float64 `perflib:"Page Writes/sec"`
PoolNonpagedAllocs float64 `perflib:"Pool Nonpaged Allocs"`
PoolNonpagedBytes float64 `perflib:"Pool Nonpaged Bytes"`
PoolPagedAllocs float64 `perflib:"Pool Paged Allocs"`
PoolPagedBytes float64 `perflib:"Pool Paged Bytes"`
PoolPagedResidentBytes float64 `perflib:"Pool Paged Resident Bytes"`
StandbyCacheCoreBytes float64 `perflib:"Standby Cache Core Bytes"`
StandbyCacheNormalPriorityBytes float64 `perflib:"Standby Cache Normal Priority Bytes"`
StandbyCacheReserveBytes float64 `perflib:"Standby Cache Reserve Bytes"`
SystemCacheResidentBytes float64 `perflib:"System Cache Resident Bytes"`
SystemCodeResidentBytes float64 `perflib:"System Code Resident Bytes"`
SystemCodeTotalBytes float64 `perflib:"System Code Total Bytes"`
SystemDriverResidentBytes float64 `perflib:"System Driver Resident Bytes"`
SystemDriverTotalBytes float64 `perflib:"System Driver Total Bytes"`
TransitionFaultsPersec float64 `perflib:"Transition Faults/sec"`
TransitionPagesRePurposedPersec float64 `perflib:"Transition Pages RePurposed/sec"`
WriteCopiesPersec float64 `perflib:"Write Copies/sec"`
}
func (c *MemoryCollector) collect(ch chan<- prometheus.Metric) (*prometheus.Desc, error) {
var dst []Win32_PerfRawData_PerfOS_Memory
q := queryAll(&dst)
if err := wmi.Query(q, &dst); err != nil {
func (c *MemoryCollector) collect(ctx *ScrapeContext, ch chan<- prometheus.Metric) (*prometheus.Desc, error) {
var dst []memory
if err := unmarshalObject(ctx.perfObjects["Memory"], &dst); err != nil {
return nil, err
}
ch <- prometheus.MustNewConstMetric(
c.AvailableBytes,
prometheus.GaugeValue,
float64(dst[0].AvailableBytes),
dst[0].AvailableBytes,
)
ch <- prometheus.MustNewConstMetric(
c.CacheBytes,
prometheus.GaugeValue,
float64(dst[0].CacheBytes),
dst[0].CacheBytes,
)
ch <- prometheus.MustNewConstMetric(
c.CacheBytesPeak,
prometheus.GaugeValue,
float64(dst[0].CacheBytesPeak),
dst[0].CacheBytesPeak,
)
ch <- prometheus.MustNewConstMetric(
c.CacheFaultsTotal,
prometheus.GaugeValue,
float64(dst[0].CacheFaultsPersec),
dst[0].CacheFaultsPersec,
)
ch <- prometheus.MustNewConstMetric(
c.CommitLimit,
prometheus.GaugeValue,
float64(dst[0].CommitLimit),
dst[0].CommitLimit,
)
ch <- prometheus.MustNewConstMetric(
c.CommittedBytes,
prometheus.GaugeValue,
float64(dst[0].CommittedBytes),
dst[0].CommittedBytes,
)
ch <- prometheus.MustNewConstMetric(
c.DemandZeroFaultsTotal,
prometheus.GaugeValue,
float64(dst[0].DemandZeroFaultsPersec),
dst[0].DemandZeroFaultsPersec,
)
ch <- prometheus.MustNewConstMetric(
c.FreeAndZeroPageListBytes,
prometheus.GaugeValue,
float64(dst[0].FreeAndZeroPageListBytes),
dst[0].FreeAndZeroPageListBytes,
)
ch <- prometheus.MustNewConstMetric(
c.FreeSystemPageTableEntries,
prometheus.GaugeValue,
float64(dst[0].FreeSystemPageTableEntries),
dst[0].FreeSystemPageTableEntries,
)
ch <- prometheus.MustNewConstMetric(
c.ModifiedPageListBytes,
prometheus.GaugeValue,
float64(dst[0].ModifiedPageListBytes),
dst[0].ModifiedPageListBytes,
)
ch <- prometheus.MustNewConstMetric(
c.PageFaultsTotal,
prometheus.GaugeValue,
float64(dst[0].PageFaultsPersec),
dst[0].PageFaultsPersec,
)
ch <- prometheus.MustNewConstMetric(
c.SwapPageReadsTotal,
prometheus.GaugeValue,
float64(dst[0].PageReadsPersec),
dst[0].PageReadsPersec,
)
ch <- prometheus.MustNewConstMetric(
c.SwapPagesReadTotal,
prometheus.GaugeValue,
float64(dst[0].PagesInputPersec),
dst[0].PagesInputPersec,
)
ch <- prometheus.MustNewConstMetric(
c.SwapPagesWrittenTotal,
prometheus.GaugeValue,
float64(dst[0].PagesOutputPersec),
dst[0].PagesOutputPersec,
)
ch <- prometheus.MustNewConstMetric(
c.SwapPageOperationsTotal,
prometheus.GaugeValue,
float64(dst[0].PagesPersec),
dst[0].PagesPersec,
)
ch <- prometheus.MustNewConstMetric(
c.SwapPageWritesTotal,
prometheus.GaugeValue,
float64(dst[0].PageWritesPersec),
dst[0].PageWritesPersec,
)
ch <- prometheus.MustNewConstMetric(
c.PoolNonpagedAllocsTotal,
prometheus.GaugeValue,
float64(dst[0].PoolNonpagedAllocs),
dst[0].PoolNonpagedAllocs,
)
ch <- prometheus.MustNewConstMetric(
c.PoolNonpagedBytes,
prometheus.GaugeValue,
float64(dst[0].PoolNonpagedBytes),
dst[0].PoolNonpagedBytes,
)
ch <- prometheus.MustNewConstMetric(
c.PoolPagedAllocsTotal,
prometheus.GaugeValue,
float64(dst[0].PoolPagedAllocs),
dst[0].PoolPagedAllocs,
)
ch <- prometheus.MustNewConstMetric(
c.PoolPagedBytes,
prometheus.GaugeValue,
float64(dst[0].PoolPagedBytes),
dst[0].PoolPagedBytes,
)
ch <- prometheus.MustNewConstMetric(
c.PoolPagedResidentBytes,
prometheus.GaugeValue,
float64(dst[0].PoolPagedResidentBytes),
dst[0].PoolPagedResidentBytes,
)
ch <- prometheus.MustNewConstMetric(
c.StandbyCacheCoreBytes,
prometheus.GaugeValue,
float64(dst[0].StandbyCacheCoreBytes),
dst[0].StandbyCacheCoreBytes,
)
ch <- prometheus.MustNewConstMetric(
c.StandbyCacheNormalPriorityBytes,
prometheus.GaugeValue,
float64(dst[0].StandbyCacheNormalPriorityBytes),
dst[0].StandbyCacheNormalPriorityBytes,
)
ch <- prometheus.MustNewConstMetric(
c.StandbyCacheReserveBytes,
prometheus.GaugeValue,
float64(dst[0].StandbyCacheReserveBytes),
dst[0].StandbyCacheReserveBytes,
)
ch <- prometheus.MustNewConstMetric(
c.SystemCacheResidentBytes,
prometheus.GaugeValue,
float64(dst[0].SystemCacheResidentBytes),
dst[0].SystemCacheResidentBytes,
)
ch <- prometheus.MustNewConstMetric(
c.SystemCodeResidentBytes,
prometheus.GaugeValue,
float64(dst[0].SystemCodeResidentBytes),
dst[0].SystemCodeResidentBytes,
)
ch <- prometheus.MustNewConstMetric(
c.SystemCodeTotalBytes,
prometheus.GaugeValue,
float64(dst[0].SystemCodeTotalBytes),
dst[0].SystemCodeTotalBytes,
)
ch <- prometheus.MustNewConstMetric(
c.SystemDriverResidentBytes,
prometheus.GaugeValue,
float64(dst[0].SystemDriverResidentBytes),
dst[0].SystemDriverResidentBytes,
)
ch <- prometheus.MustNewConstMetric(
c.SystemDriverTotalBytes,
prometheus.GaugeValue,
float64(dst[0].SystemDriverTotalBytes),
dst[0].SystemDriverTotalBytes,
)
ch <- prometheus.MustNewConstMetric(
c.TransitionFaultsTotal,
prometheus.GaugeValue,
float64(dst[0].TransitionFaultsPersec),
dst[0].TransitionFaultsPersec,
)
ch <- prometheus.MustNewConstMetric(
c.TransitionPagesRepurposedTotal,
prometheus.GaugeValue,
float64(dst[0].TransitionPagesRePurposedPersec),
dst[0].TransitionPagesRePurposedPersec,
)
ch <- prometheus.MustNewConstMetric(
c.WriteCopiesTotal,
prometheus.GaugeValue,
float64(dst[0].WriteCopiesPersec),
dst[0].WriteCopiesPersec,
)
return nil, nil

View File

@@ -91,7 +91,7 @@ func mssqlBuildWMIInstanceClass(suffix string, instance string) string {
type mssqlCollectorsMap map[string]mssqlCollectorFunc
func mssqlAvailableClassCollectors() string {
return "accessmethods,availreplica,bufman,databases,dbreplica,genstats,locks,memmgr,sqlstats"
return "accessmethods,availreplica,bufman,databases,dbreplica,genstats,locks,memmgr,sqlstats,sqlerrors,transactions"
}
func (c *MSSQLCollector) getMSSQLCollectors() mssqlCollectorsMap {
@@ -105,6 +105,8 @@ func (c *MSSQLCollector) getMSSQLCollectors() mssqlCollectorsMap {
mssqlCollectors["locks"] = c.collectLocks
mssqlCollectors["memmgr"] = c.collectMemoryManager
mssqlCollectors["sqlstats"] = c.collectSQLStats
mssqlCollectors["sqlerrors"] = c.collectSQLErrors
mssqlCollectors["transactions"] = c.collectTransactions
return mssqlCollectors
}
@@ -358,6 +360,24 @@ type MSSQLCollector struct {
SQLStatsSQLReCompilations *prometheus.Desc
SQLStatsUnsafeAutoParams *prometheus.Desc
// Win32_PerfRawData_{instance}_SQLServerSQLErrors
SQLErrorsTotal *prometheus.Desc
// Win32_PerfRawData_{instance}_SQLServerTransactions
TransactionsTempDbFreeSpaceBytes *prometheus.Desc
TransactionsLongestTransactionRunningSeconds *prometheus.Desc
TransactionsNonSnapshotVersionActiveTotal *prometheus.Desc
TransactionsSnapshotActiveTotal *prometheus.Desc
TransactionsActiveTotal *prometheus.Desc
TransactionsUpdateConflictsTotal *prometheus.Desc
TransactionsUpdateSnapshotActiveTotal *prometheus.Desc
TransactionsVersionCleanupRateBytes *prometheus.Desc
TransactionsVersionGenerationRateBytes *prometheus.Desc
TransactionsVersionStoreSizeBytes *prometheus.Desc
TransactionsVersionStoreUnits *prometheus.Desc
TransactionsVersionStoreCreationUnits *prometheus.Desc
TransactionsVersionStoreTruncationUnits *prometheus.Desc
mssqlInstances mssqlInstancesType
mssqlCollectors mssqlCollectorsMap
mssqlChildCollectorFailure int
@@ -1637,6 +1657,94 @@ func NewMSSQLCollector() (Collector, error) {
nil,
),
// Win32_PerfRawData_{instance}_SQLServerSQLErrors
SQLErrorsTotal: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "sql_errors_total"),
"(SQLErrors.Total)",
[]string{"instance", "resource"},
nil,
),
// Win32_PerfRawData_{instance}_SQLServerTransactions
TransactionsTempDbFreeSpaceBytes: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "transactions_tempdb_free_space_bytes"),
"(Transactions.FreeSpaceInTempDbKB)",
[]string{"instance"},
nil,
),
TransactionsLongestTransactionRunningSeconds: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "transactions_longest_transaction_running_seconds"),
"(Transactions.LongestTransactionRunningTime)",
[]string{"instance"},
nil,
),
TransactionsNonSnapshotVersionActiveTotal: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "transactions_nonsnapshot_version_active_total"),
"(Transactions.NonSnapshotVersionTransactions)",
[]string{"instance"},
nil,
),
TransactionsSnapshotActiveTotal: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "transactions_snapshot_active_total"),
"(Transactions.SnapshotTransactions)",
[]string{"instance"},
nil,
),
TransactionsActiveTotal: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "transactions_active_total"),
"(Transactions.Transactions)",
[]string{"instance"},
nil,
),
TransactionsUpdateConflictsTotal: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "transactions_update_conflicts_total"),
"(Transactions.UpdateConflictRatio)",
[]string{"instance"},
nil,
),
TransactionsUpdateSnapshotActiveTotal: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "transactions_update_snapshot_active_total"),
"(Transactions.UpdateSnapshotTransactions)",
[]string{"instance"},
nil,
),
TransactionsVersionCleanupRateBytes: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "transactions_version_cleanup_rate_bytes"),
"(Transactions.VersionCleanupRateKBs)",
[]string{"instance"},
nil,
),
TransactionsVersionGenerationRateBytes: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "transactions_version_generation_rate_bytes"),
"(Transactions.VersionGenerationRateKBs)",
[]string{"instance"},
nil,
),
TransactionsVersionStoreSizeBytes: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "transactions_version_store_size_bytes"),
"(Transactions.VersionStoreSizeKB)",
[]string{"instance"},
nil,
),
TransactionsVersionStoreUnits: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "transactions_version_store_units"),
"(Transactions.VersionStoreUnitCount)",
[]string{"instance"},
nil,
),
TransactionsVersionStoreCreationUnits: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "transactions_version_store_creation_units"),
"(Transactions.VersionStoreUnitCreation)",
[]string{"instance"},
nil,
),
TransactionsVersionStoreTruncationUnits: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "transactions_version_store_truncation_units"),
"(Transactions.VersionStoreUnitTruncation)",
[]string{"instance"},
nil,
),
mssqlInstances: getMSSQLInstances(),
}
@@ -2575,7 +2683,7 @@ func (c *MSSQLCollector) collectDatabaseReplica(ch chan<- prometheus.Metric, sql
ch <- prometheus.MustNewConstMetric(
c.DBReplicaTransactionDelay,
prometheus.GaugeValue,
float64(v.TransactionDelay)*1000.0,
float64(v.TransactionDelay)/1000.0,
sqlInstance, replicaName,
)
}
@@ -3558,3 +3666,162 @@ func (c *MSSQLCollector) collectSQLStats(ch chan<- prometheus.Metric, sqlInstanc
return nil, nil
}
type win32PerfRawDataSQLServerSQLErrors struct {
Name string
ErrorsPersec uint64
}
// Win32_PerfRawData_MSSQLSERVER_SQLServerErrors docs:
// - https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-sql-errors-object
func (c *MSSQLCollector) collectSQLErrors(ch chan<- prometheus.Metric, sqlInstance string) (*prometheus.Desc, error) {
var dst []win32PerfRawDataSQLServerSQLErrors
log.Debugf("mssql_sqlerrors collector iterating sql instance %s.", sqlInstance)
class := mssqlBuildWMIInstanceClass("SQLErrors", sqlInstance)
q := queryAllForClassWhere(&dst, class, `Name <> '_Total'`)
if err := wmi.Query(q, &dst); err != nil {
return nil, err
}
for _, v := range dst {
resource := v.Name
ch <- prometheus.MustNewConstMetric(
c.SQLErrorsTotal,
prometheus.CounterValue,
float64(v.ErrorsPersec),
sqlInstance, resource,
)
}
return nil, nil
}
type win32PerfRawDataSqlServerTransactions struct {
FreeSpaceintempdbKB uint64
LongestTransactionRunningTime uint64
NonSnapshotVersionTransactions uint64
SnapshotTransactions uint64
Transactions uint64
Updateconflictratio uint64
UpdateSnapshotTransactions uint64
VersionCleanuprateKBPers uint64
VersionGenerationrateKBPers uint64
VersionStoreSizeKB uint64
VersionStoreunitcount uint64
VersionStoreunitcreation uint64
VersionStoreunittruncation uint64
}
// Win32_PerfRawData_MSSQLSERVER_Transactions docs:
// - https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-transactions-object
func (c *MSSQLCollector) collectTransactions(ch chan<- prometheus.Metric, sqlInstance string) (*prometheus.Desc, error) {
var dst []win32PerfRawDataSqlServerTransactions
log.Debugf("mssql_transactions collector iterating sql instance %s.", sqlInstance)
class := mssqlBuildWMIInstanceClass("Transactions", sqlInstance)
q := queryAllForClass(&dst, class)
if err := wmi.Query(q, &dst); err != nil {
return nil, err
}
if len(dst) == 0 {
return nil, errors.New("WMI query returned empty result set")
}
v := dst[0]
ch <- prometheus.MustNewConstMetric(
c.TransactionsTempDbFreeSpaceBytes,
prometheus.GaugeValue,
float64(v.FreeSpaceintempdbKB*1024),
sqlInstance,
)
ch <- prometheus.MustNewConstMetric(
c.TransactionsLongestTransactionRunningSeconds,
prometheus.GaugeValue,
float64(v.LongestTransactionRunningTime),
sqlInstance,
)
ch <- prometheus.MustNewConstMetric(
c.TransactionsNonSnapshotVersionActiveTotal,
prometheus.CounterValue,
float64(v.NonSnapshotVersionTransactions),
sqlInstance,
)
ch <- prometheus.MustNewConstMetric(
c.TransactionsSnapshotActiveTotal,
prometheus.CounterValue,
float64(v.SnapshotTransactions),
sqlInstance,
)
ch <- prometheus.MustNewConstMetric(
c.TransactionsActiveTotal,
prometheus.CounterValue,
float64(v.Transactions),
sqlInstance,
)
ch <- prometheus.MustNewConstMetric(
c.TransactionsUpdateConflictsTotal,
prometheus.CounterValue,
float64(v.Updateconflictratio),
sqlInstance,
)
ch <- prometheus.MustNewConstMetric(
c.TransactionsUpdateSnapshotActiveTotal,
prometheus.CounterValue,
float64(v.UpdateSnapshotTransactions),
sqlInstance,
)
ch <- prometheus.MustNewConstMetric(
c.TransactionsVersionCleanupRateBytes,
prometheus.GaugeValue,
float64(v.VersionCleanuprateKBPers*1024),
sqlInstance,
)
ch <- prometheus.MustNewConstMetric(
c.TransactionsVersionGenerationRateBytes,
prometheus.GaugeValue,
float64(v.VersionGenerationrateKBPers*1024),
sqlInstance,
)
ch <- prometheus.MustNewConstMetric(
c.TransactionsVersionStoreSizeBytes,
prometheus.GaugeValue,
float64(v.VersionStoreSizeKB*1024),
sqlInstance,
)
ch <- prometheus.MustNewConstMetric(
c.TransactionsVersionStoreUnits,
prometheus.CounterValue,
float64(v.VersionStoreunitcount),
sqlInstance,
)
ch <- prometheus.MustNewConstMetric(
c.TransactionsVersionStoreCreationUnits,
prometheus.CounterValue,
float64(v.VersionStoreunitcreation),
sqlInstance,
)
ch <- prometheus.MustNewConstMetric(
c.TransactionsVersionStoreTruncationUnits,
prometheus.CounterValue,
float64(v.VersionStoreunittruncation),
sqlInstance,
)
return nil, nil
}

View File

@@ -6,7 +6,6 @@ import (
"fmt"
"regexp"
"github.com/StackExchange/wmi"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/log"
"gopkg.in/alecthomas/kingpin.v2"
@@ -28,7 +27,7 @@ var (
nicNameToUnderscore = regexp.MustCompile("[^a-zA-Z0-9]")
)
// A NetworkCollector is a Prometheus collector for WMI Win32_PerfRawData_Tcpip_NetworkInterface metrics
// A NetworkCollector is a Prometheus collector for Perflib Network Interface metrics
type NetworkCollector struct {
BytesReceivedTotal *prometheus.Desc
BytesSentTotal *prometheus.Desc
@@ -133,7 +132,7 @@ func NewNetworkCollector() (Collector, error) {
// Collect sends the metric values for each metric
// to the provided prometheus Metric channel.
func (c *NetworkCollector) Collect(ctx *ScrapeContext, ch chan<- prometheus.Metric) error {
if desc, err := c.collect(ch); err != nil {
if desc, err := c.collect(ctx, ch); err != nil {
log.Error("failed collecting net metrics:", desc, err)
return err
}
@@ -141,34 +140,33 @@ func (c *NetworkCollector) Collect(ctx *ScrapeContext, ch chan<- prometheus.Metr
}
// mangleNetworkName mangles Network Adapter name (non-alphanumeric to _)
// that is used in Win32_PerfRawData_Tcpip_NetworkInterface.
// that is used in networkInterface.
func mangleNetworkName(name string) string {
return nicNameToUnderscore.ReplaceAllString(name, "_")
}
// Win32_PerfRawData_Tcpip_NetworkInterface docs:
// - https://technet.microsoft.com/en-us/security/aa394340(v=vs.80)
type Win32_PerfRawData_Tcpip_NetworkInterface struct {
BytesReceivedPerSec uint64
BytesSentPerSec uint64
BytesTotalPerSec uint64
type networkInterface struct {
BytesReceivedPerSec float64 `perflib:"Bytes Received/sec"`
BytesSentPerSec float64 `perflib:"Bytes Sent/sec"`
BytesTotalPerSec float64 `perflib:"Bytes Total/sec"`
Name string
PacketsOutboundDiscarded uint64
PacketsOutboundErrors uint64
PacketsPerSec uint64
PacketsReceivedDiscarded uint64
PacketsReceivedErrors uint64
PacketsReceivedPerSec uint64
PacketsReceivedUnknown uint64
PacketsSentPerSec uint64
CurrentBandwidth uint64
PacketsOutboundDiscarded float64 `perflib:"Packets Outbound Discarded"`
PacketsOutboundErrors float64 `perflib:"Packets Outbound Errors"`
PacketsPerSec float64 `perflib:"Packets/sec"`
PacketsReceivedDiscarded float64 `perflib:"Packets Received Discarded"`
PacketsReceivedErrors float64 `perflib:"Packets Received Errors"`
PacketsReceivedPerSec float64 `perflib:"Packets Received/sec"`
PacketsReceivedUnknown float64 `perflib:"Packets Received Unknown"`
PacketsSentPerSec float64 `perflib:"Packets Sent/sec"`
CurrentBandwidth float64 `perflib:"Current Bandwidth"`
}
func (c *NetworkCollector) collect(ch chan<- prometheus.Metric) (*prometheus.Desc, error) {
var dst []Win32_PerfRawData_Tcpip_NetworkInterface
func (c *NetworkCollector) collect(ctx *ScrapeContext, ch chan<- prometheus.Metric) (*prometheus.Desc, error) {
var dst []networkInterface
q := queryAll(&dst)
if err := wmi.Query(q, &dst); err != nil {
if err := unmarshalObject(ctx.perfObjects["Network Interface"], &dst); err != nil {
return nil, err
}
@@ -187,76 +185,75 @@ func (c *NetworkCollector) collect(ch chan<- prometheus.Metric) (*prometheus.Des
ch <- prometheus.MustNewConstMetric(
c.BytesReceivedTotal,
prometheus.CounterValue,
float64(nic.BytesReceivedPerSec),
nic.BytesReceivedPerSec,
name,
)
ch <- prometheus.MustNewConstMetric(
c.BytesSentTotal,
prometheus.CounterValue,
float64(nic.BytesSentPerSec),
nic.BytesSentPerSec,
name,
)
ch <- prometheus.MustNewConstMetric(
c.BytesTotal,
prometheus.CounterValue,
float64(nic.BytesTotalPerSec),
nic.BytesTotalPerSec,
name,
)
ch <- prometheus.MustNewConstMetric(
c.PacketsOutboundDiscarded,
prometheus.CounterValue,
float64(nic.PacketsOutboundDiscarded),
nic.PacketsOutboundDiscarded,
name,
)
ch <- prometheus.MustNewConstMetric(
c.PacketsOutboundErrors,
prometheus.CounterValue,
float64(nic.PacketsOutboundErrors),
nic.PacketsOutboundErrors,
name,
)
ch <- prometheus.MustNewConstMetric(
c.PacketsTotal,
prometheus.CounterValue,
float64(nic.PacketsPerSec),
nic.PacketsPerSec,
name,
)
ch <- prometheus.MustNewConstMetric(
c.PacketsReceivedDiscarded,
prometheus.CounterValue,
float64(nic.PacketsReceivedDiscarded),
nic.PacketsReceivedDiscarded,
name,
)
ch <- prometheus.MustNewConstMetric(
c.PacketsReceivedErrors,
prometheus.CounterValue,
float64(nic.PacketsReceivedErrors),
nic.PacketsReceivedErrors,
name,
)
ch <- prometheus.MustNewConstMetric(
c.PacketsReceivedTotal,
prometheus.CounterValue,
float64(nic.PacketsReceivedPerSec),
nic.PacketsReceivedPerSec,
name,
)
ch <- prometheus.MustNewConstMetric(
c.PacketsReceivedUnknown,
prometheus.CounterValue,
float64(nic.PacketsReceivedUnknown),
nic.PacketsReceivedUnknown,
name,
)
ch <- prometheus.MustNewConstMetric(
c.PacketsSentTotal,
prometheus.CounterValue,
float64(nic.PacketsSentPerSec),
nic.PacketsSentPerSec,
name,
)
ch <- prometheus.MustNewConstMetric(
c.CurrentBandwidth,
prometheus.CounterValue,
float64(nic.CurrentBandwidth),
prometheus.GaugeValue,
nic.CurrentBandwidth,
name,
)
}
return nil, nil
}

View File

@@ -63,11 +63,14 @@ func unmarshalObject(obj *perflib.PerfObject, vs interface{}) error {
ctr, found := counters[tag]
if !found {
log.Debugf("missing counter %q, has %v", tag, counters)
return fmt.Errorf("could not find counter %q on instance", tag)
log.Debugf("missing counter %q, have %v", tag, counterMapKeys(counters))
continue
}
if !target.Field(i).CanSet() {
return fmt.Errorf("tagged field %v cannot be written to", f)
return fmt.Errorf("tagged field %v cannot be written to", f.Name)
}
if fieldType := target.Field(i).Type(); fieldType != reflect.TypeOf((*float64)(nil)).Elem() {
return fmt.Errorf("tagged field %v has wrong type %v, must be float64", f.Name, fieldType)
}
switch ctr.Def.CounterType {
@@ -87,3 +90,11 @@ func unmarshalObject(obj *perflib.PerfObject, vs interface{}) error {
return nil
}
func counterMapKeys(m map[string]*perflib.PerfCounter) []string {
keys := make([]string, 0, len(m))
for k := range m {
keys = append(keys, k)
}
return keys
}

125
collector/perflib_test.go Normal file
View File

@@ -0,0 +1,125 @@
package collector
import (
"reflect"
"testing"
perflibCollector "github.com/leoluk/perflib_exporter/collector"
"github.com/leoluk/perflib_exporter/perflib"
)
type simple struct {
ValA float64 `perflib:"Something"`
ValB float64 `perflib:"Something Else"`
}
func TestUnmarshalPerflib(t *testing.T) {
cases := []struct {
name string
obj *perflib.PerfObject
expectedOutput []simple
expectError bool
}{
{
name: "nil check",
obj: nil,
expectedOutput: []simple{},
expectError: true,
},
{
name: "Simple",
obj: &perflib.PerfObject{
Instances: []*perflib.PerfInstance{
{
Counters: []*perflib.PerfCounter{
{
Def: &perflib.PerfCounterDef{
Name: "Something",
CounterType: perflibCollector.PERF_COUNTER_COUNTER,
},
Value: 123,
},
},
},
},
},
expectedOutput: []simple{{ValA: 123}},
expectError: false,
},
{
name: "Multiple properties",
obj: &perflib.PerfObject{
Instances: []*perflib.PerfInstance{
{
Counters: []*perflib.PerfCounter{
{
Def: &perflib.PerfCounterDef{
Name: "Something",
CounterType: perflibCollector.PERF_COUNTER_COUNTER,
},
Value: 123,
},
{
Def: &perflib.PerfCounterDef{
Name: "Something Else",
CounterType: perflibCollector.PERF_COUNTER_COUNTER,
},
Value: 256,
},
},
},
},
},
expectedOutput: []simple{{ValA: 123, ValB: 256}},
expectError: false,
},
{
name: "Multiple instances",
obj: &perflib.PerfObject{
Instances: []*perflib.PerfInstance{
{
Counters: []*perflib.PerfCounter{
{
Def: &perflib.PerfCounterDef{
Name: "Something",
CounterType: perflibCollector.PERF_COUNTER_COUNTER,
},
Value: 321,
},
},
},
{
Counters: []*perflib.PerfCounter{
{
Def: &perflib.PerfCounterDef{
Name: "Something",
CounterType: perflibCollector.PERF_COUNTER_COUNTER,
},
Value: 231,
},
},
},
},
},
expectedOutput: []simple{{ValA: 321}, {ValA: 231}},
expectError: false,
},
}
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
output := make([]simple, 0)
err := unmarshalObject(c.obj, &output)
if err != nil && !c.expectError {
t.Errorf("Did not expect error, got %q", err)
}
if err == nil && c.expectError {
t.Errorf("Expected an error, but got ok")
}
if err == nil && !reflect.DeepEqual(output, c.expectedOutput) {
t.Errorf("Output mismatch, expected %+v, got %+v", c.expectedOutput, output)
}
})
}
}

View File

@@ -3,8 +3,6 @@
package collector
import (
"errors"
"github.com/StackExchange/wmi"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/log"
)
@@ -70,7 +68,7 @@ func NewSystemCollector() (Collector, error) {
// Collect sends the metric values for each metric
// to the provided prometheus Metric channel.
func (c *SystemCollector) Collect(ctx *ScrapeContext, ch chan<- prometheus.Metric) error {
if desc, err := c.collect(ch); err != nil {
if desc, err := c.collect(ctx, ch); err != nil {
log.Error("failed collecting system metrics:", desc, err)
return err
}
@@ -79,57 +77,50 @@ func (c *SystemCollector) Collect(ctx *ScrapeContext, ch chan<- prometheus.Metri
// Win32_PerfRawData_PerfOS_System docs:
// - https://web.archive.org/web/20050830140516/http://msdn.microsoft.com/library/en-us/wmisdk/wmi/win32_perfrawdata_perfos_system.asp
type Win32_PerfRawData_PerfOS_System struct {
ContextSwitchesPersec uint32
ExceptionDispatchesPersec uint32
Frequency_Object uint64
ProcessorQueueLength uint32
SystemCallsPersec uint32
SystemUpTime uint64
Threads uint32
Timestamp_Object uint64
type system struct {
ContextSwitchesPersec float64 `perflib:"Context Switches/sec"`
ExceptionDispatchesPersec float64 `perflib:"Exception Dispatches/sec"`
ProcessorQueueLength float64 `perflib:"Processor Queue Length"`
SystemCallsPersec float64 `perflib:"System Calls/sec"`
SystemUpTime float64 `perflib:"System Up Time"`
Threads float64 `perflib:"Threads"`
}
func (c *SystemCollector) collect(ch chan<- prometheus.Metric) (*prometheus.Desc, error) {
var dst []Win32_PerfRawData_PerfOS_System
q := queryAll(&dst)
if err := wmi.Query(q, &dst); err != nil {
func (c *SystemCollector) collect(ctx *ScrapeContext, ch chan<- prometheus.Metric) (*prometheus.Desc, error) {
var dst []system
if err := unmarshalObject(ctx.perfObjects["System"], &dst); err != nil {
return nil, err
}
if len(dst) == 0 {
return nil, errors.New("WMI query returned empty result set")
}
ch <- prometheus.MustNewConstMetric(
c.ContextSwitchesTotal,
prometheus.CounterValue,
float64(dst[0].ContextSwitchesPersec),
dst[0].ContextSwitchesPersec,
)
ch <- prometheus.MustNewConstMetric(
c.ExceptionDispatchesTotal,
prometheus.CounterValue,
float64(dst[0].ExceptionDispatchesPersec),
dst[0].ExceptionDispatchesPersec,
)
ch <- prometheus.MustNewConstMetric(
c.ProcessorQueueLength,
prometheus.GaugeValue,
float64(dst[0].ProcessorQueueLength),
dst[0].ProcessorQueueLength,
)
ch <- prometheus.MustNewConstMetric(
c.SystemCallsTotal,
prometheus.CounterValue,
float64(dst[0].SystemCallsPersec),
dst[0].SystemCallsPersec,
)
ch <- prometheus.MustNewConstMetric(
c.SystemUpTime,
prometheus.GaugeValue,
// convert from Windows timestamp (1 jan 1601) to unix timestamp (1 jan 1970)
float64(dst[0].SystemUpTime-116444736000000000)/float64(dst[0].Frequency_Object),
dst[0].SystemUpTime,
)
ch <- prometheus.MustNewConstMetric(
c.Threads,
prometheus.GaugeValue,
float64(dst[0].Threads),
dst[0].Threads,
)
return nil, nil
}

View File

@@ -136,7 +136,7 @@ func (c *TCPCollector) collect(ch chan<- prometheus.Metric) (*prometheus.Desc, e
)
ch <- prometheus.MustNewConstMetric(
c.ConnectionsEstablished,
prometheus.CounterValue,
prometheus.GaugeValue,
float64(dst[0].ConnectionsEstablished),
)
ch <- prometheus.MustNewConstMetric(

103
collector/thermalzone.go Normal file
View File

@@ -0,0 +1,103 @@
package collector
import (
"github.com/StackExchange/wmi"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/log"
)
func init() {
Factories["thermalzone"] = NewThermalZoneCollector
}
// A thermalZoneCollector is a Prometheus collector for WMI Win32_PerfRawData_Counters_ThermalZoneInformation metrics
type thermalZoneCollector struct {
PercentPassiveLimit *prometheus.Desc
Temperature *prometheus.Desc
ThrottleReasons *prometheus.Desc
}
// NewThermalZoneCollector ...
func NewThermalZoneCollector() (Collector, error) {
const subsystem = "thermalzone"
return &thermalZoneCollector{
Temperature: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "temperature_celsius"),
"(Temperature)",
[]string{
"name",
},
nil,
),
PercentPassiveLimit: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "percent_passive_limit"),
"(PercentPassiveLimit)",
[]string{
"name",
},
nil,
),
ThrottleReasons: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "throttle_reasons"),
"(ThrottleReasons)",
[]string{
"name",
},
nil,
),
}, nil
}
// Collect sends the metric values for each metric
// to the provided prometheus Metric channel.
func (c *thermalZoneCollector) Collect(ctx *ScrapeContext, ch chan<- prometheus.Metric) error {
if desc, err := c.collect(ch); err != nil {
log.Error("failed collecting thermalzone metrics:", desc, err)
return err
}
return nil
}
// Win32_PerfRawData_Counters_ThermalZoneInformation docs:
// https://wutils.com/wmi/root/cimv2/win32_perfrawdata_counters_thermalzoneinformation/
type Win32_PerfRawData_Counters_ThermalZoneInformation struct {
Name string
HighPrecisionTemperature uint32
PercentPassiveLimit uint32
ThrottleReasons uint32
}
func (c *thermalZoneCollector) collect(ch chan<- prometheus.Metric) (*prometheus.Desc, error) {
var dst []Win32_PerfRawData_Counters_ThermalZoneInformation
q := queryAll(&dst)
if err := wmi.Query(q, &dst); err != nil {
return nil, err
}
for _, info := range dst {
//Divide by 10 and subtract 273.15 to convert decikelvin to celsius
ch <- prometheus.MustNewConstMetric(
c.Temperature,
prometheus.GaugeValue,
(float64(info.HighPrecisionTemperature)/10.0)-273.15,
info.Name,
)
ch <- prometheus.MustNewConstMetric(
c.PercentPassiveLimit,
prometheus.GaugeValue,
float64(info.PercentPassiveLimit),
info.Name,
)
ch <- prometheus.MustNewConstMetric(
c.ThrottleReasons,
prometheus.GaugeValue,
float64(info.ThrottleReasons),
info.Name,
)
}
return nil, nil
}

View File

@@ -3,12 +3,14 @@ This directory contains documentation of the collectors in the WMI exporter, wit
# Collectors
- [`ad`](collector.ad.md)
- [`adfs`](collector.adfs.md)
- [`cpu`](collector.cpu.md)
- [`cs`](collector.cs.md)
- [`dns`](collector.dns.md)
- [`hyperv`](collector.hyperv.md)
- [`iis`](collector.iis.md)
- [`logical_disk`](collector.logical_disk.md)
- [`logon`](collector.logon.md)
- [`memory`](collector.memory.md)
- [`msmq`](collector.msmq.md)
- [`mssql`](collector.mssql.md)
@@ -27,4 +29,4 @@ This directory contains documentation of the collectors in the WMI exporter, wit
- [`system`](collector.system.md)
- [`tcp`](collector.tcp.md)
- [`textfile`](collector.textfile.md)
- [`vmware`](collector.vmware.md)
- [`vmware`](collector.vmware.md)

51
docs/collector.adfs.md Normal file
View File

@@ -0,0 +1,51 @@
# adfs collector
The adfs collector exposes metrics about Active Directory Federation Services. Note that this collector has only been tested against ADFS 4.0 (2016).
Other ADFS versions may work but are not tested.
|||
-|-
Metric name prefix | `adfs`
Data source | Perflib
Counters | `AD FS`
Enabled by default? | No
## Flags
None
## Metrics
Name | Description | Type | Labels
-----|-------------|------|-------
`wmi_adfs_ad_login_connection_failures` | Total number of connection failures between the ADFS server and the Active Directory domain controller(s) | counter | None
`wmi_adfs_certificate_authentications` | Total number of [User Certificate](https://docs.microsoft.com/en-us/windows-server/identity/ad-fs/operations/configure-user-certificate-authentication) authentications. I.E. smart cards or mobile devices with provisioned client certificates | counter | None
`wmi_adfs_device_authentications` | Total number of [device authentications](https://docs.microsoft.com/en-us/windows-server/identity/ad-fs/operations/device-authentication-controls-in-ad-fs) (SignedToken, clientTLS, PkeyAuth). Device authentication is only available on ADFS 2016 or later | counter | None
`wmi_adfs_extranet_account_lockouts` | Total number of [extranet lockouts](https://docs.microsoft.com/en-us/windows-server/identity/ad-fs/operations/configure-ad-fs-extranet-smart-lockout-protection). Requires the Extranet Lockout feature to be enabled | counter | None
`wmi_adfs_federated_authentications` | Total number of authentications from federated sources. E.G. Office365 | counter | None
`wmi_adfs_passport_authentications` | Total number of authentications from [Microsoft Passport](https://en.wikipedia.org/wiki/Microsoft_account) (now named Microsoft Account) | counter | None
`wmi_adfs_password_change_failed` | Total number of failed password changes. The Password Change Portal must be enabled in the AD FS Management tool in order to allow user password changes | counter | None
`wmi_adfs_password_change_succeeded` | Total number of succeeded password changes. The Password Change Portal must be enabled in the AD FS Management tool in order to allow user password changes | counter | None
`wmi_adfs_token_requests` | Total number of requested access tokens | counter | None
`wmi_adfs_windows_integrated_authentications` | Total number of Windows integrated authentications using Kerberos or NTLM | counter | None
### Example metric
Show rate of device authentications in AD FS:
```
rate(wmi_adfs_device_authentications)[2m]
```
## Useful queries
## Alerting examples
**prometheus.rules**
```yaml
- alert: "HighExtranetLockouts"
expr: "rate(wmi_adfs_extranet_account_lockouts)[2m] > 100"
for: "10m"
labels:
severity: "high"
annotations:
summary: "High number of AD FS extranet lockouts"
description: "High number of AD FS extranet lockouts may indicate a password spray attack.\n Server: {{ $labels.instance }}\n Number of lockouts: {{ $value }}"
```

View File

@@ -1,43 +1,60 @@
# cpu collector
The cpu collector exposes metrics about CPU usage
|||
-|-
Metric name prefix | `cpu`
Data source | Perflib
Counters | `ProcessorInformation` (Windows Server 2008R2 and later) `Processor` (older versions)
Enabled by default? | Yes
## Flags
None
## Metrics
These metrics are available on all versions of Windows:
Name | Description | Type | Labels
-----|-------------|------|-------
`wmi_cpu_cstate_seconds_total` | Time spent in low-power idle states | counter | `core`, `state`
`wmi_cpu_time_total` | Time that processor spent in different modes (idle, user, system, ...) | counter | `core`, `mode`
`wmi_cpu_interrupts_total` | Total number of received and serviced hardware interrupts | counter | `core`
`wmi_cpu_dpcs_total` | Total number of received and serviced deferred procedure calls (DPCs) | counter | `core`
These metrics are only exposed on Windows Server 2008R2 and later:
Name | Description | Type | Labels
-----|-------------|------|-------
`wmi_cpu_clock_interrupts_total` | Total number of received and serviced clock tick interrupts | `core`
`wmi_cpu_idle_break_events_total` | Total number of time processor was woken from idle | `core`
`wmi_cpu_parking_status` | Parking Status represents whether a processor is parked or not | `gauge`
`wmi_cpu_core_frequency_mhz` | Core frequency in megahertz | `gauge`
`wmi_cpu_processor_performance` | Processor Performance is the average performance of the processor while it is executing instructions, as a percentage of the nominal performance of the processor. On some processors, Processor Performance may exceed 100% | `gauge`
### Example metric
_This collector does not yet have explained examples, we would appreciate your help adding them!_
## Useful queries
_This collector does not yet have any useful queries added, we would appreciate your help adding them!_
## Alerting examples
_This collector does not yet have alerting examples, we would appreciate your help adding them!_
# cpu collector
The cpu collector exposes metrics about CPU usage
|||
-|-
Metric name prefix | `cpu`
Data source | Perflib
Counters | `ProcessorInformation` (Windows Server 2008R2 and later) `Processor` (older versions)
Enabled by default? | Yes
## Flags
None
## Metrics
These metrics are available on all versions of Windows:
Name | Description | Type | Labels
-----|-------------|------|-------
`wmi_cpu_cstate_seconds_total` | Time spent in low-power idle states | counter | `core`, `state`
`wmi_cpu_time_total` | Time that processor spent in different modes (idle, user, system, ...) | counter | `core`, `mode`
`wmi_cpu_interrupts_total` | Total number of received and serviced hardware interrupts | counter | `core`
`wmi_cpu_dpcs_total` | Total number of received and serviced deferred procedure calls (DPCs) | counter | `core`
These metrics are only exposed on Windows Server 2008R2 and later:
Name | Description | Type | Labels
-----|-------------|------|-------
`wmi_cpu_clock_interrupts_total` | Total number of received and serviced clock tick interrupts | `core`
`wmi_cpu_idle_break_events_total` | Total number of time processor was woken from idle | `core`
`wmi_cpu_parking_status` | Parking Status represents whether a processor is parked or not | `gauge`
`wmi_cpu_core_frequency_mhz` | Core frequency in megahertz | `gauge`
`wmi_cpu_processor_performance` | Processor Performance is the average performance of the processor while it is executing instructions, as a percentage of the nominal performance of the processor. On some processors, Processor Performance may exceed 100% | `gauge`
### Example metric
Show frequency of host CPU cores
```
wmi_cpu_core_frequency_mhz{instance="localhost"}
```
## Useful queries
Show cpu usage by mode.
```
sum by (mode) (irate(wmi_cpu_time_total{instance="localhost"}[5m]))
```
## Alerting examples
**prometheus.rules**
```
# Alert on hosts with more than 80% CPU usage over a 10 minute period
- alert: CpuUsage
expr: 100 - (avg by (instance) (irate(wmi_cpu_time_total{mode="idle"}[2m])) * 100) > 80
for: 10m
labels:
severity: warning
annotations:
summary: "CPU Usage (instance {{ $labels.instance }})"
description: "CPU Usage is more than 80%\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
```

View File

@@ -1,4 +1,4 @@
# hyperv collector
# hyperv collector
The hyperv collector exposes metrics about the Hyper-V hypervisor
@@ -16,81 +16,81 @@ None
Name | Description | Type | Labels
-----|-------------|------|-------
`wmi_hyper_health_critical` | _Not yet documented_ | counter | None
`wmi_hyper_health_ok` | _Not yet documented_ | counter | None
`wmi_hyper_vid_physical_pages_allocated` | _Not yet documented_ | counter | `vm`
`wmi_hyper_vid_preferred_numa_node_index` | _Not yet documented_ | counter | `vm`
`wmi_hyper_vid_remote_physical_pages` | _Not yet documented_ | counter | `vm`
`wmi_hyper_root_partition_address_spaces` | _Not yet documented_ | counter | None
`wmi_hyper_root_partition_attached_devices` | _Not yet documented_ | counter | None
`wmi_hyper_root_partition_deposited_pages` | _Not yet documented_ | counter | None
`wmi_hyper_root_partition_device_dma_errors` | _Not yet documented_ | counter | None
`wmi_hyper_root_partition_device_interrupt_errors` | _Not yet documented_ | counter | None
`wmi_hyper_root_partition_device_interrupt_mappings` | _Not yet documented_ | counter | None
`wmi_hyper_root_partition_device_interrupt_throttle_events` | _Not yet documented_ | counter | None
`wmi_hyper_root_partition_preferred_numa_node_index` | _Not yet documented_ | counter | None
`wmi_hyper_root_partition_gpa_space_modifications` | _Not yet documented_ | counter | None
`wmi_hyper_root_partition_io_tlb_flush_cost` | _Not yet documented_ | counter | None
`wmi_hyper_root_partition_io_tlb_flush` | _Not yet documented_ | counter | None
`wmi_hyper_root_partition_recommended_virtual_tlb_size` | _Not yet documented_ | counter | None
`wmi_hyper_root_partition_physical_pages_allocated` | _Not yet documented_ | counter | None
`wmi_hyper_root_partition_1G_device_pages` | _Not yet documented_ | counter | None
`wmi_hyper_root_partition_1G_gpa_pages` | _Not yet documented_ | counter | None
`wmi_hyper_root_partition_2M_device_pages` | _Not yet documented_ | counter | None
`wmi_hyper_root_partition_2M_gpa_pages` | _Not yet documented_ | counter | None
`wmi_hyper_root_partition_4K_device_pages` | _Not yet documented_ | counter | None
`wmi_hyper_root_partition_4K_gpa_pages` | _Not yet documented_ | counter | None
`wmi_hyper_root_partition_virtual_tlb_flush_entires` | _Not yet documented_ | counter | None
`wmi_hyper_root_partition_virtual_tlb_pages` | _Not yet documented_ | counter | None
`wmi_hyper_hypervisor_virtual_processors` | _Not yet documented_ | counter | None
`wmi_hyper_hypervisor_logical_processors` | _Not yet documented_ | counter | None
`wmi_hyper_host_cpu_guest_run_time` | _Not yet documented_ | counter | `core`
`wmi_hyper_host_cpu_hypervisor_run_time` | _Not yet documented_ | counter | `core`
`wmi_hyper_host_cpu_remote_run_time` | _Not yet documented_ | counter | `core`
`wmi_hyper_host_cpu_total_run_time` | _Not yet documented_ | counter | `core`
`wmi_hyper_vm_cpu_guest_run_time` | _Not yet documented_ | counter | `vm`, `core`
`wmi_hyper_vm_cpu_hypervisor_run_time` | _Not yet documented_ | counter | `vm`, `core`
`wmi_hyper_vm_cpu_remote_run_time` | _Not yet documented_ | counter | `vm`, `core`
`wmi_hyper_vm_cpu_total_run_time` | _Not yet documented_ | counter | `vm`, `core`
`wmi_hyper_vswitch_broadcast_packets_received_total` | _Not yet documented_ | counter | `vswitch`
`wmi_hyper_vswitch_broadcast_packets_sent_total` | _Not yet documented_ | counter | `vswitch`
`wmi_hyper_vswitch_bytes_total` | _Not yet documented_ | counter | `vswitch`
`wmi_hyper_vswitch_bytes_received_total` | _Not yet documented_ | counter | `vswitch`
`wmi_hyper_vswitch_bytes_sent_total` | _Not yet documented_ | counter | `vswitch`
`wmi_hyper_vswitch_directed_packets_received_total` | _Not yet documented_ | counter | `vswitch`
`wmi_hyper_vswitch_directed_packets_send_total` | _Not yet documented_ | counter | `vswitch`
`wmi_hyper_vswitch_dropped_packets_incoming_total` | _Not yet documented_ | counter | `vswitch`
`wmi_hyper_vswitch_dropped_packets_outcoming_total` | _Not yet documented_ | counter | `vswitch`
`wmi_hyper_vswitch_extensions_dropped_packets_incoming_total` | _Not yet documented_ | counter | `vswitch`
`wmi_hyper_vswitch_extensions_dropped_packets_outcoming_total` | _Not yet documented_ | counter | `vswitch`
`wmi_hyper_vswitch_learned_mac_addresses_total` | _Not yet documented_ | counter | `vswitch`
`wmi_hyper_vswitch_multicast_packets_received_total` | _Not yet documented_ | counter | `vswitch`
`wmi_hyper_vswitch_multicast_packets_sent_total` | _Not yet documented_ | counter | `vswitch`
`wmi_hyper_vswitch_number_of_send_channel_moves_total` | _Not yet documented_ | counter | `vswitch`
`wmi_hyper_vswitch_number_of_vmq_moves_total` | _Not yet documented_ | counter | `vswitch`
`wmi_hyper_vswitch_packets_flooded_total` | _Not yet documented_ | counter | `vswitch`
`wmi_hyper_vswitch_packets_total` | _Not yet documented_ | counter | `vswitch`
`wmi_hyper_vswitch_packets_received_total` | _Not yet documented_ | counter | `vswitch`
`wmi_hyper_vswitch_packets_sent_total` | _Not yet documented_ | counter | `vswitch`
`wmi_hyper_vswitch_purged_mac_addresses_total` | _Not yet documented_ | counter | `vswitch`
`wmi_hyper_ethernet_bytes_dropped` | _Not yet documented_ | counter | `adapter`
`wmi_hyper_ethernet_bytes_received` | _Not yet documented_ | counter | `adapter`
`wmi_hyper_ethernet_bytes_sent` | _Not yet documented_ | counter | `adapter`
`wmi_hyper_ethernet_frames_dropped` | _Not yet documented_ | counter | `adapter`
`wmi_hyper_ethernet_frames_received` | _Not yet documented_ | counter | `adapter`
`wmi_hyper_ethernet_frames_sent` | _Not yet documented_ | counter | `adapter`
`wmi_hyper_vm_device_error_count` | _Not yet documented_ | counter | `vm_device`
`wmi_hyper_vm_device_queue_length` | _Not yet documented_ | counter | `vm_device`
`wmi_hyper_vm_device_bytes_read` | _Not yet documented_ | counter | `vm_device`
`wmi_hyper_vm_device_operations_read` | _Not yet documented_ | counter | `vm_device`
`wmi_hyper_vm_device_bytes_written` | _Not yet documented_ | counter | `vm_device`
`wmi_hyper_vm_device_operations_written` | _Not yet documented_ | counter | `vm_device`
`wmi_hyper_vm_interface_bytes_received` | _Not yet documented_ | counter | `vm_interface`
`wmi_hyper_vm_interface_bytes_sent` | _Not yet documented_ | counter | `vm_interface`
`wmi_hyper_vm_interface_packets_incoming_dropped` | _Not yet documented_ | counter | `vm_interface`
`wmi_hyper_vm_interface_packets_outgoing_dropped` | _Not yet documented_ | counter | `vm_interface`
`wmi_hyper_vm_interface_packets_received` | _Not yet documented_ | counter | `vm_interface`
`wmi_hyper_vm_interface_packets_sent` | _Not yet documented_ | counter | `vm_interface`
`wmi_hyperv_health_critical` | _Not yet documented_ | counter | None
`wmi_hyperv_health_ok` | _Not yet documented_ | counter | None
`wmi_hyperv_vid_physical_pages_allocated` | _Not yet documented_ | counter | `vm`
`wmi_hyperv_vid_preferred_numa_node_index` | _Not yet documented_ | counter | `vm`
`wmi_hyperv_vid_remote_physical_pages` | _Not yet documented_ | counter | `vm`
`wmi_hyperv_root_partition_address_spaces` | _Not yet documented_ | counter | None
`wmi_hyperv_root_partition_attached_devices` | _Not yet documented_ | counter | None
`wmi_hyperv_root_partition_deposited_pages` | _Not yet documented_ | counter | None
`wmi_hyperv_root_partition_device_dma_errors` | _Not yet documented_ | counter | None
`wmi_hyperv_root_partition_device_interrupt_errors` | _Not yet documented_ | counter | None
`wmi_hyperv_root_partition_device_interrupt_mappings` | _Not yet documented_ | counter | None
`wmi_hyperv_root_partition_device_interrupt_throttle_events` | _Not yet documented_ | counter | None
`wmi_hyperv_root_partition_preferred_numa_node_index` | _Not yet documented_ | counter | None
`wmi_hyperv_root_partition_gpa_space_modifications` | _Not yet documented_ | counter | None
`wmi_hyperv_root_partition_io_tlb_flush_cost` | _Not yet documented_ | counter | None
`wmi_hyperv_root_partition_io_tlb_flush` | _Not yet documented_ | counter | None
`wmi_hyperv_root_partition_recommended_virtual_tlb_size` | _Not yet documented_ | counter | None
`wmi_hyperv_root_partition_physical_pages_allocated` | _Not yet documented_ | counter | None
`wmi_hyperv_root_partition_1G_device_pages` | _Not yet documented_ | counter | None
`wmi_hyperv_root_partition_1G_gpa_pages` | _Not yet documented_ | counter | None
`wmi_hyperv_root_partition_2M_device_pages` | _Not yet documented_ | counter | None
`wmi_hyperv_root_partition_2M_gpa_pages` | _Not yet documented_ | counter | None
`wmi_hyperv_root_partition_4K_device_pages` | _Not yet documented_ | counter | None
`wmi_hyperv_root_partition_4K_gpa_pages` | _Not yet documented_ | counter | None
`wmi_hyperv_root_partition_virtual_tlb_flush_entires` | _Not yet documented_ | counter | None
`wmi_hyperv_root_partition_virtual_tlb_pages` | _Not yet documented_ | counter | None
`wmi_hyperv_hypervisor_virtual_processors` | _Not yet documented_ | counter | None
`wmi_hyperv_hypervisor_logical_processors` | _Not yet documented_ | counter | None
`wmi_hyperv_host_cpu_guest_run_time` | _Not yet documented_ | counter | `core`
`wmi_hyperv_host_cpu_hypervisor_run_time` | _Not yet documented_ | counter | `core`
`wmi_hyperv_host_cpu_remote_run_time` | _Not yet documented_ | counter | `core`
`wmi_hyperv_host_cpu_total_run_time` | _Not yet documented_ | counter | `core`
`wmi_hyperv_vm_cpu_guest_run_time` | _Not yet documented_ | counter | `vm`, `core`
`wmi_hyperv_vm_cpu_hypervisor_run_time` | _Not yet documented_ | counter | `vm`, `core`
`wmi_hyperv_vm_cpu_remote_run_time` | _Not yet documented_ | counter | `vm`, `core`
`wmi_hyperv_vm_cpu_total_run_time` | _Not yet documented_ | counter | `vm`, `core`
`wmi_hyperv_vswitch_broadcast_packets_received_total` | _Not yet documented_ | counter | `vswitch`
`wmi_hyperv_vswitch_broadcast_packets_sent_total` | _Not yet documented_ | counter | `vswitch`
`wmi_hyperv_vswitch_bytes_total` | _Not yet documented_ | counter | `vswitch`
`wmi_hyperv_vswitch_bytes_received_total` | _Not yet documented_ | counter | `vswitch`
`wmi_hyperv_vswitch_bytes_sent_total` | _Not yet documented_ | counter | `vswitch`
`wmi_hyperv_vswitch_directed_packets_received_total` | _Not yet documented_ | counter | `vswitch`
`wmi_hyperv_vswitch_directed_packets_send_total` | _Not yet documented_ | counter | `vswitch`
`wmi_hyperv_vswitch_dropped_packets_incoming_total` | _Not yet documented_ | counter | `vswitch`
`wmi_hyperv_vswitch_dropped_packets_outcoming_total` | _Not yet documented_ | counter | `vswitch`
`wmi_hyperv_vswitch_extensions_dropped_packets_incoming_total` | _Not yet documented_ | counter | `vswitch`
`wmi_hyperv_vswitch_extensions_dropped_packets_outcoming_total` | _Not yet documented_ | counter | `vswitch`
`wmi_hyperv_vswitch_learned_mac_addresses_total` | _Not yet documented_ | counter | `vswitch`
`wmi_hyperv_vswitch_multicast_packets_received_total` | _Not yet documented_ | counter | `vswitch`
`wmi_hyperv_vswitch_multicast_packets_sent_total` | _Not yet documented_ | counter | `vswitch`
`wmi_hyperv_vswitch_number_of_send_channel_moves_total` | _Not yet documented_ | counter | `vswitch`
`wmi_hyperv_vswitch_number_of_vmq_moves_total` | _Not yet documented_ | counter | `vswitch`
`wmi_hyperv_vswitch_packets_flooded_total` | _Not yet documented_ | counter | `vswitch`
`wmi_hyperv_vswitch_packets_total` | _Not yet documented_ | counter | `vswitch`
`wmi_hyperv_vswitch_packets_received_total` | _Not yet documented_ | counter | `vswitch`
`wmi_hyperv_vswitch_packets_sent_total` | _Not yet documented_ | counter | `vswitch`
`wmi_hyperv_vswitch_purged_mac_addresses_total` | _Not yet documented_ | counter | `vswitch`
`wmi_hyperv_ethernet_bytes_dropped` | _Not yet documented_ | counter | `adapter`
`wmi_hyperv_ethernet_bytes_received` | _Not yet documented_ | counter | `adapter`
`wmi_hyperv_ethernet_bytes_sent` | _Not yet documented_ | counter | `adapter`
`wmi_hyperv_ethernet_frames_dropped` | _Not yet documented_ | counter | `adapter`
`wmi_hyperv_ethernet_frames_received` | _Not yet documented_ | counter | `adapter`
`wmi_hyperv_ethernet_frames_sent` | _Not yet documented_ | counter | `adapter`
`wmi_hyperv_vm_device_error_count` | _Not yet documented_ | counter | `vm_device`
`wmi_hyperv_vm_device_queue_length` | _Not yet documented_ | counter | `vm_device`
`wmi_hyperv_vm_device_bytes_read` | _Not yet documented_ | counter | `vm_device`
`wmi_hyperv_vm_device_operations_read` | _Not yet documented_ | counter | `vm_device`
`wmi_hyperv_vm_device_bytes_written` | _Not yet documented_ | counter | `vm_device`
`wmi_hyperv_vm_device_operations_written` | _Not yet documented_ | counter | `vm_device`
`wmi_hyperv_vm_interface_bytes_received` | _Not yet documented_ | counter | `vm_interface`
`wmi_hyperv_vm_interface_bytes_sent` | _Not yet documented_ | counter | `vm_interface`
`wmi_hyperv_vm_interface_packets_incoming_dropped` | _Not yet documented_ | counter | `vm_interface`
`wmi_hyperv_vm_interface_packets_outgoing_dropped` | _Not yet documented_ | counter | `vm_interface`
`wmi_hyperv_vm_interface_packets_received` | _Not yet documented_ | counter | `vm_interface`
`wmi_hyperv_vm_interface_packets_sent` | _Not yet documented_ | counter | `vm_interface`
### Example metric
_This collector does not yet have explained examples, we would appreciate your help adding them!_

View File

@@ -1,44 +1,76 @@
# logical_disk collector
The logical_disk collector exposes metrics about logical disks (in contrast to physical disks)
|||
-|-
Metric name prefix | `logical_disk`
Classes | [`Win32_PerfRawData_PerfDisk_LogicalDisk`](https://msdn.microsoft.com/en-us/windows/hardware/aa394307(v=vs.71))
Enabled by default? | Yes
## Flags
### `--collector.logical_disk.volume-whitelist`
If given, a disk needs to match the whitelist regexp in order for the corresponding disk metrics to be reported
### `--collector.logical_disk.volume-blacklist`
If given, a disk needs to *not* match the blacklist regexp in order for the corresponding disk metrics to be reported
## Metrics
Name | Description | Type | Labels
-----|-------------|------|-------
`requests_queued` | _Not yet documented_ | gauge | `volume`
`read_bytes_total` | _Not yet documented_ | counter | `volume`
`reads_total` | _Not yet documented_ | counter | `volume`
`write_bytes_total` | _Not yet documented_ | counter | `volume`
`writes_total` | _Not yet documented_ | counter | `volume`
`read_seconds_total` | _Not yet documented_ | counter | `volume`
`write_seconds_total` | _Not yet documented_ | counter | `volume`
`free_bytes` | _Not yet documented_ | gauge | `volume`
`size_bytes` | _Not yet documented_ | gauge | `volume`
`idle_seconds_total` | _Not yet documented_ | counter | `volume`
`split_ios_total` | _Not yet documented_ | counter | `volume`
### Example metric
_This collector does not yet have explained examples, we would appreciate your help adding them!_
## Useful queries
_This collector does not yet have any useful queries added, we would appreciate your help adding them!_
## Alerting examples
_This collector does not yet have alerting examples, we would appreciate your help adding them!_
# logical_disk collector
The logical_disk collector exposes metrics about logical disks (in contrast to physical disks)
|||
-|-
Metric name prefix | `logical_disk`
Data source | Perflib
Counters | `LogicalDisk` ([`Win32_PerfRawData_PerfDisk_LogicalDisk`](https://msdn.microsoft.com/en-us/windows/hardware/aa394307(v=vs.71)))
Enabled by default? | Yes
## Flags
### `--collector.logical_disk.volume-whitelist`
If given, a disk needs to match the whitelist regexp in order for the corresponding disk metrics to be reported
### `--collector.logical_disk.volume-blacklist`
If given, a disk needs to *not* match the blacklist regexp in order for the corresponding disk metrics to be reported
## Metrics
Name | Description | Type | Labels
-----|-------------|------|-------
`requests_queued` | Number of requests outstanding on the disk at the time the performance data is collected | gauge | `volume`
`read_bytes_total` | Rate at which bytes are transferred from the disk during read operations | counter | `volume`
`reads_total` | Rate of read operations on the disk | counter | `volume`
`write_bytes_total` | Rate at which bytes are transferred to the disk during write operations | counter | `volume`
`writes_total` | Rate of write operations on the disk | counter | `volume`
`read_seconds_total` | Seconds the disk was busy servicing read requests | counter | `volume`
`write_seconds_total` | Seconds the disk was busy servicing write requests | counter | `volume`
`free_bytes` | Unused space of the disk in bytes | gauge | `volume`
`size_bytes` | Total size of the disk in bytes | gauge | `volume`
`idle_seconds_total` | Seconds the disk was idle (not servicing read/write requests) | counter | `volume`
`split_ios_total` | Number of I/Os to the disk split into multiple I/Os | counter | `volume`
### Example metric
Query the rate of write operations to a disk
```
rate(wmi_logical_disk_read_bytes_total{instance="localhost", volume=~"C:"}[2m])
```
## Useful queries
Calculate rate of total IOPS for disk
```
rate(wmi_logical_disk_reads_total{instance="localhost", volume="C:"}[2m]) + rate(wmi_logical_disk_writes_total{instance="localhost", volume="C:"}[2m])
```
## Alerting examples
**prometheus.rules**
```
groups:
- name: Windows Disk Alerts
rules:
# Sends an alert when disk space usage is above 95%
- alert: DiskSpaceUsage
expr: 100.0 - 100 * (wmi_logical_disk_free_bytes / wmi_logical_disk_size_bytes) > 95
for: 10m
labels:
severity: high
annotations:
summary: "Disk Space Usage (instance {{ $labels.instance }})"
description: "Disk Space on Drive is used more than 95%\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
# Alerts on disks with over 85% space usage predicted to fill within the next four days
- alert: DiskFilling
expr: 100 * (wmi_logical_disk_free_bytes / wmi_logical_disk_size_bytes) < 15 and predict_linear(wmi_logical_disk_free_bytes[6h], 4 * 24 * 3600) < 0
for: 10m
labels:
severity: warning
annotations:
summary: "Disk full in four days (instance {{ $labels.instance }})"
description: "{{ $labels.volume }} is expected to fill up within four days. Currently {{ $value | humanize }}% is available.\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
```

34
docs/collector.logon.md Normal file
View File

@@ -0,0 +1,34 @@
# logon collector
The logon collector exposes metrics detailing the active user logon sessions.
|||
-|-
Metric name prefix | `logon`
Classes | [`Win32_LogonSession`](https://docs.microsoft.com/en-us/windows/win32/cimwin32prov/win32-logonsession)
Enabled by default? | No
## Flags
None
## Metrics
Name | Description | Type | Labels
-----|-------------|------|-------
`wmi_logon_logon_type` | Number of active user logon sessions | gauge | status
### Example metric
Query the total number of interactive logon sessions
```
wmi_logon_logon_type{status="interactive"}
```
## Useful queries
Query the total number of local and remote (I.E. Terminal Services) interactive sessions.
```
wmi_logon_logon_type{status=~"interactive|remoteinteractive"}
```
## Alerting examples
_This collector does not yet have alerting examples, we would appreciate your help adding them!_

View File

@@ -5,6 +5,7 @@ The memory collector exposes metrics about system memory usage
|||
-|-
Metric name prefix | `memory`
Data source | Perflib
Classes | `Win32_PerfRawData_PerfOS_Memory`
Enabled by default? | Yes
@@ -19,25 +20,25 @@ Name | Description | Type | Labels
`wmi_cs_logical_processors` | Number of installed logical processors | gauge | None
`wmi_cs_physical_memory_bytes` | Total installed physical memory | gauge | None
`wmi_memory_available_bytes` | The amount of physical memory immediately available for allocation to a process or for system use. It is equal to the sum of memory assigned to the standby (cached), free and zero page lists | gauge | None
`wmi_memory_cache_bytes` | _Not yet documented_ | gauge | None
`wmi_memory_cache_bytes_peak` | _Not yet documented_ | gauge | None
`wmi_memory_cache_faults_total` | _Not yet documented_ | gauge | None
`wmi_memory_commit_limit` | _Not yet documented_ | gauge | None
`wmi_memory_committed_bytes` | _Not yet documented_ | gauge | None
`wmi_memory_cache_bytes` | Number of bytes currently being used by the file system cache | gauge | None
`wmi_memory_cache_bytes_peak` | Maximum number of CacheBytes after the system was last restarted | gauge | None
`wmi_memory_cache_faults_total` | Number of faults which occur when a page sought in the file system cache is not found there and must be retrieved from elsewhere in memory (soft fault) or from disk (hard fault) | gauge | None
`wmi_memory_commit_limit` | Amount of virtual memory, in bytes, that can be committed without having to extend the paging file(s) | gauge | None
`wmi_memory_committed_bytes` | Amount of committed virtual memory, in bytes | gauge | None
`wmi_memory_demand_zero_faults_total` | The number of zeroed pages required to satisfy faults. Zeroed pages, pages emptied of previously stored data and filled with zeros, are a security feature of Windows that prevent processes from seeing data stored by earlier processes that used the memory space | gauge | None
`wmi_memory_free_and_zero_page_list_bytes` | _Not yet documented_ | gauge | None
`wmi_memory_free_system_page_table_entries` | _Not yet documented_ | gauge | None
`wmi_memory_free_system_page_table_entries` | Number of page table entries not being used by the system | gauge | None
`wmi_memory_modified_page_list_bytes` | _Not yet documented_ | gauge | None
`wmi_memory_page_faults_total` | _Not yet documented_ | gauge | None
`wmi_memory_page_faults_total` | Overall rate at which faulted pages are handled by the processor | gauge | None
`wmi_memory_swap_page_reads_total` | Number of disk page reads (a single read operation reading several pages is still only counted once) | gauge | None
`wmi_memory_swap_pages_read_total` | Number of pages read across all page reads (ie counting all pages read even if they are read in a single operation) | gauge | None
`wmi_memory_swap_pages_written_total` | Number of pages written across all page writes (ie counting all pages written even if they are written in a single operation) | gauge | None
`wmi_memory_swap_page_operations_total` | Total number of swap page read and writes (PagesPersec) | gauge | None
`wmi_memory_swap_page_writes_total` | Number of disk page writes (a single write operation writing several pages is still only counted once) | gauge | None
`wmi_memory_pool_nonpaged_allocs_total` | The number of calls to allocate space in the nonpaged pool. The nonpaged pool is an area of system memory area for objects that cannot be written to disk, and must remain in physical memory as long as they are allocated | gauge | None
`wmi_memory_pool_nonpaged_bytes_total` | _Not yet documented_ | gauge | None
`wmi_memory_pool_paged_allocs_total` | _Not yet documented_ | gauge | None
`wmi_memory_pool_paged_bytes` | _Not yet documented_ | gauge | None
`wmi_memory_pool_nonpaged_bytes_total` | Number of bytes in the non-paged pool | gauge | None
`wmi_memory_pool_paged_allocs_total` | Number of calls to allocate space in the paged pool, regardless of the amount of space allocated in each call | gauge | None
`wmi_memory_pool_paged_bytes` | Number of bytes in the paged pool | gauge | None
`wmi_memory_pool_paged_resident_bytes` | _Not yet documented_ | gauge | None
`wmi_memory_standby_cache_core_bytes` | _Not yet documented_ | gauge | None
`wmi_memory_standby_cache_normal_priority_bytes` | _Not yet documented_ | gauge | None

View File

@@ -5,14 +5,14 @@ The mssql collector exposes metrics about the MSSQL server
|||
-|-
Metric name prefix | `mssql`
Classes | [`Win32_PerfRawData_MSSQLSERVER_SQLServerAccessMethods`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-access-methods-object)<br/>[`Win32_PerfRawData_MSSQLSERVER_SQLServerAvailabilityReplica`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-availability-replica)<br/>[`Win32_PerfRawData_MSSQLSERVER_SQLServerBufferManager`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-buffer-manager-object)<br/>[`Win32_PerfRawData_MSSQLSERVER_SQLServerDatabaseReplica`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-database-replica)<br/>[`Win32_PerfRawData_MSSQLSERVER_SQLServerDatabases`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-databases-object?view=sql-server-2017)<br/>[`Win32_PerfRawData_MSSQLSERVER_SQLServerGeneralStatistics`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-general-statistics-object)<br/>[`Win32_PerfRawData_MSSQLSERVER_SQLServerLocks`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-locks-object)<br/>[`Win32_PerfRawData_MSSQLSERVER_SQLServerMemoryManager`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-memory-manager-object)<br/>[`Win32_PerfRawData_MSSQLSERVER_SQLServerSQLStatistics`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-sql-statistics-object)
Classes | [`Win32_PerfRawData_MSSQLSERVER_SQLServerAccessMethods`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-access-methods-object)<br/>[`Win32_PerfRawData_MSSQLSERVER_SQLServerAvailabilityReplica`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-availability-replica)<br/>[`Win32_PerfRawData_MSSQLSERVER_SQLServerBufferManager`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-buffer-manager-object)<br/>[`Win32_PerfRawData_MSSQLSERVER_SQLServerDatabaseReplica`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-database-replica)<br/>[`Win32_PerfRawData_MSSQLSERVER_SQLServerDatabases`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-databases-object?view=sql-server-2017)<br/>[`Win32_PerfRawData_MSSQLSERVER_SQLServerGeneralStatistics`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-general-statistics-object)<br/>[`Win32_PerfRawData_MSSQLSERVER_SQLServerLocks`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-locks-object)<br/>[`Win32_PerfRawData_MSSQLSERVER_SQLServerMemoryManager`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-memory-manager-object)<br/>[`Win32_PerfRawData_MSSQLSERVER_SQLServerSQLStatistics`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-sql-statistics-object)<br/>[`Win32_PerfRawData_MSSQLSERVER_SQLServerSQLErrors`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-sql-errors-object)<br/>[`Win32_PerfRawData_MSSQLSERVER_SQLServerTransactions`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-transactions-object)
Enabled by default? | No
## Flags
### `--collectors.mssql.classes-enabled`
Comma-separated list of MSSQL WMI classes to use. Supported values are `accessmethods`, `availreplica`, `bufman`, `databases`, `dbreplica`, `genstats`, `locks`, `memmgr` and `sqlstats`.
Comma-separated list of MSSQL WMI classes to use. Supported values are `accessmethods`, `availreplica`, `bufman`, `databases`, `dbreplica`, `genstats`, `locks`, `memmgr`, `sqlstats`, `sqlerrors` and `transactions`.
### `--collectors.mssql.class-print`
@@ -230,6 +230,20 @@ Name | Description | Type | Labels
`wmi_mssql_sqlstats_sql_compilations` | _Not yet documented_ | counter | `instance`
`wmi_mssql_sqlstats_sql_recompilations` | _Not yet documented_ | counter | `instance`
`wmi_mssql_sqlstats_unsafe_auto_parameterization_attempts` | _Not yet documented_ | counter | `instance`
`wmi_mssql_sql_errors_total` | _Not yet documented_ | counter | `instance`, `resource`
`wmi_mssql_transactions_tempdb_free_space_bytes` | _Not yet documented_ | gauge | `instance`
`wmi_mssql_transactions_longest_transaction_running_seconds` | _Not yet documented_ | gauge | `instance`
`wmi_mssql_transactions_nonsnapshot_version_active_total` | _Not yet documented_ | counter | `instance`
`wmi_mssql_transactions_snapshot_active_total` | _Not yet documented_ | counter | `instance`
`wmi_mssql_transactions_active_total` | _Not yet documented_ | counter | `instance`
`wmi_mssql_transactions_update_conflicts_total` | _Not yet documented_ | counter | `instance`
`wmi_mssql_transactions_update_snapshot_active_total` | _Not yet documented_ | counter | `instance`
`wmi_mssql_transactions_version_cleanup_rate_bytes` | _Not yet documented_ | gauge | `instance`
`wmi_mssql_transactions_version_generation_rate_bytes` | _Not yet documented_ | gauge | `instance`
`wmi_mssql_transactions_version_store_size_bytes` | _Not yet documented_ | gauge | `instance`
`wmi_mssql_transactions_version_store_units` | _Not yet documented_ | counter | `instance`
`wmi_mssql_transactions_version_store_creation_units` | _Not yet documented_ | counter | `instance`
`wmi_mssql_transactions_version_store_truncation_units` | _Not yet documented_ | counter | `instance`
### Example metric
_This collector does not yet have explained examples, we would appreciate your help adding them!_

View File

@@ -5,6 +5,7 @@ The net collector exposes metrics about network interfaces
|||
-|-
Metric name prefix | `net`
Data source | Perflib
Classes | [`Win32_PerfRawData_Tcpip_NetworkInterface`](https://technet.microsoft.com/en-us/security/aa394340(v=vs.80))
Enabled by default? | Yes
@@ -22,24 +23,40 @@ If given, an interface name needs to *not* match the blacklist regexp in order f
Name | Description | Type | Labels
-----|-------------|------|-------
`wmi_net_bytes_received_total` | _Not yet documented_ | counter | `nic`
`wmi_net_bytes_sent_total` | _Not yet documented_ | counter | `nic`
`wmi_net_bytes_total` | _Not yet documented_ | counter | `nic`
`wmi_net_packets_outbound_discarded` | _Not yet documented_ | counter | `nic`
`wmi_net_packets_outbound_errors` | _Not yet documented_ | counter | `nic`
`wmi_net_packets_received_discarded` | _Not yet documented_ | counter | `nic`
`wmi_net_packets_received_errors` | _Not yet documented_ | counter | `nic`
`wmi_net_packets_received_total` | _Not yet documented_ | counter | `nic`
`wmi_net_packets_received_unknown` | _Not yet documented_ | counter | `nic`
`wmi_net_packets_total` | _Not yet documented_ | counter | `nic`
`wmi_net_packets_sent_total` | _Not yet documented_ | counter | `nic`
`wmi_net_current_bandwidth` | _Not yet documented_ | counter | `nic`
`wmi_net_bytes_received_total` | Total bytes received by interface | counter | `nic`
`wmi_net_bytes_sent_total` | Total bytes transmitted by interface | counter | `nic`
`wmi_net_bytes_total` | Total bytes received and transmitted by interface | counter | `nic`
`wmi_net_packets_outbound_discarded` | Total outbound packets that were chosen to be discarded even though no errors had been detected to prevent transmission | counter | `nic`
`wmi_net_packets_outbound_errors` | Total packets that could not be transmitted due to errors | counter | `nic`
`wmi_net_packets_received_discarded` | Total inbound packets that were chosen to be discarded even though no errors had been detected to prevent delivery | counter | `nic`
`wmi_net_packets_received_errors` | Total packets that could not be received due to errors | counter | `nic`
`wmi_net_packets_received_total` | Total packets received by interface | counter | `nic`
`wmi_net_packets_received_unknown` | Total packets received by interface that were discarded because of an unknown or unsupported protocol | counter | `nic`
`wmi_net_packets_total` | Total packets received and transmitted by interface | counter | `nic`
`wmi_net_packets_sent_total` | Total packets transmitted by interface | counter | `nic`
`wmi_net_current_bandwidth` | Estimate of the interface's current bandwidth in bits per second (bps) | gauge | `nic`
### Example metric
_This collector does not yet have explained examples, we would appreciate your help adding them!_
Query the rate of transmitted network traffic
```
rate(wmi_net_bytes_sent_total{instance="localhost"}[2m])
```
## Useful queries
_This collector does not yet have any useful queries added, we would appreciate your help adding them!_
Get total utilisation of network interface as a percentage
```
rate(wmi_net_bytes_total{instance="localhost", nic="Microsoft_Hyper_V_Network_Adapter__1"}[2m]) * 8 / wmi_net_current_bandwidth{instance="locahost", nic="Microsoft_Hyper_V_Network_Adapter__1"} * 100
```
## Alerting examples
_This collector does not yet have alerting examples, we would appreciate your help adding them!_
**prometheus.rules**
```
- alert: NetInterfaceUsage
expr: rate(wmi_net_bytes_total[2m]) * 8 / wmi_net_current_bandwidth * 100 > 90
for: 10m
labels:
severity: high
annotations:
summary: "Network Interface Usage (instance {{ $labels.instance }})"
description: "Network traffic usage is greater than 95% for interface {{ $labels.nic }}\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
```

View File

@@ -16,24 +16,50 @@ None
Name | Description | Type | Labels
-----|-------------|------|-------
`wmi_os_paging_limit_bytes` | _Not yet documented_ | gauge | None
`wmi_os_paging_free_bytes` | _Not yet documented_ | gauge | None
`wmi_os_physical_memory_free_bytes` | _Not yet documented_ | gauge | None
`wmi_os_time` | _Not yet documented_ | gauge | None
`wmi_os_timezone` | _Not yet documented_ | gauge | `timezone`
`wmi_os_processes` | _Not yet documented_ | gauge | None
`wmi_os_processes_limit` | _Not yet documented_ | gauge | None
`wmi_os_process_memory_limix_bytes` | _Not yet documented_ | gauge | None
`wmi_os_users` | _Not yet documented_ | gauge | None
`wmi_os_virtual_memory_bytes` | _Not yet documented_ | gauge | None
`wmi_os_visible_memory_bytes` | _Not yet documented_ | gauge | None
`wmi_os_virtual_memory_free_bytes` | _Not yet documented_ | gauge | None
`wmi_os_paging_limit_bytes` | Total number of bytes that can be sotred in the operating system paging files. 0 (zero) indicates that there are no paging files | gauge | None
`wmi_os_paging_free_bytes` | Number of bytes that can be mapped into the operating system paging files without causing any other pages to be swapped out | gauge | None
`wmi_os_physical_memory_free_bytes` | Bytes of physical memory currently unused and available | gauge | None
`wmi_os_time` | Current time as reported by the operating system, in [Unix time](https://en.wikipedia.org/wiki/Unix_time). See [time.Unix()](https://golang.org/pkg/time/#Unix) for details | gauge | None
`wmi_os_timezone` | Current timezone as reported by the operating system. See [time.Zone()](https://golang.org/pkg/time/#Time.Zone) for details | gauge | `timezone`
`wmi_os_processes` | Number of process contexts currently loaded or running on the operating system | gauge | None
`wmi_os_processes_limit` | Maximum number of process contexts the operating system can support. The default value set by the provider is 4294967295 (0xFFFFFFFF) | gauge | None
`wmi_os_process_memory_limit_bytes` | Maximum number of bytes of memory that can be allocated to a process | gauge | None
`wmi_os_users` | Number of user sessions for which the operating system is storing state information currently. For a list of current active logon sessions, see [`logon`](collector.logon.md) | gauge | None
`wmi_os_virtual_memory_bytes` | Bytes of virtual memory | gauge | None
`wmi_os_visible_memory_bytes` | Total bytes of physical memory available to the operating system. This value does not necessarily indicate the true amount of physical memory, but what is reported to the operating system as available to it | gauge | None
`wmi_os_virtual_memory_free_bytes` | Bytes of virtual memory currently unused and available | gauge | None
### Example metric
_This collector does not yet have explained examples, we would appreciate your help adding them!_
Show current number of processes
```
wmi_os_processes{instance="localhost"}
```
## Useful queries
_This collector does not yet have any useful queries added, we would appreciate your help adding them!_
Find all devices not set to UTC timezone
```
wmi_os_timezone{timezone != "UTC"}
```
## Alerting examples
_This collector does not yet have alerting examples, we would appreciate your help adding them!_
**prometheus.rules**
```
# Alert on hosts that have exhausted all available physical memory
- alert: MemoryExhausted
expr: wmi_os_physical_memory_free_bytes == 0
for: 10m
labels:
severity: high
annotations:
summary: "Host {{ $labels.instance }} is out of memory"
description: "{{ $labels.instance }} has exhausted all available physical memory"
# Alert on hosts with greater than 90% memory usage
- alert: MemoryLow
expr: 100 - 100 * wmi_os_physical_memory_free_bytes / wmi_cs_physical_memory_bytes > 90
for: 10m
labels:
severity: warning
annotations:
summary: "Memory usage for host {{ $labels.instance }} is greater than 90%"
```

View File

@@ -66,10 +66,42 @@ A service can have any of the following statuses:
Note that there is some overlap with service state.
### Example metric
_This collector does not yet have explained examples, we would appreciate your help adding them!_
Lists the services that have a 'disabled' start mode.
```
wmi_service_start_mode{exported_name=~"(mssqlserver|sqlserveragent)",start_mode="disabled"}
```
## Useful queries
_This collector does not yet have any useful queries added, we would appreciate your help adding them!_
Counts the number of Microsoft SQL Server/Agent Processes
```
count(wmi_service_state{exported_name=~"(sqlserveragent|mssqlserver)",state="running"})
```
## Alerting examples
_This collector does not yet have alerting examples, we would appreciate your help adding them!_
**prometheus.rules**
```
groups:
- name: Microsoft SQL Server Alerts
rules:
# Sends an alert when the 'sqlserveragent' service is not in the running state for 3 minutes.
- alert: SQL Server Agent DOWN
expr: wmi_service_state{instance="SQL",exported_name="sqlserveragent",state="running"} == 0
for: 3m
labels:
severity: high
annotations:
summary: "Service {{ $labels.exported_name }} down"
description: "Service {{ $labels.exported_name }} on instance {{ $labels.instance }} has been down for more than 3 minutes."
# Sends an alert when the 'mssqlserver' service is not in the running state for 3 minutes.
- alert: SQL Server DOWN
expr: wmi_service_state{instance="SQL",exported_name="mssqlserver",state="running"} == 0
for: 3m
labels:
severity: high
annotations:
summary: "Service {{ $labels.exported_name }} down"
description: "Service {{ $labels.exported_name }} on instance {{ $labels.instance }} has been down for more than 3 minutes."
```
In this example, `instance` is the target label of the host. So each alert will be processed per host, which is then used in the alert description.

View File

@@ -5,6 +5,7 @@ The system collector exposes metrics about ...
|||
-|-
Metric name prefix | `system`
Data source | Perflib
Classes | [`Win32_PerfRawData_PerfOS_System`](https://web.archive.org/web/20050830140516/http://msdn.microsoft.com/library/en-us/wmisdk/wmi/win32_perfrawdata_perfos_system.asp)
Enabled by default? | Yes
@@ -16,18 +17,24 @@ None
Name | Description | Type | Labels
-----|-------------|------|-------
`wmi_system_context_switches_total` | _Not yet documented_ | counter | None
`wmi_system_exception_dispatches_total` | _Not yet documented_ | counter | None
`wmi_system_processor_queue_length` | _Not yet documented_ | gauge | None
`wmi_system_system_calls_total` | _Not yet documented_ | counter | None
`wmi_system_system_up_time` | _Not yet documented_ | gauge | None
`wmi_system_threads` | _Not yet documented_ | gauge | None
`wmi_system_context_switches_total` | Total number of [context switches](https://en.wikipedia.org/wiki/Context_switch) | counter | None
`wmi_system_exception_dispatches_total` | Total exceptions dispatched by the system | counter | None
`wmi_system_processor_queue_length` | Number of threads in the processor queue. There is a single queue for processor time even on computers with multiple processors. | gauge | None
`wmi_system_system_calls_total` | Total combined calls to Windows NT system service routines by all processes running on the computer | counter | None
`wmi_system_system_up_time` | Time of last boot of system | gauge | None
`wmi_system_threads` | Number of Windows system [threads](https://en.wikipedia.org/wiki/Thread_(computing)) | gauge | None
### Example metric
_This collector does not yet have explained examples, we would appreciate your help adding them!_
Show current number of system threads
```
wmi_system_threads{instance="localhost"}
```
## Useful queries
_This collector does not yet have any useful queries added, we would appreciate your help adding them!_
Find hosts that have rebooted in the last 24 hours
```
time() - wmi_system_system_up_time < 86400
```
## Alerting examples
_This collector does not yet have alerting examples, we would appreciate your help adding them!_

View File

@@ -16,15 +16,15 @@ None
Name | Description | Type | Labels
-----|-------------|------|-------
`wmi_tcp_connection_failures` | _Not yet documented_ | counter | None
`wmi_tcp_connections_active` | _Not yet documented_ | counter | None
`wmi_tcp_connections_established` | _Not yet documented_ | counter | None
`wmi_tcp_connections_passive` | _Not yet documented_ | counter | None
`wmi_tcp_connections_reset` | _Not yet documented_ | counter | None
`wmi_tcp_segments_total` | _Not yet documented_ | counter | None
`wmi_tcp_segments_received_total` | _Not yet documented_ | counter | None
`wmi_tcp_segments_retransmitted_total` | _Not yet documented_ | counter | None
`wmi_tcp_segments_sent_total` | _Not yet documented_ | counter | None
`wmi_tcp_connection_failures` | Number of times TCP connections have made a direct transition to the CLOSED state from the SYN-SENT state or the SYN-RCVD state, plus the number of times TCP connections have made a direct transition from the SYN-RCVD state to the LISTEN state | counter | None
`wmi_tcp_connections_active` | Number of times TCP connections have made a direct transition from the CLOSED state to the SYN-SENT state.| counter | None
`wmi_tcp_connections_established` | Number of TCP connections for which the current state is either ESTABLISHED or CLOSE-WAIT. | counter | None
`wmi_tcp_connections_passive` | Number of times TCP connections have made a direct transition from the LISTEN state to the SYN-RCVD state. | counter | None
`wmi_tcp_connections_reset` | Number of times TCP connections have made a direct transition from the LISTEN state to the SYN-RCVD state. | counter | None
`wmi_tcp_segments_total` | Total segments sent or received using the TCP protocol | counter | None
`wmi_tcp_segments_received_total` | Total segments received, including those received in error. This count includes segments received on currently established connections | counter | None
`wmi_tcp_segments_retransmitted_total` | Total segments retransmitted. That is, segments transmitted that contain one or more previously transmitted bytes | counter | None
`wmi_tcp_segments_sent_total` | Total segments sent, including those on current connections, but excluding those containing *only* retransmitted bytes | counter | None
### Example metric
_This collector does not yet have explained examples, we would appreciate your help adding them!_

View File

@@ -12,7 +12,7 @@ Enabled by default? | Yes
### `--collector.textfile.directory`
The directory containing the files to be ingested. Only files with the extension `.prom` are read.
The directory containing the files to be ingested. Only files with the extension `.prom` are read. The `.prom` file must end with an empty line feed to work properly.
Default value: `C:\Program Files\wmi_exporter\textfile_inputs`

View File

@@ -0,0 +1,32 @@
# thermalzone collector
The thermalzone collector exposes metrics about system temps. Note that temperature is given in Kelvin
|||
-|-
Metric name prefix | `thermalzone`
Classes | [`Win32_PerfRawData_Counters_ThermalZoneInformation`](https://wutils.com/wmi/root/cimv2/win32_perfrawdata_counters_thermalzoneinformation/#temperature_properties)
Enabled by default? | No
## Flags
None
## Metrics
Name | Description | Type | Labels
-----|-------------|------|-------
`wmi_thermalzone_percent_passive_limit` | % Passive Limit is the current limit this thermal zone is placing on the devices it controls. A limit of 100% indicates the devices are unconstrained. A limit of 0% indicates the devices are fully constrained. | gauge | None
`wmi_thermalzone_temperature_celsius ` | Temperature of the thermal zone, in degrees Celsius. | gauge | None
`wmi_thermalzone_throttle_reasons ` | Throttle Reasons indicate reasons why the thermal zone is limiting performance of the devices it controls. 0x0 The zone is not throttled. 0x1 The zone is throttled for thermal reasons. 0x2 The zone is throttled to limit electrical current. | gauge | None
[`Throttle reasons` source](https://docs.microsoft.com/en-us/windows-hardware/design/device-experiences/examples--requirements-and-diagnostics)
### Example metric
_This collector does not yet have explained examples, we would appreciate your help adding them!_
## Useful queries
_This collector does not yet have any useful queries added, we would appreciate your help adding them!_
## Alerting examples
_This collector does not yet have alerting examples, we would appreciate your help adding them!_

View File

@@ -5,7 +5,10 @@ package main
import (
"fmt"
"net/http"
_ "net/http/pprof"
"os"
"sort"
"strconv"
"strings"
"sync"
"time"
@@ -52,6 +55,12 @@ var (
[]string{"collector"},
nil,
)
snapshotDuration = prometheus.NewDesc(
prometheus.BuildFQName(collector.Namespace, "exporter", "perflib_snapshot_duration_seconds"),
"Duration of perflib snapshot capture",
nil,
nil,
)
// This can be removed when client_golang exposes this on Windows
// (See https://github.com/prometheus/client_golang/issues/376)
@@ -71,84 +80,113 @@ func (coll WmiCollector) Describe(ch chan<- *prometheus.Desc) {
ch <- scrapeSuccessDesc
}
type collectorOutcome int
const (
pending collectorOutcome = iota
success
failed
)
// Collect sends the collected metrics from each of the collectors to
// prometheus.
func (coll WmiCollector) Collect(ch chan<- prometheus.Metric) {
scrapeContext, err := collector.PrepareScrapeContext()
if err != nil {
ch <- prometheus.NewInvalidMetric(scrapeSuccessDesc, fmt.Errorf("failed to prepare scrape: %v", err))
return
}
remainingCollectors := make(map[string]bool)
for name := range coll.collectors {
remainingCollectors[name] = true
}
metricsBuffer := make(chan prometheus.Metric)
allDone := make(chan struct{})
stopped := false
go func() {
for {
select {
case m := <-metricsBuffer:
if !stopped {
ch <- m
}
case <-allDone:
return
}
}
}()
wg := sync.WaitGroup{}
wg.Add(len(coll.collectors))
go func() {
wg.Wait()
close(allDone)
close(metricsBuffer)
}()
for name, c := range coll.collectors {
go func(name string, c collector.Collector) {
execute(name, c, scrapeContext, metricsBuffer)
wg.Done()
delete(remainingCollectors, name)
}(name, c)
}
ch <- prometheus.MustNewConstMetric(
startTimeDesc,
prometheus.CounterValue,
startTime,
)
t := time.Now()
scrapeContext, err := collector.PrepareScrapeContext()
ch <- prometheus.MustNewConstMetric(
snapshotDuration,
prometheus.GaugeValue,
time.Since(t).Seconds(),
)
if err != nil {
ch <- prometheus.NewInvalidMetric(scrapeSuccessDesc, fmt.Errorf("failed to prepare scrape: %v", err))
return
}
wg := sync.WaitGroup{}
wg.Add(len(coll.collectors))
collectorOutcomes := make(map[string]collectorOutcome)
for name := range coll.collectors {
collectorOutcomes[name] = pending
}
metricsBuffer := make(chan prometheus.Metric)
l := sync.Mutex{}
finished := false
go func() {
for m := range metricsBuffer {
l.Lock()
if !finished {
ch <- m
}
l.Unlock()
}
}()
for name, c := range coll.collectors {
go func(name string, c collector.Collector) {
defer wg.Done()
outcome := execute(name, c, scrapeContext, metricsBuffer)
l.Lock()
if !finished {
collectorOutcomes[name] = outcome
}
l.Unlock()
}(name, c)
}
allDone := make(chan struct{})
go func() {
wg.Wait()
close(allDone)
close(metricsBuffer)
}()
// Wait until either all collectors finish, or timeout expires
select {
case <-allDone:
stopped = true
return
case <-time.After(coll.maxScrapeDuration):
stopped = true
remainingCollectorNames := make([]string, 0, len(remainingCollectors))
for rc := range remainingCollectors {
remainingCollectorNames = append(remainingCollectorNames, rc)
}
log.Warn("Collection timed out, still waiting for ", remainingCollectorNames)
for name := range remainingCollectors {
ch <- prometheus.MustNewConstMetric(
scrapeSuccessDesc,
prometheus.GaugeValue,
0.0,
name,
)
ch <- prometheus.MustNewConstMetric(
scrapeTimeoutDesc,
prometheus.GaugeValue,
1.0,
name,
)
}
}
l.Lock()
finished = true
remainingCollectorNames := make([]string, 0)
for name, outcome := range collectorOutcomes {
var successValue, timeoutValue float64
if outcome == pending {
timeoutValue = 1.0
remainingCollectorNames = append(remainingCollectorNames, name)
}
if outcome == success {
successValue = 1.0
}
ch <- prometheus.MustNewConstMetric(
scrapeSuccessDesc,
prometheus.GaugeValue,
successValue,
name,
)
ch <- prometheus.MustNewConstMetric(
scrapeTimeoutDesc,
prometheus.GaugeValue,
timeoutValue,
name,
)
}
if len(remainingCollectorNames) > 0 {
log.Warn("Collection timed out, still waiting for ", remainingCollectorNames)
}
l.Unlock()
}
func filterAvailableCollectors(collectors string) string {
@@ -162,37 +200,23 @@ func filterAvailableCollectors(collectors string) string {
return strings.Join(availableCollectors, ",")
}
func execute(name string, c collector.Collector, ctx *collector.ScrapeContext, ch chan<- prometheus.Metric) {
begin := time.Now()
func execute(name string, c collector.Collector, ctx *collector.ScrapeContext, ch chan<- prometheus.Metric) collectorOutcome {
t := time.Now()
err := c.Collect(ctx, ch)
duration := time.Since(begin)
var success float64
if err != nil {
log.Errorf("collector %s failed after %fs: %s", name, duration.Seconds(), err)
success = 0
} else {
log.Debugf("collector %s succeeded after %fs.", name, duration.Seconds())
success = 1
}
duration := time.Since(t).Seconds()
ch <- prometheus.MustNewConstMetric(
scrapeDurationDesc,
prometheus.GaugeValue,
duration.Seconds(),
name,
)
ch <- prometheus.MustNewConstMetric(
scrapeSuccessDesc,
prometheus.GaugeValue,
success,
name,
)
ch <- prometheus.MustNewConstMetric(
scrapeTimeoutDesc,
prometheus.GaugeValue,
0.0,
duration,
name,
)
if err != nil {
log.Errorf("collector %s failed after %fs: %s", name, duration, err)
return failed
}
log.Debugf("collector %s succeeded after %fs.", name, duration)
return success
}
func expandEnabledCollectors(enabled string) []string {
@@ -229,10 +253,6 @@ func loadCollectors(list string) (map[string]collector.Collector, error) {
return collectors, nil
}
func init() {
prometheus.MustRegister(version.NewCollector("wmi_exporter"))
}
func initWbem() {
// This initialization prevents a memory leak on WMF 5+. See
// https://github.com/martinlindhe/wmi_exporter/issues/77 and linked issues
@@ -264,10 +284,10 @@ func main() {
"collectors.print",
"If true, print available collectors and exit.",
).Bool()
maxScrapeDuration = kingpin.Flag(
"scrape.max-duration",
"Time after which collectors are aborted during a scrape",
).Default("30s").Duration()
timeoutMargin = kingpin.Flag(
"scrape.timeout-margin",
"Seconds to subtract from the timeout allowed by the client. Tune to allow for overhead or high loads.",
).Default("0.5").Float64()
)
log.AddFlags(kingpin.CommandLine)
@@ -312,13 +332,17 @@ func main() {
log.Infof("Enabled collectors: %v", strings.Join(keys(collectors), ", "))
exporter := WmiCollector{
collectors: collectors,
maxScrapeDuration: *maxScrapeDuration,
h := &metricsHandler{
timeoutMargin: *timeoutMargin,
collectorFactory: func(timeout time.Duration) *WmiCollector {
return &WmiCollector{
collectors: collectors,
maxScrapeDuration: timeout,
}
},
}
prometheus.MustRegister(exporter)
http.Handle(*metricsPath, promhttp.Handler())
http.Handle(*metricsPath, h)
http.HandleFunc("/health", healthCheck)
http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
http.Redirect(w, r, *metricsPath, http.StatusMovedPermanently)
@@ -382,3 +406,36 @@ loop:
changes <- svc.Status{State: svc.StopPending}
return
}
type metricsHandler struct {
timeoutMargin float64
collectorFactory func(timeout time.Duration) *WmiCollector
}
func (mh *metricsHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
const defaultTimeout = 10.0
var timeoutSeconds float64
if v := r.Header.Get("X-Prometheus-Scrape-Timeout-Seconds"); v != "" {
var err error
timeoutSeconds, err = strconv.ParseFloat(v, 64)
if err != nil {
log.Warnf("Couldn't parse X-Prometheus-Scrape-Timeout-Seconds: %q. Defaulting timeout to %f", v, defaultTimeout)
}
}
if timeoutSeconds == 0 {
timeoutSeconds = defaultTimeout
}
timeoutSeconds = timeoutSeconds - mh.timeoutMargin
reg := prometheus.NewRegistry()
reg.MustRegister(mh.collectorFactory(time.Duration(timeoutSeconds * float64(time.Second))))
reg.MustRegister(
prometheus.NewProcessCollector(os.Getpid(), ""),
prometheus.NewGoCollector(),
version.NewCollector("wmi_exporter"),
)
h := promhttp.HandlerFor(reg, promhttp.HandlerOpts{})
h.ServeHTTP(w, r)
}

View File

@@ -20,7 +20,7 @@ else {
$members = $wmiObject `
| Get-Member -MemberType Properties `
| Where-Object { $_.Definition -Match '^u?int' -and $_.Name -NotMatch '_' } `
| Select-Object Name, @{Name="Type";Expression={$_.Definition.Split(" ")[0]}}
| Select-Object Name, @{Name="Type";Expression={$_.Definition.Split(" ")[0]}})
$input = @{
"Class"=$Class;
"CollectorName"=$CollectorName;

View File

@@ -29,7 +29,7 @@ func New{{ .CollectorName }}Collector() (Collector, error) {
}
// Collect sends the metric values for each metric
// to the provided prometheus Metric channel.
func (c *{{ .CollectorName }}Collector) Collect(ch chan<- prometheus.Metric) error {
func (c *{{ .CollectorName }}Collector) Collect(ctx *ScrapeContext, ch chan<- prometheus.Metric) error {
if desc, err := c.collect(ch); err != nil {
log.Error("failed collecting {{ .CollectorName | toLower }} metrics:", desc, err)
return err