Compare commits

...

61 Commits

Author SHA1 Message Date
Calle Pettersson
a6f3b33928 Merge pull request #394 from martinlindhe/fix-cgo-build-tag
Remove cgo build tag from container collector
2019-08-28 22:13:23 +02:00
Calle Pettersson
8ef215cc7e Remove cgo build tag from container collector 2019-08-28 22:06:20 +02:00
Calle Pettersson
a7b5cf7aa6 Merge pull request #389 from breed808/doc
Add collector documentation
2019-08-27 07:57:45 +02:00
Ben Reedy
719ccd4f7f Add documentation for system collector 2019-08-26 21:07:55 +10:00
Ben Reedy
7ab8c7dde4 Add documentation for net collector 2019-08-26 21:01:09 +10:00
Ben Reedy
eb002eb667 Add documentation for memory collector 2019-08-22 22:39:15 +10:00
Ben Reedy
a1638cdf4c Add query examples to cpu collector documentation 2019-08-22 22:06:34 +10:00
Ben Reedy
091406877a Add documentation for logical_disk collector 2019-08-22 21:53:44 +10:00
Ben Reedy
84970ac086 Add logon entry to collectors README 2019-08-22 21:53:41 +10:00
Calle Pettersson
d86f318010 Merge pull request #387 from breed808/logical-disk-new-counters
Add logical_disk latency metrics
2019-08-22 08:53:42 +02:00
Ben Reedy
853d615673 Add logical_disk latency metrics 2019-08-22 03:30:40 +00:00
Calle Pettersson
cd9a740e2b Merge pull request #384 from breed808/logon
Export logon sessions metric
2019-08-21 11:05:25 +02:00
Ben Reedy
c70e7674a5 Add logon collector documentation 2019-08-21 18:35:10 +10:00
Ben Reedy
d3e3835c29 Export user logon sessions
Use Win32_LogonSession class to provide user logon sessions by type.
2019-08-20 22:27:03 +10:00
Calle Pettersson
592c8a8d69 Merge pull request #376 from martinlindhe/fix-goroutine-leak
Fix goroutine leak
2019-08-09 10:24:36 +02:00
Calle Pettersson
6f6a479535 Fix goroutine leak 2019-08-08 21:09:21 +02:00
Calle Pettersson
d01c66986c Merge pull request #372 from callvirtual/features/sql-transactions-collector
Add support for MSSQL Transactions counters
2019-08-04 17:12:44 +02:00
Поляков Игорь
823ffb7597 Merge branch 'features/sql-transactions-collector' of https://github.com/callvirtual/wmi_exporter into features/sql-transactions-collector 2019-08-04 17:13:46 +03:00
Поляков Игорь
a90f9cda0f Change transaction_conflict_ratio to total 2019-08-04 17:13:41 +03:00
Igor Polyakov
31d4c28124 Merge branch 'master' into features/sql-transactions-collector 2019-08-04 15:50:20 +03:00
Calle Pettersson
e880889f07 Merge pull request #335 from martinlindhe/perflib-cpu
Use perflib for cpu collector
2019-08-04 14:30:46 +02:00
Поляков Игорь
a283608812 Fix review comments 2019-08-04 13:47:00 +03:00
Поляков Игорь
8251ddd176 Docs for mssql transactions 2019-08-04 03:04:00 +03:00
Поляков Игорь
4f0a3a89ab Counter struct field names now are strict equal to WMI names 2019-08-04 02:46:33 +03:00
Поляков Игорь
27cc1072fe Initial transactions collector implementation 2019-08-04 00:55:28 +03:00
Calle Pettersson
eb9cf56dee Merge pull request #370 from callvirtual/sql-errors-collector
Add support for SQL Errors counter
2019-08-03 22:23:15 +02:00
Calle Pettersson
3c20887433 Refactor timeout management 2019-08-03 21:34:19 +02:00
Поляков Игорь
37d1c4e958 Errors counter renamed 2019-08-03 20:24:02 +03:00
Calle Pettersson
33b6e17b2d Merge pull request #369 from callvirtual/fix-new-collector
Fix collector creation issue
2019-08-03 15:51:41 +02:00
Calle Pettersson
1a9d4afdd6 Update generator template 2019-08-03 15:10:27 +02:00
Calle Pettersson
9e198c55a4 Adapt thermalzone to new signature 2019-08-03 15:09:00 +02:00
Calle Pettersson
b309a05bde Merge remote-tracking branch 'origin/master' into perflib-cpu 2019-08-03 15:06:30 +02:00
Calle Pettersson
123a055242 Simple test of Perflib unmarshalling 2019-08-03 14:39:28 +02:00
Calle Pettersson
9308108284 Don't fail when a perflib counter isn't found 2019-08-03 13:15:45 +02:00
Calle Pettersson
0ecf3cd792 Merge pull request #340 from martinlindhe/timeout-scrapes
Abort scrapes after configurable timeout
2019-08-03 13:02:52 +02:00
Calle Pettersson
801444b35b Merge pull request #371 from martinlindhe/revert-364-master
Revert "Add container_name label to container collector"
2019-08-03 11:13:15 +02:00
Calle Pettersson
f4ab322e5b Keep the docs link 2019-08-03 11:12:56 +02:00
Calle Pettersson
72de199528 Revert "Add container_name label to container collector" 2019-08-03 11:10:35 +02:00
Поляков Игорь
304972580d Fix parenthesis 2019-08-01 18:01:33 +03:00
Поляков Игорь
6322bb124f Undo ps1 changes 2019-08-01 18:00:42 +03:00
Поляков Игорь
cb6a91b705 Typo fix 2019-08-01 17:58:44 +03:00
Поляков Игорь
4d9fb1be72 Added missing comments 2019-08-01 17:57:45 +03:00
Поляков Игорь
27e26037e3 Added collector for SQLErrors counter 2019-08-01 17:55:44 +03:00
Поляков Игорь
e09497116f Fix new-collector wmi members initialization 2019-08-01 14:44:07 +03:00
Calle Pettersson
3099e10555 Merge pull request #364 from Drewster727/master
Add container collector doc link to readme
2019-07-21 20:49:24 +02:00
Drewster727
3900504504 add container name 2019-07-20 13:00:23 -05:00
Drew McMinn
2c5e30d920 remove enabled by default for container collector in readme 2019-07-20 11:19:40 -05:00
Drew McMinn
b348c245e8 add readme link to container collector doc 2019-07-20 11:18:34 -05:00
Calle Pettersson
578bcc4959 Merge pull request #360 from chrisbloemker/master
adding windows service example alerts/queries
2019-07-16 13:14:54 +02:00
chrisbloemker
31a30474f1 changing label to instance to match common examples 2019-07-15 23:27:15 -04:00
Calle Pettersson
ce1005add8 Lower-case label name 2019-07-15 19:54:23 +02:00
Calle Pettersson
6107a59306 Merge pull request #359 from StewartThomson/thermalzone
Added thermal zone information and documentation
2019-07-15 19:52:36 +02:00
Stewart Thomson
47656b16bd - Removed HighPrecisionTemperature property and just mapped it to Temperature
- Converted decikelvin to Celsius
- Added a loop to get the values from each zone
- Added documentation for percent passive limit and throttle reasons
2019-07-15 09:50:02 -04:00
chrisbloemker
8fc47669be adding windows service example alerts/queries 2019-07-13 12:39:50 -04:00
Stewart Thomson
1a67ca54b6 Update collector.thermalzone.md
Removed references to tcp in collector.thermalzone.md
2019-07-13 08:55:10 -04:00
Stewart Thomson
c73f52338d Added thermal zone information and documentation 2019-07-12 14:54:20 -04:00
Calle Pettersson
c5f23b4e64 Fix float-format 2019-06-24 21:55:33 +02:00
Calle Pettersson
411954cf9d Create custom metrics endpoint to read timeout from request header 2019-06-23 22:01:43 +02:00
Calle Pettersson
56be7c63d5 Merge pull request #351 from sqlkabouter/master
Changed conversion from microseconds to seconds to match the descript…
2019-06-17 19:16:00 +02:00
Jody Treffers
6ffe504f7e Changed conversion from microseconds to seconds to match the description of the metric 2019-06-17 11:56:26 +02:00
Calle Pettersson
daa6f3d111 Support releases with prerelease tags 2019-05-25 14:43:36 +02:00
25 changed files with 1289 additions and 287 deletions

2
.gitignore vendored
View File

@@ -3,3 +3,5 @@ VERSION
*.swp *.swp
*.un~ *.un~
output/ output/
.vscode
.idea

View File

@@ -12,10 +12,12 @@ Name | Description | Enabled by default
[ad](docs/collector.ad.md) | Active Directory Domain Services | [ad](docs/collector.ad.md) | Active Directory Domain Services |
[cpu](docs/collector.cpu.md) | CPU usage | ✓ [cpu](docs/collector.cpu.md) | CPU usage | ✓
[cs](docs/collector.cs.md) | "Computer System" metrics (system properties, num cpus/total memory) | ✓ [cs](docs/collector.cs.md) | "Computer System" metrics (system properties, num cpus/total memory) | ✓
[container](docs/collector.container.md) | Container metrics |
[dns](docs/collector.dns.md) | DNS Server | [dns](docs/collector.dns.md) | DNS Server |
[hyperv](docs/collector.hyperv.md) | Hyper-V hosts | [hyperv](docs/collector.hyperv.md) | Hyper-V hosts |
[iis](docs/collector.iis.md) | IIS sites and applications | [iis](docs/collector.iis.md) | IIS sites and applications |
[logical_disk](docs/collector.logical_disk.md) | Logical disks, disk I/O | ✓ [logical_disk](docs/collector.logical_disk.md) | Logical disks, disk I/O | ✓
[logon](docs/collector.logon.md) | User logon sessions |
[memory](docs/collector.memory.md) | Memory usage metrics | [memory](docs/collector.memory.md) | Memory usage metrics |
[msmq](docs/collector.msmq.md) | MSMQ queues | [msmq](docs/collector.msmq.md) | MSMQ queues |
[mssql](docs/collector.mssql.md) | [SQL Server Performance Objects](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/use-sql-server-objects#SQLServerPOs) metrics | [mssql](docs/collector.mssql.md) | [SQL Server Performance Objects](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/use-sql-server-objects#SQLServerPOs) metrics |
@@ -33,6 +35,7 @@ Name | Description | Enabled by default
[service](docs/collector.service.md) | Service state metrics | ✓ [service](docs/collector.service.md) | Service state metrics | ✓
[system](docs/collector.system.md) | System calls | ✓ [system](docs/collector.system.md) | System calls | ✓
[tcp](docs/collector.tcp.md) | TCP connections | [tcp](docs/collector.tcp.md) | TCP connections |
[thermalzone](docs/collector.thermalzone.md) | Thermal information
[textfile](docs/collector.textfile.md) | Read prometheus metrics from a text file | ✓ [textfile](docs/collector.textfile.md) | Read prometheus metrics from a text file | ✓
[vmware](docs/collector.vmware.md) | Performance counters installed by the Vmware Guest agent | [vmware](docs/collector.vmware.md) | Performance counters installed by the Vmware Guest agent |

View File

@@ -40,12 +40,13 @@ after_build:
return return
} }
$ErrorActionPreference = "Stop" $ErrorActionPreference = "Stop"
$BuildVersion = Get-Content VERSION
# The MSI version is not semver compliant, so just take the numerical parts # The MSI version is not semver compliant, so just take the numerical parts
$Version = $env:APPVEYOR_REPO_TAG_NAME -replace '^v?([0-9\.]+).*$','$1' $MSIVersion = $env:APPVEYOR_REPO_TAG_NAME -replace '^v?([0-9\.]+).*$','$1'
foreach($Arch in "amd64","386") { foreach($Arch in "amd64","386") {
Write-Verbose "Building wmi_exporter $Version msi for $Arch" Write-Verbose "Building wmi_exporter $MSIVersion msi for $Arch"
.\installer\build.ps1 -PathToExecutable .\output\$Arch\wmi_exporter-$Version-$Arch.exe -Version $Version -Arch "$Arch" .\installer\build.ps1 -PathToExecutable .\output\$Arch\wmi_exporter-$BuildVersion-$Arch.exe -Version $MSIVersion -Arch "$Arch"
Move-Item installer\Output\wmi_exporter-$Version-$Arch.msi output\$Arch\ Move-Item installer\Output\wmi_exporter-$MSIVersion-$Arch.msi output\$Arch\
} }
- promu checksum output\ - promu checksum output\

View File

@@ -1,8 +1,12 @@
package collector package collector
import ( import (
"strconv"
"github.com/leoluk/perflib_exporter/perflib" "github.com/leoluk/perflib_exporter/perflib"
"github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/log"
"golang.org/x/sys/windows/registry"
) )
// ... // ...
@@ -15,6 +19,34 @@ const (
windowsEpoch = 116444736000000000 windowsEpoch = 116444736000000000
) )
// getWindowsVersion reads the version number of the OS from the Registry
// See https://docs.microsoft.com/en-us/windows/desktop/sysinfo/operating-system-version
func getWindowsVersion() float64 {
k, err := registry.OpenKey(registry.LOCAL_MACHINE, `SOFTWARE\Microsoft\Windows NT\CurrentVersion`, registry.QUERY_VALUE)
if err != nil {
log.Warn("Couldn't open registry", err)
return 0
}
defer func() {
err = k.Close()
if err != nil {
log.Warnf("Failed to close registry key: %v", err)
}
}()
currentv, _, err := k.GetStringValue("CurrentVersion")
if err != nil {
log.Warn("Couldn't open registry to determine current Windows version:", err)
return 0
}
currentv_flt, err := strconv.ParseFloat(currentv, 64)
log.Debugf("Detected Windows version %f\n", currentv_flt)
return currentv_flt
}
// Factories ... // Factories ...
var Factories = make(map[string]func() (Collector, error)) var Factories = make(map[string]func() (Collector, error))

View File

@@ -1,4 +1,4 @@
// +build windows,cgo // +build windows
package collector package collector

View File

@@ -3,46 +3,15 @@
package collector package collector
import ( import (
"strconv"
"strings" "strings"
"golang.org/x/sys/windows/registry"
"github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/log"
) )
func init() { func init() {
Factories["cpu"] = newCPUCollector Factories["cpu"] = newCPUCollector
} }
// A function to get Windows version from registry
func getWindowsVersion() float64 {
k, err := registry.OpenKey(registry.LOCAL_MACHINE, `SOFTWARE\Microsoft\Windows NT\CurrentVersion`, registry.QUERY_VALUE)
if err != nil {
log.Warn("Couldn't open registry", err)
return 0
}
defer func() {
err = k.Close()
if err != nil {
log.Warnf("Failed to close registry key: %v", err)
}
}()
currentv, _, err := k.GetStringValue("CurrentVersion")
if err != nil {
log.Warn("Couldn't open registry to determine current Windows version:", err)
return 0
}
currentv_flt, err := strconv.ParseFloat(currentv, 64)
log.Debugf("Detected Windows version %f\n", currentv_flt)
return currentv_flt
}
type cpuCollectorBasic struct { type cpuCollectorBasic struct {
CStateSecondsTotal *prometheus.Desc CStateSecondsTotal *prometheus.Desc
TimeTotal *prometheus.Desc TimeTotal *prometheus.Desc
@@ -67,10 +36,12 @@ func newCPUCollector() (Collector, error) {
const subsystem = "cpu" const subsystem = "cpu"
version := getWindowsVersion() version := getWindowsVersion()
// Windows version by number https://docs.microsoft.com/en-us/windows/desktop/sysinfo/operating-system-version // For Windows 2008 (version 6.0) or earlier we only have the "Processor"
// For Windows 2008 or earlier Windows version is 6.0 or lower, where we only have the older "Processor" counters // class. As of Windows 2008 R2 (version 6.1) the more detailed
// For Windows 2008 R2 or later Windows version is 6.1 or higher, so we can use "ProcessorInformation" counters // "ProcessorInformation" set is available (although some of the counters
// Value 6.05 was selected just to split between Windows versions // are added in later versions, so we aren't guaranteed to get all of
// them).
// Value 6.05 was selected to split between Windows versions.
if version < 6.05 { if version < 6.05 {
return &cpuCollectorBasic{ return &cpuCollectorBasic{
CStateSecondsTotal: prometheus.NewDesc( CStateSecondsTotal: prometheus.NewDesc(

View File

@@ -29,17 +29,20 @@ var (
// A LogicalDiskCollector is a Prometheus collector for WMI Win32_PerfRawData_PerfDisk_LogicalDisk metrics // A LogicalDiskCollector is a Prometheus collector for WMI Win32_PerfRawData_PerfDisk_LogicalDisk metrics
type LogicalDiskCollector struct { type LogicalDiskCollector struct {
RequestsQueued *prometheus.Desc RequestsQueued *prometheus.Desc
ReadBytesTotal *prometheus.Desc ReadBytesTotal *prometheus.Desc
ReadsTotal *prometheus.Desc ReadsTotal *prometheus.Desc
WriteBytesTotal *prometheus.Desc WriteBytesTotal *prometheus.Desc
WritesTotal *prometheus.Desc WritesTotal *prometheus.Desc
ReadTime *prometheus.Desc ReadTime *prometheus.Desc
WriteTime *prometheus.Desc WriteTime *prometheus.Desc
TotalSpace *prometheus.Desc TotalSpace *prometheus.Desc
FreeSpace *prometheus.Desc FreeSpace *prometheus.Desc
IdleTime *prometheus.Desc IdleTime *prometheus.Desc
SplitIOs *prometheus.Desc SplitIOs *prometheus.Desc
ReadLatency *prometheus.Desc
WriteLatency *prometheus.Desc
ReadWriteLatency *prometheus.Desc
volumeWhitelistPattern *regexp.Regexp volumeWhitelistPattern *regexp.Regexp
volumeBlacklistPattern *regexp.Regexp volumeBlacklistPattern *regexp.Regexp
@@ -127,6 +130,27 @@ func NewLogicalDiskCollector() (Collector, error) {
nil, nil,
), ),
ReadLatency: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "read_latency_seconds_total"),
"Shows the average time, in seconds, of a read operation from the disk (LogicalDisk.AvgDiskSecPerRead)",
[]string{"volume"},
nil,
),
WriteLatency: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "write_latency_seconds_total"),
"Shows the average time, in seconds, of a write operation to the disk (LogicalDisk.AvgDiskSecPerWrite)",
[]string{"volume"},
nil,
),
ReadWriteLatency: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "read_write_latency_seconds_total"),
"Shows the time, in seconds, of the average disk transfer (LogicalDisk.AvgDiskSecPerTransfer)",
[]string{"volume"},
nil,
),
volumeWhitelistPattern: regexp.MustCompile(fmt.Sprintf("^(?:%s)$", *volumeWhitelist)), volumeWhitelistPattern: regexp.MustCompile(fmt.Sprintf("^(?:%s)$", *volumeWhitelist)),
volumeBlacklistPattern: regexp.MustCompile(fmt.Sprintf("^(?:%s)$", *volumeBlacklist)), volumeBlacklistPattern: regexp.MustCompile(fmt.Sprintf("^(?:%s)$", *volumeBlacklist)),
}, nil }, nil
@@ -158,6 +182,9 @@ type Win32_PerfRawData_PerfDisk_LogicalDisk struct {
PercentFreeSpace_Base uint32 PercentFreeSpace_Base uint32
PercentIdleTime uint64 PercentIdleTime uint64
SplitIOPerSec uint32 SplitIOPerSec uint32
AvgDiskSecPerRead uint64
AvgDiskSecPerWrite uint64
AvgDiskSecPerTransfer uint64
} }
func (c *LogicalDiskCollector) collect(ch chan<- prometheus.Metric) (*prometheus.Desc, error) { func (c *LogicalDiskCollector) collect(ch chan<- prometheus.Metric) (*prometheus.Desc, error) {
@@ -250,6 +277,27 @@ func (c *LogicalDiskCollector) collect(ch chan<- prometheus.Metric) (*prometheus
float64(volume.SplitIOPerSec), float64(volume.SplitIOPerSec),
volume.Name, volume.Name,
) )
ch <- prometheus.MustNewConstMetric(
c.ReadLatency,
prometheus.CounterValue,
float64(volume.AvgDiskSecPerRead),
volume.Name,
)
ch <- prometheus.MustNewConstMetric(
c.WriteLatency,
prometheus.CounterValue,
float64(volume.AvgDiskSecPerWrite),
volume.Name,
)
ch <- prometheus.MustNewConstMetric(
c.ReadWriteLatency,
prometheus.CounterValue,
float64(volume.AvgDiskSecPerTransfer),
volume.Name,
)
} }
return nil, nil return nil, nil

199
collector/logon.go Normal file
View File

@@ -0,0 +1,199 @@
// +build windows
package collector
import (
"errors"
"github.com/StackExchange/wmi"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/log"
)
func init() {
Factories["logon"] = NewLogonCollector
}
// A LogonCollector is a Prometheus collector for WMI metrics
type LogonCollector struct {
LogonType *prometheus.Desc
}
// NewLogonCollector ...
func NewLogonCollector() (Collector, error) {
const subsystem = "logon"
return &LogonCollector{
LogonType: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "logon_type"),
"Number of active logon sessions (LogonSession.LogonType)",
[]string{"status"},
nil,
),
}, nil
}
// Collect sends the metric values for each metric
// to the provided prometheus Metric channel.
func (c *LogonCollector) Collect(ctx *ScrapeContext, ch chan<- prometheus.Metric) error {
if desc, err := c.collect(ch); err != nil {
log.Error("failed collecting user metrics:", desc, err)
return err
}
return nil
}
// Win32_LogonSession docs:
// - https://docs.microsoft.com/en-us/windows/win32/cimwin32prov/win32-logonsession
type Win32_LogonSession struct {
LogonType uint32
}
func (c *LogonCollector) collect(ch chan<- prometheus.Metric) (*prometheus.Desc, error) {
var dst []Win32_LogonSession
q := queryAll(&dst)
if err := wmi.Query(q, &dst); err != nil {
return nil, err
}
if len(dst) == 0 {
return nil, errors.New("WMI query returned empty result set")
}
// Init counters
system := 0
interactive := 0
network := 0
batch := 0
service := 0
proxy := 0
unlock := 0
networkcleartext := 0
newcredentials := 0
remoteinteractive := 0
cachedinteractive := 0
cachedremoteinteractive := 0
cachedunlock := 0
for _, entry := range dst {
switch entry.LogonType {
case 0:
system++
case 2:
interactive++
case 3:
network++
case 4:
batch++
case 5:
service++
case 6:
proxy++
case 7:
unlock++
case 8:
networkcleartext++
case 9:
newcredentials++
case 10:
remoteinteractive++
case 11:
cachedinteractive++
case 12:
cachedremoteinteractive++
case 13:
cachedunlock++
}
}
ch <- prometheus.MustNewConstMetric(
c.LogonType,
prometheus.GaugeValue,
float64(system),
"system",
)
ch <- prometheus.MustNewConstMetric(
c.LogonType,
prometheus.GaugeValue,
float64(interactive),
"interactive",
)
ch <- prometheus.MustNewConstMetric(
c.LogonType,
prometheus.GaugeValue,
float64(network),
"network",
)
ch <- prometheus.MustNewConstMetric(
c.LogonType,
prometheus.GaugeValue,
float64(batch),
"batch",
)
ch <- prometheus.MustNewConstMetric(
c.LogonType,
prometheus.GaugeValue,
float64(service),
"service",
)
ch <- prometheus.MustNewConstMetric(
c.LogonType,
prometheus.GaugeValue,
float64(proxy),
"proxy",
)
ch <- prometheus.MustNewConstMetric(
c.LogonType,
prometheus.GaugeValue,
float64(unlock),
"unlock",
)
ch <- prometheus.MustNewConstMetric(
c.LogonType,
prometheus.GaugeValue,
float64(networkcleartext),
"network_clear_text",
)
ch <- prometheus.MustNewConstMetric(
c.LogonType,
prometheus.GaugeValue,
float64(newcredentials),
"new_credentials",
)
ch <- prometheus.MustNewConstMetric(
c.LogonType,
prometheus.GaugeValue,
float64(remoteinteractive),
"remote_interactive",
)
ch <- prometheus.MustNewConstMetric(
c.LogonType,
prometheus.GaugeValue,
float64(cachedinteractive),
"cached_interactive",
)
ch <- prometheus.MustNewConstMetric(
c.LogonType,
prometheus.GaugeValue,
float64(remoteinteractive),
"cached_remote_interactive",
)
ch <- prometheus.MustNewConstMetric(
c.LogonType,
prometheus.GaugeValue,
float64(cachedunlock),
"cached_unlock",
)
return nil, nil
}

View File

@@ -91,7 +91,7 @@ func mssqlBuildWMIInstanceClass(suffix string, instance string) string {
type mssqlCollectorsMap map[string]mssqlCollectorFunc type mssqlCollectorsMap map[string]mssqlCollectorFunc
func mssqlAvailableClassCollectors() string { func mssqlAvailableClassCollectors() string {
return "accessmethods,availreplica,bufman,databases,dbreplica,genstats,locks,memmgr,sqlstats" return "accessmethods,availreplica,bufman,databases,dbreplica,genstats,locks,memmgr,sqlstats,sqlerrors,transactions"
} }
func (c *MSSQLCollector) getMSSQLCollectors() mssqlCollectorsMap { func (c *MSSQLCollector) getMSSQLCollectors() mssqlCollectorsMap {
@@ -105,6 +105,8 @@ func (c *MSSQLCollector) getMSSQLCollectors() mssqlCollectorsMap {
mssqlCollectors["locks"] = c.collectLocks mssqlCollectors["locks"] = c.collectLocks
mssqlCollectors["memmgr"] = c.collectMemoryManager mssqlCollectors["memmgr"] = c.collectMemoryManager
mssqlCollectors["sqlstats"] = c.collectSQLStats mssqlCollectors["sqlstats"] = c.collectSQLStats
mssqlCollectors["sqlerrors"] = c.collectSQLErrors
mssqlCollectors["transactions"] = c.collectTransactions
return mssqlCollectors return mssqlCollectors
} }
@@ -358,6 +360,24 @@ type MSSQLCollector struct {
SQLStatsSQLReCompilations *prometheus.Desc SQLStatsSQLReCompilations *prometheus.Desc
SQLStatsUnsafeAutoParams *prometheus.Desc SQLStatsUnsafeAutoParams *prometheus.Desc
// Win32_PerfRawData_{instance}_SQLServerSQLErrors
SQLErrorsTotal *prometheus.Desc
// Win32_PerfRawData_{instance}_SQLServerTransactions
TransactionsTempDbFreeSpaceBytes *prometheus.Desc
TransactionsLongestTransactionRunningSeconds *prometheus.Desc
TransactionsNonSnapshotVersionActiveTotal *prometheus.Desc
TransactionsSnapshotActiveTotal *prometheus.Desc
TransactionsActiveTotal *prometheus.Desc
TransactionsUpdateConflictsTotal *prometheus.Desc
TransactionsUpdateSnapshotActiveTotal *prometheus.Desc
TransactionsVersionCleanupRateBytes *prometheus.Desc
TransactionsVersionGenerationRateBytes *prometheus.Desc
TransactionsVersionStoreSizeBytes *prometheus.Desc
TransactionsVersionStoreUnits *prometheus.Desc
TransactionsVersionStoreCreationUnits *prometheus.Desc
TransactionsVersionStoreTruncationUnits *prometheus.Desc
mssqlInstances mssqlInstancesType mssqlInstances mssqlInstancesType
mssqlCollectors mssqlCollectorsMap mssqlCollectors mssqlCollectorsMap
mssqlChildCollectorFailure int mssqlChildCollectorFailure int
@@ -1637,6 +1657,94 @@ func NewMSSQLCollector() (Collector, error) {
nil, nil,
), ),
// Win32_PerfRawData_{instance}_SQLServerSQLErrors
SQLErrorsTotal: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "sql_errors_total"),
"(SQLErrors.Total)",
[]string{"instance", "resource"},
nil,
),
// Win32_PerfRawData_{instance}_SQLServerTransactions
TransactionsTempDbFreeSpaceBytes: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "transactions_tempdb_free_space_bytes"),
"(Transactions.FreeSpaceInTempDbKB)",
[]string{"instance"},
nil,
),
TransactionsLongestTransactionRunningSeconds: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "transactions_longest_transaction_running_seconds"),
"(Transactions.LongestTransactionRunningTime)",
[]string{"instance"},
nil,
),
TransactionsNonSnapshotVersionActiveTotal: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "transactions_nonsnapshot_version_active_total"),
"(Transactions.NonSnapshotVersionTransactions)",
[]string{"instance"},
nil,
),
TransactionsSnapshotActiveTotal: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "transactions_snapshot_active_total"),
"(Transactions.SnapshotTransactions)",
[]string{"instance"},
nil,
),
TransactionsActiveTotal: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "transactions_active_total"),
"(Transactions.Transactions)",
[]string{"instance"},
nil,
),
TransactionsUpdateConflictsTotal: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "transactions_update_conflicts_total"),
"(Transactions.UpdateConflictRatio)",
[]string{"instance"},
nil,
),
TransactionsUpdateSnapshotActiveTotal: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "transactions_update_snapshot_active_total"),
"(Transactions.UpdateSnapshotTransactions)",
[]string{"instance"},
nil,
),
TransactionsVersionCleanupRateBytes: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "transactions_version_cleanup_rate_bytes"),
"(Transactions.VersionCleanupRateKBs)",
[]string{"instance"},
nil,
),
TransactionsVersionGenerationRateBytes: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "transactions_version_generation_rate_bytes"),
"(Transactions.VersionGenerationRateKBs)",
[]string{"instance"},
nil,
),
TransactionsVersionStoreSizeBytes: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "transactions_version_store_size_bytes"),
"(Transactions.VersionStoreSizeKB)",
[]string{"instance"},
nil,
),
TransactionsVersionStoreUnits: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "transactions_version_store_units"),
"(Transactions.VersionStoreUnitCount)",
[]string{"instance"},
nil,
),
TransactionsVersionStoreCreationUnits: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "transactions_version_store_creation_units"),
"(Transactions.VersionStoreUnitCreation)",
[]string{"instance"},
nil,
),
TransactionsVersionStoreTruncationUnits: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "transactions_version_store_truncation_units"),
"(Transactions.VersionStoreUnitTruncation)",
[]string{"instance"},
nil,
),
mssqlInstances: getMSSQLInstances(), mssqlInstances: getMSSQLInstances(),
} }
@@ -2575,7 +2683,7 @@ func (c *MSSQLCollector) collectDatabaseReplica(ch chan<- prometheus.Metric, sql
ch <- prometheus.MustNewConstMetric( ch <- prometheus.MustNewConstMetric(
c.DBReplicaTransactionDelay, c.DBReplicaTransactionDelay,
prometheus.GaugeValue, prometheus.GaugeValue,
float64(v.TransactionDelay)*1000.0, float64(v.TransactionDelay)/1000.0,
sqlInstance, replicaName, sqlInstance, replicaName,
) )
} }
@@ -3558,3 +3666,162 @@ func (c *MSSQLCollector) collectSQLStats(ch chan<- prometheus.Metric, sqlInstanc
return nil, nil return nil, nil
} }
type win32PerfRawDataSQLServerSQLErrors struct {
Name string
ErrorsPersec uint64
}
// Win32_PerfRawData_MSSQLSERVER_SQLServerErrors docs:
// - https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-sql-errors-object
func (c *MSSQLCollector) collectSQLErrors(ch chan<- prometheus.Metric, sqlInstance string) (*prometheus.Desc, error) {
var dst []win32PerfRawDataSQLServerSQLErrors
log.Debugf("mssql_sqlerrors collector iterating sql instance %s.", sqlInstance)
class := mssqlBuildWMIInstanceClass("SQLErrors", sqlInstance)
q := queryAllForClassWhere(&dst, class, `Name <> '_Total'`)
if err := wmi.Query(q, &dst); err != nil {
return nil, err
}
for _, v := range dst {
resource := v.Name
ch <- prometheus.MustNewConstMetric(
c.SQLErrorsTotal,
prometheus.CounterValue,
float64(v.ErrorsPersec),
sqlInstance, resource,
)
}
return nil, nil
}
type win32PerfRawDataSqlServerTransactions struct {
FreeSpaceintempdbKB uint64
LongestTransactionRunningTime uint64
NonSnapshotVersionTransactions uint64
SnapshotTransactions uint64
Transactions uint64
Updateconflictratio uint64
UpdateSnapshotTransactions uint64
VersionCleanuprateKBPers uint64
VersionGenerationrateKBPers uint64
VersionStoreSizeKB uint64
VersionStoreunitcount uint64
VersionStoreunitcreation uint64
VersionStoreunittruncation uint64
}
// Win32_PerfRawData_MSSQLSERVER_Transactions docs:
// - https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-transactions-object
func (c *MSSQLCollector) collectTransactions(ch chan<- prometheus.Metric, sqlInstance string) (*prometheus.Desc, error) {
var dst []win32PerfRawDataSqlServerTransactions
log.Debugf("mssql_transactions collector iterating sql instance %s.", sqlInstance)
class := mssqlBuildWMIInstanceClass("Transactions", sqlInstance)
q := queryAllForClass(&dst, class)
if err := wmi.Query(q, &dst); err != nil {
return nil, err
}
if len(dst) == 0 {
return nil, errors.New("WMI query returned empty result set")
}
v := dst[0]
ch <- prometheus.MustNewConstMetric(
c.TransactionsTempDbFreeSpaceBytes,
prometheus.GaugeValue,
float64(v.FreeSpaceintempdbKB*1024),
sqlInstance,
)
ch <- prometheus.MustNewConstMetric(
c.TransactionsLongestTransactionRunningSeconds,
prometheus.GaugeValue,
float64(v.LongestTransactionRunningTime),
sqlInstance,
)
ch <- prometheus.MustNewConstMetric(
c.TransactionsNonSnapshotVersionActiveTotal,
prometheus.CounterValue,
float64(v.NonSnapshotVersionTransactions),
sqlInstance,
)
ch <- prometheus.MustNewConstMetric(
c.TransactionsSnapshotActiveTotal,
prometheus.CounterValue,
float64(v.SnapshotTransactions),
sqlInstance,
)
ch <- prometheus.MustNewConstMetric(
c.TransactionsActiveTotal,
prometheus.CounterValue,
float64(v.Transactions),
sqlInstance,
)
ch <- prometheus.MustNewConstMetric(
c.TransactionsUpdateConflictsTotal,
prometheus.CounterValue,
float64(v.Updateconflictratio),
sqlInstance,
)
ch <- prometheus.MustNewConstMetric(
c.TransactionsUpdateSnapshotActiveTotal,
prometheus.CounterValue,
float64(v.UpdateSnapshotTransactions),
sqlInstance,
)
ch <- prometheus.MustNewConstMetric(
c.TransactionsVersionCleanupRateBytes,
prometheus.GaugeValue,
float64(v.VersionCleanuprateKBPers*1024),
sqlInstance,
)
ch <- prometheus.MustNewConstMetric(
c.TransactionsVersionGenerationRateBytes,
prometheus.GaugeValue,
float64(v.VersionGenerationrateKBPers*1024),
sqlInstance,
)
ch <- prometheus.MustNewConstMetric(
c.TransactionsVersionStoreSizeBytes,
prometheus.GaugeValue,
float64(v.VersionStoreSizeKB*1024),
sqlInstance,
)
ch <- prometheus.MustNewConstMetric(
c.TransactionsVersionStoreUnits,
prometheus.CounterValue,
float64(v.VersionStoreunitcount),
sqlInstance,
)
ch <- prometheus.MustNewConstMetric(
c.TransactionsVersionStoreCreationUnits,
prometheus.CounterValue,
float64(v.VersionStoreunitcreation),
sqlInstance,
)
ch <- prometheus.MustNewConstMetric(
c.TransactionsVersionStoreTruncationUnits,
prometheus.CounterValue,
float64(v.VersionStoreunittruncation),
sqlInstance,
)
return nil, nil
}

View File

@@ -63,11 +63,14 @@ func unmarshalObject(obj *perflib.PerfObject, vs interface{}) error {
ctr, found := counters[tag] ctr, found := counters[tag]
if !found { if !found {
log.Debugf("missing counter %q, has %v", tag, counters) log.Debugf("missing counter %q, have %v", tag, counterMapKeys(counters))
return fmt.Errorf("could not find counter %q on instance", tag) continue
} }
if !target.Field(i).CanSet() { if !target.Field(i).CanSet() {
return fmt.Errorf("tagged field %v cannot be written to", f) return fmt.Errorf("tagged field %v cannot be written to", f.Name)
}
if fieldType := target.Field(i).Type(); fieldType != reflect.TypeOf((*float64)(nil)).Elem() {
return fmt.Errorf("tagged field %v has wrong type %v, must be float64", f.Name, fieldType)
} }
switch ctr.Def.CounterType { switch ctr.Def.CounterType {
@@ -87,3 +90,11 @@ func unmarshalObject(obj *perflib.PerfObject, vs interface{}) error {
return nil return nil
} }
func counterMapKeys(m map[string]*perflib.PerfCounter) []string {
keys := make([]string, 0, len(m))
for k := range m {
keys = append(keys, k)
}
return keys
}

125
collector/perflib_test.go Normal file
View File

@@ -0,0 +1,125 @@
package collector
import (
"reflect"
"testing"
perflibCollector "github.com/leoluk/perflib_exporter/collector"
"github.com/leoluk/perflib_exporter/perflib"
)
type simple struct {
ValA float64 `perflib:"Something"`
ValB float64 `perflib:"Something Else"`
}
func TestUnmarshalPerflib(t *testing.T) {
cases := []struct {
name string
obj *perflib.PerfObject
expectedOutput []simple
expectError bool
}{
{
name: "nil check",
obj: nil,
expectedOutput: []simple{},
expectError: true,
},
{
name: "Simple",
obj: &perflib.PerfObject{
Instances: []*perflib.PerfInstance{
{
Counters: []*perflib.PerfCounter{
{
Def: &perflib.PerfCounterDef{
Name: "Something",
CounterType: perflibCollector.PERF_COUNTER_COUNTER,
},
Value: 123,
},
},
},
},
},
expectedOutput: []simple{{ValA: 123}},
expectError: false,
},
{
name: "Multiple properties",
obj: &perflib.PerfObject{
Instances: []*perflib.PerfInstance{
{
Counters: []*perflib.PerfCounter{
{
Def: &perflib.PerfCounterDef{
Name: "Something",
CounterType: perflibCollector.PERF_COUNTER_COUNTER,
},
Value: 123,
},
{
Def: &perflib.PerfCounterDef{
Name: "Something Else",
CounterType: perflibCollector.PERF_COUNTER_COUNTER,
},
Value: 256,
},
},
},
},
},
expectedOutput: []simple{{ValA: 123, ValB: 256}},
expectError: false,
},
{
name: "Multiple instances",
obj: &perflib.PerfObject{
Instances: []*perflib.PerfInstance{
{
Counters: []*perflib.PerfCounter{
{
Def: &perflib.PerfCounterDef{
Name: "Something",
CounterType: perflibCollector.PERF_COUNTER_COUNTER,
},
Value: 321,
},
},
},
{
Counters: []*perflib.PerfCounter{
{
Def: &perflib.PerfCounterDef{
Name: "Something",
CounterType: perflibCollector.PERF_COUNTER_COUNTER,
},
Value: 231,
},
},
},
},
},
expectedOutput: []simple{{ValA: 321}, {ValA: 231}},
expectError: false,
},
}
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
output := make([]simple, 0)
err := unmarshalObject(c.obj, &output)
if err != nil && !c.expectError {
t.Errorf("Did not expect error, got %q", err)
}
if err == nil && c.expectError {
t.Errorf("Expected an error, but got ok")
}
if err == nil && !reflect.DeepEqual(output, c.expectedOutput) {
t.Errorf("Output mismatch, expected %+v, got %+v", c.expectedOutput, output)
}
})
}
}

103
collector/thermalzone.go Normal file
View File

@@ -0,0 +1,103 @@
package collector
import (
"github.com/StackExchange/wmi"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/log"
)
func init() {
Factories["thermalzone"] = NewThermalZoneCollector
}
// A thermalZoneCollector is a Prometheus collector for WMI Win32_PerfRawData_Counters_ThermalZoneInformation metrics
type thermalZoneCollector struct {
PercentPassiveLimit *prometheus.Desc
Temperature *prometheus.Desc
ThrottleReasons *prometheus.Desc
}
// NewThermalZoneCollector ...
func NewThermalZoneCollector() (Collector, error) {
const subsystem = "thermalzone"
return &thermalZoneCollector{
Temperature: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "temperature_celsius"),
"(Temperature)",
[]string{
"name",
},
nil,
),
PercentPassiveLimit: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "percent_passive_limit"),
"(PercentPassiveLimit)",
[]string{
"name",
},
nil,
),
ThrottleReasons: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "throttle_reasons"),
"(ThrottleReasons)",
[]string{
"name",
},
nil,
),
}, nil
}
// Collect sends the metric values for each metric
// to the provided prometheus Metric channel.
func (c *thermalZoneCollector) Collect(ctx *ScrapeContext, ch chan<- prometheus.Metric) error {
if desc, err := c.collect(ch); err != nil {
log.Error("failed collecting thermalzone metrics:", desc, err)
return err
}
return nil
}
// Win32_PerfRawData_Counters_ThermalZoneInformation docs:
// https://wutils.com/wmi/root/cimv2/win32_perfrawdata_counters_thermalzoneinformation/
type Win32_PerfRawData_Counters_ThermalZoneInformation struct {
Name string
HighPrecisionTemperature uint32
PercentPassiveLimit uint32
ThrottleReasons uint32
}
func (c *thermalZoneCollector) collect(ch chan<- prometheus.Metric) (*prometheus.Desc, error) {
var dst []Win32_PerfRawData_Counters_ThermalZoneInformation
q := queryAll(&dst)
if err := wmi.Query(q, &dst); err != nil {
return nil, err
}
for _, info := range dst {
//Divide by 10 and subtract 273.15 to convert decikelvin to celsius
ch <- prometheus.MustNewConstMetric(
c.Temperature,
prometheus.GaugeValue,
(float64(info.HighPrecisionTemperature)/10.0)-273.15,
info.Name,
)
ch <- prometheus.MustNewConstMetric(
c.PercentPassiveLimit,
prometheus.GaugeValue,
float64(info.PercentPassiveLimit),
info.Name,
)
ch <- prometheus.MustNewConstMetric(
c.ThrottleReasons,
prometheus.GaugeValue,
float64(info.ThrottleReasons),
info.Name,
)
}
return nil, nil
}

View File

@@ -9,6 +9,7 @@ This directory contains documentation of the collectors in the WMI exporter, wit
- [`hyperv`](collector.hyperv.md) - [`hyperv`](collector.hyperv.md)
- [`iis`](collector.iis.md) - [`iis`](collector.iis.md)
- [`logical_disk`](collector.logical_disk.md) - [`logical_disk`](collector.logical_disk.md)
- [`logon`](collector.logon.md)
- [`memory`](collector.memory.md) - [`memory`](collector.memory.md)
- [`msmq`](collector.msmq.md) - [`msmq`](collector.msmq.md)
- [`mssql`](collector.mssql.md) - [`mssql`](collector.mssql.md)

View File

@@ -34,10 +34,27 @@ Name | Description | Type | Labels
`wmi_cpu_processor_performance` | Processor Performance is the average performance of the processor while it is executing instructions, as a percentage of the nominal performance of the processor. On some processors, Processor Performance may exceed 100% | `gauge` `wmi_cpu_processor_performance` | Processor Performance is the average performance of the processor while it is executing instructions, as a percentage of the nominal performance of the processor. On some processors, Processor Performance may exceed 100% | `gauge`
### Example metric ### Example metric
_This collector does not yet have explained examples, we would appreciate your help adding them!_ Show frequency of host CPU cores
```
wmi_cpu_core_frequency_mhz{instance="localhost"}
```
## Useful queries ## Useful queries
_This collector does not yet have any useful queries added, we would appreciate your help adding them!_ Show cpu usage by mode.
```
sum by (mode) (irate(wmi_cpu_time_total{instance="localhost"}[5m]))
```
## Alerting examples ## Alerting examples
_This collector does not yet have alerting examples, we would appreciate your help adding them!_ **prometheus.rules**
```
# Alert on hosts with more than 80% CPU usage over a 10 minute period
- alert: CpuUsage
expr: 100 - (avg by (instance) (irate(wmi_cpu_time_total{mode="idle"}[2m])) * 100) > 80
for: 10m
labels:
severity: warning
annotations:
summary: "CPU Usage (instance {{ $labels.instance }})"
description: "CPU Usage is more than 80%\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
```

View File

@@ -22,23 +22,54 @@ If given, a disk needs to *not* match the blacklist regexp in order for the corr
Name | Description | Type | Labels Name | Description | Type | Labels
-----|-------------|------|------- -----|-------------|------|-------
`requests_queued` | _Not yet documented_ | gauge | `volume` `requests_queued` | Number of requests outstanding on the disk at the time the performance data is collected | gauge | `volume`
`read_bytes_total` | _Not yet documented_ | counter | `volume` `read_bytes_total` | Rate at which bytes are transferred from the disk during read operations | counter | `volume`
`reads_total` | _Not yet documented_ | counter | `volume` `reads_total` | Rate of read operations on the disk | counter | `volume`
`write_bytes_total` | _Not yet documented_ | counter | `volume` `write_bytes_total` | Rate at which bytes are transferred to the disk during write operations | counter | `volume`
`writes_total` | _Not yet documented_ | counter | `volume` `writes_total` | Rate of write operations on the disk | counter | `volume`
`read_seconds_total` | _Not yet documented_ | counter | `volume` `read_seconds_total` | Seconds the disk was busy servicing read requests | counter | `volume`
`write_seconds_total` | _Not yet documented_ | counter | `volume` `write_seconds_total` | Seconds the disk was busy servicing write requests | counter | `volume`
`free_bytes` | _Not yet documented_ | gauge | `volume` `free_bytes` | Unused space of the disk in bytes | gauge | `volume`
`size_bytes` | _Not yet documented_ | gauge | `volume` `size_bytes` | Total size of the disk in bytes | gauge | `volume`
`idle_seconds_total` | _Not yet documented_ | counter | `volume` `idle_seconds_total` | Seconds the disk was idle (not servicing read/write requests) | counter | `volume`
`split_ios_total` | _Not yet documented_ | counter | `volume` `split_ios_total` | Number of I/Os to the disk split into multiple I/Os | counter | `volume`
### Example metric ### Example metric
_This collector does not yet have explained examples, we would appreciate your help adding them!_ Query the rate of write operations to a disk
```
rate(wmi_logical_disk_read_bytes_total{instance="localhost", volume=~"C:"}[2m])
```
## Useful queries ## Useful queries
_This collector does not yet have any useful queries added, we would appreciate your help adding them!_ Calculate rate of total IOPS for disk
```
rate(wmi_logical_disk_reads_total{instance="localhost", volume="C:"}[2m]) + rate(wmi_logical_disk_writes_total{instance="localhost", volume="C:"}[2m])
```
## Alerting examples ## Alerting examples
_This collector does not yet have alerting examples, we would appreciate your help adding them!_ **prometheus.rules**
```
groups:
- name: Windows Disk Alerts
rules:
# Sends an alert when disk space usage is above 95%
- alert: DiskSpaceUsage
expr: 100.0 - 100 * (wmi_logical_disk_free_bytes / wmi_logical_disk_size_bytes) > 95
for: 10m
labels:
severity: high
annotations:
summary: "Disk Space Usage (instance {{ $labels.instance }})"
description: "Disk Space on Drive is used more than 95%\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
# Alerts on disks with over 85% space usage predicted to fill within the next four days
- alert: DiskFilling
expr: 100 * (wmi_logical_disk_free_bytes / wmi_logical_disk_size_bytes) < 15 and predict_linear(wmi_logical_disk_free_bytes[6h], 4 * 24 * 3600) < 0
for: 10m
labels:
severity: warning
annotations:
summary: "Disk full in four days (instance {{ $labels.instance }})"
description: "{{ $labels.volume }} is expected to fill up within four days. Currently {{ $value | humanize }}% is available.\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
```

34
docs/collector.logon.md Normal file
View File

@@ -0,0 +1,34 @@
# logon collector
The logon collector exposes metrics detailing the active user logon sessions.
|||
-|-
Metric name prefix | `logon`
Classes | [`Win32_LogonSession`](https://docs.microsoft.com/en-us/windows/win32/cimwin32prov/win32-logonsession)
Enabled by default? | No
## Flags
None
## Metrics
Name | Description | Type | Labels
-----|-------------|------|-------
`wmi_logon_logon_type` | Number of active user logon sessions | gauge | status
### Example metric
Query the total number of interactive logon sessions
```
wmi_logon_logon_type{status="interactive"}
```
## Useful queries
Query the total number of local and remote (I.E. Terminal Services) interactive sessions.
```
wmi_logon_logon_type{status=~"interactive|remoteinteractive"}
```
## Alerting examples
_This collector does not yet have alerting examples, we would appreciate your help adding them!_

View File

@@ -19,25 +19,25 @@ Name | Description | Type | Labels
`wmi_cs_logical_processors` | Number of installed logical processors | gauge | None `wmi_cs_logical_processors` | Number of installed logical processors | gauge | None
`wmi_cs_physical_memory_bytes` | Total installed physical memory | gauge | None `wmi_cs_physical_memory_bytes` | Total installed physical memory | gauge | None
`wmi_memory_available_bytes` | The amount of physical memory immediately available for allocation to a process or for system use. It is equal to the sum of memory assigned to the standby (cached), free and zero page lists | gauge | None `wmi_memory_available_bytes` | The amount of physical memory immediately available for allocation to a process or for system use. It is equal to the sum of memory assigned to the standby (cached), free and zero page lists | gauge | None
`wmi_memory_cache_bytes` | _Not yet documented_ | gauge | None `wmi_memory_cache_bytes` | Number of bytes currently being used by the file system cache | gauge | None
`wmi_memory_cache_bytes_peak` | _Not yet documented_ | gauge | None `wmi_memory_cache_bytes_peak` | Maximum number of CacheBytes after the system was last restarted | gauge | None
`wmi_memory_cache_faults_total` | _Not yet documented_ | gauge | None `wmi_memory_cache_faults_total` | Number of faults which occur when a page sought in the file system cache is not found there and must be retrieved from elsewhere in memory (soft fault) or from disk (hard fault) | gauge | None
`wmi_memory_commit_limit` | _Not yet documented_ | gauge | None `wmi_memory_commit_limit` | Amount of virtual memory, in bytes, that can be committed without having to extend the paging file(s) | gauge | None
`wmi_memory_committed_bytes` | _Not yet documented_ | gauge | None `wmi_memory_committed_bytes` | Amount of committed virtual memory, in bytes | gauge | None
`wmi_memory_demand_zero_faults_total` | The number of zeroed pages required to satisfy faults. Zeroed pages, pages emptied of previously stored data and filled with zeros, are a security feature of Windows that prevent processes from seeing data stored by earlier processes that used the memory space | gauge | None `wmi_memory_demand_zero_faults_total` | The number of zeroed pages required to satisfy faults. Zeroed pages, pages emptied of previously stored data and filled with zeros, are a security feature of Windows that prevent processes from seeing data stored by earlier processes that used the memory space | gauge | None
`wmi_memory_free_and_zero_page_list_bytes` | _Not yet documented_ | gauge | None `wmi_memory_free_and_zero_page_list_bytes` | _Not yet documented_ | gauge | None
`wmi_memory_free_system_page_table_entries` | _Not yet documented_ | gauge | None `wmi_memory_free_system_page_table_entries` | Number of page table entries not being used by the system | gauge | None
`wmi_memory_modified_page_list_bytes` | _Not yet documented_ | gauge | None `wmi_memory_modified_page_list_bytes` | _Not yet documented_ | gauge | None
`wmi_memory_page_faults_total` | _Not yet documented_ | gauge | None `wmi_memory_page_faults_total` | Overall rate at which faulted pages are handled by the processor | gauge | None
`wmi_memory_swap_page_reads_total` | Number of disk page reads (a single read operation reading several pages is still only counted once) | gauge | None `wmi_memory_swap_page_reads_total` | Number of disk page reads (a single read operation reading several pages is still only counted once) | gauge | None
`wmi_memory_swap_pages_read_total` | Number of pages read across all page reads (ie counting all pages read even if they are read in a single operation) | gauge | None `wmi_memory_swap_pages_read_total` | Number of pages read across all page reads (ie counting all pages read even if they are read in a single operation) | gauge | None
`wmi_memory_swap_pages_written_total` | Number of pages written across all page writes (ie counting all pages written even if they are written in a single operation) | gauge | None `wmi_memory_swap_pages_written_total` | Number of pages written across all page writes (ie counting all pages written even if they are written in a single operation) | gauge | None
`wmi_memory_swap_page_operations_total` | Total number of swap page read and writes (PagesPersec) | gauge | None `wmi_memory_swap_page_operations_total` | Total number of swap page read and writes (PagesPersec) | gauge | None
`wmi_memory_swap_page_writes_total` | Number of disk page writes (a single write operation writing several pages is still only counted once) | gauge | None `wmi_memory_swap_page_writes_total` | Number of disk page writes (a single write operation writing several pages is still only counted once) | gauge | None
`wmi_memory_pool_nonpaged_allocs_total` | The number of calls to allocate space in the nonpaged pool. The nonpaged pool is an area of system memory area for objects that cannot be written to disk, and must remain in physical memory as long as they are allocated | gauge | None `wmi_memory_pool_nonpaged_allocs_total` | The number of calls to allocate space in the nonpaged pool. The nonpaged pool is an area of system memory area for objects that cannot be written to disk, and must remain in physical memory as long as they are allocated | gauge | None
`wmi_memory_pool_nonpaged_bytes_total` | _Not yet documented_ | gauge | None `wmi_memory_pool_nonpaged_bytes_total` | Number of bytes in the non-paged pool | gauge | None
`wmi_memory_pool_paged_allocs_total` | _Not yet documented_ | gauge | None `wmi_memory_pool_paged_allocs_total` | Number of calls to allocate space in the paged pool, regardless of the amount of space allocated in each call | gauge | None
`wmi_memory_pool_paged_bytes` | _Not yet documented_ | gauge | None `wmi_memory_pool_paged_bytes` | Number of bytes in the paged pool | gauge | None
`wmi_memory_pool_paged_resident_bytes` | _Not yet documented_ | gauge | None `wmi_memory_pool_paged_resident_bytes` | _Not yet documented_ | gauge | None
`wmi_memory_standby_cache_core_bytes` | _Not yet documented_ | gauge | None `wmi_memory_standby_cache_core_bytes` | _Not yet documented_ | gauge | None
`wmi_memory_standby_cache_normal_priority_bytes` | _Not yet documented_ | gauge | None `wmi_memory_standby_cache_normal_priority_bytes` | _Not yet documented_ | gauge | None

View File

@@ -5,14 +5,14 @@ The mssql collector exposes metrics about the MSSQL server
||| |||
-|- -|-
Metric name prefix | `mssql` Metric name prefix | `mssql`
Classes | [`Win32_PerfRawData_MSSQLSERVER_SQLServerAccessMethods`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-access-methods-object)<br/>[`Win32_PerfRawData_MSSQLSERVER_SQLServerAvailabilityReplica`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-availability-replica)<br/>[`Win32_PerfRawData_MSSQLSERVER_SQLServerBufferManager`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-buffer-manager-object)<br/>[`Win32_PerfRawData_MSSQLSERVER_SQLServerDatabaseReplica`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-database-replica)<br/>[`Win32_PerfRawData_MSSQLSERVER_SQLServerDatabases`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-databases-object?view=sql-server-2017)<br/>[`Win32_PerfRawData_MSSQLSERVER_SQLServerGeneralStatistics`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-general-statistics-object)<br/>[`Win32_PerfRawData_MSSQLSERVER_SQLServerLocks`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-locks-object)<br/>[`Win32_PerfRawData_MSSQLSERVER_SQLServerMemoryManager`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-memory-manager-object)<br/>[`Win32_PerfRawData_MSSQLSERVER_SQLServerSQLStatistics`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-sql-statistics-object) Classes | [`Win32_PerfRawData_MSSQLSERVER_SQLServerAccessMethods`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-access-methods-object)<br/>[`Win32_PerfRawData_MSSQLSERVER_SQLServerAvailabilityReplica`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-availability-replica)<br/>[`Win32_PerfRawData_MSSQLSERVER_SQLServerBufferManager`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-buffer-manager-object)<br/>[`Win32_PerfRawData_MSSQLSERVER_SQLServerDatabaseReplica`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-database-replica)<br/>[`Win32_PerfRawData_MSSQLSERVER_SQLServerDatabases`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-databases-object?view=sql-server-2017)<br/>[`Win32_PerfRawData_MSSQLSERVER_SQLServerGeneralStatistics`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-general-statistics-object)<br/>[`Win32_PerfRawData_MSSQLSERVER_SQLServerLocks`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-locks-object)<br/>[`Win32_PerfRawData_MSSQLSERVER_SQLServerMemoryManager`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-memory-manager-object)<br/>[`Win32_PerfRawData_MSSQLSERVER_SQLServerSQLStatistics`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-sql-statistics-object)<br/>[`Win32_PerfRawData_MSSQLSERVER_SQLServerSQLErrors`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-sql-errors-object)<br/>[`Win32_PerfRawData_MSSQLSERVER_SQLServerTransactions`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-transactions-object)
Enabled by default? | No Enabled by default? | No
## Flags ## Flags
### `--collectors.mssql.classes-enabled` ### `--collectors.mssql.classes-enabled`
Comma-separated list of MSSQL WMI classes to use. Supported values are `accessmethods`, `availreplica`, `bufman`, `databases`, `dbreplica`, `genstats`, `locks`, `memmgr` and `sqlstats`. Comma-separated list of MSSQL WMI classes to use. Supported values are `accessmethods`, `availreplica`, `bufman`, `databases`, `dbreplica`, `genstats`, `locks`, `memmgr`, `sqlstats`, `sqlerrors` and `transactions`.
### `--collectors.mssql.class-print` ### `--collectors.mssql.class-print`
@@ -230,6 +230,20 @@ Name | Description | Type | Labels
`wmi_mssql_sqlstats_sql_compilations` | _Not yet documented_ | counter | `instance` `wmi_mssql_sqlstats_sql_compilations` | _Not yet documented_ | counter | `instance`
`wmi_mssql_sqlstats_sql_recompilations` | _Not yet documented_ | counter | `instance` `wmi_mssql_sqlstats_sql_recompilations` | _Not yet documented_ | counter | `instance`
`wmi_mssql_sqlstats_unsafe_auto_parameterization_attempts` | _Not yet documented_ | counter | `instance` `wmi_mssql_sqlstats_unsafe_auto_parameterization_attempts` | _Not yet documented_ | counter | `instance`
`wmi_mssql_sql_errors_total` | _Not yet documented_ | counter | `instance`, `resource`
`wmi_mssql_transactions_tempdb_free_space_bytes` | _Not yet documented_ | gauge | `instance`
`wmi_mssql_transactions_longest_transaction_running_seconds` | _Not yet documented_ | gauge | `instance`
`wmi_mssql_transactions_nonsnapshot_version_active_total` | _Not yet documented_ | counter | `instance`
`wmi_mssql_transactions_snapshot_active_total` | _Not yet documented_ | counter | `instance`
`wmi_mssql_transactions_active_total` | _Not yet documented_ | counter | `instance`
`wmi_mssql_transactions_update_conflicts_total` | _Not yet documented_ | counter | `instance`
`wmi_mssql_transactions_update_snapshot_active_total` | _Not yet documented_ | counter | `instance`
`wmi_mssql_transactions_version_cleanup_rate_bytes` | _Not yet documented_ | gauge | `instance`
`wmi_mssql_transactions_version_generation_rate_bytes` | _Not yet documented_ | gauge | `instance`
`wmi_mssql_transactions_version_store_size_bytes` | _Not yet documented_ | gauge | `instance`
`wmi_mssql_transactions_version_store_units` | _Not yet documented_ | counter | `instance`
`wmi_mssql_transactions_version_store_creation_units` | _Not yet documented_ | counter | `instance`
`wmi_mssql_transactions_version_store_truncation_units` | _Not yet documented_ | counter | `instance`
### Example metric ### Example metric
_This collector does not yet have explained examples, we would appreciate your help adding them!_ _This collector does not yet have explained examples, we would appreciate your help adding them!_

View File

@@ -22,24 +22,40 @@ If given, an interface name needs to *not* match the blacklist regexp in order f
Name | Description | Type | Labels Name | Description | Type | Labels
-----|-------------|------|------- -----|-------------|------|-------
`wmi_net_bytes_received_total` | _Not yet documented_ | counter | `nic` `wmi_net_bytes_received_total` | Total bytes received by interface | counter | `nic`
`wmi_net_bytes_sent_total` | _Not yet documented_ | counter | `nic` `wmi_net_bytes_sent_total` | Total bytes transmitted by interface | counter | `nic`
`wmi_net_bytes_total` | _Not yet documented_ | counter | `nic` `wmi_net_bytes_total` | Total bytes received and transmitted by interface | counter | `nic`
`wmi_net_packets_outbound_discarded` | _Not yet documented_ | counter | `nic` `wmi_net_packets_outbound_discarded` | Total outbound packets that were chosen to be discarded even though no errors had been detected to prevent transmission | counter | `nic`
`wmi_net_packets_outbound_errors` | _Not yet documented_ | counter | `nic` `wmi_net_packets_outbound_errors` | Total packets that could not be transmitted due to errors | counter | `nic`
`wmi_net_packets_received_discarded` | _Not yet documented_ | counter | `nic` `wmi_net_packets_received_discarded` | Total inbound packets that were chosen to be discarded even though no errors had been detected to prevent delivery | counter | `nic`
`wmi_net_packets_received_errors` | _Not yet documented_ | counter | `nic` `wmi_net_packets_received_errors` | Total packets that could not be received due to errors | counter | `nic`
`wmi_net_packets_received_total` | _Not yet documented_ | counter | `nic` `wmi_net_packets_received_total` | Total packets received by interface | counter | `nic`
`wmi_net_packets_received_unknown` | _Not yet documented_ | counter | `nic` `wmi_net_packets_received_unknown` | Total packets received by interface that were discarded because of an unknown or unsupported protocol | counter | `nic`
`wmi_net_packets_total` | _Not yet documented_ | counter | `nic` `wmi_net_packets_total` | Total packets received and transmitted by interface | counter | `nic`
`wmi_net_packets_sent_total` | _Not yet documented_ | counter | `nic` `wmi_net_packets_sent_total` | Total packets transmitted by interface | counter | `nic`
`wmi_net_current_bandwidth` | _Not yet documented_ | counter | `nic` `wmi_net_current_bandwidth` | Estimate of the interface's current bandwidth in bits per second (bps) | counter | `nic`
### Example metric ### Example metric
_This collector does not yet have explained examples, we would appreciate your help adding them!_ Query the rate of transmitted network traffic
```
rate(wmi_net_bytes_sent_total{instance="localhost"}[2m])
```
## Useful queries ## Useful queries
_This collector does not yet have any useful queries added, we would appreciate your help adding them!_ Get total utilisation of network interface as a percentage
```
rate(wmi_net_bytes_total{instance="localhost", nic="Microsoft_Hyper_V_Network_Adapter__1"}[2m]) * 8 / wmi_net_current_bandwidth{instance="locahost", nic="Microsoft_Hyper_V_Network_Adapter__1"} * 100
```
## Alerting examples ## Alerting examples
_This collector does not yet have alerting examples, we would appreciate your help adding them!_ **prometheus.rules**
```
- alert: NetInterfaceUsage
expr: rate(wmi_net_bytes_total[2m]) * 8 / wmi_net_current_bandwidth * 100 > 90
for: 10m
labels:
severity: high
annotations:
summary: "Network Interface Usage (instance {{ $labels.instance }})"
description: "Network traffic usage is greater than 95% for interface {{ $labels.nic }}\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
```

View File

@@ -66,10 +66,42 @@ A service can have any of the following statuses:
Note that there is some overlap with service state. Note that there is some overlap with service state.
### Example metric ### Example metric
_This collector does not yet have explained examples, we would appreciate your help adding them!_ Lists the services that have a 'disabled' start mode.
```
wmi_service_start_mode{exported_name=~"(mssqlserver|sqlserveragent)",start_mode="disabled"}
```
## Useful queries ## Useful queries
_This collector does not yet have any useful queries added, we would appreciate your help adding them!_ Counts the number of Microsoft SQL Server/Agent Processes
```
count(wmi_service_state{exported_name=~"(sqlserveragent|mssqlserver)",state="running"})
```
## Alerting examples ## Alerting examples
_This collector does not yet have alerting examples, we would appreciate your help adding them!_ **prometheus.rules**
```
groups:
- name: Microsoft SQL Server Alerts
rules:
# Sends an alert when the 'sqlserveragent' service is not in the running state for 3 minutes.
- alert: SQL Server Agent DOWN
expr: wmi_service_state{instance="SQL",exported_name="sqlserveragent",state="running"} == 0
for: 3m
labels:
severity: high
annotations:
summary: "Service {{ $labels.exported_name }} down"
description: "Service {{ $labels.exported_name }} on instance {{ $labels.instance }} has been down for more than 3 minutes."
# Sends an alert when the 'mssqlserver' service is not in the running state for 3 minutes.
- alert: SQL Server DOWN
expr: wmi_service_state{instance="SQL",exported_name="mssqlserver",state="running"} == 0
for: 3m
labels:
severity: high
annotations:
summary: "Service {{ $labels.exported_name }} down"
description: "Service {{ $labels.exported_name }} on instance {{ $labels.instance }} has been down for more than 3 minutes."
```
In this example, `instance` is the target label of the host. So each alert will be processed per host, which is then used in the alert description.

View File

@@ -16,18 +16,24 @@ None
Name | Description | Type | Labels Name | Description | Type | Labels
-----|-------------|------|------- -----|-------------|------|-------
`wmi_system_context_switches_total` | _Not yet documented_ | counter | None `wmi_system_context_switches_total` | Total number of [context switches](https://en.wikipedia.org/wiki/Context_switch) | counter | None
`wmi_system_exception_dispatches_total` | _Not yet documented_ | counter | None `wmi_system_exception_dispatches_total` | Total exceptions dispatched by the system | counter | None
`wmi_system_processor_queue_length` | _Not yet documented_ | gauge | None `wmi_system_processor_queue_length` | Number of threads in the processor queue. There is a single queue for processor time even on computers with multiple processors. | gauge | None
`wmi_system_system_calls_total` | _Not yet documented_ | counter | None `wmi_system_system_calls_total` | Total combined calls to Windows NT system service routines by all processes running on the computer | counter | None
`wmi_system_system_up_time` | _Not yet documented_ | gauge | None `wmi_system_system_up_time` | Time of last boot of system | gauge | None
`wmi_system_threads` | _Not yet documented_ | gauge | None `wmi_system_threads` | Number of Windows system [threads](https://en.wikipedia.org/wiki/Thread_(computing)) | gauge | None
### Example metric ### Example metric
_This collector does not yet have explained examples, we would appreciate your help adding them!_ Show current number of system threads
```
wmi_system_threads{instance="localhost"}
```
## Useful queries ## Useful queries
_This collector does not yet have any useful queries added, we would appreciate your help adding them!_ Find hosts that have rebooted in the last 24 hours
```
time() - wmi_system_system_up_time < 86400
```
## Alerting examples ## Alerting examples
_This collector does not yet have alerting examples, we would appreciate your help adding them!_ _This collector does not yet have alerting examples, we would appreciate your help adding them!_

View File

@@ -0,0 +1,32 @@
# thermalzone collector
The thermalzone collector exposes metrics about system temps. Note that temperature is given in Kelvin
|||
-|-
Metric name prefix | `thermalzone`
Classes | [`Win32_PerfRawData_Counters_ThermalZoneInformation`](https://wutils.com/wmi/root/cimv2/win32_perfrawdata_counters_thermalzoneinformation/#temperature_properties)
Enabled by default? | No
## Flags
None
## Metrics
Name | Description | Type | Labels
-----|-------------|------|-------
`wmi_thermalzone_percent_passive_limit` | % Passive Limit is the current limit this thermal zone is placing on the devices it controls. A limit of 100% indicates the devices are unconstrained. A limit of 0% indicates the devices are fully constrained. | gauge | None
`wmi_thermalzone_temperature_celsius ` | Temperature of the thermal zone, in degrees Celsius. | gauge | None
`wmi_thermalzone_throttle_reasons ` | Throttle Reasons indicate reasons why the thermal zone is limiting performance of the devices it controls. 0x0 The zone is not throttled. 0x1 The zone is throttled for thermal reasons. 0x2 The zone is throttled to limit electrical current. | gauge | None
[`Throttle reasons` source](https://docs.microsoft.com/en-us/windows-hardware/design/device-experiences/examples--requirements-and-diagnostics)
### Example metric
_This collector does not yet have explained examples, we would appreciate your help adding them!_
## Useful queries
_This collector does not yet have any useful queries added, we would appreciate your help adding them!_
## Alerting examples
_This collector does not yet have alerting examples, we would appreciate your help adding them!_

View File

@@ -5,7 +5,10 @@ package main
import ( import (
"fmt" "fmt"
"net/http" "net/http"
_ "net/http/pprof"
"os"
"sort" "sort"
"strconv"
"strings" "strings"
"sync" "sync"
"time" "time"
@@ -52,6 +55,12 @@ var (
[]string{"collector"}, []string{"collector"},
nil, nil,
) )
snapshotDuration = prometheus.NewDesc(
prometheus.BuildFQName(collector.Namespace, "exporter", "perflib_snapshot_duration_seconds"),
"Duration of perflib snapshot capture",
nil,
nil,
)
// This can be removed when client_golang exposes this on Windows // This can be removed when client_golang exposes this on Windows
// (See https://github.com/prometheus/client_golang/issues/376) // (See https://github.com/prometheus/client_golang/issues/376)
@@ -71,84 +80,113 @@ func (coll WmiCollector) Describe(ch chan<- *prometheus.Desc) {
ch <- scrapeSuccessDesc ch <- scrapeSuccessDesc
} }
type collectorOutcome int
const (
pending collectorOutcome = iota
success
failed
)
// Collect sends the collected metrics from each of the collectors to // Collect sends the collected metrics from each of the collectors to
// prometheus. // prometheus.
func (coll WmiCollector) Collect(ch chan<- prometheus.Metric) { func (coll WmiCollector) Collect(ch chan<- prometheus.Metric) {
scrapeContext, err := collector.PrepareScrapeContext()
if err != nil {
ch <- prometheus.NewInvalidMetric(scrapeSuccessDesc, fmt.Errorf("failed to prepare scrape: %v", err))
return
}
remainingCollectors := make(map[string]bool)
for name := range coll.collectors {
remainingCollectors[name] = true
}
metricsBuffer := make(chan prometheus.Metric)
allDone := make(chan struct{})
stopped := false
go func() {
for {
select {
case m := <-metricsBuffer:
if !stopped {
ch <- m
}
case <-allDone:
return
}
}
}()
wg := sync.WaitGroup{}
wg.Add(len(coll.collectors))
go func() {
wg.Wait()
close(allDone)
close(metricsBuffer)
}()
for name, c := range coll.collectors {
go func(name string, c collector.Collector) {
execute(name, c, scrapeContext, metricsBuffer)
wg.Done()
delete(remainingCollectors, name)
}(name, c)
}
ch <- prometheus.MustNewConstMetric( ch <- prometheus.MustNewConstMetric(
startTimeDesc, startTimeDesc,
prometheus.CounterValue, prometheus.CounterValue,
startTime, startTime,
) )
t := time.Now()
scrapeContext, err := collector.PrepareScrapeContext()
ch <- prometheus.MustNewConstMetric(
snapshotDuration,
prometheus.GaugeValue,
time.Since(t).Seconds(),
)
if err != nil {
ch <- prometheus.NewInvalidMetric(scrapeSuccessDesc, fmt.Errorf("failed to prepare scrape: %v", err))
return
}
wg := sync.WaitGroup{}
wg.Add(len(coll.collectors))
collectorOutcomes := make(map[string]collectorOutcome)
for name := range coll.collectors {
collectorOutcomes[name] = pending
}
metricsBuffer := make(chan prometheus.Metric)
l := sync.Mutex{}
finished := false
go func() {
for m := range metricsBuffer {
l.Lock()
if !finished {
ch <- m
}
l.Unlock()
}
}()
for name, c := range coll.collectors {
go func(name string, c collector.Collector) {
defer wg.Done()
outcome := execute(name, c, scrapeContext, metricsBuffer)
l.Lock()
if !finished {
collectorOutcomes[name] = outcome
}
l.Unlock()
}(name, c)
}
allDone := make(chan struct{})
go func() {
wg.Wait()
close(allDone)
close(metricsBuffer)
}()
// Wait until either all collectors finish, or timeout expires
select { select {
case <-allDone: case <-allDone:
stopped = true
return
case <-time.After(coll.maxScrapeDuration): case <-time.After(coll.maxScrapeDuration):
stopped = true
remainingCollectorNames := make([]string, 0, len(remainingCollectors))
for rc := range remainingCollectors {
remainingCollectorNames = append(remainingCollectorNames, rc)
}
log.Warn("Collection timed out, still waiting for ", remainingCollectorNames)
for name := range remainingCollectors {
ch <- prometheus.MustNewConstMetric(
scrapeSuccessDesc,
prometheus.GaugeValue,
0.0,
name,
)
ch <- prometheus.MustNewConstMetric(
scrapeTimeoutDesc,
prometheus.GaugeValue,
1.0,
name,
)
}
} }
l.Lock()
finished = true
remainingCollectorNames := make([]string, 0)
for name, outcome := range collectorOutcomes {
var successValue, timeoutValue float64
if outcome == pending {
timeoutValue = 1.0
remainingCollectorNames = append(remainingCollectorNames, name)
}
if outcome == success {
successValue = 1.0
}
ch <- prometheus.MustNewConstMetric(
scrapeSuccessDesc,
prometheus.GaugeValue,
successValue,
name,
)
ch <- prometheus.MustNewConstMetric(
scrapeTimeoutDesc,
prometheus.GaugeValue,
timeoutValue,
name,
)
}
if len(remainingCollectorNames) > 0 {
log.Warn("Collection timed out, still waiting for ", remainingCollectorNames)
}
l.Unlock()
} }
func filterAvailableCollectors(collectors string) string { func filterAvailableCollectors(collectors string) string {
@@ -162,37 +200,23 @@ func filterAvailableCollectors(collectors string) string {
return strings.Join(availableCollectors, ",") return strings.Join(availableCollectors, ",")
} }
func execute(name string, c collector.Collector, ctx *collector.ScrapeContext, ch chan<- prometheus.Metric) { func execute(name string, c collector.Collector, ctx *collector.ScrapeContext, ch chan<- prometheus.Metric) collectorOutcome {
begin := time.Now() t := time.Now()
err := c.Collect(ctx, ch) err := c.Collect(ctx, ch)
duration := time.Since(begin) duration := time.Since(t).Seconds()
var success float64
if err != nil {
log.Errorf("collector %s failed after %fs: %s", name, duration.Seconds(), err)
success = 0
} else {
log.Debugf("collector %s succeeded after %fs.", name, duration.Seconds())
success = 1
}
ch <- prometheus.MustNewConstMetric( ch <- prometheus.MustNewConstMetric(
scrapeDurationDesc, scrapeDurationDesc,
prometheus.GaugeValue, prometheus.GaugeValue,
duration.Seconds(), duration,
name,
)
ch <- prometheus.MustNewConstMetric(
scrapeSuccessDesc,
prometheus.GaugeValue,
success,
name,
)
ch <- prometheus.MustNewConstMetric(
scrapeTimeoutDesc,
prometheus.GaugeValue,
0.0,
name, name,
) )
if err != nil {
log.Errorf("collector %s failed after %fs: %s", name, duration, err)
return failed
}
log.Debugf("collector %s succeeded after %fs.", name, duration)
return success
} }
func expandEnabledCollectors(enabled string) []string { func expandEnabledCollectors(enabled string) []string {
@@ -229,10 +253,6 @@ func loadCollectors(list string) (map[string]collector.Collector, error) {
return collectors, nil return collectors, nil
} }
func init() {
prometheus.MustRegister(version.NewCollector("wmi_exporter"))
}
func initWbem() { func initWbem() {
// This initialization prevents a memory leak on WMF 5+. See // This initialization prevents a memory leak on WMF 5+. See
// https://github.com/martinlindhe/wmi_exporter/issues/77 and linked issues // https://github.com/martinlindhe/wmi_exporter/issues/77 and linked issues
@@ -264,10 +284,10 @@ func main() {
"collectors.print", "collectors.print",
"If true, print available collectors and exit.", "If true, print available collectors and exit.",
).Bool() ).Bool()
maxScrapeDuration = kingpin.Flag( timeoutMargin = kingpin.Flag(
"scrape.max-duration", "scrape.timeout-margin",
"Time after which collectors are aborted during a scrape", "Seconds to subtract from the timeout allowed by the client. Tune to allow for overhead or high loads.",
).Default("30s").Duration() ).Default("0.5").Float64()
) )
log.AddFlags(kingpin.CommandLine) log.AddFlags(kingpin.CommandLine)
@@ -312,13 +332,17 @@ func main() {
log.Infof("Enabled collectors: %v", strings.Join(keys(collectors), ", ")) log.Infof("Enabled collectors: %v", strings.Join(keys(collectors), ", "))
exporter := WmiCollector{ h := &metricsHandler{
collectors: collectors, timeoutMargin: *timeoutMargin,
maxScrapeDuration: *maxScrapeDuration, collectorFactory: func(timeout time.Duration) *WmiCollector {
return &WmiCollector{
collectors: collectors,
maxScrapeDuration: timeout,
}
},
} }
prometheus.MustRegister(exporter)
http.Handle(*metricsPath, promhttp.Handler()) http.Handle(*metricsPath, h)
http.HandleFunc("/health", healthCheck) http.HandleFunc("/health", healthCheck)
http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
http.Redirect(w, r, *metricsPath, http.StatusMovedPermanently) http.Redirect(w, r, *metricsPath, http.StatusMovedPermanently)
@@ -382,3 +406,36 @@ loop:
changes <- svc.Status{State: svc.StopPending} changes <- svc.Status{State: svc.StopPending}
return return
} }
type metricsHandler struct {
timeoutMargin float64
collectorFactory func(timeout time.Duration) *WmiCollector
}
func (mh *metricsHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
const defaultTimeout = 10.0
var timeoutSeconds float64
if v := r.Header.Get("X-Prometheus-Scrape-Timeout-Seconds"); v != "" {
var err error
timeoutSeconds, err = strconv.ParseFloat(v, 64)
if err != nil {
log.Warnf("Couldn't parse X-Prometheus-Scrape-Timeout-Seconds: %q. Defaulting timeout to %f", v, defaultTimeout)
}
}
if timeoutSeconds == 0 {
timeoutSeconds = defaultTimeout
}
timeoutSeconds = timeoutSeconds - mh.timeoutMargin
reg := prometheus.NewRegistry()
reg.MustRegister(mh.collectorFactory(time.Duration(timeoutSeconds * float64(time.Second))))
reg.MustRegister(
prometheus.NewProcessCollector(os.Getpid(), ""),
prometheus.NewGoCollector(),
version.NewCollector("wmi_exporter"),
)
h := promhttp.HandlerFor(reg, promhttp.HandlerOpts{})
h.ServeHTTP(w, r)
}

View File

@@ -20,7 +20,7 @@ else {
$members = $wmiObject ` $members = $wmiObject `
| Get-Member -MemberType Properties ` | Get-Member -MemberType Properties `
| Where-Object { $_.Definition -Match '^u?int' -and $_.Name -NotMatch '_' } ` | Where-Object { $_.Definition -Match '^u?int' -and $_.Name -NotMatch '_' } `
| Select-Object Name, @{Name="Type";Expression={$_.Definition.Split(" ")[0]}} | Select-Object Name, @{Name="Type";Expression={$_.Definition.Split(" ")[0]}})
$input = @{ $input = @{
"Class"=$Class; "Class"=$Class;
"CollectorName"=$CollectorName; "CollectorName"=$CollectorName;

View File

@@ -29,7 +29,7 @@ func New{{ .CollectorName }}Collector() (Collector, error) {
} }
// Collect sends the metric values for each metric // Collect sends the metric values for each metric
// to the provided prometheus Metric channel. // to the provided prometheus Metric channel.
func (c *{{ .CollectorName }}Collector) Collect(ch chan<- prometheus.Metric) error { func (c *{{ .CollectorName }}Collector) Collect(ctx *ScrapeContext, ch chan<- prometheus.Metric) error {
if desc, err := c.collect(ch); err != nil { if desc, err := c.collect(ch); err != nil {
log.Error("failed collecting {{ .CollectorName | toLower }} metrics:", desc, err) log.Error("failed collecting {{ .CollectorName | toLower }} metrics:", desc, err)
return err return err