mirror of
https://github.com/prometheus-community/windows_exporter.git
synced 2026-02-08 14:06:38 +00:00
Compare commits
66 Commits
v0.7.999-p
...
v0.8.3
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
3a19fe4e7d | ||
|
|
26a468f17a | ||
|
|
a6f3b33928 | ||
|
|
8ef215cc7e | ||
|
|
2c155a12bd | ||
|
|
e1141c3ec0 | ||
|
|
b635ecc6c1 | ||
|
|
a7b5cf7aa6 | ||
|
|
719ccd4f7f | ||
|
|
7ab8c7dde4 | ||
|
|
eb002eb667 | ||
|
|
a1638cdf4c | ||
|
|
091406877a | ||
|
|
84970ac086 | ||
|
|
d86f318010 | ||
|
|
853d615673 | ||
|
|
cd9a740e2b | ||
|
|
c70e7674a5 | ||
|
|
d3e3835c29 | ||
|
|
592c8a8d69 | ||
|
|
6f6a479535 | ||
|
|
d01c66986c | ||
|
|
823ffb7597 | ||
|
|
a90f9cda0f | ||
|
|
31d4c28124 | ||
|
|
e880889f07 | ||
|
|
a283608812 | ||
|
|
8251ddd176 | ||
|
|
4f0a3a89ab | ||
|
|
27cc1072fe | ||
|
|
eb9cf56dee | ||
|
|
3c20887433 | ||
|
|
37d1c4e958 | ||
|
|
33b6e17b2d | ||
|
|
1a9d4afdd6 | ||
|
|
9e198c55a4 | ||
|
|
b309a05bde | ||
|
|
123a055242 | ||
|
|
9308108284 | ||
|
|
0ecf3cd792 | ||
|
|
801444b35b | ||
|
|
f4ab322e5b | ||
|
|
72de199528 | ||
|
|
304972580d | ||
|
|
6322bb124f | ||
|
|
cb6a91b705 | ||
|
|
4d9fb1be72 | ||
|
|
27e26037e3 | ||
|
|
e09497116f | ||
|
|
3099e10555 | ||
|
|
3900504504 | ||
|
|
2c5e30d920 | ||
|
|
b348c245e8 | ||
|
|
578bcc4959 | ||
|
|
31a30474f1 | ||
|
|
ce1005add8 | ||
|
|
6107a59306 | ||
|
|
47656b16bd | ||
|
|
8fc47669be | ||
|
|
1a67ca54b6 | ||
|
|
c73f52338d | ||
|
|
c5f23b4e64 | ||
|
|
411954cf9d | ||
|
|
56be7c63d5 | ||
|
|
6ffe504f7e | ||
|
|
daa6f3d111 |
2
.gitignore
vendored
2
.gitignore
vendored
@@ -3,3 +3,5 @@ VERSION
|
||||
*.swp
|
||||
*.un~
|
||||
output/
|
||||
.vscode
|
||||
.idea
|
||||
@@ -12,10 +12,12 @@ Name | Description | Enabled by default
|
||||
[ad](docs/collector.ad.md) | Active Directory Domain Services |
|
||||
[cpu](docs/collector.cpu.md) | CPU usage | ✓
|
||||
[cs](docs/collector.cs.md) | "Computer System" metrics (system properties, num cpus/total memory) | ✓
|
||||
[container](docs/collector.container.md) | Container metrics |
|
||||
[dns](docs/collector.dns.md) | DNS Server |
|
||||
[hyperv](docs/collector.hyperv.md) | Hyper-V hosts |
|
||||
[iis](docs/collector.iis.md) | IIS sites and applications |
|
||||
[logical_disk](docs/collector.logical_disk.md) | Logical disks, disk I/O | ✓
|
||||
[logon](docs/collector.logon.md) | User logon sessions |
|
||||
[memory](docs/collector.memory.md) | Memory usage metrics |
|
||||
[msmq](docs/collector.msmq.md) | MSMQ queues |
|
||||
[mssql](docs/collector.mssql.md) | [SQL Server Performance Objects](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/use-sql-server-objects#SQLServerPOs) metrics |
|
||||
@@ -33,6 +35,7 @@ Name | Description | Enabled by default
|
||||
[service](docs/collector.service.md) | Service state metrics | ✓
|
||||
[system](docs/collector.system.md) | System calls | ✓
|
||||
[tcp](docs/collector.tcp.md) | TCP connections |
|
||||
[thermalzone](docs/collector.thermalzone.md) | Thermal information |
|
||||
[textfile](docs/collector.textfile.md) | Read prometheus metrics from a text file | ✓
|
||||
[vmware](docs/collector.vmware.md) | Performance counters installed by the VMware Guest agent |
|
||||
|
||||
|
||||
@@ -40,12 +40,13 @@ after_build:
|
||||
return
|
||||
}
|
||||
$ErrorActionPreference = "Stop"
|
||||
$BuildVersion = Get-Content VERSION
|
||||
# The MSI version is not semver compliant, so just take the numerical parts
|
||||
$Version = $env:APPVEYOR_REPO_TAG_NAME -replace '^v?([0-9\.]+).*$','$1'
|
||||
$MSIVersion = $env:APPVEYOR_REPO_TAG_NAME -replace '^v?([0-9\.]+).*$','$1'
|
||||
foreach($Arch in "amd64","386") {
|
||||
Write-Verbose "Building wmi_exporter $Version msi for $Arch"
|
||||
.\installer\build.ps1 -PathToExecutable .\output\$Arch\wmi_exporter-$Version-$Arch.exe -Version $Version -Arch "$Arch"
|
||||
Move-Item installer\Output\wmi_exporter-$Version-$Arch.msi output\$Arch\
|
||||
Write-Verbose "Building wmi_exporter $MSIVersion msi for $Arch"
|
||||
.\installer\build.ps1 -PathToExecutable .\output\$Arch\wmi_exporter-$BuildVersion-$Arch.exe -Version $MSIVersion -Arch "$Arch"
|
||||
Move-Item installer\Output\wmi_exporter-$MSIVersion-$Arch.msi output\$Arch\
|
||||
}
|
||||
- promu checksum output\
|
||||
|
||||
|
||||
@@ -1,8 +1,12 @@
|
||||
package collector
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
|
||||
"github.com/leoluk/perflib_exporter/perflib"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/common/log"
|
||||
"golang.org/x/sys/windows/registry"
|
||||
)
|
||||
|
||||
// ...
|
||||
@@ -15,6 +19,34 @@ const (
|
||||
windowsEpoch = 116444736000000000
|
||||
)
|
||||
|
||||
// getWindowsVersion reads the version number of the OS from the Registry
|
||||
// See https://docs.microsoft.com/en-us/windows/desktop/sysinfo/operating-system-version
|
||||
func getWindowsVersion() float64 {
|
||||
k, err := registry.OpenKey(registry.LOCAL_MACHINE, `SOFTWARE\Microsoft\Windows NT\CurrentVersion`, registry.QUERY_VALUE)
|
||||
if err != nil {
|
||||
log.Warn("Couldn't open registry", err)
|
||||
return 0
|
||||
}
|
||||
defer func() {
|
||||
err = k.Close()
|
||||
if err != nil {
|
||||
log.Warnf("Failed to close registry key: %v", err)
|
||||
}
|
||||
}()
|
||||
|
||||
currentv, _, err := k.GetStringValue("CurrentVersion")
|
||||
if err != nil {
|
||||
log.Warn("Couldn't open registry to determine current Windows version:", err)
|
||||
return 0
|
||||
}
|
||||
|
||||
currentv_flt, err := strconv.ParseFloat(currentv, 64)
|
||||
|
||||
log.Debugf("Detected Windows version %f\n", currentv_flt)
|
||||
|
||||
return currentv_flt
|
||||
}
|
||||
|
||||
// Factories ...
|
||||
var Factories = make(map[string]func() (Collector, error))
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
// +build windows,cgo
|
||||
// +build windows
|
||||
|
||||
package collector
|
||||
|
||||
|
||||
@@ -3,46 +3,15 @@
|
||||
package collector
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/sys/windows/registry"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/common/log"
|
||||
)
|
||||
|
||||
func init() {
|
||||
Factories["cpu"] = newCPUCollector
|
||||
}
|
||||
|
||||
// A function to get Windows version from registry
|
||||
func getWindowsVersion() float64 {
|
||||
k, err := registry.OpenKey(registry.LOCAL_MACHINE, `SOFTWARE\Microsoft\Windows NT\CurrentVersion`, registry.QUERY_VALUE)
|
||||
if err != nil {
|
||||
log.Warn("Couldn't open registry", err)
|
||||
return 0
|
||||
}
|
||||
defer func() {
|
||||
err = k.Close()
|
||||
if err != nil {
|
||||
log.Warnf("Failed to close registry key: %v", err)
|
||||
}
|
||||
}()
|
||||
|
||||
currentv, _, err := k.GetStringValue("CurrentVersion")
|
||||
if err != nil {
|
||||
log.Warn("Couldn't open registry to determine current Windows version:", err)
|
||||
return 0
|
||||
}
|
||||
|
||||
currentv_flt, err := strconv.ParseFloat(currentv, 64)
|
||||
|
||||
log.Debugf("Detected Windows version %f\n", currentv_flt)
|
||||
|
||||
return currentv_flt
|
||||
}
|
||||
|
||||
type cpuCollectorBasic struct {
|
||||
CStateSecondsTotal *prometheus.Desc
|
||||
TimeTotal *prometheus.Desc
|
||||
@@ -67,10 +36,12 @@ func newCPUCollector() (Collector, error) {
|
||||
const subsystem = "cpu"
|
||||
|
||||
version := getWindowsVersion()
|
||||
// Windows version by number https://docs.microsoft.com/en-us/windows/desktop/sysinfo/operating-system-version
|
||||
// For Windows 2008 or earlier Windows version is 6.0 or lower, where we only have the older "Processor" counters
|
||||
// For Windows 2008 R2 or later Windows version is 6.1 or higher, so we can use "ProcessorInformation" counters
|
||||
// Value 6.05 was selected just to split between Windows versions
|
||||
// For Windows 2008 (version 6.0) or earlier we only have the "Processor"
|
||||
// class. As of Windows 2008 R2 (version 6.1) the more detailed
|
||||
// "ProcessorInformation" set is available (although some of the counters
|
||||
// are added in later versions, so we aren't guaranteed to get all of
|
||||
// them).
|
||||
// Value 6.05 was selected to split between Windows versions.
|
||||
if version < 6.05 {
|
||||
return &cpuCollectorBasic{
|
||||
CStateSecondsTotal: prometheus.NewDesc(
|
||||
|
||||
@@ -29,17 +29,20 @@ var (
|
||||
|
||||
// A LogicalDiskCollector is a Prometheus collector for WMI Win32_PerfRawData_PerfDisk_LogicalDisk metrics
|
||||
type LogicalDiskCollector struct {
|
||||
RequestsQueued *prometheus.Desc
|
||||
ReadBytesTotal *prometheus.Desc
|
||||
ReadsTotal *prometheus.Desc
|
||||
WriteBytesTotal *prometheus.Desc
|
||||
WritesTotal *prometheus.Desc
|
||||
ReadTime *prometheus.Desc
|
||||
WriteTime *prometheus.Desc
|
||||
TotalSpace *prometheus.Desc
|
||||
FreeSpace *prometheus.Desc
|
||||
IdleTime *prometheus.Desc
|
||||
SplitIOs *prometheus.Desc
|
||||
RequestsQueued *prometheus.Desc
|
||||
ReadBytesTotal *prometheus.Desc
|
||||
ReadsTotal *prometheus.Desc
|
||||
WriteBytesTotal *prometheus.Desc
|
||||
WritesTotal *prometheus.Desc
|
||||
ReadTime *prometheus.Desc
|
||||
WriteTime *prometheus.Desc
|
||||
TotalSpace *prometheus.Desc
|
||||
FreeSpace *prometheus.Desc
|
||||
IdleTime *prometheus.Desc
|
||||
SplitIOs *prometheus.Desc
|
||||
ReadLatency *prometheus.Desc
|
||||
WriteLatency *prometheus.Desc
|
||||
ReadWriteLatency *prometheus.Desc
|
||||
|
||||
volumeWhitelistPattern *regexp.Regexp
|
||||
volumeBlacklistPattern *regexp.Regexp
|
||||
@@ -127,6 +130,27 @@ func NewLogicalDiskCollector() (Collector, error) {
|
||||
nil,
|
||||
),
|
||||
|
||||
ReadLatency: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(Namespace, subsystem, "read_latency_seconds_total"),
|
||||
"Shows the average time, in seconds, of a read operation from the disk (LogicalDisk.AvgDiskSecPerRead)",
|
||||
[]string{"volume"},
|
||||
nil,
|
||||
),
|
||||
|
||||
WriteLatency: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(Namespace, subsystem, "write_latency_seconds_total"),
|
||||
"Shows the average time, in seconds, of a write operation to the disk (LogicalDisk.AvgDiskSecPerWrite)",
|
||||
[]string{"volume"},
|
||||
nil,
|
||||
),
|
||||
|
||||
ReadWriteLatency: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(Namespace, subsystem, "read_write_latency_seconds_total"),
|
||||
"Shows the time, in seconds, of the average disk transfer (LogicalDisk.AvgDiskSecPerTransfer)",
|
||||
[]string{"volume"},
|
||||
nil,
|
||||
),
|
||||
|
||||
volumeWhitelistPattern: regexp.MustCompile(fmt.Sprintf("^(?:%s)$", *volumeWhitelist)),
|
||||
volumeBlacklistPattern: regexp.MustCompile(fmt.Sprintf("^(?:%s)$", *volumeBlacklist)),
|
||||
}, nil
|
||||
@@ -158,6 +182,9 @@ type Win32_PerfRawData_PerfDisk_LogicalDisk struct {
|
||||
PercentFreeSpace_Base uint32
|
||||
PercentIdleTime uint64
|
||||
SplitIOPerSec uint32
|
||||
AvgDiskSecPerRead uint64
|
||||
AvgDiskSecPerWrite uint64
|
||||
AvgDiskSecPerTransfer uint64
|
||||
}
|
||||
|
||||
func (c *LogicalDiskCollector) collect(ch chan<- prometheus.Metric) (*prometheus.Desc, error) {
|
||||
@@ -250,6 +277,27 @@ func (c *LogicalDiskCollector) collect(ch chan<- prometheus.Metric) (*prometheus
|
||||
float64(volume.SplitIOPerSec),
|
||||
volume.Name,
|
||||
)
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
c.ReadLatency,
|
||||
prometheus.CounterValue,
|
||||
float64(volume.AvgDiskSecPerRead),
|
||||
volume.Name,
|
||||
)
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
c.WriteLatency,
|
||||
prometheus.CounterValue,
|
||||
float64(volume.AvgDiskSecPerWrite),
|
||||
volume.Name,
|
||||
)
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
c.ReadWriteLatency,
|
||||
prometheus.CounterValue,
|
||||
float64(volume.AvgDiskSecPerTransfer),
|
||||
volume.Name,
|
||||
)
|
||||
}
|
||||
|
||||
return nil, nil
|
||||
|
||||
199
collector/logon.go
Normal file
199
collector/logon.go
Normal file
@@ -0,0 +1,199 @@
|
||||
// +build windows
|
||||
|
||||
package collector
|
||||
|
||||
import (
|
||||
"errors"
|
||||
|
||||
"github.com/StackExchange/wmi"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/common/log"
|
||||
)
|
||||
|
||||
func init() {
|
||||
Factories["logon"] = NewLogonCollector
|
||||
}
|
||||
|
||||
// A LogonCollector is a Prometheus collector for WMI metrics
|
||||
type LogonCollector struct {
|
||||
LogonType *prometheus.Desc
|
||||
}
|
||||
|
||||
// NewLogonCollector ...
|
||||
func NewLogonCollector() (Collector, error) {
|
||||
const subsystem = "logon"
|
||||
|
||||
return &LogonCollector{
|
||||
LogonType: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(Namespace, subsystem, "logon_type"),
|
||||
"Number of active logon sessions (LogonSession.LogonType)",
|
||||
[]string{"status"},
|
||||
nil,
|
||||
),
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Collect sends the metric values for each metric
|
||||
// to the provided prometheus Metric channel.
|
||||
func (c *LogonCollector) Collect(ctx *ScrapeContext, ch chan<- prometheus.Metric) error {
|
||||
if desc, err := c.collect(ch); err != nil {
|
||||
log.Error("failed collecting user metrics:", desc, err)
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Win32_LogonSession holds the one WMI Win32_LogonSession property this
// collector reads. Docs:
// - https://docs.microsoft.com/en-us/windows/win32/cimwin32prov/win32-logonsession
type Win32_LogonSession struct {
	// LogonType is the numeric logon type of the session.
	LogonType uint32
}
|
||||
|
||||
func (c *LogonCollector) collect(ch chan<- prometheus.Metric) (*prometheus.Desc, error) {
|
||||
var dst []Win32_LogonSession
|
||||
q := queryAll(&dst)
|
||||
if err := wmi.Query(q, &dst); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if len(dst) == 0 {
|
||||
return nil, errors.New("WMI query returned empty result set")
|
||||
}
|
||||
|
||||
// Init counters
|
||||
system := 0
|
||||
interactive := 0
|
||||
network := 0
|
||||
batch := 0
|
||||
service := 0
|
||||
proxy := 0
|
||||
unlock := 0
|
||||
networkcleartext := 0
|
||||
newcredentials := 0
|
||||
remoteinteractive := 0
|
||||
cachedinteractive := 0
|
||||
cachedremoteinteractive := 0
|
||||
cachedunlock := 0
|
||||
|
||||
for _, entry := range dst {
|
||||
switch entry.LogonType {
|
||||
case 0:
|
||||
system++
|
||||
case 2:
|
||||
interactive++
|
||||
case 3:
|
||||
network++
|
||||
case 4:
|
||||
batch++
|
||||
case 5:
|
||||
service++
|
||||
case 6:
|
||||
proxy++
|
||||
case 7:
|
||||
unlock++
|
||||
case 8:
|
||||
networkcleartext++
|
||||
case 9:
|
||||
newcredentials++
|
||||
case 10:
|
||||
remoteinteractive++
|
||||
case 11:
|
||||
cachedinteractive++
|
||||
case 12:
|
||||
cachedremoteinteractive++
|
||||
case 13:
|
||||
cachedunlock++
|
||||
}
|
||||
}
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
c.LogonType,
|
||||
prometheus.GaugeValue,
|
||||
float64(system),
|
||||
"system",
|
||||
)
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
c.LogonType,
|
||||
prometheus.GaugeValue,
|
||||
float64(interactive),
|
||||
"interactive",
|
||||
)
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
c.LogonType,
|
||||
prometheus.GaugeValue,
|
||||
float64(network),
|
||||
"network",
|
||||
)
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
c.LogonType,
|
||||
prometheus.GaugeValue,
|
||||
float64(batch),
|
||||
"batch",
|
||||
)
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
c.LogonType,
|
||||
prometheus.GaugeValue,
|
||||
float64(service),
|
||||
"service",
|
||||
)
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
c.LogonType,
|
||||
prometheus.GaugeValue,
|
||||
float64(proxy),
|
||||
"proxy",
|
||||
)
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
c.LogonType,
|
||||
prometheus.GaugeValue,
|
||||
float64(unlock),
|
||||
"unlock",
|
||||
)
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
c.LogonType,
|
||||
prometheus.GaugeValue,
|
||||
float64(networkcleartext),
|
||||
"network_clear_text",
|
||||
)
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
c.LogonType,
|
||||
prometheus.GaugeValue,
|
||||
float64(newcredentials),
|
||||
"new_credentials",
|
||||
)
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
c.LogonType,
|
||||
prometheus.GaugeValue,
|
||||
float64(remoteinteractive),
|
||||
"remote_interactive",
|
||||
)
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
c.LogonType,
|
||||
prometheus.GaugeValue,
|
||||
float64(cachedinteractive),
|
||||
"cached_interactive",
|
||||
)
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
c.LogonType,
|
||||
prometheus.GaugeValue,
|
||||
float64(remoteinteractive),
|
||||
"cached_remote_interactive",
|
||||
)
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
c.LogonType,
|
||||
prometheus.GaugeValue,
|
||||
float64(cachedunlock),
|
||||
"cached_unlock",
|
||||
)
|
||||
return nil, nil
|
||||
}
|
||||
@@ -91,7 +91,7 @@ func mssqlBuildWMIInstanceClass(suffix string, instance string) string {
|
||||
type mssqlCollectorsMap map[string]mssqlCollectorFunc
|
||||
|
||||
func mssqlAvailableClassCollectors() string {
|
||||
return "accessmethods,availreplica,bufman,databases,dbreplica,genstats,locks,memmgr,sqlstats"
|
||||
return "accessmethods,availreplica,bufman,databases,dbreplica,genstats,locks,memmgr,sqlstats,sqlerrors,transactions"
|
||||
}
|
||||
|
||||
func (c *MSSQLCollector) getMSSQLCollectors() mssqlCollectorsMap {
|
||||
@@ -105,6 +105,8 @@ func (c *MSSQLCollector) getMSSQLCollectors() mssqlCollectorsMap {
|
||||
mssqlCollectors["locks"] = c.collectLocks
|
||||
mssqlCollectors["memmgr"] = c.collectMemoryManager
|
||||
mssqlCollectors["sqlstats"] = c.collectSQLStats
|
||||
mssqlCollectors["sqlerrors"] = c.collectSQLErrors
|
||||
mssqlCollectors["transactions"] = c.collectTransactions
|
||||
|
||||
return mssqlCollectors
|
||||
}
|
||||
@@ -358,6 +360,24 @@ type MSSQLCollector struct {
|
||||
SQLStatsSQLReCompilations *prometheus.Desc
|
||||
SQLStatsUnsafeAutoParams *prometheus.Desc
|
||||
|
||||
// Win32_PerfRawData_{instance}_SQLServerSQLErrors
|
||||
SQLErrorsTotal *prometheus.Desc
|
||||
|
||||
// Win32_PerfRawData_{instance}_SQLServerTransactions
|
||||
TransactionsTempDbFreeSpaceBytes *prometheus.Desc
|
||||
TransactionsLongestTransactionRunningSeconds *prometheus.Desc
|
||||
TransactionsNonSnapshotVersionActiveTotal *prometheus.Desc
|
||||
TransactionsSnapshotActiveTotal *prometheus.Desc
|
||||
TransactionsActiveTotal *prometheus.Desc
|
||||
TransactionsUpdateConflictsTotal *prometheus.Desc
|
||||
TransactionsUpdateSnapshotActiveTotal *prometheus.Desc
|
||||
TransactionsVersionCleanupRateBytes *prometheus.Desc
|
||||
TransactionsVersionGenerationRateBytes *prometheus.Desc
|
||||
TransactionsVersionStoreSizeBytes *prometheus.Desc
|
||||
TransactionsVersionStoreUnits *prometheus.Desc
|
||||
TransactionsVersionStoreCreationUnits *prometheus.Desc
|
||||
TransactionsVersionStoreTruncationUnits *prometheus.Desc
|
||||
|
||||
mssqlInstances mssqlInstancesType
|
||||
mssqlCollectors mssqlCollectorsMap
|
||||
mssqlChildCollectorFailure int
|
||||
@@ -1637,6 +1657,94 @@ func NewMSSQLCollector() (Collector, error) {
|
||||
nil,
|
||||
),
|
||||
|
||||
// Win32_PerfRawData_{instance}_SQLServerSQLErrors
|
||||
SQLErrorsTotal: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(Namespace, subsystem, "sql_errors_total"),
|
||||
"(SQLErrors.Total)",
|
||||
[]string{"instance", "resource"},
|
||||
nil,
|
||||
),
|
||||
|
||||
// Win32_PerfRawData_{instance}_SQLServerTransactions
|
||||
TransactionsTempDbFreeSpaceBytes: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(Namespace, subsystem, "transactions_tempdb_free_space_bytes"),
|
||||
"(Transactions.FreeSpaceInTempDbKB)",
|
||||
[]string{"instance"},
|
||||
nil,
|
||||
),
|
||||
TransactionsLongestTransactionRunningSeconds: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(Namespace, subsystem, "transactions_longest_transaction_running_seconds"),
|
||||
"(Transactions.LongestTransactionRunningTime)",
|
||||
[]string{"instance"},
|
||||
nil,
|
||||
),
|
||||
TransactionsNonSnapshotVersionActiveTotal: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(Namespace, subsystem, "transactions_nonsnapshot_version_active_total"),
|
||||
"(Transactions.NonSnapshotVersionTransactions)",
|
||||
[]string{"instance"},
|
||||
nil,
|
||||
),
|
||||
TransactionsSnapshotActiveTotal: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(Namespace, subsystem, "transactions_snapshot_active_total"),
|
||||
"(Transactions.SnapshotTransactions)",
|
||||
[]string{"instance"},
|
||||
nil,
|
||||
),
|
||||
TransactionsActiveTotal: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(Namespace, subsystem, "transactions_active_total"),
|
||||
"(Transactions.Transactions)",
|
||||
[]string{"instance"},
|
||||
nil,
|
||||
),
|
||||
TransactionsUpdateConflictsTotal: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(Namespace, subsystem, "transactions_update_conflicts_total"),
|
||||
"(Transactions.UpdateConflictRatio)",
|
||||
[]string{"instance"},
|
||||
nil,
|
||||
),
|
||||
TransactionsUpdateSnapshotActiveTotal: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(Namespace, subsystem, "transactions_update_snapshot_active_total"),
|
||||
"(Transactions.UpdateSnapshotTransactions)",
|
||||
[]string{"instance"},
|
||||
nil,
|
||||
),
|
||||
TransactionsVersionCleanupRateBytes: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(Namespace, subsystem, "transactions_version_cleanup_rate_bytes"),
|
||||
"(Transactions.VersionCleanupRateKBs)",
|
||||
[]string{"instance"},
|
||||
nil,
|
||||
),
|
||||
TransactionsVersionGenerationRateBytes: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(Namespace, subsystem, "transactions_version_generation_rate_bytes"),
|
||||
"(Transactions.VersionGenerationRateKBs)",
|
||||
[]string{"instance"},
|
||||
nil,
|
||||
),
|
||||
TransactionsVersionStoreSizeBytes: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(Namespace, subsystem, "transactions_version_store_size_bytes"),
|
||||
"(Transactions.VersionStoreSizeKB)",
|
||||
[]string{"instance"},
|
||||
nil,
|
||||
),
|
||||
TransactionsVersionStoreUnits: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(Namespace, subsystem, "transactions_version_store_units"),
|
||||
"(Transactions.VersionStoreUnitCount)",
|
||||
[]string{"instance"},
|
||||
nil,
|
||||
),
|
||||
TransactionsVersionStoreCreationUnits: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(Namespace, subsystem, "transactions_version_store_creation_units"),
|
||||
"(Transactions.VersionStoreUnitCreation)",
|
||||
[]string{"instance"},
|
||||
nil,
|
||||
),
|
||||
TransactionsVersionStoreTruncationUnits: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(Namespace, subsystem, "transactions_version_store_truncation_units"),
|
||||
"(Transactions.VersionStoreUnitTruncation)",
|
||||
[]string{"instance"},
|
||||
nil,
|
||||
),
|
||||
|
||||
mssqlInstances: getMSSQLInstances(),
|
||||
}
|
||||
|
||||
@@ -2575,7 +2683,7 @@ func (c *MSSQLCollector) collectDatabaseReplica(ch chan<- prometheus.Metric, sql
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
c.DBReplicaTransactionDelay,
|
||||
prometheus.GaugeValue,
|
||||
float64(v.TransactionDelay)*1000.0,
|
||||
float64(v.TransactionDelay)/1000.0,
|
||||
sqlInstance, replicaName,
|
||||
)
|
||||
}
|
||||
@@ -3558,3 +3666,162 @@ func (c *MSSQLCollector) collectSQLStats(ch chan<- prometheus.Metric, sqlInstanc
|
||||
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// win32PerfRawDataSQLServerSQLErrors receives rows of the per-instance
// SQL Server "SQLErrors" WMI performance class.
type win32PerfRawDataSQLServerSQLErrors struct {
	// Name is exported as the "resource" label.
	Name string
	// ErrorsPersec is the raw value exported as the SQL errors counter.
	ErrorsPersec uint64
}
|
||||
|
||||
// Win32_PerfRawData_MSSQLSERVER_SQLServerErrors docs:
|
||||
// - https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-sql-errors-object
|
||||
func (c *MSSQLCollector) collectSQLErrors(ch chan<- prometheus.Metric, sqlInstance string) (*prometheus.Desc, error) {
|
||||
var dst []win32PerfRawDataSQLServerSQLErrors
|
||||
log.Debugf("mssql_sqlerrors collector iterating sql instance %s.", sqlInstance)
|
||||
|
||||
class := mssqlBuildWMIInstanceClass("SQLErrors", sqlInstance)
|
||||
q := queryAllForClassWhere(&dst, class, `Name <> '_Total'`)
|
||||
if err := wmi.Query(q, &dst); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for _, v := range dst {
|
||||
resource := v.Name
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
c.SQLErrorsTotal,
|
||||
prometheus.CounterValue,
|
||||
float64(v.ErrorsPersec),
|
||||
sqlInstance, resource,
|
||||
)
|
||||
}
|
||||
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// win32PerfRawDataSqlServerTransactions receives the single row of the
// per-instance SQL Server "Transactions" WMI performance class. The fields
// whose names contain "KB" are multiplied by 1024 before being exported.
type win32PerfRawDataSqlServerTransactions struct {
	FreeSpaceintempdbKB            uint64
	LongestTransactionRunningTime  uint64
	NonSnapshotVersionTransactions uint64
	SnapshotTransactions           uint64
	Transactions                   uint64
	Updateconflictratio            uint64
	UpdateSnapshotTransactions     uint64
	VersionCleanuprateKBPers       uint64
	VersionGenerationrateKBPers    uint64
	VersionStoreSizeKB             uint64
	VersionStoreunitcount          uint64
	VersionStoreunitcreation       uint64
	VersionStoreunittruncation     uint64
}
|
||||
|
||||
// Win32_PerfRawData_MSSQLSERVER_Transactions docs:
|
||||
// - https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-transactions-object
|
||||
func (c *MSSQLCollector) collectTransactions(ch chan<- prometheus.Metric, sqlInstance string) (*prometheus.Desc, error) {
|
||||
var dst []win32PerfRawDataSqlServerTransactions
|
||||
log.Debugf("mssql_transactions collector iterating sql instance %s.", sqlInstance)
|
||||
|
||||
class := mssqlBuildWMIInstanceClass("Transactions", sqlInstance)
|
||||
q := queryAllForClass(&dst, class)
|
||||
if err := wmi.Query(q, &dst); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if len(dst) == 0 {
|
||||
return nil, errors.New("WMI query returned empty result set")
|
||||
}
|
||||
|
||||
v := dst[0]
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
c.TransactionsTempDbFreeSpaceBytes,
|
||||
prometheus.GaugeValue,
|
||||
float64(v.FreeSpaceintempdbKB*1024),
|
||||
sqlInstance,
|
||||
)
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
c.TransactionsLongestTransactionRunningSeconds,
|
||||
prometheus.GaugeValue,
|
||||
float64(v.LongestTransactionRunningTime),
|
||||
sqlInstance,
|
||||
)
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
c.TransactionsNonSnapshotVersionActiveTotal,
|
||||
prometheus.CounterValue,
|
||||
float64(v.NonSnapshotVersionTransactions),
|
||||
sqlInstance,
|
||||
)
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
c.TransactionsSnapshotActiveTotal,
|
||||
prometheus.CounterValue,
|
||||
float64(v.SnapshotTransactions),
|
||||
sqlInstance,
|
||||
)
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
c.TransactionsActiveTotal,
|
||||
prometheus.CounterValue,
|
||||
float64(v.Transactions),
|
||||
sqlInstance,
|
||||
)
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
c.TransactionsUpdateConflictsTotal,
|
||||
prometheus.CounterValue,
|
||||
float64(v.Updateconflictratio),
|
||||
sqlInstance,
|
||||
)
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
c.TransactionsUpdateSnapshotActiveTotal,
|
||||
prometheus.CounterValue,
|
||||
float64(v.UpdateSnapshotTransactions),
|
||||
sqlInstance,
|
||||
)
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
c.TransactionsVersionCleanupRateBytes,
|
||||
prometheus.GaugeValue,
|
||||
float64(v.VersionCleanuprateKBPers*1024),
|
||||
sqlInstance,
|
||||
)
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
c.TransactionsVersionGenerationRateBytes,
|
||||
prometheus.GaugeValue,
|
||||
float64(v.VersionGenerationrateKBPers*1024),
|
||||
sqlInstance,
|
||||
)
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
c.TransactionsVersionStoreSizeBytes,
|
||||
prometheus.GaugeValue,
|
||||
float64(v.VersionStoreSizeKB*1024),
|
||||
sqlInstance,
|
||||
)
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
c.TransactionsVersionStoreUnits,
|
||||
prometheus.CounterValue,
|
||||
float64(v.VersionStoreunitcount),
|
||||
sqlInstance,
|
||||
)
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
c.TransactionsVersionStoreCreationUnits,
|
||||
prometheus.CounterValue,
|
||||
float64(v.VersionStoreunitcreation),
|
||||
sqlInstance,
|
||||
)
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
c.TransactionsVersionStoreTruncationUnits,
|
||||
prometheus.CounterValue,
|
||||
float64(v.VersionStoreunittruncation),
|
||||
sqlInstance,
|
||||
)
|
||||
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
@@ -252,7 +252,7 @@ func (c *NetworkCollector) collect(ch chan<- prometheus.Metric) (*prometheus.Des
|
||||
)
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
c.CurrentBandwidth,
|
||||
prometheus.CounterValue,
|
||||
prometheus.GaugeValue,
|
||||
float64(nic.CurrentBandwidth),
|
||||
name,
|
||||
)
|
||||
|
||||
@@ -63,11 +63,14 @@ func unmarshalObject(obj *perflib.PerfObject, vs interface{}) error {
|
||||
|
||||
ctr, found := counters[tag]
|
||||
if !found {
|
||||
log.Debugf("missing counter %q, has %v", tag, counters)
|
||||
return fmt.Errorf("could not find counter %q on instance", tag)
|
||||
log.Debugf("missing counter %q, have %v", tag, counterMapKeys(counters))
|
||||
continue
|
||||
}
|
||||
if !target.Field(i).CanSet() {
|
||||
return fmt.Errorf("tagged field %v cannot be written to", f)
|
||||
return fmt.Errorf("tagged field %v cannot be written to", f.Name)
|
||||
}
|
||||
if fieldType := target.Field(i).Type(); fieldType != reflect.TypeOf((*float64)(nil)).Elem() {
|
||||
return fmt.Errorf("tagged field %v has wrong type %v, must be float64", f.Name, fieldType)
|
||||
}
|
||||
|
||||
switch ctr.Def.CounterType {
|
||||
@@ -87,3 +90,11 @@ func unmarshalObject(obj *perflib.PerfObject, vs interface{}) error {
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func counterMapKeys(m map[string]*perflib.PerfCounter) []string {
|
||||
keys := make([]string, 0, len(m))
|
||||
for k := range m {
|
||||
keys = append(keys, k)
|
||||
}
|
||||
return keys
|
||||
}
|
||||
|
||||
125
collector/perflib_test.go
Normal file
125
collector/perflib_test.go
Normal file
@@ -0,0 +1,125 @@
|
||||
package collector
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
perflibCollector "github.com/leoluk/perflib_exporter/collector"
|
||||
"github.com/leoluk/perflib_exporter/perflib"
|
||||
)
|
||||
|
||||
// simple is the unmarshal target used by TestUnmarshalPerflib; each field is
// bound to a counter name through its `perflib` struct tag.
type simple struct {
	ValA float64 `perflib:"Something"`
	ValB float64 `perflib:"Something Else"`
}
|
||||
|
||||
func TestUnmarshalPerflib(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
obj *perflib.PerfObject
|
||||
|
||||
expectedOutput []simple
|
||||
expectError bool
|
||||
}{
|
||||
{
|
||||
name: "nil check",
|
||||
obj: nil,
|
||||
expectedOutput: []simple{},
|
||||
expectError: true,
|
||||
},
|
||||
{
|
||||
name: "Simple",
|
||||
obj: &perflib.PerfObject{
|
||||
Instances: []*perflib.PerfInstance{
|
||||
{
|
||||
Counters: []*perflib.PerfCounter{
|
||||
{
|
||||
Def: &perflib.PerfCounterDef{
|
||||
Name: "Something",
|
||||
CounterType: perflibCollector.PERF_COUNTER_COUNTER,
|
||||
},
|
||||
Value: 123,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
expectedOutput: []simple{{ValA: 123}},
|
||||
expectError: false,
|
||||
},
|
||||
{
|
||||
name: "Multiple properties",
|
||||
obj: &perflib.PerfObject{
|
||||
Instances: []*perflib.PerfInstance{
|
||||
{
|
||||
Counters: []*perflib.PerfCounter{
|
||||
{
|
||||
Def: &perflib.PerfCounterDef{
|
||||
Name: "Something",
|
||||
CounterType: perflibCollector.PERF_COUNTER_COUNTER,
|
||||
},
|
||||
Value: 123,
|
||||
},
|
||||
{
|
||||
Def: &perflib.PerfCounterDef{
|
||||
Name: "Something Else",
|
||||
CounterType: perflibCollector.PERF_COUNTER_COUNTER,
|
||||
},
|
||||
Value: 256,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
expectedOutput: []simple{{ValA: 123, ValB: 256}},
|
||||
expectError: false,
|
||||
},
|
||||
{
|
||||
name: "Multiple instances",
|
||||
obj: &perflib.PerfObject{
|
||||
Instances: []*perflib.PerfInstance{
|
||||
{
|
||||
Counters: []*perflib.PerfCounter{
|
||||
{
|
||||
Def: &perflib.PerfCounterDef{
|
||||
Name: "Something",
|
||||
CounterType: perflibCollector.PERF_COUNTER_COUNTER,
|
||||
},
|
||||
Value: 321,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
Counters: []*perflib.PerfCounter{
|
||||
{
|
||||
Def: &perflib.PerfCounterDef{
|
||||
Name: "Something",
|
||||
CounterType: perflibCollector.PERF_COUNTER_COUNTER,
|
||||
},
|
||||
Value: 231,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
expectedOutput: []simple{{ValA: 321}, {ValA: 231}},
|
||||
expectError: false,
|
||||
},
|
||||
}
|
||||
for _, c := range cases {
|
||||
t.Run(c.name, func(t *testing.T) {
|
||||
output := make([]simple, 0)
|
||||
err := unmarshalObject(c.obj, &output)
|
||||
if err != nil && !c.expectError {
|
||||
t.Errorf("Did not expect error, got %q", err)
|
||||
}
|
||||
if err == nil && c.expectError {
|
||||
t.Errorf("Expected an error, but got ok")
|
||||
}
|
||||
|
||||
if err == nil && !reflect.DeepEqual(output, c.expectedOutput) {
|
||||
t.Errorf("Output mismatch, expected %+v, got %+v", c.expectedOutput, output)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
103
collector/thermalzone.go
Normal file
103
collector/thermalzone.go
Normal file
@@ -0,0 +1,103 @@
|
||||
package collector
|
||||
|
||||
import (
|
||||
"github.com/StackExchange/wmi"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/common/log"
|
||||
)
|
||||
|
||||
func init() {
|
||||
Factories["thermalzone"] = NewThermalZoneCollector
|
||||
}
|
||||
|
||||
// A thermalZoneCollector is a Prometheus collector for WMI Win32_PerfRawData_Counters_ThermalZoneInformation metrics
|
||||
type thermalZoneCollector struct {
|
||||
PercentPassiveLimit *prometheus.Desc
|
||||
Temperature *prometheus.Desc
|
||||
ThrottleReasons *prometheus.Desc
|
||||
}
|
||||
|
||||
// NewThermalZoneCollector ...
|
||||
func NewThermalZoneCollector() (Collector, error) {
|
||||
const subsystem = "thermalzone"
|
||||
return &thermalZoneCollector{
|
||||
Temperature: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(Namespace, subsystem, "temperature_celsius"),
|
||||
"(Temperature)",
|
||||
[]string{
|
||||
"name",
|
||||
},
|
||||
nil,
|
||||
),
|
||||
PercentPassiveLimit: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(Namespace, subsystem, "percent_passive_limit"),
|
||||
"(PercentPassiveLimit)",
|
||||
[]string{
|
||||
"name",
|
||||
},
|
||||
nil,
|
||||
),
|
||||
ThrottleReasons: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(Namespace, subsystem, "throttle_reasons"),
|
||||
"(ThrottleReasons)",
|
||||
[]string{
|
||||
"name",
|
||||
},
|
||||
nil,
|
||||
),
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Collect sends the metric values for each metric
|
||||
// to the provided prometheus Metric channel.
|
||||
func (c *thermalZoneCollector) Collect(ctx *ScrapeContext, ch chan<- prometheus.Metric) error {
|
||||
if desc, err := c.collect(ch); err != nil {
|
||||
log.Error("failed collecting thermalzone metrics:", desc, err)
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Win32_PerfRawData_Counters_ThermalZoneInformation is the destination type
// for rows of the WMI class of the same name. Class reference:
// https://wutils.com/wmi/root/cimv2/win32_perfrawdata_counters_thermalzoneinformation/
//
// The field names are passed to the WMI query helper, so they must stay in
// sync with the property names of the WMI class.
type Win32_PerfRawData_Counters_ThermalZoneInformation struct {
	// Name is exported as the "name" label of every thermal zone metric.
	Name string

	// HighPrecisionTemperature is the zone temperature in tenths of a kelvin;
	// the collector converts it to degrees Celsius before exporting.
	HighPrecisionTemperature uint32
	// PercentPassiveLimit is exported unchanged as a gauge.
	PercentPassiveLimit uint32
	// ThrottleReasons is exported unchanged as a gauge.
	ThrottleReasons uint32
}
|
||||
|
||||
func (c *thermalZoneCollector) collect(ch chan<- prometheus.Metric) (*prometheus.Desc, error) {
|
||||
var dst []Win32_PerfRawData_Counters_ThermalZoneInformation
|
||||
q := queryAll(&dst)
|
||||
if err := wmi.Query(q, &dst); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for _, info := range dst {
|
||||
//Divide by 10 and subtract 273.15 to convert decikelvin to celsius
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
c.Temperature,
|
||||
prometheus.GaugeValue,
|
||||
(float64(info.HighPrecisionTemperature)/10.0)-273.15,
|
||||
info.Name,
|
||||
)
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
c.PercentPassiveLimit,
|
||||
prometheus.GaugeValue,
|
||||
float64(info.PercentPassiveLimit),
|
||||
info.Name,
|
||||
)
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
c.ThrottleReasons,
|
||||
prometheus.GaugeValue,
|
||||
float64(info.ThrottleReasons),
|
||||
info.Name,
|
||||
)
|
||||
}
|
||||
|
||||
return nil, nil
|
||||
}
|
||||
@@ -9,6 +9,7 @@ This directory contains documentation of the collectors in the WMI exporter, wit
|
||||
- [`hyperv`](collector.hyperv.md)
|
||||
- [`iis`](collector.iis.md)
|
||||
- [`logical_disk`](collector.logical_disk.md)
|
||||
- [`logon`](collector.logon.md)
|
||||
- [`memory`](collector.memory.md)
|
||||
- [`msmq`](collector.msmq.md)
|
||||
- [`mssql`](collector.mssql.md)
|
||||
@@ -27,4 +28,4 @@ This directory contains documentation of the collectors in the WMI exporter, wit
|
||||
- [`system`](collector.system.md)
|
||||
- [`tcp`](collector.tcp.md)
|
||||
- [`textfile`](collector.textfile.md)
|
||||
- [`vmware`](collector.vmware.md)
|
||||
- [`vmware`](collector.vmware.md)
|
||||
|
||||
@@ -1,43 +1,60 @@
|
||||
# cpu collector
|
||||
|
||||
The cpu collector exposes metrics about CPU usage
|
||||
|
||||
|||
|
||||
-|-
|
||||
Metric name prefix | `cpu`
|
||||
Data source | Perflib
|
||||
Counters | `ProcessorInformation` (Windows Server 2008R2 and later) `Processor` (older versions)
|
||||
Enabled by default? | Yes
|
||||
|
||||
## Flags
|
||||
|
||||
None
|
||||
|
||||
## Metrics
|
||||
These metrics are available on all versions of Windows:
|
||||
|
||||
Name | Description | Type | Labels
|
||||
-----|-------------|------|-------
|
||||
`wmi_cpu_cstate_seconds_total` | Time spent in low-power idle states | counter | `core`, `state`
|
||||
`wmi_cpu_time_total` | Time that processor spent in different modes (idle, user, system, ...) | counter | `core`, `mode`
|
||||
`wmi_cpu_interrupts_total` | Total number of received and serviced hardware interrupts | counter | `core`
|
||||
`wmi_cpu_dpcs_total` | Total number of received and serviced deferred procedure calls (DPCs) | counter | `core`
|
||||
|
||||
These metrics are only exposed on Windows Server 2008R2 and later:
|
||||
|
||||
Name | Description | Type | Labels
|
||||
-----|-------------|------|-------
|
||||
`wmi_cpu_clock_interrupts_total` | Total number of received and serviced clock tick interrupts | `core`
|
||||
`wmi_cpu_idle_break_events_total` | Total number of time processor was woken from idle | `core`
|
||||
`wmi_cpu_parking_status` | Parking Status represents whether a processor is parked or not | `gauge`
|
||||
`wmi_cpu_core_frequency_mhz` | Core frequency in megahertz | `gauge`
|
||||
`wmi_cpu_processor_performance` | Processor Performance is the average performance of the processor while it is executing instructions, as a percentage of the nominal performance of the processor. On some processors, Processor Performance may exceed 100% | `gauge`
|
||||
|
||||
### Example metric
|
||||
_This collector does not yet have explained examples, we would appreciate your help adding them!_
|
||||
|
||||
## Useful queries
|
||||
_This collector does not yet have any useful queries added, we would appreciate your help adding them!_
|
||||
|
||||
## Alerting examples
|
||||
_This collector does not yet have alerting examples, we would appreciate your help adding them!_
|
||||
# cpu collector
|
||||
|
||||
The cpu collector exposes metrics about CPU usage
|
||||
|
||||
|||
|
||||
-|-
|
||||
Metric name prefix | `cpu`
|
||||
Data source | Perflib
|
||||
Counters | `ProcessorInformation` (Windows Server 2008R2 and later) `Processor` (older versions)
|
||||
Enabled by default? | Yes
|
||||
|
||||
## Flags
|
||||
|
||||
None
|
||||
|
||||
## Metrics
|
||||
These metrics are available on all versions of Windows:
|
||||
|
||||
Name | Description | Type | Labels
|
||||
-----|-------------|------|-------
|
||||
`wmi_cpu_cstate_seconds_total` | Time spent in low-power idle states | counter | `core`, `state`
|
||||
`wmi_cpu_time_total` | Time that processor spent in different modes (idle, user, system, ...) | counter | `core`, `mode`
|
||||
`wmi_cpu_interrupts_total` | Total number of received and serviced hardware interrupts | counter | `core`
|
||||
`wmi_cpu_dpcs_total` | Total number of received and serviced deferred procedure calls (DPCs) | counter | `core`
|
||||
|
||||
These metrics are only exposed on Windows Server 2008R2 and later:
|
||||
|
||||
Name | Description | Type | Labels
|
||||
-----|-------------|------|-------
|
||||
`wmi_cpu_clock_interrupts_total` | Total number of received and serviced clock tick interrupts | counter | `core`
|
||||
`wmi_cpu_idle_break_events_total` | Total number of times the processor was woken from idle | counter | `core`
|
||||
`wmi_cpu_parking_status` | Parking Status represents whether a processor is parked or not | `gauge`
|
||||
`wmi_cpu_core_frequency_mhz` | Core frequency in megahertz | `gauge`
|
||||
`wmi_cpu_processor_performance` | Processor Performance is the average performance of the processor while it is executing instructions, as a percentage of the nominal performance of the processor. On some processors, Processor Performance may exceed 100% | `gauge`
|
||||
|
||||
### Example metric
|
||||
Show frequency of host CPU cores
|
||||
```
|
||||
wmi_cpu_core_frequency_mhz{instance="localhost"}
|
||||
```
|
||||
|
||||
## Useful queries
|
||||
Show cpu usage by mode.
|
||||
```
|
||||
sum by (mode) (irate(wmi_cpu_time_total{instance="localhost"}[5m]))
|
||||
```
|
||||
|
||||
## Alerting examples
|
||||
**prometheus.rules**
|
||||
```
|
||||
# Alert on hosts with more than 80% CPU usage over a 10 minute period
|
||||
- alert: CpuUsage
|
||||
expr: 100 - (avg by (instance) (irate(wmi_cpu_time_total{mode="idle"}[2m])) * 100) > 80
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "CPU Usage (instance {{ $labels.instance }})"
|
||||
description: "CPU Usage is more than 80%\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
|
||||
```
|
||||
|
||||
@@ -1,44 +1,75 @@
|
||||
# logical_disk collector
|
||||
|
||||
The logical_disk collector exposes metrics about logical disks (in contrast to physical disks)
|
||||
|
||||
|||
|
||||
-|-
|
||||
Metric name prefix | `logical_disk`
|
||||
Classes | [`Win32_PerfRawData_PerfDisk_LogicalDisk`](https://msdn.microsoft.com/en-us/windows/hardware/aa394307(v=vs.71))
|
||||
Enabled by default? | Yes
|
||||
|
||||
## Flags
|
||||
|
||||
### `--collector.logical_disk.volume-whitelist`
|
||||
|
||||
If given, a disk needs to match the whitelist regexp in order for the corresponding disk metrics to be reported
|
||||
|
||||
### `--collector.logical_disk.volume-blacklist`
|
||||
|
||||
If given, a disk needs to *not* match the blacklist regexp in order for the corresponding disk metrics to be reported
|
||||
|
||||
## Metrics
|
||||
|
||||
Name | Description | Type | Labels
|
||||
-----|-------------|------|-------
|
||||
`requests_queued` | _Not yet documented_ | gauge | `volume`
|
||||
`read_bytes_total` | _Not yet documented_ | counter | `volume`
|
||||
`reads_total` | _Not yet documented_ | counter | `volume`
|
||||
`write_bytes_total` | _Not yet documented_ | counter | `volume`
|
||||
`writes_total` | _Not yet documented_ | counter | `volume`
|
||||
`read_seconds_total` | _Not yet documented_ | counter | `volume`
|
||||
`write_seconds_total` | _Not yet documented_ | counter | `volume`
|
||||
`free_bytes` | _Not yet documented_ | gauge | `volume`
|
||||
`size_bytes` | _Not yet documented_ | gauge | `volume`
|
||||
`idle_seconds_total` | _Not yet documented_ | counter | `volume`
|
||||
`split_ios_total` | _Not yet documented_ | counter | `volume`
|
||||
|
||||
### Example metric
|
||||
_This collector does not yet have explained examples, we would appreciate your help adding them!_
|
||||
|
||||
## Useful queries
|
||||
_This collector does not yet have any useful queries added, we would appreciate your help adding them!_
|
||||
|
||||
## Alerting examples
|
||||
_This collector does not yet have alerting examples, we would appreciate your help adding them!_
|
||||
# logical_disk collector
|
||||
|
||||
The logical_disk collector exposes metrics about logical disks (in contrast to physical disks)
|
||||
|
||||
|||
|
||||
-|-
|
||||
Metric name prefix | `logical_disk`
|
||||
Classes | [`Win32_PerfRawData_PerfDisk_LogicalDisk`](https://msdn.microsoft.com/en-us/windows/hardware/aa394307(v=vs.71))
|
||||
Enabled by default? | Yes
|
||||
|
||||
## Flags
|
||||
|
||||
### `--collector.logical_disk.volume-whitelist`
|
||||
|
||||
If given, a disk needs to match the whitelist regexp in order for the corresponding disk metrics to be reported
|
||||
|
||||
### `--collector.logical_disk.volume-blacklist`
|
||||
|
||||
If given, a disk needs to *not* match the blacklist regexp in order for the corresponding disk metrics to be reported
|
||||
|
||||
## Metrics
|
||||
|
||||
Name | Description | Type | Labels
|
||||
-----|-------------|------|-------
|
||||
`requests_queued` | Number of requests outstanding on the disk at the time the performance data is collected | gauge | `volume`
|
||||
`read_bytes_total` | Rate at which bytes are transferred from the disk during read operations | counter | `volume`
|
||||
`reads_total` | Rate of read operations on the disk | counter | `volume`
|
||||
`write_bytes_total` | Rate at which bytes are transferred to the disk during write operations | counter | `volume`
|
||||
`writes_total` | Rate of write operations on the disk | counter | `volume`
|
||||
`read_seconds_total` | Seconds the disk was busy servicing read requests | counter | `volume`
|
||||
`write_seconds_total` | Seconds the disk was busy servicing write requests | counter | `volume`
|
||||
`free_bytes` | Unused space of the disk in bytes | gauge | `volume`
|
||||
`size_bytes` | Total size of the disk in bytes | gauge | `volume`
|
||||
`idle_seconds_total` | Seconds the disk was idle (not servicing read/write requests) | counter | `volume`
|
||||
`split_ios_total` | Number of I/Os to the disk split into multiple I/Os | counter | `volume`
|
||||
|
||||
### Example metric
|
||||
Query the rate at which bytes are read from a disk
|
||||
```
|
||||
rate(wmi_logical_disk_read_bytes_total{instance="localhost", volume=~"C:"}[2m])
|
||||
```
|
||||
|
||||
## Useful queries
|
||||
Calculate rate of total IOPS for disk
|
||||
```
|
||||
rate(wmi_logical_disk_reads_total{instance="localhost", volume="C:"}[2m]) + rate(wmi_logical_disk_writes_total{instance="localhost", volume="C:"}[2m])
|
||||
```
|
||||
|
||||
## Alerting examples
|
||||
**prometheus.rules**
|
||||
```
|
||||
groups:
|
||||
- name: Windows Disk Alerts
|
||||
rules:
|
||||
|
||||
# Sends an alert when disk space usage is above 95%
|
||||
- alert: DiskSpaceUsage
|
||||
expr: 100.0 - 100 * (wmi_logical_disk_free_bytes / wmi_logical_disk_size_bytes) > 95
|
||||
for: 10m
|
||||
labels:
|
||||
severity: high
|
||||
annotations:
|
||||
summary: "Disk Space Usage (instance {{ $labels.instance }})"
|
||||
description: "Disk Space on Drive is used more than 95%\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
|
||||
|
||||
# Alerts on disks with over 85% space usage predicted to fill within the next four days
|
||||
- alert: DiskFilling
|
||||
expr: 100 * (wmi_logical_disk_free_bytes / wmi_logical_disk_size_bytes) < 15 and predict_linear(wmi_logical_disk_free_bytes[6h], 4 * 24 * 3600) < 0
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "Disk full in four days (instance {{ $labels.instance }})"
|
||||
description: "{{ $labels.volume }} is expected to fill up within four days. Currently {{ $value | humanize }}% is available.\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
|
||||
```
|
||||
|
||||
34
docs/collector.logon.md
Normal file
34
docs/collector.logon.md
Normal file
@@ -0,0 +1,34 @@
|
||||
# logon collector
|
||||
|
||||
The logon collector exposes metrics detailing the active user logon sessions.
|
||||
|
||||
|||
|
||||
-|-
|
||||
Metric name prefix | `logon`
|
||||
Classes | [`Win32_LogonSession`](https://docs.microsoft.com/en-us/windows/win32/cimwin32prov/win32-logonsession)
|
||||
Enabled by default? | No
|
||||
|
||||
## Flags
|
||||
|
||||
None
|
||||
|
||||
## Metrics
|
||||
|
||||
Name | Description | Type | Labels
|
||||
-----|-------------|------|-------
|
||||
`wmi_logon_logon_type` | Number of active user logon sessions | gauge | status
|
||||
|
||||
### Example metric
|
||||
Query the total number of interactive logon sessions
|
||||
```
|
||||
wmi_logon_logon_type{status="interactive"}
|
||||
```
|
||||
|
||||
## Useful queries
|
||||
Query the total number of local and remote (i.e. Terminal Services) interactive sessions.
|
||||
```
|
||||
wmi_logon_logon_type{status=~"interactive|remoteinteractive"}
|
||||
```
|
||||
|
||||
## Alerting examples
|
||||
_This collector does not yet have alerting examples, we would appreciate your help adding them!_
|
||||
@@ -19,25 +19,25 @@ Name | Description | Type | Labels
|
||||
`wmi_cs_logical_processors` | Number of installed logical processors | gauge | None
|
||||
`wmi_cs_physical_memory_bytes` | Total installed physical memory | gauge | None
|
||||
`wmi_memory_available_bytes` | The amount of physical memory immediately available for allocation to a process or for system use. It is equal to the sum of memory assigned to the standby (cached), free and zero page lists | gauge | None
|
||||
`wmi_memory_cache_bytes` | _Not yet documented_ | gauge | None
|
||||
`wmi_memory_cache_bytes_peak` | _Not yet documented_ | gauge | None
|
||||
`wmi_memory_cache_faults_total` | _Not yet documented_ | gauge | None
|
||||
`wmi_memory_commit_limit` | _Not yet documented_ | gauge | None
|
||||
`wmi_memory_committed_bytes` | _Not yet documented_ | gauge | None
|
||||
`wmi_memory_cache_bytes` | Number of bytes currently being used by the file system cache | gauge | None
|
||||
`wmi_memory_cache_bytes_peak` | Maximum number of CacheBytes after the system was last restarted | gauge | None
|
||||
`wmi_memory_cache_faults_total` | Number of faults which occur when a page sought in the file system cache is not found there and must be retrieved from elsewhere in memory (soft fault) or from disk (hard fault) | gauge | None
|
||||
`wmi_memory_commit_limit` | Amount of virtual memory, in bytes, that can be committed without having to extend the paging file(s) | gauge | None
|
||||
`wmi_memory_committed_bytes` | Amount of committed virtual memory, in bytes | gauge | None
|
||||
`wmi_memory_demand_zero_faults_total` | The number of zeroed pages required to satisfy faults. Zeroed pages, pages emptied of previously stored data and filled with zeros, are a security feature of Windows that prevent processes from seeing data stored by earlier processes that used the memory space | gauge | None
|
||||
`wmi_memory_free_and_zero_page_list_bytes` | _Not yet documented_ | gauge | None
|
||||
`wmi_memory_free_system_page_table_entries` | _Not yet documented_ | gauge | None
|
||||
`wmi_memory_free_system_page_table_entries` | Number of page table entries not being used by the system | gauge | None
|
||||
`wmi_memory_modified_page_list_bytes` | _Not yet documented_ | gauge | None
|
||||
`wmi_memory_page_faults_total` | _Not yet documented_ | gauge | None
|
||||
`wmi_memory_page_faults_total` | Overall rate at which faulted pages are handled by the processor | gauge | None
|
||||
`wmi_memory_swap_page_reads_total` | Number of disk page reads (a single read operation reading several pages is still only counted once) | gauge | None
|
||||
`wmi_memory_swap_pages_read_total` | Number of pages read across all page reads (ie counting all pages read even if they are read in a single operation) | gauge | None
|
||||
`wmi_memory_swap_pages_written_total` | Number of pages written across all page writes (ie counting all pages written even if they are written in a single operation) | gauge | None
|
||||
`wmi_memory_swap_page_operations_total` | Total number of swap page read and writes (PagesPersec) | gauge | None
|
||||
`wmi_memory_swap_page_writes_total` | Number of disk page writes (a single write operation writing several pages is still only counted once) | gauge | None
|
||||
`wmi_memory_pool_nonpaged_allocs_total` | The number of calls to allocate space in the nonpaged pool. The nonpaged pool is an area of system memory area for objects that cannot be written to disk, and must remain in physical memory as long as they are allocated | gauge | None
|
||||
`wmi_memory_pool_nonpaged_bytes_total` | _Not yet documented_ | gauge | None
|
||||
`wmi_memory_pool_paged_allocs_total` | _Not yet documented_ | gauge | None
|
||||
`wmi_memory_pool_paged_bytes` | _Not yet documented_ | gauge | None
|
||||
`wmi_memory_pool_nonpaged_bytes_total` | Number of bytes in the non-paged pool | gauge | None
|
||||
`wmi_memory_pool_paged_allocs_total` | Number of calls to allocate space in the paged pool, regardless of the amount of space allocated in each call | gauge | None
|
||||
`wmi_memory_pool_paged_bytes` | Number of bytes in the paged pool | gauge | None
|
||||
`wmi_memory_pool_paged_resident_bytes` | _Not yet documented_ | gauge | None
|
||||
`wmi_memory_standby_cache_core_bytes` | _Not yet documented_ | gauge | None
|
||||
`wmi_memory_standby_cache_normal_priority_bytes` | _Not yet documented_ | gauge | None
|
||||
|
||||
@@ -5,14 +5,14 @@ The mssql collector exposes metrics about the MSSQL server
|
||||
|||
|
||||
-|-
|
||||
Metric name prefix | `mssql`
|
||||
Classes | [`Win32_PerfRawData_MSSQLSERVER_SQLServerAccessMethods`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-access-methods-object)<br/>[`Win32_PerfRawData_MSSQLSERVER_SQLServerAvailabilityReplica`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-availability-replica)<br/>[`Win32_PerfRawData_MSSQLSERVER_SQLServerBufferManager`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-buffer-manager-object)<br/>[`Win32_PerfRawData_MSSQLSERVER_SQLServerDatabaseReplica`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-database-replica)<br/>[`Win32_PerfRawData_MSSQLSERVER_SQLServerDatabases`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-databases-object?view=sql-server-2017)<br/>[`Win32_PerfRawData_MSSQLSERVER_SQLServerGeneralStatistics`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-general-statistics-object)<br/>[`Win32_PerfRawData_MSSQLSERVER_SQLServerLocks`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-locks-object)<br/>[`Win32_PerfRawData_MSSQLSERVER_SQLServerMemoryManager`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-memory-manager-object)<br/>[`Win32_PerfRawData_MSSQLSERVER_SQLServerSQLStatistics`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-sql-statistics-object)
|
||||
Classes | [`Win32_PerfRawData_MSSQLSERVER_SQLServerAccessMethods`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-access-methods-object)<br/>[`Win32_PerfRawData_MSSQLSERVER_SQLServerAvailabilityReplica`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-availability-replica)<br/>[`Win32_PerfRawData_MSSQLSERVER_SQLServerBufferManager`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-buffer-manager-object)<br/>[`Win32_PerfRawData_MSSQLSERVER_SQLServerDatabaseReplica`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-database-replica)<br/>[`Win32_PerfRawData_MSSQLSERVER_SQLServerDatabases`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-databases-object?view=sql-server-2017)<br/>[`Win32_PerfRawData_MSSQLSERVER_SQLServerGeneralStatistics`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-general-statistics-object)<br/>[`Win32_PerfRawData_MSSQLSERVER_SQLServerLocks`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-locks-object)<br/>[`Win32_PerfRawData_MSSQLSERVER_SQLServerMemoryManager`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-memory-manager-object)<br/>[`Win32_PerfRawData_MSSQLSERVER_SQLServerSQLStatistics`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-sql-statistics-object)<br/>[`Win32_PerfRawData_MSSQLSERVER_SQLServerSQLErrors`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-sql-errors-object)<br/>[`Win32_PerfRawData_MSSQLSERVER_SQLServerTransactions`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-transactions-object)
|
||||
Enabled by default? | No
|
||||
|
||||
## Flags
|
||||
|
||||
### `--collectors.mssql.classes-enabled`
|
||||
|
||||
Comma-separated list of MSSQL WMI classes to use. Supported values are `accessmethods`, `availreplica`, `bufman`, `databases`, `dbreplica`, `genstats`, `locks`, `memmgr` and `sqlstats`.
|
||||
Comma-separated list of MSSQL WMI classes to use. Supported values are `accessmethods`, `availreplica`, `bufman`, `databases`, `dbreplica`, `genstats`, `locks`, `memmgr`, `sqlstats`, `sqlerrors` and `transactions`.
|
||||
|
||||
### `--collectors.mssql.class-print`
|
||||
|
||||
@@ -230,6 +230,20 @@ Name | Description | Type | Labels
|
||||
`wmi_mssql_sqlstats_sql_compilations` | _Not yet documented_ | counter | `instance`
|
||||
`wmi_mssql_sqlstats_sql_recompilations` | _Not yet documented_ | counter | `instance`
|
||||
`wmi_mssql_sqlstats_unsafe_auto_parameterization_attempts` | _Not yet documented_ | counter | `instance`
|
||||
`wmi_mssql_sql_errors_total` | _Not yet documented_ | counter | `instance`, `resource`
|
||||
`wmi_mssql_transactions_tempdb_free_space_bytes` | _Not yet documented_ | gauge | `instance`
|
||||
`wmi_mssql_transactions_longest_transaction_running_seconds` | _Not yet documented_ | gauge | `instance`
|
||||
`wmi_mssql_transactions_nonsnapshot_version_active_total` | _Not yet documented_ | counter | `instance`
|
||||
`wmi_mssql_transactions_snapshot_active_total` | _Not yet documented_ | counter | `instance`
|
||||
`wmi_mssql_transactions_active_total` | _Not yet documented_ | counter | `instance`
|
||||
`wmi_mssql_transactions_update_conflicts_total` | _Not yet documented_ | counter | `instance`
|
||||
`wmi_mssql_transactions_update_snapshot_active_total` | _Not yet documented_ | counter | `instance`
|
||||
`wmi_mssql_transactions_version_cleanup_rate_bytes` | _Not yet documented_ | gauge | `instance`
|
||||
`wmi_mssql_transactions_version_generation_rate_bytes` | _Not yet documented_ | gauge | `instance`
|
||||
`wmi_mssql_transactions_version_store_size_bytes` | _Not yet documented_ | gauge | `instance`
|
||||
`wmi_mssql_transactions_version_store_units` | _Not yet documented_ | counter | `instance`
|
||||
`wmi_mssql_transactions_version_store_creation_units` | _Not yet documented_ | counter | `instance`
|
||||
`wmi_mssql_transactions_version_store_truncation_units` | _Not yet documented_ | counter | `instance`
|
||||
|
||||
### Example metric
|
||||
_This collector does not yet have explained examples, we would appreciate your help adding them!_
|
||||
|
||||
@@ -22,24 +22,40 @@ If given, an interface name needs to *not* match the blacklist regexp in order f
|
||||
|
||||
Name | Description | Type | Labels
|
||||
-----|-------------|------|-------
|
||||
`wmi_net_bytes_received_total` | _Not yet documented_ | counter | `nic`
|
||||
`wmi_net_bytes_sent_total` | _Not yet documented_ | counter | `nic`
|
||||
`wmi_net_bytes_total` | _Not yet documented_ | counter | `nic`
|
||||
`wmi_net_packets_outbound_discarded` | _Not yet documented_ | counter | `nic`
|
||||
`wmi_net_packets_outbound_errors` | _Not yet documented_ | counter | `nic`
|
||||
`wmi_net_packets_received_discarded` | _Not yet documented_ | counter | `nic`
|
||||
`wmi_net_packets_received_errors` | _Not yet documented_ | counter | `nic`
|
||||
`wmi_net_packets_received_total` | _Not yet documented_ | counter | `nic`
|
||||
`wmi_net_packets_received_unknown` | _Not yet documented_ | counter | `nic`
|
||||
`wmi_net_packets_total` | _Not yet documented_ | counter | `nic`
|
||||
`wmi_net_packets_sent_total` | _Not yet documented_ | counter | `nic`
|
||||
`wmi_net_current_bandwidth` | _Not yet documented_ | counter | `nic`
|
||||
`wmi_net_bytes_received_total` | Total bytes received by interface | counter | `nic`
|
||||
`wmi_net_bytes_sent_total` | Total bytes transmitted by interface | counter | `nic`
|
||||
`wmi_net_bytes_total` | Total bytes received and transmitted by interface | counter | `nic`
|
||||
`wmi_net_packets_outbound_discarded` | Total outbound packets that were chosen to be discarded even though no errors had been detected to prevent transmission | counter | `nic`
|
||||
`wmi_net_packets_outbound_errors` | Total packets that could not be transmitted due to errors | counter | `nic`
|
||||
`wmi_net_packets_received_discarded` | Total inbound packets that were chosen to be discarded even though no errors had been detected to prevent delivery | counter | `nic`
|
||||
`wmi_net_packets_received_errors` | Total packets that could not be received due to errors | counter | `nic`
|
||||
`wmi_net_packets_received_total` | Total packets received by interface | counter | `nic`
|
||||
`wmi_net_packets_received_unknown` | Total packets received by interface that were discarded because of an unknown or unsupported protocol | counter | `nic`
|
||||
`wmi_net_packets_total` | Total packets received and transmitted by interface | counter | `nic`
|
||||
`wmi_net_packets_sent_total` | Total packets transmitted by interface | counter | `nic`
|
||||
`wmi_net_current_bandwidth` | Estimate of the interface's current bandwidth in bits per second (bps) | gauge | `nic`
|
||||
|
||||
### Example metric
|
||||
_This collector does not yet have explained examples, we would appreciate your help adding them!_
|
||||
Query the rate of transmitted network traffic
|
||||
```
|
||||
rate(wmi_net_bytes_sent_total{instance="localhost"}[2m])
|
||||
```
|
||||
|
||||
## Useful queries
|
||||
_This collector does not yet have any useful queries added, we would appreciate your help adding them!_
|
||||
Get total utilisation of network interface as a percentage
|
||||
```
|
||||
rate(wmi_net_bytes_total{instance="localhost", nic="Microsoft_Hyper_V_Network_Adapter__1"}[2m]) * 8 / wmi_net_current_bandwidth{instance="localhost", nic="Microsoft_Hyper_V_Network_Adapter__1"} * 100
|
||||
```
|
||||
|
||||
## Alerting examples
|
||||
_This collector does not yet have alerting examples, we would appreciate your help adding them!_
|
||||
**prometheus.rules**
|
||||
```
|
||||
- alert: NetInterfaceUsage
|
||||
expr: rate(wmi_net_bytes_total[2m]) * 8 / wmi_net_current_bandwidth * 100 > 90
|
||||
for: 10m
|
||||
labels:
|
||||
severity: high
|
||||
annotations:
|
||||
summary: "Network Interface Usage (instance {{ $labels.instance }})"
|
||||
description: "Network traffic usage is greater than 90% for interface {{ $labels.nic }}\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
|
||||
```
|
||||
|
||||
@@ -16,24 +16,50 @@ None
|
||||
|
||||
Name | Description | Type | Labels
|
||||
-----|-------------|------|-------
|
||||
`wmi_os_paging_limit_bytes` | _Not yet documented_ | gauge | None
|
||||
`wmi_os_paging_free_bytes` | _Not yet documented_ | gauge | None
|
||||
`wmi_os_physical_memory_free_bytes` | _Not yet documented_ | gauge | None
|
||||
`wmi_os_time` | _Not yet documented_ | gauge | None
|
||||
`wmi_os_timezone` | _Not yet documented_ | gauge | `timezone`
|
||||
`wmi_os_processes` | _Not yet documented_ | gauge | None
|
||||
`wmi_os_processes_limit` | _Not yet documented_ | gauge | None
|
||||
`wmi_os_process_memory_limix_bytes` | _Not yet documented_ | gauge | None
|
||||
`wmi_os_users` | _Not yet documented_ | gauge | None
|
||||
`wmi_os_virtual_memory_bytes` | _Not yet documented_ | gauge | None
|
||||
`wmi_os_visible_memory_bytes` | _Not yet documented_ | gauge | None
|
||||
`wmi_os_virtual_memory_free_bytes` | _Not yet documented_ | gauge | None
|
||||
`wmi_os_paging_limit_bytes` | Total number of bytes that can be stored in the operating system paging files. 0 (zero) indicates that there are no paging files | gauge | None
|
||||
`wmi_os_paging_free_bytes` | Number of bytes that can be mapped into the operating system paging files without causing any other pages to be swapped out | gauge | None
|
||||
`wmi_os_physical_memory_free_bytes` | Bytes of physical memory currently unused and available | gauge | None
|
||||
`wmi_os_time` | Current time as reported by the operating system, in [Unix time](https://en.wikipedia.org/wiki/Unix_time). See [time.Unix()](https://golang.org/pkg/time/#Unix) for details | gauge | None
|
||||
`wmi_os_timezone` | Current timezone as reported by the operating system. See [time.Zone()](https://golang.org/pkg/time/#Time.Zone) for details | gauge | `timezone`
|
||||
`wmi_os_processes` | Number of process contexts currently loaded or running on the operating system | gauge | None
|
||||
`wmi_os_processes_limit` | Maximum number of process contexts the operating system can support. The default value set by the provider is 4294967295 (0xFFFFFFFF) | gauge | None
|
||||
`wmi_os_process_memory_limit_bytes` | Maximum number of bytes of memory that can be allocated to a process | gauge | None
|
||||
`wmi_os_users` | Number of user sessions for which the operating system is storing state information currently. For a list of current active logon sessions, see [`logon`](collector.logon.md) | gauge | None
|
||||
`wmi_os_virtual_memory_bytes` | Bytes of virtual memory | gauge | None
|
||||
`wmi_os_visible_memory_bytes` | Total bytes of physical memory available to the operating system. This value does not necessarily indicate the true amount of physical memory, but what is reported to the operating system as available to it | gauge | None
|
||||
`wmi_os_virtual_memory_free_bytes` | Bytes of virtual memory currently unused and available | gauge | None
|
||||
|
||||
### Example metric
|
||||
_This collector does not yet have explained examples, we would appreciate your help adding them!_
|
||||
Show current number of processes
|
||||
```
|
||||
wmi_os_processes{instance="localhost"}
|
||||
```
|
||||
|
||||
## Useful queries
|
||||
_This collector does not yet have any useful queries added, we would appreciate your help adding them!_
|
||||
Find all devices not set to UTC timezone
|
||||
```
|
||||
wmi_os_timezone{timezone != "UTC"}
|
||||
```
|
||||
|
||||
## Alerting examples
|
||||
_This collector does not yet have alerting examples, we would appreciate your help adding them!_
|
||||
**prometheus.rules**
|
||||
```
|
||||
# Alert on hosts that have exhausted all available physical memory
|
||||
- alert: MemoryExhausted
|
||||
expr: wmi_os_physical_memory_free_bytes == 0
|
||||
for: 10m
|
||||
labels:
|
||||
severity: high
|
||||
annotations:
|
||||
summary: "Host {{ $labels.instance }} is out of memory"
|
||||
description: "{{ $labels.instance }} has exhausted all available physical memory"
|
||||
|
||||
# Alert on hosts with greater than 90% memory usage
|
||||
- alert: MemoryLow
|
||||
expr: 100 - 100 * wmi_os_physical_memory_free_bytes / wmi_cs_physical_memory_bytes > 90
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "Memory usage for host {{ $labels.instance }} is greater than 90%"
|
||||
```
|
||||
|
||||
@@ -66,10 +66,42 @@ A service can have any of the following statuses:
|
||||
Note that there is some overlap with service state.
|
||||
|
||||
### Example metric
|
||||
_This collector does not yet have explained examples, we would appreciate your help adding them!_
|
||||
Lists the services that have a 'disabled' start mode.
|
||||
```
|
||||
wmi_service_start_mode{exported_name=~"(mssqlserver|sqlserveragent)",start_mode="disabled"}
|
||||
```
|
||||
|
||||
## Useful queries
|
||||
_This collector does not yet have any useful queries added, we would appreciate your help adding them!_
|
||||
Counts the number of Microsoft SQL Server/Agent Processes
|
||||
```
|
||||
count(wmi_service_state{exported_name=~"(sqlserveragent|mssqlserver)",state="running"})
|
||||
```
|
||||
|
||||
## Alerting examples
|
||||
_This collector does not yet have alerting examples, we would appreciate your help adding them!_
|
||||
**prometheus.rules**
|
||||
```
|
||||
groups:
|
||||
- name: Microsoft SQL Server Alerts
|
||||
rules:
|
||||
|
||||
# Sends an alert when the 'sqlserveragent' service is not in the running state for 3 minutes.
|
||||
- alert: SQL Server Agent DOWN
|
||||
expr: wmi_service_state{instance="SQL",exported_name="sqlserveragent",state="running"} == 0
|
||||
for: 3m
|
||||
labels:
|
||||
severity: high
|
||||
annotations:
|
||||
summary: "Service {{ $labels.exported_name }} down"
|
||||
description: "Service {{ $labels.exported_name }} on instance {{ $labels.instance }} has been down for more than 3 minutes."
|
||||
|
||||
# Sends an alert when the 'mssqlserver' service is not in the running state for 3 minutes.
|
||||
- alert: SQL Server DOWN
|
||||
expr: wmi_service_state{instance="SQL",exported_name="mssqlserver",state="running"} == 0
|
||||
for: 3m
|
||||
labels:
|
||||
severity: high
|
||||
annotations:
|
||||
summary: "Service {{ $labels.exported_name }} down"
|
||||
description: "Service {{ $labels.exported_name }} on instance {{ $labels.instance }} has been down for more than 3 minutes."
|
||||
```
|
||||
In this example, `instance` is the target label of the host. So each alert will be processed per host, which is then used in the alert description.
|
||||
|
||||
@@ -16,18 +16,24 @@ None
|
||||
|
||||
Name | Description | Type | Labels
|
||||
-----|-------------|------|-------
|
||||
`wmi_system_context_switches_total` | _Not yet documented_ | counter | None
|
||||
`wmi_system_exception_dispatches_total` | _Not yet documented_ | counter | None
|
||||
`wmi_system_processor_queue_length` | _Not yet documented_ | gauge | None
|
||||
`wmi_system_system_calls_total` | _Not yet documented_ | counter | None
|
||||
`wmi_system_system_up_time` | _Not yet documented_ | gauge | None
|
||||
`wmi_system_threads` | _Not yet documented_ | gauge | None
|
||||
`wmi_system_context_switches_total` | Total number of [context switches](https://en.wikipedia.org/wiki/Context_switch) | counter | None
|
||||
`wmi_system_exception_dispatches_total` | Total exceptions dispatched by the system | counter | None
|
||||
`wmi_system_processor_queue_length` | Number of threads in the processor queue. There is a single queue for processor time even on computers with multiple processors. | gauge | None
|
||||
`wmi_system_system_calls_total` | Total combined calls to Windows NT system service routines by all processes running on the computer | counter | None
|
||||
`wmi_system_system_up_time` | Time of last boot of system | gauge | None
|
||||
`wmi_system_threads` | Number of Windows system [threads](https://en.wikipedia.org/wiki/Thread_(computing)) | gauge | None
|
||||
|
||||
### Example metric
|
||||
_This collector does not yet have explained examples, we would appreciate your help adding them!_
|
||||
Show current number of system threads
|
||||
```
|
||||
wmi_system_threads{instance="localhost"}
|
||||
```
|
||||
|
||||
## Useful queries
|
||||
_This collector does not yet have any useful queries added, we would appreciate your help adding them!_
|
||||
Find hosts that have rebooted in the last 24 hours
|
||||
```
|
||||
time() - wmi_system_system_up_time < 86400
|
||||
```
|
||||
|
||||
## Alerting examples
|
||||
_This collector does not yet have alerting examples, we would appreciate your help adding them!_
|
||||
|
||||
@@ -16,15 +16,15 @@ None
|
||||
|
||||
Name | Description | Type | Labels
|
||||
-----|-------------|------|-------
|
||||
`wmi_tcp_connection_failures` | _Not yet documented_ | counter | None
|
||||
`wmi_tcp_connections_active` | _Not yet documented_ | counter | None
|
||||
`wmi_tcp_connections_established` | _Not yet documented_ | counter | None
|
||||
`wmi_tcp_connections_passive` | _Not yet documented_ | counter | None
|
||||
`wmi_tcp_connections_reset` | _Not yet documented_ | counter | None
|
||||
`wmi_tcp_segments_total` | _Not yet documented_ | counter | None
|
||||
`wmi_tcp_segments_received_total` | _Not yet documented_ | counter | None
|
||||
`wmi_tcp_segments_retransmitted_total` | _Not yet documented_ | counter | None
|
||||
`wmi_tcp_segments_sent_total` | _Not yet documented_ | counter | None
|
||||
`wmi_tcp_connection_failures` | Number of times TCP connections have made a direct transition to the CLOSED state from the SYN-SENT state or the SYN-RCVD state, plus the number of times TCP connections have made a direct transition from the SYN-RCVD state to the LISTEN state | counter | None
|
||||
`wmi_tcp_connections_active` | Number of times TCP connections have made a direct transition from the CLOSED state to the SYN-SENT state. | counter | None
|
||||
`wmi_tcp_connections_established` | Number of TCP connections for which the current state is either ESTABLISHED or CLOSE-WAIT. | counter | None
|
||||
`wmi_tcp_connections_passive` | Number of times TCP connections have made a direct transition from the LISTEN state to the SYN-RCVD state. | counter | None
|
||||
`wmi_tcp_connections_reset` | Number of times TCP connections have made a direct transition to the CLOSED state from either the ESTABLISHED state or the CLOSE-WAIT state. | counter | None
|
||||
`wmi_tcp_segments_total` | Total segments sent or received using the TCP protocol | counter | None
|
||||
`wmi_tcp_segments_received_total` | Total segments received, including those received in error. This count includes segments received on currently established connections | counter | None
|
||||
`wmi_tcp_segments_retransmitted_total` | Total segments retransmitted. That is, segments transmitted that contain one or more previously transmitted bytes | counter | None
|
||||
`wmi_tcp_segments_sent_total` | Total segments sent, including those on current connections, but excluding those containing *only* retransmitted bytes | counter | None
|
||||
|
||||
### Example metric
|
||||
_This collector does not yet have explained examples, we would appreciate your help adding them!_
|
||||
|
||||
32
docs/collector.thermalzone.md
Normal file
32
docs/collector.thermalzone.md
Normal file
@@ -0,0 +1,32 @@
|
||||
# thermalzone collector
|
||||
|
||||
The thermalzone collector exposes metrics about system temperatures. Note that the temperature metric is exported in degrees Celsius
|
||||
|
||||
|||
|
||||
-|-
|
||||
Metric name prefix | `thermalzone`
|
||||
Classes | [`Win32_PerfRawData_Counters_ThermalZoneInformation`](https://wutils.com/wmi/root/cimv2/win32_perfrawdata_counters_thermalzoneinformation/#temperature_properties)
|
||||
Enabled by default? | No
|
||||
|
||||
## Flags
|
||||
|
||||
None
|
||||
|
||||
## Metrics
|
||||
|
||||
Name | Description | Type | Labels
|
||||
-----|-------------|------|-------
|
||||
`wmi_thermalzone_percent_passive_limit` | % Passive Limit is the current limit this thermal zone is placing on the devices it controls. A limit of 100% indicates the devices are unconstrained. A limit of 0% indicates the devices are fully constrained. | gauge | None
|
||||
`wmi_thermalzone_temperature_celsius` | Temperature of the thermal zone, in degrees Celsius. | gauge | None
|
||||
`wmi_thermalzone_throttle_reasons` | Throttle Reasons indicate reasons why the thermal zone is limiting performance of the devices it controls. 0x0 – The zone is not throttled. 0x1 – The zone is throttled for thermal reasons. 0x2 – The zone is throttled to limit electrical current. | gauge | None
|
||||
|
||||
[`Throttle reasons` source](https://docs.microsoft.com/en-us/windows-hardware/design/device-experiences/examples--requirements-and-diagnostics)
|
||||
|
||||
### Example metric
|
||||
_This collector does not yet have explained examples, we would appreciate your help adding them!_
|
||||
|
||||
## Useful queries
|
||||
_This collector does not yet have any useful queries added, we would appreciate your help adding them!_
|
||||
|
||||
## Alerting examples
|
||||
_This collector does not yet have alerting examples, we would appreciate your help adding them!_
|
||||
263
exporter.go
263
exporter.go
@@ -5,7 +5,10 @@ package main
|
||||
import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
_ "net/http/pprof"
|
||||
"os"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
@@ -52,6 +55,12 @@ var (
|
||||
[]string{"collector"},
|
||||
nil,
|
||||
)
|
||||
snapshotDuration = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(collector.Namespace, "exporter", "perflib_snapshot_duration_seconds"),
|
||||
"Duration of perflib snapshot capture",
|
||||
nil,
|
||||
nil,
|
||||
)
|
||||
|
||||
// This can be removed when client_golang exposes this on Windows
|
||||
// (See https://github.com/prometheus/client_golang/issues/376)
|
||||
@@ -71,84 +80,113 @@ func (coll WmiCollector) Describe(ch chan<- *prometheus.Desc) {
|
||||
ch <- scrapeSuccessDesc
|
||||
}
|
||||
|
||||
type collectorOutcome int
|
||||
|
||||
const (
|
||||
pending collectorOutcome = iota
|
||||
success
|
||||
failed
|
||||
)
|
||||
|
||||
// Collect sends the collected metrics from each of the collectors to
|
||||
// prometheus.
|
||||
func (coll WmiCollector) Collect(ch chan<- prometheus.Metric) {
|
||||
scrapeContext, err := collector.PrepareScrapeContext()
|
||||
if err != nil {
|
||||
ch <- prometheus.NewInvalidMetric(scrapeSuccessDesc, fmt.Errorf("failed to prepare scrape: %v", err))
|
||||
return
|
||||
}
|
||||
|
||||
remainingCollectors := make(map[string]bool)
|
||||
for name := range coll.collectors {
|
||||
remainingCollectors[name] = true
|
||||
}
|
||||
|
||||
metricsBuffer := make(chan prometheus.Metric)
|
||||
allDone := make(chan struct{})
|
||||
stopped := false
|
||||
go func() {
|
||||
for {
|
||||
select {
|
||||
case m := <-metricsBuffer:
|
||||
if !stopped {
|
||||
ch <- m
|
||||
}
|
||||
case <-allDone:
|
||||
return
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
wg := sync.WaitGroup{}
|
||||
wg.Add(len(coll.collectors))
|
||||
go func() {
|
||||
wg.Wait()
|
||||
close(allDone)
|
||||
close(metricsBuffer)
|
||||
}()
|
||||
|
||||
for name, c := range coll.collectors {
|
||||
go func(name string, c collector.Collector) {
|
||||
execute(name, c, scrapeContext, metricsBuffer)
|
||||
wg.Done()
|
||||
delete(remainingCollectors, name)
|
||||
}(name, c)
|
||||
}
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
startTimeDesc,
|
||||
prometheus.CounterValue,
|
||||
startTime,
|
||||
)
|
||||
|
||||
t := time.Now()
|
||||
scrapeContext, err := collector.PrepareScrapeContext()
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
snapshotDuration,
|
||||
prometheus.GaugeValue,
|
||||
time.Since(t).Seconds(),
|
||||
)
|
||||
if err != nil {
|
||||
ch <- prometheus.NewInvalidMetric(scrapeSuccessDesc, fmt.Errorf("failed to prepare scrape: %v", err))
|
||||
return
|
||||
}
|
||||
|
||||
wg := sync.WaitGroup{}
|
||||
wg.Add(len(coll.collectors))
|
||||
collectorOutcomes := make(map[string]collectorOutcome)
|
||||
for name := range coll.collectors {
|
||||
collectorOutcomes[name] = pending
|
||||
}
|
||||
|
||||
metricsBuffer := make(chan prometheus.Metric)
|
||||
l := sync.Mutex{}
|
||||
finished := false
|
||||
go func() {
|
||||
for m := range metricsBuffer {
|
||||
l.Lock()
|
||||
if !finished {
|
||||
ch <- m
|
||||
}
|
||||
l.Unlock()
|
||||
}
|
||||
}()
|
||||
|
||||
for name, c := range coll.collectors {
|
||||
go func(name string, c collector.Collector) {
|
||||
defer wg.Done()
|
||||
outcome := execute(name, c, scrapeContext, metricsBuffer)
|
||||
l.Lock()
|
||||
if !finished {
|
||||
collectorOutcomes[name] = outcome
|
||||
}
|
||||
l.Unlock()
|
||||
}(name, c)
|
||||
}
|
||||
|
||||
allDone := make(chan struct{})
|
||||
go func() {
|
||||
wg.Wait()
|
||||
close(allDone)
|
||||
close(metricsBuffer)
|
||||
}()
|
||||
|
||||
// Wait until either all collectors finish, or timeout expires
|
||||
select {
|
||||
case <-allDone:
|
||||
stopped = true
|
||||
return
|
||||
case <-time.After(coll.maxScrapeDuration):
|
||||
stopped = true
|
||||
remainingCollectorNames := make([]string, 0, len(remainingCollectors))
|
||||
for rc := range remainingCollectors {
|
||||
remainingCollectorNames = append(remainingCollectorNames, rc)
|
||||
}
|
||||
log.Warn("Collection timed out, still waiting for ", remainingCollectorNames)
|
||||
for name := range remainingCollectors {
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
scrapeSuccessDesc,
|
||||
prometheus.GaugeValue,
|
||||
0.0,
|
||||
name,
|
||||
)
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
scrapeTimeoutDesc,
|
||||
prometheus.GaugeValue,
|
||||
1.0,
|
||||
name,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
l.Lock()
|
||||
finished = true
|
||||
|
||||
remainingCollectorNames := make([]string, 0)
|
||||
for name, outcome := range collectorOutcomes {
|
||||
var successValue, timeoutValue float64
|
||||
if outcome == pending {
|
||||
timeoutValue = 1.0
|
||||
remainingCollectorNames = append(remainingCollectorNames, name)
|
||||
}
|
||||
if outcome == success {
|
||||
successValue = 1.0
|
||||
}
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
scrapeSuccessDesc,
|
||||
prometheus.GaugeValue,
|
||||
successValue,
|
||||
name,
|
||||
)
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
scrapeTimeoutDesc,
|
||||
prometheus.GaugeValue,
|
||||
timeoutValue,
|
||||
name,
|
||||
)
|
||||
}
|
||||
|
||||
if len(remainingCollectorNames) > 0 {
|
||||
log.Warn("Collection timed out, still waiting for ", remainingCollectorNames)
|
||||
}
|
||||
|
||||
l.Unlock()
|
||||
}
|
||||
|
||||
func filterAvailableCollectors(collectors string) string {
|
||||
@@ -162,37 +200,23 @@ func filterAvailableCollectors(collectors string) string {
|
||||
return strings.Join(availableCollectors, ",")
|
||||
}
|
||||
|
||||
func execute(name string, c collector.Collector, ctx *collector.ScrapeContext, ch chan<- prometheus.Metric) {
|
||||
begin := time.Now()
|
||||
func execute(name string, c collector.Collector, ctx *collector.ScrapeContext, ch chan<- prometheus.Metric) collectorOutcome {
|
||||
t := time.Now()
|
||||
err := c.Collect(ctx, ch)
|
||||
duration := time.Since(begin)
|
||||
var success float64
|
||||
|
||||
if err != nil {
|
||||
log.Errorf("collector %s failed after %fs: %s", name, duration.Seconds(), err)
|
||||
success = 0
|
||||
} else {
|
||||
log.Debugf("collector %s succeeded after %fs.", name, duration.Seconds())
|
||||
success = 1
|
||||
}
|
||||
duration := time.Since(t).Seconds()
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
scrapeDurationDesc,
|
||||
prometheus.GaugeValue,
|
||||
duration.Seconds(),
|
||||
name,
|
||||
)
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
scrapeSuccessDesc,
|
||||
prometheus.GaugeValue,
|
||||
success,
|
||||
name,
|
||||
)
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
scrapeTimeoutDesc,
|
||||
prometheus.GaugeValue,
|
||||
0.0,
|
||||
duration,
|
||||
name,
|
||||
)
|
||||
|
||||
if err != nil {
|
||||
log.Errorf("collector %s failed after %fs: %s", name, duration, err)
|
||||
return failed
|
||||
}
|
||||
log.Debugf("collector %s succeeded after %fs.", name, duration)
|
||||
return success
|
||||
}
|
||||
|
||||
func expandEnabledCollectors(enabled string) []string {
|
||||
@@ -229,10 +253,6 @@ func loadCollectors(list string) (map[string]collector.Collector, error) {
|
||||
return collectors, nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
prometheus.MustRegister(version.NewCollector("wmi_exporter"))
|
||||
}
|
||||
|
||||
func initWbem() {
|
||||
// This initialization prevents a memory leak on WMF 5+. See
|
||||
// https://github.com/martinlindhe/wmi_exporter/issues/77 and linked issues
|
||||
@@ -264,10 +284,10 @@ func main() {
|
||||
"collectors.print",
|
||||
"If true, print available collectors and exit.",
|
||||
).Bool()
|
||||
maxScrapeDuration = kingpin.Flag(
|
||||
"scrape.max-duration",
|
||||
"Time after which collectors are aborted during a scrape",
|
||||
).Default("30s").Duration()
|
||||
timeoutMargin = kingpin.Flag(
|
||||
"scrape.timeout-margin",
|
||||
"Seconds to subtract from the timeout allowed by the client. Tune to allow for overhead or high loads.",
|
||||
).Default("0.5").Float64()
|
||||
)
|
||||
|
||||
log.AddFlags(kingpin.CommandLine)
|
||||
@@ -312,13 +332,17 @@ func main() {
|
||||
|
||||
log.Infof("Enabled collectors: %v", strings.Join(keys(collectors), ", "))
|
||||
|
||||
exporter := WmiCollector{
|
||||
collectors: collectors,
|
||||
maxScrapeDuration: *maxScrapeDuration,
|
||||
h := &metricsHandler{
|
||||
timeoutMargin: *timeoutMargin,
|
||||
collectorFactory: func(timeout time.Duration) *WmiCollector {
|
||||
return &WmiCollector{
|
||||
collectors: collectors,
|
||||
maxScrapeDuration: timeout,
|
||||
}
|
||||
},
|
||||
}
|
||||
prometheus.MustRegister(exporter)
|
||||
|
||||
http.Handle(*metricsPath, promhttp.Handler())
|
||||
http.Handle(*metricsPath, h)
|
||||
http.HandleFunc("/health", healthCheck)
|
||||
http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
|
||||
http.Redirect(w, r, *metricsPath, http.StatusMovedPermanently)
|
||||
@@ -382,3 +406,36 @@ loop:
|
||||
changes <- svc.Status{State: svc.StopPending}
|
||||
return
|
||||
}
|
||||
|
||||
type metricsHandler struct {
|
||||
timeoutMargin float64
|
||||
collectorFactory func(timeout time.Duration) *WmiCollector
|
||||
}
|
||||
|
||||
func (mh *metricsHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
|
||||
const defaultTimeout = 10.0
|
||||
|
||||
var timeoutSeconds float64
|
||||
if v := r.Header.Get("X-Prometheus-Scrape-Timeout-Seconds"); v != "" {
|
||||
var err error
|
||||
timeoutSeconds, err = strconv.ParseFloat(v, 64)
|
||||
if err != nil {
|
||||
log.Warnf("Couldn't parse X-Prometheus-Scrape-Timeout-Seconds: %q. Defaulting timeout to %f", v, defaultTimeout)
|
||||
}
|
||||
}
|
||||
if timeoutSeconds == 0 {
|
||||
timeoutSeconds = defaultTimeout
|
||||
}
|
||||
timeoutSeconds = timeoutSeconds - mh.timeoutMargin
|
||||
|
||||
reg := prometheus.NewRegistry()
|
||||
reg.MustRegister(mh.collectorFactory(time.Duration(timeoutSeconds * float64(time.Second))))
|
||||
reg.MustRegister(
|
||||
prometheus.NewProcessCollector(os.Getpid(), ""),
|
||||
prometheus.NewGoCollector(),
|
||||
version.NewCollector("wmi_exporter"),
|
||||
)
|
||||
|
||||
h := promhttp.HandlerFor(reg, promhttp.HandlerOpts{})
|
||||
h.ServeHTTP(w, r)
|
||||
}
|
||||
|
||||
@@ -20,7 +20,7 @@ else {
|
||||
$members = $wmiObject `
|
||||
| Get-Member -MemberType Properties `
|
||||
| Where-Object { $_.Definition -Match '^u?int' -and $_.Name -NotMatch '_' } `
|
||||
| Select-Object Name, @{Name="Type";Expression={$_.Definition.Split(" ")[0]}}
|
||||
| Select-Object Name, @{Name="Type";Expression={$_.Definition.Split(" ")[0]}})
|
||||
$input = @{
|
||||
"Class"=$Class;
|
||||
"CollectorName"=$CollectorName;
|
||||
|
||||
@@ -29,7 +29,7 @@ func New{{ .CollectorName }}Collector() (Collector, error) {
|
||||
}
|
||||
// Collect sends the metric values for each metric
|
||||
// to the provided prometheus Metric channel.
|
||||
func (c *{{ .CollectorName }}Collector) Collect(ch chan<- prometheus.Metric) error {
|
||||
func (c *{{ .CollectorName }}Collector) Collect(ctx *ScrapeContext, ch chan<- prometheus.Metric) error {
|
||||
if desc, err := c.collect(ch); err != nil {
|
||||
log.Error("failed collecting {{ .CollectorName | toLower }} metrics:", desc, err)
|
||||
return err
|
||||
|
||||
Reference in New Issue
Block a user