Compare commits

...

16 Commits

Author SHA1 Message Date
Calle Pettersson
a6f3b33928 Merge pull request #394 from martinlindhe/fix-cgo-build-tag
Remove cgo build tag from container collector
2019-08-28 22:13:23 +02:00
Calle Pettersson
8ef215cc7e Remove cgo build tag from container collector 2019-08-28 22:06:20 +02:00
Calle Pettersson
a7b5cf7aa6 Merge pull request #389 from breed808/doc
Add collector documentation
2019-08-27 07:57:45 +02:00
Ben Reedy
719ccd4f7f Add documentation for system collector 2019-08-26 21:07:55 +10:00
Ben Reedy
7ab8c7dde4 Add documentation for net collector 2019-08-26 21:01:09 +10:00
Ben Reedy
eb002eb667 Add documentation for memory collector 2019-08-22 22:39:15 +10:00
Ben Reedy
a1638cdf4c Add query examples to cpu collector documentation 2019-08-22 22:06:34 +10:00
Ben Reedy
091406877a Add documentation for logical_disk collector 2019-08-22 21:53:44 +10:00
Ben Reedy
84970ac086 Add logon entry to collectors README 2019-08-22 21:53:41 +10:00
Calle Pettersson
d86f318010 Merge pull request #387 from breed808/logical-disk-new-counters
Add logical_disk latency metrics
2019-08-22 08:53:42 +02:00
Ben Reedy
853d615673 Add logical_disk latency metrics 2019-08-22 03:30:40 +00:00
Calle Pettersson
cd9a740e2b Merge pull request #384 from breed808/logon
Export logon sessions metric
2019-08-21 11:05:25 +02:00
Ben Reedy
c70e7674a5 Add logon collector documentation 2019-08-21 18:35:10 +10:00
Ben Reedy
d3e3835c29 Export user logon sessions
Use Win32_LogonSession class to provide user logon sessions by type.
2019-08-20 22:27:03 +10:00
Calle Pettersson
592c8a8d69 Merge pull request #376 from martinlindhe/fix-goroutine-leak
Fix goroutine leak
2019-08-09 10:24:36 +02:00
Calle Pettersson
6f6a479535 Fix goroutine leak 2019-08-08 21:09:21 +02:00
12 changed files with 488 additions and 133 deletions

View File

@@ -17,6 +17,7 @@ Name | Description | Enabled by default
[hyperv](docs/collector.hyperv.md) | Hyper-V hosts |
[iis](docs/collector.iis.md) | IIS sites and applications |
[logical_disk](docs/collector.logical_disk.md) | Logical disks, disk I/O | ✓
[logon](docs/collector.logon.md) | User logon sessions |
[memory](docs/collector.memory.md) | Memory usage metrics |
[msmq](docs/collector.msmq.md) | MSMQ queues |
[mssql](docs/collector.mssql.md) | [SQL Server Performance Objects](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/use-sql-server-objects#SQLServerPOs) metrics |

View File

@@ -1,4 +1,4 @@
// +build windows,cgo
// +build windows
package collector

View File

@@ -29,17 +29,20 @@ var (
// A LogicalDiskCollector is a Prometheus collector for WMI Win32_PerfRawData_PerfDisk_LogicalDisk metrics
type LogicalDiskCollector struct {
RequestsQueued *prometheus.Desc
ReadBytesTotal *prometheus.Desc
ReadsTotal *prometheus.Desc
WriteBytesTotal *prometheus.Desc
WritesTotal *prometheus.Desc
ReadTime *prometheus.Desc
WriteTime *prometheus.Desc
TotalSpace *prometheus.Desc
FreeSpace *prometheus.Desc
IdleTime *prometheus.Desc
SplitIOs *prometheus.Desc
RequestsQueued *prometheus.Desc
ReadBytesTotal *prometheus.Desc
ReadsTotal *prometheus.Desc
WriteBytesTotal *prometheus.Desc
WritesTotal *prometheus.Desc
ReadTime *prometheus.Desc
WriteTime *prometheus.Desc
TotalSpace *prometheus.Desc
FreeSpace *prometheus.Desc
IdleTime *prometheus.Desc
SplitIOs *prometheus.Desc
ReadLatency *prometheus.Desc
WriteLatency *prometheus.Desc
ReadWriteLatency *prometheus.Desc
volumeWhitelistPattern *regexp.Regexp
volumeBlacklistPattern *regexp.Regexp
@@ -127,6 +130,27 @@ func NewLogicalDiskCollector() (Collector, error) {
nil,
),
ReadLatency: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "read_latency_seconds_total"),
"Shows the average time, in seconds, of a read operation from the disk (LogicalDisk.AvgDiskSecPerRead)",
[]string{"volume"},
nil,
),
WriteLatency: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "write_latency_seconds_total"),
"Shows the average time, in seconds, of a write operation to the disk (LogicalDisk.AvgDiskSecPerWrite)",
[]string{"volume"},
nil,
),
ReadWriteLatency: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "read_write_latency_seconds_total"),
"Shows the time, in seconds, of the average disk transfer (LogicalDisk.AvgDiskSecPerTransfer)",
[]string{"volume"},
nil,
),
volumeWhitelistPattern: regexp.MustCompile(fmt.Sprintf("^(?:%s)$", *volumeWhitelist)),
volumeBlacklistPattern: regexp.MustCompile(fmt.Sprintf("^(?:%s)$", *volumeBlacklist)),
}, nil
@@ -158,6 +182,9 @@ type Win32_PerfRawData_PerfDisk_LogicalDisk struct {
PercentFreeSpace_Base uint32
PercentIdleTime uint64
SplitIOPerSec uint32
AvgDiskSecPerRead uint64
AvgDiskSecPerWrite uint64
AvgDiskSecPerTransfer uint64
}
func (c *LogicalDiskCollector) collect(ch chan<- prometheus.Metric) (*prometheus.Desc, error) {
@@ -250,6 +277,27 @@ func (c *LogicalDiskCollector) collect(ch chan<- prometheus.Metric) (*prometheus
float64(volume.SplitIOPerSec),
volume.Name,
)
ch <- prometheus.MustNewConstMetric(
c.ReadLatency,
prometheus.CounterValue,
float64(volume.AvgDiskSecPerRead),
volume.Name,
)
ch <- prometheus.MustNewConstMetric(
c.WriteLatency,
prometheus.CounterValue,
float64(volume.AvgDiskSecPerWrite),
volume.Name,
)
ch <- prometheus.MustNewConstMetric(
c.ReadWriteLatency,
prometheus.CounterValue,
float64(volume.AvgDiskSecPerTransfer),
volume.Name,
)
}
return nil, nil

199
collector/logon.go Normal file
View File

@@ -0,0 +1,199 @@
// +build windows
package collector
import (
"errors"
"github.com/StackExchange/wmi"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/log"
)
func init() {
Factories["logon"] = NewLogonCollector
}
// A LogonCollector is a Prometheus collector for WMI metrics
type LogonCollector struct {
LogonType *prometheus.Desc
}
// NewLogonCollector ...
func NewLogonCollector() (Collector, error) {
const subsystem = "logon"
return &LogonCollector{
LogonType: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "logon_type"),
"Number of active logon sessions (LogonSession.LogonType)",
[]string{"status"},
nil,
),
}, nil
}
// Collect sends the metric values for each metric
// to the provided prometheus Metric channel.
func (c *LogonCollector) Collect(ctx *ScrapeContext, ch chan<- prometheus.Metric) error {
if desc, err := c.collect(ch); err != nil {
log.Error("failed collecting user metrics:", desc, err)
return err
}
return nil
}
// Win32_LogonSession docs:
// - https://docs.microsoft.com/en-us/windows/win32/cimwin32prov/win32-logonsession
type Win32_LogonSession struct {
LogonType uint32
}
func (c *LogonCollector) collect(ch chan<- prometheus.Metric) (*prometheus.Desc, error) {
var dst []Win32_LogonSession
q := queryAll(&dst)
if err := wmi.Query(q, &dst); err != nil {
return nil, err
}
if len(dst) == 0 {
return nil, errors.New("WMI query returned empty result set")
}
// Init counters
system := 0
interactive := 0
network := 0
batch := 0
service := 0
proxy := 0
unlock := 0
networkcleartext := 0
newcredentials := 0
remoteinteractive := 0
cachedinteractive := 0
cachedremoteinteractive := 0
cachedunlock := 0
for _, entry := range dst {
switch entry.LogonType {
case 0:
system++
case 2:
interactive++
case 3:
network++
case 4:
batch++
case 5:
service++
case 6:
proxy++
case 7:
unlock++
case 8:
networkcleartext++
case 9:
newcredentials++
case 10:
remoteinteractive++
case 11:
cachedinteractive++
case 12:
cachedremoteinteractive++
case 13:
cachedunlock++
}
}
ch <- prometheus.MustNewConstMetric(
c.LogonType,
prometheus.GaugeValue,
float64(system),
"system",
)
ch <- prometheus.MustNewConstMetric(
c.LogonType,
prometheus.GaugeValue,
float64(interactive),
"interactive",
)
ch <- prometheus.MustNewConstMetric(
c.LogonType,
prometheus.GaugeValue,
float64(network),
"network",
)
ch <- prometheus.MustNewConstMetric(
c.LogonType,
prometheus.GaugeValue,
float64(batch),
"batch",
)
ch <- prometheus.MustNewConstMetric(
c.LogonType,
prometheus.GaugeValue,
float64(service),
"service",
)
ch <- prometheus.MustNewConstMetric(
c.LogonType,
prometheus.GaugeValue,
float64(proxy),
"proxy",
)
ch <- prometheus.MustNewConstMetric(
c.LogonType,
prometheus.GaugeValue,
float64(unlock),
"unlock",
)
ch <- prometheus.MustNewConstMetric(
c.LogonType,
prometheus.GaugeValue,
float64(networkcleartext),
"network_clear_text",
)
ch <- prometheus.MustNewConstMetric(
c.LogonType,
prometheus.GaugeValue,
float64(newcredentials),
"new_credentials",
)
ch <- prometheus.MustNewConstMetric(
c.LogonType,
prometheus.GaugeValue,
float64(remoteinteractive),
"remote_interactive",
)
ch <- prometheus.MustNewConstMetric(
c.LogonType,
prometheus.GaugeValue,
float64(cachedinteractive),
"cached_interactive",
)
ch <- prometheus.MustNewConstMetric(
c.LogonType,
prometheus.GaugeValue,
float64(remoteinteractive),
"cached_remote_interactive",
)
ch <- prometheus.MustNewConstMetric(
c.LogonType,
prometheus.GaugeValue,
float64(cachedunlock),
"cached_unlock",
)
return nil, nil
}

View File

@@ -9,6 +9,7 @@ This directory contains documentation of the collectors in the WMI exporter, wit
- [`hyperv`](collector.hyperv.md)
- [`iis`](collector.iis.md)
- [`logical_disk`](collector.logical_disk.md)
- [`logon`](collector.logon.md)
- [`memory`](collector.memory.md)
- [`msmq`](collector.msmq.md)
- [`mssql`](collector.mssql.md)
@@ -27,4 +28,4 @@ This directory contains documentation of the collectors in the WMI exporter, wit
- [`system`](collector.system.md)
- [`tcp`](collector.tcp.md)
- [`textfile`](collector.textfile.md)
- [`vmware`](collector.vmware.md)
- [`vmware`](collector.vmware.md)

View File

@@ -1,43 +1,60 @@
# cpu collector
The cpu collector exposes metrics about CPU usage
|||
-|-
Metric name prefix | `cpu`
Data source | Perflib
Counters | `ProcessorInformation` (Windows Server 2008R2 and later) `Processor` (older versions)
Enabled by default? | Yes
## Flags
None
## Metrics
These metrics are available on all versions of Windows:
Name | Description | Type | Labels
-----|-------------|------|-------
`wmi_cpu_cstate_seconds_total` | Time spent in low-power idle states | counter | `core`, `state`
`wmi_cpu_time_total` | Time that processor spent in different modes (idle, user, system, ...) | counter | `core`, `mode`
`wmi_cpu_interrupts_total` | Total number of received and serviced hardware interrupts | counter | `core`
`wmi_cpu_dpcs_total` | Total number of received and serviced deferred procedure calls (DPCs) | counter | `core`
These metrics are only exposed on Windows Server 2008R2 and later:
Name | Description | Type | Labels
-----|-------------|------|-------
`wmi_cpu_clock_interrupts_total` | Total number of received and serviced clock tick interrupts | `core`
`wmi_cpu_idle_break_events_total` | Total number of time processor was woken from idle | `core`
`wmi_cpu_parking_status` | Parking Status represents whether a processor is parked or not | `gauge`
`wmi_cpu_core_frequency_mhz` | Core frequency in megahertz | `gauge`
`wmi_cpu_processor_performance` | Processor Performance is the average performance of the processor while it is executing instructions, as a percentage of the nominal performance of the processor. On some processors, Processor Performance may exceed 100% | `gauge`
### Example metric
_This collector does not yet have explained examples, we would appreciate your help adding them!_
## Useful queries
_This collector does not yet have any useful queries added, we would appreciate your help adding them!_
## Alerting examples
_This collector does not yet have alerting examples, we would appreciate your help adding them!_
# cpu collector
The cpu collector exposes metrics about CPU usage
|||
-|-
Metric name prefix | `cpu`
Data source | Perflib
Counters | `ProcessorInformation` (Windows Server 2008R2 and later) `Processor` (older versions)
Enabled by default? | Yes
## Flags
None
## Metrics
These metrics are available on all versions of Windows:
Name | Description | Type | Labels
-----|-------------|------|-------
`wmi_cpu_cstate_seconds_total` | Time spent in low-power idle states | counter | `core`, `state`
`wmi_cpu_time_total` | Time that processor spent in different modes (idle, user, system, ...) | counter | `core`, `mode`
`wmi_cpu_interrupts_total` | Total number of received and serviced hardware interrupts | counter | `core`
`wmi_cpu_dpcs_total` | Total number of received and serviced deferred procedure calls (DPCs) | counter | `core`
These metrics are only exposed on Windows Server 2008R2 and later:
Name | Description | Type | Labels
-----|-------------|------|-------
`wmi_cpu_clock_interrupts_total` | Total number of received and serviced clock tick interrupts | `core`
`wmi_cpu_idle_break_events_total` | Total number of time processor was woken from idle | `core`
`wmi_cpu_parking_status` | Parking Status represents whether a processor is parked or not | `gauge`
`wmi_cpu_core_frequency_mhz` | Core frequency in megahertz | `gauge`
`wmi_cpu_processor_performance` | Processor Performance is the average performance of the processor while it is executing instructions, as a percentage of the nominal performance of the processor. On some processors, Processor Performance may exceed 100% | `gauge`
### Example metric
Show frequency of host CPU cores
```
wmi_cpu_core_frequency_mhz{instance="localhost"}
```
## Useful queries
Show cpu usage by mode.
```
sum by (mode) (irate(wmi_cpu_time_total{instance="localhost"}[5m]))
```
## Alerting examples
**prometheus.rules**
```
# Alert on hosts with more than 80% CPU usage over a 10 minute period
- alert: CpuUsage
expr: 100 - (avg by (instance) (irate(wmi_cpu_time_total{mode="idle"}[2m])) * 100) > 80
for: 10m
labels:
severity: warning
annotations:
summary: "CPU Usage (instance {{ $labels.instance }})"
description: "CPU Usage is more than 80%\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
```

View File

@@ -1,44 +1,75 @@
# logical_disk collector
The logical_disk collector exposes metrics about logical disks (in contrast to physical disks)
|||
-|-
Metric name prefix | `logical_disk`
Classes | [`Win32_PerfRawData_PerfDisk_LogicalDisk`](https://msdn.microsoft.com/en-us/windows/hardware/aa394307(v=vs.71))
Enabled by default? | Yes
## Flags
### `--collector.logical_disk.volume-whitelist`
If given, a disk needs to match the whitelist regexp in order for the corresponding disk metrics to be reported
### `--collector.logical_disk.volume-blacklist`
If given, a disk needs to *not* match the blacklist regexp in order for the corresponding disk metrics to be reported
## Metrics
Name | Description | Type | Labels
-----|-------------|------|-------
`requests_queued` | _Not yet documented_ | gauge | `volume`
`read_bytes_total` | _Not yet documented_ | counter | `volume`
`reads_total` | _Not yet documented_ | counter | `volume`
`write_bytes_total` | _Not yet documented_ | counter | `volume`
`writes_total` | _Not yet documented_ | counter | `volume`
`read_seconds_total` | _Not yet documented_ | counter | `volume`
`write_seconds_total` | _Not yet documented_ | counter | `volume`
`free_bytes` | _Not yet documented_ | gauge | `volume`
`size_bytes` | _Not yet documented_ | gauge | `volume`
`idle_seconds_total` | _Not yet documented_ | counter | `volume`
`split_ios_total` | _Not yet documented_ | counter | `volume`
### Example metric
_This collector does not yet have explained examples, we would appreciate your help adding them!_
## Useful queries
_This collector does not yet have any useful queries added, we would appreciate your help adding them!_
## Alerting examples
_This collector does not yet have alerting examples, we would appreciate your help adding them!_
# logical_disk collector
The logical_disk collector exposes metrics about logical disks (in contrast to physical disks)
|||
-|-
Metric name prefix | `logical_disk`
Classes | [`Win32_PerfRawData_PerfDisk_LogicalDisk`](https://msdn.microsoft.com/en-us/windows/hardware/aa394307(v=vs.71))
Enabled by default? | Yes
## Flags
### `--collector.logical_disk.volume-whitelist`
If given, a disk needs to match the whitelist regexp in order for the corresponding disk metrics to be reported
### `--collector.logical_disk.volume-blacklist`
If given, a disk needs to *not* match the blacklist regexp in order for the corresponding disk metrics to be reported
## Metrics
Name | Description | Type | Labels
-----|-------------|------|-------
`requests_queued` | Number of requests outstanding on the disk at the time the performance data is collected | gauge | `volume`
`read_bytes_total` | Rate at which bytes are transferred from the disk during read operations | counter | `volume`
`reads_total` | Rate of read operations on the disk | counter | `volume`
`write_bytes_total` | Rate at which bytes are transferred to the disk during write operations | counter | `volume`
`writes_total` | Rate of write operations on the disk | counter | `volume`
`read_seconds_total` | Seconds the disk was busy servicing read requests | counter | `volume`
`write_seconds_total` | Seconds the disk was busy servicing write requests | counter | `volume`
`free_bytes` | Unused space of the disk in bytes | gauge | `volume`
`size_bytes` | Total size of the disk in bytes | gauge | `volume`
`idle_seconds_total` | Seconds the disk was idle (not servicing read/write requests) | counter | `volume`
`split_ios_total` | Number of I/Os to the disk split into multiple I/Os | counter | `volume`
### Example metric
Query the rate of write operations to a disk
```
rate(wmi_logical_disk_read_bytes_total{instance="localhost", volume=~"C:"}[2m])
```
## Useful queries
Calculate rate of total IOPS for disk
```
rate(wmi_logical_disk_reads_total{instance="localhost", volume="C:"}[2m]) + rate(wmi_logical_disk_writes_total{instance="localhost", volume="C:"}[2m])
```
## Alerting examples
**prometheus.rules**
```
groups:
- name: Windows Disk Alerts
rules:
# Sends an alert when disk space usage is above 95%
- alert: DiskSpaceUsage
expr: 100.0 - 100 * (wmi_logical_disk_free_bytes / wmi_logical_disk_size_bytes) > 95
for: 10m
labels:
severity: high
annotations:
summary: "Disk Space Usage (instance {{ $labels.instance }})"
description: "Disk Space on Drive is used more than 95%\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
# Alerts on disks with over 85% space usage predicted to fill within the next four days
- alert: DiskFilling
expr: 100 * (wmi_logical_disk_free_bytes / wmi_logical_disk_size_bytes) < 15 and predict_linear(wmi_logical_disk_free_bytes[6h], 4 * 24 * 3600) < 0
for: 10m
labels:
severity: warning
annotations:
summary: "Disk full in four days (instance {{ $labels.instance }})"
description: "{{ $labels.volume }} is expected to fill up within four days. Currently {{ $value | humanize }}% is available.\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
```

34
docs/collector.logon.md Normal file
View File

@@ -0,0 +1,34 @@
# logon collector
The logon collector exposes metrics detailing the active user logon sessions.
|||
-|-
Metric name prefix | `logon`
Classes | [`Win32_LogonSession`](https://docs.microsoft.com/en-us/windows/win32/cimwin32prov/win32-logonsession)
Enabled by default? | No
## Flags
None
## Metrics
Name | Description | Type | Labels
-----|-------------|------|-------
`wmi_logon_logon_type` | Number of active user logon sessions | gauge | status
### Example metric
Query the total number of interactive logon sessions
```
wmi_logon_logon_type{status="interactive"}
```
## Useful queries
Query the total number of local and remote (I.E. Terminal Services) interactive sessions.
```
wmi_logon_logon_type{status=~"interactive|remoteinteractive"}
```
## Alerting examples
_This collector does not yet have alerting examples, we would appreciate your help adding them!_

View File

@@ -19,25 +19,25 @@ Name | Description | Type | Labels
`wmi_cs_logical_processors` | Number of installed logical processors | gauge | None
`wmi_cs_physical_memory_bytes` | Total installed physical memory | gauge | None
`wmi_memory_available_bytes` | The amount of physical memory immediately available for allocation to a process or for system use. It is equal to the sum of memory assigned to the standby (cached), free and zero page lists | gauge | None
`wmi_memory_cache_bytes` | _Not yet documented_ | gauge | None
`wmi_memory_cache_bytes_peak` | _Not yet documented_ | gauge | None
`wmi_memory_cache_faults_total` | _Not yet documented_ | gauge | None
`wmi_memory_commit_limit` | _Not yet documented_ | gauge | None
`wmi_memory_committed_bytes` | _Not yet documented_ | gauge | None
`wmi_memory_cache_bytes` | Number of bytes currently being used by the file system cache | gauge | None
`wmi_memory_cache_bytes_peak` | Maximum number of CacheBytes after the system was last restarted | gauge | None
`wmi_memory_cache_faults_total` | Number of faults which occur when a page sought in the file system cache is not found there and must be retrieved from elsewhere in memory (soft fault) or from disk (hard fault) | gauge | None
`wmi_memory_commit_limit` | Amount of virtual memory, in bytes, that can be committed without having to extend the paging file(s) | gauge | None
`wmi_memory_committed_bytes` | Amount of committed virtual memory, in bytes | gauge | None
`wmi_memory_demand_zero_faults_total` | The number of zeroed pages required to satisfy faults. Zeroed pages, pages emptied of previously stored data and filled with zeros, are a security feature of Windows that prevent processes from seeing data stored by earlier processes that used the memory space | gauge | None
`wmi_memory_free_and_zero_page_list_bytes` | _Not yet documented_ | gauge | None
`wmi_memory_free_system_page_table_entries` | _Not yet documented_ | gauge | None
`wmi_memory_free_system_page_table_entries` | Number of page table entries not being used by the system | gauge | None
`wmi_memory_modified_page_list_bytes` | _Not yet documented_ | gauge | None
`wmi_memory_page_faults_total` | _Not yet documented_ | gauge | None
`wmi_memory_page_faults_total` | Overall rate at which faulted pages are handled by the processor | gauge | None
`wmi_memory_swap_page_reads_total` | Number of disk page reads (a single read operation reading several pages is still only counted once) | gauge | None
`wmi_memory_swap_pages_read_total` | Number of pages read across all page reads (ie counting all pages read even if they are read in a single operation) | gauge | None
`wmi_memory_swap_pages_written_total` | Number of pages written across all page writes (ie counting all pages written even if they are written in a single operation) | gauge | None
`wmi_memory_swap_page_operations_total` | Total number of swap page read and writes (PagesPersec) | gauge | None
`wmi_memory_swap_page_writes_total` | Number of disk page writes (a single write operation writing several pages is still only counted once) | gauge | None
`wmi_memory_pool_nonpaged_allocs_total` | The number of calls to allocate space in the nonpaged pool. The nonpaged pool is an area of system memory area for objects that cannot be written to disk, and must remain in physical memory as long as they are allocated | gauge | None
`wmi_memory_pool_nonpaged_bytes_total` | _Not yet documented_ | gauge | None
`wmi_memory_pool_paged_allocs_total` | _Not yet documented_ | gauge | None
`wmi_memory_pool_paged_bytes` | _Not yet documented_ | gauge | None
`wmi_memory_pool_nonpaged_bytes_total` | Number of bytes in the non-paged pool | gauge | None
`wmi_memory_pool_paged_allocs_total` | Number of calls to allocate space in the paged pool, regardless of the amount of space allocated in each call | gauge | None
`wmi_memory_pool_paged_bytes` | Number of bytes in the paged pool | gauge | None
`wmi_memory_pool_paged_resident_bytes` | _Not yet documented_ | gauge | None
`wmi_memory_standby_cache_core_bytes` | _Not yet documented_ | gauge | None
`wmi_memory_standby_cache_normal_priority_bytes` | _Not yet documented_ | gauge | None

View File

@@ -22,24 +22,40 @@ If given, an interface name needs to *not* match the blacklist regexp in order f
Name | Description | Type | Labels
-----|-------------|------|-------
`wmi_net_bytes_received_total` | _Not yet documented_ | counter | `nic`
`wmi_net_bytes_sent_total` | _Not yet documented_ | counter | `nic`
`wmi_net_bytes_total` | _Not yet documented_ | counter | `nic`
`wmi_net_packets_outbound_discarded` | _Not yet documented_ | counter | `nic`
`wmi_net_packets_outbound_errors` | _Not yet documented_ | counter | `nic`
`wmi_net_packets_received_discarded` | _Not yet documented_ | counter | `nic`
`wmi_net_packets_received_errors` | _Not yet documented_ | counter | `nic`
`wmi_net_packets_received_total` | _Not yet documented_ | counter | `nic`
`wmi_net_packets_received_unknown` | _Not yet documented_ | counter | `nic`
`wmi_net_packets_total` | _Not yet documented_ | counter | `nic`
`wmi_net_packets_sent_total` | _Not yet documented_ | counter | `nic`
`wmi_net_current_bandwidth` | _Not yet documented_ | counter | `nic`
`wmi_net_bytes_received_total` | Total bytes received by interface | counter | `nic`
`wmi_net_bytes_sent_total` | Total bytes transmitted by interface | counter | `nic`
`wmi_net_bytes_total` | Total bytes received and transmitted by interface | counter | `nic`
`wmi_net_packets_outbound_discarded` | Total outbound packets that were chosen to be discarded even though no errors had been detected to prevent transmission | counter | `nic`
`wmi_net_packets_outbound_errors` | Total packets that could not be transmitted due to errors | counter | `nic`
`wmi_net_packets_received_discarded` | Total inbound packets that were chosen to be discarded even though no errors had been detected to prevent delivery | counter | `nic`
`wmi_net_packets_received_errors` | Total packets that could not be received due to errors | counter | `nic`
`wmi_net_packets_received_total` | Total packets received by interface | counter | `nic`
`wmi_net_packets_received_unknown` | Total packets received by interface that were discarded because of an unknown or unsupported protocol | counter | `nic`
`wmi_net_packets_total` | Total packets received and transmitted by interface | counter | `nic`
`wmi_net_packets_sent_total` | Total packets transmitted by interface | counter | `nic`
`wmi_net_current_bandwidth` | Estimate of the interface's current bandwidth in bits per second (bps) | counter | `nic`
### Example metric
_This collector does not yet have explained examples, we would appreciate your help adding them!_
Query the rate of transmitted network traffic
```
rate(wmi_net_bytes_sent_total{instance="localhost"}[2m])
```
## Useful queries
_This collector does not yet have any useful queries added, we would appreciate your help adding them!_
Get total utilisation of network interface as a percentage
```
rate(wmi_net_bytes_total{instance="localhost", nic="Microsoft_Hyper_V_Network_Adapter__1"}[2m]) * 8 / wmi_net_current_bandwidth{instance="locahost", nic="Microsoft_Hyper_V_Network_Adapter__1"} * 100
```
## Alerting examples
_This collector does not yet have alerting examples, we would appreciate your help adding them!_
**prometheus.rules**
```
- alert: NetInterfaceUsage
expr: rate(wmi_net_bytes_total[2m]) * 8 / wmi_net_current_bandwidth * 100 > 90
for: 10m
labels:
severity: high
annotations:
summary: "Network Interface Usage (instance {{ $labels.instance }})"
description: "Network traffic usage is greater than 95% for interface {{ $labels.nic }}\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
```

View File

@@ -16,18 +16,24 @@ None
Name | Description | Type | Labels
-----|-------------|------|-------
`wmi_system_context_switches_total` | _Not yet documented_ | counter | None
`wmi_system_exception_dispatches_total` | _Not yet documented_ | counter | None
`wmi_system_processor_queue_length` | _Not yet documented_ | gauge | None
`wmi_system_system_calls_total` | _Not yet documented_ | counter | None
`wmi_system_system_up_time` | _Not yet documented_ | gauge | None
`wmi_system_threads` | _Not yet documented_ | gauge | None
`wmi_system_context_switches_total` | Total number of [context switches](https://en.wikipedia.org/wiki/Context_switch) | counter | None
`wmi_system_exception_dispatches_total` | Total exceptions dispatched by the system | counter | None
`wmi_system_processor_queue_length` | Number of threads in the processor queue. There is a single queue for processor time even on computers with multiple processors. | gauge | None
`wmi_system_system_calls_total` | Total combined calls to Windows NT system service routines by all processes running on the computer | counter | None
`wmi_system_system_up_time` | Time of last boot of system | gauge | None
`wmi_system_threads` | Number of Windows system [threads](https://en.wikipedia.org/wiki/Thread_(computing)) | gauge | None
### Example metric
_This collector does not yet have explained examples, we would appreciate your help adding them!_
Show current number of system threads
```
wmi_system_threads{instance="localhost"}
```
## Useful queries
_This collector does not yet have any useful queries added, we would appreciate your help adding them!_
Find hosts that have rebooted in the last 24 hours
```
time() - wmi_system_system_up_time < 86400
```
## Alerting examples
_This collector does not yet have alerting examples, we would appreciate your help adding them!_

View File

@@ -5,6 +5,7 @@ package main
import (
"fmt"
"net/http"
_ "net/http/pprof"
"os"
"sort"
"strconv"
@@ -144,6 +145,7 @@ func (coll WmiCollector) Collect(ch chan<- prometheus.Metric) {
go func() {
wg.Wait()
close(allDone)
close(metricsBuffer)
}()
// Wait until either all collectors finish, or timeout expires