mirror of
https://github.com/prometheus-community/windows_exporter.git
synced 2026-02-24 21:56:36 +00:00
docs: add alerting examples for CPU and CSV (#2317)
Signed-off-by: EisenbergD <dominik.eisenberg@beiersdorf.com> Co-authored-by: EisenbergD <dominik.eisenberg@beiersdorf.com>
This commit is contained in:
committed by
GitHub
parent
ec6f705410
commit
e951e516de
@@ -80,7 +80,36 @@ avg by(instance) (
|
|||||||
|
|
||||||
|
|
||||||
## Alerting examples
|
## Alerting examples
|
||||||
**prometheus.rules**
|
#### Average CPU utilization over 1 hour exceeds 80% (New CPU metric)
|
||||||
|
```yaml
|
||||||
|
# Alert on hosts with 1h avg CPU more than 80%
|
||||||
|
- alert: HighCPUUtilization
|
||||||
|
expr: |
|
||||||
|
avg_over_time(
|
||||||
|
(
|
||||||
|
sum by (instance) (
|
||||||
|
(
|
||||||
|
rate(windows_cpu_processor_utility_total{}[1m])
|
||||||
|
/
|
||||||
|
rate(windows_cpu_processor_rtc_total{}[1m])
|
||||||
|
)
|
||||||
|
) /
|
||||||
|
count by (instance) (
|
||||||
|
windows_cpu_processor_utility_total{}
|
||||||
|
)
|
||||||
|
)[1h:]
|
||||||
|
) > 80
|
||||||
|
for: 1m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
metric_name: CPUUtilization
|
||||||
|
annotations:
|
||||||
|
summary: "High CPU utilization on {{ $labels.instance }}"
|
||||||
|
description: |
|
||||||
|
CPU utilization on {{ $labels.instance }} has averaged more than 80% over the last hour (current value: {{ printf "%.2f" $value }})
|
||||||
|
```
|
||||||
|
|
||||||
|
#### CPU usage exceeds 80% over a 10 minute period (Old CPU metric)
|
||||||
```yaml
|
```yaml
|
||||||
# Alert on hosts with more than 80% CPU usage over a 10 minute period
|
# Alert on hosts with more than 80% CPU usage over a 10 minute period
|
||||||
- alert: CpuUsage
|
- alert: CpuUsage
|
||||||
@@ -91,6 +120,10 @@ avg by(instance) (
|
|||||||
annotations:
|
annotations:
|
||||||
summary: "CPU Usage (instance {{ $labels.instance }})"
|
summary: "CPU Usage (instance {{ $labels.instance }})"
|
||||||
description: "CPU Usage is more than 80%\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
|
description: "CPU Usage is more than 80%\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
|
||||||
|
```
|
||||||
|
|
||||||
|
#### CPU not using boost frequencies
|
||||||
|
```yaml
|
||||||
# Alert on hosts which are not boosting their CPU frequencies
|
# Alert on hosts which are not boosting their CPU frequencies
|
||||||
- alert: NoCpuTurbo
|
- alert: NoCpuTurbo
|
||||||
expr: |
|
expr: |
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ Enabled by default? | No
|
|||||||
|
|
||||||
### `--collectors.mscluster.enabled`
|
### `--collectors.mscluster.enabled`
|
||||||
Comma-separated list of collectors to use, for example:
|
Comma-separated list of collectors to use, for example:
|
||||||
`--collectors.mscluster.enabled=cluster,network,node,resource,resourcegroup`.
|
`--collectors.mscluster.enabled=cluster,network,node,resource,resourcegroup`.
|
||||||
Matching is case-sensitive.
|
Matching is case-sensitive.
|
||||||
|
|
||||||
## Metrics
|
## Metrics
|
||||||
@@ -183,4 +183,21 @@ count(windows_mscluster_resource_state{type="Network Name"})
|
|||||||
```
|
```
|
||||||
|
|
||||||
## Alerting examples
|
## Alerting examples
|
||||||
_This collector does not yet have alerting examples, we would appreciate your help adding them!_
|
#### Low free space on cluster shared volume
|
||||||
|
```yaml
|
||||||
|
# Alerts if volume has less than 20% free space
|
||||||
|
- alert: LowCSVFreeSpace
|
||||||
|
expr: |
|
||||||
|
(
|
||||||
|
max by (name, cluster) (windows_mscluster_shared_volumes_free_bytes{name!="ClusterPerformanceHistory"})
|
||||||
|
/
|
||||||
|
max by (name, cluster) (windows_mscluster_shared_volumes_total_bytes{name!="ClusterPerformanceHistory"})
|
||||||
|
) * 100 < 20
|
||||||
|
for: 10m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
annotations:
|
||||||
|
summary: "Low CSV free space on {{ $labels.name }}"
|
||||||
|
description: |
|
||||||
|
Cluster Shared Volume {{ $labels.name }} on cluster {{ $labels.cluster }} has less than 20% free space (current: {{ printf "%.2f" $value }}%)
|
||||||
|
```
|
||||||
|
|||||||
@@ -38,4 +38,19 @@ windows_os_install_time_timestamp_seconds 1.6725312e+09
|
|||||||
_This collector does not yet have useful queries, we would appreciate your help adding them!_
|
_This collector does not yet have useful queries, we would appreciate your help adding them!_
|
||||||
|
|
||||||
## Alerting examples
|
## Alerting examples
|
||||||
_This collector does not yet have alerting examples, we would appreciate your help adding them!_
|
|
||||||
|
#### Hyper-V host is down
|
||||||
|
```yaml
|
||||||
|
# Alerts if Agent/Host is down for 5min
|
||||||
|
- alert: HypervHostDown
|
||||||
|
expr: up{app="hyper-v"} == 0
|
||||||
|
for: 5m
|
||||||
|
labels:
|
||||||
|
severity: critical
|
||||||
|
annotations:
|
||||||
|
summary: Hyper-V host {{ $labels.instance }} is down
|
||||||
|
description: |
|
||||||
|
Hyper-V host {{ $labels.instance }} has been unreachable for more than 5 minutes.
|
||||||
|
Job: {{ $labels.job }}
|
||||||
|
```
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user