Add Windows Time Service collector

Signed-off-by: Ben Reedy <breed808@breed808.com>
This commit is contained in:
Ben Reedy
2020-10-25 18:03:05 +10:00
committed by Ben Reedy
parent 24fe6813b2
commit cdfe3cf258
3 changed files with 170 additions and 0 deletions

View File

@@ -40,6 +40,7 @@ Name | Description | Enabled by default
[service](docs/collector.service.md) | Service state metrics | &#10003;
[system](docs/collector.system.md) | System calls | &#10003;
[tcp](docs/collector.tcp.md) | TCP connections |
[time](docs/collector.time.md) | Windows Time Service |
[thermalzone](docs/collector.thermalzone.md) | Thermal information
[terminal_services](docs/collector.terminal_services.md) | Terminal services (RDS)
[textfile](docs/collector.textfile.md) | Read prometheus metrics from a text file | &#10003;

124
collector/time.go Normal file
View File

@@ -0,0 +1,124 @@
// +build windows
package collector
import (
	"errors"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/common/log"
)
// init registers the time collector with the package's collector registry.
func init() {
	const (
		collectorName = "time"
		// Same string collect uses as its ctx.perfObjects key.
		perflibObject = "Windows Time Service"
	)
	registerCollector(collectorName, newTimeCollector, perflibObject)
}
// TimeCollector is a Prometheus collector for Perflib counter metrics
// read from the "Windows Time Service" performance object. Each field holds
// the descriptor of one exported metric; the descriptors are created in
// newTimeCollector and used by collect when emitting samples.
type TimeCollector struct {
// Total adjustment made to the local clock frequency, in parts per billion.
ClockFrequencyAdjustmentPPBTotal *prometheus.Desc
// Offset between the system clock and the chosen time source, in seconds.
ComputedTimeOffset *prometheus.Desc
// Number of NTP time sources currently used by the client.
NTPClientTimeSourceCount *prometheus.Desc
// Round-trip delay of the most recent NTP client request, in seconds.
NTPRoundtripDelay *prometheus.Desc
// Total number of requests received by the NTP server.
NTPServerIncomingRequestsTotal *prometheus.Desc
// Total number of requests the NTP server responded to.
NTPServerOutgoingResponsesTotal *prometheus.Desc
}
// newTimeCollector builds a TimeCollector with one label-free descriptor per
// exported metric. It never returns a non-nil error.
func newTimeCollector() (Collector, error) {
	const subsystem = "time"

	// describe creates a descriptor without variable or constant labels whose
	// fully-qualified name is built from Namespace, subsystem and metric.
	describe := func(metric, help string) *prometheus.Desc {
		fqName := prometheus.BuildFQName(Namespace, subsystem, metric)
		return prometheus.NewDesc(fqName, help, nil, nil)
	}

	collector := &TimeCollector{
		ClockFrequencyAdjustmentPPBTotal: describe(
			"clock_frequency_adjustment_ppb_total",
			"Total adjustment made to the local system clock frequency by W32Time in Parts Per Billion (PPB) units.",
		),
		ComputedTimeOffset: describe(
			"computed_time_offset_seconds",
			"Absolute time offset between the system clock and the chosen time source, in seconds",
		),
		NTPClientTimeSourceCount: describe(
			"ntp_client_time_source_count",
			"Active number of NTP Time sources being used by the client",
		),
		NTPRoundtripDelay: describe(
			"ntp_round_trip_delay_seconds",
			"Roundtrip delay experienced by the NTP client in receiving a response from the server for the most recent request, in seconds",
		),
		NTPServerIncomingRequestsTotal: describe(
			"ntp_server_incoming_requests_total",
			"Total number of requests received by NTP server",
		),
		NTPServerOutgoingResponsesTotal: describe(
			"ntp_server_outgoing_responses_total",
			"Total number of requests responded to by NTP server",
		),
	}
	return collector, nil
}
// Collect gathers the Windows Time Service metrics and pushes them onto the
// given prometheus Metric channel. A collection failure is logged and the
// underlying error is handed back to the caller.
func (c *TimeCollector) Collect(ctx *ScrapeContext, ch chan<- prometheus.Metric) error {
	desc, err := c.collect(ctx, ch)
	if err == nil {
		return nil
	}
	log.Error("failed collecting time metrics:", desc, err)
	return err
}
// Perflib "Windows Time Service"
//
// windowsTime is the destination type collect passes (as a slice) to
// unmarshalObject. The perflib struct tags carry counter names; presumably
// unmarshalObject uses them to match counters to fields (its implementation
// is not shown here).
type windowsTime struct {
// Emitted unchanged by collect as a counter value.
ClockFrequencyAdjustmentPPBTotal float64 `perflib:"Clock Frequency Adjustment (ppb)"`
// Treated as microseconds: collect divides by 1000000 to report seconds.
ComputedTimeOffset float64 `perflib:"Computed Time Offset"`
// Emitted unchanged by collect as a gauge value.
NTPClientTimeSourceCount float64 `perflib:"NTP Client Time Source Count"`
// Treated as microseconds: collect divides by 1000000 to report seconds.
NTPRoundtripDelay float64 `perflib:"NTP Roundtrip Delay"`
// Emitted unchanged by collect as a counter value.
NTPServerIncomingRequestsTotal float64 `perflib:"NTP Server Incoming Requests"`
// Emitted unchanged by collect as a counter value.
NTPServerOutgoingResponsesTotal float64 `perflib:"NTP Server Outgoing Responses"`
}
// collect reads the "Windows Time Service" Perflib object from the scrape
// context and sends one sample per time metric to ch.
//
// The returned descriptor is always nil. A non-nil error is returned when
// unmarshalObject fails, or when it succeeds but yields no entries; in both
// cases nothing is sent on ch.
func (c *TimeCollector) collect(ctx *ScrapeContext, ch chan<- prometheus.Metric) (*prometheus.Desc, error) {
	var dst []windowsTime // Single-instance class, array is required but will have single entry.
	if err := unmarshalObject(ctx.perfObjects["Windows Time Service"], &dst); err != nil {
		return nil, err
	}
	// Guard the dst[0] reads below: an empty result would otherwise panic
	// with an index-out-of-range inside the scrape instead of reporting a
	// collection error.
	if len(dst) == 0 {
		return nil, errors.New("no instances returned for Windows Time Service performance object")
	}
	data := dst[0]

	ch <- prometheus.MustNewConstMetric(
		c.ClockFrequencyAdjustmentPPBTotal,
		prometheus.CounterValue,
		data.ClockFrequencyAdjustmentPPBTotal,
	)
	ch <- prometheus.MustNewConstMetric(
		c.ComputedTimeOffset,
		prometheus.GaugeValue,
		data.ComputedTimeOffset/1000000, // microseconds -> seconds
	)
	ch <- prometheus.MustNewConstMetric(
		c.NTPClientTimeSourceCount,
		prometheus.GaugeValue,
		data.NTPClientTimeSourceCount,
	)
	ch <- prometheus.MustNewConstMetric(
		c.NTPRoundtripDelay,
		prometheus.GaugeValue,
		data.NTPRoundtripDelay/1000000, // microseconds -> seconds
	)
	ch <- prometheus.MustNewConstMetric(
		c.NTPServerIncomingRequestsTotal,
		prometheus.CounterValue,
		data.NTPServerIncomingRequestsTotal,
	)
	ch <- prometheus.MustNewConstMetric(
		c.NTPServerOutgoingResponsesTotal,
		prometheus.CounterValue,
		data.NTPServerOutgoingResponsesTotal,
	)
	return nil, nil
}

45
docs/collector.time.md Normal file
View File

@@ -0,0 +1,45 @@
# time collector
The time collector exposes the Windows Time Service metrics. Note that the Windows Time Service must be running, else metric collection will fail.
If the Windows Time Service is stopped after collection has started, collector metric values will reset to 0.
|||
-|-
Metric name prefix | `time`
Data source | Perflib
Enabled by default? | No
## Flags
None
## Metrics
Name | Description | Type | Labels
-----|-------------|------|-------
`windows_time_clock_frequency_adjustment_ppb_total` | Total adjustment made to the local system clock frequency by W32Time in parts per billion (PPB) units. 1 PPB adjustment implies the system clock was adjusted at a rate of 1 nanosecond per second (1 ns/s). The smallest possible adjustment can vary and is expected to be in the order of 100's of PPB. | counter | None
`windows_time_computed_time_offset_seconds` | Absolute time offset between the system clock and the chosen time source, in seconds. | gauge | None
`windows_time_ntp_client_time_source_count` | Active number of NTP Time sources being used by the client. This is a count of active, distinct IP addresses of time servers that are responding to this client's requests. | gauge | None
`windows_time_ntp_round_trip_delay_seconds` | Total roundtrip delay experienced by the NTP client in receiving a response from the server for the most recent request, in seconds. This is the time elapsed on the NTP client between transmitting a request to the NTP server and receiving a valid response from the server. | gauge | None
`windows_time_ntp_server_outgoing_responses_total` | Total number of requests responded to by the NTP server. | counter | None
`windows_time_ntp_server_incoming_requests_total` | Total number of requests received by the NTP server. | counter | None
### Example metric
_This collector does not yet have explained examples, we would appreciate your help adding them!_
## Useful queries
_This collector does not yet have any useful queries added, we would appreciate your help adding them!_
## Alerting examples
**prometheus.rules**
```yaml
# Alert on hosts with an NTP client delay of more than 1 second, for a 5 minute period or longer.
- alert: NTPClientDelay
expr: windows_time_ntp_round_trip_delay_seconds > 1
for: 5m
labels:
severity: warning
annotations:
summary: "NTP client delay: (instance {{ $labels.instance }})"
description: "RTT for NTP client is greater than 1 second!\nVALUE = {{ $value }}sec\n LABELS: {{ $labels }}"
```