diff --git a/README.md b/README.md
index f24a5a86..53751f44 100644
--- a/README.md
+++ b/README.md
@@ -40,6 +40,7 @@ Name | Description | Enabled by default
 [service](docs/collector.service.md) | Service state metrics | ✓
 [system](docs/collector.system.md) | System calls | ✓
 [tcp](docs/collector.tcp.md) | TCP connections |
+[time](docs/collector.time.md) | Windows Time Service |
 [thermalzone](docs/collector.thermalzone.md) | Thermal information
 [terminal_services](docs/collector.terminal_services.md) | Terminal services (RDS)
 [textfile](docs/collector.textfile.md) | Read prometheus metrics from a text file | ✓
diff --git a/collector/time.go b/collector/time.go
new file mode 100644
index 00000000..4ec4b5ab
--- /dev/null
+++ b/collector/time.go
@@ -0,0 +1,140 @@
+// +build windows
+
+package collector
+
+import (
+	"errors"
+
+	"github.com/prometheus/client_golang/prometheus"
+	"github.com/prometheus/common/log"
+)
+
+func init() {
+	registerCollector("time", newTimeCollector, "Windows Time Service")
+}
+
+// TimeCollector is a Prometheus collector for Perflib counter metrics
+// of the "Windows Time Service" performance object.
+type TimeCollector struct {
+	ClockFrequencyAdjustmentPPBTotal *prometheus.Desc
+	ComputedTimeOffset               *prometheus.Desc
+	NTPClientTimeSourceCount         *prometheus.Desc
+	NTPRoundtripDelay                *prometheus.Desc
+	NTPServerIncomingRequestsTotal   *prometheus.Desc
+	NTPServerOutgoingResponsesTotal  *prometheus.Desc
+}
+
+// newTimeCollector creates a TimeCollector with a descriptor for each metric
+// it exposes. All descriptors are created without labels.
+func newTimeCollector() (Collector, error) {
+	const subsystem = "time"
+
+	return &TimeCollector{
+		ClockFrequencyAdjustmentPPBTotal: prometheus.NewDesc(
+			prometheus.BuildFQName(Namespace, subsystem, "clock_frequency_adjustment_ppb_total"),
+			"Total adjustment made to the local system clock frequency by W32Time in Parts Per Billion (PPB) units.",
+			nil,
+			nil,
+		),
+		ComputedTimeOffset: prometheus.NewDesc(
+			prometheus.BuildFQName(Namespace, subsystem, "computed_time_offset_seconds"),
+			"Absolute time offset between the system clock and the chosen time source, in seconds",
+			nil,
+			nil,
+		),
+		NTPClientTimeSourceCount: prometheus.NewDesc(
+			prometheus.BuildFQName(Namespace, subsystem, "ntp_client_time_source_count"),
+			"Active number of NTP Time sources being used by the client",
+			nil,
+			nil,
+		),
+		NTPRoundtripDelay: prometheus.NewDesc(
+			prometheus.BuildFQName(Namespace, subsystem, "ntp_round_trip_delay_seconds"),
+			"Roundtrip delay experienced by the NTP client in receiving a response from the server for the most recent request, in seconds",
+			nil,
+			nil,
+		),
+		NTPServerOutgoingResponsesTotal: prometheus.NewDesc(
+			prometheus.BuildFQName(Namespace, subsystem, "ntp_server_outgoing_responses_total"),
+			"Total number of requests responded to by NTP server",
+			nil,
+			nil,
+		),
+		NTPServerIncomingRequestsTotal: prometheus.NewDesc(
+			prometheus.BuildFQName(Namespace, subsystem, "ntp_server_incoming_requests_total"),
+			"Total number of requests received by NTP server",
+			nil,
+			nil,
+		),
+	}, nil
+}
+
+// Collect sends the metric values for each metric
+// to the provided prometheus Metric channel.
+// Any collection error is logged and then returned to the caller.
+func (c *TimeCollector) Collect(ctx *ScrapeContext, ch chan<- prometheus.Metric) error {
+	if desc, err := c.collect(ctx, ch); err != nil {
+		log.Error("failed collecting time metrics:", desc, err)
+		return err
+	}
+	return nil
+}
+
+// windowsTime holds the counters of the Perflib "Windows Time Service" object;
+// the perflib tag on each field names its source counter.
+type windowsTime struct {
+	ClockFrequencyAdjustmentPPBTotal float64 `perflib:"Clock Frequency Adjustment (ppb)"`
+	ComputedTimeOffset               float64 `perflib:"Computed Time Offset"`
+	NTPClientTimeSourceCount         float64 `perflib:"NTP Client Time Source Count"`
+	NTPRoundtripDelay                float64 `perflib:"NTP Roundtrip Delay"`
+	NTPServerIncomingRequestsTotal   float64 `perflib:"NTP Server Incoming Requests"`
+	NTPServerOutgoingResponsesTotal  float64 `perflib:"NTP Server Outgoing Responses"`
+}
+
+// collect reads the "Windows Time Service" perf object from ctx and sends one
+// const metric per counter to ch. The two microsecond counters are converted
+// to seconds. It returns a non-nil error when the object cannot be unmarshalled
+// or yields no entry; the returned descriptor is always nil.
+func (c *TimeCollector) collect(ctx *ScrapeContext, ch chan<- prometheus.Metric) (*prometheus.Desc, error) {
+	var dst []windowsTime // Single-instance class, array is required but will have single entry.
+	if err := unmarshalObject(ctx.perfObjects["Windows Time Service"], &dst); err != nil {
+		return nil, err
+	}
+	// Guard the dst[0] accesses below: indexing an empty slice would panic
+	// instead of reporting a failed scrape.
+	if len(dst) == 0 {
+		return nil, errors.New("no data returned for Windows Time Service perflib object")
+	}
+
+	ch <- prometheus.MustNewConstMetric(
+		c.ClockFrequencyAdjustmentPPBTotal,
+		prometheus.CounterValue,
+		dst[0].ClockFrequencyAdjustmentPPBTotal,
+	)
+	ch <- prometheus.MustNewConstMetric(
+		c.ComputedTimeOffset,
+		prometheus.GaugeValue,
+		dst[0].ComputedTimeOffset/1000000, // microseconds -> seconds
+	)
+	ch <- prometheus.MustNewConstMetric(
+		c.NTPClientTimeSourceCount,
+		prometheus.GaugeValue,
+		dst[0].NTPClientTimeSourceCount,
+	)
+	ch <- prometheus.MustNewConstMetric(
+		c.NTPRoundtripDelay,
+		prometheus.GaugeValue,
+		dst[0].NTPRoundtripDelay/1000000, // microseconds -> seconds
+	)
+	ch <- prometheus.MustNewConstMetric(
+		c.NTPServerIncomingRequestsTotal,
+		prometheus.CounterValue,
+		dst[0].NTPServerIncomingRequestsTotal,
+	)
+	ch <- prometheus.MustNewConstMetric(
+		c.NTPServerOutgoingResponsesTotal,
+		prometheus.CounterValue,
+		dst[0].NTPServerOutgoingResponsesTotal,
+	)
+	return nil, nil
+}
diff --git a/docs/collector.time.md b/docs/collector.time.md
new file mode 100644
index 00000000..5e7c1ca8
--- /dev/null
+++ b/docs/collector.time.md
@@ -0,0 +1,45 @@
+# time collector
+
+The time collector exposes the Windows Time Service metrics. Note that the Windows Time Service must be running, else metric collection will fail.
+If the Windows Time Service is stopped after collection has started, collector metric values will reset to 0.
+
+|||
+-|-
+Metric name prefix | `time`
+Data source | Perflib
+Enabled by default? | No
+
+## Flags
+
+None
+
+## Metrics
+
+Name | Description | Type | Labels
+-----|-------------|------|-------
+`windows_time_clock_frequency_adjustment_ppb_total` | Total adjustment made to the local system clock frequency by W32Time in parts per billion (PPB) units. 1 PPB adjustment implies the system clock was adjusted at a rate of 1 nanosecond per second (1 ns/s). 
The smallest possible adjustment can vary and is expected to be in the order of 100's of PPB. | counter | None
+`windows_time_computed_time_offset_seconds` | Absolute time offset between the system clock and the chosen time source, in seconds. | gauge | None
+`windows_time_ntp_client_time_source_count` | Active number of NTP Time sources being used by the client. This is a count of active, distinct IP addresses of time servers that are responding to this client's requests. | gauge | None
+`windows_time_ntp_round_trip_delay_seconds` | Total roundtrip delay experienced by the NTP client in receiving a response from the server for the most recent request, in seconds. This is the time elapsed on the NTP client between transmitting a request to the NTP server and receiving a valid response from the server. | gauge | None
+`windows_time_ntp_server_outgoing_responses_total` | Total number of requests responded to by the NTP server. | counter | None
+`windows_time_ntp_server_incoming_requests_total` | Total number of requests received by the NTP server. | counter | None
+
+### Example metric
+_This collector does not yet have explained examples, we would appreciate your help adding them!_
+
+## Useful queries
+_This collector does not yet have any useful queries added, we would appreciate your help adding them!_
+
+## Alerting examples
+**prometheus.rules**
+```yaml
+# Alert on hosts with an NTP client delay of more than 1 second, for a 5 minute period or longer.
+- alert: NTPClientDelay
+  expr: windows_time_ntp_round_trip_delay_seconds > 1
+  for: 5m
+  labels:
+    severity: warning
+  annotations:
+    summary: "NTP client delay: (instance {{ $labels.instance }})"
+    description: "RTT for NTP client is greater than 1 second!\nVALUE = {{ $value }}sec\n LABELS: {{ $labels }}"
+```