From 1e24d7b2c94937a0276845b17d252bb7cd15949c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan-Otto=20Kr=C3=B6pke?= Date: Sat, 17 May 2025 14:15:07 +0200 Subject: [PATCH] dns: add enhanced metrics (#1999) (#2040) Co-authored-by: Matthew Wimpelberg <120263653+mwimpelberg28@users.noreply.github.com> Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- docs/collector.dns.md | 63 +++++++++++++-- internal/collector/dns/dns.go | 138 ++++++++++++++++++++++++++++++-- internal/collector/dns/types.go | 8 ++ internal/mi/types.go | 1 + 4 files changed, 197 insertions(+), 13 deletions(-) diff --git a/docs/collector.dns.md b/docs/collector.dns.md index 41c93d28..766c36d4 100644 --- a/docs/collector.dns.md +++ b/docs/collector.dns.md @@ -3,14 +3,19 @@ The dns collector exposes metrics about the DNS server ||| --|- -Metric name prefix | `dns` -Classes | [`Win32_PerfRawData_DNS_DNS`](https://technet.microsoft.com/en-us/library/cc977686.aspx) -Enabled by default? | No +-|-|- +Metric name prefix | `dns` | +Classes | [`Win32_PerfRawData_DNS_DNS`](https://technet.microsoft.com/en-us/library/cc977686.aspx) | +Enabled by default | Yes | +Metric name prefix (error stats) | `windows_dns` | +Classes | [`MicrosoftDNS_Statistic`](https://learn.microsoft.com/en-us/windows/win32/dns/dns-wmi-provider-overview) | +Enabled by default (error stats)? | Yes | ## Flags -None +Name | Description +-----|------------ +`collector.dns.enabled` | Comma-separated list of collectors to use. Available collectors: `metrics`, `wmi_stats`. Defaults to all collectors if not specified. 
## Metrics @@ -38,12 +43,56 @@ Name | Description | Type | Labels `windows_dns_wins_queries_total` | _Not yet documented_ | counter | `direction` `windows_dns_wins_responses_total` | _Not yet documented_ | counter | `direction` `windows_dns_unmatched_responses_total` | _Not yet documented_ | counter | None +`windows_dns_wmi_stats_total` | DNS error statistics from MicrosoftDNS_Statistic | counter | `name`, `collection_name`, `dns_server` + +### Sub-collectors + +The DNS collector is split into two sub-collectors: + +1. `metrics` - Collects standard DNS performance metrics using PDH (Performance Data Helper) +2. `wmi_stats` - Collects DNS error statistics from the MicrosoftDNS_Statistic WMI class + +By default, both sub-collectors are enabled. You can enable specific sub-collectors using the `collector.dns.enabled` flag. + +### Example Usage + +To enable only DNS error statistics collection: +```powershell +windows_exporter.exe --collector.dns.enabled=wmi_stats +``` + +To enable only standard DNS metrics: +```powershell +windows_exporter.exe --collector.dns.enabled=metrics +``` + +To enable both (default behavior): +```powershell +windows_exporter.exe --collector.dns.enabled=metrics,wmi_stats +``` ### Example metric -_This collector does not yet have explained examples, we would appreciate your help adding them!_ +``` +windows_dns_wmi_stats_total{collection_name="Error Stats",dns_server="EC2AMAZ-5NNM8M1",name="BadKey"} 0 +windows_dns_wmi_stats_total{collection_name="Error Stats",dns_server="EC2AMAZ-5NNM8M1",name="BadSig"} 0 +windows_dns_wmi_stats_total{collection_name="Error Stats",dns_server="EC2AMAZ-5NNM8M1",name="BadTime"} 0 +windows_dns_wmi_stats_total{collection_name="Error Stats",dns_server="EC2AMAZ-5NNM8M1",name="FormError"} 0 +windows_dns_wmi_stats_total{collection_name="Error Stats",dns_server="EC2AMAZ-5NNM8M1",name="Max"} 0 +windows_dns_wmi_stats_total{collection_name="Error Stats",dns_server="EC2AMAZ-5NNM8M1",name="NoError"} 0 
+windows_dns_wmi_stats_total{collection_name="Error Stats",dns_server="EC2AMAZ-5NNM8M1",name="NotAuth"} 0 +windows_dns_wmi_stats_total{collection_name="Error Stats",dns_server="EC2AMAZ-5NNM8M1",name="NotImpl"} 0 +windows_dns_wmi_stats_total{collection_name="Error Stats",dns_server="EC2AMAZ-5NNM8M1",name="NotZone"} 0 +windows_dns_wmi_stats_total{collection_name="Error Stats",dns_server="EC2AMAZ-5NNM8M1",name="NxDomain"} 0 +windows_dns_wmi_stats_total{collection_name="Error Stats",dns_server="EC2AMAZ-5NNM8M1",name="NxRRSet"} 0 +windows_dns_wmi_stats_total{collection_name="Error Stats",dns_server="EC2AMAZ-5NNM8M1",name="Refused"} 0 +windows_dns_wmi_stats_total{collection_name="Error Stats",dns_server="EC2AMAZ-5NNM8M1",name="ServFail"} 0 +windows_dns_wmi_stats_total{collection_name="Error Stats",dns_server="EC2AMAZ-5NNM8M1",name="UnknownError"} 0 +windows_dns_wmi_stats_total{collection_name="Error Stats",dns_server="EC2AMAZ-5NNM8M1",name="YxDomain"} 0 +windows_dns_wmi_stats_total{collection_name="Error Stats",dns_server="EC2AMAZ-5NNM8M1",name="YxRRSet"} 0 +``` ## Useful queries _This collector does not yet have any useful queries added, we would appreciate your help adding them!_ ## Alerting examples -_This collector does not yet have alerting examples, we would appreciate your help adding them!_ +_This collector does not yet have alerting examples, we would appreciate your help adding them!_ \ No newline at end of file diff --git a/internal/collector/dns/dns.go b/internal/collector/dns/dns.go index e93224be..957bb9f9 100644 --- a/internal/collector/dns/dns.go +++ b/internal/collector/dns/dns.go @@ -16,8 +16,11 @@ package dns import ( + "errors" "fmt" "log/slog" + "slices" + "strings" "github.com/alecthomas/kingpin/v2" "github.com/prometheus-community/windows_exporter/internal/mi" @@ -26,12 +29,23 @@ import ( "github.com/prometheus/client_golang/prometheus" ) -const Name = "dns" +const ( + Name = "dns" + subCollectorMetrics = "metrics" + subCollectorWMIStats = 
"wmi_stats" +) -type Config struct{} +type Config struct { + CollectorsEnabled []string `yaml:"collectors_enabled"` +} //nolint:gochecknoglobals -var ConfigDefaults = Config{} +var ConfigDefaults = Config{ + CollectorsEnabled: []string{ + subCollectorMetrics, + subCollectorWMIStats, + }, +} // A Collector is a Prometheus Collector for WMI Win32_PerfRawData_DNS_DNS metrics. type Collector struct { @@ -40,6 +54,9 @@ type Collector struct { perfDataCollector *pdh.Collector perfDataObject []perfDataCounterValues + miSession *mi.Session + miQuery mi.Query + dynamicUpdatesFailures *prometheus.Desc dynamicUpdatesQueued *prometheus.Desc dynamicUpdatesReceived *prometheus.Desc @@ -62,6 +79,7 @@ type Collector struct { zoneTransferResponsesReceived *prometheus.Desc zoneTransferSuccessReceived *prometheus.Desc zoneTransferSuccessSent *prometheus.Desc + dnsWMIStats *prometheus.Desc } func New(config *Config) *Collector { @@ -69,6 +87,10 @@ func New(config *Config) *Collector { config = &ConfigDefaults } + if config.CollectorsEnabled == nil { + config.CollectorsEnabled = ConfigDefaults.CollectorsEnabled + } + c := &Collector{ config: *config, } @@ -76,8 +98,26 @@ func New(config *Config) *Collector { return c } -func NewWithFlags(_ *kingpin.Application) *Collector { - return &Collector{} +func NewWithFlags(app *kingpin.Application) *Collector { + c := &Collector{ + config: ConfigDefaults, + } + c.config.CollectorsEnabled = make([]string, 0) + + var collectorsEnabled string + + app.Flag( + "collector.dns.enabled", + "Comma-separated list of collectors to use. 
Defaults to all, if not specified.", + ).Default(strings.Join(ConfigDefaults.CollectorsEnabled, ",")).StringVar(&collectorsEnabled) + + app.Action(func(*kingpin.ParseContext) error { + c.config.CollectorsEnabled = strings.Split(collectorsEnabled, ",") + + return nil + }) + + return c } func (c *Collector) GetName() string { @@ -90,7 +130,31 @@ func (c *Collector) Close() error { return nil } -func (c *Collector) Build(_ *slog.Logger, _ *mi.Session) error { +func (c *Collector) Build(_ *slog.Logger, miSession *mi.Session) error { + for _, collector := range c.config.CollectorsEnabled { + if !slices.Contains([]string{subCollectorMetrics, subCollectorWMIStats}, collector) { + return fmt.Errorf("unknown sub collector: %s. Possible values: %s", collector, + strings.Join([]string{subCollectorMetrics, subCollectorWMIStats}, ", "), + ) + } + } + + if slices.Contains(c.config.CollectorsEnabled, subCollectorMetrics) { + if err := c.buildMetricsCollector(); err != nil { + return err + } + } + + if slices.Contains(c.config.CollectorsEnabled, subCollectorWMIStats) { + if err := c.buildErrorStatsCollector(miSession); err != nil { + return err + } + } + + return nil +} + +func (c *Collector) buildMetricsCollector() error { c.zoneTransferRequestsReceived = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "zone_transfer_requests_received_total"), "Number of zone transfer requests (AXFR/IXFR) received by the master DNS server", @@ -234,9 +298,50 @@ func (c *Collector) Build(_ *slog.Logger, _ *mi.Session) error { return nil } +func 
(c *Collector) buildErrorStatsCollector(miSession *mi.Session) error { + if miSession == nil { + return errors.New("miSession is nil") + } + + c.dnsWMIStats = prometheus.NewDesc( // built here so the descriptor exists when only wmi_stats is enabled + prometheus.BuildFQName(types.Namespace, Name, "wmi_stats_total"), + "DNS WMI statistics from MicrosoftDNS_Statistic", + []string{"name", "collection_name", "dns_server"}, + nil, + ) + + query, err := mi.NewQuery("SELECT Name, CollectionName, Value, DnsServerName FROM MicrosoftDNS_Statistic WHERE CollectionName = 'Error Stats'") + if err != nil { + return fmt.Errorf("failed to create query: %w", err) + } + + c.miSession = miSession + c.miQuery = query + + return nil +} + // Collect sends the metric values for each metric // to the provided prometheus Metric channel. func (c *Collector) Collect(ch chan<- prometheus.Metric) error { + errs := make([]error, 0) + + if slices.Contains(c.config.CollectorsEnabled, subCollectorMetrics) { + if err := c.collectMetrics(ch); err != nil { + errs = append(errs, fmt.Errorf("failed collecting metrics: %w", err)) + } + } + + if slices.Contains(c.config.CollectorsEnabled, subCollectorWMIStats) { + if err := c.collectErrorStats(ch); err != nil { + errs = append(errs, fmt.Errorf("failed collecting WMI statistics: %w", err)) + } + } + + return errors.Join(errs...) 
+} + +func (c *Collector) collectMetrics(ch chan<- prometheus.Metric) error { err := c.perfDataCollector.Collect(&c.perfDataObject) if err != nil { return fmt.Errorf("failed to collect DNS metrics: %w", err) @@ -493,3 +598,24 @@ func (c *Collector) Collect(ch chan<- prometheus.Metric) error { return nil } + +func (c *Collector) collectErrorStats(ch chan<- prometheus.Metric) error { + var stats []Statistic + if err := c.miSession.Query(&stats, mi.NamespaceRootMicrosoftDNS, c.miQuery); err != nil { + return fmt.Errorf("failed to query DNS statistics: %w", err) + } + + // Collect DNS error statistics + for _, stat := range stats { + ch <- prometheus.MustNewConstMetric( + c.dnsWMIStats, + prometheus.CounterValue, + float64(stat.Value), + stat.Name, + stat.CollectionName, + stat.DnsServerName, + ) + } + + return nil +} diff --git a/internal/collector/dns/types.go b/internal/collector/dns/types.go index 22953367..bf189d5a 100644 --- a/internal/collector/dns/types.go +++ b/internal/collector/dns/types.go @@ -105,3 +105,11 @@ type perfDataCounterValues struct { _ float64 `perfdata:"Zone Transfer SOA Request Sent"` _ float64 `perfdata:"Zone Transfer Success"` } + +// Statistic represents the structure for DNS error statistics +type Statistic struct { + Name string `mi:"Name"` + CollectionName string `mi:"CollectionName"` + Value uint64 `mi:"Value"` + DnsServerName string `mi:"DnsServerName"` +} diff --git a/internal/mi/types.go b/internal/mi/types.go index ec4dbbb5..f3bff408 100644 --- a/internal/mi/types.go +++ b/internal/mi/types.go @@ -51,6 +51,7 @@ var ( NamespaceRootWindowsFSRM = utils.Must(NewNamespace("root/microsoft/windows/fsrm")) NamespaceRootWebAdministration = utils.Must(NewNamespace("root/WebAdministration")) NamespaceRootMSCluster = utils.Must(NewNamespace("root/MSCluster")) + NamespaceRootMicrosoftDNS = utils.Must(NewNamespace("root/MicrosoftDNS")) ) type Query *uint16