Compare commits

...

4 Commits

Author SHA1 Message Date
Jan-Otto Kröpke
1e24d7b2c9 dns: add enhanced metrics (#1999) (#2040)
Co-authored-by: Matthew Wimpelberg <120263653+mwimpelberg28@users.noreply.github.com>
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-05-17 14:15:07 +02:00
Jan-Otto Kröpke
109f537c14 terminal_services: Expose disconnected sessions agains (#2026) (#2039) 2025-05-17 14:14:54 +02:00
Jan-Otto Kröpke
62b796e6f6 exchange: fix The specified counter could not be found (#1994) (#2038) 2025-05-17 14:12:43 +02:00
Jan-Otto Kröpke
8bae1abe20 fix: Support running as Windows Service within containers [0.30.x] (#2009) 2025-04-24 10:57:58 +02:00
11 changed files with 221 additions and 39 deletions

View File

@@ -38,7 +38,7 @@ var (
stopCh = make(chan struct{})
// serviceManagerFinishedCh is a channel to send a signal to the main function that the service manager has stopped the service.
serviceManagerFinishedCh = make(chan struct{})
serviceManagerFinishedCh = make(chan struct{}, 1)
)
// IsService variable declaration allows initiating time-sensitive components like registering the Windows service

View File

@@ -3,14 +3,19 @@
The dns collector exposes metrics about the DNS server
|||
-|-
Metric name prefix | `dns`
Classes | [`Win32_PerfRawData_DNS_DNS`](https://technet.microsoft.com/en-us/library/cc977686.aspx)
Enabled by default? | No
-|-|-
Metric name prefix | `dns` |
Classes | [`Win32_PerfRawData_DNS_DNS`](https://technet.microsoft.com/en-us/library/cc977686.aspx) |
Enabled by default | No (once the collector is enabled, both sub-collectors run by default) |
Metric name prefix (error stats) | `windows_dns` |
Classes | [`MicrosoftDNS_Statistic`](https://learn.microsoft.com/en-us/windows/win32/dns/dns-wmi-provider-overview) |
Enabled by default (error stats)? | Yes |
## Flags
None
Name | Description
-----|------------
`collector.dns.enabled` | Comma-separated list of sub-collectors to use. Available sub-collectors: `metrics`, `wmi_stats`. Defaults to all sub-collectors if not specified.
## Metrics
@@ -38,12 +43,56 @@ Name | Description | Type | Labels
`windows_dns_wins_queries_total` | _Not yet documented_ | counter | `direction`
`windows_dns_wins_responses_total` | _Not yet documented_ | counter | `direction`
`windows_dns_unmatched_responses_total` | _Not yet documented_ | counter | None
`windows_dns_wmi_stats_total` | DNS error statistics from MicrosoftDNS_Statistic | counter | `name`, `collection_name`, `dns_server`
### Sub-collectors
The DNS collector is split into two sub-collectors:
1. `metrics` - Collects standard DNS performance metrics using PDH (Performance Data Helper)
2. `wmi_stats` - Collects DNS error statistics from the MicrosoftDNS_Statistic WMI class
By default, both sub-collectors are enabled. You can enable specific sub-collectors using the `collector.dns.enabled` flag.
### Example Usage
To enable only DNS error statistics collection:
```powershell
windows_exporter.exe --collector.dns.enabled=wmi_stats
```
To enable only standard DNS metrics:
```powershell
windows_exporter.exe --collector.dns.enabled=metrics
```
To enable both (default behavior):
```powershell
windows_exporter.exe --collector.dns.enabled=metrics,wmi_stats
```
### Example metric
_This collector does not yet have explained examples, we would appreciate your help adding them!_
```
windows_dns_wmi_stats_total{collection_name="Error Stats",dns_server="EC2AMAZ-5NNM8M1",name="BadKey"} 0
windows_dns_wmi_stats_total{collection_name="Error Stats",dns_server="EC2AMAZ-5NNM8M1",name="BadSig"} 0
windows_dns_wmi_stats_total{collection_name="Error Stats",dns_server="EC2AMAZ-5NNM8M1",name="BadTime"} 0
windows_dns_wmi_stats_total{collection_name="Error Stats",dns_server="EC2AMAZ-5NNM8M1",name="FormError"} 0
windows_dns_wmi_stats_total{collection_name="Error Stats",dns_server="EC2AMAZ-5NNM8M1",name="Max"} 0
windows_dns_wmi_stats_total{collection_name="Error Stats",dns_server="EC2AMAZ-5NNM8M1",name="NoError"} 0
windows_dns_wmi_stats_total{collection_name="Error Stats",dns_server="EC2AMAZ-5NNM8M1",name="NotAuth"} 0
windows_dns_wmi_stats_total{collection_name="Error Stats",dns_server="EC2AMAZ-5NNM8M1",name="NotImpl"} 0
windows_dns_wmi_stats_total{collection_name="Error Stats",dns_server="EC2AMAZ-5NNM8M1",name="NotZone"} 0
windows_dns_wmi_stats_total{collection_name="Error Stats",dns_server="EC2AMAZ-5NNM8M1",name="NxDomain"} 0
windows_dns_wmi_stats_total{collection_name="Error Stats",dns_server="EC2AMAZ-5NNM8M1",name="NxRRSet"} 0
windows_dns_wmi_stats_total{collection_name="Error Stats",dns_server="EC2AMAZ-5NNM8M1",name="Refused"} 0
windows_dns_wmi_stats_total{collection_name="Error Stats",dns_server="EC2AMAZ-5NNM8M1",name="ServFail"} 0
windows_dns_wmi_stats_total{collection_name="Error Stats",dns_server="EC2AMAZ-5NNM8M1",name="UnknownError"} 0
windows_dns_wmi_stats_total{collection_name="Error Stats",dns_server="EC2AMAZ-5NNM8M1",name="YxDomain"} 0
windows_dns_wmi_stats_total{collection_name="Error Stats",dns_server="EC2AMAZ-5NNM8M1",name="YxRRSet"} 0
```
## Useful queries
_This collector does not yet have any useful queries added, we would appreciate your help adding them!_
## Alerting examples
_This collector does not yet have alerting examples, we would appreciate your help adding them!_
_This collector does not yet have alerting examples, we would appreciate your help adding them!_

View File

@@ -43,7 +43,7 @@ Comma-separated list of collectors to use, for example: `--collectors.exchange.e
| `windows_exchange_transport_queues_messages_submitted_total` | Messages Submitted Total |
| `windows_exchange_transport_queues_messages_delayed_total` | Messages Delayed Total |
| `windows_exchange_transport_queues_messages_completed_delivery_total` | Messages Completed Delivery Total |
| `windows_exchange_transport_queues_shadow_queue_length` | Shadow Queue Length |
| `windows_exchange_transport_queues_aggregate_shadow_queue_length` | The current number of messages in shadow queues |
| `windows_exchange_transport_queues_submission_queue_length` | Submission Queue Length |
| `windows_exchange_transport_queues_delay_queue_length` | Delay Queue Length |
| `windows_exchange_transport_queues_items_completed_delivery_total` | Items Completed Delivery Total |
@@ -54,7 +54,7 @@ Comma-separated list of collectors to use, for example: `--collectors.exchange.e
| `windows_exchange_http_proxy_avg_auth_latency` | Average time spent authenticating CAS requests over the last 200 samples |
| `windows_exchange_http_proxy_outstanding_proxy_requests` | Number of concurrent outstanding proxy requests |
| `windows_exchange_http_proxy_requests_total` | Number of proxy requests processed each second |
| `windows_exchange_avail_service_requests_per_sec` | Number of requests serviced per second |
| `windows_exchange_availability_service_requests_per_sec` | Number of requests serviced per second |
| `windows_exchange_owa_current_unique_users` | Number of unique users currently logged on to Outlook Web App |
| `windows_exchange_owa_requests_total` | Number of requests handled by Outlook Web App per second |
| `windows_exchange_autodiscover_requests_total` | Number of autodiscover service requests processed each second |
@@ -77,4 +77,3 @@ _This collector does not yet have any useful queries added, we would appreciate
## Alerting examples
_This collector does not yet have alerting examples, we would appreciate your help adding them!_

View File

@@ -16,8 +16,11 @@
package dns
import (
"errors"
"fmt"
"log/slog"
"slices"
"strings"
"github.com/alecthomas/kingpin/v2"
"github.com/prometheus-community/windows_exporter/internal/mi"
@@ -26,12 +29,23 @@ import (
"github.com/prometheus/client_golang/prometheus"
)
const Name = "dns"
const (
// Name is the collector name; it is also the subsystem part of every
// metric name this package builds with prometheus.BuildFQName.
Name = "dns"
// subCollectorMetrics selects the sub-collector that reads the "DNS"
// performance counters through PDH.
subCollectorMetrics = "metrics"
// subCollectorWMIStats selects the sub-collector that queries the
// MicrosoftDNS_Statistic class.
subCollectorWMIStats = "wmi_stats"
)
type Config struct{}
// Config holds the settings of the dns collector.
type Config struct {
// CollectorsEnabled lists the sub-collectors to run. New replaces a nil
// list with ConfigDefaults.CollectorsEnabled; Build rejects unknown names.
CollectorsEnabled []string `yaml:"collectors_enabled"`
}
//nolint:gochecknoglobals
var ConfigDefaults = Config{}
var ConfigDefaults = Config{
// Both sub-collectors are enabled unless the configuration narrows the
// selection.
CollectorsEnabled: []string{
subCollectorMetrics,
subCollectorWMIStats,
},
}
// A Collector is a Prometheus Collector for Win32_PerfRawData_DNS_DNS performance counters and MicrosoftDNS_Statistic WMI statistics.
type Collector struct {
@@ -40,6 +54,9 @@ type Collector struct {
perfDataCollector *pdh.Collector
perfDataObject []perfDataCounterValues
miSession *mi.Session
miQuery mi.Query
dynamicUpdatesFailures *prometheus.Desc
dynamicUpdatesQueued *prometheus.Desc
dynamicUpdatesReceived *prometheus.Desc
@@ -62,6 +79,7 @@ type Collector struct {
zoneTransferResponsesReceived *prometheus.Desc
zoneTransferSuccessReceived *prometheus.Desc
zoneTransferSuccessSent *prometheus.Desc
dnsWMIStats *prometheus.Desc
}
func New(config *Config) *Collector {
@@ -69,6 +87,10 @@ func New(config *Config) *Collector {
config = &ConfigDefaults
}
if config.CollectorsEnabled == nil {
config.CollectorsEnabled = ConfigDefaults.CollectorsEnabled
}
c := &Collector{
config: *config,
}
@@ -76,8 +98,26 @@ func New(config *Config) *Collector {
return c
}
func NewWithFlags(_ *kingpin.Application) *Collector {
return &Collector{}
// NewWithFlags creates a Collector whose enabled sub-collectors come from the
// "collector.dns.enabled" command-line flag registered on app.
//
// The flag value is split into c.config.CollectorsEnabled only inside the
// action callback registered on app; until that callback runs the list is
// empty.
func NewWithFlags(app *kingpin.Application) *Collector {
	collector := &Collector{config: ConfigDefaults}
	collector.config.CollectorsEnabled = make([]string, 0)

	// The flag defaults to every sub-collector listed in ConfigDefaults.
	defaultList := strings.Join(ConfigDefaults.CollectorsEnabled, ",")

	var rawList string

	flag := app.Flag(
		"collector.dns.enabled",
		"Comma-separated list of collectors to use. Defaults to all, if not specified.",
	)
	flag.Default(defaultList).StringVar(&rawList)

	app.Action(func(*kingpin.ParseContext) error {
		collector.config.CollectorsEnabled = strings.Split(rawList, ",")

		return nil
	})

	return collector
}
func (c *Collector) GetName() string {
@@ -90,7 +130,31 @@ func (c *Collector) Close() error {
return nil
}
func (c *Collector) Build(_ *slog.Logger, _ *mi.Session) error {
// Build validates the configured sub-collector names and initializes every
// enabled sub-collector.
//
// It returns an error when an unknown sub-collector name is configured, or
// when building one of the enabled sub-collectors fails. miSession is only
// handed to the wmi_stats sub-collector.
func (c *Collector) Build(_ *slog.Logger, miSession *mi.Session) error {
	known := []string{subCollectorMetrics, subCollectorWMIStats}

	// Reject the first unknown name before any sub-collector is built.
	for _, name := range c.config.CollectorsEnabled {
		if slices.Contains(known, name) {
			continue
		}

		return fmt.Errorf("unknown sub collector: %s. Possible values: %s", name,
			strings.Join(known, ", "),
		)
	}

	isEnabled := func(sub string) bool {
		return slices.Contains(c.config.CollectorsEnabled, sub)
	}

	if isEnabled(subCollectorMetrics) {
		err := c.buildMetricsCollector()
		if err != nil {
			return err
		}
	}

	if isEnabled(subCollectorWMIStats) {
		err := c.buildErrorStatsCollector(miSession)
		if err != nil {
			return err
		}
	}

	return nil
}
func (c *Collector) buildMetricsCollector() error {
c.zoneTransferRequestsReceived = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "zone_transfer_requests_received_total"),
"Number of zone transfer requests (AXFR/IXFR) received by the master DNS server",
@@ -224,6 +288,13 @@ func (c *Collector) Build(_ *slog.Logger, _ *mi.Session) error {
nil,
)
c.dnsWMIStats = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "wmi_stats_total"),
"DNS WMI statistics from MicrosoftDNS_Statistic",
[]string{"name", "collection_name", "dns_server"},
nil,
)
var err error
c.perfDataCollector, err = pdh.NewCollector[perfDataCounterValues](pdh.CounterTypeRaw, "DNS", pdh.InstancesAll)
@@ -234,9 +305,43 @@ func (c *Collector) Build(_ *slog.Logger, _ *mi.Session) error {
return nil
}
// buildErrorStatsCollector prepares the wmi_stats sub-collector: it stores the
// MI session, compiles the MicrosoftDNS_Statistic query and creates the metric
// descriptor used by collectErrorStats.
//
// It returns an error when miSession is nil or when the query cannot be
// created.
func (c *Collector) buildErrorStatsCollector(miSession *mi.Session) error {
	if miSession == nil {
		return errors.New("miSession is nil")
	}

	query, err := mi.NewQuery("SELECT Name, CollectionName, Value, DnsServerName FROM MicrosoftDNS_Statistic WHERE CollectionName = 'Error Stats'")
	if err != nil {
		return fmt.Errorf("failed to create query: %w", err)
	}

	// The descriptor has to be created here as well: buildMetricsCollector is
	// skipped when only wmi_stats is enabled, and collectErrorStats would then
	// hand a nil descriptor to prometheus.MustNewConstMetric and panic.
	c.dnsWMIStats = prometheus.NewDesc(
		prometheus.BuildFQName(types.Namespace, Name, "wmi_stats_total"),
		"DNS WMI statistics from MicrosoftDNS_Statistic",
		[]string{"name", "collection_name", "dns_server"},
		nil,
	)

	c.miSession = miSession
	c.miQuery = query

	return nil
}
// Collect runs every enabled sub-collector and writes its metrics to ch.
//
// A failing sub-collector does not stop the other one; the failures are
// wrapped and joined into the returned error, which is nil when every enabled
// sub-collector succeeded.
func (c *Collector) Collect(ch chan<- prometheus.Metric) error {
	var metricsErr, statsErr error

	enabled := c.config.CollectorsEnabled

	if slices.Contains(enabled, subCollectorMetrics) {
		if err := c.collectMetrics(ch); err != nil {
			metricsErr = fmt.Errorf("failed collecting metrics: %w", err)
		}
	}

	if slices.Contains(enabled, subCollectorWMIStats) {
		if err := c.collectErrorStats(ch); err != nil {
			statsErr = fmt.Errorf("failed collecting WMI statistics: %w", err)
		}
	}

	// errors.Join drops nil values and returns nil when nothing failed.
	return errors.Join(metricsErr, statsErr)
}
func (c *Collector) collectMetrics(ch chan<- prometheus.Metric) error {
err := c.perfDataCollector.Collect(&c.perfDataObject)
if err != nil {
return fmt.Errorf("failed to collect DNS metrics: %w", err)
@@ -493,3 +598,24 @@ func (c *Collector) Collect(ch chan<- prometheus.Metric) error {
return nil
}
// collectErrorStats queries MicrosoftDNS_Statistic through the stored MI
// session and emits one counter sample per returned row, labelled with the
// statistic name, its collection name and the DNS server name.
//
// It returns an error when the sub-collector has not been initialized or when
// the query fails.
func (c *Collector) collectErrorStats(ch chan<- prometheus.Metric) error {
	// Without this guard a missing descriptor makes MustNewConstMetric panic
	// in the middle of a scrape; report it as a regular collection error.
	if c.miSession == nil || c.dnsWMIStats == nil {
		return errors.New("DNS WMI statistics sub-collector is not initialized")
	}

	var stats []Statistic
	if err := c.miSession.Query(&stats, mi.NamespaceRootMicrosoftDNS, c.miQuery); err != nil {
		return fmt.Errorf("failed to query DNS statistics: %w", err)
	}

	// Index instead of ranging by value to avoid copying each row.
	for i := range stats {
		ch <- prometheus.MustNewConstMetric(
			c.dnsWMIStats,
			prometheus.CounterValue,
			float64(stats[i].Value),
			stats[i].Name,
			stats[i].CollectionName,
			stats[i].DnsServerName,
		)
	}

	return nil
}

View File

@@ -105,3 +105,11 @@ type perfDataCounterValues struct {
_ float64 `perfdata:"Zone Transfer SOA Request Sent"`
_ float64 `perfdata:"Zone Transfer Success"`
}
// Statistic is one row of the MicrosoftDNS_Statistic query issued by the DNS
// collector. The `mi` tags match the property names selected by that query.
type Statistic struct {
// Name is the statistic's name; exported as the "name" label.
Name string `mi:"Name"`
// CollectionName is the statistic group; exported as the "collection_name" label.
CollectionName string `mi:"CollectionName"`
// Value is the statistic's value; exported as the counter sample value.
Value uint64 `mi:"Value"`
// DnsServerName is exported as the "dns_server" label.
DnsServerName string `mi:"DnsServerName"`
}

View File

@@ -37,7 +37,7 @@ type perfDataCounterValuesAutoDiscover struct {
func (c *Collector) buildAutoDiscover() error {
var err error
c.perfDataCollectorAutoDiscover, err = pdh.NewCollector[perfDataCounterValuesAutoDiscover](pdh.CounterTypeRaw, "MSExchange Autodiscover", pdh.InstancesAll)
c.perfDataCollectorAutoDiscover, err = pdh.NewCollector[perfDataCounterValuesAutoDiscover](pdh.CounterTypeRaw, "MSExchangeAutodiscover", nil)
if err != nil {
return fmt.Errorf("failed to create MSExchange Autodiscover collector: %w", err)
}

View File

@@ -31,7 +31,7 @@ type collectorAvailabilityService struct {
}
type perfDataCounterValuesAvailabilityService struct {
RequestsPerSec float64 `perfdata:"Requests/sec"`
AvailabilityRequestsPerSec float64 `perfdata:"Availability Requests (sec)"`
}
func (c *Collector) buildAvailabilityService() error {
@@ -43,7 +43,7 @@ func (c *Collector) buildAvailabilityService() error {
}
c.availabilityRequestsSec = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "avail_service_requests_per_sec"),
prometheus.BuildFQName(types.Namespace, Name, "availability_service_requests_per_sec"),
"Number of requests serviced per second",
nil,
nil,
@@ -62,7 +62,7 @@ func (c *Collector) collectAvailabilityService(ch chan<- prometheus.Metric) erro
ch <- prometheus.MustNewConstMetric(
c.availabilityRequestsSec,
prometheus.CounterValue,
data.RequestsPerSec,
data.AvailabilityRequestsPerSec,
)
}

View File

@@ -39,7 +39,7 @@ type collectorTransportQueues struct {
messagesSubmittedTotal *prometheus.Desc
messagesDelayedTotal *prometheus.Desc
messagesCompletedDeliveryTotal *prometheus.Desc
shadowQueueLength *prometheus.Desc
aggregateShadowQueueLength *prometheus.Desc
submissionQueueLength *prometheus.Desc
delayQueueLength *prometheus.Desc
itemsCompletedDeliveryTotal *prometheus.Desc
@@ -63,7 +63,7 @@ type perfDataCounterValuesTransportQueues struct {
MessagesSubmittedTotal float64 `perfdata:"Messages Submitted Total"`
MessagesDelayedTotal float64 `perfdata:"Messages Delayed Total"`
MessagesCompletedDeliveryTotal float64 `perfdata:"Messages Completed Delivery Total"`
ShadowQueueLength float64 `perfdata:"Shadow Queue Length"`
AggregateShadowQueueLength float64 `perfdata:"Aggregate Shadow Queue Length"`
SubmissionQueueLength float64 `perfdata:"Submission Queue Length"`
DelayQueueLength float64 `perfdata:"Delay Queue Length"`
ItemsCompletedDeliveryTotal float64 `perfdata:"Items Completed Delivery Total"`
@@ -152,9 +152,9 @@ func (c *Collector) buildTransportQueues() error {
[]string{"name"},
nil,
)
c.shadowQueueLength = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "transport_queues_shadow_queue_length"),
"Shadow Queue Length",
c.aggregateShadowQueueLength = prometheus.NewDesc(
prometheus.BuildFQName(types.Namespace, Name, "transport_queues_aggregate_shadow_queue_length"),
"The current number of messages in shadow queues.",
[]string{"name"},
nil,
)
@@ -280,9 +280,9 @@ func (c *Collector) collectTransportQueues(ch chan<- prometheus.Metric) error {
labelName,
)
ch <- prometheus.MustNewConstMetric(
c.shadowQueueLength,
c.aggregateShadowQueueLength,
prometheus.GaugeValue,
data.ShadowQueueLength,
data.AggregateShadowQueueLength,
labelName,
)
ch <- prometheus.MustNewConstMetric(

View File

@@ -420,15 +420,6 @@ func (c *Collector) getProcessStartTime(pid uint32) (uint64, error) {
return 0, fmt.Errorf("failed to open process %w", err)
}
defer func(handle windows.Handle) {
err := windows.CloseHandle(handle)
if err != nil {
c.logger.Warn("failed to close process handle",
slog.Any("err", err),
)
}
}(handle)
var (
creation windows.Filetime
exit windows.Filetime
@@ -437,6 +428,14 @@ func (c *Collector) getProcessStartTime(pid uint32) (uint64, error) {
)
err = windows.GetProcessTimes(handle, &creation, &exit, &krn, &user)
if err := windows.CloseHandle(handle); err != nil {
c.logger.LogAttrs(context.Background(), slog.LevelWarn, "failed to close process handle",
slog.Any("err", err),
slog.Uint64("pid", uint64(pid)),
)
}
if err != nil {
return 0, fmt.Errorf("failed to get process times %w", err)
}
@@ -477,7 +476,7 @@ func (c *Collector) getServiceConfig(service *mgr.Service) (mgr.Config, error) {
*buf = make([]byte, bytesNeeded)
}
c.serviceConfigPoolBytes.Put(buf)
defer c.serviceConfigPoolBytes.Put(buf)
return mgr.Config{
BinaryPathName: windows.UTF16PtrToString(serviceConfig.BinaryPathName),

View File

@@ -437,7 +437,7 @@ func (c *Collector) collectWTSSessions(ch chan<- prometheus.Metric) error {
for _, session := range sessions {
// only collect metrics for remote named sessions
n := strings.ReplaceAll(session.SessionName, "#", " ")
if n == "" || n == "Services" {
if n == "Services" {
continue
}

View File

@@ -51,6 +51,7 @@ var (
NamespaceRootWindowsFSRM = utils.Must(NewNamespace("root/microsoft/windows/fsrm"))
NamespaceRootWebAdministration = utils.Must(NewNamespace("root/WebAdministration"))
NamespaceRootMSCluster = utils.Must(NewNamespace("root/MSCluster"))
NamespaceRootMicrosoftDNS = utils.Must(NewNamespace("root/MicrosoftDNS"))
)
// Query is the query value that NewQuery returns and Session.Query accepts.
// NOTE(review): the *uint16 representation suggests a pointer to a UTF-16
// encoded query string — confirm against NewQuery.
type Query *uint16