diff --git a/collector/iis.go b/collector/iis.go
index 67c89600..1d156059 100644
--- a/collector/iis.go
+++ b/collector/iis.go
@@ -1,5 +1,7 @@
-// returns data points from Win32_PerfRawData_W3SVC_WebService
+// returns data points from the following WMI classes:
+// - Win32_PerfRawData_W3SVC_WebService
 // https://msdn.microsoft.com/en-us/library/aa394345 - Win32_OperatingSystem class
+// - Win32_PerfRawData_APPPOOLCountersProvider_APPPOOLWAS
 
 package collector
 
@@ -20,9 +22,12 @@ func init() {
 var (
 	siteWhitelist = flag.String("collector.iis.site-whitelist", ".+", "Regexp of sites to whitelist. Site name must both match whitelist and not match blacklist to be included.")
 	siteBlacklist = flag.String("collector.iis.site-blacklist", "", "Regexp of sites to blacklist. Site name must both match whitelist and not match blacklist to be included.")
+	appWhitelist  = flag.String("collector.iis.app-whitelist", ".+", "Regexp of apps to whitelist. App name must both match whitelist and not match blacklist to be included.")
+	appBlacklist  = flag.String("collector.iis.app-blacklist", "", "Regexp of apps to blacklist. App name must both match whitelist and not match blacklist to be included.")
 )
 
-// A IISCollector is a Prometheus collector for WMI Win32_PerfRawData_W3SVC_WebService metrics
+// An IISCollector is a Prometheus collector for WMI Win32_PerfRawData_W3SVC_WebService
+// and Win32_PerfRawData_APPPOOLCountersProvider_APPPOOLWAS metrics
 type IISCollector struct {
 	CurrentAnonymousUsers         *prometheus.Desc
 	CurrentBlockedAsyncIORequests *prometheus.Desc
@@ -49,6 +54,23 @@ type IISCollector struct {
 
 	siteWhitelistPattern *regexp.Regexp
 	siteBlacklistPattern *regexp.Regexp
+
+	CurrentApplicationPoolState        *prometheus.Desc
+	CurrentApplicationPoolUptime       *prometheus.Desc
+	CurrentWorkerProcesses             *prometheus.Desc
+	MaximumWorkerProcesses             *prometheus.Desc
+	RecentWorkerProcessFailures        *prometheus.Desc
+	TimeSinceLastWorkerProcessFailure  *prometheus.Desc
+	TotalApplicationPoolRecycles       *prometheus.Desc
+	TotalApplicationPoolUptime         *prometheus.Desc
+	TotalWorkerProcessesCreated        *prometheus.Desc
+	TotalWorkerProcessFailures         *prometheus.Desc
+	TotalWorkerProcessPingFailures     *prometheus.Desc
+	TotalWorkerProcessShutdownFailures *prometheus.Desc
+	TotalWorkerProcessStartupFailures  *prometheus.Desc
+
+	appWhitelistPattern *regexp.Regexp
+	appBlacklistPattern *regexp.Regexp
 }
 
 // NewIISCollector ...
@@ -56,6 +78,7 @@ func NewIISCollector() (Collector, error) {
 	const subsystem = "iis"
 
 	return &IISCollector{
+		// Websites
 		// Gauges
 		CurrentAnonymousUsers: prometheus.NewDesc(
 			prometheus.BuildFQName(Namespace, subsystem, "current_anonymous_users"),
@@ -188,6 +211,93 @@ func NewIISCollector() (Collector, error) {
 
 		siteWhitelistPattern: regexp.MustCompile(fmt.Sprintf("^(?:%s)$", *siteWhitelist)),
 		siteBlacklistPattern: regexp.MustCompile(fmt.Sprintf("^(?:%s)$", *siteBlacklist)),
+
+		// App Pools
+		// Gauges
+		CurrentApplicationPoolState: prometheus.NewDesc(
+			prometheus.BuildFQName(Namespace, subsystem, "current_application_pool_state"),
+			"The current status of the application pool (1 - Uninitialized, 2 - Initialized, 3 - Running, 4 - Disabling, 5 - Disabled, 6 - Shutdown Pending, 7 - Delete Pending) (CurrentApplicationPoolState)",
+			[]string{"app", "state"},
+			nil,
+		),
+		CurrentApplicationPoolUptime: prometheus.NewDesc(
+			prometheus.BuildFQName(Namespace, subsystem, "current_application_pool_start_time"),
+			"The unix timestamp for the application pool start time (CurrentApplicationPoolUptime)",
+			[]string{"app"},
+			nil,
+		),
+		CurrentWorkerProcesses: prometheus.NewDesc(
+			prometheus.BuildFQName(Namespace, subsystem, "current_worker_processes"),
+			"The current number of worker processes that are running in the application pool (CurrentWorkerProcesses)",
+			[]string{"app"},
+			nil,
+		),
+		MaximumWorkerProcesses: prometheus.NewDesc(
+			prometheus.BuildFQName(Namespace, subsystem, "maximum_worker_processes"),
+			"The maximum number of worker processes that have been created for the application pool since Windows Process Activation Service (WAS) started (MaximumWorkerProcesses)",
+			[]string{"app"},
+			nil,
+		),
+		RecentWorkerProcessFailures: prometheus.NewDesc(
+			prometheus.BuildFQName(Namespace, subsystem, "recent_worker_process_failures"),
+			"The number of times that worker processes for the application pool failed during the rapid-fail protection interval (RecentWorkerProcessFailures)",
+			[]string{"app"},
+			nil,
+		),
+
+		// Counters
+		TimeSinceLastWorkerProcessFailure: prometheus.NewDesc(
+			prometheus.BuildFQName(Namespace, subsystem, "time_since_last_worker_process_failure"),
+			"The length of time, in seconds, since the last worker process failure occurred for the application pool (TimeSinceLastWorkerProcessFailure)",
+			[]string{"app"},
+			nil,
+		),
+		TotalApplicationPoolRecycles: prometheus.NewDesc(
+			prometheus.BuildFQName(Namespace, subsystem, "total_application_pool_recycles"),
+			"The number of times that the application pool has been recycled since Windows Process Activation Service (WAS) started (TotalApplicationPoolRecycles)",
+			[]string{"app"},
+			nil,
+		),
+		TotalApplicationPoolUptime: prometheus.NewDesc(
+			prometheus.BuildFQName(Namespace, subsystem, "total_application_pool_start_time"),
+			"The unix timestamp for the application pool of when the Windows Process Activation Service (WAS) started (TotalApplicationPoolUptime)",
+			[]string{"app"},
+			nil,
+		),
+		TotalWorkerProcessesCreated: prometheus.NewDesc(
+			prometheus.BuildFQName(Namespace, subsystem, "total_worker_processes_created"),
+			"The number of worker processes created for the application pool since Windows Process Activation Service (WAS) started (TotalWorkerProcessesCreated)",
+			[]string{"app"},
+			nil,
+		),
+		TotalWorkerProcessFailures: prometheus.NewDesc(
+			prometheus.BuildFQName(Namespace, subsystem, "total_worker_process_failures"),
+			"The number of times that worker processes have crashed since the application pool was started (TotalWorkerProcessFailures)",
+			[]string{"app"},
+			nil,
+		),
+		TotalWorkerProcessPingFailures: prometheus.NewDesc(
+			prometheus.BuildFQName(Namespace, subsystem, "total_worker_process_ping_failures"),
+			"The number of times that Windows Process Activation Service (WAS) did not receive a response to ping messages sent to a worker process (TotalWorkerProcessPingFailures)",
+			[]string{"app"},
+			nil,
+		),
+		TotalWorkerProcessShutdownFailures: prometheus.NewDesc(
+			prometheus.BuildFQName(Namespace, subsystem, "total_worker_process_shutdown_failures"),
+			"The number of times that Windows Process Activation Service (WAS) failed to shut down a worker process (TotalWorkerProcessShutdownFailures)",
+			[]string{"app"},
+			nil,
+		),
+		TotalWorkerProcessStartupFailures: prometheus.NewDesc(
+			prometheus.BuildFQName(Namespace, subsystem, "total_worker_process_startup_failures"),
+			"The number of times that Windows Process Activation Service (WAS) failed to start a worker process (TotalWorkerProcessStartupFailures)",
+			[]string{"app"},
+			nil,
+		),
+
+		// App pool filters are compiled from the app-* flags, independently of the site filters.
+		appWhitelistPattern: regexp.MustCompile(fmt.Sprintf("^(?:%s)$", *appWhitelist)),
+		appBlacklistPattern: regexp.MustCompile(fmt.Sprintf("^(?:%s)$", *appBlacklist)),
 	}, nil
 }
 
@@ -245,6 +355,40 @@ type Win32_PerfRawData_W3SVC_WebService struct {
 	TotalUnlockRequests uint32
 }
 
+// Win32_PerfRawData_APPPOOLCountersProvider_APPPOOLWAS receives the rows of the WMI
+// class of the same name; collect emits metrics for every row except "_Total".
+type Win32_PerfRawData_APPPOOLCountersProvider_APPPOOLWAS struct {
+	Name             string
+	Frequency_Object uint64
+	Timestamp_Object uint64
+
+	CurrentApplicationPoolState        uint32
+	CurrentApplicationPoolUptime       uint64
+	CurrentWorkerProcesses             uint32
+	MaximumWorkerProcesses             uint32
+	RecentWorkerProcessFailures        uint32
+	TimeSinceLastWorkerProcessFailure  uint64
+	TotalApplicationPoolRecycles       uint32
+	TotalApplicationPoolUptime         uint64
+	TotalWorkerProcessesCreated        uint32
+	TotalWorkerProcessFailures         uint32
+	TotalWorkerProcessPingFailures     uint32
+	TotalWorkerProcessShutdownFailures uint32
+	TotalWorkerProcessStartupFailures  uint32
+}
+
+// ApplicationStates maps a CurrentApplicationPoolState value to the text used
+// for the "state" label of the application pool state metric.
+var ApplicationStates = map[uint32]string{
+	1: "Uninitialized",
+	2: "Initialized",
+	3: "Running",
+	4: "Disabling",
+	5: "Disabled",
+	6: "Shutdown Pending",
+	7: "Delete Pending",
+}
+
 func (c *IISCollector) collect(ch chan<- prometheus.Metric) (*prometheus.Desc, error) {
 	var dst []Win32_PerfRawData_W3SVC_WebService
 	q := wmi.CreateQuery(&dst, "")
@@ -498,6 +642,126 @@ func (c *IISCollector) collect(ch chan<- prometheus.Metric) (*prometheus.Desc, e
 		)
 
 	}
+
+	var dst2 []Win32_PerfRawData_APPPOOLCountersProvider_APPPOOLWAS
+	q2 := wmi.CreateQuery(&dst2, "")
+	if err := wmi.Query(q2, &dst2); err != nil {
+		return nil, err
+	}
+
+	for _, app := range dst2 {
+		if app.Name == "_Total" ||
+			c.appBlacklistPattern.MatchString(app.Name) ||
+			!c.appWhitelistPattern.MatchString(app.Name) {
+			continue
+		}
+
+		// Gauges
+		// Emit one series per known state: 1 for the pool's current state, 0 otherwise.
+		for key, label := range ApplicationStates {
+			isCurrentState := 0.0
+			if key == app.CurrentApplicationPoolState {
+				isCurrentState = 1.0
+			}
+			ch <- prometheus.MustNewConstMetric(
+				c.CurrentApplicationPoolState,
+				prometheus.GaugeValue,
+				isCurrentState,
+				app.Name,
+				label,
+			)
+		}
+
+		ch <- prometheus.MustNewConstMetric(
+			c.CurrentApplicationPoolUptime,
+			prometheus.GaugeValue,
+			// convert from Windows timestamp (1 jan 1601) to unix timestamp (1 jan 1970);
+			// subtract as int64 so a raw value below the epoch offset cannot wrap around
+			float64(int64(app.CurrentApplicationPoolUptime)-116444736000000000) / float64(app.Frequency_Object),
+			app.Name,
+		)
+
+		ch <- prometheus.MustNewConstMetric(
+			c.CurrentWorkerProcesses,
+			prometheus.GaugeValue,
+			float64(app.CurrentWorkerProcesses),
+			app.Name,
+		)
+
+		ch <- prometheus.MustNewConstMetric(
+			c.MaximumWorkerProcesses,
+			prometheus.GaugeValue,
+			float64(app.MaximumWorkerProcesses),
+			app.Name,
+		)
+
+		ch <- prometheus.MustNewConstMetric(
+			c.RecentWorkerProcessFailures,
+			prometheus.GaugeValue,
+			float64(app.RecentWorkerProcessFailures),
+			app.Name,
+		)
+
+		ch <- prometheus.MustNewConstMetric(
+			c.TimeSinceLastWorkerProcessFailure,
+			prometheus.GaugeValue,
+			float64(app.TimeSinceLastWorkerProcessFailure),
+			app.Name,
+		)
+
+		// Counters
+		ch <- prometheus.MustNewConstMetric(
+			c.TotalApplicationPoolRecycles,
+			prometheus.CounterValue,
+			float64(app.TotalApplicationPoolRecycles),
+			app.Name,
+		)
+
+		ch <- prometheus.MustNewConstMetric(
+			c.TotalApplicationPoolUptime,
+			prometheus.CounterValue,
+			// convert from Windows timestamp (1 jan 1601) to unix timestamp (1 jan 1970);
+			// subtract as int64 so a raw value below the epoch offset cannot wrap around
+			float64(int64(app.TotalApplicationPoolUptime)-116444736000000000) / float64(app.Frequency_Object),
+			app.Name,
+		)
+
+		ch <- prometheus.MustNewConstMetric(
+			c.TotalWorkerProcessesCreated,
+			prometheus.CounterValue,
+			float64(app.TotalWorkerProcessesCreated),
+			app.Name,
+		)
+
+		ch <- prometheus.MustNewConstMetric(
+			c.TotalWorkerProcessFailures,
+			prometheus.CounterValue,
+			float64(app.TotalWorkerProcessFailures),
+			app.Name,
+		)
+
+		ch <- prometheus.MustNewConstMetric(
+			c.TotalWorkerProcessPingFailures,
+			prometheus.CounterValue,
+			float64(app.TotalWorkerProcessPingFailures),
+			app.Name,
+		)
+
+		ch <- prometheus.MustNewConstMetric(
+			c.TotalWorkerProcessShutdownFailures,
+			prometheus.CounterValue,
+			float64(app.TotalWorkerProcessShutdownFailures),
+			app.Name,
+		)
+
+		ch <- prometheus.MustNewConstMetric(
+			c.TotalWorkerProcessStartupFailures,
+			prometheus.CounterValue,
+			float64(app.TotalWorkerProcessStartupFailures),
+			app.Name,
+		)
+
+	}
 
 	return nil, nil
 }