Add App Pool metrics to IIS module (#70)

* Initial attempt at app pool collector

* Rename and tweak

* Attempt to convert uptime to unix timestamp

* Merge app pool metrics with existing IIS modules

* Update comments to reference new class

* Add state label to CurrentApplicationPoolState metric

* Return 0 value metrics for all non-active app states

Return 1 for active state

* Remove unnecessary float cast

* Add calculation for uptime

* Convert iis uptimes to unix timestamps and alter help msg

* Rename application pool uptime to start_time
This commit is contained in:
Simon Willcock
2017-04-26 23:19:33 +10:00
committed by Calle Pettersson
parent 012e143601
commit 9d515255a6

View File

@@ -1,5 +1,7 @@
// returns data points from Win32_PerfRawData_W3SVC_WebService
// returns data points from the following classes:
// - Win32_PerfRawData_W3SVC_WebService
// https://msdn.microsoft.com/en-us/library/aa394345 - Win32_OperatingSystem class
// - Win32_PerfRawData_APPPOOLCountersProvider_APPPOOLWAS
package collector
@@ -20,9 +22,12 @@ func init() {
// Command-line flags that select which IIS objects are reported. Every value
// is a regular expression. A name is included only when it matches the
// whitelist and does not match the blacklist (see the flag help texts).
var (
// Site filters. NewIISCollector compiles both values wrapped as ^(?:...)$,
// so an expression has to match the entire site name.
siteWhitelist = flag.String("collector.iis.site-whitelist", ".+", "Regexp of sites to whitelist. Site name must both match whitelist and not match blacklist to be included.")
siteBlacklist = flag.String("collector.iis.site-blacklist", "", "Regexp of sites to blacklist. Site name must both match whitelist and not match blacklist to be included.")
// Application ("app") filters, with the same whitelist/blacklist semantics
// as the site filters according to their help texts.
appWhitelist = flag.String("collector.iis.app-whitelist", ".+", "Regexp of apps to whitelist. App name must both match whitelist and not match blacklist to be included.")
appBlacklist = flag.String("collector.iis.app-blacklist", "", "Regexp of apps to blacklist. App name must both match whitelist and not match blacklist to be included.")
)
// A IISCollector is a Prometheus collector for WMI Win32_PerfRawData_W3SVC_WebService metrics
// An IISCollector is a Prometheus collector for WMI Win32_PerfRawData_W3SVC_WebService
// and Win32_PerfRawData_APPPOOLCountersProvider_APPPOOLWAS metrics
type IISCollector struct {
CurrentAnonymousUsers *prometheus.Desc
CurrentBlockedAsyncIORequests *prometheus.Desc
@@ -49,6 +54,23 @@ type IISCollector struct {
siteWhitelistPattern *regexp.Regexp
siteBlacklistPattern *regexp.Regexp
CurrentApplicationPoolState *prometheus.Desc
CurrentApplicationPoolUptime *prometheus.Desc
CurrentWorkerProcesses *prometheus.Desc
MaximumWorkerProcesses *prometheus.Desc
RecentWorkerProcessFailures *prometheus.Desc
TimeSinceLastWorkerProcessFailure *prometheus.Desc
TotalApplicationPoolRecycles *prometheus.Desc
TotalApplicationPoolUptime *prometheus.Desc
TotalWorkerProcessesCreated *prometheus.Desc
TotalWorkerProcessFailures *prometheus.Desc
TotalWorkerProcessPingFailures *prometheus.Desc
TotalWorkerProcessShutdownFailures *prometheus.Desc
TotalWorkerProcessStartupFailures *prometheus.Desc
appWhitelistPattern *regexp.Regexp
appBlacklistPattern *regexp.Regexp
}
// NewIISCollector ...
@@ -56,6 +78,7 @@ func NewIISCollector() (Collector, error) {
const subsystem = "iis"
return &IISCollector{
// Websites
// Gauges
CurrentAnonymousUsers: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "current_anonymous_users"),
@@ -188,6 +211,92 @@ func NewIISCollector() (Collector, error) {
siteWhitelistPattern: regexp.MustCompile(fmt.Sprintf("^(?:%s)$", *siteWhitelist)),
siteBlacklistPattern: regexp.MustCompile(fmt.Sprintf("^(?:%s)$", *siteBlacklist)),
// App Pools
// Gauges
CurrentApplicationPoolState: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "current_application_pool_state"),
"The current status of the application pool (1 - Uninitialized, 2 - Initialized, 3 - Running, 4 - Disabling, 5 - Disabled, 6 - Shutdown Pending, 7 - Delete Pending) (CurrentApplicationPoolState)",
[]string{"app","state"},
nil,
),
CurrentApplicationPoolUptime: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "current_application_pool_start_time"),
"The unix timestamp for the application pool start time (CurrentApplicationPoolUptime)",
[]string{"app"},
nil,
),
CurrentWorkerProcesses: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "current_worker_processes"),
"The current number of worker processes that are running in the application pool (CurrentWorkerProcesses)",
[]string{"app"},
nil,
),
MaximumWorkerProcesses: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "maximum_worker_processes"),
"The maximum number of worker processes that have been created for the application pool since Windows Process Activation Service (WAS) started (MaximumWorkerProcesses)",
[]string{"app"},
nil,
),
RecentWorkerProcessFailures: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "recent_worker_process_failures"),
"The number of times that worker processes for the application pool failed during the rapid-fail protection interval (RecentWorkerProcessFailures)",
[]string{"app"},
nil,
),
// Counters
TimeSinceLastWorkerProcessFailure: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "time_since_last_worker_process_failure"),
"The length of time, in seconds, since the last worker process failure occurred for the application pool (TimeSinceLastWorkerProcessFailure)",
[]string{"app"},
nil,
),
TotalApplicationPoolRecycles: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "total_application_pool_recycles"),
"The number of times that the application pool has been recycled since Windows Process Activation Service (WAS) started (TotalApplicationPoolRecycles)",
[]string{"app"},
nil,
),
TotalApplicationPoolUptime: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "total_application_pool_start_time"),
"The unix timestamp for the application pool of when the Windows Process Activation Service (WAS) started (TotalApplicationPoolUptime)",
[]string{"app"},
nil,
),
TotalWorkerProcessesCreated: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "total_worker_processes_created"),
"The number of worker processes created for the application pool since Windows Process Activation Service (WAS) started (TotalWorkerProcessesCreated)",
[]string{"app"},
nil,
),
TotalWorkerProcessFailures: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "total_worker_process_failures"),
"The number of times that worker processes have crashed since the application pool was started (TotalWorkerProcessFailures)",
[]string{"app"},
nil,
),
TotalWorkerProcessPingFailures: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "total_worker_process_ping_failures"),
"The number of times that Windows Process Activation Service (WAS) did not receive a response to ping messages sent to a worker process (TotalWorkerProcessPingFailures)",
[]string{"app"},
nil,
),
TotalWorkerProcessShutdownFailures: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "total_worker_process_shutdown_failures"),
"The number of times that Windows Process Activation Service (WAS) failed to shut down a worker process (TotalWorkerProcessShutdownFailures)",
[]string{"app"},
nil,
),
TotalWorkerProcessStartupFailures: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "total_worker_process_startup_failures"),
"The number of times that Windows Process Activation Service (WAS) failed to start a worker process (TotalWorkerProcessStartupFailures)",
[]string{"app"},
nil,
),
// Application pool filters are built from the app-specific flags
// (collector.iis.app-whitelist / collector.iis.app-blacklist), not from the
// site flags, so that sites and app pools can be filtered independently.
// The ^(?:...)$ wrapper forces the expression to match the whole pool name.
appWhitelistPattern: regexp.MustCompile(fmt.Sprintf("^(?:%s)$", *appWhitelist)),
appBlacklistPattern: regexp.MustCompile(fmt.Sprintf("^(?:%s)$", *appBlacklist)),
}, nil
}
@@ -245,6 +354,36 @@ type Win32_PerfRawData_W3SVC_WebService struct {
TotalUnlockRequests uint32
}
// Win32_PerfRawData_APPPOOLCountersProvider_APPPOOLWAS is the destination type
// for the WMI query that collect issues through wmi.CreateQuery / wmi.Query.
// collect turns each returned element into the per-application-pool metrics.
// Field names presumably mirror the properties of the WMI class of the same
// name, so they should not be renamed — TODO confirm against the wmi package.
type Win32_PerfRawData_APPPOOLCountersProvider_APPPOOLWAS struct {
// Name is used as the "app" label value; collect skips the "_Total" entry
// and applies the app whitelist/blacklist patterns to it.
Name string
// Frequency_Object is the divisor collect applies to the raw uptime values.
Frequency_Object uint64
// Timestamp_Object is fetched but not read by the visible collection code.
Timestamp_Object uint64
// CurrentApplicationPoolState is a numeric state code; collect compares it
// against the keys of ApplicationStates.
CurrentApplicationPoolState uint32
// CurrentApplicationPoolUptime is a raw value. collect subtracts the
// 1601 -> 1970 epoch offset (116444736000000000) and divides by
// Frequency_Object to export it as a start-time timestamp.
CurrentApplicationPoolUptime uint64
CurrentWorkerProcesses uint32
MaximumWorkerProcesses uint32
RecentWorkerProcessFailures uint32
// TimeSinceLastWorkerProcessFailure is exported unmodified (cast to float64)
// as a gauge.
TimeSinceLastWorkerProcessFailure uint64
TotalApplicationPoolRecycles uint32
// TotalApplicationPoolUptime is converted the same way as
// CurrentApplicationPoolUptime before being exported.
TotalApplicationPoolUptime uint64
// The remaining totals are exported unmodified (cast to float64) as counters.
TotalWorkerProcessesCreated uint32
TotalWorkerProcessFailures uint32
TotalWorkerProcessPingFailures uint32
TotalWorkerProcessShutdownFailures uint32
TotalWorkerProcessStartupFailures uint32
}
// ApplicationStates maps the numeric CurrentApplicationPoolState code to the
// name used as the "state" label value. collect emits one
// current_application_pool_state series per entry for every application pool:
// the entry whose key equals the pool's current state gets the value 1, all
// others get 0. A state code that is not listed here therefore yields only
// zero-valued series for that pool.
var ApplicationStates = map[uint32]string{
1: "Uninitialized",
2: "Initialized",
3: "Running",
4: "Disabling",
5: "Disabled",
6: "Shutdown Pending",
7: "Delete Pending",
}
func (c *IISCollector) collect(ch chan<- prometheus.Metric) (*prometheus.Desc, error) {
var dst []Win32_PerfRawData_W3SVC_WebService
q := wmi.CreateQuery(&dst, "")
@@ -498,6 +637,124 @@ func (c *IISCollector) collect(ch chan<- prometheus.Metric) (*prometheus.Desc, e
)
}
var dst2 []Win32_PerfRawData_APPPOOLCountersProvider_APPPOOLWAS
q2 := wmi.CreateQuery(&dst2, "")
if err := wmi.Query(q2, &dst2); err != nil {
return nil, err
}
for _, app := range dst2 {
if app.Name == "_Total" ||
c.appBlacklistPattern.MatchString(app.Name) ||
!c.appWhitelistPattern.MatchString(app.Name) {
continue
}
// Gauges
for key, label := range ApplicationStates {
isCurrentState := 0.0
if key == app.CurrentApplicationPoolState {
isCurrentState = 1.0
}
ch <- prometheus.MustNewConstMetric(
c.CurrentApplicationPoolState,
prometheus.GaugeValue,
isCurrentState,
app.Name,
label,
)
}
ch <- prometheus.MustNewConstMetric(
c.CurrentApplicationPoolUptime,
prometheus.GaugeValue,
// convert from Windows timestamp (1 jan 1601) to unix timestamp (1 jan 1970)
float64(app.CurrentApplicationPoolUptime - 116444736000000000) / float64(app.Frequency_Object),
app.Name,
)
ch <- prometheus.MustNewConstMetric(
c.CurrentWorkerProcesses,
prometheus.GaugeValue,
float64(app.CurrentWorkerProcesses),
app.Name,
)
ch <- prometheus.MustNewConstMetric(
c.MaximumWorkerProcesses,
prometheus.GaugeValue,
float64(app.MaximumWorkerProcesses),
app.Name,
)
ch <- prometheus.MustNewConstMetric(
c.RecentWorkerProcessFailures,
prometheus.GaugeValue,
float64(app.RecentWorkerProcessFailures),
app.Name,
)
ch <- prometheus.MustNewConstMetric(
c.TimeSinceLastWorkerProcessFailure,
prometheus.GaugeValue,
float64(app.TimeSinceLastWorkerProcessFailure),
app.Name,
)
// Counters
ch <- prometheus.MustNewConstMetric(
c.TotalApplicationPoolRecycles,
prometheus.CounterValue,
float64(app.TotalApplicationPoolRecycles),
app.Name,
)
ch <- prometheus.MustNewConstMetric(
c.TotalApplicationPoolUptime,
prometheus.CounterValue,
// convert from Windows timestamp (1 jan 1601) to unix timestamp (1 jan 1970)
float64(app.TotalApplicationPoolUptime - 116444736000000000) / float64(app.Frequency_Object),
app.Name,
)
ch <- prometheus.MustNewConstMetric(
c.TotalWorkerProcessesCreated,
prometheus.CounterValue,
float64(app.TotalWorkerProcessesCreated),
app.Name,
)
ch <- prometheus.MustNewConstMetric(
c.TotalWorkerProcessFailures,
prometheus.CounterValue,
float64(app.TotalWorkerProcessFailures),
app.Name,
)
ch <- prometheus.MustNewConstMetric(
c.TotalWorkerProcessPingFailures,
prometheus.CounterValue,
float64(app.TotalWorkerProcessPingFailures),
app.Name,
)
ch <- prometheus.MustNewConstMetric(
c.TotalWorkerProcessShutdownFailures,
prometheus.CounterValue,
float64(app.TotalWorkerProcessShutdownFailures),
app.Name,
)
ch <- prometheus.MustNewConstMetric(
c.TotalWorkerProcessStartupFailures,
prometheus.CounterValue,
float64(app.TotalWorkerProcessStartupFailures),
app.Name,
)
}
return nil, nil
}