diff --git a/README.md b/README.md index af6871f1..17a38e82 100644 --- a/README.md +++ b/README.md @@ -41,6 +41,7 @@ Name | Description | Enabled by default [os](docs/collector.os.md) | OS metrics (memory, processes, users) | ✓ [process](docs/collector.process.md) | Per-process metrics | [remote_fx](docs/collector.remote_fx.md) | RemoteFX protocol (RDP) metrics | +[scheduled_task](docs/collector.scheduled_task.md) | Scheduled Tasks metrics | [service](docs/collector.service.md) | Service state metrics | ✓ [smtp](docs/collector.smtp.md) | IIS SMTP Server | [system](docs/collector.system.md) | System calls | ✓ @@ -78,7 +79,7 @@ Flag | Description | Default value `--telemetry.path` | URL path for surfacing collected metrics. | `/metrics` `--telemetry.max-requests` | Maximum number of concurrent requests. 0 to disable. | `5` `--collectors.enabled` | Comma-separated list of collectors to use. Use `[defaults]` as a placeholder which gets expanded containing all the collectors enabled by default." | `[defaults]` -`--collectors.print` | If true, print available collectors and exit. | +`--collectors.print` | If true, print available collectors and exit. | `--scrape.timeout-margin` | Seconds to subtract from the timeout allowed by the client. Tune to allow for overhead or high loads. | `0.5` `--web.config.file` | A [web config][web_config] for setting up TLS and Auth | None @@ -151,7 +152,7 @@ When there are multiple processes with the same name, WMI represents those after Using `[defaults]` with `--collectors.enabled` argument which gets expanded with all default collectors. .\windows_exporter.exe --collectors.enabled "[defaults],process,container" - + This enables the additional process and container collectors on top of the defaults. 
### Using a configuration file
diff --git a/collector/scheduled_task.go b/collector/scheduled_task.go
new file mode 100644
index 00000000..dd34dcb3
--- /dev/null
+++ b/collector/scheduled_task.go
@@ -0,0 +1,394 @@
+//go:build windows
+// +build windows
+
+package collector
+
+import (
+	"fmt"
+	"regexp"
+	"runtime"
+	"strings"
+
+	ole "github.com/go-ole/go-ole"
+	"github.com/go-ole/go-ole/oleutil"
+	"github.com/prometheus-community/windows_exporter/log"
+	"github.com/prometheus/client_golang/prometheus"
+	"gopkg.in/alecthomas/kingpin.v2"
+)
+
+var (
+	taskWhitelist = kingpin.Flag(
+		"collector.scheduled_task.whitelist",
+		"Regexp of tasks to whitelist. Task path must both match whitelist and not match blacklist to be included.",
+	).Default(".+").String()
+	taskBlacklist = kingpin.Flag(
+		"collector.scheduled_task.blacklist",
+		"Regexp of tasks to blacklist. Task path must both match whitelist and not match blacklist to be included.",
+	).String()
+)
+
+// ScheduledTaskCollector collects metrics about the tasks registered with the
+// Windows Task Scheduler, filtered by the whitelist and blacklist patterns.
+type ScheduledTaskCollector struct {
+	LastResult *prometheus.Desc
+	MissedRuns *prometheus.Desc
+	State      *prometheus.Desc
+
+	taskWhitelistPattern *regexp.Regexp
+	taskBlacklistPattern *regexp.Regexp
+}
+
+// TaskState is the numeric state of a registered task as read from its "State" property.
+// https://docs.microsoft.com/en-us/windows/desktop/api/taskschd/ne-taskschd-task_state
+type TaskState uint
+
+// TaskResult is the numeric code read from the "LastTaskResult" property of a task.
+type TaskResult uint
+
+const (
+	TASK_STATE_UNKNOWN TaskState = iota
+	TASK_STATE_DISABLED
+	TASK_STATE_QUEUED
+	TASK_STATE_READY
+	TASK_STATE_RUNNING
+	TASK_RESULT_SUCCESS TaskResult = 0x0
+)
+
+// ScheduledTask holds the properties read from a single registered task.
+type ScheduledTask struct {
+	Name            string
+	Path            string
+	Enabled         bool
+	State           TaskState
+	MissedRunsCount float64
+	LastTaskResult  TaskResult
+}
+
+// ScheduledTasks is a list of ScheduledTask values.
+type ScheduledTasks []ScheduledTask
+
+func init() {
+	registerCollector("scheduled_task", NewScheduledTask)
+}
+
+// NewScheduledTask builds the scheduled_task collector. It compiles the
+// whitelist and blacklist flag values into fully anchored regular expressions
+// and creates the metric descriptors. An invalid regular expression is
+// returned as an error rather than causing a panic.
+func NewScheduledTask() (Collector, error) {
+	const subsystem = "scheduled_task"
+
+	// COM is deliberately not initialized here: initialization is bound to an
+	// OS thread and this constructor performs no COM calls. getScheduledTasks
+	// initializes COM on the thread that talks to the Task Scheduler.
+	whitelistPattern, err := regexp.Compile(fmt.Sprintf("^(?:%s)$", *taskWhitelist))
+	if err != nil {
+		return nil, fmt.Errorf("compiling scheduled_task whitelist regexp: %w", err)
+	}
+
+	blacklistPattern, err := regexp.Compile(fmt.Sprintf("^(?:%s)$", *taskBlacklist))
+	if err != nil {
+		return nil, fmt.Errorf("compiling scheduled_task blacklist regexp: %w", err)
+	}
+
+	return &ScheduledTaskCollector{
+		LastResult: prometheus.NewDesc(
+			prometheus.BuildFQName(Namespace, subsystem, "last_result"),
+			"The result that was returned the last time the registered task was run",
+			[]string{"task"},
+			nil,
+		),
+
+		MissedRuns: prometheus.NewDesc(
+			prometheus.BuildFQName(Namespace, subsystem, "missed_runs"),
+			"The number of times the registered task missed a scheduled run",
+			[]string{"task"},
+			nil,
+		),
+
+		State: prometheus.NewDesc(
+			prometheus.BuildFQName(Namespace, subsystem, "state"),
+			"The current state of a scheduled task",
+			[]string{"task", "state"},
+			nil,
+		),
+
+		taskWhitelistPattern: whitelistPattern,
+		taskBlacklistPattern: blacklistPattern,
+	}, nil
+}
+
+// Collect sends the scheduled task metrics to ch. A collection failure is
+// logged and returned to the caller.
+func (c *ScheduledTaskCollector) Collect(ctx *ScrapeContext, ch chan<- prometheus.Metric) error {
+	if desc, err := c.collect(ch); err != nil {
+		log.Error("failed collecting scheduled_task metrics:", desc, err)
+		return err
+	}
+
+	return nil
+}
+
+// TASK_STATES lists the values of the "state" label; one series per value is emitted for every task.
+var TASK_STATES = []string{"disabled", "queued", "ready", "running", "unknown"}
+
+// collect enumerates all scheduled tasks, skips those excluded by the
+// whitelist/blacklist patterns and emits last_result, missed_runs and one
+// state series per entry of TASK_STATES for each remaining task. The returned
+// descriptor is always nil.
+func (c *ScheduledTaskCollector) collect(ch chan<- prometheus.Metric) (*prometheus.Desc, error) {
+	scheduledTasks, err := getScheduledTasks()
+	if err != nil {
+		return nil, err
+	}
+
+	for _, task := range scheduledTasks {
+		if c.taskBlacklistPattern.MatchString(task.Path) ||
+			!c.taskWhitelistPattern.MatchString(task.Path) {
+			continue
+		}
+
+		// last_result is a success flag: 1 when the last run returned
+		// TASK_RESULT_SUCCESS, 0 for any other result code.
+		lastResult := 0.0
+		if task.LastTaskResult == TASK_RESULT_SUCCESS {
+			lastResult = 1.0
+		}
+
+		ch <- prometheus.MustNewConstMetric(
+			c.LastResult,
+			prometheus.GaugeValue,
+			lastResult,
+			task.Path,
+		)
+
+		ch <- prometheus.MustNewConstMetric(
+			c.MissedRuns,
+			prometheus.GaugeValue,
+			task.MissedRunsCount,
+			task.Path,
+		)
+
+		// The task state is the same for every label value, so lower-case it once.
+		currentState := strings.ToLower(task.State.String())
+
+		for _, state := range TASK_STATES {
+			var stateValue float64
+
+			if currentState == state {
+				stateValue = 1.0
+			}
+
+			ch <- prometheus.MustNewConstMetric(
+				c.State,
+				prometheus.GaugeValue,
+				stateValue,
+				task.Path,
+				state,
+			)
+		}
+	}
+
+	return nil, nil
+}
+
+// SCHEDULED_TASK_PROGRAM_ID is the program ID resolved to the class of the Task Scheduler service object.
+const SCHEDULED_TASK_PROGRAM_ID = "Schedule.Service.1"
+
+// S_FALSE is returned by CoInitialize if it was already called on this thread.
+const S_FALSE = 0x00000001
+
+// getScheduledTasks connects to the Task Scheduler over COM and returns every
+// task found in the root folder and, recursively, in its sub-folders.
+//
+// COM initialization is bound to an OS thread, so the goroutine is pinned to
+// its thread and COM is initialized there for the duration of the call.
+func getScheduledTasks() (scheduledTasks ScheduledTasks, err error) {
+	runtime.LockOSThread()
+	defer runtime.UnlockOSThread()
+
+	if initErr := ole.CoInitializeEx(0, ole.COINIT_MULTITHREADED); initErr != nil {
+		// S_FALSE only means COM was already initialized on this thread.
+		oleErr, ok := initErr.(*ole.OleError)
+		if !ok || (oleErr.Code() != ole.S_OK && oleErr.Code() != S_FALSE) {
+			return nil, initErr
+		}
+	}
+	defer ole.CoUninitialize()
+
+	schedClassID, err := ole.ClassIDFrom(SCHEDULED_TASK_PROGRAM_ID)
+	if err != nil {
+		return nil, err
+	}
+
+	taskSchedulerObj, err := ole.CreateInstance(schedClassID, nil)
+	if err != nil {
+		return nil, err
+	}
+	if taskSchedulerObj == nil {
+		return nil, fmt.Errorf("creating %s instance: no object returned", SCHEDULED_TASK_PROGRAM_ID)
+	}
+	defer taskSchedulerObj.Release()
+
+	// QueryInterface reports a failure as an error; MustQueryInterface would panic.
+	taskServiceObj, err := taskSchedulerObj.QueryInterface(ole.IID_IDispatch)
+	if err != nil {
+		return nil, err
+	}
+	// Register the release before Connect so that a failed Connect does not leak the object.
+	defer taskServiceObj.Release()
+
+	if _, err = oleutil.CallMethod(taskServiceObj, "Connect"); err != nil {
+		return nil, err
+	}
+
+	res, err := oleutil.CallMethod(taskServiceObj, "GetFolder", `\`)
+	if err != nil {
+		return nil, err
+	}
+
+	rootFolderObj := res.ToIDispatch()
+	defer rootFolderObj.Release()
+
+	if err = fetchTasksRecursively(rootFolderObj, &scheduledTasks); err != nil {
+		return nil, err
+	}
+
+	return scheduledTasks, nil
+}
+
+// fetchTasksInFolder parses every task stored directly in folder and appends
+// it to scheduledTasks. The first parse error aborts the enumeration.
+func fetchTasksInFolder(folder *ole.IDispatch, scheduledTasks *ScheduledTasks) error {
+	// NOTE(review): 1 is presumably TASK_ENUM_HIDDEN (include hidden tasks) - confirm against the Task Scheduler docs.
+	res, err := oleutil.CallMethod(folder, "GetTasks", 1)
+	if err != nil {
+		return err
+	}
+
+	tasks := res.ToIDispatch()
+	defer tasks.Release()
+
+	return oleutil.ForEach(tasks, func(v *ole.VARIANT) error {
+		task := v.ToIDispatch()
+		// Each enumerated item holds a COM reference that has to be released here.
+		defer task.Release()
+
+		parsedTask, err := parseTask(task)
+		if err != nil {
+			return err
+		}
+
+		*scheduledTasks = append(*scheduledTasks, parsedTask)
+
+		return nil
+	})
+}
+
+// fetchTasksRecursively appends the tasks of folder, followed by the tasks of
+// each of its sub-folders (depth first), to scheduledTasks.
+func fetchTasksRecursively(folder *ole.IDispatch, scheduledTasks *ScheduledTasks) error {
+	if err := fetchTasksInFolder(folder, scheduledTasks); err != nil {
+		return err
+	}
+
+	// NOTE(review): the GetFolders flags argument appears to be documented as reserved (0) - confirm 1 is intended.
+	res, err := oleutil.CallMethod(folder, "GetFolders", 1)
+	if err != nil {
+		return err
+	}
+
+	subFolders := res.ToIDispatch()
+	defer subFolders.Release()
+
+	return oleutil.ForEach(subFolders, func(v *ole.VARIANT) error {
+		subFolder := v.ToIDispatch()
+		// Release each sub-folder once its subtree has been visited.
+		defer subFolder.Release()
+
+		return fetchTasksRecursively(subFolder, scheduledTasks)
+	})
+}
+
+// parseTask reads the Name, Path, Enabled, State, NumberOfMissedRuns and
+// LastTaskResult properties of task into a ScheduledTask. Backslashes in the
+// path are replaced by forward slashes. Every property VARIANT is cleared
+// before returning; if a property cannot be read, the zero ScheduledTask is returned.
+func parseTask(task *ole.IDispatch) (scheduledTask ScheduledTask, err error) {
+	taskNameVar, err := oleutil.GetProperty(task, "Name")
+	if err != nil {
+		return ScheduledTask{}, err
+	}
+	defer clearTaskVariant(taskNameVar, &err)
+
+	taskPathVar, err := oleutil.GetProperty(task, "Path")
+	if err != nil {
+		return ScheduledTask{}, err
+	}
+	defer clearTaskVariant(taskPathVar, &err)
+
+	taskEnabledVar, err := oleutil.GetProperty(task, "Enabled")
+	if err != nil {
+		return ScheduledTask{}, err
+	}
+	defer clearTaskVariant(taskEnabledVar, &err)
+
+	taskStateVar, err := oleutil.GetProperty(task, "State")
+	if err != nil {
+		return ScheduledTask{}, err
+	}
+	defer clearTaskVariant(taskStateVar, &err)
+
+	taskNumberOfMissedRunsVar, err := oleutil.GetProperty(task, "NumberOfMissedRuns")
+	if err != nil {
+		return ScheduledTask{}, err
+	}
+	defer clearTaskVariant(taskNumberOfMissedRunsVar, &err)
+
+	taskLastTaskResultVar, err := oleutil.GetProperty(task, "LastTaskResult")
+	if err != nil {
+		return ScheduledTask{}, err
+	}
+	defer clearTaskVariant(taskLastTaskResultVar, &err)
+
+	taskEnabled, ok := taskEnabledVar.Value().(bool)
+	if !ok {
+		return ScheduledTask{}, fmt.Errorf("scheduled task %q: Enabled property is not a bool", taskPathVar.ToString())
+	}
+
+	return ScheduledTask{
+		Name:            taskNameVar.ToString(),
+		Path:            strings.ReplaceAll(taskPathVar.ToString(), "\\", "/"),
+		Enabled:         taskEnabled,
+		State:           TaskState(taskStateVar.Val),
+		MissedRunsCount: float64(taskNumberOfMissedRunsVar.Val),
+		LastTaskResult:  TaskResult(taskLastTaskResultVar.Val),
+	}, nil
+}
+
+// clearTaskVariant clears v. A failure to clear is stored in *err unless an
+// earlier error is already recorded there.
+func clearTaskVariant(v *ole.VARIANT, err *error) {
+	if clearErr := v.Clear(); clearErr != nil && *err == nil {
+		*err = clearErr
+	}
+}
+
+// String returns the capitalized name of the task state, or the empty string
+// for a value that is not one of the declared TASK_STATE_* constants.
+func (t TaskState) String() string {
+	switch t {
+	case TASK_STATE_UNKNOWN:
+		return "Unknown"
+	case TASK_STATE_DISABLED:
+		return "Disabled"
+	case TASK_STATE_QUEUED:
+		return "Queued"
+	case TASK_STATE_READY:
+		return "Ready"
+	case TASK_STATE_RUNNING:
+		return "Running"
+	default:
+		return ""
+	}
+}
diff --git a/collector/scheduled_task_test.go b/collector/scheduled_task_test.go
new file mode 100644
index 00000000..a294550a
--- /dev/null
+++ b/collector/scheduled_task_test.go
@@ -0,0 +1,9 @@
+package collector
+
+import (
+	"testing"
+)
+
+func BenchmarkScheduledTaskCollector(b *testing.B) {
+	benchmarkCollector(b, "scheduled_task", NewScheduledTask)
+}
diff --git a/docs/README.md b/docs/README.md
index 08526d9a..7f53c93d 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -28,6 +28,7 @@ This directory contains documentation of the collectors in the windows_exporter,
 - [`os`](collector.os.md)
 - [`process`](collector.process.md)
 - [`remote_fx`](collector.remote_fx.md)
+- [`scheduled_task`](collector.scheduled_task.md)
 - [`service`](collector.service.md)
 - [`smtp`](collector.smtp.md)
 - [`system`](collector.system.md)
diff --git a/docs/collector.scheduled_task.md b/docs/collector.scheduled_task.md
new file mode 100644
index 00000000..5c42d7bb
--- /dev/null
+++ b/docs/collector.scheduled_task.md
@@ -0,0 +1,67 @@
+# scheduled_task collector
+
+The scheduled_task collector exposes metrics about Windows Task Scheduler
+
+|||
+-|-
+Metric name prefix | `scheduled_task`
+Data source | OLE
+Enabled by default? | No
+
+## Flags
+
+### `--collector.scheduled_task.whitelist`
+
+If given, the path of the task needs to match the whitelist regexp in order for the corresponding metrics to be reported.
+
+### `--collector.scheduled_task.blacklist`
+
+If given, the path of the task needs to *not* match the blacklist regexp in order for the corresponding metrics to be reported.
+ +## Metrics + +Name | Description | Type | Labels +-----|-------------|------|------- +`windows_scheduled_task_last_result` | The result that was returned the last time the registered task was run | gauge | task +`windows_scheduled_task_missed_runs` | The number of times the registered task missed a scheduled run | gauge | task +`windows_scheduled_task_state` | The current state of a scheduled task | gauge | task, state + +For the values of the `state` label, see below. + +### State + +A task can be in the following states: +- `disabled` +- `queued` +- `ready` +- `running` +- `unknown` + + +### Example metric + +``` +windows_scheduled_task_last_result{task="/Microsoft/Windows/Chkdsk/SyspartRepair"} 1 +windows_scheduled_task_missed_runs{task="/Microsoft/Windows/Chkdsk/SyspartRepair"} 0 +windows_scheduled_task_state{state="disabled",task="/Microsoft/Windows/Chkdsk/SyspartRepair"} 1 +windows_scheduled_task_state{state="queued",task="/Microsoft/Windows/Chkdsk/SyspartRepair"} 0 +windows_scheduled_task_state{state="ready",task="/Microsoft/Windows/Chkdsk/SyspartRepair"} 0 +windows_scheduled_task_state{state="running",task="/Microsoft/Windows/Chkdsk/SyspartRepair"} 0 +windows_scheduled_task_state{state="unknown",task="/Microsoft/Windows/Chkdsk/SyspartRepair"} 0 +``` + +## Useful queries +_This collector does not yet have any useful queries added, we would appreciate your help adding them!_ + +## Alerting examples +**prometheus.rules** +```yaml + - alert: "WindowsScheduledTaskFailure" + expr: "windows_scheduled_task_last_result == 0" + for: "1d" + labels: + severity: "high" + annotations: + summary: "Scheduled Task Failed" + description: "Scheduled task '{{ $labels.task }}' failed for 1 day" +``` diff --git a/docs/example_config.yml b/docs/example_config.yml index f3a3e186..20dec2c5 100644 --- a/docs/example_config.yml +++ b/docs/example_config.yml @@ -5,6 +5,8 @@ collectors: collector: service: services-where: Name='windows_exporter' + scheduled_task: + blacklist: 
/Microsoft/.+ log: level: debug scrape: diff --git a/go.mod b/go.mod index 85300f77..d3d4316d 100644 --- a/go.mod +++ b/go.mod @@ -7,7 +7,7 @@ require ( github.com/StackExchange/wmi v0.0.0-20180725035823-b12b22c5341f github.com/dimchansky/utfbom v1.1.1 github.com/go-kit/log v0.2.0 - github.com/go-ole/go-ole v1.2.1 // indirect + github.com/go-ole/go-ole v1.2.5 github.com/leoluk/perflib_exporter v0.1.1-0.20211204221052-9e3696429c20 github.com/prometheus/client_golang v1.12.1 github.com/prometheus/client_model v0.2.0 diff --git a/go.sum b/go.sum index 07954b0f..b44c8c78 100644 --- a/go.sum +++ b/go.sum @@ -297,8 +297,8 @@ github.com/go-logfmt/logfmt v0.5.1 h1:otpy5pqBCBZ1ng9RQ0dPu4PN7ba75Y/aA+UpowDyNV github.com/go-logfmt/logfmt v0.5.1/go.mod h1:WYhtIu8zTZfxdn5+rREduYbwxfcBr/Vr6KEVveWlfTs= github.com/go-logr/logr v0.1.0/go.mod h1:ixOQHD9gLJUVQQ2ZOR7zLEifBX6tGkNJF4QyIY7sIas= github.com/go-logr/logr v0.2.0/go.mod h1:z6/tIYblkpsD+a4lm/fGIIU9mZ+XfAiaFtq7xTgseGU= -github.com/go-ole/go-ole v1.2.1 h1:2lOsA72HgjxAuMlKpFiCbHTvu44PIVkZ5hqm3RSdI/E= -github.com/go-ole/go-ole v1.2.1/go.mod h1:7FAglXiTm7HKlQRDeOQ6ZNUHidzCWXuZWq/1dTyBNF8= +github.com/go-ole/go-ole v1.2.5 h1:t4MGB5xEDZvXI+0rMjjsfBsD7yAgp/s9ZDkL1JndXwY= +github.com/go-ole/go-ole v1.2.5/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0= github.com/go-openapi/jsonpointer v0.0.0-20160704185906-46af16f9f7b1/go.mod h1:+35s3my2LFTysnkMfxsJBAMHj/DoqoB9knIWoYG/Vk0= github.com/go-openapi/jsonpointer v0.19.2/go.mod h1:3akKfEdA7DF1sugOqz1dVQHBcuDBPKZGEoHC/NkiQRg= github.com/go-openapi/jsonpointer v0.19.3/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg=