Compare commits

..

8 Commits

Author SHA1 Message Date
Calle Pettersson
b0f1e1843b First implementation of AD collector (#93) 2017-07-16 12:19:40 +02:00
Calle Pettersson
00f57c183d Merge pull request #79 from andrewmostello/master
Add Win32_PerfRawData_PerfProc_Process collector
2017-07-16 10:54:23 +02:00
Calle Pettersson
dc76c4227d Remove process index from name label, add README 2017-07-16 09:49:42 +01:00
Calle Pettersson
1ebee26c30 Change scrape_duration_seconds to gauge (#90)
* Change and rename scrape_duration metric to gauge collector_duration, split out collector_success

* Change to const metrics
2017-07-05 13:12:42 +02:00
Martin Lindhe
1db60e22b9 add a Makefile with a 'fmt' target 2017-06-30 22:50:48 +02:00
Calle Pettersson
0c4c15c4ce Update WiX download url (#84)
WiX is now on GitHub, which means stable download urls. Yay!
2017-06-27 14:10:38 +02:00
Andrew Mostello
95b04ec0a1 Add flag to specify "where" in process WMI query
This flag can be used to limit the Win32_PerfRawData_PerfProc_Process
response and pull metrics for only the desired processes.
2017-05-28 22:04:31 -04:00
Andrew Mostello
2c84c5ad8a Add Win32_PerfRawData_PerfProc_Process collector
Collector to scrape per process data metrics from WMI. Collector not
enabled by default.
2017-05-28 10:16:40 -04:00
6 changed files with 1723 additions and 17 deletions

2
Makefile Normal file
View File

@@ -0,0 +1,2 @@
fmt:
gofmt -l -w -s .

View File

@@ -9,6 +9,7 @@ Prometheus exporter for Windows machines, using the WMI (Windows Management Inst
Name | Description | Enabled by default
---------|-------------|--------------------
ad | [Win32_PerfRawData_DirectoryServices_DirectoryServices](https://msdn.microsoft.com/en-us/library/ms803980.aspx) Active Directory |
cpu | [Win32_PerfRawData_PerfOS_Processor](https://msdn.microsoft.com/en-us/library/aa394317(v=vs.90).aspx) metrics (cpu usage) | ✓
cs | [Win32_ComputerSystem](https://msdn.microsoft.com/en-us/library/aa394102) metrics (system properties, num cpus/total memory) | ✓
dns | [Win32_PerfRawData_DNS_DNS](https://technet.microsoft.com/en-us/library/cc977686.aspx) metrics (DNS Server) |
@@ -16,6 +17,7 @@ iis | [Win32_PerfRawData_W3SVC_WebService](https://msdn.microsoft.com/en-us/libr
logical_disk | [Win32_PerfRawData_PerfDisk_LogicalDisk](https://msdn.microsoft.com/en-us/windows/hardware/aa394307(v=vs.71)) metrics (disk I/O) | ✓
net | [Win32_PerfRawData_Tcpip_NetworkInterface](https://technet.microsoft.com/en-us/security/aa394340(v=vs.80)) metrics (network interface I/O) | ✓
os | [Win32_OperatingSystem](https://msdn.microsoft.com/en-us/library/aa394239) metrics (memory, processes, users) | ✓
process | [Win32_PerfRawData_PerfProc_Process](https://msdn.microsoft.com/en-us/library/aa394323(v=vs.85).aspx) metrics (per-process stats) |
service | [Win32_Service](https://msdn.microsoft.com/en-us/library/aa394418(v=vs.85).aspx) metrics (service states) | ✓
system | Win32_PerfRawData_PerfOS_System metrics (system calls) | ✓
vmware | Performance counters installed by the Vmware Guest agent |

1307
collector/ad.go Normal file

File diff suppressed because it is too large Load Diff

383
collector/process.go Normal file
View File

@@ -0,0 +1,383 @@
// returns data points from Win32_PerfRawData_PerfProc_Process
// https://msdn.microsoft.com/en-us/library/aa394323(v=vs.85).aspx - Win32_PerfRawData_PerfProc_Process class
package collector
import (
"bytes"
"flag"
"strconv"
"strings"
"github.com/StackExchange/wmi"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/log"
)
func init() {
Factories["process"] = NewProcessCollector
}
var (
processWhereClause = flag.String("collector.process.processes-where", "", "WQL 'where' clause to use in WMI metrics query. Limits the response to the processes you specify and reduces the size of the response.")
)
// A ProcessCollector is a Prometheus collector for WMI Win32_PerfRawData_PerfProc_Process metrics
type ProcessCollector struct {
StartTime *prometheus.Desc
CPUTimeTotal *prometheus.Desc
HandleCount *prometheus.Desc
IOBytesTotal *prometheus.Desc
IOOperationsTotal *prometheus.Desc
PageFaultsTotal *prometheus.Desc
PageFileBytes *prometheus.Desc
PoolBytes *prometheus.Desc
PriorityBase *prometheus.Desc
PrivateBytes *prometheus.Desc
ThreadCount *prometheus.Desc
VirtualBytes *prometheus.Desc
WorkingSet *prometheus.Desc
queryWhereClause string
}
// NewProcessCollector ...
func NewProcessCollector() (Collector, error) {
const subsystem = "process"
var wc bytes.Buffer
if *processWhereClause != "" {
wc.WriteString("WHERE ")
wc.WriteString(*processWhereClause)
} else {
log.Warn("No where-clause specified for process collector. This will generate a very large number of metrics!")
}
return &ProcessCollector{
StartTime: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "start_time"),
"Time of process start.",
[]string{"process", "process_id", "creating_process_id"},
nil,
),
CPUTimeTotal: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "cpu_time_total"),
"Returns elapsed time that all of the threads of this process used the processor to execute instructions by mode (privileged, user). An instruction is the basic unit of execution in a computer, a thread is the object that executes instructions, and a process is the object created when a program is run. Code executed to handle some hardware interrupts and trap conditions is included in this count.",
[]string{"process", "process_id", "creating_process_id", "mode"},
nil,
),
HandleCount: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "handle_count"),
"Total number of handles the process has open. This number is the sum of the handles currently open by each thread in the process.",
[]string{"process", "process_id", "creating_process_id"},
nil,
),
IOBytesTotal: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "io_bytes_total"),
"Bytes issued to I/O operations in different modes (read, write, other). This property counts all I/O activity generated by the process to include file, network, and device I/Os. Read and write mode includes data operations; other mode includes those that do not involve data, such as control operations. ",
[]string{"process", "process_id", "creating_process_id", "mode"},
nil,
),
IOOperationsTotal: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "io_operations_total"),
"I/O operations issued in different modes (read, write, other). This property counts all I/O activity generated by the process to include file, network, and device I/Os. Read and write mode includes data operations; other mode includes those that do not involve data, such as control operations. ",
[]string{"process", "process_id", "creating_process_id", "mode"},
nil,
),
PageFaultsTotal: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "page_faults_total"),
"Page faults by the threads executing in this process. A page fault occurs when a thread refers to a virtual memory page that is not in its working set in main memory. This can cause the page not to be fetched from disk if it is on the standby list and hence already in main memory, or if it is in use by another process with which the page is shared.",
[]string{"process", "process_id", "creating_process_id"},
nil,
),
PageFileBytes: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "page_file_bytes"),
"Current number of bytes this process has used in the paging file(s). Paging files are used to store pages of memory used by the process that are not contained in other files. Paging files are shared by all processes, and lack of space in paging files can prevent other processes from allocating memory.",
[]string{"process", "process_id", "creating_process_id"},
nil,
),
PoolBytes: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "pool_bytes"),
"Pool Bytes is the last observed number of bytes in the paged or nonpaged pool. The nonpaged pool is an area of system memory (physical memory used by the operating system) for objects that cannot be written to disk, but must remain in physical memory as long as they are allocated. The paged pool is an area of system memory (physical memory used by the operating system) for objects that can be written to disk when they are not being used. Nonpaged pool bytes is calculated differently than paged pool bytes, so it might not equal the total of paged pool bytes.",
[]string{"process", "process_id", "creating_process_id", "pool"},
nil,
),
PriorityBase: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "priority_base"),
"Current base priority of this process. Threads within a process can raise and lower their own base priority relative to the process base priority of the process.",
[]string{"process", "process_id", "creating_process_id"},
nil,
),
PrivateBytes: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "private_bytes"),
"Current number of bytes this process has allocated that cannot be shared with other processes.",
[]string{"process", "process_id", "creating_process_id"},
nil,
),
ThreadCount: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "thread_count"),
"Number of threads currently active in this process. An instruction is the basic unit of execution in a processor, and a thread is the object that executes instructions. Every running process has at least one thread.",
[]string{"process", "process_id", "creating_process_id"},
nil,
),
VirtualBytes: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "virtual_bytes"),
"Current size, in bytes, of the virtual address space that the process is using. Use of virtual address space does not necessarily imply corresponding use of either disk or main memory pages. Virtual space is finite and, by using too much, the process can limit its ability to load libraries.",
[]string{"process", "process_id", "creating_process_id"},
nil,
),
WorkingSet: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "working_set"),
"Maximum number of bytes in the working set of this process at any point in time. The working set is the set of memory pages touched recently by the threads in the process. If free memory in the computer is above a threshold, pages are left in the working set of a process even if they are not in use. When free memory falls below a threshold, pages are trimmed from working sets. If they are needed, they are then soft-faulted back into the working set before they leave main memory.",
[]string{"process", "process_id", "creating_process_id"},
nil,
),
queryWhereClause: wc.String(),
}, nil
}
// Collect sends the metric values for each metric
// to the provided prometheus Metric channel.
func (c *ProcessCollector) Collect(ch chan<- prometheus.Metric) error {
if desc, err := c.collect(ch); err != nil {
log.Errorln("[ERROR] failed collecting process metrics:", desc, err)
return err
}
return nil
}
type Win32_PerfRawData_PerfProc_Process struct {
Name string
CreatingProcessID uint32
ElapsedTime uint64
Frequency_Object uint64
HandleCount uint32
IDProcess uint32
IODataBytesPersec uint64
IODataOperationsPersec uint64
IOOtherBytesPersec uint64
IOOtherOperationsPersec uint64
IOReadBytesPersec uint64
IOReadOperationsPersec uint64
IOWriteBytesPersec uint64
IOWriteOperationsPersec uint64
PageFaultsPersec uint32
PageFileBytes uint64
PageFileBytesPeak uint64
PercentPrivilegedTime uint64
PercentProcessorTime uint64
PercentUserTime uint64
PoolNonpagedBytes uint32
PoolPagedBytes uint32
PriorityBase uint32
PrivateBytes uint64
ThreadCount uint32
Timestamp_Object uint64
VirtualBytes uint64
VirtualBytesPeak uint64
WorkingSet uint64
WorkingSetPeak uint64
WorkingSetPrivate uint64
}
func (c *ProcessCollector) collect(ch chan<- prometheus.Metric) (*prometheus.Desc, error) {
var dst []Win32_PerfRawData_PerfProc_Process
q := wmi.CreateQuery(&dst, c.queryWhereClause)
if err := wmi.Query(q, &dst); err != nil {
return nil, err
}
for _, process := range dst {
if process.Name == "_Total" {
continue
}
// Duplicate processes are suffixed # and an index number. Remove those.
processName := strings.Split(process.Name, "#")[0]
pid := strconv.FormatUint(uint64(process.IDProcess), 10)
cpid := strconv.FormatUint(uint64(process.CreatingProcessID), 10)
ch <- prometheus.MustNewConstMetric(
c.StartTime,
prometheus.GaugeValue,
// convert from Windows timestamp (1 jan 1601) to unix timestamp (1 jan 1970)
float64(process.ElapsedTime-116444736000000000)/float64(process.Frequency_Object),
processName,
pid,
cpid,
)
ch <- prometheus.MustNewConstMetric(
c.HandleCount,
prometheus.GaugeValue,
float64(process.HandleCount),
processName,
pid,
cpid,
)
ch <- prometheus.MustNewConstMetric(
c.CPUTimeTotal,
prometheus.CounterValue,
float64(process.PercentPrivilegedTime)*ticksToSecondsScaleFactor,
processName,
pid,
cpid,
"privileged",
)
ch <- prometheus.MustNewConstMetric(
c.CPUTimeTotal,
prometheus.CounterValue,
float64(process.PercentUserTime)*ticksToSecondsScaleFactor,
processName,
pid,
cpid,
"user",
)
ch <- prometheus.MustNewConstMetric(
c.IOBytesTotal,
prometheus.CounterValue,
float64(process.IOOtherBytesPersec),
processName,
pid,
cpid,
"other",
)
ch <- prometheus.MustNewConstMetric(
c.IOOperationsTotal,
prometheus.CounterValue,
float64(process.IOOtherOperationsPersec),
processName,
pid,
cpid,
"other",
)
ch <- prometheus.MustNewConstMetric(
c.IOBytesTotal,
prometheus.CounterValue,
float64(process.IOReadBytesPersec),
processName,
pid,
cpid,
"read",
)
ch <- prometheus.MustNewConstMetric(
c.IOOperationsTotal,
prometheus.CounterValue,
float64(process.IOReadOperationsPersec),
processName,
pid,
cpid,
"read",
)
ch <- prometheus.MustNewConstMetric(
c.IOBytesTotal,
prometheus.CounterValue,
float64(process.IOWriteBytesPersec),
processName,
pid,
cpid,
"write",
)
ch <- prometheus.MustNewConstMetric(
c.IOOperationsTotal,
prometheus.CounterValue,
float64(process.IOWriteOperationsPersec),
processName,
pid,
cpid,
"write",
)
ch <- prometheus.MustNewConstMetric(
c.PageFaultsTotal,
prometheus.CounterValue,
float64(process.PageFaultsPersec),
processName,
pid,
cpid,
)
ch <- prometheus.MustNewConstMetric(
c.PageFileBytes,
prometheus.GaugeValue,
float64(process.PageFileBytes),
processName,
pid,
cpid,
)
ch <- prometheus.MustNewConstMetric(
c.PoolBytes,
prometheus.GaugeValue,
float64(process.PoolNonpagedBytes),
processName,
pid,
cpid,
"nonpaged",
)
ch <- prometheus.MustNewConstMetric(
c.PoolBytes,
prometheus.GaugeValue,
float64(process.PoolPagedBytes),
processName,
pid,
cpid,
"paged",
)
ch <- prometheus.MustNewConstMetric(
c.PriorityBase,
prometheus.GaugeValue,
float64(process.PriorityBase),
processName,
pid,
cpid,
)
ch <- prometheus.MustNewConstMetric(
c.PrivateBytes,
prometheus.GaugeValue,
float64(process.PrivateBytes),
processName,
pid,
cpid,
)
ch <- prometheus.MustNewConstMetric(
c.ThreadCount,
prometheus.GaugeValue,
float64(process.ThreadCount),
processName,
pid,
cpid,
)
ch <- prometheus.MustNewConstMetric(
c.VirtualBytes,
prometheus.GaugeValue,
float64(process.VirtualBytes),
processName,
pid,
cpid,
)
ch <- prometheus.MustNewConstMetric(
c.WorkingSet,
prometheus.GaugeValue,
float64(process.WorkingSet),
processName,
pid,
cpid,
)
}
return nil, nil
}

View File

@@ -33,21 +33,25 @@ const (
)
var (
scrapeDurations = prometheus.NewSummaryVec(
prometheus.SummaryOpts{
Namespace: collector.Namespace,
Subsystem: "exporter",
Name: "scrape_duration_seconds",
Help: "wmi_exporter: Duration of a scrape job.",
},
[]string{"collector", "result"},
scrapeDurationDesc = prometheus.NewDesc(
prometheus.BuildFQName(collector.Namespace, "exporter", "collector_duration_seconds"),
"wmi_exporter: Duration of a collection.",
[]string{"collector"},
nil,
)
scrapeSuccessDesc = prometheus.NewDesc(
prometheus.BuildFQName(collector.Namespace, "exporter", "collector_success"),
"wmi_exporter: Whether the collector was successful.",
[]string{"collector"},
nil,
)
)
// Describe sends all the descriptors of the collectors included to
// the provided channel.
func (coll WmiCollector) Describe(ch chan<- *prometheus.Desc) {
scrapeDurations.Describe(ch)
ch <- scrapeDurationDesc
ch <- scrapeSuccessDesc
}
// Collect sends the collected metrics from each of the collectors to
@@ -63,7 +67,6 @@ func (coll WmiCollector) Collect(ch chan<- prometheus.Metric) {
}(name, c)
}
wg.Wait()
scrapeDurations.Collect(ch)
}
func filterAvailableCollectors(collectors string) string {
@@ -81,16 +84,27 @@ func execute(name string, c collector.Collector, ch chan<- prometheus.Metric) {
begin := time.Now()
err := c.Collect(ch)
duration := time.Since(begin)
var result string
var success float64
if err != nil {
log.Errorf("ERROR: %s collector failed after %fs: %s", name, duration.Seconds(), err)
result = "error"
success = 0
} else {
log.Debugf("OK: %s collector succeeded after %fs.", name, duration.Seconds())
result = "success"
success = 1
}
scrapeDurations.WithLabelValues(name, result).Observe(duration.Seconds())
ch <- prometheus.MustNewConstMetric(
scrapeDurationDesc,
prometheus.GaugeValue,
duration.Seconds(),
name,
)
ch <- prometheus.MustNewConstMetric(
scrapeSuccessDesc,
prometheus.GaugeValue,
success,
name,
)
}
func expandEnabledCollectors(enabled string) []string {

View File

@@ -42,10 +42,8 @@ function Get-FileIfNotExists {
$sourceDir = mkdir -Force Source
mkdir -Force Work,Output | Out-Null
Write-Verbose "Downloading files"
# Somewhat obscure url, points to WiX 3.10 binary release
Write-Verbose "Downloading WiX..."
Get-FileIfNotExists "http://download-codeplex.sec.s-msft.com/Download/Release?ProjectName=wix&DownloadId=1587180&FileTime=131118854877130000&Build=21050" "$sourceDir\wix-binaries.zip"
Get-FileIfNotExists "https://github.com/wixtoolset/wix3/releases/download/wix311rtm/wix311-binaries.zip" "$sourceDir\wix-binaries.zip"
mkdir -Force WiX | Out-Null
Expand-Archive -Path "${sourceDir}\wix-binaries.zip" -DestinationPath WiX -Force