mirror of
https://github.com/prometheus-community/windows_exporter.git
synced 2026-02-08 14:06:38 +00:00
Compare commits
44 Commits
v0.7.999-p
...
v0.8.0
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d01c66986c | ||
|
|
823ffb7597 | ||
|
|
a90f9cda0f | ||
|
|
31d4c28124 | ||
|
|
e880889f07 | ||
|
|
a283608812 | ||
|
|
8251ddd176 | ||
|
|
4f0a3a89ab | ||
|
|
27cc1072fe | ||
|
|
eb9cf56dee | ||
|
|
3c20887433 | ||
|
|
37d1c4e958 | ||
|
|
33b6e17b2d | ||
|
|
1a9d4afdd6 | ||
|
|
9e198c55a4 | ||
|
|
b309a05bde | ||
|
|
123a055242 | ||
|
|
9308108284 | ||
|
|
0ecf3cd792 | ||
|
|
801444b35b | ||
|
|
f4ab322e5b | ||
|
|
72de199528 | ||
|
|
304972580d | ||
|
|
6322bb124f | ||
|
|
cb6a91b705 | ||
|
|
4d9fb1be72 | ||
|
|
27e26037e3 | ||
|
|
e09497116f | ||
|
|
3099e10555 | ||
|
|
3900504504 | ||
|
|
2c5e30d920 | ||
|
|
b348c245e8 | ||
|
|
578bcc4959 | ||
|
|
31a30474f1 | ||
|
|
ce1005add8 | ||
|
|
6107a59306 | ||
|
|
47656b16bd | ||
|
|
8fc47669be | ||
|
|
1a67ca54b6 | ||
|
|
c73f52338d | ||
|
|
c5f23b4e64 | ||
|
|
411954cf9d | ||
|
|
56be7c63d5 | ||
|
|
6ffe504f7e |
2
.gitignore
vendored
2
.gitignore
vendored
@@ -3,3 +3,5 @@ VERSION
|
||||
*.swp
|
||||
*.un~
|
||||
output/
|
||||
.vscode
|
||||
.idea
|
||||
@@ -12,6 +12,7 @@ Name | Description | Enabled by default
|
||||
[ad](docs/collector.ad.md) | Active Directory Domain Services |
|
||||
[cpu](docs/collector.cpu.md) | CPU usage | ✓
|
||||
[cs](docs/collector.cs.md) | "Computer System" metrics (system properties, num cpus/total memory) | ✓
|
||||
[container](docs/collector.container.md) | Container metrics |
|
||||
[dns](docs/collector.dns.md) | DNS Server |
|
||||
[hyperv](docs/collector.hyperv.md) | Hyper-V hosts |
|
||||
[iis](docs/collector.iis.md) | IIS sites and applications |
|
||||
@@ -33,6 +34,7 @@ Name | Description | Enabled by default
|
||||
[service](docs/collector.service.md) | Service state metrics | ✓
|
||||
[system](docs/collector.system.md) | System calls | ✓
|
||||
[tcp](docs/collector.tcp.md) | TCP connections |
|
||||
[thermalzone](docs/collector.thermalzone.md) | Thermal information
|
||||
[textfile](docs/collector.textfile.md) | Read prometheus metrics from a text file | ✓
|
||||
[vmware](docs/collector.vmware.md) | Performance counters installed by the Vmware Guest agent |
|
||||
|
||||
|
||||
@@ -1,8 +1,12 @@
|
||||
package collector
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
|
||||
"github.com/leoluk/perflib_exporter/perflib"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/common/log"
|
||||
"golang.org/x/sys/windows/registry"
|
||||
)
|
||||
|
||||
// ...
|
||||
@@ -15,6 +19,34 @@ const (
|
||||
windowsEpoch = 116444736000000000
|
||||
)
|
||||
|
||||
// getWindowsVersion reads the version number of the OS from the Registry
|
||||
// See https://docs.microsoft.com/en-us/windows/desktop/sysinfo/operating-system-version
|
||||
func getWindowsVersion() float64 {
|
||||
k, err := registry.OpenKey(registry.LOCAL_MACHINE, `SOFTWARE\Microsoft\Windows NT\CurrentVersion`, registry.QUERY_VALUE)
|
||||
if err != nil {
|
||||
log.Warn("Couldn't open registry", err)
|
||||
return 0
|
||||
}
|
||||
defer func() {
|
||||
err = k.Close()
|
||||
if err != nil {
|
||||
log.Warnf("Failed to close registry key: %v", err)
|
||||
}
|
||||
}()
|
||||
|
||||
currentv, _, err := k.GetStringValue("CurrentVersion")
|
||||
if err != nil {
|
||||
log.Warn("Couldn't open registry to determine current Windows version:", err)
|
||||
return 0
|
||||
}
|
||||
|
||||
currentv_flt, err := strconv.ParseFloat(currentv, 64)
|
||||
|
||||
log.Debugf("Detected Windows version %f\n", currentv_flt)
|
||||
|
||||
return currentv_flt
|
||||
}
|
||||
|
||||
// Factories ...
|
||||
var Factories = make(map[string]func() (Collector, error))
|
||||
|
||||
|
||||
@@ -3,46 +3,15 @@
|
||||
package collector
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/sys/windows/registry"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/common/log"
|
||||
)
|
||||
|
||||
func init() {
|
||||
Factories["cpu"] = newCPUCollector
|
||||
}
|
||||
|
||||
// A function to get Windows version from registry
|
||||
func getWindowsVersion() float64 {
|
||||
k, err := registry.OpenKey(registry.LOCAL_MACHINE, `SOFTWARE\Microsoft\Windows NT\CurrentVersion`, registry.QUERY_VALUE)
|
||||
if err != nil {
|
||||
log.Warn("Couldn't open registry", err)
|
||||
return 0
|
||||
}
|
||||
defer func() {
|
||||
err = k.Close()
|
||||
if err != nil {
|
||||
log.Warnf("Failed to close registry key: %v", err)
|
||||
}
|
||||
}()
|
||||
|
||||
currentv, _, err := k.GetStringValue("CurrentVersion")
|
||||
if err != nil {
|
||||
log.Warn("Couldn't open registry to determine current Windows version:", err)
|
||||
return 0
|
||||
}
|
||||
|
||||
currentv_flt, err := strconv.ParseFloat(currentv, 64)
|
||||
|
||||
log.Debugf("Detected Windows version %f\n", currentv_flt)
|
||||
|
||||
return currentv_flt
|
||||
}
|
||||
|
||||
type cpuCollectorBasic struct {
|
||||
CStateSecondsTotal *prometheus.Desc
|
||||
TimeTotal *prometheus.Desc
|
||||
@@ -67,10 +36,12 @@ func newCPUCollector() (Collector, error) {
|
||||
const subsystem = "cpu"
|
||||
|
||||
version := getWindowsVersion()
|
||||
// Windows version by number https://docs.microsoft.com/en-us/windows/desktop/sysinfo/operating-system-version
|
||||
// For Windows 2008 or earlier Windows version is 6.0 or lower, where we only have the older "Processor" counters
|
||||
// For Windows 2008 R2 or later Windows version is 6.1 or higher, so we can use "ProcessorInformation" counters
|
||||
// Value 6.05 was selected just to split between Windows versions
|
||||
// For Windows 2008 (version 6.0) or earlier we only have the "Processor"
|
||||
// class. As of Windows 2008 R2 (version 6.1) the more detailed
|
||||
// "ProcessorInformation" set is available (although some of the counters
|
||||
// are added in later versions, so we aren't guaranteed to get all of
|
||||
// them).
|
||||
// Value 6.05 was selected to split between Windows versions.
|
||||
if version < 6.05 {
|
||||
return &cpuCollectorBasic{
|
||||
CStateSecondsTotal: prometheus.NewDesc(
|
||||
|
||||
@@ -91,7 +91,7 @@ func mssqlBuildWMIInstanceClass(suffix string, instance string) string {
|
||||
type mssqlCollectorsMap map[string]mssqlCollectorFunc
|
||||
|
||||
func mssqlAvailableClassCollectors() string {
|
||||
return "accessmethods,availreplica,bufman,databases,dbreplica,genstats,locks,memmgr,sqlstats"
|
||||
return "accessmethods,availreplica,bufman,databases,dbreplica,genstats,locks,memmgr,sqlstats,sqlerrors,transactions"
|
||||
}
|
||||
|
||||
func (c *MSSQLCollector) getMSSQLCollectors() mssqlCollectorsMap {
|
||||
@@ -105,6 +105,8 @@ func (c *MSSQLCollector) getMSSQLCollectors() mssqlCollectorsMap {
|
||||
mssqlCollectors["locks"] = c.collectLocks
|
||||
mssqlCollectors["memmgr"] = c.collectMemoryManager
|
||||
mssqlCollectors["sqlstats"] = c.collectSQLStats
|
||||
mssqlCollectors["sqlerrors"] = c.collectSQLErrors
|
||||
mssqlCollectors["transactions"] = c.collectTransactions
|
||||
|
||||
return mssqlCollectors
|
||||
}
|
||||
@@ -358,6 +360,24 @@ type MSSQLCollector struct {
|
||||
SQLStatsSQLReCompilations *prometheus.Desc
|
||||
SQLStatsUnsafeAutoParams *prometheus.Desc
|
||||
|
||||
// Win32_PerfRawData_{instance}_SQLServerSQLErrors
|
||||
SQLErrorsTotal *prometheus.Desc
|
||||
|
||||
// Win32_PerfRawData_{instance}_SQLServerTransactions
|
||||
TransactionsTempDbFreeSpaceBytes *prometheus.Desc
|
||||
TransactionsLongestTransactionRunningSeconds *prometheus.Desc
|
||||
TransactionsNonSnapshotVersionActiveTotal *prometheus.Desc
|
||||
TransactionsSnapshotActiveTotal *prometheus.Desc
|
||||
TransactionsActiveTotal *prometheus.Desc
|
||||
TransactionsUpdateConflictsTotal *prometheus.Desc
|
||||
TransactionsUpdateSnapshotActiveTotal *prometheus.Desc
|
||||
TransactionsVersionCleanupRateBytes *prometheus.Desc
|
||||
TransactionsVersionGenerationRateBytes *prometheus.Desc
|
||||
TransactionsVersionStoreSizeBytes *prometheus.Desc
|
||||
TransactionsVersionStoreUnits *prometheus.Desc
|
||||
TransactionsVersionStoreCreationUnits *prometheus.Desc
|
||||
TransactionsVersionStoreTruncationUnits *prometheus.Desc
|
||||
|
||||
mssqlInstances mssqlInstancesType
|
||||
mssqlCollectors mssqlCollectorsMap
|
||||
mssqlChildCollectorFailure int
|
||||
@@ -1637,6 +1657,94 @@ func NewMSSQLCollector() (Collector, error) {
|
||||
nil,
|
||||
),
|
||||
|
||||
// Win32_PerfRawData_{instance}_SQLServerSQLErrors
|
||||
SQLErrorsTotal: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(Namespace, subsystem, "sql_errors_total"),
|
||||
"(SQLErrors.Total)",
|
||||
[]string{"instance", "resource"},
|
||||
nil,
|
||||
),
|
||||
|
||||
// Win32_PerfRawData_{instance}_SQLServerTransactions
|
||||
TransactionsTempDbFreeSpaceBytes: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(Namespace, subsystem, "transactions_tempdb_free_space_bytes"),
|
||||
"(Transactions.FreeSpaceInTempDbKB)",
|
||||
[]string{"instance"},
|
||||
nil,
|
||||
),
|
||||
TransactionsLongestTransactionRunningSeconds: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(Namespace, subsystem, "transactions_longest_transaction_running_seconds"),
|
||||
"(Transactions.LongestTransactionRunningTime)",
|
||||
[]string{"instance"},
|
||||
nil,
|
||||
),
|
||||
TransactionsNonSnapshotVersionActiveTotal: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(Namespace, subsystem, "transactions_nonsnapshot_version_active_total"),
|
||||
"(Transactions.NonSnapshotVersionTransactions)",
|
||||
[]string{"instance"},
|
||||
nil,
|
||||
),
|
||||
TransactionsSnapshotActiveTotal: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(Namespace, subsystem, "transactions_snapshot_active_total"),
|
||||
"(Transactions.SnapshotTransactions)",
|
||||
[]string{"instance"},
|
||||
nil,
|
||||
),
|
||||
TransactionsActiveTotal: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(Namespace, subsystem, "transactions_active_total"),
|
||||
"(Transactions.Transactions)",
|
||||
[]string{"instance"},
|
||||
nil,
|
||||
),
|
||||
TransactionsUpdateConflictsTotal: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(Namespace, subsystem, "transactions_update_conflicts_total"),
|
||||
"(Transactions.UpdateConflictRatio)",
|
||||
[]string{"instance"},
|
||||
nil,
|
||||
),
|
||||
TransactionsUpdateSnapshotActiveTotal: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(Namespace, subsystem, "transactions_update_snapshot_active_total"),
|
||||
"(Transactions.UpdateSnapshotTransactions)",
|
||||
[]string{"instance"},
|
||||
nil,
|
||||
),
|
||||
TransactionsVersionCleanupRateBytes: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(Namespace, subsystem, "transactions_version_cleanup_rate_bytes"),
|
||||
"(Transactions.VersionCleanupRateKBs)",
|
||||
[]string{"instance"},
|
||||
nil,
|
||||
),
|
||||
TransactionsVersionGenerationRateBytes: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(Namespace, subsystem, "transactions_version_generation_rate_bytes"),
|
||||
"(Transactions.VersionGenerationRateKBs)",
|
||||
[]string{"instance"},
|
||||
nil,
|
||||
),
|
||||
TransactionsVersionStoreSizeBytes: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(Namespace, subsystem, "transactions_version_store_size_bytes"),
|
||||
"(Transactions.VersionStoreSizeKB)",
|
||||
[]string{"instance"},
|
||||
nil,
|
||||
),
|
||||
TransactionsVersionStoreUnits: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(Namespace, subsystem, "transactions_version_store_units"),
|
||||
"(Transactions.VersionStoreUnitCount)",
|
||||
[]string{"instance"},
|
||||
nil,
|
||||
),
|
||||
TransactionsVersionStoreCreationUnits: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(Namespace, subsystem, "transactions_version_store_creation_units"),
|
||||
"(Transactions.VersionStoreUnitCreation)",
|
||||
[]string{"instance"},
|
||||
nil,
|
||||
),
|
||||
TransactionsVersionStoreTruncationUnits: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(Namespace, subsystem, "transactions_version_store_truncation_units"),
|
||||
"(Transactions.VersionStoreUnitTruncation)",
|
||||
[]string{"instance"},
|
||||
nil,
|
||||
),
|
||||
|
||||
mssqlInstances: getMSSQLInstances(),
|
||||
}
|
||||
|
||||
@@ -2575,7 +2683,7 @@ func (c *MSSQLCollector) collectDatabaseReplica(ch chan<- prometheus.Metric, sql
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
c.DBReplicaTransactionDelay,
|
||||
prometheus.GaugeValue,
|
||||
float64(v.TransactionDelay)*1000.0,
|
||||
float64(v.TransactionDelay)/1000.0,
|
||||
sqlInstance, replicaName,
|
||||
)
|
||||
}
|
||||
@@ -3558,3 +3666,162 @@ func (c *MSSQLCollector) collectSQLStats(ch chan<- prometheus.Metric, sqlInstanc
|
||||
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
type win32PerfRawDataSQLServerSQLErrors struct {
|
||||
Name string
|
||||
ErrorsPersec uint64
|
||||
}
|
||||
|
||||
// Win32_PerfRawData_MSSQLSERVER_SQLServerErrors docs:
|
||||
// - https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-sql-errors-object
|
||||
func (c *MSSQLCollector) collectSQLErrors(ch chan<- prometheus.Metric, sqlInstance string) (*prometheus.Desc, error) {
|
||||
var dst []win32PerfRawDataSQLServerSQLErrors
|
||||
log.Debugf("mssql_sqlerrors collector iterating sql instance %s.", sqlInstance)
|
||||
|
||||
class := mssqlBuildWMIInstanceClass("SQLErrors", sqlInstance)
|
||||
q := queryAllForClassWhere(&dst, class, `Name <> '_Total'`)
|
||||
if err := wmi.Query(q, &dst); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for _, v := range dst {
|
||||
resource := v.Name
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
c.SQLErrorsTotal,
|
||||
prometheus.CounterValue,
|
||||
float64(v.ErrorsPersec),
|
||||
sqlInstance, resource,
|
||||
)
|
||||
}
|
||||
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
type win32PerfRawDataSqlServerTransactions struct {
|
||||
FreeSpaceintempdbKB uint64
|
||||
LongestTransactionRunningTime uint64
|
||||
NonSnapshotVersionTransactions uint64
|
||||
SnapshotTransactions uint64
|
||||
Transactions uint64
|
||||
Updateconflictratio uint64
|
||||
UpdateSnapshotTransactions uint64
|
||||
VersionCleanuprateKBPers uint64
|
||||
VersionGenerationrateKBPers uint64
|
||||
VersionStoreSizeKB uint64
|
||||
VersionStoreunitcount uint64
|
||||
VersionStoreunitcreation uint64
|
||||
VersionStoreunittruncation uint64
|
||||
}
|
||||
|
||||
// Win32_PerfRawData_MSSQLSERVER_Transactions docs:
|
||||
// - https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-transactions-object
|
||||
func (c *MSSQLCollector) collectTransactions(ch chan<- prometheus.Metric, sqlInstance string) (*prometheus.Desc, error) {
|
||||
var dst []win32PerfRawDataSqlServerTransactions
|
||||
log.Debugf("mssql_transactions collector iterating sql instance %s.", sqlInstance)
|
||||
|
||||
class := mssqlBuildWMIInstanceClass("Transactions", sqlInstance)
|
||||
q := queryAllForClass(&dst, class)
|
||||
if err := wmi.Query(q, &dst); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if len(dst) == 0 {
|
||||
return nil, errors.New("WMI query returned empty result set")
|
||||
}
|
||||
|
||||
v := dst[0]
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
c.TransactionsTempDbFreeSpaceBytes,
|
||||
prometheus.GaugeValue,
|
||||
float64(v.FreeSpaceintempdbKB*1024),
|
||||
sqlInstance,
|
||||
)
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
c.TransactionsLongestTransactionRunningSeconds,
|
||||
prometheus.GaugeValue,
|
||||
float64(v.LongestTransactionRunningTime),
|
||||
sqlInstance,
|
||||
)
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
c.TransactionsNonSnapshotVersionActiveTotal,
|
||||
prometheus.CounterValue,
|
||||
float64(v.NonSnapshotVersionTransactions),
|
||||
sqlInstance,
|
||||
)
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
c.TransactionsSnapshotActiveTotal,
|
||||
prometheus.CounterValue,
|
||||
float64(v.SnapshotTransactions),
|
||||
sqlInstance,
|
||||
)
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
c.TransactionsActiveTotal,
|
||||
prometheus.CounterValue,
|
||||
float64(v.Transactions),
|
||||
sqlInstance,
|
||||
)
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
c.TransactionsUpdateConflictsTotal,
|
||||
prometheus.CounterValue,
|
||||
float64(v.Updateconflictratio),
|
||||
sqlInstance,
|
||||
)
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
c.TransactionsUpdateSnapshotActiveTotal,
|
||||
prometheus.CounterValue,
|
||||
float64(v.UpdateSnapshotTransactions),
|
||||
sqlInstance,
|
||||
)
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
c.TransactionsVersionCleanupRateBytes,
|
||||
prometheus.GaugeValue,
|
||||
float64(v.VersionCleanuprateKBPers*1024),
|
||||
sqlInstance,
|
||||
)
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
c.TransactionsVersionGenerationRateBytes,
|
||||
prometheus.GaugeValue,
|
||||
float64(v.VersionGenerationrateKBPers*1024),
|
||||
sqlInstance,
|
||||
)
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
c.TransactionsVersionStoreSizeBytes,
|
||||
prometheus.GaugeValue,
|
||||
float64(v.VersionStoreSizeKB*1024),
|
||||
sqlInstance,
|
||||
)
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
c.TransactionsVersionStoreUnits,
|
||||
prometheus.CounterValue,
|
||||
float64(v.VersionStoreunitcount),
|
||||
sqlInstance,
|
||||
)
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
c.TransactionsVersionStoreCreationUnits,
|
||||
prometheus.CounterValue,
|
||||
float64(v.VersionStoreunitcreation),
|
||||
sqlInstance,
|
||||
)
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
c.TransactionsVersionStoreTruncationUnits,
|
||||
prometheus.CounterValue,
|
||||
float64(v.VersionStoreunittruncation),
|
||||
sqlInstance,
|
||||
)
|
||||
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
@@ -63,11 +63,14 @@ func unmarshalObject(obj *perflib.PerfObject, vs interface{}) error {
|
||||
|
||||
ctr, found := counters[tag]
|
||||
if !found {
|
||||
log.Debugf("missing counter %q, has %v", tag, counters)
|
||||
return fmt.Errorf("could not find counter %q on instance", tag)
|
||||
log.Debugf("missing counter %q, have %v", tag, counterMapKeys(counters))
|
||||
continue
|
||||
}
|
||||
if !target.Field(i).CanSet() {
|
||||
return fmt.Errorf("tagged field %v cannot be written to", f)
|
||||
return fmt.Errorf("tagged field %v cannot be written to", f.Name)
|
||||
}
|
||||
if fieldType := target.Field(i).Type(); fieldType != reflect.TypeOf((*float64)(nil)).Elem() {
|
||||
return fmt.Errorf("tagged field %v has wrong type %v, must be float64", f.Name, fieldType)
|
||||
}
|
||||
|
||||
switch ctr.Def.CounterType {
|
||||
@@ -87,3 +90,11 @@ func unmarshalObject(obj *perflib.PerfObject, vs interface{}) error {
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func counterMapKeys(m map[string]*perflib.PerfCounter) []string {
|
||||
keys := make([]string, 0, len(m))
|
||||
for k := range m {
|
||||
keys = append(keys, k)
|
||||
}
|
||||
return keys
|
||||
}
|
||||
|
||||
125
collector/perflib_test.go
Normal file
125
collector/perflib_test.go
Normal file
@@ -0,0 +1,125 @@
|
||||
package collector
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
perflibCollector "github.com/leoluk/perflib_exporter/collector"
|
||||
"github.com/leoluk/perflib_exporter/perflib"
|
||||
)
|
||||
|
||||
type simple struct {
|
||||
ValA float64 `perflib:"Something"`
|
||||
ValB float64 `perflib:"Something Else"`
|
||||
}
|
||||
|
||||
func TestUnmarshalPerflib(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
obj *perflib.PerfObject
|
||||
|
||||
expectedOutput []simple
|
||||
expectError bool
|
||||
}{
|
||||
{
|
||||
name: "nil check",
|
||||
obj: nil,
|
||||
expectedOutput: []simple{},
|
||||
expectError: true,
|
||||
},
|
||||
{
|
||||
name: "Simple",
|
||||
obj: &perflib.PerfObject{
|
||||
Instances: []*perflib.PerfInstance{
|
||||
{
|
||||
Counters: []*perflib.PerfCounter{
|
||||
{
|
||||
Def: &perflib.PerfCounterDef{
|
||||
Name: "Something",
|
||||
CounterType: perflibCollector.PERF_COUNTER_COUNTER,
|
||||
},
|
||||
Value: 123,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
expectedOutput: []simple{{ValA: 123}},
|
||||
expectError: false,
|
||||
},
|
||||
{
|
||||
name: "Multiple properties",
|
||||
obj: &perflib.PerfObject{
|
||||
Instances: []*perflib.PerfInstance{
|
||||
{
|
||||
Counters: []*perflib.PerfCounter{
|
||||
{
|
||||
Def: &perflib.PerfCounterDef{
|
||||
Name: "Something",
|
||||
CounterType: perflibCollector.PERF_COUNTER_COUNTER,
|
||||
},
|
||||
Value: 123,
|
||||
},
|
||||
{
|
||||
Def: &perflib.PerfCounterDef{
|
||||
Name: "Something Else",
|
||||
CounterType: perflibCollector.PERF_COUNTER_COUNTER,
|
||||
},
|
||||
Value: 256,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
expectedOutput: []simple{{ValA: 123, ValB: 256}},
|
||||
expectError: false,
|
||||
},
|
||||
{
|
||||
name: "Multiple instances",
|
||||
obj: &perflib.PerfObject{
|
||||
Instances: []*perflib.PerfInstance{
|
||||
{
|
||||
Counters: []*perflib.PerfCounter{
|
||||
{
|
||||
Def: &perflib.PerfCounterDef{
|
||||
Name: "Something",
|
||||
CounterType: perflibCollector.PERF_COUNTER_COUNTER,
|
||||
},
|
||||
Value: 321,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
Counters: []*perflib.PerfCounter{
|
||||
{
|
||||
Def: &perflib.PerfCounterDef{
|
||||
Name: "Something",
|
||||
CounterType: perflibCollector.PERF_COUNTER_COUNTER,
|
||||
},
|
||||
Value: 231,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
expectedOutput: []simple{{ValA: 321}, {ValA: 231}},
|
||||
expectError: false,
|
||||
},
|
||||
}
|
||||
for _, c := range cases {
|
||||
t.Run(c.name, func(t *testing.T) {
|
||||
output := make([]simple, 0)
|
||||
err := unmarshalObject(c.obj, &output)
|
||||
if err != nil && !c.expectError {
|
||||
t.Errorf("Did not expect error, got %q", err)
|
||||
}
|
||||
if err == nil && c.expectError {
|
||||
t.Errorf("Expected an error, but got ok")
|
||||
}
|
||||
|
||||
if err == nil && !reflect.DeepEqual(output, c.expectedOutput) {
|
||||
t.Errorf("Output mismatch, expected %+v, got %+v", c.expectedOutput, output)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
103
collector/thermalzone.go
Normal file
103
collector/thermalzone.go
Normal file
@@ -0,0 +1,103 @@
|
||||
package collector
|
||||
|
||||
import (
|
||||
"github.com/StackExchange/wmi"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/common/log"
|
||||
)
|
||||
|
||||
func init() {
|
||||
Factories["thermalzone"] = NewThermalZoneCollector
|
||||
}
|
||||
|
||||
// A thermalZoneCollector is a Prometheus collector for WMI Win32_PerfRawData_Counters_ThermalZoneInformation metrics
|
||||
type thermalZoneCollector struct {
|
||||
PercentPassiveLimit *prometheus.Desc
|
||||
Temperature *prometheus.Desc
|
||||
ThrottleReasons *prometheus.Desc
|
||||
}
|
||||
|
||||
// NewThermalZoneCollector ...
|
||||
func NewThermalZoneCollector() (Collector, error) {
|
||||
const subsystem = "thermalzone"
|
||||
return &thermalZoneCollector{
|
||||
Temperature: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(Namespace, subsystem, "temperature_celsius"),
|
||||
"(Temperature)",
|
||||
[]string{
|
||||
"name",
|
||||
},
|
||||
nil,
|
||||
),
|
||||
PercentPassiveLimit: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(Namespace, subsystem, "percent_passive_limit"),
|
||||
"(PercentPassiveLimit)",
|
||||
[]string{
|
||||
"name",
|
||||
},
|
||||
nil,
|
||||
),
|
||||
ThrottleReasons: prometheus.NewDesc(
|
||||
prometheus.BuildFQName(Namespace, subsystem, "throttle_reasons"),
|
||||
"(ThrottleReasons)",
|
||||
[]string{
|
||||
"name",
|
||||
},
|
||||
nil,
|
||||
),
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Collect sends the metric values for each metric
|
||||
// to the provided prometheus Metric channel.
|
||||
func (c *thermalZoneCollector) Collect(ctx *ScrapeContext, ch chan<- prometheus.Metric) error {
|
||||
if desc, err := c.collect(ch); err != nil {
|
||||
log.Error("failed collecting thermalzone metrics:", desc, err)
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Win32_PerfRawData_Counters_ThermalZoneInformation docs:
|
||||
// https://wutils.com/wmi/root/cimv2/win32_perfrawdata_counters_thermalzoneinformation/
|
||||
type Win32_PerfRawData_Counters_ThermalZoneInformation struct {
|
||||
Name string
|
||||
|
||||
HighPrecisionTemperature uint32
|
||||
PercentPassiveLimit uint32
|
||||
ThrottleReasons uint32
|
||||
}
|
||||
|
||||
func (c *thermalZoneCollector) collect(ch chan<- prometheus.Metric) (*prometheus.Desc, error) {
|
||||
var dst []Win32_PerfRawData_Counters_ThermalZoneInformation
|
||||
q := queryAll(&dst)
|
||||
if err := wmi.Query(q, &dst); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for _, info := range dst {
|
||||
//Divide by 10 and subtract 273.15 to convert decikelvin to celsius
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
c.Temperature,
|
||||
prometheus.GaugeValue,
|
||||
(float64(info.HighPrecisionTemperature)/10.0)-273.15,
|
||||
info.Name,
|
||||
)
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
c.PercentPassiveLimit,
|
||||
prometheus.GaugeValue,
|
||||
float64(info.PercentPassiveLimit),
|
||||
info.Name,
|
||||
)
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
c.ThrottleReasons,
|
||||
prometheus.GaugeValue,
|
||||
float64(info.ThrottleReasons),
|
||||
info.Name,
|
||||
)
|
||||
}
|
||||
|
||||
return nil, nil
|
||||
}
|
||||
@@ -5,14 +5,14 @@ The mssql collector exposes metrics about the MSSQL server
|
||||
|||
|
||||
-|-
|
||||
Metric name prefix | `mssql`
|
||||
Classes | [`Win32_PerfRawData_MSSQLSERVER_SQLServerAccessMethods`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-access-methods-object)<br/>[`Win32_PerfRawData_MSSQLSERVER_SQLServerAvailabilityReplica`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-availability-replica)<br/>[`Win32_PerfRawData_MSSQLSERVER_SQLServerBufferManager`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-buffer-manager-object)<br/>[`Win32_PerfRawData_MSSQLSERVER_SQLServerDatabaseReplica`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-database-replica)<br/>[`Win32_PerfRawData_MSSQLSERVER_SQLServerDatabases`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-databases-object?view=sql-server-2017)<br/>[`Win32_PerfRawData_MSSQLSERVER_SQLServerGeneralStatistics`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-general-statistics-object)<br/>[`Win32_PerfRawData_MSSQLSERVER_SQLServerLocks`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-locks-object)<br/>[`Win32_PerfRawData_MSSQLSERVER_SQLServerMemoryManager`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-memory-manager-object)<br/>[`Win32_PerfRawData_MSSQLSERVER_SQLServerSQLStatistics`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-sql-statistics-object)
|
||||
Classes | [`Win32_PerfRawData_MSSQLSERVER_SQLServerAccessMethods`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-access-methods-object)<br/>[`Win32_PerfRawData_MSSQLSERVER_SQLServerAvailabilityReplica`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-availability-replica)<br/>[`Win32_PerfRawData_MSSQLSERVER_SQLServerBufferManager`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-buffer-manager-object)<br/>[`Win32_PerfRawData_MSSQLSERVER_SQLServerDatabaseReplica`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-database-replica)<br/>[`Win32_PerfRawData_MSSQLSERVER_SQLServerDatabases`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-databases-object?view=sql-server-2017)<br/>[`Win32_PerfRawData_MSSQLSERVER_SQLServerGeneralStatistics`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-general-statistics-object)<br/>[`Win32_PerfRawData_MSSQLSERVER_SQLServerLocks`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-locks-object)<br/>[`Win32_PerfRawData_MSSQLSERVER_SQLServerMemoryManager`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-memory-manager-object)<br/>[`Win32_PerfRawData_MSSQLSERVER_SQLServerSQLStatistics`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-sql-statistics-object)<br/>[`Win32_PerfRawData_MSSQLSERVER_SQLServerSQLErrors`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-sql-errors-object)<br/>[`Win32_PerfRawData_MSSQLSERVER_SQLServerTransactions`](https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-transactions-object)
|
||||
Enabled by default? | No
|
||||
|
||||
## Flags
|
||||
|
||||
### `--collectors.mssql.classes-enabled`
|
||||
|
||||
Comma-separated list of MSSQL WMI classes to use. Supported values are `accessmethods`, `availreplica`, `bufman`, `databases`, `dbreplica`, `genstats`, `locks`, `memmgr` and `sqlstats`.
|
||||
Comma-separated list of MSSQL WMI classes to use. Supported values are `accessmethods`, `availreplica`, `bufman`, `databases`, `dbreplica`, `genstats`, `locks`, `memmgr`, `sqlstats`, `sqlerrors` and `transactions`.
|
||||
|
||||
### `--collectors.mssql.class-print`
|
||||
|
||||
@@ -230,6 +230,20 @@ Name | Description | Type | Labels
|
||||
`wmi_mssql_sqlstats_sql_compilations` | _Not yet documented_ | counter | `instance`
|
||||
`wmi_mssql_sqlstats_sql_recompilations` | _Not yet documented_ | counter | `instance`
|
||||
`wmi_mssql_sqlstats_unsafe_auto_parameterization_attempts` | _Not yet documented_ | counter | `instance`
|
||||
`wmi_mssql_sql_errors_total` | _Not yet documented_ | counter | `instance`, `resource`
|
||||
`wmi_mssql_transactions_tempdb_free_space_bytes` | _Not yet documented_ | gauge | `instance`
|
||||
`wmi_mssql_transactions_longest_transaction_running_seconds` | _Not yet documented_ | gauge | `instance`
|
||||
`wmi_mssql_transactions_nonsnapshot_version_active_total` | _Not yet documented_ | counter | `instance`
|
||||
`wmi_mssql_transactions_snapshot_active_total` | _Not yet documented_ | counter | `instance`
|
||||
`wmi_mssql_transactions_active_total` | _Not yet documented_ | counter | `instance`
|
||||
`wmi_mssql_transactions_update_conflicts_total` | _Not yet documented_ | counter | `instance`
|
||||
`wmi_mssql_transactions_update_snapshot_active_total` | _Not yet documented_ | counter | `instance`
|
||||
`wmi_mssql_transactions_version_cleanup_rate_bytes` | _Not yet documented_ | gauge | `instance`
|
||||
`wmi_mssql_transactions_version_generation_rate_bytes` | _Not yet documented_ | gauge | `instance`
|
||||
`wmi_mssql_transactions_version_store_size_bytes` | _Not yet documented_ | gauge | `instance`
|
||||
`wmi_mssql_transactions_version_store_units` | _Not yet documented_ | counter | `instance`
|
||||
`wmi_mssql_transactions_version_store_creation_units` | _Not yet documented_ | counter | `instance`
|
||||
`wmi_mssql_transactions_version_store_truncation_units` | _Not yet documented_ | counter | `instance`
|
||||
|
||||
### Example metric
|
||||
_This collector does not yet have explained examples, we would appreciate your help adding them!_
|
||||
|
||||
@@ -66,10 +66,42 @@ A service can have any of the following statuses:
|
||||
Note that there is some overlap with service state.
|
||||
|
||||
### Example metric
|
||||
_This collector does not yet have explained examples, we would appreciate your help adding them!_
|
||||
Lists the services that have a 'disabled' start mode.
|
||||
```
|
||||
wmi_service_start_mode{exported_name=~"(mssqlserver|sqlserveragent)",start_mode="disabled"}
|
||||
```
|
||||
|
||||
## Useful queries
|
||||
_This collector does not yet have any useful queries added, we would appreciate your help adding them!_
|
||||
Counts the number of Microsoft SQL Server/Agent Processes
|
||||
```
|
||||
count(wmi_service_state{exported_name=~"(sqlserveragent|mssqlserver)",state="running"})
|
||||
```
|
||||
|
||||
## Alerting examples
|
||||
_This collector does not yet have alerting examples, we would appreciate your help adding them!_
|
||||
**prometheus.rules**
|
||||
```
|
||||
groups:
|
||||
- name: Microsoft SQL Server Alerts
|
||||
rules:
|
||||
|
||||
# Sends an alert when the 'sqlserveragent' service is not in the running state for 3 minutes.
|
||||
- alert: SQL Server Agent DOWN
|
||||
expr: wmi_service_state{instance="SQL",exported_name="sqlserveragent",state="running"} == 0
|
||||
for: 3m
|
||||
labels:
|
||||
severity: high
|
||||
annotations:
|
||||
summary: "Service {{ $labels.exported_name }} down"
|
||||
description: "Service {{ $labels.exported_name }} on instance {{ $labels.instance }} has been down for more than 3 minutes."
|
||||
|
||||
# Sends an alert when the 'mssqlserver' service is not in the running state for 3 minutes.
|
||||
- alert: SQL Server DOWN
|
||||
expr: wmi_service_state{instance="SQL",exported_name="mssqlserver",state="running"} == 0
|
||||
for: 3m
|
||||
labels:
|
||||
severity: high
|
||||
annotations:
|
||||
summary: "Service {{ $labels.exported_name }} down"
|
||||
description: "Service {{ $labels.exported_name }} on instance {{ $labels.instance }} has been down for more than 3 minutes."
|
||||
```
|
||||
In this example, `instance` is the target label of the host. So each alert will be processed per host, which is then used in the alert description.
|
||||
|
||||
32
docs/collector.thermalzone.md
Normal file
32
docs/collector.thermalzone.md
Normal file
@@ -0,0 +1,32 @@
|
||||
# thermalzone collector
|
||||
|
||||
The thermalzone collector exposes metrics about system temps. Note that temperature is given in Kelvin
|
||||
|
||||
|||
|
||||
-|-
|
||||
Metric name prefix | `thermalzone`
|
||||
Classes | [`Win32_PerfRawData_Counters_ThermalZoneInformation`](https://wutils.com/wmi/root/cimv2/win32_perfrawdata_counters_thermalzoneinformation/#temperature_properties)
|
||||
Enabled by default? | No
|
||||
|
||||
## Flags
|
||||
|
||||
None
|
||||
|
||||
## Metrics
|
||||
|
||||
Name | Description | Type | Labels
|
||||
-----|-------------|------|-------
|
||||
`wmi_thermalzone_percent_passive_limit` | % Passive Limit is the current limit this thermal zone is placing on the devices it controls. A limit of 100% indicates the devices are unconstrained. A limit of 0% indicates the devices are fully constrained. | gauge | None
|
||||
`wmi_thermalzone_temperature_celsius ` | Temperature of the thermal zone, in degrees Celsius. | gauge | None
|
||||
`wmi_thermalzone_throttle_reasons ` | Throttle Reasons indicate reasons why the thermal zone is limiting performance of the devices it controls. 0x0 – The zone is not throttled. 0x1 – The zone is throttled for thermal reasons. 0x2 – The zone is throttled to limit electrical current. | gauge | None
|
||||
|
||||
[`Throttle reasons` source](https://docs.microsoft.com/en-us/windows-hardware/design/device-experiences/examples--requirements-and-diagnostics)
|
||||
|
||||
### Example metric
|
||||
_This collector does not yet have explained examples, we would appreciate your help adding them!_
|
||||
|
||||
## Useful queries
|
||||
_This collector does not yet have any useful queries added, we would appreciate your help adding them!_
|
||||
|
||||
## Alerting examples
|
||||
_This collector does not yet have alerting examples, we would appreciate your help adding them!_
|
||||
261
exporter.go
261
exporter.go
@@ -5,7 +5,9 @@ package main
|
||||
import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
"os"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
@@ -52,6 +54,12 @@ var (
|
||||
[]string{"collector"},
|
||||
nil,
|
||||
)
|
||||
snapshotDuration = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(collector.Namespace, "exporter", "perflib_snapshot_duration_seconds"),
|
||||
"Duration of perflib snapshot capture",
|
||||
nil,
|
||||
nil,
|
||||
)
|
||||
|
||||
// This can be removed when client_golang exposes this on Windows
|
||||
// (See https://github.com/prometheus/client_golang/issues/376)
|
||||
@@ -71,84 +79,112 @@ func (coll WmiCollector) Describe(ch chan<- *prometheus.Desc) {
|
||||
ch <- scrapeSuccessDesc
|
||||
}
|
||||
|
||||
type collectorOutcome int
|
||||
|
||||
const (
|
||||
pending collectorOutcome = iota
|
||||
success
|
||||
failed
|
||||
)
|
||||
|
||||
// Collect sends the collected metrics from each of the collectors to
|
||||
// prometheus.
|
||||
func (coll WmiCollector) Collect(ch chan<- prometheus.Metric) {
|
||||
scrapeContext, err := collector.PrepareScrapeContext()
|
||||
if err != nil {
|
||||
ch <- prometheus.NewInvalidMetric(scrapeSuccessDesc, fmt.Errorf("failed to prepare scrape: %v", err))
|
||||
return
|
||||
}
|
||||
|
||||
remainingCollectors := make(map[string]bool)
|
||||
for name := range coll.collectors {
|
||||
remainingCollectors[name] = true
|
||||
}
|
||||
|
||||
metricsBuffer := make(chan prometheus.Metric)
|
||||
allDone := make(chan struct{})
|
||||
stopped := false
|
||||
go func() {
|
||||
for {
|
||||
select {
|
||||
case m := <-metricsBuffer:
|
||||
if !stopped {
|
||||
ch <- m
|
||||
}
|
||||
case <-allDone:
|
||||
return
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
wg := sync.WaitGroup{}
|
||||
wg.Add(len(coll.collectors))
|
||||
go func() {
|
||||
wg.Wait()
|
||||
close(allDone)
|
||||
close(metricsBuffer)
|
||||
}()
|
||||
|
||||
for name, c := range coll.collectors {
|
||||
go func(name string, c collector.Collector) {
|
||||
execute(name, c, scrapeContext, metricsBuffer)
|
||||
wg.Done()
|
||||
delete(remainingCollectors, name)
|
||||
}(name, c)
|
||||
}
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
startTimeDesc,
|
||||
prometheus.CounterValue,
|
||||
startTime,
|
||||
)
|
||||
|
||||
t := time.Now()
|
||||
scrapeContext, err := collector.PrepareScrapeContext()
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
snapshotDuration,
|
||||
prometheus.GaugeValue,
|
||||
time.Since(t).Seconds(),
|
||||
)
|
||||
if err != nil {
|
||||
ch <- prometheus.NewInvalidMetric(scrapeSuccessDesc, fmt.Errorf("failed to prepare scrape: %v", err))
|
||||
return
|
||||
}
|
||||
|
||||
wg := sync.WaitGroup{}
|
||||
wg.Add(len(coll.collectors))
|
||||
collectorOutcomes := make(map[string]collectorOutcome)
|
||||
for name := range coll.collectors {
|
||||
collectorOutcomes[name] = pending
|
||||
}
|
||||
|
||||
metricsBuffer := make(chan prometheus.Metric)
|
||||
l := sync.Mutex{}
|
||||
finished := false
|
||||
go func() {
|
||||
for m := range metricsBuffer {
|
||||
l.Lock()
|
||||
if !finished {
|
||||
ch <- m
|
||||
}
|
||||
l.Unlock()
|
||||
}
|
||||
}()
|
||||
|
||||
for name, c := range coll.collectors {
|
||||
go func(name string, c collector.Collector) {
|
||||
defer wg.Done()
|
||||
outcome := execute(name, c, scrapeContext, metricsBuffer)
|
||||
l.Lock()
|
||||
if !finished {
|
||||
collectorOutcomes[name] = outcome
|
||||
}
|
||||
l.Unlock()
|
||||
}(name, c)
|
||||
}
|
||||
|
||||
allDone := make(chan struct{})
|
||||
go func() {
|
||||
wg.Wait()
|
||||
close(allDone)
|
||||
}()
|
||||
|
||||
// Wait until either all collectors finish, or timeout expires
|
||||
select {
|
||||
case <-allDone:
|
||||
stopped = true
|
||||
return
|
||||
case <-time.After(coll.maxScrapeDuration):
|
||||
stopped = true
|
||||
remainingCollectorNames := make([]string, 0, len(remainingCollectors))
|
||||
for rc := range remainingCollectors {
|
||||
remainingCollectorNames = append(remainingCollectorNames, rc)
|
||||
}
|
||||
log.Warn("Collection timed out, still waiting for ", remainingCollectorNames)
|
||||
for name := range remainingCollectors {
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
scrapeSuccessDesc,
|
||||
prometheus.GaugeValue,
|
||||
0.0,
|
||||
name,
|
||||
)
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
scrapeTimeoutDesc,
|
||||
prometheus.GaugeValue,
|
||||
1.0,
|
||||
name,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
l.Lock()
|
||||
finished = true
|
||||
|
||||
remainingCollectorNames := make([]string, 0)
|
||||
for name, outcome := range collectorOutcomes {
|
||||
var successValue, timeoutValue float64
|
||||
if outcome == pending {
|
||||
timeoutValue = 1.0
|
||||
remainingCollectorNames = append(remainingCollectorNames, name)
|
||||
}
|
||||
if outcome == success {
|
||||
successValue = 1.0
|
||||
}
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
scrapeSuccessDesc,
|
||||
prometheus.GaugeValue,
|
||||
successValue,
|
||||
name,
|
||||
)
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
scrapeTimeoutDesc,
|
||||
prometheus.GaugeValue,
|
||||
timeoutValue,
|
||||
name,
|
||||
)
|
||||
}
|
||||
|
||||
if len(remainingCollectorNames) > 0 {
|
||||
log.Warn("Collection timed out, still waiting for ", remainingCollectorNames)
|
||||
}
|
||||
|
||||
l.Unlock()
|
||||
}
|
||||
|
||||
func filterAvailableCollectors(collectors string) string {
|
||||
@@ -162,37 +198,23 @@ func filterAvailableCollectors(collectors string) string {
|
||||
return strings.Join(availableCollectors, ",")
|
||||
}
|
||||
|
||||
func execute(name string, c collector.Collector, ctx *collector.ScrapeContext, ch chan<- prometheus.Metric) {
|
||||
begin := time.Now()
|
||||
func execute(name string, c collector.Collector, ctx *collector.ScrapeContext, ch chan<- prometheus.Metric) collectorOutcome {
|
||||
t := time.Now()
|
||||
err := c.Collect(ctx, ch)
|
||||
duration := time.Since(begin)
|
||||
var success float64
|
||||
|
||||
if err != nil {
|
||||
log.Errorf("collector %s failed after %fs: %s", name, duration.Seconds(), err)
|
||||
success = 0
|
||||
} else {
|
||||
log.Debugf("collector %s succeeded after %fs.", name, duration.Seconds())
|
||||
success = 1
|
||||
}
|
||||
duration := time.Since(t).Seconds()
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
scrapeDurationDesc,
|
||||
prometheus.GaugeValue,
|
||||
duration.Seconds(),
|
||||
name,
|
||||
)
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
scrapeSuccessDesc,
|
||||
prometheus.GaugeValue,
|
||||
success,
|
||||
name,
|
||||
)
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
scrapeTimeoutDesc,
|
||||
prometheus.GaugeValue,
|
||||
0.0,
|
||||
duration,
|
||||
name,
|
||||
)
|
||||
|
||||
if err != nil {
|
||||
log.Errorf("collector %s failed after %fs: %s", name, duration, err)
|
||||
return failed
|
||||
}
|
||||
log.Debugf("collector %s succeeded after %fs.", name, duration)
|
||||
return success
|
||||
}
|
||||
|
||||
func expandEnabledCollectors(enabled string) []string {
|
||||
@@ -229,10 +251,6 @@ func loadCollectors(list string) (map[string]collector.Collector, error) {
|
||||
return collectors, nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
prometheus.MustRegister(version.NewCollector("wmi_exporter"))
|
||||
}
|
||||
|
||||
func initWbem() {
|
||||
// This initialization prevents a memory leak on WMF 5+. See
|
||||
// https://github.com/martinlindhe/wmi_exporter/issues/77 and linked issues
|
||||
@@ -264,10 +282,10 @@ func main() {
|
||||
"collectors.print",
|
||||
"If true, print available collectors and exit.",
|
||||
).Bool()
|
||||
maxScrapeDuration = kingpin.Flag(
|
||||
"scrape.max-duration",
|
||||
"Time after which collectors are aborted during a scrape",
|
||||
).Default("30s").Duration()
|
||||
timeoutMargin = kingpin.Flag(
|
||||
"scrape.timeout-margin",
|
||||
"Seconds to subtract from the timeout allowed by the client. Tune to allow for overhead or high loads.",
|
||||
).Default("0.5").Float64()
|
||||
)
|
||||
|
||||
log.AddFlags(kingpin.CommandLine)
|
||||
@@ -312,13 +330,17 @@ func main() {
|
||||
|
||||
log.Infof("Enabled collectors: %v", strings.Join(keys(collectors), ", "))
|
||||
|
||||
exporter := WmiCollector{
|
||||
collectors: collectors,
|
||||
maxScrapeDuration: *maxScrapeDuration,
|
||||
h := &metricsHandler{
|
||||
timeoutMargin: *timeoutMargin,
|
||||
collectorFactory: func(timeout time.Duration) *WmiCollector {
|
||||
return &WmiCollector{
|
||||
collectors: collectors,
|
||||
maxScrapeDuration: timeout,
|
||||
}
|
||||
},
|
||||
}
|
||||
prometheus.MustRegister(exporter)
|
||||
|
||||
http.Handle(*metricsPath, promhttp.Handler())
|
||||
http.Handle(*metricsPath, h)
|
||||
http.HandleFunc("/health", healthCheck)
|
||||
http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
|
||||
http.Redirect(w, r, *metricsPath, http.StatusMovedPermanently)
|
||||
@@ -382,3 +404,36 @@ loop:
|
||||
changes <- svc.Status{State: svc.StopPending}
|
||||
return
|
||||
}
|
||||
|
||||
type metricsHandler struct {
|
||||
timeoutMargin float64
|
||||
collectorFactory func(timeout time.Duration) *WmiCollector
|
||||
}
|
||||
|
||||
func (mh *metricsHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
|
||||
const defaultTimeout = 10.0
|
||||
|
||||
var timeoutSeconds float64
|
||||
if v := r.Header.Get("X-Prometheus-Scrape-Timeout-Seconds"); v != "" {
|
||||
var err error
|
||||
timeoutSeconds, err = strconv.ParseFloat(v, 64)
|
||||
if err != nil {
|
||||
log.Warnf("Couldn't parse X-Prometheus-Scrape-Timeout-Seconds: %q. Defaulting timeout to %f", v, defaultTimeout)
|
||||
}
|
||||
}
|
||||
if timeoutSeconds == 0 {
|
||||
timeoutSeconds = defaultTimeout
|
||||
}
|
||||
timeoutSeconds = timeoutSeconds - mh.timeoutMargin
|
||||
|
||||
reg := prometheus.NewRegistry()
|
||||
reg.MustRegister(mh.collectorFactory(time.Duration(timeoutSeconds * float64(time.Second))))
|
||||
reg.MustRegister(
|
||||
prometheus.NewProcessCollector(os.Getpid(), ""),
|
||||
prometheus.NewGoCollector(),
|
||||
version.NewCollector("wmi_exporter"),
|
||||
)
|
||||
|
||||
h := promhttp.HandlerFor(reg, promhttp.HandlerOpts{})
|
||||
h.ServeHTTP(w, r)
|
||||
}
|
||||
|
||||
@@ -20,7 +20,7 @@ else {
|
||||
$members = $wmiObject `
|
||||
| Get-Member -MemberType Properties `
|
||||
| Where-Object { $_.Definition -Match '^u?int' -and $_.Name -NotMatch '_' } `
|
||||
| Select-Object Name, @{Name="Type";Expression={$_.Definition.Split(" ")[0]}}
|
||||
| Select-Object Name, @{Name="Type";Expression={$_.Definition.Split(" ")[0]}})
|
||||
$input = @{
|
||||
"Class"=$Class;
|
||||
"CollectorName"=$CollectorName;
|
||||
|
||||
@@ -29,7 +29,7 @@ func New{{ .CollectorName }}Collector() (Collector, error) {
|
||||
}
|
||||
// Collect sends the metric values for each metric
|
||||
// to the provided prometheus Metric channel.
|
||||
func (c *{{ .CollectorName }}Collector) Collect(ch chan<- prometheus.Metric) error {
|
||||
func (c *{{ .CollectorName }}Collector) Collect(ctx *ScrapeContext, ch chan<- prometheus.Metric) error {
|
||||
if desc, err := c.collect(ch); err != nil {
|
||||
log.Error("failed collecting {{ .CollectorName | toLower }} metrics:", desc, err)
|
||||
return err
|
||||
|
||||
Reference in New Issue
Block a user