rework mssql cache metrics

This commit is contained in:
sebastian.poxhofer
2020-03-02 22:34:17 +01:00
parent b64ccbe683
commit 6dad58fc8f
2 changed files with 148 additions and 68 deletions

View File

@@ -180,6 +180,7 @@ type MSSQLCollector struct {
AccessMethodsWorkfilesCreated *prometheus.Desc AccessMethodsWorkfilesCreated *prometheus.Desc
AccessMethodsWorktablesCreated *prometheus.Desc AccessMethodsWorktablesCreated *prometheus.Desc
AccessMethodsWorktablesFromCacheRatio *prometheus.Desc AccessMethodsWorktablesFromCacheRatio *prometheus.Desc
AccessMethodsWorktablesFromCacheRatio_Base *prometheus.Desc
// Win32_PerfRawData_{instance}_SQLServerAvailabilityReplica // Win32_PerfRawData_{instance}_SQLServerAvailabilityReplica
AvailReplicaBytesReceivedfromReplica *prometheus.Desc AvailReplicaBytesReceivedfromReplica *prometheus.Desc
@@ -194,7 +195,8 @@ type MSSQLCollector struct {
// Win32_PerfRawData_{instance}_SQLServerBufferManager // Win32_PerfRawData_{instance}_SQLServerBufferManager
BufManBackgroundwriterpages *prometheus.Desc BufManBackgroundwriterpages *prometheus.Desc
BufManBuffercachehitratio *prometheus.Desc BufManBuffercachehits *prometheus.Desc
BufManBuffercachelookups *prometheus.Desc
BufManCheckpointpages *prometheus.Desc BufManCheckpointpages *prometheus.Desc
BufManDatabasepages *prometheus.Desc BufManDatabasepages *prometheus.Desc
BufManExtensionallocatedpages *prometheus.Desc BufManExtensionallocatedpages *prometheus.Desc
@@ -253,6 +255,7 @@ type MSSQLCollector struct {
DatabasesGroupCommitTime *prometheus.Desc DatabasesGroupCommitTime *prometheus.Desc
DatabasesLogBytesFlushed *prometheus.Desc DatabasesLogBytesFlushed *prometheus.Desc
DatabasesLogCacheHitRatio *prometheus.Desc DatabasesLogCacheHitRatio *prometheus.Desc
DatabasesLogCacheHitRatio_Base *prometheus.Desc
DatabasesLogCacheReads *prometheus.Desc DatabasesLogCacheReads *prometheus.Desc
DatabasesLogFilesSizeKB *prometheus.Desc DatabasesLogFilesSizeKB *prometheus.Desc
DatabasesLogFilesUsedSizeKB *prometheus.Desc DatabasesLogFilesUsedSizeKB *prometheus.Desc
@@ -318,6 +321,7 @@ type MSSQLCollector struct {
// Win32_PerfRawData_{instance}_SQLServerLocks // Win32_PerfRawData_{instance}_SQLServerLocks
LocksAverageWaitTimems *prometheus.Desc LocksAverageWaitTimems *prometheus.Desc
LocksAverageWaitTimems_Base *prometheus.Desc
LocksLockRequests *prometheus.Desc LocksLockRequests *prometheus.Desc
LocksLockTimeouts *prometheus.Desc LocksLockTimeouts *prometheus.Desc
LocksLockTimeoutstimeout0 *prometheus.Desc LocksLockTimeoutstimeout0 *prometheus.Desc
@@ -657,11 +661,17 @@ func NewMSSQLCollector() (Collector, error) {
nil, nil,
), ),
AccessMethodsWorktablesFromCacheRatio: prometheus.NewDesc( AccessMethodsWorktablesFromCacheRatio: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "accessmethods_worktables_from_cache_ratio"), prometheus.BuildFQName(Namespace, subsystem, "accessmethods_worktables_from_cache_hits"),
"(AccessMethods.WorktablesFromCacheRatio)", "(AccessMethods.WorktablesFromCacheRatio)",
[]string{"instance"}, []string{"instance"},
nil, nil,
), ),
AccessMethodsWorktablesFromCacheRatio_Base: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "accessmethods_worktables_from_cache_lookups"),
"(AccessMethods.WorktablesFromCacheRatio_Base)",
[]string{"instance"},
nil,
),
// Win32_PerfRawData_{instance}_SQLServerAvailabilityReplica // Win32_PerfRawData_{instance}_SQLServerAvailabilityReplica
AvailReplicaBytesReceivedfromReplica: prometheus.NewDesc( AvailReplicaBytesReceivedfromReplica: prometheus.NewDesc(
@@ -726,12 +736,18 @@ func NewMSSQLCollector() (Collector, error) {
[]string{"instance"}, []string{"instance"},
nil, nil,
), ),
BufManBuffercachehitratio: prometheus.NewDesc( BufManBuffercachehits: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "bufman_buffer_cache_hit_ratio"), prometheus.BuildFQName(Namespace, subsystem, "bufman_buffer_cache_hits"),
"(BufferManager.Buffercachehitratio)", "(BufferManager.Buffercachehitratio)",
[]string{"instance"}, []string{"instance"},
nil, nil,
), ),
BufManBuffercachelookups: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "bufman_buffer_cache_lookups"),
"(BufferManager.Buffercachehitratio_Base)",
[]string{"instance"},
nil,
),
BufManCheckpointpages: prometheus.NewDesc( BufManCheckpointpages: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "bufman_checkpoint_pages"), prometheus.BuildFQName(Namespace, subsystem, "bufman_checkpoint_pages"),
"(BufferManager.Checkpointpages)", "(BufferManager.Checkpointpages)",
@@ -1055,8 +1071,14 @@ func NewMSSQLCollector() (Collector, error) {
nil, nil,
), ),
DatabasesLogCacheHitRatio: prometheus.NewDesc( DatabasesLogCacheHitRatio: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "databases_log_cache_hit_ratio"), prometheus.BuildFQName(Namespace, subsystem, "databases_log_cache_hits"),
"(Databases.LogCacheHitRatio)", "(Databases.LogCacheHits)",
[]string{"instance", "database"},
nil,
),
DatabasesLogCacheHitRatio_Base: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "databases_log_cache_lookups"),
"(Databases.LogCacheLookups)",
[]string{"instance", "database"}, []string{"instance", "database"},
nil, nil,
), ),
@@ -1425,8 +1447,14 @@ func NewMSSQLCollector() (Collector, error) {
// Win32_PerfRawData_{instance}_SQLServerLocks // Win32_PerfRawData_{instance}_SQLServerLocks
LocksAverageWaitTimems: prometheus.NewDesc( LocksAverageWaitTimems: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "locks_average_wait_seconds"), prometheus.BuildFQName(Namespace, subsystem, "locks_wait_time_seconds"),
"(Locks.AverageWaitTimems)", "(Locks.LockWaitTime. Total time in ms which locks have been holding resources)",
[]string{"instance", "resource"},
nil,
),
LocksAverageWaitTimems_Base: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "locks_count"),
"(Locks.LockCount. count of how often requests have run into locks)",
[]string{"instance", "resource"}, []string{"instance", "resource"},
nil, nil,
), ),
@@ -1862,7 +1890,8 @@ type win32PerfRawDataSQLServerAccessMethods struct {
Usedtreepagecookie uint64 Usedtreepagecookie uint64
WorkfilesCreatedPersec uint64 WorkfilesCreatedPersec uint64
WorktablesCreatedPersec uint64 WorktablesCreatedPersec uint64
WorktablesFromCacheRatio uint64 WorktablesFromCacheHits uint64
WorktablesFromCacheLookups uint64
} }
func (c *MSSQLCollector) collectAccessMethods(ch chan<- prometheus.Metric, sqlInstance string) (*prometheus.Desc, error) { func (c *MSSQLCollector) collectAccessMethods(ch chan<- prometheus.Metric, sqlInstance string) (*prometheus.Desc, error) {
@@ -2177,7 +2206,14 @@ func (c *MSSQLCollector) collectAccessMethods(ch chan<- prometheus.Metric, sqlIn
ch <- prometheus.MustNewConstMetric( ch <- prometheus.MustNewConstMetric(
c.AccessMethodsWorktablesFromCacheRatio, c.AccessMethodsWorktablesFromCacheRatio,
prometheus.CounterValue, prometheus.CounterValue,
float64(v.WorktablesFromCacheRatio), float64(v.WorktablesFromCacheHits),
sqlInstance,
)
ch <- prometheus.MustNewConstMetric(
c.AccessMethodsWorktablesFromCacheRatio_Base,
prometheus.CounterValue,
float64(v.WorktablesFromCacheLookups),
sqlInstance, sqlInstance,
) )
return nil, nil return nil, nil
@@ -2281,7 +2317,8 @@ func (c *MSSQLCollector) collectAvailabilityReplica(ch chan<- prometheus.Metric,
// https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-buffer-manager-object // https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-buffer-manager-object
type win32PerfRawDataSQLServerBufferManager struct { type win32PerfRawDataSQLServerBufferManager struct {
BackgroundwriterpagesPersec uint64 BackgroundwriterpagesPersec uint64
Buffercachehitratio uint64 Buffercachehits uint64
Buffercachelookups uint64
CheckpointpagesPersec uint64 CheckpointpagesPersec uint64
Databasepages uint64 Databasepages uint64
Extensionallocatedpages uint64 Extensionallocatedpages uint64
@@ -2327,9 +2364,16 @@ func (c *MSSQLCollector) collectBufferManager(ch chan<- prometheus.Metric, sqlIn
) )
ch <- prometheus.MustNewConstMetric( ch <- prometheus.MustNewConstMetric(
c.BufManBuffercachehitratio, c.BufManBuffercachehits,
prometheus.GaugeValue, prometheus.GaugeValue,
float64(v.Buffercachehitratio), float64(v.Buffercachehits),
sqlInstance,
)
ch <- prometheus.MustNewConstMetric(
c.BufManBuffercachelookups,
prometheus.GaugeValue,
float64(v.Buffercachehits),
sqlInstance, sqlInstance,
) )
@@ -2703,7 +2747,8 @@ type win32PerfRawDataSQLServerDatabases struct {
DBCCLogicalScanBytesPersec uint64 DBCCLogicalScanBytesPersec uint64
GroupCommitTimePersec uint64 GroupCommitTimePersec uint64
LogBytesFlushedPersec uint64 LogBytesFlushedPersec uint64
LogCacheHitRatio uint64 LogCacheHits uint64
LogCacheLookups uint64
LogCacheReadsPersec uint64 LogCacheReadsPersec uint64
LogFilesSizeKB uint64 LogFilesSizeKB uint64
LogFilesUsedSizeKB uint64 LogFilesUsedSizeKB uint64
@@ -2821,7 +2866,14 @@ func (c *MSSQLCollector) collectDatabases(ch chan<- prometheus.Metric, sqlInstan
ch <- prometheus.MustNewConstMetric( ch <- prometheus.MustNewConstMetric(
c.DatabasesLogCacheHitRatio, c.DatabasesLogCacheHitRatio,
prometheus.GaugeValue, prometheus.GaugeValue,
float64(v.LogCacheHitRatio), float64(v.LogCacheHits),
sqlInstance, dbName,
)
ch <- prometheus.MustNewConstMetric(
c.DatabasesLogCacheHitRatio_Base,
prometheus.GaugeValue,
float64(v.LogCacheLookups),
sqlInstance, dbName, sqlInstance, dbName,
) )
@@ -3298,7 +3350,8 @@ func (c *MSSQLCollector) collectGeneralStatistics(ch chan<- prometheus.Metric, s
// - https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-locks-object // - https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-locks-object
type win32PerfRawDataSQLServerLocks struct { type win32PerfRawDataSQLServerLocks struct {
Name string Name string
AverageWaitTimems uint64 LockWaitTime uint64
LockCount uint64
LockRequestsPersec uint64 LockRequestsPersec uint64
LockTimeoutsPersec uint64 LockTimeoutsPersec uint64
LockTimeoutstimeout0Persec uint64 LockTimeoutstimeout0Persec uint64
@@ -3323,7 +3376,14 @@ func (c *MSSQLCollector) collectLocks(ch chan<- prometheus.Metric, sqlInstance s
ch <- prometheus.MustNewConstMetric( ch <- prometheus.MustNewConstMetric(
c.LocksAverageWaitTimems, c.LocksAverageWaitTimems,
prometheus.GaugeValue, prometheus.GaugeValue,
float64(v.AverageWaitTimems)/1000.0, float64(v.LockWaitTime)/1000.0,
sqlInstance, lockResourceName,
)
ch <- prometheus.MustNewConstMetric(
c.LocksAverageWaitTimems_Base,
prometheus.GaugeValue,
float64(v.LockCount)/1000.0,
sqlInstance, lockResourceName, sqlInstance, lockResourceName,
) )

View File

@@ -249,7 +249,27 @@ Name | Description | Type | Labels
_This collector does not yet have explained examples, we would appreciate your help adding them!_ _This collector does not yet have explained examples, we would appreciate your help adding them!_
## Useful queries ## Useful queries
_This collector does not yet have any useful queries added, we would appreciate your help adding them!_
### Buffer Cache Hit Ratio
When you read the counter in perfmon, you will get the percentage of pages found in the buffer cache. This percentage is calculated internally based on the total number of cache hits divided by the total number of cache lookups over the last few thousand page accesses.
This collector retrieves the two internal values separately. To calculate the Buffer Cache Hit Ratio in PromQL, divide the hits by the lookups:
```
wmi_mssql_bufman_buffer_cache_hits{instance="host:9182", exported_instance="MSSQLSERVER"} /
wmi_mssql_bufman_buffer_cache_lookups{instance="host:9182", exported_instance="MSSQLSERVER"}
```
This principle can be used for the following metrics too:
- AccessMethodsWorktablesFromCacheHitRatio
- accessmethods_worktables_from_cache_hits
- accessmethods_worktables_from_cache_lookups
- LogCacheHitRatio
- databases_log_cache_hits
- databases_log_cache_lookups
- AverageLockWaitTime
- locks_wait_time_seconds
- locks_count
## Alerting examples ## Alerting examples
_This collector does not yet have alerting examples, we would appreciate your help adding them!_ _This collector does not yet have alerting examples, we would appreciate your help adding them!_