rework mssql cache metrics

This commit is contained in:
sebastian.poxhofer
2020-03-02 22:34:17 +01:00
parent b64ccbe683
commit 6dad58fc8f
2 changed files with 148 additions and 68 deletions

View File

@@ -137,49 +137,50 @@ type MSSQLCollector struct {
mssqlScrapeSuccessDesc *prometheus.Desc mssqlScrapeSuccessDesc *prometheus.Desc
// Win32_PerfRawData_{instance}_SQLServerAccessMethods // Win32_PerfRawData_{instance}_SQLServerAccessMethods
AccessMethodsAUcleanupbatches *prometheus.Desc AccessMethodsAUcleanupbatches *prometheus.Desc
AccessMethodsAUcleanups *prometheus.Desc AccessMethodsAUcleanups *prometheus.Desc
AccessMethodsByreferenceLobCreateCount *prometheus.Desc AccessMethodsByreferenceLobCreateCount *prometheus.Desc
AccessMethodsByreferenceLobUseCount *prometheus.Desc AccessMethodsByreferenceLobUseCount *prometheus.Desc
AccessMethodsCountLobReadahead *prometheus.Desc AccessMethodsCountLobReadahead *prometheus.Desc
AccessMethodsCountPullInRow *prometheus.Desc AccessMethodsCountPullInRow *prometheus.Desc
AccessMethodsCountPushOffRow *prometheus.Desc AccessMethodsCountPushOffRow *prometheus.Desc
AccessMethodsDeferreddroppedAUs *prometheus.Desc AccessMethodsDeferreddroppedAUs *prometheus.Desc
AccessMethodsDeferredDroppedrowsets *prometheus.Desc AccessMethodsDeferredDroppedrowsets *prometheus.Desc
AccessMethodsDroppedrowsetcleanups *prometheus.Desc AccessMethodsDroppedrowsetcleanups *prometheus.Desc
AccessMethodsDroppedrowsetsskipped *prometheus.Desc AccessMethodsDroppedrowsetsskipped *prometheus.Desc
AccessMethodsExtentDeallocations *prometheus.Desc AccessMethodsExtentDeallocations *prometheus.Desc
AccessMethodsExtentsAllocated *prometheus.Desc AccessMethodsExtentsAllocated *prometheus.Desc
AccessMethodsFailedAUcleanupbatches *prometheus.Desc AccessMethodsFailedAUcleanupbatches *prometheus.Desc
AccessMethodsFailedleafpagecookie *prometheus.Desc AccessMethodsFailedleafpagecookie *prometheus.Desc
AccessMethodsFailedtreepagecookie *prometheus.Desc AccessMethodsFailedtreepagecookie *prometheus.Desc
AccessMethodsForwardedRecords *prometheus.Desc AccessMethodsForwardedRecords *prometheus.Desc
AccessMethodsFreeSpacePageFetches *prometheus.Desc AccessMethodsFreeSpacePageFetches *prometheus.Desc
AccessMethodsFreeSpaceScans *prometheus.Desc AccessMethodsFreeSpaceScans *prometheus.Desc
AccessMethodsFullScans *prometheus.Desc AccessMethodsFullScans *prometheus.Desc
AccessMethodsIndexSearches *prometheus.Desc AccessMethodsIndexSearches *prometheus.Desc
AccessMethodsInSysXactwaits *prometheus.Desc AccessMethodsInSysXactwaits *prometheus.Desc
AccessMethodsLobHandleCreateCount *prometheus.Desc AccessMethodsLobHandleCreateCount *prometheus.Desc
AccessMethodsLobHandleDestroyCount *prometheus.Desc AccessMethodsLobHandleDestroyCount *prometheus.Desc
AccessMethodsLobSSProviderCreateCount *prometheus.Desc AccessMethodsLobSSProviderCreateCount *prometheus.Desc
AccessMethodsLobSSProviderDestroyCount *prometheus.Desc AccessMethodsLobSSProviderDestroyCount *prometheus.Desc
AccessMethodsLobSSProviderTruncationCount *prometheus.Desc AccessMethodsLobSSProviderTruncationCount *prometheus.Desc
AccessMethodsMixedpageallocations *prometheus.Desc AccessMethodsMixedpageallocations *prometheus.Desc
AccessMethodsPagecompressionattempts *prometheus.Desc AccessMethodsPagecompressionattempts *prometheus.Desc
AccessMethodsPageDeallocations *prometheus.Desc AccessMethodsPageDeallocations *prometheus.Desc
AccessMethodsPagesAllocated *prometheus.Desc AccessMethodsPagesAllocated *prometheus.Desc
AccessMethodsPagescompressed *prometheus.Desc AccessMethodsPagescompressed *prometheus.Desc
AccessMethodsPageSplits *prometheus.Desc AccessMethodsPageSplits *prometheus.Desc
AccessMethodsProbeScans *prometheus.Desc AccessMethodsProbeScans *prometheus.Desc
AccessMethodsRangeScans *prometheus.Desc AccessMethodsRangeScans *prometheus.Desc
AccessMethodsScanPointRevalidations *prometheus.Desc AccessMethodsScanPointRevalidations *prometheus.Desc
AccessMethodsSkippedGhostedRecords *prometheus.Desc AccessMethodsSkippedGhostedRecords *prometheus.Desc
AccessMethodsTableLockEscalations *prometheus.Desc AccessMethodsTableLockEscalations *prometheus.Desc
AccessMethodsUsedleafpagecookie *prometheus.Desc AccessMethodsUsedleafpagecookie *prometheus.Desc
AccessMethodsUsedtreepagecookie *prometheus.Desc AccessMethodsUsedtreepagecookie *prometheus.Desc
AccessMethodsWorkfilesCreated *prometheus.Desc AccessMethodsWorkfilesCreated *prometheus.Desc
AccessMethodsWorktablesCreated *prometheus.Desc AccessMethodsWorktablesCreated *prometheus.Desc
AccessMethodsWorktablesFromCacheRatio *prometheus.Desc AccessMethodsWorktablesFromCacheRatio *prometheus.Desc
AccessMethodsWorktablesFromCacheRatio_Base *prometheus.Desc
// Win32_PerfRawData_{instance}_SQLServerAvailabilityReplica // Win32_PerfRawData_{instance}_SQLServerAvailabilityReplica
AvailReplicaBytesReceivedfromReplica *prometheus.Desc AvailReplicaBytesReceivedfromReplica *prometheus.Desc
@@ -194,7 +195,8 @@ type MSSQLCollector struct {
// Win32_PerfRawData_{instance}_SQLServerBufferManager // Win32_PerfRawData_{instance}_SQLServerBufferManager
BufManBackgroundwriterpages *prometheus.Desc BufManBackgroundwriterpages *prometheus.Desc
BufManBuffercachehitratio *prometheus.Desc BufManBuffercachehits *prometheus.Desc
BufManBuffercachelookups *prometheus.Desc
BufManCheckpointpages *prometheus.Desc BufManCheckpointpages *prometheus.Desc
BufManDatabasepages *prometheus.Desc BufManDatabasepages *prometheus.Desc
BufManExtensionallocatedpages *prometheus.Desc BufManExtensionallocatedpages *prometheus.Desc
@@ -253,6 +255,7 @@ type MSSQLCollector struct {
DatabasesGroupCommitTime *prometheus.Desc DatabasesGroupCommitTime *prometheus.Desc
DatabasesLogBytesFlushed *prometheus.Desc DatabasesLogBytesFlushed *prometheus.Desc
DatabasesLogCacheHitRatio *prometheus.Desc DatabasesLogCacheHitRatio *prometheus.Desc
DatabasesLogCacheHitRatio_Base *prometheus.Desc
DatabasesLogCacheReads *prometheus.Desc DatabasesLogCacheReads *prometheus.Desc
DatabasesLogFilesSizeKB *prometheus.Desc DatabasesLogFilesSizeKB *prometheus.Desc
DatabasesLogFilesUsedSizeKB *prometheus.Desc DatabasesLogFilesUsedSizeKB *prometheus.Desc
@@ -317,13 +320,14 @@ type MSSQLCollector struct {
GenStatsUserConnections *prometheus.Desc GenStatsUserConnections *prometheus.Desc
// Win32_PerfRawData_{instance}_SQLServerLocks // Win32_PerfRawData_{instance}_SQLServerLocks
LocksAverageWaitTimems *prometheus.Desc LocksAverageWaitTimems *prometheus.Desc
LocksLockRequests *prometheus.Desc LocksAverageWaitTimems_Base *prometheus.Desc
LocksLockTimeouts *prometheus.Desc LocksLockRequests *prometheus.Desc
LocksLockTimeoutstimeout0 *prometheus.Desc LocksLockTimeouts *prometheus.Desc
LocksLockWaits *prometheus.Desc LocksLockTimeoutstimeout0 *prometheus.Desc
LocksLockWaitTimems *prometheus.Desc LocksLockWaits *prometheus.Desc
LocksNumberofDeadlocks *prometheus.Desc LocksLockWaitTimems *prometheus.Desc
LocksNumberofDeadlocks *prometheus.Desc
// Win32_PerfRawData_{instance}_SQLServerMemoryManager // Win32_PerfRawData_{instance}_SQLServerMemoryManager
MemMgrConnectionMemoryKB *prometheus.Desc MemMgrConnectionMemoryKB *prometheus.Desc
@@ -657,11 +661,17 @@ func NewMSSQLCollector() (Collector, error) {
nil, nil,
), ),
AccessMethodsWorktablesFromCacheRatio: prometheus.NewDesc( AccessMethodsWorktablesFromCacheRatio: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "accessmethods_worktables_from_cache_ratio"), prometheus.BuildFQName(Namespace, subsystem, "accessmethods_worktables_from_cache_hits"),
"(AccessMethods.WorktablesFromCacheRatio)", "(AccessMethods.WorktablesFromCacheRatio)",
[]string{"instance"}, []string{"instance"},
nil, nil,
), ),
AccessMethodsWorktablesFromCacheRatio_Base: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "accessmethods_worktables_from_cache_lookups"),
"(AccessMethods.WorktablesFromCacheRatio_Base)",
[]string{"instance"},
nil,
),
// Win32_PerfRawData_{instance}_SQLServerAvailabilityReplica // Win32_PerfRawData_{instance}_SQLServerAvailabilityReplica
AvailReplicaBytesReceivedfromReplica: prometheus.NewDesc( AvailReplicaBytesReceivedfromReplica: prometheus.NewDesc(
@@ -726,12 +736,18 @@ func NewMSSQLCollector() (Collector, error) {
[]string{"instance"}, []string{"instance"},
nil, nil,
), ),
BufManBuffercachehitratio: prometheus.NewDesc( BufManBuffercachehits: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "bufman_buffer_cache_hit_ratio"), prometheus.BuildFQName(Namespace, subsystem, "bufman_buffer_cache_hits"),
"(BufferManager.Buffercachehitratio)", "(BufferManager.Buffercachehitratio)",
[]string{"instance"}, []string{"instance"},
nil, nil,
), ),
BufManBuffercachelookups: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "bufman_buffer_cache_lookups"),
"(BufferManager.Buffercachehitratio_Base)",
[]string{"instance"},
nil,
),
BufManCheckpointpages: prometheus.NewDesc( BufManCheckpointpages: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "bufman_checkpoint_pages"), prometheus.BuildFQName(Namespace, subsystem, "bufman_checkpoint_pages"),
"(BufferManager.Checkpointpages)", "(BufferManager.Checkpointpages)",
@@ -1055,8 +1071,14 @@ func NewMSSQLCollector() (Collector, error) {
nil, nil,
), ),
DatabasesLogCacheHitRatio: prometheus.NewDesc( DatabasesLogCacheHitRatio: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "databases_log_cache_hit_ratio"), prometheus.BuildFQName(Namespace, subsystem, "databases_log_cache_hits"),
"(Databases.LogCacheHitRatio)", "(Databases.LogCacheHits)",
[]string{"instance", "database"},
nil,
),
DatabasesLogCacheHitRatio_Base: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "databases_log_cache_lookups"),
"(Databases.LogCacheLookups)",
[]string{"instance", "database"}, []string{"instance", "database"},
nil, nil,
), ),
@@ -1425,8 +1447,14 @@ func NewMSSQLCollector() (Collector, error) {
// Win32_PerfRawData_{instance}_SQLServerLocks // Win32_PerfRawData_{instance}_SQLServerLocks
LocksAverageWaitTimems: prometheus.NewDesc( LocksAverageWaitTimems: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "locks_average_wait_seconds"), prometheus.BuildFQName(Namespace, subsystem, "locks_wait_time_seconds"),
"(Locks.AverageWaitTimems)", "(Locks.LockWaitTime. Total time in ms which locks have been holding resources)",
[]string{"instance", "resource"},
nil,
),
LocksAverageWaitTimems_Base: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "locks_count"),
"(Locks.LockCount. count of how often requests have run into locks)",
[]string{"instance", "resource"}, []string{"instance", "resource"},
nil, nil,
), ),
@@ -1862,7 +1890,8 @@ type win32PerfRawDataSQLServerAccessMethods struct {
Usedtreepagecookie uint64 Usedtreepagecookie uint64
WorkfilesCreatedPersec uint64 WorkfilesCreatedPersec uint64
WorktablesCreatedPersec uint64 WorktablesCreatedPersec uint64
WorktablesFromCacheRatio uint64 WorktablesFromCacheHits uint64
WorktablesFromCacheLookups uint64
} }
func (c *MSSQLCollector) collectAccessMethods(ch chan<- prometheus.Metric, sqlInstance string) (*prometheus.Desc, error) { func (c *MSSQLCollector) collectAccessMethods(ch chan<- prometheus.Metric, sqlInstance string) (*prometheus.Desc, error) {
@@ -2177,7 +2206,14 @@ func (c *MSSQLCollector) collectAccessMethods(ch chan<- prometheus.Metric, sqlIn
ch <- prometheus.MustNewConstMetric( ch <- prometheus.MustNewConstMetric(
c.AccessMethodsWorktablesFromCacheRatio, c.AccessMethodsWorktablesFromCacheRatio,
prometheus.CounterValue, prometheus.CounterValue,
float64(v.WorktablesFromCacheRatio), float64(v.WorktablesFromCacheHits),
sqlInstance,
)
ch <- prometheus.MustNewConstMetric(
c.AccessMethodsWorktablesFromCacheRatio_Base,
prometheus.CounterValue,
float64(v.WorktablesFromCacheLookups),
sqlInstance, sqlInstance,
) )
return nil, nil return nil, nil
@@ -2281,7 +2317,8 @@ func (c *MSSQLCollector) collectAvailabilityReplica(ch chan<- prometheus.Metric,
// https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-buffer-manager-object // https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-buffer-manager-object
type win32PerfRawDataSQLServerBufferManager struct { type win32PerfRawDataSQLServerBufferManager struct {
BackgroundwriterpagesPersec uint64 BackgroundwriterpagesPersec uint64
Buffercachehitratio uint64 Buffercachehits uint64
Buffercachelookups uint64
CheckpointpagesPersec uint64 CheckpointpagesPersec uint64
Databasepages uint64 Databasepages uint64
Extensionallocatedpages uint64 Extensionallocatedpages uint64
@@ -2327,9 +2364,16 @@ func (c *MSSQLCollector) collectBufferManager(ch chan<- prometheus.Metric, sqlIn
) )
ch <- prometheus.MustNewConstMetric( ch <- prometheus.MustNewConstMetric(
c.BufManBuffercachehitratio, c.BufManBuffercachehits,
prometheus.GaugeValue, prometheus.GaugeValue,
float64(v.Buffercachehitratio), float64(v.Buffercachehits),
sqlInstance,
)
ch <- prometheus.MustNewConstMetric(
c.BufManBuffercachelookups,
prometheus.GaugeValue,
float64(v.Buffercachehits),
sqlInstance, sqlInstance,
) )
@@ -2703,7 +2747,8 @@ type win32PerfRawDataSQLServerDatabases struct {
DBCCLogicalScanBytesPersec uint64 DBCCLogicalScanBytesPersec uint64
GroupCommitTimePersec uint64 GroupCommitTimePersec uint64
LogBytesFlushedPersec uint64 LogBytesFlushedPersec uint64
LogCacheHitRatio uint64 LogCacheHits uint64
LogCacheLookups uint64
LogCacheReadsPersec uint64 LogCacheReadsPersec uint64
LogFilesSizeKB uint64 LogFilesSizeKB uint64
LogFilesUsedSizeKB uint64 LogFilesUsedSizeKB uint64
@@ -2821,7 +2866,14 @@ func (c *MSSQLCollector) collectDatabases(ch chan<- prometheus.Metric, sqlInstan
ch <- prometheus.MustNewConstMetric( ch <- prometheus.MustNewConstMetric(
c.DatabasesLogCacheHitRatio, c.DatabasesLogCacheHitRatio,
prometheus.GaugeValue, prometheus.GaugeValue,
float64(v.LogCacheHitRatio), float64(v.LogCacheHits),
sqlInstance, dbName,
)
ch <- prometheus.MustNewConstMetric(
c.DatabasesLogCacheHitRatio_Base,
prometheus.GaugeValue,
float64(v.LogCacheLookups),
sqlInstance, dbName, sqlInstance, dbName,
) )
@@ -3298,7 +3350,8 @@ func (c *MSSQLCollector) collectGeneralStatistics(ch chan<- prometheus.Metric, s
// - https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-locks-object // - https://docs.microsoft.com/en-us/sql/relational-databases/performance-monitor/sql-server-locks-object
type win32PerfRawDataSQLServerLocks struct { type win32PerfRawDataSQLServerLocks struct {
Name string Name string
AverageWaitTimems uint64 LockWaitTime uint64
LockCount uint64
LockRequestsPersec uint64 LockRequestsPersec uint64
LockTimeoutsPersec uint64 LockTimeoutsPersec uint64
LockTimeoutstimeout0Persec uint64 LockTimeoutstimeout0Persec uint64
@@ -3323,7 +3376,14 @@ func (c *MSSQLCollector) collectLocks(ch chan<- prometheus.Metric, sqlInstance s
ch <- prometheus.MustNewConstMetric( ch <- prometheus.MustNewConstMetric(
c.LocksAverageWaitTimems, c.LocksAverageWaitTimems,
prometheus.GaugeValue, prometheus.GaugeValue,
float64(v.AverageWaitTimems)/1000.0, float64(v.LockWaitTime)/1000.0,
sqlInstance, lockResourceName,
)
ch <- prometheus.MustNewConstMetric(
c.LocksAverageWaitTimems_Base,
prometheus.GaugeValue,
float64(v.LockCount)/1000.0,
sqlInstance, lockResourceName, sqlInstance, lockResourceName,
) )

View File

@@ -249,7 +249,27 @@ Name | Description | Type | Labels
_This collector does not yet have explained examples, we would appreciate your help adding them!_ _This collector does not yet have explained examples, we would appreciate your help adding them!_
## Useful queries ## Useful queries
_This collector does not yet have any useful queries added, we would appreciate your help adding them!_
### Buffer Cache Hit Ratio
When you read the counter in perfmon you will get the percentage of pages found in the buffer cache. This percentage is calculated internally based on the total number of cache hits divided by the total number of cache lookups over the last few thousand page accesses.
This collector retrieves the two internal values separately. To calculate the Buffer Cache Hit Ratio in PromQL, divide the number of hits by the number of lookups:
```
wmi_mssql_bufman_buffer_cache_hits{instance="host:9182", exported_instance="MSSQLSERVER"} /
wmi_mssql_bufman_buffer_cache_lookups{instance="host:9182", exported_instance="MSSQLSERVER"}
```
This principle can be used for the following metrics too:
- AccessMethodsWorktablesFromCacheHitRatio
- accessmethods_worktables_from_cache_hits
- accessmethods_worktables_from_cache_lookups
- LogCacheHitRatio
- databases_log_cache_hits
- databases_log_cache_lookups
- AverageLockWaitTime
- locks_wait_time_seconds
- locks_count
## Alerting examples ## Alerting examples
_This collector does not yet have alerting examples, we would appreciate your help adding them!_ _This collector does not yet have alerting examples, we would appreciate your help adding them!_