chore: Added/updated documentation for mscluster_cluster collector

Signed-off-by: Sam Storie <sam.storie@emerson.com>
This commit is contained in:
Sam Storie
2022-01-09 16:16:03 -06:00
committed by Ben Reedy
parent c7cbc48afc
commit 3d50cf4309
2 changed files with 176 additions and 72 deletions

View File

@@ -95,151 +95,151 @@ func newMSCluster_ClusterCollector() (Collector, error) {
return &MSCluster_ClusterCollector{ return &MSCluster_ClusterCollector{
AddEvictDelay: prometheus.NewDesc( AddEvictDelay: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "add_evict_delay"), prometheus.BuildFQName(Namespace, subsystem, "add_evict_delay"),
"(AddEvictDelay)", "Provides access to the cluster's AddEvictDelay property, which is the number a seconds that a new node is delayed after an eviction of another node.",
[]string{"name"}, []string{"name"},
nil, nil,
), ),
AdminAccessPoint: prometheus.NewDesc( AdminAccessPoint: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "admin_access_point"), prometheus.BuildFQName(Namespace, subsystem, "admin_access_point"),
"(AdminAccessPoint)", "The type of the cluster administrative access point.",
[]string{"name"}, []string{"name"},
nil, nil,
), ),
AutoAssignNodeSite: prometheus.NewDesc( AutoAssignNodeSite: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "auto_assign_node_site"), prometheus.BuildFQName(Namespace, subsystem, "auto_assign_node_site"),
"(AutoAssignNodeSite)", "Determines whether or not the cluster will attempt to automatically assign nodes to sites based on networks and Active Directory Site information.",
[]string{"name"}, []string{"name"},
nil, nil,
), ),
AutoBalancerLevel: prometheus.NewDesc( AutoBalancerLevel: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "auto_balancer_level"), prometheus.BuildFQName(Namespace, subsystem, "auto_balancer_level"),
"(AutoBalancerLevel)", "Determines the level of aggressiveness of AutoBalancer.",
[]string{"name"}, []string{"name"},
nil, nil,
), ),
AutoBalancerMode: prometheus.NewDesc( AutoBalancerMode: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "auto_balancer_mode"), prometheus.BuildFQName(Namespace, subsystem, "auto_balancer_mode"),
"(AutoBalancerMode)", "Determines whether or not the auto balancer is enabled.",
[]string{"name"}, []string{"name"},
nil, nil,
), ),
BackupInProgress: prometheus.NewDesc( BackupInProgress: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "backup_in_progress"), prometheus.BuildFQName(Namespace, subsystem, "backup_in_progress"),
"(BackupInProgress)", "Indicates whether a backup is in progress.",
[]string{"name"}, []string{"name"},
nil, nil,
), ),
BlockCacheSize: prometheus.NewDesc( BlockCacheSize: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "block_cache_size"), prometheus.BuildFQName(Namespace, subsystem, "block_cache_size"),
"(BlockCacheSize)", "CSV BlockCache Size in MB.",
[]string{"name"}, []string{"name"},
nil, nil,
), ),
ClusSvcHangTimeout: prometheus.NewDesc( ClusSvcHangTimeout: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "clus_svc_hang_timeout"), prometheus.BuildFQName(Namespace, subsystem, "clus_svc_hang_timeout"),
"(ClusSvcHangTimeout)", "Controls how long the cluster network driver waits between Failover Cluster Service heartbeats before it determines that the Failover Cluster Service has stopped responding.",
[]string{"name"}, []string{"name"},
nil, nil,
), ),
ClusSvcRegroupOpeningTimeout: prometheus.NewDesc( ClusSvcRegroupOpeningTimeout: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "clus_svc_regroup_opening_timeout"), prometheus.BuildFQName(Namespace, subsystem, "clus_svc_regroup_opening_timeout"),
"(ClusSvcRegroupOpeningTimeout)", "Controls how long a node will wait on other nodes in the opening stage before deciding that they failed.",
[]string{"name"}, []string{"name"},
nil, nil,
), ),
ClusSvcRegroupPruningTimeout: prometheus.NewDesc( ClusSvcRegroupPruningTimeout: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "clus_svc_regroup_pruning_timeout"), prometheus.BuildFQName(Namespace, subsystem, "clus_svc_regroup_pruning_timeout"),
"(ClusSvcRegroupPruningTimeout)", "Controls how long the membership leader will wait to reach full connectivity between cluster nodes.",
[]string{"name"}, []string{"name"},
nil, nil,
), ),
ClusSvcRegroupStageTimeout: prometheus.NewDesc( ClusSvcRegroupStageTimeout: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "clus_svc_regroup_stage_timeout"), prometheus.BuildFQName(Namespace, subsystem, "clus_svc_regroup_stage_timeout"),
"(ClusSvcRegroupStageTimeout)", "Controls how long a node will wait on other nodes in a membership stage before deciding that they failed.",
[]string{"name"}, []string{"name"},
nil, nil,
), ),
ClusSvcRegroupTickInMilliseconds: prometheus.NewDesc( ClusSvcRegroupTickInMilliseconds: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "clus_svc_regroup_tick_in_milliseconds"), prometheus.BuildFQName(Namespace, subsystem, "clus_svc_regroup_tick_in_milliseconds"),
"(ClusSvcRegroupTickInMilliseconds)", "Controls how frequently the membership algorithm is sending periodic membership messages.",
[]string{"name"}, []string{"name"},
nil, nil,
), ),
ClusterEnforcedAntiAffinity: prometheus.NewDesc( ClusterEnforcedAntiAffinity: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "cluster_enforced_anti_affinity"), prometheus.BuildFQName(Namespace, subsystem, "cluster_enforced_anti_affinity"),
"(ClusterEnforcedAntiAffinity)", "Enables or disables hard enforcement of group anti-affinity classes.",
[]string{"name"}, []string{"name"},
nil, nil,
), ),
ClusterFunctionalLevel: prometheus.NewDesc( ClusterFunctionalLevel: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "cluster_functional_level"), prometheus.BuildFQName(Namespace, subsystem, "cluster_functional_level"),
"(ClusterFunctionalLevel)", "The functional level the cluster is currently running in.",
[]string{"name"}, []string{"name"},
nil, nil,
), ),
ClusterGroupWaitDelay: prometheus.NewDesc( ClusterGroupWaitDelay: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "cluster_group_wait_delay"), prometheus.BuildFQName(Namespace, subsystem, "cluster_group_wait_delay"),
"(ClusterGroupWaitDelay)", "Maximum time in seconds that a group waits for its preferred node to come online during cluster startup before coming online on a different node.",
[]string{"name"}, []string{"name"},
nil, nil,
), ),
ClusterLogLevel: prometheus.NewDesc( ClusterLogLevel: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "cluster_log_level"), prometheus.BuildFQName(Namespace, subsystem, "cluster_log_level"),
"(ClusterLogLevel)", "Controls the level of cluster logging.",
[]string{"name"}, []string{"name"},
nil, nil,
), ),
ClusterLogSize: prometheus.NewDesc( ClusterLogSize: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "cluster_log_size"), prometheus.BuildFQName(Namespace, subsystem, "cluster_log_size"),
"(ClusterLogSize)", "Controls the maximum size of the cluster log files on each of the nodes.",
[]string{"name"}, []string{"name"},
nil, nil,
), ),
ClusterUpgradeVersion: prometheus.NewDesc( ClusterUpgradeVersion: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "cluster_upgrade_version"), prometheus.BuildFQName(Namespace, subsystem, "cluster_upgrade_version"),
"(ClusterUpgradeVersion)", "Specifies the upgrade version the cluster is currently running in.",
[]string{"name"}, []string{"name"},
nil, nil,
), ),
CrossSiteDelay: prometheus.NewDesc( CrossSiteDelay: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "cross_site_delay"), prometheus.BuildFQName(Namespace, subsystem, "cross_site_delay"),
"(CrossSiteDelay)", "Controls how long the cluster network driver waits in milliseconds between sending Cluster Service heartbeats across sites.",
[]string{"name"}, []string{"name"},
nil, nil,
), ),
CrossSiteThreshold: prometheus.NewDesc( CrossSiteThreshold: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "cross_site_threshold"), prometheus.BuildFQName(Namespace, subsystem, "cross_site_threshold"),
"(CrossSiteThreshold)", "Controls how many Cluster Service heartbeats can be missed across sites before it determines that Cluster Service has stopped responding.",
[]string{"name"}, []string{"name"},
nil, nil,
), ),
CrossSubnetDelay: prometheus.NewDesc( CrossSubnetDelay: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "cross_subnet_delay"), prometheus.BuildFQName(Namespace, subsystem, "cross_subnet_delay"),
"(CrossSubnetDelay)", "Controls how long the cluster network driver waits in milliseconds between sending Cluster Service heartbeats across subnets.",
[]string{"name"}, []string{"name"},
nil, nil,
), ),
CrossSubnetThreshold: prometheus.NewDesc( CrossSubnetThreshold: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "cross_subnet_threshold"), prometheus.BuildFQName(Namespace, subsystem, "cross_subnet_threshold"),
"(CrossSubnetThreshold)", "Controls how many Cluster Service heartbeats can be missed across subnets before it determines that Cluster Service has stopped responding.",
[]string{"name"}, []string{"name"},
nil, nil,
), ),
CsvBalancer: prometheus.NewDesc( CsvBalancer: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "csv_balancer"), prometheus.BuildFQName(Namespace, subsystem, "csv_balancer"),
"(CsvBalancer)", "Whether automatic balancing for CSV is enabled.",
[]string{"name"}, []string{"name"},
nil, nil,
), ),
DatabaseReadWriteMode: prometheus.NewDesc( DatabaseReadWriteMode: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "database_read_write_mode"), prometheus.BuildFQName(Namespace, subsystem, "database_read_write_mode"),
"(DatabaseReadWriteMode)", "Sets the database read and write mode.",
[]string{"name"}, []string{"name"},
nil, nil,
), ),
DefaultNetworkRole: prometheus.NewDesc( DefaultNetworkRole: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "default_network_role"), prometheus.BuildFQName(Namespace, subsystem, "default_network_role"),
"(DefaultNetworkRole)", "Provides access to the cluster's DefaultNetworkRole property.",
[]string{"name"}, []string{"name"},
nil, nil,
), ),
@@ -269,247 +269,247 @@ func newMSCluster_ClusterCollector() (Collector, error) {
), ),
DrainOnShutdown: prometheus.NewDesc( DrainOnShutdown: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "drain_on_shutdown"), prometheus.BuildFQName(Namespace, subsystem, "drain_on_shutdown"),
"(DrainOnShutdown)", "Whether to drain the node when cluster service is being stopped.",
[]string{"name"}, []string{"name"},
nil, nil,
), ),
DynamicQuorumEnabled: prometheus.NewDesc( DynamicQuorumEnabled: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "dynamic_quorum_enabled"), prometheus.BuildFQName(Namespace, subsystem, "dynamic_quorum_enabled"),
"(DynamicQuorumEnabled)", "Allows cluster service to adjust node weights as needed to increase availability.",
[]string{"name"}, []string{"name"},
nil, nil,
), ),
EnableSharedVolumes: prometheus.NewDesc( EnableSharedVolumes: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "enable_shared_volumes"), prometheus.BuildFQName(Namespace, subsystem, "enable_shared_volumes"),
"(EnableSharedVolumes)", "Enables or disables cluster shared volumes on this cluster.",
[]string{"name"}, []string{"name"},
nil, nil,
), ),
FixQuorum: prometheus.NewDesc( FixQuorum: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "fix_quorum"), prometheus.BuildFQName(Namespace, subsystem, "fix_quorum"),
"(FixQuorum)", "Provides access to the cluster's FixQuorum property, which specifies if the cluster is in a fix quorum state.",
[]string{"name"}, []string{"name"},
nil, nil,
), ),
GracePeriodEnabled: prometheus.NewDesc( GracePeriodEnabled: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "grace_period_enabled"), prometheus.BuildFQName(Namespace, subsystem, "grace_period_enabled"),
"(GracePeriodEnabled)", "Whether the node grace period feature of this cluster is enabled.",
[]string{"name"}, []string{"name"},
nil, nil,
), ),
GracePeriodTimeout: prometheus.NewDesc( GracePeriodTimeout: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "grace_period_timeout"), prometheus.BuildFQName(Namespace, subsystem, "grace_period_timeout"),
"(GracePeriodTimeout)", "The grace period timeout in milliseconds.",
[]string{"name"}, []string{"name"},
nil, nil,
), ),
GroupDependencyTimeout: prometheus.NewDesc( GroupDependencyTimeout: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "group_dependency_timeout"), prometheus.BuildFQName(Namespace, subsystem, "group_dependency_timeout"),
"(GroupDependencyTimeout)", "The timeout after which a group will be brought online despite unsatisfied dependencies",
[]string{"name"}, []string{"name"},
nil, nil,
), ),
HangRecoveryAction: prometheus.NewDesc( HangRecoveryAction: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "hang_recovery_action"), prometheus.BuildFQName(Namespace, subsystem, "hang_recovery_action"),
"(HangRecoveryAction)", "Controls the action to take if the user-mode processes have stopped responding.",
[]string{"name"}, []string{"name"},
nil, nil,
), ),
IgnorePersistentStateOnStartup: prometheus.NewDesc( IgnorePersistentStateOnStartup: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "ignore_persistent_state_on_startup"), prometheus.BuildFQName(Namespace, subsystem, "ignore_persistent_state_on_startup"),
"(IgnorePersistentStateOnStartup)", "Provides access to the cluster's IgnorePersistentStateOnStartup property, which specifies whether the cluster will bring online groups that were online when the cluster was shut down.",
[]string{"name"}, []string{"name"},
nil, nil,
), ),
LogResourceControls: prometheus.NewDesc( LogResourceControls: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "log_resource_controls"), prometheus.BuildFQName(Namespace, subsystem, "log_resource_controls"),
"(LogResourceControls)", "Controls the logging of resource controls.",
[]string{"name"}, []string{"name"},
nil, nil,
), ),
LowerQuorumPriorityNodeId: prometheus.NewDesc( LowerQuorumPriorityNodeId: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "lower_quorum_priority_node_id"), prometheus.BuildFQName(Namespace, subsystem, "lower_quorum_priority_node_id"),
"(LowerQuorumPriorityNodeId)", "Specifies the Node ID that has a lower priority when voting for quorum is performed. If the quorum vote is split 50/50%, the specified node's vote would be ignored to break the tie. If this is not set then the cluster will pick a node at random to break the tie.",
[]string{"name"}, []string{"name"},
nil, nil,
), ),
MaxNumberOfNodes: prometheus.NewDesc( MaxNumberOfNodes: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "max_number_of_nodes"), prometheus.BuildFQName(Namespace, subsystem, "max_number_of_nodes"),
"(MaxNumberOfNodes)", "Indicates the maximum number of nodes that may participate in the Cluster.",
[]string{"name"}, []string{"name"},
nil, nil,
), ),
MessageBufferLength: prometheus.NewDesc( MessageBufferLength: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "message_buffer_length"), prometheus.BuildFQName(Namespace, subsystem, "message_buffer_length"),
"(MessageBufferLength)", "The maximum unacknowledged message count for GEM.",
[]string{"name"}, []string{"name"},
nil, nil,
), ),
MinimumNeverPreemptPriority: prometheus.NewDesc( MinimumNeverPreemptPriority: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "minimum_never_preempt_priority"), prometheus.BuildFQName(Namespace, subsystem, "minimum_never_preempt_priority"),
"(MinimumNeverPreemptPriority)", "Groups with this priority or higher cannot be preempted.",
[]string{"name"}, []string{"name"},
nil, nil,
), ),
MinimumPreemptorPriority: prometheus.NewDesc( MinimumPreemptorPriority: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "minimum_preemptor_priority"), prometheus.BuildFQName(Namespace, subsystem, "minimum_preemptor_priority"),
"(MinimumPreemptorPriority)", "Minimum priority a cluster group must have to be able to preempt another group.",
[]string{"name"}, []string{"name"},
nil, nil,
), ),
NetftIPSecEnabled: prometheus.NewDesc( NetftIPSecEnabled: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "netft_ip_sec_enabled"), prometheus.BuildFQName(Namespace, subsystem, "netft_ip_sec_enabled"),
"(NetftIPSecEnabled)", "Whether IPSec is enabled for cluster internal traffic.",
[]string{"name"}, []string{"name"},
nil, nil,
), ),
PlacementOptions: prometheus.NewDesc( PlacementOptions: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "placement_options"), prometheus.BuildFQName(Namespace, subsystem, "placement_options"),
"(PlacementOptions)", "Various option flags to modify default placement behavior.",
[]string{"name"}, []string{"name"},
nil, nil,
), ),
PlumbAllCrossSubnetRoutes: prometheus.NewDesc( PlumbAllCrossSubnetRoutes: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "plumb_all_cross_subnet_routes"), prometheus.BuildFQName(Namespace, subsystem, "plumb_all_cross_subnet_routes"),
"(PlumbAllCrossSubnetRoutes)", "Plumbs all possible cross subnet routes to all nodes.",
[]string{"name"}, []string{"name"},
nil, nil,
), ),
PreventQuorum: prometheus.NewDesc( PreventQuorum: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "prevent_quorum"), prometheus.BuildFQName(Namespace, subsystem, "prevent_quorum"),
"(PreventQuorum)", "Whether the cluster will ignore group persistent state on startup.",
[]string{"name"}, []string{"name"},
nil, nil,
), ),
QuarantineDuration: prometheus.NewDesc( QuarantineDuration: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "quarantine_duration"), prometheus.BuildFQName(Namespace, subsystem, "quarantine_duration"),
"(QuarantineDuration)", "The quarantine period timeout in milliseconds.",
[]string{"name"}, []string{"name"},
nil, nil,
), ),
QuarantineThreshold: prometheus.NewDesc( QuarantineThreshold: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "quarantine_threshold"), prometheus.BuildFQName(Namespace, subsystem, "quarantine_threshold"),
"(QuarantineThreshold)", "Number of node failures before it will be quarantined.",
[]string{"name"}, []string{"name"},
nil, nil,
), ),
QuorumArbitrationTimeMax: prometheus.NewDesc( QuorumArbitrationTimeMax: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "quorum_arbitration_time_max"), prometheus.BuildFQName(Namespace, subsystem, "quorum_arbitration_time_max"),
"(QuorumArbitrationTimeMax)", "Controls the maximum time necessary to decide the Quorum owner node.",
[]string{"name"}, []string{"name"},
nil, nil,
), ),
QuorumArbitrationTimeMin: prometheus.NewDesc( QuorumArbitrationTimeMin: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "quorum_arbitration_time_min"), prometheus.BuildFQName(Namespace, subsystem, "quorum_arbitration_time_min"),
"(QuorumArbitrationTimeMin)", "Controls the minimum time necessary to decide the Quorum owner node.",
[]string{"name"}, []string{"name"},
nil, nil,
), ),
QuorumLogFileSize: prometheus.NewDesc( QuorumLogFileSize: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "quorum_log_file_size"), prometheus.BuildFQName(Namespace, subsystem, "quorum_log_file_size"),
"(QuorumLogFileSize)", "This property is obsolete.",
[]string{"name"}, []string{"name"},
nil, nil,
), ),
QuorumTypeValue: prometheus.NewDesc( QuorumTypeValue: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "quorum_type_value"), prometheus.BuildFQName(Namespace, subsystem, "quorum_type_value"),
"(QuorumTypeValue)", "Get the current quorum type value. -1: Unknown; 1: Node; 2: FileShareWitness; 3: Storage; 4: None",
[]string{"name"}, []string{"name"},
nil, nil,
), ),
RequestReplyTimeout: prometheus.NewDesc( RequestReplyTimeout: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "request_reply_timeout"), prometheus.BuildFQName(Namespace, subsystem, "request_reply_timeout"),
"(RequestReplyTimeout)", "Controls the request reply time-out period.",
[]string{"name"}, []string{"name"},
nil, nil,
), ),
ResiliencyDefaultPeriod: prometheus.NewDesc( ResiliencyDefaultPeriod: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "resiliency_default_period"), prometheus.BuildFQName(Namespace, subsystem, "resiliency_default_period"),
"(ResiliencyDefaultPeriod)", "The default resiliency period, in seconds, for the cluster.",
[]string{"name"}, []string{"name"},
nil, nil,
), ),
ResiliencyLevel: prometheus.NewDesc( ResiliencyLevel: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "resiliency_level"), prometheus.BuildFQName(Namespace, subsystem, "resiliency_level"),
"(ResiliencyLevel)", "The resiliency level for the cluster.",
[]string{"name"}, []string{"name"},
nil, nil,
), ),
ResourceDllDeadlockPeriod: prometheus.NewDesc( ResourceDllDeadlockPeriod: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "resource_dll_deadlock_period"), prometheus.BuildFQName(Namespace, subsystem, "resource_dll_deadlock_period"),
"(ResourceDllDeadlockPeriod)", "This property is obsolete.",
[]string{"name"}, []string{"name"},
nil, nil,
), ),
RootMemoryReserved: prometheus.NewDesc( RootMemoryReserved: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "root_memory_reserved"), prometheus.BuildFQName(Namespace, subsystem, "root_memory_reserved"),
"(RootMemoryReserved)", "Controls the amount of memory reserved for the parent partition on all cluster nodes.",
[]string{"name"}, []string{"name"},
nil, nil,
), ),
RouteHistoryLength: prometheus.NewDesc( RouteHistoryLength: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "route_history_length"), prometheus.BuildFQName(Namespace, subsystem, "route_history_length"),
"(RouteHistoryLength)", "The history length for routes to help finding network issues.",
[]string{"name"}, []string{"name"},
nil, nil,
), ),
S2DBusTypes: prometheus.NewDesc( S2DBusTypes: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "s2d_bus_types"), prometheus.BuildFQName(Namespace, subsystem, "s2d_bus_types"),
"(S2DBusTypes)", "Bus types for storage spaces direct.",
[]string{"name"}, []string{"name"},
nil, nil,
), ),
S2DCacheDesiredState: prometheus.NewDesc( S2DCacheDesiredState: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "s2d_cache_desired_state"), prometheus.BuildFQName(Namespace, subsystem, "s2d_cache_desired_state"),
"(S2DCacheDesiredState)", "Desired state of the storage spaces direct cache.",
[]string{"name"}, []string{"name"},
nil, nil,
), ),
S2DCacheFlashReservePercent: prometheus.NewDesc( S2DCacheFlashReservePercent: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "s2d_cache_flash_reserve_percent"), prometheus.BuildFQName(Namespace, subsystem, "s2d_cache_flash_reserve_percent"),
"(S2DCacheFlashReservePercent)", "Percentage of allocated flash space to utilize when caching.",
[]string{"name"}, []string{"name"},
nil, nil,
), ),
S2DCachePageSizeKBytes: prometheus.NewDesc( S2DCachePageSizeKBytes: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "s2d_cache_page_size_k_bytes"), prometheus.BuildFQName(Namespace, subsystem, "s2d_cache_page_size_k_bytes"),
"(S2DCachePageSizeKBytes)", "Page size in KB used by S2D cache.",
[]string{"name"}, []string{"name"},
nil, nil,
), ),
S2DEnabled: prometheus.NewDesc( S2DEnabled: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "s2d_enabled"), prometheus.BuildFQName(Namespace, subsystem, "s2d_enabled"),
"(S2DEnabled)", "Whether direct attached storage (DAS) is enabled.",
[]string{"name"}, []string{"name"},
nil, nil,
), ),
S2DIOLatencyThreshold: prometheus.NewDesc( S2DIOLatencyThreshold: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "s2dio_latency_threshold"), prometheus.BuildFQName(Namespace, subsystem, "s2dio_latency_threshold"),
"(S2DIOLatencyThreshold)", "The I/O latency threshold for storage spaces direct.",
[]string{"name"}, []string{"name"},
nil, nil,
), ),
S2DOptimizations: prometheus.NewDesc( S2DOptimizations: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "s2d_optimizations"), prometheus.BuildFQName(Namespace, subsystem, "s2d_optimizations"),
"(S2DOptimizations)", "Optimization flags for storage spaces direct.",
[]string{"name"}, []string{"name"},
nil, nil,
), ),
SameSubnetDelay: prometheus.NewDesc( SameSubnetDelay: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "same_subnet_delay"), prometheus.BuildFQName(Namespace, subsystem, "same_subnet_delay"),
"(SameSubnetDelay)", "Controls how long the cluster network driver waits in milliseconds between sending Cluster Service heartbeats on the same subnet.",
[]string{"name"}, []string{"name"},
nil, nil,
), ),
SameSubnetThreshold: prometheus.NewDesc( SameSubnetThreshold: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "same_subnet_threshold"), prometheus.BuildFQName(Namespace, subsystem, "same_subnet_threshold"),
"(SameSubnetThreshold)", "Controls how many Cluster Service heartbeats can be missed on the same subnet before it determines that Cluster Service has stopped responding.",
[]string{"name"}, []string{"name"},
nil, nil,
), ),
SecurityLevel: prometheus.NewDesc( SecurityLevel: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "security_level"), prometheus.BuildFQName(Namespace, subsystem, "security_level"),
"(SecurityLevel)", "Controls the level of security that should apply to intracluster messages. 0: Clear Text; 1: Sign; 2: Encrypt ",
[]string{"name"}, []string{"name"},
nil, nil,
), ),
@@ -521,37 +521,37 @@ func newMSCluster_ClusterCollector() (Collector, error) {
), ),
SharedVolumeVssWriterOperationTimeout: prometheus.NewDesc( SharedVolumeVssWriterOperationTimeout: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "shared_volume_vss_writer_operation_timeout"), prometheus.BuildFQName(Namespace, subsystem, "shared_volume_vss_writer_operation_timeout"),
"(SharedVolumeVssWriterOperationTimeout)", "CSV VSS Writer operation timeout in seconds.",
[]string{"name"}, []string{"name"},
nil, nil,
), ),
ShutdownTimeoutInMinutes: prometheus.NewDesc( ShutdownTimeoutInMinutes: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "shutdown_timeout_in_minutes"), prometheus.BuildFQName(Namespace, subsystem, "shutdown_timeout_in_minutes"),
"(ShutdownTimeoutInMinutes)", "The maximum time in minutes allowed for cluster resources to come offline during cluster service shutdown.",
[]string{"name"}, []string{"name"},
nil, nil,
), ),
UseClientAccessNetworksForSharedVolumes: prometheus.NewDesc( UseClientAccessNetworksForSharedVolumes: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "use_client_access_networks_for_shared_volumes"), prometheus.BuildFQName(Namespace, subsystem, "use_client_access_networks_for_shared_volumes"),
"(UseClientAccessNetworksForSharedVolumes)", "Whether the use of client access networks for cluster shared volumes feature of this cluster is enabled. 0: Disabled; 1: Enabled; 2: Auto",
[]string{"name"}, []string{"name"},
nil, nil,
), ),
WitnessDatabaseWriteTimeout: prometheus.NewDesc( WitnessDatabaseWriteTimeout: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "witness_database_write_timeout"), prometheus.BuildFQName(Namespace, subsystem, "witness_database_write_timeout"),
"(WitnessDatabaseWriteTimeout)", "Controls the maximum time in seconds that a cluster database write to a witness can take before the write is abandoned.",
[]string{"name"}, []string{"name"},
nil, nil,
), ),
WitnessDynamicWeight: prometheus.NewDesc( WitnessDynamicWeight: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "witness_dynamic_weight"), prometheus.BuildFQName(Namespace, subsystem, "witness_dynamic_weight"),
"(WitnessDynamicWeight)", "The weight of the configured witness.",
[]string{"name"}, []string{"name"},
nil, nil,
), ),
WitnessRestartInterval: prometheus.NewDesc( WitnessRestartInterval: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "witness_restart_interval"), prometheus.BuildFQName(Namespace, subsystem, "witness_restart_interval"),
"(WitnessRestartInterval)", "Controls the witness restart interval.",
[]string{"name"}, []string{"name"},
nil, nil,
), ),

View File

@@ -0,0 +1,104 @@
# mscluster_cluster collector
The MSCluster_Cluster class is a dynamic WMI class that represents a cluster.
|||
-|-
Metric name prefix | `mscluster_cluster`
Classes | `MSCluster_Cluster`
Enabled by default? | No
## Flags
None
## Metrics
Name | Description | Type | Labels
-----|-------------|------|-------
`AddEvictDelay` | Provides access to the cluster's AddEvictDelay property, which is the number of seconds that a new node is delayed after an eviction of another node. | gauge | None
`AdminAccessPoint` | The type of the cluster administrative access point. | gauge | None
`AutoAssignNodeSite` | Determines whether or not the cluster will attempt to automatically assign nodes to sites based on networks and Active Directory Site information. | gauge | None
`AutoBalancerLevel` | Determines the level of aggressiveness of AutoBalancer. | gauge | None
`AutoBalancerMode` | Determines whether or not the auto balancer is enabled. | gauge | None
`BackupInProgress` | Indicates whether a backup is in progress. | gauge | None
`BlockCacheSize` | CSV BlockCache Size in MB. | gauge | None
`ClusSvcHangTimeout` | Controls how long the cluster network driver waits between Failover Cluster Service heartbeats before it determines that the Failover Cluster Service has stopped responding. | gauge | None
`ClusSvcRegroupOpeningTimeout` | Controls how long a node will wait on other nodes in the opening stage before deciding that they failed. | gauge | None
`ClusSvcRegroupPruningTimeout` | Controls how long the membership leader will wait to reach full connectivity between cluster nodes. | gauge | None
`ClusSvcRegroupStageTimeout` | Controls how long a node will wait on other nodes in a membership stage before deciding that they failed. | gauge | None
`ClusSvcRegroupTickInMilliseconds` | Controls how frequently the membership algorithm is sending periodic membership messages. | gauge | None
`ClusterEnforcedAntiAffinity` | Enables or disables hard enforcement of group anti-affinity classes. | gauge | None
`ClusterFunctionalLevel` | The functional level the cluster is currently running in. | gauge | None
`ClusterGroupWaitDelay` | Maximum time in seconds that a group waits for its preferred node to come online during cluster startup before coming online on a different node. | gauge | None
`ClusterLogLevel` | Controls the level of cluster logging. | gauge | None
`ClusterLogSize` | Controls the maximum size of the cluster log files on each of the nodes. | gauge | None
`ClusterUpgradeVersion` | Specifies the upgrade version the cluster is currently running in. | gauge | None
`CrossSiteDelay` | Controls how long the cluster network driver waits in milliseconds between sending Cluster Service heartbeats across sites. | gauge | None
`CrossSiteThreshold` | Controls how many Cluster Service heartbeats can be missed across sites before it determines that Cluster Service has stopped responding. | gauge | None
`CrossSubnetDelay` | Controls how long the cluster network driver waits in milliseconds between sending Cluster Service heartbeats across subnets. | gauge | None
`CrossSubnetThreshold` | Controls how many Cluster Service heartbeats can be missed across subnets before it determines that Cluster Service has stopped responding. | gauge | None
`CsvBalancer` | Whether automatic balancing for CSV is enabled. | gauge | None
`DatabaseReadWriteMode` | Sets the database read and write mode. | gauge | None
`DefaultNetworkRole` | Provides access to the cluster's DefaultNetworkRole property. | gauge | None
`DetectedCloudPlatform` | | gauge | None
`DetectManagedEvents` | | gauge | None
`DetectManagedEventsThreshold` | | gauge | None
`DisableGroupPreferredOwnerRandomization` | | gauge | None
`DrainOnShutdown` | Whether to drain the node when cluster service is being stopped. | gauge | None
`DynamicQuorumEnabled` | Allows cluster service to adjust node weights as needed to increase availability. | gauge | None
`EnableSharedVolumes` | Enables or disables cluster shared volumes on this cluster. | gauge | None
`FixQuorum` | Provides access to the cluster's FixQuorum property, which specifies if the cluster is in a fix quorum state. | gauge | None
`GracePeriodEnabled` | Whether the node grace period feature of this cluster is enabled. | gauge | None
`GracePeriodTimeout` | The grace period timeout in milliseconds. | gauge | None
`GroupDependencyTimeout` | The timeout after which a group will be brought online despite unsatisfied dependencies | gauge | None
`HangRecoveryAction` | Controls the action to take if the user-mode processes have stopped responding. | gauge | None
`IgnorePersistentStateOnStartup` | Provides access to the cluster's IgnorePersistentStateOnStartup property, which specifies whether the cluster will bring online groups that were online when the cluster was shut down. | gauge | None
`LogResourceControls` | Controls the logging of resource controls. | gauge | None
`LowerQuorumPriorityNodeId` | Specifies the Node ID that has a lower priority when voting for quorum is performed. If the quorum vote is split 50/50%, the specified node's vote would be ignored to break the tie. If this is not set then the cluster will pick a node at random to break the tie. | gauge | None
`MaxNumberOfNodes` | Indicates the maximum number of nodes that may participate in the Cluster. | gauge | None
`MessageBufferLength` | The maximum unacknowledged message count for GEM. | gauge | None
`MinimumNeverPreemptPriority` | Groups with this priority or higher cannot be preempted. | gauge | None
`MinimumPreemptorPriority` | Minimum priority a cluster group must have to be able to preempt another group. | gauge | None
`NetftIPSecEnabled` | Whether IPSec is enabled for cluster internal traffic. | gauge | None
`PlacementOptions` | Various option flags to modify default placement behavior. | gauge | None
`PlumbAllCrossSubnetRoutes` | Plumbs all possible cross subnet routes to all nodes. | gauge | None
`PreventQuorum` | Whether the cluster will ignore group persistent state on startup. | gauge | None
`QuarantineDuration` | The quarantine period timeout in milliseconds. | gauge | None
`QuarantineThreshold` | Number of node failures before it will be quarantined. | gauge | None
`QuorumArbitrationTimeMax` | Controls the maximum time necessary to decide the Quorum owner node. | gauge | None
`QuorumArbitrationTimeMin` | Controls the minimum time necessary to decide the Quorum owner node. | gauge | None
`QuorumLogFileSize` | This property is obsolete. | gauge | None
`QuorumTypeValue` | Get the current quorum type value. -1: Unknown; 1: Node; 2: FileShareWitness; 3: Storage; 4: None | gauge | None
`RequestReplyTimeout` | Controls the request reply time-out period. | gauge | None
`ResiliencyDefaultPeriod` | The default resiliency period, in seconds, for the cluster. | gauge | None
`ResiliencyLevel` | The resiliency level for the cluster. | gauge | None
`ResourceDllDeadlockPeriod` | This property is obsolete. | gauge | None
`RootMemoryReserved` | Controls the amount of memory reserved for the parent partition on all cluster nodes. | gauge | None
`RouteHistoryLength` | The history length for routes to help finding network issues. | gauge | None
`S2DBusTypes` | Bus types for storage spaces direct. | gauge | None
`S2DCacheDesiredState` | Desired state of the storage spaces direct cache. | gauge | None
`S2DCacheFlashReservePercent` | Percentage of allocated flash space to utilize when caching. | gauge | None
`S2DCachePageSizeKBytes` | Page size in KB used by S2D cache. | gauge | None
`S2DEnabled` | Whether direct attached storage (DAS) is enabled. | gauge | None
`S2DIOLatencyThreshold` | The I/O latency threshold for storage spaces direct. | gauge | None
`S2DOptimizations` | Optimization flags for storage spaces direct. | gauge | None
`SameSubnetDelay` | Controls how long the cluster network driver waits in milliseconds between sending Cluster Service heartbeats on the same subnet. | gauge | None
`SameSubnetThreshold` | Controls how many Cluster Service heartbeats can be missed on the same subnet before it determines that Cluster Service has stopped responding. | gauge | None
`SecurityLevel` | Controls the level of security that should apply to intracluster messages. 0: Clear Text; 1: Sign; 2: Encrypt | gauge | None
`SecurityLevelForStorage` | | gauge | None
`SharedVolumeVssWriterOperationTimeout` | CSV VSS Writer operation timeout in seconds. | gauge | None
`ShutdownTimeoutInMinutes` | The maximum time in minutes allowed for cluster resources to come offline during cluster service shutdown. | gauge | None
`UseClientAccessNetworksForSharedVolumes` | Whether the use of client access networks for cluster shared volumes feature of this cluster is enabled. 0: Disabled; 1: Enabled; 2: Auto | gauge | None
`WitnessDatabaseWriteTimeout` | Controls the maximum time in seconds that a cluster database write to a witness can take before the write is abandoned. | gauge | None
`WitnessDynamicWeight` | The weight of the configured witness. | gauge | None
`WitnessRestartInterval` | Controls the witness restart interval. | gauge | None
### Example metric
_This collector does not yet have explained examples, we would appreciate your help adding them!_
## Useful queries
_This collector does not yet have any useful queries added, we would appreciate your help adding them!_
## Alerting examples
_This collector does not yet have alerting examples, we would appreciate your help adding them!_