diff --git a/docs/collector.mscluster_resource.md b/docs/collector.mscluster_resource.md index 11754beb..c7ad995a 100644 --- a/docs/collector.mscluster_resource.md +++ b/docs/collector.mscluster_resource.md @@ -23,6 +23,7 @@ Name | Description | Type | Labels `IsAlivePollInterval` | Provides access to the resource's IsAlivePollInterval property, which is the recommended interval in milliseconds at which the Cluster Service should poll the resource to determine whether it is operational. If the property is set to 0xFFFFFFFF, the Cluster Service uses the IsAlivePollInterval property for the resource type associated with the resource. | gauge | `type`, `owner_group`, `name` `LooksAlivePollInterval` | Provides access to the resource's LooksAlivePollInterval property, which is the recommended interval in milliseconds at which the Cluster Service should poll the resource to determine whether it appears operational. If the property is set to 0xFFFFFFFF, the Cluster Service uses the LooksAlivePollInterval property for the resource type associated with the resource. | gauge | `type`, `owner_group`, `name` `MonitorProcessId` | Provides the process ID of the resource host service that is currently hosting the resource. | gauge | `type`, `owner_group`, `name` +`OwnerNode` | The node hosting the resource. 0: Not hosted; 1: Hosted | gauge | `type`, `owner_group`, `node_name`, `name` `PendingTimeout` | Provides access to the resource's PendingTimeout property. If a resource cannot be brought online or taken offline in the number of milliseconds specified by the PendingTimeout property, the resource is forcibly terminated. | gauge | `type`, `owner_group`, `name` `ResourceClass` | Gets or sets the resource class of a resource. 0: Unknown; 1: Storage; 2: Network; 32768: Unknown | gauge | `type`, `owner_group`, `name` `RestartAction` | Provides access to the resource's RestartAction property, which is the action to be taken by the Cluster Service if the resource fails. 
| gauge | `type`, `owner_group`, `name` @@ -34,10 +35,16 @@ Name | Description | Type | Labels `Subclass` | Provides the list of references to nodes that can be the owner of this resource. | gauge | `type`, `owner_group`, `name` ### Example metric -_This collector does not yet have explained examples, we would appreciate your help adding them!_ +Query whether each cluster resource is owned by node1 (1: Hosted; 0: Not hosted) +``` +windows_mscluster_resource_owner_node{node_name="node1"} +``` ## Useful queries -_This collector does not yet have any useful queries added, we would appreciate your help adding them!_ +Count the number of Network Name cluster resources +``` +count(windows_mscluster_resource_state{type="Network Name"}) +``` ## Alerting examples _This collector does not yet have alerting examples, we would appreciate your help adding them!_ diff --git a/docs/collector.mscluster_resourcegroup.md b/docs/collector.mscluster_resourcegroup.md index 44ad6d93..88c07b6e 100644 --- a/docs/collector.mscluster_resourcegroup.md +++ b/docs/collector.mscluster_resourcegroup.md @@ -26,16 +26,23 @@ Name | Description | Type | Labels `FailoverThreshold` | The FailoverThreshold property specifies the maximum number of failover attempts. | gauge | `name` `Flags` | Provides access to the flags set for the group. The cluster defines flags only for resources. For a description of these flags, see [CLUSCTL_RESOURCE_GET_FLAGS](https://docs.microsoft.com/en-us/previous-versions/windows/desktop/mscs/clusctl-resource-get-flags). | gauge | `name` `GroupType` | The Type of the resource group. | gauge | `name` +`OwnerNode` | The node hosting the resource group. 0: Not hosted; 1: Hosted | gauge | `node_name`, `name` `Priority` | Priority value of the resource group | gauge | `name` `ResiliencyPeriod` | The resiliency period for this group, in seconds. | gauge | `name` `State` | The current state of the resource group. 
-1: Unknown; 0: Online; 1: Offline; 2: Failed; 3: Partial Online; 4: Pending | gauge | `name` `UpdateDomain` | | gauge | `name` ### Example metric -_This collector does not yet have explained examples, we would appreciate your help adding them!_ +Query whether each cluster group is owned by node1 (1: Hosted; 0: Not hosted) +``` +windows_mscluster_resourcegroup_owner_node{node_name="node1"} +``` ## Useful queries -_This collector does not yet have any useful queries added, we would appreciate your help adding them!_ +Count the number of cluster groups by type +``` +count_values("count", windows_mscluster_resourcegroup_group_type) +``` ## Alerting examples _This collector does not yet have alerting examples, we would appreciate your help adding them!_ diff --git a/pkg/collector/mscluster_node/mscluster_node.go b/pkg/collector/mscluster_node/mscluster_node.go index 2dc75b12..48083e7c 100644 --- a/pkg/collector/mscluster_node/mscluster_node.go +++ b/pkg/collector/mscluster_node/mscluster_node.go @@ -15,6 +15,9 @@ type Config struct{} var ConfigDefaults = Config{} +// NodeName holds the node names gathered by the most recent mscluster_node collection; it is read by mscluster_resource and mscluster_resourcegroup +var NodeName []string + // A collector is a Prometheus collector for WMI MSCluster_Node metrics type collector struct { logger log.Logger @@ -175,6 +178,8 @@ func (c *collector) Collect(_ *types.ScrapeContext, ch chan<- prometheus.Metric) return err } + NodeName = []string{} + for _, v := range dst { ch <- prometheus.MustNewConstMetric( @@ -274,6 +279,8 @@ func (c *collector) Collect(_ *types.ScrapeContext, ch chan<- prometheus.Metric) float64(v.StatusInformation), v.Name, ) + + NodeName = append(NodeName, v.Name) } return nil diff --git a/pkg/collector/mscluster_resource/mscluster_resource.go b/pkg/collector/mscluster_resource/mscluster_resource.go index bd972648..01e4df33 100644 --- a/pkg/collector/mscluster_resource/mscluster_resource.go +++ b/pkg/collector/mscluster_resource/mscluster_resource.go @@ -1,6 +1,7 @@ package mscluster_resource import ( + 
"github.com/prometheus-community/windows_exporter/pkg/collector/mscluster_node" "github.com/prometheus-community/windows_exporter/pkg/types" "github.com/prometheus-community/windows_exporter/pkg/wmi" @@ -26,6 +27,7 @@ type collector struct { IsAlivePollInterval *prometheus.Desc LooksAlivePollInterval *prometheus.Desc MonitorProcessId *prometheus.Desc + OwnerNode *prometheus.Desc PendingTimeout *prometheus.Desc ResourceClass *prometheus.Desc RestartAction *prometheus.Desc @@ -102,6 +104,18 @@ func (c *collector) Build() error { []string{"type", "owner_group", "name"}, nil, ) + c.OwnerNode = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, Name, "owner_node"), + "The node hosting the resource. 0: Not hosted; 1: Hosted", + []string{"type", "owner_group", "node_name", "name"}, + nil, + ) + c.OwnerNode = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, Name, "owner_node"), + "The node hosting the resource. 0: Not hosted; 1: Hosted", + []string{"type", "owner_group", "node_name", "name"}, + nil, + ) c.PendingTimeout = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "pending_timeout"), "Provides access to the resource's PendingTimeout property. 
If a resource cannot be brought online or taken offline in the number of milliseconds specified by the PendingTimeout property, the resource is forcibly terminated.", @@ -165,6 +179,7 @@ type MSCluster_Resource struct { Name string Type string OwnerGroup string + OwnerNode string Characteristics uint DeadlockTimeout uint @@ -244,6 +259,21 @@ func (c *collector) Collect(_ *types.ScrapeContext, ch chan<- prometheus.Metric) v.Type, v.OwnerGroup, v.Name, ) + if mscluster_node.NodeName != nil { + for _, node_name := range mscluster_node.NodeName { + isCurrentState := 0.0 + if v.OwnerNode == node_name { + isCurrentState = 1.0 + } + ch <- prometheus.MustNewConstMetric( + c.OwnerNode, + prometheus.GaugeValue, + isCurrentState, + v.Type, v.OwnerGroup, node_name, v.Name, + ) + } + } + ch <- prometheus.MustNewConstMetric( c.PendingTimeout, prometheus.GaugeValue, diff --git a/pkg/collector/mscluster_resourcegroup/mscluster_resourcegroup.go b/pkg/collector/mscluster_resourcegroup/mscluster_resourcegroup.go index 5bf8c3a9..10264ca5 100644 --- a/pkg/collector/mscluster_resourcegroup/mscluster_resourcegroup.go +++ b/pkg/collector/mscluster_resourcegroup/mscluster_resourcegroup.go @@ -1,6 +1,7 @@ package mscluster_resourcegroup import ( + "github.com/prometheus-community/windows_exporter/pkg/collector/mscluster_node" "github.com/prometheus-community/windows_exporter/pkg/types" "github.com/prometheus-community/windows_exporter/pkg/wmi" @@ -31,6 +32,7 @@ type collector struct { Flags *prometheus.Desc GroupType *prometheus.Desc PlacementOptions *prometheus.Desc + OwnerNode *prometheus.Desc Priority *prometheus.Desc ResiliencyPeriod *prometheus.Desc State *prometheus.Desc @@ -119,6 +121,18 @@ func (c *collector) Build() error { []string{"name"}, nil, ) + c.OwnerNode = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, Name, "owner_node"), + "The node hosting the resource group. 
0: Not hosted; 1: Hosted", + []string{"node_name", "name"}, + nil, + ) + c.OwnerNode = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, Name, "owner_node"), + "The node hosting the resource group. 0: Not hosted; 1: Hosted", + []string{"node_name", "name"}, + nil, + ) c.Priority = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "priority"), "Priority value of the resource group", @@ -155,6 +169,7 @@ type MSCluster_ResourceGroup struct { FailoverThreshold uint Flags uint GroupType uint + OwnerNode string Priority uint ResiliencyPeriod uint State uint @@ -241,6 +256,21 @@ func (c *collector) Collect(_ *types.ScrapeContext, ch chan<- prometheus.Metric) v.Name, ) + if mscluster_node.NodeName != nil { + for _, node_name := range mscluster_node.NodeName { + isCurrentState := 0.0 + if v.OwnerNode == node_name { + isCurrentState = 1.0 + } + ch <- prometheus.MustNewConstMetric( + c.OwnerNode, + prometheus.GaugeValue, + isCurrentState, + node_name, v.Name, + ) + } + } + ch <- prometheus.MustNewConstMetric( c.Priority, prometheus.GaugeValue,