From d07845ca0813201582f0f83bdb0790d6862ecf9b Mon Sep 17 00:00:00 2001 From: Dinifarb Date: Fri, 12 Jan 2024 16:45:56 +0100 Subject: [PATCH 01/15] fix:(#1382): rename TEXTFILE_DIR to TEXTFILE_DIRS Signed-off-by: Dinifarb Signed-off-by: Alexandre JARDON <28548335+webalexeu@users.noreply.github.com> --- README.md | 4 ++-- installer/windows_exporter.wxs | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 87dd22b5..a881cb73 100644 --- a/README.md +++ b/README.md @@ -106,7 +106,7 @@ Name | Description `LISTEN_ADDR` | The IP address to bind to. Defaults to 0.0.0.0 `LISTEN_PORT` | The port to bind to. Defaults to 9182. `METRICS_PATH` | The path at which to serve metrics. Defaults to `/metrics` -`TEXTFILE_DIR` | As the `--collector.textfile.directory` flag, provide a directory to read text files with metrics from +`TEXTFILE_DIRS` | As the `--collector.textfile.directories` flag, provide a directory to read text files with metrics from `REMOTE_ADDR` | Allows setting comma separated remote IP addresses for the Windows Firewall exception (allow list). Defaults to an empty string (any remote address). `EXTRA_FLAGS` | Allows passing full CLI flags. Defaults to an empty string. 
@@ -123,7 +123,7 @@ msiexec /i ENABLED_COLLECTORS=os,service --% EXTRA_FLAGS="--c On some older versions of Windows you may need to surround parameter values with double quotes to get the install command parsing properly: ```powershell -msiexec /i C:\Users\Administrator\Downloads\windows_exporter.msi ENABLED_COLLECTORS="ad,iis,logon,memory,process,tcp,textfile,thermalzone" TEXTFILE_DIR="C:\custom_metrics\" +msiexec /i C:\Users\Administrator\Downloads\windows_exporter.msi ENABLED_COLLECTORS="ad,iis,logon,memory,process,tcp,textfile,thermalzone" TEXTFILE_DIRS="C:\custom_metrics\" ``` Powershell versions 7.3 and above require [PSNativeCommandArgumentPassing](https://learn.microsoft.com/en-us/powershell/scripting/learn/experimental-features?view=powershell-7.3) to be set to `Legacy` when using `--% EXTRA_FLAGS`: diff --git a/installer/windows_exporter.wxs b/installer/windows_exporter.wxs index a67eff95..e13400c3 100644 --- a/installer/windows_exporter.wxs +++ b/installer/windows_exporter.wxs @@ -44,8 +44,8 @@ - - + + From e2ceb0d46d33feefdb63166eba02eab2ed0a6d1e Mon Sep 17 00:00:00 2001 From: Dinifarb Date: Fri, 12 Jan 2024 16:53:02 +0100 Subject: [PATCH 02/15] fix:(#1382): update TextfileDirFlag to TextfileDirsFlag Signed-off-by: Dinifarb Signed-off-by: Alexandre JARDON <28548335+webalexeu@users.noreply.github.com> --- installer/windows_exporter.wxs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/installer/windows_exporter.wxs b/installer/windows_exporter.wxs index e13400c3..bf52f61f 100644 --- a/installer/windows_exporter.wxs +++ b/installer/windows_exporter.wxs @@ -54,7 +54,7 @@ - + From 4032b7f610aff12f03567352124d8b86f1e220b1 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 15 Jan 2024 11:08:47 +0000 Subject: [PATCH 03/15] chore(deps): bump github.com/prometheus/common from 0.45.0 to 0.46.0 Bumps [github.com/prometheus/common](https://github.com/prometheus/common) from 0.45.0 to 
0.46.0. - [Release notes](https://github.com/prometheus/common/releases) - [Commits](https://github.com/prometheus/common/compare/v0.45.0...v0.46.0) --- updated-dependencies: - dependency-name: github.com/prometheus/common dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Signed-off-by: Alexandre JARDON <28548335+webalexeu@users.noreply.github.com> --- go.mod | 11 +++++------ go.sum | 22 ++++++++++------------ 2 files changed, 15 insertions(+), 18 deletions(-) diff --git a/go.mod b/go.mod index 639048b5..6df655cd 100644 --- a/go.mod +++ b/go.mod @@ -10,7 +10,7 @@ require ( github.com/go-ole/go-ole v1.3.0 github.com/prometheus/client_golang v1.18.0 github.com/prometheus/client_model v0.5.0 - github.com/prometheus/common v0.45.0 + github.com/prometheus/common v0.46.0 github.com/prometheus/exporter-toolkit v0.11.0 github.com/sirupsen/logrus v1.9.3 // indirect github.com/stretchr/testify v1.8.4 @@ -35,22 +35,21 @@ require ( github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect github.com/golang/protobuf v1.5.3 // indirect github.com/jpillora/backoff v1.0.0 // indirect - github.com/matttproud/golang_protobuf_extensions/v2 v2.0.0 // indirect github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f // indirect github.com/pkg/errors v0.9.1 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect github.com/prometheus/procfs v0.12.0 // indirect github.com/xhit/go-str2duration/v2 v2.1.0 // indirect - golang.org/x/crypto v0.17.0 // indirect + golang.org/x/crypto v0.18.0 // indirect golang.org/x/mod v0.14.0 // indirect - golang.org/x/net v0.18.0 // indirect - golang.org/x/oauth2 v0.12.0 // indirect + golang.org/x/net v0.20.0 // indirect + golang.org/x/oauth2 v0.16.0 // indirect golang.org/x/sync v0.5.0 // indirect golang.org/x/text v0.14.0 // indirect golang.org/x/tools v0.15.0 // indirect google.golang.org/appengine v1.6.7 // indirect google.golang.org/genproto 
v0.0.0-20230410155749-daa745c078e1 // indirect google.golang.org/grpc v1.56.3 // indirect - google.golang.org/protobuf v1.31.0 // indirect + google.golang.org/protobuf v1.32.0 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect ) diff --git a/go.sum b/go.sum index 7cd2b4f5..4c041e19 100644 --- a/go.sum +++ b/go.sum @@ -76,8 +76,6 @@ github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= -github.com/matttproud/golang_protobuf_extensions/v2 v2.0.0 h1:jWpvCLoY8Z/e3VKvlsiIGKtc+UG6U5vzxaoagmhXfyg= -github.com/matttproud/golang_protobuf_extensions/v2 v2.0.0/go.mod h1:QUyp042oQthUoa9bqDv0ER0wrtXnBruoNd7aNjkbP+k= github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f h1:KUppIJq7/+SVif2QVs3tOP0zanoHgBEVAwHxUSIzRqU= github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= @@ -89,8 +87,8 @@ github.com/prometheus/client_golang v1.18.0/go.mod h1:T+GXkCk5wSJyOqMIzVgvvjFDlk github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.5.0 h1:VQw1hfvPvk3Uv6Qf29VrPF32JB6rtbgI6cYPYQjL0Qw= github.com/prometheus/client_model v0.5.0/go.mod h1:dTiFglRmd66nLR9Pv9f0mZi7B7fk5Pm3gvsjB5tr+kI= -github.com/prometheus/common v0.45.0 h1:2BGz0eBc2hdMDLnO/8n0jeB3oPrt2D08CekT0lneoxM= -github.com/prometheus/common v0.45.0/go.mod h1:YJmSTw9BoKxJplESWWxlbyttQR4uaEcGyv9MZjVOJsY= +github.com/prometheus/common v0.46.0 h1:doXzt5ybi1HBKpsZOL0sSkaNHJJqkyfEWZGGqqScV0Y= +github.com/prometheus/common v0.46.0/go.mod h1:Tp0qkxpb9Jsg54QMe+EAmqXkSV7Evdy1BTn+g2pa/hQ= github.com/prometheus/exporter-toolkit v0.11.0 
h1:yNTsuZ0aNCNFQ3aFTD2uhPOvr4iD7fdBvKPAEGkNf+g= github.com/prometheus/exporter-toolkit v0.11.0/go.mod h1:BVnENhnNecpwoTLiABx7mrPB/OLRIgN74qlQbV+FK1Q= github.com/prometheus/procfs v0.12.0 h1:jluTpSng7V9hY0O2R9DzzJHYb2xULk9VTR1V1R/k6Bo= @@ -120,8 +118,8 @@ go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/crypto v0.17.0 h1:r8bRNjWL3GshPW3gkd+RpvzWrZAwPS49OmTGZ/uhM4k= -golang.org/x/crypto v0.17.0/go.mod h1:gCAAfMLgwOJRpTjQ2zCCt2OcSfYMTeZVSRtQlPC7Nq4= +golang.org/x/crypto v0.18.0 h1:PGVlW0xEltQnzFZ55hkuX5+KLyrMYhHld1YHO4AKcdc= +golang.org/x/crypto v0.18.0/go.mod h1:R0j02AL6hcrfOiy9T4ZYp/rcWeMxM3L6QYxlOuEG1mg= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20231110203233-9a3e6036ecaa h1:FRnLl4eNAQl8hwxVVC17teOw8kdjVDVAiFMtgUdTSRQ= golang.org/x/exp v0.0.0-20231110203233-9a3e6036ecaa/go.mod h1:zk2irFbV9DP96SEBUUAy67IdHUaZuSnrz1n472HUCLE= @@ -142,11 +140,11 @@ golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLL golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.18.0 h1:mIYleuAkSbHh0tCv7RvjL3F6ZVbLjq4+R7zbOn3Kokg= -golang.org/x/net v0.18.0/go.mod h1:/czyP5RqHAH4odGYxBJ1qz0+CE5WZ+2j1YgoEo8F2jQ= +golang.org/x/net v0.20.0 h1:aCL9BSgETF1k+blQaYUBx9hJ9LOGP3gAVemcZlf1Kpo= +golang.org/x/net v0.20.0/go.mod 
h1:z8BVo6PvndSri0LbOE3hAn0apkU+1YvI6E70E9jsnvY= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= -golang.org/x/oauth2 v0.12.0 h1:smVPGxink+n1ZI5pkQa8y6fZT0RW0MgCO5bFpepy4B4= -golang.org/x/oauth2 v0.12.0/go.mod h1:A74bZ3aGXgCY0qaIC9Ahg6Lglin4AMAco8cIv9baba4= +golang.org/x/oauth2 v0.16.0 h1:aDkGMBSYxElaoP81NpoUoz2oo2R2wHdZpGToUxfyQrQ= +golang.org/x/oauth2 v0.16.0/go.mod h1:hqZ+0LWXsiVoZpeld6jVt06P3adbS2Uu911W1SsJv2o= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -209,8 +207,8 @@ google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpAD google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= -google.golang.org/protobuf v1.31.0 h1:g0LDEJHgrBl9N9r17Ru3sqWhkIx2NB67okBHPwC7hs8= -google.golang.org/protobuf v1.31.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= +google.golang.org/protobuf v1.32.0 h1:pPC6BG5ex8PDFnkbrGU3EixyhKcQ2aDuBS36lqK/C7I= +google.golang.org/protobuf v1.32.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= From 688ea45e7e325924f9f930b0e26752dc58670895 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: 
Mon, 15 Jan 2024 11:15:19 +0000 Subject: [PATCH 04/15] chore(deps): bump github.com/containerd/containerd from 1.7.0 to 1.7.11 Bumps [github.com/containerd/containerd](https://github.com/containerd/containerd) from 1.7.0 to 1.7.11. - [Release notes](https://github.com/containerd/containerd/releases) - [Changelog](https://github.com/containerd/containerd/blob/main/RELEASES.md) - [Commits](https://github.com/containerd/containerd/compare/v1.7.0...v1.7.11) --- updated-dependencies: - dependency-name: github.com/containerd/containerd dependency-type: indirect ... Signed-off-by: dependabot[bot] Signed-off-by: Alexandre JARDON <28548335+webalexeu@users.noreply.github.com> --- go.mod | 6 +++--- go.sum | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/go.mod b/go.mod index 6df655cd..a5c8c3db 100644 --- a/go.mod +++ b/go.mod @@ -27,7 +27,7 @@ require ( github.com/beorn7/perks v1.0.1 // indirect github.com/cespare/xxhash/v2 v2.2.0 // indirect github.com/containerd/cgroups v1.1.0 // indirect - github.com/containerd/containerd v1.7.0 // indirect + github.com/containerd/containerd v1.7.11 // indirect github.com/coreos/go-systemd/v22 v22.5.0 // indirect github.com/davecgh/go-spew v1.1.1 // indirect github.com/go-logfmt/logfmt v0.5.1 // indirect @@ -48,8 +48,8 @@ require ( golang.org/x/text v0.14.0 // indirect golang.org/x/tools v0.15.0 // indirect google.golang.org/appengine v1.6.7 // indirect - google.golang.org/genproto v0.0.0-20230410155749-daa745c078e1 // indirect - google.golang.org/grpc v1.56.3 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20230711160842-782d3b101e98 // indirect + google.golang.org/grpc v1.58.3 // indirect google.golang.org/protobuf v1.32.0 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect ) diff --git a/go.sum b/go.sum index 4c041e19..d59866b3 100644 --- a/go.sum +++ b/go.sum @@ -17,8 +17,8 @@ github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDk github.com/cncf/udpa/go 
v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= github.com/containerd/cgroups v1.1.0 h1:v8rEWFl6EoqHB+swVNjVoCJE8o3jX7e8nqBGPLaDFBM= github.com/containerd/cgroups v1.1.0/go.mod h1:6ppBcbh/NOOUU+dMKrykgaBnK9lCIBxHqJDGwsa1mIw= -github.com/containerd/containerd v1.7.0 h1:G/ZQr3gMZs6ZT0qPUZ15znx5QSdQdASW11nXTLTM2Pg= -github.com/containerd/containerd v1.7.0/go.mod h1:QfR7Efgb/6X2BDpTPJRvPTYDE9rsF0FsXX9J8sIs/sc= +github.com/containerd/containerd v1.7.11 h1:lfGKw3eU35sjV0aG2eYZTiwFEY1pCzxdzicHP3SZILw= +github.com/containerd/containerd v1.7.11/go.mod h1:5UluHxHTX2rdvYuZ5OJTC5m/KJNs0Zs9wVoJm9zf5ZE= github.com/coreos/go-systemd/v22 v22.5.0 h1:RrqgGjYQKalulkV8NGVIfkXQf6YYmOyiJKk8iXXhfZs= github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -187,15 +187,15 @@ google.golang.org/appengine v1.6.7/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCID google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= -google.golang.org/genproto v0.0.0-20230410155749-daa745c078e1 h1:KpwkzHKEF7B9Zxg18WzOa7djJ+Ha5DzthMyZYQfEn2A= -google.golang.org/genproto v0.0.0-20230410155749-daa745c078e1/go.mod h1:nKE/iIaLqn2bQwXBg8f1g2Ylh6r5MN5CmZvuzZCgsCU= +google.golang.org/genproto/googleapis/rpc v0.0.0-20230711160842-782d3b101e98 h1:bVf09lpb+OJbByTj913DRJioFFAjf/ZGxEz7MajTp2U= +google.golang.org/genproto/googleapis/rpc v0.0.0-20230711160842-782d3b101e98/go.mod h1:TUfxEVdsvPg18p6AslUXFoLdpED4oBnGwyqk3dV1XzM= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/grpc v1.23.0/go.mod 
h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY= google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= google.golang.org/grpc v1.33.2/go.mod h1:JMHMWHQWaTccqQQlmk3MJZS+GWXOdAesneDmEnv2fbc= -google.golang.org/grpc v1.56.3 h1:8I4C0Yq1EjstUzUJzpcRVbuYA2mODtEmpWiQoN/b2nc= -google.golang.org/grpc v1.56.3/go.mod h1:I9bI3vqKfayGqPUAwGdOSu7kt6oIJLixfffKrpXqQ9s= +google.golang.org/grpc v1.58.3 h1:BjnpXut1btbtgN/6sp+brB2Kbm2LjNXnidYujAVbSoQ= +google.golang.org/grpc v1.58.3/go.mod h1:tgX3ZQDlNJGU96V6yHh1T/JeoBQ2TXdr43YbYSsCJk0= google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= From 6ca67f7aa2881c7764ed4da00c3cdc3e3d01c943 Mon Sep 17 00:00:00 2001 From: Alexandre JARDON <28548335+webalexeu@users.noreply.github.com> Date: Thu, 25 Jan 2024 15:59:52 +0100 Subject: [PATCH 05/15] Add owner_node on resource and resourcegroup for mscluster Signed-off-by: Alexandre JARDON <28548335+webalexeu@users.noreply.github.com> --- docs/collector.mscluster_resource.md | 32 ++++----- docs/collector.mscluster_resourcegroup.md | 28 ++++---- .../mscluster_resource/mscluster_resource.go | 65 ++++++++++--------- .../mscluster_resourcegroup.go | 55 ++++++++-------- 4 files changed, 91 insertions(+), 89 deletions(-) diff --git a/docs/collector.mscluster_resource.md b/docs/collector.mscluster_resource.md index 11754beb..9b0e7951 100644 --- a/docs/collector.mscluster_resource.md +++ b/docs/collector.mscluster_resource.md @@ -16,22 +16,22 @@ None Name | Description | Type | Labels -----|-------------|------|------- -`Characteristics` | Provides the characteristics of the object. 
The cluster defines characteristics only for resources. For a description of these characteristics, see [CLUSCTL_RESOURCE_GET_CHARACTERISTICS](https://docs.microsoft.com/en-us/previous-versions/windows/desktop/mscs/clusctl-resource-get-characteristics). | gauge | `type`, `owner_group`, `name` -`DeadlockTimeout` | Indicates the length of time to wait, in milliseconds, before declaring a deadlock in any call into a resource. | gauge | `type`, `owner_group`, `name` -`EmbeddedFailureAction` | The time, in milliseconds, that a resource should remain in a failed state before the Cluster service attempts to restart it. | gauge | `type`, `owner_group`, `name` -`Flags` | Provides access to the flags set for the object. The cluster defines flags only for resources. For a description of these flags, see [CLUSCTL_RESOURCE_GET_FLAGS](https://docs.microsoft.com/en-us/previous-versions/windows/desktop/mscs/clusctl-resource-get-flags). | gauge | `type`, `owner_group`, `name` -`IsAlivePollInterval` | Provides access to the resource's IsAlivePollInterval property, which is the recommended interval in milliseconds at which the Cluster Service should poll the resource to determine whether it is operational. If the property is set to 0xFFFFFFFF, the Cluster Service uses the IsAlivePollInterval property for the resource type associated with the resource. | gauge | `type`, `owner_group`, `name` -`LooksAlivePollInterval` | Provides access to the resource's LooksAlivePollInterval property, which is the recommended interval in milliseconds at which the Cluster Service should poll the resource to determine whether it appears operational. If the property is set to 0xFFFFFFFF, the Cluster Service uses the LooksAlivePollInterval property for the resource type associated with the resource. | gauge | `type`, `owner_group`, `name` -`MonitorProcessId` | Provides the process ID of the resource host service that is currently hosting the resource. 
| gauge | `type`, `owner_group`, `name` -`PendingTimeout` | Provides access to the resource's PendingTimeout property. If a resource cannot be brought online or taken offline in the number of milliseconds specified by the PendingTimeout property, the resource is forcibly terminated. | gauge | `type`, `owner_group`, `name` -`ResourceClass` | Gets or sets the resource class of a resource. 0: Unknown; 1: Storage; 2: Network; 32768: Unknown | gauge | `type`, `owner_group`, `name` -`RestartAction` | Provides access to the resource's RestartAction property, which is the action to be taken by the Cluster Service if the resource fails. | gauge | `type`, `owner_group`, `name` -`RestartDelay` | Indicates the time delay before a failed resource is restarted. | gauge | `type`, `owner_group`, `name` -`RestartPeriod` | Provides access to the resource's RestartPeriod property, which is interval of time, in milliseconds, during which a specified number of restart attempts can be made on a nonresponsive resource. | gauge | `type`, `owner_group`, `name` -`RestartThreshold` | Provides access to the resource's RestartThreshold property which is the maximum number of restart attempts that can be made on a resource within an interval defined by the RestartPeriod property before the Cluster Service initiates the action specified by the RestartAction property. | gauge | `type`, `owner_group`, `name` -`RetryPeriodOnFailure` | Provides access to the resource's RetryPeriodOnFailure property, which is the interval of time (in milliseconds) that a resource should remain in a failed state before the Cluster service attempts to restart it. | gauge | `type`, `owner_group`, `name` -`State` | The current state of the resource. -1: Unknown; 0: Inherited; 1: Initializing; 2: Online; 3: Offline; 4: Failed; 128: Pending; 129: Online Pending; 130: Offline Pending | gauge | `type`, `owner_group`, `name` -`Subclass` | Provides the list of references to nodes that can be the owner of this resource. 
| gauge | `type`, `owner_group`, `name` +`Characteristics` | Provides the characteristics of the object. The cluster defines characteristics only for resources. For a description of these characteristics, see [CLUSCTL_RESOURCE_GET_CHARACTERISTICS](https://docs.microsoft.com/en-us/previous-versions/windows/desktop/mscs/clusctl-resource-get-characteristics). | gauge | `type`, `owner_group`, `owner_node`, `name` +`DeadlockTimeout` | Indicates the length of time to wait, in milliseconds, before declaring a deadlock in any call into a resource. | gauge | `type`, `owner_group`, `owner_node`, `name` +`EmbeddedFailureAction` | The time, in milliseconds, that a resource should remain in a failed state before the Cluster service attempts to restart it. | gauge | `type`, `owner_group`, `owner_node`, `name` +`Flags` | Provides access to the flags set for the object. The cluster defines flags only for resources. For a description of these flags, see [CLUSCTL_RESOURCE_GET_FLAGS](https://docs.microsoft.com/en-us/previous-versions/windows/desktop/mscs/clusctl-resource-get-flags). | gauge | `type`, `owner_group`, `owner_node`, `name` +`IsAlivePollInterval` | Provides access to the resource's IsAlivePollInterval property, which is the recommended interval in milliseconds at which the Cluster Service should poll the resource to determine whether it is operational. If the property is set to 0xFFFFFFFF, the Cluster Service uses the IsAlivePollInterval property for the resource type associated with the resource. | gauge | `type`, `owner_group`, `owner_node`, `name` +`LooksAlivePollInterval` | Provides access to the resource's LooksAlivePollInterval property, which is the recommended interval in milliseconds at which the Cluster Service should poll the resource to determine whether it appears operational. If the property is set to 0xFFFFFFFF, the Cluster Service uses the LooksAlivePollInterval property for the resource type associated with the resource. 
| gauge | `type`, `owner_group`, `owner_node`, `name` +`MonitorProcessId` | Provides the process ID of the resource host service that is currently hosting the resource. | gauge | `type`, `owner_group`, `owner_node`, `name` +`PendingTimeout` | Provides access to the resource's PendingTimeout property. If a resource cannot be brought online or taken offline in the number of milliseconds specified by the PendingTimeout property, the resource is forcibly terminated. | gauge | `type`, `owner_group`, `owner_node`, `name` +`ResourceClass` | Gets or sets the resource class of a resource. 0: Unknown; 1: Storage; 2: Network; 32768: Unknown | gauge | `type`, `owner_group`, `owner_node`, `name` +`RestartAction` | Provides access to the resource's RestartAction property, which is the action to be taken by the Cluster Service if the resource fails. | gauge | `type`, `owner_group`, `owner_node`, `name` +`RestartDelay` | Indicates the time delay before a failed resource is restarted. | gauge | `type`, `owner_group`, `owner_node`, `name` +`RestartPeriod` | Provides access to the resource's RestartPeriod property, which is the interval of time, in milliseconds, during which a specified number of restart attempts can be made on a nonresponsive resource. | gauge | `type`, `owner_group`, `owner_node`, `name` +`RestartThreshold` | Provides access to the resource's RestartThreshold property, which is the maximum number of restart attempts that can be made on a resource within an interval defined by the RestartPeriod property before the Cluster Service initiates the action specified by the RestartAction property. | gauge | `type`, `owner_group`, `owner_node`, `name` +`RetryPeriodOnFailure` | Provides access to the resource's RetryPeriodOnFailure property, which is the interval of time (in milliseconds) that a resource should remain in a failed state before the Cluster service attempts to restart it.
| gauge | `type`, `owner_group`, `owner_node`, `name` +`State` | The current state of the resource. -1: Unknown; 0: Inherited; 1: Initializing; 2: Online; 3: Offline; 4: Failed; 128: Pending; 129: Online Pending; 130: Offline Pending | gauge | `type`, `owner_group`, `owner_node`, `name` +`Subclass` | Provides the list of references to nodes that can be the owner of this resource. | gauge | `type`, `owner_group`, `owner_node`, `name` ### Example metric _This collector does not yet have explained examples, we would appreciate your help adding them!_ diff --git a/docs/collector.mscluster_resourcegroup.md b/docs/collector.mscluster_resourcegroup.md index 44ad6d93..cd753552 100644 --- a/docs/collector.mscluster_resourcegroup.md +++ b/docs/collector.mscluster_resourcegroup.md @@ -16,20 +16,20 @@ None Name | Description | Type | Labels -----|-------------|------|------- -`AutoFailbackType` | Provides access to the group's AutoFailbackType property. | gauge | `name` -`Characteristics` | Provides the characteristics of the group. The cluster defines characteristics only for resources. For a description of these characteristics, see [CLUSCTL_RESOURCE_GET_CHARACTERISTICS](https://docs.microsoft.com/en-us/previous-versions/windows/desktop/mscs/clusctl-resource-get-characteristics). | gauge | `name` -`ColdStartSetting` | Indicates whether a group can start after a cluster cold start. | gauge | `name` -`DefaultOwner` | Number of the last node the resource group was activated on or explicitly moved to. | gauge | `name` -`FailbackWindowEnd` | The FailbackWindowEnd property provides the latest time that the group can be moved back to the node identified as its preferred node. | gauge | `name` -`FailbackWindowStart` | The FailbackWindowStart property provides the earliest time (that is, local time as kept by the cluster) that the group can be moved back to the node identified as its preferred node. 
| gauge | `name` -`FailoverPeriod` | The FailoverPeriod property specifies a number of hours during which a maximum number of failover attempts, specified by the FailoverThreshold property, can occur. | gauge | `name` -`FailoverThreshold` | The FailoverThreshold property specifies the maximum number of failover attempts. | gauge | `name` -`Flags` | Provides access to the flags set for the group. The cluster defines flags only for resources. For a description of these flags, see [CLUSCTL_RESOURCE_GET_FLAGS](https://docs.microsoft.com/en-us/previous-versions/windows/desktop/mscs/clusctl-resource-get-flags). | gauge | `name` -`GroupType` | The Type of the resource group. | gauge | `name` -`Priority` | Priority value of the resource group | gauge | `name` -`ResiliencyPeriod` | The resiliency period for this group, in seconds. | gauge | `name` -`State` | The current state of the resource group. -1: Unknown; 0: Online; 1: Offline; 2: Failed; 3: Partial Online; 4: Pending | gauge | `name` -`UpdateDomain` | | gauge | `name` +`AutoFailbackType` | Provides access to the group's AutoFailbackType property. | gauge | `owner_node`, `name` +`Characteristics` | Provides the characteristics of the group. The cluster defines characteristics only for resources. For a description of these characteristics, see [CLUSCTL_RESOURCE_GET_CHARACTERISTICS](https://docs.microsoft.com/en-us/previous-versions/windows/desktop/mscs/clusctl-resource-get-characteristics). | gauge | `owner_node`, `name` +`ColdStartSetting` | Indicates whether a group can start after a cluster cold start. | gauge | `owner_node`, `name` +`DefaultOwner` | Number of the last node the resource group was activated on or explicitly moved to. | gauge | `owner_node`, `name` +`FailbackWindowEnd` | The FailbackWindowEnd property provides the latest time that the group can be moved back to the node identified as its preferred node. 
| gauge | `owner_node`, `name` +`FailbackWindowStart` | The FailbackWindowStart property provides the earliest time (that is, local time as kept by the cluster) that the group can be moved back to the node identified as its preferred node. | gauge | `owner_node`, `name` +`FailoverPeriod` | The FailoverPeriod property specifies a number of hours during which a maximum number of failover attempts, specified by the FailoverThreshold property, can occur. | gauge | `owner_node`, `name` +`FailoverThreshold` | The FailoverThreshold property specifies the maximum number of failover attempts. | gauge | `owner_node`, `name` +`Flags` | Provides access to the flags set for the group. The cluster defines flags only for resources. For a description of these flags, see [CLUSCTL_RESOURCE_GET_FLAGS](https://docs.microsoft.com/en-us/previous-versions/windows/desktop/mscs/clusctl-resource-get-flags). | gauge | `owner_node`, `name` +`GroupType` | The Type of the resource group. | gauge | `owner_node`, `name` +`Priority` | Priority value of the resource group | gauge | `owner_node`, `name` +`ResiliencyPeriod` | The resiliency period for this group, in seconds. | gauge | `owner_node`, `name` +`State` | The current state of the resource group. 
-1: Unknown; 0: Online; 1: Offline; 2: Failed; 3: Partial Online; 4: Pending | gauge | `owner_node`, `name` +`UpdateDomain` | | gauge | `owner_node`, `name` ### Example metric _This collector does not yet have explained examples, we would appreciate your help adding them!_ diff --git a/pkg/collector/mscluster_resource/mscluster_resource.go b/pkg/collector/mscluster_resource/mscluster_resource.go index bd972648..bd7540d0 100644 --- a/pkg/collector/mscluster_resource/mscluster_resource.go +++ b/pkg/collector/mscluster_resource/mscluster_resource.go @@ -63,97 +63,97 @@ func (c *collector) Build() error { c.Characteristics = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "characteristics"), "Provides the characteristics of the object.", - []string{"type", "owner_group", "name"}, + []string{"type", "owner_group", "owner_node", "name"}, nil, ) c.DeadlockTimeout = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "deadlock_timeout"), "Indicates the length of time to wait, in milliseconds, before declaring a deadlock in any call into a resource.", - []string{"type", "owner_group", "name"}, + []string{"type", "owner_group", "owner_node", "name"}, nil, ) c.EmbeddedFailureAction = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "embedded_failure_action"), "The time, in milliseconds, that a resource should remain in a failed state before the Cluster service attempts to restart it.", - []string{"type", "owner_group", "name"}, + []string{"type", "owner_group", "owner_node", "name"}, nil, ) c.Flags = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "flags"), "Provides access to the flags set for the object.", - []string{"type", "owner_group", "name"}, + []string{"type", "owner_group", "owner_node", "name"}, nil, ) c.IsAlivePollInterval = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "is_alive_poll_interval"), "Provides access to the resource's IsAlivePollInterval property, which is the 
recommended interval in milliseconds at which the Cluster Service should poll the resource to determine whether it is operational. If the property is set to 0xFFFFFFFF, the Cluster Service uses the IsAlivePollInterval property for the resource type associated with the resource.", - []string{"type", "owner_group", "name"}, + []string{"type", "owner_group", "owner_node", "name"}, nil, ) c.LooksAlivePollInterval = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "looks_alive_poll_interval"), "Provides access to the resource's LooksAlivePollInterval property, which is the recommended interval in milliseconds at which the Cluster Service should poll the resource to determine whether it appears operational. If the property is set to 0xFFFFFFFF, the Cluster Service uses the LooksAlivePollInterval property for the resource type associated with the resource.", - []string{"type", "owner_group", "name"}, + []string{"type", "owner_group", "owner_node", "name"}, nil, ) c.MonitorProcessId = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "monitor_process_id"), "Provides the process ID of the resource host service that is currently hosting the resource.", - []string{"type", "owner_group", "name"}, + []string{"type", "owner_group", "owner_node", "name"}, nil, ) c.PendingTimeout = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "pending_timeout"), "Provides access to the resource's PendingTimeout property. If a resource cannot be brought online or taken offline in the number of milliseconds specified by the PendingTimeout property, the resource is forcibly terminated.", - []string{"type", "owner_group", "name"}, + []string{"type", "owner_group", "owner_node", "name"}, nil, ) c.ResourceClass = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "resource_class"), "Gets or sets the resource class of a resource. 
0: Unknown; 1: Storage; 2: Network; 32768: Unknown ", - []string{"type", "owner_group", "name"}, + []string{"type", "owner_group", "owner_node", "name"}, nil, ) c.RestartAction = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "restart_action"), "Provides access to the resource's RestartAction property, which is the action to be taken by the Cluster Service if the resource fails.", - []string{"type", "owner_group", "name"}, + []string{"type", "owner_group", "owner_node", "name"}, nil, ) c.RestartDelay = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "restart_delay"), "Indicates the time delay before a failed resource is restarted.", - []string{"type", "owner_group", "name"}, + []string{"type", "owner_group", "owner_node", "name"}, nil, ) c.RestartPeriod = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "restart_period"), "Provides access to the resource's RestartPeriod property, which is interval of time, in milliseconds, during which a specified number of restart attempts can be made on a nonresponsive resource.", - []string{"type", "owner_group", "name"}, + []string{"type", "owner_group", "owner_node", "name"}, nil, ) c.RestartThreshold = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "restart_threshold"), "Provides access to the resource's RestartThreshold property which is the maximum number of restart attempts that can be made on a resource within an interval defined by the RestartPeriod property before the Cluster Service initiates the action specified by the RestartAction property.", - []string{"type", "owner_group", "name"}, + []string{"type", "owner_group", "owner_node", "name"}, nil, ) c.RetryPeriodOnFailure = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "retry_period_on_failure"), "Provides access to the resource's RetryPeriodOnFailure property, which is the interval of time (in milliseconds) that a resource should remain in a failed state before the Cluster 
service attempts to restart it.", - []string{"type", "owner_group", "name"}, + []string{"type", "owner_group", "owner_node", "name"}, nil, ) c.State = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "state"), "The current state of the resource. -1: Unknown; 0: Inherited; 1: Initializing; 2: Online; 3: Offline; 4: Failed; 128: Pending; 129: Online Pending; 130: Offline Pending ", - []string{"type", "owner_group", "name"}, + []string{"type", "owner_group", "owner_node", "name"}, nil, ) c.Subclass = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "subclass"), "Provides the list of references to nodes that can be the owner of this resource.", - []string{"type", "owner_group", "name"}, + []string{"type", "owner_group", "owner_node", "name"}, nil, ) return nil @@ -165,6 +165,7 @@ type MSCluster_Resource struct { Name string Type string OwnerGroup string + OwnerNode string Characteristics uint DeadlockTimeout uint @@ -199,112 +200,112 @@ func (c *collector) Collect(_ *types.ScrapeContext, ch chan<- prometheus.Metric) c.Characteristics, prometheus.GaugeValue, float64(v.Characteristics), - v.Type, v.OwnerGroup, v.Name, + v.Type, v.OwnerGroup, v.OwnerNode, v.Name, ) ch <- prometheus.MustNewConstMetric( c.DeadlockTimeout, prometheus.GaugeValue, float64(v.DeadlockTimeout), - v.Type, v.OwnerGroup, v.Name, + v.Type, v.OwnerGroup, v.OwnerNode, v.Name, ) ch <- prometheus.MustNewConstMetric( c.EmbeddedFailureAction, prometheus.GaugeValue, float64(v.EmbeddedFailureAction), - v.Type, v.OwnerGroup, v.Name, + v.Type, v.OwnerGroup, v.OwnerNode, v.Name, ) ch <- prometheus.MustNewConstMetric( c.Flags, prometheus.GaugeValue, float64(v.Flags), - v.Type, v.OwnerGroup, v.Name, + v.Type, v.OwnerGroup, v.OwnerNode, v.Name, ) ch <- prometheus.MustNewConstMetric( c.IsAlivePollInterval, prometheus.GaugeValue, float64(v.IsAlivePollInterval), - v.Type, v.OwnerGroup, v.Name, + v.Type, v.OwnerGroup, v.OwnerNode, v.Name, ) ch <- prometheus.MustNewConstMetric( 
c.LooksAlivePollInterval, prometheus.GaugeValue, float64(v.LooksAlivePollInterval), - v.Type, v.OwnerGroup, v.Name, + v.Type, v.OwnerGroup, v.OwnerNode, v.Name, ) ch <- prometheus.MustNewConstMetric( c.MonitorProcessId, prometheus.GaugeValue, float64(v.MonitorProcessId), - v.Type, v.OwnerGroup, v.Name, + v.Type, v.OwnerGroup, v.OwnerNode, v.Name, ) ch <- prometheus.MustNewConstMetric( c.PendingTimeout, prometheus.GaugeValue, float64(v.PendingTimeout), - v.Type, v.OwnerGroup, v.Name, + v.Type, v.OwnerGroup, v.OwnerNode, v.Name, ) ch <- prometheus.MustNewConstMetric( c.ResourceClass, prometheus.GaugeValue, float64(v.ResourceClass), - v.Type, v.OwnerGroup, v.Name, + v.Type, v.OwnerGroup, v.OwnerNode, v.Name, ) ch <- prometheus.MustNewConstMetric( c.RestartAction, prometheus.GaugeValue, float64(v.RestartAction), - v.Type, v.OwnerGroup, v.Name, + v.Type, v.OwnerGroup, v.OwnerNode, v.Name, ) ch <- prometheus.MustNewConstMetric( c.RestartDelay, prometheus.GaugeValue, float64(v.RestartDelay), - v.Type, v.OwnerGroup, v.Name, + v.Type, v.OwnerGroup, v.OwnerNode, v.Name, ) ch <- prometheus.MustNewConstMetric( c.RestartPeriod, prometheus.GaugeValue, float64(v.RestartPeriod), - v.Type, v.OwnerGroup, v.Name, + v.Type, v.OwnerGroup, v.OwnerNode, v.Name, ) ch <- prometheus.MustNewConstMetric( c.RestartThreshold, prometheus.GaugeValue, float64(v.RestartThreshold), - v.Type, v.OwnerGroup, v.Name, + v.Type, v.OwnerGroup, v.OwnerNode, v.Name, ) ch <- prometheus.MustNewConstMetric( c.RetryPeriodOnFailure, prometheus.GaugeValue, float64(v.RetryPeriodOnFailure), - v.Type, v.OwnerGroup, v.Name, + v.Type, v.OwnerGroup, v.OwnerNode, v.Name, ) ch <- prometheus.MustNewConstMetric( c.State, prometheus.GaugeValue, float64(v.State), - v.Type, v.OwnerGroup, v.Name, + v.Type, v.OwnerGroup, v.OwnerNode, v.Name, ) ch <- prometheus.MustNewConstMetric( c.Subclass, prometheus.GaugeValue, float64(v.Subclass), - v.Type, v.OwnerGroup, v.Name, + v.Type, v.OwnerGroup, v.OwnerNode, v.Name, ) } diff --git 
a/pkg/collector/mscluster_resourcegroup/mscluster_resourcegroup.go b/pkg/collector/mscluster_resourcegroup/mscluster_resourcegroup.go index 5bf8c3a9..ae404aca 100644 --- a/pkg/collector/mscluster_resourcegroup/mscluster_resourcegroup.go +++ b/pkg/collector/mscluster_resourcegroup/mscluster_resourcegroup.go @@ -62,79 +62,79 @@ func (c *collector) Build() error { c.AutoFailbackType = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "auto_failback_type"), "Provides access to the group's AutoFailbackType property.", - []string{"name"}, + []string{"owner_node", "name"}, nil, ) c.Characteristics = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "characteristics"), "Provides the characteristics of the group.", - []string{"name"}, + []string{"owner_node", "name"}, nil, ) c.ColdStartSetting = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "cold_start_setting"), "Indicates whether a group can start after a cluster cold start.", - []string{"name"}, + []string{"owner_node", "name"}, nil, ) c.DefaultOwner = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "default_owner"), "Number of the last node the resource group was activated on or explicitly moved to.", - []string{"name"}, + []string{"owner_node", "name"}, nil, ) c.FailbackWindowEnd = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "failback_window_end"), "The FailbackWindowEnd property provides the latest time that the group can be moved back to the node identified as its preferred node.", - []string{"name"}, + []string{"owner_node", "name"}, nil, ) c.FailbackWindowStart = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "failback_window_start"), "The FailbackWindowStart property provides the earliest time (that is, local time as kept by the cluster) that the group can be moved back to the node identified as its preferred node.", - []string{"name"}, + []string{"owner_node", "name"}, nil, ) c.FailoverPeriod = 
prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "failover_period"), "The FailoverPeriod property specifies a number of hours during which a maximum number of failover attempts, specified by the FailoverThreshold property, can occur.", - []string{"name"}, + []string{"owner_node", "name"}, nil, ) c.FailoverThreshold = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "failover_threshold"), "The FailoverThreshold property specifies the maximum number of failover attempts.", - []string{"name"}, + []string{"owner_node", "name"}, nil, ) c.Flags = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "flags"), "Provides access to the flags set for the group. ", - []string{"name"}, + []string{"owner_node", "name"}, nil, ) c.GroupType = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "group_type"), "The Type of the resource group.", - []string{"name"}, + []string{"owner_node", "name"}, nil, ) c.Priority = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "priority"), "Priority value of the resource group", - []string{"name"}, + []string{"owner_node", "name"}, nil, ) c.ResiliencyPeriod = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "resiliency_period"), "The resiliency period for this group, in seconds.", - []string{"name"}, + []string{"owner_node", "name"}, nil, ) c.State = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "state"), "The current state of the resource group. 
-1: Unknown; 0: Online; 1: Offline; 2: Failed; 3: Partial Online; 4: Pending", - []string{"name"}, + []string{"owner_node", "name"}, nil, ) return nil @@ -143,7 +143,8 @@ func (c *collector) Build() error { // MSCluster_ResourceGroup docs: // - https://docs.microsoft.com/en-us/previous-versions/windows/desktop/cluswmi/mscluster-resourcegroup type MSCluster_ResourceGroup struct { - Name string + Name string + OwnerNode string AutoFailbackType uint Characteristics uint @@ -175,91 +176,91 @@ func (c *collector) Collect(_ *types.ScrapeContext, ch chan<- prometheus.Metric) c.AutoFailbackType, prometheus.GaugeValue, float64(v.AutoFailbackType), - v.Name, + v.OwnerNode, v.Name, ) ch <- prometheus.MustNewConstMetric( c.Characteristics, prometheus.GaugeValue, float64(v.Characteristics), - v.Name, + v.OwnerNode, v.Name, ) ch <- prometheus.MustNewConstMetric( c.ColdStartSetting, prometheus.GaugeValue, float64(v.ColdStartSetting), - v.Name, + v.OwnerNode, v.Name, ) ch <- prometheus.MustNewConstMetric( c.DefaultOwner, prometheus.GaugeValue, float64(v.DefaultOwner), - v.Name, + v.OwnerNode, v.Name, ) ch <- prometheus.MustNewConstMetric( c.FailbackWindowEnd, prometheus.GaugeValue, float64(v.FailbackWindowEnd), - v.Name, + v.OwnerNode, v.Name, ) ch <- prometheus.MustNewConstMetric( c.FailbackWindowStart, prometheus.GaugeValue, float64(v.FailbackWindowStart), - v.Name, + v.OwnerNode, v.Name, ) ch <- prometheus.MustNewConstMetric( c.FailoverPeriod, prometheus.GaugeValue, float64(v.FailoverPeriod), - v.Name, + v.OwnerNode, v.Name, ) ch <- prometheus.MustNewConstMetric( c.FailoverThreshold, prometheus.GaugeValue, float64(v.FailoverThreshold), - v.Name, + v.OwnerNode, v.Name, ) ch <- prometheus.MustNewConstMetric( c.Flags, prometheus.GaugeValue, float64(v.Flags), - v.Name, + v.OwnerNode, v.Name, ) ch <- prometheus.MustNewConstMetric( c.GroupType, prometheus.GaugeValue, float64(v.GroupType), - v.Name, + v.OwnerNode, v.Name, ) ch <- prometheus.MustNewConstMetric( c.Priority, 
prometheus.GaugeValue, float64(v.Priority), - v.Name, + v.OwnerNode, v.Name, ) ch <- prometheus.MustNewConstMetric( c.ResiliencyPeriod, prometheus.GaugeValue, float64(v.ResiliencyPeriod), - v.Name, + v.OwnerNode, v.Name, ) ch <- prometheus.MustNewConstMetric( c.State, prometheus.GaugeValue, float64(v.State), - v.Name, + v.OwnerNode, v.Name, ) } From eab87292c1cd79a89d5b0dc7ce7c4e953b51dba4 Mon Sep 17 00:00:00 2001 From: Alexandre JARDON <28548335+webalexeu@users.noreply.github.com> Date: Thu, 25 Jan 2024 21:01:13 +0100 Subject: [PATCH 06/15] Add examples Signed-off-by: Alexandre JARDON <28548335+webalexeu@users.noreply.github.com> --- docs/collector.mscluster_resource.md | 10 ++++++++-- docs/collector.mscluster_resourcegroup.md | 10 ++++++++-- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/docs/collector.mscluster_resource.md b/docs/collector.mscluster_resource.md index 9b0e7951..ada33aa8 100644 --- a/docs/collector.mscluster_resource.md +++ b/docs/collector.mscluster_resource.md @@ -34,10 +34,16 @@ Name | Description | Type | Labels `Subclass` | Provides the list of references to nodes that can be the owner of this resource. 
| gauge | `type`, `owner_group`, `owner_node`, `name` ### Example metric -_This collector does not yet have explained examples, we would appreciate your help adding them!_ +Query the state of all cluster resource owned by node1 +``` +windows_mscluster_resource_state{owner_node="node1"} +``` ## Useful queries -_This collector does not yet have any useful queries added, we would appreciate your help adding them!_ +Counts the number of Network Name cluster resource +``` +count(windows_mscluster_resource_state{type="Network Name"}) +``` ## Alerting examples _This collector does not yet have alerting examples, we would appreciate your help adding them!_ diff --git a/docs/collector.mscluster_resourcegroup.md b/docs/collector.mscluster_resourcegroup.md index cd753552..0eb710ec 100644 --- a/docs/collector.mscluster_resourcegroup.md +++ b/docs/collector.mscluster_resourcegroup.md @@ -32,10 +32,16 @@ Name | Description | Type | Labels `UpdateDomain` | | gauge | `owner_node`, `name` ### Example metric -_This collector does not yet have explained examples, we would appreciate your help adding them!_ +Query the state of all cluster group owned by node1 +``` +windows_mscluster_resourcegroup_state{owner_node="node1"} +``` ## Useful queries -_This collector does not yet have any useful queries added, we would appreciate your help adding them!_ +Counts the number of cluster group by type +``` +count_values("count", windows_mscluster_resourcegroup_group_type) +``` ## Alerting examples _This collector does not yet have alerting examples, we would appreciate your help adding them!_ From 9b5568354c5621ca8f76698e78aada1e5f3907fd Mon Sep 17 00:00:00 2001 From: Alexandre JARDON <28548335+webalexeu@users.noreply.github.com> Date: Fri, 16 Feb 2024 08:42:25 +0100 Subject: [PATCH 07/15] Create dedicated metric for owner_node Signed-off-by: Alexandre JARDON <28548335+webalexeu@users.noreply.github.com> --- docs/collector.mscluster_resource.md | 2 +- docs/collector.mscluster_resourcegroup.md | 
2 +- .../mscluster_node/mscluster_node.go | 7 ++ .../mscluster_resource/mscluster_resource.go | 87 ++++++++++++------- .../mscluster_resourcegroup.go | 79 +++++++++++------ 5 files changed, 115 insertions(+), 62 deletions(-) diff --git a/docs/collector.mscluster_resource.md b/docs/collector.mscluster_resource.md index ada33aa8..e3a408da 100644 --- a/docs/collector.mscluster_resource.md +++ b/docs/collector.mscluster_resource.md @@ -36,7 +36,7 @@ Name | Description | Type | Labels ### Example metric Query the state of all cluster resource owned by node1 ``` -windows_mscluster_resource_state{owner_node="node1"} +windows_mscluster_resource_owner_node{node_name="node1"} ``` ## Useful queries diff --git a/docs/collector.mscluster_resourcegroup.md b/docs/collector.mscluster_resourcegroup.md index 0eb710ec..2029e4ac 100644 --- a/docs/collector.mscluster_resourcegroup.md +++ b/docs/collector.mscluster_resourcegroup.md @@ -34,7 +34,7 @@ Name | Description | Type | Labels ### Example metric Query the state of all cluster group owned by node1 ``` -windows_mscluster_resourcegroup_state{owner_node="node1"} +windows_mscluster_resourcegroup_owner_node{node_name="node1"} ``` ## Useful queries diff --git a/pkg/collector/mscluster_node/mscluster_node.go b/pkg/collector/mscluster_node/mscluster_node.go index 2dc75b12..48083e7c 100644 --- a/pkg/collector/mscluster_node/mscluster_node.go +++ b/pkg/collector/mscluster_node/mscluster_node.go @@ -15,6 +15,9 @@ type Config struct{} var ConfigDefaults = Config{} +// Variable used by mscluster_resource and mscluster_resourcegroup +var NodeName []string + // A collector is a Prometheus collector for WMI MSCluster_Node metrics type collector struct { logger log.Logger @@ -175,6 +178,8 @@ func (c *collector) Collect(_ *types.ScrapeContext, ch chan<- prometheus.Metric) return err } + NodeName = []string{} + for _, v := range dst { ch <- prometheus.MustNewConstMetric( @@ -274,6 +279,8 @@ func (c *collector) Collect(_ *types.ScrapeContext, ch 
chan<- prometheus.Metric) float64(v.StatusInformation), v.Name, ) + + NodeName = append(NodeName, v.Name) } return nil diff --git a/pkg/collector/mscluster_resource/mscluster_resource.go b/pkg/collector/mscluster_resource/mscluster_resource.go index bd7540d0..650189ee 100644 --- a/pkg/collector/mscluster_resource/mscluster_resource.go +++ b/pkg/collector/mscluster_resource/mscluster_resource.go @@ -1,6 +1,7 @@ package mscluster_resource import ( + "github.com/prometheus-community/windows_exporter/pkg/collector/mscluster_node" "github.com/prometheus-community/windows_exporter/pkg/types" "github.com/prometheus-community/windows_exporter/pkg/wmi" @@ -26,6 +27,7 @@ type collector struct { IsAlivePollInterval *prometheus.Desc LooksAlivePollInterval *prometheus.Desc MonitorProcessId *prometheus.Desc + OwnerNode *prometheus.Desc PendingTimeout *prometheus.Desc ResourceClass *prometheus.Desc RestartAction *prometheus.Desc @@ -63,97 +65,103 @@ func (c *collector) Build() error { c.Characteristics = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "characteristics"), "Provides the characteristics of the object.", - []string{"type", "owner_group", "owner_node", "name"}, + []string{"type", "owner_group", "name"}, nil, ) c.DeadlockTimeout = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "deadlock_timeout"), "Indicates the length of time to wait, in milliseconds, before declaring a deadlock in any call into a resource.", - []string{"type", "owner_group", "owner_node", "name"}, + []string{"type", "owner_group", "name"}, nil, ) c.EmbeddedFailureAction = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "embedded_failure_action"), "The time, in milliseconds, that a resource should remain in a failed state before the Cluster service attempts to restart it.", - []string{"type", "owner_group", "owner_node", "name"}, + []string{"type", "owner_group", "name"}, nil, ) c.Flags = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, 
Name, "flags"), "Provides access to the flags set for the object.", - []string{"type", "owner_group", "owner_node", "name"}, + []string{"type", "owner_group", "name"}, nil, ) c.IsAlivePollInterval = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "is_alive_poll_interval"), "Provides access to the resource's IsAlivePollInterval property, which is the recommended interval in milliseconds at which the Cluster Service should poll the resource to determine whether it is operational. If the property is set to 0xFFFFFFFF, the Cluster Service uses the IsAlivePollInterval property for the resource type associated with the resource.", - []string{"type", "owner_group", "owner_node", "name"}, + []string{"type", "owner_group", "name"}, nil, ) c.LooksAlivePollInterval = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "looks_alive_poll_interval"), "Provides access to the resource's LooksAlivePollInterval property, which is the recommended interval in milliseconds at which the Cluster Service should poll the resource to determine whether it appears operational. If the property is set to 0xFFFFFFFF, the Cluster Service uses the LooksAlivePollInterval property for the resource type associated with the resource.", - []string{"type", "owner_group", "owner_node", "name"}, + []string{"type", "owner_group", "name"}, nil, ) c.MonitorProcessId = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "monitor_process_id"), "Provides the process ID of the resource host service that is currently hosting the resource.", - []string{"type", "owner_group", "owner_node", "name"}, + []string{"type", "owner_group", "name"}, + nil, + ) + c.OwnerNode = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, Name, "owner_node"), + "The node hosting the resource. 
0: Not hosted; 1: Hosted", + []string{"type", "owner_group", "node_name", "name"}, nil, ) c.PendingTimeout = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "pending_timeout"), "Provides access to the resource's PendingTimeout property. If a resource cannot be brought online or taken offline in the number of milliseconds specified by the PendingTimeout property, the resource is forcibly terminated.", - []string{"type", "owner_group", "owner_node", "name"}, + []string{"type", "owner_group", "name"}, nil, ) c.ResourceClass = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "resource_class"), "Gets or sets the resource class of a resource. 0: Unknown; 1: Storage; 2: Network; 32768: Unknown ", - []string{"type", "owner_group", "owner_node", "name"}, + []string{"type", "owner_group", "name"}, nil, ) c.RestartAction = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "restart_action"), "Provides access to the resource's RestartAction property, which is the action to be taken by the Cluster Service if the resource fails.", - []string{"type", "owner_group", "owner_node", "name"}, + []string{"type", "owner_group", "name"}, nil, ) c.RestartDelay = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "restart_delay"), "Indicates the time delay before a failed resource is restarted.", - []string{"type", "owner_group", "owner_node", "name"}, + []string{"type", "owner_group", "name"}, nil, ) c.RestartPeriod = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "restart_period"), "Provides access to the resource's RestartPeriod property, which is interval of time, in milliseconds, during which a specified number of restart attempts can be made on a nonresponsive resource.", - []string{"type", "owner_group", "owner_node", "name"}, + []string{"type", "owner_group", "name"}, nil, ) c.RestartThreshold = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "restart_threshold"), "Provides access 
to the resource's RestartThreshold property which is the maximum number of restart attempts that can be made on a resource within an interval defined by the RestartPeriod property before the Cluster Service initiates the action specified by the RestartAction property.", - []string{"type", "owner_group", "owner_node", "name"}, + []string{"type", "owner_group", "name"}, nil, ) c.RetryPeriodOnFailure = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "retry_period_on_failure"), "Provides access to the resource's RetryPeriodOnFailure property, which is the interval of time (in milliseconds) that a resource should remain in a failed state before the Cluster service attempts to restart it.", - []string{"type", "owner_group", "owner_node", "name"}, + []string{"type", "owner_group", "name"}, nil, ) c.State = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "state"), "The current state of the resource. -1: Unknown; 0: Inherited; 1: Initializing; 2: Online; 3: Offline; 4: Failed; 128: Pending; 129: Online Pending; 130: Offline Pending ", - []string{"type", "owner_group", "owner_node", "name"}, + []string{"type", "owner_group", "name"}, nil, ) c.Subclass = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "subclass"), "Provides the list of references to nodes that can be the owner of this resource.", - []string{"type", "owner_group", "owner_node", "name"}, + []string{"type", "owner_group", "name"}, nil, ) return nil @@ -200,112 +208,127 @@ func (c *collector) Collect(_ *types.ScrapeContext, ch chan<- prometheus.Metric) c.Characteristics, prometheus.GaugeValue, float64(v.Characteristics), - v.Type, v.OwnerGroup, v.OwnerNode, v.Name, + v.Type, v.OwnerGroup, v.Name, ) ch <- prometheus.MustNewConstMetric( c.DeadlockTimeout, prometheus.GaugeValue, float64(v.DeadlockTimeout), - v.Type, v.OwnerGroup, v.OwnerNode, v.Name, + v.Type, v.OwnerGroup, v.Name, ) ch <- prometheus.MustNewConstMetric( c.EmbeddedFailureAction, 
prometheus.GaugeValue, float64(v.EmbeddedFailureAction), - v.Type, v.OwnerGroup, v.OwnerNode, v.Name, + v.Type, v.OwnerGroup, v.Name, ) ch <- prometheus.MustNewConstMetric( c.Flags, prometheus.GaugeValue, float64(v.Flags), - v.Type, v.OwnerGroup, v.OwnerNode, v.Name, + v.Type, v.OwnerGroup, v.Name, ) ch <- prometheus.MustNewConstMetric( c.IsAlivePollInterval, prometheus.GaugeValue, float64(v.IsAlivePollInterval), - v.Type, v.OwnerGroup, v.OwnerNode, v.Name, + v.Type, v.OwnerGroup, v.Name, ) ch <- prometheus.MustNewConstMetric( c.LooksAlivePollInterval, prometheus.GaugeValue, float64(v.LooksAlivePollInterval), - v.Type, v.OwnerGroup, v.OwnerNode, v.Name, + v.Type, v.OwnerGroup, v.Name, ) ch <- prometheus.MustNewConstMetric( c.MonitorProcessId, prometheus.GaugeValue, float64(v.MonitorProcessId), - v.Type, v.OwnerGroup, v.OwnerNode, v.Name, + v.Type, v.OwnerGroup, v.Name, ) + if mscluster_node.NodeName != nil { + for _, node_name := range mscluster_node.NodeName { + isCurrentState := 0.0 + if v.OwnerNode == node_name { + isCurrentState = 1.0 + } + ch <- prometheus.MustNewConstMetric( + c.OwnerNode, + prometheus.GaugeValue, + isCurrentState, + v.Type, v.OwnerGroup, node_name, v.Name, + ) + } + } + ch <- prometheus.MustNewConstMetric( c.PendingTimeout, prometheus.GaugeValue, float64(v.PendingTimeout), - v.Type, v.OwnerGroup, v.OwnerNode, v.Name, + v.Type, v.OwnerGroup, v.Name, ) ch <- prometheus.MustNewConstMetric( c.ResourceClass, prometheus.GaugeValue, float64(v.ResourceClass), - v.Type, v.OwnerGroup, v.OwnerNode, v.Name, + v.Type, v.OwnerGroup, v.Name, ) ch <- prometheus.MustNewConstMetric( c.RestartAction, prometheus.GaugeValue, float64(v.RestartAction), - v.Type, v.OwnerGroup, v.OwnerNode, v.Name, + v.Type, v.OwnerGroup, v.Name, ) ch <- prometheus.MustNewConstMetric( c.RestartDelay, prometheus.GaugeValue, float64(v.RestartDelay), - v.Type, v.OwnerGroup, v.OwnerNode, v.Name, + v.Type, v.OwnerGroup, v.Name, ) ch <- prometheus.MustNewConstMetric( c.RestartPeriod, 
prometheus.GaugeValue, float64(v.RestartPeriod), - v.Type, v.OwnerGroup, v.OwnerNode, v.Name, + v.Type, v.OwnerGroup, v.Name, ) ch <- prometheus.MustNewConstMetric( c.RestartThreshold, prometheus.GaugeValue, float64(v.RestartThreshold), - v.Type, v.OwnerGroup, v.OwnerNode, v.Name, + v.Type, v.OwnerGroup, v.Name, ) ch <- prometheus.MustNewConstMetric( c.RetryPeriodOnFailure, prometheus.GaugeValue, float64(v.RetryPeriodOnFailure), - v.Type, v.OwnerGroup, v.OwnerNode, v.Name, + v.Type, v.OwnerGroup, v.Name, ) ch <- prometheus.MustNewConstMetric( c.State, prometheus.GaugeValue, float64(v.State), - v.Type, v.OwnerGroup, v.OwnerNode, v.Name, + v.Type, v.OwnerGroup, v.Name, ) ch <- prometheus.MustNewConstMetric( c.Subclass, prometheus.GaugeValue, float64(v.Subclass), - v.Type, v.OwnerGroup, v.OwnerNode, v.Name, + v.Type, v.OwnerGroup, v.Name, ) } diff --git a/pkg/collector/mscluster_resourcegroup/mscluster_resourcegroup.go b/pkg/collector/mscluster_resourcegroup/mscluster_resourcegroup.go index ae404aca..7169830c 100644 --- a/pkg/collector/mscluster_resourcegroup/mscluster_resourcegroup.go +++ b/pkg/collector/mscluster_resourcegroup/mscluster_resourcegroup.go @@ -1,6 +1,7 @@ package mscluster_resourcegroup import ( + "github.com/prometheus-community/windows_exporter/pkg/collector/mscluster_node" "github.com/prometheus-community/windows_exporter/pkg/types" "github.com/prometheus-community/windows_exporter/pkg/wmi" @@ -31,6 +32,7 @@ type collector struct { Flags *prometheus.Desc GroupType *prometheus.Desc PlacementOptions *prometheus.Desc + OwnerNode *prometheus.Desc Priority *prometheus.Desc ResiliencyPeriod *prometheus.Desc State *prometheus.Desc @@ -62,79 +64,85 @@ func (c *collector) Build() error { c.AutoFailbackType = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "auto_failback_type"), "Provides access to the group's AutoFailbackType property.", - []string{"owner_node", "name"}, + []string{"name"}, nil, ) c.Characteristics = prometheus.NewDesc( 
prometheus.BuildFQName(types.Namespace, Name, "characteristics"), "Provides the characteristics of the group.", - []string{"owner_node", "name"}, + []string{"name"}, nil, ) c.ColdStartSetting = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "cold_start_setting"), "Indicates whether a group can start after a cluster cold start.", - []string{"owner_node", "name"}, + []string{"name"}, nil, ) c.DefaultOwner = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "default_owner"), "Number of the last node the resource group was activated on or explicitly moved to.", - []string{"owner_node", "name"}, + []string{"name"}, nil, ) c.FailbackWindowEnd = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "failback_window_end"), "The FailbackWindowEnd property provides the latest time that the group can be moved back to the node identified as its preferred node.", - []string{"owner_node", "name"}, + []string{"name"}, nil, ) c.FailbackWindowStart = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "failback_window_start"), "The FailbackWindowStart property provides the earliest time (that is, local time as kept by the cluster) that the group can be moved back to the node identified as its preferred node.", - []string{"owner_node", "name"}, + []string{"name"}, nil, ) c.FailoverPeriod = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "failover_period"), "The FailoverPeriod property specifies a number of hours during which a maximum number of failover attempts, specified by the FailoverThreshold property, can occur.", - []string{"owner_node", "name"}, + []string{"name"}, nil, ) c.FailoverThreshold = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "failover_threshold"), "The FailoverThreshold property specifies the maximum number of failover attempts.", - []string{"owner_node", "name"}, + []string{"name"}, nil, ) c.Flags = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, 
Name, "flags"), "Provides access to the flags set for the group. ", - []string{"owner_node", "name"}, + []string{"name"}, nil, ) c.GroupType = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "group_type"), "The Type of the resource group.", - []string{"owner_node", "name"}, + []string{"name"}, + nil, + ) + c.OwnerNode = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, Name, "owner_node"), + "The node hosting the resource group. 0: Not hosted; 1: Hosted", + []string{"node_name", "name"}, nil, ) c.Priority = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "priority"), "Priority value of the resource group", - []string{"owner_node", "name"}, + []string{"name"}, nil, ) c.ResiliencyPeriod = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "resiliency_period"), "The resiliency period for this group, in seconds.", - []string{"owner_node", "name"}, + []string{"name"}, nil, ) c.State = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "state"), "The current state of the resource group. 
-1: Unknown; 0: Online; 1: Offline; 2: Failed; 3: Partial Online; 4: Pending", - []string{"owner_node", "name"}, + []string{"name"}, nil, ) return nil @@ -143,8 +151,7 @@ func (c *collector) Build() error { // MSCluster_ResourceGroup docs: // - https://docs.microsoft.com/en-us/previous-versions/windows/desktop/cluswmi/mscluster-resourcegroup type MSCluster_ResourceGroup struct { - Name string - OwnerNode string + Name string AutoFailbackType uint Characteristics uint @@ -156,6 +163,7 @@ type MSCluster_ResourceGroup struct { FailoverThreshold uint Flags uint GroupType uint + OwnerNode string Priority uint ResiliencyPeriod uint State uint @@ -176,91 +184,106 @@ func (c *collector) Collect(_ *types.ScrapeContext, ch chan<- prometheus.Metric) c.AutoFailbackType, prometheus.GaugeValue, float64(v.AutoFailbackType), - v.OwnerNode, v.Name, + v.Name, ) ch <- prometheus.MustNewConstMetric( c.Characteristics, prometheus.GaugeValue, float64(v.Characteristics), - v.OwnerNode, v.Name, + v.Name, ) ch <- prometheus.MustNewConstMetric( c.ColdStartSetting, prometheus.GaugeValue, float64(v.ColdStartSetting), - v.OwnerNode, v.Name, + v.Name, ) ch <- prometheus.MustNewConstMetric( c.DefaultOwner, prometheus.GaugeValue, float64(v.DefaultOwner), - v.OwnerNode, v.Name, + v.Name, ) ch <- prometheus.MustNewConstMetric( c.FailbackWindowEnd, prometheus.GaugeValue, float64(v.FailbackWindowEnd), - v.OwnerNode, v.Name, + v.Name, ) ch <- prometheus.MustNewConstMetric( c.FailbackWindowStart, prometheus.GaugeValue, float64(v.FailbackWindowStart), - v.OwnerNode, v.Name, + v.Name, ) ch <- prometheus.MustNewConstMetric( c.FailoverPeriod, prometheus.GaugeValue, float64(v.FailoverPeriod), - v.OwnerNode, v.Name, + v.Name, ) ch <- prometheus.MustNewConstMetric( c.FailoverThreshold, prometheus.GaugeValue, float64(v.FailoverThreshold), - v.OwnerNode, v.Name, + v.Name, ) ch <- prometheus.MustNewConstMetric( c.Flags, prometheus.GaugeValue, float64(v.Flags), - v.OwnerNode, v.Name, + v.Name, ) ch <- 
prometheus.MustNewConstMetric( c.GroupType, prometheus.GaugeValue, float64(v.GroupType), - v.OwnerNode, v.Name, + v.Name, ) + if mscluster_node.NodeName != nil { + for _, node_name := range mscluster_node.NodeName { + isCurrentState := 0.0 + if v.OwnerNode == node_name { + isCurrentState = 1.0 + } + ch <- prometheus.MustNewConstMetric( + c.OwnerNode, + prometheus.GaugeValue, + isCurrentState, + node_name, v.Name, + ) + } + } + ch <- prometheus.MustNewConstMetric( c.Priority, prometheus.GaugeValue, float64(v.Priority), - v.OwnerNode, v.Name, + v.Name, ) ch <- prometheus.MustNewConstMetric( c.ResiliencyPeriod, prometheus.GaugeValue, float64(v.ResiliencyPeriod), - v.OwnerNode, v.Name, + v.Name, ) ch <- prometheus.MustNewConstMetric( c.State, prometheus.GaugeValue, float64(v.State), - v.OwnerNode, v.Name, + v.Name, ) } From 5ef7c1f0e9beab3d12aabfb58e68175b1c125f51 Mon Sep 17 00:00:00 2001 From: Alexandre JARDON <28548335+webalexeu@users.noreply.github.com> Date: Fri, 16 Feb 2024 08:50:17 +0100 Subject: [PATCH 08/15] Adapat documentation based on new metric Signed-off-by: Alexandre JARDON <28548335+webalexeu@users.noreply.github.com> --- docs/collector.mscluster_resource.md | 33 ++++++++++++----------- docs/collector.mscluster_resourcegroup.md | 29 ++++++++++---------- 2 files changed, 32 insertions(+), 30 deletions(-) diff --git a/docs/collector.mscluster_resource.md b/docs/collector.mscluster_resource.md index e3a408da..c7ad995a 100644 --- a/docs/collector.mscluster_resource.md +++ b/docs/collector.mscluster_resource.md @@ -16,22 +16,23 @@ None Name | Description | Type | Labels -----|-------------|------|------- -`Characteristics` | Provides the characteristics of the object. The cluster defines characteristics only for resources. For a description of these characteristics, see [CLUSCTL_RESOURCE_GET_CHARACTERISTICS](https://docs.microsoft.com/en-us/previous-versions/windows/desktop/mscs/clusctl-resource-get-characteristics). 
| gauge | `type`, `owner_group`, `owner_node`, `name` -`DeadlockTimeout` | Indicates the length of time to wait, in milliseconds, before declaring a deadlock in any call into a resource. | gauge | `type`, `owner_group`, `owner_node`, `name` -`EmbeddedFailureAction` | The time, in milliseconds, that a resource should remain in a failed state before the Cluster service attempts to restart it. | gauge | `type`, `owner_group`, `owner_node`, `name` -`Flags` | Provides access to the flags set for the object. The cluster defines flags only for resources. For a description of these flags, see [CLUSCTL_RESOURCE_GET_FLAGS](https://docs.microsoft.com/en-us/previous-versions/windows/desktop/mscs/clusctl-resource-get-flags). | gauge | `type`, `owner_group`, `owner_node`, `name` -`IsAlivePollInterval` | Provides access to the resource's IsAlivePollInterval property, which is the recommended interval in milliseconds at which the Cluster Service should poll the resource to determine whether it is operational. If the property is set to 0xFFFFFFFF, the Cluster Service uses the IsAlivePollInterval property for the resource type associated with the resource. | gauge | `type`, `owner_group`, `owner_node`, `name` -`LooksAlivePollInterval` | Provides access to the resource's LooksAlivePollInterval property, which is the recommended interval in milliseconds at which the Cluster Service should poll the resource to determine whether it appears operational. If the property is set to 0xFFFFFFFF, the Cluster Service uses the LooksAlivePollInterval property for the resource type associated with the resource. | gauge | `type`, `owner_group`, `owner_node`, `name` -`MonitorProcessId` | Provides the process ID of the resource host service that is currently hosting the resource. | gauge | `type`, `owner_group`, `owner_node`, `name` -`PendingTimeout` | Provides access to the resource's PendingTimeout property. 
If a resource cannot be brought online or taken offline in the number of milliseconds specified by the PendingTimeout property, the resource is forcibly terminated. | gauge | `type`, `owner_group`, `owner_node`, `name` -`ResourceClass` | Gets or sets the resource class of a resource. 0: Unknown; 1: Storage; 2: Network; 32768: Unknown | gauge | `type`, `owner_group`, `owner_node`, `name` -`RestartAction` | Provides access to the resource's RestartAction property, which is the action to be taken by the Cluster Service if the resource fails. | gauge | `type`, `owner_group`, `owner_node`, `name` -`RestartDelay` | Indicates the time delay before a failed resource is restarted. | gauge | `type`, `owner_group`, `owner_node`, `name` -`RestartPeriod` | Provides access to the resource's RestartPeriod property, which is interval of time, in milliseconds, during which a specified number of restart attempts can be made on a nonresponsive resource. | gauge | `type`, `owner_group`, `owner_node`, `name` -`RestartThreshold` | Provides access to the resource's RestartThreshold property which is the maximum number of restart attempts that can be made on a resource within an interval defined by the RestartPeriod property before the Cluster Service initiates the action specified by the RestartAction property. | gauge | `type`, `owner_group`, `owner_node`, `name` -`RetryPeriodOnFailure` | Provides access to the resource's RetryPeriodOnFailure property, which is the interval of time (in milliseconds) that a resource should remain in a failed state before the Cluster service attempts to restart it. | gauge | `type`, `owner_group`, `owner_node`, `name` -`State` | The current state of the resource. -1: Unknown; 0: Inherited; 1: Initializing; 2: Online; 3: Offline; 4: Failed; 128: Pending; 129: Online Pending; 130: Offline Pending | gauge | `type`, `owner_group`, `owner_node`, `name` -`Subclass` | Provides the list of references to nodes that can be the owner of this resource. 
| gauge | `type`, `owner_group`, `owner_node`, `name` +`Characteristics` | Provides the characteristics of the object. The cluster defines characteristics only for resources. For a description of these characteristics, see [CLUSCTL_RESOURCE_GET_CHARACTERISTICS](https://docs.microsoft.com/en-us/previous-versions/windows/desktop/mscs/clusctl-resource-get-characteristics). | gauge | `type`, `owner_group`, `name` +`DeadlockTimeout` | Indicates the length of time to wait, in milliseconds, before declaring a deadlock in any call into a resource. | gauge | `type`, `owner_group`, `name` +`EmbeddedFailureAction` | The time, in milliseconds, that a resource should remain in a failed state before the Cluster service attempts to restart it. | gauge | `type`, `owner_group`, `name` +`Flags` | Provides access to the flags set for the object. The cluster defines flags only for resources. For a description of these flags, see [CLUSCTL_RESOURCE_GET_FLAGS](https://docs.microsoft.com/en-us/previous-versions/windows/desktop/mscs/clusctl-resource-get-flags). | gauge | `type`, `owner_group`, `name` +`IsAlivePollInterval` | Provides access to the resource's IsAlivePollInterval property, which is the recommended interval in milliseconds at which the Cluster Service should poll the resource to determine whether it is operational. If the property is set to 0xFFFFFFFF, the Cluster Service uses the IsAlivePollInterval property for the resource type associated with the resource. | gauge | `type`, `owner_group`, `name` +`LooksAlivePollInterval` | Provides access to the resource's LooksAlivePollInterval property, which is the recommended interval in milliseconds at which the Cluster Service should poll the resource to determine whether it appears operational. If the property is set to 0xFFFFFFFF, the Cluster Service uses the LooksAlivePollInterval property for the resource type associated with the resource. 
| gauge | `type`, `owner_group`, `name` +`MonitorProcessId` | Provides the process ID of the resource host service that is currently hosting the resource. | gauge | `type`, `owner_group`, `name` +`OwnerNode` | The node hosting the resource. | gauge | `type`, `owner_group`, `node_name`, `name` +`PendingTimeout` | Provides access to the resource's PendingTimeout property. If a resource cannot be brought online or taken offline in the number of milliseconds specified by the PendingTimeout property, the resource is forcibly terminated. | gauge | `type`, `owner_group`, `name` +`ResourceClass` | Gets or sets the resource class of a resource. 0: Unknown; 1: Storage; 2: Network; 32768: Unknown | gauge | `type`, `owner_group`, `name` +`RestartAction` | Provides access to the resource's RestartAction property, which is the action to be taken by the Cluster Service if the resource fails. | gauge | `type`, `owner_group`, `name` +`RestartDelay` | Indicates the time delay before a failed resource is restarted. | gauge | `type`, `owner_group`, `name` +`RestartPeriod` | Provides access to the resource's RestartPeriod property, which is interval of time, in milliseconds, during which a specified number of restart attempts can be made on a nonresponsive resource. | gauge | `type`, `owner_group`, `name` +`RestartThreshold` | Provides access to the resource's RestartThreshold property which is the maximum number of restart attempts that can be made on a resource within an interval defined by the RestartPeriod property before the Cluster Service initiates the action specified by the RestartAction property. | gauge | `type`, `owner_group`, `name` +`RetryPeriodOnFailure` | Provides access to the resource's RetryPeriodOnFailure property, which is the interval of time (in milliseconds) that a resource should remain in a failed state before the Cluster service attempts to restart it. | gauge | `type`, `owner_group`, `name` +`State` | The current state of the resource. 
-1: Unknown; 0: Inherited; 1: Initializing; 2: Online; 3: Offline; 4: Failed; 128: Pending; 129: Online Pending; 130: Offline Pending | gauge | `type`, `owner_group`, `name` +`Subclass` | Provides the list of references to nodes that can be the owner of this resource. | gauge | `type`, `owner_group`, `name` ### Example metric Query the state of all cluster resource owned by node1 diff --git a/docs/collector.mscluster_resourcegroup.md b/docs/collector.mscluster_resourcegroup.md index 2029e4ac..88c07b6e 100644 --- a/docs/collector.mscluster_resourcegroup.md +++ b/docs/collector.mscluster_resourcegroup.md @@ -16,20 +16,21 @@ None Name | Description | Type | Labels -----|-------------|------|------- -`AutoFailbackType` | Provides access to the group's AutoFailbackType property. | gauge | `owner_node`, `name` -`Characteristics` | Provides the characteristics of the group. The cluster defines characteristics only for resources. For a description of these characteristics, see [CLUSCTL_RESOURCE_GET_CHARACTERISTICS](https://docs.microsoft.com/en-us/previous-versions/windows/desktop/mscs/clusctl-resource-get-characteristics). | gauge | `owner_node`, `name` -`ColdStartSetting` | Indicates whether a group can start after a cluster cold start. | gauge | `owner_node`, `name` -`DefaultOwner` | Number of the last node the resource group was activated on or explicitly moved to. | gauge | `owner_node`, `name` -`FailbackWindowEnd` | The FailbackWindowEnd property provides the latest time that the group can be moved back to the node identified as its preferred node. | gauge | `owner_node`, `name` -`FailbackWindowStart` | The FailbackWindowStart property provides the earliest time (that is, local time as kept by the cluster) that the group can be moved back to the node identified as its preferred node. 
| gauge | `owner_node`, `name` -`FailoverPeriod` | The FailoverPeriod property specifies a number of hours during which a maximum number of failover attempts, specified by the FailoverThreshold property, can occur. | gauge | `owner_node`, `name` -`FailoverThreshold` | The FailoverThreshold property specifies the maximum number of failover attempts. | gauge | `owner_node`, `name` -`Flags` | Provides access to the flags set for the group. The cluster defines flags only for resources. For a description of these flags, see [CLUSCTL_RESOURCE_GET_FLAGS](https://docs.microsoft.com/en-us/previous-versions/windows/desktop/mscs/clusctl-resource-get-flags). | gauge | `owner_node`, `name` -`GroupType` | The Type of the resource group. | gauge | `owner_node`, `name` -`Priority` | Priority value of the resource group | gauge | `owner_node`, `name` -`ResiliencyPeriod` | The resiliency period for this group, in seconds. | gauge | `owner_node`, `name` -`State` | The current state of the resource group. -1: Unknown; 0: Online; 1: Offline; 2: Failed; 3: Partial Online; 4: Pending | gauge | `owner_node`, `name` -`UpdateDomain` | | gauge | `owner_node`, `name` +`AutoFailbackType` | Provides access to the group's AutoFailbackType property. | gauge | `name` +`Characteristics` | Provides the characteristics of the group. The cluster defines characteristics only for resources. For a description of these characteristics, see [CLUSCTL_RESOURCE_GET_CHARACTERISTICS](https://docs.microsoft.com/en-us/previous-versions/windows/desktop/mscs/clusctl-resource-get-characteristics). | gauge | `name` +`ColdStartSetting` | Indicates whether a group can start after a cluster cold start. | gauge | `name` +`DefaultOwner` | Number of the last node the resource group was activated on or explicitly moved to. | gauge | `name` +`FailbackWindowEnd` | The FailbackWindowEnd property provides the latest time that the group can be moved back to the node identified as its preferred node. 
| gauge | `name` +`FailbackWindowStart` | The FailbackWindowStart property provides the earliest time (that is, local time as kept by the cluster) that the group can be moved back to the node identified as its preferred node. | gauge | `name` +`FailoverPeriod` | The FailoverPeriod property specifies a number of hours during which a maximum number of failover attempts, specified by the FailoverThreshold property, can occur. | gauge | `name` +`FailoverThreshold` | The FailoverThreshold property specifies the maximum number of failover attempts. | gauge | `name` +`Flags` | Provides access to the flags set for the group. The cluster defines flags only for resources. For a description of these flags, see [CLUSCTL_RESOURCE_GET_FLAGS](https://docs.microsoft.com/en-us/previous-versions/windows/desktop/mscs/clusctl-resource-get-flags). | gauge | `name` +`GroupType` | The Type of the resource group. | gauge | `name` +`OwnerNode` | The node hosting the resource group. 0: Not hosted; 1: Hosted | gauge | `node_name`, `name` +`Priority` | Priority value of the resource group | gauge | `name` +`ResiliencyPeriod` | The resiliency period for this group, in seconds. | gauge | `name` +`State` | The current state of the resource group. 
-1: Unknown; 0: Online; 1: Offline; 2: Failed; 3: Partial Online; 4: Pending | gauge | `name` +`UpdateDomain` | | gauge | `name` ### Example metric Query the state of all cluster group owned by node1 From 941019ebc959a78c2ce695d0a66fe3fa583d8404 Mon Sep 17 00:00:00 2001 From: dinifarb Date: Mon, 29 Jan 2024 07:55:37 +0100 Subject: [PATCH 09/15] fix(#1390): global kingpin instead of app var Signed-off-by: dinifarb Signed-off-by: Alexandre JARDON <28548335+webalexeu@users.noreply.github.com> --- pkg/collector/process/process.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/collector/process/process.go b/pkg/collector/process/process.go index 70cad74f..9ef04762 100644 --- a/pkg/collector/process/process.go +++ b/pkg/collector/process/process.go @@ -90,7 +90,7 @@ func NewWithFlags(app *kingpin.Application) types.Collector { "Regexp of processes to exclude. Process name must both match include and not match exclude to be included.", ).Default(ConfigDefaults.ProcessExclude).String(), - enableWorkerProcess: kingpin.Flag( + enableWorkerProcess: app.Flag( "collector.process.iis", "Enable IIS worker process name queries. May cause the collector to leak memory.", ).Default("false").Bool(), From 9541560e43d98c7c310d44fdfa422f00c4c1fb9b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 29 Jan 2024 11:04:27 +0000 Subject: [PATCH 10/15] chore(deps): bump github.com/yusufpapurcu/wmi from 1.2.3 to 1.2.4 Bumps [github.com/yusufpapurcu/wmi](https://github.com/yusufpapurcu/wmi) from 1.2.3 to 1.2.4. - [Release notes](https://github.com/yusufpapurcu/wmi/releases) - [Commits](https://github.com/yusufpapurcu/wmi/compare/v1.2.3...v1.2.4) --- updated-dependencies: - dependency-name: github.com/yusufpapurcu/wmi dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Signed-off-by: Alexandre JARDON <28548335+webalexeu@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index a5c8c3db..fdde716c 100644 --- a/go.mod +++ b/go.mod @@ -14,7 +14,7 @@ require ( github.com/prometheus/exporter-toolkit v0.11.0 github.com/sirupsen/logrus v1.9.3 // indirect github.com/stretchr/testify v1.8.4 - github.com/yusufpapurcu/wmi v1.2.3 + github.com/yusufpapurcu/wmi v1.2.4 go.opencensus.io v0.24.0 // indirect golang.org/x/exp v0.0.0-20231110203233-9a3e6036ecaa golang.org/x/sys v0.16.0 diff --git a/go.sum b/go.sum index d59866b3..4f6ac216 100644 --- a/go.sum +++ b/go.sum @@ -111,8 +111,8 @@ github.com/xhit/go-str2duration/v2 v2.1.0 h1:lxklc02Drh6ynqX+DdPyp5pCKLUQpRT8bp8 github.com/xhit/go-str2duration/v2 v2.1.0/go.mod h1:ohY8p+0f07DiV6Em5LKB0s2YpLtXVyJfNt1+BlmyAsU= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= -github.com/yusufpapurcu/wmi v1.2.3 h1:E1ctvB7uKFMOJw3fdOW32DwGE9I7t++CRUEMKvFoFiw= -github.com/yusufpapurcu/wmi v1.2.3/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0= +github.com/yusufpapurcu/wmi v1.2.4 h1:zFUKzehAFReQwLys1b/iSMl+JQGSCSjtVqQn9bBrPo0= +github.com/yusufpapurcu/wmi v1.2.4/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0= go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0= go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= From 7635e6e094d25ae0dc7ecc0325bb1a9467038868 Mon Sep 17 00:00:00 2001 From: Ben Reedy Date: Sat, 27 Jan 2024 08:33:17 +1000 Subject: [PATCH 11/15] docs!: Remove support for EOL Windows versions Microsoft currently support Windows Server 2016 or newer, and Windows 10 and Windows 11 (21HR or later). 
Dropping support for end-of-life Windows Server versions will reduce maintenance overhead for project maintainers. Signed-off-by: Ben Reedy Signed-off-by: Alexandre JARDON <28548335+webalexeu@users.noreply.github.com> --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index a881cb73..5e1a41d6 100644 --- a/README.md +++ b/README.md @@ -140,7 +140,7 @@ See detailed steps to install on Windows Kubernetes [here](./kubernetes/kubernet ## Supported versions -windows_exporter supports Windows Server versions 2008R2 and later, and desktop Windows version 7 and later. +`windows_exporter` supports Windows Server versions 2016 and later, and desktop Windows versions 10 and 11 (21H2 or later). ## Usage From 143ab2247a9a58805484a13a2fbbad04c7dae2fc Mon Sep 17 00:00:00 2001 From: Ben Reedy Date: Fri, 2 Feb 2024 05:40:16 +1000 Subject: [PATCH 12/15] Update README.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Jan-Otto Kröpke Signed-off-by: Ben Reedy Signed-off-by: Alexandre JARDON <28548335+webalexeu@users.noreply.github.com> --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 5e1a41d6..54bddd77 100644 --- a/README.md +++ b/README.md @@ -142,6 +142,8 @@ See detailed steps to install on Windows Kubernetes [here](./kubernetes/kubernet `windows_exporter` supports Windows Server versions 2016 and later, and desktop Windows versions 10 and 11 (21H2 or later). +Windows Server 2012 and 2012R2 are supported on a best-effort basis only and are not guaranteed to work. 
+ ## Usage go get -u github.com/prometheus/promu From 67fca296bf18c4778b5b53d4f82f0b16192f6d2c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 12 Feb 2024 11:15:08 +0000 Subject: [PATCH 13/15] chore(deps): bump golang.org/x/sys from 0.16.0 to 0.17.0 Bumps [golang.org/x/sys](https://github.com/golang/sys) from 0.16.0 to 0.17.0. - [Commits](https://github.com/golang/sys/compare/v0.16.0...v0.17.0) --- updated-dependencies: - dependency-name: golang.org/x/sys dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Signed-off-by: Alexandre JARDON <28548335+webalexeu@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index fdde716c..ce00c9ff 100644 --- a/go.mod +++ b/go.mod @@ -17,7 +17,7 @@ require ( github.com/yusufpapurcu/wmi v1.2.4 go.opencensus.io v0.24.0 // indirect golang.org/x/exp v0.0.0-20231110203233-9a3e6036ecaa - golang.org/x/sys v0.16.0 + golang.org/x/sys v0.17.0 gopkg.in/yaml.v3 v3.0.1 ) diff --git a/go.sum b/go.sum index 4f6ac216..1e96a771 100644 --- a/go.sum +++ b/go.sum @@ -159,8 +159,8 @@ golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.16.0 h1:xWw16ngr6ZMtmxDyKyIgsE93KNKz5HKmMa3b8ALHidU= -golang.org/x/sys v0.16.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.17.0 h1:25cE3gD+tdBA7lp7QfhuV+rJiE9YXTcS3VG1SqssI/Y= +golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= From 233470cdf099c4010a4b75c95106f6a6deff1bc7 Mon Sep 17 00:00:00 2001 From: Alexandre JARDON <28548335+webalexeu@users.noreply.github.com> Date: Thu, 25 Jan 2024 15:59:52 +0100 Subject: [PATCH 14/15] Add owner_node on resource and resourcegroup for mscluster Signed-off-by: Alexandre JARDON <28548335+webalexeu@users.noreply.github.com> --- .../mscluster_resource/mscluster_resource.go | 64 +++++++++---------- .../mscluster_resourcegroup.go | 55 ++++++++-------- 2 files changed, 60 insertions(+), 59 deletions(-) diff --git a/pkg/collector/mscluster_resource/mscluster_resource.go b/pkg/collector/mscluster_resource/mscluster_resource.go index 650189ee..21cc79e6 100644 --- a/pkg/collector/mscluster_resource/mscluster_resource.go +++ b/pkg/collector/mscluster_resource/mscluster_resource.go @@ -65,43 +65,43 @@ func (c *collector) Build() error { c.Characteristics = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "characteristics"), "Provides the characteristics of the object.", - []string{"type", "owner_group", "name"}, + []string{"type", "owner_group", "owner_node", "name"}, nil, ) c.DeadlockTimeout = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "deadlock_timeout"), "Indicates the length of time to wait, in milliseconds, before declaring a deadlock in any call into a resource.", - []string{"type", "owner_group", "name"}, + []string{"type", "owner_group", "owner_node", "name"}, nil, ) c.EmbeddedFailureAction = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "embedded_failure_action"), "The time, in milliseconds, that a resource should remain in a failed state before the Cluster service attempts to restart it.", - []string{"type", "owner_group", "name"}, + []string{"type", "owner_group", "owner_node", "name"}, nil, ) c.Flags = prometheus.NewDesc( 
prometheus.BuildFQName(types.Namespace, Name, "flags"), "Provides access to the flags set for the object.", - []string{"type", "owner_group", "name"}, + []string{"type", "owner_group", "owner_node", "name"}, nil, ) c.IsAlivePollInterval = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "is_alive_poll_interval"), "Provides access to the resource's IsAlivePollInterval property, which is the recommended interval in milliseconds at which the Cluster Service should poll the resource to determine whether it is operational. If the property is set to 0xFFFFFFFF, the Cluster Service uses the IsAlivePollInterval property for the resource type associated with the resource.", - []string{"type", "owner_group", "name"}, + []string{"type", "owner_group", "owner_node", "name"}, nil, ) c.LooksAlivePollInterval = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "looks_alive_poll_interval"), "Provides access to the resource's LooksAlivePollInterval property, which is the recommended interval in milliseconds at which the Cluster Service should poll the resource to determine whether it appears operational. If the property is set to 0xFFFFFFFF, the Cluster Service uses the LooksAlivePollInterval property for the resource type associated with the resource.", - []string{"type", "owner_group", "name"}, + []string{"type", "owner_group", "owner_node", "name"}, nil, ) c.MonitorProcessId = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "monitor_process_id"), "Provides the process ID of the resource host service that is currently hosting the resource.", - []string{"type", "owner_group", "name"}, + []string{"type", "owner_group", "owner_node", "name"}, nil, ) c.OwnerNode = prometheus.NewDesc( @@ -113,55 +113,55 @@ func (c *collector) Build() error { c.PendingTimeout = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "pending_timeout"), "Provides access to the resource's PendingTimeout property. 
If a resource cannot be brought online or taken offline in the number of milliseconds specified by the PendingTimeout property, the resource is forcibly terminated.", - []string{"type", "owner_group", "name"}, + []string{"type", "owner_group", "owner_node", "name"}, nil, ) c.ResourceClass = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "resource_class"), "Gets or sets the resource class of a resource. 0: Unknown; 1: Storage; 2: Network; 32768: Unknown ", - []string{"type", "owner_group", "name"}, + []string{"type", "owner_group", "owner_node", "name"}, nil, ) c.RestartAction = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "restart_action"), "Provides access to the resource's RestartAction property, which is the action to be taken by the Cluster Service if the resource fails.", - []string{"type", "owner_group", "name"}, + []string{"type", "owner_group", "owner_node", "name"}, nil, ) c.RestartDelay = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "restart_delay"), "Indicates the time delay before a failed resource is restarted.", - []string{"type", "owner_group", "name"}, + []string{"type", "owner_group", "owner_node", "name"}, nil, ) c.RestartPeriod = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "restart_period"), "Provides access to the resource's RestartPeriod property, which is interval of time, in milliseconds, during which a specified number of restart attempts can be made on a nonresponsive resource.", - []string{"type", "owner_group", "name"}, + []string{"type", "owner_group", "owner_node", "name"}, nil, ) c.RestartThreshold = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "restart_threshold"), "Provides access to the resource's RestartThreshold property which is the maximum number of restart attempts that can be made on a resource within an interval defined by the RestartPeriod property before the Cluster Service initiates the action specified by the 
RestartAction property.", - []string{"type", "owner_group", "name"}, + []string{"type", "owner_group", "owner_node", "name"}, nil, ) c.RetryPeriodOnFailure = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "retry_period_on_failure"), "Provides access to the resource's RetryPeriodOnFailure property, which is the interval of time (in milliseconds) that a resource should remain in a failed state before the Cluster service attempts to restart it.", - []string{"type", "owner_group", "name"}, + []string{"type", "owner_group", "owner_node", "name"}, nil, ) c.State = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "state"), "The current state of the resource. -1: Unknown; 0: Inherited; 1: Initializing; 2: Online; 3: Offline; 4: Failed; 128: Pending; 129: Online Pending; 130: Offline Pending ", - []string{"type", "owner_group", "name"}, + []string{"type", "owner_group", "owner_node", "name"}, nil, ) c.Subclass = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "subclass"), "Provides the list of references to nodes that can be the owner of this resource.", - []string{"type", "owner_group", "name"}, + []string{"type", "owner_group", "owner_node", "name"}, nil, ) return nil @@ -208,49 +208,49 @@ func (c *collector) Collect(_ *types.ScrapeContext, ch chan<- prometheus.Metric) c.Characteristics, prometheus.GaugeValue, float64(v.Characteristics), - v.Type, v.OwnerGroup, v.Name, + v.Type, v.OwnerGroup, v.OwnerNode, v.Name, ) ch <- prometheus.MustNewConstMetric( c.DeadlockTimeout, prometheus.GaugeValue, float64(v.DeadlockTimeout), - v.Type, v.OwnerGroup, v.Name, + v.Type, v.OwnerGroup, v.OwnerNode, v.Name, ) ch <- prometheus.MustNewConstMetric( c.EmbeddedFailureAction, prometheus.GaugeValue, float64(v.EmbeddedFailureAction), - v.Type, v.OwnerGroup, v.Name, + v.Type, v.OwnerGroup, v.OwnerNode, v.Name, ) ch <- prometheus.MustNewConstMetric( c.Flags, prometheus.GaugeValue, float64(v.Flags), - v.Type, v.OwnerGroup, v.Name, + v.Type, 
v.OwnerGroup, v.OwnerNode, v.Name, ) ch <- prometheus.MustNewConstMetric( c.IsAlivePollInterval, prometheus.GaugeValue, float64(v.IsAlivePollInterval), - v.Type, v.OwnerGroup, v.Name, + v.Type, v.OwnerGroup, v.OwnerNode, v.Name, ) ch <- prometheus.MustNewConstMetric( c.LooksAlivePollInterval, prometheus.GaugeValue, float64(v.LooksAlivePollInterval), - v.Type, v.OwnerGroup, v.Name, + v.Type, v.OwnerGroup, v.OwnerNode, v.Name, ) ch <- prometheus.MustNewConstMetric( c.MonitorProcessId, prometheus.GaugeValue, float64(v.MonitorProcessId), - v.Type, v.OwnerGroup, v.Name, + v.Type, v.OwnerGroup, v.OwnerNode, v.Name, ) if mscluster_node.NodeName != nil { @@ -272,63 +272,63 @@ func (c *collector) Collect(_ *types.ScrapeContext, ch chan<- prometheus.Metric) c.PendingTimeout, prometheus.GaugeValue, float64(v.PendingTimeout), - v.Type, v.OwnerGroup, v.Name, + v.Type, v.OwnerGroup, v.OwnerNode, v.Name, ) ch <- prometheus.MustNewConstMetric( c.ResourceClass, prometheus.GaugeValue, float64(v.ResourceClass), - v.Type, v.OwnerGroup, v.Name, + v.Type, v.OwnerGroup, v.OwnerNode, v.Name, ) ch <- prometheus.MustNewConstMetric( c.RestartAction, prometheus.GaugeValue, float64(v.RestartAction), - v.Type, v.OwnerGroup, v.Name, + v.Type, v.OwnerGroup, v.OwnerNode, v.Name, ) ch <- prometheus.MustNewConstMetric( c.RestartDelay, prometheus.GaugeValue, float64(v.RestartDelay), - v.Type, v.OwnerGroup, v.Name, + v.Type, v.OwnerGroup, v.OwnerNode, v.Name, ) ch <- prometheus.MustNewConstMetric( c.RestartPeriod, prometheus.GaugeValue, float64(v.RestartPeriod), - v.Type, v.OwnerGroup, v.Name, + v.Type, v.OwnerGroup, v.OwnerNode, v.Name, ) ch <- prometheus.MustNewConstMetric( c.RestartThreshold, prometheus.GaugeValue, float64(v.RestartThreshold), - v.Type, v.OwnerGroup, v.Name, + v.Type, v.OwnerGroup, v.OwnerNode, v.Name, ) ch <- prometheus.MustNewConstMetric( c.RetryPeriodOnFailure, prometheus.GaugeValue, float64(v.RetryPeriodOnFailure), - v.Type, v.OwnerGroup, v.Name, + v.Type, v.OwnerGroup, 
v.OwnerNode, v.Name, ) ch <- prometheus.MustNewConstMetric( c.State, prometheus.GaugeValue, float64(v.State), - v.Type, v.OwnerGroup, v.Name, + v.Type, v.OwnerGroup, v.OwnerNode, v.Name, ) ch <- prometheus.MustNewConstMetric( c.Subclass, prometheus.GaugeValue, float64(v.Subclass), - v.Type, v.OwnerGroup, v.Name, + v.Type, v.OwnerGroup, v.OwnerNode, v.Name, ) } diff --git a/pkg/collector/mscluster_resourcegroup/mscluster_resourcegroup.go b/pkg/collector/mscluster_resourcegroup/mscluster_resourcegroup.go index 7169830c..bbb6193c 100644 --- a/pkg/collector/mscluster_resourcegroup/mscluster_resourcegroup.go +++ b/pkg/collector/mscluster_resourcegroup/mscluster_resourcegroup.go @@ -64,61 +64,61 @@ func (c *collector) Build() error { c.AutoFailbackType = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "auto_failback_type"), "Provides access to the group's AutoFailbackType property.", - []string{"name"}, + []string{"owner_node", "name"}, nil, ) c.Characteristics = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "characteristics"), "Provides the characteristics of the group.", - []string{"name"}, + []string{"owner_node", "name"}, nil, ) c.ColdStartSetting = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "cold_start_setting"), "Indicates whether a group can start after a cluster cold start.", - []string{"name"}, + []string{"owner_node", "name"}, nil, ) c.DefaultOwner = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "default_owner"), "Number of the last node the resource group was activated on or explicitly moved to.", - []string{"name"}, + []string{"owner_node", "name"}, nil, ) c.FailbackWindowEnd = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "failback_window_end"), "The FailbackWindowEnd property provides the latest time that the group can be moved back to the node identified as its preferred node.", - []string{"name"}, + []string{"owner_node", "name"}, nil, ) 
c.FailbackWindowStart = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "failback_window_start"), "The FailbackWindowStart property provides the earliest time (that is, local time as kept by the cluster) that the group can be moved back to the node identified as its preferred node.", - []string{"name"}, + []string{"owner_node", "name"}, nil, ) c.FailoverPeriod = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "failover_period"), "The FailoverPeriod property specifies a number of hours during which a maximum number of failover attempts, specified by the FailoverThreshold property, can occur.", - []string{"name"}, + []string{"owner_node", "name"}, nil, ) c.FailoverThreshold = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "failover_threshold"), "The FailoverThreshold property specifies the maximum number of failover attempts.", - []string{"name"}, + []string{"owner_node", "name"}, nil, ) c.Flags = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "flags"), "Provides access to the flags set for the group. ", - []string{"name"}, + []string{"owner_node", "name"}, nil, ) c.GroupType = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "group_type"), "The Type of the resource group.", - []string{"name"}, + []string{"owner_node", "name"}, nil, ) c.OwnerNode = prometheus.NewDesc( @@ -130,19 +130,19 @@ func (c *collector) Build() error { c.Priority = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "priority"), "Priority value of the resource group", - []string{"name"}, + []string{"owner_node", "name"}, nil, ) c.ResiliencyPeriod = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "resiliency_period"), "The resiliency period for this group, in seconds.", - []string{"name"}, + []string{"owner_node", "name"}, nil, ) c.State = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "state"), "The current state of the resource group. 
-1: Unknown; 0: Online; 1: Offline; 2: Failed; 3: Partial Online; 4: Pending", - []string{"name"}, + []string{"owner_node", "name"}, nil, ) return nil @@ -151,7 +151,8 @@ func (c *collector) Build() error { // MSCluster_ResourceGroup docs: // - https://docs.microsoft.com/en-us/previous-versions/windows/desktop/cluswmi/mscluster-resourcegroup type MSCluster_ResourceGroup struct { - Name string + Name string + OwnerNode string AutoFailbackType uint Characteristics uint @@ -184,70 +185,70 @@ func (c *collector) Collect(_ *types.ScrapeContext, ch chan<- prometheus.Metric) c.AutoFailbackType, prometheus.GaugeValue, float64(v.AutoFailbackType), - v.Name, + v.OwnerNode, v.Name, ) ch <- prometheus.MustNewConstMetric( c.Characteristics, prometheus.GaugeValue, float64(v.Characteristics), - v.Name, + v.OwnerNode, v.Name, ) ch <- prometheus.MustNewConstMetric( c.ColdStartSetting, prometheus.GaugeValue, float64(v.ColdStartSetting), - v.Name, + v.OwnerNode, v.Name, ) ch <- prometheus.MustNewConstMetric( c.DefaultOwner, prometheus.GaugeValue, float64(v.DefaultOwner), - v.Name, + v.OwnerNode, v.Name, ) ch <- prometheus.MustNewConstMetric( c.FailbackWindowEnd, prometheus.GaugeValue, float64(v.FailbackWindowEnd), - v.Name, + v.OwnerNode, v.Name, ) ch <- prometheus.MustNewConstMetric( c.FailbackWindowStart, prometheus.GaugeValue, float64(v.FailbackWindowStart), - v.Name, + v.OwnerNode, v.Name, ) ch <- prometheus.MustNewConstMetric( c.FailoverPeriod, prometheus.GaugeValue, float64(v.FailoverPeriod), - v.Name, + v.OwnerNode, v.Name, ) ch <- prometheus.MustNewConstMetric( c.FailoverThreshold, prometheus.GaugeValue, float64(v.FailoverThreshold), - v.Name, + v.OwnerNode, v.Name, ) ch <- prometheus.MustNewConstMetric( c.Flags, prometheus.GaugeValue, float64(v.Flags), - v.Name, + v.OwnerNode, v.Name, ) ch <- prometheus.MustNewConstMetric( c.GroupType, prometheus.GaugeValue, float64(v.GroupType), - v.Name, + v.OwnerNode, v.Name, ) if mscluster_node.NodeName != nil { @@ -269,21 +270,21 @@ 
func (c *collector) Collect(_ *types.ScrapeContext, ch chan<- prometheus.Metric) c.Priority, prometheus.GaugeValue, float64(v.Priority), - v.Name, + v.OwnerNode, v.Name, ) ch <- prometheus.MustNewConstMetric( c.ResiliencyPeriod, prometheus.GaugeValue, float64(v.ResiliencyPeriod), - v.Name, + v.OwnerNode, v.Name, ) ch <- prometheus.MustNewConstMetric( c.State, prometheus.GaugeValue, float64(v.State), - v.Name, + v.OwnerNode, v.Name, ) } From 8d1d55235107dac8d1a0734365a0f33ecc057f4a Mon Sep 17 00:00:00 2001 From: Alexandre JARDON <28548335+webalexeu@users.noreply.github.com> Date: Fri, 16 Feb 2024 08:42:25 +0100 Subject: [PATCH 15/15] Create dedicated metric for owner_node Signed-off-by: Alexandre JARDON <28548335+webalexeu@users.noreply.github.com> --- .../mscluster_resource/mscluster_resource.go | 70 ++++++++++--------- .../mscluster_resourcegroup.go | 61 ++++++++-------- 2 files changed, 71 insertions(+), 60 deletions(-) diff --git a/pkg/collector/mscluster_resource/mscluster_resource.go b/pkg/collector/mscluster_resource/mscluster_resource.go index 21cc79e6..01e4df33 100644 --- a/pkg/collector/mscluster_resource/mscluster_resource.go +++ b/pkg/collector/mscluster_resource/mscluster_resource.go @@ -65,43 +65,49 @@ func (c *collector) Build() error { c.Characteristics = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "characteristics"), "Provides the characteristics of the object.", - []string{"type", "owner_group", "owner_node", "name"}, + []string{"type", "owner_group", "name"}, nil, ) c.DeadlockTimeout = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "deadlock_timeout"), "Indicates the length of time to wait, in milliseconds, before declaring a deadlock in any call into a resource.", - []string{"type", "owner_group", "owner_node", "name"}, + []string{"type", "owner_group", "name"}, nil, ) c.EmbeddedFailureAction = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "embedded_failure_action"), "The time, in 
milliseconds, that a resource should remain in a failed state before the Cluster service attempts to restart it.", - []string{"type", "owner_group", "owner_node", "name"}, + []string{"type", "owner_group", "name"}, nil, ) c.Flags = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "flags"), "Provides access to the flags set for the object.", - []string{"type", "owner_group", "owner_node", "name"}, + []string{"type", "owner_group", "name"}, nil, ) c.IsAlivePollInterval = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "is_alive_poll_interval"), "Provides access to the resource's IsAlivePollInterval property, which is the recommended interval in milliseconds at which the Cluster Service should poll the resource to determine whether it is operational. If the property is set to 0xFFFFFFFF, the Cluster Service uses the IsAlivePollInterval property for the resource type associated with the resource.", - []string{"type", "owner_group", "owner_node", "name"}, + []string{"type", "owner_group", "name"}, nil, ) c.LooksAlivePollInterval = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "looks_alive_poll_interval"), "Provides access to the resource's LooksAlivePollInterval property, which is the recommended interval in milliseconds at which the Cluster Service should poll the resource to determine whether it appears operational. 
If the property is set to 0xFFFFFFFF, the Cluster Service uses the LooksAlivePollInterval property for the resource type associated with the resource.", - []string{"type", "owner_group", "owner_node", "name"}, + []string{"type", "owner_group", "name"}, nil, ) c.MonitorProcessId = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "monitor_process_id"), "Provides the process ID of the resource host service that is currently hosting the resource.", - []string{"type", "owner_group", "owner_node", "name"}, + []string{"type", "owner_group", "name"}, + nil, + ) + c.OwnerNode = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, Name, "owner_node"), + "The node hosting the resource. 0: Not hosted; 1: Hosted", + []string{"type", "owner_group", "node_name", "name"}, nil, ) c.OwnerNode = prometheus.NewDesc( @@ -113,55 +119,55 @@ func (c *collector) Build() error { c.PendingTimeout = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "pending_timeout"), "Provides access to the resource's PendingTimeout property. If a resource cannot be brought online or taken offline in the number of milliseconds specified by the PendingTimeout property, the resource is forcibly terminated.", - []string{"type", "owner_group", "owner_node", "name"}, + []string{"type", "owner_group", "name"}, nil, ) c.ResourceClass = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "resource_class"), "Gets or sets the resource class of a resource. 
0: Unknown; 1: Storage; 2: Network; 32768: Unknown ", - []string{"type", "owner_group", "owner_node", "name"}, + []string{"type", "owner_group", "name"}, nil, ) c.RestartAction = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "restart_action"), "Provides access to the resource's RestartAction property, which is the action to be taken by the Cluster Service if the resource fails.", - []string{"type", "owner_group", "owner_node", "name"}, + []string{"type", "owner_group", "name"}, nil, ) c.RestartDelay = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "restart_delay"), "Indicates the time delay before a failed resource is restarted.", - []string{"type", "owner_group", "owner_node", "name"}, + []string{"type", "owner_group", "name"}, nil, ) c.RestartPeriod = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "restart_period"), "Provides access to the resource's RestartPeriod property, which is interval of time, in milliseconds, during which a specified number of restart attempts can be made on a nonresponsive resource.", - []string{"type", "owner_group", "owner_node", "name"}, + []string{"type", "owner_group", "name"}, nil, ) c.RestartThreshold = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "restart_threshold"), "Provides access to the resource's RestartThreshold property which is the maximum number of restart attempts that can be made on a resource within an interval defined by the RestartPeriod property before the Cluster Service initiates the action specified by the RestartAction property.", - []string{"type", "owner_group", "owner_node", "name"}, + []string{"type", "owner_group", "name"}, nil, ) c.RetryPeriodOnFailure = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "retry_period_on_failure"), "Provides access to the resource's RetryPeriodOnFailure property, which is the interval of time (in milliseconds) that a resource should remain in a failed state before the Cluster 
service attempts to restart it.", - []string{"type", "owner_group", "owner_node", "name"}, + []string{"type", "owner_group", "name"}, nil, ) c.State = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "state"), "The current state of the resource. -1: Unknown; 0: Inherited; 1: Initializing; 2: Online; 3: Offline; 4: Failed; 128: Pending; 129: Online Pending; 130: Offline Pending ", - []string{"type", "owner_group", "owner_node", "name"}, + []string{"type", "owner_group", "name"}, nil, ) c.Subclass = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "subclass"), "Provides the list of references to nodes that can be the owner of this resource.", - []string{"type", "owner_group", "owner_node", "name"}, + []string{"type", "owner_group", "name"}, nil, ) return nil @@ -208,49 +214,49 @@ func (c *collector) Collect(_ *types.ScrapeContext, ch chan<- prometheus.Metric) c.Characteristics, prometheus.GaugeValue, float64(v.Characteristics), - v.Type, v.OwnerGroup, v.OwnerNode, v.Name, + v.Type, v.OwnerGroup, v.Name, ) ch <- prometheus.MustNewConstMetric( c.DeadlockTimeout, prometheus.GaugeValue, float64(v.DeadlockTimeout), - v.Type, v.OwnerGroup, v.OwnerNode, v.Name, + v.Type, v.OwnerGroup, v.Name, ) ch <- prometheus.MustNewConstMetric( c.EmbeddedFailureAction, prometheus.GaugeValue, float64(v.EmbeddedFailureAction), - v.Type, v.OwnerGroup, v.OwnerNode, v.Name, + v.Type, v.OwnerGroup, v.Name, ) ch <- prometheus.MustNewConstMetric( c.Flags, prometheus.GaugeValue, float64(v.Flags), - v.Type, v.OwnerGroup, v.OwnerNode, v.Name, + v.Type, v.OwnerGroup, v.Name, ) ch <- prometheus.MustNewConstMetric( c.IsAlivePollInterval, prometheus.GaugeValue, float64(v.IsAlivePollInterval), - v.Type, v.OwnerGroup, v.OwnerNode, v.Name, + v.Type, v.OwnerGroup, v.Name, ) ch <- prometheus.MustNewConstMetric( c.LooksAlivePollInterval, prometheus.GaugeValue, float64(v.LooksAlivePollInterval), - v.Type, v.OwnerGroup, v.OwnerNode, v.Name, + v.Type, v.OwnerGroup, v.Name, ) ch 
<- prometheus.MustNewConstMetric( c.MonitorProcessId, prometheus.GaugeValue, float64(v.MonitorProcessId), - v.Type, v.OwnerGroup, v.OwnerNode, v.Name, + v.Type, v.OwnerGroup, v.Name, ) if mscluster_node.NodeName != nil { @@ -272,63 +278,63 @@ func (c *collector) Collect(_ *types.ScrapeContext, ch chan<- prometheus.Metric) c.PendingTimeout, prometheus.GaugeValue, float64(v.PendingTimeout), - v.Type, v.OwnerGroup, v.OwnerNode, v.Name, + v.Type, v.OwnerGroup, v.Name, ) ch <- prometheus.MustNewConstMetric( c.ResourceClass, prometheus.GaugeValue, float64(v.ResourceClass), - v.Type, v.OwnerGroup, v.OwnerNode, v.Name, + v.Type, v.OwnerGroup, v.Name, ) ch <- prometheus.MustNewConstMetric( c.RestartAction, prometheus.GaugeValue, float64(v.RestartAction), - v.Type, v.OwnerGroup, v.OwnerNode, v.Name, + v.Type, v.OwnerGroup, v.Name, ) ch <- prometheus.MustNewConstMetric( c.RestartDelay, prometheus.GaugeValue, float64(v.RestartDelay), - v.Type, v.OwnerGroup, v.OwnerNode, v.Name, + v.Type, v.OwnerGroup, v.Name, ) ch <- prometheus.MustNewConstMetric( c.RestartPeriod, prometheus.GaugeValue, float64(v.RestartPeriod), - v.Type, v.OwnerGroup, v.OwnerNode, v.Name, + v.Type, v.OwnerGroup, v.Name, ) ch <- prometheus.MustNewConstMetric( c.RestartThreshold, prometheus.GaugeValue, float64(v.RestartThreshold), - v.Type, v.OwnerGroup, v.OwnerNode, v.Name, + v.Type, v.OwnerGroup, v.Name, ) ch <- prometheus.MustNewConstMetric( c.RetryPeriodOnFailure, prometheus.GaugeValue, float64(v.RetryPeriodOnFailure), - v.Type, v.OwnerGroup, v.OwnerNode, v.Name, + v.Type, v.OwnerGroup, v.Name, ) ch <- prometheus.MustNewConstMetric( c.State, prometheus.GaugeValue, float64(v.State), - v.Type, v.OwnerGroup, v.OwnerNode, v.Name, + v.Type, v.OwnerGroup, v.Name, ) ch <- prometheus.MustNewConstMetric( c.Subclass, prometheus.GaugeValue, float64(v.Subclass), - v.Type, v.OwnerGroup, v.OwnerNode, v.Name, + v.Type, v.OwnerGroup, v.Name, ) } diff --git a/pkg/collector/mscluster_resourcegroup/mscluster_resourcegroup.go 
b/pkg/collector/mscluster_resourcegroup/mscluster_resourcegroup.go index bbb6193c..10264ca5 100644 --- a/pkg/collector/mscluster_resourcegroup/mscluster_resourcegroup.go +++ b/pkg/collector/mscluster_resourcegroup/mscluster_resourcegroup.go @@ -64,61 +64,67 @@ func (c *collector) Build() error { c.AutoFailbackType = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "auto_failback_type"), "Provides access to the group's AutoFailbackType property.", - []string{"owner_node", "name"}, + []string{"name"}, nil, ) c.Characteristics = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "characteristics"), "Provides the characteristics of the group.", - []string{"owner_node", "name"}, + []string{"name"}, nil, ) c.ColdStartSetting = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "cold_start_setting"), "Indicates whether a group can start after a cluster cold start.", - []string{"owner_node", "name"}, + []string{"name"}, nil, ) c.DefaultOwner = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "default_owner"), "Number of the last node the resource group was activated on or explicitly moved to.", - []string{"owner_node", "name"}, + []string{"name"}, nil, ) c.FailbackWindowEnd = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "failback_window_end"), "The FailbackWindowEnd property provides the latest time that the group can be moved back to the node identified as its preferred node.", - []string{"owner_node", "name"}, + []string{"name"}, nil, ) c.FailbackWindowStart = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "failback_window_start"), "The FailbackWindowStart property provides the earliest time (that is, local time as kept by the cluster) that the group can be moved back to the node identified as its preferred node.", - []string{"owner_node", "name"}, + []string{"name"}, nil, ) c.FailoverPeriod = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, 
"failover_period"), "The FailoverPeriod property specifies a number of hours during which a maximum number of failover attempts, specified by the FailoverThreshold property, can occur.", - []string{"owner_node", "name"}, + []string{"name"}, nil, ) c.FailoverThreshold = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "failover_threshold"), "The FailoverThreshold property specifies the maximum number of failover attempts.", - []string{"owner_node", "name"}, + []string{"name"}, nil, ) c.Flags = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "flags"), "Provides access to the flags set for the group. ", - []string{"owner_node", "name"}, + []string{"name"}, nil, ) c.GroupType = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "group_type"), "The Type of the resource group.", - []string{"owner_node", "name"}, + []string{"name"}, + nil, + ) + c.OwnerNode = prometheus.NewDesc( + prometheus.BuildFQName(types.Namespace, Name, "owner_node"), + "The node hosting the resource group. 0: Not hosted; 1: Hosted", + []string{"node_name", "name"}, nil, ) c.OwnerNode = prometheus.NewDesc( @@ -130,19 +136,19 @@ func (c *collector) Build() error { c.Priority = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "priority"), "Priority value of the resource group", - []string{"owner_node", "name"}, + []string{"name"}, nil, ) c.ResiliencyPeriod = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "resiliency_period"), "The resiliency period for this group, in seconds.", - []string{"owner_node", "name"}, + []string{"name"}, nil, ) c.State = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, Name, "state"), "The current state of the resource group. 
-1: Unknown; 0: Online; 1: Offline; 2: Failed; 3: Partial Online; 4: Pending", - []string{"owner_node", "name"}, + []string{"name"}, nil, ) return nil @@ -151,8 +157,7 @@ func (c *collector) Build() error { // MSCluster_ResourceGroup docs: // - https://docs.microsoft.com/en-us/previous-versions/windows/desktop/cluswmi/mscluster-resourcegroup type MSCluster_ResourceGroup struct { - Name string - OwnerNode string + Name string AutoFailbackType uint Characteristics uint @@ -185,70 +190,70 @@ func (c *collector) Collect(_ *types.ScrapeContext, ch chan<- prometheus.Metric) c.AutoFailbackType, prometheus.GaugeValue, float64(v.AutoFailbackType), - v.OwnerNode, v.Name, + v.Name, ) ch <- prometheus.MustNewConstMetric( c.Characteristics, prometheus.GaugeValue, float64(v.Characteristics), - v.OwnerNode, v.Name, + v.Name, ) ch <- prometheus.MustNewConstMetric( c.ColdStartSetting, prometheus.GaugeValue, float64(v.ColdStartSetting), - v.OwnerNode, v.Name, + v.Name, ) ch <- prometheus.MustNewConstMetric( c.DefaultOwner, prometheus.GaugeValue, float64(v.DefaultOwner), - v.OwnerNode, v.Name, + v.Name, ) ch <- prometheus.MustNewConstMetric( c.FailbackWindowEnd, prometheus.GaugeValue, float64(v.FailbackWindowEnd), - v.OwnerNode, v.Name, + v.Name, ) ch <- prometheus.MustNewConstMetric( c.FailbackWindowStart, prometheus.GaugeValue, float64(v.FailbackWindowStart), - v.OwnerNode, v.Name, + v.Name, ) ch <- prometheus.MustNewConstMetric( c.FailoverPeriod, prometheus.GaugeValue, float64(v.FailoverPeriod), - v.OwnerNode, v.Name, + v.Name, ) ch <- prometheus.MustNewConstMetric( c.FailoverThreshold, prometheus.GaugeValue, float64(v.FailoverThreshold), - v.OwnerNode, v.Name, + v.Name, ) ch <- prometheus.MustNewConstMetric( c.Flags, prometheus.GaugeValue, float64(v.Flags), - v.OwnerNode, v.Name, + v.Name, ) ch <- prometheus.MustNewConstMetric( c.GroupType, prometheus.GaugeValue, float64(v.GroupType), - v.OwnerNode, v.Name, + v.Name, ) if mscluster_node.NodeName != nil { @@ -270,21 +275,21 @@ 
func (c *collector) Collect(_ *types.ScrapeContext, ch chan<- prometheus.Metric) c.Priority, prometheus.GaugeValue, float64(v.Priority), - v.OwnerNode, v.Name, + v.Name, ) ch <- prometheus.MustNewConstMetric( c.ResiliencyPeriod, prometheus.GaugeValue, float64(v.ResiliencyPeriod), - v.OwnerNode, v.Name, + v.Name, ) ch <- prometheus.MustNewConstMetric( c.State, prometheus.GaugeValue, float64(v.State), - v.OwnerNode, v.Name, + v.Name, ) }