memory: fix panics if metrics do not exist (#1960)

Signed-off-by: Jan-Otto Kröpke <mail@jkroepke.de>
This commit is contained in:
Jan-Otto Kröpke
2025-04-04 23:21:26 +02:00
committed by GitHub
parent 88c929ac6f
commit ecc805f0fa
39 changed files with 565 additions and 364 deletions

View File

@@ -47,7 +47,11 @@ import (
)
func main() {
exitCode := run()
ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt, os.Kill)
exitCode := run(ctx, os.Args[1:])
stop()
// If we are running as a service, we need to signal the service control manager that we are done.
if !IsService {
@@ -60,9 +64,8 @@ func main() {
<-serviceManagerFinishedCh
}
func run() int {
func run(ctx context.Context, args []string) int {
startTime := time.Now()
ctx := context.Background()
app := kingpin.New("windows_exporter", "A metrics collector for Windows.")
@@ -118,9 +121,9 @@ func run() int {
// Initialize collectors before loading and parsing CLI arguments
collectors := collector.NewWithFlags(app)
if err := config.Parse(app, os.Args[1:]); err != nil {
if err := config.Parse(app, args); err != nil {
//nolint:sloglint // we do not have an logger yet
slog.Error("Failed to load configuration",
slog.LogAttrs(ctx, slog.LevelError, "Failed to load configuration",
slog.Any("err", err),
)
@@ -131,8 +134,7 @@ func run() int {
logger, err := log.New(logConfig)
if err != nil {
//nolint:sloglint // we do not have an logger yet
slog.Error("failed to create logger",
logger.LogAttrs(ctx, slog.LevelError, "failed to create logger",
slog.Any("err", err),
)
@@ -145,8 +147,8 @@ func run() int {
logger.InfoContext(ctx, "using configuration file: "+*configFile)
}
if err = setPriorityWindows(logger, os.Getpid(), *processPriority); err != nil {
logger.Error("failed to set process priority",
if err = setPriorityWindows(ctx, logger, os.Getpid(), *processPriority); err != nil {
logger.LogAttrs(ctx, slog.LevelError, "failed to set process priority",
slog.Any("err", err),
)
@@ -155,7 +157,7 @@ func run() int {
enabledCollectorList := expandEnabledCollectors(*enabledCollectors)
if err := collectors.Enable(enabledCollectorList); err != nil {
logger.Error("couldn't enable collectors",
logger.LogAttrs(ctx, slog.LevelError, "couldn't enable collectors",
slog.Any("err", err),
)
@@ -163,9 +165,9 @@ func run() int {
}
// Initialize collectors before loading
if err = collectors.Build(logger); err != nil {
if err = collectors.Build(ctx, logger); err != nil {
for _, err := range utils.SplitError(err) {
logger.Error("couldn't initialize collector",
logger.LogAttrs(ctx, slog.LevelError, "couldn't initialize collector",
slog.Any("err", err),
)
@@ -220,17 +222,14 @@ func run() int {
close(errCh)
}()
ctx, stop := signal.NotifyContext(ctx, os.Interrupt, os.Kill)
defer stop()
select {
case <-ctx.Done():
logger.Info("Shutting down windows_exporter via kill signal")
logger.LogAttrs(ctx, slog.LevelInfo, "Shutting down windows_exporter via kill signal")
case <-stopCh:
logger.Info("Shutting down windows_exporter via service control")
logger.LogAttrs(ctx, slog.LevelInfo, "Shutting down windows_exporter via service control")
case err := <-errCh:
if err != nil {
logger.ErrorContext(ctx, "Failed to start windows_exporter",
logger.LogAttrs(ctx, slog.LevelError, "Failed to start windows_exporter",
slog.Any("err", err),
)
@@ -241,9 +240,9 @@ func run() int {
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
_ = server.Shutdown(ctx)
_ = server.Shutdown(ctx) //nolint:contextcheck // create a new context for server shutdown
logger.InfoContext(ctx, "windows_exporter has shut down")
logger.LogAttrs(ctx, slog.LevelInfo, "windows_exporter has shut down") //nolint:contextcheck
return 0
}
@@ -266,7 +265,7 @@ func logCurrentUser(logger *slog.Logger) {
}
// setPriorityWindows sets the priority of the current process to the specified value.
func setPriorityWindows(logger *slog.Logger, pid int, priority string) error {
func setPriorityWindows(ctx context.Context, logger *slog.Logger, pid int, priority string) error {
// Mapping of priority names to uin32 values required by windows.SetPriorityClass.
priorityStringToInt := map[string]uint32{
"realtime": windows.REALTIME_PRIORITY_CLASS,
@@ -284,7 +283,7 @@ func setPriorityWindows(logger *slog.Logger, pid int, priority string) error {
return nil
}
logger.LogAttrs(context.Background(), slog.LevelDebug, "setting process priority to "+priority)
logger.LogAttrs(ctx, slog.LevelDebug, "setting process priority to "+priority)
// https://learn.microsoft.com/en-us/windows/win32/procthread/process-security-and-access-rights
handle, err := windows.OpenProcess(

View File

@@ -0,0 +1,188 @@
// Copyright 2024 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build windows
package main
import (
"context"
"errors"
"fmt"
"io"
"net"
"net/http"
"net/url"
"os"
"strings"
"testing"
"time"
"github.com/stretchr/testify/require"
"golang.org/x/sys/windows"
)
// TestRun starts the exporter through run with several combinations of CLI
// flags and config-file contents, waits until the expected address accepts
// connections, and checks that the metrics endpoint serves the build-info
// metric. Cancelling the context is what asks run to shut down again.
//
// The subtests do not call t.Parallel and therefore run one after another;
// captureOutput swaps the process-wide os.Stdout while run is executing.
//
//nolint:tparallel
func TestRun(t *testing.T) {
	t.Parallel()

	for _, tc := range []struct {
		name            string
		args            []string
		config          string
		metricsEndpoint string
		exitCode        int
	}{
		{
			name:            "default",
			args:            []string{},
			metricsEndpoint: "http://127.0.0.1:9182/metrics",
		},
		{
			name:            "web.listen-address",
			args:            []string{"--web.listen-address=127.0.0.1:8080"},
			metricsEndpoint: "http://127.0.0.1:8080/metrics",
		},
		{
			// Distinct name so the two listen-address cases can be told apart
			// in the test output and selected individually with -run.
			name:            "web.listen-address multiple",
			args:            []string{"--web.listen-address=127.0.0.1:8081", "--web.listen-address=[::1]:8081"},
			metricsEndpoint: "http://[::1]:8081/metrics",
		},
		{
			name:            "config",
			args:            []string{"--config.file=config.yaml"},
			config:          `{"web":{"listen-address":"127.0.0.1:8082"}}`,
			metricsEndpoint: "http://127.0.0.1:8082/metrics",
		},
		{
			name:            "web.listen-address with config",
			args:            []string{"--config.file=config.yaml", "--web.listen-address=127.0.0.1:8084"},
			config:          `{"web":{"listen-address":"127.0.0.1:8083"}}`,
			metricsEndpoint: "http://127.0.0.1:8084/metrics",
		},
	} {
		t.Run(tc.name, func(t *testing.T) {
			ctx, cancel := context.WithCancel(t.Context())
			defer cancel()

			// Work on a copy so the placeholder substitution below does not
			// modify the table entry.
			args := make([]string, len(tc.args))
			copy(args, tc.args)

			if tc.config != "" {
				// Write the config into a temporary file and point the
				// "config.yaml" placeholder in the arguments at it.
				tmpfile, err := os.CreateTemp(t.TempDir(), "config-*.yaml")
				require.NoError(t, err)

				// Close before asserting so the handle is released even when
				// the write failed.
				_, writeErr := tmpfile.WriteString(tc.config)
				closeErr := tmpfile.Close()

				require.NoError(t, writeErr)
				require.NoError(t, closeErr)

				for i, arg := range args {
					args[i] = strings.ReplaceAll(arg, "config.yaml", tmpfile.Name())
				}
			}

			// exitCode and stdout are written by the goroutine below and must
			// only be read after done has been closed.
			var (
				exitCode int
				stdout   string
			)

			done := make(chan struct{})

			go func() {
				defer close(done)

				stdout = captureOutput(t, func() {
					exitCode = run(ctx, args)
				})
			}()

			// Registered after the deferred cancel, so by the time this runs
			// the context is already cancelled and run has been asked to stop.
			t.Cleanup(func() {
				select {
				case <-done:
					// Print the captured output before the assertion, because
					// a failing require ends the cleanup function.
					if t.Failed() || exitCode != tc.exitCode {
						t.Logf("LOGS:\n%s", stdout)
					}

					require.Equal(t, tc.exitCode, exitCode)
				case <-time.After(2 * time.Second):
					t.Fatalf("timed out waiting for exit code, want %d", tc.exitCode)
				}
			})

			// Cases that expect a failing start-up have no endpoint to probe.
			if tc.exitCode != 0 {
				return
			}

			uri, err := url.Parse(tc.metricsEndpoint)
			require.NoError(t, err)

			require.NoError(t, waitUntilListening(t, "tcp", uri.Host))

			req, err := http.NewRequestWithContext(ctx, http.MethodGet, tc.metricsEndpoint, nil)
			require.NoError(t, err)

			resp, err := http.DefaultClient.Do(req)
			require.NoError(t, err)

			// Read and close the body before any assertion so it is released
			// even when one of the checks below fails.
			body, readErr := io.ReadAll(resp.Body)
			closeErr := resp.Body.Close()

			require.Equal(t, http.StatusOK, resp.StatusCode)
			require.NoError(t, readErr)
			require.NoError(t, closeErr)
			require.NotEmpty(t, body)
			require.Contains(t, string(body), "# HELP windows_exporter_build_info")

			cancel()
		})
	}
}
// captureOutput runs f while os.Stdout is redirected into a pipe and returns
// everything f wrote to os.Stdout.
//
// The pipe is drained concurrently with f, so f cannot block once its output
// exceeds the pipe buffer. os.Stdout is restored even if f panics. Because
// os.Stdout is a process-wide variable, concurrent callers would capture each
// other's output.
//
// If the pipe cannot be created, the error is reported through tb.Errorf, f is
// still run without redirection, and an empty string is returned.
func captureOutput(tb testing.TB, f func()) string {
	tb.Helper()

	r, w, err := os.Pipe()
	if err != nil {
		// Errorf rather than Fatalf: this helper may be called from a
		// goroutine other than the one running the test.
		tb.Errorf("captureOutput: creating pipe: %v", err)
		f()

		return ""
	}

	orig := os.Stdout
	os.Stdout = w

	// Buffered so the reader goroutine can always finish, even when a panic
	// in f means nobody receives the result.
	captured := make(chan []byte, 1)

	go func() {
		defer func() { _ = r.Close() }()

		data, _ := io.ReadAll(r)
		captured <- data
	}()

	func() {
		defer func() {
			os.Stdout = orig
			// Closing the write end delivers EOF to the reader goroutine.
			_ = w.Close()
		}()

		f()
	}()

	return string(<-captured)
}
func waitUntilListening(tb testing.TB, network, address string) error {
tb.Helper()
var (
conn net.Conn
err error
)
for range 10 {
conn, err = net.DialTimeout(network, address, 100*time.Millisecond)
if err == nil {
_ = conn.Close()
return nil
}
if errors.Is(err, windows.Errno(10061)) {
time.Sleep(50 * time.Millisecond)
continue
}
}
return fmt.Errorf("listener not listening: %w", err)
}