diff --git a/client/cmd/root.go b/client/cmd/root.go index 9c4ad99de..f81a01e36 100644 --- a/client/cmd/root.go +++ b/client/cmd/root.go @@ -157,6 +157,7 @@ func SetupCloseHandler(ctx context.Context, cancel context.CancelFunc) { case <-done: case <-termCh: } + dumpStacks() log.Info("shutdown signal received") cancel() diff --git a/client/cmd/service_controller.go b/client/cmd/service_controller.go index d416afaac..b712d8e1c 100644 --- a/client/cmd/service_controller.go +++ b/client/cmd/service_controller.go @@ -5,6 +5,9 @@ import ( "fmt" "net" "os" + "os/signal" + "runtime" + "runtime/debug" "strings" "time" @@ -20,6 +23,11 @@ import ( ) func (p *program) Start(svc service.Service) error { + defer func() { + if r := recover(); r != nil { + log.Panicf("Panic occurred: %v, stack trace: %s", r, string(debug.Stack())) + } + }() // Start should not block. Do the actual work async. log.Info("starting Netbird service") //nolint // in any case, even if configuration does not exists we run daemon to serve CLI gRPC API. 
@@ -44,8 +52,15 @@ func (p *program) Start(svc service.Service) error {
 	if err != nil {
 		return fmt.Errorf("failed to listen daemon interface: %w", err)
 	}
+
+	setupSignalHandler()
 	go func() {
 		defer listen.Close()
+		defer func() {
+			if r := recover(); r != nil {
+				log.Panicf("Panic occurred: %v, stack trace: %s", r, string(debug.Stack()))
+			}
+		}()
 
 		if split[0] == "unix" {
 			err = os.Chmod(split[1], 0666)
@@ -81,6 +96,41 @@ func (p *program) Stop(srv service.Service) error {
 	return nil
 }
 
+// dumpStacks logs the stack traces of all goroutines, framed by BEGIN/END
+// marker lines so the dump is easy to locate in the log output.
+func dumpStacks() {
+	// runtime.Stack truncates the trace when it does not fit, so keep
+	// doubling the buffer until the reported length is smaller than it.
+	buf := make([]byte, 1<<20)
+	for {
+		n := runtime.Stack(buf, true)
+		if n < len(buf) {
+			buf = buf[:n]
+			break
+		}
+		buf = make([]byte, 2*len(buf))
+	}
+	log.Println("=== BEGIN STACK TRACE ===")
+	log.Println(string(buf))
+	log.Println("=== END STACK TRACE ===")
+}
+
+// setupSignalHandler starts a goroutine that waits for os.Interrupt, dumps
+// the stacks of all goroutines and then terminates the process with exit
+// code 1.
+func setupSignalHandler() {
+	c := make(chan os.Signal, 1)
+	// Although SIGQUIT is not supported on Windows, os.Interrupt can be used as an alternative.
+	signal.Notify(c, os.Interrupt)
+	go func() {
+		// The process exits after the first signal, so a single receive is
+		// enough; looping over the channel would never reach a second pass.
+		<-c
+		dumpStacks()
+		os.Exit(1)
+	}()
+}
+
 var runCmd = &cobra.Command{
 	Use:   "run",
 	Short: "runs Netbird as service",
diff --git a/client/internal/routemanager/systemops_windows.go b/client/internal/routemanager/systemops_windows.go
index 334ace453..ba211082f 100644
--- a/client/internal/routemanager/systemops_windows.go
+++ b/client/internal/routemanager/systemops_windows.go
@@ -73,7 +73,7 @@ func addRoutePowershell(prefix netip.Prefix, nexthop netip.Addr, intf, intfIdx s
 	}
 
 	script := fmt.Sprintf(
-		`%s -AddressFamily "%s" -DestinationPrefix "%s" -Confirm:$False -ErrorAction Stop`,
+		`%s -AddressFamily "%s" -DestinationPrefix "%s" -Confirm:$False -ErrorAction Stop -PolicyStore ActiveStore`,
 		psCmd, addressFamily, destinationPrefix,
 	)
 
diff --git a/client/server/server.go b/client/server/server.go
index d1d9dbda4..5b2ea8e83 100644
--- a/client/server/server.go
+++ b/client/server/server.go
@@ -6,6 +6,7 @@ import (
 	"os"
 	"os/exec"
 	"runtime"
+
"runtime/debug" "strconv" "sync" "time" @@ -92,6 +93,11 @@ func New(ctx context.Context, configPath, logFile string) *Server { func (s *Server) Start() error { s.mutex.Lock() defer s.mutex.Unlock() + defer func() { + if r := recover(); r != nil { + log.Panicf("Panic occurred: %v, stack trace: %s", r, string(debug.Stack())) + } + }() state := internal.CtxGetState(s.rootCtx) // if current state contains any error, return it @@ -156,8 +162,18 @@ func (s *Server) connectWithRetryRuns(ctx context.Context, config *internal.Conf ) { backOff := getConnectWithBackoff(ctx) retryStarted := false + defer func() { + if r := recover(); r != nil { + log.Panicf("Panic occurred: %v, stack trace: %s", r, string(debug.Stack())) + } + }() go func() { + defer func() { + if r := recover(); r != nil { + log.Panicf("Panic occurred: %v, stack trace: %s", r, string(debug.Stack())) + } + }() t := time.NewTicker(24 * time.Hour) for { select { @@ -181,6 +197,11 @@ func (s *Server) connectWithRetryRuns(ctx context.Context, config *internal.Conf }() runOperation := func() error { + defer func() { + if r := recover(); r != nil { + log.Panicf("Panic occurred: %v, stack trace: %s", r, string(debug.Stack())) + } + }() log.Tracef("running client connection") err := internal.RunClientWithProbes(ctx, config, statusRecorder, mgmProbe, signalProbe, relayProbe, wgProbe) if err != nil {