Add graceful shutdown for Kubernetes

This commit is contained in:
Viktor Liu
2026-02-09 20:17:12 +08:00
parent fd442138e6
commit 53c1016a8e
6 changed files with 158 additions and 39 deletions

View File

@@ -68,6 +68,9 @@ type flockLocker struct {
}
// newFlockLocker returns a flockLocker that stores certDir and logger.
// When logger is nil, the value returned by log.StandardLogger() is used
// instead, so the resulting locker always carries a non-nil logger.
func newFlockLocker(certDir string, logger *log.Logger) *flockLocker {
	locker := &flockLocker{certDir: certDir, logger: logger}
	if locker.logger == nil {
		// Fall back to the package-level standard logger.
		locker.logger = log.StandardLogger()
	}
	return locker
}

View File

@@ -32,6 +32,9 @@ func Lock(ctx context.Context, path string) (*os.File, error) {
return nil, fmt.Errorf("open lock file %s: %w", path, err)
}
timer := time.NewTimer(retryInterval)
defer timer.Stop()
for {
if err := syscall.Flock(int(f.Fd()), syscall.LOCK_EX|syscall.LOCK_NB); err == nil {
return f, nil
@@ -48,7 +51,8 @@ func Lock(ctx context.Context, path string) (*os.File, error) {
log.Debugf("close lock file %s: %v", path, cerr)
}
return nil, ctx.Err()
case <-time.After(retryInterval):
case <-timer.C:
timer.Reset(retryInterval)
}
}
}

View File

@@ -17,8 +17,8 @@ import (
)
const (
maxConcurrentChecks = 3
maxClientCheckTimeout = 5 * time.Minute
maxConcurrentChecks = 3
maxClientCheckTimeout = 5 * time.Minute
)
// clientProvider provides access to NetBird clients for health checks.
@@ -34,6 +34,7 @@ type Checker struct {
mu sync.RWMutex
managementConnected bool
initialSyncComplete bool
shuttingDown bool
// checkSem limits concurrent client health checks.
checkSem chan struct{}
@@ -77,6 +78,14 @@ func (c *Checker) SetInitialSyncComplete() {
c.initialSyncComplete = true
}
// SetShuttingDown records that the server has begun shutting down.
// Once the flag is set, ReadinessProbe reports false, which lets load
// balancers stop routing traffic to this instance.
func (c *Checker) SetShuttingDown() {
	// Single assignment under the write lock; nothing in between can panic,
	// so the unlock is done explicitly rather than deferred.
	c.mu.Lock()
	c.shuttingDown = true
	c.mu.Unlock()
}
// CheckClientsConnected verifies all clients are connected to management/signal/relay.
// Uses the provided context for timeout/cancellation, with a maximum bound of maxClientCheckTimeout.
// Limits concurrent checks via semaphore.
@@ -145,6 +154,9 @@ func (c *Checker) LivenessProbe() bool {
// ReadinessProbe reports whether the server should receive traffic.
// It returns false while the shuttingDown flag is set; otherwise it returns
// the current managementConnected state. Both fields are read under the
// read lock.
func (c *Checker) ReadinessProbe() bool {
	c.mu.RLock()
	defer c.mu.RUnlock()

	// Shutdown takes precedence over the management connection state.
	return !c.shuttingDown && c.managementConnected
}

View File

@@ -10,7 +10,6 @@ import (
"crypto/tls"
"crypto/x509"
"encoding/hex"
"bytes"
"encoding/json"
"errors"
"fmt"