mirror of
https://github.com/netbirdio/netbird.git
synced 2026-04-18 08:16:39 +00:00
Add graceful shutdown for Kubernetes
This commit is contained in:
@@ -68,6 +68,9 @@ type flockLocker struct {
|
||||
}
|
||||
|
||||
func newFlockLocker(certDir string, logger *log.Logger) *flockLocker {
|
||||
if logger == nil {
|
||||
logger = log.StandardLogger()
|
||||
}
|
||||
return &flockLocker{certDir: certDir, logger: logger}
|
||||
}
|
||||
|
||||
|
||||
@@ -32,6 +32,9 @@ func Lock(ctx context.Context, path string) (*os.File, error) {
|
||||
return nil, fmt.Errorf("open lock file %s: %w", path, err)
|
||||
}
|
||||
|
||||
timer := time.NewTimer(retryInterval)
|
||||
defer timer.Stop()
|
||||
|
||||
for {
|
||||
if err := syscall.Flock(int(f.Fd()), syscall.LOCK_EX|syscall.LOCK_NB); err == nil {
|
||||
return f, nil
|
||||
@@ -48,7 +51,8 @@ func Lock(ctx context.Context, path string) (*os.File, error) {
|
||||
log.Debugf("close lock file %s: %v", path, cerr)
|
||||
}
|
||||
return nil, ctx.Err()
|
||||
case <-time.After(retryInterval):
|
||||
case <-timer.C:
|
||||
timer.Reset(retryInterval)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,8 +17,8 @@ import (
|
||||
)
|
||||
|
||||
// Limits applied to client health checks.
const (
	// maxConcurrentChecks is the concurrency limit for client health checks
	// (presumably the capacity of Checker.checkSem — confirm at the constructor).
	maxConcurrentChecks = 3
	// maxClientCheckTimeout is the upper bound on how long a client
	// connectivity check may run, regardless of the caller's context.
	maxClientCheckTimeout = 5 * time.Minute
)
|
||||
|
||||
// clientProvider provides access to NetBird clients for health checks.
|
||||
@@ -34,6 +34,7 @@ type Checker struct {
|
||||
mu sync.RWMutex
|
||||
managementConnected bool
|
||||
initialSyncComplete bool
|
||||
shuttingDown bool
|
||||
|
||||
// checkSem limits concurrent client health checks.
|
||||
checkSem chan struct{}
|
||||
@@ -77,6 +78,14 @@ func (c *Checker) SetInitialSyncComplete() {
|
||||
c.initialSyncComplete = true
|
||||
}
|
||||
|
||||
// SetShuttingDown marks the server as shutting down.
|
||||
// This causes ReadinessProbe to return false so load balancers stop routing traffic.
|
||||
func (c *Checker) SetShuttingDown() {
|
||||
c.mu.Lock()
|
||||
defer c.mu.Unlock()
|
||||
c.shuttingDown = true
|
||||
}
|
||||
|
||||
// CheckClientsConnected verifies all clients are connected to management/signal/relay.
|
||||
// Uses the provided context for timeout/cancellation, with a maximum bound of maxClientCheckTimeout.
|
||||
// Limits concurrent checks via semaphore.
|
||||
@@ -145,6 +154,9 @@ func (c *Checker) LivenessProbe() bool {
|
||||
func (c *Checker) ReadinessProbe() bool {
|
||||
c.mu.RLock()
|
||||
defer c.mu.RUnlock()
|
||||
if c.shuttingDown {
|
||||
return false
|
||||
}
|
||||
return c.managementConnected
|
||||
}
|
||||
|
||||
|
||||
@@ -10,7 +10,6 @@ import (
|
||||
"crypto/tls"
|
||||
"crypto/x509"
|
||||
"encoding/hex"
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
|
||||
Reference in New Issue
Block a user