mirror of
https://github.com/netbirdio/netbird.git
synced 2026-04-27 20:56:44 +00:00
[management] Skip full network map on Sync when peer state is unchanged
Introduce a peer-sync cache keyed by WireGuard pubkey that records the NetworkMap.Serial and meta hash the server last delivered to each peer. When a Sync request arrives from a non-Android peer whose cached serial matches the current account serial and whose meta hash matches the last delivery, short-circuit SyncAndMarkPeer and reply with a NetbirdConfig-only SyncResponse mirroring the shape TimeBasedAuthSecretsManager already pushes for TURN/Relay token rotation. The client keeps its existing network map state and refreshes only control-plane credentials. The fast path avoids GetAccountWithBackpressure, the full per-peer map assembly, posture-check recomputation and the large encrypted payload on every reconnect of a peer whose account is quiescent. Slow path remains the source of truth for any real state change; every full-map send (initial sync or streamed NetworkMap update) rewrites the cache, and every Login deletes it so a fresh map is guaranteed after SSH key rotation, approval changes or re-registration. Backend-only: no proto changes and no client changes. Compatibility is provided by the existing client handling of nil NetworkMap in handleSync (every version from v0.20.0 on). Android is gated out at the server because its readInitialSettings path calls GrpcClient.GetNetworkMap which errors on nil map. The cache is wired through BaseServer.CacheStore() so it shares the same Redis/in-memory backend as OneTimeTokenStore and PKCEVerifierStore. 
Test coverage lands in five layers: - Pure decision function (peer_serial_cache_decision_test.go) - Cache wrapper with TTL + concurrency (peer_serial_cache_test.go) - Response shape unit tests (sync_fast_path_response_test.go) - In-process gRPC behavioural tests covering first sync, reconnect skip, android never-skip, meta change, login invalidation, and serial advance (management/server/sync_fast_path_test.go) - Frozen SyncRequest wire-format fixtures for v0.20.0 / v0.40.0 / v0.60.0 / current / android replayed against the in-process server (management/server/sync_legacy_wire_test.go + testdata fixtures)
This commit is contained in:
89
management/internals/shared/grpc/peer_serial_cache.go
Normal file
89
management/internals/shared/grpc/peer_serial_cache.go
Normal file
@@ -0,0 +1,89 @@
|
||||
package grpc
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/eko/gocache/lib/v4/cache"
|
||||
"github.com/eko/gocache/lib/v4/store"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
const (
	// peerSerialCacheKeyPrefix namespaces every entry written by
	// PeerSerialCache inside the shared cache store; the peer's public key is
	// appended to it to form the full storage key.
	peerSerialCacheKeyPrefix = "peer-sync:"

	// DefaultPeerSerialCacheTTL is the default lifetime of a cached entry.
	// Bounding the lifetime means a cache write that was lost on a full-map
	// send cannot leave a stale entry behind forever: the entry expires, the
	// next Sync takes the full path, and the cache is primed again.
	DefaultPeerSerialCacheTTL = time.Hour * 24
)
|
||||
|
||||
// PeerSerialCache records the NetworkMap serial and meta hash last delivered to
|
||||
// each peer on Sync. Lookups are used to skip full network map computation when
|
||||
// the peer already has the latest state. Backed by the shared cache store so
|
||||
// entries survive management replicas sharing a Redis instance.
|
||||
type PeerSerialCache struct {
|
||||
cache *cache.Cache[string]
|
||||
ctx context.Context
|
||||
ttl time.Duration
|
||||
}
|
||||
|
||||
// NewPeerSerialCache creates a cache wrapper bound to the shared cache store.
|
||||
// The ttl is applied to every Set call; entries older than ttl are treated as
|
||||
// misses so the server eventually converges to delivering a full map even if
|
||||
// an earlier Set was lost.
|
||||
func NewPeerSerialCache(ctx context.Context, cacheStore store.StoreInterface, ttl time.Duration) *PeerSerialCache {
|
||||
return &PeerSerialCache{
|
||||
cache: cache.New[string](cacheStore),
|
||||
ctx: ctx,
|
||||
ttl: ttl,
|
||||
}
|
||||
}
|
||||
|
||||
// Get returns the entry previously recorded for this peer and whether a valid
|
||||
// entry was found. A cache miss or any read error is reported as a miss so
|
||||
// callers fall back to the full map path.
|
||||
func (c *PeerSerialCache) Get(pubKey string) (peerSyncEntry, bool) {
|
||||
raw, err := c.cache.Get(c.ctx, peerSerialCacheKeyPrefix+pubKey)
|
||||
if err != nil {
|
||||
return peerSyncEntry{}, false
|
||||
}
|
||||
|
||||
entry := peerSyncEntry{}
|
||||
if err := json.Unmarshal([]byte(raw), &entry); err != nil {
|
||||
log.Debugf("peer serial cache: unmarshal entry for %s: %v", pubKey, err)
|
||||
return peerSyncEntry{}, false
|
||||
}
|
||||
return entry, true
|
||||
}
|
||||
|
||||
// Set records what the server most recently delivered to this peer. Errors are
|
||||
// logged at debug level so cache outages degrade gracefully into the full map
|
||||
// path on the next Sync rather than failing the current Sync.
|
||||
func (c *PeerSerialCache) Set(pubKey string, entry peerSyncEntry) {
|
||||
payload, err := json.Marshal(entry)
|
||||
if err != nil {
|
||||
log.Debugf("peer serial cache: marshal entry for %s: %v", pubKey, err)
|
||||
return
|
||||
}
|
||||
|
||||
if err := c.cache.Set(c.ctx, peerSerialCacheKeyPrefix+pubKey, string(payload), store.WithExpiration(c.ttl)); err != nil {
|
||||
log.Debugf("peer serial cache: set entry for %s: %v", pubKey, err)
|
||||
}
|
||||
}
|
||||
|
||||
// Delete removes any cached entry for this peer. Used on Login so the next
|
||||
// Sync always sees a miss and delivers a full map.
|
||||
func (c *PeerSerialCache) Delete(pubKey string) {
|
||||
if err := c.cache.Delete(c.ctx, peerSerialCacheKeyPrefix+pubKey); err != nil {
|
||||
log.Debugf("peer serial cache: delete entry for %s: %v", pubKey, err)
|
||||
}
|
||||
}
|
||||
|
||||
// cacheKey exposes the namespaced key for tests that need to peek at the raw
|
||||
// storage, e.g. when asserting TTL behaviour against Redis.
|
||||
func (c *PeerSerialCache) cacheKey(pubKey string) string {
|
||||
return fmt.Sprintf("%s%s", peerSerialCacheKeyPrefix, pubKey)
|
||||
}
|
||||
Reference in New Issue
Block a user