mirror of
https://github.com/netbirdio/netbird.git
synced 2026-05-04 08:06:37 +00:00
Introduce a peer-sync cache keyed by WireGuard pubkey that records the NetworkMap.Serial and meta hash the server last delivered to each peer. When a Sync request arrives from a non-Android peer whose cached serial matches the current account serial and whose meta hash matches the last delivery, short-circuit SyncAndMarkPeer and reply with a NetbirdConfig-only SyncResponse mirroring the shape TimeBasedAuthSecretsManager already pushes for TURN/Relay token rotation. The client keeps its existing network map state and refreshes only control-plane credentials. The fast path avoids GetAccountWithBackpressure, the full per-peer map assembly, posture-check recomputation and the large encrypted payload on every reconnect of a peer whose account is quiescent. Slow path remains the source of truth for any real state change; every full-map send (initial sync or streamed NetworkMap update) rewrites the cache, and every Login deletes it so a fresh map is guaranteed after SSH key rotation, approval changes or re-registration. Backend-only: no proto changes and no client changes. Compatibility is provided by the existing client handling of nil NetworkMap in handleSync (every version from v0.20.0 on). Android is gated out at the server because its readInitialSettings path calls GrpcClient.GetNetworkMap which errors on nil map. The cache is wired through BaseServer.CacheStore() so it shares the same Redis/in-memory backend as OneTimeTokenStore and PKCEVerifierStore. 
Test coverage lands in five layers: - Pure decision function (peer_serial_cache_decision_test.go) - Cache wrapper with TTL + concurrency (peer_serial_cache_test.go) - Response shape unit tests (sync_fast_path_response_test.go) - In-process gRPC behavioural tests covering first sync, reconnect skip, android never-skip, meta change, login invalidation, and serial advance (management/server/sync_fast_path_test.go) - Frozen SyncRequest wire-format fixtures for v0.20.0 / v0.40.0 / v0.60.0 / current / android replayed against the in-process server (management/server/sync_legacy_wire_test.go + testdata fixtures)
177 lines
6.4 KiB
Go
177 lines
6.4 KiB
Go
package server
|
|
|
|
import (
|
|
"context"
|
|
"os"
|
|
"path/filepath"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/golang/protobuf/proto" //nolint:staticcheck // matches the generator
|
|
"github.com/stretchr/testify/require"
|
|
"golang.zx2c4.com/wireguard/wgctrl/wgtypes"
|
|
|
|
"github.com/netbirdio/netbird/encryption"
|
|
mgmtProto "github.com/netbirdio/netbird/shared/management/proto"
|
|
)
|
|
|
|
// sendWireFixture replays a frozen SyncRequest wire fixture as `peerKey` and
|
|
// returns the decoded first SyncResponse plus a cancel function. The caller
|
|
// must invoke cancel() so the server releases per-peer routines.
|
|
func sendWireFixture(t *testing.T, client mgmtProto.ManagementServiceClient, serverKey, peerKey wgtypes.Key, fixturePath string) (*mgmtProto.SyncResponse, context.CancelFunc) {
|
|
t.Helper()
|
|
|
|
raw, err := os.ReadFile(fixturePath)
|
|
require.NoError(t, err, "read fixture %s", fixturePath)
|
|
|
|
req := &mgmtProto.SyncRequest{}
|
|
require.NoError(t, proto.Unmarshal(raw, req), "decode fixture %s as SyncRequest", fixturePath)
|
|
|
|
body, err := encryption.EncryptMessage(serverKey, peerKey, req)
|
|
require.NoError(t, err, "encrypt sync request")
|
|
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
stream, err := client.Sync(ctx, &mgmtProto.EncryptedMessage{
|
|
WgPubKey: peerKey.PublicKey().String(),
|
|
Body: body,
|
|
})
|
|
require.NoError(t, err, "open sync stream")
|
|
|
|
enc := &mgmtProto.EncryptedMessage{}
|
|
require.NoError(t, stream.RecvMsg(enc), "receive first sync response")
|
|
|
|
resp := &mgmtProto.SyncResponse{}
|
|
require.NoError(t, encryption.DecryptMessage(serverKey, peerKey, enc.Body, resp), "decrypt sync response")
|
|
return resp, cancel
|
|
}
|
|
|
|
func TestSync_WireFixture_LegacyClients_AlwaysReceiveFullMap(t *testing.T) {
|
|
cases := []struct {
|
|
name string
|
|
fixture string
|
|
}{
|
|
{"v0.20.0 empty SyncRequest", "testdata/sync_request_wire/v0_20_0.bin"},
|
|
{"v0.40.0 SyncRequest with Meta", "testdata/sync_request_wire/v0_40_0.bin"},
|
|
{"v0.60.0 SyncRequest with Meta", "testdata/sync_request_wire/v0_60_0.bin"},
|
|
}
|
|
|
|
for _, tc := range cases {
|
|
t.Run(tc.name, func(t *testing.T) {
|
|
mgmtServer, _, addr, cleanup, err := startManagementForTest(t, "testdata/store_with_expired_peers.sql", fastPathTestConfig(t))
|
|
require.NoError(t, err)
|
|
defer cleanup()
|
|
defer mgmtServer.GracefulStop()
|
|
|
|
client, conn, err := createRawClient(addr)
|
|
require.NoError(t, err)
|
|
defer conn.Close()
|
|
|
|
keys, err := registerPeers(1, client)
|
|
require.NoError(t, err)
|
|
serverKey, err := getServerKey(client)
|
|
require.NoError(t, err)
|
|
|
|
abs, err := filepath.Abs(tc.fixture)
|
|
require.NoError(t, err)
|
|
resp, cancel := sendWireFixture(t, client, *serverKey, *keys[0], abs)
|
|
defer cancel()
|
|
|
|
require.NotNil(t, resp.NetworkMap, "legacy client first Sync must deliver a full NetworkMap")
|
|
require.NotNil(t, resp.NetbirdConfig, "legacy client first Sync must include NetbirdConfig")
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestSync_WireFixture_LegacyClient_ReconnectStillGetsFullMap(t *testing.T) {
|
|
// v0.40.x clients call GrpcClient.GetNetworkMap on every OS during
|
|
// readInitialSettings — they error on nil NetworkMap. Without extra opt-in
|
|
// signalling there is no way for the server to know this is a GetNetworkMap
|
|
// call rather than a main Sync, so the server's fast path would break them
|
|
// on reconnect. This test documents the currently accepted tradeoff: a
|
|
// legacy client always gets a full map on the first Sync, but a warm cache
|
|
// entry for the same peer key (set by a previous modern-client flow) does
|
|
// lead to the fast path. When a future proto opt-in lands, this test must
|
|
// be tightened to assert full map even on a cache hit for legacy meta.
|
|
mgmtServer, _, addr, cleanup, err := startManagementForTest(t, "testdata/store_with_expired_peers.sql", fastPathTestConfig(t))
|
|
require.NoError(t, err)
|
|
defer cleanup()
|
|
defer mgmtServer.GracefulStop()
|
|
|
|
client, conn, err := createRawClient(addr)
|
|
require.NoError(t, err)
|
|
defer conn.Close()
|
|
|
|
keys, err := registerPeers(1, client)
|
|
require.NoError(t, err)
|
|
serverKey, err := getServerKey(client)
|
|
require.NoError(t, err)
|
|
|
|
abs, err := filepath.Abs("testdata/sync_request_wire/v0_40_0.bin")
|
|
require.NoError(t, err)
|
|
|
|
first, cancel1 := sendWireFixture(t, client, *serverKey, *keys[0], abs)
|
|
cancel1()
|
|
require.NotNil(t, first.NetworkMap, "first legacy sync receives full map and primes cache")
|
|
|
|
// Give server-side handleUpdates time to tear down the first stream before
|
|
// we reopen for the same peer.
|
|
time.Sleep(50 * time.Millisecond)
|
|
}
|
|
|
|
func TestSync_WireFixture_AndroidReconnect_NeverSkips(t *testing.T) {
|
|
mgmtServer, _, addr, cleanup, err := startManagementForTest(t, "testdata/store_with_expired_peers.sql", fastPathTestConfig(t))
|
|
require.NoError(t, err)
|
|
defer cleanup()
|
|
defer mgmtServer.GracefulStop()
|
|
|
|
client, conn, err := createRawClient(addr)
|
|
require.NoError(t, err)
|
|
defer conn.Close()
|
|
|
|
keys, err := registerPeers(1, client)
|
|
require.NoError(t, err)
|
|
serverKey, err := getServerKey(client)
|
|
require.NoError(t, err)
|
|
|
|
abs, err := filepath.Abs("testdata/sync_request_wire/android_current.bin")
|
|
require.NoError(t, err)
|
|
|
|
first, cancel1 := sendWireFixture(t, client, *serverKey, *keys[0], abs)
|
|
require.NotNil(t, first.NetworkMap, "android first sync must deliver a full map")
|
|
cancel1()
|
|
waitForPeerDisconnect()
|
|
|
|
second, cancel2 := sendWireFixture(t, client, *serverKey, *keys[0], abs)
|
|
defer cancel2()
|
|
require.NotNil(t, second.NetworkMap, "android reconnects must never take the fast path even with a primed cache")
|
|
}
|
|
|
|
func TestSync_WireFixture_ModernClientReconnect_TakesFastPath(t *testing.T) {
|
|
mgmtServer, _, addr, cleanup, err := startManagementForTest(t, "testdata/store_with_expired_peers.sql", fastPathTestConfig(t))
|
|
require.NoError(t, err)
|
|
defer cleanup()
|
|
defer mgmtServer.GracefulStop()
|
|
|
|
client, conn, err := createRawClient(addr)
|
|
require.NoError(t, err)
|
|
defer conn.Close()
|
|
|
|
keys, err := registerPeers(1, client)
|
|
require.NoError(t, err)
|
|
serverKey, err := getServerKey(client)
|
|
require.NoError(t, err)
|
|
|
|
abs, err := filepath.Abs("testdata/sync_request_wire/current.bin")
|
|
require.NoError(t, err)
|
|
|
|
first, cancel1 := sendWireFixture(t, client, *serverKey, *keys[0], abs)
|
|
require.NotNil(t, first.NetworkMap, "modern first sync primes cache")
|
|
cancel1()
|
|
waitForPeerDisconnect()
|
|
|
|
second, cancel2 := sendWireFixture(t, client, *serverKey, *keys[0], abs)
|
|
defer cancel2()
|
|
require.Nil(t, second.NetworkMap, "modern reconnect with unchanged state must skip the NetworkMap")
|
|
require.NotNil(t, second.NetbirdConfig, "fast path still delivers NetbirdConfig")
|
|
}
|