Files
netbird/proxy/internal/auth/tunnel_cache_test.go
mlsmaycon 167ee08e14 feat(private-service): expose NetBird-only services over tunnel peers
Adds a new "private" service mode for the reverse proxy: services
reachable exclusively over the embedded WireGuard tunnel, gated by
per-peer group membership instead of operator auth schemes.

Wire contract
- ProxyMapping.private (field 13): the proxy MUST call
  ValidateTunnelPeer and fail closed; operator schemes are bypassed.
- ProxyCapabilities.private (4) + supports_private_service (5):
  capability gate. Management never streams private mappings to
  proxies that don't claim the capability; the broadcast path applies
  the same filter via filterMappingsForProxy.
- ValidateTunnelPeer RPC: resolves an inbound tunnel IP to a peer,
  checks the peer's groups against service.AccessGroups, and mints
  a session JWT on success. checkPeerGroupAccess fails closed when
  a private service has empty AccessGroups.
- ValidateSession/ValidateTunnelPeer responses now carry
  peer_group_ids + peer_group_names so the proxy can authorise
  policy-aware middlewares without an extra management round-trip.
- ProxyInboundListener + SendStatusUpdate.inbound_listener: per-account
  inbound listener state surfaced to dashboards.
- PathTargetOptions.direct_upstream (11): bypass the embedded NetBird
  client and dial the target via the proxy host's network stack for
  upstreams reachable without WireGuard.

Data model
- Service.Private (bool) + Service.AccessGroups ([]string, JSON-
  serialised). Validate() rejects bearer auth on private services.
  Copy() deep-copies AccessGroups. pgx getServices loads the columns.
- DomainConfig.Private threaded into the proxy auth middleware.
  Request handler routes private services through forwardWithTunnelPeer
  and returns 403 on validation failure.
- Account-level SynthesizePrivateServiceZones (synthetic DNS) and
  injectPrivateServicePolicies (synthetic ACL) gate on
  len(svc.AccessGroups) > 0.

Proxy
- /netbird proxy --private (embedded mode) flag; Config.Private in
  proxy/lifecycle.go.
- Per-account inbound listener (proxy/inbound.go) binding HTTP/HTTPS
  on the embedded NetBird client's WireGuard tunnel netstack.
- proxy/internal/auth/tunnel_cache: ValidateTunnelPeer response cache
  with single-flight de-duplication and per-account eviction.
- Local peerstore short-circuit: when the inbound IP isn't in the
  account roster, deny fast without an RPC.
- proxy/server.go reports SupportsPrivateService=true and redacts the
  full ProxyMapping JSON from info logs (auth_token + header-auth
  hashed values now only at debug level).

Identity forwarding
- ValidateSessionJWT returns user_id, email, method, groups,
  group_names. sessionkey.Claims carries Email + Groups + GroupNames
  so the proxy can stamp identity onto upstream requests without an
  extra management round-trip on every cookie-bearing request.
- CapturedData carries userEmail / userGroups / userGroupNames; the
  proxy stamps X-NetBird-User and X-NetBird-Groups on r.Out from the
  authenticated identity (strips client-supplied values first to
  prevent spoofing).
- AccessLog.UserGroups: access-log enrichment captures the user's
  group memberships at write time so the dashboard can render group
  context without reverse-resolving stale memberships.

OpenAPI/dashboard surface
- ReverseProxyService gains private + access_groups; ReverseProxyCluster
  gains private + supports_private. ReverseProxyTarget target_type
  enum gains "cluster". ServiceTargetOptions gains direct_upstream.
  ProxyAccessLog gains user_groups.
2026-05-20 22:46:18 +02:00

172 lines
6.3 KiB
Go

package auth
import (
"context"
"net/netip"
"sync"
"sync/atomic"
"testing"
"time"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/netbirdio/netbird/proxy/internal/types"
"github.com/netbirdio/netbird/shared/management/proto"
)
func newTestKey(account types.AccountID, ip string, domain string) tunnelCacheKey {
return tunnelCacheKey{
accountID: account,
tunnelIP: netip.MustParseAddr(ip),
domain: domain,
}
}
func TestTunnelCache_HitSkipsRPC(t *testing.T) {
cache := newTunnelValidationCache()
key := newTestKey("acct-1", "100.64.0.10", "svc.example")
var calls int32
validate := func(_ context.Context, req *proto.ValidateTunnelPeerRequest) (*proto.ValidateTunnelPeerResponse, error) {
atomic.AddInt32(&calls, 1)
return &proto.ValidateTunnelPeerResponse{Valid: true, SessionToken: "tok", UserId: "user-1"}, nil
}
resp, fromCache, err := cache.fetch(context.Background(), key, validate)
require.NoError(t, err)
require.NotNil(t, resp, "first fetch returns RPC response")
assert.False(t, fromCache, "first fetch must not be cached")
resp2, fromCache2, err := cache.fetch(context.Background(), key, validate)
require.NoError(t, err)
require.NotNil(t, resp2, "second fetch returns cached response")
assert.True(t, fromCache2, "second fetch must be served from cache")
assert.Equal(t, "user-1", resp2.GetUserId(), "cached response should preserve user identity")
assert.Equal(t, int32(1), atomic.LoadInt32(&calls), "validate should run exactly once with one cache hit")
}
func TestTunnelCache_ExpiredEntryRefetches(t *testing.T) {
cache := newTunnelValidationCache()
clock := time.Now()
cache.now = func() time.Time { return clock }
key := newTestKey("acct-1", "100.64.0.10", "svc.example")
var calls int32
validate := func(_ context.Context, _ *proto.ValidateTunnelPeerRequest) (*proto.ValidateTunnelPeerResponse, error) {
atomic.AddInt32(&calls, 1)
return &proto.ValidateTunnelPeerResponse{Valid: true, SessionToken: "tok"}, nil
}
_, _, err := cache.fetch(context.Background(), key, validate)
require.NoError(t, err)
assert.Equal(t, int32(1), atomic.LoadInt32(&calls), "first fetch issues one RPC")
clock = clock.Add(tunnelCacheTTL + time.Second)
_, fromCache, err := cache.fetch(context.Background(), key, validate)
require.NoError(t, err)
assert.False(t, fromCache, "expired entry must miss the cache")
assert.Equal(t, int32(2), atomic.LoadInt32(&calls), "expired entry forces a re-fetch")
}
func TestTunnelCache_DeniedResponseNotCached(t *testing.T) {
cache := newTunnelValidationCache()
key := newTestKey("acct-1", "100.64.0.10", "svc.example")
var calls int32
validate := func(_ context.Context, _ *proto.ValidateTunnelPeerRequest) (*proto.ValidateTunnelPeerResponse, error) {
atomic.AddInt32(&calls, 1)
return &proto.ValidateTunnelPeerResponse{Valid: false, DeniedReason: "not_in_group"}, nil
}
for i := 0; i < 3; i++ {
_, _, err := cache.fetch(context.Background(), key, validate)
require.NoError(t, err, "fetch must not error on denied response")
}
assert.Equal(t, int32(3), atomic.LoadInt32(&calls), "denied responses bypass the cache so policy changes apply immediately")
}
func TestTunnelCache_ConcurrentColdHitsCoalesce(t *testing.T) {
cache := newTunnelValidationCache()
key := newTestKey("acct-1", "100.64.0.10", "svc.example")
gate := make(chan struct{})
var calls int32
validate := func(_ context.Context, _ *proto.ValidateTunnelPeerRequest) (*proto.ValidateTunnelPeerResponse, error) {
atomic.AddInt32(&calls, 1)
<-gate
return &proto.ValidateTunnelPeerResponse{Valid: true, SessionToken: "tok"}, nil
}
const workers = 16
var wg sync.WaitGroup
wg.Add(workers)
results := make([]bool, workers)
for i := 0; i < workers; i++ {
go func(idx int) {
defer wg.Done()
resp, _, err := cache.fetch(context.Background(), key, validate)
results[idx] = err == nil && resp.GetValid()
}(i)
}
time.Sleep(20 * time.Millisecond)
close(gate)
wg.Wait()
for i, ok := range results {
assert.Truef(t, ok, "worker %d should observe a successful response", i)
}
assert.Equal(t, int32(1), atomic.LoadInt32(&calls), "single-flight must collapse concurrent cold fetches into one RPC")
}
func TestTunnelCache_PerAccountIsolation(t *testing.T) {
cache := newTunnelValidationCache()
keyA := newTestKey("acct-a", "100.64.0.10", "svc.example")
keyB := newTestKey("acct-b", "100.64.0.10", "svc.example")
var callsA, callsB int32
validateA := func(_ context.Context, _ *proto.ValidateTunnelPeerRequest) (*proto.ValidateTunnelPeerResponse, error) {
atomic.AddInt32(&callsA, 1)
return &proto.ValidateTunnelPeerResponse{Valid: true, SessionToken: "tok-a", UserId: "user-a"}, nil
}
validateB := func(_ context.Context, _ *proto.ValidateTunnelPeerRequest) (*proto.ValidateTunnelPeerResponse, error) {
atomic.AddInt32(&callsB, 1)
return &proto.ValidateTunnelPeerResponse{Valid: true, SessionToken: "tok-b", UserId: "user-b"}, nil
}
respA, _, err := cache.fetch(context.Background(), keyA, validateA)
require.NoError(t, err)
respB, _, err := cache.fetch(context.Background(), keyB, validateB)
require.NoError(t, err)
assert.Equal(t, "user-a", respA.GetUserId(), "account A response should belong to user-a")
assert.Equal(t, "user-b", respB.GetUserId(), "account B response must not be served from account A's cache")
assert.Equal(t, int32(1), atomic.LoadInt32(&callsA), "validateA called exactly once")
assert.Equal(t, int32(1), atomic.LoadInt32(&callsB), "validateB called exactly once")
}
func TestTunnelCache_BoundedSizeEvictsOldest(t *testing.T) {
cache := newTunnelValidationCache()
cache.maxSize = 2
validate := func(_ context.Context, req *proto.ValidateTunnelPeerRequest) (*proto.ValidateTunnelPeerResponse, error) {
return &proto.ValidateTunnelPeerResponse{Valid: true, SessionToken: "tok-" + req.GetTunnelIp()}, nil
}
keys := []tunnelCacheKey{
newTestKey("acct-1", "100.64.0.10", "svc"),
newTestKey("acct-1", "100.64.0.11", "svc"),
newTestKey("acct-1", "100.64.0.12", "svc"),
}
for _, k := range keys {
_, _, err := cache.fetch(context.Background(), k, validate)
require.NoError(t, err)
}
assert.Nil(t, cache.get(keys[0]), "oldest key should be evicted past maxSize")
assert.NotNil(t, cache.get(keys[1]), "second-newest must remain cached")
assert.NotNil(t, cache.get(keys[2]), "newest must remain cached")
}