Compare commits

...

4 Commits

Author SHA1 Message Date
Zoltan Papp
4b3dd9103d [client] Fix slow wg operations (#6633)
* [iface] Drop redundant device dump in kernel configure()

wgctrl.ConfigureDevice already returns an error when the interface is
missing, so the preceding wg.Device() existence check is redundant. That
check dumps the entire device (all peers) on every configure() call,
making it O(peers) per call and turning bulk peer insertion into
O(peers^2): inserting N peers one by one re-parsed the whole growing peer
list N times. Removing it keeps each peer write constant-time regardless
of how many peers are already configured.

* [iface] Cache WireGuard stats to collapse per-peer device dumps

Each peer runs a WGWatcher that polls GetStats(), and every call dumps
the whole device, so with N peers the watchers perform O(N) full dumps
per poll cycle (O(N^2) work) while each keeps only its own peer's entry.

Wrap the kernel and userspace configurer GetStats() in a short-TTL cache
with singleflight: the staggered per-peer calls share a single device
dump per window and concurrent misses collapse into one dump. The kernel
and userspace WireGuard APIs have no per-peer stats query (a get always
returns the whole device), so a shared cached snapshot avoids the
repeated full dumps.

* Ignore .claude directory
2026-07-02 20:42:43 +02:00
Riccardo Manfrin
8e3b284f4b [client] Increase mgmt grpc buff size to 16MB (#6641) 2026-07-02 17:50:18 +02:00
Maycon Santos
21aa933584 [misc] Fix GHCR image push after dockers_v2 migration (#6653) 2026-07-02 17:21:06 +02:00
Misha Bragin
1dfa85a917 [management] Add vLLM e2e test (#6649)
* Add vLLM to Agent Network

* Add vllm e2e test
2026-07-02 15:36:51 +02:00
10 changed files with 446 additions and 22 deletions

View File

@@ -293,8 +293,11 @@ jobs:
${{ steps.goreleaser.outputs.artifacts }}
JSON
# dockers_v2 artifacts have no top-level goarch field, so match the
# per-platform -amd64 tag suffix instead; it works for both the old
# dockers and the new dockers_v2 image naming.
mapfile -t src_images < <(
jq -r '.[] | select(.type == "Docker Image") | select(.goarch == "amd64") | .name | select(startswith("ghcr.io/"))' /tmp/goreleaser-artifacts.json
jq -r '.[] | select(.type == "Docker Image") | .name | select(startswith("ghcr.io/") and endswith("-amd64"))' /tmp/goreleaser-artifacts.json
)
for src in "${src_images[@]}"; do

1
.gitignore vendored
View File

@@ -1,3 +1,4 @@
.claude
.idea
.run
*.iml

View File

@@ -17,12 +17,15 @@ import (
type KernelConfigurer struct {
deviceName string
statsCache *statsCache
}
func NewKernelConfigurer(deviceName string) *KernelConfigurer {
return &KernelConfigurer{
c := &KernelConfigurer{
deviceName: deviceName,
}
c.statsCache = newStatsCache(statsCacheTTL, c.fetchStats)
return c
}
func (c *KernelConfigurer) ConfigureInterface(privateKey string, port int) error {
@@ -246,12 +249,6 @@ func (c *KernelConfigurer) configure(config wgtypes.Config) error {
}
}()
// validate if device with name exists
_, err = wg.Device(c.deviceName)
if err != nil {
return err
}
return wg.ConfigureDevice(c.deviceName, config)
}
@@ -300,6 +297,14 @@ func (c *KernelConfigurer) FullStats() (*Stats, error) {
}
func (c *KernelConfigurer) GetStats() (map[string]WGStats, error) {
return c.statsCache.get()
}
func (c *KernelConfigurer) LastActivities() map[string]monotime.Time {
return nil
}
func (c *KernelConfigurer) fetchStats() (map[string]WGStats, error) {
stats := make(map[string]WGStats)
wg, err := wgctrl.New()
if err != nil {
@@ -326,7 +331,3 @@ func (c *KernelConfigurer) GetStats() (map[string]WGStats, error) {
}
return stats, nil
}
func (c *KernelConfigurer) LastActivities() map[string]monotime.Time {
return nil
}

View File

@@ -0,0 +1,52 @@
package configurer
import (
"sync"
"time"
"golang.org/x/sync/singleflight"
)
const statsCacheTTL = 1 * time.Second
type statsCache struct {
ttl time.Duration
fetch func() (map[string]WGStats, error)
mu sync.RWMutex
value map[string]WGStats
expireAt time.Time
sf singleflight.Group
}
func newStatsCache(ttl time.Duration, fetch func() (map[string]WGStats, error)) *statsCache {
return &statsCache{ttl: ttl, fetch: fetch}
}
func (c *statsCache) get() (map[string]WGStats, error) {
c.mu.RLock()
if c.value != nil && time.Now().Before(c.expireAt) {
value := c.value
c.mu.RUnlock()
return value, nil
}
c.mu.RUnlock()
value, err, _ := c.sf.Do("stats", func() (interface{}, error) {
res, err := c.fetch()
if err != nil {
return nil, err
}
c.mu.Lock()
c.value = res
c.expireAt = time.Now().Add(c.ttl)
c.mu.Unlock()
return res, nil
})
if err != nil {
return nil, err
}
return value.(map[string]WGStats), nil
}

View File

@@ -0,0 +1,70 @@
package configurer
import (
"errors"
"sync"
"sync/atomic"
"testing"
"time"
"github.com/stretchr/testify/require"
)
func TestStatsCache_CachesWithinTTL(t *testing.T) {
var calls atomic.Int64
c := newStatsCache(50*time.Millisecond, func() (map[string]WGStats, error) {
calls.Add(1)
return map[string]WGStats{"p": {}}, nil
})
for i := 0; i < 10; i++ {
_, err := c.get()
require.NoError(t, err)
}
require.Equal(t, int64(1), calls.Load(), "within TTL only one underlying fetch")
time.Sleep(60 * time.Millisecond)
_, err := c.get()
require.NoError(t, err)
require.Equal(t, int64(2), calls.Load(), "after TTL expiry a fresh fetch happens")
}
func TestStatsCache_SingleFlight(t *testing.T) {
var calls atomic.Int64
release := make(chan struct{})
c := newStatsCache(time.Minute, func() (map[string]WGStats, error) {
calls.Add(1)
<-release
return map[string]WGStats{}, nil
})
const n = 50
var wg sync.WaitGroup
wg.Add(n)
for i := 0; i < n; i++ {
go func() {
defer wg.Done()
_, _ = c.get()
}()
}
time.Sleep(20 * time.Millisecond)
close(release)
wg.Wait()
require.Equal(t, int64(1), calls.Load(), "concurrent misses collapse into one fetch")
}
func TestStatsCache_ErrorNotCached(t *testing.T) {
var calls atomic.Int64
wantErr := errors.New("dump failed")
c := newStatsCache(time.Minute, func() (map[string]WGStats, error) {
calls.Add(1)
return nil, wantErr
})
_, err := c.get()
require.ErrorIs(t, err, wantErr)
_, err = c.get()
require.ErrorIs(t, err, wantErr)
require.Equal(t, int64(2), calls.Load(), "errors are not cached; each call retries")
}

View File

@@ -40,6 +40,7 @@ type WGUSPConfigurer struct {
device *device.Device
deviceName string
activityRecorder *bind.ActivityRecorder
statsCache *statsCache
uapiListener net.Listener
}
@@ -50,16 +51,19 @@ func NewUSPConfigurer(device *device.Device, deviceName string, activityRecorder
deviceName: deviceName,
activityRecorder: activityRecorder,
}
wgCfg.statsCache = newStatsCache(statsCacheTTL, wgCfg.fetchStats)
wgCfg.startUAPI()
return wgCfg
}
func NewUSPConfigurerNoUAPI(device *device.Device, deviceName string, activityRecorder *bind.ActivityRecorder) *WGUSPConfigurer {
return &WGUSPConfigurer{
wgCfg := &WGUSPConfigurer{
device: device,
deviceName: deviceName,
activityRecorder: activityRecorder,
}
wgCfg.statsCache = newStatsCache(statsCacheTTL, wgCfg.fetchStats)
return wgCfg
}
func (c *WGUSPConfigurer) ConfigureInterface(privateKey string, port int) error {
@@ -348,6 +352,10 @@ func (t *WGUSPConfigurer) Close() {
}
func (t *WGUSPConfigurer) GetStats() (map[string]WGStats, error) {
return t.statsCache.get()
}
func (t *WGUSPConfigurer) fetchStats() (map[string]WGStats, error) {
ipc, err := t.device.IpcGet()
if err != nil {
return nil, fmt.Errorf("ipc get: %w", err)

View File

@@ -0,0 +1,171 @@
//go:build e2e
package agentnetwork
import (
"context"
"strings"
"testing"
"time"
"github.com/stretchr/testify/require"
"github.com/netbirdio/netbird/e2e/harness"
"github.com/netbirdio/netbird/shared/management/http/api"
)
// TestVLLMProvider proves the proxy supports a self-hosted vLLM backend. vLLM is
// OpenAI-compatible, so it uses the "vllm" catalog entry (KindCustom) and is
// reached over plain HTTP — no TLS anywhere on the path:
//
// client --tunnel--> netbird proxy --http--> vllm (:8000, OpenAI-compatible)
//
// The mock vLLM server answers /v1/chat/completions with an OpenAI-shaped
// completion carrying a non-zero usage block. The test asserts the chat returns
// 200 with the completion, that the request is recorded in the access log by its
// session id, and that vLLM's usage block is metered into a consumption row —
// which together prove request routing, response parsing, and token accounting
// all work for a self-hosted OpenAI-compatible provider.
//
// It needs no external credentials (the mock ignores auth), so it always runs.
func TestVLLMProvider(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), 15*time.Minute)
defer cancel()
vllm, err := harness.StartVLLM(ctx, srv)
require.NoError(t, err, "start mock vLLM server")
t.Cleanup(func() { _ = vllm.Terminate(context.Background()) })
grp, err := srv.API().Groups.Create(ctx, api.PostApiGroupsJSONRequestBody{Name: "e2e-vllm"})
require.NoError(t, err, "create group")
t.Cleanup(func() { _ = srv.API().Groups.Delete(context.Background(), grp.Id) })
ephemeral := false
sk, err := srv.API().SetupKeys.Create(ctx, api.PostApiSetupKeysJSONRequestBody{
Name: "e2e-vllm-client",
Type: "reusable",
ExpiresIn: 86400,
UsageLimit: 0,
AutoGroups: []string{grp.Id},
Ephemeral: &ephemeral,
})
require.NoError(t, err, "mint setup key")
require.NotEmpty(t, sk.Key, "setup key plaintext")
// vLLM provider pointed at the mock over plain HTTP. The mock ignores auth,
// so a dummy key satisfies the "Bearer ${API_KEY}" template. The served model
// is enumerated so the router dispatches this model string to this provider.
dummyKey := "sk-vllm-e2e"
prov, err := srv.CreateProvider(ctx, api.AgentNetworkProviderRequest{
Name: "vllm",
ProviderId: "vllm",
UpstreamUrl: vllm.URL,
ApiKey: &dummyKey,
Enabled: ptr(true),
BootstrapCluster: ptr(harness.AgentNetworkCluster),
Models: &[]api.AgentNetworkProviderModel{
{Id: harness.VLLMModel, InputPer1k: 0.001, OutputPer1k: 0.002},
},
})
require.NoError(t, err, "create vllm provider")
t.Cleanup(func() { _ = srv.DeleteProvider(context.Background(), prov.Id) })
// Token limit far above the handful of tokens this test drives, so it never
// blocks but switches on usage metering — the switch that makes consumption
// rows get recorded.
enabled := true
pol, err := srv.CreatePolicy(ctx, api.AgentNetworkPolicyRequest{
Name: "e2e-vllm-allow",
Enabled: &enabled,
SourceGroups: []string{grp.Id},
DestinationProviderIds: []string{prov.Id},
Limits: &api.AgentNetworkPolicyLimits{
TokenLimit: api.AgentNetworkPolicyTokenLimit{
Enabled: true,
GroupCap: 10_000_000,
UserCap: 10_000_000,
WindowSeconds: 60,
},
},
})
require.NoError(t, err, "create policy")
t.Cleanup(func() { _ = srv.DeletePolicy(context.Background(), pol.Id) })
settings, err := srv.GetSettings(ctx)
require.NoError(t, err, "read settings")
require.NotEmpty(t, settings.Endpoint, "endpoint must be assigned")
proxyToken, err := srv.CreateProxyTokenCLI(ctx, "e2e-vllm-proxy")
require.NoError(t, err, "mint proxy token")
px, err := harness.StartProxy(ctx, srv, proxyToken)
require.NoError(t, err, "start proxy")
t.Cleanup(func() { _ = px.Terminate(context.Background()) })
cl, err := harness.StartClient(ctx, srv, sk.Key)
require.NoError(t, err, "start client")
t.Cleanup(func() { _ = cl.Terminate(context.Background()) })
require.NoError(t, cl.WaitConnected(ctx, 90*time.Second), "client must connect to management")
if err := cl.WaitProxyPeer(ctx, 180*time.Second); err != nil {
t.Fatalf("client did not see the proxy peer: %v\n=== proxy logs ===\n%s", err, px.Logs(context.Background()))
}
proxyIP, err := cl.ResolveProxyIP(ctx, settings.Endpoint)
require.NoError(t, err, "resolve endpoint to proxy IP")
before, _ := srv.ListAccessLogs(ctx)
sessionID := "e2e-session-vllm"
// Retry to absorb tunnel/DNS jitter on the first call.
var code int
var body string
deadline := time.Now().Add(90 * time.Second)
for time.Now().Before(deadline) {
c, b, cerr := cl.Chat(ctx, settings.Endpoint, proxyIP, harness.WireChat, harness.VLLMModel, "Reply with exactly: pong", sessionID)
if cerr == nil {
code, body = c, b
if code == 200 {
break
}
}
time.Sleep(5 * time.Second)
}
require.Equal(t, 200, code,
"chat through the vLLM provider must return 200; body: %s\n=== vllm logs ===\n%s\n=== proxy logs ===\n%s",
body, vllm.Logs(context.Background()), px.Logs(context.Background()))
require.True(t, strings.Contains(body, "chat.completion"),
"body should be an OpenAI-compatible chat completion; got: %s", body)
// The request must surface as an access-log row carrying our session id.
require.Eventually(t, func() bool {
logs, lerr := srv.ListAccessLogs(ctx)
return lerr == nil && logs.TotalRecords > before.TotalRecords
}, 30*time.Second, 2*time.Second, "an access-log row should be ingested for the vLLM provider")
require.Eventually(t, func() bool {
logs, lerr := srv.ListAccessLogs(ctx)
if lerr != nil {
return false
}
for _, r := range logs.Data {
if r.SessionId != nil && *r.SessionId == sessionID {
return true
}
}
return false
}, 30*time.Second, 2*time.Second, "session id %q must be recorded in an access-log row", sessionID)
// vLLM's usage block (prompt_tokens=11, completion_tokens=2) must be parsed
// and metered into a consumption row with positive token counts.
require.Eventually(t, func() bool {
rows, lerr := srv.ListConsumption(ctx)
if lerr != nil {
return false
}
for _, r := range rows {
if r.TokensInput > 0 && r.TokensOutput > 0 {
return true
}
}
return false
}, 60*time.Second, 3*time.Second, "vLLM usage must be metered into a consumption row")
}

113
e2e/harness/vllm.go Normal file
View File

@@ -0,0 +1,113 @@
//go:build e2e
package harness
import (
"context"
"fmt"
"os"
"path/filepath"
"time"
"github.com/docker/docker/api/types/container"
"github.com/testcontainers/testcontainers-go"
"github.com/testcontainers/testcontainers-go/wait"
)
const (
vllmImage = "nginx:alpine"
vllmAlias = "vllm"
vllmPort = "8000/tcp"
// VLLMModel is the served model id the mock advertises and echoes back. It
// matches a real small model commonly served by vLLM so the provider's
// enumerated model and the client's request line up.
VLLMModel = "Qwen/Qwen2.5-0.5B-Instruct"
)
// vllmNginxConf emulates a vLLM OpenAI-compatible server over plain HTTP (vLLM's
// default: no TLS, port 8000). It answers /v1/models with a one-model list and
// any chat/completions path with a canned OpenAI-shaped chat completion carrying
// a non-zero usage block, so the proxy's OpenAI parser records real token
// consumption. Running actual vLLM in CI is infeasible (GPU + multi-GB model
// download), so this stands in for the wire contract the proxy depends on.
const vllmNginxConf = `pid /tmp/nginx.pid;
events {}
http {
server {
listen 8000;
location = /v1/models {
default_type application/json;
return 200 '{"object":"list","data":[{"id":"Qwen/Qwen2.5-0.5B-Instruct","object":"model","owned_by":"vllm"}]}';
}
location / {
default_type application/json;
return 200 '{"id":"chatcmpl-e2e-vllm","object":"chat.completion","created":1700000000,"model":"Qwen/Qwen2.5-0.5B-Instruct","choices":[{"index":0,"message":{"role":"assistant","content":"pong"},"finish_reason":"stop"}],"usage":{"prompt_tokens":11,"completion_tokens":2,"total_tokens":13}}';
}
}
}
`
// VLLM is a mock vLLM OpenAI-compatible server on the combined server's network,
// reachable at http://vllm:8000. A "vllm" provider points at it to exercise the
// proxy's support for self-hosted OpenAI-compatible backends.
type VLLM struct {
container testcontainers.Container
workDir string
// URL is the upstream URL the vllm provider points at (http://<alias>:8000).
URL string
}
// StartVLLM runs the mock vLLM server on the shared network over plain HTTP.
func StartVLLM(ctx context.Context, c *Combined) (*VLLM, error) {
workDir, err := os.MkdirTemp("/tmp", "nb-e2e-vllm-*")
if err != nil {
return nil, fmt.Errorf("create vllm work dir: %w", err)
}
// Widen so the (non-root worker) nginx container can traverse the bind mount.
if err := os.Chmod(workDir, 0o755); err != nil { //nolint:gosec // throwaway e2e config dir
return nil, fmt.Errorf("chmod vllm dir: %w", err)
}
if err := os.WriteFile(filepath.Join(workDir, "nginx.conf"), []byte(vllmNginxConf), 0o644); err != nil { //nolint:gosec // non-secret e2e config
return nil, fmt.Errorf("write nginx conf: %w", err)
}
req := testcontainers.ContainerRequest{
Image: vllmImage,
ExposedPorts: []string{vllmPort},
Networks: []string{c.network.Name},
NetworkAliases: map[string][]string{c.network.Name: {vllmAlias}},
Cmd: []string{"nginx", "-c", "/conf/nginx.conf", "-g", "daemon off;"},
HostConfigModifier: func(hc *container.HostConfig) {
hc.Binds = append(hc.Binds, workDir+":/conf:ro")
},
WaitingFor: wait.ForListeningPort(vllmPort).WithStartupTimeout(60 * time.Second),
}
ctr, err := testcontainers.GenericContainer(ctx, testcontainers.GenericContainerRequest{
ContainerRequest: req,
Started: true,
})
if err != nil {
_ = os.RemoveAll(workDir)
return nil, fmt.Errorf("start vllm container: %w", err)
}
return &VLLM{container: ctr, workDir: workDir, URL: "http://" + vllmAlias + ":8000"}, nil
}
// Logs returns the vLLM container logs, for diagnostics on failure.
func (v *VLLM) Logs(ctx context.Context) string {
return containerLogs(ctx, v.container)
}
// Terminate stops the vLLM container and cleans its work dir.
func (v *VLLM) Terminate(ctx context.Context) error {
var err error
if v.container != nil {
err = v.container.Terminate(ctx)
}
if v.workDir != "" {
_ = os.RemoveAll(v.workDir)
}
return err
}

View File

@@ -33,10 +33,15 @@ const ConnectTimeout = 10 * time.Second
const healthCheckTimeout = 5 * time.Second
const (
// EnvMaxRecvMsgSize overrides the default gRPC max receive message size (4 MB)
// EnvMaxRecvMsgSize overrides the default gRPC max receive message size
// for the management client connection. Value is in bytes.
EnvMaxRecvMsgSize = "NB_MANAGEMENT_GRPC_MAX_MSG_SIZE"
// defaultMaxRecvMsgSize is the max gRPC receive message size used for the
// management client connection when EnvMaxRecvMsgSize is unset or invalid.
// It overrides the gRPC library default of 4 MB.
defaultMaxRecvMsgSize = 1024 * 1024 * 16
errMsgMgmtPublicKey = "failed getting Management Service public key: %s"
errMsgNoMgmtConnection = "no connection to management"
)
@@ -84,22 +89,22 @@ type ExposeResponse struct {
}
// MaxRecvMsgSize returns the configured max gRPC receive message size from
// the environment, or 0 if unset (which uses the gRPC default of 4 MB).
// the environment, or defaultMaxRecvMsgSize (16 MB) if unset or invalid.
func MaxRecvMsgSize() int {
val := os.Getenv(EnvMaxRecvMsgSize)
if val == "" {
return 0
return defaultMaxRecvMsgSize
}
size, err := strconv.Atoi(val)
if err != nil {
log.Warnf("invalid %s value %q, using default: %v", EnvMaxRecvMsgSize, val, err)
return 0
return defaultMaxRecvMsgSize
}
if size <= 0 {
log.Warnf("invalid %s value %d, must be positive, using default", EnvMaxRecvMsgSize, size)
return 0
return defaultMaxRecvMsgSize
}
return size

View File

@@ -21,11 +21,11 @@ func TestMaxRecvMsgSize(t *testing.T) {
envValue string
expected int
}{
{name: "unset returns 0", envValue: "", expected: 0},
{name: "unset returns default", envValue: "", expected: defaultMaxRecvMsgSize},
{name: "valid value", envValue: "10485760", expected: 10485760},
{name: "non-numeric returns 0", envValue: "abc", expected: 0},
{name: "negative returns 0", envValue: "-1", expected: 0},
{name: "zero returns 0", envValue: "0", expected: 0},
{name: "non-numeric returns default", envValue: "abc", expected: defaultMaxRecvMsgSize},
{name: "negative returns default", envValue: "-1", expected: defaultMaxRecvMsgSize},
{name: "zero returns default", envValue: "0", expected: defaultMaxRecvMsgSize},
}
for _, tt := range tests {