relay: add WebTransport listener + WASM client, share UDP/443 via ALPN mux

The relay now accepts WebTransport sessions on the same UDP socket that
serves raw QUIC. The ALPN-multiplexing QUIC listener owns the socket and
dispatches incoming connections: "nb-quic" continues to the existing
relay handler, "h3" is handed to webtransport-go via http3.Server.
Browsers reach the relay over 443/udp without a second port.

Client side:
- Native builds keep using raw QUIC (no WT dialer registered).
- WASM/browser builds gain a WebTransport dialer that bridges syscall/js
  to the browser's WebTransport API and uses datagrams (matching the
  native QUIC dialer's semantics — no head-of-line blocking).
- The race dialer learned a transport hint so clients skip dialers a
  given relay has not advertised.

Management protocol carries the hint as a new RelayEndpoint{url,
transports[]} list on RelayConfig, mirroring how peers and proxies
announce capabilities. Older management servers that only send urls keep
working unchanged.

devcert build: relay generates an ECDSA P-256 cert with 13-day validity
(within the WebTransport serverCertificateHashes 14-day cap) and exposes
its SHA-256 so the WASM dialer can pin it.

Bumps quic-go v0.55.0 -> v0.59.0 (no API breaks for relay's importers)
and adds github.com/quic-go/webtransport-go v0.10.0.
This commit is contained in:
Claude
2026-05-17 11:08:30 +00:00
parent 3f91f49277
commit 078c323ef3
21 changed files with 1693 additions and 784 deletions

View File

@@ -172,6 +172,17 @@ type Client struct {
stateSubscription *PeersStateSubscription
mtu uint16
// transportHint optionally restricts the race dialer to the listed
// protocols. Empty means try every locally compiled dialer.
transportHint []string
}
// SetTransportHint configures the relay client to only attempt the given
// transports during the dial race. Pass nil/empty to clear the restriction.
// Must be set before Connect.
func (c *Client) SetTransportHint(transports []string) {
c.transportHint = transports
}
// NewClient creates a new client for the relay server. The client is not connected to the server until the Connect
@@ -374,7 +385,8 @@ func (c *Client) connect(ctx context.Context) (*RelayAddr, error) {
}
if conn == nil {
rd := dialer.NewRaceDial(c.log, dialer.DefaultConnectionTimeout, c.connectionURL, dialers...)
rd := dialer.NewRaceDial(c.log, dialer.DefaultConnectionTimeout, c.connectionURL, dialers...).
WithTransportHint(c.transportHint)
var err error
conn, err = rd.Dial(ctx)
if err != nil {
@@ -405,7 +417,8 @@ func (c *Client) dialRaceDirect(ctx context.Context, dialers []dialer.DialeFn) (
c.log.Debugf("dialing via server IP %s (SNI=%s)", c.serverIP, serverName)
rd := dialer.NewRaceDial(c.log, dialer.DefaultConnectionTimeout, directURL, dialers...).
WithServerName(serverName)
WithServerName(serverName).
WithTransportHint(c.transportHint)
return rd.Dial(ctx)
}

View File

@@ -32,6 +32,7 @@ type RaceDial struct {
serverName string
dialerFns []DialeFn
connectionTimeout time.Duration
transportHint []string
}
func NewRaceDial(log *log.Entry, connectionTimeout time.Duration, serverURL string, dialerFns ...DialeFn) *RaceDial {
@@ -53,17 +54,53 @@ func (r *RaceDial) WithServerName(serverName string) *RaceDial {
return r
}
// WithTransportHint restricts the dial race to dialers whose Protocol() is
// listed in hint. An empty or nil hint means "try every configured dialer"
// (legacy behavior). Used to skip dialers a relay has advertised it doesn't
// support — e.g. don't burn a WebTransport handshake on an old relay.
func (r *RaceDial) WithTransportHint(hint []string) *RaceDial {
r.transportHint = hint
return r
}
// activeDialers returns the subset of dialerFns that match the transport hint.
// With no hint set, all dialers are returned.
func (r *RaceDial) activeDialers() []DialeFn {
if len(r.transportHint) == 0 {
return r.dialerFns
}
allowed := make(map[string]struct{}, len(r.transportHint))
for _, p := range r.transportHint {
allowed[p] = struct{}{}
}
out := make([]DialeFn, 0, len(r.dialerFns))
for _, d := range r.dialerFns {
if _, ok := allowed[d.Protocol()]; ok {
out = append(out, d)
}
}
if len(out) == 0 {
// Hint matched nothing the local build supports — fall back to all
// rather than fail with no dialers. Mirrors race-dialer's "try
// everything" default.
r.log.Debugf("transport hint %v matched no local dialer; falling back to all", r.transportHint)
return r.dialerFns
}
return out
}
func (r *RaceDial) Dial(ctx context.Context) (net.Conn, error) {
connChan := make(chan dialResult, len(r.dialerFns))
dialers := r.activeDialers()
connChan := make(chan dialResult, len(dialers))
winnerConn := make(chan net.Conn, 1)
abortCtx, abort := context.WithCancel(ctx)
defer abort()
for _, dfn := range r.dialerFns {
for _, dfn := range dialers {
go r.dial(dfn, abortCtx, connChan)
}
go r.processResults(connChan, winnerConn, abort)
go r.processResults(connChan, winnerConn, abort, len(dialers))
conn, ok := <-winnerConn
if !ok {
@@ -81,9 +118,9 @@ func (r *RaceDial) dial(dfn DialeFn, abortCtx context.Context, connChan chan dia
connChan <- dialResult{Conn: conn, Protocol: dfn.Protocol(), Err: err}
}
func (r *RaceDial) processResults(connChan chan dialResult, winnerConn chan net.Conn, abort context.CancelFunc) {
func (r *RaceDial) processResults(connChan chan dialResult, winnerConn chan net.Conn, abort context.CancelFunc, total int) {
var hasWinner bool
for i := 0; i < len(r.dialerFns); i++ {
for i := 0; i < total; i++ {
dr := <-connChan
if dr.Err != nil {
if errors.Is(dr.Err, context.Canceled) {

View File

@@ -0,0 +1,140 @@
//go:build js
package wt
import (
"context"
"errors"
"fmt"
"io"
"net"
"sync"
"syscall/js"
"time"
netErr "github.com/netbirdio/netbird/shared/relay/client/dialer/net"
)
// addr satisfies net.Addr for the WebTransport-backed conn. The remote address
// is opaque (the browser doesn't expose the underlying UDP 4-tuple), so we
// surface the dial URL instead.
type addr struct{ s string }
func (a addr) Network() string { return Network }
func (a addr) String() string { return a.s }
// conn wraps a WebTransport session and implements net.Conn over its datagram
// channels. Each Read consumes exactly one inbound datagram (= one relay
// message); each Write transmits exactly one (= one relay message). This
// preserves the message-boundary semantics the relay framing assumes.
type conn struct {
wt js.Value
writer js.Value // datagrams.writable.getWriter()
reader js.Value // datagrams.readable.getReader()
ctx context.Context
cancel context.CancelFunc
closeOnce sync.Once
closed chan struct{}
remote addr
}
func newConn(wt js.Value, dialURL string) *conn {
ctx, cancel := context.WithCancel(context.Background())
c := &conn{
wt: wt,
writer: wt.Get("datagrams").Get("writable").Call("getWriter"),
reader: wt.Get("datagrams").Get("readable").Call("getReader"),
ctx: ctx,
cancel: cancel,
closed: make(chan struct{}),
remote: addr{s: dialURL},
}
// Best-effort close detection: when the session closes, surface it as
// net.ErrClosed on subsequent ops.
go c.watchClosed()
return c
}
func (c *conn) watchClosed() {
closedP := c.wt.Get("closed")
if !closedP.Truthy() {
return
}
_, _ = awaitPromise(c.ctx, closedP)
c.markClosed()
}
func (c *conn) Read(b []byte) (int, error) {
for {
select {
case <-c.closed:
return 0, net.ErrClosed
default:
}
readP := c.reader.Call("read")
v, err := awaitPromise(c.ctx, readP)
if err != nil {
if errors.Is(err, context.Canceled) {
return 0, net.ErrClosed
}
return 0, netErr.ErrClosedByServer
}
if v.Get("done").Bool() {
c.markClosed()
return 0, io.EOF
}
val := v.Get("value")
if !val.Truthy() {
continue
}
n := val.Get("byteLength").Int()
if n > len(b) {
// Datagrams shouldn't exceed the relay's MaxMessageSize (8 KB) so
// this branch is defensive — truncate rather than fail hard.
n = len(b)
}
js.CopyBytesToGo(b[:n], val)
return n, nil
}
}
func (c *conn) Write(b []byte) (int, error) {
select {
case <-c.closed:
return 0, net.ErrClosed
default:
}
u8 := js.Global().Get("Uint8Array").New(len(b))
js.CopyBytesToJS(u8, b)
writeP := c.writer.Call("write", u8)
if _, err := awaitPromise(c.ctx, writeP); err != nil {
if errors.Is(err, context.Canceled) {
return 0, net.ErrClosed
}
return 0, netErr.ErrClosedByServer
}
return len(b), nil
}
func (c *conn) Close() error {
c.markClosed()
_ = safeCall(c.wt, "close")
return nil
}
func (c *conn) markClosed() {
c.closeOnce.Do(func() {
c.cancel()
close(c.closed)
})
}
func (c *conn) LocalAddr() net.Addr { return addr{s: "wasm"} }
func (c *conn) RemoteAddr() net.Addr { return c.remote }
func (c *conn) SetDeadline(time.Time) error { return nil }
func (c *conn) SetReadDeadline(time.Time) error { return fmt.Errorf("SetReadDeadline not implemented") }
func (c *conn) SetWriteDeadline(time.Time) error { return fmt.Errorf("SetWriteDeadline not implemented") }

View File

@@ -0,0 +1,142 @@
//go:build js
// Package wt is the browser/WASM WebTransport dialer for the relay client.
//
// WebTransport is the only browser-exposed primitive over HTTP/3 that gives us
// raw bidi-capable QUIC sessions with datagrams. The relay protocol is
// message-framed and small (<= 8 KB) so we use datagrams here, matching the
// raw-QUIC native dialer's semantics (no head-of-line blocking, unreliable).
//
// In production builds the browser performs normal TLS validation against the
// system trust store. Under the `devcert` build tag the server publishes a
// short-lived ECDSA self-signed cert; the WASM client pins its SHA-256 hash
// through `serverCertificateHashes` so the browser will accept it without a
// trusted CA.
package wt
import (
"context"
"errors"
"fmt"
"net"
"net/url"
"syscall/js"
log "github.com/sirupsen/logrus"
"github.com/netbirdio/netbird/shared/relay"
relaytls "github.com/netbirdio/netbird/shared/relay/tls"
)
// Network is the protocol identifier reported via Dialer.Protocol.
const Network = "wt"
type Dialer struct{}
func (Dialer) Protocol() string { return Network }
func (Dialer) Dial(ctx context.Context, address, serverName string) (net.Conn, error) {
wtURL, err := prepareURL(address)
if err != nil {
return nil, err
}
jsWebTransport := js.Global().Get("WebTransport")
if !jsWebTransport.Truthy() {
return nil, errors.New("WebTransport is not supported in this browser")
}
opts := map[string]interface{}{}
if hash := relaytls.DevCertHash(); hash != nil {
u8 := js.Global().Get("Uint8Array").New(len(hash))
js.CopyBytesToJS(u8, hash)
opts["serverCertificateHashes"] = []interface{}{
map[string]interface{}{"algorithm": "sha-256", "value": u8},
}
}
wt := jsWebTransport.New(wtURL, opts)
if _, err := awaitPromise(ctx, wt.Get("ready")); err != nil {
_ = safeCall(wt, "close")
return nil, fmt.Errorf("WebTransport handshake to %s: %w", wtURL, err)
}
log.Debugf("WebTransport session established to %s", wtURL)
return newConn(wt, address), nil
}
// prepareURL rewrites a rels://host[:port] address into the https URL the
// browser dials. Plain rel:// is not supported — WebTransport requires HTTPS.
func prepareURL(address string) (string, error) {
parsed, err := url.Parse(address)
if err != nil {
return "", fmt.Errorf("parse relay address %q: %w", address, err)
}
switch parsed.Scheme {
case "rels":
parsed.Scheme = "https"
case "rel":
return "", errors.New("WebTransport requires TLS; use rels:// not rel://")
default:
return "", fmt.Errorf("unsupported scheme: %s", parsed.Scheme)
}
if parsed.Host == "" {
return "", fmt.Errorf("missing host in relay address %q", address)
}
parsed.Path = relay.WebSocketURLPath
return parsed.String(), nil
}
// awaitPromise bridges a JS Promise to a Go return. It respects ctx
// cancellation and releases its js.Func callbacks on the resolve/reject path.
func awaitPromise(ctx context.Context, p js.Value) (js.Value, error) {
type res struct {
val js.Value
err error
}
ch := make(chan res, 1)
var thenFn, catchFn js.Func
release := func() {
thenFn.Release()
catchFn.Release()
}
thenFn = js.FuncOf(func(_ js.Value, args []js.Value) interface{} {
var v js.Value
if len(args) > 0 {
v = args[0]
}
select {
case ch <- res{val: v}:
default:
}
return nil
})
catchFn = js.FuncOf(func(_ js.Value, args []js.Value) interface{} {
msg := "promise rejected"
if len(args) > 0 && args[0].Truthy() {
msg = args[0].Call("toString").String()
}
select {
case ch <- res{err: errors.New(msg)}:
default:
}
return nil
})
p.Call("then", thenFn).Call("catch", catchFn)
select {
case r := <-ch:
release()
return r.val, r.err
case <-ctx.Done():
release()
return js.Value{}, ctx.Err()
}
}
// safeCall invokes a js method and swallows panics from a dead JS object.
func safeCall(v js.Value, method string, args ...interface{}) (out js.Value) {
defer func() { _ = recover() }()
out = v.Call(method, args...)
return
}

View File

@@ -0,0 +1,22 @@
//go:build !js
// Package wt's WebTransport dialer is browser-only. This stub keeps the
// package importable from non-WASM builds (for tooling, `go vet`, etc.) without
// pulling in syscall/js. The Dialer here returns an error if used.
package wt
import (
"context"
"errors"
"net"
)
const Network = "wt"
type Dialer struct{}
func (Dialer) Protocol() string { return Network }
func (Dialer) Dial(_ context.Context, _, _ string) (net.Conn, error) {
return nil, errors.New("WebTransport dialer is only available in WASM builds")
}

View File

@@ -5,9 +5,16 @@ package client
import (
"github.com/netbirdio/netbird/shared/relay/client/dialer"
"github.com/netbirdio/netbird/shared/relay/client/dialer/ws"
"github.com/netbirdio/netbird/shared/relay/client/dialer/wt"
)
// getDialers returns the dialers used by the WASM/browser relay client.
//
// WebTransport is tried alongside WebSocket via the race dialer: whichever
// handshake completes first wins. WT loses fast against a relay that doesn't
// speak h3 (TLS no_application_protocol within one RTT), so the fallback to
// WS is cheap. The WASM client never uses raw QUIC — browsers don't expose
// UDP sockets.
func (c *Client) getDialers() []dialer.DialeFn {
// JS/WASM build only uses WebSocket transport
return []dialer.DialeFn{ws.Dialer{}}
return []dialer.DialeFn{wt.Dialer{}, ws.Dialer{}}
}

View File

@@ -0,0 +1,37 @@
package client
// ServerEndpoint announces a relay server along with the transports it speaks.
// Carried via the management RelayConfig.endpoints field; falls back to a
// URL-only entry (Transports == nil, meaning "try all dialers") for back-compat
// with older management servers that only sent flat URLs.
type ServerEndpoint struct {
URL string
Transports []string
}
// EndpointsFromURLs builds a list of hint-less endpoints from a flat URL list.
// Used when the management server sends only RelayConfig.urls (no per-relay
// capability metadata).
func EndpointsFromURLs(urls []string) []ServerEndpoint {
if len(urls) == 0 {
return nil
}
out := make([]ServerEndpoint, len(urls))
for i, u := range urls {
out[i] = ServerEndpoint{URL: u}
}
return out
}
// URLsFromEndpoints projects a list of endpoints back to a flat URL slice,
// preserving order. Used by call sites that don't yet consume transport hints.
func URLsFromEndpoints(endpoints []ServerEndpoint) []string {
if len(endpoints) == 0 {
return nil
}
out := make([]string, len(endpoints))
for i, e := range endpoints {
out[i] = e.URL
}
return out
}

View File

@@ -244,6 +244,16 @@ func (m *Manager) HasRelayAddress() bool {
func (m *Manager) UpdateServerURLs(serverURLs []string) {
log.Infof("update relay server URLs: %v", serverURLs)
m.serverPicker.ServerURLs.Store(serverURLs)
m.serverPicker.Endpoints.Store(EndpointsFromURLs(serverURLs))
}
// UpdateServerEndpoints replaces the picker's relay endpoint list, including
// per-relay transport capability hints announced by the management server.
// Callers that don't have hints should keep using UpdateServerURLs.
func (m *Manager) UpdateServerEndpoints(endpoints []ServerEndpoint) {
log.Infof("update relay endpoints: %d entries", len(endpoints))
m.serverPicker.ServerURLs.Store(URLsFromEndpoints(endpoints))
m.serverPicker.Endpoints.Store(endpoints)
}
// UpdateToken updates the token in the token store.

View File

@@ -24,33 +24,55 @@ type connResult struct {
}
type ServerPicker struct {
TokenStore *auth.TokenStore
TokenStore *auth.TokenStore
// ServerURLs holds the legacy flat list of relay URLs, kept for callers
// that don't yet thread through transport hints. Endpoints, when set, is
// the source of truth — ServerURLs is derived from it.
ServerURLs atomic.Value
Endpoints atomic.Value // []ServerEndpoint
PeerID string
MTU uint16
ConnectionTimeout time.Duration
}
// loadEndpoints returns the per-endpoint list, falling back to a URL-only
// projection of ServerURLs if no endpoints have been set yet (older callers
// that still call UpdateServerURLs without the hint-aware path).
func (sp *ServerPicker) loadEndpoints() []ServerEndpoint {
if v := sp.Endpoints.Load(); v != nil {
if eps, ok := v.([]ServerEndpoint); ok && len(eps) > 0 {
return eps
}
}
if v := sp.ServerURLs.Load(); v != nil {
if urls, ok := v.([]string); ok {
return EndpointsFromURLs(urls)
}
}
return nil
}
func (sp *ServerPicker) PickServer(parentCtx context.Context) (*Client, error) {
ctx, cancel := context.WithTimeout(parentCtx, sp.ConnectionTimeout)
defer cancel()
totalServers := len(sp.ServerURLs.Load().([]string))
endpoints := sp.loadEndpoints()
totalServers := len(endpoints)
connResultChan := make(chan connResult, totalServers)
successChan := make(chan connResult, 1)
concurrentLimiter := make(chan struct{}, maxConcurrentServers)
log.Debugf("pick server from list: %v", sp.ServerURLs.Load().([]string))
for _, url := range sp.ServerURLs.Load().([]string) {
log.Debugf("pick server from list: %d endpoint(s)", totalServers)
for _, ep := range endpoints {
// todo check if we have a successful connection so we do not need to connect to other servers
concurrentLimiter <- struct{}{}
go func(url string) {
go func(ep ServerEndpoint) {
defer func() {
<-concurrentLimiter
}()
sp.startConnection(parentCtx, connResultChan, url)
}(url)
sp.startConnection(parentCtx, connResultChan, ep)
}(ep)
}
go sp.processConnResults(connResultChan, successChan)
@@ -67,13 +89,16 @@ func (sp *ServerPicker) PickServer(parentCtx context.Context) (*Client, error) {
}
}
func (sp *ServerPicker) startConnection(ctx context.Context, resultChan chan connResult, url string) {
log.Infof("try to connecting to relay server: %s", url)
relayClient := NewClient(url, sp.TokenStore, sp.PeerID, sp.MTU)
func (sp *ServerPicker) startConnection(ctx context.Context, resultChan chan connResult, ep ServerEndpoint) {
log.Infof("try to connecting to relay server: %s (transports=%v)", ep.URL, ep.Transports)
relayClient := NewClient(ep.URL, sp.TokenStore, sp.PeerID, sp.MTU)
if len(ep.Transports) > 0 {
relayClient.SetTransportHint(ep.Transports)
}
err := relayClient.Connect(ctx)
resultChan <- connResult{
RelayClient: relayClient,
Url: url,
Url: ep.URL,
Err: err,
}
}