mirror of
https://github.com/netbirdio/netbird.git
synced 2026-04-18 08:16:39 +00:00
[client] Fix/ice handshake (#4281)
In this PR, speed up the GRPC message processing, force the recreation of the ICE agent when getting a new, remote offer (do not wait for local STUN timeout).
This commit is contained in:
@@ -55,11 +55,11 @@ import (
|
||||
nbssh "github.com/netbirdio/netbird/client/ssh"
|
||||
"github.com/netbirdio/netbird/client/system"
|
||||
nbdns "github.com/netbirdio/netbird/dns"
|
||||
"github.com/netbirdio/netbird/route"
|
||||
mgm "github.com/netbirdio/netbird/shared/management/client"
|
||||
mgmProto "github.com/netbirdio/netbird/shared/management/proto"
|
||||
auth "github.com/netbirdio/netbird/shared/relay/auth/hmac"
|
||||
relayClient "github.com/netbirdio/netbird/shared/relay/client"
|
||||
"github.com/netbirdio/netbird/route"
|
||||
signal "github.com/netbirdio/netbird/shared/signal/client"
|
||||
sProto "github.com/netbirdio/netbird/shared/signal/proto"
|
||||
"github.com/netbirdio/netbird/util"
|
||||
@@ -254,6 +254,7 @@ func NewEngine(
|
||||
}
|
||||
engine.stateManager = statemanager.New(path)
|
||||
|
||||
log.Infof("I am: %s", config.WgPrivateKey.PublicKey().String())
|
||||
return engine
|
||||
}
|
||||
|
||||
@@ -1330,52 +1331,17 @@ func (e *Engine) receiveSignalEvents() {
|
||||
}
|
||||
|
||||
switch msg.GetBody().Type {
|
||||
case sProto.Body_OFFER:
|
||||
remoteCred, err := signal.UnMarshalCredential(msg)
|
||||
case sProto.Body_OFFER, sProto.Body_ANSWER:
|
||||
offerAnswer, err := convertToOfferAnswer(msg)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var rosenpassPubKey []byte
|
||||
rosenpassAddr := ""
|
||||
if msg.GetBody().GetRosenpassConfig() != nil {
|
||||
rosenpassPubKey = msg.GetBody().GetRosenpassConfig().GetRosenpassPubKey()
|
||||
rosenpassAddr = msg.GetBody().GetRosenpassConfig().GetRosenpassServerAddr()
|
||||
if msg.Body.Type == sProto.Body_OFFER {
|
||||
conn.OnRemoteOffer(*offerAnswer)
|
||||
} else {
|
||||
conn.OnRemoteAnswer(*offerAnswer)
|
||||
}
|
||||
conn.OnRemoteOffer(peer.OfferAnswer{
|
||||
IceCredentials: peer.IceCredentials{
|
||||
UFrag: remoteCred.UFrag,
|
||||
Pwd: remoteCred.Pwd,
|
||||
},
|
||||
WgListenPort: int(msg.GetBody().GetWgListenPort()),
|
||||
Version: msg.GetBody().GetNetBirdVersion(),
|
||||
RosenpassPubKey: rosenpassPubKey,
|
||||
RosenpassAddr: rosenpassAddr,
|
||||
RelaySrvAddress: msg.GetBody().GetRelayServerAddress(),
|
||||
})
|
||||
case sProto.Body_ANSWER:
|
||||
remoteCred, err := signal.UnMarshalCredential(msg)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var rosenpassPubKey []byte
|
||||
rosenpassAddr := ""
|
||||
if msg.GetBody().GetRosenpassConfig() != nil {
|
||||
rosenpassPubKey = msg.GetBody().GetRosenpassConfig().GetRosenpassPubKey()
|
||||
rosenpassAddr = msg.GetBody().GetRosenpassConfig().GetRosenpassServerAddr()
|
||||
}
|
||||
conn.OnRemoteAnswer(peer.OfferAnswer{
|
||||
IceCredentials: peer.IceCredentials{
|
||||
UFrag: remoteCred.UFrag,
|
||||
Pwd: remoteCred.Pwd,
|
||||
},
|
||||
WgListenPort: int(msg.GetBody().GetWgListenPort()),
|
||||
Version: msg.GetBody().GetNetBirdVersion(),
|
||||
RosenpassPubKey: rosenpassPubKey,
|
||||
RosenpassAddr: rosenpassAddr,
|
||||
RelaySrvAddress: msg.GetBody().GetRelayServerAddress(),
|
||||
})
|
||||
case sProto.Body_CANDIDATE:
|
||||
candidate, err := ice.UnmarshalCandidate(msg.GetBody().Payload)
|
||||
if err != nil {
|
||||
@@ -2073,3 +2039,44 @@ func createFile(path string) error {
|
||||
}
|
||||
return file.Close()
|
||||
}
|
||||
|
||||
func convertToOfferAnswer(msg *sProto.Message) (*peer.OfferAnswer, error) {
|
||||
remoteCred, err := signal.UnMarshalCredential(msg)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var (
|
||||
rosenpassPubKey []byte
|
||||
rosenpassAddr string
|
||||
)
|
||||
if cfg := msg.GetBody().GetRosenpassConfig(); cfg != nil {
|
||||
rosenpassPubKey = cfg.GetRosenpassPubKey()
|
||||
rosenpassAddr = cfg.GetRosenpassServerAddr()
|
||||
}
|
||||
|
||||
// Handle optional SessionID
|
||||
var sessionID *peer.ICESessionID
|
||||
if sessionBytes := msg.GetBody().GetSessionId(); sessionBytes != nil {
|
||||
if id, err := peer.ICESessionIDFromBytes(sessionBytes); err != nil {
|
||||
log.Warnf("Invalid session ID in message: %v", err)
|
||||
sessionID = nil // Set to nil if conversion fails
|
||||
} else {
|
||||
sessionID = &id
|
||||
}
|
||||
}
|
||||
|
||||
offerAnswer := peer.OfferAnswer{
|
||||
IceCredentials: peer.IceCredentials{
|
||||
UFrag: remoteCred.UFrag,
|
||||
Pwd: remoteCred.Pwd,
|
||||
},
|
||||
WgListenPort: int(msg.GetBody().GetWgListenPort()),
|
||||
Version: msg.GetBody().GetNetBirdVersion(),
|
||||
RosenpassPubKey: rosenpassPubKey,
|
||||
RosenpassAddr: rosenpassAddr,
|
||||
RelaySrvAddress: msg.GetBody().GetRelayServerAddress(),
|
||||
SessionID: sessionID,
|
||||
}
|
||||
return &offerAnswer, nil
|
||||
}
|
||||
|
||||
@@ -24,8 +24,8 @@ import (
|
||||
"github.com/netbirdio/netbird/client/internal/peer/id"
|
||||
"github.com/netbirdio/netbird/client/internal/peer/worker"
|
||||
"github.com/netbirdio/netbird/client/internal/stdnet"
|
||||
relayClient "github.com/netbirdio/netbird/shared/relay/client"
|
||||
"github.com/netbirdio/netbird/route"
|
||||
relayClient "github.com/netbirdio/netbird/shared/relay/client"
|
||||
semaphoregroup "github.com/netbirdio/netbird/util/semaphore-group"
|
||||
)
|
||||
|
||||
@@ -200,19 +200,11 @@ func (conn *Conn) Open(engineCtx context.Context) error {
|
||||
conn.wg.Add(1)
|
||||
go func() {
|
||||
defer conn.wg.Done()
|
||||
|
||||
conn.waitInitialRandomSleepTime(conn.ctx)
|
||||
conn.semaphore.Done(conn.ctx)
|
||||
|
||||
conn.dumpState.SendOffer()
|
||||
if err := conn.handshaker.sendOffer(); err != nil {
|
||||
conn.Log.Errorf("failed to send initial offer: %v", err)
|
||||
}
|
||||
|
||||
conn.wg.Add(1)
|
||||
go func() {
|
||||
conn.guard.Start(conn.ctx, conn.onGuardEvent)
|
||||
conn.wg.Done()
|
||||
}()
|
||||
conn.guard.Start(conn.ctx, conn.onGuardEvent)
|
||||
}()
|
||||
conn.opened = true
|
||||
return nil
|
||||
@@ -274,10 +266,10 @@ func (conn *Conn) Close(signalToRemote bool) {
|
||||
|
||||
// OnRemoteAnswer handles an offer from the remote peer and returns true if the message was accepted, false otherwise
|
||||
// doesn't block, discards the message if connection wasn't ready
|
||||
func (conn *Conn) OnRemoteAnswer(answer OfferAnswer) bool {
|
||||
func (conn *Conn) OnRemoteAnswer(answer OfferAnswer) {
|
||||
conn.dumpState.RemoteAnswer()
|
||||
conn.Log.Infof("OnRemoteAnswer, priority: %s, status ICE: %s, status relay: %s", conn.currentConnPriority, conn.statusICE, conn.statusRelay)
|
||||
return conn.handshaker.OnRemoteAnswer(answer)
|
||||
conn.handshaker.OnRemoteAnswer(answer)
|
||||
}
|
||||
|
||||
// OnRemoteCandidate Handles ICE connection Candidate provided by the remote peer.
|
||||
@@ -296,10 +288,10 @@ func (conn *Conn) SetOnDisconnected(handler func(remotePeer string)) {
|
||||
conn.onDisconnected = handler
|
||||
}
|
||||
|
||||
func (conn *Conn) OnRemoteOffer(offer OfferAnswer) bool {
|
||||
func (conn *Conn) OnRemoteOffer(offer OfferAnswer) {
|
||||
conn.dumpState.RemoteOffer()
|
||||
conn.Log.Infof("OnRemoteOffer, on status ICE: %s, status Relay: %s", conn.statusICE, conn.statusRelay)
|
||||
return conn.handshaker.OnRemoteOffer(offer)
|
||||
conn.handshaker.OnRemoteOffer(offer)
|
||||
}
|
||||
|
||||
// WgConfig returns the WireGuard config
|
||||
@@ -548,7 +540,6 @@ func (conn *Conn) onRelayDisconnected() {
|
||||
}
|
||||
|
||||
func (conn *Conn) onGuardEvent() {
|
||||
conn.Log.Debugf("send offer to peer")
|
||||
conn.dumpState.SendOffer()
|
||||
if err := conn.handshaker.SendOffer(); err != nil {
|
||||
conn.Log.Errorf("failed to send offer: %v", err)
|
||||
@@ -672,7 +663,7 @@ func (conn *Conn) isConnectedOnAllWay() (connected bool) {
|
||||
}
|
||||
}()
|
||||
|
||||
if conn.statusICE.Get() == worker.StatusDisconnected {
|
||||
if conn.statusICE.Get() == worker.StatusDisconnected && !conn.workerICE.InProgress() {
|
||||
return false
|
||||
}
|
||||
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
package peer
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
@@ -79,31 +79,30 @@ func TestConn_OnRemoteOffer(t *testing.T) {
|
||||
return
|
||||
}
|
||||
|
||||
wg := sync.WaitGroup{}
|
||||
wg.Add(2)
|
||||
go func() {
|
||||
<-conn.handshaker.remoteOffersCh
|
||||
wg.Done()
|
||||
}()
|
||||
onNewOffeChan := make(chan struct{})
|
||||
|
||||
go func() {
|
||||
for {
|
||||
accepted := conn.OnRemoteOffer(OfferAnswer{
|
||||
IceCredentials: IceCredentials{
|
||||
UFrag: "test",
|
||||
Pwd: "test",
|
||||
},
|
||||
WgListenPort: 0,
|
||||
Version: "",
|
||||
})
|
||||
if accepted {
|
||||
wg.Done()
|
||||
return
|
||||
}
|
||||
}
|
||||
}()
|
||||
conn.handshaker.AddOnNewOfferListener(func(remoteOfferAnswer *OfferAnswer) {
|
||||
onNewOffeChan <- struct{}{}
|
||||
})
|
||||
|
||||
wg.Wait()
|
||||
conn.OnRemoteOffer(OfferAnswer{
|
||||
IceCredentials: IceCredentials{
|
||||
UFrag: "test",
|
||||
Pwd: "test",
|
||||
},
|
||||
WgListenPort: 0,
|
||||
Version: "",
|
||||
})
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
defer cancel()
|
||||
|
||||
select {
|
||||
case <-onNewOffeChan:
|
||||
// success
|
||||
case <-ctx.Done():
|
||||
t.Error("expected to receive a new offer notification, but timed out")
|
||||
}
|
||||
}
|
||||
|
||||
func TestConn_OnRemoteAnswer(t *testing.T) {
|
||||
@@ -119,31 +118,29 @@ func TestConn_OnRemoteAnswer(t *testing.T) {
|
||||
return
|
||||
}
|
||||
|
||||
wg := sync.WaitGroup{}
|
||||
wg.Add(2)
|
||||
go func() {
|
||||
<-conn.handshaker.remoteAnswerCh
|
||||
wg.Done()
|
||||
}()
|
||||
onNewOffeChan := make(chan struct{})
|
||||
|
||||
go func() {
|
||||
for {
|
||||
accepted := conn.OnRemoteAnswer(OfferAnswer{
|
||||
IceCredentials: IceCredentials{
|
||||
UFrag: "test",
|
||||
Pwd: "test",
|
||||
},
|
||||
WgListenPort: 0,
|
||||
Version: "",
|
||||
})
|
||||
if accepted {
|
||||
wg.Done()
|
||||
return
|
||||
}
|
||||
}
|
||||
}()
|
||||
conn.handshaker.AddOnNewOfferListener(func(remoteOfferAnswer *OfferAnswer) {
|
||||
onNewOffeChan <- struct{}{}
|
||||
})
|
||||
|
||||
wg.Wait()
|
||||
conn.OnRemoteAnswer(OfferAnswer{
|
||||
IceCredentials: IceCredentials{
|
||||
UFrag: "test",
|
||||
Pwd: "test",
|
||||
},
|
||||
WgListenPort: 0,
|
||||
Version: "",
|
||||
})
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
defer cancel()
|
||||
|
||||
select {
|
||||
case <-onNewOffeChan:
|
||||
// success
|
||||
case <-ctx.Done():
|
||||
t.Error("expected to receive a new offer notification, but timed out")
|
||||
}
|
||||
}
|
||||
|
||||
func TestConn_presharedKey(t *testing.T) {
|
||||
|
||||
@@ -19,7 +19,6 @@ type isConnectedFunc func() bool
|
||||
// - Relayed connection disconnected
|
||||
// - ICE candidate changes
|
||||
type Guard struct {
|
||||
Reconnect chan struct{}
|
||||
log *log.Entry
|
||||
isConnectedOnAllWay isConnectedFunc
|
||||
timeout time.Duration
|
||||
@@ -30,7 +29,6 @@ type Guard struct {
|
||||
|
||||
func NewGuard(log *log.Entry, isConnectedFn isConnectedFunc, timeout time.Duration, srWatcher *SRWatcher) *Guard {
|
||||
return &Guard{
|
||||
Reconnect: make(chan struct{}, 1),
|
||||
log: log,
|
||||
isConnectedOnAllWay: isConnectedFn,
|
||||
timeout: timeout,
|
||||
@@ -41,6 +39,7 @@ func NewGuard(log *log.Entry, isConnectedFn isConnectedFunc, timeout time.Durati
|
||||
}
|
||||
|
||||
func (g *Guard) Start(ctx context.Context, eventCallback func()) {
|
||||
g.log.Infof("starting guard for reconnection with MaxInterval: %s", g.timeout)
|
||||
g.reconnectLoopWithRetry(ctx, eventCallback)
|
||||
}
|
||||
|
||||
@@ -61,17 +60,14 @@ func (g *Guard) SetICEConnDisconnected() {
|
||||
// reconnectLoopWithRetry periodically check the connection status.
|
||||
// Try to send offer while the P2P is not established or while the Relay is not connected if is it supported
|
||||
func (g *Guard) reconnectLoopWithRetry(ctx context.Context, callback func()) {
|
||||
waitForInitialConnectionTry(ctx)
|
||||
|
||||
srReconnectedChan := g.srWatcher.NewListener()
|
||||
defer g.srWatcher.RemoveListener(srReconnectedChan)
|
||||
|
||||
ticker := g.prepareExponentTicker(ctx)
|
||||
ticker := g.initialTicker(ctx)
|
||||
defer ticker.Stop()
|
||||
|
||||
tickerChannel := ticker.C
|
||||
|
||||
g.log.Infof("start reconnect loop...")
|
||||
for {
|
||||
select {
|
||||
case t := <-tickerChannel:
|
||||
@@ -85,7 +81,6 @@ func (g *Guard) reconnectLoopWithRetry(ctx context.Context, callback func()) {
|
||||
if !g.isConnectedOnAllWay() {
|
||||
callback()
|
||||
}
|
||||
|
||||
case <-g.relayedConnDisconnected:
|
||||
g.log.Debugf("Relay connection changed, reset reconnection ticker")
|
||||
ticker.Stop()
|
||||
@@ -111,6 +106,20 @@ func (g *Guard) reconnectLoopWithRetry(ctx context.Context, callback func()) {
|
||||
}
|
||||
}
|
||||
|
||||
// initialTicker give chance to the peer to establish the initial connection.
|
||||
func (g *Guard) initialTicker(ctx context.Context) *backoff.Ticker {
|
||||
bo := backoff.WithContext(&backoff.ExponentialBackOff{
|
||||
InitialInterval: 3 * time.Second,
|
||||
RandomizationFactor: 0.1,
|
||||
Multiplier: 2,
|
||||
MaxInterval: g.timeout,
|
||||
Stop: backoff.Stop,
|
||||
Clock: backoff.SystemClock,
|
||||
}, ctx)
|
||||
|
||||
return backoff.NewTicker(bo)
|
||||
}
|
||||
|
||||
func (g *Guard) prepareExponentTicker(ctx context.Context) *backoff.Ticker {
|
||||
bo := backoff.WithContext(&backoff.ExponentialBackOff{
|
||||
InitialInterval: 800 * time.Millisecond,
|
||||
@@ -126,13 +135,3 @@ func (g *Guard) prepareExponentTicker(ctx context.Context) *backoff.Ticker {
|
||||
|
||||
return ticker
|
||||
}
|
||||
|
||||
// Give chance to the peer to establish the initial connection.
|
||||
// With it, we can decrease to send necessary offer
|
||||
func waitForInitialConnectionTry(ctx context.Context) {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case <-time.After(3 * time.Second):
|
||||
}
|
||||
}
|
||||
|
||||
@@ -39,6 +39,15 @@ type OfferAnswer struct {
|
||||
|
||||
// relay server address
|
||||
RelaySrvAddress string
|
||||
// SessionID is the unique identifier of the session, used to discard old messages
|
||||
SessionID *ICESessionID
|
||||
}
|
||||
|
||||
func (oa *OfferAnswer) SessionIDString() string {
|
||||
if oa.SessionID == nil {
|
||||
return "unknown"
|
||||
}
|
||||
return oa.SessionID.String()
|
||||
}
|
||||
|
||||
type Handshaker struct {
|
||||
@@ -74,21 +83,25 @@ func (h *Handshaker) AddOnNewOfferListener(offer func(remoteOfferAnswer *OfferAn
|
||||
|
||||
func (h *Handshaker) Listen(ctx context.Context) {
|
||||
for {
|
||||
h.log.Info("wait for remote offer confirmation")
|
||||
remoteOfferAnswer, err := h.waitForRemoteOfferConfirmation(ctx)
|
||||
if err != nil {
|
||||
var connectionClosedError *ConnectionClosedError
|
||||
if errors.As(err, &connectionClosedError) {
|
||||
h.log.Info("exit from handshaker")
|
||||
return
|
||||
select {
|
||||
case remoteOfferAnswer := <-h.remoteOffersCh:
|
||||
// received confirmation from the remote peer -> ready to proceed
|
||||
if err := h.sendAnswer(); err != nil {
|
||||
h.log.Errorf("failed to send remote offer confirmation: %s", err)
|
||||
continue
|
||||
}
|
||||
h.log.Errorf("failed to received remote offer confirmation: %s", err)
|
||||
continue
|
||||
}
|
||||
|
||||
h.log.Infof("received connection confirmation, running version %s and with remote WireGuard listen port %d", remoteOfferAnswer.Version, remoteOfferAnswer.WgListenPort)
|
||||
for _, listener := range h.onNewOfferListeners {
|
||||
go listener(remoteOfferAnswer)
|
||||
for _, listener := range h.onNewOfferListeners {
|
||||
listener(&remoteOfferAnswer)
|
||||
}
|
||||
h.log.Infof("received offer, running version %s, remote WireGuard listen port %d, session id: %s", remoteOfferAnswer.Version, remoteOfferAnswer.WgListenPort, remoteOfferAnswer.SessionIDString())
|
||||
case remoteOfferAnswer := <-h.remoteAnswerCh:
|
||||
h.log.Infof("received answer, running version %s, remote WireGuard listen port %d, session id: %s", remoteOfferAnswer.Version, remoteOfferAnswer.WgListenPort, remoteOfferAnswer.SessionIDString())
|
||||
for _, listener := range h.onNewOfferListeners {
|
||||
listener(&remoteOfferAnswer)
|
||||
}
|
||||
case <-ctx.Done():
|
||||
h.log.Infof("stop listening for remote offers and answers")
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -101,43 +114,27 @@ func (h *Handshaker) SendOffer() error {
|
||||
|
||||
// OnRemoteOffer handles an offer from the remote peer and returns true if the message was accepted, false otherwise
|
||||
// doesn't block, discards the message if connection wasn't ready
|
||||
func (h *Handshaker) OnRemoteOffer(offer OfferAnswer) bool {
|
||||
func (h *Handshaker) OnRemoteOffer(offer OfferAnswer) {
|
||||
select {
|
||||
case h.remoteOffersCh <- offer:
|
||||
return true
|
||||
return
|
||||
default:
|
||||
h.log.Warnf("OnRemoteOffer skipping message because is not ready")
|
||||
h.log.Warnf("skipping remote offer message because receiver not ready")
|
||||
// connection might not be ready yet to receive so we ignore the message
|
||||
return false
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// OnRemoteAnswer handles an offer from the remote peer and returns true if the message was accepted, false otherwise
|
||||
// doesn't block, discards the message if connection wasn't ready
|
||||
func (h *Handshaker) OnRemoteAnswer(answer OfferAnswer) bool {
|
||||
func (h *Handshaker) OnRemoteAnswer(answer OfferAnswer) {
|
||||
select {
|
||||
case h.remoteAnswerCh <- answer:
|
||||
return true
|
||||
return
|
||||
default:
|
||||
// connection might not be ready yet to receive so we ignore the message
|
||||
h.log.Debugf("OnRemoteAnswer skipping message because is not ready")
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
func (h *Handshaker) waitForRemoteOfferConfirmation(ctx context.Context) (*OfferAnswer, error) {
|
||||
select {
|
||||
case remoteOfferAnswer := <-h.remoteOffersCh:
|
||||
// received confirmation from the remote peer -> ready to proceed
|
||||
if err := h.sendAnswer(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &remoteOfferAnswer, nil
|
||||
case remoteOfferAnswer := <-h.remoteAnswerCh:
|
||||
return &remoteOfferAnswer, nil
|
||||
case <-ctx.Done():
|
||||
// closed externally
|
||||
return nil, NewConnectionClosedError(h.config.Key)
|
||||
h.log.Warnf("skipping remote answer message because receiver not ready")
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
@@ -147,43 +144,34 @@ func (h *Handshaker) sendOffer() error {
|
||||
return ErrSignalIsNotReady
|
||||
}
|
||||
|
||||
iceUFrag, icePwd := h.ice.GetLocalUserCredentials()
|
||||
offer := OfferAnswer{
|
||||
IceCredentials: IceCredentials{iceUFrag, icePwd},
|
||||
WgListenPort: h.config.LocalWgPort,
|
||||
Version: version.NetbirdVersion(),
|
||||
RosenpassPubKey: h.config.RosenpassConfig.PubKey,
|
||||
RosenpassAddr: h.config.RosenpassConfig.Addr,
|
||||
}
|
||||
|
||||
addr, err := h.relay.RelayInstanceAddress()
|
||||
if err == nil {
|
||||
offer.RelaySrvAddress = addr
|
||||
}
|
||||
offer := h.buildOfferAnswer()
|
||||
h.log.Infof("sending offer with serial: %s", offer.SessionIDString())
|
||||
|
||||
return h.signaler.SignalOffer(offer, h.config.Key)
|
||||
}
|
||||
|
||||
func (h *Handshaker) sendAnswer() error {
|
||||
h.log.Infof("sending answer")
|
||||
uFrag, pwd := h.ice.GetLocalUserCredentials()
|
||||
answer := h.buildOfferAnswer()
|
||||
h.log.Infof("sending answer with serial: %s", answer.SessionIDString())
|
||||
|
||||
return h.signaler.SignalAnswer(answer, h.config.Key)
|
||||
}
|
||||
|
||||
func (h *Handshaker) buildOfferAnswer() OfferAnswer {
|
||||
uFrag, pwd := h.ice.GetLocalUserCredentials()
|
||||
sid := h.ice.SessionID()
|
||||
answer := OfferAnswer{
|
||||
IceCredentials: IceCredentials{uFrag, pwd},
|
||||
WgListenPort: h.config.LocalWgPort,
|
||||
Version: version.NetbirdVersion(),
|
||||
RosenpassPubKey: h.config.RosenpassConfig.PubKey,
|
||||
RosenpassAddr: h.config.RosenpassConfig.Addr,
|
||||
SessionID: &sid,
|
||||
}
|
||||
addr, err := h.relay.RelayInstanceAddress()
|
||||
if err == nil {
|
||||
|
||||
if addr, err := h.relay.RelayInstanceAddress(); err == nil {
|
||||
answer.RelaySrvAddress = addr
|
||||
}
|
||||
|
||||
err = h.signaler.SignalAnswer(answer, h.config.Key)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
return answer
|
||||
}
|
||||
|
||||
47
client/internal/peer/session_id.go
Normal file
47
client/internal/peer/session_id.go
Normal file
@@ -0,0 +1,47 @@
|
||||
package peer
|
||||
|
||||
import (
|
||||
"crypto/rand"
|
||||
"encoding/hex"
|
||||
"fmt"
|
||||
"io"
|
||||
)
|
||||
|
||||
const sessionIDSize = 5
|
||||
|
||||
type ICESessionID string
|
||||
|
||||
// NewICESessionID generates a new session ID for distinguishing sessions
|
||||
func NewICESessionID() (ICESessionID, error) {
|
||||
b := make([]byte, sessionIDSize)
|
||||
if _, err := io.ReadFull(rand.Reader, b); err != nil {
|
||||
return "", fmt.Errorf("failed to generate session ID: %w", err)
|
||||
}
|
||||
return ICESessionID(hex.EncodeToString(b)), nil
|
||||
}
|
||||
|
||||
func ICESessionIDFromBytes(b []byte) (ICESessionID, error) {
|
||||
if len(b) != sessionIDSize {
|
||||
return "", fmt.Errorf("invalid session ID length: %d", len(b))
|
||||
}
|
||||
return ICESessionID(hex.EncodeToString(b)), nil
|
||||
}
|
||||
|
||||
// Bytes returns the raw bytes of the session ID for protobuf serialization
|
||||
func (id ICESessionID) Bytes() ([]byte, error) {
|
||||
if len(id) == 0 {
|
||||
return nil, fmt.Errorf("ICE session ID is empty")
|
||||
}
|
||||
b, err := hex.DecodeString(string(id))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("invalid ICE session ID encoding: %w", err)
|
||||
}
|
||||
if len(b) != sessionIDSize {
|
||||
return nil, fmt.Errorf("invalid ICE session ID length: expected %d bytes, got %d", sessionIDSize, len(b))
|
||||
}
|
||||
return b, nil
|
||||
}
|
||||
|
||||
func (id ICESessionID) String() string {
|
||||
return string(id)
|
||||
}
|
||||
@@ -2,6 +2,7 @@ package peer
|
||||
|
||||
import (
|
||||
"github.com/pion/ice/v3"
|
||||
log "github.com/sirupsen/logrus"
|
||||
"golang.zx2c4.com/wireguard/wgctrl/wgtypes"
|
||||
|
||||
signal "github.com/netbirdio/netbird/shared/signal/client"
|
||||
@@ -45,6 +46,10 @@ func (s *Signaler) Ready() bool {
|
||||
|
||||
// SignalOfferAnswer signals either an offer or an answer to remote peer
|
||||
func (s *Signaler) signalOfferAnswer(offerAnswer OfferAnswer, remoteKey string, bodyType sProto.Body_Type) error {
|
||||
sessionIDBytes, err := offerAnswer.SessionID.Bytes()
|
||||
if err != nil {
|
||||
log.Warnf("failed to get session ID bytes: %v", err)
|
||||
}
|
||||
msg, err := signal.MarshalCredential(
|
||||
s.wgPrivateKey,
|
||||
offerAnswer.WgListenPort,
|
||||
@@ -56,13 +61,13 @@ func (s *Signaler) signalOfferAnswer(offerAnswer OfferAnswer, remoteKey string,
|
||||
bodyType,
|
||||
offerAnswer.RosenpassPubKey,
|
||||
offerAnswer.RosenpassAddr,
|
||||
offerAnswer.RelaySrvAddress)
|
||||
offerAnswer.RelaySrvAddress,
|
||||
sessionIDBytes)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
err = s.signal.Send(msg)
|
||||
if err != nil {
|
||||
if err = s.signal.Send(msg); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
|
||||
@@ -42,8 +42,18 @@ type WorkerICE struct {
|
||||
statusRecorder *Status
|
||||
hasRelayOnLocally bool
|
||||
|
||||
agent *ice.Agent
|
||||
muxAgent sync.Mutex
|
||||
agent *ice.Agent
|
||||
agentDialerCancel context.CancelFunc
|
||||
agentConnecting bool // while it is true, drop all incoming offers
|
||||
lastSuccess time.Time // with this avoid the too frequent ICE agent recreation
|
||||
// remoteSessionID represents the peer's session identifier from the latest remote offer.
|
||||
remoteSessionID ICESessionID
|
||||
// sessionID is used to track the current session ID of the ICE agent
|
||||
// increase by one when disconnecting the agent
|
||||
// with it the remote peer can discard the already deprecated offer/answer
|
||||
// Without it the remote peer may recreate a workable ICE connection
|
||||
sessionID ICESessionID
|
||||
muxAgent sync.Mutex
|
||||
|
||||
StunTurn []*stun.URI
|
||||
|
||||
@@ -57,6 +67,11 @@ type WorkerICE struct {
|
||||
}
|
||||
|
||||
func NewWorkerICE(ctx context.Context, log *log.Entry, config ConnConfig, conn *Conn, signaler *Signaler, ifaceDiscover stdnet.ExternalIFaceDiscover, statusRecorder *Status, hasRelayOnLocally bool) (*WorkerICE, error) {
|
||||
sessionID, err := NewICESessionID()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
w := &WorkerICE{
|
||||
ctx: ctx,
|
||||
log: log,
|
||||
@@ -67,6 +82,7 @@ func NewWorkerICE(ctx context.Context, log *log.Entry, config ConnConfig, conn *
|
||||
statusRecorder: statusRecorder,
|
||||
hasRelayOnLocally: hasRelayOnLocally,
|
||||
lastKnownState: ice.ConnectionStateDisconnected,
|
||||
sessionID: sessionID,
|
||||
}
|
||||
|
||||
localUfrag, localPwd, err := icemaker.GenerateICECredentials()
|
||||
@@ -79,15 +95,35 @@ func NewWorkerICE(ctx context.Context, log *log.Entry, config ConnConfig, conn *
|
||||
}
|
||||
|
||||
func (w *WorkerICE) OnNewOffer(remoteOfferAnswer *OfferAnswer) {
|
||||
w.log.Debugf("OnNewOffer for ICE")
|
||||
w.log.Debugf("OnNewOffer for ICE, serial: %s", remoteOfferAnswer.SessionIDString())
|
||||
w.muxAgent.Lock()
|
||||
|
||||
if w.agent != nil {
|
||||
w.log.Debugf("agent already exists, skipping the offer")
|
||||
if w.agentConnecting {
|
||||
w.log.Debugf("agent connection is in progress, skipping the offer")
|
||||
w.muxAgent.Unlock()
|
||||
return
|
||||
}
|
||||
|
||||
if w.agent != nil {
|
||||
// backward compatibility with old clients that do not send session ID
|
||||
if remoteOfferAnswer.SessionID == nil {
|
||||
w.log.Debugf("agent already exists, skipping the offer")
|
||||
w.muxAgent.Unlock()
|
||||
return
|
||||
}
|
||||
if w.remoteSessionID == *remoteOfferAnswer.SessionID {
|
||||
w.log.Debugf("agent already exists and session ID matches, skipping the offer: %s", remoteOfferAnswer.SessionIDString())
|
||||
w.muxAgent.Unlock()
|
||||
return
|
||||
}
|
||||
w.log.Debugf("agent already exists, recreate the connection")
|
||||
w.agentDialerCancel()
|
||||
if err := w.agent.Close(); err != nil {
|
||||
w.log.Warnf("failed to close ICE agent: %s", err)
|
||||
}
|
||||
// todo consider to switch to Relay connection while establishing a new ICE connection
|
||||
}
|
||||
|
||||
var preferredCandidateTypes []ice.CandidateType
|
||||
if w.hasRelayOnLocally && remoteOfferAnswer.RelaySrvAddress != "" {
|
||||
preferredCandidateTypes = icemaker.CandidateTypesP2P()
|
||||
@@ -96,36 +132,124 @@ func (w *WorkerICE) OnNewOffer(remoteOfferAnswer *OfferAnswer) {
|
||||
}
|
||||
|
||||
w.log.Debugf("recreate ICE agent")
|
||||
agentCtx, agentCancel := context.WithCancel(w.ctx)
|
||||
agent, err := w.reCreateAgent(agentCancel, preferredCandidateTypes)
|
||||
dialerCtx, dialerCancel := context.WithCancel(w.ctx)
|
||||
agent, err := w.reCreateAgent(dialerCancel, preferredCandidateTypes)
|
||||
if err != nil {
|
||||
w.log.Errorf("failed to recreate ICE Agent: %s", err)
|
||||
w.muxAgent.Unlock()
|
||||
return
|
||||
}
|
||||
w.sentExtraSrflx = false
|
||||
w.agent = agent
|
||||
w.agentDialerCancel = dialerCancel
|
||||
w.agentConnecting = true
|
||||
w.muxAgent.Unlock()
|
||||
|
||||
w.log.Debugf("gather candidates")
|
||||
err = w.agent.GatherCandidates()
|
||||
if err != nil {
|
||||
w.log.Debugf("failed to gather candidates: %s", err)
|
||||
go w.connect(dialerCtx, agent, remoteOfferAnswer)
|
||||
}
|
||||
|
||||
// OnRemoteCandidate Handles ICE connection Candidate provided by the remote peer.
|
||||
func (w *WorkerICE) OnRemoteCandidate(candidate ice.Candidate, haRoutes route.HAMap) {
|
||||
w.muxAgent.Lock()
|
||||
defer w.muxAgent.Unlock()
|
||||
w.log.Debugf("OnRemoteCandidate from peer %s -> %s", w.config.Key, candidate.String())
|
||||
if w.agent == nil {
|
||||
w.log.Warnf("ICE Agent is not initialized yet")
|
||||
return
|
||||
}
|
||||
|
||||
if candidateViaRoutes(candidate, haRoutes) {
|
||||
return
|
||||
}
|
||||
|
||||
if err := w.agent.AddRemoteCandidate(candidate); err != nil {
|
||||
w.log.Errorf("error while handling remote candidate")
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
func (w *WorkerICE) GetLocalUserCredentials() (frag string, pwd string) {
|
||||
return w.localUfrag, w.localPwd
|
||||
}
|
||||
|
||||
func (w *WorkerICE) InProgress() bool {
|
||||
w.muxAgent.Lock()
|
||||
defer w.muxAgent.Unlock()
|
||||
|
||||
return w.agentConnecting
|
||||
}
|
||||
|
||||
func (w *WorkerICE) Close() {
|
||||
w.muxAgent.Lock()
|
||||
defer w.muxAgent.Unlock()
|
||||
|
||||
if w.agent == nil {
|
||||
return
|
||||
}
|
||||
|
||||
w.agentDialerCancel()
|
||||
if err := w.agent.Close(); err != nil {
|
||||
w.log.Warnf("failed to close ICE agent: %s", err)
|
||||
}
|
||||
|
||||
w.agent = nil
|
||||
}
|
||||
|
||||
func (w *WorkerICE) reCreateAgent(dialerCancel context.CancelFunc, candidates []ice.CandidateType) (*ice.Agent, error) {
|
||||
agent, err := icemaker.NewAgent(w.iFaceDiscover, w.config.ICEConfig, candidates, w.localUfrag, w.localPwd)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("create agent: %w", err)
|
||||
}
|
||||
|
||||
if err := agent.OnCandidate(w.onICECandidate); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := agent.OnConnectionStateChange(w.onConnectionStateChange(agent, dialerCancel)); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := agent.OnSelectedCandidatePairChange(w.onICESelectedCandidatePair); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := agent.OnSuccessfulSelectedPairBindingResponse(w.onSuccessfulSelectedPairBindingResponse); err != nil {
|
||||
return nil, fmt.Errorf("failed setting binding response callback: %w", err)
|
||||
}
|
||||
|
||||
return agent, nil
|
||||
}
|
||||
|
||||
func (w *WorkerICE) SessionID() ICESessionID {
|
||||
w.muxAgent.Lock()
|
||||
defer w.muxAgent.Unlock()
|
||||
|
||||
return w.sessionID
|
||||
}
|
||||
|
||||
// will block until connection succeeded
|
||||
// but it won't release if ICE Agent went into Disconnected or Failed state,
|
||||
// so we have to cancel it with the provided context once agent detected a broken connection
|
||||
func (w *WorkerICE) connect(ctx context.Context, agent *ice.Agent, remoteOfferAnswer *OfferAnswer) {
|
||||
w.log.Debugf("gather candidates")
|
||||
if err := agent.GatherCandidates(); err != nil {
|
||||
w.log.Warnf("failed to gather candidates: %s", err)
|
||||
w.closeAgent(agent, w.agentDialerCancel)
|
||||
return
|
||||
}
|
||||
|
||||
// will block until connection succeeded
|
||||
// but it won't release if ICE Agent went into Disconnected or Failed state,
|
||||
// so we have to cancel it with the provided context once agent detected a broken connection
|
||||
w.log.Debugf("turn agent dial")
|
||||
remoteConn, err := w.turnAgentDial(agentCtx, remoteOfferAnswer)
|
||||
remoteConn, err := w.turnAgentDial(ctx, remoteOfferAnswer)
|
||||
if err != nil {
|
||||
w.log.Debugf("failed to dial the remote peer: %s", err)
|
||||
w.closeAgent(agent, w.agentDialerCancel)
|
||||
return
|
||||
}
|
||||
w.log.Debugf("agent dial succeeded")
|
||||
|
||||
pair, err := w.agent.GetSelectedCandidatePair()
|
||||
pair, err := agent.GetSelectedCandidatePair()
|
||||
if err != nil {
|
||||
w.closeAgent(agent, w.agentDialerCancel)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -152,114 +276,38 @@ func (w *WorkerICE) OnNewOffer(remoteOfferAnswer *OfferAnswer) {
|
||||
RelayedOnLocal: isRelayCandidate(pair.Local),
|
||||
}
|
||||
w.log.Debugf("on ICE conn is ready to use")
|
||||
go w.conn.onICEConnectionIsReady(selectedPriority(pair), ci)
|
||||
}
|
||||
|
||||
// OnRemoteCandidate Handles ICE connection Candidate provided by the remote peer.
|
||||
func (w *WorkerICE) OnRemoteCandidate(candidate ice.Candidate, haRoutes route.HAMap) {
|
||||
w.log.Infof("connection succeeded with offer session: %s", remoteOfferAnswer.SessionIDString())
|
||||
w.muxAgent.Lock()
|
||||
defer w.muxAgent.Unlock()
|
||||
w.log.Debugf("OnRemoteCandidate from peer %s -> %s", w.config.Key, candidate.String())
|
||||
if w.agent == nil {
|
||||
w.log.Warnf("ICE Agent is not initialized yet")
|
||||
return
|
||||
w.agentConnecting = false
|
||||
w.lastSuccess = time.Now()
|
||||
if remoteOfferAnswer.SessionID != nil {
|
||||
w.remoteSessionID = *remoteOfferAnswer.SessionID
|
||||
}
|
||||
w.muxAgent.Unlock()
|
||||
|
||||
if candidateViaRoutes(candidate, haRoutes) {
|
||||
return
|
||||
}
|
||||
|
||||
err := w.agent.AddRemoteCandidate(candidate)
|
||||
if err != nil {
|
||||
w.log.Errorf("error while handling remote candidate")
|
||||
return
|
||||
}
|
||||
// todo: the potential problem is a race between the onConnectionStateChange
|
||||
w.conn.onICEConnectionIsReady(selectedPriority(pair), ci)
|
||||
}
|
||||
|
||||
func (w *WorkerICE) GetLocalUserCredentials() (frag string, pwd string) {
|
||||
w.muxAgent.Lock()
|
||||
defer w.muxAgent.Unlock()
|
||||
return w.localUfrag, w.localPwd
|
||||
}
|
||||
|
||||
func (w *WorkerICE) Close() {
|
||||
w.muxAgent.Lock()
|
||||
defer w.muxAgent.Unlock()
|
||||
|
||||
if w.agent == nil {
|
||||
return
|
||||
}
|
||||
|
||||
if err := w.agent.Close(); err != nil {
|
||||
w.log.Warnf("failed to close ICE agent: %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
func (w *WorkerICE) reCreateAgent(agentCancel context.CancelFunc, candidates []ice.CandidateType) (*ice.Agent, error) {
|
||||
w.sentExtraSrflx = false
|
||||
|
||||
agent, err := icemaker.NewAgent(w.iFaceDiscover, w.config.ICEConfig, candidates, w.localUfrag, w.localPwd)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("create agent: %w", err)
|
||||
}
|
||||
|
||||
err = agent.OnCandidate(w.onICECandidate)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
err = agent.OnConnectionStateChange(func(state ice.ConnectionState) {
|
||||
w.log.Debugf("ICE ConnectionState has changed to %s", state.String())
|
||||
switch state {
|
||||
case ice.ConnectionStateConnected:
|
||||
w.lastKnownState = ice.ConnectionStateConnected
|
||||
return
|
||||
case ice.ConnectionStateFailed, ice.ConnectionStateDisconnected:
|
||||
if w.lastKnownState == ice.ConnectionStateConnected {
|
||||
w.lastKnownState = ice.ConnectionStateDisconnected
|
||||
w.conn.onICEStateDisconnected()
|
||||
}
|
||||
w.closeAgent(agentCancel)
|
||||
default:
|
||||
return
|
||||
}
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
err = agent.OnSelectedCandidatePairChange(w.onICESelectedCandidatePair)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
err = agent.OnSuccessfulSelectedPairBindingResponse(func(p *ice.CandidatePair) {
|
||||
err := w.statusRecorder.UpdateLatency(w.config.Key, p.Latency())
|
||||
if err != nil {
|
||||
w.log.Debugf("failed to update latency for peer: %s", err)
|
||||
return
|
||||
}
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed setting binding response callback: %w", err)
|
||||
}
|
||||
|
||||
return agent, nil
|
||||
}
|
||||
|
||||
func (w *WorkerICE) closeAgent(cancel context.CancelFunc) {
|
||||
w.muxAgent.Lock()
|
||||
defer w.muxAgent.Unlock()
|
||||
|
||||
func (w *WorkerICE) closeAgent(agent *ice.Agent, cancel context.CancelFunc) {
|
||||
cancel()
|
||||
if w.agent == nil {
|
||||
return
|
||||
}
|
||||
|
||||
if err := w.agent.Close(); err != nil {
|
||||
if err := agent.Close(); err != nil {
|
||||
w.log.Warnf("failed to close ICE agent: %s", err)
|
||||
}
|
||||
w.agent = nil
|
||||
|
||||
w.muxAgent.Lock()
|
||||
sessionID, err := NewICESessionID()
|
||||
if err != nil {
|
||||
w.log.Errorf("failed to create new session ID: %s", err)
|
||||
}
|
||||
w.sessionID = sessionID
|
||||
|
||||
if w.agent == agent {
|
||||
w.agent = nil
|
||||
w.agentConnecting = false
|
||||
}
|
||||
w.muxAgent.Unlock()
|
||||
}
|
||||
|
||||
func (w *WorkerICE) punchRemoteWGPort(pair *ice.CandidatePair, remoteWgPort int) {
|
||||
@@ -331,6 +379,32 @@ func (w *WorkerICE) onICESelectedCandidatePair(c1 ice.Candidate, c2 ice.Candidat
|
||||
w.config.Key)
|
||||
}
|
||||
|
||||
func (w *WorkerICE) onConnectionStateChange(agent *ice.Agent, dialerCancel context.CancelFunc) func(ice.ConnectionState) {
|
||||
return func(state ice.ConnectionState) {
|
||||
w.log.Debugf("ICE ConnectionState has changed to %s", state.String())
|
||||
switch state {
|
||||
case ice.ConnectionStateConnected:
|
||||
w.lastKnownState = ice.ConnectionStateConnected
|
||||
return
|
||||
case ice.ConnectionStateFailed, ice.ConnectionStateDisconnected:
|
||||
if w.lastKnownState == ice.ConnectionStateConnected {
|
||||
w.lastKnownState = ice.ConnectionStateDisconnected
|
||||
w.conn.onICEStateDisconnected()
|
||||
}
|
||||
w.closeAgent(agent, dialerCancel)
|
||||
default:
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (w *WorkerICE) onSuccessfulSelectedPairBindingResponse(pair *ice.CandidatePair) {
|
||||
if err := w.statusRecorder.UpdateLatency(w.config.Key, pair.Latency()); err != nil {
|
||||
w.log.Debugf("failed to update latency for peer: %s", err)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
func (w *WorkerICE) shouldSendExtraSrflxCandidate(candidate ice.Candidate) bool {
|
||||
if !w.sentExtraSrflx && candidate.Type() == ice.CandidateTypeServerReflexive && candidate.Port() != candidate.RelatedAddress().Port {
|
||||
return true
|
||||
|
||||
Reference in New Issue
Block a user