mirror of
https://github.com/netbirdio/netbird.git
synced 2026-05-20 23:59:55 +00:00
Reset connection stage timestamps during reconnections to exclude unnecessary metrics tracking
This commit is contained in:
@@ -84,7 +84,7 @@ func (m *victoriaMetrics) RecordConnectionStages(
|
|||||||
m.getMetricName("netbird_peer_connection_total_creation_to_handshake", connTypeStr, attemptType),
|
m.getMetricName("netbird_peer_connection_total_creation_to_handshake", connTypeStr, attemptType),
|
||||||
).Update(totalDuration)
|
).Update(totalDuration)
|
||||||
|
|
||||||
log.Infof("--- Peer connection metrics [%s, %s, %s]: creation→semaphore: %.3fs, semaphore→signaling: %.3fs, signaling→connection: %.3fs, connection→handshake: %.3fs, total: %.3fs",
|
log.Tracef("peer connection metrics [%s, %s, %s]: creation→semaphore: %.3fs, semaphore→signaling: %.3fs, signaling→connection: %.3fs, connection→handshake: %.3fs, total: %.3fs",
|
||||||
m.deploymentType.String(), connTypeStr, attemptType,
|
m.deploymentType.String(), connTypeStr, attemptType,
|
||||||
creationToSemaphore, semaphoreToSignaling, signalingToConnection, connectionToHandshake,
|
creationToSemaphore, semaphoreToSignaling, signalingToConnection, connectionToHandshake,
|
||||||
totalDuration)
|
totalDuration)
|
||||||
|
|||||||
@@ -303,7 +303,6 @@ func (conn *Conn) Close(signalToRemote bool) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
conn.setStatusToDisconnected()
|
conn.setStatusToDisconnected()
|
||||||
|
|
||||||
conn.opened = false
|
conn.opened = false
|
||||||
conn.wg.Wait()
|
conn.wg.Wait()
|
||||||
conn.Log.Infof("peer connection closed")
|
conn.Log.Infof("peer connection closed")
|
||||||
@@ -343,7 +342,6 @@ func (conn *Conn) SetRosenpassInitializedPresharedKeyValidator(handler func(peer
|
|||||||
func (conn *Conn) OnRemoteOffer(offer OfferAnswer) {
|
func (conn *Conn) OnRemoteOffer(offer OfferAnswer) {
|
||||||
conn.dumpState.RemoteOffer()
|
conn.dumpState.RemoteOffer()
|
||||||
conn.Log.Infof("OnRemoteOffer, on status ICE: %s, status Relay: %s", conn.statusICE, conn.statusRelay)
|
conn.Log.Infof("OnRemoteOffer, on status ICE: %s, status Relay: %s", conn.statusICE, conn.statusRelay)
|
||||||
|
|
||||||
conn.handshaker.OnRemoteOffer(offer)
|
conn.handshaker.OnRemoteOffer(offer)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -387,7 +385,7 @@ func (conn *Conn) onICEConnectionIsReady(priority conntype.ConnPriority, iceConn
|
|||||||
if conn.currentConnPriority > priority {
|
if conn.currentConnPriority > priority {
|
||||||
conn.Log.Infof("current connection priority (%s) is higher than the new one (%s), do not upgrade connection", conn.currentConnPriority, priority)
|
conn.Log.Infof("current connection priority (%s) is higher than the new one (%s), do not upgrade connection", conn.currentConnPriority, priority)
|
||||||
conn.statusICE.SetConnected()
|
conn.statusICE.SetConnected()
|
||||||
conn.updateIceState(iceConnInfo)
|
conn.updateIceState(iceConnInfo, time.Now())
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -428,11 +426,14 @@ func (conn *Conn) onICEConnectionIsReady(priority conntype.ConnPriority, iceConn
|
|||||||
|
|
||||||
conn.Log.Infof("configure WireGuard endpoint to: %s", ep.String())
|
conn.Log.Infof("configure WireGuard endpoint to: %s", ep.String())
|
||||||
presharedKey := conn.presharedKey(iceConnInfo.RosenpassPubKey)
|
presharedKey := conn.presharedKey(iceConnInfo.RosenpassPubKey)
|
||||||
conn.enableWgWatcherIfNeeded()
|
updateTime := time.Now()
|
||||||
if err = conn.endpointUpdater.ConfigureWGEndpoint(ep, presharedKey); err != nil {
|
if err = conn.endpointUpdater.ConfigureWGEndpoint(ep, presharedKey); err != nil {
|
||||||
conn.handleConfigurationFailure(err, wgProxy)
|
conn.handleConfigurationFailure(err, wgProxy)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
conn.enableWgWatcherIfNeeded(updateTime)
|
||||||
|
|
||||||
wgConfigWorkaround()
|
wgConfigWorkaround()
|
||||||
|
|
||||||
if conn.wgProxyRelay != nil {
|
if conn.wgProxyRelay != nil {
|
||||||
@@ -442,8 +443,8 @@ func (conn *Conn) onICEConnectionIsReady(priority conntype.ConnPriority, iceConn
|
|||||||
|
|
||||||
conn.currentConnPriority = priority
|
conn.currentConnPriority = priority
|
||||||
conn.statusICE.SetConnected()
|
conn.statusICE.SetConnected()
|
||||||
conn.updateIceState(iceConnInfo)
|
conn.updateIceState(iceConnInfo, updateTime)
|
||||||
conn.doOnConnected(iceConnInfo.RosenpassPubKey, iceConnInfo.RosenpassAddr)
|
conn.doOnConnected(iceConnInfo.RosenpassPubKey, iceConnInfo.RosenpassAddr, updateTime)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (conn *Conn) onICEStateDisconnected() {
|
func (conn *Conn) onICEStateDisconnected() {
|
||||||
@@ -535,13 +536,13 @@ func (conn *Conn) onRelayConnectionIsReady(rci RelayConnInfo) {
|
|||||||
conn.Log.Debugf("do not switch to relay because current priority is: %s", conn.currentConnPriority.String())
|
conn.Log.Debugf("do not switch to relay because current priority is: %s", conn.currentConnPriority.String())
|
||||||
conn.setRelayedProxy(wgProxy)
|
conn.setRelayedProxy(wgProxy)
|
||||||
conn.statusRelay.SetConnected()
|
conn.statusRelay.SetConnected()
|
||||||
conn.updateRelayStatus(rci.relayedConn.RemoteAddr().String(), rci.rosenpassPubKey)
|
conn.updateRelayStatus(rci.relayedConn.RemoteAddr().String(), rci.rosenpassPubKey, time.Now())
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
wgProxy.Work()
|
wgProxy.Work()
|
||||||
presharedKey := conn.presharedKey(rci.rosenpassPubKey)
|
presharedKey := conn.presharedKey(rci.rosenpassPubKey)
|
||||||
conn.enableWgWatcherIfNeeded()
|
updateTime := time.Now()
|
||||||
if err := conn.endpointUpdater.ConfigureWGEndpoint(wgProxy.EndpointAddr(), presharedKey); err != nil {
|
if err := conn.endpointUpdater.ConfigureWGEndpoint(wgProxy.EndpointAddr(), presharedKey); err != nil {
|
||||||
if err := wgProxy.CloseConn(); err != nil {
|
if err := wgProxy.CloseConn(); err != nil {
|
||||||
conn.Log.Warnf("Failed to close relay connection: %v", err)
|
conn.Log.Warnf("Failed to close relay connection: %v", err)
|
||||||
@@ -550,14 +551,17 @@ func (conn *Conn) onRelayConnectionIsReady(rci RelayConnInfo) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
conn.enableWgWatcherIfNeeded(updateTime)
|
||||||
|
|
||||||
wgConfigWorkaround()
|
wgConfigWorkaround()
|
||||||
|
|
||||||
conn.rosenpassRemoteKey = rci.rosenpassPubKey
|
conn.rosenpassRemoteKey = rci.rosenpassPubKey
|
||||||
conn.currentConnPriority = conntype.Relay
|
conn.currentConnPriority = conntype.Relay
|
||||||
conn.statusRelay.SetConnected()
|
conn.statusRelay.SetConnected()
|
||||||
conn.setRelayedProxy(wgProxy)
|
conn.setRelayedProxy(wgProxy)
|
||||||
conn.updateRelayStatus(rci.relayedConn.RemoteAddr().String(), rci.rosenpassPubKey)
|
conn.updateRelayStatus(rci.relayedConn.RemoteAddr().String(), rci.rosenpassPubKey, updateTime)
|
||||||
conn.Log.Infof("start to communicate with peer via relay")
|
conn.Log.Infof("start to communicate with peer via relay")
|
||||||
conn.doOnConnected(rci.rosenpassPubKey, rci.rosenpassAddr)
|
conn.doOnConnected(rci.rosenpassPubKey, rci.rosenpassAddr, updateTime)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (conn *Conn) onRelayDisconnected() {
|
func (conn *Conn) onRelayDisconnected() {
|
||||||
@@ -640,10 +644,10 @@ func (conn *Conn) onWGDisconnected() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (conn *Conn) updateRelayStatus(relayServerAddr string, rosenpassPubKey []byte) {
|
func (conn *Conn) updateRelayStatus(relayServerAddr string, rosenpassPubKey []byte, updateTime time.Time) {
|
||||||
peerState := State{
|
peerState := State{
|
||||||
PubKey: conn.config.Key,
|
PubKey: conn.config.Key,
|
||||||
ConnStatusUpdate: time.Now(),
|
ConnStatusUpdate: updateTime,
|
||||||
ConnStatus: conn.evalStatus(),
|
ConnStatus: conn.evalStatus(),
|
||||||
Relayed: conn.isRelayed(),
|
Relayed: conn.isRelayed(),
|
||||||
RelayServerAddress: relayServerAddr,
|
RelayServerAddress: relayServerAddr,
|
||||||
@@ -656,10 +660,10 @@ func (conn *Conn) updateRelayStatus(relayServerAddr string, rosenpassPubKey []by
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (conn *Conn) updateIceState(iceConnInfo ICEConnInfo) {
|
func (conn *Conn) updateIceState(iceConnInfo ICEConnInfo, updateTime time.Time) {
|
||||||
peerState := State{
|
peerState := State{
|
||||||
PubKey: conn.config.Key,
|
PubKey: conn.config.Key,
|
||||||
ConnStatusUpdate: time.Now(),
|
ConnStatusUpdate: updateTime,
|
||||||
ConnStatus: conn.evalStatus(),
|
ConnStatus: conn.evalStatus(),
|
||||||
Relayed: iceConnInfo.Relayed,
|
Relayed: iceConnInfo.Relayed,
|
||||||
LocalIceCandidateType: iceConnInfo.LocalIceCandidateType,
|
LocalIceCandidateType: iceConnInfo.LocalIceCandidateType,
|
||||||
@@ -697,12 +701,12 @@ func (conn *Conn) setStatusToDisconnected() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (conn *Conn) doOnConnected(remoteRosenpassPubKey []byte, remoteRosenpassAddr string) {
|
func (conn *Conn) doOnConnected(remoteRosenpassPubKey []byte, remoteRosenpassAddr string, updateTime time.Time) {
|
||||||
if runtime.GOOS == "ios" {
|
if runtime.GOOS == "ios" {
|
||||||
runtime.GC()
|
runtime.GC()
|
||||||
}
|
}
|
||||||
|
|
||||||
conn.metricsStages.RecordConnectionReady()
|
conn.metricsStages.RecordConnectionReady(updateTime)
|
||||||
|
|
||||||
if conn.onConnected != nil {
|
if conn.onConnected != nil {
|
||||||
conn.onConnected(conn.config.Key, remoteRosenpassPubKey, conn.config.WgConfig.AllowedIps[0].Addr().String(), remoteRosenpassAddr)
|
conn.onConnected(conn.config.Key, remoteRosenpassPubKey, conn.config.WgConfig.AllowedIps[0].Addr().String(), remoteRosenpassAddr)
|
||||||
@@ -768,15 +772,14 @@ func (conn *Conn) isConnectedOnAllWay() (connected bool) {
|
|||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
func (conn *Conn) enableWgWatcherIfNeeded() {
|
func (conn *Conn) enableWgWatcherIfNeeded(enabledTime time.Time) {
|
||||||
if !conn.wgWatcher.IsEnabled() {
|
if !conn.wgWatcher.IsEnabled() {
|
||||||
wgWatcherCtx, wgWatcherCancel := context.WithCancel(conn.ctx)
|
wgWatcherCtx, wgWatcherCancel := context.WithCancel(conn.ctx)
|
||||||
conn.wgWatcherCancel = wgWatcherCancel
|
conn.wgWatcherCancel = wgWatcherCancel
|
||||||
conn.wgWatcherWg.Add(1)
|
conn.wgWatcherWg.Add(1)
|
||||||
now := time.Now()
|
|
||||||
go func() {
|
go func() {
|
||||||
defer conn.wgWatcherWg.Done()
|
defer conn.wgWatcherWg.Done()
|
||||||
conn.wgWatcher.EnableWgWatcher(wgWatcherCtx, now, conn.onWGDisconnected, conn.onWGHandshakeSuccess)
|
conn.wgWatcher.EnableWgWatcher(wgWatcherCtx, enabledTime, conn.onWGDisconnected, conn.onWGHandshakeSuccess)
|
||||||
}()
|
}()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -848,9 +851,7 @@ func (conn *Conn) onWGHandshakeSuccess(when time.Time) {
|
|||||||
|
|
||||||
// recordConnectionMetrics records connection stage timestamps as metrics
|
// recordConnectionMetrics records connection stage timestamps as metrics
|
||||||
func (conn *Conn) recordConnectionMetrics() {
|
func (conn *Conn) recordConnectionMetrics() {
|
||||||
log.Infof("--- record Metrics")
|
|
||||||
if conn.metricsRecorder == nil {
|
if conn.metricsRecorder == nil {
|
||||||
log.Infof("--- is nil")
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -863,7 +864,6 @@ func (conn *Conn) recordConnectionMetrics() {
|
|||||||
connType = metrics.ConnectionTypeICE
|
connType = metrics.ConnectionTypeICE
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Infof("-- record: connType: %v, %v, %v", connType, conn.metricsStages.IsReconnection(), conn.metricsStages.GetTimestamps())
|
|
||||||
// Record metrics with timestamps - duration calculation happens in metrics package
|
// Record metrics with timestamps - duration calculation happens in metrics package
|
||||||
conn.metricsRecorder.RecordConnectionStages(
|
conn.metricsRecorder.RecordConnectionStages(
|
||||||
context.Background(),
|
context.Background(),
|
||||||
|
|||||||
@@ -4,8 +4,6 @@ import (
|
|||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
log "github.com/sirupsen/logrus"
|
|
||||||
|
|
||||||
"github.com/netbirdio/netbird/client/internal/metrics"
|
"github.com/netbirdio/netbird/client/internal/metrics"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -18,14 +16,12 @@ type MetricsStages struct {
|
|||||||
func (s *MetricsStages) RecordCreated() {
|
func (s *MetricsStages) RecordCreated() {
|
||||||
s.mu.Lock()
|
s.mu.Lock()
|
||||||
defer s.mu.Unlock()
|
defer s.mu.Unlock()
|
||||||
log.Infof("--- RecordCreated")
|
|
||||||
s.stageTimestamps.Created = time.Now()
|
s.stageTimestamps.Created = time.Now()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *MetricsStages) RecordSemaphoreAcquired() {
|
func (s *MetricsStages) RecordSemaphoreAcquired() {
|
||||||
s.mu.Lock()
|
s.mu.Lock()
|
||||||
defer s.mu.Unlock()
|
defer s.mu.Unlock()
|
||||||
log.Infof("--- RecordSemaphoreAcquired")
|
|
||||||
s.stageTimestamps.SemaphoreAcquired = time.Now()
|
s.stageTimestamps.SemaphoreAcquired = time.Now()
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -35,14 +31,12 @@ func (s *MetricsStages) RecordSemaphoreAcquired() {
|
|||||||
func (s *MetricsStages) RecordSignaling() {
|
func (s *MetricsStages) RecordSignaling() {
|
||||||
s.mu.Lock()
|
s.mu.Lock()
|
||||||
defer s.mu.Unlock()
|
defer s.mu.Unlock()
|
||||||
log.Infof("--- RecordSignaling (send)")
|
|
||||||
|
|
||||||
if s.isReconnectionAttempt {
|
if s.isReconnectionAttempt {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
if s.stageTimestamps.Signaling.IsZero() {
|
if s.stageTimestamps.Signaling.IsZero() {
|
||||||
log.Infof("--- Recorded Signaling (initial connection, sending)")
|
|
||||||
s.stageTimestamps.Signaling = time.Now()
|
s.stageTimestamps.Signaling = time.Now()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -53,47 +47,47 @@ func (s *MetricsStages) RecordSignaling() {
|
|||||||
func (s *MetricsStages) RecordSignalingReceived() {
|
func (s *MetricsStages) RecordSignalingReceived() {
|
||||||
s.mu.Lock()
|
s.mu.Lock()
|
||||||
defer s.mu.Unlock()
|
defer s.mu.Unlock()
|
||||||
log.Infof("--- RecordSignalingReceived (receive)")
|
|
||||||
|
|
||||||
// Only record for reconnections when we receive a signal
|
// Only record for reconnections when we receive a signal
|
||||||
if s.isReconnectionAttempt && s.stageTimestamps.Signaling.IsZero() {
|
if s.isReconnectionAttempt && s.stageTimestamps.Signaling.IsZero() {
|
||||||
log.Infof("--- Recorded Signaling (reconnection, receiving)")
|
|
||||||
s.stageTimestamps.Signaling = time.Now()
|
s.stageTimestamps.Signaling = time.Now()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *MetricsStages) RecordConnectionReady() {
|
func (s *MetricsStages) RecordConnectionReady(when time.Time) {
|
||||||
s.mu.Lock()
|
s.mu.Lock()
|
||||||
defer s.mu.Unlock()
|
defer s.mu.Unlock()
|
||||||
log.Infof("--- RecordConnectionReady")
|
|
||||||
if s.stageTimestamps.ConnectionReady.IsZero() {
|
if s.stageTimestamps.ConnectionReady.IsZero() {
|
||||||
log.Infof("--- Recorded ConnectionReady")
|
s.stageTimestamps.ConnectionReady = when
|
||||||
s.stageTimestamps.ConnectionReady = time.Now()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *MetricsStages) RecordWGHandshakeSuccess(elapsed time.Time) {
|
func (s *MetricsStages) RecordWGHandshakeSuccess(handshakeTime time.Time) {
|
||||||
s.mu.Lock()
|
s.mu.Lock()
|
||||||
defer s.mu.Unlock()
|
defer s.mu.Unlock()
|
||||||
|
|
||||||
log.Infof("--- record: %v, %v", s.stageTimestamps.ConnectionReady, elapsed)
|
|
||||||
if !s.stageTimestamps.ConnectionReady.IsZero() {
|
if !s.stageTimestamps.ConnectionReady.IsZero() {
|
||||||
// todo, check if it is earlier then ConnectionReady
|
// WireGuard only reports handshake times with second precision, but ConnectionReady
|
||||||
s.stageTimestamps.WgHandshakeSuccess = elapsed
|
// is captured with microsecond precision. If handshake appears before ConnectionReady
|
||||||
|
// due to truncation (e.g., handshake at 6.042s truncated to 6.000s), normalize to
|
||||||
|
// ConnectionReady to avoid negative duration metrics.
|
||||||
|
if handshakeTime.Before(s.stageTimestamps.ConnectionReady) {
|
||||||
|
s.stageTimestamps.WgHandshakeSuccess = s.stageTimestamps.ConnectionReady
|
||||||
|
} else {
|
||||||
|
s.stageTimestamps.WgHandshakeSuccess = handshakeTime
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Disconnected sets the mode to reconnection. It is called only when both ICE and Relay have been disconnected at the same time.
|
||||||
func (s *MetricsStages) Disconnected() {
|
func (s *MetricsStages) Disconnected() {
|
||||||
log.Infof("--- Disconnected")
|
|
||||||
s.mu.Lock()
|
s.mu.Lock()
|
||||||
defer s.mu.Unlock()
|
defer s.mu.Unlock()
|
||||||
|
|
||||||
now := time.Now()
|
// Reset all timestamps for reconnection
|
||||||
s.stageTimestamps = metrics.ConnectionStageTimestamps{
|
// For reconnections, we only track from Signaling onwards
|
||||||
Created: now,
|
// This avoids meaningless creation→semaphore and semaphore→signaling metrics
|
||||||
SemaphoreAcquired: now,
|
s.stageTimestamps = metrics.ConnectionStageTimestamps{}
|
||||||
}
|
|
||||||
s.isReconnectionAttempt = true
|
s.isReconnectionAttempt = true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -94,8 +94,7 @@ func (w *WGWatcher) periodicHandshakeCheck(ctx context.Context, onDisconnectedFn
|
|||||||
}
|
}
|
||||||
if lastHandshake.IsZero() {
|
if lastHandshake.IsZero() {
|
||||||
elapsed := calcElapsed(enabledTime, *handshake)
|
elapsed := calcElapsed(enabledTime, *handshake)
|
||||||
w.log.Infof("--- first wg handshake detected within: %.2fsec, (%s - %s)", elapsed, enabledTime, handshake)
|
w.log.Infof("first wg handshake detected within: %.2fsec, (%s)", elapsed, handshake)
|
||||||
onHandshakeSuccessFn(*handshake)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
lastHandshake = *handshake
|
lastHandshake = *handshake
|
||||||
|
|||||||
Reference in New Issue
Block a user