Extend netbird status command to include health information (#1471)

* Adds management, signal, and relay (STUN/TURN) health probes to the status command.

* Adds a reason when the management or signal connections are disconnected.

* Adds last wireguard handshake and received/sent bytes per peer
This commit is contained in:
Viktor Liu
2024-01-22 12:20:24 +01:00
committed by GitHub
parent d4194cba6a
commit a7d6632298
28 changed files with 1571 additions and 339 deletions

View File

@@ -408,12 +408,14 @@ func (conn *Conn) configureConnection(remoteConn net.Conn, remoteWgPort int, rem
conn.status = StatusConnected
peerState := State{
PubKey: conn.config.Key,
ConnStatus: conn.status,
ConnStatusUpdate: time.Now(),
LocalIceCandidateType: pair.Local.Type().String(),
RemoteIceCandidateType: pair.Remote.Type().String(),
Direct: !isRelayCandidate(pair.Local),
PubKey: conn.config.Key,
ConnStatus: conn.status,
ConnStatusUpdate: time.Now(),
LocalIceCandidateType: pair.Local.Type().String(),
RemoteIceCandidateType: pair.Remote.Type().String(),
LocalIceCandidateEndpoint: fmt.Sprintf("%s:%d", pair.Local.Address(), pair.Local.Port()),
RemoteIceCandidateEndpoint: fmt.Sprintf("%s:%d", pair.Remote.Address(), pair.Local.Port()),
Direct: !isRelayCandidate(pair.Local),
}
if pair.Local.Type() == ice.CandidateTypeRelay || pair.Remote.Type() == ice.CandidateTypeRelay {
peerState.Relayed = true
@@ -500,6 +502,9 @@ func (conn *Conn) cleanup() error {
// todo rethink status updates
log.Debugf("error while updating peer's %s state, err: %v", conn.config.Key, err)
}
if err := conn.statusRecorder.UpdateWireguardPeerState(conn.config.Key, iface.WGStats{}); err != nil {
log.Debugf("failed to reset wireguard stats for peer %s: %s", conn.config.Key, err)
}
log.Debugf("cleaned up connection to peer %s", conn.config.Key)
if err1 != nil {

View File

@@ -4,19 +4,27 @@ import (
"errors"
"sync"
"time"
"github.com/netbirdio/netbird/client/internal/relay"
"github.com/netbirdio/netbird/iface"
)
// State contains the latest state of a peer
type State struct {
IP string
PubKey string
FQDN string
ConnStatus ConnStatus
ConnStatusUpdate time.Time
Relayed bool
Direct bool
LocalIceCandidateType string
RemoteIceCandidateType string
IP string
PubKey string
FQDN string
ConnStatus ConnStatus
ConnStatusUpdate time.Time
Relayed bool
Direct bool
LocalIceCandidateType string
RemoteIceCandidateType string
LocalIceCandidateEndpoint string
RemoteIceCandidateEndpoint string
LastWireguardHandshake time.Time
BytesTx int64
BytesRx int64
}
// LocalPeerState contains the latest state of the local peer
@@ -31,12 +39,14 @@ type LocalPeerState struct {
type SignalState struct {
URL string
Connected bool
Error error
}
// ManagementState contains the latest state of a management connection
type ManagementState struct {
URL string
Connected bool
Error error
}
// FullStatus contains the full state held by the Status instance
@@ -45,15 +55,19 @@ type FullStatus struct {
ManagementState ManagementState
SignalState SignalState
LocalPeerState LocalPeerState
Relays []relay.ProbeResult
}
// Status holds a state of peers, signal and management connections
// Status holds a state of peers, signal, management connections and relays
type Status struct {
mux sync.Mutex
peers map[string]State
changeNotify map[string]chan struct{}
signalState bool
signalError error
managementState bool
managementError error
relayStates []relay.ProbeResult
localPeer LocalPeerState
offlinePeers []State
mgmAddress string
@@ -156,6 +170,8 @@ func (d *Status) UpdatePeerState(receivedState State) error {
peerState.Relayed = receivedState.Relayed
peerState.LocalIceCandidateType = receivedState.LocalIceCandidateType
peerState.RemoteIceCandidateType = receivedState.RemoteIceCandidateType
peerState.LocalIceCandidateEndpoint = receivedState.LocalIceCandidateEndpoint
peerState.RemoteIceCandidateEndpoint = receivedState.RemoteIceCandidateEndpoint
}
d.peers[receivedState.PubKey] = peerState
@@ -174,6 +190,25 @@ func (d *Status) UpdatePeerState(receivedState State) error {
return nil
}
// UpdateWireguardPeerState updates the wireguard bits of the peer state
func (d *Status) UpdateWireguardPeerState(pubKey string, wgStats iface.WGStats) error {
d.mux.Lock()
defer d.mux.Unlock()
peerState, ok := d.peers[pubKey]
if !ok {
return errors.New("peer doesn't exist")
}
peerState.LastWireguardHandshake = wgStats.LastHandshake
peerState.BytesRx = wgStats.RxBytes
peerState.BytesTx = wgStats.TxBytes
d.peers[pubKey] = peerState
return nil
}
func shouldSkipNotify(received, curr State) bool {
switch {
case received.ConnStatus == StatusConnecting:
@@ -248,12 +283,13 @@ func (d *Status) CleanLocalPeerState() {
}
// MarkManagementDisconnected sets ManagementState to disconnected
func (d *Status) MarkManagementDisconnected() {
func (d *Status) MarkManagementDisconnected(err error) {
d.mux.Lock()
defer d.mux.Unlock()
defer d.onConnectionChanged()
d.managementState = false
d.managementError = err
}
// MarkManagementConnected sets ManagementState to connected
@@ -263,6 +299,7 @@ func (d *Status) MarkManagementConnected() {
defer d.onConnectionChanged()
d.managementState = true
d.managementError = nil
}
// UpdateSignalAddress update the address of the signal server
@@ -280,12 +317,13 @@ func (d *Status) UpdateManagementAddress(mgmAddress string) {
}
// MarkSignalDisconnected sets SignalState to disconnected
func (d *Status) MarkSignalDisconnected() {
func (d *Status) MarkSignalDisconnected(err error) {
d.mux.Lock()
defer d.mux.Unlock()
defer d.onConnectionChanged()
d.signalState = false
d.signalError = err
}
// MarkSignalConnected sets SignalState to connected
@@ -295,6 +333,33 @@ func (d *Status) MarkSignalConnected() {
defer d.onConnectionChanged()
d.signalState = true
d.signalError = nil
}
func (d *Status) UpdateRelayStates(relayResults []relay.ProbeResult) {
d.mux.Lock()
defer d.mux.Unlock()
d.relayStates = relayResults
}
func (d *Status) GetManagementState() ManagementState {
return ManagementState{
d.mgmAddress,
d.managementState,
d.managementError,
}
}
func (d *Status) GetSignalState() SignalState {
return SignalState{
d.signalAddress,
d.signalState,
d.signalError,
}
}
func (d *Status) GetRelayStates() []relay.ProbeResult {
return d.relayStates
}
// GetFullStatus gets full status
@@ -303,15 +368,10 @@ func (d *Status) GetFullStatus() FullStatus {
defer d.mux.Unlock()
fullStatus := FullStatus{
ManagementState: ManagementState{
d.mgmAddress,
d.managementState,
},
SignalState: SignalState{
d.signalAddress,
d.signalState,
},
LocalPeerState: d.localPeer,
ManagementState: d.GetManagementState(),
SignalState: d.GetSignalState(),
LocalPeerState: d.localPeer,
Relays: d.GetRelayStates(),
}
for _, status := range d.peers {

View File

@@ -1,6 +1,7 @@
package peer
import (
"errors"
"testing"
"github.com/stretchr/testify/assert"
@@ -152,9 +153,10 @@ func TestUpdateSignalState(t *testing.T) {
name string
connected bool
want bool
err error
}{
{"should mark as connected", true, true},
{"should mark as disconnected", false, false},
{"should mark as connected", true, true, nil},
{"should mark as disconnected", false, false, errors.New("test")},
}
status := NewRecorder("https://mgm")
@@ -165,9 +167,10 @@ func TestUpdateSignalState(t *testing.T) {
if test.connected {
status.MarkSignalConnected()
} else {
status.MarkSignalDisconnected()
status.MarkSignalDisconnected(test.err)
}
assert.Equal(t, test.want, status.signalState, "signal status should be equal")
assert.Equal(t, test.err, status.signalError)
})
}
}
@@ -178,9 +181,10 @@ func TestUpdateManagementState(t *testing.T) {
name string
connected bool
want bool
err error
}{
{"should mark as connected", true, true},
{"should mark as disconnected", false, false},
{"should mark as connected", true, true, nil},
{"should mark as disconnected", false, false, errors.New("test")},
}
status := NewRecorder(url)
@@ -190,9 +194,10 @@ func TestUpdateManagementState(t *testing.T) {
if test.connected {
status.MarkManagementConnected()
} else {
status.MarkManagementDisconnected()
status.MarkManagementDisconnected(test.err)
}
assert.Equal(t, test.want, status.managementState, "signalState status should be equal")
assert.Equal(t, test.err, status.managementError)
})
}
}