Extend netbird status command to include health information (#1471)

* Adds management, signal, and relay (STUN/TURN) health probes to the status command.

* Adds a reason when the management or signal connections are disconnected.

* Adds last wireguard handshake and received/sent bytes per peer
This commit is contained in:
Viktor Liu
2024-01-22 12:20:24 +01:00
committed by GitHub
parent d4194cba6a
commit a7d6632298
28 changed files with 1571 additions and 339 deletions

View File

@@ -17,4 +17,5 @@ type Client interface {
GetDeviceAuthorizationFlow(serverKey wgtypes.Key) (*proto.DeviceAuthorizationFlow, error)
GetPKCEAuthorizationFlow(serverKey wgtypes.Key) (*proto.PKCEAuthorizationFlow, error)
GetNetworkMap() (*proto.NetworkMap, error)
IsHealthy() bool
}

View File

@@ -28,7 +28,7 @@ import (
// ConnStateNotifier is a wrapper interface of the status recorders
type ConnStateNotifier interface {
MarkManagementDisconnected()
MarkManagementDisconnected(error)
MarkManagementConnected()
}
@@ -154,7 +154,7 @@ func (c *GrpcClient) Sync(msgHandler func(msg *proto.SyncResponse) error) error
return nil
default:
backOff.Reset() // reset backoff counter after successful connection
c.notifyDisconnected()
c.notifyDisconnected(err)
log.Warnf("disconnected from the Management service but will retry silently. Reason: %v", err)
return err
}
@@ -283,6 +283,32 @@ func (c *GrpcClient) GetServerPublicKey() (*wgtypes.Key, error) {
return &serverKey, nil
}
// IsHealthy probes the gRPC connection and returns false on errors
func (c *GrpcClient) IsHealthy() bool {
switch c.conn.GetState() {
case connectivity.TransientFailure:
return false
case connectivity.Connecting:
return true
case connectivity.Shutdown:
return true
case connectivity.Idle:
case connectivity.Ready:
}
ctx, cancel := context.WithTimeout(c.ctx, 1*time.Second)
defer cancel()
_, err := c.realClient.GetServerKey(ctx, &proto.Empty{})
if err != nil {
c.notifyDisconnected(err)
log.Warnf("health check returned: %s", err)
return false
}
c.notifyConnected()
return true
}
func (c *GrpcClient) login(serverKey wgtypes.Key, req *proto.LoginRequest) (*proto.LoginResponse, error) {
if !c.ready() {
return nil, fmt.Errorf("no connection to management")
@@ -400,14 +426,14 @@ func (c *GrpcClient) GetPKCEAuthorizationFlow(serverKey wgtypes.Key) (*proto.PKC
return flowInfoResp, nil
}
func (c *GrpcClient) notifyDisconnected() {
func (c *GrpcClient) notifyDisconnected(err error) {
c.connStateCallbackLock.RLock()
defer c.connStateCallbackLock.RUnlock()
if c.connStateCallback == nil {
return
}
c.connStateCallback.MarkManagementDisconnected()
c.connStateCallback.MarkManagementDisconnected(err)
}
func (c *GrpcClient) notifyConnected() {

View File

@@ -1,9 +1,10 @@
package client
import (
"golang.zx2c4.com/wireguard/wgctrl/wgtypes"
"github.com/netbirdio/netbird/client/system"
"github.com/netbirdio/netbird/management/proto"
"golang.zx2c4.com/wireguard/wgctrl/wgtypes"
)
type MockClient struct {
@@ -16,6 +17,10 @@ type MockClient struct {
GetPKCEAuthorizationFlowFunc func(serverKey wgtypes.Key) (*proto.PKCEAuthorizationFlow, error)
}
func (m *MockClient) IsHealthy() bool {
return true
}
func (m *MockClient) Close() error {
if m.CloseFunc == nil {
return nil