mirror of
https://github.com/netbirdio/netbird.git
synced 2026-04-16 07:16:38 +00:00
This PR adds the ability to trigger debug bundle generation remotely from the Management API/Dashboard.
This commit is contained in:
@@ -14,6 +14,7 @@ import (
|
||||
type Client interface {
|
||||
io.Closer
|
||||
Sync(ctx context.Context, sysInfo *system.Info, msgHandler func(msg *proto.SyncResponse) error) error
|
||||
Job(ctx context.Context, msgHandler func(msg *proto.JobRequest) *proto.JobResponse) error
|
||||
GetServerPublicKey() (*wgtypes.Key, error)
|
||||
Register(serverKey wgtypes.Key, setupKey string, jwtToken string, sysInfo *system.Info, sshKey []byte, dnsLabels domain.List) (*proto.LoginResponse, error)
|
||||
Login(serverKey wgtypes.Key, sysInfo *system.Info, sshKey []byte, dnsLabels domain.List) (*proto.LoginResponse, error)
|
||||
|
||||
@@ -18,12 +18,13 @@ import (
|
||||
"google.golang.org/grpc/status"
|
||||
|
||||
"github.com/netbirdio/management-integrations/integrations"
|
||||
ephemeral_manager "github.com/netbirdio/netbird/management/internals/modules/peers/ephemeral/manager"
|
||||
|
||||
"github.com/netbirdio/netbird/management/internals/controllers/network_map/controller"
|
||||
"github.com/netbirdio/netbird/management/internals/controllers/network_map/update_channel"
|
||||
"github.com/netbirdio/netbird/management/internals/modules/peers"
|
||||
"github.com/netbirdio/netbird/management/internals/modules/peers/ephemeral/manager"
|
||||
nbgrpc "github.com/netbirdio/netbird/management/internals/shared/grpc"
|
||||
"github.com/netbirdio/netbird/management/server/job"
|
||||
|
||||
"github.com/netbirdio/netbird/client/system"
|
||||
"github.com/netbirdio/netbird/encryption"
|
||||
@@ -92,6 +93,7 @@ func startManagement(t *testing.T) (*grpc.Server, net.Listener) {
|
||||
|
||||
peersManger := peers.NewManager(store, permissionsManagerMock)
|
||||
settingsManagerMock := settings.NewMockManager(ctrl)
|
||||
jobManager := job.NewJobManager(nil, store, peersManger)
|
||||
|
||||
ia, _ := integrations.NewIntegratedValidator(context.Background(), peersManger, settingsManagerMock, eventStore)
|
||||
|
||||
@@ -117,8 +119,8 @@ func startManagement(t *testing.T) (*grpc.Server, net.Listener) {
|
||||
ctx := context.Background()
|
||||
updateManager := update_channel.NewPeersUpdateManager(metrics)
|
||||
requestBuffer := mgmt.NewAccountRequestBuffer(ctx, store)
|
||||
networkMapController := controller.NewController(ctx, store, metrics, updateManager, requestBuffer, mgmt.MockIntegratedValidator{}, settingsMockManager, "netbird.selfhosted", port_forwarding.NewControllerMock(), manager.NewEphemeralManager(store, peersManger), config)
|
||||
accountManager, err := mgmt.BuildManager(context.Background(), config, store, networkMapController, nil, "", eventStore, nil, false, ia, metrics, port_forwarding.NewControllerMock(), settingsMockManager, permissionsManagerMock, false)
|
||||
networkMapController := controller.NewController(ctx, store, metrics, updateManager, requestBuffer, mgmt.MockIntegratedValidator{}, settingsMockManager, "netbird.selfhosted", port_forwarding.NewControllerMock(), ephemeral_manager.NewEphemeralManager(store, peersManger), config)
|
||||
accountManager, err := mgmt.BuildManager(context.Background(), config, store, networkMapController, jobManager, nil, "", eventStore, nil, false, ia, metrics, port_forwarding.NewControllerMock(), settingsMockManager, permissionsManagerMock, false)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
@@ -129,7 +131,7 @@ func startManagement(t *testing.T) (*grpc.Server, net.Listener) {
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
mgmtServer, err := nbgrpc.NewServer(config, accountManager, settingsMockManager, secretsManager, nil, nil, mgmt.MockIntegratedValidator{}, networkMapController, nil)
|
||||
mgmtServer, err := nbgrpc.NewServer(config, accountManager, settingsMockManager, jobManager, secretsManager, nil, nil, mgmt.MockIntegratedValidator{}, networkMapController, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
@@ -12,6 +12,7 @@ import (
|
||||
gstatus "google.golang.org/grpc/status"
|
||||
|
||||
"github.com/cenkalti/backoff/v4"
|
||||
"github.com/google/uuid"
|
||||
log "github.com/sirupsen/logrus"
|
||||
"golang.zx2c4.com/wireguard/wgctrl/wgtypes"
|
||||
"google.golang.org/grpc"
|
||||
@@ -111,8 +112,26 @@ func (c *GrpcClient) ready() bool {
|
||||
// Sync wraps the real client's Sync endpoint call and takes care of retries and encryption/decryption of messages
|
||||
// Blocking request. The result will be sent via msgHandler callback function
|
||||
func (c *GrpcClient) Sync(ctx context.Context, sysInfo *system.Info, msgHandler func(msg *proto.SyncResponse) error) error {
|
||||
backOff := defaultBackoff(ctx)
|
||||
return c.withMgmtStream(ctx, func(ctx context.Context, serverPubKey wgtypes.Key) error {
|
||||
return c.handleSyncStream(ctx, serverPubKey, sysInfo, msgHandler)
|
||||
})
|
||||
}
|
||||
|
||||
// Job wraps the real client's Job endpoint call and takes care of retries and encryption/decryption of messages
|
||||
// Blocking request. The result will be sent via msgHandler callback function
|
||||
func (c *GrpcClient) Job(ctx context.Context, msgHandler func(msg *proto.JobRequest) *proto.JobResponse) error {
|
||||
return c.withMgmtStream(ctx, func(ctx context.Context, serverPubKey wgtypes.Key) error {
|
||||
return c.handleJobStream(ctx, serverPubKey, msgHandler)
|
||||
})
|
||||
}
|
||||
|
||||
// withMgmtStream runs a streaming operation against the ManagementService
|
||||
// It takes care of retries, connection readiness, and fetching server public key.
|
||||
func (c *GrpcClient) withMgmtStream(
|
||||
ctx context.Context,
|
||||
handler func(ctx context.Context, serverPubKey wgtypes.Key) error,
|
||||
) error {
|
||||
backOff := defaultBackoff(ctx)
|
||||
operation := func() error {
|
||||
log.Debugf("management connection state %v", c.conn.GetState())
|
||||
connState := c.conn.GetState()
|
||||
@@ -130,7 +149,7 @@ func (c *GrpcClient) Sync(ctx context.Context, sysInfo *system.Info, msgHandler
|
||||
return err
|
||||
}
|
||||
|
||||
return c.handleStream(ctx, *serverPubKey, sysInfo, msgHandler, backOff)
|
||||
return handler(ctx, *serverPubKey)
|
||||
}
|
||||
|
||||
err := backoff.Retry(operation, backOff)
|
||||
@@ -141,12 +160,151 @@ func (c *GrpcClient) Sync(ctx context.Context, sysInfo *system.Info, msgHandler
|
||||
return err
|
||||
}
|
||||
|
||||
func (c *GrpcClient) handleStream(ctx context.Context, serverPubKey wgtypes.Key, sysInfo *system.Info,
|
||||
msgHandler func(msg *proto.SyncResponse) error, backOff backoff.BackOff) error {
|
||||
func (c *GrpcClient) handleJobStream(
|
||||
ctx context.Context,
|
||||
serverPubKey wgtypes.Key,
|
||||
msgHandler func(msg *proto.JobRequest) *proto.JobResponse,
|
||||
) error {
|
||||
ctx, cancelStream := context.WithCancel(ctx)
|
||||
defer cancelStream()
|
||||
|
||||
stream, err := c.connectToStream(ctx, serverPubKey, sysInfo)
|
||||
stream, err := c.realClient.Job(ctx)
|
||||
if err != nil {
|
||||
log.Errorf("failed to open job stream: %v", err)
|
||||
return err
|
||||
}
|
||||
|
||||
// Handshake with the server
|
||||
if err := c.sendHandshake(ctx, stream, serverPubKey); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
log.Debug("job stream handshake sent successfully")
|
||||
|
||||
// Main loop: receive, process, respond
|
||||
for {
|
||||
jobReq, err := c.receiveJobRequest(ctx, stream, serverPubKey)
|
||||
if err != nil {
|
||||
c.notifyDisconnected(err)
|
||||
if s, ok := gstatus.FromError(err); ok {
|
||||
switch s.Code() {
|
||||
case codes.PermissionDenied:
|
||||
return backoff.Permanent(err) // unrecoverable error, propagate to the upper layer
|
||||
case codes.Canceled:
|
||||
log.Debugf("management connection context has been canceled, this usually indicates shutdown")
|
||||
return err
|
||||
case codes.Unimplemented:
|
||||
log.Warn("Job feature is not supported by the current management server version. " +
|
||||
"Please update the management service to use this feature.")
|
||||
return nil
|
||||
default:
|
||||
log.Warnf("disconnected from the Management service but will retry silently. Reason: %v", err)
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
// non-gRPC error
|
||||
log.Warnf("disconnected from the Management service but will retry silently. Reason: %v", err)
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if jobReq == nil || len(jobReq.ID) == 0 {
|
||||
log.Debug("received unknown or empty job request, skipping")
|
||||
continue
|
||||
}
|
||||
|
||||
log.Infof("received a new job from the management server (ID: %s)", jobReq.ID)
|
||||
jobResp := c.processJobRequest(ctx, jobReq, msgHandler)
|
||||
if err := c.sendJobResponse(ctx, stream, serverPubKey, jobResp); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// sendHandshake sends the initial handshake message
|
||||
func (c *GrpcClient) sendHandshake(ctx context.Context, stream proto.ManagementService_JobClient, serverPubKey wgtypes.Key) error {
|
||||
handshakeReq := &proto.JobRequest{
|
||||
ID: []byte(uuid.New().String()),
|
||||
}
|
||||
encHello, err := encryption.EncryptMessage(serverPubKey, c.key, handshakeReq)
|
||||
if err != nil {
|
||||
log.Errorf("failed to encrypt handshake message: %v", err)
|
||||
return err
|
||||
}
|
||||
return stream.Send(&proto.EncryptedMessage{
|
||||
WgPubKey: c.key.PublicKey().String(),
|
||||
Body: encHello,
|
||||
})
|
||||
}
|
||||
|
||||
// receiveJobRequest waits for and decrypts a job request
|
||||
func (c *GrpcClient) receiveJobRequest(
|
||||
ctx context.Context,
|
||||
stream proto.ManagementService_JobClient,
|
||||
serverPubKey wgtypes.Key,
|
||||
) (*proto.JobRequest, error) {
|
||||
encryptedMsg, err := stream.Recv()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
jobReq := &proto.JobRequest{}
|
||||
if err := encryption.DecryptMessage(serverPubKey, c.key, encryptedMsg.Body, jobReq); err != nil {
|
||||
log.Warnf("failed to decrypt job request: %v", err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return jobReq, nil
|
||||
}
|
||||
|
||||
// processJobRequest executes the handler and ensures a valid response
|
||||
func (c *GrpcClient) processJobRequest(
|
||||
ctx context.Context,
|
||||
jobReq *proto.JobRequest,
|
||||
msgHandler func(msg *proto.JobRequest) *proto.JobResponse,
|
||||
) *proto.JobResponse {
|
||||
jobResp := msgHandler(jobReq)
|
||||
if jobResp == nil {
|
||||
jobResp = &proto.JobResponse{
|
||||
ID: jobReq.ID,
|
||||
Status: proto.JobStatus_failed,
|
||||
Reason: []byte("handler returned nil response"),
|
||||
}
|
||||
log.Warnf("job handler returned nil for job %s", string(jobReq.ID))
|
||||
}
|
||||
return jobResp
|
||||
}
|
||||
|
||||
// sendJobResponse encrypts and sends a job response
|
||||
func (c *GrpcClient) sendJobResponse(
|
||||
ctx context.Context,
|
||||
stream proto.ManagementService_JobClient,
|
||||
serverPubKey wgtypes.Key,
|
||||
resp *proto.JobResponse,
|
||||
) error {
|
||||
encResp, err := encryption.EncryptMessage(serverPubKey, c.key, resp)
|
||||
if err != nil {
|
||||
log.Errorf("failed to encrypt job response for job %s: %v", string(resp.ID), err)
|
||||
return err
|
||||
}
|
||||
|
||||
if err := stream.Send(&proto.EncryptedMessage{
|
||||
WgPubKey: c.key.PublicKey().String(),
|
||||
Body: encResp,
|
||||
}); err != nil {
|
||||
log.Errorf("failed to send job response for job %s: %v", string(resp.ID), err)
|
||||
return err
|
||||
}
|
||||
|
||||
log.Infof("job response sent for job %s (status: %s)", string(resp.ID), resp.Status.String())
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *GrpcClient) handleSyncStream(ctx context.Context, serverPubKey wgtypes.Key, sysInfo *system.Info, msgHandler func(msg *proto.SyncResponse) error) error {
|
||||
ctx, cancelStream := context.WithCancel(ctx)
|
||||
defer cancelStream()
|
||||
|
||||
stream, err := c.connectToSyncStream(ctx, serverPubKey, sysInfo)
|
||||
if err != nil {
|
||||
log.Debugf("failed to open Management Service stream: %s", err)
|
||||
if s, ok := gstatus.FromError(err); ok && s.Code() == codes.PermissionDenied {
|
||||
@@ -159,20 +317,22 @@ func (c *GrpcClient) handleStream(ctx context.Context, serverPubKey wgtypes.Key,
|
||||
c.notifyConnected()
|
||||
|
||||
// blocking until error
|
||||
err = c.receiveEvents(stream, serverPubKey, msgHandler)
|
||||
// we need this reset because after a successful connection and a consequent error, backoff lib doesn't
|
||||
// reset times and next try will start with a long delay
|
||||
backOff.Reset()
|
||||
err = c.receiveUpdatesEvents(stream, serverPubKey, msgHandler)
|
||||
if err != nil {
|
||||
c.notifyDisconnected(err)
|
||||
s, _ := gstatus.FromError(err)
|
||||
switch s.Code() {
|
||||
case codes.PermissionDenied:
|
||||
return backoff.Permanent(err) // unrecoverable error, propagate to the upper layer
|
||||
case codes.Canceled:
|
||||
log.Debugf("management connection context has been canceled, this usually indicates shutdown")
|
||||
return nil
|
||||
default:
|
||||
if s, ok := gstatus.FromError(err); ok {
|
||||
switch s.Code() {
|
||||
case codes.PermissionDenied:
|
||||
return backoff.Permanent(err) // unrecoverable error, propagate to the upper layer
|
||||
case codes.Canceled:
|
||||
log.Debugf("management connection context has been canceled, this usually indicates shutdown")
|
||||
return nil
|
||||
default:
|
||||
log.Warnf("disconnected from the Management service but will retry silently. Reason: %v", err)
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
// non-gRPC error
|
||||
log.Warnf("disconnected from the Management service but will retry silently. Reason: %v", err)
|
||||
return err
|
||||
}
|
||||
@@ -191,7 +351,7 @@ func (c *GrpcClient) GetNetworkMap(sysInfo *system.Info) (*proto.NetworkMap, err
|
||||
|
||||
ctx, cancelStream := context.WithCancel(c.ctx)
|
||||
defer cancelStream()
|
||||
stream, err := c.connectToStream(ctx, *serverPubKey, sysInfo)
|
||||
stream, err := c.connectToSyncStream(ctx, *serverPubKey, sysInfo)
|
||||
if err != nil {
|
||||
log.Debugf("failed to open Management Service stream: %s", err)
|
||||
return nil, err
|
||||
@@ -224,7 +384,7 @@ func (c *GrpcClient) GetNetworkMap(sysInfo *system.Info) (*proto.NetworkMap, err
|
||||
return decryptedResp.GetNetworkMap(), nil
|
||||
}
|
||||
|
||||
func (c *GrpcClient) connectToStream(ctx context.Context, serverPubKey wgtypes.Key, sysInfo *system.Info) (proto.ManagementService_SyncClient, error) {
|
||||
func (c *GrpcClient) connectToSyncStream(ctx context.Context, serverPubKey wgtypes.Key, sysInfo *system.Info) (proto.ManagementService_SyncClient, error) {
|
||||
req := &proto.SyncRequest{Meta: infoToMetaData(sysInfo)}
|
||||
|
||||
myPrivateKey := c.key
|
||||
@@ -243,7 +403,7 @@ func (c *GrpcClient) connectToStream(ctx context.Context, serverPubKey wgtypes.K
|
||||
return sync, nil
|
||||
}
|
||||
|
||||
func (c *GrpcClient) receiveEvents(stream proto.ManagementService_SyncClient, serverPubKey wgtypes.Key, msgHandler func(msg *proto.SyncResponse) error) error {
|
||||
func (c *GrpcClient) receiveUpdatesEvents(stream proto.ManagementService_SyncClient, serverPubKey wgtypes.Key, msgHandler func(msg *proto.SyncResponse) error) error {
|
||||
for {
|
||||
update, err := stream.Recv()
|
||||
if err == io.EOF {
|
||||
|
||||
@@ -20,6 +20,7 @@ type MockClient struct {
|
||||
GetPKCEAuthorizationFlowFunc func(serverKey wgtypes.Key) (*proto.PKCEAuthorizationFlow, error)
|
||||
SyncMetaFunc func(sysInfo *system.Info) error
|
||||
LogoutFunc func() error
|
||||
JobFunc func(ctx context.Context, msgHandler func(msg *proto.JobRequest) *proto.JobResponse) error
|
||||
}
|
||||
|
||||
func (m *MockClient) IsHealthy() bool {
|
||||
@@ -40,6 +41,13 @@ func (m *MockClient) Sync(ctx context.Context, sysInfo *system.Info, msgHandler
|
||||
return m.SyncFunc(ctx, sysInfo, msgHandler)
|
||||
}
|
||||
|
||||
func (m *MockClient) Job(ctx context.Context, msgHandler func(msg *proto.JobRequest) *proto.JobResponse) error {
|
||||
if m.JobFunc == nil {
|
||||
return nil
|
||||
}
|
||||
return m.JobFunc(ctx, msgHandler)
|
||||
}
|
||||
|
||||
func (m *MockClient) GetServerPublicKey() (*wgtypes.Key, error) {
|
||||
if m.GetServerPublicKeyFunc == nil {
|
||||
return nil, nil
|
||||
|
||||
Reference in New Issue
Block a user