// Patch summary: adds per-stage timing metrics to the per-peer goroutine in
// (*DefaultAccountManager).UpdateAccountPeers and the histograms that back them.
//
// management/server/peer.go
//   - The function-level timer `start` is renamed to `globalStart`; it is still
//     what CountUpdateAccountPeersDuration receives after wg.Wait().
//   - Each goroutine gets its own `start`, reset before every measured stage:
//       1. am.getPeerPostureChecks            -> CountCalcPostureChecksDuration
//       2. account.GetPeerNetworkMap          -> CountCalcPeerNetworkMapDuration
//       3. proxyNetworkMaps lookup and Merge  -> CountMergeNetworkMapDuration
//       4. toSyncResponse                     -> CountToSyncResponseDuration
//   - GetExtraSettings and SendUpdate are not covered by any of the four timers
//     (`start` is reset after GetExtraSettings, and SendUpdate follows the last
//     Count call).
//
// management/server/telemetry/updatechannel_metrics.go
//   - Adds four metric.Int64Histogram fields to UpdateChannelMetrics, creates them
//     in NewUpdateChannelMetrics with unit "microseconds", and adds four Count*
//     methods that record duration.Microseconds() using metrics.ctx.
//
// NOTE(review): the posture-check duration is only recorded on success; the
// error branch returns before CountCalcPostureChecksDuration is reached.
// NOTE(review): the new calls use am.metrics.UpdateChannelMetrics() without the
// `am.metrics != nil` guard that wraps CountUpdateAccountPeersDuration at the end
// of the function - confirm am.metrics can never be nil on this path.
// NOTE(review): the "failed to get posture checks" log formats peer.ID while the
// rest of the goroutine uses its parameter p - presumably p.ID was intended;
// verify (this is an unchanged context line, not part of the patch).
// NOTE(review): in this copy the line breaks inside the hunks appear to have been
// collapsed into spaces - TODO confirm and restore them before applying.
diff --git a/management/server/peer.go b/management/server/peer.go index 54c6b47ff..8412842c0 100644 --- a/management/server/peer.go +++ b/management/server/peer.go @@ -1189,7 +1189,7 @@ func (am *DefaultAccountManager) UpdateAccountPeers(ctx context.Context, account return } - start := time.Now() + globalStart := time.Now() approvedPeersMap, err := am.integratedPeerValidator.GetValidatedPeers(account.Id, maps.Values(account.Groups), maps.Values(account.Peers), account.Settings.Extra) if err != nil { @@ -1224,18 +1224,27 @@ func (am *DefaultAccountManager) UpdateAccountPeers(ctx context.Context, account defer wg.Done() defer func() { <-semaphore }() + start := time.Now() + postureChecks, err := am.getPeerPostureChecks(account, p.ID) if err != nil { log.WithContext(ctx).Debugf("failed to get posture checks for peer %s: %v", peer.ID, err) return } + am.metrics.UpdateChannelMetrics().CountCalcPostureChecksDuration(time.Since(start)) + start = time.Now() + remotePeerNetworkMap := account.GetPeerNetworkMap(ctx, p.ID, customZone, approvedPeersMap, resourcePolicies, routers, am.metrics.AccountManagerMetrics()) + am.metrics.UpdateChannelMetrics().CountCalcPeerNetworkMapDuration(time.Since(start)) + start = time.Now() + proxyNetworkMap, ok := proxyNetworkMaps[p.ID] if ok { remotePeerNetworkMap.Merge(proxyNetworkMap) } + am.metrics.UpdateChannelMetrics().CountMergeNetworkMapDuration(time.Since(start)) extraSetting, err := am.settingsManager.GetExtraSettings(ctx, accountID) if err != nil { @@ -1243,7 +1252,10 @@ func (am *DefaultAccountManager) UpdateAccountPeers(ctx context.Context, account return } + start = time.Now() update := toSyncResponse(ctx, nil, p, nil, nil, remotePeerNetworkMap, dnsDomain, postureChecks, dnsCache, account.Settings, extraSetting) + am.metrics.UpdateChannelMetrics().CountToSyncResponseDuration(time.Since(start)) + am.peersUpdateManager.SendUpdate(ctx, p.ID, &UpdateMessage{Update: update, NetworkMap: remotePeerNetworkMap}) }(peer) } @@ -1252,7 +1264,7 
@@ func (am *DefaultAccountManager) UpdateAccountPeers(ctx context.Context, account wg.Wait() if am.metrics != nil { - am.metrics.AccountManagerMetrics().CountUpdateAccountPeersDuration(time.Since(start)) + am.metrics.AccountManagerMetrics().CountUpdateAccountPeersDuration(time.Since(globalStart)) } } diff --git a/management/server/telemetry/updatechannel_metrics.go b/management/server/telemetry/updatechannel_metrics.go index 584b9ec20..21b4a6085 100644 --- a/management/server/telemetry/updatechannel_metrics.go +++ b/management/server/telemetry/updatechannel_metrics.go @@ -18,6 +18,10 @@ type UpdateChannelMetrics struct { getAllConnectedPeersDurationMicro metric.Int64Histogram getAllConnectedPeers metric.Int64Histogram hasChannelDurationMicro metric.Int64Histogram + calcPostureChecksDurationMicro metric.Int64Histogram + calcPeerNetworkMapDurationMicro metric.Int64Histogram + mergeNetworkMapDurationMicro metric.Int64Histogram + toSyncResponseDurationMicro metric.Int64Histogram ctx context.Context } @@ -89,6 +93,38 @@ func NewUpdateChannelMetrics(ctx context.Context, meter metric.Meter) (*UpdateCh return nil, err } + calcPostureChecksDurationMicro, err := meter.Int64Histogram("management.updatechannel.calc.posturechecks.duration.micro", + metric.WithUnit("microseconds"), + metric.WithDescription("Duration of how long it takes to get the posture checks for a peer"), + ) + if err != nil { + return nil, err + } + + calcPeerNetworkMapDurationMicro, err := meter.Int64Histogram("management.updatechannel.calc.networkmap.duration.micro", + metric.WithUnit("microseconds"), + metric.WithDescription("Duration of how long it takes to calculate the network map for a peer"), + ) + if err != nil { + return nil, err + } + + mergeNetworkMapDurationMicro, err := meter.Int64Histogram("management.updatechannel.merge.networkmap.duration.micro", + metric.WithUnit("microseconds"), + metric.WithDescription("Duration of how long it takes to merge the network maps for a peer"), + ) + if err 
!= nil { + return nil, err + } + + toSyncResponseDurationMicro, err := meter.Int64Histogram("management.updatechannel.tosyncresponse.duration.micro", + metric.WithUnit("microseconds"), + metric.WithDescription("Duration of how long it takes to convert the network map to sync response"), + ) + if err != nil { + return nil, err + } + return &UpdateChannelMetrics{ createChannelDurationMicro: createChannelDurationMicro, closeChannelDurationMicro: closeChannelDurationMicro, @@ -98,6 +134,10 @@ func NewUpdateChannelMetrics(ctx context.Context, meter metric.Meter) (*UpdateCh getAllConnectedPeersDurationMicro: getAllConnectedPeersDurationMicro, getAllConnectedPeers: getAllConnectedPeers, hasChannelDurationMicro: hasChannelDurationMicro, + calcPostureChecksDurationMicro: calcPostureChecksDurationMicro, + calcPeerNetworkMapDurationMicro: calcPeerNetworkMapDurationMicro, + mergeNetworkMapDurationMicro: mergeNetworkMapDurationMicro, + toSyncResponseDurationMicro: toSyncResponseDurationMicro, ctx: ctx, }, nil } @@ -137,3 +177,19 @@ func (metrics *UpdateChannelMetrics) CountGetAllConnectedPeersDuration(duration func (metrics *UpdateChannelMetrics) CountHasChannelDuration(duration time.Duration) { metrics.hasChannelDurationMicro.Record(metrics.ctx, duration.Microseconds()) } + +func (metrics *UpdateChannelMetrics) CountCalcPostureChecksDuration(duration time.Duration) { + metrics.calcPostureChecksDurationMicro.Record(metrics.ctx, duration.Microseconds()) +} + +func (metrics *UpdateChannelMetrics) CountCalcPeerNetworkMapDuration(duration time.Duration) { + metrics.calcPeerNetworkMapDurationMicro.Record(metrics.ctx, duration.Microseconds()) +} + +func (metrics *UpdateChannelMetrics) CountMergeNetworkMapDuration(duration time.Duration) { + metrics.mergeNetworkMapDurationMicro.Record(metrics.ctx, duration.Microseconds()) +} + +func (metrics *UpdateChannelMetrics) CountToSyncResponseDuration(duration time.Duration) { + metrics.toSyncResponseDurationMicro.Record(metrics.ctx, 
duration.Microseconds()) +}