mirror of
https://github.com/netbirdio/netbird.git
synced 2026-04-25 19:56:46 +00:00
Refactor fast-path Sync to log skip reasons, streamline tryFastPathSync outputs, and improve debug consistency.
This commit is contained in:
@@ -354,9 +354,11 @@ func (s *Server) Sync(req *proto.EncryptedMessage, srv proto.ManagementService_S
|
|||||||
metahash := metaHash(peerMeta, realIP.String())
|
metahash := metaHash(peerMeta, realIP.String())
|
||||||
s.loginFilter.addLogin(peerKey.String(), metahash)
|
s.loginFilter.addLogin(peerKey.String(), metahash)
|
||||||
|
|
||||||
if took, err := s.tryFastPathSync(ctx, reqStart, syncStart, accountID, peerKey, peerMeta, realIP, metahash, srv, &unlock); took {
|
took, skipReason, err := s.tryFastPathSync(ctx, reqStart, syncStart, accountID, peerKey, peerMeta, realIP, metahash, srv, &unlock)
|
||||||
|
if took {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
log.WithContext(ctx).Debugf("Sync (fast path) skipped reason=%s", skipReason)
|
||||||
|
|
||||||
peer, netMap, postureChecks, dnsFwdPort, err := s.accountManager.SyncAndMarkPeer(ctx, accountID, peerKey.String(), peerMeta, realIP, syncStart)
|
peer, netMap, postureChecks, dnsFwdPort, err := s.accountManager.SyncAndMarkPeer(ctx, accountID, peerKey.String(), peerMeta, realIP, syncStart)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
@@ -110,12 +110,14 @@ func (s *Server) lookupPeerAuthFromCache(peerPubKey string, incomingMetaHash uin
|
|||||||
// - the cached meta hash matches the incoming meta hash
|
// - the cached meta hash matches the incoming meta hash
|
||||||
// - the cached serial is non-zero (guard against uninitialised entries)
|
// - the cached serial is non-zero (guard against uninitialised entries)
|
||||||
//
|
//
|
||||||
// recordFastPathSkip emits a skip log and bumps the slow-path sync counter
|
// recordFastPathSkip bumps the slow-path sync counter with a reason label.
|
||||||
// with a reason label. Used from every early-return site in tryFastPathSync
|
// Used from every early-return site in tryFastPathSync so Grafana can graph
|
||||||
// so the fast-path hit-rate histogram in Grafana breaks down the misses by
|
// fast-path misses by cause. No log is emitted here — a single
|
||||||
// cause.
|
// "Sync (fast path) skipped reason=<reason>" line is written by the Sync
|
||||||
func (s *Server) recordFastPathSkip(ctx context.Context, reason string) {
|
// handler in server.go once tryFastPathSync returns so a grep for
|
||||||
log.WithContext(ctx).Debugf("fast path: skipped (reason=%s)", reason)
|
// "Sync (fast path)" finds both the success and the skip outcomes in one
|
||||||
|
// shot.
|
||||||
|
func (s *Server) recordFastPathSkip(_ context.Context, reason string) {
|
||||||
if s.appMetrics != nil {
|
if s.appMetrics != nil {
|
||||||
s.appMetrics.GRPCMetrics().CountSlowPathSync(reason)
|
s.appMetrics.GRPCMetrics().CountSlowPathSync(reason)
|
||||||
}
|
}
|
||||||
@@ -249,7 +251,9 @@ func (s *Server) fetchExtraSettings(ctx context.Context, accountID string) (*nbt
|
|||||||
// SetupRefresh, handleUpdates) and the returned took value is true.
|
// SetupRefresh, handleUpdates) and the returned took value is true.
|
||||||
//
|
//
|
||||||
// When took is true the caller must return the accompanying err. When took is
|
// When took is true the caller must return the accompanying err. When took is
|
||||||
// false the caller falls through to the existing slow path.
|
// false the caller falls through to the existing slow path and should log
|
||||||
|
// "Sync (fast path) skipped reason=<skipReason>" so a single grep on
|
||||||
|
// "Sync (fast path)" finds both the fast-path successes and misses.
|
||||||
func (s *Server) tryFastPathSync(
|
func (s *Server) tryFastPathSync(
|
||||||
ctx context.Context,
|
ctx context.Context,
|
||||||
reqStart, syncStart time.Time,
|
reqStart, syncStart time.Time,
|
||||||
@@ -260,18 +264,18 @@ func (s *Server) tryFastPathSync(
|
|||||||
peerMetaHash uint64,
|
peerMetaHash uint64,
|
||||||
srv proto.ManagementService_SyncServer,
|
srv proto.ManagementService_SyncServer,
|
||||||
unlock *func(),
|
unlock *func(),
|
||||||
) (took bool, err error) {
|
) (took bool, skipReason string, err error) {
|
||||||
if s.peerSerialCache == nil {
|
if s.peerSerialCache == nil {
|
||||||
s.recordFastPathSkip(ctx, "cache_disabled")
|
s.recordFastPathSkip(ctx, "cache_disabled")
|
||||||
return false, nil
|
return false, "cache_disabled", nil
|
||||||
}
|
}
|
||||||
if !s.fastPathFlag.Enabled() {
|
if !s.fastPathFlag.Enabled() {
|
||||||
s.recordFastPathSkip(ctx, "flag_off")
|
s.recordFastPathSkip(ctx, "flag_off")
|
||||||
return false, nil
|
return false, "flag_off", nil
|
||||||
}
|
}
|
||||||
if strings.EqualFold(peerMeta.GoOS, "android") {
|
if strings.EqualFold(peerMeta.GoOS, "android") {
|
||||||
s.recordFastPathSkip(ctx, "android")
|
s.recordFastPathSkip(ctx, "android")
|
||||||
return false, nil
|
return false, "android", nil
|
||||||
}
|
}
|
||||||
|
|
||||||
networkStart := time.Now()
|
networkStart := time.Now()
|
||||||
@@ -279,7 +283,7 @@ func (s *Server) tryFastPathSync(
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
log.WithContext(ctx).Debugf("fast path: account network serial lookup error: %v", err)
|
log.WithContext(ctx).Debugf("fast path: account network serial lookup error: %v", err)
|
||||||
s.recordFastPathSkip(ctx, "account_network_error")
|
s.recordFastPathSkip(ctx, "account_network_error")
|
||||||
return false, nil
|
return false, "account_network_error", nil
|
||||||
}
|
}
|
||||||
log.WithContext(ctx).Debugf("fast path: initial GetAccountNetworkSerial took %s", time.Since(networkStart))
|
log.WithContext(ctx).Debugf("fast path: initial GetAccountNetworkSerial took %s", time.Since(networkStart))
|
||||||
|
|
||||||
@@ -288,7 +292,7 @@ func (s *Server) tryFastPathSync(
|
|||||||
if reason := fastPathSkipReason(hit, cached, currentSerial, peerMetaHash); reason != "" {
|
if reason := fastPathSkipReason(hit, cached, currentSerial, peerMetaHash); reason != "" {
|
||||||
log.WithContext(ctx).Debugf("fast path: eligibility check took %s", time.Since(eligibilityStart))
|
log.WithContext(ctx).Debugf("fast path: eligibility check took %s", time.Since(eligibilityStart))
|
||||||
s.recordFastPathSkip(ctx, reason)
|
s.recordFastPathSkip(ctx, reason)
|
||||||
return false, nil
|
return false, reason, nil
|
||||||
}
|
}
|
||||||
log.WithContext(ctx).Debugf("fast path: eligibility check (hit) took %s", time.Since(eligibilityStart))
|
log.WithContext(ctx).Debugf("fast path: eligibility check (hit) took %s", time.Since(eligibilityStart))
|
||||||
|
|
||||||
@@ -299,7 +303,7 @@ func (s *Server) tryFastPathSync(
|
|||||||
peer, updates, committed := s.commitFastPath(ctx, accountID, peerKey, realIP, syncStart, cachedPeer)
|
peer, updates, committed := s.commitFastPath(ctx, accountID, peerKey, realIP, syncStart, cachedPeer)
|
||||||
if !committed {
|
if !committed {
|
||||||
s.recordFastPathSkip(ctx, "commit_failed")
|
s.recordFastPathSkip(ctx, "commit_failed")
|
||||||
return false, nil
|
return false, "commit_failed", nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Upgrade the cache only when we had to fetch the peer from the store
|
// Upgrade the cache only when we had to fetch the peer from the store
|
||||||
@@ -309,7 +313,7 @@ func (s *Server) tryFastPathSync(
|
|||||||
// entry is already in the full shape, so there's nothing to upgrade.
|
// entry is already in the full shape, so there's nothing to upgrade.
|
||||||
upgradeCache := cachedPeer == nil
|
upgradeCache := cachedPeer == nil
|
||||||
|
|
||||||
return true, s.runFastPathSync(ctx, reqStart, syncStart, accountID, peerKey, peer, updates, cached.Serial, peerMetaHash, upgradeCache, srv, unlock)
|
return true, "", s.runFastPathSync(ctx, reqStart, syncStart, accountID, peerKey, peer, updates, cached.Serial, peerMetaHash, upgradeCache, srv, unlock)
|
||||||
}
|
}
|
||||||
|
|
||||||
// commitFastPath subscribes the peer to network-map updates and marks it
|
// commitFastPath subscribes the peer to network-map updates and marks it
|
||||||
|
|||||||
Reference in New Issue
Block a user