mirror of
https://github.com/netbirdio/netbird.git
synced 2026-04-17 15:56:39 +00:00
debug dns
This commit is contained in:
@@ -388,12 +388,12 @@ func generateDebugBundle(config *profilemanager.Config, recorder *peer.Status, c
|
||||
}
|
||||
|
||||
func init() {
|
||||
debugBundleCmd.Flags().Uint32VarP(&logFileCount, "log-file-count", "C", 1, "Number of rotated log files to include in debug bundle")
|
||||
debugBundleCmd.Flags().Uint32VarP(&logFileCount, "log-file-count", "C", 10, "Number of rotated log files to include in debug bundle")
|
||||
debugBundleCmd.Flags().BoolVarP(&systemInfoFlag, "system-info", "S", true, "Adds system information to the debug bundle")
|
||||
debugBundleCmd.Flags().BoolVarP(&uploadBundleFlag, "upload-bundle", "U", false, "Uploads the debug bundle to a server")
|
||||
debugBundleCmd.Flags().StringVar(&uploadBundleURLFlag, "upload-bundle-url", types.DefaultBundleURL, "Service URL to get an URL to upload the debug bundle")
|
||||
|
||||
forCmd.Flags().Uint32VarP(&logFileCount, "log-file-count", "C", 1, "Number of rotated log files to include in debug bundle")
|
||||
forCmd.Flags().Uint32VarP(&logFileCount, "log-file-count", "C", 10, "Number of rotated log files to include in debug bundle")
|
||||
forCmd.Flags().BoolVarP(&systemInfoFlag, "system-info", "S", true, "Adds system information to the debug bundle")
|
||||
forCmd.Flags().BoolVarP(&uploadBundleFlag, "upload-bundle", "U", false, "Uploads the debug bundle to a server")
|
||||
forCmd.Flags().StringVar(&uploadBundleURLFlag, "upload-bundle-url", types.DefaultBundleURL, "Service URL to get an URL to upload the debug bundle")
|
||||
|
||||
@@ -46,6 +46,12 @@ type DNSForwarder struct {
|
||||
fwdEntries []*ForwarderEntry
|
||||
firewall firewaller
|
||||
resolver resolver
|
||||
|
||||
// failure rate tracking for routed domains
|
||||
failureMu sync.Mutex
|
||||
failureCounts map[string]int
|
||||
failureWindow time.Duration
|
||||
lastLogPerHost map[string]time.Time
|
||||
}
|
||||
|
||||
func NewDNSForwarder(listenAddress string, ttl uint32, firewall firewaller, statusRecorder *peer.Status) *DNSForwarder {
|
||||
@@ -56,6 +62,9 @@ func NewDNSForwarder(listenAddress string, ttl uint32, firewall firewaller, stat
|
||||
firewall: firewall,
|
||||
statusRecorder: statusRecorder,
|
||||
resolver: net.DefaultResolver,
|
||||
failureCounts: make(map[string]int),
|
||||
failureWindow: 10 * time.Second,
|
||||
lastLogPerHost: make(map[string]time.Time),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -306,6 +315,11 @@ func (f *DNSForwarder) handleDNSError(ctx context.Context, w dns.ResponseWriter,
|
||||
if err := w.WriteMsg(resp); err != nil {
|
||||
log.Errorf("failed to write failure DNS response: %v", err)
|
||||
}
|
||||
|
||||
// Track failure rate for routed domains only
|
||||
if resID, _ := f.getMatchingEntries(strings.TrimSuffix(domain, ".")); resID != "" {
|
||||
f.recordDomainFailure(strings.TrimSuffix(domain, "."))
|
||||
}
|
||||
}
|
||||
|
||||
// addIPsToResponse adds IP addresses to the DNS response as appropriate A or AAAA records
|
||||
@@ -341,6 +355,27 @@ func (f *DNSForwarder) addIPsToResponse(resp *dns.Msg, domain string, ips []neti
|
||||
}
|
||||
}
|
||||
|
||||
// recordDomainFailure increments failure count for the domain and logs at info/warn with throttling.
|
||||
func (f *DNSForwarder) recordDomainFailure(domain string) {
|
||||
domain = strings.ToLower(domain)
|
||||
|
||||
f.failureMu.Lock()
|
||||
defer f.failureMu.Unlock()
|
||||
|
||||
f.failureCounts[domain]++
|
||||
count := f.failureCounts[domain]
|
||||
|
||||
now := time.Now()
|
||||
last, ok := f.lastLogPerHost[domain]
|
||||
if ok && now.Sub(last) < f.failureWindow {
|
||||
return
|
||||
}
|
||||
f.lastLogPerHost[domain] = now
|
||||
|
||||
log.Warnf("[d] DNS failures observed for routed domain: domain=%s failures=%d/%s", domain, count, f.failureWindow)
|
||||
|
||||
}
|
||||
|
||||
// getMatchingEntries retrieves the resource IDs for a given domain.
|
||||
// It returns the most specific match and all matching resource IDs.
|
||||
func (f *DNSForwarder) getMatchingEntries(domain string) (route.ResID, []*ForwarderEntry) {
|
||||
|
||||
@@ -21,9 +21,9 @@ import (
|
||||
"github.com/netbirdio/netbird/client/internal/ingressgw"
|
||||
"github.com/netbirdio/netbird/client/internal/relay"
|
||||
"github.com/netbirdio/netbird/client/proto"
|
||||
"github.com/netbirdio/netbird/route"
|
||||
"github.com/netbirdio/netbird/shared/management/domain"
|
||||
relayClient "github.com/netbirdio/netbird/shared/relay/client"
|
||||
"github.com/netbirdio/netbird/route"
|
||||
)
|
||||
|
||||
const eventQueueSize = 10
|
||||
@@ -201,6 +201,8 @@ type Status struct {
|
||||
resolvedDomainsStates map[domain.Domain]ResolvedDomainInfo
|
||||
lazyConnectionEnabled bool
|
||||
|
||||
lastDisconnectLog map[string]time.Time
|
||||
|
||||
// To reduce the number of notification invocation this bool will be true when need to call the notification
|
||||
// Some Peer actions mostly used by in a batch when the network map has been synchronized. In these type of events
|
||||
// set to true this variable and at the end of the processing we will reset it by the FinishPeerListModifications()
|
||||
@@ -229,6 +231,7 @@ func NewRecorder(mgmAddress string) *Status {
|
||||
notifier: newNotifier(),
|
||||
mgmAddress: mgmAddress,
|
||||
resolvedDomainsStates: map[domain.Domain]ResolvedDomainInfo{},
|
||||
lastDisconnectLog: make(map[string]time.Time),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -487,6 +490,9 @@ func (d *Status) UpdatePeerRelayedStateToDisconnected(receivedState State) error
|
||||
|
||||
d.peers[receivedState.PubKey] = peerState
|
||||
|
||||
// info log about disconnect with impacted routes (throttled)
|
||||
d.logPeerDisconnectIfNeeded(receivedState.PubKey, peerState)
|
||||
|
||||
if hasConnStatusChanged(oldState, receivedState.ConnStatus) {
|
||||
d.notifyPeerListChanged()
|
||||
}
|
||||
@@ -519,6 +525,9 @@ func (d *Status) UpdatePeerICEStateToDisconnected(receivedState State) error {
|
||||
|
||||
d.peers[receivedState.PubKey] = peerState
|
||||
|
||||
// info log about disconnect with impacted routes (throttled)
|
||||
d.logPeerDisconnectIfNeeded(receivedState.PubKey, peerState)
|
||||
|
||||
if hasConnStatusChanged(oldState, receivedState.ConnStatus) {
|
||||
d.notifyPeerListChanged()
|
||||
}
|
||||
@@ -529,6 +538,31 @@ func (d *Status) UpdatePeerICEStateToDisconnected(receivedState State) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// logPeerDisconnectIfNeeded logs an info message when a routing peer transitions to disconnected
|
||||
// with the number of impacted routes. Throttled to once per peer per 30 seconds.
|
||||
func (d *Status) logPeerDisconnectIfNeeded(pubKey string, state State) {
|
||||
if state.ConnStatus != StatusIdle {
|
||||
return
|
||||
}
|
||||
|
||||
now := time.Now()
|
||||
last, ok := d.lastDisconnectLog[pubKey]
|
||||
if ok && now.Sub(last) < 10*time.Second {
|
||||
return
|
||||
}
|
||||
d.lastDisconnectLog[pubKey] = now
|
||||
|
||||
routes := state.GetRoutes()
|
||||
numRoutes := len(routes)
|
||||
|
||||
fqdn := state.FQDN
|
||||
if fqdn == "" {
|
||||
fqdn = pubKey
|
||||
}
|
||||
|
||||
log.Warnf("[d] Routing peer disconnected: peer=%s impacted_routes=%d", fqdn, numRoutes)
|
||||
}
|
||||
|
||||
// UpdateWireGuardPeerState updates the WireGuard bits of the peer state
|
||||
func (d *Status) UpdateWireGuardPeerState(pubKey string, wgStats configurer.WGStats) error {
|
||||
d.mux.Lock()
|
||||
|
||||
@@ -14,7 +14,7 @@ import (
|
||||
"github.com/netbirdio/netbird/formatter"
|
||||
)
|
||||
|
||||
const defaultLogSize = 15
|
||||
const defaultLogSize = 100
|
||||
|
||||
const (
|
||||
LogConsole = "console"
|
||||
|
||||
Reference in New Issue
Block a user