mirror of
https://github.com/netbirdio/netbird.git
synced 2026-04-16 15:26:40 +00:00
Add DNS configuration snapshot and per-domain statistics tracking
This commit is contained in:
@@ -315,6 +315,10 @@ func (g *BundleGenerator) createArchive() error {
|
||||
return fmt.Errorf("add sync response: %w", err)
|
||||
}
|
||||
|
||||
if err := g.addDNSConfig(); err != nil {
|
||||
log.Errorf("failed to add DNS config to debug bundle: %v", err)
|
||||
}
|
||||
|
||||
if err := g.addStateFile(); err != nil {
|
||||
log.Errorf("failed to add state file to debug bundle: %v", err)
|
||||
}
|
||||
@@ -341,6 +345,50 @@ func (g *BundleGenerator) createArchive() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// addDNSConfig writes a dns_config.json snapshot with routed domains and NS group status
|
||||
func (g *BundleGenerator) addDNSConfig() error {
|
||||
type nsGroup struct {
|
||||
ID string `json:"id"`
|
||||
Servers []string `json:"servers"`
|
||||
Domains []string `json:"domains"`
|
||||
Enabled bool `json:"enabled"`
|
||||
Error string `json:"error,omitempty"`
|
||||
}
|
||||
type dnsConfig struct {
|
||||
Groups []nsGroup `json:"name_server_groups"`
|
||||
}
|
||||
|
||||
if g.statusRecorder == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
states := g.statusRecorder.GetDNSStates()
|
||||
cfg := dnsConfig{Groups: make([]nsGroup, 0, len(states))}
|
||||
for _, st := range states {
|
||||
var servers []string
|
||||
for _, ap := range st.Servers {
|
||||
servers = append(servers, ap.String())
|
||||
}
|
||||
var errStr string
|
||||
if st.Error != nil {
|
||||
errStr = st.Error.Error()
|
||||
}
|
||||
cfg.Groups = append(cfg.Groups, nsGroup{
|
||||
ID: st.ID,
|
||||
Servers: servers,
|
||||
Domains: st.Domains,
|
||||
Enabled: st.Enabled,
|
||||
Error: errStr,
|
||||
})
|
||||
}
|
||||
|
||||
bs, err := json.MarshalIndent(cfg, "", " ")
|
||||
if err != nil {
|
||||
return fmt.Errorf("marshal dns config: %w", err)
|
||||
}
|
||||
return g.addFileToZip(bytes.NewReader(bs), "dns_config.json")
|
||||
}
|
||||
|
||||
func (g *BundleGenerator) addSystemInfo() {
|
||||
if err := g.addRoutes(); err != nil {
|
||||
log.Errorf("failed to add routes to debug bundle: %v", err)
|
||||
|
||||
@@ -52,6 +52,12 @@ type DNSForwarder struct {
|
||||
failureCounts map[string]int
|
||||
failureWindow time.Duration
|
||||
lastLogPerHost map[string]time.Time
|
||||
|
||||
// per-domain rolling stats and windows
|
||||
statsMu sync.Mutex
|
||||
stats map[string]*domainStats
|
||||
winSize time.Duration
|
||||
slowT time.Duration
|
||||
}
|
||||
|
||||
func NewDNSForwarder(listenAddress string, ttl uint32, firewall firewaller, statusRecorder *peer.Status) *DNSForwarder {
|
||||
@@ -65,9 +71,22 @@ func NewDNSForwarder(listenAddress string, ttl uint32, firewall firewaller, stat
|
||||
failureCounts: make(map[string]int),
|
||||
failureWindow: 10 * time.Second,
|
||||
lastLogPerHost: make(map[string]time.Time),
|
||||
stats: make(map[string]*domainStats),
|
||||
winSize: 10 * time.Second,
|
||||
slowT: 300 * time.Millisecond,
|
||||
}
|
||||
}
|
||||
|
||||
// domainStats holds the lookup counters kept for a single domain, together
// with the time the counters were last evaluated for logging.
type domainStats struct {
	// total counts every recorded lookup, successful or not.
	total int
	// success counts lookups that returned without error.
	success int
	// timeouts counts lookups whose DNS error reported a timeout.
	timeouts int
	// notfound counts lookups whose DNS error was flagged as not found.
	notfound int
	// failures counts every other failed lookup (incl. SERVFAIL-like).
	failures int
	// slow counts successful lookups that took at least the slow threshold.
	slow int
	// lastLog is when the stats were last evaluated for logging; the zero
	// value means they have never been evaluated.
	lastLog time.Time
}
|
||||
|
||||
func (f *DNSForwarder) Listen(entries []*ForwarderEntry) error {
|
||||
log.Infof("starting DNS forwarder on address=%s", f.listenAddress)
|
||||
|
||||
@@ -172,12 +191,19 @@ func (f *DNSForwarder) handleDNSQuery(w dns.ResponseWriter, query *dns.Msg) *dns
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), upstreamTimeout)
|
||||
defer cancel()
|
||||
start := time.Now()
|
||||
ips, err := f.resolver.LookupNetIP(ctx, network, domain)
|
||||
elapsed := time.Since(start)
|
||||
if err != nil {
|
||||
f.handleDNSError(ctx, w, question, resp, domain, err)
|
||||
// record error stats for routed domains
|
||||
f.recordErrorStats(strings.TrimSuffix(domain, "."), err)
|
||||
return nil
|
||||
}
|
||||
|
||||
// record success timing
|
||||
f.recordSuccessStats(strings.TrimSuffix(domain, "."), elapsed)
|
||||
|
||||
f.updateInternalState(ips, mostSpecificResId, matchingEntries)
|
||||
f.addIPsToResponse(resp, domain, ips)
|
||||
|
||||
@@ -322,6 +348,86 @@ func (f *DNSForwarder) handleDNSError(ctx context.Context, w dns.ResponseWriter,
|
||||
}
|
||||
}
|
||||
|
||||
// recordErrorStats updates per-domain counters and emits rate-limited logs
|
||||
func (f *DNSForwarder) recordErrorStats(domain string, err error) {
|
||||
domain = strings.ToLower(domain)
|
||||
f.statsMu.Lock()
|
||||
s := f.ensureStats(domain)
|
||||
s.total++
|
||||
|
||||
var dnsErr *net.DNSError
|
||||
if errors.As(err, &dnsErr) {
|
||||
if dnsErr.IsNotFound {
|
||||
s.notfound++
|
||||
} else if dnsErr.Timeout() {
|
||||
s.timeouts++
|
||||
} else {
|
||||
s.failures++
|
||||
}
|
||||
} else {
|
||||
s.failures++
|
||||
}
|
||||
|
||||
f.maybeLogDomainStats(domain, s)
|
||||
f.statsMu.Unlock()
|
||||
}
|
||||
|
||||
// recordSuccessStats updates per-domain latency stats and slow counters, logs if needed (rate-limited)
|
||||
func (f *DNSForwarder) recordSuccessStats(domain string, elapsed time.Duration) {
|
||||
domain = strings.ToLower(domain)
|
||||
f.statsMu.Lock()
|
||||
s := f.ensureStats(domain)
|
||||
s.total++
|
||||
s.success++
|
||||
if elapsed >= f.slowT {
|
||||
s.slow++
|
||||
}
|
||||
f.maybeLogDomainStats(domain, s)
|
||||
f.statsMu.Unlock()
|
||||
}
|
||||
|
||||
func (f *DNSForwarder) ensureStats(domain string) *domainStats {
|
||||
if ds, ok := f.stats[domain]; ok {
|
||||
return ds
|
||||
}
|
||||
ds := &domainStats{}
|
||||
f.stats[domain] = ds
|
||||
return ds
|
||||
}
|
||||
|
||||
// maybeLogDomainStats logs a compact summary per routed domain at most once per window
|
||||
func (f *DNSForwarder) maybeLogDomainStats(domain string, s *domainStats) {
|
||||
now := time.Now()
|
||||
if !s.lastLog.IsZero() && now.Sub(s.lastLog) < f.winSize {
|
||||
return
|
||||
}
|
||||
|
||||
// check if routed (avoid logging for non-routed domains)
|
||||
if resID, _ := f.getMatchingEntries(domain); resID == "" {
|
||||
return
|
||||
}
|
||||
|
||||
// only log if something noteworthy happened in the window
|
||||
noteworthy := s.timeouts > 0 || s.notfound > 0 || s.failures > 0 || s.slow > 0
|
||||
if !noteworthy {
|
||||
s.lastLog = now
|
||||
return
|
||||
}
|
||||
|
||||
// warn on persistent problems, info otherwise
|
||||
levelWarn := s.timeouts >= 3 || s.failures >= 3
|
||||
if levelWarn {
|
||||
log.Warnf("[d] DNS stats: domain=%s total=%d ok=%d timeout=%d nxdomain=%d fail=%d slow=%d(>=%s)",
|
||||
domain, s.total, s.success, s.timeouts, s.notfound, s.failures, s.slow, f.slowT)
|
||||
} else {
|
||||
log.Infof("[d] DNS stats: domain=%s total=%d ok=%d timeout=%d nxdomain=%d fail=%d slow=%d(>=%s)",
|
||||
domain, s.total, s.success, s.timeouts, s.notfound, s.failures, s.slow, f.slowT)
|
||||
}
|
||||
|
||||
// reset counters for next window
|
||||
*s = domainStats{lastLog: now}
|
||||
}
|
||||
|
||||
// addIPsToResponse adds IP addresses to the DNS response as appropriate A or AAAA records
|
||||
func (f *DNSForwarder) addIPsToResponse(resp *dns.Msg, domain string, ips []netip.Addr) {
|
||||
for _, ip := range ips {
|
||||
|
||||
Reference in New Issue
Block a user