diff --git a/client/internal/debug/debug.go b/client/internal/debug/debug.go index ec920c5f3..0ba65493e 100644 --- a/client/internal/debug/debug.go +++ b/client/internal/debug/debug.go @@ -315,6 +315,10 @@ func (g *BundleGenerator) createArchive() error { return fmt.Errorf("add sync response: %w", err) } + if err := g.addDNSConfig(); err != nil { + log.Errorf("failed to add DNS config to debug bundle: %v", err) + } + if err := g.addStateFile(); err != nil { log.Errorf("failed to add state file to debug bundle: %v", err) } @@ -341,6 +345,50 @@ func (g *BundleGenerator) createArchive() error { return nil } +// addDNSConfig writes a dns_config.json snapshot with routed domains and NS group status +func (g *BundleGenerator) addDNSConfig() error { + type nsGroup struct { + ID string `json:"id"` + Servers []string `json:"servers"` + Domains []string `json:"domains"` + Enabled bool `json:"enabled"` + Error string `json:"error,omitempty"` + } + type dnsConfig struct { + Groups []nsGroup `json:"name_server_groups"` + } + + if g.statusRecorder == nil { + return nil + } + + states := g.statusRecorder.GetDNSStates() + cfg := dnsConfig{Groups: make([]nsGroup, 0, len(states))} + for _, st := range states { + var servers []string + for _, ap := range st.Servers { + servers = append(servers, ap.String()) + } + var errStr string + if st.Error != nil { + errStr = st.Error.Error() + } + cfg.Groups = append(cfg.Groups, nsGroup{ + ID: st.ID, + Servers: servers, + Domains: st.Domains, + Enabled: st.Enabled, + Error: errStr, + }) + } + + bs, err := json.MarshalIndent(cfg, "", " ") + if err != nil { + return fmt.Errorf("marshal dns config: %w", err) + } + return g.addFileToZip(bytes.NewReader(bs), "dns_config.json") +} + func (g *BundleGenerator) addSystemInfo() { if err := g.addRoutes(); err != nil { log.Errorf("failed to add routes to debug bundle: %v", err) diff --git a/client/internal/dnsfwd/forwarder.go b/client/internal/dnsfwd/forwarder.go index 86f13ad1a..28392e1bb 100644 --- a/client/internal/dnsfwd/forwarder.go +++ b/client/internal/dnsfwd/forwarder.go @@ -52,6 +52,12 @@ type DNSForwarder struct { failureCounts map[string]int failureWindow time.Duration lastLogPerHost map[string]time.Time + + // per-domain rolling stats and windows + statsMu sync.Mutex + stats map[string]*domainStats + winSize time.Duration + slowT time.Duration } func NewDNSForwarder(listenAddress string, ttl uint32, firewall firewaller, statusRecorder *peer.Status) *DNSForwarder { @@ -65,9 +71,22 @@ func NewDNSForwarder(listenAddress string, ttl uint32, firewall firewaller, stat failureCounts: make(map[string]int), failureWindow: 10 * time.Second, lastLogPerHost: make(map[string]time.Time), + stats: make(map[string]*domainStats), + winSize: 10 * time.Second, + slowT: 300 * time.Millisecond, } } +type domainStats struct { + total int + success int + timeouts int + notfound int + failures int // other failures (incl. SERVFAIL-like) + slow int + lastLog time.Time +} + func (f *DNSForwarder) Listen(entries []*ForwarderEntry) error { log.Infof("starting DNS forwarder on address=%s", f.listenAddress) @@ -172,12 +191,19 @@ func (f *DNSForwarder) handleDNSQuery(w dns.ResponseWriter, query *dns.Msg) *dns ctx, cancel := context.WithTimeout(context.Background(), upstreamTimeout) defer cancel() + start := time.Now() ips, err := f.resolver.LookupNetIP(ctx, network, domain) + elapsed := time.Since(start) if err != nil { f.handleDNSError(ctx, w, question, resp, domain, err) + // record error stats for routed domains + f.recordErrorStats(strings.TrimSuffix(domain, "."), err) return nil } + // record success timing + f.recordSuccessStats(strings.TrimSuffix(domain, "."), elapsed) + f.updateInternalState(ips, mostSpecificResId, matchingEntries) f.addIPsToResponse(resp, domain, ips) @@ -322,6 +348,86 @@ func (f *DNSForwarder) handleDNSError(ctx context.Context, w dns.ResponseWriter, } } +// recordErrorStats updates per-domain counters and emits rate-limited logs +func (f *DNSForwarder) recordErrorStats(domain string, err error) { + domain = strings.ToLower(domain) + f.statsMu.Lock() + s := f.ensureStats(domain) + s.total++ + + var dnsErr *net.DNSError + if errors.As(err, &dnsErr) { + if dnsErr.IsNotFound { + s.notfound++ + } else if dnsErr.Timeout() { + s.timeouts++ + } else { + s.failures++ + } + } else { + s.failures++ + } + + f.maybeLogDomainStats(domain, s) + f.statsMu.Unlock() +} + +// recordSuccessStats updates per-domain latency stats and slow counters, logs if needed (rate-limited) +func (f *DNSForwarder) recordSuccessStats(domain string, elapsed time.Duration) { + domain = strings.ToLower(domain) + f.statsMu.Lock() + s := f.ensureStats(domain) + s.total++ + s.success++ + if elapsed >= f.slowT { + s.slow++ + } + f.maybeLogDomainStats(domain, s) + f.statsMu.Unlock() +} + +func (f *DNSForwarder) ensureStats(domain string) *domainStats { + if ds, ok := f.stats[domain]; ok { + return ds + } + ds := &domainStats{} + f.stats[domain] = ds + return ds +} + +// maybeLogDomainStats logs a compact summary per routed domain at most once per window +func (f *DNSForwarder) maybeLogDomainStats(domain string, s *domainStats) { + now := time.Now() + if !s.lastLog.IsZero() && now.Sub(s.lastLog) < f.winSize { + return + } + + // check if routed (avoid logging for non-routed domains) + if resID, _ := f.getMatchingEntries(domain); resID == "" { + return + } + + // only log if something noteworthy happened in the window + noteworthy := s.timeouts > 0 || s.notfound > 0 || s.failures > 0 || s.slow > 0 + if !noteworthy { + s.lastLog = now + return + } + + // warn on persistent problems, info otherwise + levelWarn := s.timeouts >= 3 || s.failures >= 3 + if levelWarn { + log.Warnf("[d] DNS stats: domain=%s total=%d ok=%d timeout=%d nxdomain=%d fail=%d slow=%d(>=%s)", + domain, s.total, s.success, s.timeouts, s.notfound, s.failures, s.slow, f.slowT) + } else { + log.Infof("[d] DNS stats: domain=%s total=%d ok=%d timeout=%d nxdomain=%d fail=%d slow=%d(>=%s)", + domain, s.total, s.success, s.timeouts, s.notfound, s.failures, s.slow, f.slowT) + } + + // reset counters for next window + *s = domainStats{lastLog: now} +} + // addIPsToResponse adds IP addresses to the DNS response as appropriate A or AAAA records func (f *DNSForwarder) addIPsToResponse(resp *dns.Msg, domain string, ips []netip.Addr) { for _, ip := range ips {