Add DNS configuration snapshot and per-domain statistics tracking

This commit is contained in:
Hakan Sariman
2025-09-09 17:09:13 +07:00
parent c2822eebb0
commit 2db23a42dc
2 changed files with 154 additions and 0 deletions

View File

@@ -315,6 +315,10 @@ func (g *BundleGenerator) createArchive() error {
return fmt.Errorf("add sync response: %w", err)
}
if err := g.addDNSConfig(); err != nil {
log.Errorf("failed to add DNS config to debug bundle: %v", err)
}
if err := g.addStateFile(); err != nil {
log.Errorf("failed to add state file to debug bundle: %v", err)
}
@@ -341,6 +345,50 @@ func (g *BundleGenerator) createArchive() error {
return nil
}
// addDNSConfig writes a dns_config.json snapshot with routed domains and NS group status
func (g *BundleGenerator) addDNSConfig() error {
type nsGroup struct {
ID string `json:"id"`
Servers []string `json:"servers"`
Domains []string `json:"domains"`
Enabled bool `json:"enabled"`
Error string `json:"error,omitempty"`
}
type dnsConfig struct {
Groups []nsGroup `json:"name_server_groups"`
}
if g.statusRecorder == nil {
return nil
}
states := g.statusRecorder.GetDNSStates()
cfg := dnsConfig{Groups: make([]nsGroup, 0, len(states))}
for _, st := range states {
var servers []string
for _, ap := range st.Servers {
servers = append(servers, ap.String())
}
var errStr string
if st.Error != nil {
errStr = st.Error.Error()
}
cfg.Groups = append(cfg.Groups, nsGroup{
ID: st.ID,
Servers: servers,
Domains: st.Domains,
Enabled: st.Enabled,
Error: errStr,
})
}
bs, err := json.MarshalIndent(cfg, "", " ")
if err != nil {
return fmt.Errorf("marshal dns config: %w", err)
}
return g.addFileToZip(bytes.NewReader(bs), "dns_config.json")
}
func (g *BundleGenerator) addSystemInfo() {
if err := g.addRoutes(); err != nil {
log.Errorf("failed to add routes to debug bundle: %v", err)

View File

@@ -52,6 +52,12 @@ type DNSForwarder struct {
failureCounts map[string]int
failureWindow time.Duration
lastLogPerHost map[string]time.Time
// per-domain rolling stats and windows
statsMu sync.Mutex
stats map[string]*domainStats
winSize time.Duration
slowT time.Duration
}
func NewDNSForwarder(listenAddress string, ttl uint32, firewall firewaller, statusRecorder *peer.Status) *DNSForwarder {
@@ -65,9 +71,22 @@ func NewDNSForwarder(listenAddress string, ttl uint32, firewall firewaller, stat
failureCounts: make(map[string]int),
failureWindow: 10 * time.Second,
lastLogPerHost: make(map[string]time.Time),
stats: make(map[string]*domainStats),
winSize: 10 * time.Second,
slowT: 300 * time.Millisecond,
}
}
type domainStats struct {
total int
success int
timeouts int
notfound int
failures int // other failures (incl. SERVFAIL-like)
slow int
lastLog time.Time
}
func (f *DNSForwarder) Listen(entries []*ForwarderEntry) error {
log.Infof("starting DNS forwarder on address=%s", f.listenAddress)
@@ -172,12 +191,19 @@ func (f *DNSForwarder) handleDNSQuery(w dns.ResponseWriter, query *dns.Msg) *dns
ctx, cancel := context.WithTimeout(context.Background(), upstreamTimeout)
defer cancel()
start := time.Now()
ips, err := f.resolver.LookupNetIP(ctx, network, domain)
elapsed := time.Since(start)
if err != nil {
f.handleDNSError(ctx, w, question, resp, domain, err)
// record error stats for routed domains
f.recordErrorStats(strings.TrimSuffix(domain, "."), err)
return nil
}
// record success timing
f.recordSuccessStats(strings.TrimSuffix(domain, "."), elapsed)
f.updateInternalState(ips, mostSpecificResId, matchingEntries)
f.addIPsToResponse(resp, domain, ips)
@@ -322,6 +348,86 @@ func (f *DNSForwarder) handleDNSError(ctx context.Context, w dns.ResponseWriter,
}
}
// recordErrorStats updates per-domain counters and emits rate-limited logs
func (f *DNSForwarder) recordErrorStats(domain string, err error) {
domain = strings.ToLower(domain)
f.statsMu.Lock()
s := f.ensureStats(domain)
s.total++
var dnsErr *net.DNSError
if errors.As(err, &dnsErr) {
if dnsErr.IsNotFound {
s.notfound++
} else if dnsErr.Timeout() {
s.timeouts++
} else {
s.failures++
}
} else {
s.failures++
}
f.maybeLogDomainStats(domain, s)
f.statsMu.Unlock()
}
// recordSuccessStats updates per-domain latency stats and slow counters, logs if needed (rate-limited)
func (f *DNSForwarder) recordSuccessStats(domain string, elapsed time.Duration) {
domain = strings.ToLower(domain)
f.statsMu.Lock()
s := f.ensureStats(domain)
s.total++
s.success++
if elapsed >= f.slowT {
s.slow++
}
f.maybeLogDomainStats(domain, s)
f.statsMu.Unlock()
}
func (f *DNSForwarder) ensureStats(domain string) *domainStats {
if ds, ok := f.stats[domain]; ok {
return ds
}
ds := &domainStats{}
f.stats[domain] = ds
return ds
}
// maybeLogDomainStats logs a compact summary per routed domain at most once per window
func (f *DNSForwarder) maybeLogDomainStats(domain string, s *domainStats) {
now := time.Now()
if !s.lastLog.IsZero() && now.Sub(s.lastLog) < f.winSize {
return
}
// check if routed (avoid logging for non-routed domains)
if resID, _ := f.getMatchingEntries(domain); resID == "" {
return
}
// only log if something noteworthy happened in the window
noteworthy := s.timeouts > 0 || s.notfound > 0 || s.failures > 0 || s.slow > 0
if !noteworthy {
s.lastLog = now
return
}
// warn on persistent problems, info otherwise
levelWarn := s.timeouts >= 3 || s.failures >= 3
if levelWarn {
log.Warnf("[d] DNS stats: domain=%s total=%d ok=%d timeout=%d nxdomain=%d fail=%d slow=%d(>=%s)",
domain, s.total, s.success, s.timeouts, s.notfound, s.failures, s.slow, f.slowT)
} else {
log.Infof("[d] DNS stats: domain=%s total=%d ok=%d timeout=%d nxdomain=%d fail=%d slow=%d(>=%s)",
domain, s.total, s.success, s.timeouts, s.notfound, s.failures, s.slow, f.slowT)
}
// reset counters for next window
*s = domainStats{lastLog: now}
}
// addIPsToResponse adds IP addresses to the DNS response as appropriate A or AAAA records
func (f *DNSForwarder) addIPsToResponse(resp *dns.Msg, domain string, ips []netip.Addr) {
for _, ip := range ips {