diff --git a/.github/workflows/golangci-lint.yml b/.github/workflows/golangci-lint.yml index 62dfe9bce..7b7b32ec0 100644 --- a/.github/workflows/golangci-lint.yml +++ b/.github/workflows/golangci-lint.yml @@ -19,7 +19,7 @@ jobs: - name: codespell uses: codespell-project/actions-codespell@v2 with: - ignore_words_list: erro,clienta,hastable,iif,groupd,testin,groupe,cros,ans,deriver,te,userA + ignore_words_list: erro,clienta,hastable,iif,groupd,testin,groupe,cros,ans,deriver,te,userA,ede,additionals skip: go.mod,go.sum,**/proxy/web/** golangci: strategy: diff --git a/client/internal/dns/upstream.go b/client/internal/dns/upstream.go index a26536f6e..39064f26c 100644 --- a/client/internal/dns/upstream.go +++ b/client/internal/dns/upstream.go @@ -30,6 +30,27 @@ import ( var currentMTU uint16 = iface.DefaultMTU +// nonRetryableEDECodes lists EDE info codes (RFC 8914) for which a SERVFAIL +// from one upstream means another upstream would return the same answer: +// DNSSEC validation outcomes and policy-based blocks. Transient errors +// (network, cached, not ready) are not included. +var nonRetryableEDECodes = map[uint16]struct{}{ + dns.ExtendedErrorCodeUnsupportedDNSKEYAlgorithm: {}, + dns.ExtendedErrorCodeUnsupportedDSDigestType: {}, + dns.ExtendedErrorCodeDNSSECIndeterminate: {}, + dns.ExtendedErrorCodeDNSBogus: {}, + dns.ExtendedErrorCodeSignatureExpired: {}, + dns.ExtendedErrorCodeSignatureNotYetValid: {}, + dns.ExtendedErrorCodeDNSKEYMissing: {}, + dns.ExtendedErrorCodeRRSIGsMissing: {}, + dns.ExtendedErrorCodeNoZoneKeyBitSet: {}, + dns.ExtendedErrorCodeNSECMissing: {}, + dns.ExtendedErrorCodeBlocked: {}, + dns.ExtendedErrorCodeCensored: {}, + dns.ExtendedErrorCodeFiltered: {}, + dns.ExtendedErrorCodeProhibited: {}, +} + // privateClientIface is the subset of the WireGuard interface needed by GetClientPrivate. type privateClientIface interface { Name() string @@ -250,6 +271,18 @@ func (u *upstreamResolverBase) queryUpstream(parentCtx context.Context, w dns.Re var t time.Duration var err error + // Advertise EDNS0 so the upstream may include Extended DNS Errors + // (RFC 8914) in failure responses; we use those to short-circuit + // failover for definitive answers like DNSSEC validation failures. + // Operate on a copy so the inbound request is unchanged: a client that + // did not advertise EDNS0 must not see an OPT in the response. + hadEdns := r.IsEdns0() != nil + reqUp := r + if !hadEdns { + reqUp = r.Copy() + reqUp.SetEdns0(upstreamUDPSize(), false) + } + var startTime time.Time var upstreamProto *upstreamProtocolResult func() { @@ -257,7 +290,7 @@ func (u *upstreamResolverBase) queryUpstream(parentCtx context.Context, w dns.Re defer cancel() ctx, upstreamProto = contextWithupstreamProtocolResult(ctx) startTime = time.Now() - rm, t, err = u.upstreamClient.exchange(ctx, upstream.String(), r) + rm, t, err = u.upstreamClient.exchange(ctx, upstream.String(), reqUp) }() if err != nil { @@ -269,13 +302,49 @@ func (u *upstreamResolverBase) queryUpstream(parentCtx context.Context, w dns.Re } if rm.Rcode == dns.RcodeServerFailure || rm.Rcode == dns.RcodeRefused { + if code, ok := nonRetryableEDE(rm); ok { + resutil.SetMeta(w, "ede", edeName(code)) + if !hadEdns { + stripOPT(rm) + } + u.writeSuccessResponse(w, rm, upstream, r.Question[0].Name, t, upstreamProto, logger) + return nil + } return &upstreamFailure{upstream: upstream, reason: dns.RcodeToString[rm.Rcode]} } + if !hadEdns { + stripOPT(rm) + } u.writeSuccessResponse(w, rm, upstream, r.Question[0].Name, t, upstreamProto, logger) return nil } +// upstreamUDPSize returns the EDNS0 UDP buffer size we advertise to upstreams, +// derived from the tunnel MTU and bounded against underflow. +func upstreamUDPSize() uint16 { + if currentMTU > ipUDPHeaderSize { + return currentMTU - ipUDPHeaderSize + } + return dns.MinMsgSize +} + +// stripOPT removes any OPT pseudo-RRs from the response's Extra section so +// the response complies with RFC 6891 when the client did not advertise EDNS0. +func stripOPT(rm *dns.Msg) { + if len(rm.Extra) == 0 { + return + } + out := rm.Extra[:0] + for _, rr := range rm.Extra { + if _, ok := rr.(*dns.OPT); ok { + continue + } + out = append(out, rr) + } + rm.Extra = out +} + func (u *upstreamResolverBase) handleUpstreamError(err error, upstream netip.AddrPort, startTime time.Time) *upstreamFailure { if !errors.Is(err, context.DeadlineExceeded) && !isTimeout(err) { return &upstreamFailure{upstream: upstream, reason: err.Error()} @@ -337,6 +406,34 @@ func formatFailures(failures []upstreamFailure) string { return strings.Join(parts, ", ") } +// nonRetryableEDE returns the first non-retryable EDE code carried in the +// response, if any. +func nonRetryableEDE(rm *dns.Msg) (uint16, bool) { + opt := rm.IsEdns0() + if opt == nil { + return 0, false + } + for _, o := range opt.Option { + ede, ok := o.(*dns.EDNS0_EDE) + if !ok { + continue + } + if _, ok := nonRetryableEDECodes[ede.InfoCode]; ok { + return ede.InfoCode, true + } + } + return 0, false +} + +// edeName returns a human-readable name for an EDE code, falling back to +// the numeric code when unknown. +func edeName(code uint16) string { + if name, ok := dns.ExtendedErrorCodeToString[code]; ok { + return name + } + return fmt.Sprintf("EDE %d", code) +} + // ProbeAvailability tests all upstream servers simultaneously and // disables the resolver if none work func (u *upstreamResolverBase) ProbeAvailability(ctx context.Context) { diff --git a/client/internal/dns/upstream_test.go b/client/internal/dns/upstream_test.go index 1797fdad8..d6aec05ca 100644 --- a/client/internal/dns/upstream_test.go +++ b/client/internal/dns/upstream_test.go @@ -770,3 +770,132 @@ func TestExchangeWithFallback_TCPTruncatesToClientSize(t *testing.T) { assert.Less(t, len(rm2.Answer), 20, "small EDNS0 client should get fewer records") assert.True(t, rm2.Truncated, "response should be truncated for small buffer client") } + +func msgWithEDE(rcode int, codes ...uint16) *dns.Msg { + m := new(dns.Msg) + m.Response = true + m.Rcode = rcode + if len(codes) == 0 { + return m + } + opt := &dns.OPT{Hdr: dns.RR_Header{Name: ".", Rrtype: dns.TypeOPT}} + opt.SetUDPSize(dns.MinMsgSize) + for _, c := range codes { + opt.Option = append(opt.Option, &dns.EDNS0_EDE{InfoCode: c}) + } + m.Extra = append(m.Extra, opt) + return m +} + +func TestNonRetryableEDE(t *testing.T) { + tests := []struct { + name string + msg *dns.Msg + wantOK bool + wantCode uint16 + }{ + {name: "no edns0", msg: msgWithEDE(dns.RcodeServerFailure)}, + { + name: "opt without ede", + msg: func() *dns.Msg { + m := msgWithEDE(dns.RcodeServerFailure) + opt := &dns.OPT{Hdr: dns.RR_Header{Name: ".", Rrtype: dns.TypeOPT}} + opt.Option = append(opt.Option, &dns.EDNS0_NSID{Code: dns.EDNS0NSID}) + m.Extra = []dns.RR{opt} + return m + }(), + }, + {name: "ede dnsbogus", msg: msgWithEDE(dns.RcodeServerFailure, dns.ExtendedErrorCodeDNSBogus), wantOK: true, wantCode: dns.ExtendedErrorCodeDNSBogus}, + {name: "ede signature expired", msg: msgWithEDE(dns.RcodeServerFailure, dns.ExtendedErrorCodeSignatureExpired), wantOK: true, wantCode: dns.ExtendedErrorCodeSignatureExpired}, + {name: "ede blocked", msg: msgWithEDE(dns.RcodeServerFailure, dns.ExtendedErrorCodeBlocked), wantOK: true, wantCode: dns.ExtendedErrorCodeBlocked}, + {name: "ede prohibited", msg: msgWithEDE(dns.RcodeRefused, dns.ExtendedErrorCodeProhibited), wantOK: true, wantCode: dns.ExtendedErrorCodeProhibited}, + {name: "ede cached error retryable", msg: msgWithEDE(dns.RcodeServerFailure, dns.ExtendedErrorCodeCachedError)}, + {name: "ede network error retryable", msg: msgWithEDE(dns.RcodeServerFailure, dns.ExtendedErrorCodeNetworkError)}, + {name: "ede not ready retryable", msg: msgWithEDE(dns.RcodeServerFailure, dns.ExtendedErrorCodeNotReady)}, + { + name: "first non-retryable wins", + msg: msgWithEDE(dns.RcodeServerFailure, dns.ExtendedErrorCodeNetworkError, dns.ExtendedErrorCodeDNSBogus), + wantOK: true, + wantCode: dns.ExtendedErrorCodeDNSBogus, + }, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + code, ok := nonRetryableEDE(tc.msg) + assert.Equal(t, tc.wantOK, ok, "ok should match") + if tc.wantOK { + assert.Equal(t, tc.wantCode, code, "code should match") + } + }) + } +} + +func TestEDEName(t *testing.T) { + assert.Equal(t, "DNSSEC Bogus", edeName(dns.ExtendedErrorCodeDNSBogus)) + assert.Equal(t, "Signature Expired", edeName(dns.ExtendedErrorCodeSignatureExpired)) + assert.Equal(t, "EDE 9999", edeName(9999), "unknown code falls back to numeric") +} + +func TestStripOPT(t *testing.T) { + rm := &dns.Msg{ + Extra: []dns.RR{ + &dns.OPT{Hdr: dns.RR_Header{Name: ".", Rrtype: dns.TypeOPT}}, + &dns.A{Hdr: dns.RR_Header{Name: "x.", Rrtype: dns.TypeA}, A: net.IPv4(1, 2, 3, 4)}, + }, + } + stripOPT(rm) + assert.Len(t, rm.Extra, 1, "OPT should be removed, A kept") + _, isOPT := rm.Extra[0].(*dns.OPT) + assert.False(t, isOPT, "remaining record must not be OPT") +} + +func TestUpstreamResolver_NonRetryableEDEShortCircuits(t *testing.T) { + upstream1 := netip.MustParseAddrPort("192.0.2.1:53") + upstream2 := netip.MustParseAddrPort("192.0.2.2:53") + + servfailWithEDE := msgWithEDE(dns.RcodeServerFailure, dns.ExtendedErrorCodeDNSBogus) + successResp := buildMockResponse(dns.RcodeSuccess, "192.0.2.100") + + var queried []string + tracking := &trackingMockClient{ + inner: &mockUpstreamResolverPerServer{ + responses: map[string]mockUpstreamResponse{ + upstream1.String(): {msg: servfailWithEDE}, + upstream2.String(): {msg: successResp}, + }, + rtt: time.Millisecond, + }, + queriedUpstreams: &queried, + } + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + resolver := &upstreamResolverBase{ + ctx: ctx, + upstreamClient: tracking, + upstreamServers: []netip.AddrPort{upstream1, upstream2}, + upstreamTimeout: UpstreamTimeout, + } + + var written *dns.Msg + w := &test.MockResponseWriter{ + WriteMsgFunc: func(m *dns.Msg) error { + written = m + return nil + }, + } + + // Client query without EDNS0 must not see an OPT in the response. + q := new(dns.Msg).SetQuestion("example.com.", dns.TypeA) + resolver.ServeDNS(w, q) + + require.NotNil(t, written, "response must be written") + assert.Equal(t, dns.RcodeServerFailure, written.Rcode, "SERVFAIL must propagate") + assert.Len(t, queried, 1, "only first upstream should be queried") + assert.Equal(t, upstream1.String(), queried[0]) + for _, rr := range written.Extra { + _, isOPT := rr.(*dns.OPT) + assert.False(t, isOPT, "synthetic OPT must not leak to a non-EDNS0 client") + } +} diff --git a/go.mod b/go.mod index 84aeab941..5704887ce 100644 --- a/go.mod +++ b/go.mod @@ -72,7 +72,7 @@ require ( github.com/lrh3321/ipset-go v0.0.0-20250619021614-54a0a98ace81 github.com/mdlayher/socket v0.5.1 github.com/mdp/qrterminal/v3 v3.2.1 - github.com/miekg/dns v1.1.59 + github.com/miekg/dns v1.1.72 github.com/mitchellh/hashstructure/v2 v2.0.2 github.com/netbirdio/management-integrations/integrations v0.0.0-20260416123949-2355d972be42 github.com/netbirdio/signal-dispatcher/dispatcher v0.0.0-20250805121659-6b4ac470ca45 diff --git a/go.sum b/go.sum index 851d1ce66..42652169c 100644 --- a/go.sum +++ b/go.sum @@ -455,8 +455,8 @@ github.com/mdp/qrterminal/v3 v3.2.1 h1:6+yQjiiOsSuXT5n9/m60E54vdgFsw0zhADHhHLrFe github.com/mdp/qrterminal/v3 v3.2.1/go.mod h1:jOTmXvnBsMy5xqLniO0R++Jmjs2sTm9dFSuQ5kpz/SU= github.com/mholt/acmez/v2 v2.0.1 h1:3/3N0u1pLjMK4sNEAFSI+bcvzbPhRpY383sy1kLHJ6k= github.com/mholt/acmez/v2 v2.0.1/go.mod h1:fX4c9r5jYwMyMsC+7tkYRxHibkOTgta5DIFGoe67e1U= -github.com/miekg/dns v1.1.59 h1:C9EXc/UToRwKLhK5wKU/I4QVsBUc8kE6MkHBkeypWZs= -github.com/miekg/dns v1.1.59/go.mod h1:nZpewl5p6IvctfgrckopVx2OlSEHPRO/U4SYkRklrEk= +github.com/miekg/dns v1.1.72 h1:vhmr+TF2A3tuoGNkLDFK9zi36F2LS+hKTRW0Uf8kbzI= +github.com/miekg/dns v1.1.72/go.mod h1:+EuEPhdHOsfk6Wk5TT2CzssZdqkmFhf8r+aVyDEToIs= github.com/mikioh/ipaddr v0.0.0-20190404000644-d465c8ab6721 h1:RlZweED6sbSArvlE924+mUcZuXKLBHA35U7LN621Bws= github.com/mikioh/ipaddr v0.0.0-20190404000644-d465c8ab6721/go.mod h1:Ickgr2WtCLZ2MDGd4Gr0geeCH5HybhRJbonOgQpvSxc= github.com/mitchellh/copystructure v1.2.0 h1:vpKXTN4ewci03Vljg/q9QvCGUDttBOGBIa15WveJJGw= diff --git a/util/capture/text.go b/util/capture/text.go index fbb26654e..a6a6dd28b 100644 --- a/util/capture/text.go +++ b/util/capture/text.go @@ -12,6 +12,7 @@ import ( "github.com/google/gopacket" "github.com/google/gopacket/layers" + "github.com/miekg/dns" ) // TextWriter writes human-readable one-line-per-packet summaries. @@ -594,19 +595,45 @@ func formatDNSResponse(d *layers.DNS, rd string, plen int) string { anCount := d.ANCount nsCount := d.NSCount arCount := d.ARCount + ede := formatEDE(d) if d.ResponseCode != layers.DNSResponseCodeNoErr { - return fmt.Sprintf("%04x %d/%d/%d %s (%d)", d.ID, anCount, nsCount, arCount, d.ResponseCode, plen) + return fmt.Sprintf("%04x %d/%d/%d %s%s (%d)", d.ID, anCount, nsCount, arCount, d.ResponseCode, ede, plen) } if anCount > 0 && len(d.Answers) > 0 { rr := d.Answers[0] if rdata := shortRData(&rr); rdata != "" { - return fmt.Sprintf("%04x %d/%d/%d %s %s (%d)", d.ID, anCount, nsCount, arCount, rr.Type, rdata, plen) + return fmt.Sprintf("%04x %d/%d/%d %s %s%s (%d)", d.ID, anCount, nsCount, arCount, rr.Type, rdata, ede, plen) } } - return fmt.Sprintf("%04x %d/%d/%d (%d)", d.ID, anCount, nsCount, arCount, plen) + return fmt.Sprintf("%04x %d/%d/%d%s (%d)", d.ID, anCount, nsCount, arCount, ede, plen) +} + +// dnsOPTCodeEDE is the EDNS0 option code for Extended DNS Errors (RFC 8914). +const dnsOPTCodeEDE layers.DNSOptionCode = layers.DNSOptionCode(dns.EDNS0EDE) + +// formatEDE returns " EDE=Name" for the first Extended DNS Error option +// found in the response, or empty string if none is present. +func formatEDE(d *layers.DNS) string { + for _, rr := range d.Additionals { + if rr.Type != layers.DNSTypeOPT { + continue + } + for _, opt := range rr.OPT { + if opt.Code != dnsOPTCodeEDE || len(opt.Data) < 2 { + continue + } + info := binary.BigEndian.Uint16(opt.Data[:2]) + name, ok := dns.ExtendedErrorCodeToString[info] + if !ok { + name = fmt.Sprintf("%d", info) + } + return " EDE=" + name + } + } + return "" } func shortRData(rr *layers.DNSResourceRecord) string {