Improve embed client error detection and reporting

This commit is contained in:
Viktor Liu
2026-02-09 01:49:16 +08:00
parent 780e9f57a5
commit 9904235a2f
3 changed files with 207 additions and 18 deletions

View File

@@ -319,8 +319,6 @@ func getRequestID(r *http.Request) string {
// classifyProxyError determines the appropriate error title, message, HTTP
// status code, and component status based on the error type.
func classifyProxyError(err error) (title, message string, code int, status web.ErrorStatus) {
errStr := err.Error()
switch {
case errors.Is(err, context.DeadlineExceeded):
return "Request Timeout",
@@ -340,36 +338,68 @@ func classifyProxyError(err error) (title, message string, code int, status web.
http.StatusInternalServerError,
web.ErrorStatus{Proxy: false, Peer: false, Destination: false}
case strings.Contains(errStr, "no peer connection found"),
strings.Contains(errStr, "start netbird client"),
strings.Contains(errStr, "engine not started"),
strings.Contains(errStr, "get net:"):
// The proxy peer (embedded client) is not connected
case errors.Is(err, roundtrip.ErrNoPeerConnection),
errors.Is(err, roundtrip.ErrClientStartFailed):
return "Proxy Not Connected",
"The proxy is not connected to the NetBird network. Please try again later or contact your administrator.",
http.StatusBadGateway,
web.ErrorStatus{Proxy: false, Peer: false, Destination: false}
case strings.Contains(errStr, "connection refused"):
// Routing peer connected but destination service refused the connection
case isConnectionRefused(err):
return "Service Unavailable",
"The connection to the service was refused. Please verify that the service is running and try again.",
http.StatusBadGateway,
web.ErrorStatus{Proxy: true, Peer: true, Destination: false}
case strings.Contains(errStr, "no route to host"),
strings.Contains(errStr, "network is unreachable"),
strings.Contains(errStr, "i/o timeout"):
// Peer is not reachable
case isHostUnreachable(err):
return "Peer Not Connected",
"The connection to the peer could not be established. Please ensure the peer is running and connected to the NetBird network.",
http.StatusBadGateway,
web.ErrorStatus{Proxy: true, Peer: false, Destination: false}
case isNetTimeout(err):
return "Request Timeout",
"The request timed out while trying to reach the service. Please refresh the page and try again.",
http.StatusGatewayTimeout,
web.ErrorStatus{Proxy: true, Peer: true, Destination: false}
}
// Unknown error - log it and show generic message
return "Connection Error",
"An unexpected error occurred while connecting to the service. Please try again later.",
http.StatusBadGateway,
web.ErrorStatus{Proxy: true, Peer: false, Destination: false}
}
// isConnectionRefused reports whether err carries a *net.OpError whose inner
// error message mentions a refused connection. Matching on the word "refused"
// covers both the standard net wording ("connection refused", surfaced through
// *os.SyscallError) and the gVisor netstack wording ("connection was refused").
func isConnectionRefused(err error) bool {
	const refusedMarker = "refused"
	return opErrorContains(err, refusedMarker)
}
// isHostUnreachable reports whether err carries a *net.OpError whose inner
// error message indicates that the host or network cannot be reached. The
// markers cover the standard net wording ("no route to host", "network is
// unreachable") as well as the gVisor wording ("host is unreachable", etc.).
func isHostUnreachable(err error) bool {
	for _, marker := range []string{"unreachable", "no route to host"} {
		if opErrorContains(err, marker) {
			return true
		}
	}
	return false
}
// isNetTimeout reports whether err, or an error it wraps, implements the
// net.Error interface and identifies itself as a timeout.
func isNetTimeout(err error) bool {
	var timeoutErr net.Error
	if !errors.As(err, &timeoutErr) {
		// Nothing in the chain implements net.Error.
		return false
	}
	return timeoutErr.Timeout()
}
// opErrorContains looks for a *net.OpError in err's chain and reports whether
// the message of its inner error (OpError.Err) contains substr. It returns
// false when no *net.OpError is present or when its inner error is nil.
//
// Matching on the message text is what lets callers recognise gVisor netstack
// errors, which wrap tcpip errors as plain strings rather than syscall.Errno
// values.
func opErrorContains(err error, substr string) bool {
	var netOpErr *net.OpError
	if !errors.As(err, &netOpErr) {
		return false
	}

	inner := netOpErr.Err
	if inner == nil {
		return false
	}
	return strings.Contains(inner.Error(), substr)
}

View File

@@ -1,18 +1,26 @@
package proxy
import (
"context"
"crypto/tls"
"errors"
"fmt"
"net"
"net/http"
"net/http/httptest"
"net/http/httputil"
"net/netip"
"net/url"
"os"
"syscall"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/netbirdio/netbird/proxy/auth"
"github.com/netbirdio/netbird/proxy/internal/roundtrip"
"github.com/netbirdio/netbird/proxy/web"
)
func TestRewriteFunc_HostRewriting(t *testing.T) {
@@ -811,3 +819,148 @@ func newProxyRequest(t *testing.T, rawURL, remoteAddr string) *httputil.ProxyReq
return &httputil.ProxyRequest{In: in, Out: out}
}
// TestClassifyProxyError verifies that classifyProxyError maps each supported
// error shape to the expected title, HTTP status code, and component status.
// The table covers context errors, the roundtrip sentinel errors, and
// *net.OpError values as produced by both the standard net stack (errno wrapped
// in *os.SyscallError) and the gVisor netstack (plain string errors).
func TestClassifyProxyError(t *testing.T) {
	tests := []struct {
		name       string
		err        error
		wantTitle  string
		wantCode   int
		wantStatus web.ErrorStatus
	}{
		{
			name:       "context deadline exceeded",
			err:        context.DeadlineExceeded,
			wantTitle:  "Request Timeout",
			wantCode:   http.StatusGatewayTimeout,
			wantStatus: web.ErrorStatus{Proxy: true, Peer: true, Destination: false},
		},
		{
			name:       "wrapped deadline exceeded",
			err:        fmt.Errorf("dial: %w", context.DeadlineExceeded),
			wantTitle:  "Request Timeout",
			wantCode:   http.StatusGatewayTimeout,
			wantStatus: web.ErrorStatus{Proxy: true, Peer: true, Destination: false},
		},
		{
			name:       "context canceled",
			err:        context.Canceled,
			wantTitle:  "Request Canceled",
			wantCode:   http.StatusBadGateway,
			wantStatus: web.ErrorStatus{Proxy: true, Peer: true, Destination: false},
		},
		{
			name:       "no account ID",
			err:        roundtrip.ErrNoAccountID,
			wantTitle:  "Configuration Error",
			wantCode:   http.StatusInternalServerError,
			wantStatus: web.ErrorStatus{Proxy: false, Peer: false, Destination: false},
		},
		{
			name:       "no peer connection",
			err:        fmt.Errorf("%w for account: abc", roundtrip.ErrNoPeerConnection),
			wantTitle:  "Proxy Not Connected",
			wantCode:   http.StatusBadGateway,
			wantStatus: web.ErrorStatus{Proxy: false, Peer: false, Destination: false},
		},
		{
			name:       "client not started",
			err:        fmt.Errorf("%w: %w", roundtrip.ErrClientStartFailed, errors.New("engine init failed")),
			wantTitle:  "Proxy Not Connected",
			wantCode:   http.StatusBadGateway,
			wantStatus: web.ErrorStatus{Proxy: false, Peer: false, Destination: false},
		},
		{
			name: "syscall ECONNREFUSED via os.SyscallError",
			err: &net.OpError{
				Op:  "dial",
				Net: "tcp",
				Err: &os.SyscallError{Syscall: "connect", Err: syscall.ECONNREFUSED},
			},
			wantTitle:  "Service Unavailable",
			wantCode:   http.StatusBadGateway,
			wantStatus: web.ErrorStatus{Proxy: true, Peer: true, Destination: false},
		},
		{
			name: "gvisor connection was refused",
			err: &net.OpError{
				Op:  "connect",
				Net: "tcp",
				Err: errors.New("connection was refused"),
			},
			wantTitle:  "Service Unavailable",
			wantCode:   http.StatusBadGateway,
			wantStatus: web.ErrorStatus{Proxy: true, Peer: true, Destination: false},
		},
		{
			name: "syscall EHOSTUNREACH via os.SyscallError",
			err: &net.OpError{
				Op:  "dial",
				Net: "tcp",
				Err: &os.SyscallError{Syscall: "connect", Err: syscall.EHOSTUNREACH},
			},
			wantTitle:  "Peer Not Connected",
			wantCode:   http.StatusBadGateway,
			wantStatus: web.ErrorStatus{Proxy: true, Peer: false, Destination: false},
		},
		{
			name: "syscall ENETUNREACH via os.SyscallError",
			err: &net.OpError{
				Op:  "dial",
				Net: "tcp",
				Err: &os.SyscallError{Syscall: "connect", Err: syscall.ENETUNREACH},
			},
			wantTitle:  "Peer Not Connected",
			wantCode:   http.StatusBadGateway,
			wantStatus: web.ErrorStatus{Proxy: true, Peer: false, Destination: false},
		},
		{
			name: "gvisor host is unreachable",
			err: &net.OpError{
				Op:  "connect",
				Net: "tcp",
				Err: errors.New("host is unreachable"),
			},
			wantTitle:  "Peer Not Connected",
			wantCode:   http.StatusBadGateway,
			wantStatus: web.ErrorStatus{Proxy: true, Peer: false, Destination: false},
		},
		{
			name: "gvisor network is unreachable",
			err: &net.OpError{
				Op:  "connect",
				Net: "tcp",
				Err: errors.New("network is unreachable"),
			},
			wantTitle:  "Peer Not Connected",
			wantCode:   http.StatusBadGateway,
			wantStatus: web.ErrorStatus{Proxy: true, Peer: false, Destination: false},
		},
		{
			// A literal message is used here (rather than syscall.EHOSTUNREACH,
			// which the EHOSTUNREACH case above already covers) so the
			// "no route to host" substring path is pinned independently of
			// how the platform renders the errno.
			name: "standard no route to host",
			err: &net.OpError{
				Op:  "dial",
				Net: "tcp",
				Err: errors.New("connect: no route to host"),
			},
			wantTitle:  "Peer Not Connected",
			wantCode:   http.StatusBadGateway,
			wantStatus: web.ErrorStatus{Proxy: true, Peer: false, Destination: false},
		},
		{
			// os.ErrDeadlineExceeded reports Timeout() == true, so the
			// enclosing *net.OpError is a net.Error timeout.
			name: "net timeout via os.ErrDeadlineExceeded",
			err: &net.OpError{
				Op:  "dial",
				Net: "tcp",
				Err: os.ErrDeadlineExceeded,
			},
			wantTitle:  "Request Timeout",
			wantCode:   http.StatusGatewayTimeout,
			wantStatus: web.ErrorStatus{Proxy: true, Peer: true, Destination: false},
		},
		{
			name:       "unknown error falls to default",
			err:        errors.New("something unexpected"),
			wantTitle:  "Connection Error",
			wantCode:   http.StatusBadGateway,
			wantStatus: web.ErrorStatus{Proxy: true, Peer: false, Destination: false},
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// The user-facing message is not asserted; only the title, code,
			// and component status are part of the checked contract.
			title, _, code, status := classifyProxyError(tt.err)
			assert.Equal(t, tt.wantTitle, title, "title")
			assert.Equal(t, tt.wantCode, code, "status code")
			assert.Equal(t, tt.wantStatus, status, "component status")
		})
	}
}

View File

@@ -24,8 +24,14 @@ import (
const deviceNamePrefix = "ingress-proxy-"
// ErrNoAccountID is returned when a request context is missing the account ID.
var ErrNoAccountID = errors.New("no account ID in request context")
// Sentinel errors returned (possibly wrapped) by this package. Callers should
// match them with errors.Is rather than by comparing message text.
var (
// ErrNoAccountID is returned when a request context is missing the account ID.
ErrNoAccountID = errors.New("no account ID in request context")
// ErrNoPeerConnection is returned when no embedded client exists for the account.
// RoundTrip wraps it with the account ID when the clients map has no entry
// for the request's account.
ErrNoPeerConnection = errors.New("no peer connection found")
// ErrClientStartFailed is returned when the embedded client fails to start.
// RoundTrip wraps it together with the underlying start error, except when
// that error is embed.ErrClientAlreadyStarted, which is not treated as a failure.
ErrClientStartFailed = errors.New("client start failed")
)
// domainInfo holds metadata about a registered domain.
type domainInfo struct {
@@ -346,7 +352,7 @@ func (n *NetBird) RoundTrip(req *http.Request) (*http.Response, error) {
entry, exists := n.clients[accountID]
if !exists {
n.clientsMux.RUnlock()
return nil, fmt.Errorf("no peer connection found for account: %s", accountID)
return nil, fmt.Errorf("%w for account: %s", ErrNoPeerConnection, accountID)
}
client := entry.client
transport := entry.transport
@@ -359,7 +365,7 @@ func (n *NetBird) RoundTrip(req *http.Request) (*http.Response, error) {
defer cancel()
if err := client.Start(startCtx); err != nil {
if !errors.Is(err, embed.ErrClientAlreadyStarted) {
return nil, fmt.Errorf("start netbird client: %w", err)
return nil, fmt.Errorf("%w: %w", ErrClientStartFailed, err)
}
}