Improve embed client error detection and reporting

This commit is contained in:
Viktor Liu
2026-02-09 01:49:16 +08:00
parent 780e9f57a5
commit 9904235a2f
3 changed files with 207 additions and 18 deletions

View File

@@ -319,8 +319,6 @@ func getRequestID(r *http.Request) string {
// classifyProxyError determines the appropriate error title, message, HTTP // classifyProxyError determines the appropriate error title, message, HTTP
// status code, and component status based on the error type. // status code, and component status based on the error type.
func classifyProxyError(err error) (title, message string, code int, status web.ErrorStatus) { func classifyProxyError(err error) (title, message string, code int, status web.ErrorStatus) {
errStr := err.Error()
switch { switch {
case errors.Is(err, context.DeadlineExceeded): case errors.Is(err, context.DeadlineExceeded):
return "Request Timeout", return "Request Timeout",
@@ -340,36 +338,68 @@ func classifyProxyError(err error) (title, message string, code int, status web.
http.StatusInternalServerError, http.StatusInternalServerError,
web.ErrorStatus{Proxy: false, Peer: false, Destination: false} web.ErrorStatus{Proxy: false, Peer: false, Destination: false}
case strings.Contains(errStr, "no peer connection found"), case errors.Is(err, roundtrip.ErrNoPeerConnection),
strings.Contains(errStr, "start netbird client"), errors.Is(err, roundtrip.ErrClientStartFailed):
strings.Contains(errStr, "engine not started"),
strings.Contains(errStr, "get net:"):
// The proxy peer (embedded client) is not connected
return "Proxy Not Connected", return "Proxy Not Connected",
"The proxy is not connected to the NetBird network. Please try again later or contact your administrator.", "The proxy is not connected to the NetBird network. Please try again later or contact your administrator.",
http.StatusBadGateway, http.StatusBadGateway,
web.ErrorStatus{Proxy: false, Peer: false, Destination: false} web.ErrorStatus{Proxy: false, Peer: false, Destination: false}
case strings.Contains(errStr, "connection refused"): case isConnectionRefused(err):
// Routing peer connected but destination service refused the connection
return "Service Unavailable", return "Service Unavailable",
"The connection to the service was refused. Please verify that the service is running and try again.", "The connection to the service was refused. Please verify that the service is running and try again.",
http.StatusBadGateway, http.StatusBadGateway,
web.ErrorStatus{Proxy: true, Peer: true, Destination: false} web.ErrorStatus{Proxy: true, Peer: true, Destination: false}
case strings.Contains(errStr, "no route to host"), case isHostUnreachable(err):
strings.Contains(errStr, "network is unreachable"),
strings.Contains(errStr, "i/o timeout"):
// Peer is not reachable
return "Peer Not Connected", return "Peer Not Connected",
"The connection to the peer could not be established. Please ensure the peer is running and connected to the NetBird network.", "The connection to the peer could not be established. Please ensure the peer is running and connected to the NetBird network.",
http.StatusBadGateway, http.StatusBadGateway,
web.ErrorStatus{Proxy: true, Peer: false, Destination: false} web.ErrorStatus{Proxy: true, Peer: false, Destination: false}
case isNetTimeout(err):
return "Request Timeout",
"The request timed out while trying to reach the service. Please refresh the page and try again.",
http.StatusGatewayTimeout,
web.ErrorStatus{Proxy: true, Peer: true, Destination: false}
} }
// Unknown error - log it and show generic message
return "Connection Error", return "Connection Error",
"An unexpected error occurred while connecting to the service. Please try again later.", "An unexpected error occurred while connecting to the service. Please try again later.",
http.StatusBadGateway, http.StatusBadGateway,
web.ErrorStatus{Proxy: true, Peer: false, Destination: false} web.ErrorStatus{Proxy: true, Peer: false, Destination: false}
} }
// isConnectionRefused reports whether err carries a *net.OpError whose inner
// error message mentions a refused connection. Matching on the word "refused"
// covers both the standard library wording ("connection refused") and the
// gVisor netstack wording ("connection was refused").
func isConnectionRefused(err error) bool {
	const marker = "refused"
	return opErrorContains(err, marker)
}
// isHostUnreachable reports whether err carries a *net.OpError whose inner
// error message indicates that the host or network could not be reached.
// The markers cover "no route to host" as well as any "... is unreachable"
// wording (for example "host is unreachable" or "network is unreachable").
func isHostUnreachable(err error) bool {
	for _, marker := range []string{"unreachable", "no route to host"} {
		if opErrorContains(err, marker) {
			return true
		}
	}
	return false
}
// isNetTimeout reports whether any error in err's chain implements net.Error
// and identifies itself as a timeout. A nil err, or a chain without a
// net.Error, yields false.
func isNetTimeout(err error) bool {
	var timeoutErr net.Error
	if !errors.As(err, &timeoutErr) {
		return false
	}
	return timeoutErr.Timeout()
}
// opErrorContains looks for a *net.OpError in err's chain and reports whether
// the message of its inner error (OpError.Err) contains substr. It returns
// false when err holds no *net.OpError or when the OpError has no inner error.
// Comparing message text rather than error values lets it recognise inner
// errors that are plain strings, such as those built with errors.New.
func opErrorContains(err error, substr string) bool {
	var netOpErr *net.OpError
	if !errors.As(err, &netOpErr) {
		return false
	}

	inner := netOpErr.Err
	if inner == nil {
		return false
	}
	return strings.Contains(inner.Error(), substr)
}

View File

@@ -1,18 +1,26 @@
package proxy package proxy
import ( import (
"context"
"crypto/tls" "crypto/tls"
"errors"
"fmt"
"net"
"net/http" "net/http"
"net/http/httptest" "net/http/httptest"
"net/http/httputil" "net/http/httputil"
"net/netip" "net/netip"
"net/url" "net/url"
"os"
"syscall"
"testing" "testing"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
"github.com/netbirdio/netbird/proxy/auth" "github.com/netbirdio/netbird/proxy/auth"
"github.com/netbirdio/netbird/proxy/internal/roundtrip"
"github.com/netbirdio/netbird/proxy/web"
) )
func TestRewriteFunc_HostRewriting(t *testing.T) { func TestRewriteFunc_HostRewriting(t *testing.T) {
@@ -811,3 +819,148 @@ func newProxyRequest(t *testing.T, rawURL, remoteAddr string) *httputil.ProxyReq
return &httputil.ProxyRequest{In: in, Out: out} return &httputil.ProxyRequest{In: in, Out: out}
} }
// TestClassifyProxyError is a table-driven test that feeds representative
// errors into classifyProxyError and checks the returned title, HTTP status
// code and component status. The user-facing message is intentionally not
// asserted.
func TestClassifyProxyError(t *testing.T) {
	tests := []struct {
		name       string
		err        error
		wantTitle  string
		wantCode   int
		wantStatus web.ErrorStatus
	}{
		{
			name:       "context deadline exceeded",
			err:        context.DeadlineExceeded,
			wantTitle:  "Request Timeout",
			wantCode:   http.StatusGatewayTimeout,
			wantStatus: web.ErrorStatus{Proxy: true, Peer: true, Destination: false},
		},
		{
			name:       "wrapped deadline exceeded",
			err:        fmt.Errorf("dial: %w", context.DeadlineExceeded),
			wantTitle:  "Request Timeout",
			wantCode:   http.StatusGatewayTimeout,
			wantStatus: web.ErrorStatus{Proxy: true, Peer: true, Destination: false},
		},
		{
			name:       "context canceled",
			err:        context.Canceled,
			wantTitle:  "Request Canceled",
			wantCode:   http.StatusBadGateway,
			wantStatus: web.ErrorStatus{Proxy: true, Peer: true, Destination: false},
		},
		{
			name:       "no account ID",
			err:        roundtrip.ErrNoAccountID,
			wantTitle:  "Configuration Error",
			wantCode:   http.StatusInternalServerError,
			wantStatus: web.ErrorStatus{Proxy: false, Peer: false, Destination: false},
		},
		{
			name:       "no peer connection",
			err:        fmt.Errorf("%w for account: abc", roundtrip.ErrNoPeerConnection),
			wantTitle:  "Proxy Not Connected",
			wantCode:   http.StatusBadGateway,
			wantStatus: web.ErrorStatus{Proxy: false, Peer: false, Destination: false},
		},
		{
			name:       "client not started",
			err:        fmt.Errorf("%w: %w", roundtrip.ErrClientStartFailed, errors.New("engine init failed")),
			wantTitle:  "Proxy Not Connected",
			wantCode:   http.StatusBadGateway,
			wantStatus: web.ErrorStatus{Proxy: false, Peer: false, Destination: false},
		},
		{
			name: "syscall ECONNREFUSED via os.SyscallError",
			err: &net.OpError{
				Op:  "dial",
				Net: "tcp",
				Err: &os.SyscallError{Syscall: "connect", Err: syscall.ECONNREFUSED},
			},
			wantTitle:  "Service Unavailable",
			wantCode:   http.StatusBadGateway,
			wantStatus: web.ErrorStatus{Proxy: true, Peer: true, Destination: false},
		},
		{
			name: "gvisor connection was refused",
			err: &net.OpError{
				Op:  "connect",
				Net: "tcp",
				Err: errors.New("connection was refused"),
			},
			wantTitle:  "Service Unavailable",
			wantCode:   http.StatusBadGateway,
			wantStatus: web.ErrorStatus{Proxy: true, Peer: true, Destination: false},
		},
		{
			name: "syscall EHOSTUNREACH via os.SyscallError",
			err: &net.OpError{
				Op:  "dial",
				Net: "tcp",
				Err: &os.SyscallError{Syscall: "connect", Err: syscall.EHOSTUNREACH},
			},
			wantTitle:  "Peer Not Connected",
			wantCode:   http.StatusBadGateway,
			wantStatus: web.ErrorStatus{Proxy: true, Peer: false, Destination: false},
		},
		{
			name: "syscall ENETUNREACH via os.SyscallError",
			err: &net.OpError{
				Op:  "dial",
				Net: "tcp",
				Err: &os.SyscallError{Syscall: "connect", Err: syscall.ENETUNREACH},
			},
			wantTitle:  "Peer Not Connected",
			wantCode:   http.StatusBadGateway,
			wantStatus: web.ErrorStatus{Proxy: true, Peer: false, Destination: false},
		},
		{
			name: "gvisor host is unreachable",
			err: &net.OpError{
				Op:  "connect",
				Net: "tcp",
				Err: errors.New("host is unreachable"),
			},
			wantTitle:  "Peer Not Connected",
			wantCode:   http.StatusBadGateway,
			wantStatus: web.ErrorStatus{Proxy: true, Peer: false, Destination: false},
		},
		{
			name: "gvisor network is unreachable",
			err: &net.OpError{
				Op:  "connect",
				Net: "tcp",
				Err: errors.New("network is unreachable"),
			},
			wantTitle:  "Peer Not Connected",
			wantCode:   http.StatusBadGateway,
			wantStatus: web.ErrorStatus{Proxy: true, Peer: false, Destination: false},
		},
		{
			// The OpError is wrapped so this case checks that classification
			// still finds it further down the error chain.
			name: "wrapped no route to host",
			err: fmt.Errorf("proxy dial: %w", &net.OpError{
				Op:  "dial",
				Net: "tcp",
				Err: &os.SyscallError{Syscall: "connect", Err: syscall.EHOSTUNREACH},
			}),
			wantTitle:  "Peer Not Connected",
			wantCode:   http.StatusBadGateway,
			wantStatus: web.ErrorStatus{Proxy: true, Peer: false, Destination: false},
		},
		{
			// A network-level timeout that is not context.DeadlineExceeded;
			// it is expected to be reported as a gateway timeout as well.
			name: "net timeout via OpError",
			err: &net.OpError{
				Op:  "dial",
				Net: "tcp",
				Err: os.ErrDeadlineExceeded,
			},
			wantTitle:  "Request Timeout",
			wantCode:   http.StatusGatewayTimeout,
			wantStatus: web.ErrorStatus{Proxy: true, Peer: true, Destination: false},
		},
		{
			name:       "unknown error falls to default",
			err:        errors.New("something unexpected"),
			wantTitle:  "Connection Error",
			wantCode:   http.StatusBadGateway,
			wantStatus: web.ErrorStatus{Proxy: true, Peer: false, Destination: false},
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			title, _, code, status := classifyProxyError(tt.err)
			assert.Equal(t, tt.wantTitle, title, "title")
			assert.Equal(t, tt.wantCode, code, "status code")
			assert.Equal(t, tt.wantStatus, status, "component status")
		})
	}
}

View File

@@ -24,8 +24,14 @@ import (
const deviceNamePrefix = "ingress-proxy-" const deviceNamePrefix = "ingress-proxy-"
// ErrNoAccountID is returned when a request context is missing the account ID. var (
var ErrNoAccountID = errors.New("no account ID in request context") // ErrNoAccountID is returned when a request context is missing the account ID.
ErrNoAccountID = errors.New("no account ID in request context")
// ErrNoPeerConnection is returned when no embedded client exists for the account.
ErrNoPeerConnection = errors.New("no peer connection found")
// ErrClientStartFailed is returned when the embedded client fails to start.
ErrClientStartFailed = errors.New("client start failed")
)
// domainInfo holds metadata about a registered domain. // domainInfo holds metadata about a registered domain.
type domainInfo struct { type domainInfo struct {
@@ -346,7 +352,7 @@ func (n *NetBird) RoundTrip(req *http.Request) (*http.Response, error) {
entry, exists := n.clients[accountID] entry, exists := n.clients[accountID]
if !exists { if !exists {
n.clientsMux.RUnlock() n.clientsMux.RUnlock()
return nil, fmt.Errorf("no peer connection found for account: %s", accountID) return nil, fmt.Errorf("%w for account: %s", ErrNoPeerConnection, accountID)
} }
client := entry.client client := entry.client
transport := entry.transport transport := entry.transport
@@ -359,7 +365,7 @@ func (n *NetBird) RoundTrip(req *http.Request) (*http.Response, error) {
defer cancel() defer cancel()
if err := client.Start(startCtx); err != nil { if err := client.Start(startCtx); err != nil {
if !errors.Is(err, embed.ErrClientAlreadyStarted) { if !errors.Is(err, embed.ErrClientAlreadyStarted) {
return nil, fmt.Errorf("start netbird client: %w", err) return nil, fmt.Errorf("%w: %w", ErrClientStartFailed, err)
} }
} }