Improve logging and error handling

This commit is contained in:
Viktor Liu
2026-02-12 13:26:36 +08:00
parent 54a73c6649
commit d689718b50
10 changed files with 111 additions and 89 deletions

View File

@@ -160,8 +160,7 @@ func (s *ProxyServiceServer) GetMappingUpdate(req *proto.GetMappingUpdateRequest
}()
if err := s.sendSnapshot(ctx, conn); err != nil {
log.Errorf("Failed to send snapshot to proxy %s: %v", proxyID, err)
return err
return fmt.Errorf("send snapshot to proxy %s: %w", proxyID, err)
}
errChan := make(chan error, 2)
@@ -169,7 +168,7 @@ func (s *ProxyServiceServer) GetMappingUpdate(req *proto.GetMappingUpdateRequest
select {
case err := <-errChan:
return err
return fmt.Errorf("send update to proxy %s: %w", proxyID, err)
case <-connCtx.Done():
return connCtx.Err()
}
@@ -201,7 +200,7 @@ func (s *ProxyServiceServer) sendSnapshot(ctx context.Context, conn *proxyConnec
log.WithFields(log.Fields{
"service": service.Name,
"account": service.AccountID,
}).WithError(err).Error("Failed to generate auth token for snapshot")
}).WithError(err).Error("failed to generate auth token for snapshot")
continue
}
@@ -214,8 +213,11 @@ func (s *ProxyServiceServer) sendSnapshot(ctx context.Context, conn *proxyConnec
),
},
}); err != nil {
log.WithError(err).Error("Failed to send proxy mapping")
continue
log.WithFields(log.Fields{
"domain": service.Domain,
"account": service.AccountID,
}).WithError(err).Error("failed to send proxy mapping")
return fmt.Errorf("send proxy mapping: %w", err)
}
}
@@ -244,7 +246,6 @@ func (s *ProxyServiceServer) sender(conn *proxyConnection, errChan chan<- error)
select {
case msg := <-conn.sendChan:
if err := conn.stream.Send(&proto.GetMappingUpdateResponse{Mapping: []*proto.ProxyMapping{msg}}); err != nil {
log.Errorf("Failed to send message to proxy %s: %v", conn.proxyID, err)
errChan <- err
return
}
@@ -285,7 +286,7 @@ func (s *ProxyServiceServer) SendAccessLog(ctx context.Context, req *proto.SendA
if err := s.accessLogManager.SaveAccessLog(ctx, logEntry); err != nil {
log.WithContext(ctx).Errorf("failed to save access log: %v", err)
return nil, status.Errorf(codes.Internal, "failed to save access log: %v", err)
return nil, status.Errorf(codes.Internal, "save access log: %v", err)
}
return &proto.SendAccessLogResponse{}, nil
@@ -425,8 +426,8 @@ func (s *ProxyServiceServer) GetAvailableClusters() []ClusterInfo {
func (s *ProxyServiceServer) Authenticate(ctx context.Context, req *proto.AuthenticateRequest) (*proto.AuthenticateResponse, error) {
service, err := s.reverseProxyManager.GetServiceByID(ctx, req.GetAccountId(), req.GetId())
if err != nil {
// TODO: log the error
return nil, status.Errorf(codes.FailedPrecondition, "failed to get service from store: %v", err)
log.WithContext(ctx).Debugf("failed to get service from store: %v", err)
return nil, status.Errorf(codes.FailedPrecondition, "get service from store: %v", err)
}
var authenticated bool
@@ -436,8 +437,7 @@ func (s *ProxyServiceServer) Authenticate(ctx context.Context, req *proto.Authen
case *proto.AuthenticateRequest_Pin:
auth := service.Auth.PinAuth
if auth == nil || !auth.Enabled {
// TODO: log
// Break here and use the default authenticated == false.
log.WithContext(ctx).Debugf("PIN authentication attempted but not enabled for service %s", req.GetId())
break
}
err = argon2id.Verify(v.Pin.GetPin(), auth.Pin)
@@ -455,8 +455,7 @@ func (s *ProxyServiceServer) Authenticate(ctx context.Context, req *proto.Authen
case *proto.AuthenticateRequest_Password:
auth := service.Auth.PasswordAuth
if auth == nil || !auth.Enabled {
// TODO: log
// Break here and use the default authenticated == false.
log.WithContext(ctx).Debugf("password authentication attempted but not enabled for service %s", req.GetId())
break
}
err = argon2id.Verify(v.Password.GetPassword(), auth.Password)
@@ -483,8 +482,8 @@ func (s *ProxyServiceServer) Authenticate(ctx context.Context, req *proto.Authen
proxyauth.DefaultSessionExpiry,
)
if err != nil {
log.WithError(err).Error("Failed to sign session token")
authenticated = false
log.WithContext(ctx).WithError(err).Error("failed to sign session token")
return nil, status.Errorf(codes.Internal, "sign session token: %v", err)
}
}
@@ -515,8 +514,8 @@ func (s *ProxyServiceServer) SendStatusUpdate(ctx context.Context, req *proto.Se
if certificateIssued {
if err := s.reverseProxyManager.SetCertificateIssuedAt(ctx, accountID, serviceID); err != nil {
log.WithContext(ctx).WithError(err).Error("Failed to set certificate issued timestamp")
return nil, status.Errorf(codes.Internal, "failed to update certificate timestamp: %v", err)
log.WithContext(ctx).WithError(err).Error("failed to set certificate issued timestamp")
return nil, status.Errorf(codes.Internal, "update certificate timestamp: %v", err)
}
log.WithFields(log.Fields{
"service_id": serviceID,
@@ -527,8 +526,8 @@ func (s *ProxyServiceServer) SendStatusUpdate(ctx context.Context, req *proto.Se
internalStatus := protoStatusToInternal(protoStatus)
if err := s.reverseProxyManager.SetStatus(ctx, accountID, serviceID, internalStatus); err != nil {
log.WithContext(ctx).WithError(err).Error("Failed to set service status")
return nil, status.Errorf(codes.Internal, "failed to update service status: %v", err)
log.WithContext(ctx).WithError(err).Error("failed to update service status")
return nil, status.Errorf(codes.Internal, "update service status: %v", err)
}
log.WithFields(log.Fields{
@@ -590,7 +589,7 @@ func (s *ProxyServiceServer) CreateProxyPeer(ctx context.Context, req *proto.Cre
return &proto.CreateProxyPeerResponse{
Success: false,
ErrorMessage: strPtr("authentication failed: invalid or expired token"),
}, status.Errorf(codes.Unauthenticated, "token validation failed: %v", err)
}, status.Errorf(codes.Unauthenticated, "token validation: %v", err)
}
err := s.peersManager.CreateProxyPeer(ctx, accountID, key, cluster)
@@ -598,11 +597,11 @@ func (s *ProxyServiceServer) CreateProxyPeer(ctx context.Context, req *proto.Cre
log.WithFields(log.Fields{
"service_id": serviceID,
"account_id": accountID,
}).WithError(err).Error("CreateProxyPeer: failed to create proxy peer")
}).WithError(err).Error("failed to create proxy peer")
return &proto.CreateProxyPeerResponse{
Success: false,
ErrorMessage: strPtr(fmt.Sprintf("failed to create proxy peer: %v", err)),
}, status.Errorf(codes.Internal, "failed to create proxy peer: %v", err)
ErrorMessage: strPtr(fmt.Sprintf("create proxy peer: %v", err)),
}, status.Errorf(codes.Internal, "create proxy peer: %v", err)
}
return &proto.CreateProxyPeerResponse{
@@ -618,14 +617,13 @@ func strPtr(s string) *string {
func (s *ProxyServiceServer) GetOIDCURL(ctx context.Context, req *proto.GetOIDCURLRequest) (*proto.GetOIDCURLResponse, error) {
redirectURL, err := url.Parse(req.GetRedirectUrl())
if err != nil {
// TODO: log
return nil, status.Errorf(codes.InvalidArgument, "failed to parse redirect url: %v", err)
return nil, status.Errorf(codes.InvalidArgument, "parse redirect url: %v", err)
}
// Validate redirectURL against known service endpoints to avoid abuse of OIDC redirection.
services, err := s.reverseProxyManager.GetAccountServices(ctx, req.GetAccountId())
if err != nil {
// TODO: log
return nil, status.Errorf(codes.FailedPrecondition, "failed to get services from store: %v", err)
log.WithContext(ctx).Errorf("failed to get account services: %v", err)
return nil, status.Errorf(codes.FailedPrecondition, "get account services: %v", err)
}
var found bool
for _, service := range services {
@@ -635,14 +633,14 @@ func (s *ProxyServiceServer) GetOIDCURL(ctx context.Context, req *proto.GetOIDCU
}
}
if !found {
// TODO: log
log.WithContext(ctx).Debugf("OIDC redirect URL %q does not match any service domain", redirectURL.Hostname())
return nil, status.Errorf(codes.FailedPrecondition, "service not found in store")
}
provider, err := oidc.NewProvider(ctx, s.oidcConfig.Issuer)
if err != nil {
// TODO: log
return nil, status.Errorf(codes.FailedPrecondition, "failed to create OIDC provider: %v", err)
log.WithContext(ctx).Errorf("failed to create OIDC provider: %v", err)
return nil, status.Errorf(codes.FailedPrecondition, "create OIDC provider: %v", err)
}
scopes := s.oidcConfig.Scopes

View File

@@ -3,6 +3,7 @@ package accesslog
import (
"context"
"net/netip"
"time"
log "github.com/sirupsen/logrus"
"google.golang.org/grpc"
@@ -62,7 +63,9 @@ func (l *Logger) log(ctx context.Context, entry logEntry) {
// allow for resolving that on the server.
now := timestamppb.Now() // Grab the timestamp before launching the goroutine to try to prevent weird timing issues. This is probably unnecessary.
go func() {
if _, err := l.client.SendAccessLog(context.Background(), &proto.SendAccessLogRequest{
logCtx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
if _, err := l.client.SendAccessLog(logCtx, &proto.SendAccessLogRequest{
Log: &proto.AccessLog{
LogId: entry.ID,
AccountId: entry.AccountID,

View File

@@ -30,15 +30,14 @@ type SessionValidator interface {
ValidateSession(ctx context.Context, in *proto.ValidateSessionRequest, opts ...grpc.CallOption) (*proto.ValidateSessionResponse, error)
}
// Scheme defines an authentication mechanism for a domain.
type Scheme interface {
Type() auth.Method
// Authenticate should check the passed request and determine whether
// it represents an authenticated user request. If it does not, then
// an empty string should indicate an unauthenticated request which
// will be rejected; optionally, it can also return any data that should
// be included in a UI template when prompting the user to authenticate.
// If the request is authenticated, then a session token should be returned.
Authenticate(*http.Request) (token string, promptData string)
// Authenticate checks the request and determines whether it represents
// an authenticated user. An empty token indicates an unauthenticated
// request; optionally, promptData may be returned for the login UI.
// An error indicates an infrastructure failure (e.g. gRPC unavailable).
Authenticate(*http.Request) (token string, promptData string, err error)
}
type DomainConfig struct {
@@ -141,7 +140,15 @@ func (mw *Middleware) Protect(next http.Handler) http.Handler {
methods := make(map[string]string)
var attemptedMethod string
for _, scheme := range config.Schemes {
token, promptData := scheme.Authenticate(r)
token, promptData, err := scheme.Authenticate(r)
if err != nil {
mw.logger.WithField("scheme", scheme.Type().String()).Warnf("authentication infrastructure error: %v", err)
if cd := proxy.CapturedDataFromContext(r.Context()); cd != nil {
cd.SetOrigin(proxy.OriginAuth)
}
http.Error(w, "authentication service unavailable", http.StatusBadGateway)
return
}
// Track if credentials were submitted but auth failed
if token == "" && wasCredentialSubmitted(r, scheme.Type()) {

View File

@@ -32,16 +32,16 @@ type stubScheme struct {
method auth.Method
token string
promptID string
authFn func(*http.Request) (string, string)
authFn func(*http.Request) (string, string, error)
}
func (s *stubScheme) Type() auth.Method { return s.method }
func (s *stubScheme) Authenticate(r *http.Request) (string, string) {
func (s *stubScheme) Authenticate(r *http.Request) (string, string, error) {
if s.authFn != nil {
return s.authFn(r)
}
return s.token, s.promptID
return s.token, s.promptID, nil
}
func newPassthroughHandler() http.Handler {
@@ -344,11 +344,11 @@ func TestProtect_SchemeAuthRedirectsWithCookie(t *testing.T) {
scheme := &stubScheme{
method: auth.MethodPIN,
authFn: func(r *http.Request) (string, string) {
authFn: func(r *http.Request) (string, string, error) {
if r.FormValue("pin") == "111111" {
return token, ""
return token, "", nil
}
return "", "pin"
return "", "pin", nil
},
}
require.NoError(t, mw.AddDomain("example.com", []Scheme{scheme}, kp.PublicKey, time.Hour, "", ""))
@@ -391,8 +391,8 @@ func TestProtect_FailedAuthDoesNotSetCookie(t *testing.T) {
scheme := &stubScheme{
method: auth.MethodPIN,
authFn: func(_ *http.Request) (string, string) {
return "", "pin"
authFn: func(_ *http.Request) (string, string, error) {
return "", "pin", nil
},
}
require.NoError(t, mw.AddDomain("example.com", []Scheme{scheme}, kp.PublicKey, time.Hour, "", ""))
@@ -418,17 +418,17 @@ func TestProtect_MultipleSchemes(t *testing.T) {
// First scheme (PIN) always fails, second scheme (password) succeeds.
pinScheme := &stubScheme{
method: auth.MethodPIN,
authFn: func(_ *http.Request) (string, string) {
return "", "pin"
authFn: func(_ *http.Request) (string, string, error) {
return "", "pin", nil
},
}
passwordScheme := &stubScheme{
method: auth.MethodPassword,
authFn: func(r *http.Request) (string, string) {
authFn: func(r *http.Request) (string, string, error) {
if r.FormValue("password") == "secret" {
return token, ""
return token, "", nil
}
return "", "password"
return "", "password", nil
},
}
require.NoError(t, mw.AddDomain("example.com", []Scheme{pinScheme, passwordScheme}, kp.PublicKey, time.Hour, "", ""))
@@ -457,8 +457,8 @@ func TestProtect_InvalidTokenFromSchemeReturns400(t *testing.T) {
// Return a garbage token that won't validate.
scheme := &stubScheme{
method: auth.MethodPIN,
authFn: func(_ *http.Request) (string, string) {
return "invalid-jwt-token", ""
authFn: func(_ *http.Request) (string, string, error) {
return "invalid-jwt-token", "", nil
},
}
require.NoError(t, mw.AddDomain("example.com", []Scheme{scheme}, kp.PublicKey, time.Hour, "", ""))
@@ -517,8 +517,8 @@ func TestProtect_FailedPinAuthCapturesAuthMethod(t *testing.T) {
// Scheme that always fails authentication (returns empty token)
scheme := &stubScheme{
method: auth.MethodPIN,
authFn: func(_ *http.Request) (string, string) {
return "", "pin"
authFn: func(_ *http.Request) (string, string, error) {
return "", "pin", nil
},
}
require.NoError(t, mw.AddDomain("example.com", []Scheme{scheme}, kp.PublicKey, time.Hour, "", ""))
@@ -544,8 +544,8 @@ func TestProtect_FailedPasswordAuthCapturesAuthMethod(t *testing.T) {
scheme := &stubScheme{
method: auth.MethodPassword,
authFn: func(_ *http.Request) (string, string) {
return "", "password"
authFn: func(_ *http.Request) (string, string, error) {
return "", "password", nil
},
}
require.NoError(t, mw.AddDomain("example.com", []Scheme{scheme}, kp.PublicKey, time.Hour, "", ""))
@@ -571,8 +571,8 @@ func TestProtect_NoCredentialsDoesNotCaptureAuthMethod(t *testing.T) {
scheme := &stubScheme{
method: auth.MethodPIN,
authFn: func(_ *http.Request) (string, string) {
return "", "pin"
authFn: func(_ *http.Request) (string, string, error) {
return "", "pin", nil
},
}
require.NoError(t, mw.AddDomain("example.com", []Scheme{scheme}, kp.PublicKey, time.Hour, "", ""))

View File

@@ -2,6 +2,7 @@ package auth
import (
"context"
"fmt"
"net/http"
"net/url"
@@ -36,12 +37,13 @@ func (OIDC) Type() auth.Method {
return auth.MethodOIDC
}
func (o OIDC) Authenticate(r *http.Request) (string, string) {
// Authenticate checks for an OIDC session token or obtains the OIDC redirect URL.
func (o OIDC) Authenticate(r *http.Request) (string, string, error) {
// Check for the session_token query param (from OIDC redirects).
// The management server passes the token in the URL because it cannot set
// cookies for the proxy's domain (cookies are domain-scoped per RFC 6265).
if token := r.URL.Query().Get("session_token"); token != "" {
return token, ""
return token, "", nil
}
redirectURL := &url.URL{
@@ -56,9 +58,8 @@ func (o OIDC) Authenticate(r *http.Request) (string, string) {
RedirectUrl: redirectURL.String(),
})
if err != nil {
// TODO: log
return "", ""
return "", "", fmt.Errorf("get OIDC URL: %w", err)
}
return "", res.GetUrl()
return "", res.GetUrl(), nil
}

View File

@@ -1,6 +1,7 @@
package auth
import (
"fmt"
"net/http"
"github.com/netbirdio/netbird/proxy/auth"
@@ -31,12 +32,12 @@ func (Password) Type() auth.Method {
// If authentication fails, the required HTTP form ID is returned
// so that it can be injected into a request from the UI so that
// authentication may be successful.
func (p Password) Authenticate(r *http.Request) (string, string) {
func (p Password) Authenticate(r *http.Request) (string, string, error) {
password := r.FormValue(passwordFormId)
if password == "" {
// This cannot be authenticated, so not worth wasting time sending the request.
return "", passwordFormId
// No password submitted; return the form ID so the UI can prompt the user.
return "", passwordFormId, nil
}
res, err := p.client.Authenticate(r.Context(), &proto.AuthenticateRequest{
@@ -49,13 +50,12 @@ func (p Password) Authenticate(r *http.Request) (string, string) {
},
})
if err != nil {
// TODO: log error here
return "", passwordFormId
return "", "", fmt.Errorf("authenticate password: %w", err)
}
if res.GetSuccess() {
return res.GetSessionToken(), ""
return res.GetSessionToken(), "", nil
}
return "", passwordFormId
return "", passwordFormId, nil
}

View File

@@ -1,6 +1,7 @@
package auth
import (
"fmt"
"net/http"
"github.com/netbirdio/netbird/proxy/auth"
@@ -31,12 +32,12 @@ func (Pin) Type() auth.Method {
// If authentication fails, the required HTTP form ID is returned
// so that it can be injected into a request from the UI so that
// authentication may be successful.
func (p Pin) Authenticate(r *http.Request) (string, string) {
func (p Pin) Authenticate(r *http.Request) (string, string, error) {
pin := r.FormValue(pinFormId)
if pin == "" {
// This cannot be authenticated, so not worth wasting time sending the request.
return "", pinFormId
// No PIN submitted; return the form ID so the UI can prompt the user.
return "", pinFormId, nil
}
res, err := p.client.Authenticate(r.Context(), &proto.AuthenticateRequest{
@@ -49,13 +50,12 @@ func (p Pin) Authenticate(r *http.Request) (string, string) {
},
})
if err != nil {
// TODO: log error here
return "", pinFormId
return "", "", fmt.Errorf("authenticate pin: %w", err)
}
if res.GetSuccess() {
return res.GetSessionToken(), ""
return res.GetSessionToken(), "", nil
}
return "", pinFormId
return "", pinFormId, nil
}

View File

@@ -220,7 +220,7 @@ func (n *NetBird) AddPeer(ctx context.Context, accountID types.AccountID, d doma
if errors.Is(err, context.DeadlineExceeded) {
n.logger.WithFields(log.Fields{
"account_id": accountID,
}).Debug("netbird client start timed out, will retry on first request")
}).Warn("netbird client start timed out, will retry on first request")
} else {
n.logger.WithFields(log.Fields{
"account_id": accountID,
@@ -279,6 +279,7 @@ func (n *NetBird) RemovePeer(ctx context.Context, accountID types.AccountID, d d
entry, exists := n.clients[accountID]
if !exists {
n.clientsMux.Unlock()
n.logger.WithField("account_id", accountID).Debug("remove peer: account not found")
return nil
}
@@ -286,6 +287,10 @@ func (n *NetBird) RemovePeer(ctx context.Context, accountID types.AccountID, d d
domInfo, domainExists := entry.domains[d]
if !domainExists {
n.clientsMux.Unlock()
n.logger.WithFields(log.Fields{
"account_id": accountID,
"domain": d,
}).Debug("remove peer: domain not registered")
return nil
}

View File

@@ -23,7 +23,7 @@ func (m *mockMgmtClient) CreateProxyPeer(_ context.Context, _ *proto.CreateProxy
// mockNetBird creates a NetBird instance for testing without actually connecting.
// It uses an invalid management URL to prevent real connections.
func mockNetBird() *NetBird {
return NewNetBird("http://invalid.test:9999", "test-proxy", "localhost", 0, nil, nil, &mockMgmtClient{}, nil)
return NewNetBird("http://invalid.test:9999", "test-proxy", "invalid.test", 0, nil, nil, &mockMgmtClient{}, nil)
}
func TestNetBird_AddPeer_CreatesClientForNewAccount(t *testing.T) {

View File

@@ -502,7 +502,12 @@ func (s *Server) handleMappingStream(ctx context.Context, mappingClient proto.Pr
}).Error("Error adding new mapping, ignoring this mapping and continuing processing")
}
case proto.ProxyMappingUpdateType_UPDATE_TYPE_MODIFIED:
s.updateMapping(ctx, mapping)
if err := s.updateMapping(ctx, mapping); err != nil {
s.Logger.WithFields(log.Fields{
"service_id": mapping.GetId(),
"domain": mapping.GetDomain(),
}).Errorf("failed to update mapping: %v", err)
}
case proto.ProxyMappingUpdateType_UPDATE_TYPE_REMOVED:
s.removeMapping(ctx, mapping)
}
@@ -536,11 +541,14 @@ func (s *Server) addMapping(ctx context.Context, mapping *proto.ProxyMapping) er
// Pass the mapping through to the update function to avoid duplicating the
// setup, currently update is simply a subset of this function, so this
// separation makes sense...to me at least.
s.updateMapping(ctx, mapping)
if err := s.updateMapping(ctx, mapping); err != nil {
s.removeMapping(ctx, mapping)
return fmt.Errorf("update mapping for domain %q: %w", d, err)
}
return nil
}
func (s *Server) updateMapping(ctx context.Context, mapping *proto.ProxyMapping) {
func (s *Server) updateMapping(ctx context.Context, mapping *proto.ProxyMapping) error {
// Very simple implementation here, we don't touch the existing peer
// connection or any existing TLS configuration, we simply overwrite
// the auth and proxy mappings.
@@ -559,10 +567,10 @@ func (s *Server) updateMapping(ctx context.Context, mapping *proto.ProxyMapping)
maxSessionAge := time.Duration(mapping.GetAuth().GetMaxSessionAgeSeconds()) * time.Second
if err := s.auth.AddDomain(mapping.GetDomain(), schemes, mapping.GetAuth().GetSessionKey(), maxSessionAge, mapping.GetAccountId(), mapping.GetId()); err != nil {
s.Logger.WithField("domain", mapping.GetDomain()).WithError(err).Error("Auth setup failed, refusing to serve domain without authentication")
return
return fmt.Errorf("auth setup for domain %s: %w", mapping.GetDomain(), err)
}
s.proxy.AddMapping(s.protoToMapping(mapping))
return nil
}
func (s *Server) removeMapping(ctx context.Context, mapping *proto.ProxyMapping) {
@@ -594,8 +602,8 @@ func (s *Server) protoToMapping(mapping *proto.ProxyMapping) proxy.Mapping {
"domain": mapping.GetDomain(),
"path": pathMapping.GetPath(),
"target": pathMapping.GetTarget(),
"error": err,
}).Error("Error parsing target URL for path, this path will be ignored but other paths will still be configured")
}).WithError(err).Error("failed to parse target URL for path, skipping")
continue
}
paths[pathMapping.GetPath()] = targetURL
}