mirror of https://github.com/netbirdio/netbird.git — synced 2026-04-16 07:16:38 +00:00 (Go, 379 lines, 10 KiB)
package accesslog
|
|
|
|
import (
|
|
"context"
|
|
"net/netip"
|
|
"sync"
|
|
"sync/atomic"
|
|
"time"
|
|
|
|
"github.com/rs/xid"
|
|
log "github.com/sirupsen/logrus"
|
|
"google.golang.org/grpc"
|
|
"google.golang.org/protobuf/types/known/timestamppb"
|
|
|
|
"github.com/netbirdio/netbird/proxy/auth"
|
|
"github.com/netbirdio/netbird/proxy/internal/types"
|
|
"github.com/netbirdio/netbird/shared/management/proto"
|
|
)
|
|
|
|
const (
	requestThreshold   = 10000              // Log every 10k requests per domain
	bytesThreshold     = 1024 * 1024 * 1024 // Log every 1GB transferred per domain
	usageCleanupPeriod = 1 * time.Hour      // Clean up stale counters every hour
	usageInactiveWindow = 24 * time.Hour    // Consider domain inactive if no traffic for 24 hours
	logSendTimeout      = 10 * time.Second  // Per-entry gRPC send deadline

	// denyCooldown is the min interval between deny log entries per service+reason
	// to prevent flooding from denied connections (e.g. UDP packets from blocked IPs).
	denyCooldown = 10 * time.Second

	// maxDenyBuckets caps tracked deny rate-limit entries to bound memory under DDoS.
	maxDenyBuckets = 10000

	// maxLogWorkers caps concurrent gRPC send goroutines.
	maxLogWorkers = 4096
)
|
|
|
|
// domainUsage accumulates per-domain traffic counters used for periodic
// threshold-based usage logging. Access is guarded by Logger.usageMux.
type domainUsage struct {
	requestCount     int64     // requests seen since requestStartTime; reset at requestThreshold
	requestStartTime time.Time // start of the current request-counting window

	bytesTransferred int64     // bytes seen since bytesStartTime; reset at bytesThreshold
	bytesStartTime   time.Time // start of the current byte-counting window

	lastActivity time.Time // Track last activity for cleanup
}
|
|
|
|
// gRPCClient is the subset of the management gRPC client used by Logger,
// defined locally so tests can substitute a fake.
type gRPCClient interface {
	SendAccessLog(ctx context.Context, in *proto.SendAccessLogRequest, opts ...grpc.CallOption) (*proto.SendAccessLogResponse, error)
}
|
|
|
|
// denyBucketKey identifies a rate-limited deny log stream.
// Each distinct service+reason pair gets its own cooldown bucket.
type denyBucketKey struct {
	ServiceID types.ServiceID
	Reason    string
}
|
|
|
|
// denyBucket tracks rate-limited deny log entries.
// Access is guarded by Logger.denyMu.
type denyBucket struct {
	lastLogged time.Time // last time a deny entry for this key was allowed through
	suppressed int64     // entries dropped since lastLogged; reported when the cooldown elapses
}
|
|
|
|
// Logger sends access log entries to the management server via gRPC.
// LogL4 (and log) are non-blocking: sends happen on a bounded worker pool,
// and entries are dropped when the pool is saturated.
type Logger struct {
	client         gRPCClient     // management-server gRPC client used for SendAccessLog
	logger         *log.Logger    // local logrus logger for diagnostics and fallback logging
	trustedProxies []netip.Prefix // upstream proxy ranges trusted for X-Forwarded-For extraction

	usageMux    sync.Mutex              // guards domainUsage
	domainUsage map[string]*domainUsage // per-domain traffic counters

	denyMu      sync.Mutex                    // guards denyBuckets
	denyBuckets map[denyBucketKey]*denyBucket // deny-log rate-limit state, capped at maxDenyBuckets

	logSem        chan struct{}      // semaphore bounding concurrent send goroutines (maxLogWorkers)
	cleanupCancel context.CancelFunc // stops the background cleanup goroutine; invoked by Close
	dropped       atomic.Int64       // running count of entries dropped due to a full worker pool
}
|
|
|
|
// NewLogger creates a new access log Logger. The trustedProxies parameter
|
|
// configures which upstream proxy IP ranges are trusted for extracting
|
|
// the real client IP from X-Forwarded-For headers.
|
|
func NewLogger(client gRPCClient, logger *log.Logger, trustedProxies []netip.Prefix) *Logger {
|
|
if logger == nil {
|
|
logger = log.StandardLogger()
|
|
}
|
|
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
l := &Logger{
|
|
client: client,
|
|
logger: logger,
|
|
trustedProxies: trustedProxies,
|
|
domainUsage: make(map[string]*domainUsage),
|
|
denyBuckets: make(map[denyBucketKey]*denyBucket),
|
|
logSem: make(chan struct{}, maxLogWorkers),
|
|
cleanupCancel: cancel,
|
|
}
|
|
|
|
// Start background cleanup routine
|
|
go l.cleanupStaleUsage(ctx)
|
|
|
|
return l
|
|
}
|
|
|
|
// Close stops the cleanup routine. Should be called during graceful shutdown.
|
|
func (l *Logger) Close() {
|
|
if l.cleanupCancel != nil {
|
|
l.cleanupCancel()
|
|
}
|
|
}
|
|
|
|
// logEntry is the internal representation of a single access log record,
// mapped 1:1 onto proto.AccessLog by log().
type logEntry struct {
	ID            string          // unique log ID (xid)
	AccountID     types.AccountID
	ServiceID     types.ServiceID
	Host          string // requested host / SNI / listen address
	Path          string // HTTP path (empty for L4 entries)
	DurationMs    int64
	Method        string // HTTP method (empty for L4 entries)
	ResponseCode  int32
	SourceIP      netip.Addr // client IP; may be the zero value when unknown
	AuthMechanism string     // auth method used, or a deny reason for denied L4 connections
	UserID        string     // only meaningful for OIDC sessions; cleared otherwise in log()
	AuthSuccess   bool
	BytesUpload   int64
	BytesDownload int64
	Protocol      Protocol
}
|
|
|
|
// Protocol identifies the transport protocol of an access log entry.
type Protocol string

// Supported access log protocols.
const (
	ProtocolHTTP Protocol = "http"
	ProtocolTCP  Protocol = "tcp"
	ProtocolUDP  Protocol = "udp"
	ProtocolTLS  Protocol = "tls"
)
|
|
|
|
// L4Entry holds the data for a layer-4 (TCP/UDP) access log entry.
type L4Entry struct {
	AccountID     types.AccountID
	ServiceID     types.ServiceID
	Protocol      Protocol
	Host          string // SNI hostname or listen address
	SourceIP      netip.Addr
	DurationMs    int64
	BytesUpload   int64
	BytesDownload int64
	// DenyReason, when non-empty, indicates the connection was denied.
	// Values match the HTTP auth mechanism strings: "ip_restricted",
	// "country_restricted", "geo_unavailable".
	DenyReason string
}
|
|
|
|
// LogL4 sends an access log entry for a layer-4 connection (TCP or UDP).
|
|
// The call is non-blocking: the gRPC send happens in a background goroutine.
|
|
func (l *Logger) LogL4(entry L4Entry) {
|
|
le := logEntry{
|
|
ID: xid.New().String(),
|
|
AccountID: entry.AccountID,
|
|
ServiceID: entry.ServiceID,
|
|
Protocol: entry.Protocol,
|
|
Host: entry.Host,
|
|
SourceIP: entry.SourceIP,
|
|
DurationMs: entry.DurationMs,
|
|
BytesUpload: entry.BytesUpload,
|
|
BytesDownload: entry.BytesDownload,
|
|
}
|
|
if entry.DenyReason != "" {
|
|
if !l.allowDenyLog(entry.ServiceID, entry.DenyReason) {
|
|
return
|
|
}
|
|
le.AuthMechanism = entry.DenyReason
|
|
le.AuthSuccess = false
|
|
}
|
|
l.log(le)
|
|
l.trackUsage(entry.Host, entry.BytesUpload+entry.BytesDownload)
|
|
}
|
|
|
|
// allowDenyLog rate-limits deny log entries per service+reason combination.
|
|
func (l *Logger) allowDenyLog(serviceID types.ServiceID, reason string) bool {
|
|
key := denyBucketKey{ServiceID: serviceID, Reason: reason}
|
|
now := time.Now()
|
|
|
|
l.denyMu.Lock()
|
|
defer l.denyMu.Unlock()
|
|
|
|
b, ok := l.denyBuckets[key]
|
|
if !ok {
|
|
if len(l.denyBuckets) >= maxDenyBuckets {
|
|
return false
|
|
}
|
|
l.denyBuckets[key] = &denyBucket{lastLogged: now}
|
|
return true
|
|
}
|
|
|
|
if now.Sub(b.lastLogged) >= denyCooldown {
|
|
if b.suppressed > 0 {
|
|
l.logger.Debugf("access restriction: suppressed %d deny log entries for %s (%s)", b.suppressed, serviceID, reason)
|
|
}
|
|
b.lastLogged = now
|
|
b.suppressed = 0
|
|
return true
|
|
}
|
|
|
|
b.suppressed++
|
|
return false
|
|
}
|
|
|
|
// log sends entry to the management server on a background goroutine drawn
// from the logSem-bounded worker pool. When the pool is saturated the entry
// is dropped and counted in l.dropped.
func (l *Logger) log(entry logEntry) {
	// Fire off the log request in a separate routine.
	// This increases the possibility of losing a log message
	// (although it should still get logged in the event of an error),
	// but it will reduce latency returning the request in the
	// middleware.
	// There is also a chance that log messages will arrive at
	// the server out of order; however, the timestamp should
	// allow for resolving that on the server.
	now := timestamppb.Now() // captured before the goroutine so the timestamp reflects request time
	select {
	case l.logSem <- struct{}{}:
	default:
		// Worker pool exhausted: drop rather than block the request path.
		total := l.dropped.Add(1)
		l.logger.Debugf("access log send dropped: worker limit reached (total dropped: %d)", total)
		return
	}
	go func() {
		defer func() { <-l.logSem }() // release the worker slot
		logCtx, cancel := context.WithTimeout(context.Background(), logSendTimeout)
		defer cancel()
		// Only OIDC sessions have a meaningful user identity.
		if entry.AuthMechanism != auth.MethodOIDC.String() {
			entry.UserID = ""
		}

		// The zero netip.Addr renders as an empty string rather than "invalid IP".
		var sourceIP string
		if entry.SourceIP.IsValid() {
			sourceIP = entry.SourceIP.String()
		}

		if _, err := l.client.SendAccessLog(logCtx, &proto.SendAccessLogRequest{
			Log: &proto.AccessLog{
				LogId:         entry.ID,
				AccountId:     string(entry.AccountID),
				Timestamp:     now,
				ServiceId:     string(entry.ServiceID),
				Host:          entry.Host,
				Path:          entry.Path,
				DurationMs:    entry.DurationMs,
				Method:        entry.Method,
				ResponseCode:  entry.ResponseCode,
				SourceIp:      sourceIP,
				AuthMechanism: entry.AuthMechanism,
				UserId:        entry.UserID,
				AuthSuccess:   entry.AuthSuccess,
				BytesUpload:   entry.BytesUpload,
				BytesDownload: entry.BytesDownload,
				Protocol:      string(entry.Protocol),
			},
		}); err != nil {
			// On send failure, log the full entry locally so it isn't silently lost.
			l.logger.WithFields(log.Fields{
				"service_id":     entry.ServiceID,
				"host":           entry.Host,
				"path":           entry.Path,
				"duration":       entry.DurationMs,
				"method":         entry.Method,
				"response_code":  entry.ResponseCode,
				"source_ip":      sourceIP,
				"auth_mechanism": entry.AuthMechanism,
				"user_id":        entry.UserID,
				"auth_success":   entry.AuthSuccess,
				"error":          err,
			}).Error("Error sending access log on gRPC connection")
		}
	}()
}
|
|
|
|
// trackUsage records request and byte counts per domain, logging when thresholds are hit.
|
|
func (l *Logger) trackUsage(domain string, bytesTransferred int64) {
|
|
if domain == "" {
|
|
return
|
|
}
|
|
|
|
l.usageMux.Lock()
|
|
defer l.usageMux.Unlock()
|
|
|
|
now := time.Now()
|
|
usage, exists := l.domainUsage[domain]
|
|
if !exists {
|
|
usage = &domainUsage{
|
|
requestStartTime: now,
|
|
bytesStartTime: now,
|
|
lastActivity: now,
|
|
}
|
|
l.domainUsage[domain] = usage
|
|
}
|
|
|
|
usage.lastActivity = now
|
|
|
|
usage.requestCount++
|
|
if usage.requestCount >= requestThreshold {
|
|
elapsed := time.Since(usage.requestStartTime)
|
|
l.logger.WithFields(log.Fields{
|
|
"domain": domain,
|
|
"requests": usage.requestCount,
|
|
"duration": elapsed.String(),
|
|
}).Infof("domain %s had %d requests over %s", domain, usage.requestCount, elapsed)
|
|
|
|
usage.requestCount = 0
|
|
usage.requestStartTime = now
|
|
}
|
|
|
|
usage.bytesTransferred += bytesTransferred
|
|
if usage.bytesTransferred >= bytesThreshold {
|
|
elapsed := time.Since(usage.bytesStartTime)
|
|
bytesInGB := float64(usage.bytesTransferred) / (1024 * 1024 * 1024)
|
|
l.logger.WithFields(log.Fields{
|
|
"domain": domain,
|
|
"bytes": usage.bytesTransferred,
|
|
"bytes_gb": bytesInGB,
|
|
"duration": elapsed.String(),
|
|
}).Infof("domain %s transferred %.2f GB over %s", domain, bytesInGB, elapsed)
|
|
|
|
usage.bytesTransferred = 0
|
|
usage.bytesStartTime = now
|
|
}
|
|
}
|
|
|
|
// cleanupStaleUsage removes usage and deny-rate-limit entries that have been inactive.
|
|
func (l *Logger) cleanupStaleUsage(ctx context.Context) {
|
|
ticker := time.NewTicker(usageCleanupPeriod)
|
|
defer ticker.Stop()
|
|
|
|
for {
|
|
select {
|
|
case <-ctx.Done():
|
|
return
|
|
case <-ticker.C:
|
|
now := time.Now()
|
|
l.cleanupDomainUsage(now)
|
|
l.cleanupDenyBuckets(now)
|
|
}
|
|
}
|
|
}
|
|
|
|
func (l *Logger) cleanupDomainUsage(now time.Time) {
|
|
l.usageMux.Lock()
|
|
defer l.usageMux.Unlock()
|
|
|
|
removed := 0
|
|
for domain, usage := range l.domainUsage {
|
|
if now.Sub(usage.lastActivity) > usageInactiveWindow {
|
|
delete(l.domainUsage, domain)
|
|
removed++
|
|
}
|
|
}
|
|
if removed > 0 {
|
|
l.logger.Debugf("cleaned up %d stale domain usage entries", removed)
|
|
}
|
|
}
|
|
|
|
func (l *Logger) cleanupDenyBuckets(now time.Time) {
|
|
l.denyMu.Lock()
|
|
defer l.denyMu.Unlock()
|
|
|
|
removed := 0
|
|
for key, bucket := range l.denyBuckets {
|
|
if now.Sub(bucket.lastLogged) > usageInactiveWindow {
|
|
delete(l.denyBuckets, key)
|
|
removed++
|
|
}
|
|
}
|
|
if removed > 0 {
|
|
l.logger.Debugf("cleaned up %d stale deny rate-limit entries", removed)
|
|
}
|
|
}
|