Files
netbird/proxy/internal/debug/handler.go
2026-02-12 15:48:35 +08:00

713 lines
19 KiB
Go

// Package debug provides HTTP debug endpoints for the proxy server.
package debug
import (
"cmp"
"context"
"embed"
"encoding/json"
"fmt"
"html/template"
"maps"
"net/http"
"slices"
"strconv"
"strings"
"sync"
"time"
log "github.com/sirupsen/logrus"
"google.golang.org/protobuf/encoding/protojson"
nbembed "github.com/netbirdio/netbird/client/embed"
nbstatus "github.com/netbirdio/netbird/client/status"
"github.com/netbirdio/netbird/proxy/internal/health"
"github.com/netbirdio/netbird/proxy/internal/roundtrip"
"github.com/netbirdio/netbird/proxy/internal/types"
"github.com/netbirdio/netbird/version"
)
//go:embed templates/*.html
var templateFS embed.FS
const defaultPingTimeout = 10 * time.Second
// formatDuration formats a duration with 2 decimal places using appropriate units.
func formatDuration(d time.Duration) string {
switch {
case d >= time.Hour:
return fmt.Sprintf("%.2fh", d.Hours())
case d >= time.Minute:
return fmt.Sprintf("%.2fm", d.Minutes())
case d >= time.Second:
return fmt.Sprintf("%.2fs", d.Seconds())
case d >= time.Millisecond:
return fmt.Sprintf("%.2fms", float64(d.Microseconds())/1000)
case d >= time.Microsecond:
return fmt.Sprintf("%.2fµs", float64(d.Nanoseconds())/1000)
default:
return fmt.Sprintf("%dns", d.Nanoseconds())
}
}
func sortedAccountIDs(m map[types.AccountID]roundtrip.ClientDebugInfo) []types.AccountID {
return slices.Sorted(maps.Keys(m))
}
// clientProvider provides access to NetBird clients.
type clientProvider interface {
GetClient(accountID types.AccountID) (*nbembed.Client, bool)
ListClientsForDebug() map[types.AccountID]roundtrip.ClientDebugInfo
}
// healthChecker provides health probe state.
type healthChecker interface {
ReadinessProbe() bool
StartupProbe(ctx context.Context) bool
CheckClientsConnected(ctx context.Context) (bool, map[types.AccountID]health.ClientHealth)
}
type certStatus interface {
TotalDomains() int
PendingDomains() []string
ReadyDomains() []string
FailedDomains() map[string]string
}
// Handler provides HTTP debug endpoints.
type Handler struct {
provider clientProvider
health healthChecker
certStatus certStatus
logger *log.Logger
startTime time.Time
templates *template.Template
templateMu sync.RWMutex
}
// NewHandler creates a new debug handler.
func NewHandler(provider clientProvider, healthChecker healthChecker, logger *log.Logger) *Handler {
if logger == nil {
logger = log.StandardLogger()
}
h := &Handler{
provider: provider,
health: healthChecker,
logger: logger,
startTime: time.Now(),
}
if err := h.loadTemplates(); err != nil {
logger.Errorf("failed to load embedded templates: %v", err)
}
return h
}
// SetCertStatus sets the certificate status provider for ACME prefetch observability.
func (h *Handler) SetCertStatus(cs certStatus) {
h.certStatus = cs
}
func (h *Handler) loadTemplates() error {
tmpl, err := template.ParseFS(templateFS, "templates/*.html")
if err != nil {
return fmt.Errorf("parse embedded templates: %w", err)
}
h.templateMu.Lock()
h.templates = tmpl
h.templateMu.Unlock()
return nil
}
func (h *Handler) getTemplates() *template.Template {
h.templateMu.RLock()
defer h.templateMu.RUnlock()
return h.templates
}
// ServeHTTP handles debug requests.
func (h *Handler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
path := r.URL.Path
wantJSON := r.URL.Query().Get("format") == "json" || strings.HasSuffix(path, "/json")
path = strings.TrimSuffix(path, "/json")
switch path {
case "/debug", "/debug/":
h.handleIndex(w, r, wantJSON)
case "/debug/clients":
h.handleListClients(w, r, wantJSON)
case "/debug/health":
h.handleHealth(w, r, wantJSON)
default:
if h.handleClientRoutes(w, r, path, wantJSON) {
return
}
http.NotFound(w, r)
}
}
func (h *Handler) handleClientRoutes(w http.ResponseWriter, r *http.Request, path string, wantJSON bool) bool {
if !strings.HasPrefix(path, "/debug/clients/") {
return false
}
rest := strings.TrimPrefix(path, "/debug/clients/")
parts := strings.SplitN(rest, "/", 2)
accountID := types.AccountID(parts[0])
if len(parts) == 1 {
h.handleClientStatus(w, r, accountID, wantJSON)
return true
}
switch parts[1] {
case "syncresponse":
h.handleClientSyncResponse(w, r, accountID, wantJSON)
case "tools":
h.handleClientTools(w, r, accountID)
case "pingtcp":
h.handlePingTCP(w, r, accountID)
case "loglevel":
h.handleLogLevel(w, r, accountID)
case "start":
h.handleClientStart(w, r, accountID)
case "stop":
h.handleClientStop(w, r, accountID)
default:
return false
}
return true
}
type failedDomain struct {
Domain string
Error string
}
type indexData struct {
Version string
Uptime string
ClientCount int
TotalDomains int
CertsTotal int
CertsReady int
CertsPending int
CertsFailed int
CertsPendingDomains []string
CertsReadyDomains []string
CertsFailedDomains []failedDomain
Clients []clientData
}
type clientData struct {
AccountID string
Domains string
Age string
Status string
}
func (h *Handler) handleIndex(w http.ResponseWriter, _ *http.Request, wantJSON bool) {
clients := h.provider.ListClientsForDebug()
sortedIDs := sortedAccountIDs(clients)
totalDomains := 0
for _, info := range clients {
totalDomains += info.DomainCount
}
var certsTotal, certsReady, certsPending, certsFailed int
var certsPendingDomains, certsReadyDomains []string
var certsFailedDomains map[string]string
if h.certStatus != nil {
certsTotal = h.certStatus.TotalDomains()
certsPendingDomains = h.certStatus.PendingDomains()
certsReadyDomains = h.certStatus.ReadyDomains()
certsFailedDomains = h.certStatus.FailedDomains()
certsReady = len(certsReadyDomains)
certsPending = len(certsPendingDomains)
certsFailed = len(certsFailedDomains)
}
if wantJSON {
clientsJSON := make([]map[string]interface{}, 0, len(clients))
for _, id := range sortedIDs {
info := clients[id]
clientsJSON = append(clientsJSON, map[string]interface{}{
"account_id": info.AccountID,
"domain_count": info.DomainCount,
"domains": info.Domains,
"has_client": info.HasClient,
"created_at": info.CreatedAt,
"age": time.Since(info.CreatedAt).Round(time.Second).String(),
})
}
resp := map[string]interface{}{
"version": version.NetbirdVersion(),
"uptime": time.Since(h.startTime).Round(time.Second).String(),
"client_count": len(clients),
"total_domains": totalDomains,
"certs_total": certsTotal,
"certs_ready": certsReady,
"certs_pending": certsPending,
"certs_failed": certsFailed,
"clients": clientsJSON,
}
if len(certsPendingDomains) > 0 {
resp["certs_pending_domains"] = certsPendingDomains
}
if len(certsReadyDomains) > 0 {
resp["certs_ready_domains"] = certsReadyDomains
}
if len(certsFailedDomains) > 0 {
resp["certs_failed_domains"] = certsFailedDomains
}
h.writeJSON(w, resp)
return
}
sortedFailed := make([]failedDomain, 0, len(certsFailedDomains))
for d, e := range certsFailedDomains {
sortedFailed = append(sortedFailed, failedDomain{Domain: d, Error: e})
}
slices.SortFunc(sortedFailed, func(a, b failedDomain) int {
return cmp.Compare(a.Domain, b.Domain)
})
data := indexData{
Version: version.NetbirdVersion(),
Uptime: time.Since(h.startTime).Round(time.Second).String(),
ClientCount: len(clients),
TotalDomains: totalDomains,
CertsTotal: certsTotal,
CertsReady: certsReady,
CertsPending: certsPending,
CertsFailed: certsFailed,
CertsPendingDomains: certsPendingDomains,
CertsReadyDomains: certsReadyDomains,
CertsFailedDomains: sortedFailed,
Clients: make([]clientData, 0, len(clients)),
}
for _, id := range sortedIDs {
info := clients[id]
domains := info.Domains.SafeString()
if domains == "" {
domains = "-"
}
status := "No client"
if info.HasClient {
status = "Active"
}
data.Clients = append(data.Clients, clientData{
AccountID: string(info.AccountID),
Domains: domains,
Age: time.Since(info.CreatedAt).Round(time.Second).String(),
Status: status,
})
}
h.renderTemplate(w, "index", data)
}
type clientsData struct {
Uptime string
Clients []clientData
}
func (h *Handler) handleListClients(w http.ResponseWriter, _ *http.Request, wantJSON bool) {
clients := h.provider.ListClientsForDebug()
sortedIDs := sortedAccountIDs(clients)
if wantJSON {
clientsJSON := make([]map[string]interface{}, 0, len(clients))
for _, id := range sortedIDs {
info := clients[id]
clientsJSON = append(clientsJSON, map[string]interface{}{
"account_id": info.AccountID,
"domain_count": info.DomainCount,
"domains": info.Domains,
"has_client": info.HasClient,
"created_at": info.CreatedAt,
"age": time.Since(info.CreatedAt).Round(time.Second).String(),
})
}
h.writeJSON(w, map[string]interface{}{
"uptime": time.Since(h.startTime).Round(time.Second).String(),
"client_count": len(clients),
"clients": clientsJSON,
})
return
}
data := clientsData{
Uptime: time.Since(h.startTime).Round(time.Second).String(),
Clients: make([]clientData, 0, len(clients)),
}
for _, id := range sortedIDs {
info := clients[id]
domains := info.Domains.SafeString()
if domains == "" {
domains = "-"
}
status := "No client"
if info.HasClient {
status = "Active"
}
data.Clients = append(data.Clients, clientData{
AccountID: string(info.AccountID),
Domains: domains,
Age: time.Since(info.CreatedAt).Round(time.Second).String(),
Status: status,
})
}
h.renderTemplate(w, "clients", data)
}
type clientDetailData struct {
AccountID string
ActiveTab string
Content string
}
func (h *Handler) handleClientStatus(w http.ResponseWriter, r *http.Request, accountID types.AccountID, wantJSON bool) {
client, ok := h.provider.GetClient(accountID)
if !ok {
http.Error(w, "Client not found: "+string(accountID), http.StatusNotFound)
return
}
fullStatus, err := client.Status()
if err != nil {
http.Error(w, "Error getting status: "+err.Error(), http.StatusInternalServerError)
return
}
// Parse filter parameters
query := r.URL.Query()
statusFilter := query.Get("filter-by-status")
connectionTypeFilter := query.Get("filter-by-connection-type")
var prefixNamesFilter []string
var prefixNamesFilterMap map[string]struct{}
if names := query.Get("filter-by-names"); names != "" {
prefixNamesFilter = strings.Split(names, ",")
prefixNamesFilterMap = make(map[string]struct{})
for _, name := range prefixNamesFilter {
prefixNamesFilterMap[strings.ToLower(strings.TrimSpace(name))] = struct{}{}
}
}
var ipsFilterMap map[string]struct{}
if ips := query.Get("filter-by-ips"); ips != "" {
ipsFilterMap = make(map[string]struct{})
for _, ip := range strings.Split(ips, ",") {
ipsFilterMap[strings.TrimSpace(ip)] = struct{}{}
}
}
pbStatus := nbstatus.ToProtoFullStatus(fullStatus)
overview := nbstatus.ConvertToStatusOutputOverview(
pbStatus,
false,
version.NetbirdVersion(),
statusFilter,
prefixNamesFilter,
prefixNamesFilterMap,
ipsFilterMap,
connectionTypeFilter,
"",
)
if wantJSON {
h.writeJSON(w, map[string]interface{}{
"account_id": accountID,
"status": overview.FullDetailSummary(),
})
return
}
data := clientDetailData{
AccountID: string(accountID),
ActiveTab: "status",
Content: overview.FullDetailSummary(),
}
h.renderTemplate(w, "clientDetail", data)
}
func (h *Handler) handleClientSyncResponse(w http.ResponseWriter, _ *http.Request, accountID types.AccountID, wantJSON bool) {
client, ok := h.provider.GetClient(accountID)
if !ok {
http.Error(w, "Client not found: "+string(accountID), http.StatusNotFound)
return
}
syncResp, err := client.GetLatestSyncResponse()
if err != nil {
http.Error(w, "Error getting sync response: "+err.Error(), http.StatusInternalServerError)
return
}
if syncResp == nil {
http.Error(w, "No sync response available for client: "+string(accountID), http.StatusNotFound)
return
}
opts := protojson.MarshalOptions{
EmitUnpopulated: true,
UseProtoNames: true,
Indent: " ",
AllowPartial: true,
}
jsonBytes, err := opts.Marshal(syncResp)
if err != nil {
http.Error(w, "Error marshaling sync response: "+err.Error(), http.StatusInternalServerError)
return
}
if wantJSON {
w.Header().Set("Content-Type", "application/json")
_, _ = w.Write(jsonBytes)
return
}
data := clientDetailData{
AccountID: string(accountID),
ActiveTab: "syncresponse",
Content: string(jsonBytes),
}
h.renderTemplate(w, "clientDetail", data)
}
type toolsData struct {
AccountID string
}
func (h *Handler) handleClientTools(w http.ResponseWriter, _ *http.Request, accountID types.AccountID) {
_, ok := h.provider.GetClient(accountID)
if !ok {
http.Error(w, "Client not found: "+string(accountID), http.StatusNotFound)
return
}
data := toolsData{
AccountID: string(accountID),
}
h.renderTemplate(w, "tools", data)
}
func (h *Handler) handlePingTCP(w http.ResponseWriter, r *http.Request, accountID types.AccountID) {
client, ok := h.provider.GetClient(accountID)
if !ok {
h.writeJSON(w, map[string]interface{}{"error": "client not found"})
return
}
host := r.URL.Query().Get("host")
portStr := r.URL.Query().Get("port")
if host == "" || portStr == "" {
h.writeJSON(w, map[string]interface{}{"error": "host and port parameters required"})
return
}
port, err := strconv.Atoi(portStr)
if err != nil || port < 1 || port > 65535 {
h.writeJSON(w, map[string]interface{}{"error": "invalid port"})
return
}
timeout := defaultPingTimeout
if t := r.URL.Query().Get("timeout"); t != "" {
if d, err := time.ParseDuration(t); err == nil {
timeout = d
}
}
ctx, cancel := context.WithTimeout(r.Context(), timeout)
defer cancel()
address := fmt.Sprintf("%s:%d", host, port)
start := time.Now()
conn, err := client.Dial(ctx, "tcp", address)
if err != nil {
h.writeJSON(w, map[string]interface{}{
"success": false,
"host": host,
"port": port,
"error": err.Error(),
})
return
}
if err := conn.Close(); err != nil {
h.logger.Debugf("close tcp ping connection: %v", err)
}
latency := time.Since(start)
h.writeJSON(w, map[string]interface{}{
"success": true,
"host": host,
"port": port,
"latency_ms": latency.Milliseconds(),
"latency": formatDuration(latency),
})
}
func (h *Handler) handleLogLevel(w http.ResponseWriter, r *http.Request, accountID types.AccountID) {
client, ok := h.provider.GetClient(accountID)
if !ok {
h.writeJSON(w, map[string]interface{}{"error": "client not found"})
return
}
level := r.URL.Query().Get("level")
if level == "" {
h.writeJSON(w, map[string]interface{}{"error": "level parameter required (trace, debug, info, warn, error)"})
return
}
if err := client.SetLogLevel(level); err != nil {
h.writeJSON(w, map[string]interface{}{
"success": false,
"error": err.Error(),
})
return
}
h.writeJSON(w, map[string]interface{}{
"success": true,
"level": level,
})
}
const clientActionTimeout = 30 * time.Second
func (h *Handler) handleClientStart(w http.ResponseWriter, r *http.Request, accountID types.AccountID) {
client, ok := h.provider.GetClient(accountID)
if !ok {
h.writeJSON(w, map[string]interface{}{"error": "client not found"})
return
}
ctx, cancel := context.WithTimeout(r.Context(), clientActionTimeout)
defer cancel()
if err := client.Start(ctx); err != nil {
h.writeJSON(w, map[string]interface{}{
"success": false,
"error": err.Error(),
})
return
}
h.writeJSON(w, map[string]interface{}{
"success": true,
"message": "client started",
})
}
func (h *Handler) handleClientStop(w http.ResponseWriter, r *http.Request, accountID types.AccountID) {
client, ok := h.provider.GetClient(accountID)
if !ok {
h.writeJSON(w, map[string]interface{}{"error": "client not found"})
return
}
ctx, cancel := context.WithTimeout(r.Context(), clientActionTimeout)
defer cancel()
if err := client.Stop(ctx); err != nil {
h.writeJSON(w, map[string]interface{}{
"success": false,
"error": err.Error(),
})
return
}
h.writeJSON(w, map[string]interface{}{
"success": true,
"message": "client stopped",
})
}
func (h *Handler) handleHealth(w http.ResponseWriter, r *http.Request, wantJSON bool) {
if !wantJSON {
http.Redirect(w, r, "/debug", http.StatusSeeOther)
return
}
uptime := time.Since(h.startTime).Round(10 * time.Millisecond).String()
ready := h.health.ReadinessProbe()
allHealthy, clientHealth := h.health.CheckClientsConnected(r.Context())
status := "ok"
// No clients is not a health issue; only degrade when actual clients are unhealthy
if !ready || (!allHealthy && len(clientHealth) > 0) {
status = "degraded"
}
var certsTotal, certsReady, certsPending, certsFailed int
var certsPendingDomains, certsReadyDomains []string
var certsFailedDomains map[string]string
if h.certStatus != nil {
certsTotal = h.certStatus.TotalDomains()
certsPendingDomains = h.certStatus.PendingDomains()
certsReadyDomains = h.certStatus.ReadyDomains()
certsFailedDomains = h.certStatus.FailedDomains()
certsReady = len(certsReadyDomains)
certsPending = len(certsPendingDomains)
certsFailed = len(certsFailedDomains)
}
resp := map[string]any{
"status": status,
"uptime": uptime,
"management_connected": ready,
"all_clients_healthy": allHealthy,
"certs_total": certsTotal,
"certs_ready": certsReady,
"certs_pending": certsPending,
"certs_failed": certsFailed,
"clients": clientHealth,
}
if len(certsPendingDomains) > 0 {
resp["certs_pending_domains"] = certsPendingDomains
}
if len(certsReadyDomains) > 0 {
resp["certs_ready_domains"] = certsReadyDomains
}
if len(certsFailedDomains) > 0 {
resp["certs_failed_domains"] = certsFailedDomains
}
h.writeJSON(w, resp)
}
func (h *Handler) renderTemplate(w http.ResponseWriter, name string, data interface{}) {
w.Header().Set("Content-Type", "text/html; charset=utf-8")
tmpl := h.getTemplates()
if tmpl == nil {
http.Error(w, "Templates not loaded", http.StatusInternalServerError)
return
}
if err := tmpl.ExecuteTemplate(w, name, data); err != nil {
h.logger.Errorf("execute template %s: %v", name, err)
http.Error(w, "Template error", http.StatusInternalServerError)
}
}
func (h *Handler) writeJSON(w http.ResponseWriter, v interface{}) {
w.Header().Set("Content-Type", "application/json")
enc := json.NewEncoder(w)
enc.SetIndent("", " ")
if err := enc.Encode(v); err != nil {
h.logger.Errorf("encode JSON response: %v", err)
}
}