mirror of
https://github.com/netbirdio/netbird.git
synced 2026-04-16 15:26:40 +00:00
Add cert health info to checks
This commit is contained in:
@@ -8,12 +8,15 @@ import (
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"net"
|
||||
"slices"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
"golang.org/x/crypto/acme"
|
||||
"golang.org/x/crypto/acme/autocert"
|
||||
|
||||
"github.com/netbirdio/netbird/shared/management/domain"
|
||||
)
|
||||
|
||||
// OID for the SCT list extension (1.3.6.1.4.1.11129.2.4.2)
|
||||
@@ -23,19 +26,31 @@ type certificateNotifier interface {
|
||||
NotifyCertificateIssued(ctx context.Context, accountID, reverseProxyID, domain string) error
|
||||
}
|
||||
|
||||
// domainState describes where a registered domain is in the certificate
// prefetch lifecycle.
type domainState int

const (
	// domainPending is the state assigned to a domain when it is registered.
	domainPending = domainState(iota)
	// domainReady marks a domain whose certificate prefetch succeeded.
	domainReady
	// domainFailed marks a domain whose certificate prefetch returned an error.
	domainFailed
)
|
||||
|
||||
type domainInfo struct {
|
||||
accountID string
|
||||
reverseProxyID string
|
||||
state domainState
|
||||
err string
|
||||
}
|
||||
|
||||
// Manager wraps autocert.Manager with domain tracking and cross-replica
|
||||
// coordination via a pluggable locking strategy. The locker prevents
|
||||
// duplicate ACME requests when multiple replicas share a certificate cache.
|
||||
type Manager struct {
|
||||
*autocert.Manager
|
||||
|
||||
certDir string
|
||||
locker certLocker
|
||||
domainsMux sync.RWMutex
|
||||
domains map[string]struct {
|
||||
accountID string
|
||||
reverseProxyID string
|
||||
}
|
||||
certDir string
|
||||
locker certLocker
|
||||
mu sync.RWMutex
|
||||
domains map[domain.Domain]*domainInfo
|
||||
|
||||
certNotifier certificateNotifier
|
||||
logger *log.Logger
|
||||
@@ -49,12 +64,9 @@ func NewManager(certDir, acmeURL string, notifier certificateNotifier, logger *l
|
||||
logger = log.StandardLogger()
|
||||
}
|
||||
mgr := &Manager{
|
||||
certDir: certDir,
|
||||
locker: newCertLocker(lockMethod, certDir, logger),
|
||||
domains: make(map[string]struct {
|
||||
accountID string
|
||||
reverseProxyID string
|
||||
}),
|
||||
certDir: certDir,
|
||||
locker: newCertLocker(lockMethod, certDir, logger),
|
||||
domains: make(map[domain.Domain]*domainInfo),
|
||||
certNotifier: notifier,
|
||||
logger: logger,
|
||||
}
|
||||
@@ -73,49 +85,50 @@ func (mgr *Manager) hostPolicy(_ context.Context, host string) error {
|
||||
if h, _, err := net.SplitHostPort(host); err == nil {
|
||||
host = h
|
||||
}
|
||||
mgr.domainsMux.RLock()
|
||||
_, exists := mgr.domains[host]
|
||||
mgr.domainsMux.RUnlock()
|
||||
mgr.mu.RLock()
|
||||
_, exists := mgr.domains[domain.Domain(host)]
|
||||
mgr.mu.RUnlock()
|
||||
if !exists {
|
||||
return fmt.Errorf("unknown domain %q", host)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (mgr *Manager) AddDomain(domain, accountID, reverseProxyID string) {
|
||||
mgr.domainsMux.Lock()
|
||||
mgr.domains[domain] = struct {
|
||||
accountID string
|
||||
reverseProxyID string
|
||||
}{
|
||||
// AddDomain registers a domain for ACME certificate prefetching.
|
||||
func (mgr *Manager) AddDomain(d domain.Domain, accountID, reverseProxyID string) {
|
||||
mgr.mu.Lock()
|
||||
mgr.domains[d] = &domainInfo{
|
||||
accountID: accountID,
|
||||
reverseProxyID: reverseProxyID,
|
||||
state: domainPending,
|
||||
}
|
||||
mgr.domainsMux.Unlock()
|
||||
mgr.mu.Unlock()
|
||||
|
||||
go mgr.prefetchCertificate(domain)
|
||||
go mgr.prefetchCertificate(d)
|
||||
}
|
||||
|
||||
// prefetchCertificate proactively triggers certificate generation for a domain.
|
||||
// It acquires a distributed lock to prevent multiple replicas from issuing
|
||||
// duplicate ACME requests. The second replica will block until the first
|
||||
// finishes, then find the certificate in the cache.
|
||||
func (mgr *Manager) prefetchCertificate(domain string) {
|
||||
func (mgr *Manager) prefetchCertificate(d domain.Domain) {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
|
||||
defer cancel()
|
||||
|
||||
mgr.logger.Infof("acquiring cert lock for domain %q", domain)
|
||||
name := d.PunycodeString()
|
||||
|
||||
mgr.logger.Infof("acquiring cert lock for domain %q", name)
|
||||
lockStart := time.Now()
|
||||
unlock, err := mgr.locker.Lock(ctx, domain)
|
||||
unlock, err := mgr.locker.Lock(ctx, name)
|
||||
if err != nil {
|
||||
mgr.logger.Warnf("acquire cert lock for domain %q, proceeding without lock: %v", domain, err)
|
||||
mgr.logger.Warnf("acquire cert lock for domain %q, proceeding without lock: %v", name, err)
|
||||
} else {
|
||||
mgr.logger.Infof("acquired cert lock for domain %q in %s", domain, time.Since(lockStart))
|
||||
mgr.logger.Infof("acquired cert lock for domain %q in %s", name, time.Since(lockStart))
|
||||
defer unlock()
|
||||
}
|
||||
|
||||
hello := &tls.ClientHelloInfo{
|
||||
ServerName: domain,
|
||||
ServerName: name,
|
||||
Conn: &dummyConn{ctx: ctx},
|
||||
}
|
||||
|
||||
@@ -123,35 +136,47 @@ func (mgr *Manager) prefetchCertificate(domain string) {
|
||||
cert, err := mgr.GetCertificate(hello)
|
||||
elapsed := time.Since(start)
|
||||
if err != nil {
|
||||
mgr.logger.Warnf("prefetch certificate for domain %q: %v", domain, err)
|
||||
mgr.logger.Warnf("prefetch certificate for domain %q: %v", name, err)
|
||||
mgr.setDomainState(d, domainFailed, err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
mgr.setDomainState(d, domainReady, "")
|
||||
|
||||
now := time.Now()
|
||||
if cert != nil && cert.Leaf != nil {
|
||||
leaf := cert.Leaf
|
||||
mgr.logger.Infof("certificate for domain %q ready in %s: serial=%s SANs=%v notAfter=%s",
|
||||
domain, elapsed.Round(time.Millisecond),
|
||||
name, elapsed.Round(time.Millisecond),
|
||||
leaf.SerialNumber.Text(16),
|
||||
leaf.DNSNames,
|
||||
leaf.NotAfter.UTC().Format(time.RFC3339),
|
||||
)
|
||||
mgr.logCertificateDetails(domain, leaf, now)
|
||||
mgr.logCertificateDetails(name, leaf, now)
|
||||
} else {
|
||||
mgr.logger.Infof("certificate for domain %q ready in %s", domain, elapsed.Round(time.Millisecond))
|
||||
mgr.logger.Infof("certificate for domain %q ready in %s", name, elapsed.Round(time.Millisecond))
|
||||
}
|
||||
|
||||
mgr.domainsMux.RLock()
|
||||
info, exists := mgr.domains[domain]
|
||||
mgr.domainsMux.RUnlock()
|
||||
mgr.mu.RLock()
|
||||
info := mgr.domains[d]
|
||||
mgr.mu.RUnlock()
|
||||
|
||||
if exists && mgr.certNotifier != nil {
|
||||
if err := mgr.certNotifier.NotifyCertificateIssued(ctx, info.accountID, info.reverseProxyID, domain); err != nil {
|
||||
mgr.logger.Warnf("notify certificate ready for domain %q: %v", domain, err)
|
||||
if info != nil && mgr.certNotifier != nil {
|
||||
if err := mgr.certNotifier.NotifyCertificateIssued(ctx, info.accountID, info.reverseProxyID, name); err != nil {
|
||||
mgr.logger.Warnf("notify certificate ready for domain %q: %v", name, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (mgr *Manager) setDomainState(d domain.Domain, state domainState, errMsg string) {
|
||||
mgr.mu.Lock()
|
||||
defer mgr.mu.Unlock()
|
||||
if info, ok := mgr.domains[d]; ok {
|
||||
info.state = state
|
||||
info.err = errMsg
|
||||
}
|
||||
}
|
||||
|
||||
// logCertificateDetails logs certificate validity and SCT timestamps.
|
||||
func (mgr *Manager) logCertificateDetails(domain string, cert *x509.Certificate, now time.Time) {
|
||||
if cert.NotBefore.After(now) {
|
||||
@@ -245,8 +270,65 @@ func (c *dummyConn) SetDeadline(t time.Time) error { return nil }
|
||||
func (c *dummyConn) SetReadDeadline(t time.Time) error { return nil }
|
||||
func (c *dummyConn) SetWriteDeadline(t time.Time) error { return nil }
|
||||
|
||||
func (mgr *Manager) RemoveDomain(domain string) {
|
||||
mgr.domainsMux.Lock()
|
||||
defer mgr.domainsMux.Unlock()
|
||||
delete(mgr.domains, domain)
|
||||
// RemoveDomain removes a domain from tracking.
|
||||
func (mgr *Manager) RemoveDomain(d domain.Domain) {
|
||||
mgr.mu.Lock()
|
||||
defer mgr.mu.Unlock()
|
||||
delete(mgr.domains, d)
|
||||
}
|
||||
|
||||
// PendingCerts returns the number of certificates currently being prefetched.
|
||||
func (mgr *Manager) PendingCerts() int {
|
||||
mgr.mu.RLock()
|
||||
defer mgr.mu.RUnlock()
|
||||
var n int
|
||||
for _, info := range mgr.domains {
|
||||
if info.state == domainPending {
|
||||
n++
|
||||
}
|
||||
}
|
||||
return n
|
||||
}
|
||||
|
||||
// TotalDomains returns the total number of registered domains.
|
||||
func (mgr *Manager) TotalDomains() int {
|
||||
mgr.mu.RLock()
|
||||
defer mgr.mu.RUnlock()
|
||||
return len(mgr.domains)
|
||||
}
|
||||
|
||||
// PendingDomains returns the domain names currently being prefetched.
|
||||
func (mgr *Manager) PendingDomains() []string {
|
||||
return mgr.domainsByState(domainPending)
|
||||
}
|
||||
|
||||
// ReadyDomains returns domain names that have successfully obtained certificates.
|
||||
func (mgr *Manager) ReadyDomains() []string {
|
||||
return mgr.domainsByState(domainReady)
|
||||
}
|
||||
|
||||
// FailedDomains returns domain names that failed certificate prefetch, mapped to their error.
|
||||
func (mgr *Manager) FailedDomains() map[string]string {
|
||||
mgr.mu.RLock()
|
||||
defer mgr.mu.RUnlock()
|
||||
result := make(map[string]string)
|
||||
for d, info := range mgr.domains {
|
||||
if info.state == domainFailed {
|
||||
result[d.PunycodeString()] = info.err
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
func (mgr *Manager) domainsByState(state domainState) []string {
|
||||
mgr.mu.RLock()
|
||||
defer mgr.mu.RUnlock()
|
||||
var domains []string
|
||||
for d, info := range mgr.domains {
|
||||
if info.state == state {
|
||||
domains = append(domains, d.PunycodeString())
|
||||
}
|
||||
}
|
||||
slices.Sort(domains)
|
||||
return domains
|
||||
}
|
||||
|
||||
@@ -3,6 +3,7 @@ package acme
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
@@ -12,6 +13,14 @@ func TestHostPolicy(t *testing.T) {
|
||||
mgr := NewManager(t.TempDir(), "https://acme.example.com/directory", nil, nil, "")
|
||||
mgr.AddDomain("example.com", "acc1", "rp1")
|
||||
|
||||
// Wait for the background prefetch goroutine to finish so the temp dir
|
||||
// can be cleaned up without a race.
|
||||
t.Cleanup(func() {
|
||||
assert.Eventually(t, func() bool {
|
||||
return mgr.PendingCerts() == 0
|
||||
}, 30*time.Second, 50*time.Millisecond)
|
||||
})
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
host string
|
||||
@@ -59,3 +68,35 @@ func TestHostPolicy(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestDomainStates(t *testing.T) {
|
||||
mgr := NewManager(t.TempDir(), "https://acme.example.com/directory", nil, nil, "")
|
||||
|
||||
assert.Equal(t, 0, mgr.PendingCerts(), "initially zero")
|
||||
assert.Equal(t, 0, mgr.TotalDomains(), "initially zero domains")
|
||||
assert.Empty(t, mgr.PendingDomains())
|
||||
assert.Empty(t, mgr.ReadyDomains())
|
||||
assert.Empty(t, mgr.FailedDomains())
|
||||
|
||||
// AddDomain starts as pending, then the prefetch goroutine will fail
|
||||
// (no real ACME server) and transition to failed.
|
||||
mgr.AddDomain("a.example.com", "acc1", "rp1")
|
||||
mgr.AddDomain("b.example.com", "acc1", "rp1")
|
||||
|
||||
assert.Equal(t, 2, mgr.TotalDomains(), "two domains registered")
|
||||
|
||||
// Pending domains should eventually drain after prefetch goroutines finish.
|
||||
assert.Eventually(t, func() bool {
|
||||
return mgr.PendingCerts() == 0
|
||||
}, 30*time.Second, 100*time.Millisecond, "pending certs should return to zero after prefetch completes")
|
||||
|
||||
assert.Empty(t, mgr.PendingDomains())
|
||||
assert.Equal(t, 2, mgr.TotalDomains(), "total domains unchanged")
|
||||
|
||||
// With a fake ACME URL, both should have failed.
|
||||
failed := mgr.FailedDomains()
|
||||
assert.Len(t, failed, 2, "both domains should have failed")
|
||||
assert.Contains(t, failed, "a.example.com")
|
||||
assert.Contains(t, failed, "b.example.com")
|
||||
assert.Empty(t, mgr.ReadyDomains())
|
||||
}
|
||||
|
||||
@@ -56,6 +56,33 @@ func (c *Client) printHealth(data map[string]any) {
|
||||
_, _ = fmt.Fprintf(c.out, "Management Connected: %s\n", boolIcon(data["management_connected"]))
|
||||
_, _ = fmt.Fprintf(c.out, "All Clients Healthy: %s\n", boolIcon(data["all_clients_healthy"]))
|
||||
|
||||
total, _ := data["certs_total"].(float64)
|
||||
ready, _ := data["certs_ready"].(float64)
|
||||
pending, _ := data["certs_pending"].(float64)
|
||||
failed, _ := data["certs_failed"].(float64)
|
||||
if total > 0 {
|
||||
_, _ = fmt.Fprintf(c.out, "Certificates: %d ready, %d pending, %d failed (%d total)\n",
|
||||
int(ready), int(pending), int(failed), int(total))
|
||||
}
|
||||
if domains, ok := data["certs_ready_domains"].([]any); ok && len(domains) > 0 {
|
||||
_, _ = fmt.Fprintf(c.out, " Ready:\n")
|
||||
for _, d := range domains {
|
||||
_, _ = fmt.Fprintf(c.out, " %v\n", d)
|
||||
}
|
||||
}
|
||||
if domains, ok := data["certs_pending_domains"].([]any); ok && len(domains) > 0 {
|
||||
_, _ = fmt.Fprintf(c.out, " Pending:\n")
|
||||
for _, d := range domains {
|
||||
_, _ = fmt.Fprintf(c.out, " %v\n", d)
|
||||
}
|
||||
}
|
||||
if domains, ok := data["certs_failed_domains"].(map[string]any); ok && len(domains) > 0 {
|
||||
_, _ = fmt.Fprintf(c.out, " Failed:\n")
|
||||
for d, errMsg := range domains {
|
||||
_, _ = fmt.Fprintf(c.out, " %s: %v\n", d, errMsg)
|
||||
}
|
||||
}
|
||||
|
||||
clients, ok := data["clients"].(map[string]any)
|
||||
if !ok || len(clients) == 0 {
|
||||
return
|
||||
@@ -328,7 +355,7 @@ func (c *Client) fetch(ctx context.Context, path string) (map[string]any, []byte
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("request failed: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
defer func() { _ = resp.Body.Close() }()
|
||||
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
@@ -346,4 +373,3 @@ func (c *Client) fetch(ctx context.Context, path string) (map[string]any, []byte
|
||||
|
||||
return data, body, nil
|
||||
}
|
||||
|
||||
|
||||
@@ -2,12 +2,15 @@
|
||||
package debug
|
||||
|
||||
import (
|
||||
"cmp"
|
||||
"context"
|
||||
"embed"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"html/template"
|
||||
"maps"
|
||||
"net/http"
|
||||
"slices"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
@@ -47,6 +50,10 @@ func formatDuration(d time.Duration) string {
|
||||
}
|
||||
}
|
||||
|
||||
func sortedAccountIDs(m map[types.AccountID]roundtrip.ClientDebugInfo) []types.AccountID {
|
||||
return slices.Sorted(maps.Keys(m))
|
||||
}
|
||||
|
||||
// clientProvider provides access to NetBird clients.
|
||||
type clientProvider interface {
|
||||
GetClient(accountID types.AccountID) (*nbembed.Client, bool)
|
||||
@@ -60,10 +67,18 @@ type healthChecker interface {
|
||||
CheckClientsConnected(ctx context.Context) (bool, map[types.AccountID]health.ClientHealth)
|
||||
}
|
||||
|
||||
// certStatus is the read-only view of certificate state that the debug
// handler reports in its index and health output.
type certStatus interface {
	// TotalDomains returns the number of registered domains.
	TotalDomains() int
	// ReadyDomains returns the names of domains that obtained a certificate.
	ReadyDomains() []string
	// PendingDomains returns the names of domains still being prefetched.
	PendingDomains() []string
	// FailedDomains returns the names of domains whose prefetch failed,
	// mapped to the error message.
	FailedDomains() map[string]string
}
|
||||
|
||||
// Handler provides HTTP debug endpoints.
|
||||
type Handler struct {
|
||||
provider clientProvider
|
||||
health healthChecker
|
||||
certStatus certStatus
|
||||
logger *log.Logger
|
||||
startTime time.Time
|
||||
templates *template.Template
|
||||
@@ -87,6 +102,11 @@ func NewHandler(provider clientProvider, healthChecker healthChecker, logger *lo
|
||||
return h
|
||||
}
|
||||
|
||||
// SetCertStatus sets the certificate status provider for ACME prefetch observability.
|
||||
func (h *Handler) SetCertStatus(cs certStatus) {
|
||||
h.certStatus = cs
|
||||
}
|
||||
|
||||
func (h *Handler) loadTemplates() error {
|
||||
tmpl, err := template.ParseFS(templateFS, "templates/*.html")
|
||||
if err != nil {
|
||||
@@ -160,12 +180,24 @@ func (h *Handler) handleClientRoutes(w http.ResponseWriter, r *http.Request, pat
|
||||
return true
|
||||
}
|
||||
|
||||
// failedDomain pairs a domain name with the error message of its failed
// certificate prefetch, in the shape the index template iterates over.
type failedDomain struct {
	Domain, Error string
}
|
||||
|
||||
type indexData struct {
|
||||
Version string
|
||||
Uptime string
|
||||
ClientCount int
|
||||
TotalDomains int
|
||||
Clients []clientData
|
||||
Version string
|
||||
Uptime string
|
||||
ClientCount int
|
||||
TotalDomains int
|
||||
CertsTotal int
|
||||
CertsReady int
|
||||
CertsPending int
|
||||
CertsFailed int
|
||||
CertsPendingDomains []string
|
||||
CertsReadyDomains []string
|
||||
CertsFailedDomains []failedDomain
|
||||
Clients []clientData
|
||||
}
|
||||
|
||||
type clientData struct {
|
||||
@@ -177,15 +209,30 @@ type clientData struct {
|
||||
|
||||
func (h *Handler) handleIndex(w http.ResponseWriter, _ *http.Request, wantJSON bool) {
|
||||
clients := h.provider.ListClientsForDebug()
|
||||
sortedIDs := sortedAccountIDs(clients)
|
||||
|
||||
totalDomains := 0
|
||||
for _, info := range clients {
|
||||
totalDomains += info.DomainCount
|
||||
}
|
||||
|
||||
var certsTotal, certsReady, certsPending, certsFailed int
|
||||
var certsPendingDomains, certsReadyDomains []string
|
||||
var certsFailedDomains map[string]string
|
||||
if h.certStatus != nil {
|
||||
certsTotal = h.certStatus.TotalDomains()
|
||||
certsPendingDomains = h.certStatus.PendingDomains()
|
||||
certsReadyDomains = h.certStatus.ReadyDomains()
|
||||
certsFailedDomains = h.certStatus.FailedDomains()
|
||||
certsReady = len(certsReadyDomains)
|
||||
certsPending = len(certsPendingDomains)
|
||||
certsFailed = len(certsFailedDomains)
|
||||
}
|
||||
|
||||
if wantJSON {
|
||||
clientsJSON := make([]map[string]interface{}, 0, len(clients))
|
||||
for _, info := range clients {
|
||||
for _, id := range sortedIDs {
|
||||
info := clients[id]
|
||||
clientsJSON = append(clientsJSON, map[string]interface{}{
|
||||
"account_id": info.AccountID,
|
||||
"domain_count": info.DomainCount,
|
||||
@@ -195,25 +242,55 @@ func (h *Handler) handleIndex(w http.ResponseWriter, _ *http.Request, wantJSON b
|
||||
"age": time.Since(info.CreatedAt).Round(time.Second).String(),
|
||||
})
|
||||
}
|
||||
h.writeJSON(w, map[string]interface{}{
|
||||
resp := map[string]interface{}{
|
||||
"version": version.NetbirdVersion(),
|
||||
"uptime": time.Since(h.startTime).Round(time.Second).String(),
|
||||
"client_count": len(clients),
|
||||
"total_domains": totalDomains,
|
||||
"certs_total": certsTotal,
|
||||
"certs_ready": certsReady,
|
||||
"certs_pending": certsPending,
|
||||
"certs_failed": certsFailed,
|
||||
"clients": clientsJSON,
|
||||
})
|
||||
}
|
||||
if len(certsPendingDomains) > 0 {
|
||||
resp["certs_pending_domains"] = certsPendingDomains
|
||||
}
|
||||
if len(certsReadyDomains) > 0 {
|
||||
resp["certs_ready_domains"] = certsReadyDomains
|
||||
}
|
||||
if len(certsFailedDomains) > 0 {
|
||||
resp["certs_failed_domains"] = certsFailedDomains
|
||||
}
|
||||
h.writeJSON(w, resp)
|
||||
return
|
||||
}
|
||||
|
||||
sortedFailed := make([]failedDomain, 0, len(certsFailedDomains))
|
||||
for d, e := range certsFailedDomains {
|
||||
sortedFailed = append(sortedFailed, failedDomain{Domain: d, Error: e})
|
||||
}
|
||||
slices.SortFunc(sortedFailed, func(a, b failedDomain) int {
|
||||
return cmp.Compare(a.Domain, b.Domain)
|
||||
})
|
||||
|
||||
data := indexData{
|
||||
Version: version.NetbirdVersion(),
|
||||
Uptime: time.Since(h.startTime).Round(time.Second).String(),
|
||||
ClientCount: len(clients),
|
||||
TotalDomains: totalDomains,
|
||||
Clients: make([]clientData, 0, len(clients)),
|
||||
Version: version.NetbirdVersion(),
|
||||
Uptime: time.Since(h.startTime).Round(time.Second).String(),
|
||||
ClientCount: len(clients),
|
||||
TotalDomains: totalDomains,
|
||||
CertsTotal: certsTotal,
|
||||
CertsReady: certsReady,
|
||||
CertsPending: certsPending,
|
||||
CertsFailed: certsFailed,
|
||||
CertsPendingDomains: certsPendingDomains,
|
||||
CertsReadyDomains: certsReadyDomains,
|
||||
CertsFailedDomains: sortedFailed,
|
||||
Clients: make([]clientData, 0, len(clients)),
|
||||
}
|
||||
|
||||
for _, info := range clients {
|
||||
for _, id := range sortedIDs {
|
||||
info := clients[id]
|
||||
domains := info.Domains.SafeString()
|
||||
if domains == "" {
|
||||
domains = "-"
|
||||
@@ -240,10 +317,12 @@ type clientsData struct {
|
||||
|
||||
func (h *Handler) handleListClients(w http.ResponseWriter, _ *http.Request, wantJSON bool) {
|
||||
clients := h.provider.ListClientsForDebug()
|
||||
sortedIDs := sortedAccountIDs(clients)
|
||||
|
||||
if wantJSON {
|
||||
clientsJSON := make([]map[string]interface{}, 0, len(clients))
|
||||
for _, info := range clients {
|
||||
for _, id := range sortedIDs {
|
||||
info := clients[id]
|
||||
clientsJSON = append(clientsJSON, map[string]interface{}{
|
||||
"account_id": info.AccountID,
|
||||
"domain_count": info.DomainCount,
|
||||
@@ -266,7 +345,8 @@ func (h *Handler) handleListClients(w http.ResponseWriter, _ *http.Request, want
|
||||
Clients: make([]clientData, 0, len(clients)),
|
||||
}
|
||||
|
||||
for _, info := range clients {
|
||||
for _, id := range sortedIDs {
|
||||
info := clients[id]
|
||||
domains := info.Domains.SafeString()
|
||||
if domains == "" {
|
||||
domains = "-"
|
||||
@@ -556,15 +636,12 @@ func (h *Handler) handleClientStop(w http.ResponseWriter, r *http.Request, accou
|
||||
})
|
||||
}
|
||||
|
||||
type healthData struct {
|
||||
Uptime string
|
||||
Status string
|
||||
ManagementReady bool
|
||||
AllClientsHealthy bool
|
||||
Clients map[types.AccountID]health.ClientHealth
|
||||
}
|
||||
|
||||
func (h *Handler) handleHealth(w http.ResponseWriter, r *http.Request, wantJSON bool) {
|
||||
if !wantJSON {
|
||||
http.Redirect(w, r, "/debug", http.StatusSeeOther)
|
||||
return
|
||||
}
|
||||
|
||||
uptime := time.Since(h.startTime).Round(10 * time.Millisecond).String()
|
||||
|
||||
ready := h.health.ReadinessProbe()
|
||||
@@ -575,26 +652,40 @@ func (h *Handler) handleHealth(w http.ResponseWriter, r *http.Request, wantJSON
|
||||
status = "degraded"
|
||||
}
|
||||
|
||||
if wantJSON {
|
||||
h.writeJSON(w, map[string]interface{}{
|
||||
"status": status,
|
||||
"uptime": uptime,
|
||||
"management_connected": ready,
|
||||
"all_clients_healthy": allHealthy,
|
||||
"clients": clientHealth,
|
||||
})
|
||||
return
|
||||
var certsTotal, certsReady, certsPending, certsFailed int
|
||||
var certsPendingDomains, certsReadyDomains []string
|
||||
var certsFailedDomains map[string]string
|
||||
if h.certStatus != nil {
|
||||
certsTotal = h.certStatus.TotalDomains()
|
||||
certsPendingDomains = h.certStatus.PendingDomains()
|
||||
certsReadyDomains = h.certStatus.ReadyDomains()
|
||||
certsFailedDomains = h.certStatus.FailedDomains()
|
||||
certsReady = len(certsReadyDomains)
|
||||
certsPending = len(certsPendingDomains)
|
||||
certsFailed = len(certsFailedDomains)
|
||||
}
|
||||
|
||||
data := healthData{
|
||||
Uptime: time.Since(h.startTime).Round(time.Second).String(),
|
||||
Status: status,
|
||||
ManagementReady: ready,
|
||||
AllClientsHealthy: allHealthy,
|
||||
Clients: clientHealth,
|
||||
resp := map[string]any{
|
||||
"status": status,
|
||||
"uptime": uptime,
|
||||
"management_connected": ready,
|
||||
"all_clients_healthy": allHealthy,
|
||||
"certs_total": certsTotal,
|
||||
"certs_ready": certsReady,
|
||||
"certs_pending": certsPending,
|
||||
"certs_failed": certsFailed,
|
||||
"clients": clientHealth,
|
||||
}
|
||||
|
||||
h.renderTemplate(w, "health", data)
|
||||
if len(certsPendingDomains) > 0 {
|
||||
resp["certs_pending_domains"] = certsPendingDomains
|
||||
}
|
||||
if len(certsReadyDomains) > 0 {
|
||||
resp["certs_ready_domains"] = certsReadyDomains
|
||||
}
|
||||
if len(certsFailedDomains) > 0 {
|
||||
resp["certs_failed_domains"] = certsFailedDomains
|
||||
}
|
||||
h.writeJSON(w, resp)
|
||||
}
|
||||
|
||||
func (h *Handler) renderTemplate(w http.ResponseWriter, name string, data interface{}) {
|
||||
|
||||
@@ -1,39 +0,0 @@
|
||||
{{define "health"}}
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<title>Health</title>
|
||||
<style>{{template "style"}}</style>
|
||||
</head>
|
||||
<body>
|
||||
<h1>{{.Status}}</h1>
|
||||
<p>Uptime: {{.Uptime}}</p>
|
||||
<p>Management Connected: {{.ManagementReady}}</p>
|
||||
<p>All Clients Healthy: {{.AllClientsHealthy}}</p>
|
||||
{{if .Clients}}
|
||||
<h2>Clients</h2>
|
||||
<table>
|
||||
<tr>
|
||||
<th>Account ID</th>
|
||||
<th>Healthy</th>
|
||||
<th>Management</th>
|
||||
<th>Signal</th>
|
||||
<th>Relays</th>
|
||||
<th>Error</th>
|
||||
</tr>
|
||||
{{range $id, $c := .Clients}}
|
||||
<tr>
|
||||
<td>{{$id}}</td>
|
||||
<td>{{$c.Healthy}}</td>
|
||||
<td>{{$c.ManagementConnected}}</td>
|
||||
<td>{{$c.SignalConnected}}</td>
|
||||
<td>{{$c.RelaysConnected}}/{{$c.RelaysTotal}}</td>
|
||||
<td>{{$c.Error}}</td>
|
||||
</tr>
|
||||
{{end}}
|
||||
</table>
|
||||
{{end}}
|
||||
<p><a href="/debug">← Back</a></p>
|
||||
</body>
|
||||
</html>
|
||||
{{end}}
|
||||
@@ -8,6 +8,25 @@
|
||||
<body>
|
||||
<h1>NetBird Proxy Debug</h1>
|
||||
<p class="info">Version: {{.Version}} | Uptime: {{.Uptime}}</p>
|
||||
<h2>Certificates: {{.CertsReady}} ready, {{.CertsPending}} pending, {{.CertsFailed}} failed ({{.CertsTotal}} total)</h2>
|
||||
{{if .CertsReadyDomains}}
|
||||
<details>
|
||||
<summary>Ready domains ({{.CertsReady}})</summary>
|
||||
<ul>{{range .CertsReadyDomains}}<li>{{.}}</li>{{end}}</ul>
|
||||
</details>
|
||||
{{end}}
|
||||
{{if .CertsPendingDomains}}
|
||||
<details open>
|
||||
<summary>Pending domains ({{.CertsPending}})</summary>
|
||||
<ul>{{range .CertsPendingDomains}}<li>{{.}}</li>{{end}}</ul>
|
||||
</details>
|
||||
{{end}}
|
||||
{{if .CertsFailedDomains}}
|
||||
<details open>
|
||||
<summary>Failed domains ({{.CertsFailed}})</summary>
|
||||
<ul>{{range .CertsFailedDomains}}<li>{{.Domain}}: {{.Error}}</li>{{end}}</ul>
|
||||
</details>
|
||||
{{end}}
|
||||
<h2>Clients ({{.ClientCount}}) | Domains ({{.TotalDomains}})</h2>
|
||||
{{if .Clients}}
|
||||
<table>
|
||||
@@ -32,7 +51,6 @@
|
||||
<h2>Endpoints</h2>
|
||||
<ul>
|
||||
<li><a href="/debug/clients">/debug/clients</a> - all clients detail</li>
|
||||
<li><a href="/debug/health">/debug/health</a> - health check</li>
|
||||
</ul>
|
||||
<p class="info">Add ?format=json or /json suffix for JSON output</p>
|
||||
</body>
|
||||
|
||||
@@ -63,7 +63,11 @@ func Unlock(f *os.File) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
defer f.Close()
|
||||
defer func() {
|
||||
if cerr := f.Close(); cerr != nil {
|
||||
log.Debugf("close lock file: %v", cerr)
|
||||
}
|
||||
}()
|
||||
|
||||
if err := syscall.Flock(int(f.Fd()), syscall.LOCK_UN); err != nil {
|
||||
return fmt.Errorf("release lock: %w", err)
|
||||
|
||||
@@ -65,7 +65,7 @@ type MicroTime struct {
|
||||
const microTimeFormat = "2006-01-02T15:04:05.000000Z"
|
||||
|
||||
// MarshalJSON implements json.Marshaler with k8s MicroTime format.
|
||||
func (t MicroTime) MarshalJSON() ([]byte, error) {
|
||||
func (t *MicroTime) MarshalJSON() ([]byte, error) {
|
||||
return json.Marshal(t.UTC().Format(microTimeFormat))
|
||||
}
|
||||
|
||||
@@ -148,7 +148,7 @@ func (c *LeaseClient) Get(ctx context.Context, name string) (*Lease, error) {
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
defer func() { _ = resp.Body.Close() }()
|
||||
|
||||
if resp.StatusCode == http.StatusNotFound {
|
||||
return nil, nil
|
||||
@@ -179,7 +179,7 @@ func (c *LeaseClient) Create(ctx context.Context, lease *Lease) (*Lease, error)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
defer func() { _ = resp.Body.Close() }()
|
||||
|
||||
if resp.StatusCode == http.StatusConflict {
|
||||
return nil, ErrConflict
|
||||
@@ -208,7 +208,7 @@ func (c *LeaseClient) Update(ctx context.Context, lease *Lease) (*Lease, error)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
defer func() { _ = resp.Body.Close() }()
|
||||
|
||||
if resp.StatusCode == http.StatusConflict {
|
||||
return nil, ErrConflict
|
||||
|
||||
@@ -247,6 +247,9 @@ func (s *Server) ListenAndServe(ctx context.Context, addr string) (err error) {
|
||||
if s.DebugEndpointEnabled {
|
||||
debugAddr := debugEndpointAddr(s.DebugEndpointAddress)
|
||||
debugHandler := debug.NewHandler(s.netbird, s.healthChecker, s.Logger)
|
||||
if s.acme != nil {
|
||||
debugHandler.SetCertStatus(s.acme)
|
||||
}
|
||||
s.debug = &http.Server{
|
||||
Addr: debugAddr,
|
||||
Handler: debugHandler,
|
||||
@@ -359,7 +362,7 @@ func (s *Server) shutdownServices() {
|
||||
defer wg.Done()
|
||||
ctx, cancel := context.WithTimeout(context.Background(), shutdownServiceTimeout)
|
||||
defer cancel()
|
||||
if err := shutdown(ctx); err != nil && !errors.Is(err, http.ErrServerClosed) {
|
||||
if err := shutdown(ctx); err != nil {
|
||||
s.Logger.Debugf("%s shutdown: %v", name, err)
|
||||
}
|
||||
}()
|
||||
@@ -516,7 +519,7 @@ func (s *Server) addMapping(ctx context.Context, mapping *proto.ProxyMapping) er
|
||||
return fmt.Errorf("create peer for domain %q: %w", d, err)
|
||||
}
|
||||
if s.acme != nil {
|
||||
s.acme.AddDomain(string(d), string(accountID), reverseProxyID)
|
||||
s.acme.AddDomain(d, string(accountID), reverseProxyID)
|
||||
}
|
||||
|
||||
// Pass the mapping through to the update function to avoid duplicating the
|
||||
@@ -562,7 +565,7 @@ func (s *Server) removeMapping(ctx context.Context, mapping *proto.ProxyMapping)
|
||||
}).Error("Error removing NetBird peer connection for domain, continuing additional domain cleanup but peer connection may still exist")
|
||||
}
|
||||
if s.acme != nil {
|
||||
s.acme.RemoveDomain(mapping.GetDomain())
|
||||
s.acme.RemoveDomain(d)
|
||||
}
|
||||
s.auth.RemoveDomain(mapping.GetDomain())
|
||||
s.proxy.RemoveMapping(s.protoToMapping(mapping))
|
||||
|
||||
Reference in New Issue
Block a user