Add cert health info to checks

This commit is contained in:
Viktor Liu
2026-02-09 20:44:32 +08:00
parent 53c1016a8e
commit 73aa0785ba
9 changed files with 361 additions and 135 deletions

View File

@@ -56,6 +56,33 @@ func (c *Client) printHealth(data map[string]any) {
_, _ = fmt.Fprintf(c.out, "Management Connected: %s\n", boolIcon(data["management_connected"]))
_, _ = fmt.Fprintf(c.out, "All Clients Healthy: %s\n", boolIcon(data["all_clients_healthy"]))
total, _ := data["certs_total"].(float64)
ready, _ := data["certs_ready"].(float64)
pending, _ := data["certs_pending"].(float64)
failed, _ := data["certs_failed"].(float64)
if total > 0 {
_, _ = fmt.Fprintf(c.out, "Certificates: %d ready, %d pending, %d failed (%d total)\n",
int(ready), int(pending), int(failed), int(total))
}
if domains, ok := data["certs_ready_domains"].([]any); ok && len(domains) > 0 {
_, _ = fmt.Fprintf(c.out, " Ready:\n")
for _, d := range domains {
_, _ = fmt.Fprintf(c.out, " %v\n", d)
}
}
if domains, ok := data["certs_pending_domains"].([]any); ok && len(domains) > 0 {
_, _ = fmt.Fprintf(c.out, " Pending:\n")
for _, d := range domains {
_, _ = fmt.Fprintf(c.out, " %v\n", d)
}
}
if domains, ok := data["certs_failed_domains"].(map[string]any); ok && len(domains) > 0 {
_, _ = fmt.Fprintf(c.out, " Failed:\n")
for d, errMsg := range domains {
_, _ = fmt.Fprintf(c.out, " %s: %v\n", d, errMsg)
}
}
clients, ok := data["clients"].(map[string]any)
if !ok || len(clients) == 0 {
return
@@ -328,7 +355,7 @@ func (c *Client) fetch(ctx context.Context, path string) (map[string]any, []byte
if err != nil {
return nil, nil, fmt.Errorf("request failed: %w", err)
}
defer resp.Body.Close()
defer func() { _ = resp.Body.Close() }()
body, err := io.ReadAll(resp.Body)
if err != nil {
@@ -346,4 +373,3 @@ func (c *Client) fetch(ctx context.Context, path string) (map[string]any, []byte
return data, body, nil
}

View File

@@ -2,12 +2,15 @@
package debug
import (
"cmp"
"context"
"embed"
"encoding/json"
"fmt"
"html/template"
"maps"
"net/http"
"slices"
"strconv"
"strings"
"sync"
@@ -47,6 +50,10 @@ func formatDuration(d time.Duration) string {
}
}
// sortedAccountIDs collects the keys of m and returns them in ascending
// order, so callers can walk the client map in a stable sequence.
func sortedAccountIDs(m map[types.AccountID]roundtrip.ClientDebugInfo) []types.AccountID {
	ids := slices.Collect(maps.Keys(m))
	slices.Sort(ids)
	return ids
}
// clientProvider provides access to NetBird clients.
type clientProvider interface {
GetClient(accountID types.AccountID) (*nbembed.Client, bool)
@@ -60,10 +67,18 @@ type healthChecker interface {
CheckClientsConnected(ctx context.Context) (bool, map[types.AccountID]health.ClientHealth)
}
// certStatus is the certificate-state source the Handler queries when it
// builds the index and health responses. It is optional: the handlers only
// call it when Handler.certStatus is non-nil.
type certStatus interface {
// TotalDomains returns the count reported as "certs_total".
TotalDomains() int
// PendingDomains returns the domains reported as "certs_pending_domains";
// its length is reported as "certs_pending".
PendingDomains() []string
// ReadyDomains returns the domains reported as "certs_ready_domains";
// its length is reported as "certs_ready".
ReadyDomains() []string
// FailedDomains returns a map from domain to error text, reported as
// "certs_failed_domains"; its length is reported as "certs_failed".
FailedDomains() map[string]string
}
// Handler provides HTTP debug endpoints.
type Handler struct {
provider clientProvider
health healthChecker
certStatus certStatus
logger *log.Logger
startTime time.Time
templates *template.Template
@@ -87,6 +102,11 @@ func NewHandler(provider clientProvider, healthChecker healthChecker, logger *lo
return h
}
// SetCertStatus sets the certificate status provider for ACME prefetch observability.
// While no provider is set, handleIndex and handleHealth skip the certificate
// queries and report zero counts with no domain lists.
// NOTE(review): the field is assigned without any locking visible here;
// presumably this is called during setup, before requests are served — confirm.
func (h *Handler) SetCertStatus(cs certStatus) {
h.certStatus = cs
}
func (h *Handler) loadTemplates() error {
tmpl, err := template.ParseFS(templateFS, "templates/*.html")
if err != nil {
@@ -160,12 +180,24 @@ func (h *Handler) handleClientRoutes(w http.ResponseWriter, r *http.Request, pat
return true
}
// failedDomain is one entry of the failed-certificate list shown on the index
// page. handleIndex builds these from the domain-to-error map and sorts them
// by Domain so the list is rendered in a stable order.
type failedDomain struct {
// Domain is the map key taken from certStatus.FailedDomains.
Domain string
// Error is the map value (error text) associated with Domain.
Error string
}
// indexData is the value handleIndex builds for the HTML (non-JSON) index
// view: version and uptime, client and domain counts, certificate counts with
// their per-state domain lists, and one clientData row per client.
type indexData struct {
Version string
Uptime string
ClientCount int
TotalDomains int
Clients []clientData
Version string
Uptime string
ClientCount int
TotalDomains int
CertsTotal int
CertsReady int
CertsPending int
CertsFailed int
CertsPendingDomains []string
CertsReadyDomains []string
CertsFailedDomains []failedDomain
Clients []clientData
}
type clientData struct {
@@ -177,15 +209,30 @@ type clientData struct {
func (h *Handler) handleIndex(w http.ResponseWriter, _ *http.Request, wantJSON bool) {
clients := h.provider.ListClientsForDebug()
sortedIDs := sortedAccountIDs(clients)
totalDomains := 0
for _, info := range clients {
totalDomains += info.DomainCount
}
var certsTotal, certsReady, certsPending, certsFailed int
var certsPendingDomains, certsReadyDomains []string
var certsFailedDomains map[string]string
if h.certStatus != nil {
certsTotal = h.certStatus.TotalDomains()
certsPendingDomains = h.certStatus.PendingDomains()
certsReadyDomains = h.certStatus.ReadyDomains()
certsFailedDomains = h.certStatus.FailedDomains()
certsReady = len(certsReadyDomains)
certsPending = len(certsPendingDomains)
certsFailed = len(certsFailedDomains)
}
if wantJSON {
clientsJSON := make([]map[string]interface{}, 0, len(clients))
for _, info := range clients {
for _, id := range sortedIDs {
info := clients[id]
clientsJSON = append(clientsJSON, map[string]interface{}{
"account_id": info.AccountID,
"domain_count": info.DomainCount,
@@ -195,25 +242,55 @@ func (h *Handler) handleIndex(w http.ResponseWriter, _ *http.Request, wantJSON b
"age": time.Since(info.CreatedAt).Round(time.Second).String(),
})
}
h.writeJSON(w, map[string]interface{}{
resp := map[string]interface{}{
"version": version.NetbirdVersion(),
"uptime": time.Since(h.startTime).Round(time.Second).String(),
"client_count": len(clients),
"total_domains": totalDomains,
"certs_total": certsTotal,
"certs_ready": certsReady,
"certs_pending": certsPending,
"certs_failed": certsFailed,
"clients": clientsJSON,
})
}
if len(certsPendingDomains) > 0 {
resp["certs_pending_domains"] = certsPendingDomains
}
if len(certsReadyDomains) > 0 {
resp["certs_ready_domains"] = certsReadyDomains
}
if len(certsFailedDomains) > 0 {
resp["certs_failed_domains"] = certsFailedDomains
}
h.writeJSON(w, resp)
return
}
sortedFailed := make([]failedDomain, 0, len(certsFailedDomains))
for d, e := range certsFailedDomains {
sortedFailed = append(sortedFailed, failedDomain{Domain: d, Error: e})
}
slices.SortFunc(sortedFailed, func(a, b failedDomain) int {
return cmp.Compare(a.Domain, b.Domain)
})
data := indexData{
Version: version.NetbirdVersion(),
Uptime: time.Since(h.startTime).Round(time.Second).String(),
ClientCount: len(clients),
TotalDomains: totalDomains,
Clients: make([]clientData, 0, len(clients)),
Version: version.NetbirdVersion(),
Uptime: time.Since(h.startTime).Round(time.Second).String(),
ClientCount: len(clients),
TotalDomains: totalDomains,
CertsTotal: certsTotal,
CertsReady: certsReady,
CertsPending: certsPending,
CertsFailed: certsFailed,
CertsPendingDomains: certsPendingDomains,
CertsReadyDomains: certsReadyDomains,
CertsFailedDomains: sortedFailed,
Clients: make([]clientData, 0, len(clients)),
}
for _, info := range clients {
for _, id := range sortedIDs {
info := clients[id]
domains := info.Domains.SafeString()
if domains == "" {
domains = "-"
@@ -240,10 +317,12 @@ type clientsData struct {
func (h *Handler) handleListClients(w http.ResponseWriter, _ *http.Request, wantJSON bool) {
clients := h.provider.ListClientsForDebug()
sortedIDs := sortedAccountIDs(clients)
if wantJSON {
clientsJSON := make([]map[string]interface{}, 0, len(clients))
for _, info := range clients {
for _, id := range sortedIDs {
info := clients[id]
clientsJSON = append(clientsJSON, map[string]interface{}{
"account_id": info.AccountID,
"domain_count": info.DomainCount,
@@ -266,7 +345,8 @@ func (h *Handler) handleListClients(w http.ResponseWriter, _ *http.Request, want
Clients: make([]clientData, 0, len(clients)),
}
for _, info := range clients {
for _, id := range sortedIDs {
info := clients[id]
domains := info.Domains.SafeString()
if domains == "" {
domains = "-"
@@ -556,15 +636,12 @@ func (h *Handler) handleClientStop(w http.ResponseWriter, r *http.Request, accou
})
}
// healthData is the value handleHealth passes to the "health" template.
type healthData struct {
// Uptime is the time since the handler's startTime, formatted as a string.
Uptime string
// Status is the overall status string (e.g. "degraded").
Status string
// ManagementReady holds the result of the health checker's ReadinessProbe.
ManagementReady bool
// AllClientsHealthy is the aggregate client health flag.
AllClientsHealthy bool
// Clients holds per-client health keyed by account ID.
Clients map[types.AccountID]health.ClientHealth
}
func (h *Handler) handleHealth(w http.ResponseWriter, r *http.Request, wantJSON bool) {
if !wantJSON {
http.Redirect(w, r, "/debug", http.StatusSeeOther)
return
}
uptime := time.Since(h.startTime).Round(10 * time.Millisecond).String()
ready := h.health.ReadinessProbe()
@@ -575,26 +652,40 @@ func (h *Handler) handleHealth(w http.ResponseWriter, r *http.Request, wantJSON
status = "degraded"
}
if wantJSON {
h.writeJSON(w, map[string]interface{}{
"status": status,
"uptime": uptime,
"management_connected": ready,
"all_clients_healthy": allHealthy,
"clients": clientHealth,
})
return
var certsTotal, certsReady, certsPending, certsFailed int
var certsPendingDomains, certsReadyDomains []string
var certsFailedDomains map[string]string
if h.certStatus != nil {
certsTotal = h.certStatus.TotalDomains()
certsPendingDomains = h.certStatus.PendingDomains()
certsReadyDomains = h.certStatus.ReadyDomains()
certsFailedDomains = h.certStatus.FailedDomains()
certsReady = len(certsReadyDomains)
certsPending = len(certsPendingDomains)
certsFailed = len(certsFailedDomains)
}
data := healthData{
Uptime: time.Since(h.startTime).Round(time.Second).String(),
Status: status,
ManagementReady: ready,
AllClientsHealthy: allHealthy,
Clients: clientHealth,
resp := map[string]any{
"status": status,
"uptime": uptime,
"management_connected": ready,
"all_clients_healthy": allHealthy,
"certs_total": certsTotal,
"certs_ready": certsReady,
"certs_pending": certsPending,
"certs_failed": certsFailed,
"clients": clientHealth,
}
h.renderTemplate(w, "health", data)
if len(certsPendingDomains) > 0 {
resp["certs_pending_domains"] = certsPendingDomains
}
if len(certsReadyDomains) > 0 {
resp["certs_ready_domains"] = certsReadyDomains
}
if len(certsFailedDomains) > 0 {
resp["certs_failed_domains"] = certsFailedDomains
}
h.writeJSON(w, resp)
}
func (h *Handler) renderTemplate(w http.ResponseWriter, name string, data interface{}) {

View File

@@ -1,39 +0,0 @@
{{define "health"}}{{/*
  health renders a standalone health page: the overall status as the heading,
  uptime, the management-connected and all-clients-healthy flags, and, when
  .Clients is non-empty, a table with one row per account ID showing that
  client's health fields. Styling comes from the separately defined "style"
  template, and the page links back to /debug.
*/}}
<!DOCTYPE html>
<html>
<head>
<title>Health</title>
<style>{{template "style"}}</style>
</head>
<body>
<h1>{{.Status}}</h1>
<p>Uptime: {{.Uptime}}</p>
<p>Management Connected: {{.ManagementReady}}</p>
<p>All Clients Healthy: {{.AllClientsHealthy}}</p>
{{if .Clients}}
<h2>Clients</h2>
<table>
<tr>
<th>Account ID</th>
<th>Healthy</th>
<th>Management</th>
<th>Signal</th>
<th>Relays</th>
<th>Error</th>
</tr>
{{range $id, $c := .Clients}}
<tr>
<td>{{$id}}</td>
<td>{{$c.Healthy}}</td>
<td>{{$c.ManagementConnected}}</td>
<td>{{$c.SignalConnected}}</td>
<td>{{$c.RelaysConnected}}/{{$c.RelaysTotal}}</td>
<td>{{$c.Error}}</td>
</tr>
{{end}}
</table>
{{end}}
<p><a href="/debug">&larr; Back</a></p>
</body>
</html>
{{end}}

View File

@@ -8,6 +8,25 @@
<body>
<h1>NetBird Proxy Debug</h1>
<p class="info">Version: {{.Version}} | Uptime: {{.Uptime}}</p>
<h2>Certificates: {{.CertsReady}} ready, {{.CertsPending}} pending, {{.CertsFailed}} failed ({{.CertsTotal}} total)</h2>
{{if .CertsReadyDomains}}
<details>
<summary>Ready domains ({{.CertsReady}})</summary>
<ul>{{range .CertsReadyDomains}}<li>{{.}}</li>{{end}}</ul>
</details>
{{end}}
{{if .CertsPendingDomains}}
<details open>
<summary>Pending domains ({{.CertsPending}})</summary>
<ul>{{range .CertsPendingDomains}}<li>{{.}}</li>{{end}}</ul>
</details>
{{end}}
{{if .CertsFailedDomains}}
<details open>
<summary>Failed domains ({{.CertsFailed}})</summary>
<ul>{{range .CertsFailedDomains}}<li>{{.Domain}}: {{.Error}}</li>{{end}}</ul>
</details>
{{end}}
<h2>Clients ({{.ClientCount}}) | Domains ({{.TotalDomains}})</h2>
{{if .Clients}}
<table>
@@ -32,7 +51,6 @@
<h2>Endpoints</h2>
<ul>
<li><a href="/debug/clients">/debug/clients</a> - all clients detail</li>
<li><a href="/debug/health">/debug/health</a> - health check</li>
</ul>
<p class="info">Add ?format=json or /json suffix for JSON output</p>
</body>