mirror of
https://github.com/netbirdio/netbird.git
synced 2026-04-18 16:26:38 +00:00
Add cert health info to checks
This commit is contained in:
@@ -56,6 +56,33 @@ func (c *Client) printHealth(data map[string]any) {
|
||||
_, _ = fmt.Fprintf(c.out, "Management Connected: %s\n", boolIcon(data["management_connected"]))
|
||||
_, _ = fmt.Fprintf(c.out, "All Clients Healthy: %s\n", boolIcon(data["all_clients_healthy"]))
|
||||
|
||||
total, _ := data["certs_total"].(float64)
|
||||
ready, _ := data["certs_ready"].(float64)
|
||||
pending, _ := data["certs_pending"].(float64)
|
||||
failed, _ := data["certs_failed"].(float64)
|
||||
if total > 0 {
|
||||
_, _ = fmt.Fprintf(c.out, "Certificates: %d ready, %d pending, %d failed (%d total)\n",
|
||||
int(ready), int(pending), int(failed), int(total))
|
||||
}
|
||||
if domains, ok := data["certs_ready_domains"].([]any); ok && len(domains) > 0 {
|
||||
_, _ = fmt.Fprintf(c.out, " Ready:\n")
|
||||
for _, d := range domains {
|
||||
_, _ = fmt.Fprintf(c.out, " %v\n", d)
|
||||
}
|
||||
}
|
||||
if domains, ok := data["certs_pending_domains"].([]any); ok && len(domains) > 0 {
|
||||
_, _ = fmt.Fprintf(c.out, " Pending:\n")
|
||||
for _, d := range domains {
|
||||
_, _ = fmt.Fprintf(c.out, " %v\n", d)
|
||||
}
|
||||
}
|
||||
if domains, ok := data["certs_failed_domains"].(map[string]any); ok && len(domains) > 0 {
|
||||
_, _ = fmt.Fprintf(c.out, " Failed:\n")
|
||||
for d, errMsg := range domains {
|
||||
_, _ = fmt.Fprintf(c.out, " %s: %v\n", d, errMsg)
|
||||
}
|
||||
}
|
||||
|
||||
clients, ok := data["clients"].(map[string]any)
|
||||
if !ok || len(clients) == 0 {
|
||||
return
|
||||
@@ -328,7 +355,7 @@ func (c *Client) fetch(ctx context.Context, path string) (map[string]any, []byte
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("request failed: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
defer func() { _ = resp.Body.Close() }()
|
||||
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
@@ -346,4 +373,3 @@ func (c *Client) fetch(ctx context.Context, path string) (map[string]any, []byte
|
||||
|
||||
return data, body, nil
|
||||
}
|
||||
|
||||
|
||||
@@ -2,12 +2,15 @@
|
||||
package debug
|
||||
|
||||
import (
|
||||
"cmp"
|
||||
"context"
|
||||
"embed"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"html/template"
|
||||
"maps"
|
||||
"net/http"
|
||||
"slices"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
@@ -47,6 +50,10 @@ func formatDuration(d time.Duration) string {
|
||||
}
|
||||
}
|
||||
|
||||
func sortedAccountIDs(m map[types.AccountID]roundtrip.ClientDebugInfo) []types.AccountID {
|
||||
return slices.Sorted(maps.Keys(m))
|
||||
}
|
||||
|
||||
// clientProvider provides access to NetBird clients.
|
||||
type clientProvider interface {
|
||||
GetClient(accountID types.AccountID) (*nbembed.Client, bool)
|
||||
@@ -60,10 +67,18 @@ type healthChecker interface {
|
||||
CheckClientsConnected(ctx context.Context) (bool, map[types.AccountID]health.ClientHealth)
|
||||
}
|
||||
|
||||
// certStatus is the read-only view of certificate state that the debug
// handlers consume when building their index and health responses.
type certStatus interface {
	// TotalDomains is reported by the handlers as the overall certificate count.
	TotalDomains() int
	// ReadyDomains lists the domains reported as ready; the handlers use its
	// length as the ready count.
	ReadyDomains() []string
	// PendingDomains lists the domains reported as pending; the handlers use
	// its length as the pending count.
	PendingDomains() []string
	// FailedDomains maps each failed domain to its error message; the
	// handlers use its size as the failed count.
	FailedDomains() map[string]string
}
|
||||
|
||||
// Handler provides HTTP debug endpoints.
|
||||
type Handler struct {
|
||||
provider clientProvider
|
||||
health healthChecker
|
||||
certStatus certStatus
|
||||
logger *log.Logger
|
||||
startTime time.Time
|
||||
templates *template.Template
|
||||
@@ -87,6 +102,11 @@ func NewHandler(provider clientProvider, healthChecker healthChecker, logger *lo
|
||||
return h
|
||||
}
|
||||
|
||||
// SetCertStatus sets the certificate status provider for ACME prefetch observability.
|
||||
func (h *Handler) SetCertStatus(cs certStatus) {
|
||||
h.certStatus = cs
|
||||
}
|
||||
|
||||
func (h *Handler) loadTemplates() error {
|
||||
tmpl, err := template.ParseFS(templateFS, "templates/*.html")
|
||||
if err != nil {
|
||||
@@ -160,12 +180,24 @@ func (h *Handler) handleClientRoutes(w http.ResponseWriter, r *http.Request, pat
|
||||
return true
|
||||
}
|
||||
|
||||
// failedDomain pairs a domain with the error recorded for its certificate.
// It is the element type the index template ranges over when listing
// failed domains.
type failedDomain struct {
	Domain string // domain name
	Error  string // error message reported for that domain
}
|
||||
|
||||
type indexData struct {
|
||||
Version string
|
||||
Uptime string
|
||||
ClientCount int
|
||||
TotalDomains int
|
||||
Clients []clientData
|
||||
Version string
|
||||
Uptime string
|
||||
ClientCount int
|
||||
TotalDomains int
|
||||
CertsTotal int
|
||||
CertsReady int
|
||||
CertsPending int
|
||||
CertsFailed int
|
||||
CertsPendingDomains []string
|
||||
CertsReadyDomains []string
|
||||
CertsFailedDomains []failedDomain
|
||||
Clients []clientData
|
||||
}
|
||||
|
||||
type clientData struct {
|
||||
@@ -177,15 +209,30 @@ type clientData struct {
|
||||
|
||||
func (h *Handler) handleIndex(w http.ResponseWriter, _ *http.Request, wantJSON bool) {
|
||||
clients := h.provider.ListClientsForDebug()
|
||||
sortedIDs := sortedAccountIDs(clients)
|
||||
|
||||
totalDomains := 0
|
||||
for _, info := range clients {
|
||||
totalDomains += info.DomainCount
|
||||
}
|
||||
|
||||
var certsTotal, certsReady, certsPending, certsFailed int
|
||||
var certsPendingDomains, certsReadyDomains []string
|
||||
var certsFailedDomains map[string]string
|
||||
if h.certStatus != nil {
|
||||
certsTotal = h.certStatus.TotalDomains()
|
||||
certsPendingDomains = h.certStatus.PendingDomains()
|
||||
certsReadyDomains = h.certStatus.ReadyDomains()
|
||||
certsFailedDomains = h.certStatus.FailedDomains()
|
||||
certsReady = len(certsReadyDomains)
|
||||
certsPending = len(certsPendingDomains)
|
||||
certsFailed = len(certsFailedDomains)
|
||||
}
|
||||
|
||||
if wantJSON {
|
||||
clientsJSON := make([]map[string]interface{}, 0, len(clients))
|
||||
for _, info := range clients {
|
||||
for _, id := range sortedIDs {
|
||||
info := clients[id]
|
||||
clientsJSON = append(clientsJSON, map[string]interface{}{
|
||||
"account_id": info.AccountID,
|
||||
"domain_count": info.DomainCount,
|
||||
@@ -195,25 +242,55 @@ func (h *Handler) handleIndex(w http.ResponseWriter, _ *http.Request, wantJSON b
|
||||
"age": time.Since(info.CreatedAt).Round(time.Second).String(),
|
||||
})
|
||||
}
|
||||
h.writeJSON(w, map[string]interface{}{
|
||||
resp := map[string]interface{}{
|
||||
"version": version.NetbirdVersion(),
|
||||
"uptime": time.Since(h.startTime).Round(time.Second).String(),
|
||||
"client_count": len(clients),
|
||||
"total_domains": totalDomains,
|
||||
"certs_total": certsTotal,
|
||||
"certs_ready": certsReady,
|
||||
"certs_pending": certsPending,
|
||||
"certs_failed": certsFailed,
|
||||
"clients": clientsJSON,
|
||||
})
|
||||
}
|
||||
if len(certsPendingDomains) > 0 {
|
||||
resp["certs_pending_domains"] = certsPendingDomains
|
||||
}
|
||||
if len(certsReadyDomains) > 0 {
|
||||
resp["certs_ready_domains"] = certsReadyDomains
|
||||
}
|
||||
if len(certsFailedDomains) > 0 {
|
||||
resp["certs_failed_domains"] = certsFailedDomains
|
||||
}
|
||||
h.writeJSON(w, resp)
|
||||
return
|
||||
}
|
||||
|
||||
sortedFailed := make([]failedDomain, 0, len(certsFailedDomains))
|
||||
for d, e := range certsFailedDomains {
|
||||
sortedFailed = append(sortedFailed, failedDomain{Domain: d, Error: e})
|
||||
}
|
||||
slices.SortFunc(sortedFailed, func(a, b failedDomain) int {
|
||||
return cmp.Compare(a.Domain, b.Domain)
|
||||
})
|
||||
|
||||
data := indexData{
|
||||
Version: version.NetbirdVersion(),
|
||||
Uptime: time.Since(h.startTime).Round(time.Second).String(),
|
||||
ClientCount: len(clients),
|
||||
TotalDomains: totalDomains,
|
||||
Clients: make([]clientData, 0, len(clients)),
|
||||
Version: version.NetbirdVersion(),
|
||||
Uptime: time.Since(h.startTime).Round(time.Second).String(),
|
||||
ClientCount: len(clients),
|
||||
TotalDomains: totalDomains,
|
||||
CertsTotal: certsTotal,
|
||||
CertsReady: certsReady,
|
||||
CertsPending: certsPending,
|
||||
CertsFailed: certsFailed,
|
||||
CertsPendingDomains: certsPendingDomains,
|
||||
CertsReadyDomains: certsReadyDomains,
|
||||
CertsFailedDomains: sortedFailed,
|
||||
Clients: make([]clientData, 0, len(clients)),
|
||||
}
|
||||
|
||||
for _, info := range clients {
|
||||
for _, id := range sortedIDs {
|
||||
info := clients[id]
|
||||
domains := info.Domains.SafeString()
|
||||
if domains == "" {
|
||||
domains = "-"
|
||||
@@ -240,10 +317,12 @@ type clientsData struct {
|
||||
|
||||
func (h *Handler) handleListClients(w http.ResponseWriter, _ *http.Request, wantJSON bool) {
|
||||
clients := h.provider.ListClientsForDebug()
|
||||
sortedIDs := sortedAccountIDs(clients)
|
||||
|
||||
if wantJSON {
|
||||
clientsJSON := make([]map[string]interface{}, 0, len(clients))
|
||||
for _, info := range clients {
|
||||
for _, id := range sortedIDs {
|
||||
info := clients[id]
|
||||
clientsJSON = append(clientsJSON, map[string]interface{}{
|
||||
"account_id": info.AccountID,
|
||||
"domain_count": info.DomainCount,
|
||||
@@ -266,7 +345,8 @@ func (h *Handler) handleListClients(w http.ResponseWriter, _ *http.Request, want
|
||||
Clients: make([]clientData, 0, len(clients)),
|
||||
}
|
||||
|
||||
for _, info := range clients {
|
||||
for _, id := range sortedIDs {
|
||||
info := clients[id]
|
||||
domains := info.Domains.SafeString()
|
||||
if domains == "" {
|
||||
domains = "-"
|
||||
@@ -556,15 +636,12 @@ func (h *Handler) handleClientStop(w http.ResponseWriter, r *http.Request, accou
|
||||
})
|
||||
}
|
||||
|
||||
type healthData struct {
|
||||
Uptime string
|
||||
Status string
|
||||
ManagementReady bool
|
||||
AllClientsHealthy bool
|
||||
Clients map[types.AccountID]health.ClientHealth
|
||||
}
|
||||
|
||||
func (h *Handler) handleHealth(w http.ResponseWriter, r *http.Request, wantJSON bool) {
|
||||
if !wantJSON {
|
||||
http.Redirect(w, r, "/debug", http.StatusSeeOther)
|
||||
return
|
||||
}
|
||||
|
||||
uptime := time.Since(h.startTime).Round(10 * time.Millisecond).String()
|
||||
|
||||
ready := h.health.ReadinessProbe()
|
||||
@@ -575,26 +652,40 @@ func (h *Handler) handleHealth(w http.ResponseWriter, r *http.Request, wantJSON
|
||||
status = "degraded"
|
||||
}
|
||||
|
||||
if wantJSON {
|
||||
h.writeJSON(w, map[string]interface{}{
|
||||
"status": status,
|
||||
"uptime": uptime,
|
||||
"management_connected": ready,
|
||||
"all_clients_healthy": allHealthy,
|
||||
"clients": clientHealth,
|
||||
})
|
||||
return
|
||||
var certsTotal, certsReady, certsPending, certsFailed int
|
||||
var certsPendingDomains, certsReadyDomains []string
|
||||
var certsFailedDomains map[string]string
|
||||
if h.certStatus != nil {
|
||||
certsTotal = h.certStatus.TotalDomains()
|
||||
certsPendingDomains = h.certStatus.PendingDomains()
|
||||
certsReadyDomains = h.certStatus.ReadyDomains()
|
||||
certsFailedDomains = h.certStatus.FailedDomains()
|
||||
certsReady = len(certsReadyDomains)
|
||||
certsPending = len(certsPendingDomains)
|
||||
certsFailed = len(certsFailedDomains)
|
||||
}
|
||||
|
||||
data := healthData{
|
||||
Uptime: time.Since(h.startTime).Round(time.Second).String(),
|
||||
Status: status,
|
||||
ManagementReady: ready,
|
||||
AllClientsHealthy: allHealthy,
|
||||
Clients: clientHealth,
|
||||
resp := map[string]any{
|
||||
"status": status,
|
||||
"uptime": uptime,
|
||||
"management_connected": ready,
|
||||
"all_clients_healthy": allHealthy,
|
||||
"certs_total": certsTotal,
|
||||
"certs_ready": certsReady,
|
||||
"certs_pending": certsPending,
|
||||
"certs_failed": certsFailed,
|
||||
"clients": clientHealth,
|
||||
}
|
||||
|
||||
h.renderTemplate(w, "health", data)
|
||||
if len(certsPendingDomains) > 0 {
|
||||
resp["certs_pending_domains"] = certsPendingDomains
|
||||
}
|
||||
if len(certsReadyDomains) > 0 {
|
||||
resp["certs_ready_domains"] = certsReadyDomains
|
||||
}
|
||||
if len(certsFailedDomains) > 0 {
|
||||
resp["certs_failed_domains"] = certsFailedDomains
|
||||
}
|
||||
h.writeJSON(w, resp)
|
||||
}
|
||||
|
||||
func (h *Handler) renderTemplate(w http.ResponseWriter, name string, data interface{}) {
|
||||
|
||||
@@ -1,39 +0,0 @@
|
||||
{{define "health"}}
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<title>Health</title>
|
||||
<style>{{template "style"}}</style>
|
||||
</head>
|
||||
<body>
|
||||
<h1>{{.Status}}</h1>
|
||||
<p>Uptime: {{.Uptime}}</p>
|
||||
<p>Management Connected: {{.ManagementReady}}</p>
|
||||
<p>All Clients Healthy: {{.AllClientsHealthy}}</p>
|
||||
{{if .Clients}}
|
||||
<h2>Clients</h2>
|
||||
<table>
|
||||
<tr>
|
||||
<th>Account ID</th>
|
||||
<th>Healthy</th>
|
||||
<th>Management</th>
|
||||
<th>Signal</th>
|
||||
<th>Relays</th>
|
||||
<th>Error</th>
|
||||
</tr>
|
||||
{{range $id, $c := .Clients}}
|
||||
<tr>
|
||||
<td>{{$id}}</td>
|
||||
<td>{{$c.Healthy}}</td>
|
||||
<td>{{$c.ManagementConnected}}</td>
|
||||
<td>{{$c.SignalConnected}}</td>
|
||||
<td>{{$c.RelaysConnected}}/{{$c.RelaysTotal}}</td>
|
||||
<td>{{$c.Error}}</td>
|
||||
</tr>
|
||||
{{end}}
|
||||
</table>
|
||||
{{end}}
|
||||
<p><a href="/debug">← Back</a></p>
|
||||
</body>
|
||||
</html>
|
||||
{{end}}
|
||||
@@ -8,6 +8,25 @@
|
||||
<body>
|
||||
<h1>NetBird Proxy Debug</h1>
|
||||
<p class="info">Version: {{.Version}} | Uptime: {{.Uptime}}</p>
|
||||
<h2>Certificates: {{.CertsReady}} ready, {{.CertsPending}} pending, {{.CertsFailed}} failed ({{.CertsTotal}} total)</h2>
|
||||
{{if .CertsReadyDomains}}
|
||||
<details>
|
||||
<summary>Ready domains ({{.CertsReady}})</summary>
|
||||
<ul>{{range .CertsReadyDomains}}<li>{{.}}</li>{{end}}</ul>
|
||||
</details>
|
||||
{{end}}
|
||||
{{if .CertsPendingDomains}}
|
||||
<details open>
|
||||
<summary>Pending domains ({{.CertsPending}})</summary>
|
||||
<ul>{{range .CertsPendingDomains}}<li>{{.}}</li>{{end}}</ul>
|
||||
</details>
|
||||
{{end}}
|
||||
{{if .CertsFailedDomains}}
|
||||
<details open>
|
||||
<summary>Failed domains ({{.CertsFailed}})</summary>
|
||||
<ul>{{range .CertsFailedDomains}}<li>{{.Domain}}: {{.Error}}</li>{{end}}</ul>
|
||||
</details>
|
||||
{{end}}
|
||||
<h2>Clients ({{.ClientCount}}) | Domains ({{.TotalDomains}})</h2>
|
||||
{{if .Clients}}
|
||||
<table>
|
||||
@@ -32,7 +51,6 @@
|
||||
<h2>Endpoints</h2>
|
||||
<ul>
|
||||
<li><a href="/debug/clients">/debug/clients</a> - all clients detail</li>
|
||||
<li><a href="/debug/health">/debug/health</a> - health check</li>
|
||||
</ul>
|
||||
<p class="info">Add ?format=json or /json suffix for JSON output</p>
|
||||
</body>
|
||||
|
||||
Reference in New Issue
Block a user