Healthcheck working

This commit is contained in:
Owen
2025-08-11 08:14:29 -07:00
parent 289cce3a22
commit 28b6865f73
2 changed files with 141 additions and 39 deletions

View File

@@ -8,18 +8,20 @@ import (
"strings" "strings"
"sync" "sync"
"time" "time"
"github.com/fosrl/newt/logger"
) )
// Status represents the health status of a target // Health represents the health status of a target
type Status int type Health int
const ( const (
StatusUnknown Status = iota StatusUnknown Health = iota
StatusHealthy StatusHealthy
StatusUnhealthy StatusUnhealthy
) )
func (s Status) String() string { func (s Health) String() string {
switch s { switch s {
case StatusHealthy: case StatusHealthy:
return "healthy" return "healthy"
@@ -44,12 +46,13 @@ type Config struct {
Timeout int `json:"hcTimeout"` // in seconds Timeout int `json:"hcTimeout"` // in seconds
Headers map[string]string `json:"hcHeaders"` Headers map[string]string `json:"hcHeaders"`
Method string `json:"hcMethod"` Method string `json:"hcMethod"`
Status int `json:"hcStatus"` // HTTP status code
} }
// Target represents a health check target with its current status // Target represents a health check target with its current status
type Target struct { type Target struct {
Config Config `json:"config"` Config Config `json:"config"`
Status Status `json:"status"` Status Health `json:"status"`
LastCheck time.Time `json:"lastCheck"` LastCheck time.Time `json:"lastCheck"`
LastError string `json:"lastError,omitempty"` LastError string `json:"lastError,omitempty"`
CheckCount int `json:"checkCount"` CheckCount int `json:"checkCount"`
@@ -71,6 +74,7 @@ type Monitor struct {
// NewMonitor creates a new health check monitor // NewMonitor creates a new health check monitor
func NewMonitor(callback StatusChangeCallback) *Monitor { func NewMonitor(callback StatusChangeCallback) *Monitor {
logger.Info("Creating new health check monitor")
return &Monitor{ return &Monitor{
targets: make(map[int]*Target), targets: make(map[int]*Target),
callback: callback, callback: callback,
@@ -108,6 +112,9 @@ func (m *Monitor) AddTarget(config Config) error {
m.mutex.Lock() m.mutex.Lock()
defer m.mutex.Unlock() defer m.mutex.Unlock()
logger.Info("Adding health check target: ID=%d, hostname=%s, port=%d, enabled=%t",
config.ID, config.Hostname, config.Port, config.Enabled)
return m.addTargetUnsafe(config) return m.addTargetUnsafe(config)
} }
@@ -116,17 +123,20 @@ func (m *Monitor) AddTargets(configs []Config) error {
m.mutex.Lock() m.mutex.Lock()
defer m.mutex.Unlock() defer m.mutex.Unlock()
logger.Info("Adding %d health check targets in bulk", len(configs))
for _, config := range configs { for _, config := range configs {
if err := m.addTargetUnsafe(config); err != nil { if err := m.addTargetUnsafe(config); err != nil {
logger.Error("Failed to add target %d: %v", config.ID, err)
return fmt.Errorf("failed to add target %d: %v", config.ID, err) return fmt.Errorf("failed to add target %d: %v", config.ID, err)
} }
logger.Debug("Successfully added target: ID=%d, hostname=%s", config.ID, config.Hostname)
} }
// Notify callback once after all targets are added // Don't notify callback immediately - let the initial health checks complete first
if m.callback != nil { // The callback will be triggered when the first health check results are available
go m.callback(m.getAllTargetsUnsafe())
}
logger.Info("Successfully added all %d health check targets", len(configs))
return nil return nil
} }
@@ -152,6 +162,9 @@ func (m *Monitor) addTargetUnsafe(config Config) error {
config.Timeout = 5 config.Timeout = 5
} }
logger.Debug("Target %d configuration: scheme=%s, method=%s, interval=%ds, timeout=%ds",
config.ID, config.Scheme, config.Method, config.Interval, config.Timeout)
// Parse headers if provided as string // Parse headers if provided as string
if len(config.Headers) == 0 && config.Path != "" { if len(config.Headers) == 0 && config.Path != "" {
// This is a simplified header parsing - in real use you might want more robust parsing // This is a simplified header parsing - in real use you might want more robust parsing
@@ -160,6 +173,7 @@ func (m *Monitor) addTargetUnsafe(config Config) error {
// Remove existing target if it exists // Remove existing target if it exists
if existing, exists := m.targets[config.ID]; exists { if existing, exists := m.targets[config.ID]; exists {
logger.Info("Replacing existing target with ID %d", config.ID)
existing.cancel() existing.cancel()
} }
@@ -176,7 +190,10 @@ func (m *Monitor) addTargetUnsafe(config Config) error {
// Start monitoring if enabled // Start monitoring if enabled
if config.Enabled { if config.Enabled {
logger.Info("Starting monitoring for target %d (%s:%d)", config.ID, config.Hostname, config.Port)
go m.monitorTarget(target) go m.monitorTarget(target)
} else {
logger.Debug("Target %d added but monitoring is disabled", config.ID)
} }
return nil return nil
@@ -189,9 +206,11 @@ func (m *Monitor) RemoveTarget(id int) error {
target, exists := m.targets[id] target, exists := m.targets[id]
if !exists { if !exists {
logger.Warn("Attempted to remove non-existent target with ID %d", id)
return fmt.Errorf("target with id %d not found", id) return fmt.Errorf("target with id %d not found", id)
} }
logger.Info("Removing health check target: ID=%d", id)
target.cancel() target.cancel()
delete(m.targets, id) delete(m.targets, id)
@@ -200,6 +219,7 @@ func (m *Monitor) RemoveTarget(id int) error {
go m.callback(m.GetTargets()) go m.callback(m.GetTargets())
} }
logger.Info("Successfully removed target %d", id)
return nil return nil
} }
@@ -208,25 +228,32 @@ func (m *Monitor) RemoveTargets(ids []int) error {
m.mutex.Lock() m.mutex.Lock()
defer m.mutex.Unlock() defer m.mutex.Unlock()
logger.Info("Removing %d health check targets", len(ids))
var notFound []int var notFound []int
for _, id := range ids { for _, id := range ids {
target, exists := m.targets[id] target, exists := m.targets[id]
if !exists { if !exists {
notFound = append(notFound, id) notFound = append(notFound, id)
logger.Warn("Target with ID %d not found during bulk removal", id)
continue continue
} }
logger.Debug("Removing target %d", id)
target.cancel() target.cancel()
delete(m.targets, id) delete(m.targets, id)
} }
removedCount := len(ids) - len(notFound)
logger.Info("Successfully removed %d targets", removedCount)
// Notify callback of status change if any targets were removed // Notify callback of status change if any targets were removed
if len(notFound) != len(ids) && m.callback != nil { if len(notFound) != len(ids) && m.callback != nil {
go m.callback(m.GetTargets()) go m.callback(m.GetTargets())
} }
if len(notFound) > 0 { if len(notFound) > 0 {
logger.Error("Some targets not found during removal: %v", notFound)
return fmt.Errorf("targets not found: %v", notFound) return fmt.Errorf("targets not found: %v", notFound)
} }
@@ -263,21 +290,33 @@ func (m *Monitor) getAllTargets() map[int]*Target {
// monitorTarget monitors a single target // monitorTarget monitors a single target
func (m *Monitor) monitorTarget(target *Target) { func (m *Monitor) monitorTarget(target *Target) {
logger.Info("Starting health check monitoring for target %d (%s:%d)",
target.Config.ID, target.Config.Hostname, target.Config.Port)
// Initial check // Initial check
oldStatus := target.Status
m.performHealthCheck(target) m.performHealthCheck(target)
// Notify callback after initial check if status changed or if it's the first check
if (oldStatus != target.Status || oldStatus == StatusUnknown) && m.callback != nil {
logger.Info("Target %d initial status: %s", target.Config.ID, target.Status.String())
go m.callback(m.GetTargets())
}
// Set up ticker based on current status // Set up ticker based on current status
interval := time.Duration(target.Config.Interval) * time.Second interval := time.Duration(target.Config.Interval) * time.Second
if target.Status == StatusUnhealthy { if target.Status == StatusUnhealthy {
interval = time.Duration(target.Config.UnhealthyInterval) * time.Second interval = time.Duration(target.Config.UnhealthyInterval) * time.Second
} }
logger.Debug("Target %d: initial check interval set to %v", target.Config.ID, interval)
target.ticker = time.NewTicker(interval) target.ticker = time.NewTicker(interval)
defer target.ticker.Stop() defer target.ticker.Stop()
for { for {
select { select {
case <-target.ctx.Done(): case <-target.ctx.Done():
logger.Info("Stopping health check monitoring for target %d", target.Config.ID)
return return
case <-target.ticker.C: case <-target.ticker.C:
oldStatus := target.Status oldStatus := target.Status
@@ -290,6 +329,8 @@ func (m *Monitor) monitorTarget(target *Target) {
} }
if newInterval != interval { if newInterval != interval {
logger.Debug("Target %d: updating check interval from %v to %v due to status change",
target.Config.ID, interval, newInterval)
target.ticker.Stop() target.ticker.Stop()
target.ticker = time.NewTicker(newInterval) target.ticker = time.NewTicker(newInterval)
interval = newInterval interval = newInterval
@@ -297,6 +338,8 @@ func (m *Monitor) monitorTarget(target *Target) {
// Notify callback if status changed // Notify callback if status changed
if oldStatus != target.Status && m.callback != nil { if oldStatus != target.Status && m.callback != nil {
logger.Info("Target %d status changed: %s -> %s",
target.Config.ID, oldStatus.String(), target.Status.String())
go m.callback(m.GetTargets()) go m.callback(m.GetTargets())
} }
} }
@@ -321,6 +364,9 @@ func (m *Monitor) performHealthCheck(target *Target) {
url += target.Config.Path url += target.Config.Path
} }
logger.Debug("Target %d: performing health check %d to %s",
target.Config.ID, target.CheckCount, url)
// Create request // Create request
ctx, cancel := context.WithTimeout(context.Background(), time.Duration(target.Config.Timeout)*time.Second) ctx, cancel := context.WithTimeout(context.Background(), time.Duration(target.Config.Timeout)*time.Second)
defer cancel() defer cancel()
@@ -329,6 +375,7 @@ func (m *Monitor) performHealthCheck(target *Target) {
if err != nil { if err != nil {
target.Status = StatusUnhealthy target.Status = StatusUnhealthy
target.LastError = fmt.Sprintf("failed to create request: %v", err) target.LastError = fmt.Sprintf("failed to create request: %v", err)
logger.Warn("Target %d: failed to create request: %v", target.Config.ID, err)
return return
} }
@@ -342,16 +389,40 @@ func (m *Monitor) performHealthCheck(target *Target) {
if err != nil { if err != nil {
target.Status = StatusUnhealthy target.Status = StatusUnhealthy
target.LastError = fmt.Sprintf("request failed: %v", err) target.LastError = fmt.Sprintf("request failed: %v", err)
logger.Warn("Target %d: health check failed: %v", target.Config.ID, err)
return return
} }
defer resp.Body.Close() defer resp.Body.Close()
// Check response status // Check response status
if resp.StatusCode >= 200 && resp.StatusCode < 300 { var expectedStatus int
target.Status = StatusHealthy if target.Config.Status > 0 {
expectedStatus = target.Config.Status
} else { } else {
target.Status = StatusUnhealthy expectedStatus = 0 // Use range check for 200-299
target.LastError = fmt.Sprintf("unhealthy status code: %d", resp.StatusCode) }
if expectedStatus > 0 {
logger.Debug("Target %d: checking health status against expected code %d", target.Config.ID, expectedStatus)
// Check for specific status code
if resp.StatusCode == expectedStatus {
target.Status = StatusHealthy
logger.Debug("Target %d: health check passed (status: %d, expected: %d)", target.Config.ID, resp.StatusCode, expectedStatus)
} else {
target.Status = StatusUnhealthy
target.LastError = fmt.Sprintf("unexpected status code: %d (expected: %d)", resp.StatusCode, expectedStatus)
logger.Warn("Target %d: health check failed with status code %d (expected: %d)", target.Config.ID, resp.StatusCode, expectedStatus)
}
} else {
// Check for 2xx range
if resp.StatusCode >= 200 && resp.StatusCode < 300 {
target.Status = StatusHealthy
logger.Debug("Target %d: health check passed (status: %d)", target.Config.ID, resp.StatusCode)
} else {
target.Status = StatusUnhealthy
target.LastError = fmt.Sprintf("unhealthy status code: %d", resp.StatusCode)
logger.Warn("Target %d: health check failed with status code %d", target.Config.ID, resp.StatusCode)
}
} }
} }
@@ -360,10 +431,16 @@ func (m *Monitor) Stop() {
m.mutex.Lock() m.mutex.Lock()
defer m.mutex.Unlock() defer m.mutex.Unlock()
for _, target := range m.targets { targetCount := len(m.targets)
logger.Info("Stopping health check monitor with %d targets", targetCount)
for id, target := range m.targets {
logger.Debug("Stopping monitoring for target %d", id)
target.cancel() target.cancel()
} }
m.targets = make(map[int]*Target) m.targets = make(map[int]*Target)
logger.Info("Health check monitor stopped")
} }
// EnableTarget enables monitoring for a specific target // EnableTarget enables monitoring for a specific target
@@ -373,10 +450,12 @@ func (m *Monitor) EnableTarget(id int) error {
target, exists := m.targets[id] target, exists := m.targets[id]
if !exists { if !exists {
logger.Warn("Attempted to enable non-existent target with ID %d", id)
return fmt.Errorf("target with id %d not found", id) return fmt.Errorf("target with id %d not found", id)
} }
if !target.Config.Enabled { if !target.Config.Enabled {
logger.Info("Enabling health check monitoring for target %d", id)
target.Config.Enabled = true target.Config.Enabled = true
target.cancel() // Stop existing monitoring target.cancel() // Stop existing monitoring
@@ -385,6 +464,8 @@ func (m *Monitor) EnableTarget(id int) error {
target.cancel = cancel target.cancel = cancel
go m.monitorTarget(target) go m.monitorTarget(target)
} else {
logger.Debug("Target %d is already enabled", id)
} }
return nil return nil
@@ -397,10 +478,12 @@ func (m *Monitor) DisableTarget(id int) error {
target, exists := m.targets[id] target, exists := m.targets[id]
if !exists { if !exists {
logger.Warn("Attempted to disable non-existent target with ID %d", id)
return fmt.Errorf("target with id %d not found", id) return fmt.Errorf("target with id %d not found", id)
} }
if target.Config.Enabled { if target.Config.Enabled {
logger.Info("Disabling health check monitoring for target %d", id)
target.Config.Enabled = false target.Config.Enabled = false
target.cancel() target.cancel()
target.Status = StatusUnknown target.Status = StatusUnknown
@@ -409,6 +492,8 @@ func (m *Monitor) DisableTarget(id int) error {
if m.callback != nil { if m.callback != nil {
go m.callback(m.GetTargets()) go m.callback(m.GetTargets())
} }
} else {
logger.Debug("Target %d is already disabled", id)
} }
return nil return nil

67
main.go
View File

@@ -101,6 +101,7 @@ var (
healthFile string healthFile string
useNativeInterface bool useNativeInterface bool
authorizedKeysFile string authorizedKeysFile string
preferEndpoint string
healthMonitor *healthcheck.Monitor healthMonitor *healthcheck.Monitor
) )
@@ -172,6 +173,9 @@ func main() {
if pingTimeoutStr == "" { if pingTimeoutStr == "" {
flag.StringVar(&pingTimeoutStr, "ping-timeout", "5s", " Timeout for each ping (default 5s)") flag.StringVar(&pingTimeoutStr, "ping-timeout", "5s", " Timeout for each ping (default 5s)")
} }
// load the prefer endpoint just as a flag
flag.StringVar(&preferEndpoint, "prefer-endpoint", "", "Prefer this endpoint for the connection (if set, will override the endpoint from the server)")
// if authorizedKeysFile == "" { // if authorizedKeysFile == "" {
// flag.StringVar(&authorizedKeysFile, "authorized-keys-file", "~/.ssh/authorized_keys", "Path to authorized keys file (if unset, no keys will be authorized)") // flag.StringVar(&authorizedKeysFile, "authorized-keys-file", "~/.ssh/authorized_keys", "Path to authorized keys file (if unset, no keys will be authorized)")
// } // }
@@ -291,6 +295,33 @@ func main() {
setupClients(client) setupClients(client)
} }
// Initialize health check monitor with status change callback
healthMonitor = healthcheck.NewMonitor(func(targets map[int]*healthcheck.Target) {
logger.Debug("Health check status update for %d targets", len(targets))
// Send health status update to the server
healthStatuses := make(map[int]interface{})
for id, target := range targets {
healthStatuses[id] = map[string]interface{}{
"status": target.Status.String(),
"lastCheck": target.LastCheck.Format(time.RFC3339),
"checkCount": target.CheckCount,
"lastError": target.LastError,
"config": target.Config,
}
}
// print the status of the targets
logger.Debug("Health check status: %+v", healthStatuses)
err := client.SendMessage("newt/healthcheck/status", map[string]interface{}{
"targets": healthStatuses,
})
if err != nil {
logger.Error("Failed to send health check status update: %v", err)
}
})
var pingWithRetryStopChan chan struct{} var pingWithRetryStopChan chan struct{}
closeWgTunnel := func() { closeWgTunnel := func() {
@@ -529,9 +560,19 @@ persistent_keepalive_interval=5`, fixKey(privateKey.String()), fixKey(wgData.Pub
} }
// If there is just one exit node, we can skip pinging it and use it directly // If there is just one exit node, we can skip pinging it and use it directly
if len(exitNodes) == 1 { if len(exitNodes) == 1 || preferEndpoint != "" {
logger.Debug("Only one exit node available, using it directly: %s", exitNodes[0].Endpoint) logger.Debug("Only one exit node available, using it directly: %s", exitNodes[0].Endpoint)
// If preferEndpoint is set, use it instead of the first exit node's endpoint: find the exit node with that endpoint in the list and move it to the front so that it is the one sent. If no exit node matches, the first exit node is used unchanged.
if preferEndpoint != "" {
for _, node := range exitNodes {
if node.Endpoint == preferEndpoint {
exitNodes[0] = node
break
}
}
}
// Prepare data to send to the cloud for selection // Prepare data to send to the cloud for selection
pingResults := []ExitNodePingResult{ pingResults := []ExitNodePingResult{
{ {
@@ -907,30 +948,6 @@ persistent_keepalive_interval=5`, fixKey(privateKey.String()), fixKey(wgData.Pub
logger.Info("SSH public key appended to authorized keys file") logger.Info("SSH public key appended to authorized keys file")
}) })
// Initialize health check monitor with status change callback
healthMonitor = healthcheck.NewMonitor(func(targets map[int]*healthcheck.Target) {
logger.Debug("Health check status update for %d targets", len(targets))
// Send health status update to the server
healthStatuses := make(map[int]interface{})
for id, target := range targets {
healthStatuses[id] = map[string]interface{}{
"status": target.Status.String(),
"lastCheck": target.LastCheck.Format(time.RFC3339),
"checkCount": target.CheckCount,
"lastError": target.LastError,
"config": target.Config,
}
}
err := client.SendMessage("newt/healthcheck/status", map[string]interface{}{
"targets": healthStatuses,
})
if err != nil {
logger.Error("Failed to send health check status update: %v", err)
}
})
// Register handler for adding health check targets // Register handler for adding health check targets
client.RegisterHandler("newt/healthcheck/add", func(msg websocket.WSMessage) { client.RegisterHandler("newt/healthcheck/add", func(msg websocket.WSMessage) {
logger.Debug("Received health check add request: %+v", msg) logger.Debug("Received health check add request: %+v", msg)