From 28b6865f738b637817a76c8e9147323be7147d50 Mon Sep 17 00:00:00 2001 From: Owen Date: Mon, 11 Aug 2025 08:14:29 -0700 Subject: [PATCH] Healthcheck working --- healthcheck/healthcheck.go | 113 ++++++++++++++++++++++++++++++++----- main.go | 67 ++++++++++++++-------- 2 files changed, 141 insertions(+), 39 deletions(-) diff --git a/healthcheck/healthcheck.go b/healthcheck/healthcheck.go index 4e06343..ae4a411 100644 --- a/healthcheck/healthcheck.go +++ b/healthcheck/healthcheck.go @@ -8,18 +8,20 @@ import ( "strings" "sync" "time" + + "github.com/fosrl/newt/logger" ) -// Status represents the health status of a target -type Status int +// Health represents the health status of a target +type Health int const ( - StatusUnknown Status = iota + StatusUnknown Health = iota StatusHealthy StatusUnhealthy ) -func (s Status) String() string { +func (s Health) String() string { switch s { case StatusHealthy: return "healthy" @@ -44,12 +46,13 @@ type Config struct { Timeout int `json:"hcTimeout"` // in seconds Headers map[string]string `json:"hcHeaders"` Method string `json:"hcMethod"` + Status int `json:"hcStatus"` // HTTP status code } // Target represents a health check target with its current status type Target struct { Config Config `json:"config"` - Status Status `json:"status"` + Status Health `json:"status"` LastCheck time.Time `json:"lastCheck"` LastError string `json:"lastError,omitempty"` CheckCount int `json:"checkCount"` @@ -71,6 +74,7 @@ type Monitor struct { // NewMonitor creates a new health check monitor func NewMonitor(callback StatusChangeCallback) *Monitor { + logger.Info("Creating new health check monitor") return &Monitor{ targets: make(map[int]*Target), callback: callback, @@ -108,6 +112,9 @@ func (m *Monitor) AddTarget(config Config) error { m.mutex.Lock() defer m.mutex.Unlock() + logger.Info("Adding health check target: ID=%d, hostname=%s, port=%d, enabled=%t", + config.ID, config.Hostname, config.Port, config.Enabled) + return m.addTargetUnsafe(config) } @@ -116,17 +123,20 @@ func (m *Monitor) AddTargets(configs []Config) error { m.mutex.Lock() defer m.mutex.Unlock() + logger.Info("Adding %d health check targets in bulk", len(configs)) + for _, config := range configs { if err := m.addTargetUnsafe(config); err != nil { + logger.Error("Failed to add target %d: %v", config.ID, err) return fmt.Errorf("failed to add target %d: %v", config.ID, err) } + logger.Debug("Successfully added target: ID=%d, hostname=%s", config.ID, config.Hostname) } - // Notify callback once after all targets are added - if m.callback != nil { - go m.callback(m.getAllTargetsUnsafe()) - } + // Don't notify callback immediately - let the initial health checks complete first + // The callback will be triggered when the first health check results are available + logger.Info("Successfully added all %d health check targets", len(configs)) return nil } @@ -152,6 +162,9 @@ func (m *Monitor) addTargetUnsafe(config Config) error { config.Timeout = 5 } + logger.Debug("Target %d configuration: scheme=%s, method=%s, interval=%ds, timeout=%ds", + config.ID, config.Scheme, config.Method, config.Interval, config.Timeout) + // Parse headers if provided as string if len(config.Headers) == 0 && config.Path != "" { // This is a simplified header parsing - in real use you might want more robust parsing @@ -160,6 +173,7 @@ func (m *Monitor) addTargetUnsafe(config Config) error { // Remove existing target if it exists if existing, exists := m.targets[config.ID]; exists { + logger.Info("Replacing existing target with ID %d", config.ID) existing.cancel() } @@ -176,7 +190,10 @@ func (m *Monitor) addTargetUnsafe(config Config) error { // Start monitoring if enabled if config.Enabled { + logger.Info("Starting monitoring for target %d (%s:%d)", config.ID, config.Hostname, config.Port) go m.monitorTarget(target) + } else { + logger.Debug("Target %d added but monitoring is disabled", config.ID) } return nil @@ -189,9 +206,11 @@ func (m *Monitor) RemoveTarget(id int) error { target, exists := m.targets[id] if !exists { + logger.Warn("Attempted to remove non-existent target with ID %d", id) return fmt.Errorf("target with id %d not found", id) } + logger.Info("Removing health check target: ID=%d", id) target.cancel() delete(m.targets, id) @@ -200,6 +219,7 @@ func (m *Monitor) RemoveTarget(id int) error { go m.callback(m.GetTargets()) } + logger.Info("Successfully removed target %d", id) return nil } @@ -208,25 +228,32 @@ func (m *Monitor) RemoveTargets(ids []int) error { m.mutex.Lock() defer m.mutex.Unlock() + logger.Info("Removing %d health check targets", len(ids)) var notFound []int for _, id := range ids { target, exists := m.targets[id] if !exists { notFound = append(notFound, id) + logger.Warn("Target with ID %d not found during bulk removal", id) continue } + logger.Debug("Removing target %d", id) target.cancel() delete(m.targets, id) } + removedCount := len(ids) - len(notFound) + logger.Info("Successfully removed %d targets", removedCount) + // Notify callback of status change if any targets were removed if len(notFound) != len(ids) && m.callback != nil { go m.callback(m.GetTargets()) } if len(notFound) > 0 { + logger.Error("Some targets not found during removal: %v", notFound) return fmt.Errorf("targets not found: %v", notFound) } @@ -263,21 +290,33 @@ func (m *Monitor) getAllTargets() map[int]*Target { // monitorTarget monitors a single target func (m *Monitor) monitorTarget(target *Target) { + logger.Info("Starting health check monitoring for target %d (%s:%d)", + target.Config.ID, target.Config.Hostname, target.Config.Port) + // Initial check + oldStatus := target.Status m.performHealthCheck(target) + // Notify callback after initial check if status changed or if it's the first check + if (oldStatus != target.Status || oldStatus == StatusUnknown) && m.callback != nil { + logger.Info("Target %d initial status: %s", target.Config.ID, target.Status.String()) + go m.callback(m.GetTargets()) + } + // Set up ticker based on current status interval := time.Duration(target.Config.Interval) * time.Second if target.Status == StatusUnhealthy { interval = time.Duration(target.Config.UnhealthyInterval) * time.Second } + logger.Debug("Target %d: initial check interval set to %v", target.Config.ID, interval) target.ticker = time.NewTicker(interval) defer target.ticker.Stop() for { select { case <-target.ctx.Done(): + logger.Info("Stopping health check monitoring for target %d", target.Config.ID) return case <-target.ticker.C: oldStatus := target.Status @@ -290,6 +329,8 @@ func (m *Monitor) monitorTarget(target *Target) { } if newInterval != interval { + logger.Debug("Target %d: updating check interval from %v to %v due to status change", + target.Config.ID, interval, newInterval) target.ticker.Stop() target.ticker = time.NewTicker(newInterval) interval = newInterval @@ -297,6 +338,8 @@ func (m *Monitor) monitorTarget(target *Target) { // Notify callback if status changed if oldStatus != target.Status && m.callback != nil { + logger.Info("Target %d status changed: %s -> %s", + target.Config.ID, oldStatus.String(), target.Status.String()) go m.callback(m.GetTargets()) } } @@ -321,6 +364,9 @@ func (m *Monitor) performHealthCheck(target *Target) { url += target.Config.Path } + logger.Debug("Target %d: performing health check %d to %s", + target.Config.ID, target.CheckCount, url) + // Create request ctx, cancel := context.WithTimeout(context.Background(), time.Duration(target.Config.Timeout)*time.Second) defer cancel() @@ -329,6 +375,7 @@ func (m *Monitor) performHealthCheck(target *Target) { if err != nil { target.Status = StatusUnhealthy target.LastError = fmt.Sprintf("failed to create request: %v", err) + logger.Warn("Target %d: failed to create request: %v", target.Config.ID, err) return } @@ -342,16 +389,40 @@ func (m *Monitor) performHealthCheck(target *Target) { if err != nil { target.Status = StatusUnhealthy target.LastError = fmt.Sprintf("request failed: %v", err) + logger.Warn("Target %d: health check failed: %v", target.Config.ID, err) return } defer resp.Body.Close() // Check response status - if resp.StatusCode >= 200 && resp.StatusCode < 300 { - target.Status = StatusHealthy + var expectedStatus int + if target.Config.Status > 0 { + expectedStatus = target.Config.Status } else { - target.Status = StatusUnhealthy - target.LastError = fmt.Sprintf("unhealthy status code: %d", resp.StatusCode) + expectedStatus = 0 // Use range check for 200-299 + } + + if expectedStatus > 0 { + logger.Debug("Target %d: checking health status against expected code %d", target.Config.ID, expectedStatus) + // Check for specific status code + if resp.StatusCode == expectedStatus { + target.Status = StatusHealthy + logger.Debug("Target %d: health check passed (status: %d, expected: %d)", target.Config.ID, resp.StatusCode, expectedStatus) + } else { + target.Status = StatusUnhealthy + target.LastError = fmt.Sprintf("unexpected status code: %d (expected: %d)", resp.StatusCode, expectedStatus) + logger.Warn("Target %d: health check failed with status code %d (expected: %d)", target.Config.ID, resp.StatusCode, expectedStatus) + } + } else { + // Check for 2xx range + if resp.StatusCode >= 200 && resp.StatusCode < 300 { + target.Status = StatusHealthy + logger.Debug("Target %d: health check passed (status: %d)", target.Config.ID, resp.StatusCode) + } else { + target.Status = StatusUnhealthy + target.LastError = fmt.Sprintf("unhealthy status code: %d", resp.StatusCode) + logger.Warn("Target %d: health check failed with status code %d", target.Config.ID, resp.StatusCode) + } } } @@ -360,10 +431,16 @@ func (m *Monitor) Stop() { m.mutex.Lock() defer m.mutex.Unlock() - for _, target := range m.targets { + targetCount := len(m.targets) + logger.Info("Stopping health check monitor with %d targets", targetCount) + + for id, target := range m.targets { + logger.Debug("Stopping monitoring for target %d", id) target.cancel() } m.targets = make(map[int]*Target) + + logger.Info("Health check monitor stopped") } // EnableTarget enables monitoring for a specific target @@ -373,10 +450,12 @@ func (m *Monitor) EnableTarget(id int) error { target, exists := m.targets[id] if !exists { + logger.Warn("Attempted to enable non-existent target with ID %d", id) return fmt.Errorf("target with id %d not found", id) } if !target.Config.Enabled { + logger.Info("Enabling health check monitoring for target %d", id) target.Config.Enabled = true target.cancel() // Stop existing monitoring @@ -385,6 +464,8 @@ func (m *Monitor) EnableTarget(id int) error { target.cancel = cancel go m.monitorTarget(target) + } else { + logger.Debug("Target %d is already enabled", id) } return nil @@ -397,10 +478,12 @@ func (m *Monitor) DisableTarget(id int) error { target, exists := m.targets[id] if !exists { + logger.Warn("Attempted to disable non-existent target with ID %d", id) return fmt.Errorf("target with id %d not found", id) } if target.Config.Enabled { + logger.Info("Disabling health check monitoring for target %d", id) target.Config.Enabled = false target.cancel() target.Status = StatusUnknown @@ -409,6 +492,8 @@ func (m *Monitor) DisableTarget(id int) error { if m.callback != nil { go m.callback(m.GetTargets()) } + } else { + logger.Debug("Target %d is already disabled", id) } return nil diff --git a/main.go b/main.go index 4f96897..2db6160 100644 --- a/main.go +++ b/main.go @@ -101,6 +101,7 @@ var ( healthFile string useNativeInterface bool authorizedKeysFile string + preferEndpoint string healthMonitor *healthcheck.Monitor ) @@ -172,6 +173,9 @@ func main() { if pingTimeoutStr == "" { flag.StringVar(&pingTimeoutStr, "ping-timeout", "5s", " Timeout for each ping (default 5s)") } + // load the prefer endpoint just as a flag + flag.StringVar(&preferEndpoint, "prefer-endpoint", "", "Prefer this endpoint for the connection (if set, will override the endpoint from the server)") + // if authorizedKeysFile == "" { // flag.StringVar(&authorizedKeysFile, "authorized-keys-file", "~/.ssh/authorized_keys", "Path to authorized keys file (if unset, no keys will be authorized)") // } @@ -291,6 +295,33 @@ func main() { setupClients(client) } + // Initialize health check monitor with status change callback + healthMonitor = healthcheck.NewMonitor(func(targets map[int]*healthcheck.Target) { + logger.Debug("Health check status update for %d targets", len(targets)) + + // Send health status update to the server + healthStatuses := make(map[int]interface{}) + for id, target := range targets { + healthStatuses[id] = map[string]interface{}{ + "status": target.Status.String(), + "lastCheck": target.LastCheck.Format(time.RFC3339), + "checkCount": target.CheckCount, + "lastError": target.LastError, + "config": target.Config, + } + } + + // print the status of the targets + logger.Debug("Health check status: %+v", healthStatuses) + + err := client.SendMessage("newt/healthcheck/status", map[string]interface{}{ + "targets": healthStatuses, + }) + if err != nil { + logger.Error("Failed to send health check status update: %v", err) + } + }) + var pingWithRetryStopChan chan struct{} closeWgTunnel := func() { @@ -529,9 +560,19 @@ persistent_keepalive_interval=5`, fixKey(privateKey.String()), fixKey(wgData.Pub } // If there is just one exit node, we can skip pinging it and use it directly - if len(exitNodes) == 1 { + if len(exitNodes) == 1 || preferEndpoint != "" { logger.Debug("Only one exit node available, using it directly: %s", exitNodes[0].Endpoint) + // if the preferEndpoint is set, we will use it instead of the exit node endpoint. first you need to find the exit node with that endpoint in the list and send that one + if preferEndpoint != "" { + for _, node := range exitNodes { + if node.Endpoint == preferEndpoint { + exitNodes[0] = node + break + } + } + } + // Prepare data to send to the cloud for selection pingResults := []ExitNodePingResult{ { @@ -907,30 +948,6 @@ persistent_keepalive_interval=5`, fixKey(privateKey.String()), fixKey(wgData.Pub logger.Info("SSH public key appended to authorized keys file") }) - // Initialize health check monitor with status change callback - healthMonitor = healthcheck.NewMonitor(func(targets map[int]*healthcheck.Target) { - logger.Debug("Health check status update for %d targets", len(targets)) - - // Send health status update to the server - healthStatuses := make(map[int]interface{}) - for id, target := range targets { - healthStatuses[id] = map[string]interface{}{ - "status": target.Status.String(), - "lastCheck": target.LastCheck.Format(time.RFC3339), - "checkCount": target.CheckCount, - "lastError": target.LastError, - "config": target.Config, - } - } - - err := client.SendMessage("newt/healthcheck/status", map[string]interface{}{ - "targets": healthStatuses, - }) - if err != nil { - logger.Error("Failed to send health check status update: %v", err) - } - }) - // Register handler for adding health check targets client.RegisterHandler("newt/healthcheck/add", func(msg websocket.WSMessage) { logger.Debug("Received health check add request: %+v", msg)