From 9348842e2cd1cc9eea29e43c9ad5cd7740c15309 Mon Sep 17 00:00:00 2001 From: Wouter van Elten Date: Tue, 1 Jul 2025 17:26:49 +0200 Subject: [PATCH 1/4] added use of hostname if available --- docker/client.go | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/docker/client.go b/docker/client.go index 6a3bdb7..226e099 100644 --- a/docker/client.go +++ b/docker/client.go @@ -26,6 +26,8 @@ type Container struct { Labels map[string]string `json:"labels"` Created int64 `json:"created"` Networks map[string]Network `json:"networks"` + Hostname string `json:"hostname"` // added to use hostname if available instead of network address + } // Port represents a port mapping for a Docker container @@ -173,6 +175,14 @@ func ListContainers(socketPath string, enforceNetworkValidation bool) ([]Contain // Short ID like docker ps shortId := c.ID[:12] + // Inspect container to get hostname + hostname := "" + containerInfo, err := cli.ContainerInspect(ctx, c.ID) + if err == nil && containerInfo.Config != nil { + hostname = containerInfo.Config.Hostname + } + + // Skip host container if set if hostContainerId != "" && c.ID == hostContainerId { continue @@ -238,6 +248,7 @@ func ListContainers(socketPath string, enforceNetworkValidation bool) ([]Contain Labels: c.Labels, Created: c.Created, Networks: networks, + Hostname: hostname, // added } dockerContainers = append(dockerContainers, dockerContainer) From 4d343e3541064ecbf838c8959246280796091a10 Mon Sep 17 00:00:00 2001 From: Wouter van Elten Date: Fri, 11 Jul 2025 08:14:32 +0200 Subject: [PATCH 2/4] Update README.md for health check Added explanation of health_file --- README.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 9d88096..997745e 100644 --- a/README.md +++ b/README.md @@ -40,6 +40,7 @@ When Newt receives WireGuard control messages, it will use the information encod - `tls-client-cert` (optional): Client certificate (p12 or pfx) for mTLS. See [mTLS](#mtls) - `docker-socket` (optional): Set the Docker socket to use the container discovery integration - `docker-enforce-network-validation` (optional): Validate the container target is on the same network as the newt process +- `health_file` (optional): Check if connection to WG server (pangolin) is ok. creates a file if ok, removes it if not ok. Can be used with docker healtcheck to restart newt - Example: @@ -61,7 +62,8 @@ services: environment: - PANGOLIN_ENDPOINT=https://example.com - NEWT_ID=2ix2t8xk22ubpfy - - NEWT_SECRET=nnisrfsdfc7prqsp9ewo1dvtvci50j5uiqotez00dgap0ii2 + - NEWT_SECRET=nnisrfsdfc7prqsp9ewo1dvtvci50j5uiqotez00dgap0ii2 + - HEALTH_FILE=/tmp/healthy ``` You can also pass the CLI args to the container: @@ -76,6 +78,7 @@ services: - --id 31frd0uzbjvp721 - --secret h51mmlknrvrwv8s4r1i210azhumt6isgbpyavxodibx1k2d6 - --endpoint https://example.com + - --health_file /tmp/healthy ``` ### Docker Socket Integration From f513f97fc39163bf3c35137e306ad52750a01dc7 Mon Sep 17 00:00:00 2001 From: Owen Date: Sun, 13 Jul 2025 16:07:46 -0700 Subject: [PATCH 3/4] Working on better ping --- main.go | 21 ++++++++-- util.go | 120 ++++++++++++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 126 insertions(+), 15 deletions(-) diff --git a/main.go b/main.go index bcc1da9..b84ae8b 100644 --- a/main.go +++ b/main.go @@ -154,23 +154,27 @@ func main() { flag.StringVar(&pingIntervalStr, "ping-interval", "3s", "Interval for pinging the server (default 3s)") } if pingTimeoutStr == "" { - flag.StringVar(&pingTimeoutStr, "ping-timeout", "3s", " Timeout for each ping (default 3s)") + flag.StringVar(&pingTimeoutStr, "ping-timeout", "5s", " Timeout for each ping (default 3s)") } if pingIntervalStr != "" { pingInterval, err = time.ParseDuration(pingIntervalStr) if err != nil { - fmt.Printf("Invalid PING_INTERVAL value: %s, using default 1 second\n", pingIntervalStr) + fmt.Printf("Invalid PING_INTERVAL value: %s, using default 3 seconds\n", pingIntervalStr) pingInterval = 3 * time.Second } + } else { + pingInterval = 3 * time.Second } if pingTimeoutStr != "" { pingTimeout, err = time.ParseDuration(pingTimeoutStr) if err != nil { - fmt.Printf("Invalid PING_TIMEOUT value: %s, using default 2 seconds\n", pingTimeoutStr) - pingTimeout = 3 * time.Second + fmt.Printf("Invalid PING_TIMEOUT value: %s, using default 5 seconds\n", pingTimeoutStr) + pingTimeout = 5 * time.Second } + } else { + pingTimeout = 5 * time.Second } if dockerEnforceNetworkValidation == "" { @@ -386,6 +390,15 @@ persistent_keepalive_interval=5`, fixKey(privateKey.String()), fixKey(wgData.Pub close(pingWithRetryStopChan) pingWithRetryStopChan = nil } + // Use reliable ping for initial connection test + logger.Debug("Testing initial connection with reliable ping...") + _, err = reliablePing(tnet, wgData.ServerIP, pingTimeout, 5) + if err != nil { + logger.Warn("Initial reliable ping failed, but continuing: %v", err) + } else { + logger.Info("Initial connection test successful!") + } + pingWithRetryStopChan, _ = pingWithRetry(tnet, wgData.ServerIP, pingTimeout) // Always mark as connected and start the proxy manager regardless of initial ping result diff --git a/util.go b/util.go index 068a9f7..7d6da4f 100644 --- a/util.go +++ b/util.go @@ -45,9 +45,17 @@ func ping(tnet *netstack.Net, dst string, timeout time.Duration) (time.Duration, } defer socket.Close() + // Set socket buffer sizes to handle high bandwidth scenarios + if tcpConn, ok := socket.(interface{ SetReadBuffer(int) error }); ok { + tcpConn.SetReadBuffer(64 * 1024) + } + if tcpConn, ok := socket.(interface{ SetWriteBuffer(int) error }); ok { + tcpConn.SetWriteBuffer(64 * 1024) + } + requestPing := icmp.Echo{ Seq: rand.Intn(1 << 16), - Data: []byte("f"), + Data: []byte("newtping"), } icmpBytes, err := (&icmp.Message{Type: ipv4.ICMPTypeEcho, Code: 0, Body: &requestPing}).Marshal(nil) @@ -65,12 +73,14 @@ func ping(tnet *netstack.Net, dst string, timeout time.Duration) (time.Duration, return 0, fmt.Errorf("failed to write ICMP packet: %w", err) } - n, err := socket.Read(icmpBytes[:]) + // Use larger buffer for reading to handle potential network congestion + readBuffer := make([]byte, 1500) + n, err := socket.Read(readBuffer) if err != nil { return 0, fmt.Errorf("failed to read ICMP packet: %w", err) } - replyPacket, err := icmp.ParseMessage(1, icmpBytes[:n]) + replyPacket, err := icmp.ParseMessage(1, readBuffer[:n]) if err != nil { return 0, fmt.Errorf("failed to parse ICMP packet: %w", err) } @@ -92,6 +102,51 @@ func ping(tnet *netstack.Net, dst string, timeout time.Duration) (time.Duration, return latency, nil } +// reliablePing performs multiple ping attempts with adaptive timeout +func reliablePing(tnet *netstack.Net, dst string, baseTimeout time.Duration, maxAttempts int) (time.Duration, error) { + var lastErr error + var totalLatency time.Duration + successCount := 0 + + for attempt := 1; attempt <= maxAttempts; attempt++ { + // Adaptive timeout: increase timeout for later attempts + timeout := baseTimeout + time.Duration(attempt-1)*500*time.Millisecond + + // Add jitter to prevent thundering herd + jitter := time.Duration(rand.Intn(100)) * time.Millisecond + timeout += jitter + + latency, err := ping(tnet, dst, timeout) + if err != nil { + lastErr = err + logger.Debug("Ping attempt %d/%d failed: %v", attempt, maxAttempts, err) + + // Brief pause between attempts with exponential backoff + if attempt < maxAttempts { + backoff := time.Duration(attempt) * 50 * time.Millisecond + time.Sleep(backoff) + } + continue + } + + totalLatency += latency + successCount++ + + // If we get at least one success, we can return early for health checks + if successCount > 0 { + avgLatency := totalLatency / time.Duration(successCount) + logger.Debug("Reliable ping succeeded after %d attempts, avg latency: %v", attempt, avgLatency) + return avgLatency, nil + } + } + + if successCount == 0 { + return 0, fmt.Errorf("all %d ping attempts failed, last error: %v", maxAttempts, lastErr) + } + + return totalLatency / time.Duration(successCount), nil +} + func pingWithRetry(tnet *netstack.Net, dst string, timeout time.Duration) (stopChan chan struct{}, err error) { if healthFile != "" { @@ -180,6 +235,9 @@ func startPingCheck(tnet *netstack.Net, serverIP string, client *websocket.Clien consecutiveFailures := 0 connectionLost := false + // Track recent latencies for adaptive timeout calculation + recentLatencies := make([]time.Duration, 0, 10) + pingStopChan := make(chan struct{}) go func() { @@ -188,18 +246,52 @@ func startPingCheck(tnet *netstack.Net, serverIP string, client *websocket.Clien for { select { case <-ticker.C: - _, err := ping(tnet, serverIP, pingTimeout) + // Calculate adaptive timeout based on recent latencies + adaptiveTimeout := pingTimeout + if len(recentLatencies) > 0 { + var sum time.Duration + for _, lat := range recentLatencies { + sum += lat + } + avgLatency := sum / time.Duration(len(recentLatencies)) + // Use 3x average latency as timeout, with minimum of pingTimeout + adaptiveTimeout = avgLatency * 3 + if adaptiveTimeout < pingTimeout { + adaptiveTimeout = pingTimeout + } + if adaptiveTimeout > 15*time.Second { + adaptiveTimeout = 15 * time.Second + } + } + + // Use reliable ping with multiple attempts + maxAttempts := 2 + if consecutiveFailures > 4 { + maxAttempts = 4 // More attempts when connection is unstable + } + + latency, err := reliablePing(tnet, serverIP, adaptiveTimeout, maxAttempts) if err != nil { consecutiveFailures++ - if consecutiveFailures < 4 { + + // Track recent latencies (add a high value for failures) + recentLatencies = append(recentLatencies, adaptiveTimeout) + if len(recentLatencies) > 10 { + recentLatencies = recentLatencies[1:] + } + + if consecutiveFailures < 2 { logger.Debug("Periodic ping failed (%d consecutive failures): %v", consecutiveFailures, err) } else { logger.Warn("Periodic ping failed (%d consecutive failures): %v", consecutiveFailures, err) } - if consecutiveFailures >= 8 && currentInterval < maxInterval { + + // More lenient threshold for declaring connection lost under load + failureThreshold := 4 + if consecutiveFailures >= failureThreshold && currentInterval < maxInterval { if !connectionLost { connectionLost = true - logger.Warn("Connection to server lost. Continuous reconnection attempts will be made.") + logger.Warn("Connection to server lost after %d failures. Continuous reconnection attempts will be made.", consecutiveFailures) stopFunc = client.SendMessageInterval("newt/ping/request", map[string]interface{}{}, 3*time.Second) // Send registration message to the server for backward compatibility err := client.SendMessage("newt/wg/register", map[string]interface{}{ @@ -216,7 +308,7 @@ func startPingCheck(tnet *netstack.Net, serverIP string, client *websocket.Clien } } } - currentInterval = time.Duration(float64(currentInterval) * 1.5) + currentInterval = time.Duration(float64(currentInterval) * 1.3) // Slower increase if currentInterval > maxInterval { currentInterval = maxInterval } @@ -224,9 +316,15 @@ func startPingCheck(tnet *netstack.Net, serverIP string, client *websocket.Clien logger.Debug("Increased ping check interval to %v due to consecutive failures", currentInterval) } } else { + // Track recent latencies + recentLatencies = append(recentLatencies, latency) + if len(recentLatencies) > 10 { + recentLatencies = recentLatencies[1:] + } + if connectionLost { connectionLost = false - logger.Info("Connection to server restored!") + logger.Info("Connection to server restored after %d failures!", consecutiveFailures) if healthFile != "" { err := os.WriteFile(healthFile, []byte("ok"), 0644) if err != nil { @@ -235,12 +333,12 @@ func startPingCheck(tnet *netstack.Net, serverIP string, client *websocket.Clien } } if currentInterval > pingInterval { - currentInterval = time.Duration(float64(currentInterval) * 0.8) + currentInterval = time.Duration(float64(currentInterval) * 0.9) // Slower decrease if currentInterval < pingInterval { currentInterval = pingInterval } ticker.Reset(currentInterval) - logger.Info("Decreased ping check interval to %v after successful ping", currentInterval) + logger.Debug("Decreased ping check interval to %v after successful ping", currentInterval) } consecutiveFailures = 0 } From 663e28329b19ff9948f55f162ed21e4e6da576b6 Mon Sep 17 00:00:00 2001 From: Owen Date: Sun, 13 Jul 2025 16:08:32 -0700 Subject: [PATCH 4/4] Fix typo with _ --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 997745e..dd776f7 100644 --- a/README.md +++ b/README.md @@ -40,7 +40,7 @@ When Newt receives WireGuard control messages, it will use the information encod - `tls-client-cert` (optional): Client certificate (p12 or pfx) for mTLS. See [mTLS](#mtls) - `docker-socket` (optional): Set the Docker socket to use the container discovery integration - `docker-enforce-network-validation` (optional): Validate the container target is on the same network as the newt process -- `health_file` (optional): Check if connection to WG server (pangolin) is ok. creates a file if ok, removes it if not ok. Can be used with docker healtcheck to restart newt +- `health-file` (optional): Check if connection to WG server (pangolin) is ok. creates a file if ok, removes it if not ok. Can be used with docker healtcheck to restart newt - Example: @@ -78,7 +78,7 @@ services: - --id 31frd0uzbjvp721 - --secret h51mmlknrvrwv8s4r1i210azhumt6isgbpyavxodibx1k2d6 - --endpoint https://example.com - - --health_file /tmp/healthy + - --health-file /tmp/healthy ``` ### Docker Socket Integration