Merge branch 'dev' of github.com:fosrl/newt into dev

Owen
2025-10-16 21:09:41 -07:00
31 changed files with 3504 additions and 202 deletions

.env.example Normal file

@@ -0,0 +1,5 @@
# Copy this file to .env and fill in your values
# Required for connecting to Pangolin service
PANGOLIN_ENDPOINT=https://example.com
NEWT_ID=changeme-id
NEWT_SECRET=changeme-secret
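
For illustration, a typical way to use this template (assuming one of the Docker Compose examples below, which read .env via env_file, is saved as docker-compose.yml) is:

    cp .env.example .env
    # edit .env and set PANGOLIN_ENDPOINT, NEWT_ID and NEWT_SECRET to real values
    docker compose up -d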

Dockerfile

@@ -1,5 +1,8 @@
 FROM golang:1.25-alpine AS builder
+# Install git and ca-certificates
+RUN apk --no-cache add ca-certificates git tzdata
 # Set the working directory inside the container
 WORKDIR /app
@@ -13,7 +16,7 @@ RUN go mod download
 COPY . .
 # Build the application
-RUN CGO_ENABLED=0 GOOS=linux go build -o /newt
+RUN CGO_ENABLED=0 GOOS=linux go build -ldflags="-s -w" -o /newt
 FROM alpine:3.22 AS runner
@@ -22,6 +25,9 @@ RUN apk --no-cache add ca-certificates tzdata
 COPY --from=builder /newt /usr/local/bin/
 COPY entrypoint.sh /
+# Admin/metrics endpoint (Prometheus scrape)
+EXPOSE 2112
 RUN chmod +x /entrypoint.sh
 ENTRYPOINT ["/entrypoint.sh"]
 CMD ["newt"]
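
As a sketch of how the newly exposed metrics port can be exercised (image tag and port mapping are arbitrary; NEWT_ADMIN_ADDR must listen on all interfaces for the published port to be reachable, since its default is 127.0.0.1:2112):

    docker build -t newt:dev .
    docker run --rm --env-file .env -e NEWT_ADMIN_ADDR=":2112" -p 2112:2112 newt:dev
    curl -s http://localhost:2112/metrics | head   # Prometheus exposition format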

README.md

@@ -33,61 +33,108 @@ When Newt receives WireGuard control messages, it will use the information encoded
 ## CLI Args
+### Core Configuration
 - `id`: Newt ID generated by Pangolin to identify the client.
 - `secret`: A unique secret (not shared and kept private) used to authenticate the client ID with the websocket in order to receive commands.
 - `endpoint`: The endpoint where both Gerbil and Pangolin reside in order to connect to the websocket.
+- `blueprint-file` (optional): Path to blueprint file to define Pangolin resources and configurations.
-- `mtu` (optional): MTU for the internal WG interface. Default: 1280
+- `no-cloud` (optional): Don't fail over to the cloud when using managed nodes in Pangolin Cloud. Default: false
-- `dns` (optional): DNS server to use to resolve the endpoint. Default: 9.9.9.9
 - `log-level` (optional): The log level to use (DEBUG, INFO, WARN, ERROR, FATAL). Default: INFO
-- `enforce-hc-cert` (optional): Enforce certificate validation for health checks. Default: false (accepts any cert)
+### Docker Integration
 - `docker-socket` (optional): Set the Docker socket to use the container discovery integration
-- `ping-interval` (optional): Interval for pinging the server. Default: 3s
-- `ping-timeout` (optional): Timeout for each ping. Default: 5s
-- `updown` (optional): A script to be called when targets are added or removed.
-- `tls-client-cert` (optional): Client certificate (p12 or pfx) for mTLS. See [mTLS](#mtls)
-- `tls-client-cert` (optional): Path to client certificate (PEM format, optional if using PKCS12). See [mTLS](#mtls)
-- `tls-client-key` (optional): Path to private key for mTLS (PEM format, optional if using PKCS12)
-- `tls-ca-cert` (optional): Path to CA certificate to verify server (PEM format, optional if using PKCS12)
 - `docker-enforce-network-validation` (optional): Validate the container target is on the same network as the newt process. Default: false
-- `health-file` (optional): Check if connection to WG server (pangolin) is ok. creates a file if ok, removes it if not ok. Can be used with docker healtcheck to restart newt
+### Accept Client Connections
 - `accept-clients` (optional): Enable WireGuard server mode to accept incoming newt client connections. Default: false
 - `generateAndSaveKeyTo` (optional): Path to save generated private key
 - `native` (optional): Use native WireGuard interface when accepting clients (requires WireGuard kernel module and Linux, must run as root). Default: false (uses userspace netstack)
 - `interface` (optional): Name of the WireGuard interface. Default: newt
 - `keep-interface` (optional): Keep the WireGuard interface. Default: false
-- `blueprint-file` (optional): Path to blueprint file to define Pangolin resources and configurations.
-- `no-cloud` (optional): Don't fail over to the cloud when using managed nodes in Pangolin Cloud. Default: false
+### Metrics & Observability
+- `metrics` (optional): Enable Prometheus /metrics exporter. Default: true
+- `otlp` (optional): Enable OTLP exporters (metrics/traces) to OTEL_EXPORTER_OTLP_ENDPOINT. Default: false
+- `metrics-admin-addr` (optional): Admin/metrics bind address. Default: 127.0.0.1:2112
+- `metrics-async-bytes` (optional): Enable async bytes counting (background flush; lower hot path overhead). Default: false
+- `region` (optional): Optional region resource attribute for telemetry and metrics.
+### Network Configuration
+- `mtu` (optional): MTU for the internal WG interface. Default: 1280
+- `dns` (optional): DNS server to use to resolve the endpoint. Default: 9.9.9.9
+- `ping-interval` (optional): Interval for pinging the server. Default: 3s
+- `ping-timeout` (optional): Timeout for each ping. Default: 5s
+### Security & TLS
+- `enforce-hc-cert` (optional): Enforce certificate validation for health checks. Default: false (accepts any cert)
+- `tls-client-cert` (optional): Client certificate (p12 or pfx) for mTLS or path to client certificate (PEM format). See [mTLS](#mtls)
+- `tls-client-key` (optional): Path to private key for mTLS (PEM format, optional if using PKCS12)
+- `tls-ca-cert` (optional): Path to CA certificate to verify server (PEM format, optional if using PKCS12)
+### Monitoring & Health
+- `health-file` (optional): Checks whether the connection to the WG server (Pangolin) is healthy: creates a file when it is and removes it when it is not. Can be used with a Docker healthcheck to restart Newt.
+- `updown` (optional): A script to be called when targets are added or removed.
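
For illustration, a minimal invocation combining a few of the flags above might look like this (all values are placeholders):

    newt \
      --id changeme-id \
      --secret changeme-secret \
      --endpoint https://example.com \
      --log-level INFO \
      --metrics-admin-addr 127.0.0.1:2112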
 ## Environment Variables
 All CLI arguments can be set using environment variables as an alternative to command line flags. Environment variables are particularly useful when running Newt in containerized environments.
+### Core Configuration
 - `PANGOLIN_ENDPOINT`: Endpoint of your pangolin server (equivalent to `--endpoint`)
 - `NEWT_ID`: Newt ID generated by Pangolin (equivalent to `--id`)
 - `NEWT_SECRET`: Newt secret for authentication (equivalent to `--secret`)
-- `MTU`: MTU for the internal WG interface. Default: 1280 (equivalent to `--mtu`)
+- `CONFIG_FILE`: Load the config json from this file instead of in the home folder.
-- `DNS`: DNS server to use to resolve the endpoint. Default: 9.9.9.9 (equivalent to `--dns`)
+- `BLUEPRINT_FILE`: Path to blueprint file to define Pangolin resources and configurations. (equivalent to `--blueprint-file`)
+- `NO_CLOUD`: Don't fail over to the cloud when using managed nodes in Pangolin Cloud. Default: false (equivalent to `--no-cloud`)
 - `LOG_LEVEL`: Log level (DEBUG, INFO, WARN, ERROR, FATAL). Default: INFO (equivalent to `--log-level`)
+### Docker Integration
 - `DOCKER_SOCKET`: Path to Docker socket for container discovery (equivalent to `--docker-socket`)
-- `PING_INTERVAL`: Interval for pinging the server. Default: 3s (equivalent to `--ping-interval`)
-- `PING_TIMEOUT`: Timeout for each ping. Default: 5s (equivalent to `--ping-timeout`)
-- `UPDOWN_SCRIPT`: Path to updown script for target add/remove events (equivalent to `--updown`)
-- `TLS_CLIENT_CERT`: Path to client certificate for mTLS (equivalent to `--tls-client-cert`)
-- `TLS_CLIENT_CERT`: Path to client certificate for mTLS (equivalent to `--tls-client-cert`)
-- `TLS_CLIENT_KEY`: Path to private key for mTLS (equivalent to `--tls-client-key`)
-- `TLS_CA_CERT`: Path to CA certificate to verify server (equivalent to `--tls-ca-cert`)
 - `DOCKER_ENFORCE_NETWORK_VALIDATION`: Validate container targets are on same network. Default: false (equivalent to `--docker-enforce-network-validation`)
-- `ENFORCE_HC_CERT`: Enforce certificate validation for health checks. Default: false (equivalent to `--enforce-hc-cert`)
-- `HEALTH_FILE`: Path to health file for connection monitoring (equivalent to `--health-file`)
+### Accept Client Connections
 - `ACCEPT_CLIENTS`: Enable WireGuard server mode. Default: false (equivalent to `--accept-clients`)
 - `GENERATE_AND_SAVE_KEY_TO`: Path to save generated private key (equivalent to `--generateAndSaveKeyTo`)
 - `USE_NATIVE_INTERFACE`: Use native WireGuard interface (Linux only). Default: false (equivalent to `--native`)
 - `INTERFACE`: Name of the WireGuard interface. Default: newt (equivalent to `--interface`)
 - `KEEP_INTERFACE`: Keep the WireGuard interface after shutdown. Default: false (equivalent to `--keep-interface`)
-- `CONFIG_FILE`: Load the config json from this file instead of in the home folder.
-- `BLUEPRINT_FILE`: Path to blueprint file to define Pangolin resources and configurations. (equivalent to `--blueprint-file`)
+### Monitoring & Health
-- `NO_CLOUD`: Don't fail over to the cloud when using managed nodes in Pangolin Cloud. Default: false (equivalent to `--no-cloud`)
+- `HEALTH_FILE`: Path to health file for connection monitoring (equivalent to `--health-file`)
+- `UPDOWN_SCRIPT`: Path to updown script for target add/remove events (equivalent to `--updown`)
+### Metrics & Observability
+- `NEWT_METRICS_PROMETHEUS_ENABLED`: Enable Prometheus /metrics exporter. Default: true (equivalent to `--metrics`)
+- `NEWT_METRICS_OTLP_ENABLED`: Enable OTLP exporters (metrics/traces) to OTEL_EXPORTER_OTLP_ENDPOINT. Default: false (equivalent to `--otlp`)
+- `NEWT_ADMIN_ADDR`: Admin/metrics bind address. Default: 127.0.0.1:2112 (equivalent to `--metrics-admin-addr`)
+- `NEWT_METRICS_ASYNC_BYTES`: Enable async bytes counting (background flush; lower hot path overhead). Default: false (equivalent to `--metrics-async-bytes`)
+- `NEWT_REGION`: Optional region resource attribute for telemetry and metrics (equivalent to `--region`)
+### Network Configuration
+- `MTU`: MTU for the internal WG interface. Default: 1280 (equivalent to `--mtu`)
+- `DNS`: DNS server to use to resolve the endpoint. Default: 9.9.9.9 (equivalent to `--dns`)
+- `PING_INTERVAL`: Interval for pinging the server. Default: 3s (equivalent to `--ping-interval`)
+- `PING_TIMEOUT`: Timeout for each ping. Default: 5s (equivalent to `--ping-timeout`)
+### Security & TLS
+- `ENFORCE_HC_CERT`: Enforce certificate validation for health checks. Default: false (equivalent to `--enforce-hc-cert`)
+- `TLS_CLIENT_CERT`: Path to client certificate for mTLS (equivalent to `--tls-client-cert`)
+- `TLS_CLIENT_KEY`: Path to private key for mTLS (equivalent to `--tls-client-key`)
+- `TLS_CA_CERT`: Path to CA certificate to verify server (equivalent to `--tls-ca-cert`)
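
The same configuration expressed through environment variables, for example in a container run (image tag and values are placeholders):

    docker run -d --name newt \
      -e PANGOLIN_ENDPOINT=https://example.com \
      -e NEWT_ID=changeme-id \
      -e NEWT_SECRET=changeme-secret \
      -e LOG_LEVEL=INFO \
      newt:dev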
 ## Loading secrets from files

Docker Compose example: Newt → OTEL Collector → Prometheus

@@ -0,0 +1,41 @@
services:
  newt:
    build: .
    image: newt:dev
    env_file:
      - .env
    environment:
      - NEWT_METRICS_PROMETHEUS_ENABLED=false # important: disable direct /metrics scraping
      - NEWT_METRICS_OTLP_ENABLED=true        # OTLP to the Collector
      # optional:
      # - NEWT_METRICS_INCLUDE_TUNNEL_ID=false
    # When using the Collector pattern, do NOT map the Newt admin/metrics port
    # (2112) on the application service. Mapping 2112 here can cause port
    # conflicts and may result in duplicated Prometheus scraping (app AND
    # collector being scraped for the same metrics). Instead either:
    #   - leave ports unset on the app service (recommended), or
    #   - map 2112 only on a dedicated metrics/collector service that is
    #     responsible for exposing metrics to Prometheus.
    # Example: do NOT map here
    # ports: []
    # Example: map 2112 only on a collector service
    # collector:
    #   ports:
    #     - "2112:2112" # collector's prometheus exporter (scraped by Prometheus)

  otel-collector:
    image: otel/opentelemetry-collector-contrib:latest
    command: ["--config=/etc/otelcol/config.yaml"]
    volumes:
      - ./examples/otel-collector.yaml:/etc/otelcol/config.yaml:ro
    ports:
      - "4317:4317" # OTLP gRPC
      - "8889:8889" # Prometheus Exporter (scraped by Prometheus)

  prometheus:
    image: prom/prometheus:latest
    volumes:
      - ./examples/prometheus.with-collector.yml:/etc/prometheus/prometheus.yml:ro
    ports:
      - "9090:9090"

Docker Compose example: direct Prometheus scrape of Newt, plus Grafana ("Newt-Metrics" project)

@@ -0,0 +1,56 @@
name: Newt-Metrics

services:
  # Recommended Variant A: Direct Prometheus scrape of Newt (/metrics)
  # Optional: You may add the Collector service and enable OTLP export, but do NOT
  # scrape both Newt and the Collector for the same process.
  newt:
    build: .
    image: newt:dev
    env_file:
      - .env
    environment:
      OTEL_SERVICE_NAME: newt
      NEWT_METRICS_PROMETHEUS_ENABLED: "true"
      NEWT_METRICS_OTLP_ENABLED: "false" # avoid double-scrape by default
      NEWT_ADMIN_ADDR: ":2112"
      # Base NEWT configuration
      PANGOLIN_ENDPOINT: ${PANGOLIN_ENDPOINT}
      NEWT_ID: ${NEWT_ID}
      NEWT_SECRET: ${NEWT_SECRET}
      LOG_LEVEL: "DEBUG"
    ports:
      - "2112:2112"

  # Optional Variant B: Enable the Collector and switch Prometheus scrape to it.
  # collector:
  #   image: otel/opentelemetry-collector-contrib:0.136.0
  #   command: ["--config=/etc/otelcol/config.yaml"]
  #   volumes:
  #     - ./examples/otel-collector.yaml:/etc/otelcol/config.yaml:ro
  #   ports:
  #     - "4317:4317" # OTLP gRPC in
  #     - "8889:8889" # Prometheus scrape out

  prometheus:
    image: prom/prometheus:v3.6.0
    volumes:
      - ./examples/prometheus.yml:/etc/prometheus/prometheus.yml:ro
    ports:
      - "9090:9090"

  grafana:
    image: grafana/grafana:12.2.0
    container_name: newt-metrics-grafana
    restart: unless-stopped
    environment:
      - GF_SECURITY_ADMIN_USER=admin
      - GF_SECURITY_ADMIN_PASSWORD=admin
    ports:
      - "3005:3000"
    depends_on:
      - prometheus
    volumes:
      - ./examples/grafana/provisioning/datasources:/etc/grafana/provisioning/datasources:ro
      - ./examples/grafana/provisioning/dashboards:/etc/grafana/provisioning/dashboards:ro
      - ./examples/grafana/dashboards:/var/lib/grafana/dashboards:ro
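
A quick verification sketch for this variant, using the ports mapped above (assuming the file is saved as docker-compose.yml, otherwise pass -f):

    docker compose up -d
    curl -s http://localhost:2112/metrics | head
    # Prometheus targets page:  http://localhost:9090/targets   (the 'newt' job should be UP)
    # Grafana:                  http://localhost:3005           (admin/admin, "Newt Overview" dashboard)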

Grafana dashboard JSON: "Newt Overview" (provisioned from examples/grafana/dashboards)

@@ -0,0 +1,898 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": null,
"links": [],
"liveNow": false,
"panels": [
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"decimals": 0,
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 500
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 7,
"w": 6,
"x": 0,
"y": 0
},
"id": 1,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "horizontal",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "value_and_name"
},
"pluginVersion": "11.1.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"editorMode": "code",
"expr": "go_goroutine_count",
"instant": true,
"legendFormat": "",
"refId": "A"
}
],
"title": "Goroutines",
"transformations": [],
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"decimals": 1,
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "orange",
"value": 256
},
{
"color": "red",
"value": 512
}
]
},
"unit": "bytes"
},
"overrides": []
},
"gridPos": {
"h": 7,
"w": 6,
"x": 6,
"y": 0
},
"id": 2,
"options": {
"colorMode": "background",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "horizontal",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "value_and_name"
},
"pluginVersion": "11.1.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"editorMode": "code",
"expr": "go_memory_gc_goal_bytes / 1024 / 1024",
"format": "time_series",
"instant": true,
"legendFormat": "",
"refId": "A"
}
],
"title": "GC Target Heap (MiB)",
"transformations": [],
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"decimals": 2,
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "orange",
"value": 10
},
{
"color": "red",
"value": 25
}
]
},
"unit": "ops"
},
"overrides": []
},
"gridPos": {
"h": 7,
"w": 6,
"x": 12,
"y": 0
},
"id": 3,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "horizontal",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "value_and_name"
},
"pluginVersion": "11.1.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"editorMode": "code",
"expr": "sum(rate(http_server_request_duration_seconds_count[$__rate_interval]))",
"instant": false,
"legendFormat": "req/s",
"refId": "A"
}
],
"title": "HTTP Requests / s",
"transformations": [],
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"decimals": 3,
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "orange",
"value": 0.1
},
{
"color": "red",
"value": 0.5
}
]
},
"unit": "ops"
},
"overrides": []
},
"gridPos": {
"h": 7,
"w": 6,
"x": 18,
"y": 0
},
"id": 4,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "horizontal",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "value_and_name"
},
"pluginVersion": "11.1.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"editorMode": "code",
"expr": "sum(rate(newt_connection_errors_total{site_id=~\"$site_id\"}[$__rate_interval]))",
"instant": false,
"legendFormat": "errors/s",
"refId": "A"
}
],
"title": "Connection Errors / s",
"transformations": [],
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"fieldConfig": {
"defaults": {
"custom": {},
"mappings": [],
"unit": "bytes"
},
"overrides": []
},
"gridPos": {
"h": 9,
"w": 12,
"x": 0,
"y": 7
},
"id": 5,
"options": {
"legend": {
"calcs": [],
"displayMode": "table",
"placement": "right"
},
"tooltip": {
"mode": "multi",
"sort": "desc"
}
},
"pluginVersion": "11.1.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"editorMode": "code",
"expr": "sum(go_memory_used_bytes)",
"legendFormat": "Used",
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"editorMode": "code",
"expr": "go_memory_gc_goal_bytes",
"legendFormat": "GC Goal",
"refId": "B"
}
],
"title": "Go Heap Usage vs GC Goal",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"fieldConfig": {
"defaults": {
"custom": {},
"decimals": 0,
"mappings": [],
"unit": "short"
},
"overrides": []
},
"gridPos": {
"h": 9,
"w": 12,
"x": 12,
"y": 7
},
"id": 6,
"options": {
"legend": {
"calcs": [],
"displayMode": "table",
"placement": "right"
},
"tooltip": {
"mode": "multi",
"sort": "desc"
}
},
"pluginVersion": "11.1.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"editorMode": "code",
"expr": "rate(go_memory_allocations_total[$__rate_interval])",
"legendFormat": "Allocations/s",
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"editorMode": "code",
"expr": "rate(go_memory_allocated_bytes_total[$__rate_interval])",
"legendFormat": "Allocated bytes/s",
"refId": "B"
}
],
"title": "Allocation Activity",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"fieldConfig": {
"defaults": {
"custom": {},
"mappings": [],
"unit": "s"
},
"overrides": []
},
"gridPos": {
"h": 9,
"w": 12,
"x": 0,
"y": 16
},
"id": 7,
"options": {
"legend": {
"calcs": [],
"displayMode": "table",
"placement": "right"
},
"tooltip": {
"mode": "multi",
"sort": "desc"
}
},
"pluginVersion": "11.1.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"editorMode": "code",
"expr": "histogram_quantile(0.5, sum(rate(http_server_request_duration_seconds_bucket[$__rate_interval])) by (le))",
"legendFormat": "p50",
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"editorMode": "code",
"expr": "histogram_quantile(0.95, sum(rate(http_server_request_duration_seconds_bucket[$__rate_interval])) by (le))",
"legendFormat": "p95",
"refId": "B"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"editorMode": "code",
"expr": "histogram_quantile(0.99, sum(rate(http_server_request_duration_seconds_bucket[$__rate_interval])) by (le))",
"legendFormat": "p99",
"refId": "C"
}
],
"title": "HTTP Request Duration Quantiles",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"fieldConfig": {
"defaults": {
"custom": {},
"mappings": [],
"unit": "ops"
},
"overrides": []
},
"gridPos": {
"h": 9,
"w": 12,
"x": 12,
"y": 16
},
"id": 8,
"options": {
"legend": {
"calcs": [],
"displayMode": "table",
"placement": "right"
},
"tooltip": {
"mode": "multi",
"sort": "desc"
}
},
"pluginVersion": "11.1.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"editorMode": "code",
"expr": "sum(rate(http_server_request_duration_seconds_count[$__rate_interval])) by (http_response_status_code)",
"legendFormat": "{{http_response_status_code}}",
"refId": "A"
}
],
"title": "HTTP Requests by Status",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"fieldConfig": {
"defaults": {
"custom": {},
"mappings": [],
"unit": "ops"
},
"overrides": []
},
"gridPos": {
"h": 9,
"w": 12,
"x": 0,
"y": 25
},
"id": 9,
"options": {
"legend": {
"calcs": [],
"displayMode": "table",
"placement": "right"
},
"tooltip": {
"mode": "multi",
"sort": "desc"
}
},
"pluginVersion": "11.1.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"editorMode": "code",
"expr": "sum(rate(newt_connection_attempts_total{site_id=~\"$site_id\"}[$__rate_interval])) by (transport, result)",
"legendFormat": "{{transport}} • {{result}}",
"refId": "A"
}
],
"title": "Connection Attempts by Transport/Result",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"fieldConfig": {
"defaults": {
"custom": {},
"mappings": [],
"unit": "ops"
},
"overrides": []
},
"gridPos": {
"h": 9,
"w": 12,
"x": 12,
"y": 25
},
"id": 10,
"options": {
"legend": {
"calcs": [],
"displayMode": "table",
"placement": "right"
},
"tooltip": {
"mode": "multi",
"sort": "desc"
}
},
"pluginVersion": "11.1.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"editorMode": "code",
"expr": "sum(rate(newt_connection_errors_total{site_id=~\"$site_id\"}[$__rate_interval])) by (transport, error_type)",
"legendFormat": "{{transport}} • {{error_type}}",
"refId": "A"
}
],
"title": "Connection Errors by Type",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"fieldConfig": {
"defaults": {
"custom": {},
"decimals": 3,
"mappings": [],
"unit": "s"
},
"overrides": []
},
"gridPos": {
"h": 9,
"w": 12,
"x": 0,
"y": 34
},
"id": 11,
"options": {
"legend": {
"calcs": [],
"displayMode": "table",
"placement": "right"
},
"tooltip": {
"mode": "multi",
"sort": "desc"
}
},
"pluginVersion": "11.1.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"editorMode": "code",
"expr": "histogram_quantile(0.5, sum(rate(newt_tunnel_latency_seconds_bucket{site_id=~\"$site_id\", tunnel_id=~\"$tunnel_id\"}[$__rate_interval])) by (le))",
"legendFormat": "p50",
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"editorMode": "code",
"expr": "histogram_quantile(0.95, sum(rate(newt_tunnel_latency_seconds_bucket{site_id=~\"$site_id\", tunnel_id=~\"$tunnel_id\"}[$__rate_interval])) by (le))",
"legendFormat": "p95",
"refId": "B"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"editorMode": "code",
"expr": "histogram_quantile(0.99, sum(rate(newt_tunnel_latency_seconds_bucket{site_id=~\"$site_id\", tunnel_id=~\"$tunnel_id\"}[$__rate_interval])) by (le))",
"legendFormat": "p99",
"refId": "C"
}
],
"title": "Tunnel Latency Quantiles",
"type": "timeseries"
},
{
"cards": {},
"color": {
"cardColor": "#b4ff00",
"colorScale": "sqrt",
"colorScheme": "interpolateTurbo"
},
"dataFormat": "tsbuckets",
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"fieldConfig": {
"defaults": {
"custom": {},
"mappings": [],
"unit": "s"
},
"overrides": []
},
"gridPos": {
"h": 9,
"w": 12,
"x": 12,
"y": 34
},
"heatmap": {},
"hideZeroBuckets": true,
"id": 12,
"legend": {
"show": false
},
"options": {
"calculate": true,
"cellGap": 2,
"cellSize": "auto",
"color": {
"exponent": 0.5
},
"exemplars": {
"color": "rgba(255,255,255,0.7)"
},
"filterValues": {
"le": 1e-9
},
"legend": {
"show": false
},
"tooltip": {
"mode": "single",
"show": true
},
"xAxis": {
"show": true
},
"yAxis": {
"decimals": 3,
"show": true
}
},
"pluginVersion": "11.1.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"editorMode": "code",
"expr": "sum(rate(newt_tunnel_latency_seconds_bucket{site_id=~\"$site_id\", tunnel_id=~\"$tunnel_id\"}[$__rate_interval])) by (le)",
"format": "heatmap",
"legendFormat": "{{le}}",
"refId": "A"
}
],
"title": "Tunnel Latency Bucket Rate",
"type": "heatmap"
}
],
"refresh": "30s",
"schemaVersion": 39,
"style": "dark",
"tags": [
"newt",
"otel"
],
"templating": {
"list": [
{
"current": {
"selected": false,
"text": "Prometheus",
"value": "prometheus"
},
"hide": 0,
"label": "Datasource",
"name": "DS_PROMETHEUS",
"options": [],
"query": "prometheus",
"refresh": 1,
"type": "datasource"
},
{
"current": {
"selected": false,
"text": "All",
"value": "$__all"
},
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"definition": "label_values(target_info, site_id)",
"hide": 0,
"includeAll": true,
"label": "Site",
"multi": true,
"name": "site_id",
"options": [],
"query": {
"query": "label_values(target_info, site_id)",
"refId": "SiteIdVar"
},
"refresh": 2,
"regex": "",
"skipUrlSync": false,
"sort": 1,
"tagValuesQuery": "",
"tags": [],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"current": {
"selected": false,
"text": "All",
"value": "$__all"
},
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"definition": "label_values(newt_tunnel_latency_seconds_bucket{site_id=~\"$site_id\"}, tunnel_id)",
"hide": 0,
"includeAll": true,
"label": "Tunnel",
"multi": true,
"name": "tunnel_id",
"options": [],
"query": {
"query": "label_values(newt_tunnel_latency_seconds_bucket{site_id=~\"$site_id\"}, tunnel_id)",
"refId": "TunnelVar"
},
"refresh": 2,
"regex": "",
"skipUrlSync": false,
"sort": 1,
"tagValuesQuery": "",
"tags": [],
"tagsQuery": "",
"type": "query",
"useTags": false
}
]
},
"time": {
"from": "now-6h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "browser",
"title": "Newt Overview",
"uid": "newt-overview",
"version": 1,
"weekStart": ""
}

Grafana dashboard provisioning (examples/grafana/provisioning/dashboards)

@@ -0,0 +1,9 @@
apiVersion: 1
providers:
  - name: "newt"
    folder: "Newt"
    type: file
    disableDeletion: false
    allowUiUpdates: true
    options:
      path: /var/lib/grafana/dashboards

Grafana datasource provisioning (examples/grafana/provisioning/datasources)

@@ -0,0 +1,9 @@
apiVersion: 1
datasources:
  - name: Prometheus
    type: prometheus
    access: proxy
    url: http://prometheus:9090
    uid: prometheus
    isDefault: true
    editable: true

examples/otel-collector.yaml Normal file

@@ -0,0 +1,61 @@
# Variant A: Direct scrape of Newt (/metrics) via Prometheus (no Collector needed)
# Note: Newt already exposes labels like site_id, protocol, direction. Do not promote
# resource attributes into labels when scraping Newt directly.
#
# Example Prometheus scrape config:
#   global:
#     scrape_interval: 15s
#   scrape_configs:
#     - job_name: newt
#       static_configs:
#         - targets: ["newt:2112"]
#
# Variant B: Use OTEL Collector (Newt -> OTLP -> Collector -> Prometheus)
# This pipeline scrapes metrics from the Collector's Prometheus exporter.
# Labels are already on datapoints; promotion from resource is OPTIONAL and typically NOT required.
# If you enable transform/promote below, ensure you do not duplicate labels.

receivers:
  otlp:
    protocols:
      grpc:
        endpoint: ":4317"

processors:
  memory_limiter:
    check_interval: 5s
    limit_percentage: 80
    spike_limit_percentage: 25
  resourcedetection:
    detectors: [env, system]
    timeout: 5s
  batch: {}
  # OPTIONAL: Only enable if you need to promote resource attributes to labels.
  # WARNING: Newt already provides site_id as a label; avoid double-promotion.
  # transform/promote:
  #   error_mode: ignore
  #   metric_statements:
  #     - context: datapoint
  #       statements:
  #         - set(attributes["service_instance_id"], resource.attributes["service.instance.id"]) where resource.attributes["service.instance.id"] != nil
  #         - set(attributes["site_id"], resource.attributes["site_id"]) where resource.attributes["site_id"] != nil

exporters:
  prometheus:
    endpoint: ":8889"
    send_timestamps: true
  # prometheusremotewrite:
  #   endpoint: http://mimir:9009/api/v1/push
  debug:
    verbosity: basic

service:
  pipelines:
    metrics:
      receivers: [otlp]
      processors: [memory_limiter, resourcedetection, batch] # add transform/promote if you really need it
      exporters: [prometheus]
    traces:
      receivers: [otlp]
      processors: [memory_limiter, resourcedetection, batch]
      exporters: [debug]
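
For illustration, the Collector can also be run on its own and Newt pointed at it over OTLP; the endpoint below matches the gRPC listener defined above, everything else is a placeholder:

    docker run --rm -p 4317:4317 -p 8889:8889 \
      -v "$PWD/examples/otel-collector.yaml:/etc/otelcol/config.yaml:ro" \
      otel/opentelemetry-collector-contrib:latest --config=/etc/otelcol/config.yaml

    NEWT_METRICS_OTLP_ENABLED=true \
    OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4317 \
    newt --id changeme-id --secret changeme-secret --endpoint https://example.com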

examples/prometheus.with-collector.yml Normal file

@@ -0,0 +1,16 @@
global:
  scrape_interval: 15s

scrape_configs:
  # IMPORTANT: Do not scrape Newt directly; scrape only the Collector!
  - job_name: 'otel-collector'
    static_configs:
      - targets: ['otel-collector:8889']
    # optional: limit metric cardinality
    metric_relabel_configs:
      - action: labeldrop
        regex: 'tunnel_id'
      # - action: keep
      #   source_labels: [site_id]
      #   regex: '(site-a|site-b)'

examples/prometheus.yml Normal file

@@ -0,0 +1,21 @@
global:
  scrape_interval: 15s

scrape_configs:
  - job_name: 'newt'
    scrape_interval: 15s
    static_configs:
      - targets: ['newt:2112'] # /metrics
    metric_relabel_configs:
      # optional: drop tunnel_id
      - action: labeldrop
        regex: 'tunnel_id'
      # optional: allow only specific sites
      - action: keep
        source_labels: [site_id]
        regex: '(site-a|site-b)'

  # WARNING: Do not enable this together with the 'newt' job above or you will double-count.
  # - job_name: 'otel-collector'
  #   static_configs:
  #     - targets: ['otel-collector:8889']
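
Either Prometheus config can be sanity-checked before mounting it, for example with promtool from the Prometheus image (the path below is a placeholder):

    docker run --rm -v "$PWD/examples/prometheus.yml:/prometheus.yml:ro" \
      --entrypoint promtool prom/prometheus:latest check config /prometheus.yml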

go.mod

@@ -6,29 +6,46 @@ require (
 	github.com/docker/docker v28.5.1+incompatible
 	github.com/google/gopacket v1.1.19
 	github.com/gorilla/websocket v1.5.3
+	github.com/prometheus/client_golang v1.23.2
 	github.com/vishvananda/netlink v1.3.1
+	go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0
+	go.opentelemetry.io/contrib/instrumentation/runtime v0.63.0
+	go.opentelemetry.io/otel v1.38.0
+	go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.38.0
+	go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.38.0
+	go.opentelemetry.io/otel/exporters/prometheus v0.60.0
+	go.opentelemetry.io/otel/metric v1.38.0
+	go.opentelemetry.io/otel/sdk v1.38.0
+	go.opentelemetry.io/otel/sdk/metric v1.38.0
 	golang.org/x/crypto v0.43.0
 	golang.org/x/exp v0.0.0-20250718183923-645b1fa84792
 	golang.org/x/net v0.46.0
 	golang.zx2c4.com/wireguard v0.0.0-20250521234502-f333402bd9cb
 	golang.zx2c4.com/wireguard/wgctrl v0.0.0-20241231184526-a9ab2273dd10
+	google.golang.org/grpc v1.76.0
 	gopkg.in/yaml.v3 v3.0.1
 	gvisor.dev/gvisor v0.0.0-20250503011706-39ed1f5ac29c
 	software.sslmate.com/src/go-pkcs12 v0.6.0
 )

 require (
-	github.com/Microsoft/go-winio v0.6.2 // indirect
+	github.com/Microsoft/go-winio v0.6.0 // indirect
-	github.com/containerd/errdefs v1.0.0 // indirect
+	github.com/beorn7/perks v1.0.1 // indirect
+	github.com/cenkalti/backoff/v5 v5.0.3 // indirect
+	github.com/cespare/xxhash/v2 v2.3.0 // indirect
+	github.com/containerd/errdefs v0.3.0 // indirect
 	github.com/containerd/errdefs/pkg v0.3.0 // indirect
 	github.com/distribution/reference v0.6.0 // indirect
-	github.com/docker/go-connections v0.5.0 // indirect
+	github.com/docker/go-connections v0.6.0 // indirect
-	github.com/docker/go-units v0.5.0 // indirect
+	github.com/docker/go-units v0.4.0 // indirect
 	github.com/felixge/httpsnoop v1.0.4 // indirect
 	github.com/go-logr/logr v1.4.3 // indirect
 	github.com/go-logr/stdr v1.2.2 // indirect
 	github.com/google/btree v1.1.3 // indirect
 	github.com/google/go-cmp v0.7.0 // indirect
+	github.com/google/uuid v1.6.0 // indirect
+	github.com/grafana/regexp v0.0.0-20240518133315-a468a5bfb3bc // indirect
+	github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.2 // indirect
 	github.com/josharian/native v1.1.0 // indirect
 	github.com/mdlayher/genetlink v1.3.2 // indirect
 	github.com/mdlayher/netlink v1.7.2 // indirect
@@ -37,18 +54,29 @@ require (
 	github.com/moby/sys/atomicwriter v0.1.0 // indirect
 	github.com/moby/term v0.5.2 // indirect
 	github.com/morikuni/aec v1.0.0 // indirect
+	github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
 	github.com/opencontainers/go-digest v1.0.0 // indirect
-	github.com/opencontainers/image-spec v1.1.1 // indirect
+	github.com/opencontainers/image-spec v1.1.0 // indirect
 	github.com/pkg/errors v0.9.1 // indirect
+	github.com/prometheus/client_model v0.6.2 // indirect
+	github.com/prometheus/common v0.66.1 // indirect
+	github.com/prometheus/otlptranslator v0.0.2 // indirect
+	github.com/prometheus/procfs v0.17.0 // indirect
 	github.com/vishvananda/netns v0.0.5 // indirect
 	go.opentelemetry.io/auto/sdk v1.1.0 // indirect
-	go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.62.0 // indirect
+	go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0 // indirect
-	go.opentelemetry.io/otel v1.37.0 // indirect
+	go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.38.0 // indirect
-	go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.36.0 // indirect
+	go.opentelemetry.io/otel/trace v1.38.0 // indirect
-	go.opentelemetry.io/otel/metric v1.37.0 // indirect
+	go.opentelemetry.io/proto/otlp v1.7.1 // indirect
-	go.opentelemetry.io/otel/trace v1.37.0 // indirect
+	go.yaml.in/yaml/v2 v2.4.2 // indirect
-	golang.org/x/sync v0.16.0 // indirect
+	golang.org/x/mod v0.28.0 // indirect
+	golang.org/x/sync v0.17.0 // indirect
 	golang.org/x/sys v0.37.0 // indirect
+	golang.org/x/text v0.30.0 // indirect
 	golang.org/x/time v0.12.0 // indirect
+	golang.org/x/tools v0.37.0 // indirect
 	golang.zx2c4.com/wintun v0.0.0-20230126152724-0fa3db229ce2 // indirect
+	google.golang.org/genproto/googleapis/api v0.0.0-20250825161204-c5933d9347a5 // indirect
+	google.golang.org/genproto/googleapis/rpc v0.0.0-20250825161204-c5933d9347a5 // indirect
+	google.golang.org/protobuf v1.36.8 // indirect
 )

go.sum

The go.sum checksum entries change in step with the go.mod updates above: entries are added for github.com/prometheus/client_golang and its dependencies (beorn7/perks, cespare/xxhash, prometheus/client_model, prometheus/common, prometheus/otlptranslator, prometheus/procfs, munnerz/goautoneg), for the go.opentelemetry.io v1.38.0 SDK, OTLP metric/trace exporters and Prometheus bridge, for google.golang.org/grpc, grpc-gateway, genproto and protobuf, and for grafana/regexp, google/uuid, golang.org/x/mod, golang.org/x/tools and go.yaml.in/yaml/v2; entries for the superseded OpenTelemetry v1.36/v1.37 packages and other replaced module versions are removed.
golang.zx2c4.com/wintun v0.0.0-20230126152724-0fa3db229ce2/go.mod h1:deeaetjYA+DHMHg+sMSMI58GrEteJUUzzw7en6TJQcI= golang.zx2c4.com/wintun v0.0.0-20230126152724-0fa3db229ce2/go.mod h1:deeaetjYA+DHMHg+sMSMI58GrEteJUUzzw7en6TJQcI=
@@ -136,22 +140,17 @@ golang.zx2c4.com/wireguard v0.0.0-20250521234502-f333402bd9cb h1:whnFRlWMcXI9d+Z
golang.zx2c4.com/wireguard v0.0.0-20250521234502-f333402bd9cb/go.mod h1:rpwXGsirqLqN2L0JDJQlwOboGHmptD5ZD6T2VmcqhTw= golang.zx2c4.com/wireguard v0.0.0-20250521234502-f333402bd9cb/go.mod h1:rpwXGsirqLqN2L0JDJQlwOboGHmptD5ZD6T2VmcqhTw=
golang.zx2c4.com/wireguard/wgctrl v0.0.0-20241231184526-a9ab2273dd10 h1:3GDAcqdIg1ozBNLgPy4SLT84nfcBjr6rhGtXYtrkWLU= golang.zx2c4.com/wireguard/wgctrl v0.0.0-20241231184526-a9ab2273dd10 h1:3GDAcqdIg1ozBNLgPy4SLT84nfcBjr6rhGtXYtrkWLU=
golang.zx2c4.com/wireguard/wgctrl v0.0.0-20241231184526-a9ab2273dd10/go.mod h1:T97yPqesLiNrOYxkwmhMI0ZIlJDm+p0PMR8eRVeR5tQ= golang.zx2c4.com/wireguard/wgctrl v0.0.0-20241231184526-a9ab2273dd10/go.mod h1:T97yPqesLiNrOYxkwmhMI0ZIlJDm+p0PMR8eRVeR5tQ=
google.golang.org/genproto v0.0.0-20230920204549-e6e6cdab5c13 h1:vlzZttNJGVqTsRFU9AmdnrcO1Znh8Ew9kCD//yjigk0= google.golang.org/genproto/googleapis/api v0.0.0-20250825161204-c5933d9347a5 h1:BIRfGDEjiHRrk0QKZe3Xv2ieMhtgRGeLcZQ0mIVn4EY=
google.golang.org/genproto/googleapis/api v0.0.0-20250519155744-55703ea1f237 h1:Kog3KlB4xevJlAcbbbzPfRG0+X9fdoGM+UBRKVz6Wr0= google.golang.org/genproto/googleapis/api v0.0.0-20250825161204-c5933d9347a5/go.mod h1:j3QtIyytwqGr1JUDtYXwtMXWPKsEa5LtzIFN1Wn5WvE=
google.golang.org/genproto/googleapis/api v0.0.0-20250519155744-55703ea1f237/go.mod h1:ezi0AVyMKDWy5xAncvjLWH7UcLBB5n7y2fQ8MzjJcto= google.golang.org/genproto/googleapis/rpc v0.0.0-20250825161204-c5933d9347a5 h1:eaY8u2EuxbRv7c3NiGK0/NedzVsCcV6hDuU5qPX5EGE=
google.golang.org/genproto/googleapis/rpc v0.0.0-20250519155744-55703ea1f237 h1:cJfm9zPbe1e873mHJzmQ1nwVEeRDU/T1wXDK2kUSU34= google.golang.org/genproto/googleapis/rpc v0.0.0-20250825161204-c5933d9347a5/go.mod h1:M4/wBTSeyLxupu3W3tJtOgB14jILAS/XWPSSa3TAlJc=
google.golang.org/genproto/googleapis/rpc v0.0.0-20250519155744-55703ea1f237/go.mod h1:qQ0YXyHHx3XkvlzUtpXDkS29lDSafHMZBAZDc03LQ3A= google.golang.org/grpc v1.76.0 h1:UnVkv1+uMLYXoIz6o7chp59WfQUYA2ex/BXQ9rHZu7A=
google.golang.org/grpc v1.72.1 h1:HR03wO6eyZ7lknl75XlxABNVLLFc2PAb6mHlYh756mA= google.golang.org/grpc v1.76.0/go.mod h1:Ju12QI8M6iQJtbcsV+awF5a4hfJMLi4X0JLo94ULZ6c=
google.golang.org/grpc v1.72.1/go.mod h1:wH5Aktxcg25y1I3w7H69nHfXdOG3UiadoBtjh3izSDM= google.golang.org/protobuf v1.36.8 h1:xHScyCOEuuwZEc6UtSOvPbAT4zRh0xcNRYekJwfqyMc=
google.golang.org/protobuf v1.36.6 h1:z1NpPI8ku2WgiWnf+t9wTPsn6eP1L7ksHUlkfLvd9xY= google.golang.org/protobuf v1.36.8/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU=
google.golang.org/protobuf v1.36.6/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gotest.tools/v3 v3.4.0 h1:ZazjZUfuVeZGLAmlKKuyv3IKP5orXcwtOwDQH6YVr6o=
gotest.tools/v3 v3.4.0/go.mod h1:CtbdzLSsqVhDgMtKsx03ird5YTGB3ar27v0u/yKBW5g=
gvisor.dev/gvisor v0.0.0-20250503011706-39ed1f5ac29c h1:m/r7OM+Y2Ty1sgBQ7Qb27VgIMBW8ZZhT4gLnUyDIhzI= gvisor.dev/gvisor v0.0.0-20250503011706-39ed1f5ac29c h1:m/r7OM+Y2Ty1sgBQ7Qb27VgIMBW8ZZhT4gLnUyDIhzI=
gvisor.dev/gvisor v0.0.0-20250503011706-39ed1f5ac29c/go.mod h1:3r5CMtNQMKIvBlrmM9xWUNamjKBYPOWyXOjmg5Kts3g= gvisor.dev/gvisor v0.0.0-20250503011706-39ed1f5ac29c/go.mod h1:3r5CMtNQMKIvBlrmM9xWUNamjKBYPOWyXOjmg5Kts3g=
software.sslmate.com/src/go-pkcs12 v0.6.0 h1:f3sQittAeF+pao32Vb+mkli+ZyT+VwKaD014qFGq6oU= software.sslmate.com/src/go-pkcs12 v0.6.0 h1:f3sQittAeF+pao32Vb+mkli+ZyT+VwKaD014qFGq6oU=

View File

@@ -0,0 +1,80 @@
package state
import (
"sync"
"sync/atomic"
"time"
"github.com/fosrl/newt/internal/telemetry"
)
// TelemetryView is a minimal, thread-safe implementation to feed observables.
// Since one Newt process represents one site, we expose a single logical site.
// site_id is a resource attribute, so we do not emit per-site labels here.
type TelemetryView struct {
online atomic.Bool
lastHBUnix atomic.Int64 // unix seconds
// per-tunnel sessions
sessMu sync.RWMutex
sessions map[string]*atomic.Int64
}
var (
globalView atomic.Pointer[TelemetryView]
)
// Global returns a singleton TelemetryView.
func Global() *TelemetryView {
if v := globalView.Load(); v != nil { return v }
v := &TelemetryView{ sessions: make(map[string]*atomic.Int64) }
globalView.Store(v)
telemetry.RegisterStateView(v)
return v
}
// Instrumentation helpers
func (v *TelemetryView) IncSessions(tunnelID string) {
v.sessMu.Lock(); defer v.sessMu.Unlock()
c := v.sessions[tunnelID]
if c == nil { c = &atomic.Int64{}; v.sessions[tunnelID] = c }
c.Add(1)
}
func (v *TelemetryView) DecSessions(tunnelID string) {
v.sessMu.Lock(); defer v.sessMu.Unlock()
if c := v.sessions[tunnelID]; c != nil {
c.Add(-1)
if c.Load() <= 0 { delete(v.sessions, tunnelID) }
}
}
func (v *TelemetryView) ClearTunnel(tunnelID string) {
v.sessMu.Lock(); defer v.sessMu.Unlock()
delete(v.sessions, tunnelID)
}
func (v *TelemetryView) SetOnline(b bool) { v.online.Store(b) }
func (v *TelemetryView) TouchHeartbeat() { v.lastHBUnix.Store(time.Now().Unix()) }
// --- telemetry.StateView interface ---
func (v *TelemetryView) ListSites() []string { return []string{"self"} }
func (v *TelemetryView) Online(_ string) (bool, bool) { return v.online.Load(), true }
func (v *TelemetryView) LastHeartbeat(_ string) (time.Time, bool) {
sec := v.lastHBUnix.Load()
if sec == 0 { return time.Time{}, false }
return time.Unix(sec, 0), true
}
func (v *TelemetryView) ActiveSessions(_ string) (int64, bool) {
// aggregated sessions (not used for per-tunnel gauge)
v.sessMu.RLock(); defer v.sessMu.RUnlock()
var sum int64
for _, c := range v.sessions { if c != nil { sum += c.Load() } }
return sum, true
}
// Extended accessor used by telemetry callback to publish per-tunnel samples.
func (v *TelemetryView) SessionsByTunnel() map[string]int64 {
v.sessMu.RLock(); defer v.sessMu.RUnlock()
out := make(map[string]int64, len(v.sessions))
for id, c := range v.sessions { if c != nil && c.Load() > 0 { out[id] = c.Load() } }
return out
}
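A minimal usage sketch of this view, assuming a caller that opens one session per tunnel; the handler function, its arguments, and the serve callback are illustrative and not part of this change, while the state API calls are the ones defined above.

package main

import "github.com/fosrl/newt/internal/state"

// handleTunnelSession is a hypothetical caller that reports session
// lifecycle and liveness through the shared telemetry view.
func handleTunnelSession(tunnelID string, serve func() error) error {
	v := state.Global()           // lazily creates the view and registers it with telemetry
	v.SetOnline(true)             // this process represents a single online site
	v.TouchHeartbeat()            // record a heartbeat timestamp
	v.IncSessions(tunnelID)       // count the session against its tunnel
	defer v.DecSessions(tunnelID) // drop it again when the session ends
	return serve()
}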

View File

@@ -0,0 +1,19 @@
package telemetry
// Protocol labels (low-cardinality)
const (
ProtocolTCP = "tcp"
ProtocolUDP = "udp"
)
// Reconnect reason bins (fixed, low-cardinality)
const (
ReasonServerRequest = "server_request"
ReasonTimeout = "timeout"
ReasonPeerClose = "peer_close"
ReasonNetworkChange = "network_change"
ReasonAuthError = "auth_error"
ReasonHandshakeError = "handshake_error"
ReasonConfigChange = "config_change"
ReasonError = "error"
)
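For reference, a sketch of how these bins are intended to be used: map an arbitrary error onto one of the fixed reason constants before emitting a metric, so the reason label stays bounded. The classification function below is illustrative; only the constants and telemetry.IncWSReconnect come from this change.

package main

import (
	"context"
	"errors"
	"net"

	"github.com/fosrl/newt/internal/telemetry"
)

// classifyReconnect maps an error to a bounded reason bin; the mapping is illustrative.
func classifyReconnect(err error) string {
	var nerr net.Error
	switch {
	case err == nil:
		return telemetry.ReasonServerRequest
	case errors.As(err, &nerr) && nerr.Timeout():
		return telemetry.ReasonTimeout
	default:
		return telemetry.ReasonError
	}
}

func onReconnect(ctx context.Context, err error) {
	telemetry.IncWSReconnect(ctx, classifyReconnect(err))
}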

View File

@@ -0,0 +1,32 @@
package telemetry
import "testing"
func TestAllowedConstants(t *testing.T) {
allowedReasons := map[string]struct{}{
ReasonServerRequest: {},
ReasonTimeout: {},
ReasonPeerClose: {},
ReasonNetworkChange: {},
ReasonAuthError: {},
ReasonHandshakeError: {},
ReasonConfigChange: {},
ReasonError: {},
}
for k := range allowedReasons {
if k == "" {
t.Fatalf("empty reason constant")
}
}
allowedProtocols := map[string]struct{}{
ProtocolTCP: {},
ProtocolUDP: {},
}
for k := range allowedProtocols {
if k == "" {
t.Fatalf("empty protocol constant")
}
}
}

View File

@@ -0,0 +1,542 @@
package telemetry
import (
"context"
"sync"
"sync/atomic"
"time"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/metric"
)
// Instruments and helpers for Newt metrics following the naming, units, and
// low-cardinality label guidance from the issue description.
//
// Counters end with _total, durations are in seconds, sizes in bytes.
// Only low-cardinality, stable labels are supported (tunnel_id, transport,
// direction, protocol, result, reason, error_type, and similar bounded keys).
var (
initOnce sync.Once
meter metric.Meter
// Site / Registration
mSiteRegistrations metric.Int64Counter
mSiteOnline metric.Int64ObservableGauge
mSiteLastHeartbeat metric.Float64ObservableGauge
// Tunnel / Sessions
mTunnelSessions metric.Int64ObservableGauge
mTunnelBytes metric.Int64Counter
mTunnelLatency metric.Float64Histogram
mReconnects metric.Int64Counter
// Connection / NAT
mConnAttempts metric.Int64Counter
mConnErrors metric.Int64Counter
// Config/Restart
mConfigReloads metric.Int64Counter
mConfigApply metric.Float64Histogram
mCertRotationTotal metric.Int64Counter
mProcessStartTime metric.Float64ObservableGauge
// Build info
mBuildInfo metric.Int64ObservableGauge
// WebSocket
mWSConnectLatency metric.Float64Histogram
mWSMessages metric.Int64Counter
mWSDisconnects metric.Int64Counter
mWSKeepaliveFailure metric.Int64Counter
mWSSessionDuration metric.Float64Histogram
mWSConnected metric.Int64ObservableGauge
mWSReconnects metric.Int64Counter
// Proxy
mProxyActiveConns metric.Int64ObservableGauge
mProxyBufferBytes metric.Int64ObservableGauge
mProxyAsyncBacklogByte metric.Int64ObservableGauge
mProxyDropsTotal metric.Int64Counter
mProxyAcceptsTotal metric.Int64Counter
mProxyConnDuration metric.Float64Histogram
mProxyConnectionsTotal metric.Int64Counter
buildVersion string
buildCommit string
processStartUnix = float64(time.Now().UnixNano()) / 1e9
wsConnectedState atomic.Int64
)
// Proxy connection lifecycle events.
const (
ProxyConnectionOpened = "opened"
ProxyConnectionClosed = "closed"
)
// attrsWithSite appends site/region labels only when explicitly enabled to keep
// label cardinality low by default.
func attrsWithSite(extra ...attribute.KeyValue) []attribute.KeyValue {
attrs := make([]attribute.KeyValue, len(extra))
copy(attrs, extra)
if ShouldIncludeSiteLabels() {
attrs = append(attrs, siteAttrs()...)
}
return attrs
}
func registerInstruments() error {
var err error
initOnce.Do(func() {
meter = otel.Meter("newt")
if e := registerSiteInstruments(); e != nil {
err = e
return
}
if e := registerTunnelInstruments(); e != nil {
err = e
return
}
if e := registerConnInstruments(); e != nil {
err = e
return
}
if e := registerConfigInstruments(); e != nil {
err = e
return
}
if e := registerBuildWSProxyInstruments(); e != nil {
err = e
return
}
})
return err
}
func registerSiteInstruments() error {
var err error
mSiteRegistrations, err = meter.Int64Counter("newt_site_registrations_total",
metric.WithDescription("Total site registration attempts"))
if err != nil {
return err
}
mSiteOnline, err = meter.Int64ObservableGauge("newt_site_online",
metric.WithDescription("Site online (0/1)"))
if err != nil {
return err
}
mSiteLastHeartbeat, err = meter.Float64ObservableGauge("newt_site_last_heartbeat_timestamp_seconds",
metric.WithDescription("Unix timestamp of the last site heartbeat"),
metric.WithUnit("s"))
if err != nil {
return err
}
return nil
}
func registerTunnelInstruments() error {
var err error
mTunnelSessions, err = meter.Int64ObservableGauge("newt_tunnel_sessions",
metric.WithDescription("Active tunnel sessions"))
if err != nil {
return err
}
mTunnelBytes, err = meter.Int64Counter("newt_tunnel_bytes_total",
metric.WithDescription("Tunnel bytes ingress/egress"),
metric.WithUnit("By"))
if err != nil {
return err
}
mTunnelLatency, err = meter.Float64Histogram("newt_tunnel_latency_seconds",
metric.WithDescription("Per-tunnel latency in seconds"),
metric.WithUnit("s"))
if err != nil {
return err
}
mReconnects, err = meter.Int64Counter("newt_tunnel_reconnects_total",
metric.WithDescription("Tunnel reconnect events"))
if err != nil {
return err
}
return nil
}
func registerConnInstruments() error {
var err error
mConnAttempts, err = meter.Int64Counter("newt_connection_attempts_total",
metric.WithDescription("Connection attempts"))
if err != nil {
return err
}
mConnErrors, err = meter.Int64Counter("newt_connection_errors_total",
metric.WithDescription("Connection errors by type"))
if err != nil {
return err
}
return nil
}
func registerConfigInstruments() error {
mConfigReloads, _ = meter.Int64Counter("newt_config_reloads_total",
metric.WithDescription("Configuration reloads"))
mConfigApply, _ = meter.Float64Histogram("newt_config_apply_seconds",
metric.WithDescription("Configuration apply duration in seconds"),
metric.WithUnit("s"))
mCertRotationTotal, _ = meter.Int64Counter("newt_cert_rotation_total",
metric.WithDescription("Certificate rotation events (success/failure)"))
mProcessStartTime, _ = meter.Float64ObservableGauge("process_start_time_seconds",
metric.WithDescription("Unix timestamp of the process start time"),
metric.WithUnit("s"))
if mProcessStartTime != nil {
if _, err := meter.RegisterCallback(func(ctx context.Context, o metric.Observer) error {
o.ObserveFloat64(mProcessStartTime, processStartUnix)
return nil
}, mProcessStartTime); err != nil {
otel.Handle(err)
}
}
return nil
}
func registerBuildWSProxyInstruments() error {
// Build info gauge (value 1 with version/commit attributes)
mBuildInfo, _ = meter.Int64ObservableGauge("newt_build_info",
metric.WithDescription("Newt build information (value is always 1)"))
// WebSocket
mWSConnectLatency, _ = meter.Float64Histogram("newt_websocket_connect_latency_seconds",
metric.WithDescription("WebSocket connect latency in seconds"),
metric.WithUnit("s"))
mWSMessages, _ = meter.Int64Counter("newt_websocket_messages_total",
metric.WithDescription("WebSocket messages by direction and type"))
mWSDisconnects, _ = meter.Int64Counter("newt_websocket_disconnects_total",
metric.WithDescription("WebSocket disconnects by reason/result"))
mWSKeepaliveFailure, _ = meter.Int64Counter("newt_websocket_keepalive_failures_total",
metric.WithDescription("WebSocket keepalive (ping/pong) failures"))
mWSSessionDuration, _ = meter.Float64Histogram("newt_websocket_session_duration_seconds",
metric.WithDescription("Duration of established WebSocket sessions"),
metric.WithUnit("s"))
mWSConnected, _ = meter.Int64ObservableGauge("newt_websocket_connected",
metric.WithDescription("WebSocket connection state (1=connected, 0=disconnected)"))
mWSReconnects, _ = meter.Int64Counter("newt_websocket_reconnects_total",
metric.WithDescription("WebSocket reconnect attempts by reason"))
// Proxy
mProxyActiveConns, _ = meter.Int64ObservableGauge("newt_proxy_active_connections",
metric.WithDescription("Proxy active connections per tunnel and protocol"))
mProxyBufferBytes, _ = meter.Int64ObservableGauge("newt_proxy_buffer_bytes",
metric.WithDescription("Proxy buffer bytes (may approximate async backlog)"),
metric.WithUnit("By"))
mProxyAsyncBacklogByte, _ = meter.Int64ObservableGauge("newt_proxy_async_backlog_bytes",
metric.WithDescription("Unflushed async byte backlog per tunnel and protocol"),
metric.WithUnit("By"))
mProxyDropsTotal, _ = meter.Int64Counter("newt_proxy_drops_total",
metric.WithDescription("Proxy drops due to write errors"))
mProxyAcceptsTotal, _ = meter.Int64Counter("newt_proxy_accept_total",
metric.WithDescription("Proxy connection accepts by protocol and result"))
mProxyConnDuration, _ = meter.Float64Histogram("newt_proxy_connection_duration_seconds",
metric.WithDescription("Duration of completed proxy connections"),
metric.WithUnit("s"))
mProxyConnectionsTotal, _ = meter.Int64Counter("newt_proxy_connections_total",
metric.WithDescription("Proxy connection lifecycle events by protocol"))
// Register a default callback for build info if version/commit set
reg, e := meter.RegisterCallback(func(ctx context.Context, o metric.Observer) error {
if buildVersion == "" && buildCommit == "" {
return nil
}
attrs := []attribute.KeyValue{}
if buildVersion != "" {
attrs = append(attrs, attribute.String("version", buildVersion))
}
if buildCommit != "" {
attrs = append(attrs, attribute.String("commit", buildCommit))
}
if ShouldIncludeSiteLabels() {
attrs = append(attrs, siteAttrs()...)
}
o.ObserveInt64(mBuildInfo, 1, metric.WithAttributes(attrs...))
return nil
}, mBuildInfo)
if e != nil {
otel.Handle(e)
} else {
// Provide a functional stopper that unregisters the callback
obsStopper = func() { _ = reg.Unregister() }
}
if mWSConnected != nil {
if regConn, err := meter.RegisterCallback(func(ctx context.Context, o metric.Observer) error {
val := wsConnectedState.Load()
o.ObserveInt64(mWSConnected, val, metric.WithAttributes(attrsWithSite()...))
return nil
}, mWSConnected); err != nil {
otel.Handle(err)
} else {
wsConnStopper = func() { _ = regConn.Unregister() }
}
}
return nil
}
// Observable registration: Newt can register a callback to report gauges.
// Call SetObservableCallback once to start observing online status, last
// heartbeat seconds, and active sessions.
var (
obsOnce sync.Once
obsStopper func()
proxyObsOnce sync.Once
proxyStopper func()
wsConnStopper func()
)
// SetObservableCallback registers a single callback that will be invoked
// on collection. Use the provided observer to emit values for the observable
// gauges defined here.
//
// Example inside your code (where you have access to current state):
//
// telemetry.SetObservableCallback(func(ctx context.Context, o metric.Observer) error {
// o.ObserveInt64(mSiteOnline, 1)
// o.ObserveFloat64(mSiteLastHeartbeat, float64(lastHB.Unix()))
// o.ObserveInt64(mTunnelSessions, int64(len(activeSessions)))
// return nil
// })
func SetObservableCallback(cb func(context.Context, metric.Observer) error) {
obsOnce.Do(func() {
reg, e := meter.RegisterCallback(cb, mSiteOnline, mSiteLastHeartbeat, mTunnelSessions)
if e != nil {
otel.Handle(e)
obsStopper = func() {
// no-op: registration failed; keep stopper callable
}
return
}
// Provide a functional stopper mirroring proxy/build-info behavior
obsStopper = func() { _ = reg.Unregister() }
})
}
// SetProxyObservableCallback registers a callback to observe proxy gauges.
func SetProxyObservableCallback(cb func(context.Context, metric.Observer) error) {
proxyObsOnce.Do(func() {
reg, e := meter.RegisterCallback(cb, mProxyActiveConns, mProxyBufferBytes, mProxyAsyncBacklogByte)
if e != nil {
otel.Handle(e)
proxyStopper = func() {
// no-op: registration failed; keep stopper callable
}
return
}
// Provide a functional stopper to unregister later if needed
proxyStopper = func() { _ = reg.Unregister() }
})
}
// Build info registration
func RegisterBuildInfo(version, commit string) {
buildVersion = version
buildCommit = commit
}
// Config reloads
func IncConfigReload(ctx context.Context, result string) {
mConfigReloads.Add(ctx, 1, metric.WithAttributes(attrsWithSite(
attribute.String("result", result),
)...))
}
// Helpers for counters/histograms
func IncSiteRegistration(ctx context.Context, result string) {
attrs := []attribute.KeyValue{
attribute.String("result", result),
}
mSiteRegistrations.Add(ctx, 1, metric.WithAttributes(attrsWithSite(attrs...)...))
}
func AddTunnelBytes(ctx context.Context, tunnelID, direction string, n int64) {
attrs := []attribute.KeyValue{
attribute.String("direction", direction),
}
if ShouldIncludeTunnelID() && tunnelID != "" {
attrs = append(attrs, attribute.String("tunnel_id", tunnelID))
}
mTunnelBytes.Add(ctx, n, metric.WithAttributes(attrsWithSite(attrs...)...))
}
// AddTunnelBytesSet adds bytes using a pre-built attribute.Set to avoid per-call allocations.
func AddTunnelBytesSet(ctx context.Context, n int64, attrs attribute.Set) {
mTunnelBytes.Add(ctx, n, metric.WithAttributeSet(attrs))
}
// --- WebSocket helpers ---
func ObserveWSConnectLatency(ctx context.Context, seconds float64, result, errorType string) {
attrs := []attribute.KeyValue{
attribute.String("transport", "websocket"),
attribute.String("result", result),
}
if errorType != "" {
attrs = append(attrs, attribute.String("error_type", errorType))
}
mWSConnectLatency.Record(ctx, seconds, metric.WithAttributes(attrsWithSite(attrs...)...))
}
func IncWSMessage(ctx context.Context, direction, msgType string) {
mWSMessages.Add(ctx, 1, metric.WithAttributes(attrsWithSite(
attribute.String("direction", direction),
attribute.String("msg_type", msgType),
)...))
}
func IncWSDisconnect(ctx context.Context, reason, result string) {
mWSDisconnects.Add(ctx, 1, metric.WithAttributes(attrsWithSite(
attribute.String("reason", reason),
attribute.String("result", result),
)...))
}
func IncWSKeepaliveFailure(ctx context.Context, reason string) {
mWSKeepaliveFailure.Add(ctx, 1, metric.WithAttributes(attrsWithSite(
attribute.String("reason", reason),
)...))
}
// SetWSConnectionState updates the backing gauge for the WebSocket connected state.
func SetWSConnectionState(connected bool) {
if connected {
wsConnectedState.Store(1)
} else {
wsConnectedState.Store(0)
}
}
// IncWSReconnect increments the WebSocket reconnect counter with a bounded reason label.
func IncWSReconnect(ctx context.Context, reason string) {
if reason == "" {
reason = "unknown"
}
mWSReconnects.Add(ctx, 1, metric.WithAttributes(attrsWithSite(
attribute.String("reason", reason),
)...))
}
func ObserveWSSessionDuration(ctx context.Context, seconds float64, result string) {
mWSSessionDuration.Record(ctx, seconds, metric.WithAttributes(attrsWithSite(
attribute.String("result", result),
)...))
}
// --- Proxy helpers ---
func ObserveProxyActiveConnsObs(o metric.Observer, value int64, attrs []attribute.KeyValue) {
o.ObserveInt64(mProxyActiveConns, value, metric.WithAttributes(attrs...))
}
func ObserveProxyBufferBytesObs(o metric.Observer, value int64, attrs []attribute.KeyValue) {
o.ObserveInt64(mProxyBufferBytes, value, metric.WithAttributes(attrs...))
}
func ObserveProxyAsyncBacklogObs(o metric.Observer, value int64, attrs []attribute.KeyValue) {
o.ObserveInt64(mProxyAsyncBacklogByte, value, metric.WithAttributes(attrs...))
}
func IncProxyDrops(ctx context.Context, tunnelID, protocol string) {
attrs := []attribute.KeyValue{
attribute.String("protocol", protocol),
}
if ShouldIncludeTunnelID() && tunnelID != "" {
attrs = append(attrs, attribute.String("tunnel_id", tunnelID))
}
mProxyDropsTotal.Add(ctx, 1, metric.WithAttributes(attrsWithSite(attrs...)...))
}
func IncProxyAccept(ctx context.Context, tunnelID, protocol, result, reason string) {
attrs := []attribute.KeyValue{
attribute.String("protocol", protocol),
attribute.String("result", result),
}
if reason != "" {
attrs = append(attrs, attribute.String("reason", reason))
}
if ShouldIncludeTunnelID() && tunnelID != "" {
attrs = append(attrs, attribute.String("tunnel_id", tunnelID))
}
mProxyAcceptsTotal.Add(ctx, 1, metric.WithAttributes(attrsWithSite(attrs...)...))
}
func ObserveProxyConnectionDuration(ctx context.Context, tunnelID, protocol, result string, seconds float64) {
attrs := []attribute.KeyValue{
attribute.String("protocol", protocol),
attribute.String("result", result),
}
if ShouldIncludeTunnelID() && tunnelID != "" {
attrs = append(attrs, attribute.String("tunnel_id", tunnelID))
}
mProxyConnDuration.Record(ctx, seconds, metric.WithAttributes(attrsWithSite(attrs...)...))
}
// IncProxyConnectionEvent records proxy connection lifecycle events (opened/closed).
func IncProxyConnectionEvent(ctx context.Context, tunnelID, protocol, event string) {
if event == "" {
event = "unknown"
}
attrs := []attribute.KeyValue{
attribute.String("protocol", protocol),
attribute.String("event", event),
}
if ShouldIncludeTunnelID() && tunnelID != "" {
attrs = append(attrs, attribute.String("tunnel_id", tunnelID))
}
mProxyConnectionsTotal.Add(ctx, 1, metric.WithAttributes(attrsWithSite(attrs...)...))
}
// --- Config/PKI helpers ---
func ObserveConfigApply(ctx context.Context, phase, result string, seconds float64) {
mConfigApply.Record(ctx, seconds, metric.WithAttributes(attrsWithSite(
attribute.String("phase", phase),
attribute.String("result", result),
)...))
}
func IncCertRotation(ctx context.Context, result string) {
mCertRotationTotal.Add(ctx, 1, metric.WithAttributes(attrsWithSite(
attribute.String("result", result),
)...))
}
func ObserveTunnelLatency(ctx context.Context, tunnelID, transport string, seconds float64) {
attrs := []attribute.KeyValue{
attribute.String("transport", transport),
}
if ShouldIncludeTunnelID() && tunnelID != "" {
attrs = append(attrs, attribute.String("tunnel_id", tunnelID))
}
mTunnelLatency.Record(ctx, seconds, metric.WithAttributes(attrsWithSite(attrs...)...))
}
func IncReconnect(ctx context.Context, tunnelID, initiator, reason string) {
attrs := []attribute.KeyValue{
attribute.String("initiator", initiator),
attribute.String("reason", reason),
}
if ShouldIncludeTunnelID() && tunnelID != "" {
attrs = append(attrs, attribute.String("tunnel_id", tunnelID))
}
mReconnects.Add(ctx, 1, metric.WithAttributes(attrsWithSite(attrs...)...))
}
func IncConnAttempt(ctx context.Context, transport, result string) {
mConnAttempts.Add(ctx, 1, metric.WithAttributes(attrsWithSite(
attribute.String("transport", transport),
attribute.String("result", result),
)...))
}
func IncConnError(ctx context.Context, transport, typ string) {
mConnErrors.Add(ctx, 1, metric.WithAttributes(attrsWithSite(
attribute.String("transport", transport),
attribute.String("error_type", typ),
)...))
}
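To illustrate the intended call pattern for these helpers, here is a sketch of instrumenting a single control-plane dial. The wrapper function, the injected dial callback, and the "dial_error" error_type value are assumptions; the telemetry calls and the result label convention follow the definitions above.

package main

import (
	"context"
	"time"

	"github.com/fosrl/newt/internal/telemetry"
)

// dialControlPlane is a hypothetical dial wrapper showing how attempts,
// latency, errors, and connection state are reported together.
func dialControlPlane(ctx context.Context, dial func() error) error {
	start := time.Now()
	err := dial()

	result, errType := "success", ""
	if err != nil {
		result, errType = "failure", "dial_error" // bounded error_type label
	}
	telemetry.IncConnAttempt(ctx, "websocket", result)
	telemetry.ObserveWSConnectLatency(ctx, time.Since(start).Seconds(), result, errType)

	if err != nil {
		telemetry.IncConnError(ctx, "websocket", "dial_error")
		return err
	}
	telemetry.SetWSConnectionState(true)
	return nil
}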

View File

@@ -0,0 +1,59 @@
package telemetry
import (
"sync"
"time"
)
func resetMetricsForTest() {
initOnce = sync.Once{}
obsOnce = sync.Once{}
proxyObsOnce = sync.Once{}
obsStopper = nil
proxyStopper = nil
if wsConnStopper != nil {
wsConnStopper()
}
wsConnStopper = nil
meter = nil
mSiteRegistrations = nil
mSiteOnline = nil
mSiteLastHeartbeat = nil
mTunnelSessions = nil
mTunnelBytes = nil
mTunnelLatency = nil
mReconnects = nil
mConnAttempts = nil
mConnErrors = nil
mConfigReloads = nil
mConfigApply = nil
mCertRotationTotal = nil
mProcessStartTime = nil
mBuildInfo = nil
mWSConnectLatency = nil
mWSMessages = nil
mWSDisconnects = nil
mWSKeepaliveFailure = nil
mWSSessionDuration = nil
mWSConnected = nil
mWSReconnects = nil
mProxyActiveConns = nil
mProxyBufferBytes = nil
mProxyAsyncBacklogByte = nil
mProxyDropsTotal = nil
mProxyAcceptsTotal = nil
mProxyConnDuration = nil
mProxyConnectionsTotal = nil
processStartUnix = float64(time.Now().UnixNano()) / 1e9
wsConnectedState.Store(0)
includeTunnelIDVal.Store(false)
includeSiteLabelVal.Store(false)
}

View File

@@ -0,0 +1,106 @@
package telemetry
import (
"context"
"sync/atomic"
"time"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/metric"
)
// StateView provides a read-only view for observable gauges.
// Implementations must be concurrency-safe and avoid blocking operations.
// All methods should be fast and use RLocks where applicable.
type StateView interface {
// ListSites returns a stable, low-cardinality list of site IDs to expose.
ListSites() []string
// Online returns whether the site is online.
Online(siteID string) (online bool, ok bool)
// LastHeartbeat returns the last heartbeat time for a site.
LastHeartbeat(siteID string) (t time.Time, ok bool)
// ActiveSessions returns the current number of active sessions for a site,
// aggregated across its tunnels (or the site-scoped count if sessions are
// tracked per site).
ActiveSessions(siteID string) (n int64, ok bool)
}
var (
stateView atomic.Value // of type StateView
)
// RegisterStateView sets the global StateView used by the default observable callback.
func RegisterStateView(v StateView) {
stateView.Store(v)
// If instruments are registered, ensure a callback exists.
if v != nil {
SetObservableCallback(func(ctx context.Context, o metric.Observer) error {
if any := stateView.Load(); any != nil {
if sv, ok := any.(StateView); ok {
for _, siteID := range sv.ListSites() {
observeSiteOnlineFor(o, sv, siteID)
observeLastHeartbeatFor(o, sv, siteID)
observeSessionsFor(o, siteID, sv)
}
}
}
return nil
})
}
}
func observeSiteOnlineFor(o metric.Observer, sv StateView, siteID string) {
if online, ok := sv.Online(siteID); ok {
val := int64(0)
if online {
val = 1
}
o.ObserveInt64(mSiteOnline, val, metric.WithAttributes(
attribute.String("site_id", siteID),
))
}
}
func observeLastHeartbeatFor(o metric.Observer, sv StateView, siteID string) {
if t, ok := sv.LastHeartbeat(siteID); ok {
ts := float64(t.UnixNano()) / 1e9
o.ObserveFloat64(mSiteLastHeartbeat, ts, metric.WithAttributes(
attribute.String("site_id", siteID),
))
}
}
func observeSessionsFor(o metric.Observer, siteID string, any interface{}) {
if tm, ok := any.(interface{ SessionsByTunnel() map[string]int64 }); ok {
sessions := tm.SessionsByTunnel()
// If tunnel_id labels are enabled, preserve existing per-tunnel observations
if ShouldIncludeTunnelID() {
for tid, n := range sessions {
attrs := []attribute.KeyValue{
attribute.String("site_id", siteID),
}
if tid != "" {
attrs = append(attrs, attribute.String("tunnel_id", tid))
}
o.ObserveInt64(mTunnelSessions, n, metric.WithAttributes(attrs...))
}
return
}
// When tunnel_id is disabled, collapse per-tunnel counts into a single site-level value
var total int64
for _, n := range sessions {
total += n
}
// If there are no per-tunnel entries, fall back to ActiveSessions() if available
if total == 0 {
if svAny := stateView.Load(); svAny != nil {
if sv, ok := svAny.(StateView); ok {
if n, ok2 := sv.ActiveSessions(siteID); ok2 {
total = n
}
}
}
}
o.ObserveInt64(mTunnelSessions, total, metric.WithAttributes(attribute.String("site_id", siteID)))
return
}
}
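A small sketch of a custom StateView, for example in tests or as an alternative state source. The fixed values and the "site-a" ID are illustrative, and it assumes telemetry.Init has already registered the instruments (otherwise the default callback has no meter to attach to).

package telemetry_test

import (
	"time"

	"github.com/fosrl/newt/internal/telemetry"
)

// staticView reports fixed values; a real implementation would read live state.
type staticView struct{}

func (staticView) ListSites() []string                    { return []string{"site-a"} }
func (staticView) Online(string) (bool, bool)             { return true, true }
func (staticView) LastHeartbeat(string) (time.Time, bool) { return time.Now(), true }
func (staticView) ActiveSessions(string) (int64, bool)    { return 3, true }

func registerStaticView() {
	// After registration, the default callback reports newt_site_online,
	// the last-heartbeat timestamp, and newt_tunnel_sessions for "site-a"
	// on every collection.
	telemetry.RegisterStateView(staticView{})
}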

View File

@@ -0,0 +1,384 @@
package telemetry
import (
"context"
"errors"
"net/http"
"os"
"strings"
"sync/atomic"
"time"
promclient "github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
"go.opentelemetry.io/contrib/instrumentation/runtime"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc"
"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc"
"go.opentelemetry.io/otel/exporters/prometheus"
"go.opentelemetry.io/otel/sdk/metric"
"go.opentelemetry.io/otel/sdk/resource"
"go.opentelemetry.io/otel/sdk/trace"
semconv "go.opentelemetry.io/otel/semconv/v1.26.0"
"google.golang.org/grpc/credentials"
)
// Config controls telemetry initialization via env flags.
//
// Defaults align with the issue requirements:
// - Prometheus exporter enabled by default (/metrics)
// - OTLP exporter disabled by default
// - Durations in seconds, bytes in raw bytes
// - Admin HTTP server address configurable (for mounting /metrics)
type Config struct {
ServiceName string
ServiceVersion string
// Optional resource attributes
SiteID string
Region string
PromEnabled bool
OTLPEnabled bool
OTLPEndpoint string // host:port
OTLPInsecure bool
MetricExportInterval time.Duration
AdminAddr string // e.g.: ":2112"
// Optional build info for newt_build_info metric
BuildVersion string
BuildCommit string
}
// FromEnv reads configuration from environment variables.
//
// NEWT_METRICS_PROMETHEUS_ENABLED (default: true)
// NEWT_METRICS_OTLP_ENABLED (default: false)
// OTEL_EXPORTER_OTLP_ENDPOINT (default: "localhost:4317")
// OTEL_EXPORTER_OTLP_INSECURE (default: true)
// OTEL_METRIC_EXPORT_INTERVAL (default: 15s)
// OTEL_SERVICE_NAME (default: "newt")
// OTEL_SERVICE_VERSION (default: "")
// NEWT_ADMIN_ADDR (default: ":2112")
func FromEnv() Config {
// Prefer explicit NEWT_* env vars, then fall back to OTEL_RESOURCE_ATTRIBUTES
site := getenv("NEWT_SITE_ID", "")
if site == "" {
site = getenv("NEWT_ID", "")
}
region := os.Getenv("NEWT_REGION")
if site == "" || region == "" {
if ra := os.Getenv("OTEL_RESOURCE_ATTRIBUTES"); ra != "" {
m := parseResourceAttributes(ra)
if site == "" {
site = m["site_id"]
}
if region == "" {
region = m["region"]
}
}
}
return Config{
ServiceName: getenv("OTEL_SERVICE_NAME", "newt"),
ServiceVersion: os.Getenv("OTEL_SERVICE_VERSION"),
SiteID: site,
Region: region,
PromEnabled: getenv("NEWT_METRICS_PROMETHEUS_ENABLED", "true") == "true",
OTLPEnabled: getenv("NEWT_METRICS_OTLP_ENABLED", "false") == "true",
OTLPEndpoint: getenv("OTEL_EXPORTER_OTLP_ENDPOINT", "localhost:4317"),
OTLPInsecure: getenv("OTEL_EXPORTER_OTLP_INSECURE", "true") == "true",
MetricExportInterval: getdur("OTEL_METRIC_EXPORT_INTERVAL", 15*time.Second),
AdminAddr: getenv("NEWT_ADMIN_ADDR", ":2112"),
}
}
// Setup holds initialized telemetry providers and (optionally) a /metrics handler.
// Call Shutdown when the process terminates to flush exporters.
type Setup struct {
MeterProvider *metric.MeterProvider
TracerProvider *trace.TracerProvider
PrometheusHandler http.Handler // nil if Prometheus exporter disabled
shutdowns []func(context.Context) error
}
// Init configures OpenTelemetry metrics and (optionally) tracing.
//
// It sets a global MeterProvider and TracerProvider, registers runtime instrumentation,
// installs recommended histogram views for *_latency_seconds, and returns a Setup with
// a Shutdown method to flush exporters.
func Init(ctx context.Context, cfg Config) (*Setup, error) {
// Configure tunnel_id label inclusion from env (default true)
if getenv("NEWT_METRICS_INCLUDE_TUNNEL_ID", "true") == "true" {
includeTunnelIDVal.Store(true)
} else {
includeTunnelIDVal.Store(false)
}
if getenv("NEWT_METRICS_INCLUDE_SITE_LABELS", "true") == "true" {
includeSiteLabelVal.Store(true)
} else {
includeSiteLabelVal.Store(false)
}
res := buildResource(ctx, cfg)
UpdateSiteInfo(cfg.SiteID, cfg.Region)
s := &Setup{}
readers, promHandler, shutdowns, err := setupMetricExport(ctx, cfg, res)
if err != nil {
return nil, err
}
s.PrometheusHandler = promHandler
// Build provider
mp := buildMeterProvider(res, readers)
otel.SetMeterProvider(mp)
s.MeterProvider = mp
s.shutdowns = append(s.shutdowns, mp.Shutdown)
// Optional tracing
if cfg.OTLPEnabled {
if tp, shutdown := setupTracing(ctx, cfg, res); tp != nil {
otel.SetTracerProvider(tp)
s.TracerProvider = tp
s.shutdowns = append(s.shutdowns, func(c context.Context) error {
return errors.Join(shutdown(c), tp.Shutdown(c))
})
}
}
// Add metric exporter shutdowns
s.shutdowns = append(s.shutdowns, shutdowns...)
// Runtime metrics
_ = runtime.Start(runtime.WithMeterProvider(mp))
// Instruments
if err := registerInstruments(); err != nil {
return nil, err
}
if cfg.BuildVersion != "" || cfg.BuildCommit != "" {
RegisterBuildInfo(cfg.BuildVersion, cfg.BuildCommit)
}
return s, nil
}
func buildResource(ctx context.Context, cfg Config) *resource.Resource {
attrs := []attribute.KeyValue{
semconv.ServiceName(cfg.ServiceName),
semconv.ServiceVersion(cfg.ServiceVersion),
}
if cfg.SiteID != "" {
attrs = append(attrs, attribute.String("site_id", cfg.SiteID))
}
if cfg.Region != "" {
attrs = append(attrs, attribute.String("region", cfg.Region))
}
res, _ := resource.New(ctx, resource.WithFromEnv(), resource.WithHost(), resource.WithAttributes(attrs...))
return res
}
func setupMetricExport(ctx context.Context, cfg Config, _ *resource.Resource) ([]metric.Reader, http.Handler, []func(context.Context) error, error) {
var readers []metric.Reader
var shutdowns []func(context.Context) error
var promHandler http.Handler
if cfg.PromEnabled {
reg := promclient.NewRegistry()
exp, err := prometheus.New(prometheus.WithRegisterer(reg))
if err != nil {
return nil, nil, nil, err
}
readers = append(readers, exp)
promHandler = promhttp.HandlerFor(reg, promhttp.HandlerOpts{})
}
if cfg.OTLPEnabled {
mopts := []otlpmetricgrpc.Option{otlpmetricgrpc.WithEndpoint(cfg.OTLPEndpoint)}
if hdrs := parseOTLPHeaders(os.Getenv("OTEL_EXPORTER_OTLP_HEADERS")); len(hdrs) > 0 {
mopts = append(mopts, otlpmetricgrpc.WithHeaders(hdrs))
}
if cfg.OTLPInsecure {
mopts = append(mopts, otlpmetricgrpc.WithInsecure())
} else if certFile := os.Getenv("OTEL_EXPORTER_OTLP_CERTIFICATE"); certFile != "" {
if creds, cerr := credentials.NewClientTLSFromFile(certFile, ""); cerr == nil {
mopts = append(mopts, otlpmetricgrpc.WithTLSCredentials(creds))
}
}
mexp, err := otlpmetricgrpc.New(ctx, mopts...)
if err != nil {
return nil, nil, nil, err
}
readers = append(readers, metric.NewPeriodicReader(mexp, metric.WithInterval(cfg.MetricExportInterval)))
shutdowns = append(shutdowns, mexp.Shutdown)
}
return readers, promHandler, shutdowns, nil
}
func buildMeterProvider(res *resource.Resource, readers []metric.Reader) *metric.MeterProvider {
var mpOpts []metric.Option
mpOpts = append(mpOpts, metric.WithResource(res))
for _, r := range readers {
mpOpts = append(mpOpts, metric.WithReader(r))
}
mpOpts = append(mpOpts, metric.WithView(metric.NewView(
metric.Instrument{Name: "newt_*_latency_seconds"},
metric.Stream{Aggregation: metric.AggregationExplicitBucketHistogram{Boundaries: []float64{0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30}}},
)))
mpOpts = append(mpOpts, metric.WithView(metric.NewView(
metric.Instrument{Name: "newt_*"},
metric.Stream{AttributeFilter: func(kv attribute.KeyValue) bool {
k := string(kv.Key)
switch k {
case "tunnel_id", "transport", "direction", "protocol", "result", "reason", "initiator", "error_type", "msg_type", "phase", "version", "commit", "site_id", "region":
return true
default:
return false
}
}},
)))
return metric.NewMeterProvider(mpOpts...)
}
func setupTracing(ctx context.Context, cfg Config, res *resource.Resource) (*trace.TracerProvider, func(context.Context) error) {
topts := []otlptracegrpc.Option{otlptracegrpc.WithEndpoint(cfg.OTLPEndpoint)}
if hdrs := parseOTLPHeaders(os.Getenv("OTEL_EXPORTER_OTLP_HEADERS")); len(hdrs) > 0 {
topts = append(topts, otlptracegrpc.WithHeaders(hdrs))
}
if cfg.OTLPInsecure {
topts = append(topts, otlptracegrpc.WithInsecure())
} else if certFile := os.Getenv("OTEL_EXPORTER_OTLP_CERTIFICATE"); certFile != "" {
if creds, cerr := credentials.NewClientTLSFromFile(certFile, ""); cerr == nil {
topts = append(topts, otlptracegrpc.WithTLSCredentials(creds))
}
}
exp, err := otlptracegrpc.New(ctx, topts...)
if err != nil {
return nil, nil
}
tp := trace.NewTracerProvider(trace.WithBatcher(exp), trace.WithResource(res))
return tp, exp.Shutdown
}
// Shutdown flushes exporters and providers in reverse init order.
func (s *Setup) Shutdown(ctx context.Context) error {
var err error
for i := len(s.shutdowns) - 1; i >= 0; i-- {
err = errors.Join(err, s.shutdowns[i](ctx))
}
return err
}
func parseOTLPHeaders(h string) map[string]string {
m := map[string]string{}
if h == "" {
return m
}
pairs := strings.Split(h, ",")
for _, p := range pairs {
kv := strings.SplitN(strings.TrimSpace(p), "=", 2)
if len(kv) == 2 {
m[strings.TrimSpace(kv[0])] = strings.TrimSpace(kv[1])
}
}
return m
}
// parseResourceAttributes parses OTEL_RESOURCE_ATTRIBUTES formatted as k=v,k2=v2
func parseResourceAttributes(s string) map[string]string {
m := map[string]string{}
if s == "" {
return m
}
parts := strings.Split(s, ",")
for _, p := range parts {
kv := strings.SplitN(strings.TrimSpace(p), "=", 2)
if len(kv) == 2 {
m[strings.TrimSpace(kv[0])] = strings.TrimSpace(kv[1])
}
}
return m
}
// Global site/region used to enrich metric labels.
var siteIDVal atomic.Value
var regionVal atomic.Value
var (
includeTunnelIDVal atomic.Value // bool; default true
includeSiteLabelVal atomic.Value // bool; default false
)
// UpdateSiteInfo updates the global site_id and region used for metric labels.
// It is thread-safe via atomic.Value: subsequent metric emissions include the
// new labels, while prior emissions remain unchanged.
func UpdateSiteInfo(siteID, region string) {
if siteID != "" {
siteIDVal.Store(siteID)
}
if region != "" {
regionVal.Store(region)
}
}
func getSiteID() string {
if v, ok := siteIDVal.Load().(string); ok {
return v
}
return ""
}
func getRegion() string {
if v, ok := regionVal.Load().(string); ok {
return v
}
return ""
}
// siteAttrs returns label KVs for site_id and region (if set).
func siteAttrs() []attribute.KeyValue {
var out []attribute.KeyValue
if s := getSiteID(); s != "" {
out = append(out, attribute.String("site_id", s))
}
if r := getRegion(); r != "" {
out = append(out, attribute.String("region", r))
}
return out
}
// SiteLabelKVs exposes site label KVs for other packages (e.g., proxy manager).
func SiteLabelKVs() []attribute.KeyValue {
if !ShouldIncludeSiteLabels() {
return nil
}
return siteAttrs()
}
// ShouldIncludeTunnelID returns whether tunnel_id labels should be emitted.
func ShouldIncludeTunnelID() bool {
if v, ok := includeTunnelIDVal.Load().(bool); ok {
return v
}
return true
}
// ShouldIncludeSiteLabels returns whether site_id/region should be emitted as
// metric labels in addition to resource attributes.
func ShouldIncludeSiteLabels() bool {
if v, ok := includeSiteLabelVal.Load().(bool); ok {
return v
}
return false
}
func getenv(k, d string) string {
if v := os.Getenv(k); v != "" {
return v
}
return d
}
func getdur(k string, d time.Duration) time.Duration {
if v := os.Getenv(k); v != "" {
if p, e := time.ParseDuration(v); e == nil {
return p
}
}
return d
}
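Putting the pieces together, a sketch of the intended bootstrap: read the env-driven config, initialize telemetry, expose /metrics on the admin address, and flush exporters on shutdown. The surrounding function, the hard-coded build version, and the logging are illustrative; the telemetry API calls are the ones defined above.

package main

import (
	"context"
	"log"
	"net/http"

	"github.com/fosrl/newt/internal/telemetry"
)

// runWithTelemetry is a hypothetical bootstrap wiring telemetry into a process.
func runWithTelemetry(ctx context.Context) error {
	cfg := telemetry.FromEnv()
	cfg.BuildVersion = "v0.0.0-dev" // illustrative; normally injected at build time

	tel, err := telemetry.Init(ctx, cfg)
	if err != nil {
		return err
	}
	defer func() { _ = tel.Shutdown(context.Background()) }()

	if tel.PrometheusHandler != nil {
		mux := http.NewServeMux()
		mux.Handle("/metrics", tel.PrometheusHandler)
		go func() {
			// AdminAddr defaults to ":2112" via FromEnv.
			if err := http.ListenAndServe(cfg.AdminAddr, mux); err != nil {
				log.Printf("metrics server: %v", err)
			}
		}()
	}

	<-ctx.Done()
	return nil
}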

View File

@@ -0,0 +1,53 @@
package telemetry
import (
"context"
"io"
"net/http"
"net/http/httptest"
"strings"
"testing"
"time"
"go.opentelemetry.io/otel/attribute"
)
// Test that disallowed attributes are filtered from the exposition.
func TestAttributeFilterDropsUnknownKeys(t *testing.T) {
ctx := context.Background()
resetMetricsForTest()
t.Setenv("NEWT_METRICS_INCLUDE_SITE_LABELS", "true")
cfg := Config{ServiceName: "newt", PromEnabled: true, AdminAddr: "127.0.0.1:0"}
tel, err := Init(ctx, cfg)
if err != nil {
t.Fatalf("init: %v", err)
}
defer func() { _ = tel.Shutdown(context.Background()) }()
if tel.PrometheusHandler == nil {
t.Fatalf("prom handler nil")
}
ts := httptest.NewServer(tel.PrometheusHandler)
defer ts.Close()
// Add samples with disallowed attribute keys
for _, k := range []string{"forbidden", "site_id", "host"} {
set := attribute.NewSet(attribute.String(k, "x"))
AddTunnelBytesSet(ctx, 123, set)
}
time.Sleep(50 * time.Millisecond)
resp, err := http.Get(ts.URL)
if err != nil {
t.Fatalf("GET: %v", err)
}
defer resp.Body.Close()
b, _ := io.ReadAll(resp.Body)
body := string(b)
if strings.Contains(body, "forbidden=") {
t.Fatalf("unexpected forbidden attribute leaked into metrics: %s", body)
}
if !strings.Contains(body, "site_id=\"x\"") {
t.Fatalf("expected allowed attribute site_id to be present in metrics, got: %s", body)
}
}
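The hot-path pattern this test guards is sketched below: build an attribute.Set once per tunnel and direction, reuse it for every byte-count update, and rely on the provider view's allow-list to drop any key that is not permitted. The byteCounter type and the "egress" direction value are illustrative.

package main

import (
	"context"

	"go.opentelemetry.io/otel/attribute"

	"github.com/fosrl/newt/internal/telemetry"
)

// byteCounter caches a pre-built attribute set to avoid per-call allocations.
type byteCounter struct {
	attrs attribute.Set
}

func newByteCounter(tunnelID string) *byteCounter {
	return &byteCounter{attrs: attribute.NewSet(
		attribute.String("tunnel_id", tunnelID), // allow-listed label
		attribute.String("direction", "egress"), // allow-listed label
	)}
}

func (b *byteCounter) add(ctx context.Context, n int64) {
	telemetry.AddTunnelBytesSet(ctx, n, b.attrs)
}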

View File

@@ -0,0 +1,76 @@
package telemetry
import (
"bufio"
"context"
"io"
"net/http"
"net/http/httptest"
"os"
"path/filepath"
"strings"
"testing"
"time"
)
// Golden test that /metrics contains expected metric names.
func TestMetricsGoldenContains(t *testing.T) {
ctx := context.Background()
resetMetricsForTest()
t.Setenv("NEWT_METRICS_INCLUDE_SITE_LABELS", "true")
cfg := Config{ServiceName: "newt", PromEnabled: true, AdminAddr: "127.0.0.1:0", BuildVersion: "test"}
tel, err := Init(ctx, cfg)
if err != nil {
t.Fatalf("telemetry init error: %v", err)
}
defer func() { _ = tel.Shutdown(context.Background()) }()
if tel.PrometheusHandler == nil {
t.Fatalf("prom handler nil")
}
ts := httptest.NewServer(tel.PrometheusHandler)
defer ts.Close()
// Trigger counters to ensure they appear in the scrape
IncConnAttempt(ctx, "websocket", "success")
IncWSReconnect(ctx, "io_error")
IncProxyConnectionEvent(ctx, "", "tcp", ProxyConnectionOpened)
if tel.MeterProvider != nil {
_ = tel.MeterProvider.ForceFlush(ctx)
}
time.Sleep(100 * time.Millisecond)
var body string
for i := 0; i < 5; i++ {
resp, err := http.Get(ts.URL)
if err != nil {
t.Fatalf("GET metrics failed: %v", err)
}
b, _ := io.ReadAll(resp.Body)
_ = resp.Body.Close()
body = string(b)
if strings.Contains(body, "newt_connection_attempts_total") {
break
}
time.Sleep(100 * time.Millisecond)
}
f, err := os.Open(filepath.Join("testdata", "expected_contains.golden"))
if err != nil {
t.Fatalf("read golden: %v", err)
}
defer f.Close()
s := bufio.NewScanner(f)
for s.Scan() {
needle := strings.TrimSpace(s.Text())
if needle == "" {
continue
}
if !strings.Contains(body, needle) {
t.Fatalf("expected metrics body to contain %q. body=\n%s", needle, body)
}
}
if err := s.Err(); err != nil {
t.Fatalf("scan golden: %v", err)
}
}

View File

@@ -0,0 +1,65 @@
package telemetry
import (
"context"
"io"
"net/http"
"net/http/httptest"
"strings"
"testing"
"time"
)
// Smoke test that /metrics contains at least one newt_* metric when Prom exporter is enabled.
func TestMetricsSmoke(t *testing.T) {
ctx := context.Background()
resetMetricsForTest()
t.Setenv("NEWT_METRICS_INCLUDE_SITE_LABELS", "true")
cfg := Config{
ServiceName: "newt",
PromEnabled: true,
OTLPEnabled: false,
AdminAddr: "127.0.0.1:0",
BuildVersion: "test",
BuildCommit: "deadbeef",
MetricExportInterval: 5 * time.Second,
}
tel, err := Init(ctx, cfg)
if err != nil {
t.Fatalf("telemetry init error: %v", err)
}
defer func() { _ = tel.Shutdown(context.Background()) }()
// Serve the Prom handler on a test server
if tel.PrometheusHandler == nil {
t.Fatalf("Prometheus handler nil; PromEnabled should enable it")
}
ts := httptest.NewServer(tel.PrometheusHandler)
defer ts.Close()
// Record a simple metric and then fetch /metrics
IncConnAttempt(ctx, "websocket", "success")
if tel.MeterProvider != nil {
_ = tel.MeterProvider.ForceFlush(ctx)
}
// Give the exporter a tick to collect
time.Sleep(100 * time.Millisecond)
var body string
for i := 0; i < 5; i++ {
resp, err := http.Get(ts.URL)
if err != nil {
t.Fatalf("GET /metrics failed: %v", err)
}
b, _ := io.ReadAll(resp.Body)
_ = resp.Body.Close()
body = string(b)
if strings.Contains(body, "newt_connection_attempts_total") {
break
}
time.Sleep(100 * time.Millisecond)
}
if !strings.Contains(body, "newt_connection_attempts_total") {
t.Fatalf("expected newt_connection_attempts_total in metrics, got:\n%s", body)
}
}

203
main.go
View File

@@ -1,7 +1,9 @@
package main
import (
"context"
"encoding/json"
"errors"
"flag"
"fmt"
"net"
@@ -22,6 +24,9 @@ import (
"github.com/fosrl/newt/updates" "github.com/fosrl/newt/updates"
"github.com/fosrl/newt/websocket" "github.com/fosrl/newt/websocket"
"github.com/fosrl/newt/internal/state"
"github.com/fosrl/newt/internal/telemetry"
"go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp"
"golang.zx2c4.com/wireguard/conn" "golang.zx2c4.com/wireguard/conn"
"golang.zx2c4.com/wireguard/device" "golang.zx2c4.com/wireguard/device"
"golang.zx2c4.com/wireguard/tun" "golang.zx2c4.com/wireguard/tun"
@@ -91,6 +96,14 @@ func (s *stringSlice) Set(value string) error {
return nil
}
const (
fmtErrMarshaling = "Error marshaling data: %v"
fmtReceivedMsg = "Received: %+v"
topicWGRegister = "newt/wg/register"
msgNoTunnelOrProxy = "No tunnel IP or proxy manager available"
fmtErrParsingTargetData = "Error parsing target data: %v"
)
var (
endpoint string
id string
@@ -120,6 +133,15 @@ var (
preferEndpoint string
healthMonitor *healthcheck.Monitor
enforceHealthcheckCert bool
// Build/version (can be overridden via -ldflags "-X main.newtVersion=...")
newtVersion = "version_replaceme"
// Observability/metrics flags
metricsEnabled bool
otlpEnabled bool
adminAddr string
region string
metricsAsyncBytes bool
blueprintFile string
noCloud bool
@@ -133,6 +155,10 @@ var (
)
func main() {
// Prepare context for graceful shutdown and signal handling
ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM)
defer stop()
// if PANGOLIN_ENDPOINT, NEWT_ID, and NEWT_SECRET are set as environment variables, they will be used as default values
endpoint = os.Getenv("PANGOLIN_ENDPOINT")
id = os.Getenv("NEWT_ID")
@@ -143,6 +169,14 @@ func main() {
updownScript = os.Getenv("UPDOWN_SCRIPT")
interfaceName = os.Getenv("INTERFACE")
generateAndSaveKeyTo = os.Getenv("GENERATE_AND_SAVE_KEY_TO")
// Metrics/observability env mirrors
metricsEnabledEnv := os.Getenv("NEWT_METRICS_PROMETHEUS_ENABLED")
otlpEnabledEnv := os.Getenv("NEWT_METRICS_OTLP_ENABLED")
adminAddrEnv := os.Getenv("NEWT_ADMIN_ADDR")
regionEnv := os.Getenv("NEWT_REGION")
asyncBytesEnv := os.Getenv("NEWT_METRICS_ASYNC_BYTES")
keepInterfaceEnv := os.Getenv("KEEP_INTERFACE")
keepInterface = keepInterfaceEnv == "true"
acceptClientsEnv := os.Getenv("ACCEPT_CLIENTS")
@@ -286,6 +320,43 @@ func main() {
flag.BoolVar(&noCloud, "no-cloud", false, "Disable cloud failover")
}
// Metrics/observability flags (mirror ENV if unset)
if metricsEnabledEnv == "" {
flag.BoolVar(&metricsEnabled, "metrics", true, "Enable Prometheus /metrics exporter")
} else {
if v, err := strconv.ParseBool(metricsEnabledEnv); err == nil {
metricsEnabled = v
} else {
metricsEnabled = true
}
}
if otlpEnabledEnv == "" {
flag.BoolVar(&otlpEnabled, "otlp", false, "Enable OTLP exporters (metrics/traces) to OTEL_EXPORTER_OTLP_ENDPOINT")
} else {
if v, err := strconv.ParseBool(otlpEnabledEnv); err == nil {
otlpEnabled = v
}
}
if adminAddrEnv == "" {
flag.StringVar(&adminAddr, "metrics-admin-addr", "127.0.0.1:2112", "Admin/metrics bind address")
} else {
adminAddr = adminAddrEnv
}
// Async bytes toggle
if asyncBytesEnv == "" {
flag.BoolVar(&metricsAsyncBytes, "metrics-async-bytes", false, "Enable async bytes counting (background flush; lower hot path overhead)")
} else {
if v, err := strconv.ParseBool(asyncBytesEnv); err == nil {
metricsAsyncBytes = v
}
}
// Optional region flag (resource attribute)
if regionEnv == "" {
flag.StringVar(&region, "region", "", "Optional region resource attribute (also NEWT_REGION)")
} else {
region = regionEnv
}
// do a --version check
version := flag.Bool("version", false, "Print the version")
@@ -300,12 +371,58 @@ func main() {
loggerLevel := parseLogLevel(logLevel)
logger.GetLogger().SetLevel(parseLogLevel(logLevel))
newtVersion := "version_replaceme"
// Initialize telemetry after flags are parsed (so flags override env)
tcfg := telemetry.FromEnv()
tcfg.PromEnabled = metricsEnabled
tcfg.OTLPEnabled = otlpEnabled
if adminAddr != "" {
tcfg.AdminAddr = adminAddr
}
// Resource attributes (if available)
tcfg.SiteID = id
tcfg.Region = region
// Build info
tcfg.BuildVersion = newtVersion
tcfg.BuildCommit = os.Getenv("NEWT_COMMIT")
tel, telErr := telemetry.Init(ctx, tcfg)
if telErr != nil {
logger.Warn("Telemetry init failed: %v", telErr)
}
if tel != nil {
// Admin HTTP server (exposes /metrics when Prometheus exporter is enabled)
logger.Info("Starting metrics server on %s", tcfg.AdminAddr)
mux := http.NewServeMux()
mux.HandleFunc("/healthz", func(w http.ResponseWriter, r *http.Request) { w.WriteHeader(200) })
if tel.PrometheusHandler != nil {
mux.Handle("/metrics", tel.PrometheusHandler)
}
admin := &http.Server{
Addr: tcfg.AdminAddr,
Handler: otelhttp.NewHandler(mux, "newt-admin"),
ReadTimeout: 5 * time.Second,
WriteTimeout: 10 * time.Second,
ReadHeaderTimeout: 5 * time.Second,
IdleTimeout: 30 * time.Second,
}
go func() {
if err := admin.ListenAndServe(); err != nil && !errors.Is(err, http.ErrServerClosed) {
logger.Warn("admin http error: %v", err)
}
}()
defer func() {
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
_ = admin.Shutdown(ctx)
}()
defer func() { _ = tel.Shutdown(context.Background()) }()
}
if *version {
fmt.Println("Newt version " + newtVersion)
os.Exit(0)
} else {
logger.Info("Newt version " + newtVersion)
logger.Info("Newt version %s", newtVersion)
}
if err := updates.CheckForUpdate("fosrl", "newt", newtVersion); err != nil {
@@ -376,6 +493,8 @@ func main() {
}
endpoint = client.GetConfig().Endpoint // Update endpoint from config
id = client.GetConfig().ID // Update ID from config
// Update site labels for metrics with the resolved ID
telemetry.UpdateSiteInfo(id, region)
// output env var values if set
logger.Debug("Endpoint: %v", endpoint)
@@ -484,6 +603,10 @@ func main() {
// Register handlers for different message types
client.RegisterHandler("newt/wg/connect", func(msg websocket.WSMessage) {
logger.Debug("Received registration message")
regResult := "success"
defer func() {
telemetry.IncSiteRegistration(ctx, regResult)
}()
if stopFunc != nil {
stopFunc() // stop the ws from sending more requests
stopFunc = nil // reset stopFunc to nil to avoid double stopping
@@ -502,22 +625,25 @@ func main() {
jsonData, err := json.Marshal(msg.Data) jsonData, err := json.Marshal(msg.Data)
if err != nil { if err != nil {
logger.Info("Error marshaling data: %v", err) logger.Info(fmtErrMarshaling, err)
regResult = "failure"
return return
} }
if err := json.Unmarshal(jsonData, &wgData); err != nil { if err := json.Unmarshal(jsonData, &wgData); err != nil {
logger.Info("Error unmarshaling target data: %v", err) logger.Info("Error unmarshaling target data: %v", err)
regResult = "failure"
return return
} }
logger.Debug("Received: %+v", msg) logger.Debug(fmtReceivedMsg, msg)
tun, tnet, err = netstack.CreateNetTUN( tun, tnet, err = netstack.CreateNetTUN(
[]netip.Addr{netip.MustParseAddr(wgData.TunnelIP)}, []netip.Addr{netip.MustParseAddr(wgData.TunnelIP)},
[]netip.Addr{netip.MustParseAddr(dns)}, []netip.Addr{netip.MustParseAddr(dns)},
mtuInt) mtuInt)
if err != nil { if err != nil {
logger.Error("Failed to create TUN device: %v", err) logger.Error("Failed to create TUN device: %v", err)
regResult = "failure"
} }
setDownstreamTNetstack(tnet) setDownstreamTNetstack(tnet)
@@ -531,6 +657,7 @@ func main() {
host, _, err := net.SplitHostPort(wgData.Endpoint) host, _, err := net.SplitHostPort(wgData.Endpoint)
if err != nil { if err != nil {
logger.Error("Failed to split endpoint: %v", err) logger.Error("Failed to split endpoint: %v", err)
regResult = "failure"
return return
} }
@@ -539,6 +666,7 @@ func main() {
endpoint, err := resolveDomain(wgData.Endpoint) endpoint, err := resolveDomain(wgData.Endpoint)
if err != nil { if err != nil {
logger.Error("Failed to resolve endpoint: %v", err) logger.Error("Failed to resolve endpoint: %v", err)
regResult = "failure"
return return
} }
@@ -554,12 +682,14 @@ persistent_keepalive_interval=5`, fixKey(privateKey.String()), fixKey(wgData.Pub
err = dev.IpcSet(config) err = dev.IpcSet(config)
if err != nil { if err != nil {
logger.Error("Failed to configure WireGuard device: %v", err) logger.Error("Failed to configure WireGuard device: %v", err)
regResult = "failure"
} }
// Bring up the device // Bring up the device
err = dev.Up() err = dev.Up()
if err != nil { if err != nil {
logger.Error("Failed to bring up WireGuard device: %v", err) logger.Error("Failed to bring up WireGuard device: %v", err)
regResult = "failure"
} }
logger.Debug("WireGuard device created. Lets ping the server now...") logger.Debug("WireGuard device created. Lets ping the server now...")
@@ -572,9 +702,13 @@ persistent_keepalive_interval=5`, fixKey(privateKey.String()), fixKey(wgData.Pub
} }
// Use reliable ping for initial connection test // Use reliable ping for initial connection test
logger.Debug("Testing initial connection with reliable ping...") logger.Debug("Testing initial connection with reliable ping...")
_, err = reliablePing(tnet, wgData.ServerIP, pingTimeout, 5) lat, err := reliablePing(tnet, wgData.ServerIP, pingTimeout, 5)
if err == nil && wgData.PublicKey != "" {
telemetry.ObserveTunnelLatency(ctx, wgData.PublicKey, "wireguard", lat.Seconds())
}
if err != nil { if err != nil {
logger.Warn("Initial reliable ping failed, but continuing: %v", err) logger.Warn("Initial reliable ping failed, but continuing: %v", err)
regResult = "failure"
} else { } else {
logger.Debug("Initial connection test successful") logger.Debug("Initial connection test successful")
} }
@@ -585,11 +719,14 @@ persistent_keepalive_interval=5`, fixKey(privateKey.String()), fixKey(wgData.Pub
// as the pings will continue in the background // as the pings will continue in the background
if !connected { if !connected {
logger.Debug("Starting ping check") logger.Debug("Starting ping check")
pingStopChan = startPingCheck(tnet, wgData.ServerIP, client) pingStopChan = startPingCheck(tnet, wgData.ServerIP, client, wgData.PublicKey)
} }
// Create proxy manager // Create proxy manager
pm = proxy.NewProxyManager(tnet) pm = proxy.NewProxyManager(tnet)
pm.SetAsyncBytes(metricsAsyncBytes)
// Set tunnel_id for metrics (WireGuard peer public key)
pm.SetTunnelID(wgData.PublicKey)
connected = true connected = true
@@ -626,10 +763,19 @@ persistent_keepalive_interval=5`, fixKey(privateKey.String()), fixKey(wgData.Pub
client.RegisterHandler("newt/wg/reconnect", func(msg websocket.WSMessage) { client.RegisterHandler("newt/wg/reconnect", func(msg websocket.WSMessage) {
logger.Info("Received reconnect message") logger.Info("Received reconnect message")
if wgData.PublicKey != "" {
telemetry.IncReconnect(ctx, wgData.PublicKey, "server", telemetry.ReasonServerRequest)
}
// Close the WireGuard device and TUN // Close the WireGuard device and TUN
closeWgTunnel() closeWgTunnel()
// Clear metrics attrs and sessions for the tunnel
if pm != nil {
pm.ClearTunnelID()
state.Global().ClearTunnel(wgData.PublicKey)
}
// Mark as disconnected // Mark as disconnected
connected = false connected = false
@@ -648,6 +794,9 @@ persistent_keepalive_interval=5`, fixKey(privateKey.String()), fixKey(wgData.Pub
client.RegisterHandler("newt/wg/terminate", func(msg websocket.WSMessage) { client.RegisterHandler("newt/wg/terminate", func(msg websocket.WSMessage) {
logger.Info("Received termination message") logger.Info("Received termination message")
if wgData.PublicKey != "" {
telemetry.IncReconnect(ctx, wgData.PublicKey, "server", telemetry.ReasonServerRequest)
}
// Close the WireGuard device and TUN // Close the WireGuard device and TUN
closeWgTunnel() closeWgTunnel()
@@ -675,7 +824,7 @@ persistent_keepalive_interval=5`, fixKey(privateKey.String()), fixKey(wgData.Pub
jsonData, err := json.Marshal(msg.Data) jsonData, err := json.Marshal(msg.Data)
if err != nil { if err != nil {
logger.Info("Error marshaling data: %v", err) logger.Info(fmtErrMarshaling, err)
return return
} }
if err := json.Unmarshal(jsonData, &exitNodeData); err != nil { if err := json.Unmarshal(jsonData, &exitNodeData); err != nil {
@@ -716,7 +865,7 @@ persistent_keepalive_interval=5`, fixKey(privateKey.String()), fixKey(wgData.Pub
}, },
} }
stopFunc = client.SendMessageInterval("newt/wg/register", map[string]interface{}{ stopFunc = client.SendMessageInterval(topicWGRegister, map[string]interface{}{
"publicKey": publicKey.String(), "publicKey": publicKey.String(),
"pingResults": pingResults, "pingResults": pingResults,
"newtVersion": newtVersion, "newtVersion": newtVersion,
@@ -819,7 +968,7 @@ persistent_keepalive_interval=5`, fixKey(privateKey.String()), fixKey(wgData.Pub
} }
// Send the ping results to the cloud for selection // Send the ping results to the cloud for selection
stopFunc = client.SendMessageInterval("newt/wg/register", map[string]interface{}{ stopFunc = client.SendMessageInterval(topicWGRegister, map[string]interface{}{
"publicKey": publicKey.String(), "publicKey": publicKey.String(),
"pingResults": pingResults, "pingResults": pingResults,
"newtVersion": newtVersion, "newtVersion": newtVersion,
@@ -829,17 +978,17 @@ persistent_keepalive_interval=5`, fixKey(privateKey.String()), fixKey(wgData.Pub
}) })
client.RegisterHandler("newt/tcp/add", func(msg websocket.WSMessage) { client.RegisterHandler("newt/tcp/add", func(msg websocket.WSMessage) {
logger.Debug("Received: %+v", msg) logger.Debug(fmtReceivedMsg, msg)
// if there is no wgData or pm, we can't add targets // if there is no wgData or pm, we can't add targets
if wgData.TunnelIP == "" || pm == nil { if wgData.TunnelIP == "" || pm == nil {
logger.Info("No tunnel IP or proxy manager available") logger.Info(msgNoTunnelOrProxy)
return return
} }
targetData, err := parseTargetData(msg.Data) targetData, err := parseTargetData(msg.Data)
if err != nil { if err != nil {
logger.Info("Error parsing target data: %v", err) logger.Info(fmtErrParsingTargetData, err)
return return
} }
@@ -854,17 +1003,17 @@ persistent_keepalive_interval=5`, fixKey(privateKey.String()), fixKey(wgData.Pub
}) })
client.RegisterHandler("newt/udp/add", func(msg websocket.WSMessage) { client.RegisterHandler("newt/udp/add", func(msg websocket.WSMessage) {
logger.Info("Received: %+v", msg) logger.Info(fmtReceivedMsg, msg)
// if there is no wgData or pm, we can't add targets // if there is no wgData or pm, we can't add targets
if wgData.TunnelIP == "" || pm == nil { if wgData.TunnelIP == "" || pm == nil {
logger.Info("No tunnel IP or proxy manager available") logger.Info(msgNoTunnelOrProxy)
return return
} }
targetData, err := parseTargetData(msg.Data) targetData, err := parseTargetData(msg.Data)
if err != nil { if err != nil {
logger.Info("Error parsing target data: %v", err) logger.Info(fmtErrParsingTargetData, err)
return return
} }
@@ -879,17 +1028,17 @@ persistent_keepalive_interval=5`, fixKey(privateKey.String()), fixKey(wgData.Pub
}) })
client.RegisterHandler("newt/udp/remove", func(msg websocket.WSMessage) { client.RegisterHandler("newt/udp/remove", func(msg websocket.WSMessage) {
logger.Info("Received: %+v", msg) logger.Info(fmtReceivedMsg, msg)
// if there is no wgData or pm, we can't add targets // if there is no wgData or pm, we can't add targets
if wgData.TunnelIP == "" || pm == nil { if wgData.TunnelIP == "" || pm == nil {
logger.Info("No tunnel IP or proxy manager available") logger.Info(msgNoTunnelOrProxy)
return return
} }
targetData, err := parseTargetData(msg.Data) targetData, err := parseTargetData(msg.Data)
if err != nil { if err != nil {
logger.Info("Error parsing target data: %v", err) logger.Info(fmtErrParsingTargetData, err)
return return
} }
@@ -904,17 +1053,17 @@ persistent_keepalive_interval=5`, fixKey(privateKey.String()), fixKey(wgData.Pub
}) })
client.RegisterHandler("newt/tcp/remove", func(msg websocket.WSMessage) { client.RegisterHandler("newt/tcp/remove", func(msg websocket.WSMessage) {
logger.Info("Received: %+v", msg) logger.Info(fmtReceivedMsg, msg)
// if there is no wgData or pm, we can't add targets // if there is no wgData or pm, we can't add targets
if wgData.TunnelIP == "" || pm == nil { if wgData.TunnelIP == "" || pm == nil {
logger.Info("No tunnel IP or proxy manager available") logger.Info(msgNoTunnelOrProxy)
return return
} }
targetData, err := parseTargetData(msg.Data) targetData, err := parseTargetData(msg.Data)
if err != nil { if err != nil {
logger.Info("Error parsing target data: %v", err) logger.Info(fmtErrParsingTargetData, err)
return return
} }
@@ -998,7 +1147,7 @@ persistent_keepalive_interval=5`, fixKey(privateKey.String()), fixKey(wgData.Pub
jsonData, err := json.Marshal(msg.Data) jsonData, err := json.Marshal(msg.Data)
if err != nil { if err != nil {
logger.Info("Error marshaling data: %v", err) logger.Info(fmtErrMarshaling, err)
return return
} }
if err := json.Unmarshal(jsonData, &sshPublicKeyData); err != nil { if err := json.Unmarshal(jsonData, &sshPublicKeyData); err != nil {
@@ -1155,9 +1304,9 @@ persistent_keepalive_interval=5`, fixKey(privateKey.String()), fixKey(wgData.Pub
} }
if err := healthMonitor.EnableTarget(requestData.ID); err != nil { if err := healthMonitor.EnableTarget(requestData.ID); err != nil {
logger.Error("Failed to enable health check target %s: %v", requestData.ID, err) logger.Error("Failed to enable health check target %d: %v", requestData.ID, err)
} else { } else {
logger.Info("Enabled health check target: %s", requestData.ID) logger.Info("Enabled health check target: %d", requestData.ID)
} }
}) })
@@ -1180,9 +1329,9 @@ persistent_keepalive_interval=5`, fixKey(privateKey.String()), fixKey(wgData.Pub
} }
if err := healthMonitor.DisableTarget(requestData.ID); err != nil { if err := healthMonitor.DisableTarget(requestData.ID); err != nil {
logger.Error("Failed to disable health check target %s: %v", requestData.ID, err) logger.Error("Failed to disable health check target %d: %v", requestData.ID, err)
} else { } else {
logger.Info("Disabled health check target: %s", requestData.ID) logger.Info("Disabled health check target: %d", requestData.ID)
} }
}) })
@@ -1252,7 +1401,7 @@ persistent_keepalive_interval=5`, fixKey(privateKey.String()), fixKey(wgData.Pub
} }
// Send registration message to the server for backward compatibility // Send registration message to the server for backward compatibility
err := client.SendMessage("newt/wg/register", map[string]interface{}{ err := client.SendMessage(topicWGRegister, map[string]interface{}{
"publicKey": publicKey.String(), "publicKey": publicKey.String(),
"newtVersion": newtVersion, "newtVersion": newtVersion,
"backwardsCompatible": true, "backwardsCompatible": true,

View File

@@ -1,18 +1,28 @@
package proxy package proxy
import ( import (
"context"
"errors"
"fmt" "fmt"
"io" "io"
"net" "net"
"os"
"strings" "strings"
"sync" "sync"
"sync/atomic"
"time" "time"
"github.com/fosrl/newt/internal/state"
"github.com/fosrl/newt/internal/telemetry"
"github.com/fosrl/newt/logger" "github.com/fosrl/newt/logger"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/metric"
"golang.zx2c4.com/wireguard/tun/netstack" "golang.zx2c4.com/wireguard/tun/netstack"
"gvisor.dev/gvisor/pkg/tcpip/adapters/gonet" "gvisor.dev/gvisor/pkg/tcpip/adapters/gonet"
) )
const errUnsupportedProtoFmt = "unsupported protocol: %s"
// Target represents a proxy target with its address and port // Target represents a proxy target with its address and port
type Target struct { type Target struct {
Address string Address string
@@ -28,6 +38,90 @@ type ProxyManager struct {
udpConns []*gonet.UDPConn udpConns []*gonet.UDPConn
running bool running bool
mutex sync.RWMutex mutex sync.RWMutex
// telemetry (multi-tunnel)
currentTunnelID string
tunnels map[string]*tunnelEntry
asyncBytes bool
flushStop chan struct{}
}
// tunnelEntry holds per-tunnel attributes and (optional) async counters.
type tunnelEntry struct {
attrInTCP attribute.Set
attrOutTCP attribute.Set
attrInUDP attribute.Set
attrOutUDP attribute.Set
bytesInTCP atomic.Uint64
bytesOutTCP atomic.Uint64
bytesInUDP atomic.Uint64
bytesOutUDP atomic.Uint64
activeTCP atomic.Int64
activeUDP atomic.Int64
}
// countingWriter wraps an io.Writer and adds bytes to OTel counter using a pre-built attribute set.
type countingWriter struct {
ctx context.Context
w io.Writer
set attribute.Set
pm *ProxyManager
ent *tunnelEntry
out bool // false=in, true=out
proto string // "tcp" or "udp"
}
func (cw *countingWriter) Write(p []byte) (int, error) {
n, err := cw.w.Write(p)
if n > 0 {
if cw.pm != nil && cw.pm.asyncBytes && cw.ent != nil {
switch cw.proto {
case "tcp":
if cw.out {
cw.ent.bytesOutTCP.Add(uint64(n))
} else {
cw.ent.bytesInTCP.Add(uint64(n))
}
case "udp":
if cw.out {
cw.ent.bytesOutUDP.Add(uint64(n))
} else {
cw.ent.bytesInUDP.Add(uint64(n))
}
}
} else {
telemetry.AddTunnelBytesSet(cw.ctx, int64(n), cw.set)
}
}
return n, err
}
func classifyProxyError(err error) string {
if err == nil {
return ""
}
if errors.Is(err, net.ErrClosed) {
return "closed"
}
if ne, ok := err.(net.Error); ok {
if ne.Timeout() {
return "timeout"
}
if ne.Temporary() {
return "temporary"
}
}
msg := strings.ToLower(err.Error())
switch {
case strings.Contains(msg, "refused"):
return "refused"
case strings.Contains(msg, "reset"):
return "reset"
default:
return "io_error"
}
} }
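classifyProxyError keeps the error_type label to a small fixed set; a hedged test sketch of the expected buckets, assuming it sits next to the function in the proxy package:

```go
package proxy

import (
	"errors"
	"net"
	"testing"
)

// Illustrative only: exercises the buckets classifyProxyError is expected to return.
func TestClassifyProxyErrorBuckets(t *testing.T) {
	cases := map[string]error{
		"closed":   net.ErrClosed,
		"refused":  errors.New("dial tcp 127.0.0.1:80: connect: connection refused"),
		"reset":    errors.New("read tcp 10.0.0.2:443: connection reset by peer"),
		"io_error": errors.New("write: broken pipe"),
	}
	for want, err := range cases {
		if got := classifyProxyError(err); got != want {
			t.Errorf("classifyProxyError(%v) = %q, want %q", err, got, want)
		}
	}
}
```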
// NewProxyManager creates a new proxy manager instance // NewProxyManager creates a new proxy manager instance
@@ -38,9 +132,77 @@ func NewProxyManager(tnet *netstack.Net) *ProxyManager {
udpTargets: make(map[string]map[int]string), udpTargets: make(map[string]map[int]string),
listeners: make([]*gonet.TCPListener, 0), listeners: make([]*gonet.TCPListener, 0),
udpConns: make([]*gonet.UDPConn, 0), udpConns: make([]*gonet.UDPConn, 0),
tunnels: make(map[string]*tunnelEntry),
} }
} }
// SetTunnelID sets the WireGuard peer public key used as tunnel_id label.
func (pm *ProxyManager) SetTunnelID(id string) {
pm.mutex.Lock()
defer pm.mutex.Unlock()
pm.currentTunnelID = id
if _, ok := pm.tunnels[id]; !ok {
pm.tunnels[id] = &tunnelEntry{}
}
e := pm.tunnels[id]
// include site labels if available
site := telemetry.SiteLabelKVs()
build := func(base []attribute.KeyValue) attribute.Set {
if telemetry.ShouldIncludeTunnelID() {
base = append([]attribute.KeyValue{attribute.String("tunnel_id", id)}, base...)
}
base = append(site, base...)
return attribute.NewSet(base...)
}
e.attrInTCP = build([]attribute.KeyValue{
attribute.String("direction", "ingress"),
attribute.String("protocol", "tcp"),
})
e.attrOutTCP = build([]attribute.KeyValue{
attribute.String("direction", "egress"),
attribute.String("protocol", "tcp"),
})
e.attrInUDP = build([]attribute.KeyValue{
attribute.String("direction", "ingress"),
attribute.String("protocol", "udp"),
})
e.attrOutUDP = build([]attribute.KeyValue{
attribute.String("direction", "egress"),
attribute.String("protocol", "udp"),
})
}
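Pre-building one attribute.Set per direction/protocol keeps the per-byte hot path free of attribute allocation; a standalone sketch of the same idea with the OpenTelemetry attribute package (the label values are illustrative, not Newt's):

```go
package main

import (
	"fmt"

	"go.opentelemetry.io/otel/attribute"
)

func main() {
	// Built once (e.g. when a tunnel is registered), then reused for every
	// counter Add on the hot path, e.g. via metric.WithAttributeSet.
	ingressTCP := attribute.NewSet(
		attribute.String("tunnel_id", "examplePeerPublicKey"), // hypothetical value
		attribute.String("direction", "ingress"),
		attribute.String("protocol", "tcp"),
	)
	fmt.Println(ingressTCP.Encoded(attribute.DefaultEncoder()))
}
```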
// ClearTunnelID clears cached attribute sets for the current tunnel.
func (pm *ProxyManager) ClearTunnelID() {
pm.mutex.Lock()
defer pm.mutex.Unlock()
id := pm.currentTunnelID
if id == "" {
return
}
if e, ok := pm.tunnels[id]; ok {
// final flush for this tunnel
inTCP := e.bytesInTCP.Swap(0)
outTCP := e.bytesOutTCP.Swap(0)
inUDP := e.bytesInUDP.Swap(0)
outUDP := e.bytesOutUDP.Swap(0)
if inTCP > 0 {
telemetry.AddTunnelBytesSet(context.Background(), int64(inTCP), e.attrInTCP)
}
if outTCP > 0 {
telemetry.AddTunnelBytesSet(context.Background(), int64(outTCP), e.attrOutTCP)
}
if inUDP > 0 {
telemetry.AddTunnelBytesSet(context.Background(), int64(inUDP), e.attrInUDP)
}
if outUDP > 0 {
telemetry.AddTunnelBytesSet(context.Background(), int64(outUDP), e.attrOutUDP)
}
delete(pm.tunnels, id)
}
pm.currentTunnelID = ""
}
// init function without tnet // init function without tnet
func NewProxyManagerWithoutTNet() *ProxyManager { func NewProxyManagerWithoutTNet() *ProxyManager {
return &ProxyManager{ return &ProxyManager{
@@ -75,7 +237,7 @@ func (pm *ProxyManager) AddTarget(proto, listenIP string, port int, targetAddr s
} }
pm.udpTargets[listenIP][port] = targetAddr pm.udpTargets[listenIP][port] = targetAddr
default: default:
return fmt.Errorf("unsupported protocol: %s", proto) return fmt.Errorf(errUnsupportedProtoFmt, proto)
} }
if pm.running { if pm.running {
@@ -124,13 +286,28 @@ func (pm *ProxyManager) RemoveTarget(proto, listenIP string, port int) error {
return fmt.Errorf("target not found: %s:%d", listenIP, port) return fmt.Errorf("target not found: %s:%d", listenIP, port)
} }
default: default:
return fmt.Errorf("unsupported protocol: %s", proto) return fmt.Errorf(errUnsupportedProtoFmt, proto)
} }
return nil return nil
} }
// Start begins listening for all configured proxy targets // Start begins listening for all configured proxy targets
func (pm *ProxyManager) Start() error { func (pm *ProxyManager) Start() error {
// Register proxy observables once per process
telemetry.SetProxyObservableCallback(func(ctx context.Context, o metric.Observer) error {
pm.mutex.RLock()
defer pm.mutex.RUnlock()
for _, e := range pm.tunnels {
// active connections
telemetry.ObserveProxyActiveConnsObs(o, e.activeTCP.Load(), e.attrOutTCP.ToSlice())
telemetry.ObserveProxyActiveConnsObs(o, e.activeUDP.Load(), e.attrOutUDP.ToSlice())
// backlog bytes (sum of unflushed counters)
b := int64(e.bytesInTCP.Load() + e.bytesOutTCP.Load() + e.bytesInUDP.Load() + e.bytesOutUDP.Load())
telemetry.ObserveProxyAsyncBacklogObs(o, b, e.attrOutTCP.ToSlice())
telemetry.ObserveProxyBufferBytesObs(o, b, e.attrOutTCP.ToSlice())
}
return nil
})
pm.mutex.Lock() pm.mutex.Lock()
defer pm.mutex.Unlock() defer pm.mutex.Unlock()
@@ -160,6 +337,75 @@ func (pm *ProxyManager) Start() error {
return nil return nil
} }
func (pm *ProxyManager) SetAsyncBytes(b bool) {
pm.mutex.Lock()
defer pm.mutex.Unlock()
pm.asyncBytes = b
if b && pm.flushStop == nil {
pm.flushStop = make(chan struct{})
go pm.flushLoop()
}
}
func (pm *ProxyManager) flushLoop() {
flushInterval := 2 * time.Second
if v := os.Getenv("OTEL_METRIC_EXPORT_INTERVAL"); v != "" {
if d, err := time.ParseDuration(v); err == nil && d > 0 {
if d/2 < flushInterval {
flushInterval = d / 2
}
}
}
ticker := time.NewTicker(flushInterval)
defer ticker.Stop()
for {
select {
case <-ticker.C:
pm.mutex.RLock()
for _, e := range pm.tunnels {
inTCP := e.bytesInTCP.Swap(0)
outTCP := e.bytesOutTCP.Swap(0)
inUDP := e.bytesInUDP.Swap(0)
outUDP := e.bytesOutUDP.Swap(0)
if inTCP > 0 {
telemetry.AddTunnelBytesSet(context.Background(), int64(inTCP), e.attrInTCP)
}
if outTCP > 0 {
telemetry.AddTunnelBytesSet(context.Background(), int64(outTCP), e.attrOutTCP)
}
if inUDP > 0 {
telemetry.AddTunnelBytesSet(context.Background(), int64(inUDP), e.attrInUDP)
}
if outUDP > 0 {
telemetry.AddTunnelBytesSet(context.Background(), int64(outUDP), e.attrOutUDP)
}
}
pm.mutex.RUnlock()
case <-pm.flushStop:
pm.mutex.RLock()
for _, e := range pm.tunnels {
inTCP := e.bytesInTCP.Swap(0)
outTCP := e.bytesOutTCP.Swap(0)
inUDP := e.bytesInUDP.Swap(0)
outUDP := e.bytesOutUDP.Swap(0)
if inTCP > 0 {
telemetry.AddTunnelBytesSet(context.Background(), int64(inTCP), e.attrInTCP)
}
if outTCP > 0 {
telemetry.AddTunnelBytesSet(context.Background(), int64(outTCP), e.attrOutTCP)
}
if inUDP > 0 {
telemetry.AddTunnelBytesSet(context.Background(), int64(inUDP), e.attrInUDP)
}
if outUDP > 0 {
telemetry.AddTunnelBytesSet(context.Background(), int64(outUDP), e.attrOutUDP)
}
}
pm.mutex.RUnlock()
return
}
}
}
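flushLoop drains the per-tunnel atomic counters on a timer (half of OTEL_METRIC_EXPORT_INTERVAL, capped at 2s) so the copy loops never call into the metrics pipeline directly; a self-contained sketch of that batching pattern, with a print standing in for the telemetry call and an illustrative interval:

```go
package main

import (
	"fmt"
	"sync/atomic"
	"time"
)

func main() {
	var pending atomic.Uint64
	record := func(n int) { pending.Add(uint64(n)) } // hot path: atomic only

	done := make(chan struct{})
	go func() {
		ticker := time.NewTicker(200 * time.Millisecond)
		defer ticker.Stop()
		for {
			select {
			case <-ticker.C:
				if n := pending.Swap(0); n > 0 {
					fmt.Println("flush", n, "bytes") // stands in for AddTunnelBytesSet
				}
			case <-done:
				if n := pending.Swap(0); n > 0 {
					fmt.Println("final flush", n, "bytes")
				}
				return
			}
		}
	}()

	for i := 0; i < 10; i++ {
		record(1500)
		time.Sleep(50 * time.Millisecond)
	}
	close(done)
	time.Sleep(50 * time.Millisecond)
}
```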
func (pm *ProxyManager) Stop() error { func (pm *ProxyManager) Stop() error {
pm.mutex.Lock() pm.mutex.Lock()
defer pm.mutex.Unlock() defer pm.mutex.Unlock()
@@ -227,7 +473,7 @@ func (pm *ProxyManager) startTarget(proto, listenIP string, port int, targetAddr
go pm.handleUDPProxy(conn, targetAddr) go pm.handleUDPProxy(conn, targetAddr)
default: default:
return fmt.Errorf("unsupported protocol: %s", proto) return fmt.Errorf(errUnsupportedProtoFmt, proto)
} }
logger.Info("Started %s proxy to %s", proto, targetAddr) logger.Info("Started %s proxy to %s", proto, targetAddr)
@@ -236,54 +482,84 @@ func (pm *ProxyManager) startTarget(proto, listenIP string, port int, targetAddr
return nil return nil
} }
// getEntry returns per-tunnel entry or nil.
func (pm *ProxyManager) getEntry(id string) *tunnelEntry {
pm.mutex.RLock()
e := pm.tunnels[id]
pm.mutex.RUnlock()
return e
}
func (pm *ProxyManager) handleTCPProxy(listener net.Listener, targetAddr string) { func (pm *ProxyManager) handleTCPProxy(listener net.Listener, targetAddr string) {
for { for {
conn, err := listener.Accept() conn, err := listener.Accept()
if err != nil { if err != nil {
// Check if we're shutting down or the listener was closed telemetry.IncProxyAccept(context.Background(), pm.currentTunnelID, "tcp", "failure", classifyProxyError(err))
if !pm.running { if !pm.running {
return return
} }
// Check for specific network errors that indicate the listener is closed
if ne, ok := err.(net.Error); ok && !ne.Temporary() { if ne, ok := err.(net.Error); ok && !ne.Temporary() {
logger.Info("TCP listener closed, stopping proxy handler for %v", listener.Addr()) logger.Info("TCP listener closed, stopping proxy handler for %v", listener.Addr())
return return
} }
logger.Error("Error accepting TCP connection: %v", err) logger.Error("Error accepting TCP connection: %v", err)
// Don't hammer the CPU if we hit a temporary error
time.Sleep(100 * time.Millisecond) time.Sleep(100 * time.Millisecond)
continue continue
} }
go func() { tunnelID := pm.currentTunnelID
telemetry.IncProxyAccept(context.Background(), tunnelID, "tcp", "success", "")
telemetry.IncProxyConnectionEvent(context.Background(), tunnelID, "tcp", telemetry.ProxyConnectionOpened)
if tunnelID != "" {
state.Global().IncSessions(tunnelID)
if e := pm.getEntry(tunnelID); e != nil {
e.activeTCP.Add(1)
}
}
go func(tunnelID string, accepted net.Conn) {
connStart := time.Now()
target, err := net.Dial("tcp", targetAddr) target, err := net.Dial("tcp", targetAddr)
if err != nil { if err != nil {
logger.Error("Error connecting to target: %v", err) logger.Error("Error connecting to target: %v", err)
conn.Close() accepted.Close()
telemetry.IncProxyAccept(context.Background(), tunnelID, "tcp", "failure", classifyProxyError(err))
telemetry.IncProxyConnectionEvent(context.Background(), tunnelID, "tcp", telemetry.ProxyConnectionClosed)
telemetry.ObserveProxyConnectionDuration(context.Background(), tunnelID, "tcp", "failure", time.Since(connStart).Seconds())
return return
} }
// Create a WaitGroup to ensure both copy operations complete entry := pm.getEntry(tunnelID)
if entry == nil {
entry = &tunnelEntry{}
}
var wg sync.WaitGroup var wg sync.WaitGroup
wg.Add(2) wg.Add(2)
go func() { go func(ent *tunnelEntry) {
defer wg.Done() defer wg.Done()
io.Copy(target, conn) cw := &countingWriter{ctx: context.Background(), w: target, set: ent.attrInTCP, pm: pm, ent: ent, out: false, proto: "tcp"}
target.Close() _, _ = io.Copy(cw, accepted)
}() _ = target.Close()
}(entry)
go func() { go func(ent *tunnelEntry) {
defer wg.Done() defer wg.Done()
io.Copy(conn, target) cw := &countingWriter{ctx: context.Background(), w: accepted, set: ent.attrOutTCP, pm: pm, ent: ent, out: true, proto: "tcp"}
conn.Close() _, _ = io.Copy(cw, target)
}() _ = accepted.Close()
}(entry)
// Wait for both copies to complete
wg.Wait() wg.Wait()
}() if tunnelID != "" {
state.Global().DecSessions(tunnelID)
if e := pm.getEntry(tunnelID); e != nil {
e.activeTCP.Add(-1)
}
}
telemetry.ObserveProxyConnectionDuration(context.Background(), tunnelID, "tcp", "success", time.Since(connStart).Seconds())
telemetry.IncProxyConnectionEvent(context.Background(), tunnelID, "tcp", telemetry.ProxyConnectionClosed)
}(tunnelID, conn)
} }
} }
@@ -326,6 +602,18 @@ func (pm *ProxyManager) handleUDPProxy(conn *gonet.UDPConn, targetAddr string) {
} }
clientKey := remoteAddr.String() clientKey := remoteAddr.String()
// bytes from client -> target (direction=in)
if pm.currentTunnelID != "" && n > 0 {
if pm.asyncBytes {
if e := pm.getEntry(pm.currentTunnelID); e != nil {
e.bytesInUDP.Add(uint64(n))
}
} else {
if e := pm.getEntry(pm.currentTunnelID); e != nil {
telemetry.AddTunnelBytesSet(context.Background(), int64(n), e.attrInUDP)
}
}
}
clientsMutex.RLock() clientsMutex.RLock()
targetConn, exists := clientConns[clientKey] targetConn, exists := clientConns[clientKey]
clientsMutex.RUnlock() clientsMutex.RUnlock()
@@ -334,28 +622,44 @@ func (pm *ProxyManager) handleUDPProxy(conn *gonet.UDPConn, targetAddr string) {
targetUDPAddr, err := net.ResolveUDPAddr("udp", targetAddr) targetUDPAddr, err := net.ResolveUDPAddr("udp", targetAddr)
if err != nil { if err != nil {
logger.Error("Error resolving target address: %v", err) logger.Error("Error resolving target address: %v", err)
telemetry.IncProxyAccept(context.Background(), pm.currentTunnelID, "udp", "failure", "resolve")
continue continue
} }
targetConn, err = net.DialUDP("udp", nil, targetUDPAddr) targetConn, err = net.DialUDP("udp", nil, targetUDPAddr)
if err != nil { if err != nil {
logger.Error("Error connecting to target: %v", err) logger.Error("Error connecting to target: %v", err)
telemetry.IncProxyAccept(context.Background(), pm.currentTunnelID, "udp", "failure", classifyProxyError(err))
continue continue
} }
tunnelID := pm.currentTunnelID
telemetry.IncProxyAccept(context.Background(), tunnelID, "udp", "success", "")
telemetry.IncProxyConnectionEvent(context.Background(), tunnelID, "udp", telemetry.ProxyConnectionOpened)
// Only increment activeUDP after a successful DialUDP
if e := pm.getEntry(tunnelID); e != nil {
e.activeUDP.Add(1)
}
clientsMutex.Lock() clientsMutex.Lock()
clientConns[clientKey] = targetConn clientConns[clientKey] = targetConn
clientsMutex.Unlock() clientsMutex.Unlock()
go func(clientKey string, targetConn *net.UDPConn, remoteAddr net.Addr) { go func(clientKey string, targetConn *net.UDPConn, remoteAddr net.Addr, tunnelID string) {
start := time.Now()
result := "success"
defer func() { defer func() {
// Always clean up when this goroutine exits // Always clean up when this goroutine exits
clientsMutex.Lock() clientsMutex.Lock()
if storedConn, exists := clientConns[clientKey]; exists && storedConn == targetConn { if storedConn, exists := clientConns[clientKey]; exists && storedConn == targetConn {
delete(clientConns, clientKey) delete(clientConns, clientKey)
targetConn.Close() targetConn.Close()
if e := pm.getEntry(tunnelID); e != nil {
e.activeUDP.Add(-1)
}
} }
clientsMutex.Unlock() clientsMutex.Unlock()
telemetry.ObserveProxyConnectionDuration(context.Background(), tunnelID, "udp", result, time.Since(start).Seconds())
telemetry.IncProxyConnectionEvent(context.Background(), tunnelID, "udp", telemetry.ProxyConnectionClosed)
}() }()
buffer := make([]byte, 65507) buffer := make([]byte, 65507)
@@ -363,25 +667,52 @@ func (pm *ProxyManager) handleUDPProxy(conn *gonet.UDPConn, targetAddr string) {
n, _, err := targetConn.ReadFromUDP(buffer) n, _, err := targetConn.ReadFromUDP(buffer)
if err != nil { if err != nil {
logger.Error("Error reading from target: %v", err) logger.Error("Error reading from target: %v", err)
result = "failure"
return // defer will handle cleanup return // defer will handle cleanup
} }
// bytes from target -> client (direction=out)
if pm.currentTunnelID != "" && n > 0 {
if pm.asyncBytes {
if e := pm.getEntry(pm.currentTunnelID); e != nil {
e.bytesOutUDP.Add(uint64(n))
}
} else {
if e := pm.getEntry(pm.currentTunnelID); e != nil {
telemetry.AddTunnelBytesSet(context.Background(), int64(n), e.attrOutUDP)
}
}
}
_, err = conn.WriteTo(buffer[:n], remoteAddr) _, err = conn.WriteTo(buffer[:n], remoteAddr)
if err != nil { if err != nil {
logger.Error("Error writing to client: %v", err) logger.Error("Error writing to client: %v", err)
telemetry.IncProxyDrops(context.Background(), pm.currentTunnelID, "udp")
result = "failure"
return // defer will handle cleanup return // defer will handle cleanup
} }
} }
}(clientKey, targetConn, remoteAddr) }(clientKey, targetConn, remoteAddr, tunnelID)
} }
_, err = targetConn.Write(buffer[:n]) written, err := targetConn.Write(buffer[:n])
if err != nil { if err != nil {
logger.Error("Error writing to target: %v", err) logger.Error("Error writing to target: %v", err)
telemetry.IncProxyDrops(context.Background(), pm.currentTunnelID, "udp")
targetConn.Close() targetConn.Close()
clientsMutex.Lock() clientsMutex.Lock()
delete(clientConns, clientKey) delete(clientConns, clientKey)
clientsMutex.Unlock() clientsMutex.Unlock()
} else if pm.currentTunnelID != "" && written > 0 {
if pm.asyncBytes {
if e := pm.getEntry(pm.currentTunnelID); e != nil {
e.bytesInUDP.Add(uint64(written))
}
} else {
if e := pm.getEntry(pm.currentTunnelID); e != nil {
telemetry.AddTunnelBytesSet(context.Background(), int64(written), e.attrInUDP)
}
}
} }
} }
} }

12
stub.go
View File

@@ -8,25 +8,27 @@ import (
) )
func setupClientsNative(client *websocket.Client, host string) { func setupClientsNative(client *websocket.Client, host string) {
return // This function is not implemented for non-Linux systems. _ = client
_ = host
// No-op for non-Linux systems
} }
func closeWgServiceNative() { func closeWgServiceNative() {
// No-op for non-Linux systems // No-op for non-Linux systems
return
} }
func clientsOnConnectNative() { func clientsOnConnectNative() {
// No-op for non-Linux systems // No-op for non-Linux systems
return
} }
func clientsHandleNewtConnectionNative(publicKey, endpoint string) { func clientsHandleNewtConnectionNative(publicKey, endpoint string) {
_ = publicKey
_ = endpoint
// No-op for non-Linux systems // No-op for non-Linux systems
return
} }
func clientsAddProxyTargetNative(pm *proxy.ProxyManager, tunnelIp string) { func clientsAddProxyTargetNative(pm *proxy.ProxyManager, tunnelIp string) {
_ = pm
_ = tunnelIp
// No-op for non-Linux systems // No-op for non-Linux systems
return
} }

28
util.go
View File

@@ -2,6 +2,7 @@ package main
import ( import (
"bytes" "bytes"
"context"
"encoding/base64" "encoding/base64"
"encoding/hex" "encoding/hex"
"encoding/json" "encoding/json"
@@ -14,6 +15,7 @@ import (
"math/rand" "math/rand"
"github.com/fosrl/newt/internal/telemetry"
"github.com/fosrl/newt/logger" "github.com/fosrl/newt/logger"
"github.com/fosrl/newt/proxy" "github.com/fosrl/newt/proxy"
"github.com/fosrl/newt/websocket" "github.com/fosrl/newt/websocket"
@@ -24,6 +26,8 @@ import (
"gopkg.in/yaml.v3" "gopkg.in/yaml.v3"
) )
const msgHealthFileWriteFailed = "Failed to write health file: %v"
func fixKey(key string) string { func fixKey(key string) string {
// Remove any whitespace // Remove any whitespace
key = strings.TrimSpace(key) key = strings.TrimSpace(key)
@@ -176,7 +180,7 @@ func pingWithRetry(tnet *netstack.Net, dst string, timeout time.Duration) (stopC
if healthFile != "" { if healthFile != "" {
err := os.WriteFile(healthFile, []byte("ok"), 0644) err := os.WriteFile(healthFile, []byte("ok"), 0644)
if err != nil { if err != nil {
logger.Warn("Failed to write health file: %v", err) logger.Warn(msgHealthFileWriteFailed, err)
} }
} }
return stopChan, nil return stopChan, nil
@@ -217,20 +221,22 @@ func pingWithRetry(tnet *netstack.Net, dst string, timeout time.Duration) (stopC
if healthFile != "" { if healthFile != "" {
err := os.WriteFile(healthFile, []byte("ok"), 0644) err := os.WriteFile(healthFile, []byte("ok"), 0644)
if err != nil { if err != nil {
logger.Warn("Failed to write health file: %v", err) logger.Warn(msgHealthFileWriteFailed, err)
} }
} }
}
case <-pingStopChan:
// Stop the goroutine when signaled
return return
} }
} }
}
}() }()
// Return an error for the first batch of attempts (to maintain compatibility with existing code) // Return an error for the first batch of attempts (to maintain compatibility with existing code)
return stopChan, fmt.Errorf("initial ping attempts failed, continuing in background") return stopChan, fmt.Errorf("initial ping attempts failed, continuing in background")
} }
func startPingCheck(tnet *netstack.Net, serverIP string, client *websocket.Client) chan struct{} { func startPingCheck(tnet *netstack.Net, serverIP string, client *websocket.Client, tunnelID string) chan struct{} {
maxInterval := 6 * time.Second maxInterval := 6 * time.Second
currentInterval := pingInterval currentInterval := pingInterval
consecutiveFailures := 0 consecutiveFailures := 0
@@ -293,6 +299,9 @@ func startPingCheck(tnet *netstack.Net, serverIP string, client *websocket.Clien
if !connectionLost { if !connectionLost {
connectionLost = true connectionLost = true
logger.Warn("Connection to server lost after %d failures. Continuous reconnection attempts will be made.", consecutiveFailures) logger.Warn("Connection to server lost after %d failures. Continuous reconnection attempts will be made.", consecutiveFailures)
if tunnelID != "" {
telemetry.IncReconnect(context.Background(), tunnelID, "client", telemetry.ReasonTimeout)
}
stopFunc = client.SendMessageInterval("newt/ping/request", map[string]interface{}{}, 3*time.Second) stopFunc = client.SendMessageInterval("newt/ping/request", map[string]interface{}{}, 3*time.Second)
// Send registration message to the server for backward compatibility // Send registration message to the server for backward compatibility
err := client.SendMessage("newt/wg/register", map[string]interface{}{ err := client.SendMessage("newt/wg/register", map[string]interface{}{
@@ -319,6 +328,10 @@ func startPingCheck(tnet *netstack.Net, serverIP string, client *websocket.Clien
} else { } else {
// Track recent latencies // Track recent latencies
recentLatencies = append(recentLatencies, latency) recentLatencies = append(recentLatencies, latency)
// Record tunnel latency (limit sampling to this periodic check)
if tunnelID != "" {
telemetry.ObserveTunnelLatency(context.Background(), tunnelID, "wireguard", latency.Seconds())
}
if len(recentLatencies) > 10 { if len(recentLatencies) > 10 {
recentLatencies = recentLatencies[1:] recentLatencies = recentLatencies[1:]
} }
@@ -468,7 +481,8 @@ func updateTargets(pm *proxy.ProxyManager, action string, tunnelIP string, proto
continue continue
} }
if action == "add" { switch action {
case "add":
target := parts[1] + ":" + parts[2] target := parts[1] + ":" + parts[2]
// Call updown script if provided // Call updown script if provided
@@ -494,7 +508,7 @@ func updateTargets(pm *proxy.ProxyManager, action string, tunnelIP string, proto
// Add the new target // Add the new target
pm.AddTarget(proto, tunnelIP, port, processedTarget) pm.AddTarget(proto, tunnelIP, port, processedTarget)
} else if action == "remove" { case "remove":
logger.Info("Removing target with port %d", port) logger.Info("Removing target with port %d", port)
target := parts[1] + ":" + parts[2] target := parts[1] + ":" + parts[2]
@@ -512,6 +526,8 @@ func updateTargets(pm *proxy.ProxyManager, action string, tunnelIP string, proto
logger.Error("Failed to remove target: %v", err) logger.Error("Failed to remove target: %v", err)
return err return err
} }
default:
logger.Info("Unknown action: %s", action)
} }
} }

View File

@@ -7,6 +7,7 @@ import (
"encoding/json" "encoding/json"
"fmt" "fmt"
"io" "io"
"net"
"net/http" "net/http"
"net/url" "net/url"
"os" "os"
@@ -18,6 +19,11 @@ import (
"github.com/fosrl/newt/logger" "github.com/fosrl/newt/logger"
"github.com/gorilla/websocket" "github.com/gorilla/websocket"
"context"
"github.com/fosrl/newt/internal/telemetry"
"go.opentelemetry.io/otel"
) )
type Client struct { type Client struct {
@@ -37,6 +43,8 @@ type Client struct {
writeMux sync.Mutex writeMux sync.Mutex
clientType string // Type of client (e.g., "newt", "olm") clientType string // Type of client (e.g., "newt", "olm")
tlsConfig TLSConfig tlsConfig TLSConfig
metricsCtxMu sync.RWMutex
metricsCtx context.Context
configNeedsSave bool // Flag to track if config needs to be saved configNeedsSave bool // Flag to track if config needs to be saved
} }
@@ -81,6 +89,26 @@ func (c *Client) OnTokenUpdate(callback func(token string)) {
c.onTokenUpdate = callback c.onTokenUpdate = callback
} }
func (c *Client) metricsContext() context.Context {
c.metricsCtxMu.RLock()
defer c.metricsCtxMu.RUnlock()
if c.metricsCtx != nil {
return c.metricsCtx
}
return context.Background()
}
func (c *Client) setMetricsContext(ctx context.Context) {
c.metricsCtxMu.Lock()
c.metricsCtx = ctx
c.metricsCtxMu.Unlock()
}
// MetricsContext exposes the context used for telemetry emission when a connection is active.
func (c *Client) MetricsContext() context.Context {
return c.metricsContext()
}
// NewClient creates a new websocket client // NewClient creates a new websocket client
func NewClient(clientType string, ID, secret string, endpoint string, pingInterval time.Duration, pingTimeout time.Duration, opts ...ClientOption) (*Client, error) { func NewClient(clientType string, ID, secret string, endpoint string, pingInterval time.Duration, pingTimeout time.Duration, opts ...ClientOption) (*Client, error) {
config := &Config{ config := &Config{
@@ -140,6 +168,7 @@ func (c *Client) Close() error {
// Set connection status to false // Set connection status to false
c.setConnected(false) c.setConnected(false)
telemetry.SetWSConnectionState(false)
// Close the WebSocket connection gracefully // Close the WebSocket connection gracefully
if c.conn != nil { if c.conn != nil {
@@ -170,7 +199,11 @@ func (c *Client) SendMessage(messageType string, data interface{}) error {
c.writeMux.Lock() c.writeMux.Lock()
defer c.writeMux.Unlock() defer c.writeMux.Unlock()
return c.conn.WriteJSON(msg) if err := c.conn.WriteJSON(msg); err != nil {
return err
}
telemetry.IncWSMessage(c.metricsContext(), "out", "text")
return nil
} }
func (c *Client) SendMessageInterval(messageType string, data interface{}, interval time.Duration) (stop func()) { func (c *Client) SendMessageInterval(messageType string, data interface{}, interval time.Duration) (stop func()) {
@@ -265,8 +298,12 @@ func (c *Client) getToken() (string, error) {
return "", fmt.Errorf("failed to marshal token request data: %w", err) return "", fmt.Errorf("failed to marshal token request data: %w", err)
} }
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
defer cancel()
// Create a new request // Create a new request
req, err := http.NewRequest( req, err := http.NewRequestWithContext(
ctx,
"POST", "POST",
baseEndpoint+"/api/v1/auth/"+c.clientType+"/get-token", baseEndpoint+"/api/v1/auth/"+c.clientType+"/get-token",
bytes.NewBuffer(jsonData), bytes.NewBuffer(jsonData),
@@ -288,6 +325,8 @@ func (c *Client) getToken() (string, error) {
} }
resp, err := client.Do(req) resp, err := client.Do(req)
if err != nil { if err != nil {
telemetry.IncConnAttempt(ctx, "auth", "failure")
telemetry.IncConnError(ctx, "auth", classifyConnError(err))
return "", fmt.Errorf("failed to request new token: %w", err) return "", fmt.Errorf("failed to request new token: %w", err)
} }
defer resp.Body.Close() defer resp.Body.Close()
@@ -295,6 +334,16 @@ func (c *Client) getToken() (string, error) {
if resp.StatusCode != http.StatusOK { if resp.StatusCode != http.StatusOK {
body, _ := io.ReadAll(resp.Body) body, _ := io.ReadAll(resp.Body)
logger.Error("Failed to get token with status code: %d, body: %s", resp.StatusCode, string(body)) logger.Error("Failed to get token with status code: %d, body: %s", resp.StatusCode, string(body))
telemetry.IncConnAttempt(ctx, "auth", "failure")
etype := "io_error"
if resp.StatusCode == http.StatusUnauthorized || resp.StatusCode == http.StatusForbidden {
etype = "auth_failed"
}
telemetry.IncConnError(ctx, "auth", etype)
// Reconnect reason mapping for auth failures
if resp.StatusCode == http.StatusUnauthorized || resp.StatusCode == http.StatusForbidden {
telemetry.IncReconnect(ctx, c.config.ID, "client", telemetry.ReasonAuthError)
}
return "", fmt.Errorf("failed to get token with status code: %d, body: %s", resp.StatusCode, string(body)) return "", fmt.Errorf("failed to get token with status code: %d, body: %s", resp.StatusCode, string(body))
} }
@@ -313,10 +362,55 @@ func (c *Client) getToken() (string, error) {
} }
logger.Debug("Received token: %s", tokenResp.Data.Token) logger.Debug("Received token: %s", tokenResp.Data.Token)
telemetry.IncConnAttempt(ctx, "auth", "success")
return tokenResp.Data.Token, nil return tokenResp.Data.Token, nil
} }
// classifyConnError maps to fixed, low-cardinality error_type values.
// Allowed enum: dial_timeout, tls_handshake, auth_failed, io_error
func classifyConnError(err error) string {
if err == nil {
return ""
}
msg := strings.ToLower(err.Error())
switch {
case strings.Contains(msg, "tls") || strings.Contains(msg, "certificate"):
return "tls_handshake"
case strings.Contains(msg, "timeout") || strings.Contains(msg, "i/o timeout") || strings.Contains(msg, "deadline exceeded"):
return "dial_timeout"
case strings.Contains(msg, "unauthorized") || strings.Contains(msg, "forbidden"):
return "auth_failed"
default:
// Group remaining network/socket errors as io_error to avoid label explosion
return "io_error"
}
}
func classifyWSDisconnect(err error) (result, reason string) {
if err == nil {
return "success", "normal"
}
if websocket.IsCloseError(err, websocket.CloseNormalClosure) {
return "success", "normal"
}
if ne, ok := err.(net.Error); ok && ne.Timeout() {
return "error", "timeout"
}
if websocket.IsUnexpectedCloseError(err, websocket.CloseGoingAway, websocket.CloseAbnormalClosure) {
return "error", "unexpected_close"
}
msg := strings.ToLower(err.Error())
switch {
case strings.Contains(msg, "eof"):
return "error", "eof"
case strings.Contains(msg, "reset"):
return "error", "connection_reset"
default:
return "error", "read_error"
}
}
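The connection error classifier above is deliberately string-based and capped at four values; a hedged test sketch of the mapping, assuming it sits alongside the client in the newt websocket package:

```go
package websocket

import (
	"errors"
	"testing"
)

// Illustrative only: checks the fixed error_type buckets documented above.
func TestClassifyConnErrorBuckets(t *testing.T) {
	cases := map[string]error{
		"tls_handshake": errors.New("x509: certificate signed by unknown authority"),
		"dial_timeout":  errors.New("dial tcp 10.0.0.1:443: i/o timeout"),
		"auth_failed":   errors.New("server returned 401 unauthorized"),
		"io_error":      errors.New("connection reset by peer"),
	}
	for want, err := range cases {
		if got := classifyConnError(err); got != want {
			t.Errorf("classifyConnError(%v) = %q, want %q", err, got, want)
		}
	}
}
```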
func (c *Client) connectWithRetry() { func (c *Client) connectWithRetry() {
for { for {
select { select {
@@ -335,9 +429,13 @@ func (c *Client) connectWithRetry() {
} }
func (c *Client) establishConnection() error { func (c *Client) establishConnection() error {
ctx := context.Background()
// Get token for authentication // Get token for authentication
token, err := c.getToken() token, err := c.getToken()
if err != nil { if err != nil {
telemetry.IncConnAttempt(ctx, "websocket", "failure")
telemetry.IncConnError(ctx, "websocket", classifyConnError(err))
return fmt.Errorf("failed to get token: %w", err) return fmt.Errorf("failed to get token: %w", err)
} }
@@ -370,7 +468,12 @@ func (c *Client) establishConnection() error {
q.Set("clientType", c.clientType) q.Set("clientType", c.clientType)
u.RawQuery = q.Encode() u.RawQuery = q.Encode()
// Connect to WebSocket // Connect to WebSocket (optional span)
tr := otel.Tracer("newt")
ctx, span := tr.Start(ctx, "ws.connect")
defer span.End()
start := time.Now()
dialer := websocket.DefaultDialer dialer := websocket.DefaultDialer
// Use new TLS configuration method // Use new TLS configuration method
@@ -392,18 +495,42 @@ func (c *Client) establishConnection() error {
logger.Debug("WebSocket TLS certificate verification disabled via SKIP_TLS_VERIFY environment variable") logger.Debug("WebSocket TLS certificate verification disabled via SKIP_TLS_VERIFY environment variable")
} }
conn, _, err := dialer.Dial(u.String(), nil) conn, _, err := dialer.DialContext(ctx, u.String(), nil)
lat := time.Since(start).Seconds()
if err != nil { if err != nil {
telemetry.IncConnAttempt(ctx, "websocket", "failure")
etype := classifyConnError(err)
telemetry.IncConnError(ctx, "websocket", etype)
telemetry.ObserveWSConnectLatency(ctx, lat, "failure", etype)
// Map handshake-related errors to reconnect reasons where appropriate
if etype == "tls_handshake" {
telemetry.IncReconnect(ctx, c.config.ID, "client", telemetry.ReasonHandshakeError)
} else if etype == "dial_timeout" {
telemetry.IncReconnect(ctx, c.config.ID, "client", telemetry.ReasonTimeout)
} else {
telemetry.IncReconnect(ctx, c.config.ID, "client", telemetry.ReasonError)
}
telemetry.IncWSReconnect(ctx, etype)
return fmt.Errorf("failed to connect to WebSocket: %w", err) return fmt.Errorf("failed to connect to WebSocket: %w", err)
} }
telemetry.IncConnAttempt(ctx, "websocket", "success")
telemetry.ObserveWSConnectLatency(ctx, lat, "success", "")
c.conn = conn c.conn = conn
c.setConnected(true) c.setConnected(true)
telemetry.SetWSConnectionState(true)
c.setMetricsContext(ctx)
sessionStart := time.Now()
// Wire up pong handler for metrics
c.conn.SetPongHandler(func(appData string) error {
telemetry.IncWSMessage(c.metricsContext(), "in", "pong")
return nil
})
// Start the ping monitor // Start the ping monitor
go c.pingMonitor() go c.pingMonitor()
// Start the read pump with disconnect detection // Start the read pump with disconnect detection
go c.readPumpWithDisconnectDetection() go c.readPumpWithDisconnectDetection(sessionStart)
if c.onConnect != nil { if c.onConnect != nil {
err := c.saveConfig() err := c.saveConfig()
@@ -496,6 +623,9 @@ func (c *Client) pingMonitor() {
} }
c.writeMux.Lock() c.writeMux.Lock()
err := c.conn.WriteControl(websocket.PingMessage, []byte{}, time.Now().Add(c.pingTimeout)) err := c.conn.WriteControl(websocket.PingMessage, []byte{}, time.Now().Add(c.pingTimeout))
if err == nil {
telemetry.IncWSMessage(c.metricsContext(), "out", "ping")
}
c.writeMux.Unlock() c.writeMux.Unlock()
if err != nil { if err != nil {
// Check if we're shutting down before logging error and reconnecting // Check if we're shutting down before logging error and reconnecting
@@ -505,6 +635,8 @@ func (c *Client) pingMonitor() {
return return
default: default:
logger.Error("Ping failed: %v", err) logger.Error("Ping failed: %v", err)
telemetry.IncWSKeepaliveFailure(c.metricsContext(), "ping_write")
telemetry.IncWSReconnect(c.metricsContext(), "ping_write")
c.reconnect() c.reconnect()
return return
} }
@@ -514,17 +646,26 @@ func (c *Client) pingMonitor() {
} }
// readPumpWithDisconnectDetection reads messages and triggers reconnect on error // readPumpWithDisconnectDetection reads messages and triggers reconnect on error
func (c *Client) readPumpWithDisconnectDetection() { func (c *Client) readPumpWithDisconnectDetection(started time.Time) {
ctx := c.metricsContext()
disconnectReason := "shutdown"
disconnectResult := "success"
defer func() { defer func() {
if c.conn != nil { if c.conn != nil {
c.conn.Close() c.conn.Close()
} }
if !started.IsZero() {
telemetry.ObserveWSSessionDuration(ctx, time.Since(started).Seconds(), disconnectResult)
}
telemetry.IncWSDisconnect(ctx, disconnectReason, disconnectResult)
// Only attempt reconnect if we're not shutting down // Only attempt reconnect if we're not shutting down
select { select {
case <-c.done: case <-c.done:
// Shutting down, don't reconnect // Shutting down, don't reconnect
return return
default: default:
telemetry.IncWSReconnect(ctx, disconnectReason)
c.reconnect() c.reconnect()
} }
}() }()
@@ -532,24 +673,34 @@ func (c *Client) readPumpWithDisconnectDetection() {
for { for {
select { select {
case <-c.done: case <-c.done:
disconnectReason = "shutdown"
disconnectResult = "success"
return return
default: default:
var msg WSMessage var msg WSMessage
err := c.conn.ReadJSON(&msg) err := c.conn.ReadJSON(&msg)
if err == nil {
telemetry.IncWSMessage(c.metricsContext(), "in", "text")
}
if err != nil { if err != nil {
// Check if we're shutting down before logging error // Check if we're shutting down before logging error
select { select {
case <-c.done: case <-c.done:
// Expected during shutdown, don't log as error // Expected during shutdown, don't log as error
logger.Debug("WebSocket connection closed during shutdown") logger.Debug("WebSocket connection closed during shutdown")
disconnectReason = "shutdown"
disconnectResult = "success"
return return
default: default:
// Unexpected error during normal operation // Unexpected error during normal operation
disconnectResult, disconnectReason = classifyWSDisconnect(err)
if disconnectResult == "error" {
if websocket.IsUnexpectedCloseError(err, websocket.CloseGoingAway, websocket.CloseAbnormalClosure, websocket.CloseNormalClosure) { if websocket.IsUnexpectedCloseError(err, websocket.CloseGoingAway, websocket.CloseAbnormalClosure, websocket.CloseNormalClosure) {
logger.Error("WebSocket read error: %v", err) logger.Error("WebSocket read error: %v", err)
} else { } else {
logger.Debug("WebSocket connection closed: %v", err) logger.Debug("WebSocket connection closed: %v", err)
} }
}
return // triggers reconnect via defer return // triggers reconnect via defer
} }
} }
@@ -565,6 +716,7 @@ func (c *Client) readPumpWithDisconnectDetection() {
func (c *Client) reconnect() { func (c *Client) reconnect() {
c.setConnected(false) c.setConnected(false)
telemetry.SetWSConnectionState(false)
if c.conn != nil { if c.conn != nil {
c.conn.Close() c.conn.Close()
c.conn = nil c.conn = nil

View File

@@ -3,6 +3,7 @@
package wg package wg
import ( import (
"context"
"encoding/json" "encoding/json"
"errors" "errors"
"fmt" "fmt"
@@ -13,16 +14,19 @@ import (
"sync" "sync"
"time" "time"
"math/rand"
"github.com/fosrl/newt/logger" "github.com/fosrl/newt/logger"
"github.com/fosrl/newt/network" "github.com/fosrl/newt/network"
"github.com/fosrl/newt/websocket" "github.com/fosrl/newt/websocket"
"github.com/vishvananda/netlink" "github.com/vishvananda/netlink"
"golang.org/x/crypto/chacha20poly1305" "golang.org/x/crypto/chacha20poly1305"
"golang.org/x/crypto/curve25519" "golang.org/x/crypto/curve25519"
"golang.org/x/exp/rand"
"golang.zx2c4.com/wireguard/conn" "golang.zx2c4.com/wireguard/conn"
"golang.zx2c4.com/wireguard/wgctrl" "golang.zx2c4.com/wireguard/wgctrl"
"golang.zx2c4.com/wireguard/wgctrl/wgtypes" "golang.zx2c4.com/wireguard/wgctrl/wgtypes"
"github.com/fosrl/newt/internal/telemetry"
) )
type WgConfig struct { type WgConfig struct {
@@ -106,7 +110,7 @@ func FindAvailableUDPPort(minPort, maxPort uint16) (uint16, error) {
} }
// Fisher-Yates shuffle to randomize the port order // Fisher-Yates shuffle to randomize the port order
rand.Seed(uint64(time.Now().UnixNano())) rand.Seed(time.Now().UnixNano())
for i := len(portRange) - 1; i > 0; i-- { for i := len(portRange) - 1; i > 0; i-- {
j := rand.Intn(i + 1) j := rand.Intn(i + 1)
portRange[i], portRange[j] = portRange[j], portRange[i] portRange[i], portRange[j] = portRange[j], portRange[i]
@@ -280,6 +284,15 @@ func (s *WireGuardService) LoadRemoteConfig() error {
} }
func (s *WireGuardService) handleConfig(msg websocket.WSMessage) { func (s *WireGuardService) handleConfig(msg websocket.WSMessage) {
ctx := context.Background()
if s.client != nil {
ctx = s.client.MetricsContext()
}
result := "success"
defer func() {
telemetry.IncConfigReload(ctx, result)
}()
var config WgConfig var config WgConfig
logger.Debug("Received message: %v", msg) logger.Debug("Received message: %v", msg)
@@ -288,11 +301,13 @@ func (s *WireGuardService) handleConfig(msg websocket.WSMessage) {
jsonData, err := json.Marshal(msg.Data) jsonData, err := json.Marshal(msg.Data)
if err != nil { if err != nil {
logger.Info("Error marshaling data: %v", err) logger.Info("Error marshaling data: %v", err)
result = "failure"
return return
} }
if err := json.Unmarshal(jsonData, &config); err != nil { if err := json.Unmarshal(jsonData, &config); err != nil {
logger.Info("Error unmarshaling target data: %v", err) logger.Info("Error unmarshaling target data: %v", err)
result = "failure"
return return
} }
s.config = config s.config = config
@@ -302,13 +317,29 @@ func (s *WireGuardService) handleConfig(msg websocket.WSMessage) {
s.stopGetConfig = nil s.stopGetConfig = nil
} }
// Ensure the WireGuard interface and peers are configured // telemetry: config reload success
if err := s.ensureWireguardInterface(config); err != nil { // Optional reconnect reason mapping: config change
logger.Error("Failed to ensure WireGuard interface: %v", err) if s.serverPubKey != "" {
telemetry.IncReconnect(ctx, s.serverPubKey, "client", telemetry.ReasonConfigChange)
} }
// Ensure the WireGuard interface and peers are configured
start := time.Now()
if err := s.ensureWireguardInterface(config); err != nil {
logger.Error("Failed to ensure WireGuard interface: %v", err)
telemetry.ObserveConfigApply(ctx, "interface", "failure", time.Since(start).Seconds())
result = "failure"
} else {
telemetry.ObserveConfigApply(ctx, "interface", "success", time.Since(start).Seconds())
}
startPeers := time.Now()
if err := s.ensureWireguardPeers(config.Peers); err != nil { if err := s.ensureWireguardPeers(config.Peers); err != nil {
logger.Error("Failed to ensure WireGuard peers: %v", err) logger.Error("Failed to ensure WireGuard peers: %v", err)
telemetry.ObserveConfigApply(ctx, "peer", "failure", time.Since(startPeers).Seconds())
result = "failure"
} else {
telemetry.ObserveConfigApply(ctx, "peer", "success", time.Since(startPeers).Seconds())
} }
} }

View File

@@ -1,6 +1,7 @@
package wgnetstack package wgnetstack
import ( import (
"context"
"crypto/rand" "crypto/rand"
"encoding/base64" "encoding/base64"
"encoding/hex" "encoding/hex"
@@ -26,6 +27,8 @@ import (
"golang.zx2c4.com/wireguard/tun" "golang.zx2c4.com/wireguard/tun"
"golang.zx2c4.com/wireguard/tun/netstack" "golang.zx2c4.com/wireguard/tun/netstack"
"golang.zx2c4.com/wireguard/wgctrl/wgtypes" "golang.zx2c4.com/wireguard/wgctrl/wgtypes"
"github.com/fosrl/newt/internal/telemetry"
) )
type WgConfig struct { type WgConfig struct {
@@ -242,6 +245,12 @@ func NewWireGuardService(interfaceName string, mtu int, generateAndSaveKeyTo str
return service, nil return service, nil
} }
// ReportRTT allows reporting native RTTs to telemetry, rate-limited externally.
func (s *WireGuardService) ReportRTT(seconds float64) {
if s.serverPubKey == "" {
return
}
telemetry.ObserveTunnelLatency(context.Background(), s.serverPubKey, "wireguard", seconds)
}
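ReportRTT leaves rate limiting to the caller; a minimal sketch of one way a caller might sample RTTs on a timer before reporting (the probe, interval, and helper names are assumptions, not Newt code):

```go
package main

import (
	"fmt"
	"time"
)

// reportRTT stands in for (*WireGuardService).ReportRTT in this sketch.
func reportRTT(seconds float64) { fmt.Printf("tunnel rtt: %.3fs\n", seconds) }

func main() {
	measure := func() float64 { return 0.042 } // hypothetical RTT probe

	ticker := time.NewTicker(1 * time.Second) // report at most once per tick
	defer ticker.Stop()
	for i := 0; i < 3; i++ {
		<-ticker.C
		reportRTT(measure())
	}
}
```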
func (s *WireGuardService) addTcpTarget(msg websocket.WSMessage) { func (s *WireGuardService) addTcpTarget(msg websocket.WSMessage) {
logger.Debug("Received: %+v", msg) logger.Debug("Received: %+v", msg)
@@ -249,7 +258,7 @@ func (s *WireGuardService) addTcpTarget(msg websocket.WSMessage) {
if s.TunnelIP == "" || s.proxyManager == nil { if s.TunnelIP == "" || s.proxyManager == nil {
logger.Info("No tunnel IP or proxy manager available") logger.Info("No tunnel IP or proxy manager available")
return return
} }
targetData, err := parseTargetData(msg.Data) targetData, err := parseTargetData(msg.Data)
if err != nil { if err != nil {

View File

@@ -126,7 +126,7 @@ func (s *Server) Stop() {
s.conn.Close() s.conn.Close()
} }
s.isRunning = false s.isRunning = false
logger.Info(s.outputPrefix + "Server stopped") logger.Info("%sServer stopped", s.outputPrefix)
} }
// RestartWithNetstack stops the current server and restarts it with netstack // RestartWithNetstack stops the current server and restarts it with netstack
@@ -161,7 +161,7 @@ func (s *Server) handleConnections() {
// Set read deadline to avoid blocking forever // Set read deadline to avoid blocking forever
err := s.conn.SetReadDeadline(time.Now().Add(1 * time.Second)) err := s.conn.SetReadDeadline(time.Now().Add(1 * time.Second))
if err != nil { if err != nil {
logger.Error(s.outputPrefix+"Error setting read deadline: %v", err) logger.Error("%sError setting read deadline: %v", s.outputPrefix, err)
continue continue
} }
@@ -187,7 +187,7 @@ func (s *Server) handleConnections() {
case <-s.shutdownCh: case <-s.shutdownCh:
return // Don't log error if we're shutting down return // Don't log error if we're shutting down
default: default:
logger.Error(s.outputPrefix+"Error reading from UDP: %v", err) logger.Error("%sError reading from UDP: %v", s.outputPrefix, err)
} }
continue continue
} }
@@ -219,7 +219,7 @@ func (s *Server) handleConnections() {
copy(responsePacket[5:13], buffer[5:13]) copy(responsePacket[5:13], buffer[5:13])
// Log response being sent for debugging // Log response being sent for debugging
logger.Debug(s.outputPrefix+"Sending response to %s", addr.String()) logger.Debug("%sSending response to %s", s.outputPrefix, addr.String())
// Send the response packet - handle both regular UDP and netstack UDP // Send the response packet - handle both regular UDP and netstack UDP
if s.useNetstack { if s.useNetstack {
@@ -233,9 +233,9 @@ func (s *Server) handleConnections() {
} }
if err != nil { if err != nil {
logger.Error(s.outputPrefix+"Error sending response: %v", err) logger.Error("%sError sending response: %v", s.outputPrefix, err)
} else { } else {
logger.Debug(s.outputPrefix + "Response sent successfully") logger.Debug("%sResponse sent successfully", s.outputPrefix)
} }
} }
} }