mirror of
https://github.com/fosrl/pangolin.git
synced 2026-03-24 19:46:38 +00:00
Merge branch 'dev' into msg-opt
This commit is contained in:
@@ -15,6 +15,7 @@ import { initPeerAddHandshake, updatePeer } from "../olm/peers";
|
||||
import { eq, and } from "drizzle-orm";
|
||||
import config from "@server/lib/config";
|
||||
import {
|
||||
formatEndpoint,
|
||||
generateSubnetProxyTargetV2,
|
||||
SubnetProxyTargetV2
|
||||
} from "@server/lib/ip";
|
||||
@@ -83,40 +84,42 @@ export async function buildClientConfigurationForNewtClient(
|
||||
// )
|
||||
// );
|
||||
|
||||
// update the peer info on the olm
|
||||
// if the peer has not been added yet this will be a no-op
|
||||
await updatePeer(client.clients.clientId, {
|
||||
siteId: site.siteId,
|
||||
endpoint: site.endpoint!,
|
||||
relayEndpoint: `${exitNode.endpoint}:${config.getRawConfig().gerbil.clients_start_port}`,
|
||||
publicKey: site.publicKey!,
|
||||
serverIP: site.address,
|
||||
serverPort: site.listenPort
|
||||
// remoteSubnets: generateRemoteSubnets(
|
||||
// allSiteResources.map(
|
||||
// ({ siteResources }) => siteResources
|
||||
// )
|
||||
// ),
|
||||
// aliases: generateAliasConfig(
|
||||
// allSiteResources.map(
|
||||
// ({ siteResources }) => siteResources
|
||||
// )
|
||||
// )
|
||||
});
|
||||
if (!client.clientSitesAssociationsCache.isJitMode) { // if we are adding sites through jit then dont add the site to the olm
|
||||
// update the peer info on the olm
|
||||
// if the peer has not been added yet this will be a no-op
|
||||
await updatePeer(client.clients.clientId, {
|
||||
siteId: site.siteId,
|
||||
endpoint: site.endpoint!,
|
||||
relayEndpoint: `${exitNode.endpoint}:${config.getRawConfig().gerbil.clients_start_port}`,
|
||||
publicKey: site.publicKey!,
|
||||
serverIP: site.address,
|
||||
serverPort: site.listenPort
|
||||
// remoteSubnets: generateRemoteSubnets(
|
||||
// allSiteResources.map(
|
||||
// ({ siteResources }) => siteResources
|
||||
// )
|
||||
// ),
|
||||
// aliases: generateAliasConfig(
|
||||
// allSiteResources.map(
|
||||
// ({ siteResources }) => siteResources
|
||||
// )
|
||||
// )
|
||||
});
|
||||
|
||||
// also trigger the peer add handshake in case the peer was not already added to the olm and we need to hole punch
|
||||
// if it has already been added this will be a no-op
|
||||
await initPeerAddHandshake(
|
||||
// this will kick off the add peer process for the client
|
||||
client.clients.clientId,
|
||||
{
|
||||
siteId,
|
||||
exitNode: {
|
||||
publicKey: exitNode.publicKey,
|
||||
endpoint: exitNode.endpoint
|
||||
// also trigger the peer add handshake in case the peer was not already added to the olm and we need to hole punch
|
||||
// if it has already been added this will be a no-op
|
||||
await initPeerAddHandshake(
|
||||
// this will kick off the add peer process for the client
|
||||
client.clients.clientId,
|
||||
{
|
||||
siteId,
|
||||
exitNode: {
|
||||
publicKey: exitNode.publicKey,
|
||||
endpoint: exitNode.endpoint
|
||||
}
|
||||
}
|
||||
}
|
||||
);
|
||||
);
|
||||
}
|
||||
|
||||
return {
|
||||
publicKey: client.clients.pubKey!,
|
||||
@@ -204,7 +207,8 @@ export async function buildTargetConfigurationForNewtClient(siteId: number) {
|
||||
hcTimeout: targetHealthCheck.hcTimeout,
|
||||
hcHeaders: targetHealthCheck.hcHeaders,
|
||||
hcMethod: targetHealthCheck.hcMethod,
|
||||
hcTlsServerName: targetHealthCheck.hcTlsServerName
|
||||
hcTlsServerName: targetHealthCheck.hcTlsServerName,
|
||||
hcStatus: targetHealthCheck.hcStatus
|
||||
})
|
||||
.from(targets)
|
||||
.innerJoin(resources, eq(targets.resourceId, resources.resourceId))
|
||||
@@ -221,8 +225,8 @@ export async function buildTargetConfigurationForNewtClient(siteId: number) {
|
||||
return acc;
|
||||
}
|
||||
|
||||
// Format target into string
|
||||
const formattedTarget = `${target.internalPort}:${target.ip}:${target.port}`;
|
||||
// Format target into string (handles IPv6 bracketing)
|
||||
const formattedTarget = `${target.internalPort}:${formatEndpoint(target.ip, target.port)}`;
|
||||
|
||||
// Add to the appropriate protocol array
|
||||
if (target.protocol === "tcp") {
|
||||
@@ -245,9 +249,9 @@ export async function buildTargetConfigurationForNewtClient(siteId: number) {
|
||||
!target.hcInterval ||
|
||||
!target.hcMethod
|
||||
) {
|
||||
logger.debug(
|
||||
`Skipping target ${target.targetId} due to missing health check fields`
|
||||
);
|
||||
// logger.debug(
|
||||
// `Skipping adding target health check ${target.targetId} due to missing health check fields`
|
||||
// );
|
||||
return null; // Skip targets with missing health check fields
|
||||
}
|
||||
|
||||
@@ -277,7 +281,8 @@ export async function buildTargetConfigurationForNewtClient(siteId: number) {
|
||||
hcTimeout: target.hcTimeout, // in seconds
|
||||
hcHeaders: hcHeadersSend,
|
||||
hcMethod: target.hcMethod,
|
||||
hcTlsServerName: target.hcTlsServerName
|
||||
hcTlsServerName: target.hcTlsServerName,
|
||||
hcStatus: target.hcStatus
|
||||
};
|
||||
});
|
||||
|
||||
|
||||
@@ -7,6 +7,7 @@ import { eq } from "drizzle-orm";
|
||||
import { sendToExitNode } from "#dynamic/lib/exitNodes";
|
||||
import { buildClientConfigurationForNewtClient } from "./buildConfiguration";
|
||||
import { convertTargetsIfNessicary } from "../client/targets";
|
||||
import { canCompress } from "@server/lib/clientVersionChecks";
|
||||
|
||||
const inputSchema = z.object({
|
||||
publicKey: z.string(),
|
||||
@@ -105,11 +106,11 @@ export const handleGetConfigMessage: MessageHandler = async (context) => {
|
||||
const payload = {
|
||||
oldDestination: {
|
||||
destinationIP: existingSite.subnet?.split("/")[0],
|
||||
destinationPort: existingSite.listenPort
|
||||
destinationPort: existingSite.listenPort || 1 // this satisfies gerbil for now but should be reevaluated
|
||||
},
|
||||
newDestination: {
|
||||
destinationIP: site.subnet?.split("/")[0],
|
||||
destinationPort: site.listenPort
|
||||
destinationPort: site.listenPort || 1 // this satisfies gerbil for now but should be reevaluated
|
||||
}
|
||||
};
|
||||
|
||||
@@ -138,6 +139,9 @@ export const handleGetConfigMessage: MessageHandler = async (context) => {
|
||||
targets: targetsToSend
|
||||
}
|
||||
},
|
||||
options: {
|
||||
compress: canCompress(newt.version, "newt")
|
||||
},
|
||||
broadcast: false,
|
||||
excludeSender: false
|
||||
};
|
||||
|
||||
36
server/routers/newt/handleNewtDisconnectingMessage.ts
Normal file
36
server/routers/newt/handleNewtDisconnectingMessage.ts
Normal file
@@ -0,0 +1,36 @@
|
||||
import { MessageHandler } from "@server/routers/ws";
|
||||
import { db, Newt, sites } from "@server/db";
|
||||
import { eq } from "drizzle-orm";
|
||||
import logger from "@server/logger";
|
||||
|
||||
/**
|
||||
* Handles disconnecting messages from sites to show disconnected in the ui
|
||||
*/
|
||||
export const handleNewtDisconnectingMessage: MessageHandler = async (
|
||||
context
|
||||
) => {
|
||||
const { message, client: c, sendToClient } = context;
|
||||
const newt = c as Newt;
|
||||
|
||||
if (!newt) {
|
||||
logger.warn("Newt not found");
|
||||
return;
|
||||
}
|
||||
|
||||
if (!newt.siteId) {
|
||||
logger.warn("Newt has no client ID!");
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
// Update the client's last ping timestamp
|
||||
await db
|
||||
.update(sites)
|
||||
.set({
|
||||
online: false
|
||||
})
|
||||
.where(eq(sites.siteId, newt.siteId));
|
||||
} catch (error) {
|
||||
logger.error("Error handling disconnecting message", { error });
|
||||
}
|
||||
};
|
||||
@@ -1,105 +1,107 @@
|
||||
import { db, sites } from "@server/db";
|
||||
import { disconnectClient, getClientConfigVersion } from "#dynamic/routers/ws";
|
||||
import { db, newts, sites } from "@server/db";
|
||||
import { hasActiveConnections, getClientConfigVersion } from "#dynamic/routers/ws";
|
||||
import { MessageHandler } from "@server/routers/ws";
|
||||
import { clients, Newt } from "@server/db";
|
||||
import { Newt } from "@server/db";
|
||||
import { eq, lt, isNull, and, or } from "drizzle-orm";
|
||||
import logger from "@server/logger";
|
||||
import { validateSessionToken } from "@server/auth/sessions/app";
|
||||
import { checkOrgAccessPolicy } from "#dynamic/lib/checkOrgAccessPolicy";
|
||||
import { sendTerminateClient } from "../client/terminate";
|
||||
import { encodeHexLowerCase } from "@oslojs/encoding";
|
||||
import { sha256 } from "@oslojs/crypto/sha2";
|
||||
import { sendNewtSyncMessage } from "./sync";
|
||||
|
||||
// Track if the offline checker interval is running
|
||||
// let offlineCheckerInterval: NodeJS.Timeout | null = null;
|
||||
// const OFFLINE_CHECK_INTERVAL = 30 * 1000; // Check every 30 seconds
|
||||
// const OFFLINE_THRESHOLD_MS = 2 * 60 * 1000; // 2 minutes
|
||||
let offlineCheckerInterval: NodeJS.Timeout | null = null;
|
||||
const OFFLINE_CHECK_INTERVAL = 30 * 1000; // Check every 30 seconds
|
||||
const OFFLINE_THRESHOLD_MS = 2 * 60 * 1000; // 2 minutes
|
||||
|
||||
/**
|
||||
* Starts the background interval that checks for clients that haven't pinged recently
|
||||
* and marks them as offline
|
||||
* Starts the background interval that checks for newt sites that haven't
|
||||
* pinged recently and marks them as offline. For backward compatibility,
|
||||
* a site is only marked offline when there is no active WebSocket connection
|
||||
* either — so older newt versions that don't send pings but remain connected
|
||||
* continue to be treated as online.
|
||||
*/
|
||||
// export const startNewtOfflineChecker = (): void => {
|
||||
// if (offlineCheckerInterval) {
|
||||
// return; // Already running
|
||||
// }
|
||||
export const startNewtOfflineChecker = (): void => {
|
||||
if (offlineCheckerInterval) {
|
||||
return; // Already running
|
||||
}
|
||||
|
||||
// offlineCheckerInterval = setInterval(async () => {
|
||||
// try {
|
||||
// const twoMinutesAgo = Math.floor(
|
||||
// (Date.now() - OFFLINE_THRESHOLD_MS) / 1000
|
||||
// );
|
||||
offlineCheckerInterval = setInterval(async () => {
|
||||
try {
|
||||
const twoMinutesAgo = Math.floor(
|
||||
(Date.now() - OFFLINE_THRESHOLD_MS) / 1000
|
||||
);
|
||||
|
||||
// // TODO: WE NEED TO MAKE SURE THIS WORKS WITH DISTRIBUTED NODES ALL DOING THE SAME THING
|
||||
// Find all online newt-type sites that haven't pinged recently
|
||||
// (or have never pinged at all). Join newts to obtain the newtId
|
||||
// needed for the WebSocket connection check.
|
||||
const staleSites = await db
|
||||
.select({
|
||||
siteId: sites.siteId,
|
||||
newtId: newts.newtId,
|
||||
lastPing: sites.lastPing
|
||||
})
|
||||
.from(sites)
|
||||
.innerJoin(newts, eq(newts.siteId, sites.siteId))
|
||||
.where(
|
||||
and(
|
||||
eq(sites.online, true),
|
||||
eq(sites.type, "newt"),
|
||||
or(
|
||||
lt(sites.lastPing, twoMinutesAgo),
|
||||
isNull(sites.lastPing)
|
||||
)
|
||||
)
|
||||
);
|
||||
|
||||
// // Find clients that haven't pinged in the last 2 minutes and mark them as offline
|
||||
// const offlineClients = await db
|
||||
// .update(clients)
|
||||
// .set({ online: false })
|
||||
// .where(
|
||||
// and(
|
||||
// eq(clients.online, true),
|
||||
// or(
|
||||
// lt(clients.lastPing, twoMinutesAgo),
|
||||
// isNull(clients.lastPing)
|
||||
// )
|
||||
// )
|
||||
// )
|
||||
// .returning();
|
||||
for (const staleSite of staleSites) {
|
||||
// Backward-compatibility check: if the newt still has an
|
||||
// active WebSocket connection (older clients that don't send
|
||||
// pings), keep the site online.
|
||||
const isConnected = await hasActiveConnections(staleSite.newtId);
|
||||
if (isConnected) {
|
||||
logger.debug(
|
||||
`Newt ${staleSite.newtId} has not pinged recently but is still connected via WebSocket — keeping site ${staleSite.siteId} online`
|
||||
);
|
||||
continue;
|
||||
}
|
||||
|
||||
// for (const offlineClient of offlineClients) {
|
||||
// logger.info(
|
||||
// `Kicking offline newt client ${offlineClient.clientId} due to inactivity`
|
||||
// );
|
||||
logger.info(
|
||||
`Marking site ${staleSite.siteId} offline: newt ${staleSite.newtId} has no recent ping and no active WebSocket connection`
|
||||
);
|
||||
|
||||
// if (!offlineClient.newtId) {
|
||||
// logger.warn(
|
||||
// `Offline client ${offlineClient.clientId} has no newtId, cannot disconnect`
|
||||
// );
|
||||
// continue;
|
||||
// }
|
||||
await db
|
||||
.update(sites)
|
||||
.set({ online: false })
|
||||
.where(eq(sites.siteId, staleSite.siteId));
|
||||
}
|
||||
} catch (error) {
|
||||
logger.error("Error in newt offline checker interval", { error });
|
||||
}
|
||||
}, OFFLINE_CHECK_INTERVAL);
|
||||
|
||||
// // Send a disconnect message to the client if connected
|
||||
// try {
|
||||
// await sendTerminateClient(
|
||||
// offlineClient.clientId,
|
||||
// offlineClient.newtId
|
||||
// ); // terminate first
|
||||
// // wait a moment to ensure the message is sent
|
||||
// await new Promise((resolve) => setTimeout(resolve, 1000));
|
||||
// await disconnectClient(offlineClient.newtId);
|
||||
// } catch (error) {
|
||||
// logger.error(
|
||||
// `Error sending disconnect to offline newt ${offlineClient.clientId}`,
|
||||
// { error }
|
||||
// );
|
||||
// }
|
||||
// }
|
||||
// } catch (error) {
|
||||
// logger.error("Error in offline checker interval", { error });
|
||||
// }
|
||||
// }, OFFLINE_CHECK_INTERVAL);
|
||||
|
||||
// logger.debug("Started offline checker interval");
|
||||
// };
|
||||
logger.debug("Started newt offline checker interval");
|
||||
};
|
||||
|
||||
/**
|
||||
* Stops the background interval that checks for offline clients
|
||||
* Stops the background interval that checks for offline newt sites.
|
||||
*/
|
||||
// export const stopNewtOfflineChecker = (): void => {
|
||||
// if (offlineCheckerInterval) {
|
||||
// clearInterval(offlineCheckerInterval);
|
||||
// offlineCheckerInterval = null;
|
||||
// logger.info("Stopped offline checker interval");
|
||||
// }
|
||||
// };
|
||||
export const stopNewtOfflineChecker = (): void => {
|
||||
if (offlineCheckerInterval) {
|
||||
clearInterval(offlineCheckerInterval);
|
||||
offlineCheckerInterval = null;
|
||||
logger.info("Stopped newt offline checker interval");
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Handles ping messages from clients and responds with pong
|
||||
* Handles ping messages from newt clients.
|
||||
*
|
||||
* On each ping:
|
||||
* - Marks the associated site as online.
|
||||
* - Records the current timestamp as the newt's last-ping time.
|
||||
* - Triggers a config sync if the newt is running an outdated config version.
|
||||
* - Responds with a pong message.
|
||||
*/
|
||||
export const handleNewtPingMessage: MessageHandler = async (context) => {
|
||||
const { message, client: c, sendToClient } = context;
|
||||
const { message, client: c } = context;
|
||||
const newt = c as Newt;
|
||||
|
||||
if (!newt) {
|
||||
@@ -112,15 +114,31 @@ export const handleNewtPingMessage: MessageHandler = async (context) => {
|
||||
return;
|
||||
}
|
||||
|
||||
// get the version
|
||||
try {
|
||||
// Mark the site as online and record the ping timestamp.
|
||||
await db
|
||||
.update(sites)
|
||||
.set({
|
||||
online: true,
|
||||
lastPing: Math.floor(Date.now() / 1000)
|
||||
})
|
||||
.where(eq(sites.siteId, newt.siteId));
|
||||
} catch (error) {
|
||||
logger.error("Error updating online state on newt ping", { error });
|
||||
}
|
||||
|
||||
// Check config version and sync if stale.
|
||||
const configVersion = await getClientConfigVersion(newt.newtId);
|
||||
|
||||
if (message.configVersion && configVersion != null && configVersion != message.configVersion) {
|
||||
if (
|
||||
message.configVersion != null &&
|
||||
configVersion != null &&
|
||||
configVersion !== message.configVersion
|
||||
) {
|
||||
logger.warn(
|
||||
`Newt ping with outdated config version: ${message.configVersion} (current: ${configVersion})`
|
||||
);
|
||||
|
||||
// get the site
|
||||
const [site] = await db
|
||||
.select()
|
||||
.from(sites)
|
||||
@@ -137,19 +155,6 @@ export const handleNewtPingMessage: MessageHandler = async (context) => {
|
||||
await sendNewtSyncMessage(newt, site);
|
||||
}
|
||||
|
||||
// try {
|
||||
// // Update the client's last ping timestamp
|
||||
// await db
|
||||
// .update(clients)
|
||||
// .set({
|
||||
// lastPing: Math.floor(Date.now() / 1000),
|
||||
// online: true
|
||||
// })
|
||||
// .where(eq(clients.clientId, newt.clientId));
|
||||
// } catch (error) {
|
||||
// logger.error("Error handling ping message", { error });
|
||||
// }
|
||||
|
||||
return {
|
||||
message: {
|
||||
type: "pong",
|
||||
|
||||
@@ -5,9 +5,7 @@ import { eq } from "drizzle-orm";
|
||||
import { addPeer, deletePeer } from "../gerbil/peers";
|
||||
import logger from "@server/logger";
|
||||
import config from "@server/lib/config";
|
||||
import {
|
||||
findNextAvailableCidr,
|
||||
} from "@server/lib/ip";
|
||||
import { findNextAvailableCidr } from "@server/lib/ip";
|
||||
import {
|
||||
selectBestExitNode,
|
||||
verifyExitNodeOrgAccess
|
||||
@@ -15,6 +13,7 @@ import {
|
||||
import { fetchContainers } from "./dockerSocket";
|
||||
import { lockManager } from "#dynamic/lib/lock";
|
||||
import { buildTargetConfigurationForNewtClient } from "./buildConfiguration";
|
||||
import { canCompress } from "@server/lib/clientVersionChecks";
|
||||
|
||||
export type ExitNodePingResult = {
|
||||
exitNodeId: number;
|
||||
@@ -215,6 +214,9 @@ export const handleNewtRegisterMessage: MessageHandler = async (context) => {
|
||||
healthCheckTargets: validHealthCheckTargets
|
||||
}
|
||||
},
|
||||
options: {
|
||||
compress: canCompress(newt.version, "newt")
|
||||
},
|
||||
broadcast: false, // Send to all clients
|
||||
excludeSender: false // Include sender in broadcast
|
||||
};
|
||||
|
||||
@@ -10,10 +10,21 @@ interface PeerBandwidth {
|
||||
bytesOut: number;
|
||||
}
|
||||
|
||||
interface BandwidthAccumulator {
|
||||
bytesIn: number;
|
||||
bytesOut: number;
|
||||
}
|
||||
|
||||
// Retry configuration for deadlock handling
|
||||
const MAX_RETRIES = 3;
|
||||
const BASE_DELAY_MS = 50;
|
||||
|
||||
// How often to flush accumulated bandwidth data to the database
|
||||
const FLUSH_INTERVAL_MS = 120_000; // 120 seconds
|
||||
|
||||
// In-memory accumulator: publicKey -> { bytesIn, bytesOut }
|
||||
let accumulator = new Map<string, BandwidthAccumulator>();
|
||||
|
||||
/**
|
||||
* Check if an error is a deadlock error
|
||||
*/
|
||||
@@ -53,6 +64,90 @@ async function withDeadlockRetry<T>(
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Flush all accumulated bandwidth data to the database.
|
||||
*
|
||||
* Swaps out the accumulator before writing so that any bandwidth messages
|
||||
* received during the flush are captured in the new accumulator rather than
|
||||
* being lost or causing contention. Entries that fail to write are re-queued
|
||||
* back into the accumulator so they will be retried on the next flush.
|
||||
*
|
||||
* This function is exported so that the application's graceful-shutdown
|
||||
* cleanup handler can call it before the process exits.
|
||||
*/
|
||||
export async function flushBandwidthToDb(): Promise<void> {
|
||||
if (accumulator.size === 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Atomically swap out the accumulator so new data keeps flowing in
|
||||
// while we write the snapshot to the database.
|
||||
const snapshot = accumulator;
|
||||
accumulator = new Map<string, BandwidthAccumulator>();
|
||||
|
||||
const currentTime = new Date().toISOString();
|
||||
|
||||
// Sort by publicKey for consistent lock ordering across concurrent
|
||||
// writers — this is the same deadlock-prevention strategy used in the
|
||||
// original per-message implementation.
|
||||
const sortedEntries = [...snapshot.entries()].sort(([a], [b]) =>
|
||||
a.localeCompare(b)
|
||||
);
|
||||
|
||||
logger.debug(
|
||||
`Flushing accumulated bandwidth data for ${sortedEntries.length} client(s) to the database`
|
||||
);
|
||||
|
||||
for (const [publicKey, { bytesIn, bytesOut }] of sortedEntries) {
|
||||
try {
|
||||
await withDeadlockRetry(async () => {
|
||||
// Use atomic SQL increment to avoid the SELECT-then-UPDATE
|
||||
// anti-pattern and the races it would introduce.
|
||||
await db
|
||||
.update(clients)
|
||||
.set({
|
||||
// Note: bytesIn from peer goes to megabytesOut (data
|
||||
// sent to client) and bytesOut from peer goes to
|
||||
// megabytesIn (data received from client).
|
||||
megabytesOut: sql`COALESCE(${clients.megabytesOut}, 0) + ${bytesIn}`,
|
||||
megabytesIn: sql`COALESCE(${clients.megabytesIn}, 0) + ${bytesOut}`,
|
||||
lastBandwidthUpdate: currentTime
|
||||
})
|
||||
.where(eq(clients.pubKey, publicKey));
|
||||
}, `flush bandwidth for client ${publicKey}`);
|
||||
} catch (error) {
|
||||
logger.error(
|
||||
`Failed to flush bandwidth for client ${publicKey}:`,
|
||||
error
|
||||
);
|
||||
|
||||
// Re-queue the failed entry so it is retried on the next flush
|
||||
// rather than silently dropped.
|
||||
const existing = accumulator.get(publicKey);
|
||||
if (existing) {
|
||||
existing.bytesIn += bytesIn;
|
||||
existing.bytesOut += bytesOut;
|
||||
} else {
|
||||
accumulator.set(publicKey, { bytesIn, bytesOut });
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const flushTimer = setInterval(async () => {
|
||||
try {
|
||||
await flushBandwidthToDb();
|
||||
} catch (error) {
|
||||
logger.error("Unexpected error during periodic bandwidth flush:", error);
|
||||
}
|
||||
}, FLUSH_INTERVAL_MS);
|
||||
|
||||
// Calling unref() means this timer will not keep the Node.js event loop alive
|
||||
// on its own — the process can still exit normally when there is no other work
|
||||
// left. The graceful-shutdown path (see server/cleanup.ts) will call
|
||||
// flushBandwidthToDb() explicitly before process.exit(), so no data is lost.
|
||||
flushTimer.unref();
|
||||
|
||||
export const handleReceiveBandwidthMessage: MessageHandler = async (
|
||||
context
|
||||
) => {
|
||||
@@ -69,40 +164,21 @@ export const handleReceiveBandwidthMessage: MessageHandler = async (
|
||||
throw new Error("Invalid bandwidth data");
|
||||
}
|
||||
|
||||
// Sort bandwidth data by publicKey to ensure consistent lock ordering across all instances
|
||||
// This is critical for preventing deadlocks when multiple instances update the same clients
|
||||
const sortedBandwidthData = [...bandwidthData].sort((a, b) =>
|
||||
a.publicKey.localeCompare(b.publicKey)
|
||||
);
|
||||
// Accumulate the incoming data in memory; the periodic timer (and the
|
||||
// shutdown hook) will take care of writing it to the database.
|
||||
for (const { publicKey, bytesIn, bytesOut } of bandwidthData) {
|
||||
// Skip peers that haven't transferred any data — writing zeros to the
|
||||
// database would be a no-op anyway.
|
||||
if (bytesIn <= 0 && bytesOut <= 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const currentTime = new Date().toISOString();
|
||||
|
||||
// Update each client individually with retry logic
|
||||
// This reduces transaction scope and allows retries per-client
|
||||
for (const peer of sortedBandwidthData) {
|
||||
const { publicKey, bytesIn, bytesOut } = peer;
|
||||
|
||||
try {
|
||||
await withDeadlockRetry(async () => {
|
||||
// Use atomic SQL increment to avoid SELECT then UPDATE pattern
|
||||
// This eliminates the need to read the current value first
|
||||
await db
|
||||
.update(clients)
|
||||
.set({
|
||||
// Note: bytesIn from peer goes to megabytesOut (data sent to client)
|
||||
// and bytesOut from peer goes to megabytesIn (data received from client)
|
||||
megabytesOut: sql`COALESCE(${clients.megabytesOut}, 0) + ${bytesIn}`,
|
||||
megabytesIn: sql`COALESCE(${clients.megabytesIn}, 0) + ${bytesOut}`,
|
||||
lastBandwidthUpdate: currentTime
|
||||
})
|
||||
.where(eq(clients.pubKey, publicKey));
|
||||
}, `update client bandwidth ${publicKey}`);
|
||||
} catch (error) {
|
||||
logger.error(
|
||||
`Failed to update bandwidth for client ${publicKey}:`,
|
||||
error
|
||||
);
|
||||
// Continue with other clients even if one fails
|
||||
const existing = accumulator.get(publicKey);
|
||||
if (existing) {
|
||||
existing.bytesIn += bytesIn;
|
||||
existing.bytesOut += bytesOut;
|
||||
} else {
|
||||
accumulator.set(publicKey, { bytesIn, bytesOut });
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
@@ -7,3 +7,4 @@ export * from "./handleSocketMessages";
|
||||
export * from "./handleNewtPingRequestMessage";
|
||||
export * from "./handleApplyBlueprintMessage";
|
||||
export * from "./handleNewtPingMessage";
|
||||
export * from "./handleNewtDisconnectingMessage";
|
||||
|
||||
@@ -6,6 +6,7 @@ import {
|
||||
buildClientConfigurationForNewtClient,
|
||||
buildTargetConfigurationForNewtClient
|
||||
} from "./buildConfiguration";
|
||||
import { canCompress } from "@server/lib/clientVersionChecks";
|
||||
|
||||
export async function sendNewtSyncMessage(newt: Newt, site: Site) {
|
||||
const { tcpTargets, udpTargets, validHealthCheckTargets } =
|
||||
@@ -24,18 +25,24 @@ export async function sendNewtSyncMessage(newt: Newt, site: Site) {
|
||||
exitNode
|
||||
);
|
||||
|
||||
await sendToClient(newt.newtId, {
|
||||
type: "newt/sync",
|
||||
data: {
|
||||
proxyTargets: {
|
||||
udp: udpTargets,
|
||||
tcp: tcpTargets
|
||||
},
|
||||
healthCheckTargets: validHealthCheckTargets,
|
||||
peers: peers,
|
||||
clientTargets: targets
|
||||
await sendToClient(
|
||||
newt.newtId,
|
||||
{
|
||||
type: "newt/sync",
|
||||
data: {
|
||||
proxyTargets: {
|
||||
udp: udpTargets,
|
||||
tcp: tcpTargets
|
||||
},
|
||||
healthCheckTargets: validHealthCheckTargets,
|
||||
peers: peers,
|
||||
clientTargets: targets
|
||||
}
|
||||
},
|
||||
{
|
||||
compress: canCompress(newt.version, "newt")
|
||||
}
|
||||
}).catch((error) => {
|
||||
).catch((error) => {
|
||||
logger.warn(`Error sending newt sync message:`, error);
|
||||
});
|
||||
}
|
||||
|
||||
@@ -2,13 +2,14 @@ import { Target, TargetHealthCheck, db, targetHealthCheck } from "@server/db";
|
||||
import { sendToClient } from "#dynamic/routers/ws";
|
||||
import logger from "@server/logger";
|
||||
import { eq, inArray } from "drizzle-orm";
|
||||
import { canCompress } from "@server/lib/clientVersionChecks";
|
||||
|
||||
export async function addTargets(
|
||||
newtId: string,
|
||||
targets: Target[],
|
||||
healthCheckData: TargetHealthCheck[],
|
||||
protocol: string,
|
||||
port: number | null = null
|
||||
version?: string | null
|
||||
) {
|
||||
//create a list of udp and tcp targets
|
||||
const payloadTargets = targets.map((target) => {
|
||||
@@ -22,7 +23,7 @@ export async function addTargets(
|
||||
data: {
|
||||
targets: payloadTargets
|
||||
}
|
||||
}, { incrementConfigVersion: true });
|
||||
}, { incrementConfigVersion: true, compress: canCompress(version, "newt") });
|
||||
|
||||
// Create a map for quick lookup
|
||||
const healthCheckMap = new Map<number, TargetHealthCheck>();
|
||||
@@ -103,14 +104,14 @@ export async function addTargets(
|
||||
data: {
|
||||
targets: validHealthCheckTargets
|
||||
}
|
||||
}, { incrementConfigVersion: true });
|
||||
}, { incrementConfigVersion: true, compress: canCompress(version, "newt") });
|
||||
}
|
||||
|
||||
export async function removeTargets(
|
||||
newtId: string,
|
||||
targets: Target[],
|
||||
protocol: string,
|
||||
port: number | null = null
|
||||
version?: string | null
|
||||
) {
|
||||
//create a list of udp and tcp targets
|
||||
const payloadTargets = targets.map((target) => {
|
||||
@@ -135,5 +136,5 @@ export async function removeTargets(
|
||||
data: {
|
||||
ids: healthCheckTargets
|
||||
}
|
||||
}, { incrementConfigVersion: true });
|
||||
}, { incrementConfigVersion: true, compress: canCompress(version, "newt") });
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user