mirror of
https://github.com/fosrl/pangolin.git
synced 2026-04-18 15:56:35 +00:00
seperate out the offline checker logic
This commit is contained in:
@@ -11,78 +11,12 @@
|
|||||||
* This file is not licensed under the AGPLv3.
|
* This file is not licensed under the AGPLv3.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import { db, exitNodes, sites } from "@server/db";
|
import { db, exitNodes } from "@server/db";
|
||||||
import { MessageHandler } from "@server/routers/ws";
|
import { MessageHandler } from "@server/routers/ws";
|
||||||
import { clients, RemoteExitNode } from "@server/db";
|
import { RemoteExitNode } from "@server/db";
|
||||||
import { eq, lt, isNull, and, or, inArray } from "drizzle-orm";
|
import { eq } from "drizzle-orm";
|
||||||
import logger from "@server/logger";
|
import logger from "@server/logger";
|
||||||
|
|
||||||
// Track if the offline checker interval is running
|
|
||||||
let offlineCheckerInterval: NodeJS.Timeout | null = null;
|
|
||||||
const OFFLINE_CHECK_INTERVAL = 30 * 1000; // Check every 30 seconds
|
|
||||||
const OFFLINE_THRESHOLD_MS = 2 * 60 * 1000; // 2 minutes
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Starts the background interval that checks for clients that haven't pinged recently
|
|
||||||
* and marks them as offline
|
|
||||||
*/
|
|
||||||
export const startRemoteExitNodeOfflineChecker = (): void => {
|
|
||||||
if (offlineCheckerInterval) {
|
|
||||||
return; // Already running
|
|
||||||
}
|
|
||||||
|
|
||||||
offlineCheckerInterval = setInterval(async () => {
|
|
||||||
try {
|
|
||||||
const twoMinutesAgo = Math.floor(
|
|
||||||
(Date.now() - OFFLINE_THRESHOLD_MS) / 1000
|
|
||||||
);
|
|
||||||
|
|
||||||
// Find clients that haven't pinged in the last 2 minutes and mark them as offline
|
|
||||||
const offlineNodes = await db
|
|
||||||
.update(exitNodes)
|
|
||||||
.set({ online: false })
|
|
||||||
.where(
|
|
||||||
and(
|
|
||||||
eq(exitNodes.online, true),
|
|
||||||
eq(exitNodes.type, "remoteExitNode"),
|
|
||||||
or(
|
|
||||||
lt(exitNodes.lastPing, twoMinutesAgo),
|
|
||||||
isNull(exitNodes.lastPing)
|
|
||||||
)
|
|
||||||
)
|
|
||||||
)
|
|
||||||
.returning();
|
|
||||||
|
|
||||||
if (offlineNodes.length > 0) {
|
|
||||||
logger.info(
|
|
||||||
`checkRemoteExitNodeOffline: Marked ${offlineNodes.length} remoteExitNode client(s) offline due to inactivity`
|
|
||||||
);
|
|
||||||
|
|
||||||
for (const offlineClient of offlineNodes) {
|
|
||||||
logger.debug(
|
|
||||||
`checkRemoteExitNodeOffline: Client ${offlineClient.exitNodeId} marked offline (lastPing: ${offlineClient.lastPing})`
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} catch (error) {
|
|
||||||
logger.error("Error in offline checker interval", { error });
|
|
||||||
}
|
|
||||||
}, OFFLINE_CHECK_INTERVAL);
|
|
||||||
|
|
||||||
logger.debug("Started offline checker interval");
|
|
||||||
};
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Stops the background interval that checks for offline clients
|
|
||||||
*/
|
|
||||||
export const stopRemoteExitNodeOfflineChecker = (): void => {
|
|
||||||
if (offlineCheckerInterval) {
|
|
||||||
clearInterval(offlineCheckerInterval);
|
|
||||||
offlineCheckerInterval = null;
|
|
||||||
logger.info("Stopped offline checker interval");
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Handles ping messages from clients and responds with pong
|
* Handles ping messages from clients and responds with pong
|
||||||
*/
|
*/
|
||||||
|
|||||||
@@ -21,3 +21,4 @@ export * from "./deleteRemoteExitNode";
|
|||||||
export * from "./listRemoteExitNodes";
|
export * from "./listRemoteExitNodes";
|
||||||
export * from "./pickRemoteExitNodeDefaults";
|
export * from "./pickRemoteExitNodeDefaults";
|
||||||
export * from "./quickStartRemoteExitNode";
|
export * from "./quickStartRemoteExitNode";
|
||||||
|
export * from "./offlineChecker";
|
||||||
|
|||||||
82
server/private/routers/remoteExitNode/offlineChecker.ts
Normal file
82
server/private/routers/remoteExitNode/offlineChecker.ts
Normal file
@@ -0,0 +1,82 @@
|
|||||||
|
/*
|
||||||
|
* This file is part of a proprietary work.
|
||||||
|
*
|
||||||
|
* Copyright (c) 2025-2026 Fossorial, Inc.
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* This file is licensed under the Fossorial Commercial License.
|
||||||
|
* You may not use this file except in compliance with the License.
|
||||||
|
* Unauthorized use, copying, modification, or distribution is strictly prohibited.
|
||||||
|
*
|
||||||
|
* This file is not licensed under the AGPLv3.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { db, exitNodes } from "@server/db";
|
||||||
|
import { eq, lt, isNull, and, or } from "drizzle-orm";
|
||||||
|
import logger from "@server/logger";
|
||||||
|
|
||||||
|
// Track if the offline checker interval is running
|
||||||
|
let offlineCheckerInterval: NodeJS.Timeout | null = null;
|
||||||
|
const OFFLINE_CHECK_INTERVAL = 30 * 1000; // Check every 30 seconds
|
||||||
|
const OFFLINE_THRESHOLD_MS = 2 * 60 * 1000; // 2 minutes
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Starts the background interval that checks for clients that haven't pinged recently
|
||||||
|
* and marks them as offline
|
||||||
|
*/
|
||||||
|
export const startRemoteExitNodeOfflineChecker = (): void => {
|
||||||
|
if (offlineCheckerInterval) {
|
||||||
|
return; // Already running
|
||||||
|
}
|
||||||
|
|
||||||
|
offlineCheckerInterval = setInterval(async () => {
|
||||||
|
try {
|
||||||
|
const twoMinutesAgo = Math.floor(
|
||||||
|
(Date.now() - OFFLINE_THRESHOLD_MS) / 1000
|
||||||
|
);
|
||||||
|
|
||||||
|
// Find clients that haven't pinged in the last 2 minutes and mark them as offline
|
||||||
|
const offlineNodes = await db
|
||||||
|
.update(exitNodes)
|
||||||
|
.set({ online: false })
|
||||||
|
.where(
|
||||||
|
and(
|
||||||
|
eq(exitNodes.online, true),
|
||||||
|
eq(exitNodes.type, "remoteExitNode"),
|
||||||
|
or(
|
||||||
|
lt(exitNodes.lastPing, twoMinutesAgo),
|
||||||
|
isNull(exitNodes.lastPing)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
.returning();
|
||||||
|
|
||||||
|
if (offlineNodes.length > 0) {
|
||||||
|
logger.info(
|
||||||
|
`checkRemoteExitNodeOffline: Marked ${offlineNodes.length} remoteExitNode client(s) offline due to inactivity`
|
||||||
|
);
|
||||||
|
|
||||||
|
for (const offlineClient of offlineNodes) {
|
||||||
|
logger.debug(
|
||||||
|
`checkRemoteExitNodeOffline: Client ${offlineClient.exitNodeId} marked offline (lastPing: ${offlineClient.lastPing})`
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (error) {
|
||||||
|
logger.error("Error in offline checker interval", { error });
|
||||||
|
}
|
||||||
|
}, OFFLINE_CHECK_INTERVAL);
|
||||||
|
|
||||||
|
logger.debug("Started offline checker interval");
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Stops the background interval that checks for offline clients
|
||||||
|
*/
|
||||||
|
export const stopRemoteExitNodeOfflineChecker = (): void => {
|
||||||
|
if (offlineCheckerInterval) {
|
||||||
|
clearInterval(offlineCheckerInterval);
|
||||||
|
offlineCheckerInterval = null;
|
||||||
|
logger.info("Stopped offline checker interval");
|
||||||
|
}
|
||||||
|
};
|
||||||
@@ -1,184 +1,11 @@
|
|||||||
import { db, newts, sites, targetHealthCheck, targets } from "@server/db";
|
import { db, sites } from "@server/db";
|
||||||
import {
|
import { getClientConfigVersion } from "#dynamic/routers/ws";
|
||||||
hasActiveConnections,
|
|
||||||
getClientConfigVersion
|
|
||||||
} from "#dynamic/routers/ws";
|
|
||||||
import { MessageHandler } from "@server/routers/ws";
|
import { MessageHandler } from "@server/routers/ws";
|
||||||
import { Newt } from "@server/db";
|
import { Newt } from "@server/db";
|
||||||
import { eq, lt, isNull, and, or, ne, not } from "drizzle-orm";
|
import { eq } from "drizzle-orm";
|
||||||
import logger from "@server/logger";
|
import logger from "@server/logger";
|
||||||
import { sendNewtSyncMessage } from "./sync";
|
import { sendNewtSyncMessage } from "./sync";
|
||||||
import { recordPing } from "./pingAccumulator";
|
import { recordPing } from "./pingAccumulator";
|
||||||
import { fireSiteOfflineAlert } from "#dynamic/lib/alerts";
|
|
||||||
|
|
||||||
// Track if the offline checker interval is running
|
|
||||||
let offlineCheckerInterval: NodeJS.Timeout | null = null;
|
|
||||||
const OFFLINE_CHECK_INTERVAL = 30 * 1000; // Check every 30 seconds
|
|
||||||
const OFFLINE_THRESHOLD_MS = 2 * 60 * 1000; // 2 minutes
|
|
||||||
const OFFLINE_THRESHOLD_BANDWIDTH_MS = 8 * 60 * 1000; // 8 minutes
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Starts the background interval that checks for newt sites that haven't
|
|
||||||
* pinged recently and marks them as offline. For backward compatibility,
|
|
||||||
* a site is only marked offline when there is no active WebSocket connection
|
|
||||||
* either — so older newt versions that don't send pings but remain connected
|
|
||||||
* continue to be treated as online.
|
|
||||||
*/
|
|
||||||
export const startNewtOfflineChecker = (): void => {
|
|
||||||
if (offlineCheckerInterval) {
|
|
||||||
return; // Already running
|
|
||||||
}
|
|
||||||
|
|
||||||
offlineCheckerInterval = setInterval(async () => {
|
|
||||||
try {
|
|
||||||
const twoMinutesAgo = Math.floor(
|
|
||||||
(Date.now() - OFFLINE_THRESHOLD_MS) / 1000
|
|
||||||
);
|
|
||||||
|
|
||||||
// Find all online newt-type sites that haven't pinged recently
|
|
||||||
// (or have never pinged at all). Join newts to obtain the newtId
|
|
||||||
// needed for the WebSocket connection check.
|
|
||||||
const staleSites = await db
|
|
||||||
.select({
|
|
||||||
siteId: sites.siteId,
|
|
||||||
orgId: sites.orgId,
|
|
||||||
name: sites.name,
|
|
||||||
newtId: newts.newtId,
|
|
||||||
lastPing: sites.lastPing
|
|
||||||
})
|
|
||||||
.from(sites)
|
|
||||||
.innerJoin(newts, eq(newts.siteId, sites.siteId))
|
|
||||||
.where(
|
|
||||||
and(
|
|
||||||
eq(sites.online, true),
|
|
||||||
eq(sites.type, "newt"),
|
|
||||||
or(
|
|
||||||
lt(sites.lastPing, twoMinutesAgo),
|
|
||||||
isNull(sites.lastPing)
|
|
||||||
)
|
|
||||||
)
|
|
||||||
);
|
|
||||||
|
|
||||||
for (const staleSite of staleSites) {
|
|
||||||
// Backward-compatibility check: if the newt still has an
|
|
||||||
// active WebSocket connection (older clients that don't send
|
|
||||||
// pings), keep the site online.
|
|
||||||
const isConnected = await hasActiveConnections(
|
|
||||||
staleSite.newtId
|
|
||||||
);
|
|
||||||
if (isConnected) {
|
|
||||||
logger.debug(
|
|
||||||
`Newt ${staleSite.newtId} has not pinged recently but is still connected via WebSocket — keeping site ${staleSite.siteId} online`
|
|
||||||
);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
logger.info(
|
|
||||||
`Marking site ${staleSite.siteId} offline: newt ${staleSite.newtId} has no recent ping and no active WebSocket connection`
|
|
||||||
);
|
|
||||||
|
|
||||||
await db
|
|
||||||
.update(sites)
|
|
||||||
.set({ online: false })
|
|
||||||
.where(eq(sites.siteId, staleSite.siteId));
|
|
||||||
|
|
||||||
const healthChecksOnSite = await db
|
|
||||||
.select()
|
|
||||||
.from(targetHealthCheck)
|
|
||||||
.innerJoin(
|
|
||||||
targets,
|
|
||||||
eq(targets.targetId, targetHealthCheck.targetId)
|
|
||||||
)
|
|
||||||
.innerJoin(sites, eq(sites.siteId, targets.siteId))
|
|
||||||
.where(eq(sites.siteId, staleSite.siteId));
|
|
||||||
|
|
||||||
for (const healthCheck of healthChecksOnSite) {
|
|
||||||
logger.info(
|
|
||||||
`Marking health check ${healthCheck.targetHealthCheck.targetHealthCheckId} offline due to site ${staleSite.siteId} being marked offline`
|
|
||||||
);
|
|
||||||
await db
|
|
||||||
.update(targetHealthCheck)
|
|
||||||
.set({ hcHealth: "unknown" })
|
|
||||||
.where(
|
|
||||||
eq(
|
|
||||||
targetHealthCheck.targetHealthCheckId,
|
|
||||||
healthCheck.targetHealthCheck
|
|
||||||
.targetHealthCheckId
|
|
||||||
)
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
await fireSiteOfflineAlert(staleSite.orgId, staleSite.siteId, staleSite.name);
|
|
||||||
}
|
|
||||||
|
|
||||||
// this part only effects self hosted. Its not efficient but we dont expect people to have very many wireguard sites
|
|
||||||
// select all of the wireguard sites to evaluate if they need to be offline due to the last bandwidth update
|
|
||||||
const allWireguardSites = await db
|
|
||||||
.select({
|
|
||||||
siteId: sites.siteId,
|
|
||||||
online: sites.online,
|
|
||||||
lastBandwidthUpdate: sites.lastBandwidthUpdate
|
|
||||||
})
|
|
||||||
.from(sites)
|
|
||||||
.where(
|
|
||||||
and(
|
|
||||||
eq(sites.type, "wireguard"),
|
|
||||||
not(isNull(sites.lastBandwidthUpdate))
|
|
||||||
)
|
|
||||||
);
|
|
||||||
|
|
||||||
const wireguardOfflineThreshold = Math.floor(
|
|
||||||
(Date.now() - OFFLINE_THRESHOLD_BANDWIDTH_MS) / 1000
|
|
||||||
);
|
|
||||||
|
|
||||||
// loop over each one. If its offline and there is a new update then mark it online. If its online and there is no update then mark it offline
|
|
||||||
for (const site of allWireguardSites) {
|
|
||||||
const lastBandwidthUpdate =
|
|
||||||
new Date(site.lastBandwidthUpdate!).getTime() / 1000;
|
|
||||||
if (
|
|
||||||
lastBandwidthUpdate < wireguardOfflineThreshold &&
|
|
||||||
site.online
|
|
||||||
) {
|
|
||||||
logger.info(
|
|
||||||
`Marking wireguard site ${site.siteId} offline: no bandwidth update in over ${OFFLINE_THRESHOLD_BANDWIDTH_MS / 60000} minutes`
|
|
||||||
);
|
|
||||||
|
|
||||||
await db
|
|
||||||
.update(sites)
|
|
||||||
.set({ online: false })
|
|
||||||
.where(eq(sites.siteId, site.siteId));
|
|
||||||
} else if (
|
|
||||||
lastBandwidthUpdate >= wireguardOfflineThreshold &&
|
|
||||||
!site.online
|
|
||||||
) {
|
|
||||||
logger.info(
|
|
||||||
`Marking wireguard site ${site.siteId} online: recent bandwidth update`
|
|
||||||
);
|
|
||||||
|
|
||||||
await db
|
|
||||||
.update(sites)
|
|
||||||
.set({ online: true })
|
|
||||||
.where(eq(sites.siteId, site.siteId));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} catch (error) {
|
|
||||||
logger.error("Error in newt offline checker interval", { error });
|
|
||||||
}
|
|
||||||
}, OFFLINE_CHECK_INTERVAL);
|
|
||||||
|
|
||||||
logger.debug("Started newt offline checker interval");
|
|
||||||
};
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Stops the background interval that checks for offline newt sites.
|
|
||||||
*/
|
|
||||||
export const stopNewtOfflineChecker = (): void => {
|
|
||||||
if (offlineCheckerInterval) {
|
|
||||||
clearInterval(offlineCheckerInterval);
|
|
||||||
offlineCheckerInterval = null;
|
|
||||||
logger.info("Stopped newt offline checker interval");
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Handles ping messages from newt clients.
|
* Handles ping messages from newt clients.
|
||||||
|
|||||||
@@ -11,3 +11,4 @@ export * from "./handleNewtDisconnectingMessage";
|
|||||||
export * from "./handleConnectionLogMessage";
|
export * from "./handleConnectionLogMessage";
|
||||||
export * from "./handleRequestLogMessage";
|
export * from "./handleRequestLogMessage";
|
||||||
export * from "./registerNewt";
|
export * from "./registerNewt";
|
||||||
|
export * from "./offlineChecker";
|
||||||
|
|||||||
176
server/routers/newt/offlineChecker.ts
Normal file
176
server/routers/newt/offlineChecker.ts
Normal file
@@ -0,0 +1,176 @@
|
|||||||
|
import { db, newts, sites, targetHealthCheck, targets } from "@server/db";
|
||||||
|
import {
|
||||||
|
hasActiveConnections,
|
||||||
|
} from "#dynamic/routers/ws";
|
||||||
|
import { eq, lt, isNull, and, or, ne, not } from "drizzle-orm";
|
||||||
|
import logger from "@server/logger";
|
||||||
|
import { fireSiteOfflineAlert } from "#dynamic/lib/alerts";
|
||||||
|
|
||||||
|
// Track if the offline checker interval is running
|
||||||
|
let offlineCheckerInterval: NodeJS.Timeout | null = null;
|
||||||
|
const OFFLINE_CHECK_INTERVAL = 30 * 1000; // Check every 30 seconds
|
||||||
|
const OFFLINE_THRESHOLD_MS = 2 * 60 * 1000; // 2 minutes
|
||||||
|
const OFFLINE_THRESHOLD_BANDWIDTH_MS = 8 * 60 * 1000; // 8 minutes
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Starts the background interval that checks for newt sites that haven't
|
||||||
|
* pinged recently and marks them as offline. For backward compatibility,
|
||||||
|
* a site is only marked offline when there is no active WebSocket connection
|
||||||
|
* either — so older newt versions that don't send pings but remain connected
|
||||||
|
* continue to be treated as online.
|
||||||
|
*/
|
||||||
|
export const startNewtOfflineChecker = (): void => {
|
||||||
|
if (offlineCheckerInterval) {
|
||||||
|
return; // Already running
|
||||||
|
}
|
||||||
|
|
||||||
|
offlineCheckerInterval = setInterval(async () => {
|
||||||
|
try {
|
||||||
|
const twoMinutesAgo = Math.floor(
|
||||||
|
(Date.now() - OFFLINE_THRESHOLD_MS) / 1000
|
||||||
|
);
|
||||||
|
|
||||||
|
// Find all online newt-type sites that haven't pinged recently
|
||||||
|
// (or have never pinged at all). Join newts to obtain the newtId
|
||||||
|
// needed for the WebSocket connection check.
|
||||||
|
const staleSites = await db
|
||||||
|
.select({
|
||||||
|
siteId: sites.siteId,
|
||||||
|
orgId: sites.orgId,
|
||||||
|
name: sites.name,
|
||||||
|
newtId: newts.newtId,
|
||||||
|
lastPing: sites.lastPing
|
||||||
|
})
|
||||||
|
.from(sites)
|
||||||
|
.innerJoin(newts, eq(newts.siteId, sites.siteId))
|
||||||
|
.where(
|
||||||
|
and(
|
||||||
|
eq(sites.online, true),
|
||||||
|
eq(sites.type, "newt"),
|
||||||
|
or(
|
||||||
|
lt(sites.lastPing, twoMinutesAgo),
|
||||||
|
isNull(sites.lastPing)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
);
|
||||||
|
|
||||||
|
for (const staleSite of staleSites) {
|
||||||
|
// Backward-compatibility check: if the newt still has an
|
||||||
|
// active WebSocket connection (older clients that don't send
|
||||||
|
// pings), keep the site online.
|
||||||
|
const isConnected = await hasActiveConnections(
|
||||||
|
staleSite.newtId
|
||||||
|
);
|
||||||
|
if (isConnected) {
|
||||||
|
logger.debug(
|
||||||
|
`Newt ${staleSite.newtId} has not pinged recently but is still connected via WebSocket — keeping site ${staleSite.siteId} online`
|
||||||
|
);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
`Marking site ${staleSite.siteId} offline: newt ${staleSite.newtId} has no recent ping and no active WebSocket connection`
|
||||||
|
);
|
||||||
|
|
||||||
|
await db
|
||||||
|
.update(sites)
|
||||||
|
.set({ online: false })
|
||||||
|
.where(eq(sites.siteId, staleSite.siteId));
|
||||||
|
|
||||||
|
const healthChecksOnSite = await db
|
||||||
|
.select()
|
||||||
|
.from(targetHealthCheck)
|
||||||
|
.innerJoin(
|
||||||
|
targets,
|
||||||
|
eq(targets.targetId, targetHealthCheck.targetId)
|
||||||
|
)
|
||||||
|
.innerJoin(sites, eq(sites.siteId, targets.siteId))
|
||||||
|
.where(eq(sites.siteId, staleSite.siteId));
|
||||||
|
|
||||||
|
for (const healthCheck of healthChecksOnSite) {
|
||||||
|
logger.info(
|
||||||
|
`Marking health check ${healthCheck.targetHealthCheck.targetHealthCheckId} offline due to site ${staleSite.siteId} being marked offline`
|
||||||
|
);
|
||||||
|
await db
|
||||||
|
.update(targetHealthCheck)
|
||||||
|
.set({ hcHealth: "unknown" })
|
||||||
|
.where(
|
||||||
|
eq(
|
||||||
|
targetHealthCheck.targetHealthCheckId,
|
||||||
|
healthCheck.targetHealthCheck
|
||||||
|
.targetHealthCheckId
|
||||||
|
)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
await fireSiteOfflineAlert(staleSite.orgId, staleSite.siteId, staleSite.name);
|
||||||
|
}
|
||||||
|
|
||||||
|
// this part only effects self hosted. Its not efficient but we dont expect people to have very many wireguard sites
|
||||||
|
// select all of the wireguard sites to evaluate if they need to be offline due to the last bandwidth update
|
||||||
|
const allWireguardSites = await db
|
||||||
|
.select({
|
||||||
|
siteId: sites.siteId,
|
||||||
|
online: sites.online,
|
||||||
|
lastBandwidthUpdate: sites.lastBandwidthUpdate
|
||||||
|
})
|
||||||
|
.from(sites)
|
||||||
|
.where(
|
||||||
|
and(
|
||||||
|
eq(sites.type, "wireguard"),
|
||||||
|
not(isNull(sites.lastBandwidthUpdate))
|
||||||
|
)
|
||||||
|
);
|
||||||
|
|
||||||
|
const wireguardOfflineThreshold = Math.floor(
|
||||||
|
(Date.now() - OFFLINE_THRESHOLD_BANDWIDTH_MS) / 1000
|
||||||
|
);
|
||||||
|
|
||||||
|
// loop over each one. If its offline and there is a new update then mark it online. If its online and there is no update then mark it offline
|
||||||
|
for (const site of allWireguardSites) {
|
||||||
|
const lastBandwidthUpdate =
|
||||||
|
new Date(site.lastBandwidthUpdate!).getTime() / 1000;
|
||||||
|
if (
|
||||||
|
lastBandwidthUpdate < wireguardOfflineThreshold &&
|
||||||
|
site.online
|
||||||
|
) {
|
||||||
|
logger.info(
|
||||||
|
`Marking wireguard site ${site.siteId} offline: no bandwidth update in over ${OFFLINE_THRESHOLD_BANDWIDTH_MS / 60000} minutes`
|
||||||
|
);
|
||||||
|
|
||||||
|
await db
|
||||||
|
.update(sites)
|
||||||
|
.set({ online: false })
|
||||||
|
.where(eq(sites.siteId, site.siteId));
|
||||||
|
} else if (
|
||||||
|
lastBandwidthUpdate >= wireguardOfflineThreshold &&
|
||||||
|
!site.online
|
||||||
|
) {
|
||||||
|
logger.info(
|
||||||
|
`Marking wireguard site ${site.siteId} online: recent bandwidth update`
|
||||||
|
);
|
||||||
|
|
||||||
|
await db
|
||||||
|
.update(sites)
|
||||||
|
.set({ online: true })
|
||||||
|
.where(eq(sites.siteId, site.siteId));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (error) {
|
||||||
|
logger.error("Error in newt offline checker interval", { error });
|
||||||
|
}
|
||||||
|
}, OFFLINE_CHECK_INTERVAL);
|
||||||
|
|
||||||
|
logger.debug("Started newt offline checker interval");
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Stops the background interval that checks for offline newt sites.
|
||||||
|
*/
|
||||||
|
export const stopNewtOfflineChecker = (): void => {
|
||||||
|
if (offlineCheckerInterval) {
|
||||||
|
clearInterval(offlineCheckerInterval);
|
||||||
|
offlineCheckerInterval = null;
|
||||||
|
logger.info("Stopped newt offline checker interval");
|
||||||
|
}
|
||||||
|
};
|
||||||
@@ -1,104 +1,17 @@
|
|||||||
import { disconnectClient, getClientConfigVersion } from "#dynamic/routers/ws";
|
import { getClientConfigVersion } from "#dynamic/routers/ws";
|
||||||
import { db } from "@server/db";
|
import { db } from "@server/db";
|
||||||
import { MessageHandler } from "@server/routers/ws";
|
import { MessageHandler } from "@server/routers/ws";
|
||||||
import { clients, olms, Olm } from "@server/db";
|
import { clients, Olm } from "@server/db";
|
||||||
import { eq, lt, isNull, and, or } from "drizzle-orm";
|
import { eq } from "drizzle-orm";
|
||||||
import { recordClientPing } from "@server/routers/newt/pingAccumulator";
|
import { recordClientPing } from "@server/routers/newt/pingAccumulator";
|
||||||
import logger from "@server/logger";
|
import logger from "@server/logger";
|
||||||
import { validateSessionToken } from "@server/auth/sessions/app";
|
import { validateSessionToken } from "@server/auth/sessions/app";
|
||||||
import { checkOrgAccessPolicy } from "#dynamic/lib/checkOrgAccessPolicy";
|
import { checkOrgAccessPolicy } from "#dynamic/lib/checkOrgAccessPolicy";
|
||||||
import { sendTerminateClient } from "../client/terminate";
|
|
||||||
import { encodeHexLowerCase } from "@oslojs/encoding";
|
import { encodeHexLowerCase } from "@oslojs/encoding";
|
||||||
import { sha256 } from "@oslojs/crypto/sha2";
|
import { sha256 } from "@oslojs/crypto/sha2";
|
||||||
import { sendOlmSyncMessage } from "./sync";
|
import { sendOlmSyncMessage } from "./sync";
|
||||||
import { OlmErrorCodes } from "./error";
|
|
||||||
import { handleFingerprintInsertion } from "./fingerprintingUtils";
|
import { handleFingerprintInsertion } from "./fingerprintingUtils";
|
||||||
|
|
||||||
// Track if the offline checker interval is running
|
|
||||||
let offlineCheckerInterval: NodeJS.Timeout | null = null;
|
|
||||||
const OFFLINE_CHECK_INTERVAL = 30 * 1000; // Check every 30 seconds
|
|
||||||
const OFFLINE_THRESHOLD_MS = 2 * 60 * 1000; // 2 minutes
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Starts the background interval that checks for clients that haven't pinged recently
|
|
||||||
* and marks them as offline
|
|
||||||
*/
|
|
||||||
export const startOlmOfflineChecker = (): void => {
|
|
||||||
if (offlineCheckerInterval) {
|
|
||||||
return; // Already running
|
|
||||||
}
|
|
||||||
|
|
||||||
offlineCheckerInterval = setInterval(async () => {
|
|
||||||
try {
|
|
||||||
const twoMinutesAgo = Math.floor(
|
|
||||||
(Date.now() - OFFLINE_THRESHOLD_MS) / 1000
|
|
||||||
);
|
|
||||||
|
|
||||||
// TODO: WE NEED TO MAKE SURE THIS WORKS WITH DISTRIBUTED NODES ALL DOING THE SAME THING
|
|
||||||
|
|
||||||
// Find clients that haven't pinged in the last 2 minutes and mark them as offline
|
|
||||||
const offlineClients = await db
|
|
||||||
.update(clients)
|
|
||||||
.set({ online: false })
|
|
||||||
.where(
|
|
||||||
and(
|
|
||||||
eq(clients.online, true),
|
|
||||||
or(
|
|
||||||
lt(clients.lastPing, twoMinutesAgo),
|
|
||||||
isNull(clients.lastPing)
|
|
||||||
)
|
|
||||||
)
|
|
||||||
)
|
|
||||||
.returning();
|
|
||||||
|
|
||||||
for (const offlineClient of offlineClients) {
|
|
||||||
logger.info(
|
|
||||||
`Kicking offline olm client ${offlineClient.clientId} due to inactivity`
|
|
||||||
);
|
|
||||||
|
|
||||||
if (!offlineClient.olmId) {
|
|
||||||
logger.warn(
|
|
||||||
`Offline client ${offlineClient.clientId} has no olmId, cannot disconnect`
|
|
||||||
);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Send a disconnect message to the client if connected
|
|
||||||
try {
|
|
||||||
await sendTerminateClient(
|
|
||||||
offlineClient.clientId,
|
|
||||||
OlmErrorCodes.TERMINATED_INACTIVITY,
|
|
||||||
offlineClient.olmId
|
|
||||||
); // terminate first
|
|
||||||
// wait a moment to ensure the message is sent
|
|
||||||
await new Promise((resolve) => setTimeout(resolve, 1000));
|
|
||||||
await disconnectClient(offlineClient.olmId);
|
|
||||||
} catch (error) {
|
|
||||||
logger.error(
|
|
||||||
`Error sending disconnect to offline olm ${offlineClient.clientId}`,
|
|
||||||
{ error }
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} catch (error) {
|
|
||||||
logger.error("Error in offline checker interval", { error });
|
|
||||||
}
|
|
||||||
}, OFFLINE_CHECK_INTERVAL);
|
|
||||||
|
|
||||||
logger.debug("Started offline checker interval");
|
|
||||||
};
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Stops the background interval that checks for offline clients
|
|
||||||
*/
|
|
||||||
export const stopOlmOfflineChecker = (): void => {
|
|
||||||
if (offlineCheckerInterval) {
|
|
||||||
clearInterval(offlineCheckerInterval);
|
|
||||||
offlineCheckerInterval = null;
|
|
||||||
logger.info("Stopped offline checker interval");
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Handles ping messages from clients and responds with pong
|
* Handles ping messages from clients and responds with pong
|
||||||
*/
|
*/
|
||||||
|
|||||||
@@ -12,3 +12,4 @@ export * from "./handleOlmUnRelayMessage";
|
|||||||
export * from "./recoverOlmWithFingerprint";
|
export * from "./recoverOlmWithFingerprint";
|
||||||
export * from "./handleOlmDisconnectingMessage";
|
export * from "./handleOlmDisconnectingMessage";
|
||||||
export * from "./handleOlmServerInitAddPeerHandshake";
|
export * from "./handleOlmServerInitAddPeerHandshake";
|
||||||
|
export * from "./offlineChecker";
|
||||||
|
|||||||
92
server/routers/olm/offlineChecker.ts
Normal file
92
server/routers/olm/offlineChecker.ts
Normal file
@@ -0,0 +1,92 @@
|
|||||||
|
import { disconnectClient, getClientConfigVersion } from "#dynamic/routers/ws";
|
||||||
|
import { db } from "@server/db";
|
||||||
|
import { clients } from "@server/db";
|
||||||
|
import { eq, lt, isNull, and, or } from "drizzle-orm";
|
||||||
|
import logger from "@server/logger";
|
||||||
|
import { sendTerminateClient } from "../client/terminate";
|
||||||
|
import { OlmErrorCodes } from "./error";
|
||||||
|
|
||||||
|
// Track if the offline checker interval is running
|
||||||
|
let offlineCheckerInterval: NodeJS.Timeout | null = null;
|
||||||
|
const OFFLINE_CHECK_INTERVAL = 30 * 1000; // Check every 30 seconds
|
||||||
|
const OFFLINE_THRESHOLD_MS = 2 * 60 * 1000; // 2 minutes
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Starts the background interval that checks for clients that haven't pinged recently
|
||||||
|
* and marks them as offline
|
||||||
|
*/
|
||||||
|
export const startOlmOfflineChecker = (): void => {
|
||||||
|
if (offlineCheckerInterval) {
|
||||||
|
return; // Already running
|
||||||
|
}
|
||||||
|
|
||||||
|
offlineCheckerInterval = setInterval(async () => {
|
||||||
|
try {
|
||||||
|
const twoMinutesAgo = Math.floor(
|
||||||
|
(Date.now() - OFFLINE_THRESHOLD_MS) / 1000
|
||||||
|
);
|
||||||
|
|
||||||
|
// TODO: WE NEED TO MAKE SURE THIS WORKS WITH DISTRIBUTED NODES ALL DOING THE SAME THING
|
||||||
|
|
||||||
|
// Find clients that haven't pinged in the last 2 minutes and mark them as offline
|
||||||
|
const offlineClients = await db
|
||||||
|
.update(clients)
|
||||||
|
.set({ online: false })
|
||||||
|
.where(
|
||||||
|
and(
|
||||||
|
eq(clients.online, true),
|
||||||
|
or(
|
||||||
|
lt(clients.lastPing, twoMinutesAgo),
|
||||||
|
isNull(clients.lastPing)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
.returning();
|
||||||
|
|
||||||
|
for (const offlineClient of offlineClients) {
|
||||||
|
logger.info(
|
||||||
|
`Kicking offline olm client ${offlineClient.clientId} due to inactivity`
|
||||||
|
);
|
||||||
|
|
||||||
|
if (!offlineClient.olmId) {
|
||||||
|
logger.warn(
|
||||||
|
`Offline client ${offlineClient.clientId} has no olmId, cannot disconnect`
|
||||||
|
);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Send a disconnect message to the client if connected
|
||||||
|
try {
|
||||||
|
await sendTerminateClient(
|
||||||
|
offlineClient.clientId,
|
||||||
|
OlmErrorCodes.TERMINATED_INACTIVITY,
|
||||||
|
offlineClient.olmId
|
||||||
|
); // terminate first
|
||||||
|
// wait a moment to ensure the message is sent
|
||||||
|
await new Promise((resolve) => setTimeout(resolve, 1000));
|
||||||
|
await disconnectClient(offlineClient.olmId);
|
||||||
|
} catch (error) {
|
||||||
|
logger.error(
|
||||||
|
`Error sending disconnect to offline olm ${offlineClient.clientId}`,
|
||||||
|
{ error }
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (error) {
|
||||||
|
logger.error("Error in offline checker interval", { error });
|
||||||
|
}
|
||||||
|
}, OFFLINE_CHECK_INTERVAL);
|
||||||
|
|
||||||
|
logger.debug("Started offline checker interval");
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Stops the background interval that checks for offline clients
|
||||||
|
*/
|
||||||
|
export const stopOlmOfflineChecker = (): void => {
|
||||||
|
if (offlineCheckerInterval) {
|
||||||
|
clearInterval(offlineCheckerInterval);
|
||||||
|
offlineCheckerInterval = null;
|
||||||
|
logger.info("Stopped offline checker interval");
|
||||||
|
}
|
||||||
|
};
|
||||||
Reference in New Issue
Block a user