Files
pangolin/server/private/lib/logStreaming/LogStreamingManager.ts
2026-03-31 12:22:37 -07:00

626 lines
21 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/*
* This file is part of a proprietary work.
*
* Copyright (c) 2025 Fossorial, Inc.
* All rights reserved.
*
* This file is licensed under the Fossorial Commercial License.
* You may not use this file except in compliance with the License.
* Unauthorized use, copying, modification, or distribution is strictly prohibited.
*
* This file is not licensed under the AGPLv3.
*/
import {
db,
logsDb,
eventStreamingDestinations,
eventStreamingCursors,
requestAuditLog,
actionAuditLog,
accessAuditLog,
connectionAuditLog
} from "@server/db";
import logger from "@server/logger";
import { and, eq, gt, desc, max, sql } from "drizzle-orm";
import {
LogType,
LOG_TYPES,
LogEvent,
DestinationFailureState,
HttpConfig
} from "./types";
import { LogDestinationProvider } from "./providers/LogDestinationProvider";
import { HttpLogDestination } from "./providers/HttpLogDestination";
import type { EventStreamingDestination } from "@server/db";
// ---------------------------------------------------------------------------
// Configuration
// ---------------------------------------------------------------------------
/**
 * How often (ms) the manager polls all destinations for new log records.
 * Destinations that were behind (full batch returned) will be re-polled
 * immediately without waiting for this interval.
 */
const POLL_INTERVAL_MS = 30_000;
/**
 * Maximum number of log records fetched from the DB in a single query.
 * This also controls the maximum size of one HTTP POST body, since each
 * fetched batch is forwarded to the provider as a single send() call.
 */
const BATCH_SIZE = 250;
/**
 * Minimum delay (ms) between consecutive HTTP requests to the same destination
 * during a catch-up run. Prevents bursting thousands of requests back-to-back
 * when a destination has fallen behind.
 */
const INTER_BATCH_DELAY_MS = 100;
/**
 * Maximum number of consecutive back-to-back batches to process for a single
 * destination per poll cycle. After this limit the destination will wait for
 * the next scheduled poll before continuing, giving other destinations a turn.
 */
const MAX_CATCHUP_BATCHES = 20;
/**
 * Back-off schedule (ms) indexed by consecutive failure count.
 * After the last entry the max value is re-used (see recordFailure, which
 * clamps the index to the last entry).
 */
const BACKOFF_SCHEDULE_MS = [
    60_000, // 1 min (failure 1)
    2 * 60_000, // 2 min (failure 2)
    5 * 60_000, // 5 min (failure 3)
    10 * 60_000, // 10 min (failure 4)
    30 * 60_000 // 30 min (failure 5+)
];
// ---------------------------------------------------------------------------
// LogStreamingManager
// ---------------------------------------------------------------------------
/**
 * Orchestrates periodic polling of the four audit-log tables (request, action,
 * access, connection) and forwards new records to every enabled
 * event-streaming destination.
 *
 * ### Design
 * - **Interval-based**: a timer fires every `POLL_INTERVAL_MS`. On each tick
 *   every enabled destination is processed in sequence.
 * - **Cursor-based**: the last successfully forwarded row `id` is persisted in
 *   the `eventStreamingCursors` table so state survives restarts.
 * - **Catch-up**: if a full batch is returned the destination is immediately
 *   re-queried (up to `MAX_CATCHUP_BATCHES` times) before yielding.
 * - **Smoothing**: `INTER_BATCH_DELAY_MS` is inserted between consecutive
 *   catch-up batches to avoid hammering the remote endpoint.
 * - **Back-off**: consecutive send failures trigger exponential back-off
 *   (tracked in-memory per destination). Successful sends reset the counter.
 *   NOTE(review): back-off state is in-memory only, so it resets on process
 *   restart — the persisted cursor is unaffected.
 */
export class LogStreamingManager {
    /** Pending poll timer handle; null when no poll is currently scheduled. */
    private pollTimer: ReturnType<typeof setTimeout> | null = null;
    /** Set by start(), cleared by shutdown(); gates rescheduling and loops. */
    private isRunning = false;
    /** True while a poll cycle is executing; prevents overlapping cycles. */
    private isPolling = false;
    /** In-memory back-off state keyed by destinationId. */
    private readonly failures = new Map<number, DestinationFailureState>();
    // -------------------------------------------------------------------------
    // Lifecycle
    // -------------------------------------------------------------------------
    /** Begin periodic polling. Idempotent — a second call is a no-op. */
    start(): void {
        if (this.isRunning) return;
        this.isRunning = true;
        logger.info("LogStreamingManager: started");
        this.schedulePoll(POLL_INTERVAL_MS);
    }
    /**
     * Stop polling. Cancels any pending timer, then waits up to 15 seconds
     * for an in-progress poll cycle to drain before returning.
     */
    async shutdown(): Promise<void> {
        this.isRunning = false;
        if (this.pollTimer !== null) {
            clearTimeout(this.pollTimer);
            this.pollTimer = null;
        }
        // Wait for any in-progress poll to finish before returning so that
        // callers (graceful-shutdown handlers) can safely exit afterward.
        const deadline = Date.now() + 15_000;
        while (this.isPolling && Date.now() < deadline) {
            await sleep(100);
        }
        logger.info("LogStreamingManager: stopped");
    }
    // -------------------------------------------------------------------------
    // Scheduling
    // -------------------------------------------------------------------------
    /**
     * Arm a one-shot timer that runs a poll cycle after `delayMs` and then,
     * if the manager is still running, re-arms itself for the next interval.
     * Using a chained setTimeout (rather than setInterval) guarantees cycles
     * never overlap regardless of how long a cycle takes.
     */
    private schedulePoll(delayMs: number): void {
        this.pollTimer = setTimeout(() => {
            this.pollTimer = null;
            this.runPoll()
                .catch((err) =>
                    logger.error("LogStreamingManager: unexpected poll error", err)
                )
                .finally(() => {
                    if (this.isRunning) {
                        this.schedulePoll(POLL_INTERVAL_MS);
                    }
                });
        }, delayMs);
        // Do not keep the event loop alive just for the poll timer — the
        // graceful-shutdown path calls shutdown() explicitly. (unref is
        // optional-chained because browser-style timer handles lack it.)
        this.pollTimer.unref?.();
    }
    // -------------------------------------------------------------------------
    // Poll cycle
    // -------------------------------------------------------------------------
    /**
     * One full poll cycle: load every enabled destination and process each in
     * sequence. Guarded by `isPolling` so a slow cycle is never re-entered.
     * Per-destination errors are logged and swallowed so one bad destination
     * cannot starve the others.
     */
    private async runPoll(): Promise<void> {
        if (this.isPolling) return; // previous poll still running — skip
        this.isPolling = true;
        try {
            const destinations = await this.loadEnabledDestinations();
            if (destinations.length === 0) return;
            for (const dest of destinations) {
                if (!this.isRunning) break; // bail early during shutdown
                await this.processDestination(dest).catch((err) => {
                    // Individual destination errors must never abort the whole cycle
                    logger.error(
                        `LogStreamingManager: unhandled error for destination ${dest.destinationId}`,
                        err
                    );
                });
            }
        } finally {
            this.isPolling = false;
        }
    }
    // -------------------------------------------------------------------------
    // Per-destination processing
    // -------------------------------------------------------------------------
    /**
     * Forward pending logs of every enabled type for one destination.
     *
     * Skips the destination while it is inside its back-off window, when its
     * JSON config cannot be parsed, when its type has no provider, or when it
     * has no log types enabled. A failure on any log type records one
     * back-off step; a fully clean run clears the failure state.
     */
    private async processDestination(
        dest: EventStreamingDestination
    ): Promise<void> {
        // Check back-off
        const failState = this.failures.get(dest.destinationId);
        if (failState && Date.now() < failState.nextRetryAt) {
            logger.debug(
                `LogStreamingManager: destination ${dest.destinationId} in back-off, skipping`
            );
            return;
        }
        // Parse config — skip destination if config is unparseable.
        // NOTE(review): dest.config is assumed to be a JSON-serialised
        // HttpConfig written by the destination CRUD layer — confirm there.
        let config: HttpConfig;
        try {
            config = JSON.parse(dest.config) as HttpConfig;
        } catch (err) {
            logger.error(
                `LogStreamingManager: destination ${dest.destinationId} has invalid JSON config`,
                err
            );
            return;
        }
        const provider = this.createProvider(dest.type, config);
        if (!provider) {
            logger.warn(
                `LogStreamingManager: unsupported destination type "${dest.type}" ` +
                    `for destination ${dest.destinationId} skipping`
            );
            return;
        }
        // Collect the log types this destination has opted into.
        const enabledTypes: LogType[] = [];
        if (dest.sendRequestLogs) enabledTypes.push("request");
        if (dest.sendActionLogs) enabledTypes.push("action");
        if (dest.sendAccessLogs) enabledTypes.push("access");
        if (dest.sendConnectionLogs) enabledTypes.push("connection");
        if (enabledTypes.length === 0) return;
        let anyFailure = false;
        for (const logType of enabledTypes) {
            if (!this.isRunning) break; // bail early during shutdown
            try {
                await this.processLogType(dest, provider, logType);
            } catch (err) {
                anyFailure = true;
                logger.error(
                    `LogStreamingManager: failed to process "${logType}" logs ` +
                        `for destination ${dest.destinationId}`,
                    err
                );
            }
        }
        if (anyFailure) {
            this.recordFailure(dest.destinationId);
        } else {
            // A fully clean run resets the failure/back-off state
            if (this.failures.has(dest.destinationId)) {
                this.failures.delete(dest.destinationId);
                logger.info(
                    `LogStreamingManager: destination ${dest.destinationId} recovered`
                );
            }
        }
    }
    /**
     * Forward all pending log records of a specific type for a destination.
     *
     * Fetches up to `BATCH_SIZE` records at a time. If the batch is full
     * (indicating more records may exist) it loops immediately, inserting a
     * short delay between consecutive requests to the remote endpoint.
     * The loop is capped at `MAX_CATCHUP_BATCHES` to keep the poll cycle
     * bounded.
     *
     * The cursor is only advanced AFTER a successful provider.send(), so a
     * failed send re-delivers the same batch on the next attempt
     * (at-least-once delivery).
     */
    private async processLogType(
        dest: EventStreamingDestination,
        provider: LogDestinationProvider,
        logType: LogType
    ): Promise<void> {
        // Ensure a cursor row exists (creates one pointing at the current max
        // id so we do not replay historical logs on first run)
        const cursor = await this.getOrCreateCursor(
            dest.destinationId,
            logType,
            dest.orgId
        );
        let lastSentId = cursor.lastSentId;
        let batchCount = 0;
        while (batchCount < MAX_CATCHUP_BATCHES) {
            const rows = await this.fetchLogs(
                logType,
                dest.orgId,
                lastSentId,
                BATCH_SIZE
            );
            if (rows.length === 0) break; // nothing pending
            const events = rows.map((row) =>
                this.rowToLogEvent(logType, row)
            );
            // Throws on failure — caught by the caller which applies back-off
            await provider.send(events);
            // Rows are fetched in ascending id order, so the last row's id is
            // the new high-water mark.
            lastSentId = rows[rows.length - 1].id;
            await this.updateCursor(dest.destinationId, logType, lastSentId);
            batchCount++;
            if (rows.length < BATCH_SIZE) {
                // Partial batch means we have caught up
                break;
            }
            // Full batch — there are likely more records; pause briefly before
            // fetching the next batch to smooth out the HTTP request rate
            if (batchCount < MAX_CATCHUP_BATCHES) {
                await sleep(INTER_BATCH_DELAY_MS);
            }
        }
    }
    // -------------------------------------------------------------------------
    // Cursor management
    // -------------------------------------------------------------------------
    /**
     * Read the persisted cursor for (destination, logType), creating it if it
     * does not exist. A new cursor is initialised at the current max log id so
     * only events created after destination setup are forwarded.
     *
     * NOTE(review): read-then-insert is not atomic; safe only while a single
     * manager instance runs per deployment — confirm.
     */
    private async getOrCreateCursor(
        destinationId: number,
        logType: LogType,
        orgId: string
    ): Promise<{ lastSentId: number }> {
        // Try to read an existing cursor
        const existing = await db
            .select({
                lastSentId: eventStreamingCursors.lastSentId
            })
            .from(eventStreamingCursors)
            .where(
                and(
                    eq(eventStreamingCursors.destinationId, destinationId),
                    eq(eventStreamingCursors.logType, logType)
                )
            )
            .limit(1);
        if (existing.length > 0) {
            return { lastSentId: existing[0].lastSentId };
        }
        // No cursor yet — initialise at the current maximum id of the log
        // table for this org so we only forward *new* events going forward.
        const initialId = await this.getCurrentMaxId(logType, orgId);
        await db.insert(eventStreamingCursors).values({
            destinationId,
            logType,
            lastSentId: initialId,
            lastSentAt: null
        });
        logger.debug(
            `LogStreamingManager: initialised cursor for destination ${destinationId} ` +
                `logType="${logType}" at id=${initialId}`
        );
        return { lastSentId: initialId };
    }
    /**
     * Persist the new high-water mark for (destination, logType) and stamp
     * the send time (epoch milliseconds).
     */
    private async updateCursor(
        destinationId: number,
        logType: LogType,
        lastSentId: number
    ): Promise<void> {
        await db
            .update(eventStreamingCursors)
            .set({
                lastSentId,
                lastSentAt: Date.now()
            })
            .where(
                and(
                    eq(eventStreamingCursors.destinationId, destinationId),
                    eq(eventStreamingCursors.logType, logType)
                )
            );
    }
    /**
     * Returns the current maximum `id` in the given log table for the org.
     * Returns 0 when the table is empty or when the query fails (a failed
     * lookup is logged and degrades to "replay from the beginning").
     */
    private async getCurrentMaxId(
        logType: LogType,
        orgId: string
    ): Promise<number> {
        try {
            // The switch is exhaustive over LogType, so every path returns.
            switch (logType) {
                case "request": {
                    const [row] = await logsDb
                        .select({ maxId: max(requestAuditLog.id) })
                        .from(requestAuditLog)
                        .where(eq(requestAuditLog.orgId, orgId));
                    return row?.maxId ?? 0;
                }
                case "action": {
                    const [row] = await logsDb
                        .select({ maxId: max(actionAuditLog.id) })
                        .from(actionAuditLog)
                        .where(eq(actionAuditLog.orgId, orgId));
                    return row?.maxId ?? 0;
                }
                case "access": {
                    const [row] = await logsDb
                        .select({ maxId: max(accessAuditLog.id) })
                        .from(accessAuditLog)
                        .where(eq(accessAuditLog.orgId, orgId));
                    return row?.maxId ?? 0;
                }
                case "connection": {
                    const [row] = await logsDb
                        .select({ maxId: max(connectionAuditLog.id) })
                        .from(connectionAuditLog)
                        .where(eq(connectionAuditLog.orgId, orgId));
                    return row?.maxId ?? 0;
                }
            }
        } catch (err) {
            logger.warn(
                `LogStreamingManager: could not determine current max id for ` +
                    `logType="${logType}", defaulting to 0`,
                err
            );
            return 0;
        }
    }
    // -------------------------------------------------------------------------
    // Log fetching
    // -------------------------------------------------------------------------
    /**
     * Fetch up to `limit` log rows with `id > afterId`, ordered by id ASC,
     * filtered to the given organisation.
     *
     * The rows are widened to `Record<string, unknown> & { id: number }` so a
     * single code path can handle all four table shapes downstream.
     */
    private async fetchLogs(
        logType: LogType,
        orgId: string,
        afterId: number,
        limit: number
    ): Promise<Array<Record<string, unknown> & { id: number }>> {
        switch (logType) {
            case "request":
                return (await logsDb
                    .select()
                    .from(requestAuditLog)
                    .where(
                        and(
                            eq(requestAuditLog.orgId, orgId),
                            gt(requestAuditLog.id, afterId)
                        )
                    )
                    .orderBy(requestAuditLog.id)
                    .limit(limit)) as Array<
                    Record<string, unknown> & { id: number }
                >;
            case "action":
                return (await logsDb
                    .select()
                    .from(actionAuditLog)
                    .where(
                        and(
                            eq(actionAuditLog.orgId, orgId),
                            gt(actionAuditLog.id, afterId)
                        )
                    )
                    .orderBy(actionAuditLog.id)
                    .limit(limit)) as Array<
                    Record<string, unknown> & { id: number }
                >;
            case "access":
                return (await logsDb
                    .select()
                    .from(accessAuditLog)
                    .where(
                        and(
                            eq(accessAuditLog.orgId, orgId),
                            gt(accessAuditLog.id, afterId)
                        )
                    )
                    .orderBy(accessAuditLog.id)
                    .limit(limit)) as Array<
                    Record<string, unknown> & { id: number }
                >;
            case "connection":
                return (await logsDb
                    .select()
                    .from(connectionAuditLog)
                    .where(
                        and(
                            eq(connectionAuditLog.orgId, orgId),
                            gt(connectionAuditLog.id, afterId)
                        )
                    )
                    .orderBy(connectionAuditLog.id)
                    .limit(limit)) as Array<
                    Record<string, unknown> & { id: number }
                >;
        }
    }
    // -------------------------------------------------------------------------
    // Row → LogEvent conversion
    // -------------------------------------------------------------------------
    /**
     * Wrap a raw DB row in the LogEvent envelope sent to providers. The whole
     * row is attached as `data`; timestamp and orgId are lifted out, with 0 /
     * "" as defensive fallbacks when the column is absent or mistyped.
     *
     * NOTE(review): the timestamp columns appear to hold numbers, but the
     * unit (seconds vs milliseconds) is not visible here — confirm against
     * the log-table schema.
     */
    private rowToLogEvent(
        logType: LogType,
        row: Record<string, unknown> & { id: number }
    ): LogEvent {
        // Determine the timestamp for this row type: connection rows use
        // startedAt, the other three use a plain timestamp column.
        let timestamp: number;
        switch (logType) {
            case "request":
            case "action":
            case "access":
                timestamp =
                    typeof row.timestamp === "number" ? row.timestamp : 0;
                break;
            case "connection":
                timestamp =
                    typeof row.startedAt === "number" ? row.startedAt : 0;
                break;
        }
        const orgId =
            typeof row.orgId === "string" ? row.orgId : "";
        return {
            id: row.id,
            logType,
            orgId,
            timestamp,
            data: row as Record<string, unknown>
        };
    }
    // -------------------------------------------------------------------------
    // Provider factory
    // -------------------------------------------------------------------------
    /**
     * Instantiate the correct LogDestinationProvider for the given destination
     * type string. Returns `null` for unknown types.
     *
     * To add a new provider:
     * 1. Implement `LogDestinationProvider` in a new file under `providers/`
     * 2. Add a `case` here
     */
    private createProvider(
        type: string,
        config: unknown
    ): LogDestinationProvider | null {
        switch (type) {
            case "http":
                return new HttpLogDestination(config as HttpConfig);
            // Future providers:
            // case "datadog": return new DatadogLogDestination(config as DatadogConfig);
            default:
                return null;
        }
    }
    // -------------------------------------------------------------------------
    // Back-off tracking
    // -------------------------------------------------------------------------
    /**
     * Increment the consecutive-failure counter for a destination and compute
     * its next retry time from BACKOFF_SCHEDULE_MS (index clamped to the last
     * entry, so failures beyond the schedule reuse the maximum delay).
     */
    private recordFailure(destinationId: number): void {
        const current = this.failures.get(destinationId) ?? {
            consecutiveFailures: 0,
            nextRetryAt: 0
        };
        current.consecutiveFailures += 1;
        const scheduleIdx = Math.min(
            current.consecutiveFailures - 1,
            BACKOFF_SCHEDULE_MS.length - 1
        );
        const backoffMs = BACKOFF_SCHEDULE_MS[scheduleIdx];
        current.nextRetryAt = Date.now() + backoffMs;
        this.failures.set(destinationId, current);
        logger.warn(
            `LogStreamingManager: destination ${destinationId} failed ` +
                `(consecutive #${current.consecutiveFailures}), ` +
                `backing off for ${backoffMs / 1000}s`
        );
    }
    // -------------------------------------------------------------------------
    // DB helpers
    // -------------------------------------------------------------------------
    /**
     * Load all destinations with enabled=true. Query failures are logged and
     * reported as an empty list so the poll cycle degrades to a no-op rather
     * than throwing.
     */
    private async loadEnabledDestinations(): Promise<
        EventStreamingDestination[]
    > {
        try {
            return await db
                .select()
                .from(eventStreamingDestinations)
                .where(eq(eventStreamingDestinations.enabled, true));
        } catch (err) {
            logger.error(
                "LogStreamingManager: failed to load destinations",
                err
            );
            return [];
        }
    }
}
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
/**
 * Resolve after approximately `ms` milliseconds.
 *
 * @param ms - delay in milliseconds before the returned promise fulfils
 * @returns a promise that fulfils with `undefined` once the timer fires
 */
function sleep(ms: number): Promise<void> {
    return new Promise<void>((resolve) => {
        setTimeout(() => {
            resolve();
        }, ms);
    });
}