mirror of
https://github.com/fosrl/pangolin.git
synced 2026-03-21 10:06:38 +00:00
Extend sanitize into hybrid
This commit is contained in:
40
server/lib/sanitize.ts
Normal file
40
server/lib/sanitize.ts
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
/**
 * Matches a lone UTF-16 surrogate: a high surrogate that is not followed by a
 * low surrogate, or a low surrogate that is not preceded by a high surrogate.
 */
const LONE_SURROGATE_RE =
    /[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF]/g;

/**
 * Matches the null byte, every C0 control character except HT (\x09),
 * LF (\x0A) and CR (\x0D), and DEL (\x7F).
 */
const DISALLOWED_CONTROL_RE = /[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g;

/**
 * Sanitize a string field before inserting into a database TEXT column.
 *
 * Two passes are applied, in this order:
 *
 * 1. Lone UTF-16 surrogates – JavaScript strings can hold unpaired surrogates
 *    (e.g. \uD800 without a following \uDC00-\uDFFF code unit). These are
 *    valid in JS but cannot be encoded as UTF-8, which the original note
 *    reports as triggering `report_invalid_encoding` in SQLite / Postgres.
 *    They are replaced with the Unicode replacement character U+FFFD so the
 *    data is preserved as a visible signal that something was malformed.
 *
 * 2. Null bytes and C0 control characters – per the original note, a \x00 in
 *    a TEXT value causes `report_invalid_encoding`, and bots and scanners
 *    routinely inject null bytes into URLs (e.g. `/path\u0000.jpg`). All C0
 *    control characters in the range \x00-\x1F are stripped except for the
 *    three that are legitimate in text payloads: HT (\x09), LF (\x0A), and
 *    CR (\x0D). DEL (\x7F) is also stripped.
 *
 * The surrogate pass runs first on purpose: if control characters were
 * stripped first, a high and a low surrogate separated only by a control
 * character would be fused into a seemingly valid pair.
 *
 * @param value - The raw string, or `null` / `undefined` when the field is absent.
 * @returns The sanitized string, or `undefined` when `value` is `null` or
 *          `undefined` (note that `null` is normalized to `undefined`).
 */
export function sanitizeString(value: string): string;
export function sanitizeString(
    value: string | null | undefined
): string | undefined;
export function sanitizeString(
    value: string | null | undefined
): string | undefined {
    // `== null` deliberately matches both null and undefined.
    if (value == null) return undefined;

    // The shared /g regexes are safe to reuse here: String.prototype.replace
    // resets lastIndex before scanning when the pattern is global.
    return value
        .replace(LONE_SURROGATE_RE, "\uFFFD")
        .replace(DISALLOWED_CONTROL_RE, "");
}
|
||||||
@@ -81,6 +81,7 @@ import { verifyResourceAccessToken } from "@server/auth/verifyResourceAccessToke
|
|||||||
import semver from "semver";
|
import semver from "semver";
|
||||||
import { maxmindAsnLookup } from "@server/db/maxmindAsn";
|
import { maxmindAsnLookup } from "@server/db/maxmindAsn";
|
||||||
import { checkOrgAccessPolicy } from "@server/lib/checkOrgAccessPolicy";
|
import { checkOrgAccessPolicy } from "@server/lib/checkOrgAccessPolicy";
|
||||||
|
import { sanitizeString } from "@server/lib/sanitize";
|
||||||
|
|
||||||
// Zod schemas for request validation
|
// Zod schemas for request validation
|
||||||
const getResourceByDomainParamsSchema = z.strictObject({
|
const getResourceByDomainParamsSchema = z.strictObject({
|
||||||
@@ -1859,24 +1860,24 @@ hybridRouter.post(
|
|||||||
})
|
})
|
||||||
.map((logEntry) => ({
|
.map((logEntry) => ({
|
||||||
timestamp: logEntry.timestamp,
|
timestamp: logEntry.timestamp,
|
||||||
orgId: logEntry.orgId,
|
orgId: sanitizeString(logEntry.orgId),
|
||||||
actorType: logEntry.actorType,
|
actorType: sanitizeString(logEntry.actorType),
|
||||||
actor: logEntry.actor,
|
actor: sanitizeString(logEntry.actor),
|
||||||
actorId: logEntry.actorId,
|
actorId: sanitizeString(logEntry.actorId),
|
||||||
metadata: logEntry.metadata,
|
metadata: sanitizeString(logEntry.metadata),
|
||||||
action: logEntry.action,
|
action: logEntry.action,
|
||||||
resourceId: logEntry.resourceId,
|
resourceId: logEntry.resourceId,
|
||||||
reason: logEntry.reason,
|
reason: logEntry.reason,
|
||||||
location: logEntry.location,
|
location: sanitizeString(logEntry.location),
|
||||||
// userAgent: data.userAgent, // TODO: add this
|
// userAgent: data.userAgent, // TODO: add this
|
||||||
// headers: data.body.headers,
|
// headers: data.body.headers,
|
||||||
// query: data.body.query,
|
// query: data.body.query,
|
||||||
originalRequestURL: logEntry.originalRequestURL,
|
originalRequestURL: sanitizeString(logEntry.originalRequestURL) ?? "",
|
||||||
scheme: logEntry.scheme,
|
scheme: sanitizeString(logEntry.scheme) ?? "",
|
||||||
host: logEntry.host,
|
host: sanitizeString(logEntry.host) ?? "",
|
||||||
path: logEntry.path,
|
path: sanitizeString(logEntry.path) ?? "",
|
||||||
method: logEntry.method,
|
method: sanitizeString(logEntry.method) ?? "",
|
||||||
ip: logEntry.ip,
|
ip: sanitizeString(logEntry.ip),
|
||||||
tls: logEntry.tls
|
tls: logEntry.tls
|
||||||
}));
|
}));
|
||||||
|
|
||||||
|
|||||||
@@ -5,25 +5,7 @@ import cache from "#dynamic/lib/cache";
|
|||||||
import { calculateCutoffTimestamp } from "@server/lib/cleanupLogs";
|
import { calculateCutoffTimestamp } from "@server/lib/cleanupLogs";
|
||||||
import { stripPortFromHost } from "@server/lib/ip";
|
import { stripPortFromHost } from "@server/lib/ip";
|
||||||
|
|
||||||
/**
|
import { sanitizeString } from "@server/lib/sanitize";
|
||||||
* Sanitize a string field by replacing lone UTF-16 surrogates (which cannot
|
|
||||||
* be encoded as valid UTF-8) with the Unicode replacement character, and
|
|
||||||
* stripping ASCII control characters that are invalid in most text columns.
|
|
||||||
*/
|
|
||||||
/**
 * Sanitize a string field by replacing lone UTF-16 surrogates (which cannot
 * be encoded as valid UTF-8) with the Unicode replacement character U+FFFD,
 * and stripping the null byte, C0 control characters other than HT (\x09),
 * LF (\x0A) and CR (\x0D), and DEL (\x7F).
 *
 * @param value - The raw string, or `null` / `undefined` when absent.
 * @returns The sanitized string, or `undefined` when `value` is `null` or
 *          `undefined`.
 */
function sanitizeString(value: string | undefined | null): string | undefined {
    // `== null` deliberately matches both null and undefined.
    if (value == null) return undefined;

    // A high surrogate not followed by a low surrogate, or a low surrogate
    // not preceded by a high surrogate.
    const unpairedSurrogate =
        /[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF]/g;
    // C0 control characters except HT (\x09), LF (\x0A), CR (\x0D), plus DEL.
    const strippedControlChars = /[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g;

    // Surrogates are handled first so that removing a control character
    // between two lone surrogates cannot fuse them into a pair.
    const wellFormed = value.replace(unpairedSurrogate, "\uFFFD");
    return wellFormed.replace(strippedControlChars, "");
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user