Extend sanitize into hybrid

2026-03-21 10:06:38 +00:00 · 2026-03-20 14:31:12 -07:00
parent ce2704fc1a
commit 0c4d9ea164
3 changed files with 54 additions and 31 deletions
--- a/server/lib/sanitize.ts
+++ b/server/lib/sanitize.ts
@@ -0,0 +1,40 @@
 /**
 * Cleans a string so it can be written to a database TEXT column without
 * tripping encoding errors (`report_invalid_encoding`).
 *
 * The input goes through two substitutions, in this order:
 *
 * 1. Unpaired UTF-16 surrogates. A JavaScript string may contain a high
 *    surrogate (\uD800-\uDBFF) with no low surrogate after it, or a low
 *    surrogate (\uDC00-\uDFFF) with no high surrogate before it. Such code
 *    units have no well-formed UTF-8 encoding, so each one is swapped for
 *    U+FFFD (the replacement character), leaving a visible marker where the
 *    malformed data was. Properly paired surrogates are left alone.
 *
 * 2. Control characters. NUL and the remaining C0 controls (\x00-\x1F) plus
 *    DEL (\x7F) are removed outright; bot and scanner URLs such as
 *    `/path\u0000.jpg` are a typical source. Only HT (\x09), LF (\x0A) and
 *    CR (\x0D) survive, since they occur in ordinary text payloads.
 *
 * @param value - Text to clean; `null` and `undefined` are accepted.
 * @returns The cleaned text, or `undefined` when `value` is `null` or
 * `undefined`.
 */
 export function sanitizeString(value: string): string;
 export function sanitizeString(
    value: string | null | undefined
 ): string | undefined;
 export function sanitizeString(
    value: string | null | undefined
 ): string | undefined {
    // Absent input has nothing to clean; both null and undefined map to undefined.
    if (value === null || value === undefined) {
        return undefined;
    }
    // Pass 1: a high surrogate not followed by a low one, or a low surrogate
    // not preceded by a high one, becomes U+FFFD.
    const wellFormed = value.replace(
        /[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF]/g,
        "\uFFFD"
    );
    // Pass 2: drop NUL, the other C0 controls (keeping HT/LF/CR), and DEL.
    return wellFormed.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, "");
 }
--- a/server/private/routers/hybrid.ts
+++ b/server/private/routers/hybrid.ts
@@ -81,6 +81,7 @@ import { verifyResourceAccessToken } from "@server/auth/verifyResourceAccessToke
 import semver from "semver";
 import { maxmindAsnLookup } from "@server/db/maxmindAsn";
 import { checkOrgAccessPolicy } from "@server/lib/checkOrgAccessPolicy";
 import { sanitizeString } from "@server/lib/sanitize";
 // Zod schemas for request validation
 const getResourceByDomainParamsSchema = z.strictObject({
@@ -1859,24 +1860,24 @@ hybridRouter.post(
                })
                .map((logEntry) => ({
                    timestamp: logEntry.timestamp,
-                    orgId: logEntry.orgId,
+                    orgId: sanitizeString(logEntry.orgId),
-                    actorType: logEntry.actorType,
+                    actorType: sanitizeString(logEntry.actorType),
-                    actor: logEntry.actor,
+                    actor: sanitizeString(logEntry.actor),
-                    actorId: logEntry.actorId,
+                    actorId: sanitizeString(logEntry.actorId),
-                    metadata: logEntry.metadata,
+                    metadata: sanitizeString(logEntry.metadata),
                    action: logEntry.action,
                    resourceId: logEntry.resourceId,
                    reason: logEntry.reason,
-                    location: logEntry.location,
+                    location: sanitizeString(logEntry.location),
                    // userAgent: data.userAgent, // TODO: add this
                    // headers: data.body.headers,
                    // query: data.body.query,
-                    originalRequestURL: logEntry.originalRequestURL,
+                    originalRequestURL: sanitizeString(logEntry.originalRequestURL) ?? "",
-                    scheme: logEntry.scheme,
+                    scheme: sanitizeString(logEntry.scheme) ?? "",
-                    host: logEntry.host,
+                    host: sanitizeString(logEntry.host) ?? "",
-                    path: logEntry.path,
+                    path: sanitizeString(logEntry.path) ?? "",
-                    method: logEntry.method,
+                    method: sanitizeString(logEntry.method) ?? "",
-                    ip: logEntry.ip,
+                    ip: sanitizeString(logEntry.ip),
                    tls: logEntry.tls
                }));
--- a/server/routers/badger/logRequestAudit.ts
+++ b/server/routers/badger/logRequestAudit.ts
@@ -5,25 +5,7 @@ import cache from "#dynamic/lib/cache";
 import { calculateCutoffTimestamp } from "@server/lib/cleanupLogs";
 import { stripPortFromHost } from "@server/lib/ip";
-/**
+import { sanitizeString } from "@server/lib/sanitize";
 * Sanitize a string field by replacing lone UTF-16 surrogates (which cannot
 * be encoded as valid UTF-8) with the Unicode replacement character, and
 * stripping ASCII control characters that are invalid in most text columns.
 */
 function sanitizeString(value: string | undefined | null): string | undefined {
    // Nothing to clean: null and undefined both map to undefined.
    if (value === null || value === undefined) {
        return undefined;
    }
    // A high surrogate with no low surrogate after it, or a low surrogate
    // with no high surrogate before it.
    const loneSurrogate =
        /[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF]/g;
    // C0 control characters other than HT (\x09), LF (\x0A) and CR (\x0D),
    // plus DEL (\x7F).
    const controlChar = /[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g;
    // Lone surrogates are replaced with U+FFFD first; control characters are
    // then removed from that result.
    const repaired = value.replace(loneSurrogate, "\uFFFD");
    return repaired.replace(controlChar, "");
 }
 /**