WindsurfAPI / src /sanitize.js
github-actions[bot]
Deploy from GitHub: 7495fde758f0be655f95e6331fec2898267f790c
f6266b9
/**
* Strip server-internal filesystem paths from model output before it reaches
* the API caller.
*
* Background: Cascade's baked-in system context tells the model its workspace
* lives at /tmp/windsurf-workspace. Even after we removed CascadeToolConfig
* .run_command (see windsurf.js buildCascadeConfig) the model still
* (a) narrates "I'll look at /tmp/windsurf-workspace/config.yaml" in plain
* text, and
* (b) occasionally emits built-in edit_file / view_file / list_directory
* trajectory steps whose argumentsJson references these paths.
* Both routes leak the proxy's internal filesystem layout to API callers.
*
* This module provides two scrubbers:
* - sanitizeText(s) — one-shot, use on accumulated buffers
* - PathSanitizeStream — incremental, use on streaming chunks
*
* The streaming version holds back any tail that could be an incomplete
* prefix of a sensitive literal OR a match-in-progress whose path-tail hasn't
* hit a terminator yet, so a path cannot slip through by straddling a chunk
* boundary.
*/
// Redaction rules as [regex, replacement] pairs: literal path prefixes that
// must never appear in output. sanitizeText applies every rule in turn, in
// the order given (not just the first one that matches).
//
// Each regex matches the literal prefix plus an optional path tail: a "/"
// followed by any run of characters up to the first whitespace, quote,
// backtick, or one of < > ) } ] , * ;
//
// The workspace literal is replaced with "." followed by the captured tail
// ($1, empty when there is no tail), so text like
// "/tmp/windsurf-workspace/foo.py" becomes "./foo.py" (still readable). The
// other two are replaced wholesale, tail included, with "[internal]" — no
// reason a caller should ever see them.
const PATTERNS = [
[/\/tmp\/windsurf-workspace(\/[^\s"'`<>)}\],*;]*)?/g, '.$1'],
[/\/opt\/windsurf(?:\/[^\s"'`<>)}\],*;]*)?/g, '[internal]'],
[/\/root\/WindsurfAPI(?:\/[^\s"'`<>)}\],*;]*)?/g, '[internal]'],
];
// Bare literals (no path tail) used by the streaming cut-point finder
// (PathSanitizeStream._safeCutPoint) for plain substring / prefix checks.
// These are the same three prefixes that the PATTERNS regexes start with;
// keep the two lists in sync when adding or removing a sensitive path.
const SENSITIVE_LITERALS = [
'/tmp/windsurf-workspace',
'/opt/windsurf',
'/root/WindsurfAPI',
];
// Character class that counts as part of a path body. Mirrors the PATTERNS
// regex char class so cut-point detection matches replacement behaviour.
// It is a negated class: a character is path body unless it is whitespace,
// a quote, a backtick, or one of < > ) } ] , * ;
// The regex has no `g` flag, so repeated .test() calls on single characters
// carry no lastIndex state between them.
const PATH_BODY_RE = /[^\s"'`<>)}\],*;]/;
/**
 * Redact every sensitive server path found in `s`.
 *
 * Each [regex, replacement] rule in PATTERNS is applied to the running
 * result, in list order, so the text is scanned once per rule.
 *
 * @param {*} s - Text to scrub. Anything that is not a non-empty string is
 *   handed back untouched.
 * @returns {*} The scrubbed string, or `s` itself when there was nothing to do.
 */
export function sanitizeText(s) {
  const isNonEmptyString = typeof s === 'string' && s.length > 0;
  if (!isNonEmptyString) return s;
  return PATTERNS.reduce(
    (scrubbed, [pattern, replacement]) => scrubbed.replace(pattern, replacement),
    s,
  );
}
/**
 * Streaming counterpart of sanitizeText, for text that arrives as deltas.
 *
 * Typical use:
 *   const stream = new PathSanitizeStream();
 *   for (const chunk of deltas) emit(stream.feed(chunk));
 *   emit(stream.flush());
 *
 * Text is only released once no sensitive path in it can still grow. Any
 * tail that might turn into (or continue) a sensitive path — a partial
 * literal prefix, or a full literal whose path body has not met a terminator
 * yet — stays in `buffer` until a later feed() or the final flush().
 */
export class PathSanitizeStream {
  constructor() {
    // Text received so far that has not been released to the caller yet.
    this.buffer = '';
  }

  /**
   * Append `delta` and release as much sanitized text as is safe.
   *
   * @param {string} delta - Next chunk of raw text; falsy values are ignored.
   * @returns {string} Sanitized text that can be emitted now (possibly '').
   */
  feed(delta) {
    if (!delta) return '';
    this.buffer += delta;

    const boundary = this._safeCutPoint();
    if (boundary === 0) return '';

    const pending = this.buffer;
    const releasable = pending.slice(0, boundary);
    this.buffer = pending.slice(boundary);
    return sanitizeText(releasable);
  }

  /**
   * Find how much of `buffer` can be released right now.
   *
   * Starts from "everything" and backs off, per sensitive literal, to the
   * earliest of:
   *   (1) the start of a complete literal whose path body reaches the end of
   *       the buffer — more path characters may still arrive, and the
   *       replacement must cover the whole path;
   *   (2) the start of a buffer tail that is a proper prefix of the literal
   *       (e.g. "/tmp/win") — the next delta may complete it.
   *
   * @returns {number} Index into `buffer`; everything before it is safe.
   */
  _safeCutPoint() {
    const text = this.buffer;
    const total = text.length;
    let boundary = total;

    for (const literal of SENSITIVE_LITERALS) {
      // (1) Walk the occurrences of the full literal, looking for one whose
      // path body is still open at the end of the buffer.
      let from = 0;
      while (from < total) {
        const hit = text.indexOf(literal, from);
        if (hit < 0) break;

        let tailEnd = hit + literal.length;
        while (tailEnd < total && PATH_BODY_RE.test(text[tailEnd])) tailEnd += 1;

        if (tailEnd === total) {
          boundary = Math.min(boundary, hit);
          break;
        }
        // text[tailEnd] is a terminator, so resume just past it.
        from = tailEnd + 1;
      }

      // (2) Longest buffer tail that is a proper prefix of the literal:
      // scanning start positions left to right tries the longest tail first.
      const earliest = Math.max(total - (literal.length - 1), 0);
      for (let start = earliest; start < total; start += 1) {
        if (literal.startsWith(text.slice(start))) {
          boundary = Math.min(boundary, start);
          break;
        }
      }
    }

    return boundary;
  }

  /**
   * Release whatever is still held back, sanitized, and reset the stream.
   *
   * @returns {string} Sanitized remainder (possibly '').
   */
  flush() {
    const remainder = sanitizeText(this.buffer);
    this.buffer = '';
    return remainder;
  }
}
/**
 * Scrub a native Cascade tool call (built-in tools such as edit_file /
 * view_file) so it can be shown to the client.
 *
 * Returns a shallow copy of `tc` whose `argumentsJson` and `result` have
 * been passed through sanitizeText; a falsy value in either field becomes
 * ''. The input object is not modified. Not on the hot path today —
 * handlers/chat.js drops all native tool calls in non-emulation mode rather
 * than risk leakage — but kept for opt-in use.
 *
 * @param {object} tc - Tool call to scrub; falsy input is returned as-is.
 * @returns {object} Scrubbed copy of `tc`, or `tc` itself when falsy.
 */
export function sanitizeToolCall(tc) {
  if (!tc) return tc;

  const scrubbed = { ...tc };
  scrubbed.argumentsJson = sanitizeText(tc.argumentsJson || '');
  scrubbed.result = sanitizeText(tc.result || '');
  return scrubbed;
}