Spaces:
Sleeping
Sleeping
| /** | |
| * Strip server-internal filesystem paths from model output before it reaches | |
| * the API caller. | |
| * | |
| * Background: Cascade's baked-in system context tells the model its workspace | |
| * lives at /tmp/windsurf-workspace. Even after we removed CascadeToolConfig | |
| * .run_command (see windsurf.js buildCascadeConfig) the model still | |
| * (a) narrates "I'll look at /tmp/windsurf-workspace/config.yaml" in plain | |
| * text, and | |
| * (b) occasionally emits built-in edit_file / view_file / list_directory | |
| * trajectory steps whose argumentsJson references these paths. | |
| * Both routes leak the proxy's internal filesystem layout to API callers. | |
| * | |
| * This module provides two scrubbers: | |
| * - sanitizeText(s) β one-shot, use on accumulated buffers | |
| * - PathSanitizeStream β incremental, use on streaming chunks | |
| * | |
| * The streaming version holds back any tail that could be an incomplete | |
| * prefix of a sensitive literal OR a match-in-progress whose path-tail hasn't | |
| * hit a terminator yet, so a path cannot slip through by straddling a chunk | |
| * boundary. | |
| */ | |
| // Literal prefixes that must never appear in output. First-match wins in the | |
| // order given. The workspace literal is replaced with "." so text like | |
| // "/tmp/windsurf-workspace/foo.py" becomes "./foo.py" (still readable). The | |
| // other two go to "[internal]" β no reason a caller should ever see them. | |
| const PATTERNS = [ | |
| [/\/tmp\/windsurf-workspace(\/[^\s"'`<>)}\],*;]*)?/g, '.$1'], | |
| [/\/opt\/windsurf(?:\/[^\s"'`<>)}\],*;]*)?/g, '[internal]'], | |
| [/\/root\/WindsurfAPI(?:\/[^\s"'`<>)}\],*;]*)?/g, '[internal]'], | |
| ]; | |
| // Bare literals (no path tail) used by the streaming cut-point finder. | |
| const SENSITIVE_LITERALS = [ | |
| '/tmp/windsurf-workspace', | |
| '/opt/windsurf', | |
| '/root/WindsurfAPI', | |
| ]; | |
| // Character class that counts as part of a path body. Mirrors the PATTERNS | |
| // regex char class so cut-point detection matches replacement behaviour. | |
| const PATH_BODY_RE = /[^\s"'`<>)}\],*;]/; | |
| /** | |
| * Apply all path redactions to `s` in one pass. Safe to call on any string; | |
| * non-strings and empty strings are returned unchanged. | |
| */ | |
| export function sanitizeText(s) { | |
| if (typeof s !== 'string' || !s) return s; | |
| let out = s; | |
| for (const [re, rep] of PATTERNS) out = out.replace(re, rep); | |
| return out; | |
| } | |
| /** | |
| * Incremental sanitizer for streamed deltas. | |
| * | |
| * Usage: | |
| * const stream = new PathSanitizeStream(); | |
| * for (const chunk of deltas) emit(stream.feed(chunk)); | |
| * emit(stream.flush()); | |
| * | |
| * The returned string from feed()/flush() is guaranteed to contain no | |
| * sensitive literal. Any trailing text that COULD extend into a sensitive | |
| * literal (either as a partial prefix or as an unterminated path tail) is | |
| * held internally until the next feed or the flush. | |
| */ | |
| export class PathSanitizeStream { | |
| constructor() { | |
| this.buffer = ''; | |
| } | |
| feed(delta) { | |
| if (!delta) return ''; | |
| this.buffer += delta; | |
| const cut = this._safeCutPoint(); | |
| if (cut === 0) return ''; | |
| const safeRegion = this.buffer.slice(0, cut); | |
| this.buffer = this.buffer.slice(cut); | |
| return sanitizeText(safeRegion); | |
| } | |
| // Largest index into this.buffer such that buffer[0:cut] contains no | |
| // match that could extend past `cut`. Two conditions back off the cut: | |
| // (1) a full sensitive literal was found but its path body ran to the | |
| // end of the buffer β the next delta might append more path chars, | |
| // in which case the fully-rendered path would differ. Hold from the | |
| // literal's start. | |
| // (2) the buffer tail is itself a proper prefix of a sensitive literal | |
| // (e.g., ends with "/tmp/win") β the next delta might complete it. | |
| // Hold from that tail start. | |
| _safeCutPoint() { | |
| const buf = this.buffer; | |
| const len = buf.length; | |
| let cut = len; | |
| // (1) unterminated full literal | |
| for (const lit of SENSITIVE_LITERALS) { | |
| let searchFrom = 0; | |
| while (searchFrom < len) { | |
| const idx = buf.indexOf(lit, searchFrom); | |
| if (idx === -1) break; | |
| let end = idx + lit.length; | |
| while (end < len && PATH_BODY_RE.test(buf[end])) end++; | |
| if (end === len) { | |
| if (idx < cut) cut = idx; | |
| break; | |
| } | |
| searchFrom = end + 1; | |
| } | |
| } | |
| // (2) partial-prefix tail | |
| for (const lit of SENSITIVE_LITERALS) { | |
| const maxLen = Math.min(lit.length - 1, len); | |
| for (let plen = maxLen; plen > 0; plen--) { | |
| if (buf.endsWith(lit.slice(0, plen))) { | |
| const start = len - plen; | |
| if (start < cut) cut = start; | |
| break; | |
| } | |
| } | |
| } | |
| return cut; | |
| } | |
| flush() { | |
| const out = sanitizeText(this.buffer); | |
| this.buffer = ''; | |
| return out; | |
| } | |
| } | |
| /** | |
| * Sanitize a native Cascade tool call (built-in tools like edit_file / | |
| * view_file) before surfacing to the client. Scrubs argumentsJson and | |
| * result. Not used on the hot path today β handlers/chat.js drops all | |
| * native tool calls in non-emulation mode rather than risking leakage β | |
| * but kept here for opt-in use. | |
| */ | |
| export function sanitizeToolCall(tc) { | |
| if (!tc) return tc; | |
| return { | |
| ...tc, | |
| argumentsJson: sanitizeText(tc.argumentsJson || ''), | |
| result: sanitizeText(tc.result || ''), | |
| }; | |
| } | |