Spaces:

darkfire514
/

OpenClawBot

Paused

App Files Files Community

OpenClawBot / src /auto-reply /chunk.ts

darkfire514

Upload 2526 files

fb4d8fe verified about 1 month ago

raw

history blame contribute delete

15.2 kB

	// Utilities for splitting outbound text into platform-sized chunks without
	// unintentionally breaking on newlines. Using [\s\S] keeps newlines inside
	// the chunk so messages are only split when they truly exceed the limit.

	import type { ChannelId } from "../channels/plugins/types.js";
	import type { OpenClawConfig } from "../config/config.js";
	import { findFenceSpanAt, isSafeFenceBreak, parseFenceSpans } from "../markdown/fences.js";
	import { normalizeAccountId } from "../routing/session-key.js";
	import { INTERNAL_MESSAGE_CHANNEL } from "../utils/message-channel.js";

	/**
	 * Destination a chunk configuration is resolved for: either a channel plugin id
	 * or the internal message channel constant.
	 */
	export type TextChunkProvider = ChannelId | typeof INTERNAL_MESSAGE_CHANNEL;

	/**
	 * Chunking mode for outbound messages:
	 * - "length": split only when the text exceeds the configured chunk limit (default).
	 * - "newline": prefer breaking on "soft" boundaries. Historically this split on every
	 *   newline; now it only breaks on paragraph boundaries (blank lines) unless the text
	 *   exceeds the length limit.
	 */
	export type ChunkMode = "length" | "newline";

	// Chunk size used by resolveTextChunkLimit when neither the provider config nor
	// the caller-supplied fallback yields a positive number.
	const DEFAULT_CHUNK_LIMIT = 4000;
	// Chunk mode returned by resolveChunkMode when no provider/account setting applies.
	const DEFAULT_CHUNK_MODE: ChunkMode = "length";

	/** Chunk settings that can be set on a provider section or on one of its accounts. */
	type AccountChunkOverrides = {
	  textChunkLimit?: number;
	  chunkMode?: ChunkMode;
	};

	/**
	 * Shape of the chunk-related part of a provider's config section: provider-wide
	 * settings plus optional per-account overrides keyed by account id.
	 */
	type ProviderChunkConfig = AccountChunkOverrides & {
	  accounts?: Record<string, AccountChunkOverrides>;
	};

	/**
	 * Looks up the text chunk limit configured in one provider section.
	 *
	 * Resolution order:
	 * 1. the account entry whose key equals the normalized account id,
	 * 2. the first account entry whose key matches case-insensitively,
	 * 3. the provider-wide `textChunkLimit`.
	 *
	 * An account entry only wins when its `textChunkLimit` is a number.
	 *
	 * @param cfgSection - Provider config section, or undefined when the provider is not configured.
	 * @param accountId - Account id; passed through `normalizeAccountId` before lookup.
	 * @returns The configured limit, or undefined when nothing is configured.
	 */
	function resolveChunkLimitForProvider(
	  cfgSection: ProviderChunkConfig | undefined,
	  accountId?: string | null,
	): number | undefined {
	  if (!cfgSection) {
	    return undefined;
	  }
	  const normalizedAccountId = normalizeAccountId(accountId);
	  const accounts = cfgSection.accounts;
	  if (accounts && typeof accounts === "object") {
	    const direct = accounts[normalizedAccountId];
	    if (typeof direct?.textChunkLimit === "number") {
	      return direct.textChunkLimit;
	    }
	    // Fall back to a case-insensitive key match; lowercase the target once.
	    const wantedKey = normalizedAccountId.toLowerCase();
	    const matchKey = Object.keys(accounts).find((key) => key.toLowerCase() === wantedKey);
	    const match = matchKey ? accounts[matchKey] : undefined;
	    if (typeof match?.textChunkLimit === "number") {
	      return match.textChunkLimit;
	    }
	  }
	  return cfgSection.textChunkLimit;
	}

	/**
	 * Resolves the maximum chunk length for outbound text.
	 *
	 * The provider section is read from `cfg.channels[provider]`, falling back to
	 * `cfg[provider]`. A positive numeric limit found there (account-level first,
	 * see `resolveChunkLimitForProvider`) wins; otherwise the fallback is used.
	 *
	 * @param cfg - Loaded configuration, if any.
	 * @param provider - Target provider; when missing or the internal channel, config is not consulted.
	 * @param accountId - Account whose override should be preferred.
	 * @param opts - `fallbackLimit` replaces the built-in default when it is a positive number.
	 * @returns A positive chunk limit.
	 */
	export function resolveTextChunkLimit(
	  cfg: OpenClawConfig | undefined,
	  provider?: TextChunkProvider,
	  accountId?: string | null,
	  opts?: { fallbackLimit?: number },
	): number {
	  const fallback =
	    typeof opts?.fallbackLimit === "number" && opts.fallbackLimit > 0
	      ? opts.fallbackLimit
	      : DEFAULT_CHUNK_LIMIT;
	  if (!provider || provider === INTERNAL_MESSAGE_CHANNEL) {
	    return fallback;
	  }
	  const channelsConfig = cfg?.channels as Record<string, unknown> | undefined;
	  // Prefer the section under `channels`; otherwise look for a top-level key named after the provider.
	  const providerConfig = (channelsConfig?.[provider] ??
	    (cfg as Record<string, unknown> | undefined)?.[provider]) as ProviderChunkConfig | undefined;
	  const providerOverride = resolveChunkLimitForProvider(providerConfig, accountId);
	  // Ignore non-numeric, zero, negative, and NaN overrides.
	  if (typeof providerOverride === "number" && providerOverride > 0) {
	    return providerOverride;
	  }
	  return fallback;
	}

	/**
	 * Looks up the chunk mode configured in one provider section.
	 *
	 * Resolution order:
	 * 1. the account entry whose key equals the normalized account id,
	 * 2. the first account entry whose key matches case-insensitively,
	 * 3. the provider-wide `chunkMode`.
	 *
	 * An account entry only wins when its `chunkMode` is truthy.
	 *
	 * @param cfgSection - Provider config section, or undefined when the provider is not configured.
	 * @param accountId - Account id; passed through `normalizeAccountId` before lookup.
	 * @returns The configured mode, or undefined when nothing is configured.
	 */
	function resolveChunkModeForProvider(
	  cfgSection: ProviderChunkConfig | undefined,
	  accountId?: string | null,
	): ChunkMode | undefined {
	  if (!cfgSection) {
	    return undefined;
	  }
	  const normalizedAccountId = normalizeAccountId(accountId);
	  const accounts = cfgSection.accounts;
	  if (accounts && typeof accounts === "object") {
	    const direct = accounts[normalizedAccountId];
	    if (direct?.chunkMode) {
	      return direct.chunkMode;
	    }
	    // Fall back to a case-insensitive key match; lowercase the target once.
	    const wantedKey = normalizedAccountId.toLowerCase();
	    const matchKey = Object.keys(accounts).find((key) => key.toLowerCase() === wantedKey);
	    const match = matchKey ? accounts[matchKey] : undefined;
	    if (match?.chunkMode) {
	      return match.chunkMode;
	    }
	  }
	  return cfgSection.chunkMode;
	}

	/**
	 * Resolves the chunk mode for outbound text.
	 *
	 * The provider section is read from `cfg.channels[provider]`, falling back to
	 * `cfg[provider]`; account-level settings take precedence over provider-level
	 * ones (see `resolveChunkModeForProvider`).
	 *
	 * @param cfg - Loaded configuration, if any.
	 * @param provider - Target provider; when missing or the internal channel, the default mode is returned.
	 * @param accountId - Account whose override should be preferred.
	 * @returns The configured mode, or the default mode when none is configured.
	 */
	export function resolveChunkMode(
	  cfg: OpenClawConfig | undefined,
	  provider?: TextChunkProvider,
	  accountId?: string | null,
	): ChunkMode {
	  if (!provider || provider === INTERNAL_MESSAGE_CHANNEL) {
	    return DEFAULT_CHUNK_MODE;
	  }
	  const channelsConfig = cfg?.channels as Record<string, unknown> | undefined;
	  // Prefer the section under `channels`; otherwise look for a top-level key named after the provider.
	  const providerConfig = (channelsConfig?.[provider] ??
	    (cfg as Record<string, unknown> | undefined)?.[provider]) as ProviderChunkConfig | undefined;
	  const mode = resolveChunkModeForProvider(providerConfig, accountId);
	  return mode ?? DEFAULT_CHUNK_MODE;
	}

	/**
	 * Splits text into one chunk per non-blank line.
	 *
	 * - Blank (whitespace-only) lines are not emitted on their own; they are folded
	 *   into the next non-blank line as leading "\n" characters, capped so the prefix
	 *   never uses more than `maxLineLength - 1` characters.
	 * - Blank lines after the last non-blank line are appended to the final chunk as
	 *   "\n" characters (this suffix is not capped).
	 * - A line longer than the limit is split by length via `chunkText`, unless
	 *   `splitLongLines` is false.
	 *
	 * @param text - Text to split.
	 * @param maxLineLength - Maximum chunk length; when <= 0 the text is returned whole
	 *   (or nothing if it is only whitespace).
	 * @param opts - `splitLongLines` (default true) splits over-long lines;
	 *   `trimLines` (default true) trims each line; `isSafeBreak` can veto
	 *   individual newline positions as split points.
	 * @returns The chunks in source order.
	 */
	export function chunkByNewline(
	  text: string,
	  maxLineLength: number,
	  opts?: {
	    splitLongLines?: boolean;
	    trimLines?: boolean;
	    isSafeBreak?: (index: number) => boolean;
	  },
	): string[] {
	  if (!text) {
	    return [];
	  }
	  if (maxLineLength <= 0) {
	    return text.trim() ? [text] : [];
	  }
	  const splitLongLines = opts?.splitLongLines !== false;
	  const trimLines = opts?.trimLines !== false;
	  // Leave room for at least one character of content after the blank-line prefix.
	  const maxPrefix = Math.max(0, maxLineLength - 1);
	  const lines = splitByNewline(text, opts?.isSafeBreak);
	  const chunks: string[] = [];
	  let pendingBlankLines = 0;

	  for (const line of lines) {
	    const trimmed = line.trim();
	    if (!trimmed) {
	      pendingBlankLines += 1;
	      continue;
	    }

	    const prefix = "\n".repeat(Math.min(pendingBlankLines, maxPrefix));
	    pendingBlankLines = 0;

	    const lineValue = trimLines ? trimmed : line;
	    if (!splitLongLines || lineValue.length + prefix.length <= maxLineLength) {
	      chunks.push(prefix + lineValue);
	      continue;
	    }

	    // Over-long line: the first piece shares its budget with the prefix, the
	    // rest is split by length.
	    const firstLimit = Math.max(1, maxLineLength - prefix.length);
	    chunks.push(prefix + lineValue.slice(0, firstLimit));
	    const remaining = lineValue.slice(firstLimit);
	    if (remaining) {
	      chunks.push(...chunkText(remaining, maxLineLength));
	    }
	  }

	  if (pendingBlankLines > 0 && chunks.length > 0) {
	    chunks[chunks.length - 1] += "\n".repeat(pendingBlankLines);
	  }

	  return chunks;
	}

	/**
	 * Splits text at paragraph boundaries (blank lines), keeping lists and
	 * single-newline line wraps inside a paragraph together.
	 *
	 * - Line endings are normalized to "\n" first.
	 * - A boundary is a newline followed by optional spaces/tabs and one or more
	 *   newlines; boundaries inside fenced code blocks are skipped.
	 * - Each non-empty paragraph becomes its own chunk, with trailing whitespace removed.
	 * - A paragraph longer than `limit` is split by length via `chunkText`, unless
	 *   `splitLongParagraphs` is false.
	 *
	 * @param text - Text to split.
	 * @param limit - Maximum chunk length; when <= 0 the text is returned unchanged as one chunk.
	 * @param opts - `splitLongParagraphs` (default true) enables the length fallback.
	 * @returns The chunks in source order.
	 */
	export function chunkByParagraph(
	  text: string,
	  limit: number,
	  opts?: { splitLongParagraphs?: boolean },
	): string[] {
	  if (!text) {
	    return [];
	  }
	  if (limit <= 0) {
	    return [text];
	  }
	  const allowLengthSplit = opts?.splitLongParagraphs !== false;

	  // Emits one piece, length-splitting it only when it is too long and splitting is allowed.
	  const emit = (piece: string): string[] =>
	    piece.length > limit && allowLengthSplit ? chunkText(piece, limit) : [piece];

	  // Work on "\n"-only text so blank-line detection is uniform.
	  const body = text.replace(/\r\n?/g, "\n");

	  // No blank-line separator at all: the whole text is a single paragraph.
	  if (body.search(/\n[\t ]*\n+/) === -1) {
	    return emit(body);
	  }

	  const fences = parseFenceSpans(body);
	  const separator = /\n[\t ]*\n+/g;
	  const result: string[] = [];

	  // Adds one raw slice of the text as a paragraph, ignoring whitespace-only slices.
	  const addParagraph = (raw: string): void => {
	    const paragraph = raw.trimEnd();
	    if (paragraph) {
	      result.push(...emit(paragraph));
	    }
	  };

	  let cursor = 0;
	  let found = separator.exec(body);
	  while (found !== null) {
	    // Blank lines inside a fenced code block are not paragraph boundaries.
	    if (isSafeFenceBreak(fences, found.index)) {
	      addParagraph(body.slice(cursor, found.index));
	      cursor = found.index + found[0].length;
	    }
	    found = separator.exec(body);
	  }
	  addParagraph(body.slice(cursor));

	  return result;
	}

	/**
	 * Chunks plain text with the strategy selected by `mode`: paragraph-based for
	 * "newline", length-based for anything else.
	 */
	export function chunkTextWithMode(text: string, limit: number, mode: ChunkMode): string[] {
	  return mode === "newline" ? chunkByParagraph(text, limit) : chunkText(text, limit);
	}

	/**
	 * Chunks markdown text with the strategy selected by `mode`.
	 *
	 * In "newline" mode the text is first cut at paragraph boundaries without any
	 * length splitting, then each paragraph goes through the markdown-aware chunker
	 * so over-long paragraphs are split there. Any other mode uses the
	 * markdown-aware chunker directly.
	 */
	export function chunkMarkdownTextWithMode(text: string, limit: number, mode: ChunkMode): string[] {
	  if (mode !== "newline") {
	    return chunkMarkdownText(text, limit);
	  }
	  const paragraphs = chunkByParagraph(text, limit, { splitLongParagraphs: false });
	  return paragraphs.flatMap((paragraph) => {
	    const pieces = chunkMarkdownText(paragraph, limit);
	    // Keep the paragraph itself if the markdown chunker returned nothing for it.
	    return pieces.length === 0 && paragraph ? [paragraph] : pieces;
	  });
	}

	/**
	 * Splits text at "\n" characters, skipping any newline whose index is rejected
	 * by `isSafeBreak` (such a newline stays inside its segment).
	 *
	 * @param text - Text to split.
	 * @param isSafeBreak - Called with the index of each newline; defaults to accepting all.
	 * @returns The segments without their separating newlines; always at least one element.
	 */
	function splitByNewline(
	  text: string,
	  isSafeBreak: (index: number) => boolean = () => true,
	): string[] {
	  const segments: string[] = [];
	  let segmentStart = 0;
	  let newlineAt = text.indexOf("\n");
	  while (newlineAt !== -1) {
	    if (isSafeBreak(newlineAt)) {
	      segments.push(text.slice(segmentStart, newlineAt));
	      segmentStart = newlineAt + 1;
	    }
	    newlineAt = text.indexOf("\n", newlineAt + 1);
	  }
	  segments.push(text.slice(segmentStart));
	  return segments;
	}

	/**
	 * Splits plain text into chunks of at most `limit` UTF-16 code units.
	 *
	 * For each chunk the break point is chosen inside the first `limit` characters:
	 * 1. the last newline outside parentheses, else
	 * 2. the last other whitespace outside parentheses, else
	 * 3. a hard break at `limit`.
	 *
	 * A hard break never lands between the two halves of a surrogate pair when
	 * backing up by one code unit still makes progress, so characters outside the
	 * BMP (e.g. emoji) are not torn into lone surrogates.
	 *
	 * Chunks are right-trimmed and the text that follows a break is left-trimmed;
	 * chunks that end up empty are dropped.
	 *
	 * @param text - Text to split.
	 * @param limit - Maximum chunk length; when <= 0 the text is returned as one chunk.
	 * @returns The chunks in source order; empty for empty input.
	 */
	export function chunkText(text: string, limit: number): string[] {
	  if (!text) {
	    return [];
	  }
	  if (limit <= 0) {
	    return [text];
	  }
	  if (text.length <= limit) {
	    return [text];
	  }

	  const chunks: string[] = [];
	  let remaining = text;

	  while (remaining.length > limit) {
	    const window = remaining.slice(0, limit);

	    // 1) Prefer a newline break inside the window (outside parentheses).
	    const { lastNewline, lastWhitespace } = scanParenAwareBreakpoints(window);

	    // 2) Otherwise prefer the last whitespace (word boundary) inside the window.
	    let breakIdx = lastNewline > 0 ? lastNewline : lastWhitespace;

	    // 3) Fallback: hard break at the limit.
	    if (breakIdx <= 0) {
	      breakIdx = limit;
	      // Do not cut between a high and a low surrogate. Only back up when at
	      // least one code unit would still be emitted, so the loop always advances.
	      if (breakIdx > 1) {
	        const before = remaining.charCodeAt(breakIdx - 1);
	        const after = remaining.charCodeAt(breakIdx);
	        const splitsPair =
	          before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
	        if (splitsPair) {
	          breakIdx -= 1;
	        }
	      }
	    }

	    const rawChunk = remaining.slice(0, breakIdx);
	    const chunk = rawChunk.trimEnd();
	    if (chunk.length > 0) {
	      chunks.push(chunk);
	    }

	    // If we broke on whitespace/newline, skip that separator; for hard breaks keep it.
	    const brokeOnSeparator = breakIdx < remaining.length && /\s/.test(remaining[breakIdx]);
	    const nextStart = Math.min(remaining.length, breakIdx + (brokeOnSeparator ? 1 : 0));
	    remaining = remaining.slice(nextStart).trimStart();
	  }

	  if (remaining.length) {
	    chunks.push(remaining);
	  }

	  return chunks;
	}

	/**
	 * Splits markdown text into chunks around `limit` characters while keeping fenced
	 * code blocks well-formed: when a break has to fall inside a fence, the current
	 * chunk is closed with the fence's closing line and the next chunk is re-opened
	 * with the fence's opening line.
	 *
	 * Returns [] for empty text and [text] when `limit` <= 0 or the text already fits.
	 *
	 * NOTE(review): a chunk gets the fence's closing line appended after the break
	 * index is chosen; whether every such chunk stays within `limit` depends on the
	 * index arithmetic below — confirm before relying on a hard upper bound.
	 */
	export function chunkMarkdownText(text: string, limit: number): string[] {
	if (!text) {
	return [];
	}
	if (limit <= 0) {
	return [text];
	}
	if (text.length <= limit) {
	return [text];
	}

	const chunks: string[] = [];
	let remaining = text;

	while (remaining.length > limit) {
	// Fence spans are re-parsed every iteration because `remaining` changes
	// (and may have had an opening fence line prepended).
	const spans = parseFenceSpans(remaining);
	const window = remaining.slice(0, limit);

	// Prefer a newline/whitespace break at a fence-safe position; otherwise hard break at limit.
	const softBreak = pickSafeBreakIndex(window, spans);
	let breakIdx = softBreak > 0 ? softBreak : limit;

	// If the chosen break is not fence-safe, look up the fence span that contains it.
	const initialFence = isSafeFenceBreak(spans, breakIdx)
	? undefined
	: findFenceSpanAt(spans, breakIdx);

	let fenceToSplit = initialFence;
	if (initialFence) {
	// Line that will be appended to close the fence in the current chunk.
	const closeLine = `${initialFence.indent}${initialFence.marker}`;
	// Highest break index that still leaves room for "\n" + closeLine within limit.
	const maxIdxIfNeedNewline = limit - (closeLine.length + 1);

	if (maxIdxIfNeedNewline <= 0) {
	// The limit cannot hold the closing line: fall back to a plain hard break.
	fenceToSplit = undefined;
	breakIdx = limit;
	} else {
	// Lowest acceptable break index: past the fence's opening line (plus 2),
	// clamped to the text length.
	const minProgressIdx = Math.min(
	remaining.length,
	initialFence.start + initialFence.openLine.length + 2,
	);
	// Highest break index when the chunk already ends in "\n" (only closeLine is added).
	const maxIdxIfAlreadyNewline = limit - closeLine.length;

	// Walk backwards over newlines looking for a break just after a newline
	// that is still inside the same fence and not before minProgressIdx.
	let pickedNewline = false;
	let lastNewline = remaining.lastIndexOf("\n", Math.max(0, maxIdxIfAlreadyNewline - 1));
	while (lastNewline !== -1) {
	const candidateBreak = lastNewline + 1;
	if (candidateBreak < minProgressIdx) {
	break;
	}
	const candidateFence = findFenceSpanAt(spans, candidateBreak);
	if (candidateFence && candidateFence.start === initialFence.start) {
	breakIdx = Math.max(1, candidateBreak);
	pickedNewline = true;
	break;
	}
	lastNewline = remaining.lastIndexOf("\n", lastNewline - 1);
	}

	if (!pickedNewline) {
	if (minProgressIdx > maxIdxIfAlreadyNewline) {
	// No room to make progress inside the fence: plain hard break.
	fenceToSplit = undefined;
	breakIdx = limit;
	} else {
	// Break mid-line inside the fence, leaving room for "\n" + closeLine.
	breakIdx = Math.max(minProgressIdx, maxIdxIfNeedNewline);
	}
	}
	}

	// Re-evaluate which fence (if any) the final break index falls in.
	// NOTE(review): this assignment runs after the branches above and overrides
	// their `fenceToSplit = undefined` resets whenever the break at `limit` is
	// still inside the same fence — confirm that is intended.
	const fenceAtBreak = findFenceSpanAt(spans, breakIdx);
	fenceToSplit =
	fenceAtBreak && fenceAtBreak.start === initialFence.start ? fenceAtBreak : undefined;
	}

	let rawChunk = remaining.slice(0, breakIdx);
	if (!rawChunk) {
	// Nothing would be emitted; stop looping and let the tail be pushed below.
	break;
	}

	// When the break sits on a whitespace character, drop that one separator.
	const brokeOnSeparator = breakIdx < remaining.length && /\s/.test(remaining[breakIdx]);
	const nextStart = Math.min(remaining.length, breakIdx + (brokeOnSeparator ? 1 : 0));
	let next = remaining.slice(nextStart);

	if (fenceToSplit) {
	// Close the fence in this chunk and re-open it at the top of the next one.
	const closeLine = `${fenceToSplit.indent}${fenceToSplit.marker}`;
	rawChunk = rawChunk.endsWith("\n") ? `${rawChunk}${closeLine}` : `${rawChunk}\n${closeLine}`;
	next = `${fenceToSplit.openLine}\n${next}`;
	} else {
	// Outside a fence, leading newlines of the next chunk are dropped.
	next = stripLeadingNewlines(next);
	}

	chunks.push(rawChunk);
	remaining = next;
	}

	if (remaining.length) {
	chunks.push(remaining);
	}
	return chunks;
	}

	/** Removes every "\n" at the start of `value`; other leading whitespace is kept. */
	function stripLeadingNewlines(value: string): string {
	  return value.replace(/^\n+/, "");
	}

	/**
	 * Chooses a soft break index inside `window`, considering only positions that
	 * `isSafeFenceBreak` accepts for the given fence spans.
	 *
	 * @returns The last newline index if it is > 0, otherwise the last whitespace
	 *   index if it is > 0, otherwise -1.
	 */
	function pickSafeBreakIndex(window: string, spans: ReturnType<typeof parseFenceSpans>): number {
	  const fenceSafe = (index: number): boolean => isSafeFenceBreak(spans, index);
	  const found = scanParenAwareBreakpoints(window, fenceSafe);
	  // Newlines take priority over other whitespace; index 0 is never a usable break.
	  for (const candidate of [found.lastNewline, found.lastWhitespace]) {
	    if (candidate > 0) {
	      return candidate;
	    }
	  }
	  return -1;
	}

	/**
	 * Scans `window` once and reports the last newline and the last non-newline
	 * whitespace character that sit outside any parentheses.
	 *
	 * Positions rejected by `isAllowed` are skipped entirely: they neither count as
	 * breakpoints nor change the parenthesis depth. An unmatched ")" is ignored.
	 *
	 * @param window - Text to scan.
	 * @param isAllowed - Filter called with each index; defaults to allowing all.
	 * @returns Indices of the last newline and last other whitespace, or -1 when absent.
	 */
	function scanParenAwareBreakpoints(
	  window: string,
	  isAllowed: (index: number) => boolean = () => true,
	): { lastNewline: number; lastWhitespace: number } {
	  const found = { lastNewline: -1, lastWhitespace: -1 };
	  const whitespace = /\s/;
	  let openParens = 0;

	  for (let pos = 0; pos < window.length; pos += 1) {
	    if (!isAllowed(pos)) {
	      continue;
	    }
	    const ch = window.charAt(pos);
	    if (ch === "(") {
	      openParens += 1;
	    } else if (ch === ")" && openParens > 0) {
	      openParens -= 1;
	    } else if (openParens === 0) {
	      if (ch === "\n") {
	        found.lastNewline = pos;
	      } else if (whitespace.test(ch)) {
	        found.lastWhitespace = pos;
	      }
	    }
	  }

	  return found;
	}