Spaces:
Paused
Paused
| // Utilities for splitting outbound text into platform-sized chunks without | |
| // unintentionally breaking on newlines. Using [\s\S] keeps newlines inside | |
| // the chunk so messages are only split when they truly exceed the limit. | |
| import type { ChannelId } from "../channels/plugins/types.js"; | |
| import type { OpenClawConfig } from "../config/config.js"; | |
| import { findFenceSpanAt, isSafeFenceBreak, parseFenceSpans } from "../markdown/fences.js"; | |
| import { normalizeAccountId } from "../routing/session-key.js"; | |
| import { INTERNAL_MESSAGE_CHANNEL } from "../utils/message-channel.js"; | |
| export type TextChunkProvider = ChannelId | typeof INTERNAL_MESSAGE_CHANNEL; | |
| /** | |
| * Chunking mode for outbound messages: | |
| * - "length": Split only when exceeding textChunkLimit (default) | |
| * - "newline": Prefer breaking on "soft" boundaries. Historically this split on every | |
| * newline; now it only breaks on paragraph boundaries (blank lines) unless the text | |
| * exceeds the length limit. | |
| */ | |
| export type ChunkMode = "length" | "newline"; | |
| const DEFAULT_CHUNK_LIMIT = 4000; | |
| const DEFAULT_CHUNK_MODE: ChunkMode = "length"; | |
| type ProviderChunkConfig = { | |
| textChunkLimit?: number; | |
| chunkMode?: ChunkMode; | |
| accounts?: Record<string, { textChunkLimit?: number; chunkMode?: ChunkMode }>; | |
| }; | |
| function resolveChunkLimitForProvider( | |
| cfgSection: ProviderChunkConfig | undefined, | |
| accountId?: string | null, | |
| ): number | undefined { | |
| if (!cfgSection) { | |
| return undefined; | |
| } | |
| const normalizedAccountId = normalizeAccountId(accountId); | |
| const accounts = cfgSection.accounts; | |
| if (accounts && typeof accounts === "object") { | |
| const direct = accounts[normalizedAccountId]; | |
| if (typeof direct?.textChunkLimit === "number") { | |
| return direct.textChunkLimit; | |
| } | |
| const matchKey = Object.keys(accounts).find( | |
| (key) => key.toLowerCase() === normalizedAccountId.toLowerCase(), | |
| ); | |
| const match = matchKey ? accounts[matchKey] : undefined; | |
| if (typeof match?.textChunkLimit === "number") { | |
| return match.textChunkLimit; | |
| } | |
| } | |
| return cfgSection.textChunkLimit; | |
| } | |
| export function resolveTextChunkLimit( | |
| cfg: OpenClawConfig | undefined, | |
| provider?: TextChunkProvider, | |
| accountId?: string | null, | |
| opts?: { fallbackLimit?: number }, | |
| ): number { | |
| const fallback = | |
| typeof opts?.fallbackLimit === "number" && opts.fallbackLimit > 0 | |
| ? opts.fallbackLimit | |
| : DEFAULT_CHUNK_LIMIT; | |
| const providerOverride = (() => { | |
| if (!provider || provider === INTERNAL_MESSAGE_CHANNEL) { | |
| return undefined; | |
| } | |
| const channelsConfig = cfg?.channels as Record<string, unknown> | undefined; | |
| const providerConfig = (channelsConfig?.[provider] ?? | |
| (cfg as Record<string, unknown> | undefined)?.[provider]) as ProviderChunkConfig | undefined; | |
| return resolveChunkLimitForProvider(providerConfig, accountId); | |
| })(); | |
| if (typeof providerOverride === "number" && providerOverride > 0) { | |
| return providerOverride; | |
| } | |
| return fallback; | |
| } | |
| function resolveChunkModeForProvider( | |
| cfgSection: ProviderChunkConfig | undefined, | |
| accountId?: string | null, | |
| ): ChunkMode | undefined { | |
| if (!cfgSection) { | |
| return undefined; | |
| } | |
| const normalizedAccountId = normalizeAccountId(accountId); | |
| const accounts = cfgSection.accounts; | |
| if (accounts && typeof accounts === "object") { | |
| const direct = accounts[normalizedAccountId]; | |
| if (direct?.chunkMode) { | |
| return direct.chunkMode; | |
| } | |
| const matchKey = Object.keys(accounts).find( | |
| (key) => key.toLowerCase() === normalizedAccountId.toLowerCase(), | |
| ); | |
| const match = matchKey ? accounts[matchKey] : undefined; | |
| if (match?.chunkMode) { | |
| return match.chunkMode; | |
| } | |
| } | |
| return cfgSection.chunkMode; | |
| } | |
| export function resolveChunkMode( | |
| cfg: OpenClawConfig | undefined, | |
| provider?: TextChunkProvider, | |
| accountId?: string | null, | |
| ): ChunkMode { | |
| if (!provider || provider === INTERNAL_MESSAGE_CHANNEL) { | |
| return DEFAULT_CHUNK_MODE; | |
| } | |
| const channelsConfig = cfg?.channels as Record<string, unknown> | undefined; | |
| const providerConfig = (channelsConfig?.[provider] ?? | |
| (cfg as Record<string, unknown> | undefined)?.[provider]) as ProviderChunkConfig | undefined; | |
| const mode = resolveChunkModeForProvider(providerConfig, accountId); | |
| return mode ?? DEFAULT_CHUNK_MODE; | |
| } | |
| /** | |
| * Split text on newlines, trimming line whitespace. | |
| * Blank lines are folded into the next non-empty line as leading "\n" prefixes. | |
| * Long lines can be split by length (default) or kept intact via splitLongLines:false. | |
| */ | |
| export function chunkByNewline( | |
| text: string, | |
| maxLineLength: number, | |
| opts?: { | |
| splitLongLines?: boolean; | |
| trimLines?: boolean; | |
| isSafeBreak?: (index: number) => boolean; | |
| }, | |
| ): string[] { | |
| if (!text) { | |
| return []; | |
| } | |
| if (maxLineLength <= 0) { | |
| return text.trim() ? [text] : []; | |
| } | |
| const splitLongLines = opts?.splitLongLines !== false; | |
| const trimLines = opts?.trimLines !== false; | |
| const lines = splitByNewline(text, opts?.isSafeBreak); | |
| const chunks: string[] = []; | |
| let pendingBlankLines = 0; | |
| for (const line of lines) { | |
| const trimmed = line.trim(); | |
| if (!trimmed) { | |
| pendingBlankLines += 1; | |
| continue; | |
| } | |
| const maxPrefix = Math.max(0, maxLineLength - 1); | |
| const cappedBlankLines = pendingBlankLines > 0 ? Math.min(pendingBlankLines, maxPrefix) : 0; | |
| const prefix = cappedBlankLines > 0 ? "\n".repeat(cappedBlankLines) : ""; | |
| pendingBlankLines = 0; | |
| const lineValue = trimLines ? trimmed : line; | |
| if (!splitLongLines || lineValue.length + prefix.length <= maxLineLength) { | |
| chunks.push(prefix + lineValue); | |
| continue; | |
| } | |
| const firstLimit = Math.max(1, maxLineLength - prefix.length); | |
| const first = lineValue.slice(0, firstLimit); | |
| chunks.push(prefix + first); | |
| const remaining = lineValue.slice(firstLimit); | |
| if (remaining) { | |
| chunks.push(...chunkText(remaining, maxLineLength)); | |
| } | |
| } | |
| if (pendingBlankLines > 0 && chunks.length > 0) { | |
| chunks[chunks.length - 1] += "\n".repeat(pendingBlankLines); | |
| } | |
| return chunks; | |
| } | |
| /** | |
| * Split text into chunks on paragraph boundaries (blank lines), preserving lists and | |
| * single-newline line wraps inside paragraphs. | |
| * | |
| * - Only breaks at paragraph separators ("\n\n" or more, allowing whitespace on blank lines) | |
| * - Packs multiple paragraphs into a single chunk up to `limit` | |
| * - Falls back to length-based splitting when a single paragraph exceeds `limit` | |
| * (unless `splitLongParagraphs` is disabled) | |
| */ | |
| export function chunkByParagraph( | |
| text: string, | |
| limit: number, | |
| opts?: { splitLongParagraphs?: boolean }, | |
| ): string[] { | |
| if (!text) { | |
| return []; | |
| } | |
| if (limit <= 0) { | |
| return [text]; | |
| } | |
| const splitLongParagraphs = opts?.splitLongParagraphs !== false; | |
| // Normalize to \n so blank line detection is consistent. | |
| const normalized = text.replace(/\r\n?/g, "\n"); | |
| // Fast-path: if there are no blank-line paragraph separators, do not split. | |
| // (We *do not* early-return based on `limit` — newline mode is about paragraph | |
| // boundaries, not only exceeding a length limit.) | |
| const paragraphRe = /\n[\t ]*\n+/; | |
| if (!paragraphRe.test(normalized)) { | |
| if (normalized.length <= limit) { | |
| return [normalized]; | |
| } | |
| if (!splitLongParagraphs) { | |
| return [normalized]; | |
| } | |
| return chunkText(normalized, limit); | |
| } | |
| const spans = parseFenceSpans(normalized); | |
| const parts: string[] = []; | |
| const re = /\n[\t ]*\n+/g; // paragraph break: blank line(s), allowing whitespace | |
| let lastIndex = 0; | |
| for (const match of normalized.matchAll(re)) { | |
| const idx = match.index ?? 0; | |
| // Do not split on blank lines that occur inside fenced code blocks. | |
| if (!isSafeFenceBreak(spans, idx)) { | |
| continue; | |
| } | |
| parts.push(normalized.slice(lastIndex, idx)); | |
| lastIndex = idx + match[0].length; | |
| } | |
| parts.push(normalized.slice(lastIndex)); | |
| const chunks: string[] = []; | |
| for (const part of parts) { | |
| const paragraph = part.replace(/\s+$/g, ""); | |
| if (!paragraph.trim()) { | |
| continue; | |
| } | |
| if (paragraph.length <= limit) { | |
| chunks.push(paragraph); | |
| } else if (!splitLongParagraphs) { | |
| chunks.push(paragraph); | |
| } else { | |
| chunks.push(...chunkText(paragraph, limit)); | |
| } | |
| } | |
| return chunks; | |
| } | |
| /** | |
| * Unified chunking function that dispatches based on mode. | |
| */ | |
| export function chunkTextWithMode(text: string, limit: number, mode: ChunkMode): string[] { | |
| if (mode === "newline") { | |
| return chunkByParagraph(text, limit); | |
| } | |
| return chunkText(text, limit); | |
| } | |
| export function chunkMarkdownTextWithMode(text: string, limit: number, mode: ChunkMode): string[] { | |
| if (mode === "newline") { | |
| // Paragraph chunking is fence-safe because we never split at arbitrary indices. | |
| // If a paragraph must be split by length, defer to the markdown-aware chunker. | |
| const paragraphChunks = chunkByParagraph(text, limit, { splitLongParagraphs: false }); | |
| const out: string[] = []; | |
| for (const chunk of paragraphChunks) { | |
| const nested = chunkMarkdownText(chunk, limit); | |
| if (!nested.length && chunk) { | |
| out.push(chunk); | |
| } else { | |
| out.push(...nested); | |
| } | |
| } | |
| return out; | |
| } | |
| return chunkMarkdownText(text, limit); | |
| } | |
| function splitByNewline( | |
| text: string, | |
| isSafeBreak: (index: number) => boolean = () => true, | |
| ): string[] { | |
| const lines: string[] = []; | |
| let start = 0; | |
| for (let i = 0; i < text.length; i++) { | |
| if (text[i] === "\n" && isSafeBreak(i)) { | |
| lines.push(text.slice(start, i)); | |
| start = i + 1; | |
| } | |
| } | |
| lines.push(text.slice(start)); | |
| return lines; | |
| } | |
| export function chunkText(text: string, limit: number): string[] { | |
| if (!text) { | |
| return []; | |
| } | |
| if (limit <= 0) { | |
| return [text]; | |
| } | |
| if (text.length <= limit) { | |
| return [text]; | |
| } | |
| const chunks: string[] = []; | |
| let remaining = text; | |
| while (remaining.length > limit) { | |
| const window = remaining.slice(0, limit); | |
| // 1) Prefer a newline break inside the window (outside parentheses). | |
| const { lastNewline, lastWhitespace } = scanParenAwareBreakpoints(window); | |
| // 2) Otherwise prefer the last whitespace (word boundary) inside the window. | |
| let breakIdx = lastNewline > 0 ? lastNewline : lastWhitespace; | |
| // 3) Fallback: hard break exactly at the limit. | |
| if (breakIdx <= 0) { | |
| breakIdx = limit; | |
| } | |
| const rawChunk = remaining.slice(0, breakIdx); | |
| const chunk = rawChunk.trimEnd(); | |
| if (chunk.length > 0) { | |
| chunks.push(chunk); | |
| } | |
| // If we broke on whitespace/newline, skip that separator; for hard breaks keep it. | |
| const brokeOnSeparator = breakIdx < remaining.length && /\s/.test(remaining[breakIdx]); | |
| const nextStart = Math.min(remaining.length, breakIdx + (brokeOnSeparator ? 1 : 0)); | |
| remaining = remaining.slice(nextStart).trimStart(); | |
| } | |
| if (remaining.length) { | |
| chunks.push(remaining); | |
| } | |
| return chunks; | |
| } | |
| export function chunkMarkdownText(text: string, limit: number): string[] { | |
| if (!text) { | |
| return []; | |
| } | |
| if (limit <= 0) { | |
| return [text]; | |
| } | |
| if (text.length <= limit) { | |
| return [text]; | |
| } | |
| const chunks: string[] = []; | |
| let remaining = text; | |
| while (remaining.length > limit) { | |
| const spans = parseFenceSpans(remaining); | |
| const window = remaining.slice(0, limit); | |
| const softBreak = pickSafeBreakIndex(window, spans); | |
| let breakIdx = softBreak > 0 ? softBreak : limit; | |
| const initialFence = isSafeFenceBreak(spans, breakIdx) | |
| ? undefined | |
| : findFenceSpanAt(spans, breakIdx); | |
| let fenceToSplit = initialFence; | |
| if (initialFence) { | |
| const closeLine = `${initialFence.indent}${initialFence.marker}`; | |
| const maxIdxIfNeedNewline = limit - (closeLine.length + 1); | |
| if (maxIdxIfNeedNewline <= 0) { | |
| fenceToSplit = undefined; | |
| breakIdx = limit; | |
| } else { | |
| const minProgressIdx = Math.min( | |
| remaining.length, | |
| initialFence.start + initialFence.openLine.length + 2, | |
| ); | |
| const maxIdxIfAlreadyNewline = limit - closeLine.length; | |
| let pickedNewline = false; | |
| let lastNewline = remaining.lastIndexOf("\n", Math.max(0, maxIdxIfAlreadyNewline - 1)); | |
| while (lastNewline !== -1) { | |
| const candidateBreak = lastNewline + 1; | |
| if (candidateBreak < minProgressIdx) { | |
| break; | |
| } | |
| const candidateFence = findFenceSpanAt(spans, candidateBreak); | |
| if (candidateFence && candidateFence.start === initialFence.start) { | |
| breakIdx = Math.max(1, candidateBreak); | |
| pickedNewline = true; | |
| break; | |
| } | |
| lastNewline = remaining.lastIndexOf("\n", lastNewline - 1); | |
| } | |
| if (!pickedNewline) { | |
| if (minProgressIdx > maxIdxIfAlreadyNewline) { | |
| fenceToSplit = undefined; | |
| breakIdx = limit; | |
| } else { | |
| breakIdx = Math.max(minProgressIdx, maxIdxIfNeedNewline); | |
| } | |
| } | |
| } | |
| const fenceAtBreak = findFenceSpanAt(spans, breakIdx); | |
| fenceToSplit = | |
| fenceAtBreak && fenceAtBreak.start === initialFence.start ? fenceAtBreak : undefined; | |
| } | |
| let rawChunk = remaining.slice(0, breakIdx); | |
| if (!rawChunk) { | |
| break; | |
| } | |
| const brokeOnSeparator = breakIdx < remaining.length && /\s/.test(remaining[breakIdx]); | |
| const nextStart = Math.min(remaining.length, breakIdx + (brokeOnSeparator ? 1 : 0)); | |
| let next = remaining.slice(nextStart); | |
| if (fenceToSplit) { | |
| const closeLine = `${fenceToSplit.indent}${fenceToSplit.marker}`; | |
| rawChunk = rawChunk.endsWith("\n") ? `${rawChunk}${closeLine}` : `${rawChunk}\n${closeLine}`; | |
| next = `${fenceToSplit.openLine}\n${next}`; | |
| } else { | |
| next = stripLeadingNewlines(next); | |
| } | |
| chunks.push(rawChunk); | |
| remaining = next; | |
| } | |
| if (remaining.length) { | |
| chunks.push(remaining); | |
| } | |
| return chunks; | |
| } | |
| function stripLeadingNewlines(value: string): string { | |
| let i = 0; | |
| while (i < value.length && value[i] === "\n") { | |
| i++; | |
| } | |
| return i > 0 ? value.slice(i) : value; | |
| } | |
| function pickSafeBreakIndex(window: string, spans: ReturnType<typeof parseFenceSpans>): number { | |
| const { lastNewline, lastWhitespace } = scanParenAwareBreakpoints(window, (index) => | |
| isSafeFenceBreak(spans, index), | |
| ); | |
| if (lastNewline > 0) { | |
| return lastNewline; | |
| } | |
| if (lastWhitespace > 0) { | |
| return lastWhitespace; | |
| } | |
| return -1; | |
| } | |
| function scanParenAwareBreakpoints( | |
| window: string, | |
| isAllowed: (index: number) => boolean = () => true, | |
| ): { lastNewline: number; lastWhitespace: number } { | |
| let lastNewline = -1; | |
| let lastWhitespace = -1; | |
| let depth = 0; | |
| for (let i = 0; i < window.length; i++) { | |
| if (!isAllowed(i)) { | |
| continue; | |
| } | |
| const char = window[i]; | |
| if (char === "(") { | |
| depth += 1; | |
| continue; | |
| } | |
| if (char === ")" && depth > 0) { | |
| depth -= 1; | |
| continue; | |
| } | |
| if (depth !== 0) { | |
| continue; | |
| } | |
| if (char === "\n") { | |
| lastNewline = i; | |
| } else if (/\s/.test(char)) { | |
| lastWhitespace = i; | |
| } | |
| } | |
| return { lastNewline, lastWhitespace }; | |
| } | |