| const Anthropic = require('@anthropic-ai/sdk'); |
| const { logger } = require('@librechat/data-schemas'); |
| const { HttpsProxyAgent } = require('https-proxy-agent'); |
| const { |
| Constants, |
| ErrorTypes, |
| EModelEndpoint, |
| parseTextParts, |
| anthropicSettings, |
| getResponseSender, |
| validateVisionModel, |
| } = require('librechat-data-provider'); |
| const { sleep, SplitStreamHandler: _Handler, addCacheControl } = require('@librechat/agents'); |
| const { |
| Tokenizer, |
| createFetch, |
| matchModelName, |
| getClaudeHeaders, |
| getModelMaxTokens, |
| configureReasoning, |
| checkPromptCacheSupport, |
| getModelMaxOutputTokens, |
| createStreamEventHandlers, |
| } = require('@librechat/api'); |
| const { |
| truncateText, |
| formatMessage, |
| titleFunctionPrompt, |
| parseParamFromPrompt, |
| createContextHandlers, |
| } = require('./prompts'); |
| const { spendTokens, spendStructuredTokens } = require('~/models/spendTokens'); |
| const { encodeAndFormat } = require('~/server/services/Files/images/encode'); |
| const BaseClient = require('./BaseClient'); |
|
|
// Role prefixes for the legacy (pre-Messages) Anthropic completion API prompt format.
const HUMAN_PROMPT = '\n\nHuman:';
const AI_PROMPT = '\n\nAssistant:';
|
|
/**
 * Stream handler specialized for Anthropic SSE chunks: extracts text deltas
 * from either the Messages API (`delta.text`) or the legacy Completions API
 * (`completion`), and reasoning deltas from `delta.thinking`.
 */
class SplitStreamHandler extends _Handler {
  /** @param {object} chunk @returns {string} text delta for this chunk, or '' */
  getDeltaContent(chunk) {
    const delta = chunk?.delta?.text ?? chunk?.completion;
    return delta ? delta : '';
  }
  /** @param {object} chunk @returns {string} reasoning ("thinking") delta, or '' */
  getReasoningDelta(chunk) {
    const thinking = chunk?.delta?.thinking;
    return thinking ? thinking : '';
  }
}
|
|
| |
/**
 * Waits before retrying a failed request, scaling linearly with the attempt count.
 * @param {number} attempts - Number of attempts made so far.
 * @param {number} [baseDelay=1000] - Base delay in milliseconds.
 * @returns {Promise<void>} Resolves after `baseDelay * attempts` milliseconds.
 */
function delayBeforeRetry(attempts, baseDelay = 1000) {
  const waitMs = baseDelay * attempts;
  return new Promise((resolve) => {
    setTimeout(resolve, waitMs);
  });
}
|
|
// SSE event types that carry token-usage data; each is captured onto the
// client instance (this.message_start / this.message_delta) during streaming.
const tokenEventTypes = new Set(['message_start', 'message_delta']);
// Settings bundle for legacy (pre-Claude-3) completion models.
const { legacy } = anthropicSettings;
|
|
| class AnthropicClient extends BaseClient { |
/**
 * @param {string} apiKey - Anthropic API key; falls back to the ANTHROPIC_API_KEY env var.
 * @param {object} [options] - Client options (modelOptions, attachments, proxy, etc.).
 */
constructor(apiKey, options = {}) {
  super(apiKey, options);
  this.apiKey = apiKey || process.env.ANTHROPIC_API_KEY;
  this.userLabel = HUMAN_PROMPT;
  this.assistantLabel = AI_PROMPT;
  this.contextStrategy = options.contextStrategy
    ? options.contextStrategy.toLowerCase()
    : 'discard';
  this.setOptions(options);
  /** System prompt text sent with the request (set in buildMessages) */
  this.systemMessage;
  /** Captured `message_start` stream event (carries input-token usage) */
  this.message_start;
  /** Captured `message_delta` stream event (carries output-token usage) */
  this.message_delta;
  /** Whether the model is Claude 3+ (set in setOptions) */
  this.isClaudeLatest;
  /** Whether to use the Messages API instead of legacy Completions */
  this.useMessages;
  /** Whether prompt caching is enabled and supported by the model */
  this.supportsCacheControl;
  /** Key under which input-token usage is reported */
  this.inputTokensKey = 'input_tokens';
  /** Key under which output-token usage is reported */
  this.outputTokensKey = 'output_tokens';
  /** @type {SplitStreamHandler | undefined} Active stream handler for the current request */
  this.streamHandler;
}
|
|
/**
 * Merges and normalizes client options, derives model capabilities
 * (Messages API support, prompt caching, legacy output limits), and
 * computes the context/response token budgets.
 * @param {object} options - Options to merge into the current configuration.
 * @returns {this}
 * @throws {Error} INPUT_LENGTH error when the token budgets exceed the model's context window.
 */
setOptions(options) {
  if (this.options && !this.options.replaceOptions) {
    // Merge rather than replace: nested modelOptions are merged separately
    // so a partial update does not clobber previously set model options.
    this.options.modelOptions = {
      ...this.options.modelOptions,
      ...options.modelOptions,
    };
    delete options.modelOptions;
    this.options = {
      ...this.options,
      ...options,
    };
  } else {
    this.options = options;
  }

  this.modelOptions = Object.assign(
    {
      model: anthropicSettings.model.default,
    },
    this.modelOptions,
    this.options.modelOptions,
  );

  // Normalize the model name, then derive capability flags from it.
  const modelMatch = matchModelName(this.modelOptions.model, EModelEndpoint.anthropic);
  this.isClaudeLatest =
    /claude-[3-9]/.test(modelMatch) || /claude-(?:sonnet|opus|haiku)-[4-9]/.test(modelMatch);
  // "Legacy output" models have smaller max-output limits (everything except
  // claude-3.5-sonnet, claude-3.7, and the claude-4+ families).
  const isLegacyOutput = !(
    /claude-3[-.]5-sonnet/.test(modelMatch) ||
    /claude-3[-.]7/.test(modelMatch) ||
    /claude-(?:sonnet|opus|haiku)-[4-9]/.test(modelMatch) ||
    /claude-[4-9]/.test(modelMatch)
  );
  this.supportsCacheControl = this.options.promptCache && checkPromptCacheSupport(modelMatch);

  // Clamp output tokens for legacy models that cannot honor larger limits.
  if (
    isLegacyOutput &&
    this.modelOptions.maxOutputTokens &&
    this.modelOptions.maxOutputTokens > legacy.maxOutputTokens.default
  ) {
    this.modelOptions.maxOutputTokens = legacy.maxOutputTokens.default;
  }

  // Attachments force the Messages API even on older models.
  this.useMessages = this.isClaudeLatest || !!this.options.attachments;

  this.defaultVisionModel = this.options.visionModel ?? 'claude-3-sonnet-20240229';
  // attachments is a Promise; vision check runs once it resolves.
  this.options.attachments?.then((attachments) => this.checkVisionRequest(attachments));

  // Token budgets: explicit option > model lookup > conservative fallback.
  this.maxContextTokens =
    this.options.maxContextTokens ??
    getModelMaxTokens(this.modelOptions.model, EModelEndpoint.anthropic) ??
    100000;
  this.maxResponseTokens =
    this.modelOptions.maxOutputTokens ??
    getModelMaxOutputTokens(
      this.modelOptions.model,
      this.options.endpointType ?? this.options.endpoint,
      this.options.endpointTokenConfig,
    ) ??
    anthropicSettings.maxOutputTokens.reset(this.modelOptions.model);
  this.maxPromptTokens =
    this.options.maxPromptTokens || this.maxContextTokens - this.maxResponseTokens;

  // Reject configurations whose combined budgets cannot fit in the context window.
  const reservedTokens = this.maxPromptTokens + this.maxResponseTokens;
  if (reservedTokens > this.maxContextTokens) {
    const info = `Total Possible Tokens + Max Output Tokens must be less than or equal to Max Context Tokens: ${this.maxPromptTokens} (total possible output) + ${this.maxResponseTokens} (max output) = ${reservedTokens}/${this.maxContextTokens} (max context)`;
    const errorMessage = `{ "type": "${ErrorTypes.INPUT_LENGTH}", "info": "${info}" }`;
    logger.warn(info);
    throw new Error(errorMessage);
  } else if (this.maxResponseTokens === this.maxContextTokens) {
    const info = `Max Output Tokens must be less than Max Context Tokens: ${this.maxResponseTokens} (max output) = ${this.maxContextTokens} (max context)`;
    const errorMessage = `{ "type": "${ErrorTypes.INPUT_LENGTH}", "info": "${info}" }`;
    logger.warn(info);
    throw new Error(errorMessage);
  }

  this.sender =
    this.options.sender ??
    getResponseSender({
      model: this.modelOptions.model,
      endpoint: EModelEndpoint.anthropic,
      modelLabel: this.options.modelLabel,
    });

  // Legacy prompt delimiters (endToken intentionally empty for Anthropic).
  this.startToken = '||>';
  this.endToken = '';

  return this;
}
|
|
| |
| |
| |
| |
| |
| getClient(requestOptions) { |
| |
| const options = { |
| fetch: createFetch({ |
| directEndpoint: this.options.directEndpoint, |
| reverseProxyUrl: this.options.reverseProxyUrl, |
| }), |
| apiKey: this.apiKey, |
| fetchOptions: {}, |
| }; |
|
|
| if (this.options.proxy) { |
| options.fetchOptions.agent = new HttpsProxyAgent(this.options.proxy); |
| } |
|
|
| if (this.options.reverseProxyUrl) { |
| options.baseURL = this.options.reverseProxyUrl; |
| } |
|
|
| const headers = getClaudeHeaders(requestOptions?.model, this.supportsCacheControl); |
| if (headers) { |
| options.defaultHeaders = headers; |
| } |
|
|
| return new Anthropic(options); |
| } |
|
|
| |
| |
| |
| |
| getStreamUsage() { |
| const inputUsage = this.message_start?.message?.usage ?? {}; |
| const outputUsage = this.message_delta?.usage ?? {}; |
| return Object.assign({}, inputUsage, outputUsage); |
| } |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| calculateCurrentTokenCount({ tokenCountMap, currentMessageId, usage }) { |
| const originalEstimate = tokenCountMap[currentMessageId] || 0; |
|
|
| if (!usage || typeof usage.input_tokens !== 'number') { |
| return originalEstimate; |
| } |
|
|
| tokenCountMap[currentMessageId] = 0; |
| const totalTokensFromMap = Object.values(tokenCountMap).reduce((sum, count) => { |
| const numCount = Number(count); |
| return sum + (isNaN(numCount) ? 0 : numCount); |
| }, 0); |
| const totalInputTokens = |
| (usage.input_tokens ?? 0) + |
| (usage.cache_creation_input_tokens ?? 0) + |
| (usage.cache_read_input_tokens ?? 0); |
|
|
| const currentMessageTokens = totalInputTokens - totalTokensFromMap; |
| return currentMessageTokens > 0 ? currentMessageTokens : originalEstimate; |
| } |
|
|
| |
| |
| |
| |
| |
| getTokenCountForResponse(responseMessage) { |
| return this.getTokenCountForMessage({ |
| role: 'assistant', |
| content: responseMessage.text, |
| }); |
| } |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| checkVisionRequest(attachments) { |
| const availableModels = this.options.modelsConfig?.[EModelEndpoint.anthropic]; |
| this.isVisionModel = validateVisionModel({ model: this.modelOptions.model, availableModels }); |
|
|
| const visionModelAvailable = availableModels?.includes(this.defaultVisionModel); |
| if ( |
| attachments && |
| attachments.some((file) => file?.type && file?.type?.includes('image')) && |
| visionModelAvailable && |
| !this.isVisionModel |
| ) { |
| this.modelOptions.model = this.defaultVisionModel; |
| this.isVisionModel = true; |
| } |
| } |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| calculateImageTokenCost({ width, height }) { |
| return Math.ceil((width * height) / 750); |
| } |
|
|
| async addImageURLs(message, attachments) { |
| const { files, image_urls } = await encodeAndFormat(this.options.req, attachments, { |
| endpoint: EModelEndpoint.anthropic, |
| }); |
| message.image_urls = image_urls.length ? image_urls : undefined; |
| return files; |
| } |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| async recordTokenUsage({ promptTokens, completionTokens, usage, model, context = 'message' }) { |
| if (usage != null && usage?.input_tokens != null) { |
| const input = usage.input_tokens ?? 0; |
| const write = usage.cache_creation_input_tokens ?? 0; |
| const read = usage.cache_read_input_tokens ?? 0; |
|
|
| await spendStructuredTokens( |
| { |
| context, |
| user: this.user, |
| conversationId: this.conversationId, |
| model: model ?? this.modelOptions.model, |
| endpointTokenConfig: this.options.endpointTokenConfig, |
| }, |
| { |
| promptTokens: { input, write, read }, |
| completionTokens, |
| }, |
| ); |
|
|
| return; |
| } |
|
|
| await spendTokens( |
| { |
| context, |
| user: this.user, |
| conversationId: this.conversationId, |
| model: model ?? this.modelOptions.model, |
| endpointTokenConfig: this.options.endpointTokenConfig, |
| }, |
| { promptTokens, completionTokens }, |
| ); |
| } |
|
|
/**
 * Builds the request payload from the conversation history: resolves
 * attachments, assigns token counts, trims to the context window, groups
 * consecutive same-author messages, and produces either a Messages-API
 * payload (Claude 3+) or a legacy text prompt.
 *
 * @param {Array} messages - All conversation messages.
 * @param {string} parentMessageId - Tail message id of the active branch.
 * @returns {Promise<object>} `{ prompt, context, promptTokens, tokenCountMap }`
 * @throws {Error} When a single message (plus prefix) exceeds the prompt budget,
 *   or when images are attached to a non-vision model.
 */
async buildMessages(messages, parentMessageId) {
  const orderedMessages = this.constructor.getMessagesForConversation({
    messages,
    parentMessageId,
  });

  logger.debug('[AnthropicClient] orderedMessages', { orderedMessages, parentMessageId });

  // Resolve pending attachments and pin them to the latest message.
  if (this.options.attachments) {
    const attachments = await this.options.attachments;
    const images = attachments.filter((file) => file.type.includes('image'));

    if (images.length && !this.isVisionModel) {
      throw new Error('Images are only supported with the Claude 3 family of models');
    }

    const latestMessage = orderedMessages[orderedMessages.length - 1];

    if (this.message_file_map) {
      this.message_file_map[latestMessage.messageId] = attachments;
    } else {
      this.message_file_map = {
        [latestMessage.messageId]: attachments,
      };
    }

    const files = await this.addImageURLs(latestMessage, attachments);

    // Replace the pending Promise with the resolved file records.
    this.options.attachments = files;
  }

  if (this.message_file_map) {
    this.contextHandlers = createContextHandlers(
      this.options.req,
      orderedMessages[orderedMessages.length - 1].text,
    );
  }

  // Format each message for the target API and ensure it carries a token count.
  const formattedMessages = orderedMessages.map((message, i) => {
    const formattedMessage = this.useMessages
      ? formatMessage({
          message,
          endpoint: EModelEndpoint.anthropic,
        })
      : {
          author: message.isCreatedByUser ? this.userLabel : this.assistantLabel,
          content: message?.content ?? message.text,
        };

    const needsTokenCount = this.contextStrategy && !orderedMessages[i].tokenCount;
    // Count tokens when missing, or when a vision request includes attachments.
    if (needsTokenCount || (this.isVisionModel && (message.image_urls || message.files))) {
      orderedMessages[i].tokenCount = this.getTokenCountForMessage(formattedMessage);
    }

    // Add per-image token cost; embedded files go to the context handlers instead.
    if (this.message_file_map && this.message_file_map[message.messageId]) {
      const attachments = this.message_file_map[message.messageId];
      for (const file of attachments) {
        if (file.embedded) {
          this.contextHandlers?.processFile(file);
          continue;
        }
        if (file.metadata?.fileIdentifier) {
          continue;
        }

        orderedMessages[i].tokenCount += this.calculateImageTokenCost({
          width: file.width,
          height: file.height,
        });
      }
    }

    formattedMessage.tokenCount = orderedMessages[i].tokenCount;
    return formattedMessage;
  });

  // Prepend file-context to the prompt prefix when context handlers exist.
  if (this.contextHandlers) {
    this.augmentedPrompt = await this.contextHandlers.createContext();
    this.options.promptPrefix = this.augmentedPrompt + (this.options.promptPrefix ?? '');
  }

  // Trim history to fit the prompt token budget.
  let { context: messagesInWindow, remainingContextTokens } =
    await this.getMessagesWithinTokenLimit({ messages: formattedMessages });

  const tokenCountMap = orderedMessages
    .slice(orderedMessages.length - messagesInWindow.length)
    .reduce((map, message, index) => {
      const { messageId } = message;
      if (!messageId) {
        return map;
      }

      // NOTE(review): `index` is relative to the slice, but it indexes the full
      // `orderedMessages` array — verify this offset is intended.
      map[messageId] = orderedMessages[index].tokenCount;
      return map;
    }, {});

  logger.debug('[AnthropicClient]', {
    messagesInWindow: messagesInWindow.length,
    remainingContextTokens,
  });

  // Merge consecutive messages from the same author into one grouped message,
  // as the Messages API requires alternating roles.
  let lastAuthor = '';
  let groupedMessages = [];

  for (let i = 0; i < messagesInWindow.length; i++) {
    const message = messagesInWindow[i];
    const author = message.role ?? message.author;
    // New author: start a new grouped message.
    if (lastAuthor !== author) {
      const newMessage = {
        content: [message.content],
      };

      if (message.role) {
        newMessage.role = message.role;
      } else {
        newMessage.author = message.author;
      }

      groupedMessages.push(newMessage);
      lastAuthor = author;
      // Same author: append content to the previous grouped message.
    } else {
      groupedMessages[groupedMessages.length - 1].content.push(message.content);
    }
  }

  // Unwrap single-element content arrays; trim a trailing assistant string
  // (the Messages API rejects trailing whitespace on a final assistant turn).
  groupedMessages = groupedMessages.map((msg, i) => {
    const isLast = i === groupedMessages.length - 1;
    if (msg.content.length === 1) {
      const content = msg.content[0];
      return {
        ...msg,
        content:
          isLast && this.useMessages && msg.role === 'assistant' && typeof content === 'string'
            ? content?.trim()
            : content,
      };
    }

    // Legacy prompt path does not use per-message token counts downstream.
    if (!this.useMessages && msg.tokenCount) {
      delete msg.tokenCount;
    }

    return msg;
  });

  // Optional identity lines (user name / model persona) for the prefix.
  let identityPrefix = '';
  if (this.options.userLabel) {
    identityPrefix = `\nHuman's name: ${this.options.userLabel}`;
  }

  if (this.options.modelLabel) {
    identityPrefix = `${identityPrefix}\nYou are ${this.options.modelLabel}`;
  }

  let promptPrefix = (this.options.promptPrefix ?? '').trim();
  if (typeof this.options.artifactsPrompt === 'string' && this.options.artifactsPrompt) {
    promptPrefix = `${promptPrefix ?? ''}\n${this.options.artifactsPrompt}`.trim();
  }
  if (promptPrefix) {
    // Ensure the prefix ends with the end token before wrapping as Context.
    if (!promptPrefix.endsWith(`${this.endToken}`)) {
      promptPrefix = `${promptPrefix.trim()}${this.endToken}\n\n`;
    }
    promptPrefix = `\nContext:\n${promptPrefix}`;
  }

  if (identityPrefix) {
    promptPrefix = `${identityPrefix}${promptPrefix}`;
  }

  // When the last turn is the assistant's, continue it instead of adding a
  // fresh assistant label (legacy "edited/continue" behavior).
  let isEdited = lastAuthor === this.assistantLabel;
  const promptSuffix = isEdited ? '' : `${promptPrefix}${this.assistantLabel}\n`;
  let currentTokenCount =
    isEdited || this.useMessages
      ? this.getTokenCount(promptPrefix)
      : this.getTokenCount(promptSuffix);

  let promptBody = '';
  const maxTokenCount = this.maxPromptTokens;

  const context = [];

  // Tracks the most recently added assistant message so it can be removed if a
  // preceding user message later fails to fit (legacy prompt path only).
  const nextMessage = {
    remove: false,
    tokenCount: 0,
    messageString: '',
  };

  // Legacy path: build a single text prompt newest-to-oldest until the budget is hit.
  const buildPromptBody = async () => {
    if (currentTokenCount < maxTokenCount && groupedMessages.length > 0) {
      const message = groupedMessages.pop();
      const isCreatedByUser = message.author === this.userLabel;
      // On the continued (edited) turn, inject the prefix before the assistant label.
      const messagePrefix =
        isCreatedByUser || !isEdited ? message.author : `${promptPrefix}${message.author}`;
      const messageString = `${messagePrefix}\n${message.content}${this.endToken}\n`;
      let newPromptBody = `${messageString}${promptBody}`;

      context.unshift(message);

      const tokenCountForMessage = this.getTokenCount(messageString);
      const newTokenCount = currentTokenCount + tokenCountForMessage;

      if (!isCreatedByUser) {
        nextMessage.messageString = messageString;
        nextMessage.tokenCount = tokenCountForMessage;
      }

      if (newTokenCount > maxTokenCount) {
        if (!promptBody) {
          // Not even a single message fits within the budget.
          throw new Error(
            `Prompt is too long. Max token count is ${maxTokenCount}, but prompt is ${newTokenCount} tokens long.`,
          );
        }

        // A user message that does not fit orphans the following assistant
        // message; mark it for removal so the prompt stays coherent.
        if (isCreatedByUser) {
          nextMessage.remove = true;
        }

        return false;
      }
      promptBody = newPromptBody;
      currentTokenCount = newTokenCount;

      // The prefix injection happens only once, on the most recent assistant turn.
      if (isEdited) {
        isEdited = false;
      }

      // Yield to the event loop between iterations to avoid blocking.
      await new Promise((resolve) => setImmediate(resolve));
      return buildPromptBody();
    }
    return true;
  };

  // Messages-API path: collect structured messages newest-to-oldest within budget.
  const messagesPayload = [];
  const buildMessagesPayload = async () => {
    let canContinue = true;

    if (promptPrefix) {
      this.systemMessage = promptPrefix;
    }

    while (currentTokenCount < maxTokenCount && groupedMessages.length > 0 && canContinue) {
      const message = groupedMessages.pop();

      let tokenCountForMessage = message.tokenCount ?? this.getTokenCountForMessage(message);

      const newTokenCount = currentTokenCount + tokenCountForMessage;
      const exceededMaxCount = newTokenCount > maxTokenCount;

      if (exceededMaxCount && messagesPayload.length === 0) {
        throw new Error(
          `Prompt is too long. Max token count is ${maxTokenCount}, but prompt is ${newTokenCount} tokens long.`,
        );
      } else if (exceededMaxCount) {
        canContinue = false;
        break;
      }

      // The API rejects unknown fields; strip the internal token count.
      delete message.tokenCount;
      messagesPayload.unshift(message);
      currentTokenCount = newTokenCount;

      // Only the first (most recent) assistant turn counts as the edited one.
      if (isEdited && message.role === 'assistant') {
        isEdited = false;
      }

      // Yield to the event loop between iterations to avoid blocking.
      await new Promise((resolve) => setImmediate(resolve));
    }
  };

  const processTokens = () => {
    // Account for the assistant-label priming tokens.
    currentTokenCount += 2;

    // Cap response tokens to whatever context space remains.
    this.modelOptions.maxOutputTokens = Math.min(
      this.maxContextTokens - currentTokenCount,
      this.maxResponseTokens,
    );
  };

  // Claude 3+ (and 4+ families) use the Messages API payload.
  if (
    /claude-[3-9]/.test(this.modelOptions.model) ||
    /claude-(?:sonnet|opus|haiku)-[4-9]/.test(this.modelOptions.model)
  ) {
    await buildMessagesPayload();
    processTokens();
    return {
      prompt: messagesPayload,
      context: messagesInWindow,
      promptTokens: currentTokenCount,
      tokenCountMap,
    };
  } else {
    await buildPromptBody();
    processTokens();
  }

  // Drop the orphaned assistant message flagged during prompt building.
  if (nextMessage.remove) {
    promptBody = promptBody.replace(nextMessage.messageString, '');
    currentTokenCount -= nextMessage.tokenCount;
    context.shift();
  }

  let prompt = `${promptBody}${promptSuffix}`;

  return { prompt, context, promptTokens: currentTokenCount, tokenCountMap };
}
|
|
/** No-op: completions are created directly inside `sendCompletion`. */
getCompletion() {
  logger.debug("AnthropicClient doesn't use getCompletion (all handled in sendCompletion)");
}
|
|
| |
| |
| |
| |
| |
| |
| |
| async createResponse(client, options, useMessages) { |
| return (useMessages ?? this.useMessages) |
| ? await client.messages.create(options) |
| : await client.completions.create(options); |
| } |
|
|
| getMessageMapMethod() { |
| |
| |
| |
| return (msg) => { |
| if (msg.text != null && msg.text && msg.text.startsWith(':::thinking')) { |
| msg.text = msg.text.replace(/:::thinking.*?:::/gs, '').trim(); |
| } else if (msg.content != null) { |
| msg.text = parseTextParts(msg.content, true); |
| delete msg.content; |
| } |
|
|
| return msg; |
| }; |
| } |
|
|
| |
| |
| |
| |
| getStreamText(intermediateReply) { |
| if (!this.streamHandler) { |
| return intermediateReply?.join('') ?? ''; |
| } |
|
|
| const reasoningText = this.streamHandler.reasoningTokens.join(''); |
|
|
| const reasoningBlock = reasoningText.length > 0 ? `:::thinking\n${reasoningText}\n:::\n` : ''; |
|
|
| return `${reasoningBlock}${this.streamHandler.tokens.join('')}`; |
| } |
|
|
| async sendCompletion(payload, { onProgress, abortController }) { |
| if (!abortController) { |
| abortController = new AbortController(); |
| } |
|
|
| const { signal } = abortController; |
|
|
| const modelOptions = { ...this.modelOptions }; |
| if (typeof onProgress === 'function') { |
| modelOptions.stream = true; |
| } |
|
|
| logger.debug('modelOptions', { modelOptions }); |
| const metadata = { |
| user_id: this.user, |
| }; |
|
|
| const { |
| stream, |
| model, |
| temperature, |
| maxOutputTokens, |
| stop: stop_sequences, |
| topP: top_p, |
| topK: top_k, |
| } = this.modelOptions; |
|
|
| let requestOptions = { |
| model, |
| stream: stream || true, |
| stop_sequences, |
| temperature, |
| metadata, |
| }; |
|
|
| if (this.useMessages) { |
| requestOptions.messages = payload; |
| requestOptions.max_tokens = |
| maxOutputTokens || anthropicSettings.maxOutputTokens.reset(requestOptions.model); |
| } else { |
| requestOptions.prompt = payload; |
| requestOptions.max_tokens_to_sample = maxOutputTokens || legacy.maxOutputTokens.default; |
| } |
|
|
| requestOptions = configureReasoning(requestOptions, { |
| thinking: this.options.thinking, |
| thinkingBudget: this.options.thinkingBudget, |
| }); |
|
|
| if (!/claude-3[-.]7/.test(model)) { |
| requestOptions.top_p = top_p; |
| requestOptions.top_k = top_k; |
| } else if (requestOptions.thinking == null) { |
| requestOptions.topP = top_p; |
| requestOptions.topK = top_k; |
| } |
|
|
| if (this.systemMessage && this.supportsCacheControl === true) { |
| requestOptions.system = [ |
| { |
| type: 'text', |
| text: this.systemMessage, |
| cache_control: { type: 'ephemeral' }, |
| }, |
| ]; |
| } else if (this.systemMessage) { |
| requestOptions.system = this.systemMessage; |
| } |
|
|
| if (this.supportsCacheControl === true && this.useMessages) { |
| requestOptions.messages = addCacheControl(requestOptions.messages); |
| } |
|
|
| logger.debug('[AnthropicClient]', { ...requestOptions }); |
| const handlers = createStreamEventHandlers(this.options.res); |
| this.streamHandler = new SplitStreamHandler({ |
| accumulate: true, |
| runId: this.responseMessageId, |
| handlers, |
| }); |
|
|
| let intermediateReply = this.streamHandler.tokens; |
|
|
| const maxRetries = 3; |
| const streamRate = this.options.streamRate ?? Constants.DEFAULT_STREAM_RATE; |
| async function processResponse() { |
| let attempts = 0; |
|
|
| while (attempts < maxRetries) { |
| let response; |
| try { |
| const client = this.getClient(requestOptions); |
| response = await this.createResponse(client, requestOptions); |
|
|
| signal.addEventListener('abort', () => { |
| logger.debug('[AnthropicClient] message aborted!'); |
| if (response.controller?.abort) { |
| response.controller.abort(); |
| } |
| }); |
|
|
| for await (const completion of response) { |
| const type = completion?.type ?? ''; |
| if (tokenEventTypes.has(type)) { |
| logger.debug(`[AnthropicClient] ${type}`, completion); |
| this[type] = completion; |
| } |
| this.streamHandler.handle(completion); |
| await sleep(streamRate); |
| } |
|
|
| break; |
| } catch (error) { |
| attempts += 1; |
| logger.warn( |
| `User: ${this.user} | Anthropic Request ${attempts} failed: ${error.message}`, |
| ); |
|
|
| if (attempts < maxRetries) { |
| await delayBeforeRetry(attempts, 350); |
| } else if (this.streamHandler && this.streamHandler.reasoningTokens.length) { |
| return this.getStreamText(); |
| } else if (intermediateReply.length > 0) { |
| return this.getStreamText(intermediateReply); |
| } else { |
| throw new Error(`Operation failed after ${maxRetries} attempts: ${error.message}`); |
| } |
| } finally { |
| signal.removeEventListener('abort', () => { |
| logger.debug('[AnthropicClient] message aborted!'); |
| if (response.controller?.abort) { |
| response.controller.abort(); |
| } |
| }); |
| } |
| } |
| } |
|
|
| await processResponse.bind(this)(); |
| return this.getStreamText(intermediateReply); |
| } |
|
|
| getSaveOptions() { |
| return { |
| maxContextTokens: this.options.maxContextTokens, |
| artifacts: this.options.artifacts, |
| promptPrefix: this.options.promptPrefix, |
| modelLabel: this.options.modelLabel, |
| promptCache: this.options.promptCache, |
| thinking: this.options.thinking, |
| thinkingBudget: this.options.thinkingBudget, |
| resendFiles: this.options.resendFiles, |
| iconURL: this.options.iconURL, |
| greeting: this.options.greeting, |
| spec: this.options.spec, |
| ...this.modelOptions, |
| }; |
| } |
|
|
/** No-op: this client needs no extra options for message building. */
getBuildMessagesOptions() {
  logger.debug("AnthropicClient doesn't use getBuildMessagesOptions");
}
|
|
/**
 * @returns {string} Tokenizer encoding used for local token estimates.
 *   NOTE(review): cl100k_base approximates — but does not exactly match —
 *   Anthropic's own tokenizer; counts are estimates.
 */
getEncoding() {
  return 'cl100k_base';
}
|
|
| |
| |
| |
| |
| |
| getTokenCount(text) { |
| const encoding = this.getEncoding(); |
| return Tokenizer.getTokenCount(text, encoding); |
| } |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
/**
 * Generates a short conversation title via a secondary Anthropic request.
 * Clears captured stream-usage events first so title-call usage is not
 * conflated with the main message's usage; records token spend under the
 * 'title' context. Falls back to 'New Chat' on any error.
 *
 * @param {object} params
 * @param {string} params.text - The user's initial message.
 * @param {string} [params.responseText=''] - The assistant's response.
 * @returns {Promise<string>} The generated title, or 'New Chat' on failure.
 */
async titleConvo({ text, responseText = '' }) {
  let title = 'New Chat';
  this.message_delta = undefined;
  this.message_start = undefined;
  const convo = `<initial_message>
${truncateText(text)}
</initial_message>
<response>
${JSON.stringify(truncateText(responseText))}
</response>`;

  // Title model priority: explicit option > env var > haiku default.
  const { ANTHROPIC_TITLE_MODEL } = process.env ?? {};
  const model = this.options.titleModel ?? ANTHROPIC_TITLE_MODEL ?? 'claude-3-haiku-20240307';
  const system = titleFunctionPrompt;

  const titleChatCompletion = async () => {
    const content = `<conversation_context>
${convo}
</conversation_context>

Please generate a title for this conversation.`;

    const titleMessage = { role: 'user', content };
    const requestOptions = {
      model,
      temperature: 0.3,
      max_tokens: 1024,
      system,
      stop_sequences: ['\n\nHuman:', '\n\nAssistant', '</function_calls>'],
      messages: [titleMessage],
    };

    try {
      // Force the Messages API (third argument) regardless of this.useMessages.
      const response = await this.createResponse(
        this.getClient(requestOptions),
        requestOptions,
        true,
      );
      // Fall back to local estimates when the API omits usage numbers.
      let promptTokens = response?.usage?.input_tokens;
      let completionTokens = response?.usage?.output_tokens;
      if (!promptTokens) {
        promptTokens = this.getTokenCountForMessage(titleMessage);
        promptTokens += this.getTokenCountForMessage({ role: 'system', content: system });
      }
      if (!completionTokens) {
        completionTokens = this.getTokenCountForMessage(response.content[0]);
      }
      await this.recordTokenUsage({
        model,
        promptTokens,
        completionTokens,
        context: 'title',
      });
      const text = response.content[0].text;
      title = parseParamFromPrompt(text, 'title');
    } catch (e) {
      logger.error('[AnthropicClient] There was an issue generating the title', e);
    }
  };

  await titleChatCompletion();
  logger.debug('[AnthropicClient] Convo Title: ' + title);
  return title;
}
| } |
|
|
// CommonJS export: the client class is this module's sole export.
module.exports = AnthropicClient;
|
|