const { encoding_for_model: encodingForModel, get_encoding: getEncoding } = require('tiktoken');
const ChatGPTClient = require('./ChatGPTClient');
const BaseClient = require('./BaseClient');
const { getModelMaxTokens, genAzureChatCompletion } = require('../../utils');
const { truncateText, formatMessage, CUT_OFF_PROMPT } = require('./prompts');
const spendTokens = require('../../models/spendTokens');
const { isEnabled } = require('../../server/utils');
const { createLLM, RunManager } = require('./llm');
const { summaryBuffer } = require('./memory');
const { runTitleChain } = require('./chains');
const { tokenSplit } = require('./document');

// Cache of instantiated tiktoken tokenizers, keyed by encoding or model name.
const tokenizersCache = {};

// Counts tokenizer invocations so encoders can be periodically freed and recreated.
let tokenizerCallsCount = 0;
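
/**
 * Client for OpenAI-compatible completion APIs (including Azure OpenAI,
 * OpenRouter, and user-configured reverse proxies), extending BaseClient.
 * Manages tiktoken tokenizers, applies a context strategy ('discard' or
 * 'summarize') to fit conversations into the model's token budget, generates
 * conversation titles, and records token spend.
 */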
class OpenAIClient extends BaseClient {
  constructor(apiKey, options = {}) {
    super(apiKey, options);
    this.ChatGPTClient = new ChatGPTClient();
    this.buildPrompt = this.ChatGPTClient.buildPrompt.bind(this);
    this.getCompletion = this.ChatGPTClient.getCompletion.bind(this);
    this.sender = options.sender ?? 'ChatGPT';
    this.contextStrategy = options.contextStrategy
      ? options.contextStrategy.toLowerCase()
      : 'discard';
    this.shouldSummarize = this.contextStrategy === 'summarize';
    this.azure = options.azure || false;
    if (this.azure) {
      this.azureEndpoint = genAzureChatCompletion(this.azure);
    }
    this.setOptions(options);
  }
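
  /**
   * Merges (or replaces, when `options.replaceOptions` is set) the client
   * options, then resolves the API key, model options, completion endpoint,
   * stop sequences, and token limits. Returns `this` for chaining.
   */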
  setOptions(options) {
    if (this.options && !this.options.replaceOptions) {
      this.options.modelOptions = {
        ...this.options.modelOptions,
        ...options.modelOptions,
      };
      delete options.modelOptions;
      this.options = {
        ...this.options,
        ...options,
      };
    } else {
      this.options = options;
    }

    if (this.options.openaiApiKey) {
      this.apiKey = this.options.openaiApiKey;
    }

    const modelOptions = this.options.modelOptions || {};
    if (!this.modelOptions) {
      this.modelOptions = {
        ...modelOptions,
        model: modelOptions.model || 'gpt-3.5-turbo',
        temperature:
          typeof modelOptions.temperature === 'undefined' ? 0.8 : modelOptions.temperature,
        top_p: typeof modelOptions.top_p === 'undefined' ? 1 : modelOptions.top_p,
        presence_penalty:
          typeof modelOptions.presence_penalty === 'undefined' ? 1 : modelOptions.presence_penalty,
        stop: modelOptions.stop,
      };
    } else {
      // Update existing model options with any newly provided values.
      this.modelOptions = {
        ...this.modelOptions,
        ...modelOptions,
      };
    }

    const { OPENROUTER_API_KEY, OPENAI_FORCE_PROMPT } = process.env ?? {};
    if (OPENROUTER_API_KEY) {
      this.apiKey = OPENROUTER_API_KEY;
      this.useOpenRouter = true;
    }

    const { reverseProxyUrl: reverseProxy } = this.options;
    this.FORCE_PROMPT =
      isEnabled(OPENAI_FORCE_PROMPT) ||
      (reverseProxy && reverseProxy.includes('completions') && !reverseProxy.includes('chat'));

    const { model } = this.modelOptions;

    this.isChatCompletion = this.useOpenRouter || !!reverseProxy || model.includes('gpt-');
    this.isChatGptModel = this.isChatCompletion;
    if (model.includes('text-davinci-003') || model.includes('instruct') || this.FORCE_PROMPT) {
      this.isChatCompletion = false;
      this.isChatGptModel = false;
    }
    const { isChatGptModel } = this;
    this.isUnofficialChatGptModel =
      model.startsWith('text-chat') || model.startsWith('text-davinci-002-render');
    this.maxContextTokens = getModelMaxTokens(model) ?? 4095;

    // Reserve half of the context window for the summary when summarization is enabled.
    if (this.shouldSummarize) {
      this.maxContextTokens = Math.floor(this.maxContextTokens / 2);
    }

    if (this.options.debug) {
      console.debug('maxContextTokens', this.maxContextTokens);
    }

    this.maxResponseTokens = this.modelOptions.max_tokens || 1024;
    this.maxPromptTokens =
      this.options.maxPromptTokens || this.maxContextTokens - this.maxResponseTokens;

    if (this.maxPromptTokens + this.maxResponseTokens > this.maxContextTokens) {
      throw new Error(
        `maxPromptTokens + max_tokens (${this.maxPromptTokens} + ${this.maxResponseTokens} = ${
          this.maxPromptTokens + this.maxResponseTokens
        }) must be less than or equal to maxContextTokens (${this.maxContextTokens})`,
      );
    }

    this.userLabel = this.options.userLabel || 'User';
    this.chatGptLabel = this.options.chatGptLabel || 'Assistant';

    this.setupTokens();

    if (!this.modelOptions.stop) {
      const stopTokens = [this.startToken];
      if (this.endToken && this.endToken !== this.startToken) {
        stopTokens.push(this.endToken);
      }
      stopTokens.push(`\n${this.userLabel}:`);
      stopTokens.push('<|diff_marker|>');
      this.modelOptions.stop = stopTokens;
    }

    if (reverseProxy) {
      this.completionsUrl = reverseProxy;
      this.langchainProxy = reverseProxy.match(/.*v1/)?.[0];
      if (!this.langchainProxy) {
        console.warn(`The reverse proxy URL ${reverseProxy} is not valid for Plugins.
The url must follow OpenAI specs, for example: https://localhost:8080/v1/chat/completions
If your reverse proxy is compatible with OpenAI specs in every other way, it may still work without plugins enabled.`);
      }
    } else if (isChatGptModel) {
      this.completionsUrl = 'https://api.openai.com/v1/chat/completions';
    } else {
      this.completionsUrl = 'https://api.openai.com/v1/completions';
    }

    if (this.azureEndpoint) {
      this.completionsUrl = this.azureEndpoint;
    }

    if (this.azureEndpoint && this.options.debug) {
      console.debug('Using Azure endpoint');
    }

    if (this.useOpenRouter) {
      this.completionsUrl = 'https://openrouter.ai/api/v1/chat/completions';
    }

    return this;
  }
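
  /**
   * Sets the start/end tokens used to delimit messages in plain-text prompts.
   * Unofficial ChatGPT models use the `<|im_start|>`/`<|im_end|>` special
   * tokens; all other models use the `||>` prefix with no end token.
   */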
  setupTokens() {
    if (this.isChatCompletion) {
      this.startToken = '||>';
      this.endToken = '';
    } else if (this.isUnofficialChatGptModel) {
      this.startToken = '<|im_start|>';
      this.endToken = '<|im_end|>';
    } else {
      this.startToken = '||>';
      this.endToken = '';
    }
  }
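
  /**
   * Selects the tiktoken tokenizer for the current model and records the
   * chosen encoding on `this.encoding`: `cl100k_base` for chat completions,
   * the model's own encoding for other models, with `text-davinci-003` as
   * the fallback.
   */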
  selectTokenizer() {
    let tokenizer;
    this.encoding = 'text-davinci-003';
    if (this.isChatCompletion) {
      this.encoding = 'cl100k_base';
      tokenizer = this.constructor.getTokenizer(this.encoding);
    } else if (this.isUnofficialChatGptModel) {
      const extendSpecialTokens = {
        '<|im_start|>': 100264,
        '<|im_end|>': 100265,
      };
      tokenizer = this.constructor.getTokenizer(this.encoding, true, extendSpecialTokens);
    } else {
      try {
        const { model } = this.modelOptions;
        this.encoding = model.includes('instruct') ? 'text-davinci-003' : model;
        tokenizer = this.constructor.getTokenizer(this.encoding, true);
      } catch {
        tokenizer = this.constructor.getTokenizer('text-davinci-003', true);
      }
    }

    return tokenizer;
  }
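
  /**
   * Returns a cached tiktoken tokenizer, creating and caching it on first use.
   * @param {string} encoding - Encoding name (e.g. 'cl100k_base') or model name.
   * @param {boolean} [isModelName=false] - When true, `encoding` is resolved as
   *   a model name via `encoding_for_model`; otherwise via `get_encoding`.
   * @param {Object} [extendSpecialTokens={}] - Extra special tokens to register.
   */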
  static getTokenizer(encoding, isModelName = false, extendSpecialTokens = {}) {
    let tokenizer;
    if (tokenizersCache[encoding]) {
      tokenizer = tokenizersCache[encoding];
    } else {
      if (isModelName) {
        tokenizer = encodingForModel(encoding, extendSpecialTokens);
      } else {
        tokenizer = getEncoding(encoding, extendSpecialTokens);
      }
      tokenizersCache[encoding] = tokenizer;
    }
    return tokenizer;
  }
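
  /**
   * Frees every cached tokenizer, releasing the memory held by the underlying
   * encoders, and resets the call counter. Invoked every 25 tokenizer calls
   * and whenever encoding fails.
   */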
  static freeAndResetAllEncoders() {
    try {
      Object.keys(tokenizersCache).forEach((key) => {
        if (tokenizersCache[key]) {
          tokenizersCache[key].free();
          delete tokenizersCache[key];
        }
      });
      // Reset to 1 so the call that triggered the reset counts as the first.
      tokenizerCallsCount = 1;
    } catch (error) {
      console.log('Free and reset encoders error');
      console.error(error);
    }
  }
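
  /**
   * Frees and re-creates tokenizers every 25 calls as a guard against memory
   * growth in the cached encoders.
   */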
  resetTokenizersIfNecessary() {
    if (tokenizerCallsCount >= 25) {
      if (this.options.debug) {
        console.debug('freeAndResetAllEncoders: reached 25 encodings, resetting...');
      }
      this.constructor.freeAndResetAllEncoders();
    }
    tokenizerCallsCount++;
  }
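
  /**
   * Returns the token count of the given text, retrying once with freshly
   * created encoders if encoding fails.
   */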
  getTokenCount(text) {
    this.resetTokenizersIfNecessary();
    try {
      const tokenizer = this.selectTokenizer();
      return tokenizer.encode(text, 'all').length;
    } catch {
      this.constructor.freeAndResetAllEncoders();
      const tokenizer = this.selectTokenizer();
      return tokenizer.encode(text, 'all').length;
    }
  }
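
  /**
   * Returns the options to persist with the conversation.
   */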
  getSaveOptions() {
    return {
      chatGptLabel: this.options.chatGptLabel,
      promptPrefix: this.options.promptPrefix,
      ...this.modelOptions,
    };
  }
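
  /**
   * Maps request options to the options consumed by `buildMessages`.
   */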
  getBuildMessagesOptions(opts) {
    return {
      isChatCompletion: this.isChatCompletion,
      promptPrefix: opts.promptPrefix,
      abortController: opts.abortController,
    };
  }
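
  /**
   * Builds the completion payload from the conversation history. For non-chat
   * models, delegates to `buildPrompt`; for chat completions, formats each
   * message, prepends optional system instructions, and applies the configured
   * context strategy to fit the token budget. Reports prompt tokens through
   * `options.getReqData` when available.
   * @returns {Promise<{ prompt, promptTokens, messages, tokenCountMap? }>}
   */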
  async buildMessages(
    messages,
    parentMessageId,
    { isChatCompletion = false, promptPrefix = null },
  ) {
    let orderedMessages = this.constructor.getMessagesForConversation({
      messages,
      parentMessageId,
      summary: this.shouldSummarize,
    });
    if (!isChatCompletion) {
      return await this.buildPrompt(orderedMessages, {
        isChatGptModel: isChatCompletion,
        promptPrefix,
      });
    }

    let payload;
    let instructions;
    let tokenCountMap;
    let promptTokens;

    promptPrefix = (promptPrefix || this.options.promptPrefix || '').trim();
    if (promptPrefix) {
      promptPrefix = `Instructions:\n${promptPrefix}`;
      instructions = {
        role: 'system',
        name: 'instructions',
        content: promptPrefix,
      };

      if (this.contextStrategy) {
        instructions.tokenCount = this.getTokenCountForMessage(instructions);
      }
    }

    const formattedMessages = orderedMessages.map((message, i) => {
      const formattedMessage = formatMessage({
        message,
        userName: this.options?.name,
        assistantName: this.options?.chatGptLabel,
      });

      if (this.contextStrategy && !orderedMessages[i].tokenCount) {
        orderedMessages[i].tokenCount = this.getTokenCountForMessage(formattedMessage);
      }

      return formattedMessage;
    });

    if (this.contextStrategy) {
      ({ payload, tokenCountMap, promptTokens, messages } = await this.handleContextStrategy({
        instructions,
        orderedMessages,
        formattedMessages,
      }));
    }

    const result = {
      prompt: payload,
      promptTokens,
      messages,
    };

    if (tokenCountMap) {
      tokenCountMap.instructions = instructions?.tokenCount;
      result.tokenCountMap = tokenCountMap;
    }

    if (promptTokens >= 0 && typeof this.options.getReqData === 'function') {
      this.options.getReqData({ promptTokens });
    }

    return result;
  }
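
  /**
   * Sends the completion request. When `opts.onProgress` is provided, streams
   * the response and forwards each token to the callback; otherwise resolves
   * the full completion in one call. Returns the trimmed reply text.
   */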
  async sendCompletion(payload, opts = {}) {
    let reply = '';
    let result = null;
    let streamResult = null;
    this.modelOptions.user = this.user;
    if (typeof opts.onProgress === 'function') {
      await this.getCompletion(
        payload,
        (progressMessage) => {
          if (progressMessage === '[DONE]') {
            return;
          }

          if (progressMessage.choices) {
            streamResult = progressMessage;
          }

          let token = null;
          if (this.isChatCompletion) {
            token =
              progressMessage.choices?.[0]?.delta?.content ?? progressMessage.choices?.[0]?.text;
          } else {
            token = progressMessage.choices?.[0]?.text;
          }

          // OpenRouter may send full message content instead of a delta.
          if (!token && this.useOpenRouter) {
            token = progressMessage.choices?.[0]?.message?.content;
          }

          if (!token) {
            return;
          }
          if (token === this.endToken) {
            return;
          }
          opts.onProgress(token);
          reply += token;
        },
        opts.abortController || new AbortController(),
      );
    } else {
      result = await this.getCompletion(
        payload,
        null,
        opts.abortController || new AbortController(),
      );
      if (this.options.debug) {
        console.debug(JSON.stringify(result));
      }
      if (this.isChatCompletion) {
        reply = result.choices[0].message.content;
      } else {
        reply = result.choices[0].text.replace(this.endToken, '');
      }
    }

    if (streamResult && typeof opts.addMetadata === 'function') {
      const { finish_reason } = streamResult.choices[0];
      opts.addMetadata({ finish_reason });
    }
    return reply.trim();
  }
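
  /**
   * Creates a LangChain LLM instance configured for this client, wiring up the
   * proxy or OpenRouter base path, Azure credentials, and the run-manager
   * callbacks used for token tracking.
   */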
  initializeLLM({
    model = 'gpt-3.5-turbo',
    modelName,
    temperature = 0.2,
    presence_penalty = 0,
    frequency_penalty = 0,
    max_tokens,
    streaming,
    context,
    tokenBuffer,
    initialMessageCount,
  }) {
    const modelOptions = {
      modelName: modelName ?? model,
      temperature,
      presence_penalty,
      frequency_penalty,
      user: this.user,
    };

    if (max_tokens) {
      modelOptions.max_tokens = max_tokens;
    }

    const configOptions = {};

    if (this.langchainProxy) {
      configOptions.basePath = this.langchainProxy;
    }

    if (this.useOpenRouter) {
      configOptions.basePath = 'https://openrouter.ai/api/v1';
      configOptions.baseOptions = {
        headers: {
          'HTTP-Referer': 'https://librechat.ai',
          'X-Title': 'LibreChat',
        },
      };
    }

    const { req, res, debug } = this.options;
    const runManager = new RunManager({ req, res, debug, abortController: this.abortController });
    this.runManager = runManager;

    const llm = createLLM({
      modelOptions,
      configOptions,
      openAIApiKey: this.apiKey,
      azure: this.azure,
      streaming,
      callbacks: runManager.createCallbacks({
        context,
        tokenBuffer,
        conversationId: this.conversationId,
        initialMessageCount,
      }),
    });

    return llm;
  }
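
  /**
   * Generates a short conversation title from the given user text and
   * response, preferring a LangChain title chain and falling back to a direct
   * completion payload if the chain fails. Defaults to 'New Chat'.
   */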
  async titleConvo({ text, responseText = '' }) {
    let title = 'New Chat';
    const convo = `||>User:
"${truncateText(text)}"
||>Response:
"${JSON.stringify(truncateText(responseText))}"`;

    const { OPENAI_TITLE_MODEL } = process.env ?? {};

    const modelOptions = {
      model: OPENAI_TITLE_MODEL ?? 'gpt-3.5-turbo',
      temperature: 0.2,
      presence_penalty: 0,
      frequency_penalty: 0,
      max_tokens: 16,
    };

    try {
      this.abortController = new AbortController();
      const llm = this.initializeLLM({ ...modelOptions, context: 'title', tokenBuffer: 150 });
      title = await runTitleChain({ llm, text, convo, signal: this.abortController.signal });
    } catch (e) {
      if (e?.message?.toLowerCase()?.includes('abort')) {
        this.options.debug && console.debug('Aborted title generation');
        return;
      }
      console.log('There was an issue generating the title with LangChain, trying the old method...');
      this.options.debug && console.error(e.message, e);
      modelOptions.model = OPENAI_TITLE_MODEL ?? 'gpt-3.5-turbo';
      const instructionsPayload = [
        {
          role: 'system',
          content: `Detect the user's language and write an extremely concise title for this conversation in that language.
Title in 5 Words or Less. No Punctuation or Quotation. Do not mention the language. Capitalize the First Letter of Every Word.

${convo}

||>Title:`,
        },
      ];

      try {
        title = (await this.sendPayload(instructionsPayload, { modelOptions })).replaceAll('"', '');
      } catch (e) {
        console.error(e);
        console.log('There was another issue generating the title, see the error above.');
      }
    }

    console.log('CONVERSATION TITLE', title);
    return title;
  }
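
  /**
   * Summarizes the messages being discarded from context so the summary can
   * stand in for them. If the messages exceed the summary model's context
   * window, they are trimmed first; if nothing fits, the latest message is
   * truncated and summarized with the cut-off prompt instead.
   * @returns {Promise<{ summaryMessage?, summaryTokenCount? }>} Empty object on failure.
   */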
  async summarizeMessages({ messagesToRefine, remainingContextTokens }) {
    this.options.debug && console.debug('Summarizing messages...');
    let context = messagesToRefine;
    let prompt;

    const { OPENAI_SUMMARY_MODEL = 'gpt-3.5-turbo' } = process.env ?? {};
    const maxContextTokens = getModelMaxTokens(OPENAI_SUMMARY_MODEL) ?? 4095;

    // Token buffer counted on top of the message tokens, to account for the summarizer prompt.
    let promptBuffer = 101;

    const excessTokenCount = context.reduce(
      (acc, message) => acc + message.tokenCount,
      promptBuffer,
    );

    if (excessTokenCount > maxContextTokens) {
      ({ context } = await this.getMessagesWithinTokenLimit(context, maxContextTokens));
    }

    if (context.length === 0) {
      this.options.debug &&
        console.debug('Summary context is empty, using latest message within token limit');

      promptBuffer = 32;
      const { text, ...latestMessage } = messagesToRefine[messagesToRefine.length - 1];
      const splitText = await tokenSplit({
        text,
        chunkSize: Math.floor((maxContextTokens - promptBuffer) / 3),
      });

      const newText = `${splitText[0]}\n...[truncated]...\n${splitText[splitText.length - 1]}`;
      prompt = CUT_OFF_PROMPT;

      context = [
        formatMessage({
          message: {
            ...latestMessage,
            text: newText,
          },
          userName: this.options?.name,
          assistantName: this.options?.chatGptLabel,
        }),
      ];
    }

    const initialPromptTokens = this.maxContextTokens - remainingContextTokens;
    this.options.debug && console.debug(`initialPromptTokens: ${initialPromptTokens}`);

    const llm = this.initializeLLM({
      model: OPENAI_SUMMARY_MODEL,
      temperature: 0.2,
      context: 'summary',
      tokenBuffer: initialPromptTokens,
    });

    try {
      const summaryMessage = await summaryBuffer({
        llm,
        debug: this.options.debug,
        prompt,
        context,
        formatOptions: {
          userName: this.options?.name,
          assistantName: this.options?.chatGptLabel ?? this.options?.modelLabel,
        },
        previous_summary: this.previous_summary?.summary,
        signal: this.abortController.signal,
      });

      const summaryTokenCount = this.getTokenCountForMessage(summaryMessage);

      if (this.options.debug) {
        console.debug('summaryMessage:', summaryMessage);
        console.debug(
          `remainingContextTokens: ${remainingContextTokens}, after refining: ${
            remainingContextTokens - summaryTokenCount
          }`,
        );
      }

      return { summaryMessage, summaryTokenCount };
    } catch (e) {
      if (e?.message?.toLowerCase()?.includes('abort')) {
        this.options.debug && console.debug('Aborted summarization');
        const { run, runId } = this.runManager.getRunByConversationId(this.conversationId);
        if (run && run.error) {
          const { error } = run;
          this.runManager.removeRun(runId);
          throw new Error(error);
        }
      }
      console.error('Error summarizing messages');
      this.options.debug && console.error(e);
      return {};
    }
  }
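
  /**
   * Records prompt and completion token spend for the current user and conversation.
   */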
  async recordTokenUsage({ promptTokens, completionTokens }) {
    if (this.options.debug) {
      console.debug('promptTokens', promptTokens);
      console.debug('completionTokens', completionTokens);
    }
    await spendTokens(
      {
        user: this.user,
        model: this.modelOptions.model,
        context: 'message',
        conversationId: this.conversationId,
      },
      { promptTokens, completionTokens },
    );
  }
}

module.exports = OpenAIClient;