import { type ChatCompletionInputMessage } from '@huggingface/tasks';
import { HfInference } from '@huggingface/inference';
import type { Conversation, ModelEntryWithTokenizer } from '$lib/types';
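
/** Creates an `HfInference` client authenticated with the given access token. */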
export function createHfInference(token: string): HfInference {
	return new HfInference(token);
}
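
/**
 * Streams a chat completion for the given conversation, invoking `onChunk`
 * with the accumulated response text after every delta. The request can be
 * cancelled via the provided `AbortController`.
 */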
export async function handleStreamingResponse(
	hf: HfInference,
	conversation: Conversation,
	onChunk: (content: string) => void,
	abortController: AbortController,
	systemMessage?: ChatCompletionInputMessage
): Promise<void> {
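	// Prepend the system message only when the model's chat template supports
	// a system role and the message actually has content.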
	const messages = [
		...(isSystemPromptSupported(conversation.model) && systemMessage?.content?.length
			? [systemMessage]
			: []),
		...conversation.messages
	];

	let out = '';
	for await (const chunk of hf.chatCompletionStream(
		{
			model: conversation.model.id,
			messages,
			temperature: conversation.config.temperature,
			max_tokens: conversation.config.maxTokens
		},
		{ signal: abortController.signal }
	)) {
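		// Accumulate deltas so each `onChunk` call receives the full response so far.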
		if (chunk.choices && chunk.choices.length > 0 && chunk.choices[0]?.delta?.content) {
			out += chunk.choices[0].delta.content;
			onChunk(out);
		}
	}
}
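
/**
 * Requests a complete (non-streaming) chat completion and returns the
 * assistant message, or throws if the model returned no choices.
 */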
export async function handleNonStreamingResponse(
	hf: HfInference,
	conversation: Conversation,
	systemMessage?: ChatCompletionInputMessage
): Promise<ChatCompletionInputMessage> {
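	// Same system-message gating as the streaming path above.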
	const messages = [
		...(isSystemPromptSupported(conversation.model) && systemMessage?.content?.length
			? [systemMessage]
			: []),
		...conversation.messages
	];

	const response = await hf.chatCompletion({
		// Pass the model id string, matching the streaming path above.
		model: conversation.model.id,
		messages,
		temperature: conversation.config.temperature,
		max_tokens: conversation.config.maxTokens
	});
	if (response.choices && response.choices.length > 0) {
		return response.choices[0].message;
	}
	throw new Error('No response from the model');
}
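
/** A model supports a system prompt if its chat template references a `system` role. */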
export function isSystemPromptSupported(model: ModelEntryWithTokenizer): boolean {
	return model.tokenizerConfig?.chat_template?.includes('system') ?? false;
}
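
// Minimal usage sketch (illustrative only; assumes a `Conversation` shaped the
// way these helpers consume it — `model`, `messages`, and a `config` with
// `temperature` and `maxTokens`):
//
//   const hf = createHfInference(token);
//   const abortController = new AbortController();
//   await handleStreamingResponse(
//     hf,
//     conversation,
//     (content) => console.log(content),
//     abortController
//   );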