Switch task model and add max tokens limits (#1036)
.env.template
CHANGED
```diff
@@ -246,7 +246,7 @@ OLD_MODELS=`[
   {"name": "openchat/openchat-3.5-0106"}
 ]`
 
-TASK_MODEL='
+TASK_MODEL='meta-llama/Meta-Llama-3-70B-Instruct'
 
 APP_BASE="/chat"
 PUBLIC_ORIGIN=https://huggingface.co
```
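For context, `TASK_MODEL` selects the smaller model used for internal tasks (conversation titles, web search queries) rather than the main chat model. Below is a minimal sketch of how a server might resolve this variable against the configured model list; `resolveTaskModel` and its fallback behavior are illustrative assumptions, not code from this PR:

```ts
// Sketch only: pick the task model by name from the configured models,
// falling back to the default chat model when TASK_MODEL is unset or unknown.
interface ModelConfig {
	name: string;
}

function resolveTaskModel(models: ModelConfig[], defaultModel: ModelConfig): ModelConfig {
	const taskModelName = process.env.TASK_MODEL;
	if (!taskModelName) return defaultModel;
	return models.find((m) => m.name === taskModelName) ?? defaultModel;
}
```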
src/lib/server/generateFromDefaultEndpoint.ts
CHANGED
```diff
@@ -4,13 +4,15 @@ import type { Conversation } from "$lib/types/Conversation";
 export async function generateFromDefaultEndpoint({
 	messages,
 	preprompt,
+	generateSettings,
 }: {
 	messages: Omit<Conversation["messages"][0], "id">[];
 	preprompt?: string;
+	generateSettings?: Record<string, unknown>;
 }): Promise<string> {
 	const endpoint = await smallModel.getEndpoint();
 
-	const tokenStream = await endpoint({ messages, preprompt });
+	const tokenStream = await endpoint({ messages, preprompt, generateSettings });
 
 	for await (const output of tokenStream) {
 		// if not generated_text is here it means the generation is not done
```
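Because `generateSettings` is typed as `Record<string, unknown>`, backend-specific sampling keys can be forwarded without changing this helper. A hedged usage sketch follows; the message shape is simplified and only the function signature comes from this diff:

```ts
// Illustrative call: cap a background generation at 50 new tokens.
// max_new_tokens matches the TGI-style key used elsewhere in this PR;
// other backends may expect different setting names.
const answer = await generateFromDefaultEndpoint({
	messages: [{ from: "user", content: "Name three primary colors." }],
	preprompt: "Answer as tersely as possible.",
	generateSettings: { max_new_tokens: 50 },
});
```

The loose `Record<string, unknown>` type keeps the helper endpoint-agnostic, at the cost of compile-time checking of the setting names.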
src/lib/server/summarize.ts
CHANGED
```diff
@@ -27,7 +27,11 @@ export async function summarize(prompt: string) {
 
 	return await generateFromDefaultEndpoint({
 		messages,
-		preprompt:
+		preprompt:
+			"You are a summarization AI. Summarize the user's request into a single short sentence of four words or less. Do not try to answer it, only summarize the user's query. Always start your answer with an emoji relevant to the summary",
+		generateSettings: {
+			max_new_tokens: 15,
+		},
 	})
 		.then((summary) => {
 			// add an emoji if none is found in the first three characters
```
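A per-call setting like `max_new_tokens: 15` only takes effect if the endpoint merges it over its defaults. Here is a minimal sketch of that merge, assuming a TGI-style parameters object; none of these identifiers appear in this diff:

```ts
// Hypothetical merge: model defaults first, then per-call overrides,
// so summarize()'s max_new_tokens: 15 wins over the configured default.
function buildParameters(
	defaults: Record<string, unknown>,
	generateSettings?: Record<string, unknown>
): Record<string, unknown> {
	return { ...defaults, ...generateSettings };
}

const parameters = buildParameters(
	{ max_new_tokens: 1024, temperature: 0.1 },
	{ max_new_tokens: 15 }
);
// parameters.max_new_tokens === 15
```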
src/lib/server/websearch/generateQuery.ts
CHANGED
```diff
@@ -64,6 +64,9 @@ Current Question: Where is it being hosted?`,
 	const webQuery = await generateFromDefaultEndpoint({
 		messages: convQuery,
 		preprompt: `You are tasked with generating web search queries. Give me an appropriate query to answer my question for google search. Answer with only the query. Today is ${currentDate}`,
+		generateSettings: {
+			max_new_tokens: 30,
+		},
 	});
 
 	return webQuery.trim();
```
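The preprompt interpolates `currentDate` so the model can ground relative phrases like "latest" or "this week" when writing the query. A sketch of a formatter that would produce such a value; the app's actual date format is an assumption:

```ts
// Illustrative only: a human-readable date for the query preprompt.
const currentDate = new Date().toLocaleDateString("en-US", {
	year: "numeric",
	month: "long",
	day: "numeric",
}); // e.g. "April 12, 2024"
```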