Switch task model and add max tokens limits (#1036)
.env.template
CHANGED
```diff
@@ -246,7 +246,7 @@ OLD_MODELS=`[
   {"name": "openchat/openchat-3.5-0106"}
 ]`
 
-TASK_MODEL='
+TASK_MODEL='meta-llama/Meta-Llama-3-70B-Instruct'
 
 APP_BASE="/chat"
 PUBLIC_ORIGIN=https://huggingface.co
```
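For context, `TASK_MODEL` selects the smaller model used for internal tasks (conversation titles, web search queries) rather than the main chat model. Below is a minimal sketch of how a server might resolve this variable against the configured model list; `resolveTaskModel` and its fallback behavior are illustrative assumptions, not code from this PR:

```ts
// Sketch only: pick the task model by name from the configured models,
// falling back to the default chat model when TASK_MODEL is unset or unknown.
interface ModelConfig {
	name: string;
}

function resolveTaskModel(models: ModelConfig[], defaultModel: ModelConfig): ModelConfig {
	const taskModelName = process.env.TASK_MODEL;
	if (!taskModelName) return defaultModel;
	return models.find((m) => m.name === taskModelName) ?? defaultModel;
}
```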
src/lib/server/generateFromDefaultEndpoint.ts
CHANGED
```diff
@@ -4,13 +4,15 @@ import type { Conversation } from "$lib/types/Conversation";
 export async function generateFromDefaultEndpoint({
 	messages,
 	preprompt,
+	generateSettings,
 }: {
 	messages: Omit<Conversation["messages"][0], "id">[];
 	preprompt?: string;
+	generateSettings?: Record<string, unknown>;
 }): Promise<string> {
 	const endpoint = await smallModel.getEndpoint();
 
-	const tokenStream = await endpoint({ messages, preprompt });
+	const tokenStream = await endpoint({ messages, preprompt, generateSettings });
 
 	for await (const output of tokenStream) {
 		// if not generated_text is here it means the generation is not done
```
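Because `generateSettings` is typed as `Record<string, unknown>`, backend-specific sampling keys can be forwarded without changing this helper. A hedged usage sketch follows; the message shape is simplified and only the function signature comes from this diff:

```ts
// Illustrative call: cap a background generation at 50 new tokens.
// max_new_tokens matches the TGI-style key used elsewhere in this PR;
// other backends may expect different setting names.
const answer = await generateFromDefaultEndpoint({
	messages: [{ from: "user", content: "Name three primary colors." }],
	preprompt: "Answer as tersely as possible.",
	generateSettings: { max_new_tokens: 50 },
});
```

The loose `Record<string, unknown>` type keeps the helper endpoint-agnostic, at the cost of compile-time checking of the setting names.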
src/lib/server/summarize.ts
CHANGED
```diff
@@ -27,7 +27,11 @@ export async function summarize(prompt: string) {
 
 	return await generateFromDefaultEndpoint({
 		messages,
-		preprompt:
+		preprompt:
+			"You are a summarization AI. Summarize the user's request into a single short sentence of four words or less. Do not try to answer it, only summarize the user's query. Always start your answer with an emoji relevant to the summary",
+		generateSettings: {
+			max_new_tokens: 15,
+		},
 	})
 		.then((summary) => {
 			// add an emoji if none is found in the first three characters
```
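A per-call setting like `max_new_tokens: 15` only takes effect if the endpoint merges it over its defaults. Here is a minimal sketch of that merge, assuming a TGI-style parameters object; none of these identifiers appear in this diff:

```ts
// Hypothetical merge: model defaults first, then per-call overrides,
// so summarize()'s max_new_tokens: 15 wins over the configured default.
function buildParameters(
	defaults: Record<string, unknown>,
	generateSettings?: Record<string, unknown>
): Record<string, unknown> {
	return { ...defaults, ...generateSettings };
}

const parameters = buildParameters(
	{ max_new_tokens: 1024, temperature: 0.1 },
	{ max_new_tokens: 15 }
);
// parameters.max_new_tokens === 15
```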
src/lib/server/websearch/generateQuery.ts
CHANGED
```diff
@@ -64,6 +64,9 @@ Current Question: Where is it being hosted?`,
 	const webQuery = await generateFromDefaultEndpoint({
 		messages: convQuery,
 		preprompt: `You are tasked with generating web search queries. Give me an appropriate query to answer my question for google search. Answer with only the query. Today is ${currentDate}`,
+		generateSettings: {
+			max_new_tokens: 30,
+		},
 	});
 
 	return webQuery.trim();
```
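The preprompt interpolates `currentDate` so the model can ground relative phrases like "latest" or "this week" when writing the query. A sketch of a formatter that would produce such a value; the app's actual date format is an assumption:

```ts
// Illustrative only: a human-readable date for the query preprompt.
const currentDate = new Date().toLocaleDateString("en-US", {
	year: "numeric",
	month: "long",
	day: "numeric",
}); // e.g. "April 12, 2024"
```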