Spaces:
Running
Running
Switch task model and add max tokens limits (#1036)
Browse files
.env.template
CHANGED
@@ -246,7 +246,7 @@ OLD_MODELS=`[
|
|
246 |
{"name": "openchat/openchat-3.5-0106"}
|
247 |
]`
|
248 |
|
249 |
-
TASK_MODEL='
|
250 |
|
251 |
APP_BASE="/chat"
|
252 |
PUBLIC_ORIGIN=https://huggingface.co
|
|
|
246 |
{"name": "openchat/openchat-3.5-0106"}
|
247 |
]`
|
248 |
|
249 |
+
TASK_MODEL='meta-llama/Meta-Llama-3-70B-Instruct'
|
250 |
|
251 |
APP_BASE="/chat"
|
252 |
PUBLIC_ORIGIN=https://huggingface.co
|
src/lib/server/generateFromDefaultEndpoint.ts
CHANGED
@@ -4,13 +4,15 @@ import type { Conversation } from "$lib/types/Conversation";
|
|
4 |
export async function generateFromDefaultEndpoint({
|
5 |
messages,
|
6 |
preprompt,
|
|
|
7 |
}: {
|
8 |
messages: Omit<Conversation["messages"][0], "id">[];
|
9 |
preprompt?: string;
|
|
|
10 |
}): Promise<string> {
|
11 |
const endpoint = await smallModel.getEndpoint();
|
12 |
|
13 |
-
const tokenStream = await endpoint({ messages, preprompt });
|
14 |
|
15 |
for await (const output of tokenStream) {
|
16 |
// if not generated_text is here it means the generation is not done
|
|
|
4 |
export async function generateFromDefaultEndpoint({
|
5 |
messages,
|
6 |
preprompt,
|
7 |
+
generateSettings,
|
8 |
}: {
|
9 |
messages: Omit<Conversation["messages"][0], "id">[];
|
10 |
preprompt?: string;
|
11 |
+
generateSettings?: Record<string, unknown>;
|
12 |
}): Promise<string> {
|
13 |
const endpoint = await smallModel.getEndpoint();
|
14 |
|
15 |
+
const tokenStream = await endpoint({ messages, preprompt, generateSettings });
|
16 |
|
17 |
for await (const output of tokenStream) {
|
18 |
// if not generated_text is here it means the generation is not done
|
src/lib/server/summarize.ts
CHANGED
@@ -27,7 +27,11 @@ export async function summarize(prompt: string) {
|
|
27 |
|
28 |
return await generateFromDefaultEndpoint({
|
29 |
messages,
|
30 |
-
preprompt:
|
|
|
|
|
|
|
|
|
31 |
})
|
32 |
.then((summary) => {
|
33 |
// add an emoji if none is found in the first three characters
|
|
|
27 |
|
28 |
return await generateFromDefaultEndpoint({
|
29 |
messages,
|
30 |
+
preprompt:
|
31 |
+
"You are a summarization AI. Summarize the user's request into a single short sentence of four words or less. Do not try to answer it, only summarize the user's query. Always start your answer with an emoji relevant to the summary",
|
32 |
+
generateSettings: {
|
33 |
+
max_new_tokens: 15,
|
34 |
+
},
|
35 |
})
|
36 |
.then((summary) => {
|
37 |
// add an emoji if none is found in the first three characters
|
src/lib/server/websearch/generateQuery.ts
CHANGED
@@ -64,6 +64,9 @@ Current Question: Where is it being hosted?`,
|
|
64 |
const webQuery = await generateFromDefaultEndpoint({
|
65 |
messages: convQuery,
|
66 |
preprompt: `You are tasked with generating web search queries. Give me an appropriate query to answer my question for google search. Answer with only the query. Today is ${currentDate}`,
|
|
|
|
|
|
|
67 |
});
|
68 |
|
69 |
return webQuery.trim();
|
|
|
64 |
const webQuery = await generateFromDefaultEndpoint({
|
65 |
messages: convQuery,
|
66 |
preprompt: `You are tasked with generating web search queries. Give me an appropriate query to answer my question for google search. Answer with only the query. Today is ${currentDate}`,
|
67 |
+
generateSettings: {
|
68 |
+
max_new_tokens: 30,
|
69 |
+
},
|
70 |
});
|
71 |
|
72 |
return webQuery.trim();
|