nsarrazin HF staff committed on
Commit
86bc2ea
1 Parent(s): 0fc95fb

Switch task model and add max tokens limits (#1036)

Browse files
.env.template CHANGED
@@ -246,7 +246,7 @@ OLD_MODELS=`[
246
  {"name": "openchat/openchat-3.5-0106"}
247
  ]`
248
 
249
- TASK_MODEL='mistralai/Mistral-7B-Instruct-v0.1'
250
 
251
  APP_BASE="/chat"
252
  PUBLIC_ORIGIN=https://huggingface.co
 
246
  {"name": "openchat/openchat-3.5-0106"}
247
  ]`
248
 
249
+ TASK_MODEL='meta-llama/Meta-Llama-3-70B-Instruct'
250
 
251
  APP_BASE="/chat"
252
  PUBLIC_ORIGIN=https://huggingface.co
src/lib/server/generateFromDefaultEndpoint.ts CHANGED
@@ -4,13 +4,15 @@ import type { Conversation } from "$lib/types/Conversation";
4
  export async function generateFromDefaultEndpoint({
5
  messages,
6
  preprompt,
 
7
  }: {
8
  messages: Omit<Conversation["messages"][0], "id">[];
9
  preprompt?: string;
 
10
  }): Promise<string> {
11
  const endpoint = await smallModel.getEndpoint();
12
 
13
- const tokenStream = await endpoint({ messages, preprompt });
14
 
15
  for await (const output of tokenStream) {
16
  // if not generated_text is here it means the generation is not done
 
4
  export async function generateFromDefaultEndpoint({
5
  messages,
6
  preprompt,
7
+ generateSettings,
8
  }: {
9
  messages: Omit<Conversation["messages"][0], "id">[];
10
  preprompt?: string;
11
+ generateSettings?: Record<string, unknown>;
12
  }): Promise<string> {
13
  const endpoint = await smallModel.getEndpoint();
14
 
15
+ const tokenStream = await endpoint({ messages, preprompt, generateSettings });
16
 
17
  for await (const output of tokenStream) {
18
  // if not generated_text is here it means the generation is not done
src/lib/server/summarize.ts CHANGED
@@ -27,7 +27,11 @@ export async function summarize(prompt: string) {
27
 
28
  return await generateFromDefaultEndpoint({
29
  messages,
30
- preprompt: `You are a summarization AI. You'll never answer a user's question directly, but instead summarize the user's request into a single short sentence of four words or less. Always start your answer with an emoji relevant to the summary.`,
 
 
 
 
31
  })
32
  .then((summary) => {
33
  // add an emoji if none is found in the first three characters
 
27
 
28
  return await generateFromDefaultEndpoint({
29
  messages,
30
+ preprompt:
31
+ "You are a summarization AI. Summarize the user's request into a single short sentence of four words or less. Do not try to answer it, only summarize the user's query. Always start your answer with an emoji relevant to the summary",
32
+ generateSettings: {
33
+ max_new_tokens: 15,
34
+ },
35
  })
36
  .then((summary) => {
37
  // add an emoji if none is found in the first three characters
src/lib/server/websearch/generateQuery.ts CHANGED
@@ -64,6 +64,9 @@ Current Question: Where is it being hosted?`,
64
  const webQuery = await generateFromDefaultEndpoint({
65
  messages: convQuery,
66
  preprompt: `You are tasked with generating web search queries. Give me an appropriate query to answer my question for google search. Answer with only the query. Today is ${currentDate}`,
 
 
 
67
  });
68
 
69
  return webQuery.trim();
 
64
  const webQuery = await generateFromDefaultEndpoint({
65
  messages: convQuery,
66
  preprompt: `You are tasked with generating web search queries. Give me an appropriate query to answer my question for google search. Answer with only the query. Today is ${currentDate}`,
67
+ generateSettings: {
68
+ max_new_tokens: 30,
69
+ },
70
  });
71
 
72
  return webQuery.trim();