coyotte508 committed
Commit 3c650ed
Parent: 9bebf7e

⚡️ Limit the number of tokens sent to the backend (#93)

.env CHANGED
```diff
@@ -6,6 +6,7 @@ MONGODB_DB_NAME=chat-ui
 HF_TOKEN=#your huggingface token here
 COOKIE_NAME=hf-chat
 
+PUBLIC_MAX_INPUT_TOKENS=1024
 PUBLIC_ORIGIN=#https://hf.co
 PUBLIC_MODEL_ENDPOINT=https://api-inference.huggingface.co/models/OpenAssistant/oasst-sft-6-llama-30b
 PUBLIC_MODEL_NAME=OpenAssistant/oasst-sft-6-llama-30b # public facing link
```
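SvelteKit injects `PUBLIC_`-prefixed variables from `.env` through `$env/static/public` as strings at build time, which is why the code in this commit parses the new limit with `parseInt`. A minimal sketch (the variable name matches this commit; the surrounding code is illustrative only):

```ts
import { PUBLIC_MAX_INPUT_TOKENS } from "$env/static/public";

// Env values arrive as strings, so parse before using them as a number.
const maxInputTokens = parseInt(PUBLIC_MAX_INPUT_TOKENS); // 1024 with the default .env
```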
src/lib/buildPrompt.ts CHANGED
```diff
@@ -1,5 +1,6 @@
 import {
   PUBLIC_ASSISTANT_MESSAGE_TOKEN,
+  PUBLIC_MAX_INPUT_TOKENS,
   PUBLIC_SEP_TOKEN,
   PUBLIC_USER_MESSAGE_TOKEN,
 } from "$env/static/public";
@@ -11,7 +12,7 @@ import type { Message } from "./types/Message";
  * <|assistant|>hi<|endoftext|><|prompter|>hello<|endoftext|><|assistant|>
  */
 export function buildPrompt(messages: Message[]): string {
-  return (
+  const prompt =
     messages
       .map(
         (m) =>
@@ -20,6 +21,8 @@ export function buildPrompt(messages: Message[]): string {
           : PUBLIC_ASSISTANT_MESSAGE_TOKEN + m.content) +
         (m.content.endsWith(PUBLIC_SEP_TOKEN) ? "" : PUBLIC_SEP_TOKEN)
       )
-      .join("") + PUBLIC_ASSISTANT_MESSAGE_TOKEN
-  );
+      .join("") + PUBLIC_ASSISTANT_MESSAGE_TOKEN;
+
+  // Not super precise, but it's truncated in the model's backend anyway
+  return prompt.split(" ").slice(-parseInt(PUBLIC_MAX_INPUT_TOKENS)).join(" ");
 }
```
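The truncation here is intentionally rough: it treats each whitespace-separated word as one token and keeps only the last `PUBLIC_MAX_INPUT_TOKENS` of them. A self-contained sketch of the same idea, with hardcoded values standing in for the `$env/static/public` imports and a simplified `Message` type (both assumptions for illustration):

```ts
// Hardcoded stand-ins for the real $env/static/public values.
const SEP_TOKEN = "<|endoftext|>";
const USER_TOKEN = "<|prompter|>";
const ASSISTANT_TOKEN = "<|assistant|>";
const MAX_INPUT_TOKENS = 1024;

// Simplified stand-in for $lib/types/Message.
interface Message {
  from: "user" | "assistant";
  content: string;
}

function buildPrompt(messages: Message[]): string {
  const prompt =
    messages
      .map(
        (m) =>
          (m.from === "user" ? USER_TOKEN + m.content : ASSISTANT_TOKEN + m.content) +
          (m.content.endsWith(SEP_TOKEN) ? "" : SEP_TOKEN)
      )
      .join("") + ASSISTANT_TOKEN;

  // Word-level approximation of a token limit: keep only the trailing words.
  // slice(-N) returns the whole array when the prompt is shorter than N words.
  return prompt.split(" ").slice(-MAX_INPUT_TOKENS).join(" ");
}
```

A single word often maps to several tokens, so the kept text can still exceed the true limit; the backend's `truncate` parameter (set in the files below) enforces the hard cap.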
src/routes/conversation/[id]/+page.svelte CHANGED
```diff
@@ -8,7 +8,7 @@
   import { invalidate } from "$app/navigation";
   import { base } from "$app/paths";
   import { trimSuffix } from "$lib/utils/trimSuffix";
-  import { PUBLIC_SEP_TOKEN } from "$env/static/public";
+  import { PUBLIC_SEP_TOKEN, PUBLIC_MAX_INPUT_TOKENS } from "$env/static/public";
   import { trimPrefix } from "$lib/utils/trimPrefix";
   import { shareConversation } from "$lib/shareConversation";
   import { UrlDependency } from "$lib/types/UrlDependency";
@@ -41,7 +41,7 @@
   repetition_penalty: 1.2,
   top_k: 50,
   // @ts-ignore
-  truncate: 1024,
+  truncate: parseInt(PUBLIC_MAX_INPUT_TOKENS),
   watermark: false,
   max_new_tokens: 1024,
   stop: ["<|endoftext|>"],
```
src/routes/conversation/[id]/summarize/+server.ts CHANGED
```diff
@@ -1,5 +1,5 @@
 import { HF_TOKEN } from "$env/static/private";
-import { PUBLIC_MODEL_ENDPOINT } from "$env/static/public";
+import { PUBLIC_MAX_INPUT_TOKENS, PUBLIC_MODEL_ENDPOINT } from "$env/static/public";
 import { buildPrompt } from "$lib/buildPrompt";
 import { collections } from "$lib/server/database.js";
 import { textGeneration } from "@huggingface/inference";
@@ -33,6 +33,7 @@ export async function POST({ params, locals, fetch }) {
   top_k: 50,
   watermark: false,
   max_new_tokens: 1024,
+  truncate: parseInt(PUBLIC_MAX_INPUT_TOKENS),
   stop: ["<|endoftext|>"],
   return_full_text: false,
 };
```
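`truncate` is a text-generation-inference parameter that discards all but the last N input tokens on the server, which is why the client-side word count in `buildPrompt` can stay imprecise. The `// @ts-ignore` in `+page.svelte` suggests the option was not yet part of the library's typed parameters. A hedged sketch of a direct call with this parameter, assuming a standard `@huggingface/inference` setup; the model ID, prompt, and token handling are illustrative:

```ts
import { textGeneration } from "@huggingface/inference";

const response = await textGeneration({
  accessToken: process.env.HF_TOKEN, // illustrative; the app reads HF_TOKEN via $env/static/private
  model: "OpenAssistant/oasst-sft-6-llama-30b",
  inputs: "<|prompter|>Summarize our chat<|endoftext|><|assistant|>",
  parameters: {
    max_new_tokens: 1024,
    // @ts-ignore -- text-generation-inference option, not in the typed parameters
    truncate: 1024,
    return_full_text: false,
  },
});

console.log(response.generated_text);
```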