coyotte508 HF staff committed on
Commit
7764421
1 Parent(s): 6f12e84

♻️ Simplify setup, prepare for multi-models (#156)

Browse files
.env CHANGED
@@ -1,27 +1,29 @@
1
- # Use .env.local to change these variables, or directly change your env
2
  # DO NOT EDIT THIS FILE WITH SENSITIVE DATA
3
 
4
  MONGODB_URL=#your mongodb URL here
5
  MONGODB_DB_NAME=chat-ui
6
  COOKIE_NAME=hf-chat
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
  # Increase depending on the model
9
  PUBLIC_MAX_INPUT_TOKENS=1000
10
  PUBLIC_ORIGIN=#https://hf.co
11
- PUBLIC_MODEL_NAME=OpenAssistant/oasst-sft-6-llama-30b # public facing link
12
- PUBLIC_MODEL_ID=OpenAssistant/oasst-sft-6-llama-30b-xor # used to link to model page
13
  PUBLIC_DISABLE_INTRO_TILES=false
14
  PUBLIC_USER_MESSAGE_TOKEN=<|prompter|>
15
  PUBLIC_ASSISTANT_MESSAGE_TOKEN=<|assistant|>
16
  PUBLIC_SEP_TOKEN=</s>
17
  PUBLIC_PREPROMPT="Below are a series of dialogues between various people and an AI assistant. The AI tries to be helpful, polite, honest, sophisticated, emotionally aware, and humble-but-knowledgeable. The assistant is happy to help with almost anything, and will do its best to understand exactly what is needed. It also tries to avoid giving false or misleading information, and it caveats when it isn't entirely sure about the right answer. That said, the assistant is practical and really does its best, and doesn't let caution get too much in the way of being useful."
18
  PUBLIC_GOOGLE_ANALYTICS_ID=#G-XXXXXXXX / Leave empty to disable
19
- PUBLIC_DEPRECATED_GOOGLE_ANALYTICS_ID=#UA-XXXXXXXX-X / Leave empty to disable
20
-
21
- # Copy this in .env.local with and replace "hf_<token>" your HF token from https://huggingface.co/settings/token
22
- # You can also change the model from OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 to your own model
23
- MODEL_ENDPOINTS=`[{
24
- "endpoint": "https://api-inference.huggingface.co/models/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5",
25
- "authorization": "Bearer hf_<token>",
26
- "weight": 1
27
- }]`
 
1
+ # Use .env.local to change these variables
2
  # DO NOT EDIT THIS FILE WITH SENSITIVE DATA
3
 
4
  MONGODB_URL=#your mongodb URL here
5
  MONGODB_DB_NAME=chat-ui
6
  COOKIE_NAME=hf-chat
7
+ HF_ACCESS_TOKEN=#hf_<token> from https://huggingface.co/settings/tokens
8
+ MODELS=`["OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5"]`
9
+ # Alternative syntax (all fields are optional except 'name'):
10
+ # MODELS=`[{
11
+ # "name": "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5",
12
+ # "displayName": "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5",
13
+ # "endpoints": [{
14
+ # "url": "https://api-inference.huggingface.co/models/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5",
15
+ # "authorization": "Bearer hf_<token>",
16
+ # "weight": 1
17
+ # }]
18
+ # }]
19
 
20
  # Increase depending on the model
21
  PUBLIC_MAX_INPUT_TOKENS=1000
22
  PUBLIC_ORIGIN=#https://hf.co
 
 
23
  PUBLIC_DISABLE_INTRO_TILES=false
24
  PUBLIC_USER_MESSAGE_TOKEN=<|prompter|>
25
  PUBLIC_ASSISTANT_MESSAGE_TOKEN=<|assistant|>
26
  PUBLIC_SEP_TOKEN=</s>
27
  PUBLIC_PREPROMPT="Below are a series of dialogues between various people and an AI assistant. The AI tries to be helpful, polite, honest, sophisticated, emotionally aware, and humble-but-knowledgeable. The assistant is happy to help with almost anything, and will do its best to understand exactly what is needed. It also tries to avoid giving false or misleading information, and it caveats when it isn't entirely sure about the right answer. That said, the assistant is practical and really does its best, and doesn't let caution get too much in the way of being useful."
28
  PUBLIC_GOOGLE_ANALYTICS_ID=#G-XXXXXXXX / Leave empty to disable
29
+ PUBLIC_DEPRECATED_GOOGLE_ANALYTICS_ID=#UA-XXXXXXXX-X / Leave empty to disable
 
 
 
 
 
 
 
 
README.md CHANGED
@@ -31,30 +31,32 @@ Basically you need to create a `.env.local` with the following contents:
31
 
32
  ```
33
  MONGODB_URL=<url to mongo, for example a free MongoDB Atlas sandbox instance>
34
- MODEL_ENDPOINTS=`[{
35
- "endpoint": "https://api-inference.huggingface.co/models/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5",
36
- "authorization": "Bearer <hf_token>",
37
- "weight": 1
38
- }]`
39
  ```
40
 
41
- Where the contents in `<...>` are replaced by the MongoDB URL and your [HF Access Token](https://huggingface.co/settings/tokens).
42
-
43
  ## Duplicating to a Space
44
 
45
  Create a `DOTENV_LOCAL` secret to your space with the following contents:
46
 
47
  ```
48
  MONGODB_URL=<url to mongo, for example a free MongoDB Atlas sandbox instance>
49
- MODEL_ENDPOINTS=`[{
50
- "endpoint": "https://api-inference.huggingface.co/models/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5",
51
- "authorization": "Bearer <hf_token>",
52
- "weight": 1
53
- }]`
54
  ```
55
 
56
  Where the contents in `<...>` are replaced by the MongoDB URL and your [HF Access Token](https://huggingface.co/settings/tokens).
57
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  ## Building
59
 
60
  To create a production version of your app:
 
31
 
32
  ```
33
  MONGODB_URL=<url to mongo, for example a free MongoDB Atlas sandbox instance>
34
+ HF_ACCESS_TOKEN=<your HF access token from https://huggingface.co/settings/tokens>
 
 
 
 
35
  ```
36
 
 
 
37
  ## Duplicating to a Space
38
 
39
  Create a `DOTENV_LOCAL` secret to your space with the following contents:
40
 
41
  ```
42
  MONGODB_URL=<url to mongo, for example a free MongoDB Atlas sandbox instance>
43
+ HF_ACCESS_TOKEN=<your HF access token from https://huggingface.co/settings/tokens>
 
 
 
 
44
  ```
45
 
46
  Where the contents in `<...>` are replaced by the MongoDB URL and your [HF Access Token](https://huggingface.co/settings/tokens).
47
 
48
+ ## Running Local Inference
49
+
50
+ Both of the examples above use the HF Inference API or the HF Endpoints API.
51
+
52
+ If you want to run the model locally, you need to run this inference server locally: https://github.com/huggingface/text-generation-inference
53
+
54
+ And add this to your `.env.local`:
55
+
56
+ ```
57
+ MODELS=`[{"name": "...", "endpoints": [{"url": "http://127.0.0.1:8080/generate_stream"}]}]`
58
+ ```
59
+
60
  ## Building
61
 
62
  To create a production version of your app:
src/hooks.server.ts CHANGED
@@ -6,6 +6,7 @@ import {
6
  PUBLIC_DEPRECATED_GOOGLE_ANALYTICS_ID,
7
  } from "$env/static/public";
8
  import { addYears } from "date-fns";
 
9
 
10
  export const handle: Handle = async ({ event, resolve }) => {
11
  const token = event.cookies.get(COOKIE_NAME);
 
6
  PUBLIC_DEPRECATED_GOOGLE_ANALYTICS_ID,
7
  } from "$env/static/public";
8
  import { addYears } from "date-fns";
9
+ import { inspect } from "node:util";
10
 
11
  export const handle: Handle = async ({ event, resolve }) => {
12
  const token = event.cookies.get(COOKIE_NAME);
src/lib/components/chat/ChatIntroduction.svelte CHANGED
@@ -1,15 +1,11 @@
1
  <script lang="ts">
2
- import {
3
- PUBLIC_DISABLE_INTRO_TILES,
4
- PUBLIC_MODEL_ID,
5
- PUBLIC_MODEL_NAME,
6
- PUBLIC_VERSION,
7
- } from "$env/static/public";
8
-
9
  import Logo from "$lib/components/icons/Logo.svelte";
10
  import CarbonArrowUpRight from "~icons/carbon/arrow-up-right";
11
  import CarbonEarth from "~icons/carbon/earth";
12
  import { createEventDispatcher } from "svelte";
 
 
13
  const dispatch = createEventDispatcher<{ message: string }>();
14
  </script>
15
 
@@ -36,13 +32,13 @@
36
  <div class="overflow-hidden rounded-xl border dark:border-gray-800">
37
  <div class="p-3">
38
  <div class="text-sm text-gray-600 dark:text-gray-400">Current Model</div>
39
- <div class="font-semibold">{PUBLIC_MODEL_NAME}</div>
40
  </div>
41
  <div
42
  class="flex items-center gap-5 rounded-xl bg-gray-100 px-3 py-2 text-sm text-gray-600 dark:bg-gray-800 dark:text-gray-300"
43
  >
44
  <a
45
- href="https://huggingface.co/{PUBLIC_MODEL_ID}"
46
  target="_blank"
47
  rel="noreferrer"
48
  class="flex items-center hover:underline"
 
1
  <script lang="ts">
2
+ import { PUBLIC_DISABLE_INTRO_TILES, PUBLIC_VERSION } from "$env/static/public";
 
 
 
 
 
 
3
  import Logo from "$lib/components/icons/Logo.svelte";
4
  import CarbonArrowUpRight from "~icons/carbon/arrow-up-right";
5
  import CarbonEarth from "~icons/carbon/earth";
6
  import { createEventDispatcher } from "svelte";
7
+
8
+ export let currentModel: { name: string; displayName: string };
9
  const dispatch = createEventDispatcher<{ message: string }>();
10
  </script>
11
 
 
32
  <div class="overflow-hidden rounded-xl border dark:border-gray-800">
33
  <div class="p-3">
34
  <div class="text-sm text-gray-600 dark:text-gray-400">Current Model</div>
35
+ <div class="font-semibold">{currentModel.displayName}</div>
36
  </div>
37
  <div
38
  class="flex items-center gap-5 rounded-xl bg-gray-100 px-3 py-2 text-sm text-gray-600 dark:bg-gray-800 dark:text-gray-300"
39
  >
40
  <a
41
+ href="https://huggingface.co/{currentModel.name}"
42
  target="_blank"
43
  rel="noreferrer"
44
  class="flex items-center hover:underline"
src/lib/components/chat/ChatMessages.svelte CHANGED
@@ -13,6 +13,7 @@
13
  export let messages: Message[];
14
  export let loading: boolean;
15
  export let pending: boolean;
 
16
 
17
  let chatContainer: HTMLElement;
18
 
@@ -40,7 +41,7 @@
40
  on:retry={() => dispatch("retry", { id: message.id, content: message.content })}
41
  />
42
  {:else}
43
- <ChatIntroduction on:message />
44
  {/each}
45
  {#if pending}
46
  <ChatMessage message={{ from: "assistant", content: "", id: randomUUID() }} />
 
13
  export let messages: Message[];
14
  export let loading: boolean;
15
  export let pending: boolean;
16
+ export let currentModel: { name: string; displayName: string };
17
 
18
  let chatContainer: HTMLElement;
19
 
 
41
  on:retry={() => dispatch("retry", { id: message.id, content: message.content })}
42
  />
43
  {:else}
44
+ <ChatIntroduction on:message {currentModel} />
45
  {/each}
46
  {#if pending}
47
  <ChatMessage message={{ from: "assistant", content: "", id: randomUUID() }} />
src/lib/components/chat/ChatWindow.svelte CHANGED
@@ -8,12 +8,12 @@
8
  import ChatMessages from "./ChatMessages.svelte";
9
  import ChatInput from "./ChatInput.svelte";
10
  import StopGeneratingBtn from "../StopGeneratingBtn.svelte";
11
- import { PUBLIC_MODEL_ID, PUBLIC_MODEL_NAME } from "$env/static/public";
12
 
13
  export let messages: Message[] = [];
14
  export let disabled = false;
15
  export let loading = false;
16
  export let pending = false;
 
17
 
18
  let message: string;
19
 
@@ -35,6 +35,7 @@
35
  <ChatMessages
36
  {loading}
37
  {pending}
 
38
  {messages}
39
  on:message
40
  on:retry={(ev) => {
@@ -73,10 +74,10 @@
73
  <div class="mt-2 flex justify-between self-stretch px-1 text-xs text-gray-400/90 max-sm:gap-2">
74
  <p>
75
  Model: <a
76
- href="https://huggingface.co/{PUBLIC_MODEL_ID}"
77
  target="_blank"
78
  rel="noreferrer"
79
- class="hover:underline">{PUBLIC_MODEL_NAME}</a
80
  > <span class="max-sm:hidden">·</span><br class="sm:hidden" /> Generated content may be inaccurate
81
  or false.
82
  </p>
 
8
  import ChatMessages from "./ChatMessages.svelte";
9
  import ChatInput from "./ChatInput.svelte";
10
  import StopGeneratingBtn from "../StopGeneratingBtn.svelte";
 
11
 
12
  export let messages: Message[] = [];
13
  export let disabled = false;
14
  export let loading = false;
15
  export let pending = false;
16
+ export let currentModel: { name: string; displayName: string };
17
 
18
  let message: string;
19
 
 
35
  <ChatMessages
36
  {loading}
37
  {pending}
38
+ {currentModel}
39
  {messages}
40
  on:message
41
  on:retry={(ev) => {
 
74
  <div class="mt-2 flex justify-between self-stretch px-1 text-xs text-gray-400/90 max-sm:gap-2">
75
  <p>
76
  Model: <a
77
+ href="https://huggingface.co/{currentModel.name}"
78
  target="_blank"
79
  rel="noreferrer"
80
+ class="hover:underline">{currentModel.displayName}</a
81
  > <span class="max-sm:hidden">·</span><br class="sm:hidden" /> Generated content may be inaccurate
82
  or false.
83
  </p>
src/lib/server/modelEndpoint.ts CHANGED
@@ -1,14 +1,38 @@
1
- import { MODEL_ENDPOINTS } from "$env/static/private";
2
  import { sum } from "$lib/utils/sum";
3
-
4
- const endpoints: Array<{ endpoint: string; authorization: string; weight: number }> =
5
- JSON.parse(MODEL_ENDPOINTS);
6
- const totalWeight = sum(endpoints.map((e) => e.weight));
7
 
8
  /**
9
  * Find a random load-balanced endpoint
10
  */
11
- export function modelEndpoint(): { endpoint: string; authorization: string; weight: number } {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  let random = Math.random() * totalWeight;
13
  for (const endpoint of endpoints) {
14
  if (random < endpoint.weight) {
 
1
+ import { HF_ACCESS_TOKEN } from "$env/static/private";
2
  import { sum } from "$lib/utils/sum";
3
+ import { models } from "./models";
 
 
 
4
 
5
  /**
6
  * Find a random load-balanced endpoint
7
  */
8
+ export function modelEndpoint(model: string): {
9
+ url: string;
10
+ authorization: string;
11
+ weight: number;
12
+ } {
13
+ const modelDefinition = models.find(
14
+ (m) => m === model || (typeof m === "object" && m.name === model)
15
+ );
16
+ if (!modelDefinition) {
17
+ throw new Error(`Invalid model: ${model}`);
18
+ }
19
+ if (typeof modelDefinition === "string") {
20
+ return {
21
+ url: `https://api-inference.huggingface.co/models/${modelDefinition}`,
22
+ authorization: `Bearer ${HF_ACCESS_TOKEN}`,
23
+ weight: 1,
24
+ };
25
+ }
26
+ if (!modelDefinition.endpoints) {
27
+ return {
28
+ url: `https://api-inference.huggingface.co/models/${modelDefinition.name}`,
29
+ authorization: `Bearer ${HF_ACCESS_TOKEN}`,
30
+ weight: 1,
31
+ };
32
+ }
33
+ const endpoints = modelDefinition.endpoints;
34
+ const totalWeight = sum(endpoints.map((e) => e.weight));
35
+
36
  let random = Math.random() * totalWeight;
37
  for (const endpoint of endpoints) {
38
  if (random < endpoint.weight) {
src/lib/server/models.ts ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { HF_ACCESS_TOKEN, MODELS } from "$env/static/private";
2
+ import { z } from "zod";
3
+
4
+ export const models = z
5
+ .array(
6
+ z.union([
7
+ z.string().min(1),
8
+ z.object({
9
+ name: z.string().min(1),
10
+ displayName: z.string().min(1).optional(),
11
+ endpoints: z
12
+ .array(
13
+ z.object({
14
+ url: z.string().url(),
15
+ authorization: z.string().min(1).default(`Bearer ${HF_ACCESS_TOKEN}`),
16
+ weight: z.number().int().positive().default(1),
17
+ })
18
+ )
19
+ .optional(),
20
+ }),
21
+ ])
22
+ )
23
+ .parse(JSON.parse(MODELS));
24
+
25
+ export const modelNames: Array<{ name: string; displayName: string }> = models.map((m) =>
26
+ typeof m === "string"
27
+ ? { name: m, displayName: m }
28
+ : { name: m.name, displayName: m.displayName ?? m.name }
29
+ );
30
+ export const defaultModel = modelNames[0];
src/lib/types/Message.ts CHANGED
@@ -2,4 +2,6 @@ export interface Message {
2
  from: "user" | "assistant";
3
  id: ReturnType<typeof crypto.randomUUID>;
4
  content: string;
 
 
5
  }
 
2
  from: "user" | "assistant";
3
  id: ReturnType<typeof crypto.randomUUID>;
4
  content: string;
5
+ // Only for "assistant" messages
6
+ model?: string;
7
  }
src/routes/+layout.server.ts CHANGED
@@ -2,6 +2,7 @@ import type { LayoutServerLoad } from "./$types";
2
  import { collections } from "$lib/server/database";
3
  import type { Conversation } from "$lib/types/Conversation";
4
  import { UrlDependency } from "$lib/types/UrlDependency";
 
5
 
6
  export const load: LayoutServerLoad = async ({ locals, depends }) => {
7
  const { conversations } = collections;
@@ -29,5 +30,6 @@ export const load: LayoutServerLoad = async ({ locals, depends }) => {
29
  shareConversationsWithModelAuthors: settings?.shareConversationsWithModelAuthors ?? true,
30
  ethicsModalAcceptedAt: settings?.ethicsModalAcceptedAt ?? null,
31
  },
 
32
  };
33
  };
 
2
  import { collections } from "$lib/server/database";
3
  import type { Conversation } from "$lib/types/Conversation";
4
  import { UrlDependency } from "$lib/types/UrlDependency";
5
+ import { modelNames } from "$lib/server/models";
6
 
7
  export const load: LayoutServerLoad = async ({ locals, depends }) => {
8
  const { conversations } = collections;
 
30
  shareConversationsWithModelAuthors: settings?.shareConversationsWithModelAuthors ?? true,
31
  ethicsModalAcceptedAt: settings?.ethicsModalAcceptedAt ?? null,
32
  },
33
+ models: modelNames,
34
  };
35
  };
src/routes/+page.svelte CHANGED
@@ -5,6 +5,7 @@
5
  import { ERROR_MESSAGES, error } from "$lib/stores/errors";
6
  import { pendingMessage } from "$lib/stores/pendingMessage";
7
 
 
8
  let loading = false;
9
 
10
  async function createConversation(message: string) {
@@ -39,4 +40,8 @@
39
  }
40
  </script>
41
 
42
- <ChatWindow on:message={(ev) => createConversation(ev.detail)} {loading} />
 
 
 
 
 
5
  import { ERROR_MESSAGES, error } from "$lib/stores/errors";
6
  import { pendingMessage } from "$lib/stores/pendingMessage";
7
 
8
+ export let data;
9
  let loading = false;
10
 
11
  async function createConversation(message: string) {
 
40
  }
41
  </script>
42
 
43
+ <ChatWindow
44
+ on:message={(ev) => createConversation(ev.detail)}
45
+ {loading}
46
+ currentModel={data.models[0]}
47
+ />
src/routes/conversation/[id]/+page.svelte CHANGED
@@ -181,4 +181,5 @@
181
  on:retry={(message) => writeMessage(message.detail.content, message.detail.id)}
182
  on:share={() => shareConversation($page.params.id, data.title)}
183
  on:stop={() => (isAborted = true)}
 
184
  />
 
181
  on:retry={(message) => writeMessage(message.detail.content, message.detail.id)}
182
  on:share={() => shareConversation($page.params.id, data.title)}
183
  on:stop={() => (isAborted = true)}
184
+ currentModel={data.models[0]}
185
  />
src/routes/conversation/[id]/+server.ts CHANGED
@@ -3,6 +3,7 @@ import { buildPrompt } from "$lib/buildPrompt.js";
3
  import { abortedGenerations } from "$lib/server/abortedGenerations.js";
4
  import { collections } from "$lib/server/database.js";
5
  import { modelEndpoint } from "$lib/server/modelEndpoint.js";
 
6
  import type { Message } from "$lib/types/Message.js";
7
  import { concatUint8Arrays } from "$lib/utils/concatUint8Arrays.js";
8
  import { streamToAsyncIterable } from "$lib/utils/streamToAsyncIterable";
@@ -30,10 +31,14 @@ export async function POST({ request, fetch, locals, params }) {
30
  const json = await request.json();
31
  const {
32
  inputs: newPrompt,
 
33
  options: { id: messageId, is_retry },
34
  } = z
35
  .object({
36
  inputs: z.string().trim().min(1),
 
 
 
37
  options: z.object({
38
  id: z.optional(z.string().uuid()),
39
  is_retry: z.optional(z.boolean()),
@@ -66,11 +71,11 @@ export async function POST({ request, fetch, locals, params }) {
66
  }
67
  const prompt = buildPrompt(messages);
68
 
69
- const randomEndpoint = modelEndpoint();
70
 
71
  const abortController = new AbortController();
72
 
73
- const resp = await fetch(randomEndpoint.endpoint, {
74
  headers: {
75
  "Content-Type": request.headers.get("Content-Type") ?? "application/json",
76
  Authorization: randomEndpoint.authorization,
@@ -99,7 +104,7 @@ export async function POST({ request, fetch, locals, params }) {
99
 
100
  generated_text = trimSuffix(trimPrefix(generated_text, "<|startoftext|>"), PUBLIC_SEP_TOKEN);
101
 
102
- messages.push({ from: "assistant", content: generated_text, id: crypto.randomUUID() });
103
 
104
  await collections.conversations.updateOne(
105
  {
 
3
  import { abortedGenerations } from "$lib/server/abortedGenerations.js";
4
  import { collections } from "$lib/server/database.js";
5
  import { modelEndpoint } from "$lib/server/modelEndpoint.js";
6
+ import { defaultModel, modelNames } from "$lib/server/models.js";
7
  import type { Message } from "$lib/types/Message.js";
8
  import { concatUint8Arrays } from "$lib/utils/concatUint8Arrays.js";
9
  import { streamToAsyncIterable } from "$lib/utils/streamToAsyncIterable";
 
31
  const json = await request.json();
32
  const {
33
  inputs: newPrompt,
34
+ model,
35
  options: { id: messageId, is_retry },
36
  } = z
37
  .object({
38
  inputs: z.string().trim().min(1),
39
+ model: z
40
+ .enum([modelNames[0].name, ...modelNames.slice(1).map((m) => m.name)])
41
+ .default(defaultModel.name),
42
  options: z.object({
43
  id: z.optional(z.string().uuid()),
44
  is_retry: z.optional(z.boolean()),
 
71
  }
72
  const prompt = buildPrompt(messages);
73
 
74
+ const randomEndpoint = modelEndpoint(model);
75
 
76
  const abortController = new AbortController();
77
 
78
+ const resp = await fetch(randomEndpoint.url, {
79
  headers: {
80
  "Content-Type": request.headers.get("Content-Type") ?? "application/json",
81
  Authorization: randomEndpoint.authorization,
 
104
 
105
  generated_text = trimSuffix(trimPrefix(generated_text, "<|startoftext|>"), PUBLIC_SEP_TOKEN);
106
 
107
+ messages.push({ from: "assistant", content: generated_text, id: crypto.randomUUID(), model });
108
 
109
  await collections.conversations.updateOne(
110
  {
src/routes/conversation/[id]/summarize/+server.ts CHANGED
@@ -2,6 +2,7 @@ import { PUBLIC_MAX_INPUT_TOKENS, PUBLIC_SEP_TOKEN } from "$env/static/public";
2
  import { buildPrompt } from "$lib/buildPrompt";
3
  import { collections } from "$lib/server/database.js";
4
  import { modelEndpoint } from "$lib/server/modelEndpoint.js";
 
5
  import { trimPrefix } from "$lib/utils/trimPrefix.js";
6
  import { trimSuffix } from "$lib/utils/trimSuffix.js";
7
  import { textGeneration } from "@huggingface/inference";
@@ -40,10 +41,10 @@ export async function POST({ params, locals, fetch }) {
40
  return_full_text: false,
41
  };
42
 
43
- const endpoint = modelEndpoint();
44
  let { generated_text } = await textGeneration(
45
  {
46
- model: endpoint.endpoint,
47
  inputs: prompt,
48
  parameters,
49
  },
 
2
  import { buildPrompt } from "$lib/buildPrompt";
3
  import { collections } from "$lib/server/database.js";
4
  import { modelEndpoint } from "$lib/server/modelEndpoint.js";
5
+ import { defaultModel } from "$lib/server/models.js";
6
  import { trimPrefix } from "$lib/utils/trimPrefix.js";
7
  import { trimSuffix } from "$lib/utils/trimSuffix.js";
8
  import { textGeneration } from "@huggingface/inference";
 
41
  return_full_text: false,
42
  };
43
 
44
+ const endpoint = modelEndpoint(defaultModel.name);
45
  let { generated_text } = await textGeneration(
46
  {
47
+ model: endpoint.url,
48
  inputs: prompt,
49
  parameters,
50
  },
src/routes/r/[id]/+page.svelte CHANGED
@@ -71,5 +71,6 @@
71
  })
72
  .finally(() => (loading = false))}
73
  messages={data.messages}
 
74
  {loading}
75
  />
 
71
  })
72
  .finally(() => (loading = false))}
73
  messages={data.messages}
74
+ currentModel={data.models[0]}
75
  {loading}
76
  />