Spaces:
Running
Running
♻️ Simplify setup, prepare for multi-models (#156)
Browse files
- .env +14 -12
- README.md +14 -12
- src/hooks.server.ts +1 -0
- src/lib/components/chat/ChatIntroduction.svelte +5 -9
- src/lib/components/chat/ChatMessages.svelte +2 -1
- src/lib/components/chat/ChatWindow.svelte +4 -3
- src/lib/server/modelEndpoint.ts +30 -6
- src/lib/server/models.ts +30 -0
- src/lib/types/Message.ts +2 -0
- src/routes/+layout.server.ts +2 -0
- src/routes/+page.svelte +6 -1
- src/routes/conversation/[id]/+page.svelte +1 -0
- src/routes/conversation/[id]/+server.ts +8 -3
- src/routes/conversation/[id]/summarize/+server.ts +3 -2
- src/routes/r/[id]/+page.svelte +1 -0
.env
CHANGED
@@ -1,27 +1,29 @@
|
|
1 |
-
# Use .env.local to change these variables
|
2 |
# DO NOT EDIT THIS FILE WITH SENSITIVE DATA
|
3 |
|
4 |
MONGODB_URL=#your mongodb URL here
|
5 |
MONGODB_DB_NAME=chat-ui
|
6 |
COOKIE_NAME=hf-chat
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
|
8 |
# Increase depending on the model
|
9 |
PUBLIC_MAX_INPUT_TOKENS=1000
|
10 |
PUBLIC_ORIGIN=#https://hf.co
|
11 |
-
PUBLIC_MODEL_NAME=OpenAssistant/oasst-sft-6-llama-30b # public facing link
|
12 |
-
PUBLIC_MODEL_ID=OpenAssistant/oasst-sft-6-llama-30b-xor # used to link to model page
|
13 |
PUBLIC_DISABLE_INTRO_TILES=false
|
14 |
PUBLIC_USER_MESSAGE_TOKEN=<|prompter|>
|
15 |
PUBLIC_ASSISTANT_MESSAGE_TOKEN=<|assistant|>
|
16 |
PUBLIC_SEP_TOKEN=</s>
|
17 |
PUBLIC_PREPROMPT="Below are a series of dialogues between various people and an AI assistant. The AI tries to be helpful, polite, honest, sophisticated, emotionally aware, and humble-but-knowledgeable. The assistant is happy to help with almost anything, and will do its best to understand exactly what is needed. It also tries to avoid giving false or misleading information, and it caveats when it isn't entirely sure about the right answer. That said, the assistant is practical and really does its best, and doesn't let caution get too much in the way of being useful."
|
18 |
PUBLIC_GOOGLE_ANALYTICS_ID=#G-XXXXXXXX / Leave empty to disable
|
19 |
-
PUBLIC_DEPRECATED_GOOGLE_ANALYTICS_ID=#UA-XXXXXXXX-X / Leave empty to disable
|
20 |
-
|
21 |
-
# Copy this in .env.local with and replace "hf_<token>" your HF token from https://huggingface.co/settings/token
|
22 |
-
# You can also change the model from OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 to your own model
|
23 |
-
MODEL_ENDPOINTS=`[{
|
24 |
-
"endpoint": "https://api-inference.huggingface.co/models/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5",
|
25 |
-
"authorization": "Bearer hf_<token>",
|
26 |
-
"weight": 1
|
27 |
-
}]`
|
|
|
1 |
+
# Use .env.local to change these variables
|
2 |
# DO NOT EDIT THIS FILE WITH SENSITIVE DATA
|
3 |
|
4 |
MONGODB_URL=#your mongodb URL here
|
5 |
MONGODB_DB_NAME=chat-ui
|
6 |
COOKIE_NAME=hf-chat
|
7 |
+
HF_ACCESS_TOKEN=#hf_<token> from https://huggingface.co/settings/tokens
|
8 |
+
MODELS=`["OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5"]`
|
9 |
+
# Alternative syntax (all fields are optional except 'name'):
|
10 |
+
# MODELS=`[{
|
11 |
+
# "name": "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5",
|
12 |
+
# "displayName": "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5",
|
13 |
+
# "endpoints": [{
|
14 |
+
# "url": "https://api-inference.huggingface.co/models/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5",
|
15 |
+
# "authorization": "Bearer hf_<token>",
|
16 |
+
# "weight": 1
|
17 |
+
# }]
|
18 |
+
# }]
|
19 |
|
20 |
# Increase depending on the model
|
21 |
PUBLIC_MAX_INPUT_TOKENS=1000
|
22 |
PUBLIC_ORIGIN=#https://hf.co
|
|
|
|
|
23 |
PUBLIC_DISABLE_INTRO_TILES=false
|
24 |
PUBLIC_USER_MESSAGE_TOKEN=<|prompter|>
|
25 |
PUBLIC_ASSISTANT_MESSAGE_TOKEN=<|assistant|>
|
26 |
PUBLIC_SEP_TOKEN=</s>
|
27 |
PUBLIC_PREPROMPT="Below are a series of dialogues between various people and an AI assistant. The AI tries to be helpful, polite, honest, sophisticated, emotionally aware, and humble-but-knowledgeable. The assistant is happy to help with almost anything, and will do its best to understand exactly what is needed. It also tries to avoid giving false or misleading information, and it caveats when it isn't entirely sure about the right answer. That said, the assistant is practical and really does its best, and doesn't let caution get too much in the way of being useful."
|
28 |
PUBLIC_GOOGLE_ANALYTICS_ID=#G-XXXXXXXX / Leave empty to disable
|
29 |
+
PUBLIC_DEPRECATED_GOOGLE_ANALYTICS_ID=#UA-XXXXXXXX-X / Leave empty to disable
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
README.md
CHANGED
@@ -31,30 +31,32 @@ Basically you need to create a `.env.local` with the following contents:
|
|
31 |
|
32 |
```
|
33 |
MONGODB_URL=<url to mongo, for example a free MongoDB Atlas sandbox instance>
|
34 |
-
|
35 |
-
"endpoint": "https://api-inference.huggingface.co/models/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5",
|
36 |
-
"authorization": "Bearer <hf_token>",
|
37 |
-
"weight": 1
|
38 |
-
}]`
|
39 |
```
|
40 |
|
41 |
-
Where the contents in `<...>` are replaced by the MongoDB URL and your [HF Access Token](https://huggingface.co/settings/tokens).
|
42 |
-
|
43 |
## Duplicating to a Space
|
44 |
|
45 |
Create a `DOTENV_LOCAL` secret to your space with the following contents:
|
46 |
|
47 |
```
|
48 |
MONGODB_URL=<url to mongo, for example a free MongoDB Atlas sandbox instance>
|
49 |
-
|
50 |
-
"endpoint": "https://api-inference.huggingface.co/models/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5",
|
51 |
-
"authorization": "Bearer <hf_token>",
|
52 |
-
"weight": 1
|
53 |
-
}]`
|
54 |
```
|
55 |
|
56 |
Where the contents in `<...>` are replaced by the MongoDB URL and your [HF Access Token](https://huggingface.co/settings/tokens).
|
57 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
## Building
|
59 |
|
60 |
To create a production version of your app:
|
|
|
31 |
|
32 |
```
|
33 |
MONGODB_URL=<url to mongo, for example a free MongoDB Atlas sandbox instance>
|
34 |
+
HF_ACCESS_TOKEN=<your HF access token from https://huggingface.co/settings/tokens>
|
|
|
|
|
|
|
|
|
35 |
```
|
36 |
|
|
|
|
|
37 |
## Duplicating to a Space
|
38 |
|
39 |
Create a `DOTENV_LOCAL` secret to your space with the following contents:
|
40 |
|
41 |
```
|
42 |
MONGODB_URL=<url to mongo, for example a free MongoDB Atlas sandbox instance>
|
43 |
+
HF_ACCESS_TOKEN=<your HF access token from https://huggingface.co/settings/tokens>
|
|
|
|
|
|
|
|
|
44 |
```
|
45 |
|
46 |
Where the contents in `<...>` are replaced by the MongoDB URL and your [HF Access Token](https://huggingface.co/settings/tokens).
|
47 |
|
48 |
+
## Running Local Inference
|
49 |
+
|
50 |
+
Both examples above use the HF Inference API or the HF Endpoints API.
|
51 |
+
|
52 |
+
If you want to run the model locally, you need to run this inference server locally: https://github.com/huggingface/text-generation-inference
|
53 |
+
|
54 |
+
And add this to your `.env.local`:
|
55 |
+
|
56 |
+
```
|
57 |
+
MODELS=`[{"name": "...", "endpoints": [{"url": "http://127.0.0.1:8080/generate_stream"}]}]`
|
58 |
+
```
|
59 |
+
|
60 |
## Building
|
61 |
|
62 |
To create a production version of your app:
|
src/hooks.server.ts
CHANGED
@@ -6,6 +6,7 @@ import {
|
|
6 |
PUBLIC_DEPRECATED_GOOGLE_ANALYTICS_ID,
|
7 |
} from "$env/static/public";
|
8 |
import { addYears } from "date-fns";
|
|
|
9 |
|
10 |
export const handle: Handle = async ({ event, resolve }) => {
|
11 |
const token = event.cookies.get(COOKIE_NAME);
|
|
|
6 |
PUBLIC_DEPRECATED_GOOGLE_ANALYTICS_ID,
|
7 |
} from "$env/static/public";
|
8 |
import { addYears } from "date-fns";
|
9 |
+
import { inspect } from "node:util";
|
10 |
|
11 |
export const handle: Handle = async ({ event, resolve }) => {
|
12 |
const token = event.cookies.get(COOKIE_NAME);
|
src/lib/components/chat/ChatIntroduction.svelte
CHANGED
@@ -1,15 +1,11 @@
|
|
1 |
<script lang="ts">
|
2 |
-
import {
|
3 |
-
PUBLIC_DISABLE_INTRO_TILES,
|
4 |
-
PUBLIC_MODEL_ID,
|
5 |
-
PUBLIC_MODEL_NAME,
|
6 |
-
PUBLIC_VERSION,
|
7 |
-
} from "$env/static/public";
|
8 |
-
|
9 |
import Logo from "$lib/components/icons/Logo.svelte";
|
10 |
import CarbonArrowUpRight from "~icons/carbon/arrow-up-right";
|
11 |
import CarbonEarth from "~icons/carbon/earth";
|
12 |
import { createEventDispatcher } from "svelte";
|
|
|
|
|
13 |
const dispatch = createEventDispatcher<{ message: string }>();
|
14 |
</script>
|
15 |
|
@@ -36,13 +32,13 @@
|
|
36 |
<div class="overflow-hidden rounded-xl border dark:border-gray-800">
|
37 |
<div class="p-3">
|
38 |
<div class="text-sm text-gray-600 dark:text-gray-400">Current Model</div>
|
39 |
-
<div class="font-semibold">{
|
40 |
</div>
|
41 |
<div
|
42 |
class="flex items-center gap-5 rounded-xl bg-gray-100 px-3 py-2 text-sm text-gray-600 dark:bg-gray-800 dark:text-gray-300"
|
43 |
>
|
44 |
<a
|
45 |
-
href="https://huggingface.co/{
|
46 |
target="_blank"
|
47 |
rel="noreferrer"
|
48 |
class="flex items-center hover:underline"
|
|
|
1 |
<script lang="ts">
|
2 |
+
import { PUBLIC_DISABLE_INTRO_TILES, PUBLIC_VERSION } from "$env/static/public";
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
import Logo from "$lib/components/icons/Logo.svelte";
|
4 |
import CarbonArrowUpRight from "~icons/carbon/arrow-up-right";
|
5 |
import CarbonEarth from "~icons/carbon/earth";
|
6 |
import { createEventDispatcher } from "svelte";
|
7 |
+
|
8 |
+
export let currentModel: { name: string; displayName: string };
|
9 |
const dispatch = createEventDispatcher<{ message: string }>();
|
10 |
</script>
|
11 |
|
|
|
32 |
<div class="overflow-hidden rounded-xl border dark:border-gray-800">
|
33 |
<div class="p-3">
|
34 |
<div class="text-sm text-gray-600 dark:text-gray-400">Current Model</div>
|
35 |
+
<div class="font-semibold">{currentModel.displayName}</div>
|
36 |
</div>
|
37 |
<div
|
38 |
class="flex items-center gap-5 rounded-xl bg-gray-100 px-3 py-2 text-sm text-gray-600 dark:bg-gray-800 dark:text-gray-300"
|
39 |
>
|
40 |
<a
|
41 |
+
href="https://huggingface.co/{currentModel.name}"
|
42 |
target="_blank"
|
43 |
rel="noreferrer"
|
44 |
class="flex items-center hover:underline"
|
src/lib/components/chat/ChatMessages.svelte
CHANGED
@@ -13,6 +13,7 @@
|
|
13 |
export let messages: Message[];
|
14 |
export let loading: boolean;
|
15 |
export let pending: boolean;
|
|
|
16 |
|
17 |
let chatContainer: HTMLElement;
|
18 |
|
@@ -40,7 +41,7 @@
|
|
40 |
on:retry={() => dispatch("retry", { id: message.id, content: message.content })}
|
41 |
/>
|
42 |
{:else}
|
43 |
-
<ChatIntroduction on:message />
|
44 |
{/each}
|
45 |
{#if pending}
|
46 |
<ChatMessage message={{ from: "assistant", content: "", id: randomUUID() }} />
|
|
|
13 |
export let messages: Message[];
|
14 |
export let loading: boolean;
|
15 |
export let pending: boolean;
|
16 |
+
export let currentModel: { name: string; displayName: string };
|
17 |
|
18 |
let chatContainer: HTMLElement;
|
19 |
|
|
|
41 |
on:retry={() => dispatch("retry", { id: message.id, content: message.content })}
|
42 |
/>
|
43 |
{:else}
|
44 |
+
<ChatIntroduction on:message {currentModel} />
|
45 |
{/each}
|
46 |
{#if pending}
|
47 |
<ChatMessage message={{ from: "assistant", content: "", id: randomUUID() }} />
|
src/lib/components/chat/ChatWindow.svelte
CHANGED
@@ -8,12 +8,12 @@
|
|
8 |
import ChatMessages from "./ChatMessages.svelte";
|
9 |
import ChatInput from "./ChatInput.svelte";
|
10 |
import StopGeneratingBtn from "../StopGeneratingBtn.svelte";
|
11 |
-
import { PUBLIC_MODEL_ID, PUBLIC_MODEL_NAME } from "$env/static/public";
|
12 |
|
13 |
export let messages: Message[] = [];
|
14 |
export let disabled = false;
|
15 |
export let loading = false;
|
16 |
export let pending = false;
|
|
|
17 |
|
18 |
let message: string;
|
19 |
|
@@ -35,6 +35,7 @@
|
|
35 |
<ChatMessages
|
36 |
{loading}
|
37 |
{pending}
|
|
|
38 |
{messages}
|
39 |
on:message
|
40 |
on:retry={(ev) => {
|
@@ -73,10 +74,10 @@
|
|
73 |
<div class="mt-2 flex justify-between self-stretch px-1 text-xs text-gray-400/90 max-sm:gap-2">
|
74 |
<p>
|
75 |
Model: <a
|
76 |
-
href="https://huggingface.co/{
|
77 |
target="_blank"
|
78 |
rel="noreferrer"
|
79 |
-
class="hover:underline">{
|
80 |
> <span class="max-sm:hidden">·</span><br class="sm:hidden" /> Generated content may be inaccurate
|
81 |
or false.
|
82 |
</p>
|
|
|
8 |
import ChatMessages from "./ChatMessages.svelte";
|
9 |
import ChatInput from "./ChatInput.svelte";
|
10 |
import StopGeneratingBtn from "../StopGeneratingBtn.svelte";
|
|
|
11 |
|
12 |
export let messages: Message[] = [];
|
13 |
export let disabled = false;
|
14 |
export let loading = false;
|
15 |
export let pending = false;
|
16 |
+
export let currentModel: { name: string; displayName: string };
|
17 |
|
18 |
let message: string;
|
19 |
|
|
|
35 |
<ChatMessages
|
36 |
{loading}
|
37 |
{pending}
|
38 |
+
{currentModel}
|
39 |
{messages}
|
40 |
on:message
|
41 |
on:retry={(ev) => {
|
|
|
74 |
<div class="mt-2 flex justify-between self-stretch px-1 text-xs text-gray-400/90 max-sm:gap-2">
|
75 |
<p>
|
76 |
Model: <a
|
77 |
+
href="https://huggingface.co/{currentModel.name}"
|
78 |
target="_blank"
|
79 |
rel="noreferrer"
|
80 |
+
class="hover:underline">{currentModel.displayName}</a
|
81 |
> <span class="max-sm:hidden">·</span><br class="sm:hidden" /> Generated content may be inaccurate
|
82 |
or false.
|
83 |
</p>
|
src/lib/server/modelEndpoint.ts
CHANGED
@@ -1,14 +1,38 @@
|
|
1 |
-
import {
|
2 |
import { sum } from "$lib/utils/sum";
|
3 |
-
|
4 |
-
const endpoints: Array<{ endpoint: string; authorization: string; weight: number }> =
|
5 |
-
JSON.parse(MODEL_ENDPOINTS);
|
6 |
-
const totalWeight = sum(endpoints.map((e) => e.weight));
|
7 |
|
8 |
/**
|
9 |
* Find a random load-balanced endpoint
|
10 |
*/
|
11 |
-
export function modelEndpoint(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
let random = Math.random() * totalWeight;
|
13 |
for (const endpoint of endpoints) {
|
14 |
if (random < endpoint.weight) {
|
|
|
1 |
+
import { HF_ACCESS_TOKEN } from "$env/static/private";
|
2 |
import { sum } from "$lib/utils/sum";
|
3 |
+
import { models } from "./models";
|
|
|
|
|
|
|
4 |
|
5 |
/**
|
6 |
* Find a random load-balanced endpoint
|
7 |
*/
|
8 |
+
export function modelEndpoint(model: string): {
|
9 |
+
url: string;
|
10 |
+
authorization: string;
|
11 |
+
weight: number;
|
12 |
+
} {
|
13 |
+
const modelDefinition = models.find(
|
14 |
+
(m) => m === model || (typeof m === "object" && m.name === model)
|
15 |
+
);
|
16 |
+
if (!modelDefinition) {
|
17 |
+
throw new Error(`Invalid model: ${model}`);
|
18 |
+
}
|
19 |
+
if (typeof modelDefinition === "string") {
|
20 |
+
return {
|
21 |
+
url: `https://api-inference.huggingface.co/models/${modelDefinition}`,
|
22 |
+
authorization: `Bearer ${HF_ACCESS_TOKEN}`,
|
23 |
+
weight: 1,
|
24 |
+
};
|
25 |
+
}
|
26 |
+
if (!modelDefinition.endpoints) {
|
27 |
+
return {
|
28 |
+
url: `https://api-inference.huggingface.co/models/${modelDefinition.name}`,
|
29 |
+
authorization: `Bearer ${HF_ACCESS_TOKEN}`,
|
30 |
+
weight: 1,
|
31 |
+
};
|
32 |
+
}
|
33 |
+
const endpoints = modelDefinition.endpoints;
|
34 |
+
const totalWeight = sum(endpoints.map((e) => e.weight));
|
35 |
+
|
36 |
let random = Math.random() * totalWeight;
|
37 |
for (const endpoint of endpoints) {
|
38 |
if (random < endpoint.weight) {
|
src/lib/server/models.ts
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import { HF_ACCESS_TOKEN, MODELS } from "$env/static/private";
|
2 |
+
import { z } from "zod";
|
3 |
+
|
4 |
+
export const models = z
|
5 |
+
.array(
|
6 |
+
z.union([
|
7 |
+
z.string().min(1),
|
8 |
+
z.object({
|
9 |
+
name: z.string().min(1),
|
10 |
+
displayName: z.string().min(1).optional(),
|
11 |
+
endpoints: z
|
12 |
+
.array(
|
13 |
+
z.object({
|
14 |
+
url: z.string().url(),
|
15 |
+
authorization: z.string().min(1).default(`Bearer ${HF_ACCESS_TOKEN}`),
|
16 |
+
weight: z.number().int().positive().default(1),
|
17 |
+
})
|
18 |
+
)
|
19 |
+
.optional(),
|
20 |
+
}),
|
21 |
+
])
|
22 |
+
)
|
23 |
+
.parse(JSON.parse(MODELS));
|
24 |
+
|
25 |
+
export const modelNames: Array<{ name: string; displayName: string }> = models.map((m) =>
|
26 |
+
typeof m === "string"
|
27 |
+
? { name: m, displayName: m }
|
28 |
+
: { name: m.name, displayName: m.displayName ?? m.name }
|
29 |
+
);
|
30 |
+
export const defaultModel = modelNames[0];
|
src/lib/types/Message.ts
CHANGED
@@ -2,4 +2,6 @@ export interface Message {
|
|
2 |
from: "user" | "assistant";
|
3 |
id: ReturnType<typeof crypto.randomUUID>;
|
4 |
content: string;
|
|
|
|
|
5 |
}
|
|
|
2 |
from: "user" | "assistant";
|
3 |
id: ReturnType<typeof crypto.randomUUID>;
|
4 |
content: string;
|
5 |
+
// Only for "assistant" messages
|
6 |
+
model?: string;
|
7 |
}
|
src/routes/+layout.server.ts
CHANGED
@@ -2,6 +2,7 @@ import type { LayoutServerLoad } from "./$types";
|
|
2 |
import { collections } from "$lib/server/database";
|
3 |
import type { Conversation } from "$lib/types/Conversation";
|
4 |
import { UrlDependency } from "$lib/types/UrlDependency";
|
|
|
5 |
|
6 |
export const load: LayoutServerLoad = async ({ locals, depends }) => {
|
7 |
const { conversations } = collections;
|
@@ -29,5 +30,6 @@ export const load: LayoutServerLoad = async ({ locals, depends }) => {
|
|
29 |
shareConversationsWithModelAuthors: settings?.shareConversationsWithModelAuthors ?? true,
|
30 |
ethicsModalAcceptedAt: settings?.ethicsModalAcceptedAt ?? null,
|
31 |
},
|
|
|
32 |
};
|
33 |
};
|
|
|
2 |
import { collections } from "$lib/server/database";
|
3 |
import type { Conversation } from "$lib/types/Conversation";
|
4 |
import { UrlDependency } from "$lib/types/UrlDependency";
|
5 |
+
import { modelNames } from "$lib/server/models";
|
6 |
|
7 |
export const load: LayoutServerLoad = async ({ locals, depends }) => {
|
8 |
const { conversations } = collections;
|
|
|
30 |
shareConversationsWithModelAuthors: settings?.shareConversationsWithModelAuthors ?? true,
|
31 |
ethicsModalAcceptedAt: settings?.ethicsModalAcceptedAt ?? null,
|
32 |
},
|
33 |
+
models: modelNames,
|
34 |
};
|
35 |
};
|
src/routes/+page.svelte
CHANGED
@@ -5,6 +5,7 @@
|
|
5 |
import { ERROR_MESSAGES, error } from "$lib/stores/errors";
|
6 |
import { pendingMessage } from "$lib/stores/pendingMessage";
|
7 |
|
|
|
8 |
let loading = false;
|
9 |
|
10 |
async function createConversation(message: string) {
|
@@ -39,4 +40,8 @@
|
|
39 |
}
|
40 |
</script>
|
41 |
|
42 |
-
<ChatWindow
|
|
|
|
|
|
|
|
|
|
5 |
import { ERROR_MESSAGES, error } from "$lib/stores/errors";
|
6 |
import { pendingMessage } from "$lib/stores/pendingMessage";
|
7 |
|
8 |
+
export let data;
|
9 |
let loading = false;
|
10 |
|
11 |
async function createConversation(message: string) {
|
|
|
40 |
}
|
41 |
</script>
|
42 |
|
43 |
+
<ChatWindow
|
44 |
+
on:message={(ev) => createConversation(ev.detail)}
|
45 |
+
{loading}
|
46 |
+
currentModel={data.models[0]}
|
47 |
+
/>
|
src/routes/conversation/[id]/+page.svelte
CHANGED
@@ -181,4 +181,5 @@
|
|
181 |
on:retry={(message) => writeMessage(message.detail.content, message.detail.id)}
|
182 |
on:share={() => shareConversation($page.params.id, data.title)}
|
183 |
on:stop={() => (isAborted = true)}
|
|
|
184 |
/>
|
|
|
181 |
on:retry={(message) => writeMessage(message.detail.content, message.detail.id)}
|
182 |
on:share={() => shareConversation($page.params.id, data.title)}
|
183 |
on:stop={() => (isAborted = true)}
|
184 |
+
currentModel={data.models[0]}
|
185 |
/>
|
src/routes/conversation/[id]/+server.ts
CHANGED
@@ -3,6 +3,7 @@ import { buildPrompt } from "$lib/buildPrompt.js";
|
|
3 |
import { abortedGenerations } from "$lib/server/abortedGenerations.js";
|
4 |
import { collections } from "$lib/server/database.js";
|
5 |
import { modelEndpoint } from "$lib/server/modelEndpoint.js";
|
|
|
6 |
import type { Message } from "$lib/types/Message.js";
|
7 |
import { concatUint8Arrays } from "$lib/utils/concatUint8Arrays.js";
|
8 |
import { streamToAsyncIterable } from "$lib/utils/streamToAsyncIterable";
|
@@ -30,10 +31,14 @@ export async function POST({ request, fetch, locals, params }) {
|
|
30 |
const json = await request.json();
|
31 |
const {
|
32 |
inputs: newPrompt,
|
|
|
33 |
options: { id: messageId, is_retry },
|
34 |
} = z
|
35 |
.object({
|
36 |
inputs: z.string().trim().min(1),
|
|
|
|
|
|
|
37 |
options: z.object({
|
38 |
id: z.optional(z.string().uuid()),
|
39 |
is_retry: z.optional(z.boolean()),
|
@@ -66,11 +71,11 @@ export async function POST({ request, fetch, locals, params }) {
|
|
66 |
}
|
67 |
const prompt = buildPrompt(messages);
|
68 |
|
69 |
-
const randomEndpoint = modelEndpoint();
|
70 |
|
71 |
const abortController = new AbortController();
|
72 |
|
73 |
-
const resp = await fetch(randomEndpoint.
|
74 |
headers: {
|
75 |
"Content-Type": request.headers.get("Content-Type") ?? "application/json",
|
76 |
Authorization: randomEndpoint.authorization,
|
@@ -99,7 +104,7 @@ export async function POST({ request, fetch, locals, params }) {
|
|
99 |
|
100 |
generated_text = trimSuffix(trimPrefix(generated_text, "<|startoftext|>"), PUBLIC_SEP_TOKEN);
|
101 |
|
102 |
-
messages.push({ from: "assistant", content: generated_text, id: crypto.randomUUID() });
|
103 |
|
104 |
await collections.conversations.updateOne(
|
105 |
{
|
|
|
3 |
import { abortedGenerations } from "$lib/server/abortedGenerations.js";
|
4 |
import { collections } from "$lib/server/database.js";
|
5 |
import { modelEndpoint } from "$lib/server/modelEndpoint.js";
|
6 |
+
import { defaultModel, modelNames } from "$lib/server/models.js";
|
7 |
import type { Message } from "$lib/types/Message.js";
|
8 |
import { concatUint8Arrays } from "$lib/utils/concatUint8Arrays.js";
|
9 |
import { streamToAsyncIterable } from "$lib/utils/streamToAsyncIterable";
|
|
|
31 |
const json = await request.json();
|
32 |
const {
|
33 |
inputs: newPrompt,
|
34 |
+
model,
|
35 |
options: { id: messageId, is_retry },
|
36 |
} = z
|
37 |
.object({
|
38 |
inputs: z.string().trim().min(1),
|
39 |
+
model: z
|
40 |
+
.enum([modelNames[0].name, ...modelNames.slice(1).map((m) => m.name)])
|
41 |
+
.default(defaultModel.name),
|
42 |
options: z.object({
|
43 |
id: z.optional(z.string().uuid()),
|
44 |
is_retry: z.optional(z.boolean()),
|
|
|
71 |
}
|
72 |
const prompt = buildPrompt(messages);
|
73 |
|
74 |
+
const randomEndpoint = modelEndpoint(model);
|
75 |
|
76 |
const abortController = new AbortController();
|
77 |
|
78 |
+
const resp = await fetch(randomEndpoint.url, {
|
79 |
headers: {
|
80 |
"Content-Type": request.headers.get("Content-Type") ?? "application/json",
|
81 |
Authorization: randomEndpoint.authorization,
|
|
|
104 |
|
105 |
generated_text = trimSuffix(trimPrefix(generated_text, "<|startoftext|>"), PUBLIC_SEP_TOKEN);
|
106 |
|
107 |
+
messages.push({ from: "assistant", content: generated_text, id: crypto.randomUUID(), model });
|
108 |
|
109 |
await collections.conversations.updateOne(
|
110 |
{
|
src/routes/conversation/[id]/summarize/+server.ts
CHANGED
@@ -2,6 +2,7 @@ import { PUBLIC_MAX_INPUT_TOKENS, PUBLIC_SEP_TOKEN } from "$env/static/public";
|
|
2 |
import { buildPrompt } from "$lib/buildPrompt";
|
3 |
import { collections } from "$lib/server/database.js";
|
4 |
import { modelEndpoint } from "$lib/server/modelEndpoint.js";
|
|
|
5 |
import { trimPrefix } from "$lib/utils/trimPrefix.js";
|
6 |
import { trimSuffix } from "$lib/utils/trimSuffix.js";
|
7 |
import { textGeneration } from "@huggingface/inference";
|
@@ -40,10 +41,10 @@ export async function POST({ params, locals, fetch }) {
|
|
40 |
return_full_text: false,
|
41 |
};
|
42 |
|
43 |
-
const endpoint = modelEndpoint();
|
44 |
let { generated_text } = await textGeneration(
|
45 |
{
|
46 |
-
model: endpoint.
|
47 |
inputs: prompt,
|
48 |
parameters,
|
49 |
},
|
|
|
2 |
import { buildPrompt } from "$lib/buildPrompt";
|
3 |
import { collections } from "$lib/server/database.js";
|
4 |
import { modelEndpoint } from "$lib/server/modelEndpoint.js";
|
5 |
+
import { defaultModel } from "$lib/server/models.js";
|
6 |
import { trimPrefix } from "$lib/utils/trimPrefix.js";
|
7 |
import { trimSuffix } from "$lib/utils/trimSuffix.js";
|
8 |
import { textGeneration } from "@huggingface/inference";
|
|
|
41 |
return_full_text: false,
|
42 |
};
|
43 |
|
44 |
+
const endpoint = modelEndpoint(defaultModel.name);
|
45 |
let { generated_text } = await textGeneration(
|
46 |
{
|
47 |
+
model: endpoint.url,
|
48 |
inputs: prompt,
|
49 |
parameters,
|
50 |
},
|
src/routes/r/[id]/+page.svelte
CHANGED
@@ -71,5 +71,6 @@
|
|
71 |
})
|
72 |
.finally(() => (loading = false))}
|
73 |
messages={data.messages}
|
|
|
74 |
{loading}
|
75 |
/>
|
|
|
71 |
})
|
72 |
.finally(() => (loading = false))}
|
73 |
messages={data.messages}
|
74 |
+
currentModel={data.models[0]}
|
75 |
{loading}
|
76 |
/>
|