Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Thomas G. Lopes
committed on
Commit
·
00a2728
1
Parent(s):
0120cd1
auto modes
Browse files
package.json
CHANGED
|
@@ -22,9 +22,9 @@
|
|
| 22 |
"@eslint/js": "^9.22.0",
|
| 23 |
"@floating-ui/dom": "^1.6.13",
|
| 24 |
"@huggingface/hub": "^2.6.12",
|
| 25 |
-
"@huggingface/inference": "^4.
|
| 26 |
-
"@huggingface/tasks": "^0.19.
|
| 27 |
-
"@huggingface/transformers": "^3.7.
|
| 28 |
"@iconify-json/carbon": "^1.2.8",
|
| 29 |
"@iconify-json/line-md": "^1.2.11",
|
| 30 |
"@iconify-json/logos": "^1.2.9",
|
|
|
|
| 22 |
"@eslint/js": "^9.22.0",
|
| 23 |
"@floating-ui/dom": "^1.6.13",
|
| 24 |
"@huggingface/hub": "^2.6.12",
|
| 25 |
+
"@huggingface/inference": "^4.13.0",
|
| 26 |
+
"@huggingface/tasks": "^0.19.62",
|
| 27 |
+
"@huggingface/transformers": "^3.7.6",
|
| 28 |
"@iconify-json/carbon": "^1.2.8",
|
| 29 |
"@iconify-json/line-md": "^1.2.11",
|
| 30 |
"@iconify-json/logos": "^1.2.9",
|
pnpm-lock.yaml
CHANGED
|
@@ -46,14 +46,14 @@ importers:
|
|
| 46 |
specifier: ^2.6.12
|
| 47 |
version: 2.6.12
|
| 48 |
'@huggingface/inference':
|
| 49 |
-
specifier: ^4.
|
| 50 |
-
version: 4.
|
| 51 |
'@huggingface/tasks':
|
| 52 |
-
specifier: ^0.19.
|
| 53 |
-
version: 0.19.
|
| 54 |
'@huggingface/transformers':
|
| 55 |
-
specifier: ^3.7.
|
| 56 |
-
version: 3.7.
|
| 57 |
'@iconify-json/carbon':
|
| 58 |
specifier: ^1.2.8
|
| 59 |
version: 1.2.8
|
|
@@ -459,19 +459,19 @@ packages:
|
|
| 459 |
engines: {node: '>=18'}
|
| 460 |
hasBin: true
|
| 461 |
|
| 462 |
-
'@huggingface/inference@4.
|
| 463 |
-
resolution: {integrity: sha512-
|
| 464 |
engines: {node: '>=18'}
|
| 465 |
|
| 466 |
'@huggingface/jinja@0.5.1':
|
| 467 |
resolution: {integrity: sha512-yUZLld4lrM9iFxHCwFQ7D1HW2MWMwSbeB7WzWqFYDWK+rEb+WldkLdAJxUPOmgICMHZLzZGVcVjFh3w/YGubng==}
|
| 468 |
engines: {node: '>=18'}
|
| 469 |
|
| 470 |
-
'@huggingface/tasks@0.19.
|
| 471 |
-
resolution: {integrity: sha512-
|
| 472 |
|
| 473 |
-
'@huggingface/transformers@3.7.
|
| 474 |
-
resolution: {integrity: sha512-
|
| 475 |
|
| 476 |
'@humanfs/core@0.19.1':
|
| 477 |
resolution: {integrity: sha512-5DyQ4+1JEUzejeK1JGICcideyfUbGixgS9jNgex5nqkW+cY7WZhxBigmieN5Qnw9ZosSNVC9KQKyb+GUaGyKUA==}
|
|
@@ -3586,20 +3586,20 @@ snapshots:
|
|
| 3586 |
|
| 3587 |
'@huggingface/hub@2.6.12':
|
| 3588 |
dependencies:
|
| 3589 |
-
'@huggingface/tasks': 0.19.
|
| 3590 |
optionalDependencies:
|
| 3591 |
cli-progress: 3.12.0
|
| 3592 |
|
| 3593 |
-
'@huggingface/inference@4.
|
| 3594 |
dependencies:
|
| 3595 |
'@huggingface/jinja': 0.5.1
|
| 3596 |
-
'@huggingface/tasks': 0.19.
|
| 3597 |
|
| 3598 |
'@huggingface/jinja@0.5.1': {}
|
| 3599 |
|
| 3600 |
-
'@huggingface/tasks@0.19.
|
| 3601 |
|
| 3602 |
-
'@huggingface/transformers@3.7.
|
| 3603 |
dependencies:
|
| 3604 |
'@huggingface/jinja': 0.5.1
|
| 3605 |
onnxruntime-node: 1.21.0
|
|
|
|
| 46 |
specifier: ^2.6.12
|
| 47 |
version: 2.6.12
|
| 48 |
'@huggingface/inference':
|
| 49 |
+
specifier: ^4.13.0
|
| 50 |
+
version: 4.13.0
|
| 51 |
'@huggingface/tasks':
|
| 52 |
+
specifier: ^0.19.62
|
| 53 |
+
version: 0.19.62
|
| 54 |
'@huggingface/transformers':
|
| 55 |
+
specifier: ^3.7.6
|
| 56 |
+
version: 3.7.6
|
| 57 |
'@iconify-json/carbon':
|
| 58 |
specifier: ^1.2.8
|
| 59 |
version: 1.2.8
|
|
|
|
| 459 |
engines: {node: '>=18'}
|
| 460 |
hasBin: true
|
| 461 |
|
| 462 |
+
'@huggingface/inference@4.13.0':
|
| 463 |
+
resolution: {integrity: sha512-oqBHbYoLHpNlEUJp/MG28gCR0JOUmLi9iKKd0oJfTAgTqaDSLhoCsIdS1XfLHbT/CrzZch33cARTR3bO4q/2yQ==}
|
| 464 |
engines: {node: '>=18'}
|
| 465 |
|
| 466 |
'@huggingface/jinja@0.5.1':
|
| 467 |
resolution: {integrity: sha512-yUZLld4lrM9iFxHCwFQ7D1HW2MWMwSbeB7WzWqFYDWK+rEb+WldkLdAJxUPOmgICMHZLzZGVcVjFh3w/YGubng==}
|
| 468 |
engines: {node: '>=18'}
|
| 469 |
|
| 470 |
+
'@huggingface/tasks@0.19.62':
|
| 471 |
+
resolution: {integrity: sha512-sv0ieUqggz2yWfVv14WuvdhmQA8mvSF/OD1quz5RKcysP+dmrPn0E9SwJsSksTsp1QwpAC396PukgPmDymPVhQ==}
|
| 472 |
|
| 473 |
+
'@huggingface/transformers@3.7.6':
|
| 474 |
+
resolution: {integrity: sha512-OYlIRY8vj8r/pNx2CdXcDHz4KqpEC+bUMKzdVW5Dx//gp4XRmK+/g8as0h3cssRQYT0vG1A6VCfZy8SV0F4RDQ==}
|
| 475 |
|
| 476 |
'@humanfs/core@0.19.1':
|
| 477 |
resolution: {integrity: sha512-5DyQ4+1JEUzejeK1JGICcideyfUbGixgS9jNgex5nqkW+cY7WZhxBigmieN5Qnw9ZosSNVC9KQKyb+GUaGyKUA==}
|
|
|
|
| 3586 |
|
| 3587 |
'@huggingface/hub@2.6.12':
|
| 3588 |
dependencies:
|
| 3589 |
+
'@huggingface/tasks': 0.19.62
|
| 3590 |
optionalDependencies:
|
| 3591 |
cli-progress: 3.12.0
|
| 3592 |
|
| 3593 |
+
'@huggingface/inference@4.13.0':
|
| 3594 |
dependencies:
|
| 3595 |
'@huggingface/jinja': 0.5.1
|
| 3596 |
+
'@huggingface/tasks': 0.19.62
|
| 3597 |
|
| 3598 |
'@huggingface/jinja@0.5.1': {}
|
| 3599 |
|
| 3600 |
+
'@huggingface/tasks@0.19.62': {}
|
| 3601 |
|
| 3602 |
+
'@huggingface/transformers@3.7.6':
|
| 3603 |
dependencies:
|
| 3604 |
'@huggingface/jinja': 0.5.1
|
| 3605 |
onnxruntime-node: 1.21.0
|
src/lib/components/inference-playground/conversation-header.svelte
CHANGED
|
@@ -27,7 +27,7 @@
|
|
| 27 |
if (!model) {
|
| 28 |
return;
|
| 29 |
}
|
| 30 |
-
conversation.update({ modelId: model.id, provider:
|
| 31 |
}
|
| 32 |
|
| 33 |
let nameSpace = $derived(conversation.model.id.split("/")[0] ?? "");
|
|
|
|
| 27 |
if (!model) {
|
| 28 |
return;
|
| 29 |
}
|
| 30 |
+
conversation.update({ modelId: model.id, provider: "auto", autoPolicy: "default" });
|
| 31 |
}
|
| 32 |
|
| 33 |
let nameSpace = $derived(conversation.model.id.split("/")[0] ?? "");
|
src/lib/components/inference-playground/provider-select.svelte
CHANGED
|
@@ -1,14 +1,14 @@
|
|
| 1 |
<script lang="ts">
|
| 2 |
import type { ConversationClass } from "$lib/state/conversations.svelte";
|
| 3 |
-
import { models } from "$lib/state/models.svelte";
|
| 4 |
import { pricing } from "$lib/state/pricing.svelte";
|
| 5 |
import type { Model } from "$lib/types.js";
|
| 6 |
-
import { randomPick } from "$lib/utils/array.js";
|
| 7 |
import { cn } from "$lib/utils/cn.js";
|
| 8 |
import { Select } from "melt/builders";
|
| 9 |
import { run } from "svelte/legacy";
|
| 10 |
import IconCaret from "~icons/carbon/chevron-down";
|
| 11 |
import IconProvider from "../icon-provider.svelte";
|
|
|
|
|
|
|
| 12 |
|
| 13 |
interface Props {
|
| 14 |
conversation: ConversationClass & { model: Model };
|
|
@@ -20,11 +20,8 @@
|
|
| 20 |
function reset(providers: typeof conversation.model.inferenceProviderMapping) {
|
| 21 |
const validProvider = providers.find(p => p.provider === conversation.data.provider);
|
| 22 |
if (validProvider || conversation.data.provider === "auto") return;
|
| 23 |
-
if
|
| 24 |
-
|
| 25 |
-
} else {
|
| 26 |
-
conversation.update({ modelId: randomPick(models.all)?.id });
|
| 27 |
-
}
|
| 28 |
}
|
| 29 |
|
| 30 |
let providers = $derived(conversation.model.inferenceProviderMapping);
|
|
@@ -39,6 +36,13 @@
|
|
| 39 |
},
|
| 40 |
});
|
| 41 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
const nameMap: Record<string, string> = {
|
| 43 |
"sambanova": "SambaNova",
|
| 44 |
"fal": "fal",
|
|
@@ -82,6 +86,28 @@
|
|
| 82 |
const pd = pricing.getPricing(conversation.model.id, provider);
|
| 83 |
return pricing.formatPricing(pd);
|
| 84 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
</script>
|
| 86 |
|
| 87 |
{#snippet providerDisplay(provider: string)}
|
|
@@ -131,4 +157,54 @@
|
|
| 131 |
{/each}
|
| 132 |
{@render option("auto")}
|
| 133 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 134 |
</div>
|
|
|
|
| 1 |
<script lang="ts">
|
| 2 |
import type { ConversationClass } from "$lib/state/conversations.svelte";
|
|
|
|
| 3 |
import { pricing } from "$lib/state/pricing.svelte";
|
| 4 |
import type { Model } from "$lib/types.js";
|
|
|
|
| 5 |
import { cn } from "$lib/utils/cn.js";
|
| 6 |
import { Select } from "melt/builders";
|
| 7 |
import { run } from "svelte/legacy";
|
| 8 |
import IconCaret from "~icons/carbon/chevron-down";
|
| 9 |
import IconProvider from "../icon-provider.svelte";
|
| 10 |
+
import Tooltip from "../tooltip.svelte";
|
| 11 |
+
import IconInfo from "~icons/carbon/information";
|
| 12 |
|
| 13 |
interface Props {
|
| 14 |
conversation: ConversationClass & { model: Model };
|
|
|
|
| 20 |
function reset(providers: typeof conversation.model.inferenceProviderMapping) {
|
| 21 |
const validProvider = providers.find(p => p.provider === conversation.data.provider);
|
| 22 |
if (validProvider || conversation.data.provider === "auto") return;
|
| 23 |
+
// Default to auto provider if no valid provider is set
|
| 24 |
+
conversation.update({ provider: "auto", autoPolicy: "default" });
|
|
|
|
|
|
|
|
|
|
| 25 |
}
|
| 26 |
|
| 27 |
let providers = $derived(conversation.model.inferenceProviderMapping);
|
|
|
|
| 36 |
},
|
| 37 |
});
|
| 38 |
|
| 39 |
+
const autoPolicySelect = new Select<"default" | "fastest" | "cheapest", false>({
|
| 40 |
+
value: () => conversation.data.autoPolicy ?? "default",
|
| 41 |
+
onValueChange(v) {
|
| 42 |
+
conversation.update({ autoPolicy: v });
|
| 43 |
+
},
|
| 44 |
+
});
|
| 45 |
+
|
| 46 |
const nameMap: Record<string, string> = {
|
| 47 |
"sambanova": "SambaNova",
|
| 48 |
"fal": "fal",
|
|
|
|
| 86 |
const pd = pricing.getPricing(conversation.model.id, provider);
|
| 87 |
return pricing.formatPricing(pd);
|
| 88 |
}
|
| 89 |
+
|
| 90 |
+
function getAutoPolicyLabel(policy: "default" | "fastest" | "cheapest") {
|
| 91 |
+
switch (policy) {
|
| 92 |
+
case "default":
|
| 93 |
+
return "Default";
|
| 94 |
+
case "fastest":
|
| 95 |
+
return "Fastest";
|
| 96 |
+
case "cheapest":
|
| 97 |
+
return "Cheapest";
|
| 98 |
+
}
|
| 99 |
+
}
|
| 100 |
+
|
| 101 |
+
function getAutoPolicyDescription(policy: "default" | "fastest" | "cheapest") {
|
| 102 |
+
switch (policy) {
|
| 103 |
+
case "default":
|
| 104 |
+
return "Uses your preference order from Inference Provider settings";
|
| 105 |
+
case "fastest":
|
| 106 |
+
return "Selects the provider with highest throughput";
|
| 107 |
+
case "cheapest":
|
| 108 |
+
return "Selects the provider with lowest price per output token";
|
| 109 |
+
}
|
| 110 |
+
}
|
| 111 |
</script>
|
| 112 |
|
| 113 |
{#snippet providerDisplay(provider: string)}
|
|
|
|
| 157 |
{/each}
|
| 158 |
{@render option("auto")}
|
| 159 |
</div>
|
| 160 |
+
|
| 161 |
+
{#if conversation.data.provider === "auto"}
|
| 162 |
+
<div class="flex flex-col gap-1.5">
|
| 163 |
+
<div class="flex items-center gap-1 text-xs text-gray-600 dark:text-gray-400">
|
| 164 |
+
<span>Auto Policy</span>
|
| 165 |
+
<Tooltip>
|
| 166 |
+
{#snippet trigger(tooltip)}
|
| 167 |
+
<button class="flex items-center" {...tooltip.trigger}>
|
| 168 |
+
<IconInfo class="size-3" />
|
| 169 |
+
</button>
|
| 170 |
+
{/snippet}
|
| 171 |
+
{getAutoPolicyDescription(conversation.data.autoPolicy ?? "default")}
|
| 172 |
+
</Tooltip>
|
| 173 |
+
</div>
|
| 174 |
+
<button
|
| 175 |
+
{...autoPolicySelect.trigger}
|
| 176 |
+
class={cn(
|
| 177 |
+
"relative flex items-center justify-between gap-6 overflow-hidden rounded-lg border bg-gray-100/80 px-3 py-1.5 text-sm leading-tight whitespace-nowrap shadow-sm",
|
| 178 |
+
"hover:brightness-95 dark:border-gray-700 dark:bg-gray-800 dark:hover:brightness-110",
|
| 179 |
+
)}
|
| 180 |
+
>
|
| 181 |
+
{getAutoPolicyLabel(conversation.data.autoPolicy ?? "default")}
|
| 182 |
+
<div
|
| 183 |
+
class="absolute right-2 grid size-4 flex-none place-items-center rounded-sm bg-gray-100 text-xs dark:bg-gray-600"
|
| 184 |
+
>
|
| 185 |
+
<IconCaret />
|
| 186 |
+
</div>
|
| 187 |
+
</button>
|
| 188 |
+
|
| 189 |
+
<div {...autoPolicySelect.content} class="rounded-lg border bg-gray-100 dark:border-gray-700 dark:bg-gray-800">
|
| 190 |
+
{#snippet policyOption(policy: "default" | "fastest" | "cheapest", label: string)}
|
| 191 |
+
<div {...autoPolicySelect.getOption(policy)} class="group block w-full p-1 text-sm dark:text-white">
|
| 192 |
+
<div
|
| 193 |
+
class="rounded-md px-2 py-1.5 group-data-[highlighted]:bg-gray-200 dark:group-data-[highlighted]:bg-gray-700"
|
| 194 |
+
>
|
| 195 |
+
<div class="flex flex-col items-start gap-0.5">
|
| 196 |
+
<span>{label}</span>
|
| 197 |
+
<span class="text-xs text-gray-500 dark:text-gray-400">
|
| 198 |
+
{getAutoPolicyDescription(policy)}
|
| 199 |
+
</span>
|
| 200 |
+
</div>
|
| 201 |
+
</div>
|
| 202 |
+
</div>
|
| 203 |
+
{/snippet}
|
| 204 |
+
{@render policyOption("default", "Default")}
|
| 205 |
+
{@render policyOption("fastest", "Fastest")}
|
| 206 |
+
{@render policyOption("cheapest", "Cheapest")}
|
| 207 |
+
</div>
|
| 208 |
+
</div>
|
| 209 |
+
{/if}
|
| 210 |
</div>
|
src/lib/state/conversations.svelte.ts
CHANGED
|
@@ -51,6 +51,9 @@ export class ConversationEntity {
|
|
| 51 |
@Fields.string()
|
| 52 |
provider?: string;
|
| 53 |
|
|
|
|
|
|
|
|
|
|
| 54 |
@Fields.string()
|
| 55 |
projectId!: string;
|
| 56 |
|
|
@@ -87,6 +90,8 @@ function getDefaultConversation(projectId: string) {
|
|
| 87 |
messages: [],
|
| 88 |
streaming: true,
|
| 89 |
parseMarkdown: false,
|
|
|
|
|
|
|
| 90 |
createdAt: new Date(),
|
| 91 |
} satisfies Partial<ConversationEntityMembers>;
|
| 92 |
}
|
|
|
|
| 51 |
@Fields.string()
|
| 52 |
provider?: string;
|
| 53 |
|
| 54 |
+
@Fields.string()
|
| 55 |
+
autoPolicy?: "default" | "fastest" | "cheapest";
|
| 56 |
+
|
| 57 |
@Fields.string()
|
| 58 |
projectId!: string;
|
| 59 |
|
|
|
|
| 90 |
messages: [],
|
| 91 |
streaming: true,
|
| 92 |
parseMarkdown: false,
|
| 93 |
+
provider: "auto",
|
| 94 |
+
autoPolicy: "default" as const,
|
| 95 |
createdAt: new Date(),
|
| 96 |
} satisfies Partial<ConversationEntityMembers>;
|
| 97 |
}
|
src/lib/types.ts
CHANGED
|
@@ -15,7 +15,7 @@ export type Conversation = {
|
|
| 15 |
systemMessage: ConversationMessage;
|
| 16 |
streaming: boolean;
|
| 17 |
provider?: string;
|
| 18 |
-
} & Pick<ConversationEntityMembers, "structuredOutput" | "extraParams">;
|
| 19 |
|
| 20 |
export type ConversationWithCustomModel = Conversation & {
|
| 21 |
model: CustomModel;
|
|
|
|
| 15 |
systemMessage: ConversationMessage;
|
| 16 |
streaming: boolean;
|
| 17 |
provider?: string;
|
| 18 |
+
} & Pick<ConversationEntityMembers, "structuredOutput" | "extraParams" | "autoPolicy">;
|
| 19 |
|
| 20 |
export type ConversationWithCustomModel = Conversation & {
|
| 21 |
model: CustomModel;
|
src/lib/utils/business.svelte.ts
CHANGED
|
@@ -123,6 +123,15 @@ const tokenErrMessage = dev
|
|
| 123 |
? "Please set your Hugging Face token in the .env file"
|
| 124 |
: "Failed to connect to inference providers. Are you logged in?";
|
| 125 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 126 |
export async function handleStreamingResponse(
|
| 127 |
conversation: ConversationClass | Conversation,
|
| 128 |
onChunk: (content: string) => void,
|
|
@@ -139,9 +148,11 @@ export async function handleStreamingResponse(
|
|
| 139 |
];
|
| 140 |
const parsed = await Promise.all(messages.map(parseMessage));
|
| 141 |
|
|
|
|
|
|
|
| 142 |
const requestBody = {
|
| 143 |
model: {
|
| 144 |
-
id:
|
| 145 |
isCustom: isCustomModel(model),
|
| 146 |
accessToken: isCustomModel(model) ? model.accessToken : undefined,
|
| 147 |
endpointUrl: isCustomModel(model) ? model.endpointUrl : undefined,
|
|
@@ -198,9 +209,11 @@ export async function handleNonStreamingResponse(
|
|
| 198 |
];
|
| 199 |
const parsed = await Promise.all(messages.map(parseMessage));
|
| 200 |
|
|
|
|
|
|
|
| 201 |
const requestBody = {
|
| 202 |
model: {
|
| 203 |
-
id:
|
| 204 |
isCustom: isCustomModel(model),
|
| 205 |
accessToken: isCustomModel(model) ? model.accessToken : undefined,
|
| 206 |
endpointUrl: isCustomModel(model) ? model.endpointUrl : undefined,
|
|
@@ -331,11 +344,15 @@ export function getInferenceSnippet(
|
|
| 331 |
|
| 332 |
const providerMapping = model.inferenceProviderMapping.find(p => p.provider === provider);
|
| 333 |
if (!providerMapping && provider !== "auto") return [];
|
|
|
|
|
|
|
|
|
|
|
|
|
| 334 |
const allSnippets = snippets.getInferenceSnippets(
|
| 335 |
-
{ ...model, inference: "" },
|
| 336 |
provider,
|
| 337 |
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
| 338 |
-
{ ...providerMapping, hfModelId:
|
| 339 |
{ ...opts, directRequest: false },
|
| 340 |
);
|
| 341 |
|
|
|
|
| 123 |
? "Please set your Hugging Face token in the .env file"
|
| 124 |
: "Failed to connect to inference providers. Are you logged in?";
|
| 125 |
|
| 126 |
+
function getModelIdWithPolicy(modelId: string, provider?: string, autoPolicy?: "default" | "fastest" | "cheapest") {
|
| 127 |
+
// When using auto provider, append policy suffix if it's not "default"
|
| 128 |
+
const policy = autoPolicy ?? "default";
|
| 129 |
+
if (provider === "auto" && policy !== "default") {
|
| 130 |
+
return `${modelId}:${policy}`;
|
| 131 |
+
}
|
| 132 |
+
return modelId;
|
| 133 |
+
}
|
| 134 |
+
|
| 135 |
export async function handleStreamingResponse(
|
| 136 |
conversation: ConversationClass | Conversation,
|
| 137 |
onChunk: (content: string) => void,
|
|
|
|
| 148 |
];
|
| 149 |
const parsed = await Promise.all(messages.map(parseMessage));
|
| 150 |
|
| 151 |
+
const modelIdWithPolicy = getModelIdWithPolicy(model.id, data.provider, data.autoPolicy);
|
| 152 |
+
|
| 153 |
const requestBody = {
|
| 154 |
model: {
|
| 155 |
+
id: modelIdWithPolicy,
|
| 156 |
isCustom: isCustomModel(model),
|
| 157 |
accessToken: isCustomModel(model) ? model.accessToken : undefined,
|
| 158 |
endpointUrl: isCustomModel(model) ? model.endpointUrl : undefined,
|
|
|
|
| 209 |
];
|
| 210 |
const parsed = await Promise.all(messages.map(parseMessage));
|
| 211 |
|
| 212 |
+
const modelIdWithPolicy = getModelIdWithPolicy(model.id, data.provider, data.autoPolicy);
|
| 213 |
+
|
| 214 |
const requestBody = {
|
| 215 |
model: {
|
| 216 |
+
id: modelIdWithPolicy,
|
| 217 |
isCustom: isCustomModel(model),
|
| 218 |
accessToken: isCustomModel(model) ? model.accessToken : undefined,
|
| 219 |
endpointUrl: isCustomModel(model) ? model.endpointUrl : undefined,
|
|
|
|
| 344 |
|
| 345 |
const providerMapping = model.inferenceProviderMapping.find(p => p.provider === provider);
|
| 346 |
if (!providerMapping && provider !== "auto") return [];
|
| 347 |
+
|
| 348 |
+
// Apply auto policy suffix to model ID if using auto provider
|
| 349 |
+
const modelIdWithPolicy = getModelIdWithPolicy(model.id, data.provider, data.autoPolicy);
|
| 350 |
+
|
| 351 |
const allSnippets = snippets.getInferenceSnippets(
|
| 352 |
+
{ ...model, inference: "", id: modelIdWithPolicy },
|
| 353 |
provider,
|
| 354 |
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
| 355 |
+
{ ...providerMapping, hfModelId: modelIdWithPolicy } as any,
|
| 356 |
{ ...opts, directRequest: false },
|
| 357 |
);
|
| 358 |
|