Thomas G. Lopes committed on
Commit
00a2728
·
1 Parent(s): 0120cd1

auto modes

Browse files
package.json CHANGED
@@ -22,9 +22,9 @@
22
  "@eslint/js": "^9.22.0",
23
  "@floating-ui/dom": "^1.6.13",
24
  "@huggingface/hub": "^2.6.12",
25
- "@huggingface/inference": "^4.11.1",
26
- "@huggingface/tasks": "^0.19.50",
27
- "@huggingface/transformers": "^3.7.5",
28
  "@iconify-json/carbon": "^1.2.8",
29
  "@iconify-json/line-md": "^1.2.11",
30
  "@iconify-json/logos": "^1.2.9",
 
22
  "@eslint/js": "^9.22.0",
23
  "@floating-ui/dom": "^1.6.13",
24
  "@huggingface/hub": "^2.6.12",
25
+ "@huggingface/inference": "^4.13.0",
26
+ "@huggingface/tasks": "^0.19.62",
27
+ "@huggingface/transformers": "^3.7.6",
28
  "@iconify-json/carbon": "^1.2.8",
29
  "@iconify-json/line-md": "^1.2.11",
30
  "@iconify-json/logos": "^1.2.9",
pnpm-lock.yaml CHANGED
@@ -46,14 +46,14 @@ importers:
46
  specifier: ^2.6.12
47
  version: 2.6.12
48
  '@huggingface/inference':
49
- specifier: ^4.11.1
50
- version: 4.11.1
51
  '@huggingface/tasks':
52
- specifier: ^0.19.50
53
- version: 0.19.50
54
  '@huggingface/transformers':
55
- specifier: ^3.7.5
56
- version: 3.7.5
57
  '@iconify-json/carbon':
58
  specifier: ^1.2.8
59
  version: 1.2.8
@@ -459,19 +459,19 @@ packages:
459
  engines: {node: '>=18'}
460
  hasBin: true
461
 
462
- '@huggingface/inference@4.11.1':
463
- resolution: {integrity: sha512-otClcigc7KUQfvG5wav7oVTBEVLsdKelEMuuQGXwBdPqrsJ7WZknAGCXJcXJoO2zLw9v518jgylkQnFwEBpGvw==}
464
  engines: {node: '>=18'}
465
 
466
  '@huggingface/jinja@0.5.1':
467
  resolution: {integrity: sha512-yUZLld4lrM9iFxHCwFQ7D1HW2MWMwSbeB7WzWqFYDWK+rEb+WldkLdAJxUPOmgICMHZLzZGVcVjFh3w/YGubng==}
468
  engines: {node: '>=18'}
469
 
470
- '@huggingface/tasks@0.19.50':
471
- resolution: {integrity: sha512-kHrfiDsJttkuwpdp7PgFiFHaK9rj+COJTIZ+221gk9vdF4B5QySW7hQT4aOGkwwZP+4qbXGhjMNGg/bxOq+LwA==}
472
 
473
- '@huggingface/transformers@3.7.5':
474
- resolution: {integrity: sha512-5jvrIwHyRXfOKVaGKYvUZM6ZjJKQXWeKzIOdKBE5pdzPSNzTwBNx5NdWcGElf4Ddv7Dl2mWsvJh+G5RnCUxMmA==}
475
 
476
  '@humanfs/core@0.19.1':
477
  resolution: {integrity: sha512-5DyQ4+1JEUzejeK1JGICcideyfUbGixgS9jNgex5nqkW+cY7WZhxBigmieN5Qnw9ZosSNVC9KQKyb+GUaGyKUA==}
@@ -3586,20 +3586,20 @@ snapshots:
3586
 
3587
  '@huggingface/hub@2.6.12':
3588
  dependencies:
3589
- '@huggingface/tasks': 0.19.50
3590
  optionalDependencies:
3591
  cli-progress: 3.12.0
3592
 
3593
- '@huggingface/inference@4.11.1':
3594
  dependencies:
3595
  '@huggingface/jinja': 0.5.1
3596
- '@huggingface/tasks': 0.19.50
3597
 
3598
  '@huggingface/jinja@0.5.1': {}
3599
 
3600
- '@huggingface/tasks@0.19.50': {}
3601
 
3602
- '@huggingface/transformers@3.7.5':
3603
  dependencies:
3604
  '@huggingface/jinja': 0.5.1
3605
  onnxruntime-node: 1.21.0
 
46
  specifier: ^2.6.12
47
  version: 2.6.12
48
  '@huggingface/inference':
49
+ specifier: ^4.13.0
50
+ version: 4.13.0
51
  '@huggingface/tasks':
52
+ specifier: ^0.19.62
53
+ version: 0.19.62
54
  '@huggingface/transformers':
55
+ specifier: ^3.7.6
56
+ version: 3.7.6
57
  '@iconify-json/carbon':
58
  specifier: ^1.2.8
59
  version: 1.2.8
 
459
  engines: {node: '>=18'}
460
  hasBin: true
461
 
462
+ '@huggingface/inference@4.13.0':
463
+ resolution: {integrity: sha512-oqBHbYoLHpNlEUJp/MG28gCR0JOUmLi9iKKd0oJfTAgTqaDSLhoCsIdS1XfLHbT/CrzZch33cARTR3bO4q/2yQ==}
464
  engines: {node: '>=18'}
465
 
466
  '@huggingface/jinja@0.5.1':
467
  resolution: {integrity: sha512-yUZLld4lrM9iFxHCwFQ7D1HW2MWMwSbeB7WzWqFYDWK+rEb+WldkLdAJxUPOmgICMHZLzZGVcVjFh3w/YGubng==}
468
  engines: {node: '>=18'}
469
 
470
+ '@huggingface/tasks@0.19.62':
471
+ resolution: {integrity: sha512-sv0ieUqggz2yWfVv14WuvdhmQA8mvSF/OD1quz5RKcysP+dmrPn0E9SwJsSksTsp1QwpAC396PukgPmDymPVhQ==}
472
 
473
+ '@huggingface/transformers@3.7.6':
474
+ resolution: {integrity: sha512-OYlIRY8vj8r/pNx2CdXcDHz4KqpEC+bUMKzdVW5Dx//gp4XRmK+/g8as0h3cssRQYT0vG1A6VCfZy8SV0F4RDQ==}
475
 
476
  '@humanfs/core@0.19.1':
477
  resolution: {integrity: sha512-5DyQ4+1JEUzejeK1JGICcideyfUbGixgS9jNgex5nqkW+cY7WZhxBigmieN5Qnw9ZosSNVC9KQKyb+GUaGyKUA==}
 
3586
 
3587
  '@huggingface/hub@2.6.12':
3588
  dependencies:
3589
+ '@huggingface/tasks': 0.19.62
3590
  optionalDependencies:
3591
  cli-progress: 3.12.0
3592
 
3593
+ '@huggingface/inference@4.13.0':
3594
  dependencies:
3595
  '@huggingface/jinja': 0.5.1
3596
+ '@huggingface/tasks': 0.19.62
3597
 
3598
  '@huggingface/jinja@0.5.1': {}
3599
 
3600
+ '@huggingface/tasks@0.19.62': {}
3601
 
3602
+ '@huggingface/transformers@3.7.6':
3603
  dependencies:
3604
  '@huggingface/jinja': 0.5.1
3605
  onnxruntime-node: 1.21.0
src/lib/components/inference-playground/conversation-header.svelte CHANGED
@@ -27,7 +27,7 @@
27
  if (!model) {
28
  return;
29
  }
30
- conversation.update({ modelId: model.id, provider: undefined });
31
  }
32
 
33
  let nameSpace = $derived(conversation.model.id.split("/")[0] ?? "");
 
27
  if (!model) {
28
  return;
29
  }
30
+ conversation.update({ modelId: model.id, provider: "auto", autoPolicy: "default" });
31
  }
32
 
33
  let nameSpace = $derived(conversation.model.id.split("/")[0] ?? "");
src/lib/components/inference-playground/provider-select.svelte CHANGED
@@ -1,14 +1,14 @@
1
  <script lang="ts">
2
  import type { ConversationClass } from "$lib/state/conversations.svelte";
3
- import { models } from "$lib/state/models.svelte";
4
  import { pricing } from "$lib/state/pricing.svelte";
5
  import type { Model } from "$lib/types.js";
6
- import { randomPick } from "$lib/utils/array.js";
7
  import { cn } from "$lib/utils/cn.js";
8
  import { Select } from "melt/builders";
9
  import { run } from "svelte/legacy";
10
  import IconCaret from "~icons/carbon/chevron-down";
11
  import IconProvider from "../icon-provider.svelte";
 
 
12
 
13
  interface Props {
14
  conversation: ConversationClass & { model: Model };
@@ -20,11 +20,8 @@
20
  function reset(providers: typeof conversation.model.inferenceProviderMapping) {
21
  const validProvider = providers.find(p => p.provider === conversation.data.provider);
22
  if (validProvider || conversation.data.provider === "auto") return;
23
- if (providers) {
24
- conversation.update({ provider: randomPick(providers)?.provider });
25
- } else {
26
- conversation.update({ modelId: randomPick(models.all)?.id });
27
- }
28
  }
29
 
30
  let providers = $derived(conversation.model.inferenceProviderMapping);
@@ -39,6 +36,13 @@
39
  },
40
  });
41
 
 
 
 
 
 
 
 
42
  const nameMap: Record<string, string> = {
43
  "sambanova": "SambaNova",
44
  "fal": "fal",
@@ -82,6 +86,28 @@
82
  const pd = pricing.getPricing(conversation.model.id, provider);
83
  return pricing.formatPricing(pd);
84
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
  </script>
86
 
87
  {#snippet providerDisplay(provider: string)}
@@ -131,4 +157,54 @@
131
  {/each}
132
  {@render option("auto")}
133
  </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
134
  </div>
 
1
  <script lang="ts">
2
  import type { ConversationClass } from "$lib/state/conversations.svelte";
 
3
  import { pricing } from "$lib/state/pricing.svelte";
4
  import type { Model } from "$lib/types.js";
 
5
  import { cn } from "$lib/utils/cn.js";
6
  import { Select } from "melt/builders";
7
  import { run } from "svelte/legacy";
8
  import IconCaret from "~icons/carbon/chevron-down";
9
  import IconProvider from "../icon-provider.svelte";
10
+ import Tooltip from "../tooltip.svelte";
11
+ import IconInfo from "~icons/carbon/information";
12
 
13
  interface Props {
14
  conversation: ConversationClass & { model: Model };
 
20
  function reset(providers: typeof conversation.model.inferenceProviderMapping) {
21
  const validProvider = providers.find(p => p.provider === conversation.data.provider);
22
  if (validProvider || conversation.data.provider === "auto") return;
23
+ // Default to auto provider if no valid provider is set
24
+ conversation.update({ provider: "auto", autoPolicy: "default" });
 
 
 
25
  }
26
 
27
  let providers = $derived(conversation.model.inferenceProviderMapping);
 
36
  },
37
  });
38
 
39
+ const autoPolicySelect = new Select<"default" | "fastest" | "cheapest", false>({
40
+ value: () => conversation.data.autoPolicy ?? "default",
41
+ onValueChange(v) {
42
+ conversation.update({ autoPolicy: v });
43
+ },
44
+ });
45
+
46
  const nameMap: Record<string, string> = {
47
  "sambanova": "SambaNova",
48
  "fal": "fal",
 
86
  const pd = pricing.getPricing(conversation.model.id, provider);
87
  return pricing.formatPricing(pd);
88
  }
89
+
90
/**
 * Returns the short display label for an auto-provider routing policy.
 *
 * @param policy - One of "default", "fastest" or "cheapest".
 * @returns "Default", "Fastest" or "Cheapest". Any unexpected runtime value
 *   falls back to "Default" so the UI never renders `undefined`.
 */
function getAutoPolicyLabel(policy: "default" | "fastest" | "cheapest"): string {
	switch (policy) {
		case "fastest":
			return "Fastest";
		case "cheapest":
			return "Cheapest";
		case "default":
		default:
			// Unknown values are treated like the default policy.
			return "Default";
	}
}
100
+
101
/**
 * Returns the explanatory sentence shown for an auto-provider routing policy.
 *
 * @param policy - One of "default", "fastest" or "cheapest".
 * @returns The description for the policy. Any unexpected runtime value falls
 *   back to the "default" description so the UI never renders `undefined`.
 */
function getAutoPolicyDescription(policy: "default" | "fastest" | "cheapest"): string {
	switch (policy) {
		case "fastest":
			return "Selects the provider with highest throughput";
		case "cheapest":
			return "Selects the provider with lowest price per output token";
		case "default":
		default:
			// Unknown values are treated like the default policy.
			return "Uses your preference order from Inference Provider settings";
	}
}
111
  </script>
112
 
113
  {#snippet providerDisplay(provider: string)}
 
157
  {/each}
158
  {@render option("auto")}
159
  </div>
160
+
161
+ {#if conversation.data.provider === "auto"}
162
+ <div class="flex flex-col gap-1.5">
163
+ <div class="flex items-center gap-1 text-xs text-gray-600 dark:text-gray-400">
164
+ <span>Auto Policy</span>
165
+ <Tooltip>
166
+ {#snippet trigger(tooltip)}
167
+ <button class="flex items-center" {...tooltip.trigger}>
168
+ <IconInfo class="size-3" />
169
+ </button>
170
+ {/snippet}
171
+ {getAutoPolicyDescription(conversation.data.autoPolicy ?? "default")}
172
+ </Tooltip>
173
+ </div>
174
+ <button
175
+ {...autoPolicySelect.trigger}
176
+ class={cn(
177
+ "relative flex items-center justify-between gap-6 overflow-hidden rounded-lg border bg-gray-100/80 px-3 py-1.5 text-sm leading-tight whitespace-nowrap shadow-sm",
178
+ "hover:brightness-95 dark:border-gray-700 dark:bg-gray-800 dark:hover:brightness-110",
179
+ )}
180
+ >
181
+ {getAutoPolicyLabel(conversation.data.autoPolicy ?? "default")}
182
+ <div
183
+ class="absolute right-2 grid size-4 flex-none place-items-center rounded-sm bg-gray-100 text-xs dark:bg-gray-600"
184
+ >
185
+ <IconCaret />
186
+ </div>
187
+ </button>
188
+
189
+ <div {...autoPolicySelect.content} class="rounded-lg border bg-gray-100 dark:border-gray-700 dark:bg-gray-800">
190
+ {#snippet policyOption(policy: "default" | "fastest" | "cheapest", label: string)}
191
+ <div {...autoPolicySelect.getOption(policy)} class="group block w-full p-1 text-sm dark:text-white">
192
+ <div
193
+ class="rounded-md px-2 py-1.5 group-data-[highlighted]:bg-gray-200 dark:group-data-[highlighted]:bg-gray-700"
194
+ >
195
+ <div class="flex flex-col items-start gap-0.5">
196
+ <span>{label}</span>
197
+ <span class="text-xs text-gray-500 dark:text-gray-400">
198
+ {getAutoPolicyDescription(policy)}
199
+ </span>
200
+ </div>
201
+ </div>
202
+ </div>
203
+ {/snippet}
204
+ {@render policyOption("default", "Default")}
205
+ {@render policyOption("fastest", "Fastest")}
206
+ {@render policyOption("cheapest", "Cheapest")}
207
+ </div>
208
+ </div>
209
+ {/if}
210
  </div>
src/lib/state/conversations.svelte.ts CHANGED
@@ -51,6 +51,9 @@ export class ConversationEntity {
51
  @Fields.string()
52
  provider?: string;
53
 
 
 
 
54
  @Fields.string()
55
  projectId!: string;
56
 
@@ -87,6 +90,8 @@ function getDefaultConversation(projectId: string) {
87
  messages: [],
88
  streaming: true,
89
  parseMarkdown: false,
 
 
90
  createdAt: new Date(),
91
  } satisfies Partial<ConversationEntityMembers>;
92
  }
 
51
  @Fields.string()
52
  provider?: string;
53
 
54
+ @Fields.string()
55
+ autoPolicy?: "default" | "fastest" | "cheapest";
56
+
57
  @Fields.string()
58
  projectId!: string;
59
 
 
90
  messages: [],
91
  streaming: true,
92
  parseMarkdown: false,
93
+ provider: "auto",
94
+ autoPolicy: "default" as const,
95
  createdAt: new Date(),
96
  } satisfies Partial<ConversationEntityMembers>;
97
  }
src/lib/types.ts CHANGED
@@ -15,7 +15,7 @@ export type Conversation = {
15
  systemMessage: ConversationMessage;
16
  streaming: boolean;
17
  provider?: string;
18
- } & Pick<ConversationEntityMembers, "structuredOutput" | "extraParams">;
19
 
20
  export type ConversationWithCustomModel = Conversation & {
21
  model: CustomModel;
 
15
  systemMessage: ConversationMessage;
16
  streaming: boolean;
17
  provider?: string;
18
+ } & Pick<ConversationEntityMembers, "structuredOutput" | "extraParams" | "autoPolicy">;
19
 
20
  export type ConversationWithCustomModel = Conversation & {
21
  model: CustomModel;
src/lib/utils/business.svelte.ts CHANGED
@@ -123,6 +123,15 @@ const tokenErrMessage = dev
123
  ? "Please set your Hugging Face token in the .env file"
124
  : "Failed to connect to inference providers. Are you logged in?";
125
 
 
 
 
 
 
 
 
 
 
126
  export async function handleStreamingResponse(
127
  conversation: ConversationClass | Conversation,
128
  onChunk: (content: string) => void,
@@ -139,9 +148,11 @@ export async function handleStreamingResponse(
139
  ];
140
  const parsed = await Promise.all(messages.map(parseMessage));
141
 
 
 
142
  const requestBody = {
143
  model: {
144
- id: model.id,
145
  isCustom: isCustomModel(model),
146
  accessToken: isCustomModel(model) ? model.accessToken : undefined,
147
  endpointUrl: isCustomModel(model) ? model.endpointUrl : undefined,
@@ -198,9 +209,11 @@ export async function handleNonStreamingResponse(
198
  ];
199
  const parsed = await Promise.all(messages.map(parseMessage));
200
 
 
 
201
  const requestBody = {
202
  model: {
203
- id: model.id,
204
  isCustom: isCustomModel(model),
205
  accessToken: isCustomModel(model) ? model.accessToken : undefined,
206
  endpointUrl: isCustomModel(model) ? model.endpointUrl : undefined,
@@ -331,11 +344,15 @@ export function getInferenceSnippet(
331
 
332
  const providerMapping = model.inferenceProviderMapping.find(p => p.provider === provider);
333
  if (!providerMapping && provider !== "auto") return [];
 
 
 
 
334
  const allSnippets = snippets.getInferenceSnippets(
335
- { ...model, inference: "" },
336
  provider,
337
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
338
- { ...providerMapping, hfModelId: model.id } as any,
339
  { ...opts, directRequest: false },
340
  );
341
 
 
123
  ? "Please set your Hugging Face token in the .env file"
124
  : "Failed to connect to inference providers. Are you logged in?";
125
 
126
/**
 * Builds the model id to send with a request, taking the auto-routing policy
 * into account.
 *
 * When `provider` is "auto" and the policy is "fastest" or "cheapest", the
 * policy is appended as a `:<policy>` suffix (e.g. `org/model:fastest`).
 * In every other case the id is returned unchanged.
 *
 * @param modelId - Base model id.
 * @param provider - Selected provider; only "auto" can produce a suffix.
 * @param autoPolicy - Routing policy; a missing value behaves like "default".
 * @returns The model id, with a policy suffix when applicable.
 */
function getModelIdWithPolicy(
	modelId: string,
	provider?: string,
	autoPolicy?: "default" | "fastest" | "cheapest",
): string {
	if (provider !== "auto") return modelId;
	// Only known non-default policies become a suffix. The value may come from
	// stored data, so anything else (including undefined) is treated as "default"
	// rather than being appended verbatim to the model id.
	if (autoPolicy !== "fastest" && autoPolicy !== "cheapest") return modelId;
	return `${modelId}:${autoPolicy}`;
}
134
+
135
  export async function handleStreamingResponse(
136
  conversation: ConversationClass | Conversation,
137
  onChunk: (content: string) => void,
 
148
  ];
149
  const parsed = await Promise.all(messages.map(parseMessage));
150
 
151
+ const modelIdWithPolicy = getModelIdWithPolicy(model.id, data.provider, data.autoPolicy);
152
+
153
  const requestBody = {
154
  model: {
155
+ id: modelIdWithPolicy,
156
  isCustom: isCustomModel(model),
157
  accessToken: isCustomModel(model) ? model.accessToken : undefined,
158
  endpointUrl: isCustomModel(model) ? model.endpointUrl : undefined,
 
209
  ];
210
  const parsed = await Promise.all(messages.map(parseMessage));
211
 
212
+ const modelIdWithPolicy = getModelIdWithPolicy(model.id, data.provider, data.autoPolicy);
213
+
214
  const requestBody = {
215
  model: {
216
+ id: modelIdWithPolicy,
217
  isCustom: isCustomModel(model),
218
  accessToken: isCustomModel(model) ? model.accessToken : undefined,
219
  endpointUrl: isCustomModel(model) ? model.endpointUrl : undefined,
 
344
 
345
  const providerMapping = model.inferenceProviderMapping.find(p => p.provider === provider);
346
  if (!providerMapping && provider !== "auto") return [];
347
+
348
+ // Apply auto policy suffix to model ID if using auto provider
349
+ const modelIdWithPolicy = getModelIdWithPolicy(model.id, data.provider, data.autoPolicy);
350
+
351
  const allSnippets = snippets.getInferenceSnippets(
352
+ { ...model, inference: "", id: modelIdWithPolicy },
353
  provider,
354
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
355
+ { ...providerMapping, hfModelId: modelIdWithPolicy } as any,
356
  { ...opts, directRequest: false },
357
  );
358