machineuser committed
Commit • 4e99448
1 Parent(s): c25d7cc

Sync widgets demo
Files changed:
- packages/inference/package.json +4 -2
- packages/inference/pnpm-lock.yaml +3 -1
- packages/inference/scripts/generate-dts.ts +19 -0
- packages/inference/src/tasks/nlp/textGeneration.ts +2 -202
- packages/inference/src/tasks/nlp/textGenerationStream.ts +1 -1
- packages/widgets/src/lib/components/InferenceWidget/widgets/ConversationalWidget/ConversationalWidget.svelte +3 -2
- packages/widgets/src/routes/+page.svelte +52 -7
packages/inference/package.json
CHANGED
@@ -40,7 +40,7 @@
   "type": "module",
   "scripts": {
     "build": "tsup src/index.ts --format cjs,esm --clean && pnpm run dts",
-    "dts": "tsx scripts/generate-dts.ts",
+    "dts": "tsx scripts/generate-dts.ts && tsc --noEmit dist/index.d.ts",
     "lint": "eslint --quiet --fix --ext .cjs,.ts .",
     "lint:check": "eslint --ext .cjs,.ts .",
     "format": "prettier --write .",
@@ -51,8 +51,10 @@
     "test:browser": "vitest run --browser.name=chrome --browser.headless --config vitest.config.mts",
     "check": "tsc"
   },
+  "dependencies": {
+    "@huggingface/tasks": "workspace:^"
+  },
   "devDependencies": {
-    "@huggingface/tasks": "workspace:^",
     "@types/node": "18.13.0"
   },
   "resolutions": {}
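Note: the `dts` script now also runs `tsc --noEmit` over the generated `dist/index.d.ts`, and `@huggingface/tasks` moves from devDependencies to dependencies because the emitted declaration file imports types from it (see the generate-dts.ts change below). A minimal sketch of what the top of the generated file is expected to look like after this change (illustrative, not the actual output):

// Illustrative first lines of dist/index.d.ts: the prepended import must resolve against
// the runtime dependency "@huggingface/tasks", which is why it moves out of devDependencies.
import type { TextGenerationInput, TextGenerationOutput } from "@huggingface/tasks";
export type { TextGenerationInput, TextGenerationOutput };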
packages/inference/pnpm-lock.yaml
CHANGED
@@ -4,10 +4,12 @@ settings:
   autoInstallPeers: true
   excludeLinksFromLockfile: false
 
-devDependencies:
+dependencies:
   '@huggingface/tasks':
     specifier: workspace:^
     version: link:../tasks
+
+devDependencies:
   '@types/node':
     specifier: 18.13.0
     version: 18.13.0
packages/inference/scripts/generate-dts.ts
CHANGED
@@ -3,6 +3,8 @@
 import { readFileSync, writeFileSync, appendFileSync, readdirSync } from "node:fs";
 import { TASKS_DATA } from "@huggingface/tasks";
 
+const taskImports = new Set<string>();
+
 const tasks = Object.keys(TASKS_DATA)
 	.sort()
 	.filter((task) => task !== "other");
@@ -36,6 +38,16 @@ for (const dir of dirs) {
 
 	const fileContent = readFileSync(`./src/tasks/${dir}/${file}`, "utf-8");
 
+	// detect imports from @huggingface/tasks
+	for (const imports of fileContent.matchAll(/import type {(.*)} from "@huggingface\/tasks";/g)) {
+		// Convert A, B, C to ["A", "B", "C"]
+		const imported = imports[1].split(",").map((x) => x.trim());
+
+		for (const imp of imported) {
+			taskImports.add(imp);
+		}
+	}
+
 	for (const type of extractTypesAndInterfaces(fileContent)) {
 		appendFileSync("./dist/index.d.ts", type + "\n");
 	}
@@ -87,6 +99,13 @@ appendFileSync(
 	"\n}\n"
 );
 
+// Prepend import from @huggingface/tasks
+writeFileSync(
+	"./dist/index.d.ts",
+	`import type { ${[...taskImports].join(", ")} } from "@huggingface/tasks";\n` +
+		readFileSync("./dist/index.d.ts", "utf-8")
+);
+
 function* extractTypesAndInterfaces(fileContent: string): Iterable<string> {
 	let index = 0;
 
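The script change above collects every `import type { … } from "@huggingface/tasks"` statement found in the task sources and prepends one consolidated import to the generated `dist/index.d.ts`. A standalone sketch of that detection step, using the same regex on a made-up sample string:

// Sketch of the import-detection pass; `sample` is hypothetical file content, not real source.
const sample = `import type { TextGenerationInput, TextGenerationOutput } from "@huggingface/tasks";`;
const taskImports = new Set<string>();

for (const match of sample.matchAll(/import type {(.*)} from "@huggingface\/tasks";/g)) {
	// Convert "A, B, C" into ["A", "B", "C"] and record each name once.
	for (const name of match[1].split(",").map((x) => x.trim())) {
		taskImports.add(name);
	}
}

console.log([...taskImports]); // ["TextGenerationInput", "TextGenerationOutput"]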
packages/inference/src/tasks/nlp/textGeneration.ts
CHANGED
@@ -1,209 +1,9 @@
+import type { TextGenerationInput, TextGenerationOutput } from "@huggingface/tasks";
 import { InferenceOutputError } from "../../lib/InferenceOutputError";
 import type { BaseArgs, Options } from "../../types";
 import { request } from "../custom/request";
 
-/**
- * Inputs for Text Generation inference
- */
-export interface TextGenerationInput {
-	/**
-	 * The text to initialize generation with
-	 */
-	inputs: string;
-	/**
-	 * Additional inference parameters
-	 */
-	parameters?: TextGenerationParameters;
-	/**
-	 * Whether to stream output tokens
-	 */
-	stream?: boolean;
-	[property: string]: unknown;
-}
-
-/**
- * Additional inference parameters
- *
- * Additional inference parameters for Text Generation
- */
-export interface TextGenerationParameters {
-	/**
-	 * The number of sampling queries to run. Only the best one (in terms of total logprob) will
-	 * be returned.
-	 */
-	best_of?: number;
-	/**
-	 * Whether or not to output decoder input details
-	 */
-	decoder_input_details?: boolean;
-	/**
-	 * Whether or not to output details
-	 */
-	details?: boolean;
-	/**
-	 * Whether to use logits sampling instead of greedy decoding when generating new tokens.
-	 */
-	do_sample?: boolean;
-	/**
-	 * The maximum number of tokens to generate.
-	 */
-	max_new_tokens?: number;
-	/**
-	 * The parameter for repetition penalty. A value of 1.0 means no penalty. See [this
-	 * paper](https://hf.co/papers/1909.05858) for more details.
-	 */
-	repetition_penalty?: number;
-	/**
-	 * Whether to prepend the prompt to the generated text.
-	 */
-	return_full_text?: boolean;
-	/**
-	 * The random sampling seed.
-	 */
-	seed?: number;
-	/**
-	 * Stop generating tokens if a member of `stop_sequences` is generated.
-	 */
-	stop_sequences?: string[];
-	/**
-	 * The value used to modulate the logits distribution.
-	 */
-	temperature?: number;
-	/**
-	 * The number of highest probability vocabulary tokens to keep for top-k-filtering.
-	 */
-	top_k?: number;
-	/**
-	 * If set to < 1, only the smallest set of most probable tokens with probabilities that add
-	 * up to `top_p` or higher are kept for generation.
-	 */
-	top_p?: number;
-	/**
-	 * Truncate input tokens to the given size.
-	 */
-	truncate?: number;
-	/**
-	 * Typical Decoding mass. See [Typical Decoding for Natural Language
-	 * Generation](https://hf.co/papers/2202.00666) for more information
-	 */
-	typical_p?: number;
-	/**
-	 * Watermarking with [A Watermark for Large Language Models](https://hf.co/papers/2301.10226)
-	 */
-	watermark?: boolean;
-	[property: string]: unknown;
-}
-
-/**
- * Outputs for Text Generation inference
- */
-export interface TextGenerationOutput {
-	/**
-	 * When enabled, details about the generation
-	 */
-	details?: TextGenerationOutputDetails;
-	/**
-	 * The generated text
-	 */
-	generated_text: string;
-	[property: string]: unknown;
-}
-
-/**
- * When enabled, details about the generation
- */
-export interface TextGenerationOutputDetails {
-	/**
-	 * Details about additional sequences when best_of is provided
-	 */
-	best_of_sequences?: TextGenerationOutputSequenceDetails[];
-	/**
-	 * The reason why the generation was stopped.
-	 */
-	finish_reason: TextGenerationFinishReason;
-	/**
-	 * The number of generated tokens
-	 */
-	generated_tokens: number;
-	prefill: TextGenerationPrefillToken[];
-	/**
-	 * The random seed used for generation
-	 */
-	seed?: number;
-	/**
-	 * The generated tokens and associated details
-	 */
-	tokens: TextGenerationOutputToken[];
-	/**
-	 * Most likely tokens
-	 */
-	top_tokens?: Array<TextGenerationOutputToken[]>;
-	[property: string]: unknown;
-}
-
-export interface TextGenerationOutputSequenceDetails {
-	finish_reason: TextGenerationFinishReason;
-	/**
-	 * The generated text
-	 */
-	generated_text: string;
-	/**
-	 * The number of generated tokens
-	 */
-	generated_tokens: number;
-	prefill: TextGenerationPrefillToken[];
-	/**
-	 * The random seed used for generation
-	 */
-	seed?: number;
-	/**
-	 * The generated tokens and associated details
-	 */
-	tokens: TextGenerationOutputToken[];
-	/**
-	 * Most likely tokens
-	 */
-	top_tokens?: Array<TextGenerationOutputToken[]>;
-	[property: string]: unknown;
-}
-
-export interface TextGenerationPrefillToken {
-	id: number;
-	logprob: number;
-	/**
-	 * The text associated with that token
-	 */
-	text: string;
-	[property: string]: unknown;
-}
-
-/**
- * Generated token.
- */
-export interface TextGenerationOutputToken {
-	id: number;
-	logprob?: number;
-	/**
-	 * Whether or not that token is a special one
-	 */
-	special: boolean;
-	/**
-	 * The text associated with that token
-	 */
-	text: string;
-	[property: string]: unknown;
-}
-
-/**
- * The reason why the generation was stopped.
- *
- * length: The generated sequence reached the maximum allowed length
- *
- * eos_token: The model generated an end-of-sentence (EOS) token
- *
- * stop_sequence: One of the sequence in stop_sequences was generated
- */
-export type TextGenerationFinishReason = "length" | "eos_token" | "stop_sequence";
+export type { TextGenerationInput, TextGenerationOutput };
 
 /**
  * Use to continue text from a prompt. This is a very generic task. Recommended model: gpt2 (it’s a simple model, but fun to play with).
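With the interfaces deleted here and re-exported from `@huggingface/tasks`, existing imports from `@huggingface/inference` should keep compiling. A minimal sketch of both import paths (assumes the published package names; not part of this diff):

// Sketch: both forms are expected to type-check after this change.
import type { TextGenerationOutput } from "@huggingface/inference"; // via the re-export added above
import type { TextGenerationInput } from "@huggingface/tasks"; // directly from the shared types package

const input: TextGenerationInput = { inputs: "Hello" };
const output: TextGenerationOutput = { generated_text: "Hello world" };
console.log(input, output);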
packages/inference/src/tasks/nlp/textGenerationStream.ts
CHANGED
@@ -1,6 +1,6 @@
+import type { TextGenerationInput } from "@huggingface/tasks";
 import type { BaseArgs, Options } from "../../types";
 import { streamingRequest } from "../custom/streamingRequest";
-import type { TextGenerationInput } from "./textGeneration";
 
 export interface TextGenerationStreamToken {
 	/** Token ID from the model tokenizer */
packages/widgets/src/lib/components/InferenceWidget/widgets/ConversationalWidget/ConversationalWidget.svelte
CHANGED
@@ -51,6 +51,8 @@
 	let inferenceClient: HfInference | undefined = undefined;
 	let abort: AbortController | undefined = undefined;
 
+	$: inferenceClient = new HfInference(apiToken);
+
 	// Check config and compile template
 	onMount(() => {
 		const config = model.config;
@@ -84,8 +86,6 @@
 			error = `Invalid chat template: "${(e as Error).message}"`;
 			return;
 		}
-
-		inferenceClient = new HfInference(apiToken);
 	});
 
 	async function handleNewMessage(): Promise<void> {
@@ -165,6 +165,7 @@
 			signal: abort?.signal,
 			use_cache: useCache || !$isLoggedIn,
 			wait_for_model: withModelLoading,
+			retry_on_error: false,
 		} satisfies Options;
 
 		tgiSupportedModels = await getTgiSupportedModels(apiUrl);
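The reactive statement above rebuilds the `HfInference` client whenever `apiToken` changes (for example after logging in), instead of creating it once in `onMount`, and `retry_on_error: false` opts out of the client's automatic retry on error. A rough plain-TypeScript approximation of what the Svelte `$:` statement does (a sketch, not the widget code):

// Approximation of `$: inferenceClient = new HfInference(apiToken)` outside of Svelte (sketch).
import { HfInference } from "@huggingface/inference";

let inferenceClient: HfInference | undefined;

function onApiTokenChange(apiToken: string | undefined): void {
	// Svelte re-runs the reactive statement each time apiToken changes; this mimics that.
	inferenceClient = new HfInference(apiToken);
}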
packages/widgets/src/routes/+page.svelte
CHANGED
@@ -28,25 +28,70 @@
 				apiToken = token;
 			}
 		}
+
+		isLoggedIn.set(true);
 	});
 
 	const models: ModelData[] = [
 		{
-			id: "
+			id: "meta-llama/Meta-Llama-3-8B-Instruct",
 			pipeline_tag: "text-generation",
 			tags: ["conversational"],
 			inference: InferenceDisplayability.Yes,
 			config: {
-				architectures: ["
-				model_type: "
+				architectures: ["LlamaForCausalLM"],
+				model_type: "llama",
 				tokenizer_config: {
 					chat_template:
-						"{
-
+						"{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
+					bos_token: "<|begin_of_text|>",
+					eos_token: "<|end_of_text|>",
+				},
+			},
+			widgetData: [
+				{ text: "This is a text-only example", example_title: "Text only" },
+				{
+					messages: [{ content: "Please exlain QCD in very few words", role: "user" }],
+					example_title: "Chat messages",
+				},
+				{
+					messages: [{ content: "Please exlain QCD in very few words", role: "user" }],
+					output: {
+						text: "QCD is the physics of strong force and small particles.",
+					},
+					example_title: "Chat messages with Output",
+				},
+				{
+					text: "Explain QCD in one short sentence.",
+					output: {
+						text: "QCD is the physics of strong force and small particles.",
+					},
+					example_title: "Text only with Output",
+				},
+				{
+					example_title: "Invalid example - unsupported role",
+					messages: [
+						{ role: "system", content: "This will fail because of the chat template" },
+						{ role: "user", content: "What's your favorite condiment?" },
+					],
+				},
+			],
+		},
+		{
+			id: "microsoft/Phi-3-mini-128k-instruct",
+			pipeline_tag: "text-generation",
+			tags: ["conversational"],
+			inference: InferenceDisplayability.Yes,
+			config: {
+				architectures: ["Phi3ForCausalLM"],
+				model_type: "phi3",
+				tokenizer_config: {
 					bos_token: "<s>",
-
+					chat_template:
+						"{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') %}{{'<|user|>' + '\n' + message['content'] + '<|end|>' + '\n' + '<|assistant|>' + '\n'}}{% elif (message['role'] == 'assistant') %}{{message['content'] + '<|end|>' + '\n'}}{% endif %}{% endfor %}",
+					eos_token: "<|endoftext|>",
+					pad_token: "<|endoftext|>",
 					unk_token: "<unk>",
-				pad_token: null,
 				},
 			},
 			widgetData: [