schmuell committed on
Commit
b57bc05
1 Parent(s): 3826805

Upload 8 files

Browse files
README.md ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers.js
3
+ ---
4
+
5
+ https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v1.0 with ONNX weights to be compatible with Transformers.js.
6
+
7
+
8
+ ## Usage (Transformers.js)
9
+
10
+ If you haven't already, you can install the [Transformers.js](https://huggingface.co/docs/transformers.js) JavaScript library from [NPM](https://www.npmjs.com/package/@xenova/transformers) using:
11
+ ```bash
12
+ npm i @xenova/transformers
13
+ ```
14
+
15
+ **Example:** Text generation with `Xenova/TinyLlama-1.1B-Chat-v1.0`.
16
+
17
+ ```js
18
+ import { pipeline } from '@xenova/transformers';
19
+
20
+ // Create a text-generation pipeline
21
+ const generator = await pipeline('text-generation', 'Xenova/TinyLlama-1.1B-Chat-v1.0');
22
+
23
+ // Define the list of messages
24
+ const messages = [
25
+ { "role": "system", "content": "You are a friendly assistant." },
26
+ { "role": "user", "content": "Explain thermodynamics in simple terms." },
27
+ ];
28
+
29
+ // Construct the prompt
30
+ const prompt = generator.tokenizer.apply_chat_template(messages, {
31
+ tokenize: false, add_generation_prompt: true,
32
+ });
33
+
34
+ // Generate a response
35
+ const result = await generator(prompt, {
36
+ max_new_tokens: 256,
37
+ temperature: 0.7,
38
+ do_sample: true,
39
+ top_k: 50,
40
+ });
41
+ console.log(result);
42
+ // [
43
+ // {
44
+ // generated_text: '<|system|>\n' +
45
+ // 'You are a friendly assistant.\n' +
46
+ // '<|user|>\n' +
47
+ // 'Explain thermodynamics in simple terms.\n' +
48
+ // '<|assistant|>\n' +
49
+ // 'Thermodynamics is a branch of physics that deals with the study of heat and its transfer, including the relationship between matter and energy, the concept of chemical equilibrium, and the effects of temperature on chemical and physical processes. In thermodynamics, the properties of matter (such as heat capacity, specific heat, and entropy) are considered and their behavior is studied in relation to the temperature.\n\n' +
50
+ // 'Here are some simple steps to explain thermodynamics in simple terms:\n\n' +
51
+ // '1. Energy: Energy is the ability to do work. It is the ability to transfer heat or do other thermodynamic processes. Some common forms of energy are heat, light, electricity, and chemical energy.\n\n' +
52
+ // '2. Heat: Heat is a form of energy that can be transferred from one place to another. It is the ability to induce a change in the temperature of a body or system.\n\n' +
53
+ // '3. Heat capacity: Heat capacity is the amount of heat required to raise the temperature of a system by 1 degree Kelvin (K). It is a measure of the ability of a material to absorb and dissipate thermal energy.\n\n' +
54
+ // '4. Specific heat: Specific heat is the heat required to raise the'
55
+ // }
56
+ // ]
57
+
58
+ ```
59
+
60
+ ---
61
+
62
+ Note: Having a separate repo for ONNX weights is intended to be a temporary solution until WebML gains more traction. If you would like to make your models web-ready, we recommend converting to ONNX using [🤗 Optimum](https://huggingface.co/docs/optimum/index) and structuring your repo like this one (with ONNX weights located in a subfolder named `onnx`).
config.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
3
+ "architectures": [
4
+ "LlamaForCausalLM"
5
+ ],
6
+ "attention_bias": false,
7
+ "attention_dropout": 0.0,
8
+ "bos_token_id": 1,
9
+ "eos_token_id": 2,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 2048,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 5632,
14
+ "max_position_embeddings": 2048,
15
+ "model_type": "llama",
16
+ "num_attention_heads": 32,
17
+ "num_hidden_layers": 22,
18
+ "num_key_value_heads": 4,
19
+ "pretraining_tp": 1,
20
+ "rms_norm_eps": 1e-05,
21
+ "rope_scaling": null,
22
+ "rope_theta": 10000.0,
23
+ "tie_word_embeddings": false,
24
+ "transformers_version": "4.36.2",
25
+ "use_cache": true,
26
+ "vocab_size": 32000,
27
+ "precision": "float16"
28
+ }
generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 1,
3
+ "eos_token_id": 2,
4
+ "max_length": 2048,
5
+ "pad_token_id": 0,
6
+ "transformers_version": "4.36.2"
7
+ }
onnx/decoder_model_merged.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ce0686d0d5898230e1b3519485074ffebe8ebb9fe7f6c4f55fff3e537b2dfa5
3
+ size 714750596
quantize_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "per_channel": false,
3
+ "reduce_range": false,
4
+ "per_model_config": {
5
+ "model": {
6
+ "op_types": [
7
+ "ReduceMean",
8
+ "Range",
9
+ "Where",
10
+ "Sqrt",
11
+ "Cast",
12
+ "Expand",
13
+ "Concat",
14
+ "Sub",
15
+ "Squeeze",
16
+ "Neg",
17
+ "Constant",
18
+ "Add",
19
+ "Transpose",
20
+ "Unsqueeze",
21
+ "Equal",
22
+ "ConstantOfShape",
23
+ "Softmax",
24
+ "Gather",
25
+ "Pow",
26
+ "Reshape",
27
+ "Sigmoid",
28
+ "Slice",
29
+ "Div",
30
+ "MatMul",
31
+ "Less",
32
+ "Shape",
33
+ "Mul"
34
+ ],
35
+ "weight_type": "QInt8"
36
+ }
37
+ }
38
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "unk_token": {
24
+ "content": "<unk>",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ }
30
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "added_tokens_decoder": {
5
+ "0": {
6
+ "content": "<unk>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "1": {
14
+ "content": "<s>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "2": {
22
+ "content": "</s>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ }
29
+ },
30
+ "bos_token": "<s>",
31
+ "chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
32
+ "clean_up_tokenization_spaces": false,
33
+ "eos_token": "</s>",
34
+ "legacy": false,
35
+ "model_max_length": 2048,
36
+ "pad_token": "</s>",
37
+ "padding_side": "right",
38
+ "sp_model_kwargs": {},
39
+ "tokenizer_class": "LlamaTokenizer",
40
+ "unk_token": "<unk>",
41
+ "use_default_system_prompt": false
42
+ }