schmuell committed on
Commit
b57bc05
1 Parent(s): 3826805

Upload 8 files

Browse files
README.md ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers.js
3
+ ---
4
+
5
+ https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v1.0 with ONNX weights to be compatible with Transformers.js.
6
+
7
+
8
+ ## Usage (Transformers.js)
9
+
10
+ If you haven't already, you can install the [Transformers.js](https://huggingface.co/docs/transformers.js) JavaScript library from [NPM](https://www.npmjs.com/package/@xenova/transformers) using:
11
+ ```bash
12
+ npm i @xenova/transformers
13
+ ```
14
+
15
+ **Example:** Text generation with `Xenova/TinyLlama-1.1B-Chat-v1.0`.
16
+
17
+ ```js
18
+ import { pipeline } from '@xenova/transformers';
19
+
20
+ // Create a text-generation pipeline
21
+ const generator = await pipeline('text-generation', 'Xenova/TinyLlama-1.1B-Chat-v1.0');
22
+
23
+ // Define the list of messages
24
+ const messages = [
25
+ { "role": "system", "content": "You are a friendly assistant." },
26
+ { "role": "user", "content": "Explain thermodynamics in simple terms." },
27
+ ];
28
+
29
+ // Construct the prompt
30
+ const prompt = generator.tokenizer.apply_chat_template(messages, {
31
+ tokenize: false, add_generation_prompt: true,
32
+ });
33
+
34
+ // Generate a response
35
+ const result = await generator(prompt, {
36
+ max_new_tokens: 256,
37
+ temperature: 0.7,
38
+ do_sample: true,
39
+ top_k: 50,
40
+ });
41
+ console.log(result);
42
+ // [
43
+ // {
44
+ // generated_text: '<|system|>\n' +
45
+ // 'You are a friendly assistant.\n' +
46
+ // '<|user|>\n' +
47
+ // 'Explain thermodynamics in simple terms.\n' +
48
+ // '<|assistant|>\n' +
49
+ // 'Thermodynamics is a branch of physics that deals with the study of heat and its transfer, including the relationship between matter and energy, the concept of chemical equilibrium, and the effects of temperature on chemical and physical processes. In thermodynamics, the properties of matter (such as heat capacity, specific heat, and entropy) are considered and their behavior is studied in relation to the temperature.\n\n' +
50
+ // 'Here are some simple steps to explain thermodynamics in simple terms:\n\n' +
51
+ // '1. Energy: Energy is the ability to do work. It is the ability to transfer heat or do other thermodynamic processes. Some common forms of energy are heat, light, electricity, and chemical energy.\n\n' +
52
+ // '2. Heat: Heat is a form of energy that can be transferred from one place to another. It is the ability to induce a change in the temperature of a body or system.\n\n' +
53
+ // '3. Heat capacity: Heat capacity is the amount of heat required to raise the temperature of a system by 1 degree Kelvin (K). It is a measure of the ability of a material to absorb and dissipate thermal energy.\n\n' +
54
+ // '4. Specific heat: Specific heat is the heat required to raise the'
55
+ // }
56
+ // ]
57
+
58
+ ```
59
+
60
+ ---
61
+
62
+ Note: Having a separate repo for ONNX weights is intended to be a temporary solution until WebML gains more traction. If you would like to make your models web-ready, we recommend converting to ONNX using [🤗 Optimum](https://huggingface.co/docs/optimum/index) and structuring your repo like this one (with ONNX weights located in a subfolder named `onnx`).
config.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
3
+ "architectures": [
4
+ "LlamaForCausalLM"
5
+ ],
6
+ "attention_bias": false,
7
+ "attention_dropout": 0.0,
8
+ "bos_token_id": 1,
9
+ "eos_token_id": 2,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 2048,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 5632,
14
+ "max_position_embeddings": 2048,
15
+ "model_type": "llama",
16
+ "num_attention_heads": 32,
17
+ "num_hidden_layers": 22,
18
+ "num_key_value_heads": 4,
19
+ "pretraining_tp": 1,
20
+ "rms_norm_eps": 1e-05,
21
+ "rope_scaling": null,
22
+ "rope_theta": 10000.0,
23
+ "tie_word_embeddings": false,
24
+ "transformers_version": "4.36.2",
25
+ "use_cache": true,
26
+ "vocab_size": 32000,
27
+ "precision": "float16"
28
+ }
generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 1,
3
+ "eos_token_id": 2,
4
+ "max_length": 2048,
5
+ "pad_token_id": 0,
6
+ "transformers_version": "4.36.2"
7
+ }
onnx/decoder_model_merged.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ce0686d0d5898230e1b3519485074ffebe8ebb9fe7f6c4f55fff3e537b2dfa5
3
+ size 714750596
quantize_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "per_channel": false,
3
+ "reduce_range": false,
4
+ "per_model_config": {
5
+ "model": {
6
+ "op_types": [
7
+ "ReduceMean",
8
+ "Range",
9
+ "Where",
10
+ "Sqrt",
11
+ "Cast",
12
+ "Expand",
13
+ "Concat",
14
+ "Sub",
15
+ "Squeeze",
16
+ "Neg",
17
+ "Constant",
18
+ "Add",
19
+ "Transpose",
20
+ "Unsqueeze",
21
+ "Equal",
22
+ "ConstantOfShape",
23
+ "Softmax",
24
+ "Gather",
25
+ "Pow",
26
+ "Reshape",
27
+ "Sigmoid",
28
+ "Slice",
29
+ "Div",
30
+ "MatMul",
31
+ "Less",
32
+ "Shape",
33
+ "Mul"
34
+ ],
35
+ "weight_type": "QInt8"
36
+ }
37
+ }
38
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "unk_token": {
24
+ "content": "<unk>",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ }
30
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "added_tokens_decoder": {
5
+ "0": {
6
+ "content": "<unk>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "1": {
14
+ "content": "<s>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "2": {
22
+ "content": "</s>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ }
29
+ },
30
+ "bos_token": "<s>",
31
+ "chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
32
+ "clean_up_tokenization_spaces": false,
33
+ "eos_token": "</s>",
34
+ "legacy": false,
35
+ "model_max_length": 2048,
36
+ "pad_token": "</s>",
37
+ "padding_side": "right",
38
+ "sp_model_kwargs": {},
39
+ "tokenizer_class": "LlamaTokenizer",
40
+ "unk_token": "<unk>",
41
+ "use_default_system_prompt": false
42
+ }