Upload model
Browse files- README.md +40 -0
- config.json +57 -0
- decoder_model.onnx +3 -0
- decoder_with_past_model.onnx +3 -0
- encoder_model.onnx +3 -0
- ort_config.json +31 -0
- tokenizer.json +0 -0
README.md
CHANGED
@@ -1,3 +1,43 @@
|
|
1 |
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
license: apache-2.0
|
3 |
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
---
|
2 |
+
language:
|
3 |
+
- en
|
4 |
+
- fr
|
5 |
+
- ro
|
6 |
+
- de
|
7 |
+
datasets:
|
8 |
+
- c4
|
9 |
+
tags:
|
10 |
+
- int8
|
11 |
+
- summarization
|
12 |
+
- translation
|
13 |
+
|
14 |
license: apache-2.0
|
15 |
---
|
16 |
+
|
17 |
+
## [t5-small](https://huggingface.co/t5-small) exported to the ONNX format and dynamically quantized.
|
18 |
+
|
19 |
+
## Model description
|
20 |
+
|
21 |
+
[T5](https://huggingface.co/docs/transformers/model_doc/t5#t5) is an encoder-decoder model pre-trained on a multi-task mixture of unsupervised and supervised tasks, in which each task is converted into a text-to-text format.
|
22 |
+
|
23 |
+
For more information, please take a look at the original paper.
|
24 |
+
|
25 |
+
Paper: [Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer](https://arxiv.org/pdf/1910.10683.pdf)
|
26 |
+
|
27 |
+
Authors: *Colin Raffel, Noam Shazeer, Adam Roberts, Katherine Lee, Sharan Narang, Michael Matena, Yanqi Zhou, Wei Li, Peter J. Liu*
|
28 |
+
|
29 |
+
|
30 |
+
## Usage example
|
31 |
+
|
32 |
+
You can use this model with the Transformers *pipeline* API.
|
33 |
+
|
34 |
+
```python
|
35 |
+
from transformers import AutoTokenizer, pipeline
|
36 |
+
from optimum.onnxruntime import ORTModelForSeq2SeqLM
|
37 |
+
tokenizer = AutoTokenizer.from_pretrained("echarlaix/t5-small-dynamic")
|
38 |
+
model = ORTModelForSeq2SeqLM.from_pretrained("echarlaix/t5-small-dynamic")
|
39 |
+
translator = pipeline("translation_en_to_fr", model=model, tokenizer=tokenizer)
|
40 |
+
text = "He never went out without a book under his arm, and he often came back with two."
|
41 |
+
results = translator(text)
|
42 |
+
print(results)
|
43 |
+
```
|
config.json
ADDED
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "t5-small",
|
3 |
+
"architectures": [
|
4 |
+
"T5WithLMHeadModel"
|
5 |
+
],
|
6 |
+
"d_ff": 2048,
|
7 |
+
"d_kv": 64,
|
8 |
+
"d_model": 512,
|
9 |
+
"decoder_start_token_id": 0,
|
10 |
+
"dropout_rate": 0.1,
|
11 |
+
"eos_token_id": 1,
|
12 |
+
"feed_forward_proj": "relu",
|
13 |
+
"initializer_factor": 1.0,
|
14 |
+
"is_encoder_decoder": true,
|
15 |
+
"layer_norm_epsilon": 1e-06,
|
16 |
+
"model_type": "t5",
|
17 |
+
"n_positions": 512,
|
18 |
+
"num_decoder_layers": 6,
|
19 |
+
"num_heads": 8,
|
20 |
+
"num_layers": 6,
|
21 |
+
"output_past": true,
|
22 |
+
"pad_token_id": 0,
|
23 |
+
"relative_attention_max_distance": 128,
|
24 |
+
"relative_attention_num_buckets": 32,
|
25 |
+
"task_specific_params": {
|
26 |
+
"summarization": {
|
27 |
+
"early_stopping": true,
|
28 |
+
"length_penalty": 2.0,
|
29 |
+
"max_length": 200,
|
30 |
+
"min_length": 30,
|
31 |
+
"no_repeat_ngram_size": 3,
|
32 |
+
"num_beams": 4,
|
33 |
+
"prefix": "summarize: "
|
34 |
+
},
|
35 |
+
"translation_en_to_de": {
|
36 |
+
"early_stopping": true,
|
37 |
+
"max_length": 300,
|
38 |
+
"num_beams": 4,
|
39 |
+
"prefix": "translate English to German: "
|
40 |
+
},
|
41 |
+
"translation_en_to_fr": {
|
42 |
+
"early_stopping": true,
|
43 |
+
"max_length": 300,
|
44 |
+
"num_beams": 4,
|
45 |
+
"prefix": "translate English to French: "
|
46 |
+
},
|
47 |
+
"translation_en_to_ro": {
|
48 |
+
"early_stopping": true,
|
49 |
+
"max_length": 300,
|
50 |
+
"num_beams": 4,
|
51 |
+
"prefix": "translate English to Romanian: "
|
52 |
+
}
|
53 |
+
},
|
54 |
+
"transformers_version": "4.19.0.dev0",
|
55 |
+
"use_cache": true,
|
56 |
+
"vocab_size": 32128
|
57 |
+
}
|
decoder_model.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8a08e2c556fc3c0929c040a53a68cc3e8e0027ecb42056925577cc52d9f06cdb
|
3 |
+
size 58259052
|
decoder_with_past_model.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9d0cf75b07500325987ba6396abf76fb8ff971e32ee1e76bf010492f6c91f2fe
|
3 |
+
size 55103207
|
encoder_model.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:60328e59847197fb74cec4331fdf96fe08071ec4cf15a35e873a5ee93d3a641c
|
3 |
+
size 35433618
|
ort_config.json
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"opset": 13,
|
3 |
+
"optimization": {},
|
4 |
+
"optimum_version": "1.4.0.dev0",
|
5 |
+
"quantization": {
|
6 |
+
"activations_dtype": "QUInt8",
|
7 |
+
"activations_symmetric": false,
|
8 |
+
"format": "QOperator",
|
9 |
+
"is_static": false,
|
10 |
+
"mode": "IntegerOps",
|
11 |
+
"nodes_to_exclude": [],
|
12 |
+
"nodes_to_quantize": [],
|
13 |
+
"operators_to_quantize": [
|
14 |
+
"MatMul",
|
15 |
+
"Add",
|
16 |
+
"Gather",
|
17 |
+
"Transpose"
|
18 |
+
],
|
19 |
+
"per_channel": false,
|
20 |
+
"qdq_add_pair_to_weight": false,
|
21 |
+
"qdq_dedicated_pair": false,
|
22 |
+
"qdq_op_type_per_channel_support_to_axis": {
|
23 |
+
"MatMul": 1
|
24 |
+
},
|
25 |
+
"reduce_range": false,
|
26 |
+
"weights_dtype": "QInt8",
|
27 |
+
"weights_symmetric": true
|
28 |
+
},
|
29 |
+
"transformers_version": "4.20.1",
|
30 |
+
"use_external_data_format": false
|
31 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|