Upload model

Files changed (7) hide show

README.md CHANGED Viewed

@@ -1,3 +1,43 @@
 ---
 license: apache-2.0
 ---

 ---
+language:
+- en
+- fr
+- ro
+- de
+datasets:
+- c4
+tags:
+- int8
+- summarization
+- translation
 license: apache-2.0
 ---
+## [t5-small](https://huggingface.co/t5-small) exported to the ONNX format and dynamically quantized.
+## Model description
+[T5](https://huggingface.co/docs/transformers/model_doc/t5#t5) is an encoder-decoder model pre-trained on a multi-task mixture of unsupervised and supervised tasks, in which each task is converted into a text-to-text format.
+For more information, please take a look at the original paper.
+Paper: [Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer](https://arxiv.org/pdf/1910.10683.pdf)
+Authors: *Colin Raffel, Noam Shazeer, Adam Roberts, Katherine Lee, Sharan Narang, Michael Matena, Yanqi Zhou, Wei Li, Peter J. Liu*
+## Usage example
+You can use this model with the Transformers *pipeline* API.
+```python
+from transformers import AutoTokenizer, pipeline
+from optimum.onnxruntime import ORTModelForSeq2SeqLM
+tokenizer = AutoTokenizer.from_pretrained("echarlaix/t5-small-dynamic")
+model = ORTModelForSeq2SeqLM.from_pretrained("echarlaix/t5-small-dynamic")
+translator = pipeline("translation_en_to_fr", model=model, tokenizer=tokenizer)
+text = "He never went out without a book under his arm, and he often came back with two."
+results = translator(text)
+print(results)
+```

config.json ADDED Viewed

+{
+  "_name_or_path": "t5-small",
+  "architectures": [
+    "T5WithLMHeadModel"
+  ],
+  "d_ff": 2048,
+  "d_kv": 64,
+  "d_model": 512,
+  "decoder_start_token_id": 0,
+  "dropout_rate": 0.1,
+  "eos_token_id": 1,
+  "feed_forward_proj": "relu",
+  "initializer_factor": 1.0,
+  "is_encoder_decoder": true,
+  "layer_norm_epsilon": 1e-06,
+  "model_type": "t5",
+  "n_positions": 512,
+  "num_decoder_layers": 6,
+  "num_heads": 8,
+  "num_layers": 6,
+  "output_past": true,
+  "pad_token_id": 0,
+  "relative_attention_max_distance": 128,
+  "relative_attention_num_buckets": 32,
+  "task_specific_params": {
+    "summarization": {
+      "early_stopping": true,
+      "length_penalty": 2.0,
+      "max_length": 200,
+      "min_length": 30,
+      "no_repeat_ngram_size": 3,
+      "num_beams": 4,
+      "prefix": "summarize: "
+    },
+    "translation_en_to_de": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to German: "
+    },
+    "translation_en_to_fr": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to French: "
+    },
+    "translation_en_to_ro": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to Romanian: "
+    }
+  },
+  "transformers_version": "4.19.0.dev0",
+  "use_cache": true,
+  "vocab_size": 32128
+}

decoder_model.onnx ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:8a08e2c556fc3c0929c040a53a68cc3e8e0027ecb42056925577cc52d9f06cdb
+size 58259052

decoder_with_past_model.onnx ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:9d0cf75b07500325987ba6396abf76fb8ff971e32ee1e76bf010492f6c91f2fe
+size 55103207

encoder_model.onnx ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:60328e59847197fb74cec4331fdf96fe08071ec4cf15a35e873a5ee93d3a641c
+size 35433618

ort_config.json ADDED Viewed

+{
+  "opset": 13,
+  "optimization": {},
+  "optimum_version": "1.4.0.dev0",
+  "quantization": {
+    "activations_dtype": "QUInt8",
+    "activations_symmetric": false,
+    "format": "QOperator",
+    "is_static": false,
+    "mode": "IntegerOps",
+    "nodes_to_exclude": [],
+    "nodes_to_quantize": [],
+    "operators_to_quantize": [
+      "MatMul",
+      "Add",
+      "Gather",
+      "Transpose"
+    ],
+    "per_channel": false,
+    "qdq_add_pair_to_weight": false,
+    "qdq_dedicated_pair": false,
+    "qdq_op_type_per_channel_support_to_axis": {
+      "MatMul": 1
+    },
+    "reduce_range": false,
+    "weights_dtype": "QInt8",
+    "weights_symmetric": true
+  },
+  "transformers_version": "4.20.1",
+  "use_external_data_format": false
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff