add model

Files changed (9) hide show

README.md +27 -0
config.json +31 -0
eval_results.json +3 -0
model.onnx +3 -0
no_qdq.png +0 -0
ort_config.json +190 -0
qdq.png +0 -0
tokenizer_config.json +1 -0
vocab.txt +0 -0

README.md CHANGED Viewed

@@ -1,3 +1,30 @@
 ---
 license: apache-2.0
 ---

 ---
 license: apache-2.0
+datasets:
+- sst2
+- glue
 ---
+This model is a fork of https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english, quantized using static Post-Training Quantization (PTQ) with ONNX Runtime and the 🤗 Optimum library.
+It achieves 0.897 accuracy on the validation set.
+This model uses the ONNX Runtime static quantization configurations `qdq_add_pair_to_weight=True` and `qdq_dedicated_pair=True`. As a result, **weights are stored in fp32** and a full Quantize + Dequantize pair is inserted for each weight, whereas by default weights are stored in int8 and only a Dequantize node is inserted for them. Moreover, each QDQ pair here is dedicated to a single consumer node (it has a single output) instead of being shared between several nodes. For more details, see the documentation: https://github.com/microsoft/onnxruntime/blob/ade0d291749144e1962884a9cfa736d4e1e80ff8/onnxruntime/python/tools/quantization/quantize.py#L432-L441
+This is useful for later loading a statically quantized model in TensorRT.
+To load this model:
+```python
+from optimum.onnxruntime import ORTModelForSequenceClassification
+model = ORTModelForSequenceClassification.from_pretrained("fxmarty/distilbert-base-uncased-finetuned-sst-2-english-int8-static")
+```
+<details>
+<summary>Weights stored as int8, only DequantizeLinear nodes (model here: https://huggingface.co/fxmarty/distilbert-base-uncased-finetuned-sst-2-english-int8-static)</summary>
+![DQ only](./no_qdq.png)
+</details>
+<details>
+<summary>Weights stored as fp32, with QuantizeLinear + DequantizeLinear nodes (this model)</summary>
+![QDQ](./qdq.png)
+</details>

config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "finetuning_task": "sst-2",
+  "hidden_dim": 3072,
+  "id2label": {
+    "0": "NEGATIVE",
+    "1": "POSITIVE"
+  },
+  "initializer_range": 0.02,
+  "label2id": {
+    "NEGATIVE": 0,
+    "POSITIVE": 1
+  },
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "output_past": true,
+  "pad_token_id": 0,
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "vocab_size": 30522
+}

eval_results.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+    "accuracy": 0.8967889908256881
+}

model.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c3baf2c754c909076fb4deb57c5f5eae163b147532c09f2efe48f7eb3a0cf98c
+size 267991623

no_qdq.png ADDED Viewed

ort_config.json ADDED Viewed

	@@ -0,0 +1,190 @@

+{
+  "opset": null,
+  "optimization": {},
+  "optimum_version": "1.4.1.dev0",
+  "quantization": {
+    "activations_dtype": "QInt8",
+    "activations_symmetric": false,
+    "format": "QDQ",
+    "is_static": true,
+    "mode": "QLinearOps",
+    "nodes_to_exclude": [
+      "Sub_471",
+      "Add_504",
+      "Pow_333",
+      "Add_241",
+      "Erf_239",
+      "Add_418",
+      "Mul_479",
+      "ReduceMean_310",
+      "Div_502",
+      "ReduceMean_142",
+      "Div_256",
+      "Sqrt_255",
+      "ReduceMean_498",
+      "Mul_162",
+      "Add_159",
+      "ReduceMean_494",
+      "Add_336",
+      "Div_396",
+      "ReduceMean_388",
+      "Erf_321",
+      "Sqrt_313",
+      "Mul_242",
+      "Mul_397",
+      "ReduceMean_170",
+      "ReduceMean_228",
+      "Mul_585",
+      "Add_141",
+      "ReduceMean_556",
+      "Sub_577",
+      "Div_338",
+      "Add_340",
+      "Add_165",
+      "Add_94",
+      "Add_305",
+      "ReduceMean_392",
+      "Add_387",
+      "Mul_160",
+      "Div_156",
+      "Div_92",
+      "ReduceMean_580",
+      "Mul_490",
+      "ReduceMean_412",
+      "ReduceMean_88",
+      "Mul_339",
+      "Div_314",
+      "Pow_579",
+      "Add_586",
+      "Mul_324",
+      "Pow_555",
+      "Div_420",
+      "Sub_389",
+      "ReduceMean_416",
+      "Mul_326",
+      "Div_238",
+      "Mul_572",
+      "ReduceMean_84",
+      "Pow_251",
+      "Add_558",
+      "Sub_331",
+      "Sqrt_149",
+      "Add_487",
+      "Add_398",
+      "Sub_143",
+      "Add_469",
+      "Add_551",
+      "Mul_503",
+      "Sub_553",
+      "Sqrt_231",
+      "Mul_175",
+      "Pow_169",
+      "Pow_473",
+      "ReduceMean_474",
+      "Sqrt_395",
+      "Add_312",
+      "Add_422",
+      "Erf_485",
+      "Sub_495",
+      "Add_148",
+      "Pow_415",
+      "Pow_497",
+      "Sub_167",
+      "Erf_403",
+      "Div_150",
+      "Pow_227",
+      "Div_174",
+      "Sub_413",
+      "ReduceMean_252",
+      "Add_230",
+      "Div_484",
+      "Mul_93",
+      "Mul_151",
+      "Add_394",
+      "Add_493",
+      "Add_247",
+      "Mul_421",
+      "Sub_225",
+      "Div_560",
+      "Sqrt_583",
+      "ReduceMean_306",
+      "Add_476",
+      "Sqrt_419",
+      "Sub_85",
+      "Mul_406",
+      "ReduceMean_166",
+      "Mul_570",
+      "Mul_315",
+      "ReduceMean_576",
+      "Pow_145",
+      "Mul_408",
+      "Add_258",
+      "Add_405",
+      "Add_575",
+      "ReduceMean_470",
+      "Mul_561",
+      "Pow_87",
+      "Add_254",
+      "Add_562",
+      "Sqrt_559",
+      "Pow_309",
+      "Add_411",
+      "Sqrt_91",
+      "Mul_257",
+      "Add_500",
+      "Add_83",
+      "Add_323",
+      "Sqrt_337",
+      "Div_584",
+      "Mul_488",
+      "Sqrt_477",
+      "ReduceMean_552",
+      "Div_320",
+      "Add_223",
+      "Add_329",
+      "Add_176",
+      "Add_316",
+      "Div_232",
+      "Add_480",
+      "Mul_244",
+      "ReduceMean_146",
+      "Add_90",
+      "Erf_157",
+      "Sub_307",
+      "ReduceMean_224",
+      "Erf_567",
+      "ReduceMean_330",
+      "Add_569",
+      "Add_582",
+      "Mul_233",
+      "Sqrt_501",
+      "Sqrt_173",
+      "ReduceMean_248",
+      "Pow_391",
+      "Div_402",
+      "Sub_249",
+      "Add_172",
+      "ReduceMean_334",
+      "Div_566",
+      "Div_478",
+      "Add_152",
+      "Add_234"
+    ],
+    "nodes_to_quantize": [],
+    "operators_to_quantize": [
+      "MatMul",
+      "Add"
+    ],
+    "per_channel": false,
+    "qdq_add_pair_to_weight": true,
+    "qdq_dedicated_pair": true,
+    "qdq_op_type_per_channel_support_to_axis": {
+      "MatMul": 1
+    },
+    "reduce_range": false,
+    "weights_dtype": "QInt8",
+    "weights_symmetric": true
+  },
+  "transformers_version": "4.23.0.dev0",
+  "use_external_data_format": false
+}

qdq.png ADDED Viewed

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"model_max_length": 512, "do_lower_case": true}

vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff