Upload folder using huggingface_hub

Browse files

Files changed (8)

config.json +1 -2
generation_config.json +1 -1
onnx/decoder_model_merged.onnx +1 -1
onnx/decoder_model_merged_quantized.onnx +2 -2
onnx/decoder_model_quantized.onnx +2 -2
onnx/decoder_with_past_model_quantized.onnx +2 -2
onnx/encoder_model_quantized.onnx +2 -2
quant_config.json +89 -0

config.json CHANGED Viewed

@@ -145,8 +145,7 @@
     50361,
     50362
   ],
-  "torch_dtype": "float32",
-  "transformers_version": "4.29.2",
   "use_cache": true,
   "use_weighted_layer_sum": false,
   "vocab_size": 51865

     50361,
     50362
   ],
+  "transformers_version": "4.30.2",
   "use_cache": true,
   "use_weighted_layer_sum": false,
   "vocab_size": 51865

generation_config.json CHANGED Viewed

@@ -217,5 +217,5 @@
     "transcribe": 50359,
     "translate": 50358
   },
-  "transformers_version": "4.29.2"
 }

     "transcribe": 50359,
     "translate": 50358
   },
+  "transformers_version": "4.30.2"
 }

onnx/decoder_model_merged.onnx CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ca286b0628b2bf1a25b8f7d36640926d3394b24b81c6dfbd1e380920d72d92c1
 size 198197535

 version https://git-lfs.github.com/spec/v1
+oid sha256:3a3f5dfc9cf89e4910a1be6574ed519181eefc743fd125ddcd4a97632fd44e94
 size 198197535

onnx/decoder_model_merged_quantized.onnx CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a5e98c24f94eeefadebec026324a67baec6ff3ea504bb4f140134b95d8eb6d00
-size 50943703

 version https://git-lfs.github.com/spec/v1
+oid sha256:3630e9e28169f8664cdaed6d2b0f00abae079bae7e4336c249308022635721b4
+size 50584327

onnx/decoder_model_quantized.onnx CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c2741fcc4bf68bebccf3bd5c607a0c541d8ab0938444d6deb0f979ebfbed4367
-size 50707856

 version https://git-lfs.github.com/spec/v1
+oid sha256:6ae32f7a6c43d40740199b20c1720539f3d6dcd73d1bc0ff0c9d22f4386bdc37
+size 50348480

onnx/decoder_with_past_model_quantized.onnx CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b0c3cfb8a019b7524a3d9c7ca8d47bc6a7ee3449c8413f105e398c1be11cbae0
-size 49458544

 version https://git-lfs.github.com/spec/v1
+oid sha256:ac92773054aca908fca90d7e84a8c97a995526e9fe6292cf00c6e77ff7d223dd
+size 49114568

onnx/encoder_model_quantized.onnx CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:00c5456c76e0fefded6980221be42f67470feebf6f7b510ea3e645c42b6bbf71
-size 10182475

 version https://git-lfs.github.com/spec/v1
+oid sha256:ca9d7bb2836193704b7e2435e3bbadbed985ac3a79ab7406b244b8865ab1a5c0
+size 10113248

quant_config.json ADDED Viewed

@@ -0,0 +1,89 @@

+{
+    "per_channel": false,
+    "reduce_range": false,
+    "per_model_config": {
+        "encoder_model": {
+            "op_types": [
+                "Transpose",
+                "Add",
+                "Pow",
+                "Reshape",
+                "Div",
+                "Sqrt",
+                "Gather",
+                "Shape",
+                "Conv",
+                "Mul",
+                "ReduceMean",
+                "Unsqueeze",
+                "Sub",
+                "Concat",
+                "Constant",
+                "MatMul",
+                "Erf",
+                "Softmax"
+            ],
+            "weight_type": "QUInt8"
+        },
+        "decoder_model": {
+            "op_types": [
+                "Reshape",
+                "Equal",
+                "MatMul",
+                "Constant",
+                "Add",
+                "Pow",
+                "ReduceMean",
+                "Less",
+                "Cast",
+                "Concat",
+                "ConstantOfShape",
+                "Sqrt",
+                "Gather",
+                "Div",
+                "Sub",
+                "Where",
+                "Transpose",
+                "Shape",
+                "Range",
+                "Squeeze",
+                "Mul",
+                "Expand",
+                "Unsqueeze",
+                "Slice",
+                "Erf",
+                "Softmax"
+            ],
+            "weight_type": "QInt8"
+        },
+        "decoder_model_merged": {
+            "op_types": [
+                "If"
+            ],
+            "weight_type": "QInt8"
+        },
+        "decoder_with_past_model": {
+            "op_types": [
+                "Transpose",
+                "Add",
+                "Reshape",
+                "Pow",
+                "Sqrt",
+                "Gather",
+                "Div",
+                "Shape",
+                "ReduceMean",
+                "Mul",
+                "Unsqueeze",
+                "Sub",
+                "MatMul",
+                "Constant",
+                "Concat",
+                "Slice",
+                "Erf",
+                "Softmax"
+            ],
+            "weight_type": "QInt8"
+        }
+    }
+}