Upload 15 files

Browse files

Files changed (5) hide show

onnx/decoder_model_merged.onnx +3 -0
onnx/decoder_model_merged_quantized.onnx +3 -0
onnx/decoder_with_past_model.onnx +3 -0
onnx/decoder_with_past_model_quantized.onnx +3 -0
quantize_config.json +90 -29

onnx/decoder_model_merged.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d603904cb811150731ec5d2d4680e7b3a79bd728b6161a9688563ca92fa403c5
+size 387342586

onnx/decoder_model_merged_quantized.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0954635de52a206823c20847ff4793622a7b9c0ef1e5adaaedb906dba02ce466
+size 99759579

onnx/decoder_with_past_model.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6a607d1e4630646433316bbd5d27de80baa0292d01c9f962f97f783de64f4996
+size 385864377

onnx/decoder_with_past_model_quantized.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5c5a4a2f6ecdbee840feb854a400b1b1856a896a29d15993458dc7fd1a128719
+size 98063170

quantize_config.json CHANGED Viewed

@@ -2,63 +2,124 @@
     "per_channel": false,
     "reduce_range": false,
     "per_model_config": {
-        "decoder_model": {
             "op_types": [
-                "Mul",
-                "Concat",
-                "Range",
-                "Add",
                 "Transpose",
                 "Sub",
                 "Slice",
-                "MatMul",
                 "Div",
                 "Split",
-                "Cast",
-                "ConstantOfShape",
-                "Sqrt",
-                "Pow",
-                "Gemm",
-                "ReduceMean",
-                "Constant",
                 "Softmax",
                 "Shape",
                 "Tanh",
                 "Reshape",
                 "Gather",
-                "Squeeze",
                 "Where",
-                "Unsqueeze"
             ],
             "weight_type": "QInt8"
         },
         "encoder_model": {
             "op_types": [
-                "Mul",
-                "Concat",
-                "Add",
                 "Transpose",
                 "Sub",
-                "Slice",
                 "Expand",
-                "MatMul",
                 "Div",
-                "ConstantOfShape",
-                "Sqrt",
-                "Pow",
-                "ReduceMean",
-                "Equal",
-                "Constant",
                 "Softmax",
                 "Shape",
                 "Erf",
                 "Reshape",
                 "Gather",
-                "Conv",
                 "Where",
-                "Unsqueeze"
             ],
-            "weight_type": "QUInt8"
         }
     }
 }

     "per_channel": false,
     "reduce_range": false,
     "per_model_config": {
+        "decoder_with_past_model": {
             "op_types": [
+                "Sqrt",
                 "Transpose",
+                "Reshape",
+                "Gather",
+                "Constant",
+                "Pow",
                 "Sub",
+                "Cast",
+                "Mul",
+                "Range",
+                "Where",
                 "Slice",
                 "Div",
                 "Split",
+                "MatMul",
+                "Unsqueeze",
+                "Concat",
                 "Softmax",
+                "Squeeze",
+                "Add",
                 "Shape",
+                "ConstantOfShape",
+                "Gemm",
                 "Tanh",
+                "ReduceMean"
+            ],
+            "weight_type": "QInt8"
+        },
+        "decoder_model": {
+            "op_types": [
+                "Sqrt",
+                "Transpose",
                 "Reshape",
                 "Gather",
+                "Constant",
+                "Pow",
+                "Sub",
+                "Cast",
+                "Mul",
+                "Range",
                 "Where",
+                "Slice",
+                "Div",
+                "Split",
+                "MatMul",
+                "Unsqueeze",
+                "Concat",
+                "Softmax",
+                "Squeeze",
+                "Add",
+                "Shape",
+                "ConstantOfShape",
+                "Gemm",
+                "Tanh",
+                "ReduceMean"
             ],
             "weight_type": "QInt8"
         },
         "encoder_model": {
             "op_types": [
+                "Sqrt",
                 "Transpose",
+                "Reshape",
+                "Gather",
+                "Constant",
+                "Pow",
                 "Sub",
+                "Mul",
                 "Expand",
+                "Where",
+                "Slice",
                 "Div",
+                "MatMul",
+                "Unsqueeze",
+                "Concat",
                 "Softmax",
+                "Add",
                 "Shape",
                 "Erf",
+                "ConstantOfShape",
+                "ReduceMean",
+                "Equal",
+                "Conv"
+            ],
+            "weight_type": "QUInt8"
+        },
+        "decoder_model_merged": {
+            "op_types": [
+                "Sqrt",
+                "Transpose",
                 "Reshape",
                 "Gather",
+                "Constant",
+                "Pow",
+                "Sub",
+                "Cast",
+                "Mul",
+                "If",
+                "Range",
                 "Where",
+                "Slice",
+                "Div",
+                "Split",
+                "MatMul",
+                "Unsqueeze",
+                "Concat",
+                "Softmax",
+                "Squeeze",
+                "Add",
+                "Shape",
+                "ConstantOfShape",
+                "Gemm",
+                "Tanh",
+                "ReduceMean"
             ],
+            "weight_type": "QInt8"
         }
     }
 }