feat: update converted model with config

Browse files

Files changed (15) hide show

config.json +1 -2
generation_config.json +2 -1
onnx/decoder_model.onnx +3 -0
onnx/decoder_model_merged.onnx +3 -0
onnx/decoder_model_merged_quantized.onnx +3 -0
onnx/decoder_model_quantized.onnx +3 -0
onnx/decoder_with_past_model.onnx +3 -0
onnx/decoder_with_past_model_quantized.onnx +3 -0
onnx/encoder_model.onnx +3 -0
onnx/encoder_model_quantized.onnx +3 -0
preprocessor_config.json +0 -0
quantize_config.json +109 -351
tokenizer.json +1 -1
tokenizer_config.json +33 -4
vocab.json +0 -0

config.json CHANGED Viewed

@@ -146,8 +146,7 @@
     50361,
     50362
   ],
-  "torch_dtype": "float32",
-  "transformers_version": "4.38.2",
   "use_cache": true,
   "use_weighted_layer_sum": false,
   "vocab_size": 51865

     50361,
     50362
   ],
+  "transformers_version": "4.33.2",
   "use_cache": true,
   "use_weighted_layer_sum": false,
   "vocab_size": 51865

generation_config.json CHANGED Viewed

@@ -252,5 +252,6 @@
     "transcribe": 50359,
     "translate": 50358
   },
-  "transformers_version": "4.38.2"
 }

     "transcribe": 50359,
     "translate": 50358
   },
+  "transformers_version": "4.33.2",
+  "trust_remote_code": false
 }

onnx/decoder_model.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:41476bfbb62f72bd4ac699a5b50845e486b0de673ae5f48e45a4e41fed183c03
+size 208271694

onnx/decoder_model_merged.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:96ecf3b6999f507430becfb964d7cdc9f60a116d3fccc2a990585ffb9b735ae0
+size 208491287

onnx/decoder_model_merged_quantized.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7aab16e3dce2b8a64a9f2f551766146c0ac8183d5218e4738d92b4aca5f5345a
+size 53617059

onnx/decoder_model_quantized.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:58a024c59f3c16266b01c8c8337234b7dbf3e9d315fa3ccb552a1b6e00fb4972
+size 53267560

onnx/decoder_with_past_model.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5ef88353f43a44b98b7b759bf31e240756c6f9d8808fd17e46e976920f61db44
+size 195635283

onnx/decoder_with_past_model_quantized.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a7d132ebf02d5facbaaf40b7ad29caf5162550fcb8163bb7ae449fde56ee1999
+size 50041642

onnx/encoder_model.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:42a2fb35d4d31cf0f1040df4c04415a297de1718931f8f40804eb3b1de83e8a5
+size 82461622

onnx/encoder_model_quantized.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c6679e3deb48907533a51da9a9c58ae0feab36c074fe826a45ab4ecaff755c44
+size 23183302

preprocessor_config.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

quantize_config.json CHANGED Viewed

@@ -1,357 +1,115 @@
 {
-    "fp16": {},
-    "q8": {
-        "per_model_config": {
-            "encoder_model": {
-                "op_types": [
-                    "Add",
-                    "Concat",
-                    "Constant",
-                    "Conv",
-                    "Div",
-                    "Erf",
-                    "Gather",
-                    "MatMul",
-                    "Mul",
-                    "Pow",
-                    "ReduceMean",
-                    "Reshape",
-                    "Shape",
-                    "Softmax",
-                    "Sqrt",
-                    "Sub",
-                    "Transpose",
-                    "Unsqueeze"
-                ],
-                "weight_type": "QUInt8"
-            },
-            "decoder_model": {
-                "op_types": [
-                    "Add",
-                    "Cast",
-                    "Concat",
-                    "Constant",
-                    "ConstantOfShape",
-                    "Div",
-                    "Equal",
-                    "Erf",
-                    "Expand",
-                    "Gather",
-                    "Less",
-                    "MatMul",
-                    "Mul",
-                    "Pow",
-                    "Range",
-                    "ReduceMean",
-                    "Reshape",
-                    "Shape",
-                    "Slice",
-                    "Softmax",
-                    "Sqrt",
-                    "Squeeze",
-                    "Sub",
-                    "Transpose",
-                    "Unsqueeze",
-                    "Where"
-                ],
-                "weight_type": "QInt8"
-            },
-            "decoder_with_past_model": {
-                "op_types": [
-                    "Add",
-                    "Concat",
-                    "Constant",
-                    "Div",
-                    "Erf",
-                    "Gather",
-                    "MatMul",
-                    "Mul",
-                    "Pow",
-                    "ReduceMean",
-                    "Reshape",
-                    "Shape",
-                    "Slice",
-                    "Softmax",
-                    "Sqrt",
-                    "Sub",
-                    "Transpose",
-                    "Unsqueeze"
-                ],
-                "weight_type": "QInt8"
-            },
-            "decoder_model_merged": {
-                "op_types": [
-                    "Add",
-                    "Cast",
-                    "Concat",
-                    "Constant",
-                    "ConstantOfShape",
-                    "Div",
-                    "Equal",
-                    "Erf",
-                    "Expand",
-                    "Gather",
-                    "If",
-                    "Less",
-                    "MatMul",
-                    "Mul",
-                    "Pow",
-                    "Range",
-                    "ReduceMean",
-                    "Reshape",
-                    "Shape",
-                    "Slice",
-                    "Softmax",
-                    "Sqrt",
-                    "Squeeze",
-                    "Sub",
-                    "Transpose",
-                    "Unsqueeze",
-                    "Where"
-                ],
-                "weight_type": "QInt8"
-            }
         },
-        "per_channel": false,
-        "reduce_range": false
-    },
-    "int8": {
-        "per_model_config": {
-            "encoder_model": {
-                "op_types": [
-                    "Add",
-                    "Concat",
-                    "Constant",
-                    "Conv",
-                    "Div",
-                    "Erf",
-                    "Gather",
-                    "MatMul",
-                    "Mul",
-                    "Pow",
-                    "ReduceMean",
-                    "Reshape",
-                    "Shape",
-                    "Softmax",
-                    "Sqrt",
-                    "Sub",
-                    "Transpose",
-                    "Unsqueeze"
-                ],
-                "weight_type": "QInt8"
-            },
-            "decoder_model": {
-                "op_types": [
-                    "Add",
-                    "Cast",
-                    "Concat",
-                    "Constant",
-                    "ConstantOfShape",
-                    "Div",
-                    "Equal",
-                    "Erf",
-                    "Expand",
-                    "Gather",
-                    "Less",
-                    "MatMul",
-                    "Mul",
-                    "Pow",
-                    "Range",
-                    "ReduceMean",
-                    "Reshape",
-                    "Shape",
-                    "Slice",
-                    "Softmax",
-                    "Sqrt",
-                    "Squeeze",
-                    "Sub",
-                    "Transpose",
-                    "Unsqueeze",
-                    "Where"
-                ],
-                "weight_type": "QInt8"
-            },
-            "decoder_with_past_model": {
-                "op_types": [
-                    "Add",
-                    "Concat",
-                    "Constant",
-                    "Div",
-                    "Erf",
-                    "Gather",
-                    "MatMul",
-                    "Mul",
-                    "Pow",
-                    "ReduceMean",
-                    "Reshape",
-                    "Shape",
-                    "Slice",
-                    "Softmax",
-                    "Sqrt",
-                    "Sub",
-                    "Transpose",
-                    "Unsqueeze"
-                ],
-                "weight_type": "QInt8"
-            },
-            "decoder_model_merged": {
-                "op_types": [
-                    "Add",
-                    "Cast",
-                    "Concat",
-                    "Constant",
-                    "ConstantOfShape",
-                    "Div",
-                    "Equal",
-                    "Erf",
-                    "Expand",
-                    "Gather",
-                    "If",
-                    "Less",
-                    "MatMul",
-                    "Mul",
-                    "Pow",
-                    "Range",
-                    "ReduceMean",
-                    "Reshape",
-                    "Shape",
-                    "Slice",
-                    "Softmax",
-                    "Sqrt",
-                    "Squeeze",
-                    "Sub",
-                    "Transpose",
-                    "Unsqueeze",
-                    "Where"
-                ],
-                "weight_type": "QInt8"
-            }
         },
-        "per_channel": false,
-        "reduce_range": false
-    },
-    "uint8": {
-        "per_model_config": {
-            "encoder_model": {
-                "op_types": [
-                    "Add",
-                    "Concat",
-                    "Constant",
-                    "Conv",
-                    "Div",
-                    "Erf",
-                    "Gather",
-                    "MatMul",
-                    "Mul",
-                    "Pow",
-                    "ReduceMean",
-                    "Reshape",
-                    "Shape",
-                    "Softmax",
-                    "Sqrt",
-                    "Sub",
-                    "Transpose",
-                    "Unsqueeze"
-                ],
-                "weight_type": "QUInt8"
-            },
-            "decoder_model": {
-                "op_types": [
-                    "Add",
-                    "Cast",
-                    "Concat",
-                    "Constant",
-                    "ConstantOfShape",
-                    "Div",
-                    "Equal",
-                    "Erf",
-                    "Expand",
-                    "Gather",
-                    "Less",
-                    "MatMul",
-                    "Mul",
-                    "Pow",
-                    "Range",
-                    "ReduceMean",
-                    "Reshape",
-                    "Shape",
-                    "Slice",
-                    "Softmax",
-                    "Sqrt",
-                    "Squeeze",
-                    "Sub",
-                    "Transpose",
-                    "Unsqueeze",
-                    "Where"
-                ],
-                "weight_type": "QUInt8"
-            },
-            "decoder_with_past_model": {
-                "op_types": [
-                    "Add",
-                    "Concat",
-                    "Constant",
-                    "Div",
-                    "Erf",
-                    "Gather",
-                    "MatMul",
-                    "Mul",
-                    "Pow",
-                    "ReduceMean",
-                    "Reshape",
-                    "Shape",
-                    "Slice",
-                    "Softmax",
-                    "Sqrt",
-                    "Sub",
-                    "Transpose",
-                    "Unsqueeze"
-                ],
-                "weight_type": "QUInt8"
-            },
-            "decoder_model_merged": {
-                "op_types": [
-                    "Add",
-                    "Cast",
-                    "Concat",
-                    "Constant",
-                    "ConstantOfShape",
-                    "Div",
-                    "Equal",
-                    "Erf",
-                    "Expand",
-                    "Gather",
-                    "If",
-                    "Less",
-                    "MatMul",
-                    "Mul",
-                    "Pow",
-                    "Range",
-                    "ReduceMean",
-                    "Reshape",
-                    "Shape",
-                    "Slice",
-                    "Softmax",
-                    "Sqrt",
-                    "Squeeze",
-                    "Sub",
-                    "Transpose",
-                    "Unsqueeze",
-                    "Where"
-                ],
-                "weight_type": "QUInt8"
-            }
         },
-        "per_channel": false,
-        "reduce_range": false
-    },
-    "q4": {
-        "block_size": 32,
-        "is_symmetric": true,
-        "accuracy_level": null
-    },
-    "bnb4": {
-        "block_size": 64,
-        "quant_type": 1
     }
 }

 {
+    "per_channel": false,
+    "reduce_range": false,
+    "per_model_config": {
+        "encoder_model": {
+            "op_types": [
+                "Reshape",
+                "Sqrt",
+                "Erf",
+                "Mul",
+                "ReduceMean",
+                "Gather",
+                "Softmax",
+                "Constant",
+                "Add",
+                "Pow",
+                "Unsqueeze",
+                "Shape",
+                "Concat",
+                "Div",
+                "MatMul",
+                "Sub",
+                "Transpose",
+                "Conv"
+            ],
+            "weight_type": "QUInt8"
         },
+        "decoder_model": {
+            "op_types": [
+                "Erf",
+                "Equal",
+                "Unsqueeze",
+                "Range",
+                "Less",
+                "Slice",
+                "Sqrt",
+                "Add",
+                "Pow",
+                "MatMul",
+                "Sub",
+                "Expand",
+                "Where",
+                "Transpose",
+                "Reshape",
+                "Cast",
+                "Mul",
+                "ReduceMean",
+                "Gather",
+                "Softmax",
+                "Constant",
+                "Concat",
+                "Squeeze",
+                "Shape",
+                "Div",
+                "ConstantOfShape"
+            ],
+            "weight_type": "QInt8"
         },
+        "decoder_with_past_model": {
+            "op_types": [
+                "Reshape",
+                "Sqrt",
+                "Erf",
+                "Mul",
+                "ReduceMean",
+                "Gather",
+                "Softmax",
+                "Constant",
+                "Concat",
+                "Unsqueeze",
+                "Add",
+                "Shape",
+                "Pow",
+                "Div",
+                "MatMul",
+                "Sub",
+                "Transpose",
+                "Slice"
+            ],
+            "weight_type": "QInt8"
         },
+        "decoder_model_merged": {
+            "op_types": [
+                "Erf",
+                "Equal",
+                "Unsqueeze",
+                "Range",
+                "Less",
+                "Slice",
+                "Sqrt",
+                "Add",
+                "Pow",
+                "MatMul",
+                "Sub",
+                "Expand",
+                "If",
+                "Where",
+                "Transpose",
+                "Reshape",
+                "Cast",
+                "Mul",
+                "ReduceMean",
+                "Gather",
+                "Softmax",
+                "Constant",
+                "Concat",
+                "Squeeze",
+                "Shape",
+                "Div",
+                "ConstantOfShape"
+            ],
+            "weight_type": "QInt8"
+        }
     }
 }

tokenizer.json CHANGED Viewed

@@ -114850,4 +114850,4 @@
       "åľ º"
     ]
   }
-}

       "åľ º"
     ]
   }
+}

tokenizer_config.json CHANGED Viewed

@@ -12976,14 +12976,43 @@
     "<|nocaptions|>",
     "<|notimestamps|>"
   ],
-  "bos_token": "<|endoftext|>",
   "clean_up_tokenization_spaces": true,
-  "eos_token": "<|endoftext|>",
   "errors": "replace",
   "model_max_length": 1024,
-  "pad_token": "<|endoftext|>",
   "processor_class": "WhisperProcessor",
   "return_attention_mask": false,
   "tokenizer_class": "WhisperTokenizer",
-  "unk_token": "<|endoftext|>"
 }

     "<|nocaptions|>",
     "<|notimestamps|>"
   ],
+  "bos_token": {
+    "__type": "AddedToken",
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
   "clean_up_tokenization_spaces": true,
+  "eos_token": {
+    "__type": "AddedToken",
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
   "errors": "replace",
   "model_max_length": 1024,
+  "pad_token": {
+    "__type": "AddedToken",
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
   "processor_class": "WhisperProcessor",
   "return_attention_mask": false,
   "tokenizer_class": "WhisperTokenizer",
+  "trust_remote_code": false,
+  "unk_token": {
+    "__type": "AddedToken",
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
 }

vocab.json CHANGED Viewed

The diff for this file is too large to render. See raw diff