pszemraj committed on
Commit
a187922
1 Parent(s): 23ad0a3

quantize per channel

Browse files
config.json CHANGED
@@ -185,5 +185,5 @@
185
  "model_type": "vision-encoder-decoder",
186
  "tie_word_embeddings": false,
187
  "torch_dtype": "float32",
188
- "transformers_version": "4.35.2"
189
  }
 
185
  "model_type": "vision-encoder-decoder",
186
  "tie_word_embeddings": false,
187
  "torch_dtype": "float32",
188
+ "transformers_version": "4.38.1"
189
  }
decoder_model_quantized.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b79247a2b0ba7d349987de3e462f42e395f9d7d0989c968975977e3a97563e85
3
- size 173838810
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9c8f1bb7456441320245595f93fa052612b9f477b71f4822a46c070faf48e8d
3
+ size 174444105
decoder_with_past_model_quantized.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6b027b072abe7165445dcec03f8492b9201fdfb2e196ed870febbdbe8508329e
3
- size 165366466
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a68ddb99f8e476a6ba4e356145440de0fa70a5d2cff89c136feba9691f27c98
3
+ size 165923718
encoder_model_quantized.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:febe91664da233b0b71a61217a84c7d5d9b5db6d7991eec0e761742fd4c4d5ee
3
- size 81514092
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6bec906871770afe111f0f3474bd5cb143cfd7f659c242431ee5431be580684d
3
+ size 82052587
ort_config.json CHANGED
@@ -2,7 +2,7 @@
2
  "one_external_file": true,
3
  "opset": null,
4
  "optimization": {},
5
- "optimum_version": "1.14.1",
6
  "quantization": {
7
  "activations_dtype": "QUInt8",
8
  "activations_symmetric": false,
@@ -20,7 +20,7 @@
20
  "Transpose",
21
  "EmbedLayerNormalization"
22
  ],
23
- "per_channel": false,
24
  "qdq_add_pair_to_weight": false,
25
  "qdq_dedicated_pair": false,
26
  "qdq_op_type_per_channel_support_to_axis": {
@@ -30,6 +30,6 @@
30
  "weights_dtype": "QUInt8",
31
  "weights_symmetric": true
32
  },
33
- "transformers_version": "4.35.2",
34
  "use_external_data_format": false
35
  }
 
2
  "one_external_file": true,
3
  "opset": null,
4
  "optimization": {},
5
+ "optimum_version": "1.17.1",
6
  "quantization": {
7
  "activations_dtype": "QUInt8",
8
  "activations_symmetric": false,
 
20
  "Transpose",
21
  "EmbedLayerNormalization"
22
  ],
23
+ "per_channel": true,
24
  "qdq_add_pair_to_weight": false,
25
  "qdq_dedicated_pair": false,
26
  "qdq_op_type_per_channel_support_to_axis": {
 
30
  "weights_dtype": "QUInt8",
31
  "weights_symmetric": true
32
  },
33
+ "transformers_version": "4.38.1",
34
  "use_external_data_format": false
35
  }