tarekziade committed on
Commit
93b3b35
1 Parent(s): a6a78d0

Upload 15 files

Browse files
onnx/decoder_model_merged.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d603904cb811150731ec5d2d4680e7b3a79bd728b6161a9688563ca92fa403c5
3
+ size 387342586
onnx/decoder_model_merged_quantized.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0954635de52a206823c20847ff4793622a7b9c0ef1e5adaaedb906dba02ce466
3
+ size 99759579
onnx/decoder_with_past_model.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a607d1e4630646433316bbd5d27de80baa0292d01c9f962f97f783de64f4996
3
+ size 385864377
onnx/decoder_with_past_model_quantized.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c5a4a2f6ecdbee840feb854a400b1b1856a896a29d15993458dc7fd1a128719
3
+ size 98063170
quantize_config.json CHANGED
@@ -2,63 +2,124 @@
2
  "per_channel": false,
3
  "reduce_range": false,
4
  "per_model_config": {
5
- "decoder_model": {
6
  "op_types": [
7
- "Mul",
8
- "Concat",
9
- "Range",
10
- "Add",
11
  "Transpose",
 
 
 
 
12
  "Sub",
 
 
 
 
13
  "Slice",
14
- "MatMul",
15
  "Div",
16
  "Split",
17
- "Cast",
18
- "ConstantOfShape",
19
- "Sqrt",
20
- "Pow",
21
- "Gemm",
22
- "ReduceMean",
23
- "Constant",
24
  "Softmax",
 
 
25
  "Shape",
 
 
26
  "Tanh",
 
 
 
 
 
 
 
 
27
  "Reshape",
28
  "Gather",
29
- "Squeeze",
 
 
 
 
 
30
  "Where",
31
- "Unsqueeze"
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  ],
33
  "weight_type": "QInt8"
34
  },
35
  "encoder_model": {
36
  "op_types": [
37
- "Mul",
38
- "Concat",
39
- "Add",
40
  "Transpose",
 
 
 
 
41
  "Sub",
42
- "Slice",
43
  "Expand",
44
- "MatMul",
 
45
  "Div",
46
- "ConstantOfShape",
47
- "Sqrt",
48
- "Pow",
49
- "ReduceMean",
50
- "Equal",
51
- "Constant",
52
  "Softmax",
 
53
  "Shape",
54
  "Erf",
 
 
 
 
 
 
 
 
 
 
 
55
  "Reshape",
56
  "Gather",
57
- "Conv",
 
 
 
 
 
 
58
  "Where",
59
- "Unsqueeze"
 
 
 
 
 
 
 
 
 
 
 
 
 
60
  ],
61
- "weight_type": "QUInt8"
62
  }
63
  }
64
  }
 
2
  "per_channel": false,
3
  "reduce_range": false,
4
  "per_model_config": {
5
+ "decoder_with_past_model": {
6
  "op_types": [
7
+ "Sqrt",
 
 
 
8
  "Transpose",
9
+ "Reshape",
10
+ "Gather",
11
+ "Constant",
12
+ "Pow",
13
  "Sub",
14
+ "Cast",
15
+ "Mul",
16
+ "Range",
17
+ "Where",
18
  "Slice",
 
19
  "Div",
20
  "Split",
21
+ "MatMul",
22
+ "Unsqueeze",
23
+ "Concat",
 
 
 
 
24
  "Softmax",
25
+ "Squeeze",
26
+ "Add",
27
  "Shape",
28
+ "ConstantOfShape",
29
+ "Gemm",
30
  "Tanh",
31
+ "ReduceMean"
32
+ ],
33
+ "weight_type": "QInt8"
34
+ },
35
+ "decoder_model": {
36
+ "op_types": [
37
+ "Sqrt",
38
+ "Transpose",
39
  "Reshape",
40
  "Gather",
41
+ "Constant",
42
+ "Pow",
43
+ "Sub",
44
+ "Cast",
45
+ "Mul",
46
+ "Range",
47
  "Where",
48
+ "Slice",
49
+ "Div",
50
+ "Split",
51
+ "MatMul",
52
+ "Unsqueeze",
53
+ "Concat",
54
+ "Softmax",
55
+ "Squeeze",
56
+ "Add",
57
+ "Shape",
58
+ "ConstantOfShape",
59
+ "Gemm",
60
+ "Tanh",
61
+ "ReduceMean"
62
  ],
63
  "weight_type": "QInt8"
64
  },
65
  "encoder_model": {
66
  "op_types": [
67
+ "Sqrt",
 
 
68
  "Transpose",
69
+ "Reshape",
70
+ "Gather",
71
+ "Constant",
72
+ "Pow",
73
  "Sub",
74
+ "Mul",
75
  "Expand",
76
+ "Where",
77
+ "Slice",
78
  "Div",
79
+ "MatMul",
80
+ "Unsqueeze",
81
+ "Concat",
 
 
 
82
  "Softmax",
83
+ "Add",
84
  "Shape",
85
  "Erf",
86
+ "ConstantOfShape",
87
+ "ReduceMean",
88
+ "Equal",
89
+ "Conv"
90
+ ],
91
+ "weight_type": "QUInt8"
92
+ },
93
+ "decoder_model_merged": {
94
+ "op_types": [
95
+ "Sqrt",
96
+ "Transpose",
97
  "Reshape",
98
  "Gather",
99
+ "Constant",
100
+ "Pow",
101
+ "Sub",
102
+ "Cast",
103
+ "Mul",
104
+ "If",
105
+ "Range",
106
  "Where",
107
+ "Slice",
108
+ "Div",
109
+ "Split",
110
+ "MatMul",
111
+ "Unsqueeze",
112
+ "Concat",
113
+ "Softmax",
114
+ "Squeeze",
115
+ "Add",
116
+ "Shape",
117
+ "ConstantOfShape",
118
+ "Gemm",
119
+ "Tanh",
120
+ "ReduceMean"
121
  ],
122
+ "weight_type": "QInt8"
123
  }
124
  }
125
  }