Xenova HF staff committed on
Commit
1dfaec2
1 Parent(s): b4f1eb9

Update quantize_config.json

Browse files
Files changed (1) hide show
  1. quantize_config.json +99 -27
quantize_config.json CHANGED
@@ -1,30 +1,102 @@
1
  {
2
- "per_channel": true,
3
- "reduce_range": true,
4
- "per_model_config": {
5
- "model": {
6
- "op_types": [
7
- "Conv",
8
- "Sub",
9
- "ReduceMean",
10
- "Div",
11
- "Gather",
12
- "Mul",
13
- "Shape",
14
- "Pow",
15
- "Sqrt",
16
- "MatMul",
17
- "Concat",
18
- "Unsqueeze",
19
- "Add",
20
- "Slice",
21
- "Transpose",
22
- "Constant",
23
- "Softmax",
24
- "Reshape",
25
- "Erf"
26
- ],
27
- "weight_type": "QUInt8"
28
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  }
30
  }
 
1
  {
2
+ "fp16": {},
3
+ "q8": {
4
+ "per_model_config": {
5
+ "model": {
6
+ "op_types": [
7
+ "Add",
8
+ "Concat",
9
+ "Constant",
10
+ "Conv",
11
+ "Div",
12
+ "Erf",
13
+ "Gather",
14
+ "MatMul",
15
+ "Mul",
16
+ "Pow",
17
+ "ReduceMean",
18
+ "Reshape",
19
+ "Shape",
20
+ "Slice",
21
+ "Softmax",
22
+ "Sqrt",
23
+ "Sub",
24
+ "Transpose",
25
+ "Unsqueeze"
26
+ ],
27
+ "weight_type": "QUInt8"
28
+ }
29
+ },
30
+ "per_channel": false,
31
+ "reduce_range": false
32
+ },
33
+ "int8": {
34
+ "per_model_config": {
35
+ "model": {
36
+ "op_types": [
37
+ "Add",
38
+ "Concat",
39
+ "Constant",
40
+ "Conv",
41
+ "Div",
42
+ "Erf",
43
+ "Gather",
44
+ "MatMul",
45
+ "Mul",
46
+ "Pow",
47
+ "ReduceMean",
48
+ "Reshape",
49
+ "Shape",
50
+ "Slice",
51
+ "Softmax",
52
+ "Sqrt",
53
+ "Sub",
54
+ "Transpose",
55
+ "Unsqueeze"
56
+ ],
57
+ "weight_type": "QInt8"
58
+ }
59
+ },
60
+ "per_channel": false,
61
+ "reduce_range": false
62
+ },
63
+ "uint8": {
64
+ "per_model_config": {
65
+ "model": {
66
+ "op_types": [
67
+ "Add",
68
+ "Concat",
69
+ "Constant",
70
+ "Conv",
71
+ "Div",
72
+ "Erf",
73
+ "Gather",
74
+ "MatMul",
75
+ "Mul",
76
+ "Pow",
77
+ "ReduceMean",
78
+ "Reshape",
79
+ "Shape",
80
+ "Slice",
81
+ "Softmax",
82
+ "Sqrt",
83
+ "Sub",
84
+ "Transpose",
85
+ "Unsqueeze"
86
+ ],
87
+ "weight_type": "QUInt8"
88
+ }
89
+ },
90
+ "per_channel": false,
91
+ "reduce_range": false
92
+ },
93
+ "q4": {
94
+ "block_size": 32,
95
+ "is_symmetric": true,
96
+ "accuracy_level": null
97
+ },
98
+ "bnb4": {
99
+ "block_size": 64,
100
+ "quant_type": 1
101
  }
102
  }