{ "q8": { "per_channel": true, "reduce_range": true, "quantize_mode": "q8", "per_model_config": { "model": { "op_types": [ "Mul", "Unsqueeze", "Transpose", "Sqrt", "Sub", "ReduceMean", "Div", "Reshape", "Softmax", "Shape", "MatMul", "Conv", "Add", "Pow", "Sigmoid", "Concat", "Constant", "Gather", "Gemm" ], "weight_type": "QUInt8" } } }, "int8": { "per_channel": true, "reduce_range": true, "quantize_mode": "int8", "per_model_config": { "model": { "op_types": [ "Mul", "Unsqueeze", "Transpose", "Sqrt", "Sub", "ReduceMean", "Div", "Reshape", "Softmax", "Shape", "MatMul", "Conv", "Add", "Pow", "Sigmoid", "Concat", "Constant", "Gather", "Gemm" ], "weight_type": "QInt8" } } }, "uint8": { "per_channel": true, "reduce_range": true, "quantize_mode": "uint8", "per_model_config": { "model": { "op_types": [ "Mul", "Unsqueeze", "Transpose", "Sqrt", "Sub", "ReduceMean", "Div", "Reshape", "Softmax", "Shape", "MatMul", "Conv", "Add", "Pow", "Sigmoid", "Concat", "Constant", "Gather", "Gemm" ], "weight_type": "QUInt8" } } }, "q4": { "per_channel": true, "reduce_range": true, "quantize_mode": "q4", "per_model_config": {} }, "bnb4": { "per_channel": true, "reduce_range": true, "quantize_mode": "bnb4", "per_model_config": {} } }