{ "fp16": {}, "q8": { "per_model_config": { "model": { "op_types": [ "Add", "Cast", "Concat", "Constant", "ConstantOfShape", "CumSum", "Div", "Erf", "Gather", "Identity", "MatMul", "Mul", "Pow", "ReduceMean", "Reshape", "Shape", "Softmax", "Sqrt", "Sub", "Transpose", "Unsqueeze" ], "weight_type": "QInt8" } }, "per_channel": true, "reduce_range": true }, "int8": { "per_model_config": { "model": { "op_types": [ "Add", "Cast", "Concat", "Constant", "ConstantOfShape", "CumSum", "Div", "Erf", "Gather", "Identity", "MatMul", "Mul", "Pow", "ReduceMean", "Reshape", "Shape", "Softmax", "Sqrt", "Sub", "Transpose", "Unsqueeze" ], "weight_type": "QInt8" } }, "per_channel": true, "reduce_range": true }, "uint8": { "per_model_config": { "model": { "op_types": [ "Add", "Cast", "Concat", "Constant", "ConstantOfShape", "CumSum", "Div", "Erf", "Gather", "Identity", "MatMul", "Mul", "Pow", "ReduceMean", "Reshape", "Shape", "Softmax", "Sqrt", "Sub", "Transpose", "Unsqueeze" ], "weight_type": "QUInt8" } }, "per_channel": true, "reduce_range": true }, "q4": { "block_size": 32, "is_symmetric": true, "accuracy_level": null }, "bnb4": { "block_size": 64, "quant_type": 1 } }