{ "fp16": {}, "q8": { "per_model_config": { "decoder_model_merged": { "op_types": [ "Add", "Cast", "Concat", "Constant", "ConstantOfShape", "Div", "Equal", "Erf", "Expand", "Gather", "If", "Less", "MatMul", "Mul", "Pow", "Range", "ReduceMean", "Reshape", "Shape", "Slice", "Softmax", "Sqrt", "Squeeze", "Sub", "Transpose", "Unsqueeze", "Where" ], "weight_type": "QInt8" }, "decoder_model": { "op_types": [ "Add", "Cast", "Concat", "Constant", "ConstantOfShape", "Div", "Equal", "Erf", "Expand", "Gather", "Less", "MatMul", "Mul", "Pow", "Range", "ReduceMean", "Reshape", "Shape", "Slice", "Softmax", "Sqrt", "Squeeze", "Sub", "Transpose", "Unsqueeze", "Where" ], "weight_type": "QInt8" }, "text_encoder": { "op_types": [ "Abs", "Add", "Cast", "Concat", "Constant", "ConstantOfShape", "Div", "Gather", "Greater", "Less", "Log", "MatMul", "Min", "Mul", "Pow", "Range", "ReduceMean", "Relu", "Reshape", "Shape", "Softmax", "Sqrt", "Sub", "Transpose", "Unsqueeze", "Where" ], "weight_type": "QInt8" }, "decoder_with_past_model": { "op_types": [ "Add", "Cast", "Concat", "Constant", "ConstantOfShape", "Div", "Equal", "Erf", "Expand", "Gather", "MatMul", "Mul", "Pow", "Range", "ReduceMean", "Reshape", "Shape", "Softmax", "Sqrt", "Sub", "Transpose", "Unsqueeze", "Where" ], "weight_type": "QInt8" }, "encodec_decode": { "op_types": [ "Add", "Cast", "Ceil", "Concat", "Constant", "ConstantOfShape", "Conv", "ConvTranspose", "Div", "Elu", "Gather", "LSTM", "Pad", "Reshape", "Shape", "Slice", "Split", "Squeeze", "Sub", "Transpose", "Unsqueeze" ], "weight_type": "QUInt8" }, "build_delay_pattern_mask": { "op_types": [ "Add", "Cast", "Concat", "Constant", "ConstantOfShape", "Equal", "Expand", "Gather", "GreaterOrEqual", "Mul", "NonZero", "Not", "Range", "ReduceMin", "Reshape", "ScatterND", "Shape", "Slice", "Sub", "Transpose", "Trilu", "Unsqueeze", "Where" ], "weight_type": "QInt8" } }, "per_channel": true, "reduce_range": true }, "int8": { "per_model_config": { "decoder_model_merged": { "op_types": [ "Add", "Cast", "Concat", "Constant", "ConstantOfShape", "Div", "Equal", "Erf", "Expand", "Gather", "If", "Less", "MatMul", "Mul", "Pow", "Range", "ReduceMean", "Reshape", "Shape", "Slice", "Softmax", "Sqrt", "Squeeze", "Sub", "Transpose", "Unsqueeze", "Where" ], "weight_type": "QInt8" }, "decoder_model": { "op_types": [ "Add", "Cast", "Concat", "Constant", "ConstantOfShape", "Div", "Equal", "Erf", "Expand", "Gather", "Less", "MatMul", "Mul", "Pow", "Range", "ReduceMean", "Reshape", "Shape", "Slice", "Softmax", "Sqrt", "Squeeze", "Sub", "Transpose", "Unsqueeze", "Where" ], "weight_type": "QInt8" }, "text_encoder": { "op_types": [ "Abs", "Add", "Cast", "Concat", "Constant", "ConstantOfShape", "Div", "Gather", "Greater", "Less", "Log", "MatMul", "Min", "Mul", "Pow", "Range", "ReduceMean", "Relu", "Reshape", "Shape", "Softmax", "Sqrt", "Sub", "Transpose", "Unsqueeze", "Where" ], "weight_type": "QInt8" }, "decoder_with_past_model": { "op_types": [ "Add", "Cast", "Concat", "Constant", "ConstantOfShape", "Div", "Equal", "Erf", "Expand", "Gather", "MatMul", "Mul", "Pow", "Range", "ReduceMean", "Reshape", "Shape", "Softmax", "Sqrt", "Sub", "Transpose", "Unsqueeze", "Where" ], "weight_type": "QInt8" }, "encodec_decode": { "op_types": [ "Add", "Cast", "Ceil", "Concat", "Constant", "ConstantOfShape", "Conv", "ConvTranspose", "Div", "Elu", "Gather", "LSTM", "Pad", "Reshape", "Shape", "Slice", "Split", "Squeeze", "Sub", "Transpose", "Unsqueeze" ], "weight_type": "QInt8" }, "build_delay_pattern_mask": { "op_types": [ "Add", "Cast", "Concat", "Constant", "ConstantOfShape", "Equal", "Expand", "Gather", "GreaterOrEqual", "Mul", "NonZero", "Not", "Range", "ReduceMin", "Reshape", "ScatterND", "Shape", "Slice", "Sub", "Transpose", "Trilu", "Unsqueeze", "Where" ], "weight_type": "QInt8" } }, "per_channel": false, "reduce_range": false }, "uint8": { "per_model_config": { "decoder_model_merged": { "op_types": [ "Add", "Cast", "Concat", "Constant", "ConstantOfShape", "Div", "Equal", "Erf", "Expand", "Gather", "If", "Less", "MatMul", "Mul", "Pow", "Range", "ReduceMean", "Reshape", "Shape", "Slice", "Softmax", "Sqrt", "Squeeze", "Sub", "Transpose", "Unsqueeze", "Where" ], "weight_type": "QUInt8" }, "decoder_model": { "op_types": [ "Add", "Cast", "Concat", "Constant", "ConstantOfShape", "Div", "Equal", "Erf", "Expand", "Gather", "Less", "MatMul", "Mul", "Pow", "Range", "ReduceMean", "Reshape", "Shape", "Slice", "Softmax", "Sqrt", "Squeeze", "Sub", "Transpose", "Unsqueeze", "Where" ], "weight_type": "QUInt8" }, "text_encoder": { "op_types": [ "Abs", "Add", "Cast", "Concat", "Constant", "ConstantOfShape", "Div", "Gather", "Greater", "Less", "Log", "MatMul", "Min", "Mul", "Pow", "Range", "ReduceMean", "Relu", "Reshape", "Shape", "Softmax", "Sqrt", "Sub", "Transpose", "Unsqueeze", "Where" ], "weight_type": "QUInt8" }, "decoder_with_past_model": { "op_types": [ "Add", "Cast", "Concat", "Constant", "ConstantOfShape", "Div", "Equal", "Erf", "Expand", "Gather", "MatMul", "Mul", "Pow", "Range", "ReduceMean", "Reshape", "Shape", "Softmax", "Sqrt", "Sub", "Transpose", "Unsqueeze", "Where" ], "weight_type": "QUInt8" }, "encodec_decode": { "op_types": [ "Add", "Cast", "Ceil", "Concat", "Constant", "ConstantOfShape", "Conv", "ConvTranspose", "Div", "Elu", "Gather", "LSTM", "Pad", "Reshape", "Shape", "Slice", "Split", "Squeeze", "Sub", "Transpose", "Unsqueeze" ], "weight_type": "QUInt8" }, "build_delay_pattern_mask": { "op_types": [ "Add", "Cast", "Concat", "Constant", "ConstantOfShape", "Equal", "Expand", "Gather", "GreaterOrEqual", "Mul", "NonZero", "Not", "Range", "ReduceMin", "Reshape", "ScatterND", "Shape", "Slice", "Sub", "Transpose", "Trilu", "Unsqueeze", "Where" ], "weight_type": "QUInt8" } }, "per_channel": false, "reduce_range": false }, "bnb4": { "block_size": 64, "quant_type": 1 }, "q4": { "block_size": 32, "is_symmetric": true, "accuracy_level": null } }