{
    "fp16": {},
    "q8": {
        "per_model_config": {
            "encoder_model": {
                "op_types": [
                    "Add",
                    "Concat",
                    "Constant",
                    "Conv",
                    "Div",
                    "Erf",
                    "Gather",
                    "MatMul",
                    "Mul",
                    "Pow",
                    "ReduceMean",
                    "Reshape",
                    "Shape",
                    "Softmax",
                    "Sqrt",
                    "Sub",
                    "Transpose",
                    "Unsqueeze"
                ],
                "weight_type": "QUInt8"
            },
            "decoder_with_past_model": {
                "op_types": [
                    "Add",
                    "Concat",
                    "Constant",
                    "Div",
                    "Erf",
                    "Gather",
                    "MatMul",
                    "Mul",
                    "Pow",
                    "ReduceMean",
                    "Reshape",
                    "Shape",
                    "Slice",
                    "Softmax",
                    "Sqrt",
                    "Sub",
                    "Transpose",
                    "Unsqueeze"
                ],
                "weight_type": "QInt8"
            },
            "decoder_model": {
                "op_types": [
                    "Add",
                    "Cast",
                    "Concat",
                    "Constant",
                    "ConstantOfShape",
                    "Div",
                    "Equal",
                    "Erf",
                    "Expand",
                    "Gather",
                    "Less",
                    "MatMul",
                    "Mul",
                    "Pow",
                    "Range",
                    "ReduceMean",
                    "Reshape",
                    "Shape",
                    "Slice",
                    "Softmax",
                    "Sqrt",
                    "Squeeze",
                    "Sub",
                    "Transpose",
                    "Unsqueeze",
                    "Where"
                ],
                "weight_type": "QInt8"
            },
            "decoder_model_merged": {
                "op_types": [
                    "Add",
                    "Cast",
                    "Concat",
                    "Constant",
                    "ConstantOfShape",
                    "Div",
                    "Equal",
                    "Erf",
                    "Expand",
                    "Gather",
                    "If",
                    "Less",
                    "MatMul",
                    "Mul",
                    "Pow",
                    "Range",
                    "ReduceMean",
                    "Reshape",
                    "Shape",
                    "Slice",
                    "Softmax",
                    "Sqrt",
                    "Squeeze",
                    "Sub",
                    "Transpose",
                    "Unsqueeze",
                    "Where"
                ],
                "weight_type": "QInt8"
            }
        },
        "per_channel": false,
        "reduce_range": false
    },
    "int8": {
        "per_model_config": {
            "encoder_model": {
                "op_types": [
                    "Add",
                    "Concat",
                    "Constant",
                    "Conv",
                    "Div",
                    "Erf",
                    "Gather",
                    "MatMul",
                    "Mul",
                    "Pow",
                    "ReduceMean",
                    "Reshape",
                    "Shape",
                    "Softmax",
                    "Sqrt",
                    "Sub",
                    "Transpose",
                    "Unsqueeze"
                ],
                "weight_type": "QInt8"
            },
            "decoder_with_past_model": {
                "op_types": [
                    "Add",
                    "Concat",
                    "Constant",
                    "Div",
                    "Erf",
                    "Gather",
                    "MatMul",
                    "Mul",
                    "Pow",
                    "ReduceMean",
                    "Reshape",
                    "Shape",
                    "Slice",
                    "Softmax",
                    "Sqrt",
                    "Sub",
                    "Transpose",
                    "Unsqueeze"
                ],
                "weight_type": "QInt8"
            },
            "decoder_model": {
                "op_types": [
                    "Add",
                    "Cast",
                    "Concat",
                    "Constant",
                    "ConstantOfShape",
                    "Div",
                    "Equal",
                    "Erf",
                    "Expand",
                    "Gather",
                    "Less",
                    "MatMul",
                    "Mul",
                    "Pow",
                    "Range",
                    "ReduceMean",
                    "Reshape",
                    "Shape",
                    "Slice",
                    "Softmax",
                    "Sqrt",
                    "Squeeze",
                    "Sub",
                    "Transpose",
                    "Unsqueeze",
                    "Where"
                ],
                "weight_type": "QInt8"
            },
            "decoder_model_merged": {
                "op_types": [
                    "Add",
                    "Cast",
                    "Concat",
                    "Constant",
                    "ConstantOfShape",
                    "Div",
                    "Equal",
                    "Erf",
                    "Expand",
                    "Gather",
                    "If",
                    "Less",
                    "MatMul",
                    "Mul",
                    "Pow",
                    "Range",
                    "ReduceMean",
                    "Reshape",
                    "Shape",
                    "Slice",
                    "Softmax",
                    "Sqrt",
                    "Squeeze",
                    "Sub",
                    "Transpose",
                    "Unsqueeze",
                    "Where"
                ],
                "weight_type": "QInt8"
            }
        },
        "per_channel": false,
        "reduce_range": false
    },
    "uint8": {
        "per_model_config": {
            "encoder_model": {
                "op_types": [
                    "Add",
                    "Concat",
                    "Constant",
                    "Conv",
                    "Div",
                    "Erf",
                    "Gather",
                    "MatMul",
                    "Mul",
                    "Pow",
                    "ReduceMean",
                    "Reshape",
                    "Shape",
                    "Softmax",
                    "Sqrt",
                    "Sub",
                    "Transpose",
                    "Unsqueeze"
                ],
                "weight_type": "QUInt8"
            },
            "decoder_with_past_model": {
                "op_types": [
                    "Add",
                    "Concat",
                    "Constant",
                    "Div",
                    "Erf",
                    "Gather",
                    "MatMul",
                    "Mul",
                    "Pow",
                    "ReduceMean",
                    "Reshape",
                    "Shape",
                    "Slice",
                    "Softmax",
                    "Sqrt",
                    "Sub",
                    "Transpose",
                    "Unsqueeze"
                ],
                "weight_type": "QUInt8"
            },
            "decoder_model": {
                "op_types": [
                    "Add",
                    "Cast",
                    "Concat",
                    "Constant",
                    "ConstantOfShape",
                    "Div",
                    "Equal",
                    "Erf",
                    "Expand",
                    "Gather",
                    "Less",
                    "MatMul",
                    "Mul",
                    "Pow",
                    "Range",
                    "ReduceMean",
                    "Reshape",
                    "Shape",
                    "Slice",
                    "Softmax",
                    "Sqrt",
                    "Squeeze",
                    "Sub",
                    "Transpose",
                    "Unsqueeze",
                    "Where"
                ],
                "weight_type": "QUInt8"
            },
            "decoder_model_merged": {
                "op_types": [
                    "Add",
                    "Cast",
                    "Concat",
                    "Constant",
                    "ConstantOfShape",
                    "Div",
                    "Equal",
                    "Erf",
                    "Expand",
                    "Gather",
                    "If",
                    "Less",
                    "MatMul",
                    "Mul",
                    "Pow",
                    "Range",
                    "ReduceMean",
                    "Reshape",
                    "Shape",
                    "Slice",
                    "Softmax",
                    "Sqrt",
                    "Squeeze",
                    "Sub",
                    "Transpose",
                    "Unsqueeze",
                    "Where"
                ],
                "weight_type": "QUInt8"
            }
        },
        "per_channel": false,
        "reduce_range": false
    },
    "q4": {
        "block_size": 32,
        "is_symmetric": true,
        "accuracy_level": null
    },
    "bnb4": {
        "block_size": 64,
        "quant_type": 1
    }
}