{ "bits": 4, "block_name_to_quantize": "transformer.h", "damp_percent": 0.1, "dataset": "c4", "desc_act": false, "disable_exllama": true, "group_size": 128, "max_input_length": null, "model_seqlen": 2048, "module_name_preceding_first_block": [ "transformer.wte", "transformer.wpe", "transformer.drop" ], "pad_token_id": null, "quant_method": "gptq", "sym": true, "tokenizer": null, "true_sequential": true, "use_cuda_fp16": true }