```bash
pip install numpy gekko pandas
git clone https://github.com/PanQiWei/AutoGPTQ.git && cd AutoGPTQ
pip install -vvv --no-build-isolation -e .
```

```python
from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig
from transformers import AutoTokenizer, TextStreamer

# Single source of truth for the checkpoint: the model and the tokenizer are
# both loaded from this Hub repository.
# NOTE(review): assumes the repository also ships the tokenizer files — confirm.
model_id = "GSJL/Qwen2.5-14B-Instruct-GPTQ-Marlin"
device = "cuda:0"

# Load the quantized checkpoint with the Marlin kernel enabled, then move it
# to the GPU.
model = AutoGPTQForCausalLM.from_quantized(model_id, use_marlin=True).to(device)

tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)

# Stream generated text as it is produced, omitting the prompt and any
# special tokens from the streamed output.
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

prompt = [{"role": "user", "content": "Hi mom!!!!!"}]

# Render the conversation with the model's chat template and append the
# generation prompt so the model answers as the assistant.
inputs = tokenizer.apply_chat_template(
    prompt,
    return_tensors="pt",
    add_generation_prompt=True,
).to(device)

output = model.generate(
    input_ids=inputs,
    streamer=streamer,
    use_cache=True,
    do_sample=True,
    max_new_tokens=600,
)
```