---
license: bigcode-openrail-m
datasets:
- WizardLM/WizardLM_evol_instruct_70k
---

Here is an example showing how to use the model quantized with AutoGPTQ:

```
_3BITS_MODEL_PATH_V1_ = 'GodRain/WizardCoder-15B-V1.1-3bit'

# pip install auto_gptq
from auto_gptq import AutoGPTQForCausalLM
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(_3BITS_MODEL_PATH_V1_)
model = AutoGPTQForCausalLM.from_quantized(_3BITS_MODEL_PATH_V1_)

# `evaluate` is defined in the next snippet
out = evaluate("Hello, tell me a story about the sun", model=model, tokenizer=tokenizer)
print(out[0].strip())
```

```
import torch
from transformers import GenerationConfig

# Run on GPU when available
device = "cuda" if torch.cuda.is_available() else "cpu"


def evaluate(
    batch_data,
    tokenizer,
    model,
    temperature=1,
    top_p=0.9,
    top_k=40,
    num_beams=1,
    max_new_tokens=2048,
    **kwargs,
):
    # Build the instruction prompt (a `generate_prompt` sketch is
    # given at the end of this card)
    prompts = generate_prompt(batch_data)
    inputs = tokenizer(prompts, return_tensors="pt", max_length=256, truncation=True)
    input_ids = inputs["input_ids"].to(device)
    generation_config = GenerationConfig(
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        num_beams=num_beams,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.pad_token_id,
        **kwargs,
    )
    with torch.no_grad():
        generation_output = model.generate(
            input_ids=input_ids,
            generation_config=generation_config,
            return_dict_in_generate=True,
            output_scores=True,
            max_new_tokens=max_new_tokens,
        )
    s = generation_output.sequences
    # Decode generated token ids back to text
    output = tokenizer.batch_decode(s, skip_special_tokens=True)
    return output
```

Citation:

```
@misc{xu2023wizardlm,
      title={WizardLM: Empowering Large Language Models to Follow Complex Instructions},
      author={Can Xu and Qingfeng Sun and Kai Zheng and Xiubo Geng and Pu Zhao and Jiazhan Feng and Chongyang Tao and Daxin Jiang},
      year={2023},
      eprint={2304.12244},
      archivePrefix={arXiv},
      primaryClass={cs.CL}
}
```
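The snippets above call a `generate_prompt` helper that is not defined in this card. A minimal sketch, assuming the Alpaca-style instruction template commonly used to prompt WizardCoder models (the exact template is an assumption, not confirmed here):

```
def generate_prompt(instruction):
    # Assumption: the model expects the Alpaca-style instruction
    # template below; adjust if it was tuned with a different format.
    return (
        "Below is an instruction that describes a task. "
        "Write a response that appropriately completes the request.\n\n"
        f"### Instruction:\n{instruction}\n\n### Response:"
    )
```

With a helper like this in place, the `evaluate` call in the first snippet returns the decoded completion(s) as a list of strings.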