# Tested: this version performs well 😀

I use the 50k [Chinese data](https://huggingface.co/datasets/Chinese-Vicuna/instruct_chat_50k.jsonl), which combines the alpaca_chinese_instruction_dataset with the Chinese conversation data from the sharegpt-90k dataset. I fine-tuned the model for 3 epochs on a single 4090 GPU with cutoff_len=1024.

**Use in Python**:

```python
import torch
from transformers import LlamaForCausalLM, LlamaTokenizer
from peft import PeftModel

# Load the base LLaMA tokenizer and the 8-bit base model
tokenizer = LlamaTokenizer.from_pretrained("decapoda-research/llama-7b-hf")
model = LlamaForCausalLM.from_pretrained(
    "decapoda-research/llama-7b-hf",
    load_in_8bit=True,
    torch_dtype=torch.float16,
    device_map="auto",
)

# Attach the LoRA adapter weights on top of the base model
model = PeftModel.from_pretrained(
    model,
    "Laurie/lora-instruct-chat-50k-cn-en",
    torch_dtype=torch.float16,
    device_map={"": 0},
)

device = "cuda" if torch.cuda.is_available() else "cpu"

# Prompt: "什么是自然语言处理?" ("What is natural language processing?")
inputs = tokenizer("什么是自然语言处理?", return_tensors="pt")
inputs = {k: v.to(device) for k, v in inputs.items()}

# The 8-bit model is already dispatched to the GPU by device_map,
# so there is no need to call model.to(device) here.
with torch.no_grad():
    outputs = model.generate(input_ids=inputs["input_ids"], max_new_tokens=129)

print(tokenizer.batch_decode(outputs.detach().cpu().numpy(), skip_special_tokens=True))
```
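
The snippet above uses greedy decoding by default. If you want more varied responses, `generate` also accepts the standard sampling arguments. Below is a minimal sketch that reuses the `model`, `tokenizer`, and `device` objects from the code above; the prompt and the `temperature`/`top_p`/`repetition_penalty` values are illustrative assumptions, not settings recommended for this adapter.

```python
# Sampling-based generation; parameter values are illustrative, not tuned for this checkpoint.
prompt = "请用一句话介绍一下自然语言处理。"  # "Introduce natural language processing in one sentence."
inputs = tokenizer(prompt, return_tensors="pt")
inputs = {k: v.to(device) for k, v in inputs.items()}

with torch.no_grad():
    outputs = model.generate(
        input_ids=inputs["input_ids"],
        max_new_tokens=128,
        do_sample=True,          # sample instead of greedy decoding
        temperature=0.7,         # lower values give more deterministic output
        top_p=0.9,               # nucleus sampling cutoff
        repetition_penalty=1.1,  # discourage repeated phrases
    )

print(tokenizer.batch_decode(outputs.cpu().numpy(), skip_special_tokens=True)[0])
```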