# Runs the same chat prompt through two inference paths (transformers + peft,
# and the swift PtEngine) and checks that both return the same text.


def infer_hf():
    """Generate a reply with transformers + peft and return the decoded text.

    Downloads the base model and the LoRA adapter via modelscope, wraps the
    base model with the adapter, renders the chat prompt with the tokenizer's
    chat template, and generates up to 512 new tokens with sampling disabled.

    Returns:
        The decoded completion (prompt tokens stripped), which is also
        printed to stdout.
    """
    from transformers import AutoModelForCausalLM, AutoTokenizer
    from peft import PeftModel
    from modelscope import snapshot_download

    base_path = snapshot_download('Qwen/Qwen2.5-7B-Instruct')
    lora_path = snapshot_download('swift/test_lora')

    base_model = AutoModelForCausalLM.from_pretrained(
        base_path,
        torch_dtype='auto',
        device_map='auto',
        trust_remote_code=True,
    )
    lora_model = PeftModel.from_pretrained(base_model, lora_path)
    tok = AutoTokenizer.from_pretrained(base_path, trust_remote_code=True)

    system_turn = {'role': 'system', 'content': 'You are a helpful assistant.'}
    user_turn = {'role': 'user', 'content': 'who are you?'}
    conversation = [system_turn, user_turn]

    # Render the conversation to a plain-text prompt (not token ids), ending
    # with the generation prompt for the assistant turn.
    prompt = tok.apply_chat_template(
        conversation,
        tokenize=False,
        add_generation_prompt=True,
    )
    encoded = tok([prompt], return_tensors='pt', add_special_tokens=False)
    encoded = encoded.to(lora_model.device)

    sequences = lora_model.generate(**encoded, max_new_tokens=512, do_sample=False)

    # Each generated sequence starts with its prompt tokens; slice them off
    # so only the newly generated part is decoded.
    completions = [
        sequence[len(prompt_ids):]
        for prompt_ids, sequence in zip(encoded.input_ids, sequences)
    ]
    decoded = tok.batch_decode(completions, skip_special_tokens=True)
    response = decoded[0]

    print(f'response: {response}')
    return response


def infer_swift():
    """Generate a reply through the swift PtEngine and return its text.

    Downloads the same base model and LoRA adapter as ``infer_hf``, loads
    them with swift's helpers, and sends the same two-message conversation
    as a single inference request (max_tokens=512, temperature=0).

    Returns:
        The message content of the first choice of the first response,
        which is also printed to stdout.
    """
    from swift.llm import get_model_tokenizer, get_template, InferRequest, RequestConfig, PtEngine
    from modelscope import snapshot_download
    from swift.tuners import Swift

    base_path = snapshot_download('Qwen/Qwen2.5-7B-Instruct')
    lora_path = snapshot_download('swift/test_lora')

    base_model, tok = get_model_tokenizer(base_path, device_map='auto')
    tuned_model = Swift.from_pretrained(base_model, lora_path)

    # The template type is read from the adapter-wrapped model's metadata.
    chat_template = get_template(tuned_model.model_meta.template, tok)
    engine = PtEngine.from_model_template(tuned_model, chat_template)

    system_turn = {'role': 'system', 'content': 'You are a helpful assistant.'}
    user_turn = {'role': 'user', 'content': 'who are you?'}
    request = InferRequest(messages=[system_turn, user_turn])

    # NOTE(review): temperature=0 is presumably the counterpart of
    # do_sample=False in infer_hf -- confirm against swift's RequestConfig.
    config = RequestConfig(max_tokens=512, temperature=0)
    results = engine.infer([request], request_config=config)

    first_choice = results[0].choices[0]
    response = first_choice.message.content

    print(f'response: {response}')
    return response


if __name__ == '__main__':
    # Both inference paths must yield exactly the same text.
    response = infer_hf()
    response2 = infer_swift()
    assert response == response2