# Enxin's picture
# Upload folder using huggingface_hub
# 96fe658 verified
def infer_hf():
    """Run LoRA-adapted inference via transformers + PEFT and return the response text.

    Downloads the Qwen2.5-7B-Instruct base model and a test LoRA adapter from
    ModelScope, applies a chat template to a fixed prompt, and greedy-decodes
    up to 512 new tokens.
    """
    from transformers import AutoModelForCausalLM, AutoTokenizer
    from peft import PeftModel
    from modelscope import snapshot_download

    # Fetch base weights and the LoRA adapter from the ModelScope hub.
    base_dir = snapshot_download('Qwen/Qwen2.5-7B-Instruct')
    lora_dir = snapshot_download('swift/test_lora')

    base_model = AutoModelForCausalLM.from_pretrained(
        base_dir, torch_dtype='auto', device_map='auto', trust_remote_code=True)
    model = PeftModel.from_pretrained(base_model, lora_dir)
    tokenizer = AutoTokenizer.from_pretrained(base_dir, trust_remote_code=True)

    messages = [
        {'role': 'system', 'content': 'You are a helpful assistant.'},
        {'role': 'user', 'content': 'who are you?'},
    ]
    # The chat template already inserts the special tokens, so skip them at encode time.
    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer([prompt], return_tensors='pt', add_special_tokens=False).to(model.device)

    # Greedy decoding; then strip the prompt tokens from each generated sequence.
    output_ids = model.generate(**inputs, max_new_tokens=512, do_sample=False)
    trimmed = [out[len(inp):] for inp, out in zip(inputs.input_ids, output_ids)]

    response = tokenizer.batch_decode(trimmed, skip_special_tokens=True)[0]
    print(f'response: {response}')
    return response
def infer_swift():
    """Run the same LoRA inference through ms-swift's PtEngine and return the response text.

    Mirrors the transformers/PEFT path: same base model, same adapter, same
    fixed prompt, greedy decoding with a 512-token cap.
    """
    from swift.llm import get_model_tokenizer, get_template, InferRequest, RequestConfig, PtEngine
    from modelscope import snapshot_download
    from swift.tuners import Swift

    # Fetch base weights and the LoRA adapter from the ModelScope hub.
    base_dir = snapshot_download('Qwen/Qwen2.5-7B-Instruct')
    lora_dir = snapshot_download('swift/test_lora')

    model, tokenizer = get_model_tokenizer(base_dir, device_map='auto')
    model = Swift.from_pretrained(model, lora_dir)
    template = get_template(model.model_meta.template, tokenizer)
    engine = PtEngine.from_model_template(model, template)

    messages = [
        {'role': 'system', 'content': 'You are a helpful assistant.'},
        {'role': 'user', 'content': 'who are you?'},
    ]
    # temperature=0 → greedy decoding, matching do_sample=False in the HF path.
    config = RequestConfig(max_tokens=512, temperature=0)
    results = engine.infer([InferRequest(messages=messages)], request_config=config)

    response = results[0].choices[0].message.content
    print(f'response: {response}')
    return response
if __name__ == '__main__':
    # Both inference paths use greedy decoding on the same model + adapter,
    # so their outputs must match exactly.
    hf_response = infer_hf()
    swift_response = infer_swift()
    assert hf_response == swift_response