File size: 1,065 Bytes
96fe658 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 |
from swift.llm import InferClient, InferRequest, RequestConfig
def infer_multilora(engine: InferClient, infer_request: InferRequest):
    """Send one request to the base model and to two LoRA adapters, printing each reply.

    The list exposed by ``engine.models`` is indexed positionally: entry 0 is
    queried as the original model, entries 1 and 2 as the first and second
    LoRA adapter. The queries are issued in the order lora1, original, lora2.

    Args:
        engine: Client whose ``models`` attribute lists the served models and
            whose ``infer`` method accepts a ``model=`` selector.
        infer_request: The request that is sent, unchanged, to every model.

    Raises:
        IndexError: If fewer than three models are served. This is raised
            before any inference request is issued.
    """
    # Dynamic LoRA
    models = engine.models
    print(f'models: {models}')

    # Fail fast with an actionable message instead of a bare
    # "list index out of range" after some requests were already sent.
    if len(models) < 3:
        raise IndexError(f'infer_multilora needs at least 3 served models '
                         f'(origin, lora1, lora2), but got {len(models)}: {models}')

    request_config = RequestConfig(max_tokens=512, temperature=0)

    # (printed label, index into `models`), kept in the original query order.
    targets = (
        ('lora1-response', 1),  # use lora1
        ('response', 0),  # origin model
        ('lora2-response', 2),  # use lora2
    )
    for label, index in targets:
        resp_list = engine.infer([infer_request], request_config, model=models[index])
        response = resp_list[0].choices[0].message.content
        print(f'{label}: {response}')
if __name__ == '__main__':
    # Script entry point: talk to an inference server at 127.0.0.1:8000
    # and run the multi-LoRA comparison with a single user question.
    engine = InferClient(
        host='127.0.0.1',
        port=8000,
    )
    infer_request = InferRequest(
        messages=[
            {'role': 'user', 'content': 'who are you?'},
        ],
    )
    infer_multilora(engine=engine, infer_request=infer_request)
|