from typing import List | |
from swift.llm import InferClient, InferRequest | |
def infer_batch(engine: InferClient, infer_requests: List[InferRequest]):
    """Run a batch of requests through ``engine`` and print each exchange.

    All requests are submitted in a single ``engine.infer`` call. For every
    request/response pair, the content of the request's first message and
    the content of the response's first choice are printed as
    ``query<i>: ...`` / ``response<i>: ...``, where ``<i>`` is the position
    in the batch.

    Args:
        engine: Client whose ``infer`` method accepts the list of requests
            and returns one response per request.
        infer_requests: Requests to send. Any length is accepted, including
            an empty list (nothing is printed in that case).

    Raises:
        IndexError: If a request has no messages or a response has no
            choices.
    """
    resp_list = engine.infer(infer_requests)
    # Pair requests with responses positionally instead of hard-coding
    # indices 0 and 1, so the batch may hold any number of requests.
    for idx, (request, response) in enumerate(zip(infer_requests, resp_list)):
        query = request.messages[0]['content']
        print(f'query{idx}: {query}')
        print(f'response{idx}: {response.choices[0].message.content}')
if __name__ == '__main__':
    # Client configured for a server at 127.0.0.1:8000.
    engine = InferClient(host='127.0.0.1', port=8000)
    print(f'models: {engine.models}')

    # Each prompt becomes its own single-message user request.
    prompts = ['今天天气真好呀', '真倒霉']
    batch = [
        InferRequest(messages=[{'role': 'user', 'content': prompt}])
        for prompt in prompts
    ]
    infer_batch(engine, batch)