# ms-swift: tests/deploy/test_dataset.py
def _test_client(port=8000):
    """Smoke-test a deploy server already listening on ``port``.

    Loads 1000 alpaca-gpt4 samples, blocks until the server responds to
    the ``models`` query, runs batch inference over all samples, and
    prints the number of responses received.

    Args:
        port: Port of the running inference server (default 8000).
    """
    import time

    # NOTE: `aiohttp` and `run_deploy` were imported here but never used;
    # only the names below are actually needed.
    from swift.llm import InferClient, InferRequest, RequestConfig, load_dataset

    dataset = load_dataset(['AI-ModelScope/alpaca-gpt4-data-zh#1000'], num_proc=4)
    infer_client = InferClient(port=port)
    # Poll until the server is ready: accessing `.models` raises while the
    # server is still starting up.
    while True:
        try:
            infer_client.models
            break
        except Exception:
            time.sleep(1)
    infer_requests = [InferRequest(**data) for data in dataset[0]]
    request_config = RequestConfig(seed=42, max_tokens=256, temperature=0.8)
    resp = infer_client.infer(infer_requests, request_config=request_config, use_tqdm=False)
    print(len(resp))
def _test(infer_backend):
    """Deploy Qwen2-7B-Instruct with ``infer_backend`` and smoke-test it.

    Pins the deployment to GPU 0, starts the server via ``run_deploy``,
    and runs the shared client smoke test against the resulting port.
    """
    import os

    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    from swift.llm import DeployArguments, run_deploy

    deploy_args = DeployArguments(
        model='Qwen/Qwen2-7B-Instruct', infer_backend=infer_backend, verbose=False)
    with run_deploy(deploy_args) as port:
        _test_client(port)
def test_vllm():
    """End-to-end deploy smoke test using the vLLM backend."""
    _test('vllm')
def test_lmdeploy():
    """End-to-end deploy smoke test using the LMDeploy backend."""
    _test('lmdeploy')
def test_pt():
    """End-to-end deploy smoke test using the native PyTorch backend."""
    _test('pt')
def test_vllm_origin():
    """Launch vanilla vLLM's OpenAI-compatible server and smoke-test it.

    Downloads Qwen2-7B-Instruct via modelscope, starts
    ``vllm.entrypoints.openai.api_server`` as a subprocess on the default
    port, and runs the shared client smoke test against it.
    """
    import subprocess
    import sys

    from modelscope import snapshot_download

    model_dir = snapshot_download('Qwen/Qwen2-7B-Instruct')
    args = [sys.executable, '-m', 'vllm.entrypoints.openai.api_server', '--model', model_dir]
    process = subprocess.Popen(args)
    try:
        _test_client()
    finally:
        # Always shut the server down, even if the client test raises,
        # and reap the child so it does not linger as a zombie.
        process.terminate()
        process.wait()
if __name__ == '__main__':
    # Toggle the commented lines to run a different backend's smoke test.
    # test_vllm_origin()
    # test_vllm()
    test_lmdeploy()
    # test_pt()