# ms-swift: tests/deploy/test_logprobs.py
def _test_client(port: int, print_logprobs: bool = False, test_vlm: bool = False):
    import requests
    import time
    import aiohttp
    from pprint import pprint
    from swift.llm import InferClient, InferRequest, RequestConfig
    infer_client = InferClient(port=port)
    # Poll until the deploy server is up: listing models raises a connection
    # error while the server is still starting.
    while True:
        try:
            models = infer_client.models
            print(f'models: {models}')
        except aiohttp.ClientConnectorError:
            time.sleep(5)
            continue
        break
    if test_vlm:
        query = '这是什么'  # "What is this?"
        # http://modelscope-open.oss-cn-hangzhou.aliyuncs.com/images/cat.png
        messages = [{
            'role': 'user',
            'content': [
                {
                    'type': 'text',
                    'text': '这是什么'
                },
                {
                    'type': 'image_url',
                    'image_url': {
                        'url': 'cat.png'
                    }
                },
            ]
        }]
    else:
        query = '123*234=?'
        messages = [{'role': 'user', 'content': query}]
    infer_request = InferRequest(messages=messages)
    # Non-streaming request; top_logprobs=5 asks for the 5 most likely
    # alternative tokens at every sampled position.
    request_config = RequestConfig(seed=42, max_tokens=256, temperature=0.8, logprobs=True, top_logprobs=5)
    resp = infer_client.infer([infer_request], request_config=request_config)[0]
    response = resp.choices[0].message.content
    print(f'query: {query}')
    print(f'response: {response}')
    if print_logprobs:
        pprint(resp.choices[0].logprobs)

    # Streaming request: the same prompt, with logprobs attached to each chunk.
    request_config = RequestConfig(
        stream=True, seed=42, max_tokens=256, temperature=0.8, top_k=20, top_p=0.8, logprobs=True, top_logprobs=5)
    gen_list = infer_client.infer([infer_request], request_config=request_config)
    print(f'query: {query}')
    print('response: ', end='')
    for chunk in gen_list[0]:
        if chunk is None:
            continue
        print(chunk.choices[0].delta.content, end='', flush=True)
        if print_logprobs and chunk.choices[0].logprobs is not None:
            pprint(chunk.choices[0].logprobs)
    print()
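

# A hedged sketch, not part of the original test: one way to digest the
# logprobs payload printed above, assuming the OpenAI-compatible layout
# ({'content': [{'token', 'logprob', 'top_logprobs': [...]}, ...]}).
# `_summarize_logprobs` is a hypothetical helper name.
def _summarize_logprobs(logprobs: dict, max_positions: int = 3) -> None:
    import math
    for pos in logprobs.get('content', [])[:max_positions]:
        prob = math.exp(pos['logprob'])  # log-probability -> probability
        alts = ', '.join(f"{alt['token']!r}={math.exp(alt['logprob']):.3f}"
                         for alt in pos.get('top_logprobs', []))
        print(f"token={pos['token']!r} p={prob:.3f} top: {alts}")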


def _test(infer_backend, test_vlm: bool = False):
    import os
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    import multiprocessing
    from swift.llm import DeployArguments, deploy_main
    # Use 'spawn' rather than 'fork': a forked child cannot safely reuse the
    # parent's CUDA context.
    mp = multiprocessing.get_context('spawn')
    model = 'Qwen/Qwen2-VL-7B-Instruct' if test_vlm else 'Qwen/Qwen2-7B-Instruct'
    args = DeployArguments(model=model, infer_backend=infer_backend, verbose=False)
    process = mp.Process(target=deploy_main, args=(args, ))
    process.start()
    _test_client(args.port, True, test_vlm)
    process.terminate()
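

# A sketch of a more careful teardown than a bare `terminate()`; `_shutdown`
# is a hypothetical helper and is not used by the tests in this file.
def _shutdown(process, timeout: float = 30.0) -> None:
    process.terminate()            # ask the server process to exit (SIGTERM)
    process.join(timeout=timeout)  # wait for it to wind down
    if process.is_alive():
        process.kill()             # escalate if it ignored SIGTERM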


def test_vllm_vlm():
    _test('vllm', test_vlm=True)


def test_vllm():
    _test('vllm')


def test_lmdeploy():
    _test('lmdeploy')


def test_pt():
    _test('pt')


def test_vllm_origin():
    import subprocess
    import sys
    from modelscope import snapshot_download
    model_dir = snapshot_download('Qwen/Qwen2-7B-Instruct')
    # Launch the upstream vLLM OpenAI-compatible server, which listens on
    # port 8000 by default.
    args = [sys.executable, '-m', 'vllm.entrypoints.openai.api_server', '--model', model_dir]
    process = subprocess.Popen(args)
    _test_client(8000)
    process.terminate()
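

# For comparison, the same logprobs request issued directly against the
# OpenAI-compatible endpoint with `requests`; a sketch assuming the server
# started by `test_vllm_origin` is on the default port 8000.
# `_raw_openai_request` is a hypothetical helper, not part of the original test.
def _raw_openai_request(model: str, port: int = 8000) -> dict:
    import requests
    payload = {
        'model': model,
        'messages': [{'role': 'user', 'content': '123*234=?'}],
        'max_tokens': 64,
        'logprobs': True,
        'top_logprobs': 5,
    }
    resp = requests.post(f'http://127.0.0.1:{port}/v1/chat/completions', json=payload, timeout=300)
    resp.raise_for_status()
    return resp.json()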


if __name__ == '__main__':
    # test_vllm_origin()
    # test_vllm()
    test_vllm_vlm()
    # test_lmdeploy()
    # test_pt()