def _test_client(port: int, print_logprobs: bool = False, test_vlm: bool = False):
    import time
    from pprint import pprint

    import aiohttp
    import requests

    from swift.llm import InferClient, InferRequest, RequestConfig

    infer_client = InferClient(port=port)

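    # Block until the deploy server is reachable; it is started asynchronously in a separate process.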
    while True:
        try:
            models = infer_client.models
            print(f'models: {models}')
        except aiohttp.ClientConnectorError:
            time.sleep(5)
            continue
        break

    if test_vlm:
        query = '这是什么'  # 'What is this?'
        messages = [{
            'role': 'user',
            'content': [{
                'type': 'text',
                'text': query
            }, {
                'type': 'image_url',
                'image_url': {
                    'url': 'cat.png'
                }
            }]
        }]
    else:
        query = '123*234=?'
        messages = [{'role': 'user', 'content': query}]

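    # Non-streaming request: fixed seed, capped tokens, and top-5 logprobs returned with the response.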
    infer_request = InferRequest(messages=messages)
    request_config = RequestConfig(seed=42, max_tokens=256, temperature=0.8, logprobs=True, top_logprobs=5)

    resp = infer_client.infer([infer_request], request_config=request_config)[0]
    response = resp.choices[0].message.content
    print(f'query: {query}')
    print(f'response: {response}')
    if print_logprobs:
        pprint(resp.choices[0].logprobs)

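    # Streaming request: same prompt, consuming the chunk generator and printing incremental deltas.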
    request_config = RequestConfig(
        stream=True, seed=42, max_tokens=256, temperature=0.8, top_k=20, top_p=0.8, logprobs=True, top_logprobs=5)
    gen_list = infer_client.infer([infer_request], request_config=request_config)
    print(f'query: {query}')
    print('response: ', end='')
    for chunk in gen_list[0]:
        if chunk is None:
            continue
        print(chunk.choices[0].delta.content, end='', flush=True)
        if print_logprobs and chunk.choices[0].logprobs is not None:
            pprint(chunk.choices[0].logprobs)
    print()


def _test(infer_backend, test_vlm: bool = False):
    import os
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'

    import multiprocessing

    from swift.llm import DeployArguments, deploy_main
    mp = multiprocessing.get_context('spawn')
    model = 'Qwen/Qwen2-VL-7B-Instruct' if test_vlm else 'Qwen/Qwen2-7B-Instruct'
    args = DeployArguments(model=model, infer_backend=infer_backend, verbose=False)
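    # Start the deploy server (deploy_main) in a spawned child process, then run the client checks against it.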
    process = mp.Process(target=deploy_main, args=(args, ))
    process.start()
    _test_client(args.port, True, test_vlm)
    process.terminate()


def test_vllm_vlm():
    _test('vllm', test_vlm=True)


def test_vllm():
    _test('vllm')


def test_lmdeploy():
    _test('lmdeploy')


def test_pt():
    _test('pt')


def test_vllm_origin():
    import os
    import subprocess
    import sys

    from modelscope import snapshot_download
    model_dir = snapshot_download('Qwen/Qwen2-7B-Instruct')
    args = [sys.executable, '-m', 'vllm.entrypoints.openai.api_server', '--model', model_dir]
    process = subprocess.Popen(args)
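    # The native vLLM OpenAI-compatible server listens on port 8000 by default.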
    _test_client(8000)
    process.terminate()


if __name__ == '__main__':
    test_vllm_vlm()