import os

os.environ['CUDA_VISIBLE_DEVICES'] = '0,1,2,3'


def _infer_model(pt_engine, system=None, messages=None, audios=None):
    """Run a short (optionally audio) conversation against `pt_engine` and return the final response text."""
    seed_everything(42)
    request_config = RequestConfig(max_tokens=128, temperature=0)
    if messages is None:
        # Default conversation: greet the model first, then ask about the audio clip.
        messages = []
        if system is not None:
            messages += [{'role': 'system', 'content': system}]
        messages += [{'role': 'user', 'content': '你好'}]  # "Hello"
        resp = pt_engine.infer([{'messages': messages}], request_config=request_config)
        response = resp[0].choices[0].message.content
        messages += [{'role': 'assistant', 'content': response}]
        messages += [{'role': 'user', 'content': '<audio>这段语音说了什么'}]  # "What is said in this audio clip?"
    else:
        messages = messages.copy()
    if audios is None:
        audios = ['http://modelscope-open.oss-cn-hangzhou.aliyuncs.com/images/weather.wav']
    resp = pt_engine.infer([{'messages': messages, 'audios': audios}], request_config=request_config)
    response = resp[0].choices[0].message.content
    messages += [{'role': 'assistant', 'content': response}]
    logger.info(f'model: {pt_engine.model_info.model_name}, messages: {messages}')
    return response


def test_qwen_audio():
    pt_engine = PtEngine('Qwen/Qwen-Audio-Chat')
    _infer_model(pt_engine)


def test_qwen2_audio():
    pt_engine = PtEngine('Qwen/Qwen2-Audio-7B-Instruct')
    messages = [{'role': 'user', 'content': '<audio>'}]
    audios = ['https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen2-Audio/audio/guess_age_gender.wav']
    response = _infer_model(pt_engine, messages=messages, audios=audios)
    # Re-run with the jinja template backend and check that both backends produce the same output.
    pt_engine.default_template.template_backend = 'jinja'
    response2 = _infer_model(pt_engine, messages=messages, audios=audios)
    assert response == response2 == 'Yes, the speaker is female and in her twenties.'


def test_xcomposer2d5_ol():
    pt_engine = PtEngine('Shanghai_AI_Laboratory/internlm-xcomposer2d5-ol-7b:audio')
    _infer_model(pt_engine)
    pt_engine.default_template.template_backend = 'jinja'
    _infer_model(pt_engine)


def test_step_audio_chat():
    pt_engine = PtEngine('stepfun-ai/Step-Audio-Chat')
    response = _infer_model(pt_engine, messages=[{'role': 'user', 'content': '<audio>'}])
    # Expected reply (zh): "Yes, the weather today is clear and sunny with a gentle breeze, perfect for
    # outdoor activities. The sky is blue with scattered clouds, which lifts the mood. Enjoy this beautiful day!"
    assert response == ('是的呢,今天天气晴朗,阳光明媚,微风和煦,非常适合外出活动。天空湛蓝,白云朵朵,让人心情愉悦。希望你能好好享受这美好的一天!')


def test_qwen2_5_omni():
    USE_AUDIO_IN_VIDEO = True
    # Export the flag as an environment variable so the template picks it up when processing inputs.
    os.environ['USE_AUDIO_IN_VIDEO'] = str(USE_AUDIO_IN_VIDEO)
    pt_engine = PtEngine('Qwen/Qwen2.5-Omni-7B')
    response = _infer_model(pt_engine)
    pt_engine.default_template.template_backend = 'jinja'
    response2 = _infer_model(pt_engine)
    assert response == response2


def test_gemma3n():
    pt_engine = PtEngine('google/gemma-3n-E4B-it')
    messages = [{'role': 'user', 'content': '<audio>Transcribe this audio and complete the statement'}]
    audios = ['https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen2-Audio/audio/guess_age_gender.wav']
    response = _infer_model(pt_engine, messages=messages, audios=audios)
    pt_engine.default_template.template_backend = 'jinja'
    response2 = _infer_model(pt_engine, messages=messages, audios=audios)
    assert response == response2


if __name__ == '__main__':
    # Deferred imports: names bound here become module globals used by the test functions above.
    from swift.llm import PtEngine, RequestConfig
    from swift.utils import get_logger, seed_everything
    logger = get_logger()
    test_gemma3n()