|
import os |
|
|
|
import json |
|
import torch |
|
|
|
os.environ['CUDA_VISIBLE_DEVICES'] = '0,1,2,3' |
|
os.environ['SWIFT_DEBUG'] = '1' |
|
|
|
|
|
def _infer_model(pt_engine, system=None, messages=None):
    """Run one deterministic inference through `pt_engine` and return the reply text.

    If `messages` is None, a two-turn Chinese smoke-test conversation is built
    (optionally prefixed with a `system` message); otherwise `messages` is used
    as-is. The caller's list is never mutated (it is copied before appending).
    """
    # Fixed seed + temperature=0 make repeated runs comparable across template backends.
    seed_everything(42)
    request_config = RequestConfig(max_tokens=128, temperature=0)
    if messages is None:
        messages = []
        if system is not None:
            messages += [{'role': 'system', 'content': system}]
        messages += [{'role': 'user', 'content': '你好'}]
        # First turn: greet the model, capture its reply.
        resp = pt_engine.infer([{'messages': messages}], request_config=request_config)
        response = resp[0].choices[0].message.content
        # Second turn uses an '<image>' placeholder tag; no image payload is attached here.
        messages += [{'role': 'assistant', 'content': response}, {'role': 'user', 'content': '<image>这是什么'}]
    else:
        # Copy so the appends below do not mutate the caller's list.
        messages = messages.copy()
    resp = pt_engine.infer([{
        'messages': messages,
    }], request_config=request_config)
    response = resp[0].choices[0].message.content
    messages += [{'role': 'assistant', 'content': response}]
    logger.info(f'model: {pt_engine.model_info.model_name}, messages: {messages}')
    return response
|
|
|
|
|
def test_baichuan_m1():
    """Baichuan-M1-14B-Instruct: pin the exact greedy reply for a fixed prompt."""
    engine = PtEngine('baichuan-inc/Baichuan-M1-14B-Instruct')
    query = [{'role': 'user', 'content': '你是谁'}]
    reply = _infer_model(engine, messages=query)
    assert reply == '我是一个人工智能助手,可以回答你的问题并提供帮助。'
|
|
|
|
|
def test_qwen2_5():
    """Qwen2.5-7B-Instruct-1M: swift and jinja template backends must agree."""
    engine = PtEngine('Qwen/Qwen2.5-7B-Instruct-1M')
    swift_out = _infer_model(engine)
    # Re-run the identical conversation through the HF jinja chat template.
    engine.default_template.template_backend = 'jinja'
    jinja_out = _infer_model(engine)
    assert swift_out == jinja_out
|
|
|
|
|
def test_qwen3():
    """Qwen3-4B: the built-in swift template must match the jinja chat template."""
    engine = PtEngine('Qwen/Qwen3-4B')
    swift_out = _infer_model(engine)
    engine.default_template.template_backend = 'jinja'
    jinja_out = _infer_model(engine)
    assert swift_out == jinja_out
|
|
|
|
|
def test_phi4():
    """phi-4: outputs from the swift and jinja template backends must be identical."""
    engine = PtEngine('LLM-Research/phi-4')
    swift_out = _infer_model(engine)
    engine.default_template.template_backend = 'jinja'
    jinja_out = _infer_model(engine)
    assert swift_out == jinja_out
|
|
|
|
|
def test_phi4_mini():
    """Phi-4-mini-instruct: swift vs. jinja template backend equivalence check."""
    engine = PtEngine('LLM-Research/Phi-4-mini-instruct')
    swift_out = _infer_model(engine)
    engine.default_template.template_backend = 'jinja'
    jinja_out = _infer_model(engine)
    assert swift_out == jinja_out
|
|
|
|
|
def test_qwen1half():
    """Smoke-test the GPTQ-Int4 Qwen1.5 checkpoint under both template backends."""
    engine = PtEngine('Qwen/Qwen1.5-0.5B-Chat-GPTQ-Int4')
    _infer_model(engine)
    # No output comparison here — this only checks both backends run without error.
    engine.default_template.template_backend = 'jinja'
    _infer_model(engine)
|
|
|
|
|
def test_glm4():
    """glm-4-9b-chat: swift and jinja template backends must produce the same reply."""
    engine = PtEngine('ZhipuAI/glm-4-9b-chat')
    swift_out = _infer_model(engine)
    engine.default_template.template_backend = 'jinja'
    jinja_out = _infer_model(engine)
    assert swift_out == jinja_out
|
|
|
|
|
def test_glm4_0414():
    """Every GLM-4-0414 variant must give identical output under both template backends."""
    for model_id in ('ZhipuAI/GLM-4-9B-0414', 'ZhipuAI/GLM-Z1-9B-0414', 'ZhipuAI/GLM-Z1-Rumination-32B-0414'):
        engine = PtEngine(model_id)
        swift_out = _infer_model(engine)
        engine.default_template.template_backend = 'jinja'
        jinja_out = _infer_model(engine)
        assert swift_out == jinja_out
|
|
|
|
|
def test_qwq():
    """QwQ-32B-Preview: swift vs. jinja template backend equivalence check."""
    engine = PtEngine('Qwen/QwQ-32B-Preview')
    swift_out = _infer_model(engine)
    engine.default_template.template_backend = 'jinja'
    jinja_out = _infer_model(engine)
    assert swift_out == jinja_out
|
|
|
|
|
def test_internlm():
    """Smoke-test internlm-chat-7b with the default template backend only."""
    _infer_model(PtEngine('Shanghai_AI_Laboratory/internlm-chat-7b'))
|
|
|
|
|
def test_internlm2():
    """Smoke-test internlm2_5-1_8b-chat under both template backends (no comparison)."""
    engine = PtEngine('Shanghai_AI_Laboratory/internlm2_5-1_8b-chat')
    _infer_model(engine)
    engine.default_template.template_backend = 'jinja'
    _infer_model(engine)
|
|
|
|
|
def test_internlm3():
    """internlm3-8b-instruct: explicit empty system (swift) vs. jinja default must agree."""
    engine = PtEngine('Shanghai_AI_Laboratory/internlm3-8b-instruct')
    # NOTE(review): the swift-backend run passes system='' while the jinja run passes
    # no system at all — presumably the jinja template supplies an equivalent default;
    # confirm this asymmetry is intentional.
    swift_out = _infer_model(engine, system='')
    engine.default_template.template_backend = 'jinja'
    jinja_out = _infer_model(engine)
    assert swift_out == jinja_out
|
|
|
|
|
def test_yi_coder():
    """Smoke-test Yi-Coder-1.5B-Chat under both template backends (no comparison)."""
    engine = PtEngine('01ai/Yi-Coder-1.5B-Chat')
    _infer_model(engine)
    engine.default_template.template_backend = 'jinja'
    _infer_model(engine)
|
|
|
|
|
def test_yi():
    """Smoke-test Yi-6B-Chat under both template backends (no comparison)."""
    engine = PtEngine('01ai/Yi-6B-Chat')
    _infer_model(engine)
    engine.default_template.template_backend = 'jinja'
    _infer_model(engine)
|
|
|
|
|
def test_deepseek_moe():
    """Smoke-test deepseek-moe-16b-chat with the default template backend only."""
    _infer_model(PtEngine('deepseek-ai/deepseek-moe-16b-chat'))
|
|
|
|
|
def test_codegeex4():
    """Smoke-test codegeex4-all-9b under both template backends (no comparison)."""
    engine = PtEngine('ZhipuAI/codegeex4-all-9b')
    _infer_model(engine)
    engine.default_template.template_backend = 'jinja'
    _infer_model(engine)
|
|
|
|
|
def test_telechat():
    """TeleChat-12B (fp16): pin the exact self-introduction reply for '你是谁'."""
    engine = PtEngine('TeleAI/TeleChat-12B', torch_dtype=torch.float16)
    reply = _infer_model(engine, messages=[{'role': 'user', 'content': '你是谁'}])
    expected = ('我是中国电信星辰语义大模型,英文名TeleChat,是由中国电信自主研发的生成式大语言模型。\n\n'
                '我基于Transformer-decoder结构,学习了海量知识,包括百科、书籍、论坛、党政媒体、GitHub代码、专业领域知识等,'
                '具备自然语言处理、语义理解、内容创作和逻辑推理等能力,可以与人类进行对话互动和情感交流,还能提供知识问答、创作写作、'
                '代码生成等服务,希望能为人类带来更加智能、高效和便捷的工作与生活体验。')
    assert reply == expected
|
|
|
|
|
def test_telechat2():
    """TeleChat2-7B-32K (fp16): both template backends must agree for the same chat."""
    chat = [{'role': 'system', 'content': '你是一个乐于助人的智能助手,请使用用户提问的语言进行有帮助的问答'}, {'role': 'user', 'content': '你好'}]
    engine = PtEngine('TeleAI/TeleChat2-7B-32K', torch_dtype=torch.float16)
    swift_out = _infer_model(engine, messages=chat)
    engine.default_template.template_backend = 'jinja'
    jinja_out = _infer_model(engine, messages=chat)
    assert swift_out == jinja_out
|
|
|
|
|
def test_glm_edge():
    """Smoke-test glm-edge-1.5b-chat under both template backends (no comparison)."""
    engine = PtEngine('ZhipuAI/glm-edge-1.5b-chat')
    _infer_model(engine)
    engine.default_template.template_backend = 'jinja'
    _infer_model(engine)
|
|
|
|
|
def test_llama():
    """Llama-3.2-1B-Instruct via vLLM: swift vs. jinja template with an empty system."""
    from swift.llm import VllmEngine

    engine = VllmEngine('LLM-Research/Llama-3.2-1B-Instruct')
    res = _infer_model(engine, system='')
    engine.default_template.template_backend = 'jinja'
    res2 = _infer_model(engine, system='')
    assert res == res2, f'res: {res}, res2: {res2}'
|
|
|
|
|
def test_openbuddy():
    """openbuddy-nemotron-70b: empty system (swift) vs. jinja default must agree."""
    engine = PtEngine('OpenBuddy/openbuddy-nemotron-70b-v23.2-131k')
    # NOTE(review): the swift run passes system='' while the jinja run passes none —
    # presumably the jinja template injects an equivalent default; confirm intentional.
    res = _infer_model(engine, system='')
    engine.default_template.template_backend = 'jinja'
    res2 = _infer_model(engine)
    assert res == res2, f'res: {res}, res2: {res2}'
|
|
|
|
|
def test_megrez():
    """Megrez-3b-Instruct: swift and jinja template backends must agree."""
    engine = PtEngine('InfiniAI/Megrez-3b-Instruct')
    res = _infer_model(engine)
    engine.default_template.template_backend = 'jinja'
    res2 = _infer_model(engine)
    assert res == res2, f'res: {res}, res2: {res2}'
|
|
|
|
|
def test_skywork_o1():
    """Skywork-o1-Open-Llama-3.1-8B: pin the exact greedy answer to a word problem."""
    pt_engine = PtEngine('AI-ModelScope/Skywork-o1-Open-Llama-3.1-8B')
    res = _infer_model(
        pt_engine,
        messages=[{
            'role':
            'user',
            'content':
            ('Jane has 12 apples. She gives 4 apples to her friend Mark, then buys 1 more apple, and finally splits '
             'all her apples equally among herself and her 2 siblings. How many apples does each person get?')
        }])
    # The expected text ends mid-sentence — presumably cut off by the max_tokens=128
    # cap applied in _infer_model; confirm if the pinned value ever changes.
    assert res == ("To solve the problem, let's break it down into a series of logical steps:\n\n1. **Initial Number "
                   'of Apples**: Jane starts with 12 apples.\n2. **Apples Given Away**: Jane gives 4 apples to her '
                   'friend Mark. So, the number of apples she has now is:\n \\[\n 12 - 4 = 8\n \\]\n3. **Apples '
                   'Bought**: Jane then buys 1 more apple. So, the number of apples she has now is:\n \\[\n '
                   '8 + 1 = 9\n \\]\n4. **Apples Split Equally')
|
|
|
|
|
def test_internlm2_reward():
    """internlm2-1_8b-reward: both backends must yield the same pinned reward score."""
    chat = [
        {'role': 'user', 'content': "Hello! What's your name?"},
        {'role': 'assistant', 'content': 'My name is InternLM2! A helpful AI assistant. What can I do for you?'},
    ]
    engine = PtEngine('Shanghai_AI_Laboratory/internlm2-1_8b-reward')
    res = _infer_model(engine, messages=chat)
    engine.default_template.template_backend = 'jinja'
    res2 = _infer_model(engine, messages=chat)
    # Reward models return a scalar score rendered as a string.
    assert res == res2 == '0.48681640625'
|
|
|
|
|
def test_qwen2_reward():
    """Qwen2-Math-RM-72B: reward scores are pinned per template backend (they differ)."""
    pt_engine = PtEngine('Qwen/Qwen2-Math-RM-72B')
    messages = [{
        'role':
        'user',
        'content': ('Suppose that a certain software product has a mean time between failures of 10,000 hours '
                    'and has a mean time to repair of 20 hours. If the product is used by 100 customers, '
                    'what is its availability?\nAnswer Choices: (A) 80% (B) 90% (C) 98% (D) 99.80%\nPlease '
                    'reason step by step, and put your final answer within \\boxed{}.')
    }, {
        'role':
        'assistant',
        'content': ("To find the availability of the software product, we'll use the formula:\n\n\\[ \\text{ "
                    'availability} = \\frac{\\text{Mean Time Between Failures (MTBF)}}{\\text{Mean Time Between '
                    'Failures (MTBF) + Mean Time To Repair (MTTR)}} \\]\n\nGiven:\n- MTBF = 10,000 hours\n- MTTR '
                    "= 20 hours\n\nLet's plug these values into the formula:\n\n\\[ \\text{availability} = "
                    '\\frac{10,000}{10,000 + 20} = \\frac{10,000}{10,020} \\]\n\nTo simplify this fraction, '
                    'we can divide both the numerator and the denominator by 10,000:\n\n\\[ \\text{availability} ='
                    ' \\frac{10,000 \\div 10,000}{10,020 \\div 10,000} = \\frac{1}{1.002} \\]\n\nTo express this as'
                    ' a percentage, we can calculate the decimal value of the fraction and then multiply by '
                    '100:\n\n\\[ \\text{availability} \\approx 0.998002 \\times 100 = 99.80\\% \\]\n\nTherefore, '
                    'the availability of the software product is approximately 99.80%.\n\nThe correct answer is '
                    '\\boxed{D}')
    }]
    res = _infer_model(pt_engine, messages=messages)
    pt_engine.default_template.template_backend = 'jinja'
    res2 = _infer_model(pt_engine, messages=messages)
    # The two chat templates build different prompts, hence different pinned scores.
    assert res == '1.84375' and res2 == '1.390625'
|
|
|
|
|
def test_qwen2_5_math():
    """Qwen2.5-Math-1.5B-Instruct: both template backends must agree on a math prompt."""
    chat = [{'role': 'user', 'content': 'Find the value of $x$ that satisfies the equation $4x+5 = 6x+7$.'}]
    engine = PtEngine('Qwen/Qwen2.5-Math-1.5B-Instruct')
    swift_out = _infer_model(engine, messages=chat)
    engine.default_template.template_backend = 'jinja'
    jinja_out = _infer_model(engine, messages=chat)
    assert swift_out == jinja_out
|
|
|
|
|
def test_skywork_reward():
    """Skywork-Reward-Llama-3.1-8B-v0.2: reward scores are pinned per template backend."""
    prompt = ('Jane has 12 apples. She gives 4 apples to her friend Mark, then buys 1 more apple, and finally splits '
              'all her apples equally among herself and her 2 siblings. How many apples does each person get?')
    response = ('1. Jane starts with 12 apples and gives 4 to Mark. 12 - 4 = 8. Jane now has 8 apples.\n2. Jane buys '
                '1 more apple. 8 + 1 = 9. Jane now has 9 apples.\n3. Jane splits the 9 apples equally among herself '
                'and her 2 siblings (3 people in total). 9 ÷ 3 = 3 apples each. Each person gets 3 apples.')
    chat = [{'role': 'user', 'content': prompt}, {'role': 'assistant', 'content': response}]
    engine = PtEngine('AI-ModelScope/Skywork-Reward-Llama-3.1-8B-v0.2')
    res = _infer_model(engine, messages=chat)
    engine.default_template.template_backend = 'jinja'
    res2 = _infer_model(engine, messages=chat)
    # The two templates build different prompts, so each backend has its own pin.
    assert res == '14.25'
    assert res2 == '13.8125'
|
|
|
|
|
def test_deepseek_r1_distill():
    """DeepSeek-R1-Distill-Qwen-1.5B: swift and jinja template backends must agree."""
    engine = PtEngine('deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B')
    res = _infer_model(engine)
    engine.default_template.template_backend = 'jinja'
    res2 = _infer_model(engine)
    assert res == res2, f'res: {res}, res2: {res2}'
|
|
|
|
|
def test_deepseek_prover_v2():
    """DeepSeek-Prover-V2-7B: swift and jinja template backends must agree."""
    engine = PtEngine('deepseek-ai/DeepSeek-Prover-V2-7B')
    res = _infer_model(engine)
    engine.default_template.template_backend = 'jinja'
    res2 = _infer_model(engine)
    assert res == res2, f'res: {res}, res2: {res2}'
|
|
|
|
|
def test_qwen2_5_prm():
    """Qwen2.5-Math-7B-PRM800K: per-step process-reward scores must match across backends.

    The assistant turn joins the solution steps with the '<extra_0>' separator token,
    and the model returns one score per step (serialized here as a JSON list).
    """
    pt_engine = PtEngine('Qwen/Qwen2.5-Math-7B-PRM800K')
    data = {
        'system':
        'Please reason step by step, and put your final answer within \\boxed{}.',
        'query': ('Sue lives in a fun neighborhood. One weekend, the neighbors decided to play a prank on Sue. '
                  "On Friday morning, the neighbors placed 18 pink plastic flamingos out on Sue's front yard. "
                  'On Saturday morning, the neighbors took back one third of the flamingos, painted them white, and '
                  "put these newly painted white flamingos back out on Sue's front yard. Then, on Sunday morning, "
                  'they added another 18 pink plastic flamingos to the collection. At noon on Sunday, how many more '
                  'pink plastic flamingos were out than white plastic flamingos?'),
        'response':
        [('To find out how many more pink plastic flamingos were out than white plastic flamingos at noon on Sunday, '
          'we can break down the problem into steps. First, on Friday, the neighbors start with 18 pink '
          'plastic flamingos.'),
         ('On Saturday, they take back one third of the flamingos. Since there were 18 flamingos, (1/3 \\times 18 = 6) '
          'flamingos are taken back. So, they have (18 - 6 = 12) flamingos left in their possession. Then, they paint '
          "these 6 flamingos white and put them back out on Sue's front yard. Now, Sue has the original 12 pink "
          'flamingos plus the 6 new white ones. Thus, by the end of Saturday, Sue has (12 + 6 = 18) pink flamingos '
          'and 6 white flamingos.'),
         ("On Sunday, the neighbors add another 18 pink plastic flamingos to Sue's front yard. By the end of Sunday "
          'morning, Sue has (18 + 18 = 36) pink flamingos and still 6 white flamingos.'),
         ('To find the difference, subtract the number of white flamingos from the number of pink '
          'flamingos: (36 - 6 = 30). Therefore, at noon on Sunday, there were 30 more pink plastic flamingos out '
          'than white plastic flamingos. The answer is (\\boxed{30}).')]
    }

    messages = [
        {
            'role': 'system',
            'content': data['system']
        },
        {
            'role': 'user',
            'content': data['query']
        },
        {
            # Steps separated (and terminated) by the PRM's '<extra_0>' marker token.
            'role': 'assistant',
            'content': '<extra_0>'.join(data['response']) + '<extra_0>'
        },
    ]
    res = _infer_model(pt_engine, messages=messages)
    pt_engine.default_template.template_backend = 'jinja'
    res2 = _infer_model(pt_engine, messages=messages)
    # Four steps -> four pinned per-step reward scores, identical for both backends.
    assert res == res2 == json.dumps([0.9921875, 0.2490234375, 0.70703125, 0.9375]), f'res: {res}, res2: {res2}'
|
|
|
|
|
def test_mistral_small():
    """Mistral-Small-24B-Instruct-2501: swift vs. jinja template backend equivalence."""
    engine = PtEngine('mistralai/Mistral-Small-24B-Instruct-2501')
    swift_out = _infer_model(engine)
    engine.default_template.template_backend = 'jinja'
    jinja_out = _infer_model(engine)
    assert swift_out == jinja_out
|
|
|
|
|
def test_moonlight():
    """Moonlight-16B-A3B-Instruct: swift and jinja template backends must agree."""
    engine = PtEngine('moonshotai/Moonlight-16B-A3B-Instruct')
    res = _infer_model(engine)
    engine.default_template.template_backend = 'jinja'
    res2 = _infer_model(engine)
    assert res == res2, f'res: {res}, res2: {res2}'
|
|
|
|
|
def test_ling():
    """Ling-lite: swift and jinja template backends must agree."""
    engine = PtEngine('inclusionAI/Ling-lite')
    res = _infer_model(engine)
    engine.default_template.template_backend = 'jinja'
    res2 = _infer_model(engine)
    assert res == res2, f'res: {res}, res2: {res2}'
|
|
|
|
|
def test_gemma3():
    """gemma-3-1b-it: both backends must agree when a system prompt is supplied."""
    system_prompt = 'You are a helpful assistant'
    engine = PtEngine('LLM-Research/gemma-3-1b-it')
    res = _infer_model(engine, system=system_prompt)
    engine.default_template.template_backend = 'jinja'
    res2 = _infer_model(engine, system=system_prompt)
    assert res == res2, f'res: {res}, res2: {res2}'
|
|
|
|
|
def test_mimo():
    """MiMo-7B-RL-0530: swift and jinja template backends must agree."""
    engine = PtEngine('XiaomiMiMo/MiMo-7B-RL-0530')
    res = _infer_model(engine)
    engine.default_template.template_backend = 'jinja'
    res2 = _infer_model(engine)
    assert res == res2, f'res: {res}, res2: {res2}'
|
|
|
|
|
def test_minicpm():
    """MiniCPM4-0.5B: swift and jinja template backends must agree."""
    engine = PtEngine('OpenBMB/MiniCPM4-0.5B')
    res = _infer_model(engine)
    engine.default_template.template_backend = 'jinja'
    res2 = _infer_model(engine)
    assert res == res2, f'res: {res}, res2: {res2}'
|
|
|
|
|
def test_minimax():
    """MiniMax-M1-40k with int8 Quanto quantization: smoke-test only (no pinned output)."""
    # This model needs all eight GPUs; widen the device mask set at module import.
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1,2,3,4,5,6,7'
    from transformers import QuantoConfig
    chat = [
        {'role': 'system', 'content': 'You are a helpful assistant.'},
        {'role': 'user', 'content': 'who are you?'},
    ]
    engine = PtEngine('MiniMax/MiniMax-M1-40k', quantization_config=QuantoConfig(weights='int8'))
    res = _infer_model(engine, messages=chat)
    print(f'res: {res}')
|
|
|
|
|
def test_kimi_dev():
    """Kimi-Dev-72B: swift and jinja template backends must agree."""
    engine = PtEngine('moonshotai/Kimi-Dev-72B')
    res = _infer_model(engine)
    engine.default_template.template_backend = 'jinja'
    res2 = _infer_model(engine)
    assert res == res2, f'res: {res}, res2: {res2}'
|
|
|
|
|
def test_hunyuan():
    """Hunyuan-A13B-Instruct: swift and jinja template backends must agree."""
    engine = PtEngine('Tencent-Hunyuan/Hunyuan-A13B-Instruct')
    res = _infer_model(engine)
    engine.default_template.template_backend = 'jinja'
    res2 = _infer_model(engine)
    assert res == res2, f'res: {res}, res2: {res2}'
|
|
|
|
|
def test_ernie():
    """ERNIE-4.5-0.3B-PT: swift and jinja template backends must agree."""
    engine = PtEngine('PaddlePaddle/ERNIE-4.5-0.3B-PT')
    res = _infer_model(engine)
    engine.default_template.template_backend = 'jinja'
    res2 = _infer_model(engine)
    assert res == res2, f'res: {res}, res2: {res2}'
|
|
|
|
|
def test_devstral():
    """Devstral-Small-2505: swift template output must match an explicitly-supplied
    jinja chat template.

    The model repo's own chat template is overridden below with a known-good copy
    (assigned to `pt_engine.processor.chat_template`) before the jinja run, so the
    comparison is against this exact template text rather than whatever ships with
    the checkpoint.
    """
    from swift.llm.template.template.mistral import devstral_small_2505_system

    pt_engine = PtEngine('mistralai/Devstral-Small-2505')
    res = _infer_model(pt_engine, system=devstral_small_2505_system)

    pt_engine.default_template.template_backend = 'jinja'

    # Mistral-style chat template: system prompt handling, tool-call formatting and
    # strict user/assistant alternation checks. Kept verbatim — do not reformat.
    chat_template = (
        '{%- set today = strftime_now("%Y-%m-%d") %}\n'
        '{%- set default_system_message = "You are Mistral Small 3, a Large Language Model (LLM) '
        'created by Mistral AI, a French startup headquartered in Paris.\\nYour knowledge base was '
        'last updated on 2023-10-01. The current date is " + today + ".\\n\\nWhen you\'re not sure '
        'about some information, you say that you don\'t have the information and don\'t make up '
        'anything.\\nIf the user\'s question is not clear, ambiguous, or does not provide enough '
        'context for you to accurately answer the question, you do not try to answer it right away '
        'and you rather ask the user to clarify their request (e.g. \\"What are some good restaurants '
        'around me?\\" => \\"Where are you?\\" or \\"When is the next flight to Tokyo\\" => '
        '\\"Where do you travel from?\\")" %}\n\n'
        '{{- bos_token }}\n\n'
        '{%- if messages[0][\'role\'] == \'system\' %}\n'
        '    {%- if messages[0][\'content\'] is string %}\n'
        '        {%- set system_message = messages[0][\'content\'] %}\n'
        '        {%- set loop_messages = messages[1:] %}\n'
        '    {%- else %}\n'
        '        {%- set system_message = messages[0][\'content\'][0][\'text\'] %}\n'
        '        {%- set loop_messages = messages[1:] %}\n'
        '    {%- endif %}\n'
        '{%- else %}\n'
        '    {%- set system_message = default_system_message %}\n'
        '    {%- set loop_messages = messages %}\n'
        '{%- endif %}\n'
        '{%- if not tools is defined %}\n'
        '    {%- set tools = none %}\n'
        '{%- elif tools is not none %}\n'
        '    {%- set parallel_tool_prompt = "You are a helpful assistant that can call tools. '
        'If you call one or more tools, format them in a single JSON array or objects, where each '
        'object is a tool call, not as separate objects outside of an array or multiple arrays. '
        'Use the format [{\\"name\\": tool call name, \\"arguments\\": tool call arguments}, '
        'additional tool calls] if you call more than one tool. If you call tools, do not attempt '
        'to interpret them or otherwise provide a response until you receive a tool call result '
        'that you can interpret for the user." %}\n'
        '    {%- if system_message is defined %}\n'
        '        {%- set system_message = parallel_tool_prompt + "\\n\\n" + system_message %}\n'
        '    {%- else %}\n'
        '        {%- set system_message = parallel_tool_prompt %}\n'
        '    {%- endif %}\n'
        '{%- endif %}\n'
        '{{- \'[SYSTEM_PROMPT]\' + system_message + \'[/SYSTEM_PROMPT]\' }}\n\n'
        '{%- set user_messages = loop_messages | selectattr("role", "equalto", "user") | list %}\n\n'
        '{%- set filtered_messages = [] %}\n'
        '{%- for message in loop_messages %}\n'
        '    {%- if message["role"] not in ["tool", "tool_results"] and not message.get("tool_calls") %}\n'
        '        {%- set filtered_messages = filtered_messages + [message] %}\n'
        '    {%- endif %}\n'
        '{%- endfor %}\n\n'
        '{%- for message in filtered_messages %}\n'
        '    {%- if (message["role"] == "user") != (loop.index0 % 2 == 0) %}\n'
        '        {{- raise_exception("After the optional system message, conversation roles must '
        'alternate user/assistant/user/assistant/...") }}\n'
        '    {%- endif %}\n'
        '{%- endfor %}\n\n'
        '{%- for message in loop_messages %}\n'
        '    {%- if message["role"] == "user" %}\n'
        '        {%- if tools is not none and (message == user_messages[-1]) %}\n'
        '            {{- "[AVAILABLE_TOOLS] [" }}\n'
        '            {%- for tool in tools %}\n'
        '                {%- set tool = tool.function %}\n'
        '                {{- \'{"type": "function", "function": {\' }}\n'
        '                {%- for key, val in tool.items() if key != "return" %}\n'
        '                    {%- if val is string %}\n'
        '                        {{- \'"\' + key + \'": "\' + val + \'"\' }}\n'
        '                    {%- else %}\n'
        '                        {{- \'"\' + key + \'": \' + val|tojson }}\n'
        '                    {%- endif %}\n'
        '                    {%- if not loop.last %}\n'
        '                        {{- ", " }}\n'
        '                    {%- endif %}\n'
        '                {%- endfor %}\n'
        '                {{- "}}" }}\n'
        '                {%- if not loop.last %}\n'
        '                    {{- ", " }}\n'
        '                {%- else %}\n'
        '                    {{- "]" }}\n'
        '                {%- endif %}\n'
        '            {%- endfor %}\n'
        '            {{- "[/AVAILABLE_TOOLS]" }}\n'
        '        {%- endif %}\n'
        '        {%- if message[\'content\'] is string %}\n'
        '            {{- \'[INST]\' + message[\'content\'] + \'[/INST]\' }}\n'
        '        {%- else %}\n'
        '            {{- \'[INST]\' }}\n'
        '            {%- for block in message[\'content\'] %}\n'
        '                {%- if block[\'type\'] == \'text\' %}\n'
        '                    {{- block[\'text\'] }}\n'
        '                {%- elif block[\'type\'] == \'image\' or block[\'type\'] == \'image_url\' %}\n'
        '                    {{- \'[IMG]\' }}\n'
        '                {%- else %}\n'
        '                    {{- raise_exception(\'Only text and image blocks are supported '
        'in message content!\') }}\n'
        '                {%- endif %}\n'
        '            {%- endfor %}\n'
        '            {{- \'[/INST]\' }}\n'
        '        {%- endif %}\n'
        '    {%- elif message["role"] == "tool_calls" or message.tool_calls is defined %}\n'
        '        {%- if message.tool_calls is defined %}\n'
        '            {%- set tool_calls = message.tool_calls %}\n'
        '        {%- else %}\n'
        '            {%- set tool_calls = message.content %}\n'
        '        {%- endif %}\n'
        '        {{- "[TOOL_CALLS] [" }}\n'
        '        {%- for tool_call in tool_calls %}\n'
        '            {%- set out = tool_call.function|tojson %}\n'
        '            {{- out[:-1] }}\n'
        '            {%- if not tool_call.id is defined or tool_call.id|length < 9 %}\n'
        '                {{- raise_exception("Tool call IDs should be alphanumeric strings with '
        'length >= 9! (1)" + tool_call.id) }}\n'
        '            {%- endif %}\n'
        '            {{- \', "id": "\' + tool_call.id[-9:] + \'"}\' }}\n'
        '            {%- if not loop.last %}\n'
        '                {{- ", " }}\n'
        '            {%- else %}\n'
        '                {{- "]" + eos_token }}\n'
        '            {%- endif %}\n'
        '        {%- endfor %}\n'
        '    {%- elif message[\'role\'] == \'assistant\' %}\n'
        '        {%- if message[\'content\'] is string %}\n'
        '            {{- message[\'content\'] + eos_token }}\n'
        '        {%- else %}\n'
        '            {{- message[\'content\'][0][\'text\'] + eos_token }}\n'
        '        {%- endif %}\n'
        '    {%- elif message["role"] == "tool_results" or message["role"] == "tool" %}\n'
        '        {%- if message.content is defined and message.content.content is defined %}\n'
        '            {%- set content = message.content.content %}\n'
        '        {%- else %}\n'
        '            {%- set content = message.content %}\n'
        '        {%- endif %}\n'
        '        {{- \'[TOOL_RESULTS] {"content": \' + content|string + ", " }}\n'
        '        {%- if not message.tool_call_id is defined or message.tool_call_id|length < 9 %}\n'
        '            {{- raise_exception("Tool call IDs should be alphanumeric strings with '
        'length >= 9! (2)" + message.tool_call_id) }}\n'
        '        {%- endif %}\n'
        '        {{- \'"call_id": "\' + message.tool_call_id[-9:] + \'"}[/TOOL_RESULTS]\' }}\n'
        '    {%- else %}\n'
        '        {{- raise_exception("Only user and assistant roles are supported, with the '
        'exception of an initial optional system message!") }}\n'
        '    {%- endif %}\n'
        '{%- endfor %}')

    pt_engine.processor.chat_template = chat_template
    res2 = _infer_model(pt_engine, system=devstral_small_2505_system)

    assert res == res2, f'res: {res}, res2: {res2}'
|
|
|
|
|
def test_glm4_5():
    """GLM-4.5-Air: swift and jinja template backends must agree on a fixed chat."""
    chat = [{'role': 'user', 'content': '浙江的省会在哪?'}]
    engine = PtEngine('ZhipuAI/GLM-4.5-Air')
    res = _infer_model(engine, messages=chat)
    engine.default_template.template_backend = 'jinja'
    res2 = _infer_model(engine, messages=chat)
    assert res == res2, f'res: {res}, res2: {res2}'
|
|
|
|
|
if __name__ == '__main__':
    # Heavy project imports are deferred to script execution so importing the module
    # itself stays cheap.
    from swift.llm import PtEngine, RequestConfig
    from swift.utils import get_logger, seed_everything
    logger = get_logger()
    # NOTE(review): the test functions above reference PtEngine/RequestConfig/logger/
    # seed_everything, which are bound only inside this guard — importing this module
    # and calling a test (e.g. via pytest) would raise NameError at call time.
    # Confirm this file is intended to be run as a script only.
    test_devstral()
|
|