update
docs/chat-template/DeepSeek-V3.1/chat_template.jinja
ADDED
@@ -0,0 +1,3 @@
{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% if not thinking is defined %}{% set thinking = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, system_prompt='', is_first_sp=true, is_last_user=false) %}{%- for message in messages %}{%- if message['role'] == 'system' %}{%- if ns.is_first_sp %}{% set ns.system_prompt = ns.system_prompt + message['content'] %}{% set ns.is_first_sp = false %}{%- else %}{% set ns.system_prompt = ns.system_prompt + '

' + message['content'] %}{%- endif %}{%- endif %}{%- endfor %}{{ bos_token }}{{ ns.system_prompt }}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{%- set ns.is_first = false -%}{%- set ns.is_last_user = true -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['tool_calls'] is defined and message['tool_calls'] is not none %}{%- if ns.is_last_user %}{{'<|Assistant|></think>'}}{%- endif %}{%- set ns.is_last_user = false -%}{%- set ns.is_first = false %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls'] %}{%- if not ns.is_first %}{%- if message['content'] is none %}{{'<|tool▁calls▁begin|><|tool▁call▁begin|>'+ tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments'] + '<|tool▁call▁end|>'}}{%- else %}{{message['content'] + '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments'] + '<|tool▁call▁end|>'}}{%- endif %}{%- set ns.is_first = true -%}{%- else %}{{'<|tool▁call▁begin|>'+ tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments'] + '<|tool▁call▁end|>'}}{%- endif %}{%- endfor %}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- if message['role'] == 'assistant' and (message['tool_calls'] is not defined or message['tool_calls'] is none) %}{%- if ns.is_last_user %}{{'<|Assistant|>'}}{%- if message['prefix'] is defined and message['prefix'] and thinking %}{{'<think>'}} {%- else %}{{'</think>'}}{%- endif %}{%- endif %}{%- set ns.is_last_user = false -%}{%- if ns.is_tool %}{{message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{%- set content = message['content'] -%}{%- if '</think>' in content %}{%- set content = content.split('</think>', 1)[1] -%}{%- endif %}{{content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_last_user = false -%}{%- set ns.is_tool = true -%}{{'<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endfor -%}{%- if add_generation_prompt and ns.is_last_user and not ns.is_tool %}{{'<|Assistant|>'}}{%- if not thinking %}{{'</think>'}}{%- else %}{{'<think>'}}{%- endif %}{% endif %}
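The DeepSeek-V3.1 template's control flow is hard to read as a one-liner, so here is an illustrative Python mirror of just its simple path: concatenated system turns after `bos_token`, `<|User|>` turns, and the generation prompt that emits `<think>` in thinking mode and `</think>` otherwise. `render_v31` and the `<BOS>` placeholder are our own names for this sketch, not part of the template.

```python
def render_v31(messages, bos_token, thinking=False, add_generation_prompt=True):
    """Mirror the template's text-only system/user path (a sketch, not the template)."""
    # system turns are concatenated; later ones are joined with a blank line
    system = "\n\n".join(m["content"] for m in messages if m["role"] == "system")
    out = bos_token + system
    is_last_user = False
    for m in messages:
        if m["role"] == "user":
            out += "<|User|>" + m["content"]
            is_last_user = True
    if add_generation_prompt and is_last_user:
        # thinking mode opens a <think> block; non-thinking closes it immediately
        out += "<|Assistant|>" + ("<think>" if thinking else "</think>")
    return out
```

Note the non-thinking generation prompt ends in a bare `</think>`, which is why the template also strips everything up to `</think>` from prior assistant turns before re-serializing them.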
docs/chat-template/Llama-4-Maverick-17B-128E-Instruct/chat_template.jinja
ADDED
@@ -0,0 +1,123 @@
{{- bos_token }}
{%- if custom_tools is defined %}
    {%- set tools = custom_tools %}
{%- endif %}
{%- if not tools_in_user_message is defined %}
    {%- set tools_in_user_message = true %}
{%- endif %}
{%- if not date_string is defined %}
    {%- if strftime_now is defined %}
        {%- set date_string = strftime_now("%d %b %Y") %}
    {%- else %}
        {%- set date_string = "26 Jul 2024" %}
    {%- endif %}
{%- endif %}
{%- if not tools is defined %}
    {%- set tools = none %}
{%- endif %}

{#- This block extracts the system message, so we can slot it into the right place. #}
{%- if messages[0]['role'] == 'system' %}
    {%- if messages[0]['content'] is string %}
        {%- set system_message = messages[0]['content']|trim %}
    {%- else %}
        {#- FIXME: The processor requires an array, always. #}
        {%- set system_message = messages[0]['content'][0]['text']|trim %}
    {%- endif %}
    {%- set messages = messages[1:] %}
    {%- set user_supplied_system_message = true %}
{%- else %}
    {%- set system_message = "" %}
    {%- set user_supplied_system_message = false %}
{%- endif %}

{#- System message if the user supplied one #}
{%- if user_supplied_system_message %}
    {{- "<|header_start|>system<|header_end|>\n\n" }}
    {%- if tools is not none %}
        {{- "Environment: ipython\n" }}
    {%- endif %}
    {%- if tools is not none and not tools_in_user_message %}
        {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }}
        {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }}
        {{- "Do not use variables.\n\n" }}
        {%- for t in tools %}
            {{- t | tojson(indent=4) }}
            {{- "\n\n" }}
        {%- endfor %}
    {%- endif %}
    {{- system_message }}
    {{- "<|eot|>" }}
{%- endif %}

{#- Custom tools are passed in a user message with some extra guidance #}
{%- if tools_in_user_message and not tools is none %}
    {#- Extract the first user message so we can plug it in here #}
    {%- if messages | length != 0 %}
        {%- set first_user_message = messages[0]['content']|trim %}
        {%- set messages = messages[1:] %}
    {%- else %}
        {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }}
    {%- endif %}
    {{- '<|header_start|>user<|header_end|>\n\n' -}}
    {{- "Given the following functions, please respond with a JSON for a function call " }}
    {{- "with its proper arguments that best answers the given prompt.\n\n" }}
    {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }}
    {{- "Do not use variables.\n\n" }}
    {%- for t in tools %}
        {{- t | tojson(indent=4) }}
        {{- "\n\n" }}
    {%- endfor %}
    {{- first_user_message + "<|eot|>"}}
{%- endif %}

{%- for message in messages %}
    {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}
        {{- '<|header_start|>' + message['role'] + '<|header_end|>\n\n' }}
        {%- if message['content'] is string %}
            {{- message['content'] }}
        {%- else %}
            {%- for content in message['content'] %}
                {%- if content['type'] == 'image' %}
                    {{- '<|image|>' }}
                {%- elif content['type'] == 'text' %}
                    {{- content['text'] }}
                {%- endif %}
            {%- endfor %}
        {%- endif %}
        {{- "<|eot|>" }}
    {%- elif 'tool_calls' in message and message.tool_calls|length > 0 %}
        {{- '<|header_start|>assistant<|header_end|>\n\n' -}}
        {{- '<|python_start|>' }}
        {%- if message['content'] is string %}
            {{- message['content'] }}
        {%- else %}
            {%- for content in message['content'] %}
                {%- if content['type'] == 'image' %}
                    {{- '<|image|>' }}
                {%- elif content['type'] == 'text' %}
                    {{- content['text'] }}
                {%- endif %}
            {%- endfor %}
        {%- endif %}
        {{- '<|python_end|>' }}
        {%- for tool_call in message.tool_calls %}
            {{- '{"name": "' + tool_call.function.name + '", ' }}
            {{- '"parameters": ' }}
            {{- tool_call.function.arguments | tojson }}
            {{- "}" }}
        {%- endfor %}
        {{- "<|eot|>" }}
    {%- elif message.role == "tool" or message.role == "ipython" %}
        {{- "<|header_start|>ipython<|header_end|>\n\n" }}
        {%- if message.content is mapping or message.content is iterable %}
            {{- message.content | tojson }}
        {%- else %}
            {{- message.content }}
        {%- endif %}
        {{- "<|eot|>" }}
    {%- endif %}
{%- endfor %}
{%- if add_generation_prompt %}
    {{- '<|header_start|>assistant<|header_end|>\n\n' }}
{%- endif %}
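The Llama-4 template's tool-calling branch wraps the assistant turn in `<|python_start|>`/`<|python_end|>` and then serializes each call as `{"name": ..., "parameters": ...}`. The sketch below mirrors that serialization for OpenAI-style message dicts; `render_tool_turn` is a hypothetical helper written for illustration, not part of this repo.

```python
import json

def render_tool_turn(message):
    """Serialize an assistant message with tool_calls the way the template does (a sketch)."""
    out = "<|header_start|>assistant<|header_end|>\n\n<|python_start|>"
    out += message.get("content") or ""
    out += "<|python_end|>"
    for call in message["tool_calls"]:
        fn = call["function"]
        # the template writes the name as a raw string and the arguments via | tojson
        out += '{"name": "' + fn["name"] + '", "parameters": ' + json.dumps(fn["arguments"]) + "}"
    out += "<|eot|>"
    return out
```

Note the template applies `tojson` only to the arguments; if `arguments` arrive as a pre-serialized string (as some APIs deliver them), they would be JSON-encoded twice, so callers may need to decode them first.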
docs/chat-template/export_chat_template.py
CHANGED
@@ -1,21 +1,21 @@
 
 
 import os
-import json
 from transformers import AutoTokenizer
-from transformers.utils import get_json_schema
 
 
 # MODEL_PATH = "meta-llama/Llama-3.1-405B-Instruct"
-MODEL_PATH = "NousResearch/Hermes-3-Llama-3.1-405B"  # tool_calls not supported in messages
-# MODEL_PATH = "../../test/Llama-4-Maverick-17B-128E-Instruct
+# MODEL_PATH = "NousResearch/Hermes-3-Llama-3.1-405B"  # tool_calls not supported in messages
+# MODEL_PATH = "../../test/Llama-4-Maverick-17B-128E-Instruct"
 # MODEL_PATH = "meta-llama/Llama-4-Maverick-17B-128E-Instruct"
 # MODEL_PATH = "Qwen/Qwen3-235B-A22B-Instruct-2507"
 # MODEL_PATH = "mistralai/Mistral-7B-Instruct-v0.1"  # no tool_calls in messages, no role=tool, no tools
 # MODEL_PATH = "mistralai/Ministral-8B-Instruct-2410"  # supports tools and tool_calls (an id is required); non-mainstream format
-MODEL_PATH = "deepseek-ai/DeepSeek-R1"
+# MODEL_PATH = "deepseek-ai/DeepSeek-R1"
 # MODEL_PATH = "deepseek-ai/DeepSeek-R1-0528"
-MODEL_PATH =
+MODEL_PATH = 'deepseek-ai/DeepSeek-V3.1'
+# MODEL_PATH = "google/gemma-3-27b-it"
+
 
 tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
 chat_template = tokenizer.chat_template
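The script stops at reading `tokenizer.chat_template`; given the `docs/chat-template/<model>/chat_template.jinja` files this commit adds, a plausible export step is writing that string to disk under the model's short name. This is a hedged sketch of that step, not the script's actual code: `export_template` and the directory layout are assumptions inferred from the added files.

```python
from pathlib import Path

def export_template(chat_template: str, model_path: str, root: str = "docs/chat-template") -> Path:
    """Write a chat template string to <root>/<model short name>/chat_template.jinja (assumed layout)."""
    name = model_path.split("/")[-1]  # e.g. "deepseek-ai/DeepSeek-V3.1" -> "DeepSeek-V3.1"
    out = Path(root) / name / "chat_template.jinja"
    out.parent.mkdir(parents=True, exist_ok=True)
    out.write_text(chat_template, encoding="utf-8")
    return out
```

Usage would follow the script's last lines, e.g. `export_template(chat_template, MODEL_PATH)` after the `AutoTokenizer.from_pretrained` call.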
docs/chat-template/tools_and_llm_response.md
CHANGED
@@ -147,7 +147,7 @@ Hey, what's the temperature in Paris right now?<|im_end|>
 
 - **Inputs**:
   - **tools format**: the supported tool list (`tools`) uses a custom format; see [chat_template](https://github.com/vllm-project/vllm/blob/v0.10.1/examples/tool_chat_template_hermes.jinja#L41)
-  - **Position of tools in the prompt**: an extra `system` turn is prepended. (The user-supplied `system
+  - **Position of tools in the prompt**: an extra `system` turn is prepended (the user-supplied `system` is kept unchanged); this extra system turn tells the model it may use the tools in `tools` and specifies the response format
 - **Output**: the returned `response` must be JSON wrapped in `<tool_call>` tags
   `return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows: <tool_call>{"name": <function-name>, "arguments": <args-dict>}</tool_call>`
 
@@ -238,7 +238,7 @@ Hey, what's the temperature in Paris right now?<|im_end|>
 
 - **Inputs**:
   - **tools format**: the supported tool list (`tools`) is based on JSON Schema.
-  - **Position of tools in the prompt**: appended to the end of the original `system
+  - **Position of tools in the prompt**: appended to the end of the original `system`, plus an extra prompt (telling the model it may use the tools in `tools` and specifying the response format)
 - **Output**: the returned `response` must be JSON wrapped in `<tool_call>` tags
   `return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{"name": <function-name>, "arguments": <args-json-object>}\n</tool_call>`
 
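Both response formats above wrap a JSON object in `<tool_call></tool_call>` tags. A minimal parser for that shape can be sketched as follows; `extract_tool_calls` is our own name, and the non-greedy match handles the one level of brace nesting in `"arguments"` but is not a full JSON-in-text parser.

```python
import json
import re

# match a JSON object between <tool_call> tags, possibly spanning lines
TOOL_CALL_RE = re.compile(r"<tool_call>\s*(\{.*?\})\s*</tool_call>", re.DOTALL)

def extract_tool_calls(response: str):
    """Return every {"name": ..., "arguments": ...} dict found in a model response."""
    return [json.loads(m) for m in TOOL_CALL_RE.findall(response)]
```

A response with no `<tool_call>` block simply yields an empty list, which is how a caller can tell a plain answer from a function call.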