MeV3 / tokenizer_config.json
helixdouble's picture
SFT run 3: heretic-v2 base (0/100 refusals), QLoRA rank 32, 2 epochs, 529 conversations
ab7e7cc verified
{
"add_prefix_space": false,
"audio_bos_token": "<|audio_start|>",
"audio_eos_token": "<|audio_end|>",
"audio_token": "<|audio_pad|>",
"backend": "tokenizers",
"bos_token": null,
"clean_up_tokenization_spaces": false,
"eos_token": "<|im_end|>",
"errors": "replace",
"image_token": "<|image_pad|>",
"is_local": false,
"max_length": null,
"model_max_length": 262144,
"model_specific_special_tokens": {
"audio_bos_token": "<|audio_start|>",
"audio_eos_token": "<|audio_end|>",
"audio_token": "<|audio_pad|>",
"image_token": "<|image_pad|>",
"video_token": "<|video_pad|>",
"vision_bos_token": "<|vision_start|>",
"vision_eos_token": "<|vision_end|>"
},
"pad_to_multiple_of": null,
"pad_token": "<|endoftext|>",
"pad_token_type_id": 0,
"padding_side": "left",
"pretokenize_regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?[\\p{L}\\p{M}]+|\\p{N}| ?[^\\s\\p{L}\\p{M}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
"processor_class": "Qwen3VLProcessor",
"split_special_tokens": false,
"tokenizer_class": "Qwen2Tokenizer",
"unk_token": null,
"video_token": "<|video_pad|>",
"vision_bos_token": "<|vision_start|>",
"vision_eos_token": "<|vision_end|>",
"chat_template": "{# Define the macros for XML conversion #}\n{%- macro render_item_list(item_list, tag_name='required') -%}\n {%- if item_list is defined and item_list is iterable and item_list | length > 0 -%}\n <{{ tag_name }}>[{{- item_list | join(\", \") -}}]</{{ tag_name }}>\n {%- endif -%}\n{%- endmacro -%}\n\n{%- macro render_extra_keys(json_dict, handled_keys) -%}\n {%- if json_dict is mapping -%}\n {%- for json_key in json_dict if json_key not in handled_keys -%}\n <{{ json_key }}>{{ json_dict[json_key] }}</{{ json_key }}>\n {%- endfor -%}\n {%- endif -%}\n{%- endmacro -%}\n\n\n{%- set image_count = namespace(value=0) %}\n{%- set video_count = namespace(value=0) %}\n{%- set add_vision_id = add_vision_id if add_vision_id is defined else true %}\n\n{# Set Instruct mode here #}\n\n{%- macro render_content(content, do_vision_count, is_system_content=false) %}\n {%- if content is string %}\n {{- content }}\n {%- elif content is iterable and content is not mapping %}\n {%- for item in content %}\n {%- if 'image' in item or 'image_url' in item or (item is mapping and item.get('type') == 'image') %}\n {%- if is_system_content %}\n {{- raise_exception('System message cannot contain images.') }}\n {%- endif %}\n {%- if do_vision_count %}\n {%- set image_count.value = image_count.value + 1 %}\n {%- endif %}\n {%- if add_vision_id %}\n {{- 'Picture ' ~ image_count.value ~ ': ' }}\n {%- endif %}\n {{- '<|vision_start|><|image_pad|><|vision_end|>' }}\n {%- elif 'video' in item or (item is mapping and item.get('type') == 'video') %}\n {%- if is_system_content %}\n {{- raise_exception('System message cannot contain videos.') }}\n {%- endif %}\n {%- if do_vision_count %}\n {%- set video_count.value = video_count.value + 1 %}\n {%- endif %}\n {%- if add_vision_id %}\n {{- 'Video ' ~ video_count.value ~ ': ' }}\n {%- endif %}\n {{- '<|vision_start|><|video_pad|><|vision_end|>' }}\n {%- elif item is mapping and 'text' in item %}\n {{- item.text }}\n {%- else %}\n {{- 
raise_exception('Unexpected item type in content.') }}\n {%- endif %}\n {%- endfor %}\n {%- elif content is none or content is undefined %}\n {{- '' }}\n {%- else %}\n {{- raise_exception('Unexpected content type.') }}\n {%- endif %}\n{%- endmacro %}\n\n{%- if not messages %}\n {{- raise_exception('No messages provided.') }}\n{%- endif %}\n\n{# Flag to prevent double-rendering system prompt #}\n{%- set ns = namespace(system_rendered=false) %}\n\n{%- if tools and tools is iterable and tools is not mapping %}\n\n {{- '<|im_start|>system\\n# Tools\\n\\nYou have access to the following functions:\\n\\n<tools>' -}}\n {%- for tool in tools -%}\n {%- set function = tool.function -%}\n {{- \"\\n<tool>\\n<name>\" + function.name + \"</name>\\n<description>\" + function.description + \"</description>\" -}}\n {%- if function.parameters and function.parameters.properties -%}\n {%- for param_name, param_details in function.parameters.properties.items() -%}\n {{- \"\\n<parameter>\\n<name>\" + param_name + \"</name>\\n<type>\" + param_details.type + \"</type>\\n<description>\" + (param_details.description | default('')) + \"</description>\" -}}\n {{- render_item_list(function.parameters.required) -}}\n {{- render_extra_keys(param_details, ['type', 'description']) -}}\n {{- \"\\n</parameter>\" -}}\n {%- endfor -%}\n {%- endif -%}\n {{- \"\\n</tool>\" -}}\n {%- endfor -%}\n\n {{- '\\n\\nIf you choose to call a function ONLY reply in the following format with NO suffix:\\n\\n<tool_call>\\n<function=example_function_name>\\n<parameter=example_parameter_1>\\nvalue_1\\n</parameter>\\n<parameter=example_parameter_2>\\nThis is the value for the second parameter\\nthat can span\\nmultiple lines\\n</parameter>\\n</function>\\n</tool_call>\\n\\n<IMPORTANT>\\nReminder:\\n- Function calls MUST follow the specified format: an inner <function=...></function> block must be nested within <tool_call></tool_call> XML tags\\n- Required parameters MUST be specified\\n- You may provide optional 
reasoning for your function call in natural language BEFORE the function call, but NOT after\\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\\n</IMPORTANT>' }}\n \n {%- if messages[0].role == 'system' %}\n {%- set content = render_content(messages[0].content, false, true)|trim %}\n {%- if content %}\n {{- '\\n\\n' + content }}\n {%- endif %}\n {%- set ns.system_rendered = true %}\n {%- endif %}\n {{- '<|im_end|>\\n' }}\n{%- else %}\n {%- if messages[0].role == 'system' %}\n {%- set content = render_content(messages[0].content, false, true)|trim %}\n {{- '<|im_start|>system\\n' + content + '<|im_end|>\\n' }}\n {%- set ns.system_rendered = true %}\n {%- endif %}\n{%- endif %}\n\n{# Main Message Loop #}\n{%- for message in messages %}\n {%- if message.role == \"system\" and ns.system_rendered and loop.first %}\n {%- continue %}\n {%- endif %}\n\n {%- set content = render_content(message.content, true)|trim %}\n \n {%- if message.role == \"system\" %}\n {{- '<|im_start|>system\\n' + content + '<|im_end|>\\n' }}\n {%- elif message.role == \"user\" %}\n {{- '<|im_start|>' + message.role + '\\n' + content + '<|im_end|>\\n' }}\n {%- elif message.role == \"assistant\" %}\n {%- set reasoning_content = '' %}\n {%- if message.reasoning_content is defined and message.reasoning_content is string %}\n {%- set reasoning_content = message.reasoning_content | trim %}\n {%- elif '<think>' in content and '</think>' in content %}\n {%- set reasoning_content = content.split('</think>')[0].split('<think>')[-1] | trim %}\n {%- set content = content.split('</think>')[-1] | trim %}\n {%- endif %}\n\n {{- '<|im_start|>' + message.role + '\\n' }}\n \n {%- if reasoning_content %}\n {{- '<think>\\n' + reasoning_content + '\\n</think>\\n\\n' }}\n {%- endif %}\n \n {{- content }}\n\n {# Tool call formatting #}\n {%- if message.tool_calls %}\n {%- for tool_call in message.tool_calls %}\n {%- set 
tc = tool_call.function if tool_call.function is defined else tool_call %}\n {%- if loop.first and content %}{{- '\\n\\n' }}{%- elif not loop.first %}{{- '\\n' }}{%- endif %}\n {{- '<tool_call>\\n<function=' + tc.name + '>\\n' }}\n {%- for args_name, args_value in tc.arguments|items %}\n {{- '<parameter=' + args_name + '>\\n' }}\n {{- (args_value | tojson | safe if args_value is mapping or (args_value is sequence and args_value is not string) else args_value | string) + '\\n</parameter>\\n' }}\n {%- endfor %}\n {{- '</function>\\n</tool_call>' }}\n {%- endfor %}\n {%- endif %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if loop.previtem and loop.previtem.role != \"tool\" %}{{- '<|im_start|>user' }}{%- endif %}\n {{- '\\n<tool_response>\\n' + content + '\\n</tool_response>' }}\n {%- if loop.last or (loop.nextitem and loop.nextitem.role != \"tool\") %}{{- '<|im_end|>\\n' }}{%- endif %}\n {%- endif %}\n{%- endfor %}\n\n{# Final Generation Prompt #}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n {%- if enable_thinking is defined and enable_thinking is false %}\n {{- '<think>\\n\\n</think>\\n\\n' }}\n {%- else %}\n {{- '<think>\\n' }}\n {%- endif %}\n{%- endif %}"
}