Warecube committed on
Commit
de1512b
·
verified ·
1 Parent(s): 98921fc

upload tokenizer_config.json

Browse files
Files changed (1) hide show
  1. tokenizer_config.json +32 -0
tokenizer_config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "audio_bos_token": "<|audio_start|>",
4
+ "audio_eos_token": "<|audio_end|>",
5
+ "audio_token": "<|audio_pad|>",
6
+ "backend": "tokenizers",
7
+ "bos_token": null,
8
+ "clean_up_tokenization_spaces": false,
9
+ "eos_token": "<|im_end|>",
10
+ "errors": "replace",
11
+ "image_token": "<|image_pad|>",
12
+ "is_local": true,
13
+ "model_max_length": 262144,
14
+ "model_specific_special_tokens": {
15
+ "audio_bos_token": "<|audio_start|>",
16
+ "audio_eos_token": "<|audio_end|>",
17
+ "audio_token": "<|audio_pad|>",
18
+ "image_token": "<|image_pad|>",
19
+ "video_token": "<|video_pad|>",
20
+ "vision_bos_token": "<|vision_start|>",
21
+ "vision_eos_token": "<|vision_end|>"
22
+ },
23
+ "pad_token": "<|endoftext|>",
24
+ "pretokenize_regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?[\\p{L}\\p{M}]+|\\p{N}| ?[^\\s\\p{L}\\p{M}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
25
+ "split_special_tokens": false,
26
+ "tokenizer_class": "Qwen2TokenizerFast",
27
+ "unk_token": null,
28
+ "video_token": "<|video_pad|>",
29
+ "vision_bos_token": "<|vision_start|>",
30
+ "vision_eos_token": "<|vision_end|>",
31
+ "chat_template": "{%- if not messages %}\n {{- raise_exception('No messages provided.') }}\n{%- endif %}\n{%- for message in messages %}\n {%- if loop.first and message.role == 'system' %}\n {{- '<|im_start|>system\\n' + message.content | trim + '<|im_end|>\\n' }}\n {%- elif message.role == 'user' %}\n {{- '<|im_start|>user\\n' + message.content | trim + '<|im_end|>\\n' }}\n {%- elif message.role == 'assistant' %}\n {%- if not loop.last %}\n {{- '<|im_start|>assistant\\n' + message.content | trim + '<|im_end|>\\n' }}\n {%- else %}\n {%- set content = message.content | trim %}\n {%- if content.startswith('<think>') %}\n {{- '<|im_start|>assistant\\n' + content + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>assistant\\n<think>\\n' + content + '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n<think>\\n' }}\n{%- endif %}\n"
32
+ }