Pinkstack committed
Commit 3017e21 · verified · 1 Parent(s): e63ae1b

Update tokenizer_config.json

Files changed (1):
  1. tokenizer_config.json (+86 −5)

tokenizer_config.json CHANGED
@@ -26,7 +26,7 @@
       "special": true
     },
     "3": {
-      "content": "<think>",
+      "content": "<repo_name>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
@@ -34,7 +34,7 @@
       "special": true
     },
     "4": {
-      "content": "</think>",
+      "content": "<reponame>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
@@ -42,7 +42,7 @@
       "special": true
     },
     "5": {
-      "content": "<output>",
+      "content": "<file_sep>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
@@ -50,7 +50,87 @@
       "special": true
     },
     "6": {
-      "content": "</output>",
+      "content": "<filename>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "7": {
+      "content": "<gh_stars>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "8": {
+      "content": "<issue_start>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "9": {
+      "content": "<issue_comment>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "10": {
+      "content": "<issue_closed>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "11": {
+      "content": "<jupyter_start>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "12": {
+      "content": "<jupyter_text>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "13": {
+      "content": "<jupyter_code>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "14": {
+      "content": "<jupyter_output>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "15": {
+      "content": "<jupyter_script>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "16": {
+      "content": "<empty_output>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
@@ -59,9 +139,10 @@
     }
   },
   "bos_token": "<|im_start|>",
-  "chat_template": "{%- for message in messages -%}\n{%- if message['role'] == 'system' -%}\n<|im_start|>system\nYour name is Superthoughts lite by Pinkstack. You are an open weights AI model released in 2025 with built-in information up to 2024.\n<|im_end|>\n{%- elif message['role'] == 'user' -%}\n<|im_start|>user\n{{ message.content }}<|im_end|>\n{%- elif message['role'] == 'assistant' -%}\n<|im_start|>assistant\n<think>\n{{ message.thinking_content if 'thinking_content' in message else '' }}</think>\n<output>\n{{ message.content }}</output>\n<|im_end|>\n{%- endif -%}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n<|im_start|>assistant\n{%- endif -%}",
+  "chat_template": "{% if message['role'] == 'system' %}<model_identity>Your name is Superthoughts lite by Pinkstack. You are an open weights AI model released in 2025 with built-in information up to 2024.</model_identity>\n{% endif %}{% if 'role' in messages[0] %}{% for message in messages %}{% if message['role'] == 'user' %}{{'<|im_start|>user\n' + message['content'] + '<|im_end|>\n'}}{% elif message['role'] == 'assistant' %}{{'<|im_start|>assistant\n<think>\n' + message['thinking_content'] + '</think>\n<output>\n' + message['content'] + '</output>\n<|im_end|>\n' }}{% else %}{{ '<|im_start|>system\n' + message['content'] + '<|im_end|>\n' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}{% else %}{% for message in messages %}{% if message['from'] == 'human' %}{{'<|im_start|>user\n' + message['value'] + '<|im_end|>\n'}}{% elif message['from'] == 'assistant' %}{{'<|im_start|>assistant\n<think>\n' + message['thinking_value'] + '</think>\n<output>\n' + message['value'] + '</output>\n<|im_end|>\n' }}{% else %}{{ '<|im_start|>system\n' + message['value'] + '<|im_end|>\n' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}{% endif %}",
   "clean_up_tokenization_spaces": false,
   "eos_token": "<|im_end|>",
+  "extra_special_tokens": {},
   "model_max_length": 1000000000000000019884624838656,
   "pad_token": "<|endoftext|>",
   "tokenizer_class": "GPT2Tokenizer",