C10X committed
Commit 1cce6c0 · verified · 1 Parent(s): 4ace306

Upload 6 files

chat_template.jinja ADDED
@@ -0,0 +1,5 @@
+{% for message in messages %}<|im_start|>{{ message['role'] }}
+{% if message['role'] == 'assistant' %}{% generation %}{{ message['content'] }}<|im_end|>
+{% endgeneration %}{% else %}{{ message['content'] }}<|im_end|>
+{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant
+{% endif %}
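The {% generation %} / {% endgeneration %} markers are what let transformers' apply_chat_template return an assistant-token mask for SFT loss masking. A minimal sketch, not part of the commit, assuming a local clone of this repo:

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("./checkout")  # path to a local clone (assumption)
messages = [
    {"role": "user", "content": "Hi"},
    {"role": "assistant", "content": "Hello!"},
]
enc = tok.apply_chat_template(
    messages,
    return_dict=True,
    return_assistant_tokens_mask=True,  # requires the {% generation %} markers above
)
# enc["assistant_masks"] is 1 for tokens inside {% generation %}...{% endgeneration %},
# so a trainer can compute loss on assistant tokens only.
print(enc["assistant_masks"])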
config.json CHANGED
@@ -4,56 +4,38 @@
   ],
   "attention_bias": false,
   "attention_dropout": 0.0,
-  "bos_token_id": 1,
+  "bos_token_id": 0,
   "dtype": "bfloat16",
-  "eos_token_id": 2,
-  "head_dim": 32,
+  "eos_token_id": 6,
+  "head_dim": 64,
   "hidden_act": "silu",
-  "hidden_size": 64,
+  "hidden_size": 512,
   "initializer_range": 0.02,
   "intermediate_size": 2048,
   "layer_types": [
+    "sliding_attention",
     "full_attention",
+    "sliding_attention",
     "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
+    "sliding_attention",
     "full_attention"
   ],
-  "max_position_embeddings": 2048,
-  "max_window_layers": 24,
+  "max_position_embeddings": 8192,
+  "max_window_layers": 5,
   "model_type": "qwen3",
-  "num_attention_heads": 4,
-  "num_hidden_layers": 24,
+  "num_attention_heads": 8,
+  "num_hidden_layers": 6,
   "num_key_value_heads": 2,
-  "pad_token_id": 2,
+  "pad_token_id": 6,
   "rms_norm_eps": 1e-06,
   "rope_parameters": {
    "rope_theta": 10000.0,
    "rope_type": "default"
   },
-  "sliding_window": null,
+  "sliding_window": 512,
   "tie_word_embeddings": true,
   "transformers_version": "5.8.0.dev0",
   "use_cache": false,
-  "use_sliding_window": false,
-  "vocab_size": 2048
+  "use_sliding_window": true,
+  "vocab_size": 16384
 }
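For orientation: the edit replaces a 24-layer, all-full-attention toy config with a 6-layer model that alternates sliding-window (window 512) and full attention, and grows the vocabulary to 16384. A sketch of the same shape built in code, assuming a transformers version whose Qwen3Config accepts layer_types:

from transformers import Qwen3Config, Qwen3ForCausalLM

cfg = Qwen3Config(
    vocab_size=16384,
    hidden_size=512,
    num_hidden_layers=6,
    num_attention_heads=8,
    num_key_value_heads=2,       # GQA: 4 query heads share each KV head
    head_dim=64,                 # 8 heads x 64 = 512 = hidden_size
    intermediate_size=2048,
    max_position_embeddings=8192,
    layer_types=["sliding_attention", "full_attention"] * 3,
    sliding_window=512,
    use_sliding_window=True,
    tie_word_embeddings=True,
)
model = Qwen3ForCausalLM(cfg)
print(sum(p.numel() for p in model.parameters()))  # ~31.2M; cf. the safetensors size below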
generation_config.json CHANGED
@@ -1,11 +1,11 @@
 {
   "_from_model_config": true,
-  "bos_token_id": 1,
+  "bos_token_id": 0,
   "eos_token_id": [
-    2,
-    4
+    6,
+    2
   ],
-  "pad_token_id": 2,
+  "pad_token_id": 6,
   "transformers_version": "5.8.0.dev0",
   "use_cache": false
 }
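Passing eos_token_id as a list makes generate() stop at whichever stop id appears first; here id 6 (presumably <|im_end|>, per the tokenizer config below) plus id 2 as a second stop. Continuing the sketch above:

import torch

prompt = torch.tensor([[0]])  # just the BOS id, for illustration
out = model.generate(
    prompt,
    eos_token_id=[6, 2],       # stop on either id
    pad_token_id=6,
    max_new_tokens=16,
)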
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:44fc1d7661d5e478a6c2b6cd82f877ac2246915f368a3b579e03bf61afcc6054
-size 20354200
+oid sha256:bba7b17a06ff8bb8ff2dab9a672e22b4cec6c67fe0d1fbf075753ec0c7c164e5
+size 62412552
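The new size is consistent with a bfloat16 checkpoint of the config above. A back-of-envelope count (assuming standard Qwen3 shapes with tied embeddings and per-head q/k norms) lands within a few KB of 62,412,552 bytes, the remainder being the safetensors header:

h, layers, vocab, inter, kv_dim = 512, 6, 16384, 2048, 2 * 64
emb = vocab * h                               # 8,388,608 (tied with lm_head)
attn = 2 * h * h + 2 * h * kv_dim             # q,o projections + k,v projections
mlp = 3 * h * inter                           # gate, up, down
per_layer = attn + mlp + 2 * h + 2 * 64       # + input/post-attn norms + q/k norms
total = emb + layers * per_layer + h          # + final norm
print(total, total * 2)                       # ≈31.2M params, ≈62.4 MB in bf16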
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -1,15 +1,220 @@
 {
-  "add_prefix_space": null,
   "backend": "tokenizers",
-  "bos_token": "<|start_story|>",
+  "bos_token": "<|startoftext|>",
+  "chatml_role_tokens": [
+    "<|system|>",
+    "<|user|>",
+    "<|assistant|>",
+    "<|developer|>",
+    "<|tool|>",
+    "<|function|>"
+  ],
   "clean_up_tokenization_spaces": false,
-  "eos_token": "<|end_story|>",
-  "is_local": false,
+  "eos_token": "<|im_end|>",
+  "expected_vocab_size": 16384,
+  "falcon_latex_tokens": [
+    "\\blindtext",
+    "\\newpage",
+    "\\boxed",
+    "\\framebox",
+    "\\fbox",
+    "\\tag",
+    "\\nonumber",
+    "\\item",
+    "\\centering",
+    "\\caption",
+    "\\includegraphics",
+    "\\label",
+    "\\multicolumn",
+    "\\cline",
+    "\\hline",
+    "\\end",
+    "\\begin",
+    "\\tableofcontents",
+    "\\maketitle",
+    "\\date",
+    "\\author",
+    "\\title",
+    "\\chapter",
+    "\\subsubsection",
+    "\\subsection",
+    "\\section",
+    "\\noindent",
+    "\\newline",
+    "\\par",
+    "\\ddot",
+    "\\dot",
+    "\\bar",
+    "\\iff",
+    "\\implies",
+    "\\neg",
+    "\\lor",
+    "\\land",
+    "\\qquad",
+    "\\quad",
+    "\\arctan",
+    "\\arccos",
+    "\\arcsin",
+    "\\cot",
+    "\\sec",
+    "\\csc",
+    "\\tan",
+    "\\cos",
+    "\\sin",
+    "\\widetilde",
+    "\\widehat",
+    "\\complement",
+    "\\varnothing",
+    "\\tilde",
+    "\\hat",
+    "\\vec",
+    "\\mathfrak",
+    "\\mathcal",
+    "\\mathbb",
+    "\\emptyset",
+    "\\bot",
+    "\\aleph",
+    "\\Re",
+    "\\Im",
+    "\\ell",
+    "\\hbar",
+    "\\exists",
+    "\\forall",
+    "\\partial",
+    "\\nabla",
+    "\\infty",
+    "\\binom",
+    "\\overline",
+    "\\sqrt",
+    "\\tfrac",
+    "\\dfrac",
+    "\\frac",
+    "\\textsc",
+    "\\textsf",
+    "\\texttt",
+    "\\emph",
+    "\\underline",
+    "\\textit",
+    "\\textbf",
+    "\\bigoplus",
+    "\\bigcap",
+    "\\bigcup",
+    "\\prod",
+    "\\sum",
+    "\\oint",
+    "\\iiint",
+    "\\iint",
+    "\\int",
+    "\\right",
+    "\\left",
+    "\\Bigr",
+    "\\Bigl",
+    "\\bigr",
+    "\\bigl",
+    "\\rceil",
+    "\\lceil",
+    "\\rfloor",
+    "\\lfloor",
+    "\\rangle",
+    "\\langle",
+    "\\overleftarrow",
+    "\\overleftrightarrow",
+    "\\overrightarrow",
+    "\\Longleftrightarrow",
+    "\\longleftrightarrow",
+    "\\longrightarrow",
+    "\\longleftarrow",
+    "\\Longrightarrow",
+    "\\Longleftarrow",
+    "\\gets",
+    "\\to",
+    "\\mapsto",
+    "\\Updownarrow",
+    "\\Downarrow",
+    "\\Uparrow",
+    "\\updownarrow",
+    "\\downarrow",
+    "\\uparrow",
+    "\\Leftrightarrow",
+    "\\Rightarrow",
+    "\\Leftarrow",
+    "\\leftrightarrow",
+    "\\rightarrow",
+    "\\leftarrow",
+    "\\perp",
+    "\\propto",
+    "\\ni",
+    "\\notin",
+    "\\in",
+    "\\supseteq",
+    "\\supset",
+    "\\sqsupseteq",
+    "\\sqsubseteq",
+    "\\subseteq",
+    "\\subset",
+    "\\cong",
+    "\\approx",
+    "\\simeq",
+    "\\sim",
+    "\\equiv",
+    "\\neq",
+    "\\geq",
+    "\\leq",
+    "\\oslash",
+    "\\otimes",
+    "\\ominus",
+    "\\oplus",
+    "\\wedge",
+    "\\vee",
+    "\\sqcup",
+    "\\sqcap",
+    "\\uplus",
+    "\\cup",
+    "\\cap",
+    "\\cdot",
+    "\\bullet",
+    "\\circ",
+    "\\star",
+    "\\ast",
+    "\\div",
+    "\\times",
+    "\\mp",
+    "\\pm",
+    "\\omega",
+    "\\psi",
+    "\\chi",
+    "\\varphi",
+    "\\varsigma",
+    "\\phi",
+    "\\upsilon",
+    "\\tau",
+    "\\sigma",
+    "\\rho",
+    "\\pi",
+    "\\xi",
+    "\\nu",
+    "\\mu",
+    "\\lambda",
+    "\\kappa",
+    "\\iota",
+    "\\theta",
+    "\\eta",
+    "\\zeta",
+    "\\epsilon",
+    "\\delta",
+    "\\gamma",
+    "\\beta",
+    "\\alpha"
+  ],
+  "fresh_training_vocab_note": "This tokenizer intentionally repurposes obsolete Harmony/control token IDs 2,3,4,7,8,9 as ChatML role tokens. It is intended for from-scratch base-model training; do not use it with old checkpoints or old tokenized caches.",
+  "hf_compatibility_note": "Set tokenizer_class to PreTrainedTokenizerFast for vanilla Hugging Face loading; original source used a custom TokenizersBackend name.",
+  "is_local": true,
   "local_files_only": false,
+  "model_input_names": [
+    "input_ids",
+    "attention_mask"
+  ],
   "model_max_length": 1000000000000000019884624838656,
-  "pad_token": "<|end_story|>",
-  "sp_model_kwargs": {},
-  "tokenizer_class": "LlamaTokenizer",
-  "unk_token": "<unk>",
-  "use_default_system_prompt": false
+  "pad_token": "<|im_end|>",
+  "tokenizer_class": "TokenizersBackend"
 }
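As the hf_compatibility_note says, "TokenizersBackend" is not a stock transformers class, so AutoTokenizer may refuse this config as committed. One workaround is loading the tokenizer.json directly as a fast tokenizer; a sketch, with the token-to-id mapping unverified:

from transformers import PreTrainedTokenizerFast

tok = PreTrainedTokenizerFast(
    tokenizer_file="tokenizer.json",  # from this repo
    bos_token="<|startoftext|>",
    eos_token="<|im_end|>",
    pad_token="<|im_end|>",
)
# Expect 6 for <|im_end|> per config.json; the role-token ids come from the
# repurposed 2,3,4,7,8,9 range described in fresh_training_vocab_note.
print(tok.convert_tokens_to_ids(["<|im_end|>", "<|user|>"]))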