wenhuach committed
Commit 008ac56
Parent: 730c79e

replace with auto-round sym format

Signed-off-by: wenhuach <wenhuach87@gmail.com>
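
As context for this change: the checkpoint was re-exported in AutoRound's symmetric format. A minimal sketch of such an export, assuming the auto-round Python API and reusing the hyperparameters recorded in the quantize_config.json deleted below (the exact save call may differ across auto-round versions):

from transformers import AutoModelForCausalLM, AutoTokenizer
from auto_round import AutoRound

base = "Qwen/Qwen2-57B-A14B-Instruct"  # matches "_name_or_path" in the deleted config.json
model = AutoModelForCausalLM.from_pretrained(base, torch_dtype="auto")
tokenizer = AutoTokenizer.from_pretrained(base)

# bits / group_size / sym / iters / nsamples / seqlen mirror the deleted quantize_config.json
autoround = AutoRound(
    model,
    tokenizer,
    bits=4,
    group_size=128,
    sym=True,
    iters=1000,
    nsamples=512,
    seqlen=2048,
)
autoround.quantize()
autoround.save_quantized("./Qwen2-57B-A14B-int4-sym", format="auto_round")  # output path is illustrative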

added_tokens.json DELETED
@@ -1,5 +0,0 @@
-{
-  "<|endoftext|>": 151643,
-  "<|im_end|>": 151645,
-  "<|im_start|>": 151644
-}
 
config.json DELETED
@@ -1,266 +0,0 @@
-{
-  "_name_or_path": "/data5/models/Qwen2-57B-A14B-Instruct",
-  "architectures": [
-    "Qwen2MoeForCausalLM"
-  ],
-  "attention_dropout": 0.0,
-  "bos_token_id": 151643,
-  "decoder_sparse_step": 1,
-  "eos_token_id": 151643,
-  "hidden_act": "silu",
-  "hidden_size": 3584,
-  "initializer_range": 0.02,
-  "intermediate_size": 18944,
-  "max_position_embeddings": 32768,
-  "max_window_layers": 28,
-  "mlp_only_layers": [],
-  "model_type": "qwen2_moe",
-  "moe_intermediate_size": 2560,
-  "norm_topk_prob": false,
-  "num_attention_heads": 28,
-  "num_experts": 64,
-  "num_experts_per_tok": 8,
-  "num_hidden_layers": 28,
-  "num_key_value_heads": 4,
-  "output_router_logits": false,
-  "quantization_config": {
-    "amp": true,
-    "autoround_version": "0.3.1.dev",
-    "bits": 4,
-    "damp_percent": 0.01,
-    "data_type": "int",
-    "desc_act": false,
-    "enable_minmax_tuning": true,
-    "enable_norm_bias_tuning": false,
-    "enable_quanted_input": true,
-    "gradient_accumulate_steps": 1,
-    "group_size": 128,
-    "iters": 1000,
-    "low_gpu_mem_usage": false,
-    "lr": 0.001,
-    "minmax_lr": 0.001,
-    "modules_in_block_to_quantize": [
-      [
-        "self_attn.q_proj",
-        "self_attn.k_proj",
-        "self_attn.v_proj",
-        "self_attn.o_proj",
-        "mlp.gate",
-        "mlp.experts.0.gate_proj",
-        "mlp.experts.0.up_proj",
-        "mlp.experts.0.down_proj",
-        "mlp.experts.1.gate_proj",
-        "mlp.experts.1.up_proj",
-        "mlp.experts.1.down_proj",
-        "mlp.experts.2.gate_proj",
-        "mlp.experts.2.up_proj",
-        "mlp.experts.2.down_proj",
-        "mlp.experts.3.gate_proj",
-        "mlp.experts.3.up_proj",
-        "mlp.experts.3.down_proj",
-        "mlp.experts.4.gate_proj",
-        "mlp.experts.4.up_proj",
-        "mlp.experts.4.down_proj",
-        "mlp.experts.5.gate_proj",
-        "mlp.experts.5.up_proj",
-        "mlp.experts.5.down_proj",
-        "mlp.experts.6.gate_proj",
-        "mlp.experts.6.up_proj",
-        "mlp.experts.6.down_proj",
-        "mlp.experts.7.gate_proj",
-        "mlp.experts.7.up_proj",
-        "mlp.experts.7.down_proj",
-        "mlp.experts.8.gate_proj",
-        "mlp.experts.8.up_proj",
-        "mlp.experts.8.down_proj",
-        "mlp.experts.9.gate_proj",
-        "mlp.experts.9.up_proj",
-        "mlp.experts.9.down_proj",
-        "mlp.experts.10.gate_proj",
-        "mlp.experts.10.up_proj",
-        "mlp.experts.10.down_proj",
-        "mlp.experts.11.gate_proj",
-        "mlp.experts.11.up_proj",
-        "mlp.experts.11.down_proj",
-        "mlp.experts.12.gate_proj",
-        "mlp.experts.12.up_proj",
-        "mlp.experts.12.down_proj",
-        "mlp.experts.13.gate_proj",
-        "mlp.experts.13.up_proj",
-        "mlp.experts.13.down_proj",
-        "mlp.experts.14.gate_proj",
-        "mlp.experts.14.up_proj",
-        "mlp.experts.14.down_proj",
-        "mlp.experts.15.gate_proj",
-        "mlp.experts.15.up_proj",
-        "mlp.experts.15.down_proj",
-        "mlp.experts.16.gate_proj",
-        "mlp.experts.16.up_proj",
-        "mlp.experts.16.down_proj",
-        "mlp.experts.17.gate_proj",
-        "mlp.experts.17.up_proj",
-        "mlp.experts.17.down_proj",
-        "mlp.experts.18.gate_proj",
-        "mlp.experts.18.up_proj",
-        "mlp.experts.18.down_proj",
-        "mlp.experts.19.gate_proj",
-        "mlp.experts.19.up_proj",
-        "mlp.experts.19.down_proj",
-        "mlp.experts.20.gate_proj",
-        "mlp.experts.20.up_proj",
-        "mlp.experts.20.down_proj",
-        "mlp.experts.21.gate_proj",
-        "mlp.experts.21.up_proj",
-        "mlp.experts.21.down_proj",
-        "mlp.experts.22.gate_proj",
-        "mlp.experts.22.up_proj",
-        "mlp.experts.22.down_proj",
-        "mlp.experts.23.gate_proj",
-        "mlp.experts.23.up_proj",
-        "mlp.experts.23.down_proj",
-        "mlp.experts.24.gate_proj",
-        "mlp.experts.24.up_proj",
-        "mlp.experts.24.down_proj",
-        "mlp.experts.25.gate_proj",
-        "mlp.experts.25.up_proj",
-        "mlp.experts.25.down_proj",
-        "mlp.experts.26.gate_proj",
-        "mlp.experts.26.up_proj",
-        "mlp.experts.26.down_proj",
-        "mlp.experts.27.gate_proj",
-        "mlp.experts.27.up_proj",
-        "mlp.experts.27.down_proj",
-        "mlp.experts.28.gate_proj",
-        "mlp.experts.28.up_proj",
-        "mlp.experts.28.down_proj",
-        "mlp.experts.29.gate_proj",
-        "mlp.experts.29.up_proj",
-        "mlp.experts.29.down_proj",
-        "mlp.experts.30.gate_proj",
-        "mlp.experts.30.up_proj",
-        "mlp.experts.30.down_proj",
-        "mlp.experts.31.gate_proj",
-        "mlp.experts.31.up_proj",
-        "mlp.experts.31.down_proj",
-        "mlp.experts.32.gate_proj",
-        "mlp.experts.32.up_proj",
-        "mlp.experts.32.down_proj",
-        "mlp.experts.33.gate_proj",
-        "mlp.experts.33.up_proj",
-        "mlp.experts.33.down_proj",
-        "mlp.experts.34.gate_proj",
-        "mlp.experts.34.up_proj",
-        "mlp.experts.34.down_proj",
-        "mlp.experts.35.gate_proj",
-        "mlp.experts.35.up_proj",
-        "mlp.experts.35.down_proj",
-        "mlp.experts.36.gate_proj",
-        "mlp.experts.36.up_proj",
-        "mlp.experts.36.down_proj",
-        "mlp.experts.37.gate_proj",
-        "mlp.experts.37.up_proj",
-        "mlp.experts.37.down_proj",
-        "mlp.experts.38.gate_proj",
-        "mlp.experts.38.up_proj",
-        "mlp.experts.38.down_proj",
-        "mlp.experts.39.gate_proj",
-        "mlp.experts.39.up_proj",
-        "mlp.experts.39.down_proj",
-        "mlp.experts.40.gate_proj",
-        "mlp.experts.40.up_proj",
-        "mlp.experts.40.down_proj",
-        "mlp.experts.41.gate_proj",
-        "mlp.experts.41.up_proj",
-        "mlp.experts.41.down_proj",
-        "mlp.experts.42.gate_proj",
-        "mlp.experts.42.up_proj",
-        "mlp.experts.42.down_proj",
-        "mlp.experts.43.gate_proj",
-        "mlp.experts.43.up_proj",
-        "mlp.experts.43.down_proj",
-        "mlp.experts.44.gate_proj",
-        "mlp.experts.44.up_proj",
-        "mlp.experts.44.down_proj",
-        "mlp.experts.45.gate_proj",
-        "mlp.experts.45.up_proj",
-        "mlp.experts.45.down_proj",
-        "mlp.experts.46.gate_proj",
-        "mlp.experts.46.up_proj",
-        "mlp.experts.46.down_proj",
-        "mlp.experts.47.gate_proj",
-        "mlp.experts.47.up_proj",
-        "mlp.experts.47.down_proj",
-        "mlp.experts.48.gate_proj",
-        "mlp.experts.48.up_proj",
-        "mlp.experts.48.down_proj",
-        "mlp.experts.49.gate_proj",
-        "mlp.experts.49.up_proj",
-        "mlp.experts.49.down_proj",
-        "mlp.experts.50.gate_proj",
-        "mlp.experts.50.up_proj",
-        "mlp.experts.50.down_proj",
-        "mlp.experts.51.gate_proj",
-        "mlp.experts.51.up_proj",
-        "mlp.experts.51.down_proj",
-        "mlp.experts.52.gate_proj",
-        "mlp.experts.52.up_proj",
-        "mlp.experts.52.down_proj",
-        "mlp.experts.53.gate_proj",
-        "mlp.experts.53.up_proj",
-        "mlp.experts.53.down_proj",
-        "mlp.experts.54.gate_proj",
-        "mlp.experts.54.up_proj",
-        "mlp.experts.54.down_proj",
-        "mlp.experts.55.gate_proj",
-        "mlp.experts.55.up_proj",
-        "mlp.experts.55.down_proj",
-        "mlp.experts.56.gate_proj",
-        "mlp.experts.56.up_proj",
-        "mlp.experts.56.down_proj",
-        "mlp.experts.57.gate_proj",
-        "mlp.experts.57.up_proj",
-        "mlp.experts.57.down_proj",
-        "mlp.experts.58.gate_proj",
-        "mlp.experts.58.up_proj",
-        "mlp.experts.58.down_proj",
-        "mlp.experts.59.gate_proj",
-        "mlp.experts.59.up_proj",
-        "mlp.experts.59.down_proj",
-        "mlp.experts.60.gate_proj",
-        "mlp.experts.60.up_proj",
-        "mlp.experts.60.down_proj",
-        "mlp.experts.61.gate_proj",
-        "mlp.experts.61.up_proj",
-        "mlp.experts.61.down_proj",
-        "mlp.experts.62.gate_proj",
-        "mlp.experts.62.up_proj",
-        "mlp.experts.62.down_proj",
-        "mlp.experts.63.gate_proj",
-        "mlp.experts.63.up_proj",
-        "mlp.experts.63.down_proj",
-        "mlp.shared_expert.gate_proj",
-        "mlp.shared_expert.up_proj",
-        "mlp.shared_expert.down_proj"
-      ]
-    ],
-    "nsamples": 512,
-    "quant_block_list": null,
-    "quant_method": "gptq",
-    "scale_dtype": "torch.float16",
-    "seqlen": 2048,
-    "sym": true,
-    "train_bs": 8,
-    "true_sequential": false
-  },
-  "rms_norm_eps": 1e-06,
-  "rope_theta": 1000000.0,
-  "router_aux_loss_coef": 0.001,
-  "shared_expert_intermediate_size": 20480,
-  "sliding_window": null,
-  "tie_word_embeddings": false,
-  "torch_dtype": "float16",
-  "transformers_version": "4.44.2",
-  "use_cache": true,
-  "use_sliding_window": false,
-  "vocab_size": 151936
-}
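
The quantization_config removed above describes symmetric 4-bit weight quantization with one scale per group of 128 weights ("bits": 4, "sym": true, "group_size": 128). For intuition only, a fake-quantization sketch of that scheme in PyTorch; note that AutoRound additionally tunes the rounding and clipping ranges over "iters": 1000 optimization steps ("enable_minmax_tuning": true) rather than using plain round-to-nearest as below:

import torch

def fake_quant_w4_sym(w: torch.Tensor, group_size: int = 128) -> torch.Tensor:
    # Symmetric int4 uses the integer grid [-8, 7] with the zero point fixed at 0.
    rows, cols = w.shape
    g = w.reshape(rows, cols // group_size, group_size)
    scale = (g.abs().amax(dim=-1, keepdim=True) / 7.0).clamp(min=1e-8)
    q = torch.clamp(torch.round(g / scale), min=-8, max=7)
    return (q * scale).reshape(rows, cols)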
 
generation_config.json DELETED
@@ -1,14 +0,0 @@
-{
-  "bos_token_id": 151643,
-  "do_sample": true,
-  "eos_token_id": [
-    151645,
-    151643
-  ],
-  "pad_token_id": 151643,
-  "repetition_penalty": 1.05,
-  "temperature": 0.7,
-  "top_k": 20,
-  "top_p": 0.8,
-  "transformers_version": "4.44.2"
-}
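
For reference, the sampling defaults recorded above can be passed to generate() explicitly. A short sketch; "REPO_ID" is a placeholder for this repository, and loading is assumed to succeed with the matching quantization backend installed:

from transformers import AutoModelForCausalLM, AutoTokenizer

repo = "REPO_ID"  # placeholder, not a real repo id
tokenizer = AutoTokenizer.from_pretrained(repo)
model = AutoModelForCausalLM.from_pretrained(repo, device_map="auto", torch_dtype="auto")

inputs = tokenizer("Briefly explain mixture-of-experts.", return_tensors="pt").to(model.device)
out = model.generate(
    **inputs,
    do_sample=True,           # values below are the deleted generation_config.json defaults
    temperature=0.7,
    top_k=20,
    top_p=0.8,
    repetition_penalty=1.05,
    max_new_tokens=128,
)
print(tokenizer.decode(out[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True))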
 
merges.txt DELETED
The diff for this file is too large to render. See raw diff
 
model.safetensors DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:d15f21e20a022b32b6099710f772e88b8586b50af88871fb44e2422664109fa2
-size 31475221720
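
This stub is a git-lfs pointer rather than the weights themselves; it records only the blob's SHA-256 and byte size. A tiny sketch parsing such a pointer (31475221720 bytes is roughly 29.3 GiB):

pointer = """version https://git-lfs.github.com/spec/v1
oid sha256:d15f21e20a022b32b6099710f772e88b8586b50af88871fb44e2422664109fa2
size 31475221720"""

meta = dict(line.split(" ", 1) for line in pointer.splitlines())
print(meta["oid"])                             # sha256:d15f21e2...
print(f"{int(meta['size']) / 2**30:.1f} GiB")  # -> 29.3 GiB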
 
quantize_config.json DELETED
@@ -1,229 +0,0 @@
-{
-  "bits": 4,
-  "group_size": 128,
-  "sym": true,
-  "data_type": "int",
-  "enable_quanted_input": true,
-  "enable_minmax_tuning": true,
-  "seqlen": 2048,
-  "train_bs": 8,
-  "scale_dtype": "torch.float16",
-  "lr": 0.001,
-  "minmax_lr": 0.001,
-  "gradient_accumulate_steps": 1,
-  "iters": 1000,
-  "amp": true,
-  "nsamples": 512,
-  "low_gpu_mem_usage": false,
-  "quant_block_list": null,
-  "enable_norm_bias_tuning": false,
-  "autoround_version": "0.3.1.dev",
-  "quant_method": "gptq",
-  "desc_act": false,
-  "true_sequential": false,
-  "damp_percent": 0.01,
-  "modules_in_block_to_quantize": [
-    [
-      "self_attn.q_proj",
-      "self_attn.k_proj",
-      "self_attn.v_proj",
-      "self_attn.o_proj",
-      "mlp.gate",
-      "mlp.experts.0.gate_proj",
-      "mlp.experts.0.up_proj",
-      "mlp.experts.0.down_proj",
-      "mlp.experts.1.gate_proj",
-      "mlp.experts.1.up_proj",
-      "mlp.experts.1.down_proj",
-      "mlp.experts.2.gate_proj",
-      "mlp.experts.2.up_proj",
-      "mlp.experts.2.down_proj",
-      "mlp.experts.3.gate_proj",
-      "mlp.experts.3.up_proj",
-      "mlp.experts.3.down_proj",
-      "mlp.experts.4.gate_proj",
-      "mlp.experts.4.up_proj",
-      "mlp.experts.4.down_proj",
-      "mlp.experts.5.gate_proj",
-      "mlp.experts.5.up_proj",
-      "mlp.experts.5.down_proj",
-      "mlp.experts.6.gate_proj",
-      "mlp.experts.6.up_proj",
-      "mlp.experts.6.down_proj",
-      "mlp.experts.7.gate_proj",
-      "mlp.experts.7.up_proj",
-      "mlp.experts.7.down_proj",
-      "mlp.experts.8.gate_proj",
-      "mlp.experts.8.up_proj",
-      "mlp.experts.8.down_proj",
-      "mlp.experts.9.gate_proj",
-      "mlp.experts.9.up_proj",
-      "mlp.experts.9.down_proj",
-      "mlp.experts.10.gate_proj",
-      "mlp.experts.10.up_proj",
-      "mlp.experts.10.down_proj",
-      "mlp.experts.11.gate_proj",
-      "mlp.experts.11.up_proj",
-      "mlp.experts.11.down_proj",
-      "mlp.experts.12.gate_proj",
-      "mlp.experts.12.up_proj",
-      "mlp.experts.12.down_proj",
-      "mlp.experts.13.gate_proj",
-      "mlp.experts.13.up_proj",
-      "mlp.experts.13.down_proj",
-      "mlp.experts.14.gate_proj",
-      "mlp.experts.14.up_proj",
-      "mlp.experts.14.down_proj",
-      "mlp.experts.15.gate_proj",
-      "mlp.experts.15.up_proj",
-      "mlp.experts.15.down_proj",
-      "mlp.experts.16.gate_proj",
-      "mlp.experts.16.up_proj",
-      "mlp.experts.16.down_proj",
-      "mlp.experts.17.gate_proj",
-      "mlp.experts.17.up_proj",
-      "mlp.experts.17.down_proj",
-      "mlp.experts.18.gate_proj",
-      "mlp.experts.18.up_proj",
-      "mlp.experts.18.down_proj",
-      "mlp.experts.19.gate_proj",
-      "mlp.experts.19.up_proj",
-      "mlp.experts.19.down_proj",
-      "mlp.experts.20.gate_proj",
-      "mlp.experts.20.up_proj",
-      "mlp.experts.20.down_proj",
-      "mlp.experts.21.gate_proj",
-      "mlp.experts.21.up_proj",
-      "mlp.experts.21.down_proj",
-      "mlp.experts.22.gate_proj",
-      "mlp.experts.22.up_proj",
-      "mlp.experts.22.down_proj",
-      "mlp.experts.23.gate_proj",
-      "mlp.experts.23.up_proj",
-      "mlp.experts.23.down_proj",
-      "mlp.experts.24.gate_proj",
-      "mlp.experts.24.up_proj",
-      "mlp.experts.24.down_proj",
-      "mlp.experts.25.gate_proj",
-      "mlp.experts.25.up_proj",
-      "mlp.experts.25.down_proj",
-      "mlp.experts.26.gate_proj",
-      "mlp.experts.26.up_proj",
-      "mlp.experts.26.down_proj",
-      "mlp.experts.27.gate_proj",
-      "mlp.experts.27.up_proj",
-      "mlp.experts.27.down_proj",
-      "mlp.experts.28.gate_proj",
-      "mlp.experts.28.up_proj",
-      "mlp.experts.28.down_proj",
-      "mlp.experts.29.gate_proj",
-      "mlp.experts.29.up_proj",
-      "mlp.experts.29.down_proj",
-      "mlp.experts.30.gate_proj",
-      "mlp.experts.30.up_proj",
-      "mlp.experts.30.down_proj",
-      "mlp.experts.31.gate_proj",
-      "mlp.experts.31.up_proj",
-      "mlp.experts.31.down_proj",
-      "mlp.experts.32.gate_proj",
-      "mlp.experts.32.up_proj",
-      "mlp.experts.32.down_proj",
-      "mlp.experts.33.gate_proj",
-      "mlp.experts.33.up_proj",
-      "mlp.experts.33.down_proj",
-      "mlp.experts.34.gate_proj",
-      "mlp.experts.34.up_proj",
-      "mlp.experts.34.down_proj",
-      "mlp.experts.35.gate_proj",
-      "mlp.experts.35.up_proj",
-      "mlp.experts.35.down_proj",
-      "mlp.experts.36.gate_proj",
-      "mlp.experts.36.up_proj",
-      "mlp.experts.36.down_proj",
-      "mlp.experts.37.gate_proj",
-      "mlp.experts.37.up_proj",
-      "mlp.experts.37.down_proj",
-      "mlp.experts.38.gate_proj",
-      "mlp.experts.38.up_proj",
-      "mlp.experts.38.down_proj",
-      "mlp.experts.39.gate_proj",
-      "mlp.experts.39.up_proj",
-      "mlp.experts.39.down_proj",
-      "mlp.experts.40.gate_proj",
-      "mlp.experts.40.up_proj",
-      "mlp.experts.40.down_proj",
-      "mlp.experts.41.gate_proj",
-      "mlp.experts.41.up_proj",
-      "mlp.experts.41.down_proj",
-      "mlp.experts.42.gate_proj",
-      "mlp.experts.42.up_proj",
-      "mlp.experts.42.down_proj",
-      "mlp.experts.43.gate_proj",
-      "mlp.experts.43.up_proj",
-      "mlp.experts.43.down_proj",
-      "mlp.experts.44.gate_proj",
-      "mlp.experts.44.up_proj",
-      "mlp.experts.44.down_proj",
-      "mlp.experts.45.gate_proj",
-      "mlp.experts.45.up_proj",
-      "mlp.experts.45.down_proj",
-      "mlp.experts.46.gate_proj",
-      "mlp.experts.46.up_proj",
-      "mlp.experts.46.down_proj",
-      "mlp.experts.47.gate_proj",
-      "mlp.experts.47.up_proj",
-      "mlp.experts.47.down_proj",
-      "mlp.experts.48.gate_proj",
-      "mlp.experts.48.up_proj",
-      "mlp.experts.48.down_proj",
-      "mlp.experts.49.gate_proj",
-      "mlp.experts.49.up_proj",
-      "mlp.experts.49.down_proj",
-      "mlp.experts.50.gate_proj",
-      "mlp.experts.50.up_proj",
-      "mlp.experts.50.down_proj",
-      "mlp.experts.51.gate_proj",
-      "mlp.experts.51.up_proj",
-      "mlp.experts.51.down_proj",
-      "mlp.experts.52.gate_proj",
-      "mlp.experts.52.up_proj",
-      "mlp.experts.52.down_proj",
-      "mlp.experts.53.gate_proj",
-      "mlp.experts.53.up_proj",
-      "mlp.experts.53.down_proj",
-      "mlp.experts.54.gate_proj",
-      "mlp.experts.54.up_proj",
-      "mlp.experts.54.down_proj",
-      "mlp.experts.55.gate_proj",
-      "mlp.experts.55.up_proj",
-      "mlp.experts.55.down_proj",
-      "mlp.experts.56.gate_proj",
-      "mlp.experts.56.up_proj",
-      "mlp.experts.56.down_proj",
-      "mlp.experts.57.gate_proj",
-      "mlp.experts.57.up_proj",
-      "mlp.experts.57.down_proj",
-      "mlp.experts.58.gate_proj",
-      "mlp.experts.58.up_proj",
-      "mlp.experts.58.down_proj",
-      "mlp.experts.59.gate_proj",
-      "mlp.experts.59.up_proj",
-      "mlp.experts.59.down_proj",
-      "mlp.experts.60.gate_proj",
-      "mlp.experts.60.up_proj",
-      "mlp.experts.60.down_proj",
-      "mlp.experts.61.gate_proj",
-      "mlp.experts.61.up_proj",
-      "mlp.experts.61.down_proj",
-      "mlp.experts.62.gate_proj",
-      "mlp.experts.62.up_proj",
-      "mlp.experts.62.down_proj",
-      "mlp.experts.63.gate_proj",
-      "mlp.experts.63.up_proj",
-      "mlp.experts.63.down_proj",
-      "mlp.shared_expert.gate_proj",
-      "mlp.shared_expert.up_proj",
-      "mlp.shared_expert.down_proj"
-    ]
-  ]
-}
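
The per-block target list above is fully regular, so its coverage is easy to verify programmatically; a sketch that rebuilds it (num_experts = 64 matches the deleted config.json):

num_experts = 64
modules = (
    ["self_attn.q_proj", "self_attn.k_proj", "self_attn.v_proj",
     "self_attn.o_proj", "mlp.gate"]
    + [f"mlp.experts.{i}.{proj}"
       for i in range(num_experts)
       for proj in ("gate_proj", "up_proj", "down_proj")]
    + [f"mlp.shared_expert.{proj}" for proj in ("gate_proj", "up_proj", "down_proj")]
)
assert len(modules) == 5 + 3 * num_experts + 3  # 200 per-block targets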
 
special_tokens_map.json DELETED
@@ -1,20 +0,0 @@
-{
-  "additional_special_tokens": [
-    "<|im_start|>",
-    "<|im_end|>"
-  ],
-  "eos_token": {
-    "content": "<|im_end|>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "pad_token": {
-    "content": "<|endoftext|>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  }
-}
 
tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json DELETED
@@ -1,43 +0,0 @@
-{
-  "add_prefix_space": false,
-  "added_tokens_decoder": {
-    "151643": {
-      "content": "<|endoftext|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "151644": {
-      "content": "<|im_start|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "151645": {
-      "content": "<|im_end|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    }
-  },
-  "additional_special_tokens": [
-    "<|im_start|>",
-    "<|im_end|>"
-  ],
-  "bos_token": null,
-  "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
-  "clean_up_tokenization_spaces": false,
-  "eos_token": "<|im_end|>",
-  "errors": "replace",
-  "model_max_length": 65536,
-  "pad_token": "<|endoftext|>",
-  "split_special_tokens": false,
-  "tokenizer_class": "Qwen2Tokenizer",
-  "unk_token": null
-}
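
The chat_template above is ChatML: it wraps each message in <|im_start|>/<|im_end|> markers and injects a default system prompt when none is supplied. A usage sketch via the standard apply_chat_template API (the upstream base-model repo id is used here for illustration):

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-57B-A14B-Instruct")
messages = [{"role": "user", "content": "Hi there!"}]
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(prompt)
# <|im_start|>system
# You are a helpful assistant.<|im_end|>
# <|im_start|>user
# Hi there!<|im_end|>
# <|im_start|>assistant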
 
vocab.json DELETED
The diff for this file is too large to render. See raw diff