riczhou committed on
Commit
1ad6f06
1 Parent(s): 07f7927

Initial commit

Browse files
logs.txt ADDED
@@ -0,0 +1,235 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0
  0%| | 0/171 [00:00<?, ?it/s]
1
 
 
2
  0%| | 0/171 [00:00<?, ?it/s]
3
 
 
4
  0%| | 0/171 [00:03<?, ?it/s]
5
  1%| | 1/171 [00:04<12:17, 4.34s/it]
6
 
 
7
  1%| | 1/171 [00:05<12:17, 4.34s/it]
8
  1%| | 2/171 [00:06<08:12, 2.92s/it]
9
 
 
10
  1%| | 2/171 [00:06<08:12, 2.92s/it]
11
 
 
12
  1%| | 2/171 [00:06<08:12, 2.92s/it]
13
 
 
14
  1%| | 2/171 [00:06<08:12, 2.92s/it]
15
 
 
16
  1%| | 2/171 [00:06<08:12, 2.92s/it]
17
 
 
18
  1%| | 2/171 [00:06<08:12, 2.92s/it]
19
 
 
20
  1%| | 2/171 [00:06<08:12, 2.92s/it]
21
  5%|▍ | 8/171 [00:06<01:21, 2.01it/s]
22
 
 
23
  5%|▍ | 8/171 [00:06<01:21, 2.01it/s]
24
 
 
25
  5%|▍ | 8/171 [00:06<01:21, 2.01it/s]
26
 
 
27
  5%|▍ | 8/171 [00:06<01:21, 2.01it/s]
28
 
 
29
  5%|▍ | 8/171 [00:06<01:21, 2.01it/s]
30
  7%|▋ | 12/171 [00:06<00:46, 3.45it/s]
31
 
 
32
  7%|▋ | 12/171 [00:06<00:46, 3.45it/s]
33
 
 
34
  7%|▋ | 12/171 [00:06<00:46, 3.45it/s]
35
 
 
36
  7%|▋ | 12/171 [00:06<00:46, 3.45it/s]
37
 
 
38
  7%|▋ | 12/171 [00:06<00:46, 3.45it/s]
39
 
 
40
  7%|▋ | 12/171 [00:06<00:46, 3.45it/s]
41
 
 
42
  7%|▋ | 12/171 [00:06<00:46, 3.45it/s]
43
 
 
44
  7%|▋ | 12/171 [00:06<00:46, 3.45it/s]
45
  11%|█ | 19/171 [00:06<00:22, 6.84it/s]
46
 
 
47
  11%|█ | 19/171 [00:06<00:22, 6.84it/s]
48
 
 
49
  11%|█ | 19/171 [00:06<00:22, 6.84it/s]
50
 
 
51
  11%|█ | 19/171 [00:06<00:22, 6.84it/s]
52
 
 
53
  11%|█ | 19/171 [00:06<00:22, 6.84it/s]
54
 
 
55
  11%|█ | 19/171 [00:06<00:22, 6.84it/s]
56
 
 
57
  11%|█ | 19/171 [00:06<00:22, 6.84it/s]
58
 
 
59
  11%|█ | 19/171 [00:06<00:22, 6.84it/s]
60
  15%|█▌ | 26/171 [00:06<00:13, 11.08it/s]
61
 
 
62
  15%|█▌ | 26/171 [00:06<00:13, 11.08it/s]
63
 
 
64
  15%|█▌ | 26/171 [00:06<00:13, 11.08it/s]
65
 
 
66
  15%|█▌ | 26/171 [00:06<00:13, 11.08it/s]
67
 
 
68
  15%|█▌ | 26/171 [00:06<00:13, 11.08it/s]
69
 
 
70
  15%|█▌ | 26/171 [00:06<00:13, 11.08it/s]
71
 
 
72
  15%|█▌ | 26/171 [00:06<00:13, 11.08it/s]
73
 
 
74
  15%|█▌ | 26/171 [00:06<00:13, 11.08it/s]
75
  19%|█▉ | 33/171 [00:06<00:08, 16.15it/s]
76
 
 
77
  19%|█▉ | 33/171 [00:06<00:08, 16.15it/s]
78
 
 
79
  19%|█▉ | 33/171 [00:06<00:08, 16.15it/s]
80
 
 
81
  19%|█▉ | 33/171 [00:06<00:08, 16.15it/s]
82
 
 
83
  19%|█▉ | 33/171 [00:06<00:08, 16.15it/s]
84
 
 
85
  19%|█▉ | 33/171 [00:06<00:08, 16.15it/s]
86
 
 
87
  19%|█▉ | 33/171 [00:06<00:08, 16.15it/s]
88
 
 
89
  19%|█▉ | 33/171 [00:06<00:08, 16.15it/s]
90
  23%|██▎ | 40/171 [00:06<00:05, 21.87it/s]
91
 
 
92
  23%|██▎ | 40/171 [00:06<00:05, 21.87it/s]
93
 
 
94
  23%|██▎ | 40/171 [00:06<00:05, 21.87it/s]
95
 
 
96
  23%|██▎ | 40/171 [00:06<00:05, 21.87it/s]
97
 
 
98
  23%|██▎ | 40/171 [00:06<00:05, 21.87it/s]
99
 
 
100
  23%|██▎ | 40/171 [00:06<00:05, 21.87it/s]
101
 
 
102
  23%|██▎ | 40/171 [00:06<00:05, 21.87it/s]
103
 
 
104
  23%|██▎ | 40/171 [00:06<00:05, 21.87it/s]
105
  27%|██▋ | 47/171 [00:07<00:04, 28.01it/s]
106
 
 
107
  27%|██▋ | 47/171 [00:07<00:04, 28.01it/s]
108
 
 
109
  27%|██▋ | 47/171 [00:07<00:04, 28.01it/s]
110
 
 
111
  27%|██▋ | 47/171 [00:07<00:04, 28.01it/s]
112
 
 
113
  27%|██▋ | 47/171 [00:07<00:04, 28.01it/s]
114
 
 
115
  27%|██▋ | 47/171 [00:07<00:04, 28.01it/s]
116
 
 
117
  27%|██▋ | 47/171 [00:07<00:04, 28.01it/s]
118
 
 
119
  27%|██▋ | 47/171 [00:07<00:04, 28.01it/s]
120
  32%|███▏ | 54/171 [00:07<00:03, 34.16it/s]
121
 
 
122
  32%|███▏ | 54/171 [00:07<00:03, 34.16it/s]
123
 
 
124
  32%|███▏ | 54/171 [00:07<00:03, 34.16it/s]
125
 
 
126
  32%|███▏ | 54/171 [00:07<00:03, 34.16it/s]
127
 
 
128
  32%|███▏ | 54/171 [00:07<00:03, 34.16it/s]
129
 
 
130
  32%|███▏ | 54/171 [00:07<00:03, 34.16it/s]
131
 
 
132
  32%|███▏ | 54/171 [00:07<00:03, 34.16it/s]
133
 
 
134
  32%|███▏ | 54/171 [00:07<00:03, 34.16it/s]
135
  36%|███▌ | 61/171 [00:07<00:02, 39.98it/s]
136
 
 
137
  36%|███▌ | 61/171 [00:07<00:02, 39.98it/s]
138
 
 
139
  36%|███▌ | 61/171 [00:07<00:02, 39.98it/s]
140
 
 
141
  36%|███▌ | 61/171 [00:07<00:02, 39.98it/s]
142
 
 
143
  36%|███▌ | 61/171 [00:07<00:02, 39.98it/s]
144
 
 
145
  36%|███▌ | 61/171 [00:07<00:02, 39.98it/s]
146
 
 
147
  36%|███▌ | 61/171 [00:07<00:02, 39.98it/s]
148
 
 
149
  36%|███▌ | 61/171 [00:07<00:02, 39.98it/s]
150
  40%|███▉ | 68/171 [00:07<00:02, 45.10it/s]
151
 
 
152
  40%|███▉ | 68/171 [00:07<00:02, 45.10it/s]
153
 
 
154
  40%|███▉ | 68/171 [00:07<00:02, 45.10it/s]
155
 
 
156
  40%|███▉ | 68/171 [00:07<00:02, 45.10it/s]
157
 
 
158
  40%|███▉ | 68/171 [00:07<00:02, 45.10it/s]
159
 
 
160
  40%|███▉ | 68/171 [00:07<00:02, 45.10it/s]
161
 
 
162
  40%|███▉ | 68/171 [00:07<00:02, 45.10it/s]
163
 
 
164
  40%|███▉ | 68/171 [00:07<00:02, 45.10it/s]
165
  44%|████▍ | 75/171 [00:07<00:01, 49.51it/s]
166
 
 
167
  44%|████▍ | 75/171 [00:07<00:01, 49.51it/s]
168
 
 
169
  44%|████▍ | 75/171 [00:07<00:01, 49.51it/s]
170
 
 
171
  44%|████▍ | 75/171 [00:07<00:01, 49.51it/s]
172
 
 
173
  44%|████▍ | 75/171 [00:07<00:01, 49.51it/s]
174
 
 
175
  44%|████▍ | 75/171 [00:07<00:01, 49.51it/s]
176
 
 
177
  44%|████▍ | 75/171 [00:07<00:01, 49.51it/s]
178
 
 
179
  44%|████▍ | 75/171 [00:07<00:01, 49.51it/s]
180
  48%|████▊ | 82/171 [00:07<00:01, 53.13it/s]
181
 
 
182
  48%|████▊ | 82/171 [00:07<00:01, 53.13it/s]
183
 
 
184
  48%|████▊ | 82/171 [00:07<00:01, 53.13it/s]
185
 
 
186
  48%|████▊ | 82/171 [00:07<00:01, 53.13it/s]
187
 
 
188
  48%|████▊ | 82/171 [00:07<00:01, 53.13it/s]
189
 
 
190
  48%|████▊ | 82/171 [00:07<00:01, 53.13it/s]
191
 
 
192
  48%|████▊ | 82/171 [00:07<00:01, 53.13it/s]
193
 
 
194
  48%|████▊ | 82/171 [00:07<00:01, 53.13it/s]
195
  52%|█████▏ | 89/171 [00:07<00:01, 55.91it/s]
196
 
 
197
  52%|█████▏ | 89/171 [00:07<00:01, 55.91it/s]
198
 
 
199
  52%|█████▏ | 89/171 [00:07<00:01, 55.91it/s]
200
 
 
201
  52%|█████▏ | 89/171 [00:07<00:01, 55.91it/s]
202
 
 
203
  52%|█████▏ | 89/171 [00:07<00:01, 55.91it/s]
204
 
 
205
  52%|█████▏ | 89/171 [00:07<00:01, 55.91it/s]
206
 
 
207
  52%|█████▏ | 89/171 [00:07<00:01, 55.91it/s]
208
 
 
209
  52%|█████▏ | 89/171 [00:07<00:01, 55.91it/s]
210
  56%|█████▌ | 96/171 [00:07<00:01, 58.10it/s]
211
 
 
212
  56%|█████▌ | 96/171 [00:07<00:01, 58.10it/s]
213
 
 
214
  56%|█████▌ | 96/171 [00:07<00:01, 58.10it/s]
215
 
 
216
  56%|█████▌ | 96/171 [00:07<00:01, 58.10it/s]
217
 
 
218
  56%|█████▌ | 96/171 [00:07<00:01, 58.10it/s]
219
 
 
220
  56%|█████▌ | 96/171 [00:07<00:01, 58.10it/s]
221
 
 
222
  56%|█████▌ | 96/171 [00:07<00:01, 58.10it/s]
223
 
 
224
  56%|█████▌ | 96/171 [00:07<00:01, 58.10it/s]
225
  60%|██████ | 103/171 [00:07<00:01, 59.65it/s]
226
 
 
227
  60%|██████ | 103/171 [00:07<00:01, 59.65it/s]
228
 
 
229
  60%|██████ | 103/171 [00:07<00:01, 59.65it/s]
230
 
 
231
  60%|██████ | 103/171 [00:07<00:01, 59.65it/s]
232
 
 
233
  60%|██████ | 103/171 [00:07<00:01, 59.65it/s]
234
 
 
235
  60%|██████ | 103/171 [00:07<00:01, 59.65it/s]
236
 
 
237
  60%|██████ | 103/171 [00:07<00:01, 59.65it/s]
238
 
 
239
  60%|██████ | 103/171 [00:07<00:01, 59.65it/s]
240
  64%|██████▍ | 110/171 [00:08<00:01, 60.77it/s]
241
 
 
242
  64%|██████▍ | 110/171 [00:08<00:01, 60.77it/s]
243
 
 
244
  64%|██████▍ | 110/171 [00:08<00:01, 60.77it/s]
245
 
 
246
  64%|██████▍ | 110/171 [00:08<00:01, 60.77it/s]
247
 
 
248
  64%|██████▍ | 110/171 [00:08<00:01, 60.77it/s]
249
 
 
250
  64%|██████▍ | 110/171 [00:08<00:01, 60.77it/s]
251
 
 
252
  64%|██████▍ | 110/171 [00:08<00:01, 60.77it/s]
253
 
 
254
  64%|██████▍ | 110/171 [00:08<00:01, 60.77it/s]
255
  68%|██████▊ | 117/171 [00:08<00:00, 61.63it/s]
256
 
 
257
  68%|██████▊ | 117/171 [00:08<00:00, 61.63it/s]
258
 
 
259
  68%|██████▊ | 117/171 [00:08<00:00, 61.63it/s]
260
 
 
261
  68%|██████▊ | 117/171 [00:08<00:00, 61.63it/s]
262
 
 
263
  68%|██████▊ | 117/171 [00:08<00:00, 61.63it/s]
264
 
 
265
  68%|██████▊ | 117/171 [00:08<00:00, 61.63it/s]
266
 
 
267
  68%|██████▊ | 117/171 [00:08<00:00, 61.63it/s]
268
 
 
269
  68%|██████▊ | 117/171 [00:08<00:00, 61.63it/s]
270
  73%|███████▎ | 124/171 [00:08<00:00, 62.24it/s]
271
 
 
272
  73%|███████▎ | 124/171 [00:08<00:00, 62.24it/s]
273
 
 
274
  73%|███████▎ | 124/171 [00:08<00:00, 62.24it/s]
275
 
 
276
  73%|███████▎ | 124/171 [00:08<00:00, 62.24it/s]
277
 
 
278
  73%|███████▎ | 124/171 [00:08<00:00, 62.24it/s]
279
 
 
280
  73%|███████▎ | 124/171 [00:08<00:00, 62.24it/s]
281
 
 
282
  73%|███████▎ | 124/171 [00:08<00:00, 62.24it/s]
283
 
 
284
  73%|███████▎ | 124/171 [00:08<00:00, 62.24it/s]
285
  77%|███████▋ | 131/171 [00:08<00:00, 62.63it/s]
286
 
 
287
  77%|███████▋ | 131/171 [00:08<00:00, 62.63it/s]
288
 
 
289
  77%|███████▋ | 131/171 [00:08<00:00, 62.63it/s]
290
 
 
291
  77%|███████▋ | 131/171 [00:08<00:00, 62.63it/s]
292
 
 
293
  77%|███████▋ | 131/171 [00:08<00:00, 62.63it/s]
294
 
 
295
  77%|███████▋ | 131/171 [00:08<00:00, 62.63it/s]
296
 
 
297
  77%|███████▋ | 131/171 [00:08<00:00, 62.63it/s]
298
 
 
299
  77%|███████▋ | 131/171 [00:08<00:00, 62.63it/s]
300
  81%|████████ | 138/171 [00:08<00:00, 62.88it/s]
301
 
 
302
  81%|████████ | 138/171 [00:08<00:00, 62.88it/s]
303
 
 
304
  81%|████████ | 138/171 [00:08<00:00, 62.88it/s]
305
 
 
306
  81%|████████ | 138/171 [00:08<00:00, 62.88it/s]
307
 
 
308
  81%|████████ | 138/171 [00:08<00:00, 62.88it/s]
309
 
 
310
  81%|████████ | 138/171 [00:08<00:00, 62.88it/s]
311
 
 
312
  81%|████████ | 138/171 [00:08<00:00, 62.88it/s]
313
 
 
314
  81%|████████ | 138/171 [00:08<00:00, 62.88it/s]
315
  85%|████████▍ | 145/171 [00:08<00:00, 62.95it/s]
316
 
 
317
  85%|████████▍ | 145/171 [00:08<00:00, 62.95it/s]
318
 
 
319
  85%|████████▍ | 145/171 [00:08<00:00, 62.95it/s]
320
 
 
321
  85%|████████▍ | 145/171 [00:08<00:00, 62.95it/s]
322
 
 
323
  85%|████████▍ | 145/171 [00:08<00:00, 62.95it/s]
324
 
 
325
  85%|████████▍ | 145/171 [00:08<00:00, 62.95it/s]
326
 
 
327
  85%|████████▍ | 145/171 [00:08<00:00, 62.95it/s]
328
 
 
329
  85%|████████▍ | 145/171 [00:08<00:00, 62.95it/s]
330
  89%|████████▉ | 152/171 [00:08<00:00, 63.19it/s]
331
 
 
332
  89%|████████▉ | 152/171 [00:08<00:00, 63.19it/s]
333
 
 
334
  89%|████████▉ | 152/171 [00:08<00:00, 63.19it/s]
335
 
 
336
  89%|████████▉ | 152/171 [00:08<00:00, 63.19it/s]
337
 
 
338
  89%|████████▉ | 152/171 [00:08<00:00, 63.19it/s]
339
 
 
340
  89%|████████▉ | 152/171 [00:08<00:00, 63.19it/s]
341
 
 
342
  89%|████████▉ | 152/171 [00:08<00:00, 63.19it/s]
343
 
 
344
  89%|████████▉ | 152/171 [00:08<00:00, 63.19it/s]
345
  93%|█████████▎| 159/171 [00:08<00:00, 62.45it/s]
346
 
 
347
  93%|█████████▎| 159/171 [00:08<00:00, 62.45it/s]
348
 
 
349
  93%|█████████▎| 159/171 [00:08<00:00, 62.45it/s]
350
 
 
351
  93%|█████████▎| 159/171 [00:08<00:00, 62.45it/s]
352
 
 
353
  93%|█████████▎| 159/171 [00:08<00:00, 62.45it/s]
354
 
 
355
  93%|█████████▎| 159/171 [00:08<00:00, 62.45it/s]
356
 
 
357
  93%|█████████▎| 159/171 [00:08<00:00, 62.45it/s]
358
 
 
359
  93%|█████████▎| 159/171 [00:08<00:00, 62.45it/s]
360
  97%|█████████▋| 166/171 [00:08<00:00, 62.76it/s]
361
 
 
362
  97%|█████████▋| 166/171 [00:08<00:00, 62.76it/s]
363
 
 
364
  97%|█████████▋| 166/171 [00:08<00:00, 62.76it/s]
365
 
 
366
  97%|█████████▋| 166/171 [00:08<00:00, 62.76it/s]
367
 
 
368
  97%|█████████▋| 166/171 [00:08<00:00, 62.76it/s]
369
 
 
370
  97%|█████████▋| 166/171 [00:08<00:00, 62.76it/s]
 
 
 
 
 
 
 
 
 
 
1
+ /opt/conda/envs/py310/bin/python -m mlc_llm gen_config /models/Qwen1.5-0.5B-Chat --quantization q0f16 --conv-template chatml --output /models/mlc-delivery/hf/mlc-ai/Qwen1.5-0.5B-Chat-q0f16-MLC
2
+ [2024-06-04 03:31:58] INFO auto_config.py:116: Found model configuration: /models/Qwen1.5-0.5B-Chat/config.json
3
+ [2024-06-04 03:31:58] INFO auto_config.py:154: Found model type: qwen2. Use `--model-type` to override.
4
+ [2024-06-04 03:31:58] INFO qwen2_model.py:49: context_window_size not found in config.json. Falling back to max_position_embeddings (32768)
5
+ [2024-06-04 03:31:58] INFO qwen2_model.py:66: prefill_chunk_size defaults to 2048
6
+ [2024-06-04 03:31:58] INFO config.py:107: Overriding max_batch_size from 1 to 80
7
+ [2024-06-04 03:31:58] INFO gen_config.py:143: [generation_config.json] Setting bos_token_id: 151643
8
+ [2024-06-04 03:31:58] INFO gen_config.py:143: [generation_config.json] Setting pad_token_id: 151643
9
+ [2024-06-04 03:31:58] INFO gen_config.py:143: [generation_config.json] Setting eos_token_id: [151645, 151643]
10
+ [2024-06-04 03:31:58] INFO gen_config.py:143: [generation_config.json] Setting repetition_penalty: 1.1
11
+ [2024-06-04 03:31:58] INFO gen_config.py:143: [generation_config.json] Setting top_p: 0.8
12
+ [2024-06-04 03:31:58] INFO gen_config.py:157: Not found tokenizer config: /models/Qwen1.5-0.5B-Chat/tokenizer.model
13
+ [2024-06-04 03:31:58] INFO gen_config.py:155: Found tokenizer config: /models/Qwen1.5-0.5B-Chat/tokenizer.json. Copying to /models/mlc-delivery/hf/mlc-ai/Qwen1.5-0.5B-Chat-q0f16-MLC/tokenizer.json
14
+ [2024-06-04 03:31:58] INFO gen_config.py:155: Found tokenizer config: /models/Qwen1.5-0.5B-Chat/vocab.json. Copying to /models/mlc-delivery/hf/mlc-ai/Qwen1.5-0.5B-Chat-q0f16-MLC/vocab.json
15
+ [2024-06-04 03:31:58] INFO gen_config.py:155: Found tokenizer config: /models/Qwen1.5-0.5B-Chat/merges.txt. Copying to /models/mlc-delivery/hf/mlc-ai/Qwen1.5-0.5B-Chat-q0f16-MLC/merges.txt
16
+ [2024-06-04 03:31:58] INFO gen_config.py:157: Not found tokenizer config: /models/Qwen1.5-0.5B-Chat/added_tokens.json
17
+ [2024-06-04 03:31:58] INFO gen_config.py:155: Found tokenizer config: /models/Qwen1.5-0.5B-Chat/tokenizer_config.json. Copying to /models/mlc-delivery/hf/mlc-ai/Qwen1.5-0.5B-Chat-q0f16-MLC/tokenizer_config.json
18
+ [2024-06-04 03:31:58] INFO gen_config.py:216: Detected tokenizer info: {'token_postproc_method': 'byte_level', 'prepend_space_in_encode': False, 'strip_space_in_decode': False}
19
+ [2024-06-04 03:31:58] INFO gen_config.py:32: [System default] Setting temperature: 1.0
20
+ [2024-06-04 03:31:58] INFO gen_config.py:32: [System default] Setting presence_penalty: 0.0
21
+ [2024-06-04 03:31:58] INFO gen_config.py:32: [System default] Setting frequency_penalty: 0.0
22
+ [2024-06-04 03:31:58] INFO gen_config.py:32: [System default] Setting mean_gen_len: 128
23
+ [2024-06-04 03:31:58] INFO gen_config.py:32: [System default] Setting max_gen_len: 512
24
+ [2024-06-04 03:31:58] INFO gen_config.py:32: [System default] Setting shift_fill_factor: 0.3
25
+ [2024-06-04 03:31:58] INFO gen_config.py:223: Dumping configuration file to: /models/mlc-delivery/hf/mlc-ai/Qwen1.5-0.5B-Chat-q0f16-MLC/mlc-chat-config.json
26
+ /opt/conda/envs/py310/bin/python -m mlc_llm convert_weight /models/Qwen1.5-0.5B-Chat --quantization q0f16 --output /models/mlc-delivery/hf/mlc-ai/Qwen1.5-0.5B-Chat-q0f16-MLC
27
+ [2024-06-04 03:32:00] INFO auto_config.py:116: Found model configuration: /models/Qwen1.5-0.5B-Chat/config.json
28
+ [2024-06-04 03:32:01] INFO auto_device.py:79: Found device: cuda:0
29
+ [2024-06-04 03:32:03] INFO auto_device.py:88: Not found device: rocm:0
30
+ [2024-06-04 03:32:04] INFO auto_device.py:88: Not found device: metal:0
31
+ [2024-06-04 03:32:06] INFO auto_device.py:79: Found device: vulkan:0
32
+ [2024-06-04 03:32:06] INFO auto_device.py:79: Found device: vulkan:1
33
+ [2024-06-04 03:32:06] INFO auto_device.py:79: Found device: vulkan:2
34
+ [2024-06-04 03:32:06] INFO auto_device.py:79: Found device: vulkan:3
35
+ [2024-06-04 03:32:07] INFO auto_device.py:88: Not found device: opencl:0
36
+ [2024-06-04 03:32:07] INFO auto_device.py:35: Using device: cuda:0
37
+ [2024-06-04 03:32:07] INFO auto_weight.py:71: Finding weights in: /models/Qwen1.5-0.5B-Chat
38
+ [2024-06-04 03:32:07] INFO auto_weight.py:137: Not found Huggingface PyTorch
39
+ [2024-06-04 03:32:07] INFO auto_weight.py:144: Found source weight format: huggingface-safetensor. Source configuration: /models/Qwen1.5-0.5B-Chat/model.safetensors.index.json
40
+ [2024-06-04 03:32:07] INFO auto_weight.py:107: Using source weight configuration: /models/Qwen1.5-0.5B-Chat/model.safetensors.index.json. Use `--source` to override.
41
+ [2024-06-04 03:32:07] INFO auto_weight.py:111: Using source weight format: huggingface-safetensor. Use `--source-format` to override.
42
+ [2024-06-04 03:32:07] INFO auto_config.py:154: Found model type: qwen2. Use `--model-type` to override.
43
+ [2024-06-04 03:32:07] INFO qwen2_model.py:49: context_window_size not found in config.json. Falling back to max_position_embeddings (32768)
44
+ [2024-06-04 03:32:07] INFO qwen2_model.py:66: prefill_chunk_size defaults to 2048
45
+ Weight conversion with arguments:
46
+ --config /models/Qwen1.5-0.5B-Chat/config.json
47
+ --quantization NoQuantize(name='q0f16', kind='no-quant', model_dtype='float16')
48
+ --model-type qwen2
49
+ --device cuda:0
50
+ --source /models/Qwen1.5-0.5B-Chat/model.safetensors.index.json
51
+ --source-format huggingface-safetensor
52
+ --output /models/mlc-delivery/hf/mlc-ai/Qwen1.5-0.5B-Chat-q0f16-MLC
53
+ Start storing to cache /models/mlc-delivery/hf/mlc-ai/Qwen1.5-0.5B-Chat-q0f16-MLC
54
+
55
  0%| | 0/171 [00:00<?, ?it/s]
56
 
57
+
58
  0%| | 0/171 [00:00<?, ?it/s]
59
 
60
+
61
  0%| | 0/171 [00:03<?, ?it/s]
62
  1%| | 1/171 [00:04<12:17, 4.34s/it]
63
 
64
+
65
  1%| | 1/171 [00:05<12:17, 4.34s/it]
66
  1%| | 2/171 [00:06<08:12, 2.92s/it]
67
 
68
+
69
  1%| | 2/171 [00:06<08:12, 2.92s/it]
70
 
71
+
72
  1%| | 2/171 [00:06<08:12, 2.92s/it]
73
 
74
+
75
  1%| | 2/171 [00:06<08:12, 2.92s/it]
76
 
77
+
78
  1%| | 2/171 [00:06<08:12, 2.92s/it]
79
 
80
+
81
  1%| | 2/171 [00:06<08:12, 2.92s/it]
82
 
83
+
84
  1%| | 2/171 [00:06<08:12, 2.92s/it]
85
  5%|▍ | 8/171 [00:06<01:21, 2.01it/s]
86
 
87
+
88
  5%|▍ | 8/171 [00:06<01:21, 2.01it/s]
89
 
90
+
91
  5%|▍ | 8/171 [00:06<01:21, 2.01it/s]
92
 
93
+
94
  5%|▍ | 8/171 [00:06<01:21, 2.01it/s]
95
 
96
+
97
  5%|▍ | 8/171 [00:06<01:21, 2.01it/s]
98
  7%|▋ | 12/171 [00:06<00:46, 3.45it/s]
99
 
100
+
101
  7%|▋ | 12/171 [00:06<00:46, 3.45it/s]
102
 
103
+
104
  7%|▋ | 12/171 [00:06<00:46, 3.45it/s]
105
 
106
+
107
  7%|▋ | 12/171 [00:06<00:46, 3.45it/s]
108
 
109
+
110
  7%|▋ | 12/171 [00:06<00:46, 3.45it/s]
111
 
112
+
113
  7%|▋ | 12/171 [00:06<00:46, 3.45it/s]
114
 
115
+
116
  7%|▋ | 12/171 [00:06<00:46, 3.45it/s]
117
 
118
+
119
  7%|▋ | 12/171 [00:06<00:46, 3.45it/s]
120
  11%|█ | 19/171 [00:06<00:22, 6.84it/s]
121
 
122
+
123
  11%|█ | 19/171 [00:06<00:22, 6.84it/s]
124
 
125
+
126
  11%|█ | 19/171 [00:06<00:22, 6.84it/s]
127
 
128
+
129
  11%|█ | 19/171 [00:06<00:22, 6.84it/s]
130
 
131
+
132
  11%|█ | 19/171 [00:06<00:22, 6.84it/s]
133
 
134
+
135
  11%|█ | 19/171 [00:06<00:22, 6.84it/s]
136
 
137
+
138
  11%|█ | 19/171 [00:06<00:22, 6.84it/s]
139
 
140
+
141
  11%|█ | 19/171 [00:06<00:22, 6.84it/s]
142
  15%|█▌ | 26/171 [00:06<00:13, 11.08it/s]
143
 
144
+
145
  15%|█▌ | 26/171 [00:06<00:13, 11.08it/s]
146
 
147
+
148
  15%|█▌ | 26/171 [00:06<00:13, 11.08it/s]
149
 
150
+
151
  15%|█▌ | 26/171 [00:06<00:13, 11.08it/s]
152
 
153
+
154
  15%|█▌ | 26/171 [00:06<00:13, 11.08it/s]
155
 
156
+
157
  15%|█▌ | 26/171 [00:06<00:13, 11.08it/s]
158
 
159
+
160
  15%|█▌ | 26/171 [00:06<00:13, 11.08it/s]
161
 
162
+
163
  15%|█▌ | 26/171 [00:06<00:13, 11.08it/s]
164
  19%|█▉ | 33/171 [00:06<00:08, 16.15it/s]
165
 
166
+
167
  19%|█▉ | 33/171 [00:06<00:08, 16.15it/s]
168
 
169
+
170
  19%|█▉ | 33/171 [00:06<00:08, 16.15it/s]
171
 
172
+
173
  19%|█▉ | 33/171 [00:06<00:08, 16.15it/s]
174
 
175
+
176
  19%|█▉ | 33/171 [00:06<00:08, 16.15it/s]
177
 
178
+
179
  19%|█▉ | 33/171 [00:06<00:08, 16.15it/s]
180
 
181
+
182
  19%|█▉ | 33/171 [00:06<00:08, 16.15it/s]
183
 
184
+
185
  19%|█▉ | 33/171 [00:06<00:08, 16.15it/s]
186
  23%|██▎ | 40/171 [00:06<00:05, 21.87it/s]
187
 
188
+
189
  23%|██▎ | 40/171 [00:06<00:05, 21.87it/s]
190
 
191
+
192
  23%|██▎ | 40/171 [00:06<00:05, 21.87it/s]
193
 
194
+
195
  23%|██▎ | 40/171 [00:06<00:05, 21.87it/s]
196
 
197
+
198
  23%|██▎ | 40/171 [00:06<00:05, 21.87it/s]
199
 
200
+
201
  23%|██▎ | 40/171 [00:06<00:05, 21.87it/s]
202
 
203
+
204
  23%|██▎ | 40/171 [00:06<00:05, 21.87it/s]
205
 
206
+
207
  23%|██▎ | 40/171 [00:06<00:05, 21.87it/s]
208
  27%|██▋ | 47/171 [00:07<00:04, 28.01it/s]
209
 
210
+
211
  27%|██▋ | 47/171 [00:07<00:04, 28.01it/s]
212
 
213
+
214
  27%|██▋ | 47/171 [00:07<00:04, 28.01it/s]
215
 
216
+
217
  27%|██▋ | 47/171 [00:07<00:04, 28.01it/s]
218
 
219
+
220
  27%|██▋ | 47/171 [00:07<00:04, 28.01it/s]
221
 
222
+
223
  27%|██▋ | 47/171 [00:07<00:04, 28.01it/s]
224
 
225
+
226
  27%|██▋ | 47/171 [00:07<00:04, 28.01it/s]
227
 
228
+
229
  27%|██▋ | 47/171 [00:07<00:04, 28.01it/s]
230
  32%|███▏ | 54/171 [00:07<00:03, 34.16it/s]
231
 
232
+
233
  32%|███▏ | 54/171 [00:07<00:03, 34.16it/s]
234
 
235
+
236
  32%|███▏ | 54/171 [00:07<00:03, 34.16it/s]
237
 
238
+
239
  32%|███▏ | 54/171 [00:07<00:03, 34.16it/s]
240
 
241
+
242
  32%|███▏ | 54/171 [00:07<00:03, 34.16it/s]
243
 
244
+
245
  32%|███▏ | 54/171 [00:07<00:03, 34.16it/s]
246
 
247
+
248
  32%|███▏ | 54/171 [00:07<00:03, 34.16it/s]
249
 
250
+
251
  32%|███▏ | 54/171 [00:07<00:03, 34.16it/s]
252
  36%|███▌ | 61/171 [00:07<00:02, 39.98it/s]
253
 
254
+
255
  36%|███▌ | 61/171 [00:07<00:02, 39.98it/s]
256
 
257
+
258
  36%|███▌ | 61/171 [00:07<00:02, 39.98it/s]
259
 
260
+
261
  36%|███▌ | 61/171 [00:07<00:02, 39.98it/s]
262
 
263
+
264
  36%|███▌ | 61/171 [00:07<00:02, 39.98it/s]
265
 
266
+
267
  36%|███▌ | 61/171 [00:07<00:02, 39.98it/s]
268
 
269
+
270
  36%|███▌ | 61/171 [00:07<00:02, 39.98it/s]
271
 
272
+
273
  36%|███▌ | 61/171 [00:07<00:02, 39.98it/s]
274
  40%|███▉ | 68/171 [00:07<00:02, 45.10it/s]
275
 
276
+
277
  40%|███▉ | 68/171 [00:07<00:02, 45.10it/s]
278
 
279
+
280
  40%|███▉ | 68/171 [00:07<00:02, 45.10it/s]
281
 
282
+
283
  40%|███▉ | 68/171 [00:07<00:02, 45.10it/s]
284
 
285
+
286
  40%|███▉ | 68/171 [00:07<00:02, 45.10it/s]
287
 
288
+
289
  40%|███▉ | 68/171 [00:07<00:02, 45.10it/s]
290
 
291
+
292
  40%|███▉ | 68/171 [00:07<00:02, 45.10it/s]
293
 
294
+
295
  40%|███▉ | 68/171 [00:07<00:02, 45.10it/s]
296
  44%|████▍ | 75/171 [00:07<00:01, 49.51it/s]
297
 
298
+
299
  44%|████▍ | 75/171 [00:07<00:01, 49.51it/s]
300
 
301
+
302
  44%|████▍ | 75/171 [00:07<00:01, 49.51it/s]
303
 
304
+
305
  44%|████▍ | 75/171 [00:07<00:01, 49.51it/s]
306
 
307
+
308
  44%|████▍ | 75/171 [00:07<00:01, 49.51it/s]
309
 
310
+
311
  44%|████▍ | 75/171 [00:07<00:01, 49.51it/s]
312
 
313
+
314
  44%|████▍ | 75/171 [00:07<00:01, 49.51it/s]
315
 
316
+
317
  44%|████▍ | 75/171 [00:07<00:01, 49.51it/s]
318
  48%|████▊ | 82/171 [00:07<00:01, 53.13it/s]
319
 
320
+
321
  48%|████▊ | 82/171 [00:07<00:01, 53.13it/s]
322
 
323
+
324
  48%|████▊ | 82/171 [00:07<00:01, 53.13it/s]
325
 
326
+
327
  48%|████▊ | 82/171 [00:07<00:01, 53.13it/s]
328
 
329
+
330
  48%|████▊ | 82/171 [00:07<00:01, 53.13it/s]
331
 
332
+
333
  48%|████▊ | 82/171 [00:07<00:01, 53.13it/s]
334
 
335
+
336
  48%|████▊ | 82/171 [00:07<00:01, 53.13it/s]
337
 
338
+
339
  48%|████▊ | 82/171 [00:07<00:01, 53.13it/s]
340
  52%|█████▏ | 89/171 [00:07<00:01, 55.91it/s]
341
 
342
+
343
  52%|█████▏ | 89/171 [00:07<00:01, 55.91it/s]
344
 
345
+
346
  52%|█████▏ | 89/171 [00:07<00:01, 55.91it/s]
347
 
348
+
349
  52%|█████▏ | 89/171 [00:07<00:01, 55.91it/s]
350
 
351
+
352
  52%|█████▏ | 89/171 [00:07<00:01, 55.91it/s]
353
 
354
+
355
  52%|█████▏ | 89/171 [00:07<00:01, 55.91it/s]
356
 
357
+
358
  52%|█████▏ | 89/171 [00:07<00:01, 55.91it/s]
359
 
360
+
361
  52%|█████▏ | 89/171 [00:07<00:01, 55.91it/s]
362
  56%|█████▌ | 96/171 [00:07<00:01, 58.10it/s]
363
 
364
+
365
  56%|█████▌ | 96/171 [00:07<00:01, 58.10it/s]
366
 
367
+
368
  56%|█████▌ | 96/171 [00:07<00:01, 58.10it/s]
369
 
370
+
371
  56%|█████▌ | 96/171 [00:07<00:01, 58.10it/s]
372
 
373
+
374
  56%|█████▌ | 96/171 [00:07<00:01, 58.10it/s]
375
 
376
+
377
  56%|█████▌ | 96/171 [00:07<00:01, 58.10it/s]
378
 
379
+
380
  56%|█████▌ | 96/171 [00:07<00:01, 58.10it/s]
381
 
382
+
383
  56%|█████▌ | 96/171 [00:07<00:01, 58.10it/s]
384
  60%|██████ | 103/171 [00:07<00:01, 59.65it/s]
385
 
386
+
387
  60%|██████ | 103/171 [00:07<00:01, 59.65it/s]
388
 
389
+
390
  60%|██████ | 103/171 [00:07<00:01, 59.65it/s]
391
 
392
+
393
  60%|██████ | 103/171 [00:07<00:01, 59.65it/s]
394
 
395
+
396
  60%|██████ | 103/171 [00:07<00:01, 59.65it/s]
397
 
398
+
399
  60%|██████ | 103/171 [00:07<00:01, 59.65it/s]
400
 
401
+
402
  60%|██████ | 103/171 [00:07<00:01, 59.65it/s]
403
 
404
+
405
  60%|██████ | 103/171 [00:07<00:01, 59.65it/s]
406
  64%|██████▍ | 110/171 [00:08<00:01, 60.77it/s]
407
 
408
+
409
  64%|██████▍ | 110/171 [00:08<00:01, 60.77it/s]
410
 
411
+
412
  64%|██████▍ | 110/171 [00:08<00:01, 60.77it/s]
413
 
414
+
415
  64%|██████▍ | 110/171 [00:08<00:01, 60.77it/s]
416
 
417
+
418
  64%|██████▍ | 110/171 [00:08<00:01, 60.77it/s]
419
 
420
+
421
  64%|██████▍ | 110/171 [00:08<00:01, 60.77it/s]
422
 
423
+
424
  64%|██████▍ | 110/171 [00:08<00:01, 60.77it/s]
425
 
426
+
427
  64%|██████▍ | 110/171 [00:08<00:01, 60.77it/s]
428
  68%|██████▊ | 117/171 [00:08<00:00, 61.63it/s]
429
 
430
+
431
  68%|██████▊ | 117/171 [00:08<00:00, 61.63it/s]
432
 
433
+
434
  68%|██████▊ | 117/171 [00:08<00:00, 61.63it/s]
435
 
436
+
437
  68%|██████▊ | 117/171 [00:08<00:00, 61.63it/s]
438
 
439
+
440
  68%|██████▊ | 117/171 [00:08<00:00, 61.63it/s]
441
 
442
+
443
  68%|██████▊ | 117/171 [00:08<00:00, 61.63it/s]
444
 
445
+
446
  68%|██████▊ | 117/171 [00:08<00:00, 61.63it/s]
447
 
448
+
449
  68%|██████▊ | 117/171 [00:08<00:00, 61.63it/s]
450
  73%|███████▎ | 124/171 [00:08<00:00, 62.24it/s]
451
 
452
+
453
  73%|███████▎ | 124/171 [00:08<00:00, 62.24it/s]
454
 
455
+
456
  73%|███████▎ | 124/171 [00:08<00:00, 62.24it/s]
457
 
458
+
459
  73%|███████▎ | 124/171 [00:08<00:00, 62.24it/s]
460
 
461
+
462
  73%|███████▎ | 124/171 [00:08<00:00, 62.24it/s]
463
 
464
+
465
  73%|███████▎ | 124/171 [00:08<00:00, 62.24it/s]
466
 
467
+
468
  73%|███████▎ | 124/171 [00:08<00:00, 62.24it/s]
469
 
470
+
471
  73%|███████▎ | 124/171 [00:08<00:00, 62.24it/s]
472
  77%|███████▋ | 131/171 [00:08<00:00, 62.63it/s]
473
 
474
+
475
  77%|███████▋ | 131/171 [00:08<00:00, 62.63it/s]
476
 
477
+
478
  77%|███████▋ | 131/171 [00:08<00:00, 62.63it/s]
479
 
480
+
481
  77%|███████▋ | 131/171 [00:08<00:00, 62.63it/s]
482
 
483
+
484
  77%|███████▋ | 131/171 [00:08<00:00, 62.63it/s]
485
 
486
+
487
  77%|███████▋ | 131/171 [00:08<00:00, 62.63it/s]
488
 
489
+
490
  77%|███████▋ | 131/171 [00:08<00:00, 62.63it/s]
491
 
492
+
493
  77%|███████▋ | 131/171 [00:08<00:00, 62.63it/s]
494
  81%|████████ | 138/171 [00:08<00:00, 62.88it/s]
495
 
496
+
497
  81%|████████ | 138/171 [00:08<00:00, 62.88it/s]
498
 
499
+
500
  81%|████████ | 138/171 [00:08<00:00, 62.88it/s]
501
 
502
+
503
  81%|████████ | 138/171 [00:08<00:00, 62.88it/s]
504
 
505
+
506
  81%|████████ | 138/171 [00:08<00:00, 62.88it/s]
507
 
508
+
509
  81%|████████ | 138/171 [00:08<00:00, 62.88it/s]
510
 
511
+
512
  81%|████████ | 138/171 [00:08<00:00, 62.88it/s]
513
 
514
+
515
  81%|████████ | 138/171 [00:08<00:00, 62.88it/s]
516
  85%|████████▍ | 145/171 [00:08<00:00, 62.95it/s]
517
 
518
+
519
  85%|████████▍ | 145/171 [00:08<00:00, 62.95it/s]
520
 
521
+
522
  85%|████████▍ | 145/171 [00:08<00:00, 62.95it/s]
523
 
524
+
525
  85%|████████▍ | 145/171 [00:08<00:00, 62.95it/s]
526
 
527
+
528
  85%|████████▍ | 145/171 [00:08<00:00, 62.95it/s]
529
 
530
+
531
  85%|████████▍ | 145/171 [00:08<00:00, 62.95it/s]
532
 
533
+
534
  85%|████████▍ | 145/171 [00:08<00:00, 62.95it/s]
535
 
536
+
537
  85%|████████▍ | 145/171 [00:08<00:00, 62.95it/s]
538
  89%|████████▉ | 152/171 [00:08<00:00, 63.19it/s]
539
 
540
+
541
  89%|████████▉ | 152/171 [00:08<00:00, 63.19it/s]
542
 
543
+
544
  89%|████████▉ | 152/171 [00:08<00:00, 63.19it/s]
545
 
546
+
547
  89%|████████▉ | 152/171 [00:08<00:00, 63.19it/s]
548
 
549
+
550
  89%|████████▉ | 152/171 [00:08<00:00, 63.19it/s]
551
 
552
+
553
  89%|████████▉ | 152/171 [00:08<00:00, 63.19it/s]
554
 
555
+
556
  89%|████████▉ | 152/171 [00:08<00:00, 63.19it/s]
557
 
558
+
559
  89%|████████▉ | 152/171 [00:08<00:00, 63.19it/s]
560
  93%|█████████▎| 159/171 [00:08<00:00, 62.45it/s]
561
 
562
+
563
  93%|█████████▎| 159/171 [00:08<00:00, 62.45it/s]
564
 
565
+
566
  93%|█████████▎| 159/171 [00:08<00:00, 62.45it/s]
567
 
568
+
569
  93%|█████████▎| 159/171 [00:08<00:00, 62.45it/s]
570
 
571
+
572
  93%|█████████▎| 159/171 [00:08<00:00, 62.45it/s]
573
 
574
+
575
  93%|█████████▎| 159/171 [00:08<00:00, 62.45it/s]
576
 
577
+
578
  93%|█████████▎| 159/171 [00:08<00:00, 62.45it/s]
579
 
580
+
581
  93%|█████████▎| 159/171 [00:08<00:00, 62.45it/s]
582
  97%|█████████▋| 166/171 [00:08<00:00, 62.76it/s]
583
 
584
+
585
  97%|█████████▋| 166/171 [00:08<00:00, 62.76it/s]
586
 
587
+
588
  97%|█████████▋| 166/171 [00:08<00:00, 62.76it/s]
589
 
590
+
591
  97%|█████████▋| 166/171 [00:08<00:00, 62.76it/s]
592
 
593
+
594
  97%|█████████▋| 166/171 [00:08<00:00, 62.76it/s]
595
 
596
+
597
  97%|█████████▋| 166/171 [00:08<00:00, 62.76it/s]
598
+ [2024-06-04 03:32:17] INFO huggingface_loader.py:197: Unloading HF weight file: /models/Qwen1.5-0.5B-Chat/model.safetensors
599
+ [2024-06-04 03:32:17] INFO stats.py:77: Time usage: HF loading: 2.308 sec; Pre-quantization mapping: 2.992 sec; Quantization: 0.000 sec
600
+ [2024-06-04 03:32:17] INFO stats.py:91: RAM usage: Peak RAM: 2.308 GB. Total bytes loaded from disk: 2.308 GB
601
+ [2024-06-04 03:32:17] INFO convert_weight.py:155: Parameter size after quantization: 1.154 GB
602
+ [2024-06-04 03:32:17] INFO convert_weight.py:160: Total parameters: 619,570,176
603
+ [2024-06-04 03:32:17] INFO convert_weight.py:161: Bits per parameter: 16.000
604
+ [2024-06-04 03:32:17] INFO convert_weight.py:166: Saved to directory: /models/mlc-delivery/hf/mlc-ai/Qwen1.5-0.5B-Chat-q0f16-MLC
605
+
606
+ All finished, 26 total shards committed, record saved to /models/mlc-delivery/hf/mlc-ai/Qwen1.5-0.5B-Chat-q0f16-MLC/ndarray-cache.json
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
mlc-chat-config.json ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "0.1.0",
3
+ "model_type": "qwen2",
4
+ "quantization": "q0f16",
5
+ "model_config": {
6
+ "hidden_act": "silu",
7
+ "hidden_size": 1024,
8
+ "intermediate_size": 2816,
9
+ "num_attention_heads": 16,
10
+ "num_hidden_layers": 24,
11
+ "num_key_value_heads": 16,
12
+ "rms_norm_eps": 1e-06,
13
+ "rope_theta": 1000000.0,
14
+ "vocab_size": 151936,
15
+ "context_window_size": 32768,
16
+ "prefill_chunk_size": 2048,
17
+ "tensor_parallel_shards": 1,
18
+ "head_dim": 64,
19
+ "dtype": "float32",
20
+ "max_batch_size": 80
21
+ },
22
+ "vocab_size": 151936,
23
+ "context_window_size": 32768,
24
+ "sliding_window_size": -1,
25
+ "prefill_chunk_size": 2048,
26
+ "attention_sink_size": -1,
27
+ "tensor_parallel_shards": 1,
28
+ "temperature": 1.0,
29
+ "presence_penalty": 0.0,
30
+ "frequency_penalty": 0.0,
31
+ "repetition_penalty": 1.1,
32
+ "top_p": 0.8,
33
+ "tokenizer_files": [
34
+ "tokenizer.json",
35
+ "vocab.json",
36
+ "merges.txt",
37
+ "tokenizer_config.json"
38
+ ],
39
+ "tokenizer_info": {
40
+ "token_postproc_method": "byte_level",
41
+ "prepend_space_in_encode": false,
42
+ "strip_space_in_decode": false
43
+ },
44
+ "conv_template": {
45
+ "name": "chatml",
46
+ "system_template": "<|im_start|>system\n{system_message}",
47
+ "system_message": "A conversation between a user and an LLM-based AI assistant. The assistant gives helpful and honest answers.",
48
+ "system_prefix_token_ids": null,
49
+ "add_role_after_system_message": true,
50
+ "roles": {
51
+ "user": "<|im_start|>user",
52
+ "assistant": "<|im_start|>assistant"
53
+ },
54
+ "role_templates": {
55
+ "user": "{user_message}",
56
+ "assistant": "{assistant_message}",
57
+ "tool": "{tool_message}"
58
+ },
59
+ "messages": [],
60
+ "seps": [
61
+ "<|im_end|>\n"
62
+ ],
63
+ "role_content_sep": "\n",
64
+ "role_empty_sep": "\n",
65
+ "stop_str": [
66
+ "<|im_end|>"
67
+ ],
68
+ "stop_token_ids": [
69
+ 2
70
+ ],
71
+ "function_string": "",
72
+ "use_function_calling": false
73
+ },
74
+ "pad_token_id": 151643,
75
+ "bos_token_id": 151643,
76
+ "eos_token_id": [
77
+ 151645,
78
+ 151643
79
+ ],
80
+ "mean_gen_len": 128,
81
+ "max_gen_len": 512,
82
+ "shift_fill_factor": 0.3
83
+ }
ndarray-cache.json ADDED
@@ -0,0 +1,2025 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "ParamSize": 171,
4
+ "ParamBytes": 1239140352.0,
5
+ "BitsPerParam": 16.0
6
+ },
7
+ "records": [
8
+ {
9
+ "dataPath": "params_shard_0.bin",
10
+ "format": "raw-shard",
11
+ "nbytes": 311164928,
12
+ "records": [
13
+ {
14
+ "name": "lm_head.weight",
15
+ "shape": [
16
+ 151936,
17
+ 1024
18
+ ],
19
+ "dtype": "float16",
20
+ "format": "f32-to-bf16",
21
+ "nbytes": 311164928,
22
+ "byteOffset": 0
23
+ }
24
+ ],
25
+ "md5sum": "9ad841023c938873aacc8dfa7bf3566a"
26
+ },
27
+ {
28
+ "dataPath": "params_shard_1.bin",
29
+ "format": "raw-shard",
30
+ "nbytes": 311164928,
31
+ "records": [
32
+ {
33
+ "name": "model.embed_tokens.weight",
34
+ "shape": [
35
+ 151936,
36
+ 1024
37
+ ],
38
+ "dtype": "float16",
39
+ "format": "f32-to-bf16",
40
+ "nbytes": 311164928,
41
+ "byteOffset": 0
42
+ }
43
+ ],
44
+ "md5sum": "9ad841023c938873aacc8dfa7bf3566a"
45
+ },
46
+ {
47
+ "dataPath": "params_shard_2.bin",
48
+ "format": "raw-shard",
49
+ "nbytes": 31469568,
50
+ "records": [
51
+ {
52
+ "name": "model.layers.0.input_layernorm.weight",
53
+ "shape": [
54
+ 1024
55
+ ],
56
+ "dtype": "float16",
57
+ "format": "f32-to-bf16",
58
+ "nbytes": 2048,
59
+ "byteOffset": 0
60
+ },
61
+ {
62
+ "name": "model.layers.0.mlp.down_proj.weight",
63
+ "shape": [
64
+ 1024,
65
+ 2816
66
+ ],
67
+ "dtype": "float16",
68
+ "format": "f32-to-bf16",
69
+ "nbytes": 5767168,
70
+ "byteOffset": 2048
71
+ },
72
+ {
73
+ "name": "model.layers.0.mlp.gate_up_proj.weight",
74
+ "shape": [
75
+ 5632,
76
+ 1024
77
+ ],
78
+ "dtype": "float16",
79
+ "format": "f32-to-bf16",
80
+ "nbytes": 11534336,
81
+ "byteOffset": 5769216
82
+ },
83
+ {
84
+ "name": "model.layers.0.post_attention_layernorm.weight",
85
+ "shape": [
86
+ 1024
87
+ ],
88
+ "dtype": "float16",
89
+ "format": "f32-to-bf16",
90
+ "nbytes": 2048,
91
+ "byteOffset": 17303552
92
+ },
93
+ {
94
+ "name": "model.layers.0.self_attn.c_attn.bias",
95
+ "shape": [
96
+ 3072
97
+ ],
98
+ "dtype": "float16",
99
+ "format": "f32-to-bf16",
100
+ "nbytes": 6144,
101
+ "byteOffset": 17305600
102
+ },
103
+ {
104
+ "name": "model.layers.0.self_attn.c_attn.weight",
105
+ "shape": [
106
+ 3072,
107
+ 1024
108
+ ],
109
+ "dtype": "float16",
110
+ "format": "f32-to-bf16",
111
+ "nbytes": 6291456,
112
+ "byteOffset": 17311744
113
+ },
114
+ {
115
+ "name": "model.layers.0.self_attn.o_proj.weight",
116
+ "shape": [
117
+ 1024,
118
+ 1024
119
+ ],
120
+ "dtype": "float16",
121
+ "format": "f32-to-bf16",
122
+ "nbytes": 2097152,
123
+ "byteOffset": 23603200
124
+ },
125
+ {
126
+ "name": "model.layers.1.input_layernorm.weight",
127
+ "shape": [
128
+ 1024
129
+ ],
130
+ "dtype": "float16",
131
+ "format": "f32-to-bf16",
132
+ "nbytes": 2048,
133
+ "byteOffset": 25700352
134
+ },
135
+ {
136
+ "name": "model.layers.1.mlp.down_proj.weight",
137
+ "shape": [
138
+ 1024,
139
+ 2816
140
+ ],
141
+ "dtype": "float16",
142
+ "format": "f32-to-bf16",
143
+ "nbytes": 5767168,
144
+ "byteOffset": 25702400
145
+ }
146
+ ],
147
+ "md5sum": "c83307e4bf50d2e47a2a452caf6502b4"
148
+ },
149
+ {
150
+ "dataPath": "params_shard_3.bin",
151
+ "format": "raw-shard",
152
+ "nbytes": 25700352,
153
+ "records": [
154
+ {
155
+ "name": "model.layers.1.mlp.gate_up_proj.weight",
156
+ "shape": [
157
+ 5632,
158
+ 1024
159
+ ],
160
+ "dtype": "float16",
161
+ "format": "f32-to-bf16",
162
+ "nbytes": 11534336,
163
+ "byteOffset": 0
164
+ },
165
+ {
166
+ "name": "model.layers.1.post_attention_layernorm.weight",
167
+ "shape": [
168
+ 1024
169
+ ],
170
+ "dtype": "float16",
171
+ "format": "f32-to-bf16",
172
+ "nbytes": 2048,
173
+ "byteOffset": 11534336
174
+ },
175
+ {
176
+ "name": "model.layers.1.self_attn.c_attn.bias",
177
+ "shape": [
178
+ 3072
179
+ ],
180
+ "dtype": "float16",
181
+ "format": "f32-to-bf16",
182
+ "nbytes": 6144,
183
+ "byteOffset": 11536384
184
+ },
185
+ {
186
+ "name": "model.layers.1.self_attn.c_attn.weight",
187
+ "shape": [
188
+ 3072,
189
+ 1024
190
+ ],
191
+ "dtype": "float16",
192
+ "format": "f32-to-bf16",
193
+ "nbytes": 6291456,
194
+ "byteOffset": 11542528
195
+ },
196
+ {
197
+ "name": "model.layers.1.self_attn.o_proj.weight",
198
+ "shape": [
199
+ 1024,
200
+ 1024
201
+ ],
202
+ "dtype": "float16",
203
+ "format": "f32-to-bf16",
204
+ "nbytes": 2097152,
205
+ "byteOffset": 17833984
206
+ },
207
+ {
208
+ "name": "model.layers.10.input_layernorm.weight",
209
+ "shape": [
210
+ 1024
211
+ ],
212
+ "dtype": "float16",
213
+ "format": "f32-to-bf16",
214
+ "nbytes": 2048,
215
+ "byteOffset": 19931136
216
+ },
217
+ {
218
+ "name": "model.layers.10.mlp.down_proj.weight",
219
+ "shape": [
220
+ 1024,
221
+ 2816
222
+ ],
223
+ "dtype": "float16",
224
+ "format": "f32-to-bf16",
225
+ "nbytes": 5767168,
226
+ "byteOffset": 19933184
227
+ }
228
+ ],
229
+ "md5sum": "40ec9a6cf11496051e8eb99ad47b508a"
230
+ },
231
+ {
232
+ "dataPath": "params_shard_4.bin",
233
+ "format": "raw-shard",
234
+ "nbytes": 25700352,
235
+ "records": [
236
+ {
237
+ "name": "model.layers.10.mlp.gate_up_proj.weight",
238
+ "shape": [
239
+ 5632,
240
+ 1024
241
+ ],
242
+ "dtype": "float16",
243
+ "format": "f32-to-bf16",
244
+ "nbytes": 11534336,
245
+ "byteOffset": 0
246
+ },
247
+ {
248
+ "name": "model.layers.10.post_attention_layernorm.weight",
249
+ "shape": [
250
+ 1024
251
+ ],
252
+ "dtype": "float16",
253
+ "format": "f32-to-bf16",
254
+ "nbytes": 2048,
255
+ "byteOffset": 11534336
256
+ },
257
+ {
258
+ "name": "model.layers.10.self_attn.c_attn.bias",
259
+ "shape": [
260
+ 3072
261
+ ],
262
+ "dtype": "float16",
263
+ "format": "f32-to-bf16",
264
+ "nbytes": 6144,
265
+ "byteOffset": 11536384
266
+ },
267
+ {
268
+ "name": "model.layers.10.self_attn.c_attn.weight",
269
+ "shape": [
270
+ 3072,
271
+ 1024
272
+ ],
273
+ "dtype": "float16",
274
+ "format": "f32-to-bf16",
275
+ "nbytes": 6291456,
276
+ "byteOffset": 11542528
277
+ },
278
+ {
279
+ "name": "model.layers.10.self_attn.o_proj.weight",
280
+ "shape": [
281
+ 1024,
282
+ 1024
283
+ ],
284
+ "dtype": "float16",
285
+ "format": "f32-to-bf16",
286
+ "nbytes": 2097152,
287
+ "byteOffset": 17833984
288
+ },
289
+ {
290
+ "name": "model.layers.11.input_layernorm.weight",
291
+ "shape": [
292
+ 1024
293
+ ],
294
+ "dtype": "float16",
295
+ "format": "f32-to-bf16",
296
+ "nbytes": 2048,
297
+ "byteOffset": 19931136
298
+ },
299
+ {
300
+ "name": "model.layers.11.mlp.down_proj.weight",
301
+ "shape": [
302
+ 1024,
303
+ 2816
304
+ ],
305
+ "dtype": "float16",
306
+ "format": "f32-to-bf16",
307
+ "nbytes": 5767168,
308
+ "byteOffset": 19933184
309
+ }
310
+ ],
311
+ "md5sum": "8c3f85d877c238dd743f6f1b2b03ba8a"
312
+ },
313
+ {
314
+ "dataPath": "params_shard_5.bin",
315
+ "format": "raw-shard",
316
+ "nbytes": 25700352,
317
+ "records": [
318
+ {
319
+ "name": "model.layers.11.mlp.gate_up_proj.weight",
320
+ "shape": [
321
+ 5632,
322
+ 1024
323
+ ],
324
+ "dtype": "float16",
325
+ "format": "f32-to-bf16",
326
+ "nbytes": 11534336,
327
+ "byteOffset": 0
328
+ },
329
+ {
330
+ "name": "model.layers.11.post_attention_layernorm.weight",
331
+ "shape": [
332
+ 1024
333
+ ],
334
+ "dtype": "float16",
335
+ "format": "f32-to-bf16",
336
+ "nbytes": 2048,
337
+ "byteOffset": 11534336
338
+ },
339
+ {
340
+ "name": "model.layers.11.self_attn.c_attn.bias",
341
+ "shape": [
342
+ 3072
343
+ ],
344
+ "dtype": "float16",
345
+ "format": "f32-to-bf16",
346
+ "nbytes": 6144,
347
+ "byteOffset": 11536384
348
+ },
349
+ {
350
+ "name": "model.layers.11.self_attn.c_attn.weight",
351
+ "shape": [
352
+ 3072,
353
+ 1024
354
+ ],
355
+ "dtype": "float16",
356
+ "format": "f32-to-bf16",
357
+ "nbytes": 6291456,
358
+ "byteOffset": 11542528
359
+ },
360
+ {
361
+ "name": "model.layers.11.self_attn.o_proj.weight",
362
+ "shape": [
363
+ 1024,
364
+ 1024
365
+ ],
366
+ "dtype": "float16",
367
+ "format": "f32-to-bf16",
368
+ "nbytes": 2097152,
369
+ "byteOffset": 17833984
370
+ },
371
+ {
372
+ "name": "model.layers.12.input_layernorm.weight",
373
+ "shape": [
374
+ 1024
375
+ ],
376
+ "dtype": "float16",
377
+ "format": "f32-to-bf16",
378
+ "nbytes": 2048,
379
+ "byteOffset": 19931136
380
+ },
381
+ {
382
+ "name": "model.layers.12.mlp.down_proj.weight",
383
+ "shape": [
384
+ 1024,
385
+ 2816
386
+ ],
387
+ "dtype": "float16",
388
+ "format": "f32-to-bf16",
389
+ "nbytes": 5767168,
390
+ "byteOffset": 19933184
391
+ }
392
+ ],
393
+ "md5sum": "04478e6bf88bd0157de04733e0501599"
394
+ },
395
+ {
396
+ "dataPath": "params_shard_6.bin",
397
+ "format": "raw-shard",
398
+ "nbytes": 25700352,
399
+ "records": [
400
+ {
401
+ "name": "model.layers.12.mlp.gate_up_proj.weight",
402
+ "shape": [
403
+ 5632,
404
+ 1024
405
+ ],
406
+ "dtype": "float16",
407
+ "format": "f32-to-bf16",
408
+ "nbytes": 11534336,
409
+ "byteOffset": 0
410
+ },
411
+ {
412
+ "name": "model.layers.12.post_attention_layernorm.weight",
413
+ "shape": [
414
+ 1024
415
+ ],
416
+ "dtype": "float16",
417
+ "format": "f32-to-bf16",
418
+ "nbytes": 2048,
419
+ "byteOffset": 11534336
420
+ },
421
+ {
422
+ "name": "model.layers.12.self_attn.c_attn.bias",
423
+ "shape": [
424
+ 3072
425
+ ],
426
+ "dtype": "float16",
427
+ "format": "f32-to-bf16",
428
+ "nbytes": 6144,
429
+ "byteOffset": 11536384
430
+ },
431
+ {
432
+ "name": "model.layers.12.self_attn.c_attn.weight",
433
+ "shape": [
434
+ 3072,
435
+ 1024
436
+ ],
437
+ "dtype": "float16",
438
+ "format": "f32-to-bf16",
439
+ "nbytes": 6291456,
440
+ "byteOffset": 11542528
441
+ },
442
+ {
443
+ "name": "model.layers.12.self_attn.o_proj.weight",
444
+ "shape": [
445
+ 1024,
446
+ 1024
447
+ ],
448
+ "dtype": "float16",
449
+ "format": "f32-to-bf16",
450
+ "nbytes": 2097152,
451
+ "byteOffset": 17833984
452
+ },
453
+ {
454
+ "name": "model.layers.13.input_layernorm.weight",
455
+ "shape": [
456
+ 1024
457
+ ],
458
+ "dtype": "float16",
459
+ "format": "f32-to-bf16",
460
+ "nbytes": 2048,
461
+ "byteOffset": 19931136
462
+ },
463
+ {
464
+ "name": "model.layers.13.mlp.down_proj.weight",
465
+ "shape": [
466
+ 1024,
467
+ 2816
468
+ ],
469
+ "dtype": "float16",
470
+ "format": "f32-to-bf16",
471
+ "nbytes": 5767168,
472
+ "byteOffset": 19933184
473
+ }
474
+ ],
475
+ "md5sum": "c0c36c829d9a50aaa962d9a4542bf4dc"
476
+ },
477
+ {
478
+ "dataPath": "params_shard_7.bin",
479
+ "format": "raw-shard",
480
+ "nbytes": 25700352,
481
+ "records": [
482
+ {
483
+ "name": "model.layers.13.mlp.gate_up_proj.weight",
484
+ "shape": [
485
+ 5632,
486
+ 1024
487
+ ],
488
+ "dtype": "float16",
489
+ "format": "f32-to-bf16",
490
+ "nbytes": 11534336,
491
+ "byteOffset": 0
492
+ },
493
+ {
494
+ "name": "model.layers.13.post_attention_layernorm.weight",
495
+ "shape": [
496
+ 1024
497
+ ],
498
+ "dtype": "float16",
499
+ "format": "f32-to-bf16",
500
+ "nbytes": 2048,
501
+ "byteOffset": 11534336
502
+ },
503
+ {
504
+ "name": "model.layers.13.self_attn.c_attn.bias",
505
+ "shape": [
506
+ 3072
507
+ ],
508
+ "dtype": "float16",
509
+ "format": "f32-to-bf16",
510
+ "nbytes": 6144,
511
+ "byteOffset": 11536384
512
+ },
513
+ {
514
+ "name": "model.layers.13.self_attn.c_attn.weight",
515
+ "shape": [
516
+ 3072,
517
+ 1024
518
+ ],
519
+ "dtype": "float16",
520
+ "format": "f32-to-bf16",
521
+ "nbytes": 6291456,
522
+ "byteOffset": 11542528
523
+ },
524
+ {
525
+ "name": "model.layers.13.self_attn.o_proj.weight",
526
+ "shape": [
527
+ 1024,
528
+ 1024
529
+ ],
530
+ "dtype": "float16",
531
+ "format": "f32-to-bf16",
532
+ "nbytes": 2097152,
533
+ "byteOffset": 17833984
534
+ },
535
+ {
536
+ "name": "model.layers.14.input_layernorm.weight",
537
+ "shape": [
538
+ 1024
539
+ ],
540
+ "dtype": "float16",
541
+ "format": "f32-to-bf16",
542
+ "nbytes": 2048,
543
+ "byteOffset": 19931136
544
+ },
545
+ {
546
+ "name": "model.layers.14.mlp.down_proj.weight",
547
+ "shape": [
548
+ 1024,
549
+ 2816
550
+ ],
551
+ "dtype": "float16",
552
+ "format": "f32-to-bf16",
553
+ "nbytes": 5767168,
554
+ "byteOffset": 19933184
555
+ }
556
+ ],
557
+ "md5sum": "c4ef41fe310776e417ebf57681f5aa85"
558
+ },
559
+ {
560
+ "dataPath": "params_shard_8.bin",
561
+ "format": "raw-shard",
562
+ "nbytes": 25700352,
563
+ "records": [
564
+ {
565
+ "name": "model.layers.14.mlp.gate_up_proj.weight",
566
+ "shape": [
567
+ 5632,
568
+ 1024
569
+ ],
570
+ "dtype": "float16",
571
+ "format": "f32-to-bf16",
572
+ "nbytes": 11534336,
573
+ "byteOffset": 0
574
+ },
575
+ {
576
+ "name": "model.layers.14.post_attention_layernorm.weight",
577
+ "shape": [
578
+ 1024
579
+ ],
580
+ "dtype": "float16",
581
+ "format": "f32-to-bf16",
582
+ "nbytes": 2048,
583
+ "byteOffset": 11534336
584
+ },
585
+ {
586
+ "name": "model.layers.14.self_attn.c_attn.bias",
587
+ "shape": [
588
+ 3072
589
+ ],
590
+ "dtype": "float16",
591
+ "format": "f32-to-bf16",
592
+ "nbytes": 6144,
593
+ "byteOffset": 11536384
594
+ },
595
+ {
596
+ "name": "model.layers.14.self_attn.c_attn.weight",
597
+ "shape": [
598
+ 3072,
599
+ 1024
600
+ ],
601
+ "dtype": "float16",
602
+ "format": "f32-to-bf16",
603
+ "nbytes": 6291456,
604
+ "byteOffset": 11542528
605
+ },
606
+ {
607
+ "name": "model.layers.14.self_attn.o_proj.weight",
608
+ "shape": [
609
+ 1024,
610
+ 1024
611
+ ],
612
+ "dtype": "float16",
613
+ "format": "f32-to-bf16",
614
+ "nbytes": 2097152,
615
+ "byteOffset": 17833984
616
+ },
617
+ {
618
+ "name": "model.layers.15.input_layernorm.weight",
619
+ "shape": [
620
+ 1024
621
+ ],
622
+ "dtype": "float16",
623
+ "format": "f32-to-bf16",
624
+ "nbytes": 2048,
625
+ "byteOffset": 19931136
626
+ },
627
+ {
628
+ "name": "model.layers.15.mlp.down_proj.weight",
629
+ "shape": [
630
+ 1024,
631
+ 2816
632
+ ],
633
+ "dtype": "float16",
634
+ "format": "f32-to-bf16",
635
+ "nbytes": 5767168,
636
+ "byteOffset": 19933184
637
+ }
638
+ ],
639
+ "md5sum": "dff83490a4e7a16cd2b905b92b2a1eda"
640
+ },
641
+ {
642
+ "dataPath": "params_shard_9.bin",
643
+ "format": "raw-shard",
644
+ "nbytes": 25700352,
645
+ "records": [
646
+ {
647
+ "name": "model.layers.15.mlp.gate_up_proj.weight",
648
+ "shape": [
649
+ 5632,
650
+ 1024
651
+ ],
652
+ "dtype": "float16",
653
+ "format": "f32-to-bf16",
654
+ "nbytes": 11534336,
655
+ "byteOffset": 0
656
+ },
657
+ {
658
+ "name": "model.layers.15.post_attention_layernorm.weight",
659
+ "shape": [
660
+ 1024
661
+ ],
662
+ "dtype": "float16",
663
+ "format": "f32-to-bf16",
664
+ "nbytes": 2048,
665
+ "byteOffset": 11534336
666
+ },
667
+ {
668
+ "name": "model.layers.15.self_attn.c_attn.bias",
669
+ "shape": [
670
+ 3072
671
+ ],
672
+ "dtype": "float16",
673
+ "format": "f32-to-bf16",
674
+ "nbytes": 6144,
675
+ "byteOffset": 11536384
676
+ },
677
+ {
678
+ "name": "model.layers.15.self_attn.c_attn.weight",
679
+ "shape": [
680
+ 3072,
681
+ 1024
682
+ ],
683
+ "dtype": "float16",
684
+ "format": "f32-to-bf16",
685
+ "nbytes": 6291456,
686
+ "byteOffset": 11542528
687
+ },
688
+ {
689
+ "name": "model.layers.15.self_attn.o_proj.weight",
690
+ "shape": [
691
+ 1024,
692
+ 1024
693
+ ],
694
+ "dtype": "float16",
695
+ "format": "f32-to-bf16",
696
+ "nbytes": 2097152,
697
+ "byteOffset": 17833984
698
+ },
699
+ {
700
+ "name": "model.layers.16.input_layernorm.weight",
701
+ "shape": [
702
+ 1024
703
+ ],
704
+ "dtype": "float16",
705
+ "format": "f32-to-bf16",
706
+ "nbytes": 2048,
707
+ "byteOffset": 19931136
708
+ },
709
+ {
710
+ "name": "model.layers.16.mlp.down_proj.weight",
711
+ "shape": [
712
+ 1024,
713
+ 2816
714
+ ],
715
+ "dtype": "float16",
716
+ "format": "f32-to-bf16",
717
+ "nbytes": 5767168,
718
+ "byteOffset": 19933184
719
+ }
720
+ ],
721
+ "md5sum": "68a5d47ad021ef139e7b57db6bce2204"
722
+ },
723
+ {
724
+ "dataPath": "params_shard_10.bin",
725
+ "format": "raw-shard",
726
+ "nbytes": 25700352,
727
+ "records": [
728
+ {
729
+ "name": "model.layers.16.mlp.gate_up_proj.weight",
730
+ "shape": [
731
+ 5632,
732
+ 1024
733
+ ],
734
+ "dtype": "float16",
735
+ "format": "f32-to-bf16",
736
+ "nbytes": 11534336,
737
+ "byteOffset": 0
738
+ },
739
+ {
740
+ "name": "model.layers.16.post_attention_layernorm.weight",
741
+ "shape": [
742
+ 1024
743
+ ],
744
+ "dtype": "float16",
745
+ "format": "f32-to-bf16",
746
+ "nbytes": 2048,
747
+ "byteOffset": 11534336
748
+ },
749
+ {
750
+ "name": "model.layers.16.self_attn.c_attn.bias",
751
+ "shape": [
752
+ 3072
753
+ ],
754
+ "dtype": "float16",
755
+ "format": "f32-to-bf16",
756
+ "nbytes": 6144,
757
+ "byteOffset": 11536384
758
+ },
759
+ {
760
+ "name": "model.layers.16.self_attn.c_attn.weight",
761
+ "shape": [
762
+ 3072,
763
+ 1024
764
+ ],
765
+ "dtype": "float16",
766
+ "format": "f32-to-bf16",
767
+ "nbytes": 6291456,
768
+ "byteOffset": 11542528
769
+ },
770
+ {
771
+ "name": "model.layers.16.self_attn.o_proj.weight",
772
+ "shape": [
773
+ 1024,
774
+ 1024
775
+ ],
776
+ "dtype": "float16",
777
+ "format": "f32-to-bf16",
778
+ "nbytes": 2097152,
779
+ "byteOffset": 17833984
780
+ },
781
+ {
782
+ "name": "model.layers.17.input_layernorm.weight",
783
+ "shape": [
784
+ 1024
785
+ ],
786
+ "dtype": "float16",
787
+ "format": "f32-to-bf16",
788
+ "nbytes": 2048,
789
+ "byteOffset": 19931136
790
+ },
791
+ {
792
+ "name": "model.layers.17.mlp.down_proj.weight",
793
+ "shape": [
794
+ 1024,
795
+ 2816
796
+ ],
797
+ "dtype": "float16",
798
+ "format": "f32-to-bf16",
799
+ "nbytes": 5767168,
800
+ "byteOffset": 19933184
801
+ }
802
+ ],
803
+ "md5sum": "afee818b99782f1be66296f5679a5a96"
804
+ },
805
+ {
806
+ "dataPath": "params_shard_11.bin",
807
+ "format": "raw-shard",
808
+ "nbytes": 25700352,
809
+ "records": [
810
+ {
811
+ "name": "model.layers.17.mlp.gate_up_proj.weight",
812
+ "shape": [
813
+ 5632,
814
+ 1024
815
+ ],
816
+ "dtype": "float16",
817
+ "format": "f32-to-bf16",
818
+ "nbytes": 11534336,
819
+ "byteOffset": 0
820
+ },
821
+ {
822
+ "name": "model.layers.17.post_attention_layernorm.weight",
823
+ "shape": [
824
+ 1024
825
+ ],
826
+ "dtype": "float16",
827
+ "format": "f32-to-bf16",
828
+ "nbytes": 2048,
829
+ "byteOffset": 11534336
830
+ },
831
+ {
832
+ "name": "model.layers.17.self_attn.c_attn.bias",
833
+ "shape": [
834
+ 3072
835
+ ],
836
+ "dtype": "float16",
837
+ "format": "f32-to-bf16",
838
+ "nbytes": 6144,
839
+ "byteOffset": 11536384
840
+ },
841
+ {
842
+ "name": "model.layers.17.self_attn.c_attn.weight",
843
+ "shape": [
844
+ 3072,
845
+ 1024
846
+ ],
847
+ "dtype": "float16",
848
+ "format": "f32-to-bf16",
849
+ "nbytes": 6291456,
850
+ "byteOffset": 11542528
851
+ },
852
+ {
853
+ "name": "model.layers.17.self_attn.o_proj.weight",
854
+ "shape": [
855
+ 1024,
856
+ 1024
857
+ ],
858
+ "dtype": "float16",
859
+ "format": "f32-to-bf16",
860
+ "nbytes": 2097152,
861
+ "byteOffset": 17833984
862
+ },
863
+ {
864
+ "name": "model.layers.18.input_layernorm.weight",
865
+ "shape": [
866
+ 1024
867
+ ],
868
+ "dtype": "float16",
869
+ "format": "f32-to-bf16",
870
+ "nbytes": 2048,
871
+ "byteOffset": 19931136
872
+ },
873
+ {
874
+ "name": "model.layers.18.mlp.down_proj.weight",
875
+ "shape": [
876
+ 1024,
877
+ 2816
878
+ ],
879
+ "dtype": "float16",
880
+ "format": "f32-to-bf16",
881
+ "nbytes": 5767168,
882
+ "byteOffset": 19933184
883
+ }
884
+ ],
885
+ "md5sum": "4930a3e6478ee30cd437add5fedfe060"
886
+ },
887
+ {
888
+ "dataPath": "params_shard_12.bin",
889
+ "format": "raw-shard",
890
+ "nbytes": 25700352,
891
+ "records": [
892
+ {
893
+ "name": "model.layers.18.mlp.gate_up_proj.weight",
894
+ "shape": [
895
+ 5632,
896
+ 1024
897
+ ],
898
+ "dtype": "float16",
899
+ "format": "f32-to-bf16",
900
+ "nbytes": 11534336,
901
+ "byteOffset": 0
902
+ },
903
+ {
904
+ "name": "model.layers.18.post_attention_layernorm.weight",
905
+ "shape": [
906
+ 1024
907
+ ],
908
+ "dtype": "float16",
909
+ "format": "f32-to-bf16",
910
+ "nbytes": 2048,
911
+ "byteOffset": 11534336
912
+ },
913
+ {
914
+ "name": "model.layers.18.self_attn.c_attn.bias",
915
+ "shape": [
916
+ 3072
917
+ ],
918
+ "dtype": "float16",
919
+ "format": "f32-to-bf16",
920
+ "nbytes": 6144,
921
+ "byteOffset": 11536384
922
+ },
923
+ {
924
+ "name": "model.layers.18.self_attn.c_attn.weight",
925
+ "shape": [
926
+ 3072,
927
+ 1024
928
+ ],
929
+ "dtype": "float16",
930
+ "format": "f32-to-bf16",
931
+ "nbytes": 6291456,
932
+ "byteOffset": 11542528
933
+ },
934
+ {
935
+ "name": "model.layers.18.self_attn.o_proj.weight",
936
+ "shape": [
937
+ 1024,
938
+ 1024
939
+ ],
940
+ "dtype": "float16",
941
+ "format": "f32-to-bf16",
942
+ "nbytes": 2097152,
943
+ "byteOffset": 17833984
944
+ },
945
+ {
946
+ "name": "model.layers.19.input_layernorm.weight",
947
+ "shape": [
948
+ 1024
949
+ ],
950
+ "dtype": "float16",
951
+ "format": "f32-to-bf16",
952
+ "nbytes": 2048,
953
+ "byteOffset": 19931136
954
+ },
955
+ {
956
+ "name": "model.layers.19.mlp.down_proj.weight",
957
+ "shape": [
958
+ 1024,
959
+ 2816
960
+ ],
961
+ "dtype": "float16",
962
+ "format": "f32-to-bf16",
963
+ "nbytes": 5767168,
964
+ "byteOffset": 19933184
965
+ }
966
+ ],
967
+ "md5sum": "e8b152a6bc0b7318999a7ecf4758a4ae"
968
+ },
969
+ {
970
+ "dataPath": "params_shard_13.bin",
971
+ "format": "raw-shard",
972
+ "nbytes": 25700352,
973
+ "records": [
974
+ {
975
+ "name": "model.layers.19.mlp.gate_up_proj.weight",
976
+ "shape": [
977
+ 5632,
978
+ 1024
979
+ ],
980
+ "dtype": "float16",
981
+ "format": "f32-to-bf16",
982
+ "nbytes": 11534336,
983
+ "byteOffset": 0
984
+ },
985
+ {
986
+ "name": "model.layers.19.post_attention_layernorm.weight",
987
+ "shape": [
988
+ 1024
989
+ ],
990
+ "dtype": "float16",
991
+ "format": "f32-to-bf16",
992
+ "nbytes": 2048,
993
+ "byteOffset": 11534336
994
+ },
995
+ {
996
+ "name": "model.layers.19.self_attn.c_attn.bias",
997
+ "shape": [
998
+ 3072
999
+ ],
1000
+ "dtype": "float16",
1001
+ "format": "f32-to-bf16",
1002
+ "nbytes": 6144,
1003
+ "byteOffset": 11536384
1004
+ },
1005
+ {
1006
+ "name": "model.layers.19.self_attn.c_attn.weight",
1007
+ "shape": [
1008
+ 3072,
1009
+ 1024
1010
+ ],
1011
+ "dtype": "float16",
1012
+ "format": "f32-to-bf16",
1013
+ "nbytes": 6291456,
1014
+ "byteOffset": 11542528
1015
+ },
1016
+ {
1017
+ "name": "model.layers.19.self_attn.o_proj.weight",
1018
+ "shape": [
1019
+ 1024,
1020
+ 1024
1021
+ ],
1022
+ "dtype": "float16",
1023
+ "format": "f32-to-bf16",
1024
+ "nbytes": 2097152,
1025
+ "byteOffset": 17833984
1026
+ },
1027
+ {
1028
+ "name": "model.layers.2.input_layernorm.weight",
1029
+ "shape": [
1030
+ 1024
1031
+ ],
1032
+ "dtype": "float16",
1033
+ "format": "f32-to-bf16",
1034
+ "nbytes": 2048,
1035
+ "byteOffset": 19931136
1036
+ },
1037
+ {
1038
+ "name": "model.layers.2.mlp.down_proj.weight",
1039
+ "shape": [
1040
+ 1024,
1041
+ 2816
1042
+ ],
1043
+ "dtype": "float16",
1044
+ "format": "f32-to-bf16",
1045
+ "nbytes": 5767168,
1046
+ "byteOffset": 19933184
1047
+ }
1048
+ ],
1049
+ "md5sum": "6c9a16660d168d0013f211264c973faf"
1050
+ },
1051
+ {
1052
+ "dataPath": "params_shard_14.bin",
1053
+ "format": "raw-shard",
1054
+ "nbytes": 25700352,
1055
+ "records": [
1056
+ {
1057
+ "name": "model.layers.2.mlp.gate_up_proj.weight",
1058
+ "shape": [
1059
+ 5632,
1060
+ 1024
1061
+ ],
1062
+ "dtype": "float16",
1063
+ "format": "f32-to-bf16",
1064
+ "nbytes": 11534336,
1065
+ "byteOffset": 0
1066
+ },
1067
+ {
1068
+ "name": "model.layers.2.post_attention_layernorm.weight",
1069
+ "shape": [
1070
+ 1024
1071
+ ],
1072
+ "dtype": "float16",
1073
+ "format": "f32-to-bf16",
1074
+ "nbytes": 2048,
1075
+ "byteOffset": 11534336
1076
+ },
1077
+ {
1078
+ "name": "model.layers.2.self_attn.c_attn.bias",
1079
+ "shape": [
1080
+ 3072
1081
+ ],
1082
+ "dtype": "float16",
1083
+ "format": "f32-to-bf16",
1084
+ "nbytes": 6144,
1085
+ "byteOffset": 11536384
1086
+ },
1087
+ {
1088
+ "name": "model.layers.2.self_attn.c_attn.weight",
1089
+ "shape": [
1090
+ 3072,
1091
+ 1024
1092
+ ],
1093
+ "dtype": "float16",
1094
+ "format": "f32-to-bf16",
1095
+ "nbytes": 6291456,
1096
+ "byteOffset": 11542528
1097
+ },
1098
+ {
1099
+ "name": "model.layers.2.self_attn.o_proj.weight",
1100
+ "shape": [
1101
+ 1024,
1102
+ 1024
1103
+ ],
1104
+ "dtype": "float16",
1105
+ "format": "f32-to-bf16",
1106
+ "nbytes": 2097152,
1107
+ "byteOffset": 17833984
1108
+ },
1109
+ {
1110
+ "name": "model.layers.20.input_layernorm.weight",
1111
+ "shape": [
1112
+ 1024
1113
+ ],
1114
+ "dtype": "float16",
1115
+ "format": "f32-to-bf16",
1116
+ "nbytes": 2048,
1117
+ "byteOffset": 19931136
1118
+ },
1119
+ {
1120
+ "name": "model.layers.20.mlp.down_proj.weight",
1121
+ "shape": [
1122
+ 1024,
1123
+ 2816
1124
+ ],
1125
+ "dtype": "float16",
1126
+ "format": "f32-to-bf16",
1127
+ "nbytes": 5767168,
1128
+ "byteOffset": 19933184
1129
+ }
1130
+ ],
1131
+ "md5sum": "2135bcb7dc23c91c41f641cd518d9dac"
1132
+ },
1133
+ {
1134
+ "dataPath": "params_shard_15.bin",
1135
+ "format": "raw-shard",
1136
+ "nbytes": 25700352,
1137
+ "records": [
1138
+ {
1139
+ "name": "model.layers.20.mlp.gate_up_proj.weight",
1140
+ "shape": [
1141
+ 5632,
1142
+ 1024
1143
+ ],
1144
+ "dtype": "float16",
1145
+ "format": "f32-to-bf16",
1146
+ "nbytes": 11534336,
1147
+ "byteOffset": 0
1148
+ },
1149
+ {
1150
+ "name": "model.layers.20.post_attention_layernorm.weight",
1151
+ "shape": [
1152
+ 1024
1153
+ ],
1154
+ "dtype": "float16",
1155
+ "format": "f32-to-bf16",
1156
+ "nbytes": 2048,
1157
+ "byteOffset": 11534336
1158
+ },
1159
+ {
1160
+ "name": "model.layers.20.self_attn.c_attn.bias",
1161
+ "shape": [
1162
+ 3072
1163
+ ],
1164
+ "dtype": "float16",
1165
+ "format": "f32-to-bf16",
1166
+ "nbytes": 6144,
1167
+ "byteOffset": 11536384
1168
+ },
1169
+ {
1170
+ "name": "model.layers.20.self_attn.c_attn.weight",
1171
+ "shape": [
1172
+ 3072,
1173
+ 1024
1174
+ ],
1175
+ "dtype": "float16",
1176
+ "format": "f32-to-bf16",
1177
+ "nbytes": 6291456,
1178
+ "byteOffset": 11542528
1179
+ },
1180
+ {
1181
+ "name": "model.layers.20.self_attn.o_proj.weight",
1182
+ "shape": [
1183
+ 1024,
1184
+ 1024
1185
+ ],
1186
+ "dtype": "float16",
1187
+ "format": "f32-to-bf16",
1188
+ "nbytes": 2097152,
1189
+ "byteOffset": 17833984
1190
+ },
1191
+ {
1192
+ "name": "model.layers.21.input_layernorm.weight",
1193
+ "shape": [
1194
+ 1024
1195
+ ],
1196
+ "dtype": "float16",
1197
+ "format": "f32-to-bf16",
1198
+ "nbytes": 2048,
1199
+ "byteOffset": 19931136
1200
+ },
1201
+ {
1202
+ "name": "model.layers.21.mlp.down_proj.weight",
1203
+ "shape": [
1204
+ 1024,
1205
+ 2816
1206
+ ],
1207
+ "dtype": "float16",
1208
+ "format": "f32-to-bf16",
1209
+ "nbytes": 5767168,
1210
+ "byteOffset": 19933184
1211
+ }
1212
+ ],
1213
+ "md5sum": "4fb95fba954bdca80dddb47cf6f38955"
1214
+ },
1215
+ {
1216
+ "dataPath": "params_shard_16.bin",
1217
+ "format": "raw-shard",
1218
+ "nbytes": 25700352,
1219
+ "records": [
1220
+ {
1221
+ "name": "model.layers.21.mlp.gate_up_proj.weight",
1222
+ "shape": [
1223
+ 5632,
1224
+ 1024
1225
+ ],
1226
+ "dtype": "float16",
1227
+ "format": "f32-to-bf16",
1228
+ "nbytes": 11534336,
1229
+ "byteOffset": 0
1230
+ },
1231
+ {
1232
+ "name": "model.layers.21.post_attention_layernorm.weight",
1233
+ "shape": [
1234
+ 1024
1235
+ ],
1236
+ "dtype": "float16",
1237
+ "format": "f32-to-bf16",
1238
+ "nbytes": 2048,
1239
+ "byteOffset": 11534336
1240
+ },
1241
+ {
1242
+ "name": "model.layers.21.self_attn.c_attn.bias",
1243
+ "shape": [
1244
+ 3072
1245
+ ],
1246
+ "dtype": "float16",
1247
+ "format": "f32-to-bf16",
1248
+ "nbytes": 6144,
1249
+ "byteOffset": 11536384
1250
+ },
1251
+ {
1252
+ "name": "model.layers.21.self_attn.c_attn.weight",
1253
+ "shape": [
1254
+ 3072,
1255
+ 1024
1256
+ ],
1257
+ "dtype": "float16",
1258
+ "format": "f32-to-bf16",
1259
+ "nbytes": 6291456,
1260
+ "byteOffset": 11542528
1261
+ },
1262
+ {
1263
+ "name": "model.layers.21.self_attn.o_proj.weight",
1264
+ "shape": [
1265
+ 1024,
1266
+ 1024
1267
+ ],
1268
+ "dtype": "float16",
1269
+ "format": "f32-to-bf16",
1270
+ "nbytes": 2097152,
1271
+ "byteOffset": 17833984
1272
+ },
1273
+ {
1274
+ "name": "model.layers.22.input_layernorm.weight",
1275
+ "shape": [
1276
+ 1024
1277
+ ],
1278
+ "dtype": "float16",
1279
+ "format": "f32-to-bf16",
1280
+ "nbytes": 2048,
1281
+ "byteOffset": 19931136
1282
+ },
1283
+ {
1284
+ "name": "model.layers.22.mlp.down_proj.weight",
1285
+ "shape": [
1286
+ 1024,
1287
+ 2816
1288
+ ],
1289
+ "dtype": "float16",
1290
+ "format": "f32-to-bf16",
1291
+ "nbytes": 5767168,
1292
+ "byteOffset": 19933184
1293
+ }
1294
+ ],
1295
+ "md5sum": "99cf1ab9f9f3873d10b23074aab226ca"
1296
+ },
1297
+ {
1298
+ "dataPath": "params_shard_17.bin",
1299
+ "format": "raw-shard",
1300
+ "nbytes": 25700352,
1301
+ "records": [
1302
+ {
1303
+ "name": "model.layers.22.mlp.gate_up_proj.weight",
1304
+ "shape": [
1305
+ 5632,
1306
+ 1024
1307
+ ],
1308
+ "dtype": "float16",
1309
+ "format": "f32-to-bf16",
1310
+ "nbytes": 11534336,
1311
+ "byteOffset": 0
1312
+ },
1313
+ {
1314
+ "name": "model.layers.22.post_attention_layernorm.weight",
1315
+ "shape": [
1316
+ 1024
1317
+ ],
1318
+ "dtype": "float16",
1319
+ "format": "f32-to-bf16",
1320
+ "nbytes": 2048,
1321
+ "byteOffset": 11534336
1322
+ },
1323
+ {
1324
+ "name": "model.layers.22.self_attn.c_attn.bias",
1325
+ "shape": [
1326
+ 3072
1327
+ ],
1328
+ "dtype": "float16",
1329
+ "format": "f32-to-bf16",
1330
+ "nbytes": 6144,
1331
+ "byteOffset": 11536384
1332
+ },
1333
+ {
1334
+ "name": "model.layers.22.self_attn.c_attn.weight",
1335
+ "shape": [
1336
+ 3072,
1337
+ 1024
1338
+ ],
1339
+ "dtype": "float16",
1340
+ "format": "f32-to-bf16",
1341
+ "nbytes": 6291456,
1342
+ "byteOffset": 11542528
1343
+ },
1344
+ {
1345
+ "name": "model.layers.22.self_attn.o_proj.weight",
1346
+ "shape": [
1347
+ 1024,
1348
+ 1024
1349
+ ],
1350
+ "dtype": "float16",
1351
+ "format": "f32-to-bf16",
1352
+ "nbytes": 2097152,
1353
+ "byteOffset": 17833984
1354
+ },
1355
+ {
1356
+ "name": "model.layers.23.input_layernorm.weight",
1357
+ "shape": [
1358
+ 1024
1359
+ ],
1360
+ "dtype": "float16",
1361
+ "format": "f32-to-bf16",
1362
+ "nbytes": 2048,
1363
+ "byteOffset": 19931136
1364
+ },
1365
+ {
1366
+ "name": "model.layers.23.mlp.down_proj.weight",
1367
+ "shape": [
1368
+ 1024,
1369
+ 2816
1370
+ ],
1371
+ "dtype": "float16",
1372
+ "format": "f32-to-bf16",
1373
+ "nbytes": 5767168,
1374
+ "byteOffset": 19933184
1375
+ }
1376
+ ],
1377
+ "md5sum": "af41b41da2365dd9e67788a2bbf4133f"
1378
+ },
1379
+ {
1380
+ "dataPath": "params_shard_18.bin",
1381
+ "format": "raw-shard",
1382
+ "nbytes": 25700352,
1383
+ "records": [
1384
+ {
1385
+ "name": "model.layers.23.mlp.gate_up_proj.weight",
1386
+ "shape": [
1387
+ 5632,
1388
+ 1024
1389
+ ],
1390
+ "dtype": "float16",
1391
+ "format": "f32-to-bf16",
1392
+ "nbytes": 11534336,
1393
+ "byteOffset": 0
1394
+ },
1395
+ {
1396
+ "name": "model.layers.23.post_attention_layernorm.weight",
1397
+ "shape": [
1398
+ 1024
1399
+ ],
1400
+ "dtype": "float16",
1401
+ "format": "f32-to-bf16",
1402
+ "nbytes": 2048,
1403
+ "byteOffset": 11534336
1404
+ },
1405
+ {
1406
+ "name": "model.layers.23.self_attn.c_attn.bias",
1407
+ "shape": [
1408
+ 3072
1409
+ ],
1410
+ "dtype": "float16",
1411
+ "format": "f32-to-bf16",
1412
+ "nbytes": 6144,
1413
+ "byteOffset": 11536384
1414
+ },
1415
+ {
1416
+ "name": "model.layers.23.self_attn.c_attn.weight",
1417
+ "shape": [
1418
+ 3072,
1419
+ 1024
1420
+ ],
1421
+ "dtype": "float16",
1422
+ "format": "f32-to-bf16",
1423
+ "nbytes": 6291456,
1424
+ "byteOffset": 11542528
1425
+ },
1426
+ {
1427
+ "name": "model.layers.23.self_attn.o_proj.weight",
1428
+ "shape": [
1429
+ 1024,
1430
+ 1024
1431
+ ],
1432
+ "dtype": "float16",
1433
+ "format": "f32-to-bf16",
1434
+ "nbytes": 2097152,
1435
+ "byteOffset": 17833984
1436
+ },
1437
+ {
1438
+ "name": "model.layers.3.input_layernorm.weight",
1439
+ "shape": [
1440
+ 1024
1441
+ ],
1442
+ "dtype": "float16",
1443
+ "format": "f32-to-bf16",
1444
+ "nbytes": 2048,
1445
+ "byteOffset": 19931136
1446
+ },
1447
+ {
1448
+ "name": "model.layers.3.mlp.down_proj.weight",
1449
+ "shape": [
1450
+ 1024,
1451
+ 2816
1452
+ ],
1453
+ "dtype": "float16",
1454
+ "format": "f32-to-bf16",
1455
+ "nbytes": 5767168,
1456
+ "byteOffset": 19933184
1457
+ }
1458
+ ],
1459
+ "md5sum": "e8209ecdf7ce06c90d2c18783dc940e3"
1460
+ },
1461
+ {
1462
+ "dataPath": "params_shard_19.bin",
1463
+ "format": "raw-shard",
1464
+ "nbytes": 25700352,
1465
+ "records": [
1466
+ {
1467
+ "name": "model.layers.3.mlp.gate_up_proj.weight",
1468
+ "shape": [
1469
+ 5632,
1470
+ 1024
1471
+ ],
1472
+ "dtype": "float16",
1473
+ "format": "f32-to-bf16",
1474
+ "nbytes": 11534336,
1475
+ "byteOffset": 0
1476
+ },
1477
+ {
1478
+ "name": "model.layers.3.post_attention_layernorm.weight",
1479
+ "shape": [
1480
+ 1024
1481
+ ],
1482
+ "dtype": "float16",
1483
+ "format": "f32-to-bf16",
1484
+ "nbytes": 2048,
1485
+ "byteOffset": 11534336
1486
+ },
1487
+ {
1488
+ "name": "model.layers.3.self_attn.c_attn.bias",
1489
+ "shape": [
1490
+ 3072
1491
+ ],
1492
+ "dtype": "float16",
1493
+ "format": "f32-to-bf16",
1494
+ "nbytes": 6144,
1495
+ "byteOffset": 11536384
1496
+ },
1497
+ {
1498
+ "name": "model.layers.3.self_attn.c_attn.weight",
1499
+ "shape": [
1500
+ 3072,
1501
+ 1024
1502
+ ],
1503
+ "dtype": "float16",
1504
+ "format": "f32-to-bf16",
1505
+ "nbytes": 6291456,
1506
+ "byteOffset": 11542528
1507
+ },
1508
+ {
1509
+ "name": "model.layers.3.self_attn.o_proj.weight",
1510
+ "shape": [
1511
+ 1024,
1512
+ 1024
1513
+ ],
1514
+ "dtype": "float16",
1515
+ "format": "f32-to-bf16",
1516
+ "nbytes": 2097152,
1517
+ "byteOffset": 17833984
1518
+ },
1519
+ {
1520
+ "name": "model.layers.4.input_layernorm.weight",
1521
+ "shape": [
1522
+ 1024
1523
+ ],
1524
+ "dtype": "float16",
1525
+ "format": "f32-to-bf16",
1526
+ "nbytes": 2048,
1527
+ "byteOffset": 19931136
1528
+ },
1529
+ {
1530
+ "name": "model.layers.4.mlp.down_proj.weight",
1531
+ "shape": [
1532
+ 1024,
1533
+ 2816
1534
+ ],
1535
+ "dtype": "float16",
1536
+ "format": "f32-to-bf16",
1537
+ "nbytes": 5767168,
1538
+ "byteOffset": 19933184
1539
+ }
1540
+ ],
1541
+ "md5sum": "6d10940312736cb810f03379ad0eaacb"
1542
+ },
1543
+ {
1544
+ "dataPath": "params_shard_20.bin",
1545
+ "format": "raw-shard",
1546
+ "nbytes": 25700352,
1547
+ "records": [
1548
+ {
1549
+ "name": "model.layers.4.mlp.gate_up_proj.weight",
1550
+ "shape": [
1551
+ 5632,
1552
+ 1024
1553
+ ],
1554
+ "dtype": "float16",
1555
+ "format": "f32-to-bf16",
1556
+ "nbytes": 11534336,
1557
+ "byteOffset": 0
1558
+ },
1559
+ {
1560
+ "name": "model.layers.4.post_attention_layernorm.weight",
1561
+ "shape": [
1562
+ 1024
1563
+ ],
1564
+ "dtype": "float16",
1565
+ "format": "f32-to-bf16",
1566
+ "nbytes": 2048,
1567
+ "byteOffset": 11534336
1568
+ },
1569
+ {
1570
+ "name": "model.layers.4.self_attn.c_attn.bias",
1571
+ "shape": [
1572
+ 3072
1573
+ ],
1574
+ "dtype": "float16",
1575
+ "format": "f32-to-bf16",
1576
+ "nbytes": 6144,
1577
+ "byteOffset": 11536384
1578
+ },
1579
+ {
1580
+ "name": "model.layers.4.self_attn.c_attn.weight",
1581
+ "shape": [
1582
+ 3072,
1583
+ 1024
1584
+ ],
1585
+ "dtype": "float16",
1586
+ "format": "f32-to-bf16",
1587
+ "nbytes": 6291456,
1588
+ "byteOffset": 11542528
1589
+ },
1590
+ {
1591
+ "name": "model.layers.4.self_attn.o_proj.weight",
1592
+ "shape": [
1593
+ 1024,
1594
+ 1024
1595
+ ],
1596
+ "dtype": "float16",
1597
+ "format": "f32-to-bf16",
1598
+ "nbytes": 2097152,
1599
+ "byteOffset": 17833984
1600
+ },
1601
+ {
1602
+ "name": "model.layers.5.input_layernorm.weight",
1603
+ "shape": [
1604
+ 1024
1605
+ ],
1606
+ "dtype": "float16",
1607
+ "format": "f32-to-bf16",
1608
+ "nbytes": 2048,
1609
+ "byteOffset": 19931136
1610
+ },
1611
+ {
1612
+ "name": "model.layers.5.mlp.down_proj.weight",
1613
+ "shape": [
1614
+ 1024,
1615
+ 2816
1616
+ ],
1617
+ "dtype": "float16",
1618
+ "format": "f32-to-bf16",
1619
+ "nbytes": 5767168,
1620
+ "byteOffset": 19933184
1621
+ }
1622
+ ],
1623
+ "md5sum": "66ba043038bbfa34c5071fcd2fc80477"
1624
+ },
1625
+ {
1626
+ "dataPath": "params_shard_21.bin",
1627
+ "format": "raw-shard",
1628
+ "nbytes": 25700352,
1629
+ "records": [
1630
+ {
1631
+ "name": "model.layers.5.mlp.gate_up_proj.weight",
1632
+ "shape": [
1633
+ 5632,
1634
+ 1024
1635
+ ],
1636
+ "dtype": "float16",
1637
+ "format": "f32-to-bf16",
1638
+ "nbytes": 11534336,
1639
+ "byteOffset": 0
1640
+ },
1641
+ {
1642
+ "name": "model.layers.5.post_attention_layernorm.weight",
1643
+ "shape": [
1644
+ 1024
1645
+ ],
1646
+ "dtype": "float16",
1647
+ "format": "f32-to-bf16",
1648
+ "nbytes": 2048,
1649
+ "byteOffset": 11534336
1650
+ },
1651
+ {
1652
+ "name": "model.layers.5.self_attn.c_attn.bias",
1653
+ "shape": [
1654
+ 3072
1655
+ ],
1656
+ "dtype": "float16",
1657
+ "format": "f32-to-bf16",
1658
+ "nbytes": 6144,
1659
+ "byteOffset": 11536384
1660
+ },
1661
+ {
1662
+ "name": "model.layers.5.self_attn.c_attn.weight",
1663
+ "shape": [
1664
+ 3072,
1665
+ 1024
1666
+ ],
1667
+ "dtype": "float16",
1668
+ "format": "f32-to-bf16",
1669
+ "nbytes": 6291456,
1670
+ "byteOffset": 11542528
1671
+ },
1672
+ {
1673
+ "name": "model.layers.5.self_attn.o_proj.weight",
1674
+ "shape": [
1675
+ 1024,
1676
+ 1024
1677
+ ],
1678
+ "dtype": "float16",
1679
+ "format": "f32-to-bf16",
1680
+ "nbytes": 2097152,
1681
+ "byteOffset": 17833984
1682
+ },
1683
+ {
1684
+ "name": "model.layers.6.input_layernorm.weight",
1685
+ "shape": [
1686
+ 1024
1687
+ ],
1688
+ "dtype": "float16",
1689
+ "format": "f32-to-bf16",
1690
+ "nbytes": 2048,
1691
+ "byteOffset": 19931136
1692
+ },
1693
+ {
1694
+ "name": "model.layers.6.mlp.down_proj.weight",
1695
+ "shape": [
1696
+ 1024,
1697
+ 2816
1698
+ ],
1699
+ "dtype": "float16",
1700
+ "format": "f32-to-bf16",
1701
+ "nbytes": 5767168,
1702
+ "byteOffset": 19933184
1703
+ }
1704
+ ],
1705
+ "md5sum": "8efd4c932df27b614e0297c8f4863b6c"
1706
+ },
1707
+ {
1708
+ "dataPath": "params_shard_22.bin",
1709
+ "format": "raw-shard",
1710
+ "nbytes": 25700352,
1711
+ "records": [
1712
+ {
1713
+ "name": "model.layers.6.mlp.gate_up_proj.weight",
1714
+ "shape": [
1715
+ 5632,
1716
+ 1024
1717
+ ],
1718
+ "dtype": "float16",
1719
+ "format": "f32-to-bf16",
1720
+ "nbytes": 11534336,
1721
+ "byteOffset": 0
1722
+ },
1723
+ {
1724
+ "name": "model.layers.6.post_attention_layernorm.weight",
1725
+ "shape": [
1726
+ 1024
1727
+ ],
1728
+ "dtype": "float16",
1729
+ "format": "f32-to-bf16",
1730
+ "nbytes": 2048,
1731
+ "byteOffset": 11534336
1732
+ },
1733
+ {
1734
+ "name": "model.layers.6.self_attn.c_attn.bias",
1735
+ "shape": [
1736
+ 3072
1737
+ ],
1738
+ "dtype": "float16",
1739
+ "format": "f32-to-bf16",
1740
+ "nbytes": 6144,
1741
+ "byteOffset": 11536384
1742
+ },
1743
+ {
1744
+ "name": "model.layers.6.self_attn.c_attn.weight",
1745
+ "shape": [
1746
+ 3072,
1747
+ 1024
1748
+ ],
1749
+ "dtype": "float16",
1750
+ "format": "f32-to-bf16",
1751
+ "nbytes": 6291456,
1752
+ "byteOffset": 11542528
1753
+ },
1754
+ {
1755
+ "name": "model.layers.6.self_attn.o_proj.weight",
1756
+ "shape": [
1757
+ 1024,
1758
+ 1024
1759
+ ],
1760
+ "dtype": "float16",
1761
+ "format": "f32-to-bf16",
1762
+ "nbytes": 2097152,
1763
+ "byteOffset": 17833984
1764
+ },
1765
+ {
1766
+ "name": "model.layers.7.input_layernorm.weight",
1767
+ "shape": [
1768
+ 1024
1769
+ ],
1770
+ "dtype": "float16",
1771
+ "format": "f32-to-bf16",
1772
+ "nbytes": 2048,
1773
+ "byteOffset": 19931136
1774
+ },
1775
+ {
1776
+ "name": "model.layers.7.mlp.down_proj.weight",
1777
+ "shape": [
1778
+ 1024,
1779
+ 2816
1780
+ ],
1781
+ "dtype": "float16",
1782
+ "format": "f32-to-bf16",
1783
+ "nbytes": 5767168,
1784
+ "byteOffset": 19933184
1785
+ }
1786
+ ],
1787
+ "md5sum": "c9cc4bbf32d4d3cd71fc3d9aac0e8d09"
1788
+ },
1789
+ {
1790
+ "dataPath": "params_shard_23.bin",
1791
+ "format": "raw-shard",
1792
+ "nbytes": 25700352,
1793
+ "records": [
1794
+ {
1795
+ "name": "model.layers.7.mlp.gate_up_proj.weight",
1796
+ "shape": [
1797
+ 5632,
1798
+ 1024
1799
+ ],
1800
+ "dtype": "float16",
1801
+ "format": "f32-to-bf16",
1802
+ "nbytes": 11534336,
1803
+ "byteOffset": 0
1804
+ },
1805
+ {
1806
+ "name": "model.layers.7.post_attention_layernorm.weight",
1807
+ "shape": [
1808
+ 1024
1809
+ ],
1810
+ "dtype": "float16",
1811
+ "format": "f32-to-bf16",
1812
+ "nbytes": 2048,
1813
+ "byteOffset": 11534336
1814
+ },
1815
+ {
1816
+ "name": "model.layers.7.self_attn.c_attn.bias",
1817
+ "shape": [
1818
+ 3072
1819
+ ],
1820
+ "dtype": "float16",
1821
+ "format": "f32-to-bf16",
1822
+ "nbytes": 6144,
1823
+ "byteOffset": 11536384
1824
+ },
1825
+ {
1826
+ "name": "model.layers.7.self_attn.c_attn.weight",
1827
+ "shape": [
1828
+ 3072,
1829
+ 1024
1830
+ ],
1831
+ "dtype": "float16",
1832
+ "format": "f32-to-bf16",
1833
+ "nbytes": 6291456,
1834
+ "byteOffset": 11542528
1835
+ },
1836
+ {
1837
+ "name": "model.layers.7.self_attn.o_proj.weight",
1838
+ "shape": [
1839
+ 1024,
1840
+ 1024
1841
+ ],
1842
+ "dtype": "float16",
1843
+ "format": "f32-to-bf16",
1844
+ "nbytes": 2097152,
1845
+ "byteOffset": 17833984
1846
+ },
1847
+ {
1848
+ "name": "model.layers.8.input_layernorm.weight",
1849
+ "shape": [
1850
+ 1024
1851
+ ],
1852
+ "dtype": "float16",
1853
+ "format": "f32-to-bf16",
1854
+ "nbytes": 2048,
1855
+ "byteOffset": 19931136
1856
+ },
1857
+ {
1858
+ "name": "model.layers.8.mlp.down_proj.weight",
1859
+ "shape": [
1860
+ 1024,
1861
+ 2816
1862
+ ],
1863
+ "dtype": "float16",
1864
+ "format": "f32-to-bf16",
1865
+ "nbytes": 5767168,
1866
+ "byteOffset": 19933184
1867
+ }
1868
+ ],
1869
+ "md5sum": "0349c6929c5eb51dceeadd09a33df500"
1870
+ },
1871
+ {
1872
+ "dataPath": "params_shard_24.bin",
1873
+ "format": "raw-shard",
1874
+ "nbytes": 25700352,
1875
+ "records": [
1876
+ {
1877
+ "name": "model.layers.8.mlp.gate_up_proj.weight",
1878
+ "shape": [
1879
+ 5632,
1880
+ 1024
1881
+ ],
1882
+ "dtype": "float16",
1883
+ "format": "f32-to-bf16",
1884
+ "nbytes": 11534336,
1885
+ "byteOffset": 0
1886
+ },
1887
+ {
1888
+ "name": "model.layers.8.post_attention_layernorm.weight",
1889
+ "shape": [
1890
+ 1024
1891
+ ],
1892
+ "dtype": "float16",
1893
+ "format": "f32-to-bf16",
1894
+ "nbytes": 2048,
1895
+ "byteOffset": 11534336
1896
+ },
1897
+ {
1898
+ "name": "model.layers.8.self_attn.c_attn.bias",
1899
+ "shape": [
1900
+ 3072
1901
+ ],
1902
+ "dtype": "float16",
1903
+ "format": "f32-to-bf16",
1904
+ "nbytes": 6144,
1905
+ "byteOffset": 11536384
1906
+ },
1907
+ {
1908
+ "name": "model.layers.8.self_attn.c_attn.weight",
1909
+ "shape": [
1910
+ 3072,
1911
+ 1024
1912
+ ],
1913
+ "dtype": "float16",
1914
+ "format": "f32-to-bf16",
1915
+ "nbytes": 6291456,
1916
+ "byteOffset": 11542528
1917
+ },
1918
+ {
1919
+ "name": "model.layers.8.self_attn.o_proj.weight",
1920
+ "shape": [
1921
+ 1024,
1922
+ 1024
1923
+ ],
1924
+ "dtype": "float16",
1925
+ "format": "f32-to-bf16",
1926
+ "nbytes": 2097152,
1927
+ "byteOffset": 17833984
1928
+ },
1929
+ {
1930
+ "name": "model.layers.9.input_layernorm.weight",
1931
+ "shape": [
1932
+ 1024
1933
+ ],
1934
+ "dtype": "float16",
1935
+ "format": "f32-to-bf16",
1936
+ "nbytes": 2048,
1937
+ "byteOffset": 19931136
1938
+ },
1939
+ {
1940
+ "name": "model.layers.9.mlp.down_proj.weight",
1941
+ "shape": [
1942
+ 1024,
1943
+ 2816
1944
+ ],
1945
+ "dtype": "float16",
1946
+ "format": "f32-to-bf16",
1947
+ "nbytes": 5767168,
1948
+ "byteOffset": 19933184
1949
+ }
1950
+ ],
1951
+ "md5sum": "5027c555bd33d3a6ee38ae82c4dcf8fe"
1952
+ },
1953
+ {
1954
+ "dataPath": "params_shard_25.bin",
1955
+ "format": "raw-shard",
1956
+ "nbytes": 19933184,
1957
+ "records": [
1958
+ {
1959
+ "name": "model.layers.9.mlp.gate_up_proj.weight",
1960
+ "shape": [
1961
+ 5632,
1962
+ 1024
1963
+ ],
1964
+ "dtype": "float16",
1965
+ "format": "f32-to-bf16",
1966
+ "nbytes": 11534336,
1967
+ "byteOffset": 0
1968
+ },
1969
+ {
1970
+ "name": "model.layers.9.post_attention_layernorm.weight",
1971
+ "shape": [
1972
+ 1024
1973
+ ],
1974
+ "dtype": "float16",
1975
+ "format": "f32-to-bf16",
1976
+ "nbytes": 2048,
1977
+ "byteOffset": 11534336
1978
+ },
1979
+ {
1980
+ "name": "model.layers.9.self_attn.c_attn.bias",
1981
+ "shape": [
1982
+ 3072
1983
+ ],
1984
+ "dtype": "float16",
1985
+ "format": "f32-to-bf16",
1986
+ "nbytes": 6144,
1987
+ "byteOffset": 11536384
1988
+ },
1989
+ {
1990
+ "name": "model.layers.9.self_attn.c_attn.weight",
1991
+ "shape": [
1992
+ 3072,
1993
+ 1024
1994
+ ],
1995
+ "dtype": "float16",
1996
+ "format": "f32-to-bf16",
1997
+ "nbytes": 6291456,
1998
+ "byteOffset": 11542528
1999
+ },
2000
+ {
2001
+ "name": "model.layers.9.self_attn.o_proj.weight",
2002
+ "shape": [
2003
+ 1024,
2004
+ 1024
2005
+ ],
2006
+ "dtype": "float16",
2007
+ "format": "f32-to-bf16",
2008
+ "nbytes": 2097152,
2009
+ "byteOffset": 17833984
2010
+ },
2011
+ {
2012
+ "name": "model.norm.weight",
2013
+ "shape": [
2014
+ 1024
2015
+ ],
2016
+ "dtype": "float16",
2017
+ "format": "f32-to-bf16",
2018
+ "nbytes": 2048,
2019
+ "byteOffset": 19931136
2020
+ }
2021
+ ],
2022
+ "md5sum": "31f107dcd7122933055d18604f4a83e9"
2023
+ }
2024
+ ]
2025
+ }
params_shard_0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86ae636e946d87a0ff123440a5bd6df1be6327b6d6cae1449197ffe4b1dd554f
3
+ size 311164928
params_shard_1.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86ae636e946d87a0ff123440a5bd6df1be6327b6d6cae1449197ffe4b1dd554f
3
+ size 311164928
params_shard_10.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:771729f299a7a91d15f2781383c123f40a5d0c7b2c552f8bf6a3fe2ead987275
3
+ size 25700352
params_shard_11.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:915a26324923b6109936fbc56dcaad62cf2e6770f4bf565587eff461ae41d9d5
3
+ size 25700352
params_shard_12.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31c97b8caeb084ece17b1aea47319d4db0045439e88700c547e931857594146a
3
+ size 25700352
params_shard_13.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d66c8bc1dd6a1889df756794eb1b4109e60eda391e1dd86f1e1d1781cedb12e9
3
+ size 25700352
params_shard_14.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a27a06b689f616345df5e9321a899c3eae2c88adb6554f5dc99191b8aab2df1
3
+ size 25700352
params_shard_15.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1eefbed6ef9fcd2f036866391bd6e3e435820f196ed61e817638ea873913fbb6
3
+ size 25700352
params_shard_16.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ffcd64b6f6a3f022fe2276eb492cff1550b8be9fe419848b405963b444f29f3c
3
+ size 25700352
params_shard_17.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00a907ce82c60143387ac15ff147735726b37d368690228eb5311534f8385bcc
3
+ size 25700352
params_shard_18.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed32a1e2b94601a25c122f27818b690e76cb70f393498b398b368452ef0b3846
3
+ size 25700352
params_shard_19.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5b276d0829ec0d2d75de5f3d70bfed9a4e5091fa652982a86fc0942e7c42ceb
3
+ size 25700352
params_shard_2.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8df6b3b090f062ca2da7b95ff66b80aa2a52c4f08c0788aa015a47b2354686b2
3
+ size 31469568
params_shard_20.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3449213182cbdb1f892f59ea936e7a6d953554d589aa3543dae579c89c66b4a3
3
+ size 25700352
params_shard_21.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f723a7f06ddab041d2a6ac8acf15a10e98aa57434e7300e4070d1aa7b30c45bf
3
+ size 25700352
params_shard_22.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dbd452233020c2864403f79db8918059187e34d82fa8a1e7bdae2f78bbfcf659
3
+ size 25700352
params_shard_23.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25a152126a1572e96c1963c4746ff53fa4d8c413e6d58899cb03692e23ff620e
3
+ size 25700352
params_shard_24.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f27874cef221739bfb038ead03bf1d9430da7b3f4c64e65196d990f8b073f6be
3
+ size 25700352
params_shard_25.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3751538413e23dace2f6bab2fb7a6af7d490666cea05f365a6e55182a0b8fc8
3
+ size 19933184
params_shard_3.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2cad55e9c8b65fbec140b085e09c97ea3ec5fd13ac2cfdcb2e7a2d5a3952407e
3
+ size 25700352
params_shard_4.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88564049c7d1b85bb07a82eadba43f7848f1bd784cbd306e81c94052cd545c57
3
+ size 25700352
params_shard_5.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9665a592d7c4121288356215544075c02b0562232b6e93b65b928effbbae9153
3
+ size 25700352
params_shard_6.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f06b3f823bdd378a1c8e92647838aa7d193a53c9cdb19c6c2ddd1578cafaa56a
3
+ size 25700352
params_shard_7.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eaedd5c74f640e34bd74111572f6369093e21b381f1aabf779d4fd837c47f071
3
+ size 25700352
params_shard_8.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fba06efb22baa9f82a0cd02db328b8716b08fbb184840ab1869f61d942237d69
3
+ size 25700352
params_shard_9.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79130ec74ebe4d23a34bb769f2e4e9b162ce0db335da1888d8ea8e65cc4a8edf
3
+ size 25700352
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "151643": {
5
+ "content": "<|endoftext|>",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "151644": {
13
+ "content": "<|im_start|>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "151645": {
21
+ "content": "<|im_end|>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ }
28
+ },
29
+ "additional_special_tokens": ["<|im_start|>", "<|im_end|>"],
30
+ "bos_token": null,
31
+ "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
32
+ "clean_up_tokenization_spaces": false,
33
+ "eos_token": "<|im_end|>",
34
+ "errors": "replace",
35
+ "model_max_length": 32768,
36
+ "pad_token": "<|endoftext|>",
37
+ "split_special_tokens": false,
38
+ "tokenizer_class": "Qwen2Tokenizer",
39
+ "unk_token": null
40
+ }
vocab.json ADDED
The diff for this file is too large to render. See raw diff