Tlopex commited on
Commit
2b5d618
1 Parent(s): b10d7ed

Initial commit

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
logs.txt ADDED
@@ -0,0 +1,286 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0
  0%| | 0/220 [00:00<?, ?it/s]
1
 
 
2
  0%| | 0/220 [00:00<?, ?it/s]
3
 
 
4
  0%| | 0/220 [00:01<?, ?it/s]/home/tlopex/.local/lib/python3.8/site-packages/numpy/core/getlimits.py:518: UserWarning: The value of the smallest subnormal for <class 'numpy.float32'> type is zero.
 
 
 
 
5
  0%| | 1/220 [00:04<17:42, 4.85s/it]
6
 
 
7
  0%| | 1/220 [00:05<17:42, 4.85s/it]
8
  1%| | 2/220 [00:08<15:30, 4.27s/it]
9
 
 
10
  1%| | 2/220 [00:08<15:30, 4.27s/it]
11
 
 
12
  1%| | 2/220 [00:08<15:30, 4.27s/it]
13
 
 
14
  1%| | 2/220 [00:08<15:30, 4.27s/it]
15
  2%|▏ | 5/220 [00:08<04:31, 1.26s/it]
16
 
 
17
  2%|▏ | 5/220 [00:09<04:31, 1.26s/it]
18
  3%|▎ | 6/220 [00:09<03:45, 1.05s/it]
19
 
 
20
  3%|▎ | 6/220 [00:09<03:45, 1.05s/it]
21
 
 
22
  3%|▎ | 6/220 [00:09<03:45, 1.05s/it]
23
 
 
24
  3%|▎ | 6/220 [00:09<03:45, 1.05s/it]
25
 
 
26
  3%|▎ | 6/220 [00:09<03:45, 1.05s/it]
27
  5%|▍ | 10/220 [00:09<01:37, 2.15it/s]
28
 
 
29
  5%|▍ | 10/220 [00:09<01:37, 2.15it/s]
30
 
 
31
  5%|▍ | 10/220 [00:09<01:37, 2.15it/s]
32
 
 
33
  5%|▍ | 10/220 [00:09<01:37, 2.15it/s]
34
 
 
35
  5%|▍ | 10/220 [00:09<01:37, 2.15it/s]
36
  6%|▋ | 14/220 [00:09<00:58, 3.54it/s]
37
 
 
38
  6%|▋ | 14/220 [00:09<00:58, 3.54it/s]
39
 
 
40
  6%|▋ | 14/220 [00:10<00:58, 3.54it/s]
41
  7%|▋ | 16/220 [00:10<00:54, 3.74it/s]
42
 
 
43
  7%|▋ | 16/220 [00:10<00:54, 3.74it/s]
44
 
 
45
  7%|▋ | 16/220 [00:10<00:54, 3.74it/s]
46
 
 
47
  7%|▋ | 16/220 [00:10<00:54, 3.74it/s]
48
  9%|▊ | 19/220 [00:10<00:40, 4.95it/s]
49
 
 
50
  9%|▊ | 19/220 [00:10<00:40, 4.95it/s]
51
 
 
52
  9%|▊ | 19/220 [00:10<00:40, 4.95it/s]
53
  10%|▉ | 21/220 [00:10<00:34, 5.85it/s]
54
 
 
55
  10%|▉ | 21/220 [00:10<00:34, 5.85it/s]
56
 
 
57
  10%|▉ | 21/220 [00:10<00:34, 5.85it/s]
58
  10%|█ | 23/220 [00:10<00:29, 6.72it/s]
59
 
 
60
  10%|█ | 23/220 [00:10<00:29, 6.72it/s]
61
 
 
62
  10%|█ | 23/220 [00:11<00:29, 6.72it/s]
63
  11%|█▏ | 25/220 [00:11<00:32, 5.95it/s]
64
 
 
65
  11%|█▏ | 25/220 [00:11<00:32, 5.95it/s]
66
 
 
67
  11%|█▏ | 25/220 [00:11<00:32, 5.95it/s]
68
 
 
69
  11%|█▏ | 25/220 [00:11<00:32, 5.95it/s]
70
  13%|█▎ | 28/220 [00:11<00:25, 7.41it/s]
71
 
 
72
  13%|█▎ | 28/220 [00:11<00:25, 7.41it/s]
73
 
 
74
  13%|█▎ | 28/220 [00:11<00:25, 7.41it/s]
75
  14%|█▎ | 30/220 [00:11<00:22, 8.63it/s]
76
 
 
77
  14%|█▎ | 30/220 [00:11<00:22, 8.63it/s]
78
 
 
79
  14%|█▎ | 30/220 [00:11<00:22, 8.63it/s]
80
  15%|█▍ | 32/220 [00:11<00:20, 9.32it/s]
81
 
 
82
  15%|█▍ | 32/220 [00:11<00:20, 9.32it/s]
83
 
 
84
  15%|█▍ | 32/220 [00:12<00:20, 9.32it/s]
85
  15%|█▌ | 34/220 [00:12<00:25, 7.22it/s]
86
 
 
87
  15%|█▌ | 34/220 [00:12<00:25, 7.22it/s]
88
 
 
89
  15%|█▌ | 34/220 [00:12<00:25, 7.22it/s]
90
 
 
91
  15%|█▌ | 34/220 [00:12<00:25, 7.22it/s]
92
  17%|█▋ | 37/220 [00:12<00:21, 8.63it/s]
93
 
 
94
  17%|█▋ | 37/220 [00:12<00:21, 8.63it/s]
95
 
 
96
  17%|█▋ | 37/220 [00:12<00:21, 8.63it/s]
97
  18%|█▊ | 39/220 [00:12<00:18, 9.88it/s]
98
 
 
99
  18%|█▊ | 39/220 [00:12<00:18, 9.88it/s]
100
 
 
101
  18%|█▊ | 39/220 [00:12<00:18, 9.88it/s]
102
  19%|█▊ | 41/220 [00:12<00:17, 10.35it/s]
103
 
 
104
  19%|█▊ | 41/220 [00:12<00:17, 10.35it/s]
105
 
 
106
  19%|█▊ | 41/220 [00:13<00:17, 10.35it/s]
107
  20%|█▉ | 43/220 [00:13<00:23, 7.63it/s]
108
 
 
109
  20%|█▉ | 43/220 [00:13<00:23, 7.63it/s]
110
 
 
111
  20%|█▉ | 43/220 [00:13<00:23, 7.63it/s]
112
 
 
113
  20%|█▉ | 43/220 [00:13<00:23, 7.63it/s]
114
  21%|██ | 46/220 [00:13<00:19, 9.01it/s]
115
 
 
116
  21%|██ | 46/220 [00:13<00:19, 9.01it/s]
117
 
 
118
  21%|██ | 46/220 [00:13<00:19, 9.01it/s]
119
  22%|██▏ | 48/220 [00:13<00:16, 10.25it/s]
120
 
 
121
  22%|██▏ | 48/220 [00:13<00:16, 10.25it/s]
122
 
 
123
  22%|██▏ | 48/220 [00:13<00:16, 10.25it/s]
124
  23%|██▎ | 50/220 [00:13<00:15, 10.64it/s]
125
 
 
126
  23%|██▎ | 50/220 [00:13<00:15, 10.64it/s]
127
 
 
128
  23%|██▎ | 50/220 [00:14<00:15, 10.64it/s]
129
  24%|██▎ | 52/220 [00:14<00:21, 7.74it/s]
130
 
 
131
  24%|██▎ | 52/220 [00:14<00:21, 7.74it/s]
132
 
 
133
  24%|██▎ | 52/220 [00:14<00:21, 7.74it/s]
134
 
 
135
  24%|██▎ | 52/220 [00:14<00:21, 7.74it/s]
136
  25%|██▌ | 55/220 [00:14<00:18, 9.10it/s]
137
 
 
138
  25%|██▌ | 55/220 [00:14<00:18, 9.10it/s]
139
 
 
140
  25%|██▌ | 55/220 [00:14<00:18, 9.10it/s]
141
  26%|██▌ | 57/220 [00:14<00:15, 10.34it/s]
142
 
 
143
  26%|██▌ | 57/220 [00:14<00:15, 10.34it/s]
144
 
 
145
  26%|██▌ | 57/220 [00:14<00:15, 10.34it/s]
146
  27%|██▋ | 59/220 [00:14<00:15, 10.71it/s]
147
 
 
148
  27%|██▋ | 59/220 [00:14<00:15, 10.71it/s]
149
 
 
150
  27%|██▋ | 59/220 [00:15<00:15, 10.71it/s]
151
  28%|██▊ | 61/220 [00:15<00:20, 7.76it/s]
152
 
 
153
  28%|██▊ | 61/220 [00:15<00:20, 7.76it/s]
154
 
 
155
  28%|██▊ | 61/220 [00:15<00:20, 7.76it/s]
156
 
 
157
  28%|██▊ | 61/220 [00:15<00:20, 7.76it/s]
158
  29%|██▉ | 64/220 [00:15<00:17, 9.12it/s]
159
 
 
160
  29%|██▉ | 64/220 [00:15<00:17, 9.12it/s]
161
 
 
162
  29%|██▉ | 64/220 [00:15<00:17, 9.12it/s]
163
  30%|███ | 66/220 [00:15<00:14, 10.35it/s]
164
 
 
165
  30%|███ | 66/220 [00:15<00:14, 10.35it/s]
166
 
 
167
  30%|███ | 66/220 [00:15<00:14, 10.35it/s]
168
  31%|███ | 68/220 [00:15<00:14, 10.74it/s]
169
 
 
170
  31%|███ | 68/220 [00:15<00:14, 10.74it/s]
171
 
 
172
  31%|███ | 68/220 [00:16<00:14, 10.74it/s]
173
  32%|███▏ | 70/220 [00:16<00:19, 7.77it/s]
174
 
 
175
  32%|███▏ | 70/220 [00:16<00:19, 7.77it/s]
176
 
 
177
  32%|███▏ | 70/220 [00:16<00:19, 7.77it/s]
178
 
 
179
  32%|███▏ | 70/220 [00:16<00:19, 7.77it/s]
180
  33%|███▎ | 73/220 [00:16<00:16, 9.14it/s]
181
 
 
182
  33%|███▎ | 73/220 [00:16<00:16, 9.14it/s]
183
 
 
184
  33%|███▎ | 73/220 [00:16<00:16, 9.14it/s]
185
  34%|███▍ | 75/220 [00:16<00:13, 10.37it/s]
186
 
 
187
  34%|███▍ | 75/220 [00:16<00:13, 10.37it/s]
188
 
 
189
  34%|███▍ | 75/220 [00:16<00:13, 10.37it/s]
190
  35%|███▌ | 77/220 [00:16<00:13, 10.74it/s]
191
 
 
192
  35%|███▌ | 77/220 [00:16<00:13, 10.74it/s]
193
 
 
194
  35%|███▌ | 77/220 [00:17<00:13, 10.74it/s]
195
  36%|███▌ | 79/220 [00:17<00:18, 7.77it/s]
196
 
 
197
  36%|███▌ | 79/220 [00:17<00:18, 7.77it/s]
198
 
 
199
  36%|███▌ | 79/220 [00:17<00:18, 7.77it/s]
200
 
 
201
  36%|███▌ | 79/220 [00:17<00:18, 7.77it/s]
202
  37%|███▋ | 82/220 [00:17<00:15, 9.14it/s]
203
 
 
204
  37%|███▋ | 82/220 [00:17<00:15, 9.14it/s]
205
 
 
206
  37%|█���█▋ | 82/220 [00:17<00:15, 9.14it/s]
207
  38%|███▊ | 84/220 [00:17<00:13, 10.36it/s]
208
 
 
209
  38%|███▊ | 84/220 [00:17<00:13, 10.36it/s]
210
 
 
211
  38%|███▊ | 84/220 [00:17<00:13, 10.36it/s]
212
  39%|███▉ | 86/220 [00:17<00:12, 10.74it/s]
213
 
 
214
  39%|███▉ | 86/220 [00:17<00:12, 10.74it/s]
215
 
 
216
  39%|███▉ | 86/220 [00:18<00:12, 10.74it/s]
217
  40%|████ | 88/220 [00:18<00:17, 7.76it/s]
218
 
 
219
  40%|████ | 88/220 [00:18<00:17, 7.76it/s]
220
 
 
221
  40%|████ | 88/220 [00:18<00:17, 7.76it/s]
222
 
 
223
  40%|████ | 88/220 [00:18<00:17, 7.76it/s]
224
  41%|████▏ | 91/220 [00:18<00:14, 9.13it/s]
225
 
 
226
  41%|████▏ | 91/220 [00:18<00:14, 9.13it/s]
227
 
 
228
  41%|████▏ | 91/220 [00:18<00:14, 9.13it/s]
229
  42%|████▏ | 93/220 [00:18<00:12, 10.37it/s]
230
 
 
231
  42%|████▏ | 93/220 [00:18<00:12, 10.37it/s]
232
 
 
233
  42%|████▏ | 93/220 [00:18<00:12, 10.37it/s]
234
  43%|████▎ | 95/220 [00:18<00:11, 10.74it/s]
235
 
 
236
  43%|████▎ | 95/220 [00:18<00:11, 10.74it/s]
237
 
 
238
  43%|████▎ | 95/220 [00:19<00:11, 10.74it/s]
239
  44%|████▍ | 97/220 [00:19<00:15, 7.77it/s]
240
 
 
241
  44%|████▍ | 97/220 [00:19<00:15, 7.77it/s]
242
 
 
243
  44%|████▍ | 97/220 [00:19<00:15, 7.77it/s]
244
 
 
245
  44%|████▍ | 97/220 [00:19<00:15, 7.77it/s]
246
  45%|████▌ | 100/220 [00:19<00:13, 9.13it/s]
247
 
 
248
  45%|████▌ | 100/220 [00:19<00:13, 9.13it/s]
249
 
 
250
  45%|████▌ | 100/220 [00:19<00:13, 9.13it/s]
251
  46%|████▋ | 102/220 [00:19<00:11, 10.37it/s]
252
 
 
253
  46%|████▋ | 102/220 [00:19<00:11, 10.37it/s]
254
 
 
255
  46%|████▋ | 102/220 [00:19<00:11, 10.37it/s]
256
  47%|████▋ | 104/220 [00:19<00:10, 10.74it/s]
257
 
 
258
  47%|████▋ | 104/220 [00:19<00:10, 10.74it/s]
259
 
 
260
  47%|████▋ | 104/220 [00:19<00:10, 10.74it/s]
261
  48%|████▊ | 106/220 [00:19<00:14, 7.78it/s]
262
 
 
263
  48%|████▊ | 106/220 [00:19<00:14, 7.78it/s]
264
 
 
265
  48%|████▊ | 106/220 [00:19<00:14, 7.78it/s]
266
 
 
267
  48%|████▊ | 106/220 [00:20<00:14, 7.78it/s]
268
  50%|████▉ | 109/220 [00:20<00:12, 9.14it/s]
269
 
 
270
  50%|████▉ | 109/220 [00:20<00:12, 9.14it/s]
271
 
 
272
  50%|████▉ | 109/220 [00:20<00:12, 9.14it/s]
273
  50%|█████ | 111/220 [00:20<00:10, 10.38it/s]
274
 
 
275
  50%|█████ | 111/220 [00:20<00:10, 10.38it/s]
276
 
 
277
  50%|█████ | 111/220 [00:20<00:10, 10.38it/s]
278
  51%|█████▏ | 113/220 [00:20<00:09, 10.76it/s]
279
 
 
280
  51%|█████▏ | 113/220 [00:20<00:09, 10.76it/s]
281
 
 
282
  51%|█████▏ | 113/220 [00:20<00:09, 10.76it/s]
283
  52%|█████▏ | 115/220 [00:20<00:13, 7.75it/s]
284
 
 
285
  52%|█████▏ | 115/220 [00:20<00:13, 7.75it/s]
286
 
 
287
  52%|█████▏ | 115/220 [00:20<00:13, 7.75it/s]
288
 
 
289
  52%|█████▏ | 115/220 [00:20<00:13, 7.75it/s]
290
  54%|█████▎ | 118/220 [00:21<00:11, 9.11it/s]
291
 
 
292
  54%|█████▎ | 118/220 [00:21<00:11, 9.11it/s]
293
 
 
294
  54%|█████▎ | 118/220 [00:21<00:11, 9.11it/s]
295
  55%|█████▍ | 120/220 [00:21<00:09, 10.34it/s]
296
 
 
297
  55%|█████▍ | 120/220 [00:21<00:09, 10.34it/s]
298
 
 
299
  55%|█████▍ | 120/220 [00:21<00:09, 10.34it/s]
300
  55%|█████▌ | 122/220 [00:21<00:09, 10.71it/s]
301
 
 
302
  55%|█████▌ | 122/220 [00:21<00:09, 10.71it/s]
303
 
 
304
  55%|█████▌ | 122/220 [00:21<00:09, 10.71it/s]
305
  56%|█████▋ | 124/220 [00:21<00:12, 7.76it/s]
306
 
 
307
  56%|█████▋ | 124/220 [00:21<00:12, 7.76it/s]
308
 
 
309
  56%|█████▋ | 124/220 [00:21<00:12, 7.76it/s]
310
 
 
311
  56%|█████▋ | 124/220 [00:21<00:12, 7.76it/s]
312
  58%|█████▊ | 127/220 [00:22<00:10, 9.13it/s]
313
 
 
314
  58%|█████▊ | 127/220 [00:22<00:10, 9.13it/s]
315
 
 
316
  58%|█████▊ | 127/220 [00:22<00:10, 9.13it/s]
317
  59%|█████▊ | 129/220 [00:22<00:08, 10.36it/s]
318
 
 
319
  59%|█████▊ | 129/220 [00:22<00:08, 10.36it/s]
320
 
 
321
  59%|█████▊ | 129/220 [00:22<00:08, 10.36it/s]
322
  60%|█████▉ | 131/220 [00:22<00:08, 10.73it/s]
323
 
 
324
  60%|█████▉ | 131/220 [00:22<00:08, 10.73it/s]
325
 
 
326
  60%|█████▉ | 131/220 [00:22<00:08, 10.73it/s]
327
  60%|██████ | 133/220 [00:22<00:11, 7.77it/s]
328
 
 
329
  60%|██████ | 133/220 [00:22<00:11, 7.77it/s]
330
 
 
331
  60%|██████ | 133/220 [00:22<00:11, 7.77it/s]
332
 
 
333
  60%|██████ | 133/220 [00:22<00:11, 7.77it/s]
334
  62%|██████▏ | 136/220 [00:23<00:09, 9.14it/s]
335
 
 
336
  62%|██████▏ | 136/220 [00:23<00:09, 9.14it/s]
337
 
 
338
  62%|██████▏ | 136/220 [00:23<00:09, 9.14it/s]
339
  63%|██████▎ | 138/220 [00:23<00:07, 10.37it/s]
340
 
 
341
  63%|██████▎ | 138/220 [00:23<00:07, 10.37it/s]
342
 
 
343
  63%|██████▎ | 138/220 [00:23<00:07, 10.37it/s]
344
  64%|██████▎ | 140/220 [00:23<00:07, 10.74it/s]
345
 
 
346
  64%|██████▎ | 140/220 [00:23<00:07, 10.74it/s]
347
 
 
348
  64%|██████▎ | 140/220 [00:23<00:07, 10.74it/s]
349
  65%|██████▍ | 142/220 [00:23<00:10, 7.78it/s]
350
 
 
351
  65%|██████▍ | 142/220 [00:23<00:10, 7.78it/s]
352
 
 
353
  65%|██████▍ | 142/220 [00:23<00:10, 7.78it/s]
354
 
 
355
  65%|██████▍ | 142/220 [00:23<00:10, 7.78it/s]
356
  66%|██████▌ | 145/220 [00:24<00:08, 9.15it/s]
357
 
 
358
  66%|██████▌ | 145/220 [00:24<00:08, 9.15it/s]
359
 
 
360
  66%|██████▌ | 145/220 [00:24<00:08, 9.15it/s]
361
  67%|██████▋ | 147/220 [00:24<00:07, 10.38it/s]
362
 
 
363
  67%|██████▋ | 147/220 [00:24<00:07, 10.38it/s]
364
 
 
365
  67%|██████▋ | 147/220 [00:24<00:07, 10.38it/s]
366
  68%|██████▊ | 149/220 [00:24<00:06, 10.75it/s]
367
 
 
368
  68%|██████▊ | 149/220 [00:24<00:06, 10.75it/s]
369
 
 
370
  68%|██████▊ | 149/220 [00:24<00:06, 10.75it/s]
371
  69%|██████▊ | 151/220 [00:24<00:08, 7.78it/s]
372
 
 
373
  69%|██████▊ | 151/220 [00:24<00:08, 7.78it/s]
374
 
 
375
  69%|██████▊ | 151/220 [00:24<00:08, 7.78it/s]
376
 
 
377
  69%|██████▊ | 151/220 [00:24<00:08, 7.78it/s]
378
  70%|███████ | 154/220 [00:25<00:07, 9.12it/s]
379
 
 
380
  70%|███████ | 154/220 [00:25<00:07, 9.12it/s]
381
 
 
382
  70%|███████ | 154/220 [00:25<00:07, 9.12it/s]
383
  71%|███████ | 156/220 [00:25<00:06, 10.37it/s]
384
 
 
385
  71%|███████ | 156/220 [00:25<00:06, 10.37it/s]
386
 
 
387
  71%|███████ | 156/220 [00:25<00:06, 10.37it/s]
388
  72%|███████▏ | 158/220 [00:25<00:05, 10.75it/s]
389
 
 
390
  72%|███████▏ | 158/220 [00:25<00:05, 10.75it/s]
391
 
 
392
  72%|███████▏ | 158/220 [00:25<00:05, 10.75it/s]
393
  73%|███████▎ | 160/220 [00:25<00:07, 7.77it/s]
394
 
 
395
  73%|███████▎ | 160/220 [00:25<00:07, 7.77it/s]
396
 
 
397
  73%|███████▎ | 160/220 [00:25<00:07, 7.77it/s]
398
 
 
399
  73%|███████▎ | 160/220 [00:25<00:07, 7.77it/s]
400
  74%|███████▍ | 163/220 [00:25<00:06, 9.13it/s]
401
 
 
402
  74%|███████▍ | 163/220 [00:25<00:06, 9.13it/s]
403
 
 
404
  74%|███████▍ | 163/220 [00:26<00:06, 9.13it/s]
405
  75%|███████▌ | 165/220 [00:26<00:05, 10.37it/s]
406
 
 
407
  75%|███████▌ | 165/220 [00:26<00:05, 10.37it/s]
408
 
 
409
  75%|███████▌ | 165/220 [00:26<00:05, 10.37it/s]
410
  76%|███████▌ | 167/220 [00:26<00:04, 10.75it/s]
411
 
 
412
  76%|███████▌ | 167/220 [00:26<00:04, 10.75it/s]
413
 
 
414
  76%|███████▌ | 167/220 [00:26<00:04, 10.75it/s]
415
  77%|███████▋ | 169/220 [00:26<00:06, 7.77it/s]
416
 
 
417
  77%|███████▋ | 169/220 [00:26<00:06, 7.77it/s]
418
 
 
419
  77%|███████▋ | 169/220 [00:26<00:06, 7.77it/s]
420
 
 
421
  77%|███████▋ | 169/220 [00:26<00:06, 7.77it/s]
422
  78%|███████▊ | 172/220 [00:26<00:05, 9.13it/s]
423
 
 
424
  78%|███████▊ | 172/220 [00:26<00:05, 9.13it/s]
425
 
 
426
  78%|███████▊ | 172/220 [00:27<00:05, 9.13it/s]
427
  79%|███████▉ | 174/220 [00:27<00:04, 10.36it/s]
428
 
 
429
  79%|███████▉ | 174/220 [00:27<00:04, 10.36it/s]
430
 
 
431
  79%|███████▉ | 174/220 [00:27<00:04, 10.36it/s]
432
  80%|████████ | 176/220 [00:27<00:04, 10.74it/s]
433
 
 
434
  80%|████████ | 176/220 [00:27<00:04, 10.74it/s]
435
 
 
436
  80%|████████ | 176/220 [00:27<00:04, 10.74it/s]
437
  81%|████████ | 178/220 [00:27<00:05, 7.77it/s]
438
 
 
439
  81%|████████ | 178/220 [00:27<00:05, 7.77it/s]
440
 
 
441
  81%|████████ | 178/220 [00:27<00:05, 7.77it/s]
442
 
 
443
  81%|████████ | 178/220 [00:27<00:05, 7.77it/s]
444
  82%|████████▏ | 181/220 [00:27<00:04, 9.13it/s]
445
 
 
446
  82%|████████▏ | 181/220 [00:27<00:04, 9.13it/s]
447
 
 
448
  82%|████████▏ | 181/220 [00:28<00:04, 9.13it/s]
449
  83%|████████▎ | 183/220 [00:28<00:03, 10.37it/s]
450
 
 
451
  83%|████████▎ | 183/220 [00:28<00:03, 10.37it/s]
452
 
 
453
  83%|████████▎ | 183/220 [00:28<00:03, 10.37it/s]
454
  84%|████████▍ | 185/220 [00:28<00:03, 10.73it/s]
455
 
 
456
  84%|████████▍ | 185/220 [00:28<00:03, 10.73it/s]
457
 
 
458
  84%|████████▍ | 185/220 [00:28<00:03, 10.73it/s]
459
  85%|████████▌ | 187/220 [00:28<00:04, 7.76it/s]
460
 
 
461
  85%|████████▌ | 187/220 [00:28<00:04, 7.76it/s]
462
 
 
463
  85%|████████▌ | 187/220 [00:28<00:04, 7.76it/s]
464
 
 
465
  85%|████████▌ | 187/220 [00:28<00:04, 7.76it/s]
466
  86%|████████▋ | 190/220 [00:28<00:03, 9.13it/s]
467
 
 
468
  86%|████████▋ | 190/220 [00:28<00:03, 9.13it/s]
469
 
 
470
  86%|████████▋ | 190/220 [00:28<00:03, 9.13it/s]
471
  87%|████████▋ | 192/220 [00:28<00:02, 10.37it/s]
472
 
 
473
  87%|████████▋ | 192/220 [00:28<00:02, 10.37it/s]
474
 
 
475
  87%|████████▋ | 192/220 [00:29<00:02, 10.37it/s]
476
  88%|████████▊ | 194/220 [00:29<00:02, 10.73it/s]
477
 
 
478
  88%|████████▊ | 194/220 [00:29<00:02, 10.73it/s]
479
 
 
480
  88%|████████▊ | 194/220 [00:29<00:02, 10.73it/s]
481
  89%|████████▉ | 196/220 [00:29<00:03, 7.77it/s]
482
 
 
483
  89%|████████▉ | 196/220 [00:29<00:03, 7.77it/s]
484
 
 
485
  89%|████████▉ | 196/220 [00:29<00:03, 7.77it/s]
486
 
 
487
  89%|████████▉ | 196/220 [00:29<00:03, 7.77it/s]
488
  90%|█████████ | 199/220 [00:29<00:02, 9.14it/s]
489
 
 
490
  90%|█████████ | 199/220 [00:29<00:02, 9.14it/s]
491
 
 
492
  90%|█████████ | 199/220 [00:29<00:02, 9.14it/s]
493
  91%|█████████▏| 201/220 [00:29<00:01, 10.38it/s]
494
 
 
495
  91%|█████████▏| 201/220 [00:29<00:01, 10.38it/s]
496
 
 
497
  91%|█████████▏| 201/220 [00:29<00:01, 10.38it/s]
498
  92%|█████████▏| 203/220 [00:30<00:01, 10.74it/s]
499
 
 
500
  92%|█████████▏| 203/220 [00:30<00:01, 10.74it/s]
501
 
 
502
  92%|█████████▏| 203/220 [00:30<00:01, 10.74it/s]
503
  93%|█████████▎| 205/220 [00:30<00:01, 7.78it/s]
504
 
 
505
  93%|█████████▎| 205/220 [00:30<00:01, 7.78it/s]
506
 
 
507
  93%|█████████▎| 205/220 [00:30<00:01, 7.78it/s]
508
 
 
509
  93%|█████████▎| 205/220 [00:30<00:01, 7.78it/s]
510
  95%|█████████▍| 208/220 [00:30<00:01, 9.14it/s]
511
 
 
512
  95%|█████████▍| 208/220 [00:30<00:01, 9.14it/s]
513
 
 
514
  95%|█████████▍| 208/220 [00:30<00:01, 9.14it/s]
515
  95%|█████████▌| 210/220 [00:30<00:00, 10.37it/s]
516
 
 
517
  95%|█████████▌| 210/220 [00:30<00:00, 10.37it/s]
518
 
 
519
  95%|█████████▌| 210/220 [00:30<00:00, 10.37it/s]
520
  96%|█████████▋| 212/220 [00:31<00:00, 10.17it/s]
521
 
 
522
  96%|█████████▋| 212/220 [00:31<00:00, 10.17it/s]
523
 
 
524
  96%|█████████▋| 212/220 [00:31<00:00, 10.17it/s]
525
  97%|█████████▋| 214/220 [00:31<00:00, 7.56it/s]
526
 
 
527
  97%|█████████▋| 214/220 [00:31<00:00, 7.56it/s]
528
 
 
529
  97%|█████████▋| 214/220 [00:31<00:00, 7.56it/s]
530
 
 
531
  97%|█████████▋| 214/220 [00:31<00:00, 7.56it/s]
532
  99%|█████████▊| 217/220 [00:31<00:00, 8.96it/s]
533
 
 
534
  99%|█████████▊| 217/220 [00:31<00:00, 8.96it/s]
535
 
 
536
  99%|█████████▊| 217/220 [00:31<00:00, 8.96it/s]
537
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /usr/bin/python3 -m mlc_llm gen_config dist/models/stablelm-2-zephyr-1_6b --quantization q0f32 --conv-template stablelm-2 --output /tmp/tmpjjd44ai4 --context-window-size 4096
2
+ [2024-05-24 18:23:54] INFO auto_config.py:115: Found model configuration: dist/models/stablelm-2-zephyr-1_6b/config.json
3
+ [2024-05-24 18:23:54] INFO auto_config.py:153: Found model type: stablelm. Use `--model-type` to override.
4
+ [2024-05-24 18:23:54] INFO stablelm_model.py:49: context_window_size not found in config.json. Falling back to max_position_embeddings (4096)
5
+ [2024-05-24 18:23:54] INFO stablelm_model.py:66: prefill_chunk_size defaults to 2048
6
+ [2024-05-24 18:23:54] INFO config.py:106: Overriding context_window_size from 4096 to 4096
7
+ [2024-05-24 18:23:54] INFO config.py:106: Overriding max_batch_size from 1 to 80
8
+ [2024-05-24 18:23:54] INFO gen_config.py:255: [generation_config.json] Setting bos_token_id: 100257
9
+ [2024-05-24 18:23:54] INFO gen_config.py:255: [generation_config.json] Setting eos_token_id: 100257
10
+ [2024-05-24 18:23:54] INFO gen_config.py:269: Not found tokenizer config: dist/models/stablelm-2-zephyr-1_6b/tokenizer.model
11
+ [2024-05-24 18:23:54] INFO gen_config.py:267: Found tokenizer config: dist/models/stablelm-2-zephyr-1_6b/tokenizer.json. Copying to /tmp/tmpjjd44ai4/tokenizer.json
12
+ [2024-05-24 18:23:54] INFO gen_config.py:267: Found tokenizer config: dist/models/stablelm-2-zephyr-1_6b/vocab.json. Copying to /tmp/tmpjjd44ai4/vocab.json
13
+ [2024-05-24 18:23:54] INFO gen_config.py:267: Found tokenizer config: dist/models/stablelm-2-zephyr-1_6b/merges.txt. Copying to /tmp/tmpjjd44ai4/merges.txt
14
+ [2024-05-24 18:23:54] INFO gen_config.py:269: Not found tokenizer config: dist/models/stablelm-2-zephyr-1_6b/added_tokens.json
15
+ [2024-05-24 18:23:54] INFO gen_config.py:267: Found tokenizer config: dist/models/stablelm-2-zephyr-1_6b/tokenizer_config.json. Copying to /tmp/tmpjjd44ai4/tokenizer_config.json
16
+ [2024-05-24 18:23:54] INFO gen_config.py:80: [System default] Setting pad_token_id: 0
17
+ [2024-05-24 18:23:54] INFO gen_config.py:80: [System default] Setting temperature: 0.7
18
+ [2024-05-24 18:23:54] INFO gen_config.py:80: [System default] Setting presence_penalty: 0.0
19
+ [2024-05-24 18:23:54] INFO gen_config.py:80: [System default] Setting frequency_penalty: 0.0
20
+ [2024-05-24 18:23:54] INFO gen_config.py:80: [System default] Setting repetition_penalty: 1.0
21
+ [2024-05-24 18:23:54] INFO gen_config.py:80: [System default] Setting top_p: 0.95
22
+ [2024-05-24 18:23:54] INFO gen_config.py:80: [System default] Setting mean_gen_len: 128
23
+ [2024-05-24 18:23:54] INFO gen_config.py:80: [System default] Setting max_gen_len: 512
24
+ [2024-05-24 18:23:54] INFO gen_config.py:80: [System default] Setting shift_fill_factor: 0.3
25
+ [2024-05-24 18:23:54] INFO gen_config.py:335: Dumping configuration file to: /tmp/tmpjjd44ai4/mlc-chat-config.json
26
+ /usr/bin/python3 -m mlc_llm convert_weight dist/models/stablelm-2-zephyr-1_6b --quantization q0f32 --source-format auto --output /tmp/tmpjjd44ai4
27
+ [2024-05-24 18:23:55] INFO auto_config.py:115: Found model configuration: dist/models/stablelm-2-zephyr-1_6b/config.json
28
+ [2024-05-24 18:23:58] INFO auto_device.py:79: Found device: cuda:0
29
+ [2024-05-24 18:23:59] INFO auto_device.py:88: Not found device: rocm:0
30
+ [2024-05-24 18:24:00] INFO auto_device.py:88: Not found device: metal:0
31
+ [2024-05-24 18:24:01] INFO auto_device.py:88: Not found device: vulkan:0
32
+ [2024-05-24 18:24:01] INFO auto_device.py:88: Not found device: opencl:0
33
+ [2024-05-24 18:24:01] INFO auto_device.py:35: Using device: cuda:0
34
+ [2024-05-24 18:24:01] INFO auto_weight.py:70: Finding weights in: dist/models/stablelm-2-zephyr-1_6b
35
+ [2024-05-24 18:24:01] INFO auto_weight.py:136: Not found Huggingface PyTorch
36
+ [2024-05-24 18:24:01] INFO auto_weight.py:143: Found source weight format: huggingface-safetensor. Source configuration: dist/models/stablelm-2-zephyr-1_6b/model.safetensors.index.json
37
+ [2024-05-24 18:24:01] INFO auto_weight.py:106: Using source weight configuration: dist/models/stablelm-2-zephyr-1_6b/model.safetensors.index.json. Use `--source` to override.
38
+ [2024-05-24 18:24:01] INFO auto_weight.py:110: Using source weight format: huggingface-safetensor. Use `--source-format` to override.
39
+ [2024-05-24 18:24:01] INFO auto_config.py:153: Found model type: stablelm. Use `--model-type` to override.
40
+ [2024-05-24 18:24:01] INFO stablelm_model.py:49: context_window_size not found in config.json. Falling back to max_position_embeddings (4096)
41
+ [2024-05-24 18:24:01] INFO stablelm_model.py:66: prefill_chunk_size defaults to 2048
42
+ Weight conversion with arguments:
43
+ --config dist/models/stablelm-2-zephyr-1_6b/config.json
44
+ --quantization NoQuantize(name='q0f32', kind='no-quant', model_dtype='float32')
45
+ --model-type stablelm
46
+ --device cuda:0
47
+ --source dist/models/stablelm-2-zephyr-1_6b/model.safetensors.index.json
48
+ --source-format huggingface-safetensor
49
+ --output /tmp/tmpjjd44ai4
50
+ Start storing to cache /tmp/tmpjjd44ai4
51
+
52
  0%| | 0/220 [00:00<?, ?it/s]
53
 
54
+
55
  0%| | 0/220 [00:00<?, ?it/s]
56
 
57
+
58
  0%| | 0/220 [00:01<?, ?it/s]/home/tlopex/.local/lib/python3.8/site-packages/numpy/core/getlimits.py:518: UserWarning: The value of the smallest subnormal for <class 'numpy.float32'> type is zero.
59
+ setattr(self, word, getattr(machar, word).flat[0])
60
+ /home/tlopex/.local/lib/python3.8/site-packages/numpy/core/getlimits.py:89: UserWarning: The value of the smallest subnormal for <class 'numpy.float32'> type is zero.
61
+ return self._float_to_str(self.smallest_subnormal)
62
+
63
  0%| | 1/220 [00:04<17:42, 4.85s/it]
64
 
65
+
66
  0%| | 1/220 [00:05<17:42, 4.85s/it]
67
  1%| | 2/220 [00:08<15:30, 4.27s/it]
68
 
69
+
70
  1%| | 2/220 [00:08<15:30, 4.27s/it]
71
 
72
+
73
  1%| | 2/220 [00:08<15:30, 4.27s/it]
74
 
75
+
76
  1%| | 2/220 [00:08<15:30, 4.27s/it]
77
  2%|▏ | 5/220 [00:08<04:31, 1.26s/it]
78
 
79
+
80
  2%|▏ | 5/220 [00:09<04:31, 1.26s/it]
81
  3%|▎ | 6/220 [00:09<03:45, 1.05s/it]
82
 
83
+
84
  3%|▎ | 6/220 [00:09<03:45, 1.05s/it]
85
 
86
+
87
  3%|▎ | 6/220 [00:09<03:45, 1.05s/it]
88
 
89
+
90
  3%|▎ | 6/220 [00:09<03:45, 1.05s/it]
91
 
92
+
93
  3%|▎ | 6/220 [00:09<03:45, 1.05s/it]
94
  5%|▍ | 10/220 [00:09<01:37, 2.15it/s]
95
 
96
+
97
  5%|▍ | 10/220 [00:09<01:37, 2.15it/s]
98
 
99
+
100
  5%|▍ | 10/220 [00:09<01:37, 2.15it/s]
101
 
102
+
103
  5%|▍ | 10/220 [00:09<01:37, 2.15it/s]
104
 
105
+
106
  5%|▍ | 10/220 [00:09<01:37, 2.15it/s]
107
  6%|▋ | 14/220 [00:09<00:58, 3.54it/s]
108
 
109
+
110
  6%|▋ | 14/220 [00:09<00:58, 3.54it/s]
111
 
112
+
113
  6%|▋ | 14/220 [00:10<00:58, 3.54it/s]
114
  7%|▋ | 16/220 [00:10<00:54, 3.74it/s]
115
 
116
+
117
  7%|▋ | 16/220 [00:10<00:54, 3.74it/s]
118
 
119
+
120
  7%|▋ | 16/220 [00:10<00:54, 3.74it/s]
121
 
122
+
123
  7%|▋ | 16/220 [00:10<00:54, 3.74it/s]
124
  9%|▊ | 19/220 [00:10<00:40, 4.95it/s]
125
 
126
+
127
  9%|▊ | 19/220 [00:10<00:40, 4.95it/s]
128
 
129
+
130
  9%|▊ | 19/220 [00:10<00:40, 4.95it/s]
131
  10%|▉ | 21/220 [00:10<00:34, 5.85it/s]
132
 
133
+
134
  10%|▉ | 21/220 [00:10<00:34, 5.85it/s]
135
 
136
+
137
  10%|▉ | 21/220 [00:10<00:34, 5.85it/s]
138
  10%|█ | 23/220 [00:10<00:29, 6.72it/s]
139
 
140
+
141
  10%|█ | 23/220 [00:10<00:29, 6.72it/s]
142
 
143
+
144
  10%|█ | 23/220 [00:11<00:29, 6.72it/s]
145
  11%|█▏ | 25/220 [00:11<00:32, 5.95it/s]
146
 
147
+
148
  11%|█▏ | 25/220 [00:11<00:32, 5.95it/s]
149
 
150
+
151
  11%|█▏ | 25/220 [00:11<00:32, 5.95it/s]
152
 
153
+
154
  11%|█▏ | 25/220 [00:11<00:32, 5.95it/s]
155
  13%|█▎ | 28/220 [00:11<00:25, 7.41it/s]
156
 
157
+
158
  13%|█▎ | 28/220 [00:11<00:25, 7.41it/s]
159
 
160
+
161
  13%|█▎ | 28/220 [00:11<00:25, 7.41it/s]
162
  14%|█▎ | 30/220 [00:11<00:22, 8.63it/s]
163
 
164
+
165
  14%|█▎ | 30/220 [00:11<00:22, 8.63it/s]
166
 
167
+
168
  14%|█▎ | 30/220 [00:11<00:22, 8.63it/s]
169
  15%|█▍ | 32/220 [00:11<00:20, 9.32it/s]
170
 
171
+
172
  15%|█▍ | 32/220 [00:11<00:20, 9.32it/s]
173
 
174
+
175
  15%|█▍ | 32/220 [00:12<00:20, 9.32it/s]
176
  15%|█▌ | 34/220 [00:12<00:25, 7.22it/s]
177
 
178
+
179
  15%|█▌ | 34/220 [00:12<00:25, 7.22it/s]
180
 
181
+
182
  15%|█▌ | 34/220 [00:12<00:25, 7.22it/s]
183
 
184
+
185
  15%|█▌ | 34/220 [00:12<00:25, 7.22it/s]
186
  17%|█▋ | 37/220 [00:12<00:21, 8.63it/s]
187
 
188
+
189
  17%|█▋ | 37/220 [00:12<00:21, 8.63it/s]
190
 
191
+
192
  17%|█▋ | 37/220 [00:12<00:21, 8.63it/s]
193
  18%|█▊ | 39/220 [00:12<00:18, 9.88it/s]
194
 
195
+
196
  18%|█▊ | 39/220 [00:12<00:18, 9.88it/s]
197
 
198
+
199
  18%|█▊ | 39/220 [00:12<00:18, 9.88it/s]
200
  19%|█▊ | 41/220 [00:12<00:17, 10.35it/s]
201
 
202
+
203
  19%|█▊ | 41/220 [00:12<00:17, 10.35it/s]
204
 
205
+
206
  19%|█▊ | 41/220 [00:13<00:17, 10.35it/s]
207
  20%|█▉ | 43/220 [00:13<00:23, 7.63it/s]
208
 
209
+
210
  20%|█▉ | 43/220 [00:13<00:23, 7.63it/s]
211
 
212
+
213
  20%|█▉ | 43/220 [00:13<00:23, 7.63it/s]
214
 
215
+
216
  20%|█▉ | 43/220 [00:13<00:23, 7.63it/s]
217
  21%|██ | 46/220 [00:13<00:19, 9.01it/s]
218
 
219
+
220
  21%|██ | 46/220 [00:13<00:19, 9.01it/s]
221
 
222
+
223
  21%|██ | 46/220 [00:13<00:19, 9.01it/s]
224
  22%|██▏ | 48/220 [00:13<00:16, 10.25it/s]
225
 
226
+
227
  22%|██▏ | 48/220 [00:13<00:16, 10.25it/s]
228
 
229
+
230
  22%|██▏ | 48/220 [00:13<00:16, 10.25it/s]
231
  23%|██▎ | 50/220 [00:13<00:15, 10.64it/s]
232
 
233
+
234
  23%|██▎ | 50/220 [00:13<00:15, 10.64it/s]
235
 
236
+
237
  23%|██▎ | 50/220 [00:14<00:15, 10.64it/s]
238
  24%|██▎ | 52/220 [00:14<00:21, 7.74it/s]
239
 
240
+
241
  24%|██▎ | 52/220 [00:14<00:21, 7.74it/s]
242
 
243
+
244
  24%|██▎ | 52/220 [00:14<00:21, 7.74it/s]
245
 
246
+
247
  24%|██▎ | 52/220 [00:14<00:21, 7.74it/s]
248
  25%|██▌ | 55/220 [00:14<00:18, 9.10it/s]
249
 
250
+
251
  25%|██▌ | 55/220 [00:14<00:18, 9.10it/s]
252
 
253
+
254
  25%|██▌ | 55/220 [00:14<00:18, 9.10it/s]
255
  26%|██▌ | 57/220 [00:14<00:15, 10.34it/s]
256
 
257
+
258
  26%|██▌ | 57/220 [00:14<00:15, 10.34it/s]
259
 
260
+
261
  26%|██▌ | 57/220 [00:14<00:15, 10.34it/s]
262
  27%|██▋ | 59/220 [00:14<00:15, 10.71it/s]
263
 
264
+
265
  27%|██▋ | 59/220 [00:14<00:15, 10.71it/s]
266
 
267
+
268
  27%|██▋ | 59/220 [00:15<00:15, 10.71it/s]
269
  28%|██▊ | 61/220 [00:15<00:20, 7.76it/s]
270
 
271
+
272
  28%|██▊ | 61/220 [00:15<00:20, 7.76it/s]
273
 
274
+
275
  28%|██▊ | 61/220 [00:15<00:20, 7.76it/s]
276
 
277
+
278
  28%|██▊ | 61/220 [00:15<00:20, 7.76it/s]
279
  29%|██▉ | 64/220 [00:15<00:17, 9.12it/s]
280
 
281
+
282
  29%|██▉ | 64/220 [00:15<00:17, 9.12it/s]
283
 
284
+
285
  29%|██▉ | 64/220 [00:15<00:17, 9.12it/s]
286
  30%|███ | 66/220 [00:15<00:14, 10.35it/s]
287
 
288
+
289
  30%|███ | 66/220 [00:15<00:14, 10.35it/s]
290
 
291
+
292
  30%|███ | 66/220 [00:15<00:14, 10.35it/s]
293
  31%|███ | 68/220 [00:15<00:14, 10.74it/s]
294
 
295
+
296
  31%|███ | 68/220 [00:15<00:14, 10.74it/s]
297
 
298
+
299
  31%|███ | 68/220 [00:16<00:14, 10.74it/s]
300
  32%|███▏ | 70/220 [00:16<00:19, 7.77it/s]
301
 
302
+
303
  32%|███▏ | 70/220 [00:16<00:19, 7.77it/s]
304
 
305
+
306
  32%|███▏ | 70/220 [00:16<00:19, 7.77it/s]
307
 
308
+
309
  32%|███▏ | 70/220 [00:16<00:19, 7.77it/s]
310
  33%|███▎ | 73/220 [00:16<00:16, 9.14it/s]
311
 
312
+
313
  33%|███▎ | 73/220 [00:16<00:16, 9.14it/s]
314
 
315
+
316
  33%|███▎ | 73/220 [00:16<00:16, 9.14it/s]
317
  34%|███▍ | 75/220 [00:16<00:13, 10.37it/s]
318
 
319
+
320
  34%|███▍ | 75/220 [00:16<00:13, 10.37it/s]
321
 
322
+
323
  34%|███▍ | 75/220 [00:16<00:13, 10.37it/s]
324
  35%|███▌ | 77/220 [00:16<00:13, 10.74it/s]
325
 
326
+
327
  35%|███▌ | 77/220 [00:16<00:13, 10.74it/s]
328
 
329
+
330
  35%|███▌ | 77/220 [00:17<00:13, 10.74it/s]
331
  36%|███▌ | 79/220 [00:17<00:18, 7.77it/s]
332
 
333
+
334
  36%|███▌ | 79/220 [00:17<00:18, 7.77it/s]
335
 
336
+
337
  36%|███▌ | 79/220 [00:17<00:18, 7.77it/s]
338
 
339
+
340
  36%|███▌ | 79/220 [00:17<00:18, 7.77it/s]
341
  37%|███▋ | 82/220 [00:17<00:15, 9.14it/s]
342
 
343
+
344
  37%|███▋ | 82/220 [00:17<00:15, 9.14it/s]
345
 
346
+
347
  37%|█���█▋ | 82/220 [00:17<00:15, 9.14it/s]
348
  38%|███▊ | 84/220 [00:17<00:13, 10.36it/s]
349
 
350
+
351
  38%|███▊ | 84/220 [00:17<00:13, 10.36it/s]
352
 
353
+
354
  38%|███▊ | 84/220 [00:17<00:13, 10.36it/s]
355
  39%|███▉ | 86/220 [00:17<00:12, 10.74it/s]
356
 
357
+
358
  39%|███▉ | 86/220 [00:17<00:12, 10.74it/s]
359
 
360
+
361
  39%|███▉ | 86/220 [00:18<00:12, 10.74it/s]
362
  40%|████ | 88/220 [00:18<00:17, 7.76it/s]
363
 
364
+
365
  40%|████ | 88/220 [00:18<00:17, 7.76it/s]
366
 
367
+
368
  40%|████ | 88/220 [00:18<00:17, 7.76it/s]
369
 
370
+
371
  40%|████ | 88/220 [00:18<00:17, 7.76it/s]
372
  41%|████▏ | 91/220 [00:18<00:14, 9.13it/s]
373
 
374
+
375
  41%|████▏ | 91/220 [00:18<00:14, 9.13it/s]
376
 
377
+
378
  41%|████▏ | 91/220 [00:18<00:14, 9.13it/s]
379
  42%|████▏ | 93/220 [00:18<00:12, 10.37it/s]
380
 
381
+
382
  42%|████▏ | 93/220 [00:18<00:12, 10.37it/s]
383
 
384
+
385
  42%|████▏ | 93/220 [00:18<00:12, 10.37it/s]
386
  43%|████▎ | 95/220 [00:18<00:11, 10.74it/s]
387
 
388
+
389
  43%|████▎ | 95/220 [00:18<00:11, 10.74it/s]
390
 
391
+
392
  43%|████▎ | 95/220 [00:19<00:11, 10.74it/s]
393
  44%|████▍ | 97/220 [00:19<00:15, 7.77it/s]
394
 
395
+
396
  44%|████▍ | 97/220 [00:19<00:15, 7.77it/s]
397
 
398
+
399
  44%|████▍ | 97/220 [00:19<00:15, 7.77it/s]
400
 
401
+
402
  44%|████▍ | 97/220 [00:19<00:15, 7.77it/s]
403
  45%|████▌ | 100/220 [00:19<00:13, 9.13it/s]
404
 
405
+
406
  45%|████▌ | 100/220 [00:19<00:13, 9.13it/s]
407
 
408
+
409
  45%|████▌ | 100/220 [00:19<00:13, 9.13it/s]
410
  46%|████▋ | 102/220 [00:19<00:11, 10.37it/s]
411
 
412
+
413
  46%|████▋ | 102/220 [00:19<00:11, 10.37it/s]
414
 
415
+
416
  46%|████▋ | 102/220 [00:19<00:11, 10.37it/s]
417
  47%|████▋ | 104/220 [00:19<00:10, 10.74it/s]
418
 
419
+
420
  47%|████▋ | 104/220 [00:19<00:10, 10.74it/s]
421
 
422
+
423
  47%|████▋ | 104/220 [00:19<00:10, 10.74it/s]
424
  48%|████▊ | 106/220 [00:19<00:14, 7.78it/s]
425
 
426
+
427
  48%|████▊ | 106/220 [00:19<00:14, 7.78it/s]
428
 
429
+
430
  48%|████▊ | 106/220 [00:19<00:14, 7.78it/s]
431
 
432
+
433
  48%|████▊ | 106/220 [00:20<00:14, 7.78it/s]
434
  50%|████▉ | 109/220 [00:20<00:12, 9.14it/s]
435
 
436
+
437
  50%|████▉ | 109/220 [00:20<00:12, 9.14it/s]
438
 
439
+
440
  50%|████▉ | 109/220 [00:20<00:12, 9.14it/s]
441
  50%|█████ | 111/220 [00:20<00:10, 10.38it/s]
442
 
443
+
444
  50%|█████ | 111/220 [00:20<00:10, 10.38it/s]
445
 
446
+
447
  50%|█████ | 111/220 [00:20<00:10, 10.38it/s]
448
  51%|█████▏ | 113/220 [00:20<00:09, 10.76it/s]
449
 
450
+
451
  51%|█████▏ | 113/220 [00:20<00:09, 10.76it/s]
452
 
453
+
454
  51%|█████▏ | 113/220 [00:20<00:09, 10.76it/s]
455
  52%|█████▏ | 115/220 [00:20<00:13, 7.75it/s]
456
 
457
+
458
  52%|█████▏ | 115/220 [00:20<00:13, 7.75it/s]
459
 
460
+
461
  52%|█████▏ | 115/220 [00:20<00:13, 7.75it/s]
462
 
463
+
464
  52%|█████▏ | 115/220 [00:20<00:13, 7.75it/s]
465
  54%|█████▎ | 118/220 [00:21<00:11, 9.11it/s]
466
 
467
+
468
  54%|█████▎ | 118/220 [00:21<00:11, 9.11it/s]
469
 
470
+
471
  54%|█████▎ | 118/220 [00:21<00:11, 9.11it/s]
472
  55%|█████▍ | 120/220 [00:21<00:09, 10.34it/s]
473
 
474
+
475
  55%|█████▍ | 120/220 [00:21<00:09, 10.34it/s]
476
 
477
+
478
  55%|█████▍ | 120/220 [00:21<00:09, 10.34it/s]
479
  55%|█████▌ | 122/220 [00:21<00:09, 10.71it/s]
480
 
481
+
482
  55%|█████▌ | 122/220 [00:21<00:09, 10.71it/s]
483
 
484
+
485
  55%|█████▌ | 122/220 [00:21<00:09, 10.71it/s]
486
  56%|█████▋ | 124/220 [00:21<00:12, 7.76it/s]
487
 
488
+
489
  56%|█████▋ | 124/220 [00:21<00:12, 7.76it/s]
490
 
491
+
492
  56%|█████▋ | 124/220 [00:21<00:12, 7.76it/s]
493
 
494
+
495
  56%|█████▋ | 124/220 [00:21<00:12, 7.76it/s]
496
  58%|█████▊ | 127/220 [00:22<00:10, 9.13it/s]
497
 
498
+
499
  58%|█████▊ | 127/220 [00:22<00:10, 9.13it/s]
500
 
501
+
502
  58%|█████▊ | 127/220 [00:22<00:10, 9.13it/s]
503
  59%|█████▊ | 129/220 [00:22<00:08, 10.36it/s]
504
 
505
+
506
  59%|█████▊ | 129/220 [00:22<00:08, 10.36it/s]
507
 
508
+
509
  59%|█████▊ | 129/220 [00:22<00:08, 10.36it/s]
510
  60%|█████▉ | 131/220 [00:22<00:08, 10.73it/s]
511
 
512
+
513
  60%|█████▉ | 131/220 [00:22<00:08, 10.73it/s]
514
 
515
+
516
  60%|█████▉ | 131/220 [00:22<00:08, 10.73it/s]
517
  60%|██████ | 133/220 [00:22<00:11, 7.77it/s]
518
 
519
+
520
  60%|██████ | 133/220 [00:22<00:11, 7.77it/s]
521
 
522
+
523
  60%|██████ | 133/220 [00:22<00:11, 7.77it/s]
524
 
525
+
526
  60%|██████ | 133/220 [00:22<00:11, 7.77it/s]
527
  62%|██████▏ | 136/220 [00:23<00:09, 9.14it/s]
528
 
529
+
530
  62%|██████▏ | 136/220 [00:23<00:09, 9.14it/s]
531
 
532
+
533
  62%|██████▏ | 136/220 [00:23<00:09, 9.14it/s]
534
  63%|██████▎ | 138/220 [00:23<00:07, 10.37it/s]
535
 
536
+
537
  63%|██████▎ | 138/220 [00:23<00:07, 10.37it/s]
538
 
539
+
540
  63%|██████▎ | 138/220 [00:23<00:07, 10.37it/s]
541
  64%|██████▎ | 140/220 [00:23<00:07, 10.74it/s]
542
 
543
+
544
  64%|██████▎ | 140/220 [00:23<00:07, 10.74it/s]
545
 
546
+
547
  64%|██████▎ | 140/220 [00:23<00:07, 10.74it/s]
548
  65%|██████▍ | 142/220 [00:23<00:10, 7.78it/s]
549
 
550
+
551
  65%|██████▍ | 142/220 [00:23<00:10, 7.78it/s]
552
 
553
+
554
  65%|██████▍ | 142/220 [00:23<00:10, 7.78it/s]
555
 
556
+
557
  65%|██████▍ | 142/220 [00:23<00:10, 7.78it/s]
558
  66%|██████▌ | 145/220 [00:24<00:08, 9.15it/s]
559
 
560
+
561
  66%|██████▌ | 145/220 [00:24<00:08, 9.15it/s]
562
 
563
+
564
  66%|██████▌ | 145/220 [00:24<00:08, 9.15it/s]
565
  67%|██████▋ | 147/220 [00:24<00:07, 10.38it/s]
566
 
567
+
568
  67%|██████▋ | 147/220 [00:24<00:07, 10.38it/s]
569
 
570
+
571
  67%|██████▋ | 147/220 [00:24<00:07, 10.38it/s]
572
  68%|██████▊ | 149/220 [00:24<00:06, 10.75it/s]
573
 
574
+
575
  68%|██████▊ | 149/220 [00:24<00:06, 10.75it/s]
576
 
577
+
578
  68%|██████▊ | 149/220 [00:24<00:06, 10.75it/s]
579
  69%|██████▊ | 151/220 [00:24<00:08, 7.78it/s]
580
 
581
+
582
  69%|██████▊ | 151/220 [00:24<00:08, 7.78it/s]
583
 
584
+
585
  69%|██████▊ | 151/220 [00:24<00:08, 7.78it/s]
586
 
587
+
588
  69%|██████▊ | 151/220 [00:24<00:08, 7.78it/s]
589
  70%|███████ | 154/220 [00:25<00:07, 9.12it/s]
590
 
591
+
592
  70%|███████ | 154/220 [00:25<00:07, 9.12it/s]
593
 
594
+
595
  70%|███████ | 154/220 [00:25<00:07, 9.12it/s]
596
  71%|███████ | 156/220 [00:25<00:06, 10.37it/s]
597
 
598
+
599
  71%|███████ | 156/220 [00:25<00:06, 10.37it/s]
600
 
601
+
602
  71%|███████ | 156/220 [00:25<00:06, 10.37it/s]
603
  72%|███████▏ | 158/220 [00:25<00:05, 10.75it/s]
604
 
605
+
606
  72%|███████▏ | 158/220 [00:25<00:05, 10.75it/s]
607
 
608
+
609
  72%|███████▏ | 158/220 [00:25<00:05, 10.75it/s]
610
  73%|███████▎ | 160/220 [00:25<00:07, 7.77it/s]
611
 
612
+
613
  73%|███████▎ | 160/220 [00:25<00:07, 7.77it/s]
614
 
615
+
616
  73%|███████▎ | 160/220 [00:25<00:07, 7.77it/s]
617
 
618
+
619
  73%|███████▎ | 160/220 [00:25<00:07, 7.77it/s]
620
  74%|███████▍ | 163/220 [00:25<00:06, 9.13it/s]
621
 
622
+
623
  74%|███████▍ | 163/220 [00:25<00:06, 9.13it/s]
624
 
625
+
626
  74%|███████▍ | 163/220 [00:26<00:06, 9.13it/s]
627
  75%|███████▌ | 165/220 [00:26<00:05, 10.37it/s]
628
 
629
+
630
  75%|███████▌ | 165/220 [00:26<00:05, 10.37it/s]
631
 
632
+
633
  75%|███████▌ | 165/220 [00:26<00:05, 10.37it/s]
634
  76%|███████▌ | 167/220 [00:26<00:04, 10.75it/s]
635
 
636
+
637
  76%|███████▌ | 167/220 [00:26<00:04, 10.75it/s]
638
 
639
+
640
  76%|███████▌ | 167/220 [00:26<00:04, 10.75it/s]
641
  77%|███████▋ | 169/220 [00:26<00:06, 7.77it/s]
642
 
643
+
644
  77%|███████▋ | 169/220 [00:26<00:06, 7.77it/s]
645
 
646
+
647
  77%|███████▋ | 169/220 [00:26<00:06, 7.77it/s]
648
 
649
+
650
  77%|███████▋ | 169/220 [00:26<00:06, 7.77it/s]
651
  78%|███████▊ | 172/220 [00:26<00:05, 9.13it/s]
652
 
653
+
654
  78%|███████▊ | 172/220 [00:26<00:05, 9.13it/s]
655
 
656
+
657
  78%|███████▊ | 172/220 [00:27<00:05, 9.13it/s]
658
  79%|███████▉ | 174/220 [00:27<00:04, 10.36it/s]
659
 
660
+
661
  79%|███████▉ | 174/220 [00:27<00:04, 10.36it/s]
662
 
663
+
664
  79%|███████▉ | 174/220 [00:27<00:04, 10.36it/s]
665
  80%|████████ | 176/220 [00:27<00:04, 10.74it/s]
666
 
667
+
668
  80%|████████ | 176/220 [00:27<00:04, 10.74it/s]
669
 
670
+
671
  80%|████████ | 176/220 [00:27<00:04, 10.74it/s]
672
  81%|████████ | 178/220 [00:27<00:05, 7.77it/s]
673
 
674
+
675
  81%|████████ | 178/220 [00:27<00:05, 7.77it/s]
676
 
677
+
678
  81%|████████ | 178/220 [00:27<00:05, 7.77it/s]
679
 
680
+
681
  81%|████████ | 178/220 [00:27<00:05, 7.77it/s]
682
  82%|████████▏ | 181/220 [00:27<00:04, 9.13it/s]
683
 
684
+
685
  82%|████████▏ | 181/220 [00:27<00:04, 9.13it/s]
686
 
687
+
688
  82%|████████▏ | 181/220 [00:28<00:04, 9.13it/s]
689
  83%|████████▎ | 183/220 [00:28<00:03, 10.37it/s]
690
 
691
+
692
  83%|████████▎ | 183/220 [00:28<00:03, 10.37it/s]
693
 
694
+
695
  83%|████████▎ | 183/220 [00:28<00:03, 10.37it/s]
696
  84%|████████▍ | 185/220 [00:28<00:03, 10.73it/s]
697
 
698
+
699
  84%|████████▍ | 185/220 [00:28<00:03, 10.73it/s]
700
 
701
+
702
  84%|████████▍ | 185/220 [00:28<00:03, 10.73it/s]
703
  85%|████████▌ | 187/220 [00:28<00:04, 7.76it/s]
704
 
705
+
706
  85%|████████▌ | 187/220 [00:28<00:04, 7.76it/s]
707
 
708
+
709
  85%|████████▌ | 187/220 [00:28<00:04, 7.76it/s]
710
 
711
+
712
  85%|████████▌ | 187/220 [00:28<00:04, 7.76it/s]
713
  86%|████████▋ | 190/220 [00:28<00:03, 9.13it/s]
714
 
715
+
716
  86%|████████▋ | 190/220 [00:28<00:03, 9.13it/s]
717
 
718
+
719
  86%|████████▋ | 190/220 [00:28<00:03, 9.13it/s]
720
  87%|████████▋ | 192/220 [00:28<00:02, 10.37it/s]
721
 
722
+
723
  87%|████████▋ | 192/220 [00:28<00:02, 10.37it/s]
724
 
725
+
726
  87%|████████▋ | 192/220 [00:29<00:02, 10.37it/s]
727
  88%|████████▊ | 194/220 [00:29<00:02, 10.73it/s]
728
 
729
+
730
  88%|████████▊ | 194/220 [00:29<00:02, 10.73it/s]
731
 
732
+
733
  88%|████████▊ | 194/220 [00:29<00:02, 10.73it/s]
734
  89%|████████▉ | 196/220 [00:29<00:03, 7.77it/s]
735
 
736
+
737
  89%|████████▉ | 196/220 [00:29<00:03, 7.77it/s]
738
 
739
+
740
  89%|████████▉ | 196/220 [00:29<00:03, 7.77it/s]
741
 
742
+
743
  89%|████████▉ | 196/220 [00:29<00:03, 7.77it/s]
744
  90%|█████████ | 199/220 [00:29<00:02, 9.14it/s]
745
 
746
+
747
  90%|█████████ | 199/220 [00:29<00:02, 9.14it/s]
748
 
749
+
750
  90%|█████████ | 199/220 [00:29<00:02, 9.14it/s]
751
  91%|█████████▏| 201/220 [00:29<00:01, 10.38it/s]
752
 
753
+
754
  91%|█████████▏| 201/220 [00:29<00:01, 10.38it/s]
755
 
756
+
757
  91%|█████████▏| 201/220 [00:29<00:01, 10.38it/s]
758
  92%|█████████▏| 203/220 [00:30<00:01, 10.74it/s]
759
 
760
+
761
  92%|█████████▏| 203/220 [00:30<00:01, 10.74it/s]
762
 
763
+
764
  92%|█████████▏| 203/220 [00:30<00:01, 10.74it/s]
765
  93%|█████████▎| 205/220 [00:30<00:01, 7.78it/s]
766
 
767
+
768
  93%|█████████▎| 205/220 [00:30<00:01, 7.78it/s]
769
 
770
+
771
  93%|█████████▎| 205/220 [00:30<00:01, 7.78it/s]
772
 
773
+
774
  93%|█████████▎| 205/220 [00:30<00:01, 7.78it/s]
775
  95%|█████████▍| 208/220 [00:30<00:01, 9.14it/s]
776
 
777
+
778
  95%|█████████▍| 208/220 [00:30<00:01, 9.14it/s]
779
 
780
+
781
  95%|█████████▍| 208/220 [00:30<00:01, 9.14it/s]
782
  95%|█████████▌| 210/220 [00:30<00:00, 10.37it/s]
783
 
784
+
785
  95%|█████████▌| 210/220 [00:30<00:00, 10.37it/s]
786
 
787
+
788
  95%|█████████▌| 210/220 [00:30<00:00, 10.37it/s]
789
  96%|█████████▋| 212/220 [00:31<00:00, 10.17it/s]
790
 
791
+
792
  96%|█████████▋| 212/220 [00:31<00:00, 10.17it/s]
793
 
794
+
795
  96%|█████████▋| 212/220 [00:31<00:00, 10.17it/s]
796
  97%|█████████▋| 214/220 [00:31<00:00, 7.56it/s]
797
 
798
+
799
  97%|█████████▋| 214/220 [00:31<00:00, 7.56it/s]
800
 
801
+
802
  97%|█████████▋| 214/220 [00:31<00:00, 7.56it/s]
803
 
804
+
805
  97%|█████████▋| 214/220 [00:31<00:00, 7.56it/s]
806
  99%|█████████▊| 217/220 [00:31<00:00, 8.96it/s]
807
 
808
+
809
  99%|█████████▊| 217/220 [00:31<00:00, 8.96it/s]
810
 
811
+
812
  99%|█████████▊| 217/220 [00:31<00:00, 8.96it/s]
813
 
814
+
815
+ [2024-05-24 18:24:35] INFO huggingface_loader.py:196: Unloading HF weight file: dist/models/stablelm-2-zephyr-1_6b/model.safetensors
816
+ [2024-05-24 18:24:35] INFO stats.py:76: Time usage: HF loading: 1.017 sec; Pre-quantization mapping: 5.571 sec; Quantization: 0.000 sec
817
+ [2024-05-24 18:24:35] INFO stats.py:90: RAM usage: Peak RAM: 3.063 GB. Total bytes loaded from disk: 3.063 GB
818
+ [2024-05-24 18:24:35] INFO convert_weight.py:155: Parameter size after quantization: 6.126 GB
819
+ [2024-05-24 18:24:35] INFO convert_weight.py:160: Total parameters: 1,644,515,328
820
+ [2024-05-24 18:24:35] INFO convert_weight.py:161: Bits per parameter: 32.000
821
+ [2024-05-24 18:24:35] INFO convert_weight.py:166: Saved to directory: /tmp/tmpjjd44ai4
822
+
823
+ All finished, 75 total shards committed, record saved to /tmp/tmpjjd44ai4/ndarray-cache.json
824
+ Also saved a bf16 record to /tmp/tmpjjd44ai4/ndarray-cache-b16.json
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
mlc-chat-config.json ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "stablelm",
3
+ "quantization": "q0f32",
4
+ "model_config": {
5
+ "vocab_size": 100352,
6
+ "hidden_size": 2048,
7
+ "num_hidden_layers": 24,
8
+ "num_attention_heads": 32,
9
+ "num_key_value_heads": 32,
10
+ "layer_norm_eps": 1e-05,
11
+ "partial_rotary_factor": 0.25,
12
+ "rope_theta": 10000,
13
+ "intermediate_size": 5632,
14
+ "use_qkv_bias": true,
15
+ "head_dim": 64,
16
+ "context_window_size": 4096,
17
+ "prefill_chunk_size": 2048,
18
+ "tensor_parallel_shards": 1,
19
+ "max_batch_size": 80
20
+ },
21
+ "vocab_size": 100352,
22
+ "context_window_size": 4096,
23
+ "sliding_window_size": -1,
24
+ "prefill_chunk_size": 2048,
25
+ "attention_sink_size": -1,
26
+ "tensor_parallel_shards": 1,
27
+ "mean_gen_len": 128,
28
+ "max_gen_len": 512,
29
+ "shift_fill_factor": 0.3,
30
+ "temperature": 0.7,
31
+ "presence_penalty": 0.0,
32
+ "frequency_penalty": 0.0,
33
+ "repetition_penalty": 1.0,
34
+ "top_p": 0.95,
35
+ "conv_template": {
36
+ "name": "stablelm-2",
37
+ "system_template": "{system_message}",
38
+ "system_message": "",
39
+ "add_role_after_system_message": true,
40
+ "roles": {
41
+ "user": "<|user|>",
42
+ "assistant": "<|assistant|>"
43
+ },
44
+ "role_templates": {
45
+ "user": "{user_message}",
46
+ "assistant": "{assistant_message}",
47
+ "tool": "{tool_message}"
48
+ },
49
+ "messages": [],
50
+ "seps": [
51
+ "<|endoftext|>",
52
+ "<|endoftext|>"
53
+ ],
54
+ "role_content_sep": "\n",
55
+ "role_empty_sep": "\n",
56
+ "stop_str": [
57
+ "<|endoftext|>"
58
+ ],
59
+ "stop_token_ids": [
60
+ 100257
61
+ ],
62
+ "function_string": "",
63
+ "use_function_calling": false
64
+ },
65
+ "pad_token_id": 0,
66
+ "bos_token_id": 100257,
67
+ "eos_token_id": 100257,
68
+ "tokenizer_files": [
69
+ "tokenizer.json",
70
+ "vocab.json",
71
+ "merges.txt",
72
+ "tokenizer_config.json"
73
+ ],
74
+ "token_table_postproc_method": "byte_level",
75
+ "version": "0.1.0"
76
+ }
ndarray-cache-b16.json ADDED
@@ -0,0 +1,2907 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "ParamSize": 220,
4
+ "ParamBytes": 6578061312.0,
5
+ "BitsPerParam": 32.0
6
+ },
7
+ "records": [
8
+ {
9
+ "dataPath": "params_shard_0.bin",
10
+ "format": "raw-shard",
11
+ "nbytes": 411041792,
12
+ "records": [
13
+ {
14
+ "name": "lm_head.weight",
15
+ "shape": [
16
+ 100352,
17
+ 2048
18
+ ],
19
+ "dtype": "bfloat16",
20
+ "format": "raw",
21
+ "nbytes": 411041792,
22
+ "byteOffset": 0
23
+ }
24
+ ],
25
+ "md5sum": "8feaa03c43296af6d56d6ae34d79ff49"
26
+ },
27
+ {
28
+ "dataPath": "params_shard_1.bin",
29
+ "format": "raw-shard",
30
+ "nbytes": 411041792,
31
+ "records": [
32
+ {
33
+ "name": "model.embed_tokens.weight",
34
+ "shape": [
35
+ 100352,
36
+ 2048
37
+ ],
38
+ "dtype": "bfloat16",
39
+ "format": "raw",
40
+ "nbytes": 411041792,
41
+ "byteOffset": 0
42
+ }
43
+ ],
44
+ "md5sum": "6e70bbcc4f9a5ebec925c6ce74a85d6e"
45
+ },
46
+ {
47
+ "dataPath": "params_shard_2.bin",
48
+ "format": "raw-shard",
49
+ "nbytes": 46137344,
50
+ "records": [
51
+ {
52
+ "name": "model.layers.0.mlp.gate_up_proj.weight",
53
+ "shape": [
54
+ 11264,
55
+ 2048
56
+ ],
57
+ "dtype": "bfloat16",
58
+ "format": "raw",
59
+ "nbytes": 46137344,
60
+ "byteOffset": 0
61
+ }
62
+ ],
63
+ "md5sum": "1ee2c21c4c42bbfd1a96723eac48ed6f"
64
+ },
65
+ {
66
+ "dataPath": "params_shard_3.bin",
67
+ "format": "raw-shard",
68
+ "nbytes": 25165824,
69
+ "records": [
70
+ {
71
+ "name": "model.layers.0.self_attn.qkv_proj.weight",
72
+ "shape": [
73
+ 6144,
74
+ 2048
75
+ ],
76
+ "dtype": "bfloat16",
77
+ "format": "raw",
78
+ "nbytes": 25165824,
79
+ "byteOffset": 0
80
+ }
81
+ ],
82
+ "md5sum": "8650fd58ae045e16af2dc1f9fdbc7c60"
83
+ },
84
+ {
85
+ "dataPath": "params_shard_4.bin",
86
+ "format": "raw-shard",
87
+ "nbytes": 23068672,
88
+ "records": [
89
+ {
90
+ "name": "model.layers.1.mlp.down_proj.weight",
91
+ "shape": [
92
+ 2048,
93
+ 5632
94
+ ],
95
+ "dtype": "bfloat16",
96
+ "format": "raw",
97
+ "nbytes": 23068672,
98
+ "byteOffset": 0
99
+ }
100
+ ],
101
+ "md5sum": "10234dc2b68ee4e0828d1e7a8751562c"
102
+ },
103
+ {
104
+ "dataPath": "params_shard_5.bin",
105
+ "format": "raw-shard",
106
+ "nbytes": 46137344,
107
+ "records": [
108
+ {
109
+ "name": "model.layers.1.mlp.gate_up_proj.weight",
110
+ "shape": [
111
+ 11264,
112
+ 2048
113
+ ],
114
+ "dtype": "bfloat16",
115
+ "format": "raw",
116
+ "nbytes": 46137344,
117
+ "byteOffset": 0
118
+ }
119
+ ],
120
+ "md5sum": "fa1fc18500fca5973ad17d3129d76d1c"
121
+ },
122
+ {
123
+ "dataPath": "params_shard_6.bin",
124
+ "format": "raw-shard",
125
+ "nbytes": 25165824,
126
+ "records": [
127
+ {
128
+ "name": "model.layers.1.self_attn.qkv_proj.weight",
129
+ "shape": [
130
+ 6144,
131
+ 2048
132
+ ],
133
+ "dtype": "bfloat16",
134
+ "format": "raw",
135
+ "nbytes": 25165824,
136
+ "byteOffset": 0
137
+ }
138
+ ],
139
+ "md5sum": "b0aa899c673432f2ab23655f1779b287"
140
+ },
141
+ {
142
+ "dataPath": "params_shard_7.bin",
143
+ "format": "raw-shard",
144
+ "nbytes": 31514624,
145
+ "records": [
146
+ {
147
+ "name": "model.layers.0.input_layernorm.bias",
148
+ "shape": [
149
+ 2048
150
+ ],
151
+ "dtype": "bfloat16",
152
+ "format": "raw",
153
+ "nbytes": 4096,
154
+ "byteOffset": 0
155
+ },
156
+ {
157
+ "name": "model.layers.0.input_layernorm.weight",
158
+ "shape": [
159
+ 2048
160
+ ],
161
+ "dtype": "bfloat16",
162
+ "format": "raw",
163
+ "nbytes": 4096,
164
+ "byteOffset": 4096
165
+ },
166
+ {
167
+ "name": "model.layers.0.mlp.down_proj.weight",
168
+ "shape": [
169
+ 2048,
170
+ 5632
171
+ ],
172
+ "dtype": "bfloat16",
173
+ "format": "raw",
174
+ "nbytes": 23068672,
175
+ "byteOffset": 8192
176
+ },
177
+ {
178
+ "name": "model.layers.0.post_attention_layernorm.bias",
179
+ "shape": [
180
+ 2048
181
+ ],
182
+ "dtype": "bfloat16",
183
+ "format": "raw",
184
+ "nbytes": 4096,
185
+ "byteOffset": 23076864
186
+ },
187
+ {
188
+ "name": "model.layers.0.post_attention_layernorm.weight",
189
+ "shape": [
190
+ 2048
191
+ ],
192
+ "dtype": "bfloat16",
193
+ "format": "raw",
194
+ "nbytes": 4096,
195
+ "byteOffset": 23080960
196
+ },
197
+ {
198
+ "name": "model.layers.0.self_attn.qkv_proj.bias",
199
+ "shape": [
200
+ 6144
201
+ ],
202
+ "dtype": "bfloat16",
203
+ "format": "raw",
204
+ "nbytes": 12288,
205
+ "byteOffset": 23085056
206
+ },
207
+ {
208
+ "name": "model.layers.0.self_attn.o_proj.weight",
209
+ "shape": [
210
+ 2048,
211
+ 2048
212
+ ],
213
+ "dtype": "bfloat16",
214
+ "format": "raw",
215
+ "nbytes": 8388608,
216
+ "byteOffset": 23097344
217
+ },
218
+ {
219
+ "name": "model.layers.1.input_layernorm.bias",
220
+ "shape": [
221
+ 2048
222
+ ],
223
+ "dtype": "bfloat16",
224
+ "format": "raw",
225
+ "nbytes": 4096,
226
+ "byteOffset": 31485952
227
+ },
228
+ {
229
+ "name": "model.layers.1.input_layernorm.weight",
230
+ "shape": [
231
+ 2048
232
+ ],
233
+ "dtype": "bfloat16",
234
+ "format": "raw",
235
+ "nbytes": 4096,
236
+ "byteOffset": 31490048
237
+ },
238
+ {
239
+ "name": "model.layers.1.post_attention_layernorm.bias",
240
+ "shape": [
241
+ 2048
242
+ ],
243
+ "dtype": "bfloat16",
244
+ "format": "raw",
245
+ "nbytes": 4096,
246
+ "byteOffset": 31494144
247
+ },
248
+ {
249
+ "name": "model.layers.1.post_attention_layernorm.weight",
250
+ "shape": [
251
+ 2048
252
+ ],
253
+ "dtype": "bfloat16",
254
+ "format": "raw",
255
+ "nbytes": 4096,
256
+ "byteOffset": 31498240
257
+ },
258
+ {
259
+ "name": "model.layers.1.self_attn.qkv_proj.bias",
260
+ "shape": [
261
+ 6144
262
+ ],
263
+ "dtype": "bfloat16",
264
+ "format": "raw",
265
+ "nbytes": 12288,
266
+ "byteOffset": 31502336
267
+ }
268
+ ],
269
+ "md5sum": "a366140336e0fc5625f067a9715bbf3f"
270
+ },
271
+ {
272
+ "dataPath": "params_shard_8.bin",
273
+ "format": "raw-shard",
274
+ "nbytes": 46137344,
275
+ "records": [
276
+ {
277
+ "name": "model.layers.10.mlp.gate_up_proj.weight",
278
+ "shape": [
279
+ 11264,
280
+ 2048
281
+ ],
282
+ "dtype": "bfloat16",
283
+ "format": "raw",
284
+ "nbytes": 46137344,
285
+ "byteOffset": 0
286
+ }
287
+ ],
288
+ "md5sum": "76709d8055c55eff2616cd2ee3fbb222"
289
+ },
290
+ {
291
+ "dataPath": "params_shard_9.bin",
292
+ "format": "raw-shard",
293
+ "nbytes": 25165824,
294
+ "records": [
295
+ {
296
+ "name": "model.layers.10.self_attn.qkv_proj.weight",
297
+ "shape": [
298
+ 6144,
299
+ 2048
300
+ ],
301
+ "dtype": "bfloat16",
302
+ "format": "raw",
303
+ "nbytes": 25165824,
304
+ "byteOffset": 0
305
+ }
306
+ ],
307
+ "md5sum": "2157fce09a8f8aaaa3762fcb9646afcf"
308
+ },
309
+ {
310
+ "dataPath": "params_shard_10.bin",
311
+ "format": "raw-shard",
312
+ "nbytes": 31485952,
313
+ "records": [
314
+ {
315
+ "name": "model.layers.1.self_attn.o_proj.weight",
316
+ "shape": [
317
+ 2048,
318
+ 2048
319
+ ],
320
+ "dtype": "bfloat16",
321
+ "format": "raw",
322
+ "nbytes": 8388608,
323
+ "byteOffset": 0
324
+ },
325
+ {
326
+ "name": "model.layers.10.input_layernorm.bias",
327
+ "shape": [
328
+ 2048
329
+ ],
330
+ "dtype": "bfloat16",
331
+ "format": "raw",
332
+ "nbytes": 4096,
333
+ "byteOffset": 8388608
334
+ },
335
+ {
336
+ "name": "model.layers.10.input_layernorm.weight",
337
+ "shape": [
338
+ 2048
339
+ ],
340
+ "dtype": "bfloat16",
341
+ "format": "raw",
342
+ "nbytes": 4096,
343
+ "byteOffset": 8392704
344
+ },
345
+ {
346
+ "name": "model.layers.10.mlp.down_proj.weight",
347
+ "shape": [
348
+ 2048,
349
+ 5632
350
+ ],
351
+ "dtype": "bfloat16",
352
+ "format": "raw",
353
+ "nbytes": 23068672,
354
+ "byteOffset": 8396800
355
+ },
356
+ {
357
+ "name": "model.layers.10.post_attention_layernorm.bias",
358
+ "shape": [
359
+ 2048
360
+ ],
361
+ "dtype": "bfloat16",
362
+ "format": "raw",
363
+ "nbytes": 4096,
364
+ "byteOffset": 31465472
365
+ },
366
+ {
367
+ "name": "model.layers.10.post_attention_layernorm.weight",
368
+ "shape": [
369
+ 2048
370
+ ],
371
+ "dtype": "bfloat16",
372
+ "format": "raw",
373
+ "nbytes": 4096,
374
+ "byteOffset": 31469568
375
+ },
376
+ {
377
+ "name": "model.layers.10.self_attn.qkv_proj.bias",
378
+ "shape": [
379
+ 6144
380
+ ],
381
+ "dtype": "bfloat16",
382
+ "format": "raw",
383
+ "nbytes": 12288,
384
+ "byteOffset": 31473664
385
+ }
386
+ ],
387
+ "md5sum": "f5bb6f1e81f74526d8d921862d150950"
388
+ },
389
+ {
390
+ "dataPath": "params_shard_11.bin",
391
+ "format": "raw-shard",
392
+ "nbytes": 46137344,
393
+ "records": [
394
+ {
395
+ "name": "model.layers.11.mlp.gate_up_proj.weight",
396
+ "shape": [
397
+ 11264,
398
+ 2048
399
+ ],
400
+ "dtype": "bfloat16",
401
+ "format": "raw",
402
+ "nbytes": 46137344,
403
+ "byteOffset": 0
404
+ }
405
+ ],
406
+ "md5sum": "6366654f8345be2accfc828ff5be0a2f"
407
+ },
408
+ {
409
+ "dataPath": "params_shard_12.bin",
410
+ "format": "raw-shard",
411
+ "nbytes": 25165824,
412
+ "records": [
413
+ {
414
+ "name": "model.layers.11.self_attn.qkv_proj.weight",
415
+ "shape": [
416
+ 6144,
417
+ 2048
418
+ ],
419
+ "dtype": "bfloat16",
420
+ "format": "raw",
421
+ "nbytes": 25165824,
422
+ "byteOffset": 0
423
+ }
424
+ ],
425
+ "md5sum": "ddeaf590f32dbad9675ffb4f161aa468"
426
+ },
427
+ {
428
+ "dataPath": "params_shard_13.bin",
429
+ "format": "raw-shard",
430
+ "nbytes": 31485952,
431
+ "records": [
432
+ {
433
+ "name": "model.layers.10.self_attn.o_proj.weight",
434
+ "shape": [
435
+ 2048,
436
+ 2048
437
+ ],
438
+ "dtype": "bfloat16",
439
+ "format": "raw",
440
+ "nbytes": 8388608,
441
+ "byteOffset": 0
442
+ },
443
+ {
444
+ "name": "model.layers.11.input_layernorm.bias",
445
+ "shape": [
446
+ 2048
447
+ ],
448
+ "dtype": "bfloat16",
449
+ "format": "raw",
450
+ "nbytes": 4096,
451
+ "byteOffset": 8388608
452
+ },
453
+ {
454
+ "name": "model.layers.11.input_layernorm.weight",
455
+ "shape": [
456
+ 2048
457
+ ],
458
+ "dtype": "bfloat16",
459
+ "format": "raw",
460
+ "nbytes": 4096,
461
+ "byteOffset": 8392704
462
+ },
463
+ {
464
+ "name": "model.layers.11.mlp.down_proj.weight",
465
+ "shape": [
466
+ 2048,
467
+ 5632
468
+ ],
469
+ "dtype": "bfloat16",
470
+ "format": "raw",
471
+ "nbytes": 23068672,
472
+ "byteOffset": 8396800
473
+ },
474
+ {
475
+ "name": "model.layers.11.post_attention_layernorm.bias",
476
+ "shape": [
477
+ 2048
478
+ ],
479
+ "dtype": "bfloat16",
480
+ "format": "raw",
481
+ "nbytes": 4096,
482
+ "byteOffset": 31465472
483
+ },
484
+ {
485
+ "name": "model.layers.11.post_attention_layernorm.weight",
486
+ "shape": [
487
+ 2048
488
+ ],
489
+ "dtype": "bfloat16",
490
+ "format": "raw",
491
+ "nbytes": 4096,
492
+ "byteOffset": 31469568
493
+ },
494
+ {
495
+ "name": "model.layers.11.self_attn.qkv_proj.bias",
496
+ "shape": [
497
+ 6144
498
+ ],
499
+ "dtype": "bfloat16",
500
+ "format": "raw",
501
+ "nbytes": 12288,
502
+ "byteOffset": 31473664
503
+ }
504
+ ],
505
+ "md5sum": "861caedc6a61e8de4eb1c73344b8b352"
506
+ },
507
+ {
508
+ "dataPath": "params_shard_14.bin",
509
+ "format": "raw-shard",
510
+ "nbytes": 46137344,
511
+ "records": [
512
+ {
513
+ "name": "model.layers.12.mlp.gate_up_proj.weight",
514
+ "shape": [
515
+ 11264,
516
+ 2048
517
+ ],
518
+ "dtype": "bfloat16",
519
+ "format": "raw",
520
+ "nbytes": 46137344,
521
+ "byteOffset": 0
522
+ }
523
+ ],
524
+ "md5sum": "54f1342270651067c9698809b8130756"
525
+ },
526
+ {
527
+ "dataPath": "params_shard_15.bin",
528
+ "format": "raw-shard",
529
+ "nbytes": 25165824,
530
+ "records": [
531
+ {
532
+ "name": "model.layers.12.self_attn.qkv_proj.weight",
533
+ "shape": [
534
+ 6144,
535
+ 2048
536
+ ],
537
+ "dtype": "bfloat16",
538
+ "format": "raw",
539
+ "nbytes": 25165824,
540
+ "byteOffset": 0
541
+ }
542
+ ],
543
+ "md5sum": "8ba339fa062866f94263c2f7ca7f7a6a"
544
+ },
545
+ {
546
+ "dataPath": "params_shard_16.bin",
547
+ "format": "raw-shard",
548
+ "nbytes": 31485952,
549
+ "records": [
550
+ {
551
+ "name": "model.layers.11.self_attn.o_proj.weight",
552
+ "shape": [
553
+ 2048,
554
+ 2048
555
+ ],
556
+ "dtype": "bfloat16",
557
+ "format": "raw",
558
+ "nbytes": 8388608,
559
+ "byteOffset": 0
560
+ },
561
+ {
562
+ "name": "model.layers.12.input_layernorm.bias",
563
+ "shape": [
564
+ 2048
565
+ ],
566
+ "dtype": "bfloat16",
567
+ "format": "raw",
568
+ "nbytes": 4096,
569
+ "byteOffset": 8388608
570
+ },
571
+ {
572
+ "name": "model.layers.12.input_layernorm.weight",
573
+ "shape": [
574
+ 2048
575
+ ],
576
+ "dtype": "bfloat16",
577
+ "format": "raw",
578
+ "nbytes": 4096,
579
+ "byteOffset": 8392704
580
+ },
581
+ {
582
+ "name": "model.layers.12.mlp.down_proj.weight",
583
+ "shape": [
584
+ 2048,
585
+ 5632
586
+ ],
587
+ "dtype": "bfloat16",
588
+ "format": "raw",
589
+ "nbytes": 23068672,
590
+ "byteOffset": 8396800
591
+ },
592
+ {
593
+ "name": "model.layers.12.post_attention_layernorm.bias",
594
+ "shape": [
595
+ 2048
596
+ ],
597
+ "dtype": "bfloat16",
598
+ "format": "raw",
599
+ "nbytes": 4096,
600
+ "byteOffset": 31465472
601
+ },
602
+ {
603
+ "name": "model.layers.12.post_attention_layernorm.weight",
604
+ "shape": [
605
+ 2048
606
+ ],
607
+ "dtype": "bfloat16",
608
+ "format": "raw",
609
+ "nbytes": 4096,
610
+ "byteOffset": 31469568
611
+ },
612
+ {
613
+ "name": "model.layers.12.self_attn.qkv_proj.bias",
614
+ "shape": [
615
+ 6144
616
+ ],
617
+ "dtype": "bfloat16",
618
+ "format": "raw",
619
+ "nbytes": 12288,
620
+ "byteOffset": 31473664
621
+ }
622
+ ],
623
+ "md5sum": "79585b00793455bac8b7f41eb03a6531"
624
+ },
625
+ {
626
+ "dataPath": "params_shard_17.bin",
627
+ "format": "raw-shard",
628
+ "nbytes": 46137344,
629
+ "records": [
630
+ {
631
+ "name": "model.layers.13.mlp.gate_up_proj.weight",
632
+ "shape": [
633
+ 11264,
634
+ 2048
635
+ ],
636
+ "dtype": "bfloat16",
637
+ "format": "raw",
638
+ "nbytes": 46137344,
639
+ "byteOffset": 0
640
+ }
641
+ ],
642
+ "md5sum": "cb530c4b04e5523a06192b121beee591"
643
+ },
644
+ {
645
+ "dataPath": "params_shard_18.bin",
646
+ "format": "raw-shard",
647
+ "nbytes": 25165824,
648
+ "records": [
649
+ {
650
+ "name": "model.layers.13.self_attn.qkv_proj.weight",
651
+ "shape": [
652
+ 6144,
653
+ 2048
654
+ ],
655
+ "dtype": "bfloat16",
656
+ "format": "raw",
657
+ "nbytes": 25165824,
658
+ "byteOffset": 0
659
+ }
660
+ ],
661
+ "md5sum": "42561efa3873c8352ea1400b11c4eb87"
662
+ },
663
+ {
664
+ "dataPath": "params_shard_19.bin",
665
+ "format": "raw-shard",
666
+ "nbytes": 31485952,
667
+ "records": [
668
+ {
669
+ "name": "model.layers.12.self_attn.o_proj.weight",
670
+ "shape": [
671
+ 2048,
672
+ 2048
673
+ ],
674
+ "dtype": "bfloat16",
675
+ "format": "raw",
676
+ "nbytes": 8388608,
677
+ "byteOffset": 0
678
+ },
679
+ {
680
+ "name": "model.layers.13.input_layernorm.bias",
681
+ "shape": [
682
+ 2048
683
+ ],
684
+ "dtype": "bfloat16",
685
+ "format": "raw",
686
+ "nbytes": 4096,
687
+ "byteOffset": 8388608
688
+ },
689
+ {
690
+ "name": "model.layers.13.input_layernorm.weight",
691
+ "shape": [
692
+ 2048
693
+ ],
694
+ "dtype": "bfloat16",
695
+ "format": "raw",
696
+ "nbytes": 4096,
697
+ "byteOffset": 8392704
698
+ },
699
+ {
700
+ "name": "model.layers.13.mlp.down_proj.weight",
701
+ "shape": [
702
+ 2048,
703
+ 5632
704
+ ],
705
+ "dtype": "bfloat16",
706
+ "format": "raw",
707
+ "nbytes": 23068672,
708
+ "byteOffset": 8396800
709
+ },
710
+ {
711
+ "name": "model.layers.13.post_attention_layernorm.bias",
712
+ "shape": [
713
+ 2048
714
+ ],
715
+ "dtype": "bfloat16",
716
+ "format": "raw",
717
+ "nbytes": 4096,
718
+ "byteOffset": 31465472
719
+ },
720
+ {
721
+ "name": "model.layers.13.post_attention_layernorm.weight",
722
+ "shape": [
723
+ 2048
724
+ ],
725
+ "dtype": "bfloat16",
726
+ "format": "raw",
727
+ "nbytes": 4096,
728
+ "byteOffset": 31469568
729
+ },
730
+ {
731
+ "name": "model.layers.13.self_attn.qkv_proj.bias",
732
+ "shape": [
733
+ 6144
734
+ ],
735
+ "dtype": "bfloat16",
736
+ "format": "raw",
737
+ "nbytes": 12288,
738
+ "byteOffset": 31473664
739
+ }
740
+ ],
741
+ "md5sum": "f270a9de3e40c9bb3289c47d3975e489"
742
+ },
743
+ {
744
+ "dataPath": "params_shard_20.bin",
745
+ "format": "raw-shard",
746
+ "nbytes": 46137344,
747
+ "records": [
748
+ {
749
+ "name": "model.layers.14.mlp.gate_up_proj.weight",
750
+ "shape": [
751
+ 11264,
752
+ 2048
753
+ ],
754
+ "dtype": "bfloat16",
755
+ "format": "raw",
756
+ "nbytes": 46137344,
757
+ "byteOffset": 0
758
+ }
759
+ ],
760
+ "md5sum": "96ac57d821a2148d40bf5da25776db0a"
761
+ },
762
+ {
763
+ "dataPath": "params_shard_21.bin",
764
+ "format": "raw-shard",
765
+ "nbytes": 25165824,
766
+ "records": [
767
+ {
768
+ "name": "model.layers.14.self_attn.qkv_proj.weight",
769
+ "shape": [
770
+ 6144,
771
+ 2048
772
+ ],
773
+ "dtype": "bfloat16",
774
+ "format": "raw",
775
+ "nbytes": 25165824,
776
+ "byteOffset": 0
777
+ }
778
+ ],
779
+ "md5sum": "535b088c9a11993bc5716e8202abba1e"
780
+ },
781
+ {
782
+ "dataPath": "params_shard_22.bin",
783
+ "format": "raw-shard",
784
+ "nbytes": 31485952,
785
+ "records": [
786
+ {
787
+ "name": "model.layers.13.self_attn.o_proj.weight",
788
+ "shape": [
789
+ 2048,
790
+ 2048
791
+ ],
792
+ "dtype": "bfloat16",
793
+ "format": "raw",
794
+ "nbytes": 8388608,
795
+ "byteOffset": 0
796
+ },
797
+ {
798
+ "name": "model.layers.14.input_layernorm.bias",
799
+ "shape": [
800
+ 2048
801
+ ],
802
+ "dtype": "bfloat16",
803
+ "format": "raw",
804
+ "nbytes": 4096,
805
+ "byteOffset": 8388608
806
+ },
807
+ {
808
+ "name": "model.layers.14.input_layernorm.weight",
809
+ "shape": [
810
+ 2048
811
+ ],
812
+ "dtype": "bfloat16",
813
+ "format": "raw",
814
+ "nbytes": 4096,
815
+ "byteOffset": 8392704
816
+ },
817
+ {
818
+ "name": "model.layers.14.mlp.down_proj.weight",
819
+ "shape": [
820
+ 2048,
821
+ 5632
822
+ ],
823
+ "dtype": "bfloat16",
824
+ "format": "raw",
825
+ "nbytes": 23068672,
826
+ "byteOffset": 8396800
827
+ },
828
+ {
829
+ "name": "model.layers.14.post_attention_layernorm.bias",
830
+ "shape": [
831
+ 2048
832
+ ],
833
+ "dtype": "bfloat16",
834
+ "format": "raw",
835
+ "nbytes": 4096,
836
+ "byteOffset": 31465472
837
+ },
838
+ {
839
+ "name": "model.layers.14.post_attention_layernorm.weight",
840
+ "shape": [
841
+ 2048
842
+ ],
843
+ "dtype": "bfloat16",
844
+ "format": "raw",
845
+ "nbytes": 4096,
846
+ "byteOffset": 31469568
847
+ },
848
+ {
849
+ "name": "model.layers.14.self_attn.qkv_proj.bias",
850
+ "shape": [
851
+ 6144
852
+ ],
853
+ "dtype": "bfloat16",
854
+ "format": "raw",
855
+ "nbytes": 12288,
856
+ "byteOffset": 31473664
857
+ }
858
+ ],
859
+ "md5sum": "39bbf585ed946719daa1b63ca36ff72a"
860
+ },
861
+ {
862
+ "dataPath": "params_shard_23.bin",
863
+ "format": "raw-shard",
864
+ "nbytes": 46137344,
865
+ "records": [
866
+ {
867
+ "name": "model.layers.15.mlp.gate_up_proj.weight",
868
+ "shape": [
869
+ 11264,
870
+ 2048
871
+ ],
872
+ "dtype": "bfloat16",
873
+ "format": "raw",
874
+ "nbytes": 46137344,
875
+ "byteOffset": 0
876
+ }
877
+ ],
878
+ "md5sum": "cc11aba9e48fe3e0ff913800e04bbc3d"
879
+ },
880
+ {
881
+ "dataPath": "params_shard_24.bin",
882
+ "format": "raw-shard",
883
+ "nbytes": 25165824,
884
+ "records": [
885
+ {
886
+ "name": "model.layers.15.self_attn.qkv_proj.weight",
887
+ "shape": [
888
+ 6144,
889
+ 2048
890
+ ],
891
+ "dtype": "bfloat16",
892
+ "format": "raw",
893
+ "nbytes": 25165824,
894
+ "byteOffset": 0
895
+ }
896
+ ],
897
+ "md5sum": "b8aba12c02a13c086f49ba2c0d08f534"
898
+ },
899
+ {
900
+ "dataPath": "params_shard_25.bin",
901
+ "format": "raw-shard",
902
+ "nbytes": 31485952,
903
+ "records": [
904
+ {
905
+ "name": "model.layers.14.self_attn.o_proj.weight",
906
+ "shape": [
907
+ 2048,
908
+ 2048
909
+ ],
910
+ "dtype": "bfloat16",
911
+ "format": "raw",
912
+ "nbytes": 8388608,
913
+ "byteOffset": 0
914
+ },
915
+ {
916
+ "name": "model.layers.15.input_layernorm.bias",
917
+ "shape": [
918
+ 2048
919
+ ],
920
+ "dtype": "bfloat16",
921
+ "format": "raw",
922
+ "nbytes": 4096,
923
+ "byteOffset": 8388608
924
+ },
925
+ {
926
+ "name": "model.layers.15.input_layernorm.weight",
927
+ "shape": [
928
+ 2048
929
+ ],
930
+ "dtype": "bfloat16",
931
+ "format": "raw",
932
+ "nbytes": 4096,
933
+ "byteOffset": 8392704
934
+ },
935
+ {
936
+ "name": "model.layers.15.mlp.down_proj.weight",
937
+ "shape": [
938
+ 2048,
939
+ 5632
940
+ ],
941
+ "dtype": "bfloat16",
942
+ "format": "raw",
943
+ "nbytes": 23068672,
944
+ "byteOffset": 8396800
945
+ },
946
+ {
947
+ "name": "model.layers.15.post_attention_layernorm.bias",
948
+ "shape": [
949
+ 2048
950
+ ],
951
+ "dtype": "bfloat16",
952
+ "format": "raw",
953
+ "nbytes": 4096,
954
+ "byteOffset": 31465472
955
+ },
956
+ {
957
+ "name": "model.layers.15.post_attention_layernorm.weight",
958
+ "shape": [
959
+ 2048
960
+ ],
961
+ "dtype": "bfloat16",
962
+ "format": "raw",
963
+ "nbytes": 4096,
964
+ "byteOffset": 31469568
965
+ },
966
+ {
967
+ "name": "model.layers.15.self_attn.qkv_proj.bias",
968
+ "shape": [
969
+ 6144
970
+ ],
971
+ "dtype": "bfloat16",
972
+ "format": "raw",
973
+ "nbytes": 12288,
974
+ "byteOffset": 31473664
975
+ }
976
+ ],
977
+ "md5sum": "c348dfe6bca6a8f5dd9f16a62cf76c4f"
978
+ },
979
+ {
980
+ "dataPath": "params_shard_26.bin",
981
+ "format": "raw-shard",
982
+ "nbytes": 46137344,
983
+ "records": [
984
+ {
985
+ "name": "model.layers.16.mlp.gate_up_proj.weight",
986
+ "shape": [
987
+ 11264,
988
+ 2048
989
+ ],
990
+ "dtype": "bfloat16",
991
+ "format": "raw",
992
+ "nbytes": 46137344,
993
+ "byteOffset": 0
994
+ }
995
+ ],
996
+ "md5sum": "627233672b35ee886132e8a36824eaa0"
997
+ },
998
+ {
999
+ "dataPath": "params_shard_27.bin",
1000
+ "format": "raw-shard",
1001
+ "nbytes": 25165824,
1002
+ "records": [
1003
+ {
1004
+ "name": "model.layers.16.self_attn.qkv_proj.weight",
1005
+ "shape": [
1006
+ 6144,
1007
+ 2048
1008
+ ],
1009
+ "dtype": "bfloat16",
1010
+ "format": "raw",
1011
+ "nbytes": 25165824,
1012
+ "byteOffset": 0
1013
+ }
1014
+ ],
1015
+ "md5sum": "87eac07c30d3da72dde134591c228061"
1016
+ },
1017
+ {
1018
+ "dataPath": "params_shard_28.bin",
1019
+ "format": "raw-shard",
1020
+ "nbytes": 31485952,
1021
+ "records": [
1022
+ {
1023
+ "name": "model.layers.15.self_attn.o_proj.weight",
1024
+ "shape": [
1025
+ 2048,
1026
+ 2048
1027
+ ],
1028
+ "dtype": "bfloat16",
1029
+ "format": "raw",
1030
+ "nbytes": 8388608,
1031
+ "byteOffset": 0
1032
+ },
1033
+ {
1034
+ "name": "model.layers.16.input_layernorm.bias",
1035
+ "shape": [
1036
+ 2048
1037
+ ],
1038
+ "dtype": "bfloat16",
1039
+ "format": "raw",
1040
+ "nbytes": 4096,
1041
+ "byteOffset": 8388608
1042
+ },
1043
+ {
1044
+ "name": "model.layers.16.input_layernorm.weight",
1045
+ "shape": [
1046
+ 2048
1047
+ ],
1048
+ "dtype": "bfloat16",
1049
+ "format": "raw",
1050
+ "nbytes": 4096,
1051
+ "byteOffset": 8392704
1052
+ },
1053
+ {
1054
+ "name": "model.layers.16.mlp.down_proj.weight",
1055
+ "shape": [
1056
+ 2048,
1057
+ 5632
1058
+ ],
1059
+ "dtype": "bfloat16",
1060
+ "format": "raw",
1061
+ "nbytes": 23068672,
1062
+ "byteOffset": 8396800
1063
+ },
1064
+ {
1065
+ "name": "model.layers.16.post_attention_layernorm.bias",
1066
+ "shape": [
1067
+ 2048
1068
+ ],
1069
+ "dtype": "bfloat16",
1070
+ "format": "raw",
1071
+ "nbytes": 4096,
1072
+ "byteOffset": 31465472
1073
+ },
1074
+ {
1075
+ "name": "model.layers.16.post_attention_layernorm.weight",
1076
+ "shape": [
1077
+ 2048
1078
+ ],
1079
+ "dtype": "bfloat16",
1080
+ "format": "raw",
1081
+ "nbytes": 4096,
1082
+ "byteOffset": 31469568
1083
+ },
1084
+ {
1085
+ "name": "model.layers.16.self_attn.qkv_proj.bias",
1086
+ "shape": [
1087
+ 6144
1088
+ ],
1089
+ "dtype": "bfloat16",
1090
+ "format": "raw",
1091
+ "nbytes": 12288,
1092
+ "byteOffset": 31473664
1093
+ }
1094
+ ],
1095
+ "md5sum": "a30882c3d8928cc34082b5578e15e158"
1096
+ },
1097
+ {
1098
+ "dataPath": "params_shard_29.bin",
1099
+ "format": "raw-shard",
1100
+ "nbytes": 46137344,
1101
+ "records": [
1102
+ {
1103
+ "name": "model.layers.17.mlp.gate_up_proj.weight",
1104
+ "shape": [
1105
+ 11264,
1106
+ 2048
1107
+ ],
1108
+ "dtype": "bfloat16",
1109
+ "format": "raw",
1110
+ "nbytes": 46137344,
1111
+ "byteOffset": 0
1112
+ }
1113
+ ],
1114
+ "md5sum": "29e98bbacba4fb105f50eb5fb6a587cf"
1115
+ },
1116
+ {
1117
+ "dataPath": "params_shard_30.bin",
1118
+ "format": "raw-shard",
1119
+ "nbytes": 25165824,
1120
+ "records": [
1121
+ {
1122
+ "name": "model.layers.17.self_attn.qkv_proj.weight",
1123
+ "shape": [
1124
+ 6144,
1125
+ 2048
1126
+ ],
1127
+ "dtype": "bfloat16",
1128
+ "format": "raw",
1129
+ "nbytes": 25165824,
1130
+ "byteOffset": 0
1131
+ }
1132
+ ],
1133
+ "md5sum": "d62b14f6d9e4a43032ca3f11957aa408"
1134
+ },
1135
+ {
1136
+ "dataPath": "params_shard_31.bin",
1137
+ "format": "raw-shard",
1138
+ "nbytes": 31485952,
1139
+ "records": [
1140
+ {
1141
+ "name": "model.layers.16.self_attn.o_proj.weight",
1142
+ "shape": [
1143
+ 2048,
1144
+ 2048
1145
+ ],
1146
+ "dtype": "bfloat16",
1147
+ "format": "raw",
1148
+ "nbytes": 8388608,
1149
+ "byteOffset": 0
1150
+ },
1151
+ {
1152
+ "name": "model.layers.17.input_layernorm.bias",
1153
+ "shape": [
1154
+ 2048
1155
+ ],
1156
+ "dtype": "bfloat16",
1157
+ "format": "raw",
1158
+ "nbytes": 4096,
1159
+ "byteOffset": 8388608
1160
+ },
1161
+ {
1162
+ "name": "model.layers.17.input_layernorm.weight",
1163
+ "shape": [
1164
+ 2048
1165
+ ],
1166
+ "dtype": "bfloat16",
1167
+ "format": "raw",
1168
+ "nbytes": 4096,
1169
+ "byteOffset": 8392704
1170
+ },
1171
+ {
1172
+ "name": "model.layers.17.mlp.down_proj.weight",
1173
+ "shape": [
1174
+ 2048,
1175
+ 5632
1176
+ ],
1177
+ "dtype": "bfloat16",
1178
+ "format": "raw",
1179
+ "nbytes": 23068672,
1180
+ "byteOffset": 8396800
1181
+ },
1182
+ {
1183
+ "name": "model.layers.17.post_attention_layernorm.bias",
1184
+ "shape": [
1185
+ 2048
1186
+ ],
1187
+ "dtype": "bfloat16",
1188
+ "format": "raw",
1189
+ "nbytes": 4096,
1190
+ "byteOffset": 31465472
1191
+ },
1192
+ {
1193
+ "name": "model.layers.17.post_attention_layernorm.weight",
1194
+ "shape": [
1195
+ 2048
1196
+ ],
1197
+ "dtype": "bfloat16",
1198
+ "format": "raw",
1199
+ "nbytes": 4096,
1200
+ "byteOffset": 31469568
1201
+ },
1202
+ {
1203
+ "name": "model.layers.17.self_attn.qkv_proj.bias",
1204
+ "shape": [
1205
+ 6144
1206
+ ],
1207
+ "dtype": "bfloat16",
1208
+ "format": "raw",
1209
+ "nbytes": 12288,
1210
+ "byteOffset": 31473664
1211
+ }
1212
+ ],
1213
+ "md5sum": "9042ac3354d0c7eadb8c49892880a7e2"
1214
+ },
1215
+ {
1216
+ "dataPath": "params_shard_32.bin",
1217
+ "format": "raw-shard",
1218
+ "nbytes": 46137344,
1219
+ "records": [
1220
+ {
1221
+ "name": "model.layers.18.mlp.gate_up_proj.weight",
1222
+ "shape": [
1223
+ 11264,
1224
+ 2048
1225
+ ],
1226
+ "dtype": "bfloat16",
1227
+ "format": "raw",
1228
+ "nbytes": 46137344,
1229
+ "byteOffset": 0
1230
+ }
1231
+ ],
1232
+ "md5sum": "3f0701e33136482f785a5fa0393f91e5"
1233
+ },
1234
+ {
1235
+ "dataPath": "params_shard_33.bin",
1236
+ "format": "raw-shard",
1237
+ "nbytes": 25165824,
1238
+ "records": [
1239
+ {
1240
+ "name": "model.layers.18.self_attn.qkv_proj.weight",
1241
+ "shape": [
1242
+ 6144,
1243
+ 2048
1244
+ ],
1245
+ "dtype": "bfloat16",
1246
+ "format": "raw",
1247
+ "nbytes": 25165824,
1248
+ "byteOffset": 0
1249
+ }
1250
+ ],
1251
+ "md5sum": "2edf3df335285fe75a275d71b0cf0e46"
1252
+ },
1253
+ {
1254
+ "dataPath": "params_shard_34.bin",
1255
+ "format": "raw-shard",
1256
+ "nbytes": 31485952,
1257
+ "records": [
1258
+ {
1259
+ "name": "model.layers.17.self_attn.o_proj.weight",
1260
+ "shape": [
1261
+ 2048,
1262
+ 2048
1263
+ ],
1264
+ "dtype": "bfloat16",
1265
+ "format": "raw",
1266
+ "nbytes": 8388608,
1267
+ "byteOffset": 0
1268
+ },
1269
+ {
1270
+ "name": "model.layers.18.input_layernorm.bias",
1271
+ "shape": [
1272
+ 2048
1273
+ ],
1274
+ "dtype": "bfloat16",
1275
+ "format": "raw",
1276
+ "nbytes": 4096,
1277
+ "byteOffset": 8388608
1278
+ },
1279
+ {
1280
+ "name": "model.layers.18.input_layernorm.weight",
1281
+ "shape": [
1282
+ 2048
1283
+ ],
1284
+ "dtype": "bfloat16",
1285
+ "format": "raw",
1286
+ "nbytes": 4096,
1287
+ "byteOffset": 8392704
1288
+ },
1289
+ {
1290
+ "name": "model.layers.18.mlp.down_proj.weight",
1291
+ "shape": [
1292
+ 2048,
1293
+ 5632
1294
+ ],
1295
+ "dtype": "bfloat16",
1296
+ "format": "raw",
1297
+ "nbytes": 23068672,
1298
+ "byteOffset": 8396800
1299
+ },
1300
+ {
1301
+ "name": "model.layers.18.post_attention_layernorm.bias",
1302
+ "shape": [
1303
+ 2048
1304
+ ],
1305
+ "dtype": "bfloat16",
1306
+ "format": "raw",
1307
+ "nbytes": 4096,
1308
+ "byteOffset": 31465472
1309
+ },
1310
+ {
1311
+ "name": "model.layers.18.post_attention_layernorm.weight",
1312
+ "shape": [
1313
+ 2048
1314
+ ],
1315
+ "dtype": "bfloat16",
1316
+ "format": "raw",
1317
+ "nbytes": 4096,
1318
+ "byteOffset": 31469568
1319
+ },
1320
+ {
1321
+ "name": "model.layers.18.self_attn.qkv_proj.bias",
1322
+ "shape": [
1323
+ 6144
1324
+ ],
1325
+ "dtype": "bfloat16",
1326
+ "format": "raw",
1327
+ "nbytes": 12288,
1328
+ "byteOffset": 31473664
1329
+ }
1330
+ ],
1331
+ "md5sum": "6c50305e7eff773699910ef8c2469366"
1332
+ },
1333
+ {
1334
+ "dataPath": "params_shard_35.bin",
1335
+ "format": "raw-shard",
1336
+ "nbytes": 46137344,
1337
+ "records": [
1338
+ {
1339
+ "name": "model.layers.19.mlp.gate_up_proj.weight",
1340
+ "shape": [
1341
+ 11264,
1342
+ 2048
1343
+ ],
1344
+ "dtype": "bfloat16",
1345
+ "format": "raw",
1346
+ "nbytes": 46137344,
1347
+ "byteOffset": 0
1348
+ }
1349
+ ],
1350
+ "md5sum": "4e28c8d694655bd6560df661988f48ea"
1351
+ },
1352
+ {
1353
+ "dataPath": "params_shard_36.bin",
1354
+ "format": "raw-shard",
1355
+ "nbytes": 25165824,
1356
+ "records": [
1357
+ {
1358
+ "name": "model.layers.19.self_attn.qkv_proj.weight",
1359
+ "shape": [
1360
+ 6144,
1361
+ 2048
1362
+ ],
1363
+ "dtype": "bfloat16",
1364
+ "format": "raw",
1365
+ "nbytes": 25165824,
1366
+ "byteOffset": 0
1367
+ }
1368
+ ],
1369
+ "md5sum": "9bef8b9a825ce9864f1a65dff4be96ee"
1370
+ },
1371
+ {
1372
+ "dataPath": "params_shard_37.bin",
1373
+ "format": "raw-shard",
1374
+ "nbytes": 31485952,
1375
+ "records": [
1376
+ {
1377
+ "name": "model.layers.18.self_attn.o_proj.weight",
1378
+ "shape": [
1379
+ 2048,
1380
+ 2048
1381
+ ],
1382
+ "dtype": "bfloat16",
1383
+ "format": "raw",
1384
+ "nbytes": 8388608,
1385
+ "byteOffset": 0
1386
+ },
1387
+ {
1388
+ "name": "model.layers.19.input_layernorm.bias",
1389
+ "shape": [
1390
+ 2048
1391
+ ],
1392
+ "dtype": "bfloat16",
1393
+ "format": "raw",
1394
+ "nbytes": 4096,
1395
+ "byteOffset": 8388608
1396
+ },
1397
+ {
1398
+ "name": "model.layers.19.input_layernorm.weight",
1399
+ "shape": [
1400
+ 2048
1401
+ ],
1402
+ "dtype": "bfloat16",
1403
+ "format": "raw",
1404
+ "nbytes": 4096,
1405
+ "byteOffset": 8392704
1406
+ },
1407
+ {
1408
+ "name": "model.layers.19.mlp.down_proj.weight",
1409
+ "shape": [
1410
+ 2048,
1411
+ 5632
1412
+ ],
1413
+ "dtype": "bfloat16",
1414
+ "format": "raw",
1415
+ "nbytes": 23068672,
1416
+ "byteOffset": 8396800
1417
+ },
1418
+ {
1419
+ "name": "model.layers.19.post_attention_layernorm.bias",
1420
+ "shape": [
1421
+ 2048
1422
+ ],
1423
+ "dtype": "bfloat16",
1424
+ "format": "raw",
1425
+ "nbytes": 4096,
1426
+ "byteOffset": 31465472
1427
+ },
1428
+ {
1429
+ "name": "model.layers.19.post_attention_layernorm.weight",
1430
+ "shape": [
1431
+ 2048
1432
+ ],
1433
+ "dtype": "bfloat16",
1434
+ "format": "raw",
1435
+ "nbytes": 4096,
1436
+ "byteOffset": 31469568
1437
+ },
1438
+ {
1439
+ "name": "model.layers.19.self_attn.qkv_proj.bias",
1440
+ "shape": [
1441
+ 6144
1442
+ ],
1443
+ "dtype": "bfloat16",
1444
+ "format": "raw",
1445
+ "nbytes": 12288,
1446
+ "byteOffset": 31473664
1447
+ }
1448
+ ],
1449
+ "md5sum": "bc68b257041ef3cbc95abe946a54f69b"
1450
+ },
1451
+ {
1452
+ "dataPath": "params_shard_38.bin",
1453
+ "format": "raw-shard",
1454
+ "nbytes": 46137344,
1455
+ "records": [
1456
+ {
1457
+ "name": "model.layers.2.mlp.gate_up_proj.weight",
1458
+ "shape": [
1459
+ 11264,
1460
+ 2048
1461
+ ],
1462
+ "dtype": "bfloat16",
1463
+ "format": "raw",
1464
+ "nbytes": 46137344,
1465
+ "byteOffset": 0
1466
+ }
1467
+ ],
1468
+ "md5sum": "96ee8e329c66e71303856240b850a854"
1469
+ },
1470
+ {
1471
+ "dataPath": "params_shard_39.bin",
1472
+ "format": "raw-shard",
1473
+ "nbytes": 25165824,
1474
+ "records": [
1475
+ {
1476
+ "name": "model.layers.2.self_attn.qkv_proj.weight",
1477
+ "shape": [
1478
+ 6144,
1479
+ 2048
1480
+ ],
1481
+ "dtype": "bfloat16",
1482
+ "format": "raw",
1483
+ "nbytes": 25165824,
1484
+ "byteOffset": 0
1485
+ }
1486
+ ],
1487
+ "md5sum": "287cbd3672f6456495f9f0388cd00ebd"
1488
+ },
1489
+ {
1490
+ "dataPath": "params_shard_40.bin",
1491
+ "format": "raw-shard",
1492
+ "nbytes": 31485952,
1493
+ "records": [
1494
+ {
1495
+ "name": "model.layers.19.self_attn.o_proj.weight",
1496
+ "shape": [
1497
+ 2048,
1498
+ 2048
1499
+ ],
1500
+ "dtype": "bfloat16",
1501
+ "format": "raw",
1502
+ "nbytes": 8388608,
1503
+ "byteOffset": 0
1504
+ },
1505
+ {
1506
+ "name": "model.layers.2.input_layernorm.bias",
1507
+ "shape": [
1508
+ 2048
1509
+ ],
1510
+ "dtype": "bfloat16",
1511
+ "format": "raw",
1512
+ "nbytes": 4096,
1513
+ "byteOffset": 8388608
1514
+ },
1515
+ {
1516
+ "name": "model.layers.2.input_layernorm.weight",
1517
+ "shape": [
1518
+ 2048
1519
+ ],
1520
+ "dtype": "bfloat16",
1521
+ "format": "raw",
1522
+ "nbytes": 4096,
1523
+ "byteOffset": 8392704
1524
+ },
1525
+ {
1526
+ "name": "model.layers.2.mlp.down_proj.weight",
1527
+ "shape": [
1528
+ 2048,
1529
+ 5632
1530
+ ],
1531
+ "dtype": "bfloat16",
1532
+ "format": "raw",
1533
+ "nbytes": 23068672,
1534
+ "byteOffset": 8396800
1535
+ },
1536
+ {
1537
+ "name": "model.layers.2.post_attention_layernorm.bias",
1538
+ "shape": [
1539
+ 2048
1540
+ ],
1541
+ "dtype": "bfloat16",
1542
+ "format": "raw",
1543
+ "nbytes": 4096,
1544
+ "byteOffset": 31465472
1545
+ },
1546
+ {
1547
+ "name": "model.layers.2.post_attention_layernorm.weight",
1548
+ "shape": [
1549
+ 2048
1550
+ ],
1551
+ "dtype": "bfloat16",
1552
+ "format": "raw",
1553
+ "nbytes": 4096,
1554
+ "byteOffset": 31469568
1555
+ },
1556
+ {
1557
+ "name": "model.layers.2.self_attn.qkv_proj.bias",
1558
+ "shape": [
1559
+ 6144
1560
+ ],
1561
+ "dtype": "bfloat16",
1562
+ "format": "raw",
1563
+ "nbytes": 12288,
1564
+ "byteOffset": 31473664
1565
+ }
1566
+ ],
1567
+ "md5sum": "e8f214a9c7b6b29cc31d92fa41dea50d"
1568
+ },
1569
+ {
1570
+ "dataPath": "params_shard_41.bin",
1571
+ "format": "raw-shard",
1572
+ "nbytes": 46137344,
1573
+ "records": [
1574
+ {
1575
+ "name": "model.layers.20.mlp.gate_up_proj.weight",
1576
+ "shape": [
1577
+ 11264,
1578
+ 2048
1579
+ ],
1580
+ "dtype": "bfloat16",
1581
+ "format": "raw",
1582
+ "nbytes": 46137344,
1583
+ "byteOffset": 0
1584
+ }
1585
+ ],
1586
+ "md5sum": "8e6645323550eef19f6d76c6058ce8e3"
1587
+ },
1588
+ {
1589
+ "dataPath": "params_shard_42.bin",
1590
+ "format": "raw-shard",
1591
+ "nbytes": 25165824,
1592
+ "records": [
1593
+ {
1594
+ "name": "model.layers.20.self_attn.qkv_proj.weight",
1595
+ "shape": [
1596
+ 6144,
1597
+ 2048
1598
+ ],
1599
+ "dtype": "bfloat16",
1600
+ "format": "raw",
1601
+ "nbytes": 25165824,
1602
+ "byteOffset": 0
1603
+ }
1604
+ ],
1605
+ "md5sum": "8e018ed3531c005f58fe7e70146c5172"
1606
+ },
1607
+ {
1608
+ "dataPath": "params_shard_43.bin",
1609
+ "format": "raw-shard",
1610
+ "nbytes": 31485952,
1611
+ "records": [
1612
+ {
1613
+ "name": "model.layers.2.self_attn.o_proj.weight",
1614
+ "shape": [
1615
+ 2048,
1616
+ 2048
1617
+ ],
1618
+ "dtype": "bfloat16",
1619
+ "format": "raw",
1620
+ "nbytes": 8388608,
1621
+ "byteOffset": 0
1622
+ },
1623
+ {
1624
+ "name": "model.layers.20.input_layernorm.bias",
1625
+ "shape": [
1626
+ 2048
1627
+ ],
1628
+ "dtype": "bfloat16",
1629
+ "format": "raw",
1630
+ "nbytes": 4096,
1631
+ "byteOffset": 8388608
1632
+ },
1633
+ {
1634
+ "name": "model.layers.20.input_layernorm.weight",
1635
+ "shape": [
1636
+ 2048
1637
+ ],
1638
+ "dtype": "bfloat16",
1639
+ "format": "raw",
1640
+ "nbytes": 4096,
1641
+ "byteOffset": 8392704
1642
+ },
1643
+ {
1644
+ "name": "model.layers.20.mlp.down_proj.weight",
1645
+ "shape": [
1646
+ 2048,
1647
+ 5632
1648
+ ],
1649
+ "dtype": "bfloat16",
1650
+ "format": "raw",
1651
+ "nbytes": 23068672,
1652
+ "byteOffset": 8396800
1653
+ },
1654
+ {
1655
+ "name": "model.layers.20.post_attention_layernorm.bias",
1656
+ "shape": [
1657
+ 2048
1658
+ ],
1659
+ "dtype": "bfloat16",
1660
+ "format": "raw",
1661
+ "nbytes": 4096,
1662
+ "byteOffset": 31465472
1663
+ },
1664
+ {
1665
+ "name": "model.layers.20.post_attention_layernorm.weight",
1666
+ "shape": [
1667
+ 2048
1668
+ ],
1669
+ "dtype": "bfloat16",
1670
+ "format": "raw",
1671
+ "nbytes": 4096,
1672
+ "byteOffset": 31469568
1673
+ },
1674
+ {
1675
+ "name": "model.layers.20.self_attn.qkv_proj.bias",
1676
+ "shape": [
1677
+ 6144
1678
+ ],
1679
+ "dtype": "bfloat16",
1680
+ "format": "raw",
1681
+ "nbytes": 12288,
1682
+ "byteOffset": 31473664
1683
+ }
1684
+ ],
1685
+ "md5sum": "46a472f2192f8f07db7174ac0445e81e"
1686
+ },
1687
+ {
1688
+ "dataPath": "params_shard_44.bin",
1689
+ "format": "raw-shard",
1690
+ "nbytes": 46137344,
1691
+ "records": [
1692
+ {
1693
+ "name": "model.layers.21.mlp.gate_up_proj.weight",
1694
+ "shape": [
1695
+ 11264,
1696
+ 2048
1697
+ ],
1698
+ "dtype": "bfloat16",
1699
+ "format": "raw",
1700
+ "nbytes": 46137344,
1701
+ "byteOffset": 0
1702
+ }
1703
+ ],
1704
+ "md5sum": "9ffaf5430d17166ddddb46541e38a052"
1705
+ },
1706
+ {
1707
+ "dataPath": "params_shard_45.bin",
1708
+ "format": "raw-shard",
1709
+ "nbytes": 25165824,
1710
+ "records": [
1711
+ {
1712
+ "name": "model.layers.21.self_attn.qkv_proj.weight",
1713
+ "shape": [
1714
+ 6144,
1715
+ 2048
1716
+ ],
1717
+ "dtype": "bfloat16",
1718
+ "format": "raw",
1719
+ "nbytes": 25165824,
1720
+ "byteOffset": 0
1721
+ }
1722
+ ],
1723
+ "md5sum": "9e40d634a0fc2ad35cf4030fbbdabc11"
1724
+ },
1725
+ {
1726
+ "dataPath": "params_shard_46.bin",
1727
+ "format": "raw-shard",
1728
+ "nbytes": 31485952,
1729
+ "records": [
1730
+ {
1731
+ "name": "model.layers.20.self_attn.o_proj.weight",
1732
+ "shape": [
1733
+ 2048,
1734
+ 2048
1735
+ ],
1736
+ "dtype": "bfloat16",
1737
+ "format": "raw",
1738
+ "nbytes": 8388608,
1739
+ "byteOffset": 0
1740
+ },
1741
+ {
1742
+ "name": "model.layers.21.input_layernorm.bias",
1743
+ "shape": [
1744
+ 2048
1745
+ ],
1746
+ "dtype": "bfloat16",
1747
+ "format": "raw",
1748
+ "nbytes": 4096,
1749
+ "byteOffset": 8388608
1750
+ },
1751
+ {
1752
+ "name": "model.layers.21.input_layernorm.weight",
1753
+ "shape": [
1754
+ 2048
1755
+ ],
1756
+ "dtype": "bfloat16",
1757
+ "format": "raw",
1758
+ "nbytes": 4096,
1759
+ "byteOffset": 8392704
1760
+ },
1761
+ {
1762
+ "name": "model.layers.21.mlp.down_proj.weight",
1763
+ "shape": [
1764
+ 2048,
1765
+ 5632
1766
+ ],
1767
+ "dtype": "bfloat16",
1768
+ "format": "raw",
1769
+ "nbytes": 23068672,
1770
+ "byteOffset": 8396800
1771
+ },
1772
+ {
1773
+ "name": "model.layers.21.post_attention_layernorm.bias",
1774
+ "shape": [
1775
+ 2048
1776
+ ],
1777
+ "dtype": "bfloat16",
1778
+ "format": "raw",
1779
+ "nbytes": 4096,
1780
+ "byteOffset": 31465472
1781
+ },
1782
+ {
1783
+ "name": "model.layers.21.post_attention_layernorm.weight",
1784
+ "shape": [
1785
+ 2048
1786
+ ],
1787
+ "dtype": "bfloat16",
1788
+ "format": "raw",
1789
+ "nbytes": 4096,
1790
+ "byteOffset": 31469568
1791
+ },
1792
+ {
1793
+ "name": "model.layers.21.self_attn.qkv_proj.bias",
1794
+ "shape": [
1795
+ 6144
1796
+ ],
1797
+ "dtype": "bfloat16",
1798
+ "format": "raw",
1799
+ "nbytes": 12288,
1800
+ "byteOffset": 31473664
1801
+ }
1802
+ ],
1803
+ "md5sum": "16df28feb9c1f5e7a4d8e6f68862572b"
1804
+ },
1805
+ {
1806
+ "dataPath": "params_shard_47.bin",
1807
+ "format": "raw-shard",
1808
+ "nbytes": 46137344,
1809
+ "records": [
1810
+ {
1811
+ "name": "model.layers.22.mlp.gate_up_proj.weight",
1812
+ "shape": [
1813
+ 11264,
1814
+ 2048
1815
+ ],
1816
+ "dtype": "bfloat16",
1817
+ "format": "raw",
1818
+ "nbytes": 46137344,
1819
+ "byteOffset": 0
1820
+ }
1821
+ ],
1822
+ "md5sum": "56f467ad10ab61c433da8587e95dc725"
1823
+ },
1824
+ {
1825
+ "dataPath": "params_shard_48.bin",
1826
+ "format": "raw-shard",
1827
+ "nbytes": 25165824,
1828
+ "records": [
1829
+ {
1830
+ "name": "model.layers.22.self_attn.qkv_proj.weight",
1831
+ "shape": [
1832
+ 6144,
1833
+ 2048
1834
+ ],
1835
+ "dtype": "bfloat16",
1836
+ "format": "raw",
1837
+ "nbytes": 25165824,
1838
+ "byteOffset": 0
1839
+ }
1840
+ ],
1841
+ "md5sum": "0e6a1dac99710057507bd0d11b50eab4"
1842
+ },
1843
+ {
1844
+ "dataPath": "params_shard_49.bin",
1845
+ "format": "raw-shard",
1846
+ "nbytes": 31485952,
1847
+ "records": [
1848
+ {
1849
+ "name": "model.layers.21.self_attn.o_proj.weight",
1850
+ "shape": [
1851
+ 2048,
1852
+ 2048
1853
+ ],
1854
+ "dtype": "bfloat16",
1855
+ "format": "raw",
1856
+ "nbytes": 8388608,
1857
+ "byteOffset": 0
1858
+ },
1859
+ {
1860
+ "name": "model.layers.22.input_layernorm.bias",
1861
+ "shape": [
1862
+ 2048
1863
+ ],
1864
+ "dtype": "bfloat16",
1865
+ "format": "raw",
1866
+ "nbytes": 4096,
1867
+ "byteOffset": 8388608
1868
+ },
1869
+ {
1870
+ "name": "model.layers.22.input_layernorm.weight",
1871
+ "shape": [
1872
+ 2048
1873
+ ],
1874
+ "dtype": "bfloat16",
1875
+ "format": "raw",
1876
+ "nbytes": 4096,
1877
+ "byteOffset": 8392704
1878
+ },
1879
+ {
1880
+ "name": "model.layers.22.mlp.down_proj.weight",
1881
+ "shape": [
1882
+ 2048,
1883
+ 5632
1884
+ ],
1885
+ "dtype": "bfloat16",
1886
+ "format": "raw",
1887
+ "nbytes": 23068672,
1888
+ "byteOffset": 8396800
1889
+ },
1890
+ {
1891
+ "name": "model.layers.22.post_attention_layernorm.bias",
1892
+ "shape": [
1893
+ 2048
1894
+ ],
1895
+ "dtype": "bfloat16",
1896
+ "format": "raw",
1897
+ "nbytes": 4096,
1898
+ "byteOffset": 31465472
1899
+ },
1900
+ {
1901
+ "name": "model.layers.22.post_attention_layernorm.weight",
1902
+ "shape": [
1903
+ 2048
1904
+ ],
1905
+ "dtype": "bfloat16",
1906
+ "format": "raw",
1907
+ "nbytes": 4096,
1908
+ "byteOffset": 31469568
1909
+ },
1910
+ {
1911
+ "name": "model.layers.22.self_attn.qkv_proj.bias",
1912
+ "shape": [
1913
+ 6144
1914
+ ],
1915
+ "dtype": "bfloat16",
1916
+ "format": "raw",
1917
+ "nbytes": 12288,
1918
+ "byteOffset": 31473664
1919
+ }
1920
+ ],
1921
+ "md5sum": "c0d30bda2866f2ea1f5c9d2edb6c3d31"
1922
+ },
1923
+ {
1924
+ "dataPath": "params_shard_50.bin",
1925
+ "format": "raw-shard",
1926
+ "nbytes": 46137344,
1927
+ "records": [
1928
+ {
1929
+ "name": "model.layers.23.mlp.gate_up_proj.weight",
1930
+ "shape": [
1931
+ 11264,
1932
+ 2048
1933
+ ],
1934
+ "dtype": "bfloat16",
1935
+ "format": "raw",
1936
+ "nbytes": 46137344,
1937
+ "byteOffset": 0
1938
+ }
1939
+ ],
1940
+ "md5sum": "8991cc0cc00127f6b02c64a354f488b0"
1941
+ },
1942
+ {
1943
+ "dataPath": "params_shard_51.bin",
1944
+ "format": "raw-shard",
1945
+ "nbytes": 25165824,
1946
+ "records": [
1947
+ {
1948
+ "name": "model.layers.23.self_attn.qkv_proj.weight",
1949
+ "shape": [
1950
+ 6144,
1951
+ 2048
1952
+ ],
1953
+ "dtype": "bfloat16",
1954
+ "format": "raw",
1955
+ "nbytes": 25165824,
1956
+ "byteOffset": 0
1957
+ }
1958
+ ],
1959
+ "md5sum": "7d908cc3592d6ac0f579d9af9c65880b"
1960
+ },
1961
+ {
1962
+ "dataPath": "params_shard_52.bin",
1963
+ "format": "raw-shard",
1964
+ "nbytes": 31485952,
1965
+ "records": [
1966
+ {
1967
+ "name": "model.layers.22.self_attn.o_proj.weight",
1968
+ "shape": [
1969
+ 2048,
1970
+ 2048
1971
+ ],
1972
+ "dtype": "bfloat16",
1973
+ "format": "raw",
1974
+ "nbytes": 8388608,
1975
+ "byteOffset": 0
1976
+ },
1977
+ {
1978
+ "name": "model.layers.23.input_layernorm.bias",
1979
+ "shape": [
1980
+ 2048
1981
+ ],
1982
+ "dtype": "bfloat16",
1983
+ "format": "raw",
1984
+ "nbytes": 4096,
1985
+ "byteOffset": 8388608
1986
+ },
1987
+ {
1988
+ "name": "model.layers.23.input_layernorm.weight",
1989
+ "shape": [
1990
+ 2048
1991
+ ],
1992
+ "dtype": "bfloat16",
1993
+ "format": "raw",
1994
+ "nbytes": 4096,
1995
+ "byteOffset": 8392704
1996
+ },
1997
+ {
1998
+ "name": "model.layers.23.mlp.down_proj.weight",
1999
+ "shape": [
2000
+ 2048,
2001
+ 5632
2002
+ ],
2003
+ "dtype": "bfloat16",
2004
+ "format": "raw",
2005
+ "nbytes": 23068672,
2006
+ "byteOffset": 8396800
2007
+ },
2008
+ {
2009
+ "name": "model.layers.23.post_attention_layernorm.bias",
2010
+ "shape": [
2011
+ 2048
2012
+ ],
2013
+ "dtype": "bfloat16",
2014
+ "format": "raw",
2015
+ "nbytes": 4096,
2016
+ "byteOffset": 31465472
2017
+ },
2018
+ {
2019
+ "name": "model.layers.23.post_attention_layernorm.weight",
2020
+ "shape": [
2021
+ 2048
2022
+ ],
2023
+ "dtype": "bfloat16",
2024
+ "format": "raw",
2025
+ "nbytes": 4096,
2026
+ "byteOffset": 31469568
2027
+ },
2028
+ {
2029
+ "name": "model.layers.23.self_attn.qkv_proj.bias",
2030
+ "shape": [
2031
+ 6144
2032
+ ],
2033
+ "dtype": "bfloat16",
2034
+ "format": "raw",
2035
+ "nbytes": 12288,
2036
+ "byteOffset": 31473664
2037
+ }
2038
+ ],
2039
+ "md5sum": "3791dde6c32459d61ce491d5c68fbe75"
2040
+ },
2041
+ {
2042
+ "dataPath": "params_shard_53.bin",
2043
+ "format": "raw-shard",
2044
+ "nbytes": 46137344,
2045
+ "records": [
2046
+ {
2047
+ "name": "model.layers.3.mlp.gate_up_proj.weight",
2048
+ "shape": [
2049
+ 11264,
2050
+ 2048
2051
+ ],
2052
+ "dtype": "bfloat16",
2053
+ "format": "raw",
2054
+ "nbytes": 46137344,
2055
+ "byteOffset": 0
2056
+ }
2057
+ ],
2058
+ "md5sum": "eaef3c26179adfc8b0e0e6da6ef384dc"
2059
+ },
2060
+ {
2061
+ "dataPath": "params_shard_54.bin",
2062
+ "format": "raw-shard",
2063
+ "nbytes": 25165824,
2064
+ "records": [
2065
+ {
2066
+ "name": "model.layers.3.self_attn.qkv_proj.weight",
2067
+ "shape": [
2068
+ 6144,
2069
+ 2048
2070
+ ],
2071
+ "dtype": "bfloat16",
2072
+ "format": "raw",
2073
+ "nbytes": 25165824,
2074
+ "byteOffset": 0
2075
+ }
2076
+ ],
2077
+ "md5sum": "53ce84bd9cefa35a76b68651e91bde4e"
2078
+ },
2079
+ {
2080
+ "dataPath": "params_shard_55.bin",
2081
+ "format": "raw-shard",
2082
+ "nbytes": 31485952,
2083
+ "records": [
2084
+ {
2085
+ "name": "model.layers.23.self_attn.o_proj.weight",
2086
+ "shape": [
2087
+ 2048,
2088
+ 2048
2089
+ ],
2090
+ "dtype": "bfloat16",
2091
+ "format": "raw",
2092
+ "nbytes": 8388608,
2093
+ "byteOffset": 0
2094
+ },
2095
+ {
2096
+ "name": "model.layers.3.input_layernorm.bias",
2097
+ "shape": [
2098
+ 2048
2099
+ ],
2100
+ "dtype": "bfloat16",
2101
+ "format": "raw",
2102
+ "nbytes": 4096,
2103
+ "byteOffset": 8388608
2104
+ },
2105
+ {
2106
+ "name": "model.layers.3.input_layernorm.weight",
2107
+ "shape": [
2108
+ 2048
2109
+ ],
2110
+ "dtype": "bfloat16",
2111
+ "format": "raw",
2112
+ "nbytes": 4096,
2113
+ "byteOffset": 8392704
2114
+ },
2115
+ {
2116
+ "name": "model.layers.3.mlp.down_proj.weight",
2117
+ "shape": [
2118
+ 2048,
2119
+ 5632
2120
+ ],
2121
+ "dtype": "bfloat16",
2122
+ "format": "raw",
2123
+ "nbytes": 23068672,
2124
+ "byteOffset": 8396800
2125
+ },
2126
+ {
2127
+ "name": "model.layers.3.post_attention_layernorm.bias",
2128
+ "shape": [
2129
+ 2048
2130
+ ],
2131
+ "dtype": "bfloat16",
2132
+ "format": "raw",
2133
+ "nbytes": 4096,
2134
+ "byteOffset": 31465472
2135
+ },
2136
+ {
2137
+ "name": "model.layers.3.post_attention_layernorm.weight",
2138
+ "shape": [
2139
+ 2048
2140
+ ],
2141
+ "dtype": "bfloat16",
2142
+ "format": "raw",
2143
+ "nbytes": 4096,
2144
+ "byteOffset": 31469568
2145
+ },
2146
+ {
2147
+ "name": "model.layers.3.self_attn.qkv_proj.bias",
2148
+ "shape": [
2149
+ 6144
2150
+ ],
2151
+ "dtype": "bfloat16",
2152
+ "format": "raw",
2153
+ "nbytes": 12288,
2154
+ "byteOffset": 31473664
2155
+ }
2156
+ ],
2157
+ "md5sum": "9fd086d4255532fe0553aa78febfa3c5"
2158
+ },
2159
+ {
2160
+ "dataPath": "params_shard_56.bin",
2161
+ "format": "raw-shard",
2162
+ "nbytes": 46137344,
2163
+ "records": [
2164
+ {
2165
+ "name": "model.layers.4.mlp.gate_up_proj.weight",
2166
+ "shape": [
2167
+ 11264,
2168
+ 2048
2169
+ ],
2170
+ "dtype": "bfloat16",
2171
+ "format": "raw",
2172
+ "nbytes": 46137344,
2173
+ "byteOffset": 0
2174
+ }
2175
+ ],
2176
+ "md5sum": "e5418b1796c0d0bf6110b60f658db040"
2177
+ },
2178
+ {
2179
+ "dataPath": "params_shard_57.bin",
2180
+ "format": "raw-shard",
2181
+ "nbytes": 25165824,
2182
+ "records": [
2183
+ {
2184
+ "name": "model.layers.4.self_attn.qkv_proj.weight",
2185
+ "shape": [
2186
+ 6144,
2187
+ 2048
2188
+ ],
2189
+ "dtype": "bfloat16",
2190
+ "format": "raw",
2191
+ "nbytes": 25165824,
2192
+ "byteOffset": 0
2193
+ }
2194
+ ],
2195
+ "md5sum": "de06fb6e7027821c255741e483f720c3"
2196
+ },
2197
+ {
2198
+ "dataPath": "params_shard_58.bin",
2199
+ "format": "raw-shard",
2200
+ "nbytes": 31485952,
2201
+ "records": [
2202
+ {
2203
+ "name": "model.layers.3.self_attn.o_proj.weight",
2204
+ "shape": [
2205
+ 2048,
2206
+ 2048
2207
+ ],
2208
+ "dtype": "bfloat16",
2209
+ "format": "raw",
2210
+ "nbytes": 8388608,
2211
+ "byteOffset": 0
2212
+ },
2213
+ {
2214
+ "name": "model.layers.4.input_layernorm.bias",
2215
+ "shape": [
2216
+ 2048
2217
+ ],
2218
+ "dtype": "bfloat16",
2219
+ "format": "raw",
2220
+ "nbytes": 4096,
2221
+ "byteOffset": 8388608
2222
+ },
2223
+ {
2224
+ "name": "model.layers.4.input_layernorm.weight",
2225
+ "shape": [
2226
+ 2048
2227
+ ],
2228
+ "dtype": "bfloat16",
2229
+ "format": "raw",
2230
+ "nbytes": 4096,
2231
+ "byteOffset": 8392704
2232
+ },
2233
+ {
2234
+ "name": "model.layers.4.mlp.down_proj.weight",
2235
+ "shape": [
2236
+ 2048,
2237
+ 5632
2238
+ ],
2239
+ "dtype": "bfloat16",
2240
+ "format": "raw",
2241
+ "nbytes": 23068672,
2242
+ "byteOffset": 8396800
2243
+ },
2244
+ {
2245
+ "name": "model.layers.4.post_attention_layernorm.bias",
2246
+ "shape": [
2247
+ 2048
2248
+ ],
2249
+ "dtype": "bfloat16",
2250
+ "format": "raw",
2251
+ "nbytes": 4096,
2252
+ "byteOffset": 31465472
2253
+ },
2254
+ {
2255
+ "name": "model.layers.4.post_attention_layernorm.weight",
2256
+ "shape": [
2257
+ 2048
2258
+ ],
2259
+ "dtype": "bfloat16",
2260
+ "format": "raw",
2261
+ "nbytes": 4096,
2262
+ "byteOffset": 31469568
2263
+ },
2264
+ {
2265
+ "name": "model.layers.4.self_attn.qkv_proj.bias",
2266
+ "shape": [
2267
+ 6144
2268
+ ],
2269
+ "dtype": "bfloat16",
2270
+ "format": "raw",
2271
+ "nbytes": 12288,
2272
+ "byteOffset": 31473664
2273
+ }
2274
+ ],
2275
+ "md5sum": "3500a1c3f75d576966cc0ce12060a58d"
2276
+ },
2277
+ {
2278
+ "dataPath": "params_shard_59.bin",
2279
+ "format": "raw-shard",
2280
+ "nbytes": 46137344,
2281
+ "records": [
2282
+ {
2283
+ "name": "model.layers.5.mlp.gate_up_proj.weight",
2284
+ "shape": [
2285
+ 11264,
2286
+ 2048
2287
+ ],
2288
+ "dtype": "bfloat16",
2289
+ "format": "raw",
2290
+ "nbytes": 46137344,
2291
+ "byteOffset": 0
2292
+ }
2293
+ ],
2294
+ "md5sum": "86566a953401d3bc7dbdb52821737977"
2295
+ },
2296
+ {
2297
+ "dataPath": "params_shard_60.bin",
2298
+ "format": "raw-shard",
2299
+ "nbytes": 25165824,
2300
+ "records": [
2301
+ {
2302
+ "name": "model.layers.5.self_attn.qkv_proj.weight",
2303
+ "shape": [
2304
+ 6144,
2305
+ 2048
2306
+ ],
2307
+ "dtype": "bfloat16",
2308
+ "format": "raw",
2309
+ "nbytes": 25165824,
2310
+ "byteOffset": 0
2311
+ }
2312
+ ],
2313
+ "md5sum": "4a8e7de14545f19604080f3675bd4e25"
2314
+ },
2315
+ {
2316
+ "dataPath": "params_shard_61.bin",
2317
+ "format": "raw-shard",
2318
+ "nbytes": 31485952,
2319
+ "records": [
2320
+ {
2321
+ "name": "model.layers.4.self_attn.o_proj.weight",
2322
+ "shape": [
2323
+ 2048,
2324
+ 2048
2325
+ ],
2326
+ "dtype": "bfloat16",
2327
+ "format": "raw",
2328
+ "nbytes": 8388608,
2329
+ "byteOffset": 0
2330
+ },
2331
+ {
2332
+ "name": "model.layers.5.input_layernorm.bias",
2333
+ "shape": [
2334
+ 2048
2335
+ ],
2336
+ "dtype": "bfloat16",
2337
+ "format": "raw",
2338
+ "nbytes": 4096,
2339
+ "byteOffset": 8388608
2340
+ },
2341
+ {
2342
+ "name": "model.layers.5.input_layernorm.weight",
2343
+ "shape": [
2344
+ 2048
2345
+ ],
2346
+ "dtype": "bfloat16",
2347
+ "format": "raw",
2348
+ "nbytes": 4096,
2349
+ "byteOffset": 8392704
2350
+ },
2351
+ {
2352
+ "name": "model.layers.5.mlp.down_proj.weight",
2353
+ "shape": [
2354
+ 2048,
2355
+ 5632
2356
+ ],
2357
+ "dtype": "bfloat16",
2358
+ "format": "raw",
2359
+ "nbytes": 23068672,
2360
+ "byteOffset": 8396800
2361
+ },
2362
+ {
2363
+ "name": "model.layers.5.post_attention_layernorm.bias",
2364
+ "shape": [
2365
+ 2048
2366
+ ],
2367
+ "dtype": "bfloat16",
2368
+ "format": "raw",
2369
+ "nbytes": 4096,
2370
+ "byteOffset": 31465472
2371
+ },
2372
+ {
2373
+ "name": "model.layers.5.post_attention_layernorm.weight",
2374
+ "shape": [
2375
+ 2048
2376
+ ],
2377
+ "dtype": "bfloat16",
2378
+ "format": "raw",
2379
+ "nbytes": 4096,
2380
+ "byteOffset": 31469568
2381
+ },
2382
+ {
2383
+ "name": "model.layers.5.self_attn.qkv_proj.bias",
2384
+ "shape": [
2385
+ 6144
2386
+ ],
2387
+ "dtype": "bfloat16",
2388
+ "format": "raw",
2389
+ "nbytes": 12288,
2390
+ "byteOffset": 31473664
2391
+ }
2392
+ ],
2393
+ "md5sum": "a5e78848bfe56b5f399599ba13cff465"
2394
+ },
2395
+ {
2396
+ "dataPath": "params_shard_62.bin",
2397
+ "format": "raw-shard",
2398
+ "nbytes": 46137344,
2399
+ "records": [
2400
+ {
2401
+ "name": "model.layers.6.mlp.gate_up_proj.weight",
2402
+ "shape": [
2403
+ 11264,
2404
+ 2048
2405
+ ],
2406
+ "dtype": "bfloat16",
2407
+ "format": "raw",
2408
+ "nbytes": 46137344,
2409
+ "byteOffset": 0
2410
+ }
2411
+ ],
2412
+ "md5sum": "4fc2f473360bc7e4074f1eaf56407815"
2413
+ },
2414
+ {
2415
+ "dataPath": "params_shard_63.bin",
2416
+ "format": "raw-shard",
2417
+ "nbytes": 25165824,
2418
+ "records": [
2419
+ {
2420
+ "name": "model.layers.6.self_attn.qkv_proj.weight",
2421
+ "shape": [
2422
+ 6144,
2423
+ 2048
2424
+ ],
2425
+ "dtype": "bfloat16",
2426
+ "format": "raw",
2427
+ "nbytes": 25165824,
2428
+ "byteOffset": 0
2429
+ }
2430
+ ],
2431
+ "md5sum": "dd64a40d442250a3e3e8d5ab80969163"
2432
+ },
2433
+ {
2434
+ "dataPath": "params_shard_64.bin",
2435
+ "format": "raw-shard",
2436
+ "nbytes": 31485952,
2437
+ "records": [
2438
+ {
2439
+ "name": "model.layers.5.self_attn.o_proj.weight",
2440
+ "shape": [
2441
+ 2048,
2442
+ 2048
2443
+ ],
2444
+ "dtype": "bfloat16",
2445
+ "format": "raw",
2446
+ "nbytes": 8388608,
2447
+ "byteOffset": 0
2448
+ },
2449
+ {
2450
+ "name": "model.layers.6.input_layernorm.bias",
2451
+ "shape": [
2452
+ 2048
2453
+ ],
2454
+ "dtype": "bfloat16",
2455
+ "format": "raw",
2456
+ "nbytes": 4096,
2457
+ "byteOffset": 8388608
2458
+ },
2459
+ {
2460
+ "name": "model.layers.6.input_layernorm.weight",
2461
+ "shape": [
2462
+ 2048
2463
+ ],
2464
+ "dtype": "bfloat16",
2465
+ "format": "raw",
2466
+ "nbytes": 4096,
2467
+ "byteOffset": 8392704
2468
+ },
2469
+ {
2470
+ "name": "model.layers.6.mlp.down_proj.weight",
2471
+ "shape": [
2472
+ 2048,
2473
+ 5632
2474
+ ],
2475
+ "dtype": "bfloat16",
2476
+ "format": "raw",
2477
+ "nbytes": 23068672,
2478
+ "byteOffset": 8396800
2479
+ },
2480
+ {
2481
+ "name": "model.layers.6.post_attention_layernorm.bias",
2482
+ "shape": [
2483
+ 2048
2484
+ ],
2485
+ "dtype": "bfloat16",
2486
+ "format": "raw",
2487
+ "nbytes": 4096,
2488
+ "byteOffset": 31465472
2489
+ },
2490
+ {
2491
+ "name": "model.layers.6.post_attention_layernorm.weight",
2492
+ "shape": [
2493
+ 2048
2494
+ ],
2495
+ "dtype": "bfloat16",
2496
+ "format": "raw",
2497
+ "nbytes": 4096,
2498
+ "byteOffset": 31469568
2499
+ },
2500
+ {
2501
+ "name": "model.layers.6.self_attn.qkv_proj.bias",
2502
+ "shape": [
2503
+ 6144
2504
+ ],
2505
+ "dtype": "bfloat16",
2506
+ "format": "raw",
2507
+ "nbytes": 12288,
2508
+ "byteOffset": 31473664
2509
+ }
2510
+ ],
2511
+ "md5sum": "6664340a7fa63a488e56d3922dc4e6fc"
2512
+ },
2513
+ {
2514
+ "dataPath": "params_shard_65.bin",
2515
+ "format": "raw-shard",
2516
+ "nbytes": 46137344,
2517
+ "records": [
2518
+ {
2519
+ "name": "model.layers.7.mlp.gate_up_proj.weight",
2520
+ "shape": [
2521
+ 11264,
2522
+ 2048
2523
+ ],
2524
+ "dtype": "bfloat16",
2525
+ "format": "raw",
2526
+ "nbytes": 46137344,
2527
+ "byteOffset": 0
2528
+ }
2529
+ ],
2530
+ "md5sum": "0272f4828c8856671f67f0f8c801c9ea"
2531
+ },
2532
+ {
2533
+ "dataPath": "params_shard_66.bin",
2534
+ "format": "raw-shard",
2535
+ "nbytes": 25165824,
2536
+ "records": [
2537
+ {
2538
+ "name": "model.layers.7.self_attn.qkv_proj.weight",
2539
+ "shape": [
2540
+ 6144,
2541
+ 2048
2542
+ ],
2543
+ "dtype": "bfloat16",
2544
+ "format": "raw",
2545
+ "nbytes": 25165824,
2546
+ "byteOffset": 0
2547
+ }
2548
+ ],
2549
+ "md5sum": "19d4ca5052117c93fa60acc136cdf3c4"
2550
+ },
2551
+ {
2552
+ "dataPath": "params_shard_67.bin",
2553
+ "format": "raw-shard",
2554
+ "nbytes": 31485952,
2555
+ "records": [
2556
+ {
2557
+ "name": "model.layers.6.self_attn.o_proj.weight",
2558
+ "shape": [
2559
+ 2048,
2560
+ 2048
2561
+ ],
2562
+ "dtype": "bfloat16",
2563
+ "format": "raw",
2564
+ "nbytes": 8388608,
2565
+ "byteOffset": 0
2566
+ },
2567
+ {
2568
+ "name": "model.layers.7.input_layernorm.bias",
2569
+ "shape": [
2570
+ 2048
2571
+ ],
2572
+ "dtype": "bfloat16",
2573
+ "format": "raw",
2574
+ "nbytes": 4096,
2575
+ "byteOffset": 8388608
2576
+ },
2577
+ {
2578
+ "name": "model.layers.7.input_layernorm.weight",
2579
+ "shape": [
2580
+ 2048
2581
+ ],
2582
+ "dtype": "bfloat16",
2583
+ "format": "raw",
2584
+ "nbytes": 4096,
2585
+ "byteOffset": 8392704
2586
+ },
2587
+ {
2588
+ "name": "model.layers.7.mlp.down_proj.weight",
2589
+ "shape": [
2590
+ 2048,
2591
+ 5632
2592
+ ],
2593
+ "dtype": "bfloat16",
2594
+ "format": "raw",
2595
+ "nbytes": 23068672,
2596
+ "byteOffset": 8396800
2597
+ },
2598
+ {
2599
+ "name": "model.layers.7.post_attention_layernorm.bias",
2600
+ "shape": [
2601
+ 2048
2602
+ ],
2603
+ "dtype": "bfloat16",
2604
+ "format": "raw",
2605
+ "nbytes": 4096,
2606
+ "byteOffset": 31465472
2607
+ },
2608
+ {
2609
+ "name": "model.layers.7.post_attention_layernorm.weight",
2610
+ "shape": [
2611
+ 2048
2612
+ ],
2613
+ "dtype": "bfloat16",
2614
+ "format": "raw",
2615
+ "nbytes": 4096,
2616
+ "byteOffset": 31469568
2617
+ },
2618
+ {
2619
+ "name": "model.layers.7.self_attn.qkv_proj.bias",
2620
+ "shape": [
2621
+ 6144
2622
+ ],
2623
+ "dtype": "bfloat16",
2624
+ "format": "raw",
2625
+ "nbytes": 12288,
2626
+ "byteOffset": 31473664
2627
+ }
2628
+ ],
2629
+ "md5sum": "5f01cda40185eca631e4e4580e5c7d43"
2630
+ },
2631
+ {
2632
+ "dataPath": "params_shard_68.bin",
2633
+ "format": "raw-shard",
2634
+ "nbytes": 46137344,
2635
+ "records": [
2636
+ {
2637
+ "name": "model.layers.8.mlp.gate_up_proj.weight",
2638
+ "shape": [
2639
+ 11264,
2640
+ 2048
2641
+ ],
2642
+ "dtype": "bfloat16",
2643
+ "format": "raw",
2644
+ "nbytes": 46137344,
2645
+ "byteOffset": 0
2646
+ }
2647
+ ],
2648
+ "md5sum": "13c46a4e7b4595ea9dbe84fd3231411f"
2649
+ },
2650
+ {
2651
+ "dataPath": "params_shard_69.bin",
2652
+ "format": "raw-shard",
2653
+ "nbytes": 25165824,
2654
+ "records": [
2655
+ {
2656
+ "name": "model.layers.8.self_attn.qkv_proj.weight",
2657
+ "shape": [
2658
+ 6144,
2659
+ 2048
2660
+ ],
2661
+ "dtype": "bfloat16",
2662
+ "format": "raw",
2663
+ "nbytes": 25165824,
2664
+ "byteOffset": 0
2665
+ }
2666
+ ],
2667
+ "md5sum": "a0e98bc964fe5c3169ffed182e2c2f32"
2668
+ },
2669
+ {
2670
+ "dataPath": "params_shard_70.bin",
2671
+ "format": "raw-shard",
2672
+ "nbytes": 31485952,
2673
+ "records": [
2674
+ {
2675
+ "name": "model.layers.7.self_attn.o_proj.weight",
2676
+ "shape": [
2677
+ 2048,
2678
+ 2048
2679
+ ],
2680
+ "dtype": "bfloat16",
2681
+ "format": "raw",
2682
+ "nbytes": 8388608,
2683
+ "byteOffset": 0
2684
+ },
2685
+ {
2686
+ "name": "model.layers.8.input_layernorm.bias",
2687
+ "shape": [
2688
+ 2048
2689
+ ],
2690
+ "dtype": "bfloat16",
2691
+ "format": "raw",
2692
+ "nbytes": 4096,
2693
+ "byteOffset": 8388608
2694
+ },
2695
+ {
2696
+ "name": "model.layers.8.input_layernorm.weight",
2697
+ "shape": [
2698
+ 2048
2699
+ ],
2700
+ "dtype": "bfloat16",
2701
+ "format": "raw",
2702
+ "nbytes": 4096,
2703
+ "byteOffset": 8392704
2704
+ },
2705
+ {
2706
+ "name": "model.layers.8.mlp.down_proj.weight",
2707
+ "shape": [
2708
+ 2048,
2709
+ 5632
2710
+ ],
2711
+ "dtype": "bfloat16",
2712
+ "format": "raw",
2713
+ "nbytes": 23068672,
2714
+ "byteOffset": 8396800
2715
+ },
2716
+ {
2717
+ "name": "model.layers.8.post_attention_layernorm.bias",
2718
+ "shape": [
2719
+ 2048
2720
+ ],
2721
+ "dtype": "bfloat16",
2722
+ "format": "raw",
2723
+ "nbytes": 4096,
2724
+ "byteOffset": 31465472
2725
+ },
2726
+ {
2727
+ "name": "model.layers.8.post_attention_layernorm.weight",
2728
+ "shape": [
2729
+ 2048
2730
+ ],
2731
+ "dtype": "bfloat16",
2732
+ "format": "raw",
2733
+ "nbytes": 4096,
2734
+ "byteOffset": 31469568
2735
+ },
2736
+ {
2737
+ "name": "model.layers.8.self_attn.qkv_proj.bias",
2738
+ "shape": [
2739
+ 6144
2740
+ ],
2741
+ "dtype": "bfloat16",
2742
+ "format": "raw",
2743
+ "nbytes": 12288,
2744
+ "byteOffset": 31473664
2745
+ }
2746
+ ],
2747
+ "md5sum": "e6e2237c4be82896f7d3e51e80d3e532"
2748
+ },
2749
+ {
2750
+ "dataPath": "params_shard_71.bin",
2751
+ "format": "raw-shard",
2752
+ "nbytes": 46137344,
2753
+ "records": [
2754
+ {
2755
+ "name": "model.layers.9.mlp.gate_up_proj.weight",
2756
+ "shape": [
2757
+ 11264,
2758
+ 2048
2759
+ ],
2760
+ "dtype": "bfloat16",
2761
+ "format": "raw",
2762
+ "nbytes": 46137344,
2763
+ "byteOffset": 0
2764
+ }
2765
+ ],
2766
+ "md5sum": "bedc9b33733acfb4eb0e63c6ea0caf49"
2767
+ },
2768
+ {
2769
+ "dataPath": "params_shard_72.bin",
2770
+ "format": "raw-shard",
2771
+ "nbytes": 25165824,
2772
+ "records": [
2773
+ {
2774
+ "name": "model.layers.9.self_attn.qkv_proj.weight",
2775
+ "shape": [
2776
+ 6144,
2777
+ 2048
2778
+ ],
2779
+ "dtype": "bfloat16",
2780
+ "format": "raw",
2781
+ "nbytes": 25165824,
2782
+ "byteOffset": 0
2783
+ }
2784
+ ],
2785
+ "md5sum": "e2a0d2d0e1cc5aabd2c6d3e4d2aee4f7"
2786
+ },
2787
+ {
2788
+ "dataPath": "params_shard_73.bin",
2789
+ "format": "raw-shard",
2790
+ "nbytes": 31485952,
2791
+ "records": [
2792
+ {
2793
+ "name": "model.layers.8.self_attn.o_proj.weight",
2794
+ "shape": [
2795
+ 2048,
2796
+ 2048
2797
+ ],
2798
+ "dtype": "bfloat16",
2799
+ "format": "raw",
2800
+ "nbytes": 8388608,
2801
+ "byteOffset": 0
2802
+ },
2803
+ {
2804
+ "name": "model.layers.9.input_layernorm.bias",
2805
+ "shape": [
2806
+ 2048
2807
+ ],
2808
+ "dtype": "bfloat16",
2809
+ "format": "raw",
2810
+ "nbytes": 4096,
2811
+ "byteOffset": 8388608
2812
+ },
2813
+ {
2814
+ "name": "model.layers.9.input_layernorm.weight",
2815
+ "shape": [
2816
+ 2048
2817
+ ],
2818
+ "dtype": "bfloat16",
2819
+ "format": "raw",
2820
+ "nbytes": 4096,
2821
+ "byteOffset": 8392704
2822
+ },
2823
+ {
2824
+ "name": "model.layers.9.mlp.down_proj.weight",
2825
+ "shape": [
2826
+ 2048,
2827
+ 5632
2828
+ ],
2829
+ "dtype": "bfloat16",
2830
+ "format": "raw",
2831
+ "nbytes": 23068672,
2832
+ "byteOffset": 8396800
2833
+ },
2834
+ {
2835
+ "name": "model.layers.9.post_attention_layernorm.bias",
2836
+ "shape": [
2837
+ 2048
2838
+ ],
2839
+ "dtype": "bfloat16",
2840
+ "format": "raw",
2841
+ "nbytes": 4096,
2842
+ "byteOffset": 31465472
2843
+ },
2844
+ {
2845
+ "name": "model.layers.9.post_attention_layernorm.weight",
2846
+ "shape": [
2847
+ 2048
2848
+ ],
2849
+ "dtype": "bfloat16",
2850
+ "format": "raw",
2851
+ "nbytes": 4096,
2852
+ "byteOffset": 31469568
2853
+ },
2854
+ {
2855
+ "name": "model.layers.9.self_attn.qkv_proj.bias",
2856
+ "shape": [
2857
+ 6144
2858
+ ],
2859
+ "dtype": "bfloat16",
2860
+ "format": "raw",
2861
+ "nbytes": 12288,
2862
+ "byteOffset": 31473664
2863
+ }
2864
+ ],
2865
+ "md5sum": "67a9e7491bfc338ff5dd57b5edf571e6"
2866
+ },
2867
+ {
2868
+ "dataPath": "params_shard_74.bin",
2869
+ "format": "raw-shard",
2870
+ "nbytes": 8396800,
2871
+ "records": [
2872
+ {
2873
+ "name": "model.layers.9.self_attn.o_proj.weight",
2874
+ "shape": [
2875
+ 2048,
2876
+ 2048
2877
+ ],
2878
+ "dtype": "bfloat16",
2879
+ "format": "raw",
2880
+ "nbytes": 8388608,
2881
+ "byteOffset": 0
2882
+ },
2883
+ {
2884
+ "name": "model.norm.bias",
2885
+ "shape": [
2886
+ 2048
2887
+ ],
2888
+ "dtype": "bfloat16",
2889
+ "format": "raw",
2890
+ "nbytes": 4096,
2891
+ "byteOffset": 8388608
2892
+ },
2893
+ {
2894
+ "name": "model.norm.weight",
2895
+ "shape": [
2896
+ 2048
2897
+ ],
2898
+ "dtype": "bfloat16",
2899
+ "format": "raw",
2900
+ "nbytes": 4096,
2901
+ "byteOffset": 8392704
2902
+ }
2903
+ ],
2904
+ "md5sum": "85fa8f8180a156fa5d21e32c7413cce0"
2905
+ }
2906
+ ]
2907
+ }
ndarray-cache.json ADDED
The diff for this file is too large to render. See raw diff
 
params_shard_0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:452b41782b3bace6e61b1ce66c33380b6977a024a2def654653f985247ac3587
3
+ size 411041792
params_shard_1.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:464800b2e5443aad9cba9629e66c08a2fcf6e347b43c8fee5fc6b3e7f673dc36
3
+ size 411041792
params_shard_10.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4853baee90177cf17b9ff9c6572c3b963bcde600d96fea616262768be4149f3c
3
+ size 31485952
params_shard_11.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4203ea3b6b1ce4fc8d35bf571cc9ecc6fee4d924310fb9cbf2117eaefa751bdb
3
+ size 46137344
params_shard_12.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ca3e37c74245161771737bda3c66fb5498b13bb02e098074b7b9316fa3ef774
3
+ size 25165824
params_shard_13.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d4a886aff72df697f10f8b1bbe94136e7388cc725ed77e5d8673a6b2aa14455
3
+ size 31485952
params_shard_14.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:883166c6fbc5a76a7ad872627e2dd5239102d084a01af0789e3b18386695c8a7
3
+ size 46137344
params_shard_15.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7514f0aba9bafc6edeb44225a246413e5f75df4ff128c778d025457b9b17a0f6
3
+ size 25165824
params_shard_16.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2a390b4f6d2ab34611b2b176c397c12de7152a64e24e093f0e7bf868fb89630
3
+ size 31485952
params_shard_17.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f458d8589b351098f7b9c8d35f93c927d50f659804ef01f0239f68494cfe3641
3
+ size 46137344
params_shard_18.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84577ec419412d6291e7e548b0975d1f305ef7e48c4769ee421fa9fdd99ef7e5
3
+ size 25165824
params_shard_19.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60d4a8c18b582b21c522255cc04d8a1cb5bb33d3178c176165b25d9a85b00aeb
3
+ size 31485952
params_shard_2.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ded6bc9afdce13115535bfaf66341aca47d72467512e73f452ab6d0e1de67641
3
+ size 46137344
params_shard_20.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28b4f87e28cbe07159682170db8398c4c3c0c66f1348605d71e9ce6dbf930c74
3
+ size 46137344
params_shard_21.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e833c1c5a205eb1754924acedcb98460e495e5ee2d02df1ea0f290467e4e49f5
3
+ size 25165824
params_shard_22.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd71c025b1c6bb996fb541eea0350643dd7ba520ff19d36bb873162f4f58942e
3
+ size 31485952
params_shard_23.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f52c4e1fe68d39a0da235dca459c8f62fc2373702351e40e6a34ec1e80caf85
3
+ size 46137344
params_shard_24.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:529d426c2d98a00fca6ed1fcd97d8af149087a5e010b0086c09907fc2fb51326
3
+ size 25165824
params_shard_25.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68d5651b35446219986f87aa4dd1eeef98da8a9ad1b1a25fcca9d95c731cb2cc
3
+ size 31485952
params_shard_26.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13cdef2872d8ee6169590d8a5b449e61816bc00f1a5d5c772a63f5b12eaa9d9b
3
+ size 46137344
params_shard_27.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd8644d1bb1c9e1a794f1c992b55127f3030e08cbdc401270f1c88a8f7f00dd5
3
+ size 25165824
params_shard_28.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0029144ab7089adb3d8c949e9152b6a42f053db105f0ba196bc48445a12b90cf
3
+ size 31485952
params_shard_29.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e5c2e029575d21d5b4081d0eccd5dca26e396608c07733764341258492974b1
3
+ size 46137344
params_shard_3.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72bf6a1c0e4ec8af170e52d9b1fb25858e591be4ad8561ea74746156123f64f7
3
+ size 25165824
params_shard_30.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40c56afed5444ee49f5ec8130457bfdc1a9a05372d4baae1bff27a8f639c3d74
3
+ size 25165824
params_shard_31.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3cd0cd01e93c2a717972faaeac1707ffe25d31fd8b02e4c161bd09ec9ab6caa6
3
+ size 31485952
params_shard_32.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8400b0182818b9339c9b074634100e628c1802cbcb2d7ae5d70b5a9fee8d2f22
3
+ size 46137344
params_shard_33.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3bc49bce7c2ed8b4ec15f5583966ef2d192bef2784525f78b3d66006e3031de4
3
+ size 25165824
params_shard_34.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a95b90d5e615d16bf9658c88830972774c1dbcd1cbde5e4ae5d7d2bb9468439f
3
+ size 31485952
params_shard_35.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3dac5ab3f63a3e55da3e63c6922f5edf4c03928f1a6e239f7797e0b4395c0d4a
3
+ size 46137344
params_shard_36.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:efcc8f385121ac76dfd409b3b50bb4ef5260cb7c14e6a87d3cd7fd2219a65030
3
+ size 25165824
params_shard_37.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ee3113e5f23ae92f318b85fd9337e89b615628199a6a5959f4216e032757cab
3
+ size 31485952
params_shard_38.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7aadcca6f7df0b4f74d02aa061c42a074b0605db22c34579eb0e7e46b280af9e
3
+ size 46137344
params_shard_39.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d43220295a4be33f2698937d3e5770d211e2490ab755662ebe30fb0b444a1a1a
3
+ size 25165824
params_shard_4.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2414f50c3ce09bc7866cb76045f2c2b9c4f1925c2b280e7a9727d2a47f5ab79d
3
+ size 23068672
params_shard_40.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca76cae6214a33781cb64427e399da065d8be096d71baf048717ea0d4c372a8c
3
+ size 31485952
params_shard_41.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2904bb7d3c9a1797ed16316e8cce21c601f6cf8ef01d96893c6986f93b217ce5
3
+ size 46137344
params_shard_42.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9280ba63e95731c1235f18304f4fb6f12c988c959bea2b2213e20699583e6aff
3
+ size 25165824
params_shard_43.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75d9eb2df629e4c95e4b77b31e4434d5103324d4edfca5dcc78307888399f9a5
3
+ size 31485952
params_shard_44.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01bb5c4280dcdd7ce8c610cdfa1403a4fef82514a9869736305084a5bc3b28eb
3
+ size 46137344
params_shard_45.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b422164a123450a2d0044eb27d09ee9091bd1e9a0a29a71df5399c15aebf9307
3
+ size 25165824
params_shard_46.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23c77953bc8418ef34526958ec9599aad66c31db50d36d5e0d3a3f077ebd5758
3
+ size 31485952
params_shard_47.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ea1ad50e90810605e7e09224f02b4d1a3adc6d01a27b344688ad2eea62ef513
3
+ size 46137344
params_shard_48.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85ccbdba8be346de0fca89acbea51cf09209db7915f96c0a570787593d756d75
3
+ size 25165824
params_shard_49.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:daea179baeda6ea8c8ed7571f6159177dd0ac3e8da127c823ec3f56801ef06f4
3
+ size 31485952