Rick Zhou committed on
Commit
a2d06cd
1 Parent(s): 83b3e60

upload weights

Browse files
mlc-chat-config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "gpt2",
3
+ "quantization": "q0f32",
4
+ "model_config": {
5
+ "vocab_size": 55028,
6
+ "n_embd": 768,
7
+ "n_layer": 12,
8
+ "n_head": 12,
9
+ "layer_norm_epsilon": 1e-05,
10
+ "n_inner": 3072,
11
+ "context_window_size": 1024,
12
+ "prefill_chunk_size": 1024,
13
+ "scale_attn_by_inverse_layer_idx": true
14
+ },
15
+ "vocab_size": 55028,
16
+ "context_window_size": 1024,
17
+ "sliding_window": -1,
18
+ "prefill_chunk_size": 1024,
19
+ "mean_gen_len": 128,
20
+ "max_gen_len": 512,
21
+ "shift_fill_factor": 0.3,
22
+ "temperature": 1.0,
23
+ "repetition_penalty": 1.0,
24
+ "top_p": 1.0,
25
+ "conv_template": "gpt2",
26
+ "pad_token_id": null,
27
+ "bos_token_id": 55025,
28
+ "eos_token_id": 55025,
29
+ "tokenizer_files": [],
30
+ "version": "0.1.0"
31
+ }
ndarray-cache.json ADDED
@@ -0,0 +1,1662 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "ParamSize": 149,
4
+ "ParamBytes": 681461760.0,
5
+ "BitsPerParam": 32.0
6
+ },
7
+ "records": [
8
+ {
9
+ "dataPath": "params_shard_0.bin",
10
+ "format": "raw-shard",
11
+ "nbytes": 28385280,
12
+ "records": [
13
+ {
14
+ "name": "transformer.h.0.ln_1.weight",
15
+ "shape": [
16
+ 768
17
+ ],
18
+ "dtype": "float32",
19
+ "format": "raw",
20
+ "nbytes": 3072,
21
+ "byteOffset": 0
22
+ },
23
+ {
24
+ "name": "transformer.h.1.ln_1.weight",
25
+ "shape": [
26
+ 768
27
+ ],
28
+ "dtype": "float32",
29
+ "format": "raw",
30
+ "nbytes": 3072,
31
+ "byteOffset": 3072
32
+ },
33
+ {
34
+ "name": "transformer.h.2.ln_1.weight",
35
+ "shape": [
36
+ 768
37
+ ],
38
+ "dtype": "float32",
39
+ "format": "raw",
40
+ "nbytes": 3072,
41
+ "byteOffset": 6144
42
+ },
43
+ {
44
+ "name": "transformer.h.3.ln_1.weight",
45
+ "shape": [
46
+ 768
47
+ ],
48
+ "dtype": "float32",
49
+ "format": "raw",
50
+ "nbytes": 3072,
51
+ "byteOffset": 9216
52
+ },
53
+ {
54
+ "name": "transformer.h.4.ln_1.weight",
55
+ "shape": [
56
+ 768
57
+ ],
58
+ "dtype": "float32",
59
+ "format": "raw",
60
+ "nbytes": 3072,
61
+ "byteOffset": 12288
62
+ },
63
+ {
64
+ "name": "transformer.h.5.ln_1.weight",
65
+ "shape": [
66
+ 768
67
+ ],
68
+ "dtype": "float32",
69
+ "format": "raw",
70
+ "nbytes": 3072,
71
+ "byteOffset": 15360
72
+ },
73
+ {
74
+ "name": "transformer.h.6.ln_1.weight",
75
+ "shape": [
76
+ 768
77
+ ],
78
+ "dtype": "float32",
79
+ "format": "raw",
80
+ "nbytes": 3072,
81
+ "byteOffset": 18432
82
+ },
83
+ {
84
+ "name": "transformer.h.7.ln_1.weight",
85
+ "shape": [
86
+ 768
87
+ ],
88
+ "dtype": "float32",
89
+ "format": "raw",
90
+ "nbytes": 3072,
91
+ "byteOffset": 21504
92
+ },
93
+ {
94
+ "name": "transformer.h.8.ln_1.weight",
95
+ "shape": [
96
+ 768
97
+ ],
98
+ "dtype": "float32",
99
+ "format": "raw",
100
+ "nbytes": 3072,
101
+ "byteOffset": 24576
102
+ },
103
+ {
104
+ "name": "transformer.h.9.ln_1.weight",
105
+ "shape": [
106
+ 768
107
+ ],
108
+ "dtype": "float32",
109
+ "format": "raw",
110
+ "nbytes": 3072,
111
+ "byteOffset": 27648
112
+ },
113
+ {
114
+ "name": "transformer.h.10.ln_1.weight",
115
+ "shape": [
116
+ 768
117
+ ],
118
+ "dtype": "float32",
119
+ "format": "raw",
120
+ "nbytes": 3072,
121
+ "byteOffset": 30720
122
+ },
123
+ {
124
+ "name": "transformer.h.11.ln_1.weight",
125
+ "shape": [
126
+ 768
127
+ ],
128
+ "dtype": "float32",
129
+ "format": "raw",
130
+ "nbytes": 3072,
131
+ "byteOffset": 33792
132
+ },
133
+ {
134
+ "name": "transformer.h.0.ln_1.bias",
135
+ "shape": [
136
+ 768
137
+ ],
138
+ "dtype": "float32",
139
+ "format": "raw",
140
+ "nbytes": 3072,
141
+ "byteOffset": 36864
142
+ },
143
+ {
144
+ "name": "transformer.h.1.ln_1.bias",
145
+ "shape": [
146
+ 768
147
+ ],
148
+ "dtype": "float32",
149
+ "format": "raw",
150
+ "nbytes": 3072,
151
+ "byteOffset": 39936
152
+ },
153
+ {
154
+ "name": "transformer.h.2.ln_1.bias",
155
+ "shape": [
156
+ 768
157
+ ],
158
+ "dtype": "float32",
159
+ "format": "raw",
160
+ "nbytes": 3072,
161
+ "byteOffset": 43008
162
+ },
163
+ {
164
+ "name": "transformer.h.3.ln_1.bias",
165
+ "shape": [
166
+ 768
167
+ ],
168
+ "dtype": "float32",
169
+ "format": "raw",
170
+ "nbytes": 3072,
171
+ "byteOffset": 46080
172
+ },
173
+ {
174
+ "name": "transformer.h.4.ln_1.bias",
175
+ "shape": [
176
+ 768
177
+ ],
178
+ "dtype": "float32",
179
+ "format": "raw",
180
+ "nbytes": 3072,
181
+ "byteOffset": 49152
182
+ },
183
+ {
184
+ "name": "transformer.h.5.ln_1.bias",
185
+ "shape": [
186
+ 768
187
+ ],
188
+ "dtype": "float32",
189
+ "format": "raw",
190
+ "nbytes": 3072,
191
+ "byteOffset": 52224
192
+ },
193
+ {
194
+ "name": "transformer.h.6.ln_1.bias",
195
+ "shape": [
196
+ 768
197
+ ],
198
+ "dtype": "float32",
199
+ "format": "raw",
200
+ "nbytes": 3072,
201
+ "byteOffset": 55296
202
+ },
203
+ {
204
+ "name": "transformer.h.7.ln_1.bias",
205
+ "shape": [
206
+ 768
207
+ ],
208
+ "dtype": "float32",
209
+ "format": "raw",
210
+ "nbytes": 3072,
211
+ "byteOffset": 58368
212
+ },
213
+ {
214
+ "name": "transformer.h.8.ln_1.bias",
215
+ "shape": [
216
+ 768
217
+ ],
218
+ "dtype": "float32",
219
+ "format": "raw",
220
+ "nbytes": 3072,
221
+ "byteOffset": 61440
222
+ },
223
+ {
224
+ "name": "transformer.h.9.ln_1.bias",
225
+ "shape": [
226
+ 768
227
+ ],
228
+ "dtype": "float32",
229
+ "format": "raw",
230
+ "nbytes": 3072,
231
+ "byteOffset": 64512
232
+ },
233
+ {
234
+ "name": "transformer.h.10.ln_1.bias",
235
+ "shape": [
236
+ 768
237
+ ],
238
+ "dtype": "float32",
239
+ "format": "raw",
240
+ "nbytes": 3072,
241
+ "byteOffset": 67584
242
+ },
243
+ {
244
+ "name": "transformer.h.11.ln_1.bias",
245
+ "shape": [
246
+ 768
247
+ ],
248
+ "dtype": "float32",
249
+ "format": "raw",
250
+ "nbytes": 3072,
251
+ "byteOffset": 70656
252
+ },
253
+ {
254
+ "name": "transformer.h.0.attn.c_attn.weight",
255
+ "shape": [
256
+ 2304,
257
+ 768
258
+ ],
259
+ "dtype": "float32",
260
+ "format": "raw",
261
+ "nbytes": 7077888,
262
+ "byteOffset": 73728
263
+ },
264
+ {
265
+ "name": "transformer.h.1.attn.c_attn.weight",
266
+ "shape": [
267
+ 2304,
268
+ 768
269
+ ],
270
+ "dtype": "float32",
271
+ "format": "raw",
272
+ "nbytes": 7077888,
273
+ "byteOffset": 7151616
274
+ },
275
+ {
276
+ "name": "transformer.h.2.attn.c_attn.weight",
277
+ "shape": [
278
+ 2304,
279
+ 768
280
+ ],
281
+ "dtype": "float32",
282
+ "format": "raw",
283
+ "nbytes": 7077888,
284
+ "byteOffset": 14229504
285
+ },
286
+ {
287
+ "name": "transformer.h.3.attn.c_attn.weight",
288
+ "shape": [
289
+ 2304,
290
+ 768
291
+ ],
292
+ "dtype": "float32",
293
+ "format": "raw",
294
+ "nbytes": 7077888,
295
+ "byteOffset": 21307392
296
+ }
297
+ ],
298
+ "md5sum": "23007ed9914f6fb9f04ccbd14426d358"
299
+ },
300
+ {
301
+ "dataPath": "params_shard_1.bin",
302
+ "format": "raw-shard",
303
+ "nbytes": 28311552,
304
+ "records": [
305
+ {
306
+ "name": "transformer.h.4.attn.c_attn.weight",
307
+ "shape": [
308
+ 2304,
309
+ 768
310
+ ],
311
+ "dtype": "float32",
312
+ "format": "raw",
313
+ "nbytes": 7077888,
314
+ "byteOffset": 0
315
+ },
316
+ {
317
+ "name": "transformer.h.5.attn.c_attn.weight",
318
+ "shape": [
319
+ 2304,
320
+ 768
321
+ ],
322
+ "dtype": "float32",
323
+ "format": "raw",
324
+ "nbytes": 7077888,
325
+ "byteOffset": 7077888
326
+ },
327
+ {
328
+ "name": "transformer.h.6.attn.c_attn.weight",
329
+ "shape": [
330
+ 2304,
331
+ 768
332
+ ],
333
+ "dtype": "float32",
334
+ "format": "raw",
335
+ "nbytes": 7077888,
336
+ "byteOffset": 14155776
337
+ },
338
+ {
339
+ "name": "transformer.h.7.attn.c_attn.weight",
340
+ "shape": [
341
+ 2304,
342
+ 768
343
+ ],
344
+ "dtype": "float32",
345
+ "format": "raw",
346
+ "nbytes": 7077888,
347
+ "byteOffset": 21233664
348
+ }
349
+ ],
350
+ "md5sum": "34800fa175402b89912639d507ac052b"
351
+ },
352
+ {
353
+ "dataPath": "params_shard_2.bin",
354
+ "format": "raw-shard",
355
+ "nbytes": 33140736,
356
+ "records": [
357
+ {
358
+ "name": "transformer.h.8.attn.c_attn.weight",
359
+ "shape": [
360
+ 2304,
361
+ 768
362
+ ],
363
+ "dtype": "float32",
364
+ "format": "raw",
365
+ "nbytes": 7077888,
366
+ "byteOffset": 0
367
+ },
368
+ {
369
+ "name": "transformer.h.9.attn.c_attn.weight",
370
+ "shape": [
371
+ 2304,
372
+ 768
373
+ ],
374
+ "dtype": "float32",
375
+ "format": "raw",
376
+ "nbytes": 7077888,
377
+ "byteOffset": 7077888
378
+ },
379
+ {
380
+ "name": "transformer.h.10.attn.c_attn.weight",
381
+ "shape": [
382
+ 2304,
383
+ 768
384
+ ],
385
+ "dtype": "float32",
386
+ "format": "raw",
387
+ "nbytes": 7077888,
388
+ "byteOffset": 14155776
389
+ },
390
+ {
391
+ "name": "transformer.h.11.attn.c_attn.weight",
392
+ "shape": [
393
+ 2304,
394
+ 768
395
+ ],
396
+ "dtype": "float32",
397
+ "format": "raw",
398
+ "nbytes": 7077888,
399
+ "byteOffset": 21233664
400
+ },
401
+ {
402
+ "name": "transformer.h.0.attn.c_attn.bias",
403
+ "shape": [
404
+ 2304
405
+ ],
406
+ "dtype": "float32",
407
+ "format": "raw",
408
+ "nbytes": 9216,
409
+ "byteOffset": 28311552
410
+ },
411
+ {
412
+ "name": "transformer.h.1.attn.c_attn.bias",
413
+ "shape": [
414
+ 2304
415
+ ],
416
+ "dtype": "float32",
417
+ "format": "raw",
418
+ "nbytes": 9216,
419
+ "byteOffset": 28320768
420
+ },
421
+ {
422
+ "name": "transformer.h.2.attn.c_attn.bias",
423
+ "shape": [
424
+ 2304
425
+ ],
426
+ "dtype": "float32",
427
+ "format": "raw",
428
+ "nbytes": 9216,
429
+ "byteOffset": 28329984
430
+ },
431
+ {
432
+ "name": "transformer.h.3.attn.c_attn.bias",
433
+ "shape": [
434
+ 2304
435
+ ],
436
+ "dtype": "float32",
437
+ "format": "raw",
438
+ "nbytes": 9216,
439
+ "byteOffset": 28339200
440
+ },
441
+ {
442
+ "name": "transformer.h.4.attn.c_attn.bias",
443
+ "shape": [
444
+ 2304
445
+ ],
446
+ "dtype": "float32",
447
+ "format": "raw",
448
+ "nbytes": 9216,
449
+ "byteOffset": 28348416
450
+ },
451
+ {
452
+ "name": "transformer.h.5.attn.c_attn.bias",
453
+ "shape": [
454
+ 2304
455
+ ],
456
+ "dtype": "float32",
457
+ "format": "raw",
458
+ "nbytes": 9216,
459
+ "byteOffset": 28357632
460
+ },
461
+ {
462
+ "name": "transformer.h.6.attn.c_attn.bias",
463
+ "shape": [
464
+ 2304
465
+ ],
466
+ "dtype": "float32",
467
+ "format": "raw",
468
+ "nbytes": 9216,
469
+ "byteOffset": 28366848
470
+ },
471
+ {
472
+ "name": "transformer.h.7.attn.c_attn.bias",
473
+ "shape": [
474
+ 2304
475
+ ],
476
+ "dtype": "float32",
477
+ "format": "raw",
478
+ "nbytes": 9216,
479
+ "byteOffset": 28376064
480
+ },
481
+ {
482
+ "name": "transformer.h.8.attn.c_attn.bias",
483
+ "shape": [
484
+ 2304
485
+ ],
486
+ "dtype": "float32",
487
+ "format": "raw",
488
+ "nbytes": 9216,
489
+ "byteOffset": 28385280
490
+ },
491
+ {
492
+ "name": "transformer.h.9.attn.c_attn.bias",
493
+ "shape": [
494
+ 2304
495
+ ],
496
+ "dtype": "float32",
497
+ "format": "raw",
498
+ "nbytes": 9216,
499
+ "byteOffset": 28394496
500
+ },
501
+ {
502
+ "name": "transformer.h.10.attn.c_attn.bias",
503
+ "shape": [
504
+ 2304
505
+ ],
506
+ "dtype": "float32",
507
+ "format": "raw",
508
+ "nbytes": 9216,
509
+ "byteOffset": 28403712
510
+ },
511
+ {
512
+ "name": "transformer.h.11.attn.c_attn.bias",
513
+ "shape": [
514
+ 2304
515
+ ],
516
+ "dtype": "float32",
517
+ "format": "raw",
518
+ "nbytes": 9216,
519
+ "byteOffset": 28412928
520
+ },
521
+ {
522
+ "name": "transformer.h.0.attn.c_proj.weight",
523
+ "shape": [
524
+ 768,
525
+ 768
526
+ ],
527
+ "dtype": "float32",
528
+ "format": "raw",
529
+ "nbytes": 2359296,
530
+ "byteOffset": 28422144
531
+ },
532
+ {
533
+ "name": "transformer.h.1.attn.c_proj.weight",
534
+ "shape": [
535
+ 768,
536
+ 768
537
+ ],
538
+ "dtype": "float32",
539
+ "format": "raw",
540
+ "nbytes": 2359296,
541
+ "byteOffset": 30781440
542
+ }
543
+ ],
544
+ "md5sum": "3bc3ef27da003e2e9a77760500ddaf55"
545
+ },
546
+ {
547
+ "dataPath": "params_shard_3.bin",
548
+ "format": "raw-shard",
549
+ "nbytes": 33140736,
550
+ "records": [
551
+ {
552
+ "name": "transformer.h.2.attn.c_proj.weight",
553
+ "shape": [
554
+ 768,
555
+ 768
556
+ ],
557
+ "dtype": "float32",
558
+ "format": "raw",
559
+ "nbytes": 2359296,
560
+ "byteOffset": 0
561
+ },
562
+ {
563
+ "name": "transformer.h.3.attn.c_proj.weight",
564
+ "shape": [
565
+ 768,
566
+ 768
567
+ ],
568
+ "dtype": "float32",
569
+ "format": "raw",
570
+ "nbytes": 2359296,
571
+ "byteOffset": 2359296
572
+ },
573
+ {
574
+ "name": "transformer.h.4.attn.c_proj.weight",
575
+ "shape": [
576
+ 768,
577
+ 768
578
+ ],
579
+ "dtype": "float32",
580
+ "format": "raw",
581
+ "nbytes": 2359296,
582
+ "byteOffset": 4718592
583
+ },
584
+ {
585
+ "name": "transformer.h.5.attn.c_proj.weight",
586
+ "shape": [
587
+ 768,
588
+ 768
589
+ ],
590
+ "dtype": "float32",
591
+ "format": "raw",
592
+ "nbytes": 2359296,
593
+ "byteOffset": 7077888
594
+ },
595
+ {
596
+ "name": "transformer.h.6.attn.c_proj.weight",
597
+ "shape": [
598
+ 768,
599
+ 768
600
+ ],
601
+ "dtype": "float32",
602
+ "format": "raw",
603
+ "nbytes": 2359296,
604
+ "byteOffset": 9437184
605
+ },
606
+ {
607
+ "name": "transformer.h.7.attn.c_proj.weight",
608
+ "shape": [
609
+ 768,
610
+ 768
611
+ ],
612
+ "dtype": "float32",
613
+ "format": "raw",
614
+ "nbytes": 2359296,
615
+ "byteOffset": 11796480
616
+ },
617
+ {
618
+ "name": "transformer.h.8.attn.c_proj.weight",
619
+ "shape": [
620
+ 768,
621
+ 768
622
+ ],
623
+ "dtype": "float32",
624
+ "format": "raw",
625
+ "nbytes": 2359296,
626
+ "byteOffset": 14155776
627
+ },
628
+ {
629
+ "name": "transformer.h.9.attn.c_proj.weight",
630
+ "shape": [
631
+ 768,
632
+ 768
633
+ ],
634
+ "dtype": "float32",
635
+ "format": "raw",
636
+ "nbytes": 2359296,
637
+ "byteOffset": 16515072
638
+ },
639
+ {
640
+ "name": "transformer.h.10.attn.c_proj.weight",
641
+ "shape": [
642
+ 768,
643
+ 768
644
+ ],
645
+ "dtype": "float32",
646
+ "format": "raw",
647
+ "nbytes": 2359296,
648
+ "byteOffset": 18874368
649
+ },
650
+ {
651
+ "name": "transformer.h.11.attn.c_proj.weight",
652
+ "shape": [
653
+ 768,
654
+ 768
655
+ ],
656
+ "dtype": "float32",
657
+ "format": "raw",
658
+ "nbytes": 2359296,
659
+ "byteOffset": 21233664
660
+ },
661
+ {
662
+ "name": "transformer.h.0.attn.c_proj.bias",
663
+ "shape": [
664
+ 768
665
+ ],
666
+ "dtype": "float32",
667
+ "format": "raw",
668
+ "nbytes": 3072,
669
+ "byteOffset": 23592960
670
+ },
671
+ {
672
+ "name": "transformer.h.1.attn.c_proj.bias",
673
+ "shape": [
674
+ 768
675
+ ],
676
+ "dtype": "float32",
677
+ "format": "raw",
678
+ "nbytes": 3072,
679
+ "byteOffset": 23596032
680
+ },
681
+ {
682
+ "name": "transformer.h.2.attn.c_proj.bias",
683
+ "shape": [
684
+ 768
685
+ ],
686
+ "dtype": "float32",
687
+ "format": "raw",
688
+ "nbytes": 3072,
689
+ "byteOffset": 23599104
690
+ },
691
+ {
692
+ "name": "transformer.h.3.attn.c_proj.bias",
693
+ "shape": [
694
+ 768
695
+ ],
696
+ "dtype": "float32",
697
+ "format": "raw",
698
+ "nbytes": 3072,
699
+ "byteOffset": 23602176
700
+ },
701
+ {
702
+ "name": "transformer.h.4.attn.c_proj.bias",
703
+ "shape": [
704
+ 768
705
+ ],
706
+ "dtype": "float32",
707
+ "format": "raw",
708
+ "nbytes": 3072,
709
+ "byteOffset": 23605248
710
+ },
711
+ {
712
+ "name": "transformer.h.5.attn.c_proj.bias",
713
+ "shape": [
714
+ 768
715
+ ],
716
+ "dtype": "float32",
717
+ "format": "raw",
718
+ "nbytes": 3072,
719
+ "byteOffset": 23608320
720
+ },
721
+ {
722
+ "name": "transformer.h.6.attn.c_proj.bias",
723
+ "shape": [
724
+ 768
725
+ ],
726
+ "dtype": "float32",
727
+ "format": "raw",
728
+ "nbytes": 3072,
729
+ "byteOffset": 23611392
730
+ },
731
+ {
732
+ "name": "transformer.h.7.attn.c_proj.bias",
733
+ "shape": [
734
+ 768
735
+ ],
736
+ "dtype": "float32",
737
+ "format": "raw",
738
+ "nbytes": 3072,
739
+ "byteOffset": 23614464
740
+ },
741
+ {
742
+ "name": "transformer.h.8.attn.c_proj.bias",
743
+ "shape": [
744
+ 768
745
+ ],
746
+ "dtype": "float32",
747
+ "format": "raw",
748
+ "nbytes": 3072,
749
+ "byteOffset": 23617536
750
+ },
751
+ {
752
+ "name": "transformer.h.9.attn.c_proj.bias",
753
+ "shape": [
754
+ 768
755
+ ],
756
+ "dtype": "float32",
757
+ "format": "raw",
758
+ "nbytes": 3072,
759
+ "byteOffset": 23620608
760
+ },
761
+ {
762
+ "name": "transformer.h.10.attn.c_proj.bias",
763
+ "shape": [
764
+ 768
765
+ ],
766
+ "dtype": "float32",
767
+ "format": "raw",
768
+ "nbytes": 3072,
769
+ "byteOffset": 23623680
770
+ },
771
+ {
772
+ "name": "transformer.h.11.attn.c_proj.bias",
773
+ "shape": [
774
+ 768
775
+ ],
776
+ "dtype": "float32",
777
+ "format": "raw",
778
+ "nbytes": 3072,
779
+ "byteOffset": 23626752
780
+ },
781
+ {
782
+ "name": "transformer.h.0.ln_2.weight",
783
+ "shape": [
784
+ 768
785
+ ],
786
+ "dtype": "float32",
787
+ "format": "raw",
788
+ "nbytes": 3072,
789
+ "byteOffset": 23629824
790
+ },
791
+ {
792
+ "name": "transformer.h.1.ln_2.weight",
793
+ "shape": [
794
+ 768
795
+ ],
796
+ "dtype": "float32",
797
+ "format": "raw",
798
+ "nbytes": 3072,
799
+ "byteOffset": 23632896
800
+ },
801
+ {
802
+ "name": "transformer.h.2.ln_2.weight",
803
+ "shape": [
804
+ 768
805
+ ],
806
+ "dtype": "float32",
807
+ "format": "raw",
808
+ "nbytes": 3072,
809
+ "byteOffset": 23635968
810
+ },
811
+ {
812
+ "name": "transformer.h.3.ln_2.weight",
813
+ "shape": [
814
+ 768
815
+ ],
816
+ "dtype": "float32",
817
+ "format": "raw",
818
+ "nbytes": 3072,
819
+ "byteOffset": 23639040
820
+ },
821
+ {
822
+ "name": "transformer.h.4.ln_2.weight",
823
+ "shape": [
824
+ 768
825
+ ],
826
+ "dtype": "float32",
827
+ "format": "raw",
828
+ "nbytes": 3072,
829
+ "byteOffset": 23642112
830
+ },
831
+ {
832
+ "name": "transformer.h.5.ln_2.weight",
833
+ "shape": [
834
+ 768
835
+ ],
836
+ "dtype": "float32",
837
+ "format": "raw",
838
+ "nbytes": 3072,
839
+ "byteOffset": 23645184
840
+ },
841
+ {
842
+ "name": "transformer.h.6.ln_2.weight",
843
+ "shape": [
844
+ 768
845
+ ],
846
+ "dtype": "float32",
847
+ "format": "raw",
848
+ "nbytes": 3072,
849
+ "byteOffset": 23648256
850
+ },
851
+ {
852
+ "name": "transformer.h.7.ln_2.weight",
853
+ "shape": [
854
+ 768
855
+ ],
856
+ "dtype": "float32",
857
+ "format": "raw",
858
+ "nbytes": 3072,
859
+ "byteOffset": 23651328
860
+ },
861
+ {
862
+ "name": "transformer.h.8.ln_2.weight",
863
+ "shape": [
864
+ 768
865
+ ],
866
+ "dtype": "float32",
867
+ "format": "raw",
868
+ "nbytes": 3072,
869
+ "byteOffset": 23654400
870
+ },
871
+ {
872
+ "name": "transformer.h.9.ln_2.weight",
873
+ "shape": [
874
+ 768
875
+ ],
876
+ "dtype": "float32",
877
+ "format": "raw",
878
+ "nbytes": 3072,
879
+ "byteOffset": 23657472
880
+ },
881
+ {
882
+ "name": "transformer.h.10.ln_2.weight",
883
+ "shape": [
884
+ 768
885
+ ],
886
+ "dtype": "float32",
887
+ "format": "raw",
888
+ "nbytes": 3072,
889
+ "byteOffset": 23660544
890
+ },
891
+ {
892
+ "name": "transformer.h.11.ln_2.weight",
893
+ "shape": [
894
+ 768
895
+ ],
896
+ "dtype": "float32",
897
+ "format": "raw",
898
+ "nbytes": 3072,
899
+ "byteOffset": 23663616
900
+ },
901
+ {
902
+ "name": "transformer.h.0.ln_2.bias",
903
+ "shape": [
904
+ 768
905
+ ],
906
+ "dtype": "float32",
907
+ "format": "raw",
908
+ "nbytes": 3072,
909
+ "byteOffset": 23666688
910
+ },
911
+ {
912
+ "name": "transformer.h.1.ln_2.bias",
913
+ "shape": [
914
+ 768
915
+ ],
916
+ "dtype": "float32",
917
+ "format": "raw",
918
+ "nbytes": 3072,
919
+ "byteOffset": 23669760
920
+ },
921
+ {
922
+ "name": "transformer.h.2.ln_2.bias",
923
+ "shape": [
924
+ 768
925
+ ],
926
+ "dtype": "float32",
927
+ "format": "raw",
928
+ "nbytes": 3072,
929
+ "byteOffset": 23672832
930
+ },
931
+ {
932
+ "name": "transformer.h.3.ln_2.bias",
933
+ "shape": [
934
+ 768
935
+ ],
936
+ "dtype": "float32",
937
+ "format": "raw",
938
+ "nbytes": 3072,
939
+ "byteOffset": 23675904
940
+ },
941
+ {
942
+ "name": "transformer.h.4.ln_2.bias",
943
+ "shape": [
944
+ 768
945
+ ],
946
+ "dtype": "float32",
947
+ "format": "raw",
948
+ "nbytes": 3072,
949
+ "byteOffset": 23678976
950
+ },
951
+ {
952
+ "name": "transformer.h.5.ln_2.bias",
953
+ "shape": [
954
+ 768
955
+ ],
956
+ "dtype": "float32",
957
+ "format": "raw",
958
+ "nbytes": 3072,
959
+ "byteOffset": 23682048
960
+ },
961
+ {
962
+ "name": "transformer.h.6.ln_2.bias",
963
+ "shape": [
964
+ 768
965
+ ],
966
+ "dtype": "float32",
967
+ "format": "raw",
968
+ "nbytes": 3072,
969
+ "byteOffset": 23685120
970
+ },
971
+ {
972
+ "name": "transformer.h.7.ln_2.bias",
973
+ "shape": [
974
+ 768
975
+ ],
976
+ "dtype": "float32",
977
+ "format": "raw",
978
+ "nbytes": 3072,
979
+ "byteOffset": 23688192
980
+ },
981
+ {
982
+ "name": "transformer.h.8.ln_2.bias",
983
+ "shape": [
984
+ 768
985
+ ],
986
+ "dtype": "float32",
987
+ "format": "raw",
988
+ "nbytes": 3072,
989
+ "byteOffset": 23691264
990
+ },
991
+ {
992
+ "name": "transformer.h.9.ln_2.bias",
993
+ "shape": [
994
+ 768
995
+ ],
996
+ "dtype": "float32",
997
+ "format": "raw",
998
+ "nbytes": 3072,
999
+ "byteOffset": 23694336
1000
+ },
1001
+ {
1002
+ "name": "transformer.h.10.ln_2.bias",
1003
+ "shape": [
1004
+ 768
1005
+ ],
1006
+ "dtype": "float32",
1007
+ "format": "raw",
1008
+ "nbytes": 3072,
1009
+ "byteOffset": 23697408
1010
+ },
1011
+ {
1012
+ "name": "transformer.h.11.ln_2.bias",
1013
+ "shape": [
1014
+ 768
1015
+ ],
1016
+ "dtype": "float32",
1017
+ "format": "raw",
1018
+ "nbytes": 3072,
1019
+ "byteOffset": 23700480
1020
+ },
1021
+ {
1022
+ "name": "transformer.h.0.mlp.c_fc.weight",
1023
+ "shape": [
1024
+ 3072,
1025
+ 768
1026
+ ],
1027
+ "dtype": "float32",
1028
+ "format": "raw",
1029
+ "nbytes": 9437184,
1030
+ "byteOffset": 23703552
1031
+ }
1032
+ ],
1033
+ "md5sum": "31854bcfd1e8696dece65b47518b87b3"
1034
+ },
1035
+ {
1036
+ "dataPath": "params_shard_4.bin",
1037
+ "format": "raw-shard",
1038
+ "nbytes": 28311552,
1039
+ "records": [
1040
+ {
1041
+ "name": "transformer.h.1.mlp.c_fc.weight",
1042
+ "shape": [
1043
+ 3072,
1044
+ 768
1045
+ ],
1046
+ "dtype": "float32",
1047
+ "format": "raw",
1048
+ "nbytes": 9437184,
1049
+ "byteOffset": 0
1050
+ },
1051
+ {
1052
+ "name": "transformer.h.2.mlp.c_fc.weight",
1053
+ "shape": [
1054
+ 3072,
1055
+ 768
1056
+ ],
1057
+ "dtype": "float32",
1058
+ "format": "raw",
1059
+ "nbytes": 9437184,
1060
+ "byteOffset": 9437184
1061
+ },
1062
+ {
1063
+ "name": "transformer.h.3.mlp.c_fc.weight",
1064
+ "shape": [
1065
+ 3072,
1066
+ 768
1067
+ ],
1068
+ "dtype": "float32",
1069
+ "format": "raw",
1070
+ "nbytes": 9437184,
1071
+ "byteOffset": 18874368
1072
+ }
1073
+ ],
1074
+ "md5sum": "967b8189775a14280c96c82a222a2ff0"
1075
+ },
1076
+ {
1077
+ "dataPath": "params_shard_5.bin",
1078
+ "format": "raw-shard",
1079
+ "nbytes": 28311552,
1080
+ "records": [
1081
+ {
1082
+ "name": "transformer.h.4.mlp.c_fc.weight",
1083
+ "shape": [
1084
+ 3072,
1085
+ 768
1086
+ ],
1087
+ "dtype": "float32",
1088
+ "format": "raw",
1089
+ "nbytes": 9437184,
1090
+ "byteOffset": 0
1091
+ },
1092
+ {
1093
+ "name": "transformer.h.5.mlp.c_fc.weight",
1094
+ "shape": [
1095
+ 3072,
1096
+ 768
1097
+ ],
1098
+ "dtype": "float32",
1099
+ "format": "raw",
1100
+ "nbytes": 9437184,
1101
+ "byteOffset": 9437184
1102
+ },
1103
+ {
1104
+ "name": "transformer.h.6.mlp.c_fc.weight",
1105
+ "shape": [
1106
+ 3072,
1107
+ 768
1108
+ ],
1109
+ "dtype": "float32",
1110
+ "format": "raw",
1111
+ "nbytes": 9437184,
1112
+ "byteOffset": 18874368
1113
+ }
1114
+ ],
1115
+ "md5sum": "f843773afbb5f2cec9ac7baa18fb2d1f"
1116
+ },
1117
+ {
1118
+ "dataPath": "params_shard_6.bin",
1119
+ "format": "raw-shard",
1120
+ "nbytes": 28311552,
1121
+ "records": [
1122
+ {
1123
+ "name": "transformer.h.7.mlp.c_fc.weight",
1124
+ "shape": [
1125
+ 3072,
1126
+ 768
1127
+ ],
1128
+ "dtype": "float32",
1129
+ "format": "raw",
1130
+ "nbytes": 9437184,
1131
+ "byteOffset": 0
1132
+ },
1133
+ {
1134
+ "name": "transformer.h.8.mlp.c_fc.weight",
1135
+ "shape": [
1136
+ 3072,
1137
+ 768
1138
+ ],
1139
+ "dtype": "float32",
1140
+ "format": "raw",
1141
+ "nbytes": 9437184,
1142
+ "byteOffset": 9437184
1143
+ },
1144
+ {
1145
+ "name": "transformer.h.9.mlp.c_fc.weight",
1146
+ "shape": [
1147
+ 3072,
1148
+ 768
1149
+ ],
1150
+ "dtype": "float32",
1151
+ "format": "raw",
1152
+ "nbytes": 9437184,
1153
+ "byteOffset": 18874368
1154
+ }
1155
+ ],
1156
+ "md5sum": "e6dbc2df730448a99ee03e2b2b7db7ac"
1157
+ },
1158
+ {
1159
+ "dataPath": "params_shard_7.bin",
1160
+ "format": "raw-shard",
1161
+ "nbytes": 28459008,
1162
+ "records": [
1163
+ {
1164
+ "name": "transformer.h.10.mlp.c_fc.weight",
1165
+ "shape": [
1166
+ 3072,
1167
+ 768
1168
+ ],
1169
+ "dtype": "float32",
1170
+ "format": "raw",
1171
+ "nbytes": 9437184,
1172
+ "byteOffset": 0
1173
+ },
1174
+ {
1175
+ "name": "transformer.h.11.mlp.c_fc.weight",
1176
+ "shape": [
1177
+ 3072,
1178
+ 768
1179
+ ],
1180
+ "dtype": "float32",
1181
+ "format": "raw",
1182
+ "nbytes": 9437184,
1183
+ "byteOffset": 9437184
1184
+ },
1185
+ {
1186
+ "name": "transformer.h.0.mlp.c_fc.bias",
1187
+ "shape": [
1188
+ 3072
1189
+ ],
1190
+ "dtype": "float32",
1191
+ "format": "raw",
1192
+ "nbytes": 12288,
1193
+ "byteOffset": 18874368
1194
+ },
1195
+ {
1196
+ "name": "transformer.h.1.mlp.c_fc.bias",
1197
+ "shape": [
1198
+ 3072
1199
+ ],
1200
+ "dtype": "float32",
1201
+ "format": "raw",
1202
+ "nbytes": 12288,
1203
+ "byteOffset": 18886656
1204
+ },
1205
+ {
1206
+ "name": "transformer.h.2.mlp.c_fc.bias",
1207
+ "shape": [
1208
+ 3072
1209
+ ],
1210
+ "dtype": "float32",
1211
+ "format": "raw",
1212
+ "nbytes": 12288,
1213
+ "byteOffset": 18898944
1214
+ },
1215
+ {
1216
+ "name": "transformer.h.3.mlp.c_fc.bias",
1217
+ "shape": [
1218
+ 3072
1219
+ ],
1220
+ "dtype": "float32",
1221
+ "format": "raw",
1222
+ "nbytes": 12288,
1223
+ "byteOffset": 18911232
1224
+ },
1225
+ {
1226
+ "name": "transformer.h.4.mlp.c_fc.bias",
1227
+ "shape": [
1228
+ 3072
1229
+ ],
1230
+ "dtype": "float32",
1231
+ "format": "raw",
1232
+ "nbytes": 12288,
1233
+ "byteOffset": 18923520
1234
+ },
1235
+ {
1236
+ "name": "transformer.h.5.mlp.c_fc.bias",
1237
+ "shape": [
1238
+ 3072
1239
+ ],
1240
+ "dtype": "float32",
1241
+ "format": "raw",
1242
+ "nbytes": 12288,
1243
+ "byteOffset": 18935808
1244
+ },
1245
+ {
1246
+ "name": "transformer.h.6.mlp.c_fc.bias",
1247
+ "shape": [
1248
+ 3072
1249
+ ],
1250
+ "dtype": "float32",
1251
+ "format": "raw",
1252
+ "nbytes": 12288,
1253
+ "byteOffset": 18948096
1254
+ },
1255
+ {
1256
+ "name": "transformer.h.7.mlp.c_fc.bias",
1257
+ "shape": [
1258
+ 3072
1259
+ ],
1260
+ "dtype": "float32",
1261
+ "format": "raw",
1262
+ "nbytes": 12288,
1263
+ "byteOffset": 18960384
1264
+ },
1265
+ {
1266
+ "name": "transformer.h.8.mlp.c_fc.bias",
1267
+ "shape": [
1268
+ 3072
1269
+ ],
1270
+ "dtype": "float32",
1271
+ "format": "raw",
1272
+ "nbytes": 12288,
1273
+ "byteOffset": 18972672
1274
+ },
1275
+ {
1276
+ "name": "transformer.h.9.mlp.c_fc.bias",
1277
+ "shape": [
1278
+ 3072
1279
+ ],
1280
+ "dtype": "float32",
1281
+ "format": "raw",
1282
+ "nbytes": 12288,
1283
+ "byteOffset": 18984960
1284
+ },
1285
+ {
1286
+ "name": "transformer.h.10.mlp.c_fc.bias",
1287
+ "shape": [
1288
+ 3072
1289
+ ],
1290
+ "dtype": "float32",
1291
+ "format": "raw",
1292
+ "nbytes": 12288,
1293
+ "byteOffset": 18997248
1294
+ },
1295
+ {
1296
+ "name": "transformer.h.11.mlp.c_fc.bias",
1297
+ "shape": [
1298
+ 3072
1299
+ ],
1300
+ "dtype": "float32",
1301
+ "format": "raw",
1302
+ "nbytes": 12288,
1303
+ "byteOffset": 19009536
1304
+ },
1305
+ {
1306
+ "name": "transformer.h.0.mlp.c_proj.weight",
1307
+ "shape": [
1308
+ 768,
1309
+ 3072
1310
+ ],
1311
+ "dtype": "float32",
1312
+ "format": "raw",
1313
+ "nbytes": 9437184,
1314
+ "byteOffset": 19021824
1315
+ }
1316
+ ],
1317
+ "md5sum": "7d5d8531365b099e637a8fa0019af00b"
1318
+ },
1319
+ {
1320
+ "dataPath": "params_shard_8.bin",
1321
+ "format": "raw-shard",
1322
+ "nbytes": 28311552,
1323
+ "records": [
1324
+ {
1325
+ "name": "transformer.h.1.mlp.c_proj.weight",
1326
+ "shape": [
1327
+ 768,
1328
+ 3072
1329
+ ],
1330
+ "dtype": "float32",
1331
+ "format": "raw",
1332
+ "nbytes": 9437184,
1333
+ "byteOffset": 0
1334
+ },
1335
+ {
1336
+ "name": "transformer.h.2.mlp.c_proj.weight",
1337
+ "shape": [
1338
+ 768,
1339
+ 3072
1340
+ ],
1341
+ "dtype": "float32",
1342
+ "format": "raw",
1343
+ "nbytes": 9437184,
1344
+ "byteOffset": 9437184
1345
+ },
1346
+ {
1347
+ "name": "transformer.h.3.mlp.c_proj.weight",
1348
+ "shape": [
1349
+ 768,
1350
+ 3072
1351
+ ],
1352
+ "dtype": "float32",
1353
+ "format": "raw",
1354
+ "nbytes": 9437184,
1355
+ "byteOffset": 18874368
1356
+ }
1357
+ ],
1358
+ "md5sum": "605b25914c5d1814b6a87cd7ceb5bf4e"
1359
+ },
1360
+ {
1361
+ "dataPath": "params_shard_9.bin",
1362
+ "format": "raw-shard",
1363
+ "nbytes": 28311552,
1364
+ "records": [
1365
+ {
1366
+ "name": "transformer.h.4.mlp.c_proj.weight",
1367
+ "shape": [
1368
+ 768,
1369
+ 3072
1370
+ ],
1371
+ "dtype": "float32",
1372
+ "format": "raw",
1373
+ "nbytes": 9437184,
1374
+ "byteOffset": 0
1375
+ },
1376
+ {
1377
+ "name": "transformer.h.5.mlp.c_proj.weight",
1378
+ "shape": [
1379
+ 768,
1380
+ 3072
1381
+ ],
1382
+ "dtype": "float32",
1383
+ "format": "raw",
1384
+ "nbytes": 9437184,
1385
+ "byteOffset": 9437184
1386
+ },
1387
+ {
1388
+ "name": "transformer.h.6.mlp.c_proj.weight",
1389
+ "shape": [
1390
+ 768,
1391
+ 3072
1392
+ ],
1393
+ "dtype": "float32",
1394
+ "format": "raw",
1395
+ "nbytes": 9437184,
1396
+ "byteOffset": 18874368
1397
+ }
1398
+ ],
1399
+ "md5sum": "ffa9ce5855d4cbe6455facb4399663f0"
1400
+ },
1401
+ {
1402
+ "dataPath": "params_shard_10.bin",
1403
+ "format": "raw-shard",
1404
+ "nbytes": 28311552,
1405
+ "records": [
1406
+ {
1407
+ "name": "transformer.h.7.mlp.c_proj.weight",
1408
+ "shape": [
1409
+ 768,
1410
+ 3072
1411
+ ],
1412
+ "dtype": "float32",
1413
+ "format": "raw",
1414
+ "nbytes": 9437184,
1415
+ "byteOffset": 0
1416
+ },
1417
+ {
1418
+ "name": "transformer.h.8.mlp.c_proj.weight",
1419
+ "shape": [
1420
+ 768,
1421
+ 3072
1422
+ ],
1423
+ "dtype": "float32",
1424
+ "format": "raw",
1425
+ "nbytes": 9437184,
1426
+ "byteOffset": 9437184
1427
+ },
1428
+ {
1429
+ "name": "transformer.h.9.mlp.c_proj.weight",
1430
+ "shape": [
1431
+ 768,
1432
+ 3072
1433
+ ],
1434
+ "dtype": "float32",
1435
+ "format": "raw",
1436
+ "nbytes": 9437184,
1437
+ "byteOffset": 18874368
1438
+ }
1439
+ ],
1440
+ "md5sum": "0f5a870c04c9c095e1cf30f23c9e579e"
1441
+ },
1442
+ {
1443
+ "dataPath": "params_shard_11.bin",
1444
+ "format": "raw-shard",
1445
+ "nbytes": 169046016,
1446
+ "records": [
1447
+ {
1448
+ "name": "lm_head.weight",
1449
+ "shape": [
1450
+ 55028,
1451
+ 768
1452
+ ],
1453
+ "dtype": "float32",
1454
+ "format": "raw",
1455
+ "nbytes": 169046016,
1456
+ "byteOffset": 0
1457
+ }
1458
+ ],
1459
+ "md5sum": "e913e1b035726d5af7b6bebe91ca5600"
1460
+ },
1461
+ {
1462
+ "dataPath": "params_shard_12.bin",
1463
+ "format": "raw-shard",
1464
+ "nbytes": 169046016,
1465
+ "records": [
1466
+ {
1467
+ "name": "transformer.wte.weight",
1468
+ "shape": [
1469
+ 55028,
1470
+ 768
1471
+ ],
1472
+ "dtype": "float32",
1473
+ "format": "raw",
1474
+ "nbytes": 169046016,
1475
+ "byteOffset": 0
1476
+ }
1477
+ ],
1478
+ "md5sum": "e913e1b035726d5af7b6bebe91ca5600"
1479
+ },
1480
+ {
1481
+ "dataPath": "params_shard_13.bin",
1482
+ "format": "raw-shard",
1483
+ "nbytes": 22063104,
1484
+ "records": [
1485
+ {
1486
+ "name": "transformer.h.10.mlp.c_proj.weight",
1487
+ "shape": [
1488
+ 768,
1489
+ 3072
1490
+ ],
1491
+ "dtype": "float32",
1492
+ "format": "raw",
1493
+ "nbytes": 9437184,
1494
+ "byteOffset": 0
1495
+ },
1496
+ {
1497
+ "name": "transformer.h.11.mlp.c_proj.weight",
1498
+ "shape": [
1499
+ 768,
1500
+ 3072
1501
+ ],
1502
+ "dtype": "float32",
1503
+ "format": "raw",
1504
+ "nbytes": 9437184,
1505
+ "byteOffset": 9437184
1506
+ },
1507
+ {
1508
+ "name": "transformer.h.0.mlp.c_proj.bias",
1509
+ "shape": [
1510
+ 768
1511
+ ],
1512
+ "dtype": "float32",
1513
+ "format": "raw",
1514
+ "nbytes": 3072,
1515
+ "byteOffset": 18874368
1516
+ },
1517
+ {
1518
+ "name": "transformer.h.1.mlp.c_proj.bias",
1519
+ "shape": [
1520
+ 768
1521
+ ],
1522
+ "dtype": "float32",
1523
+ "format": "raw",
1524
+ "nbytes": 3072,
1525
+ "byteOffset": 18877440
1526
+ },
1527
+ {
1528
+ "name": "transformer.h.2.mlp.c_proj.bias",
1529
+ "shape": [
1530
+ 768
1531
+ ],
1532
+ "dtype": "float32",
1533
+ "format": "raw",
1534
+ "nbytes": 3072,
1535
+ "byteOffset": 18880512
1536
+ },
1537
+ {
1538
+ "name": "transformer.h.3.mlp.c_proj.bias",
1539
+ "shape": [
1540
+ 768
1541
+ ],
1542
+ "dtype": "float32",
1543
+ "format": "raw",
1544
+ "nbytes": 3072,
1545
+ "byteOffset": 18883584
1546
+ },
1547
+ {
1548
+ "name": "transformer.h.4.mlp.c_proj.bias",
1549
+ "shape": [
1550
+ 768
1551
+ ],
1552
+ "dtype": "float32",
1553
+ "format": "raw",
1554
+ "nbytes": 3072,
1555
+ "byteOffset": 18886656
1556
+ },
1557
+ {
1558
+ "name": "transformer.h.5.mlp.c_proj.bias",
1559
+ "shape": [
1560
+ 768
1561
+ ],
1562
+ "dtype": "float32",
1563
+ "format": "raw",
1564
+ "nbytes": 3072,
1565
+ "byteOffset": 18889728
1566
+ },
1567
+ {
1568
+ "name": "transformer.h.6.mlp.c_proj.bias",
1569
+ "shape": [
1570
+ 768
1571
+ ],
1572
+ "dtype": "float32",
1573
+ "format": "raw",
1574
+ "nbytes": 3072,
1575
+ "byteOffset": 18892800
1576
+ },
1577
+ {
1578
+ "name": "transformer.h.7.mlp.c_proj.bias",
1579
+ "shape": [
1580
+ 768
1581
+ ],
1582
+ "dtype": "float32",
1583
+ "format": "raw",
1584
+ "nbytes": 3072,
1585
+ "byteOffset": 18895872
1586
+ },
1587
+ {
1588
+ "name": "transformer.h.8.mlp.c_proj.bias",
1589
+ "shape": [
1590
+ 768
1591
+ ],
1592
+ "dtype": "float32",
1593
+ "format": "raw",
1594
+ "nbytes": 3072,
1595
+ "byteOffset": 18898944
1596
+ },
1597
+ {
1598
+ "name": "transformer.h.9.mlp.c_proj.bias",
1599
+ "shape": [
1600
+ 768
1601
+ ],
1602
+ "dtype": "float32",
1603
+ "format": "raw",
1604
+ "nbytes": 3072,
1605
+ "byteOffset": 18902016
1606
+ },
1607
+ {
1608
+ "name": "transformer.h.10.mlp.c_proj.bias",
1609
+ "shape": [
1610
+ 768
1611
+ ],
1612
+ "dtype": "float32",
1613
+ "format": "raw",
1614
+ "nbytes": 3072,
1615
+ "byteOffset": 18905088
1616
+ },
1617
+ {
1618
+ "name": "transformer.h.11.mlp.c_proj.bias",
1619
+ "shape": [
1620
+ 768
1621
+ ],
1622
+ "dtype": "float32",
1623
+ "format": "raw",
1624
+ "nbytes": 3072,
1625
+ "byteOffset": 18908160
1626
+ },
1627
+ {
1628
+ "name": "transformer.ln_f.weight",
1629
+ "shape": [
1630
+ 768
1631
+ ],
1632
+ "dtype": "float32",
1633
+ "format": "raw",
1634
+ "nbytes": 3072,
1635
+ "byteOffset": 18911232
1636
+ },
1637
+ {
1638
+ "name": "transformer.ln_f.bias",
1639
+ "shape": [
1640
+ 768
1641
+ ],
1642
+ "dtype": "float32",
1643
+ "format": "raw",
1644
+ "nbytes": 3072,
1645
+ "byteOffset": 18914304
1646
+ },
1647
+ {
1648
+ "name": "transformer.wpe.weight",
1649
+ "shape": [
1650
+ 1024,
1651
+ 768
1652
+ ],
1653
+ "dtype": "float32",
1654
+ "format": "raw",
1655
+ "nbytes": 3145728,
1656
+ "byteOffset": 18917376
1657
+ }
1658
+ ],
1659
+ "md5sum": "9ec2e8bf42f82c144cfd2253047927c2"
1660
+ }
1661
+ ]
1662
+ }
params_shard_0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a6335cabb84d127d32cfc1b0d7f854d2e427291aea33de4aef8261a0cbc5839
3
+ size 28385280
params_shard_1.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01fa5af9e2523b38e03e7d6916601065587ce69bf37627e6aed908daddc4dc25
3
+ size 28311552
params_shard_10.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:911d5dff9304db282a5ef87bf3eb8332fbba67c9b3b0cf917b670da3337008a9
3
+ size 28311552
params_shard_11.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0184df2ecf82ef08b2d16a3d0fb170cb3d3482b4015debfd304a433e4c670f1d
3
+ size 169046016
params_shard_12.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0184df2ecf82ef08b2d16a3d0fb170cb3d3482b4015debfd304a433e4c670f1d
3
+ size 169046016
params_shard_13.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f70226b458af43587fea1b4681866061f7fe6a4d01ad0c2cc4e3739cf36f3bcc
3
+ size 22063104
params_shard_2.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8dd9e89848a3fec378757339f55ed36c05cc1923bf41e169a683272b37889602
3
+ size 33140736
params_shard_3.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f24f71e813fba62a3e60cd337e7bcdc9ae52d05444d7d16f930e561b322131d9
3
+ size 33140736
params_shard_4.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c83aec1602fcad3088fe5fdbbf00f4e95d64dd0b7030f9c8994b7507fda4d11
3
+ size 28311552
params_shard_5.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5242c8169ae7b355066549c1c5305f1184e79407d5c8d008b08692e4ce52ba83
3
+ size 28311552
params_shard_6.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc172f867b64cfda197ae2b8dd9c0589c4c130f2cb3f8624a9fa43060b258e4c
3
+ size 28311552
params_shard_7.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf3e125dea2181c164bed2cf1673b28cea24678557bcfb01d7012886da46b0dd
3
+ size 28459008
params_shard_8.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7157d20276075cc3d7d592b1881743230849312d38778e14b53551bf4e38df1c
3
+ size 28311552
params_shard_9.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7c8c47d2ba99dfd0e5e8bf132d637014b723cb4313afac0193897a9ef1cf889
3
+ size 28311552