Charlie Ruan commited on
Commit
96def28
1 Parent(s): adb2ccb

Add weights

Browse files
mlc-chat-config.json ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "0.1.0",
3
+ "model_type": "bert",
4
+ "quantization": "q0f32",
5
+ "model_config": {
6
+ "vocab_size": 30522,
7
+ "hidden_size": 768,
8
+ "num_hidden_layers": 12,
9
+ "num_attention_heads": 12,
10
+ "intermediate_size": 3072,
11
+ "hidden_act": "gelu",
12
+ "layer_norm_eps": 1e-12,
13
+ "context_window_size": 512,
14
+ "prefill_chunk_size": 512,
15
+ "tensor_parallel_shards": 1,
16
+ "head_dim": 64,
17
+ "max_batch_size": 80
18
+ },
19
+ "vocab_size": 30522,
20
+ "context_window_size": 512,
21
+ "sliding_window_size": -1,
22
+ "prefill_chunk_size": 512,
23
+ "attention_sink_size": -1,
24
+ "tensor_parallel_shards": 1,
25
+ "pipeline_parallel_stages": 1,
26
+ "temperature": 1.0,
27
+ "presence_penalty": 0.0,
28
+ "frequency_penalty": 0.0,
29
+ "repetition_penalty": 1.0,
30
+ "top_p": 1.0,
31
+ "tokenizer_files": [
32
+ "tokenizer.json",
33
+ "tokenizer_config.json"
34
+ ],
35
+ "tokenizer_info": {
36
+ "token_postproc_method": "byte_fallback",
37
+ "prepend_space_in_encode": false,
38
+ "strip_space_in_decode": false
39
+ },
40
+ "conv_template": {
41
+ "name": "LM",
42
+ "system_template": "{system_message}",
43
+ "system_message": "",
44
+ "system_prefix_token_ids": [
45
+ 1
46
+ ],
47
+ "add_role_after_system_message": true,
48
+ "roles": {
49
+ "user": "",
50
+ "assistant": ""
51
+ },
52
+ "role_templates": {
53
+ "user": "{user_message}",
54
+ "assistant": "{assistant_message}",
55
+ "tool": "{tool_message}"
56
+ },
57
+ "messages": [],
58
+ "seps": [
59
+ ""
60
+ ],
61
+ "role_content_sep": "",
62
+ "role_empty_sep": "",
63
+ "stop_str": [],
64
+ "stop_token_ids": [
65
+ 2
66
+ ],
67
+ "function_string": "",
68
+ "use_function_calling": false
69
+ },
70
+ "pad_token_id": 0,
71
+ "bos_token_id": 1,
72
+ "eos_token_id": 2
73
+ }
ndarray-cache-b16.json ADDED
@@ -0,0 +1,1606 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "ParamSize": 149,
4
+ "ParamBytes": 435566592.0,
5
+ "BitsPerParam": 32.0
6
+ },
7
+ "records": [
8
+ {
9
+ "dataPath": "params_shard_0.bin",
10
+ "format": "raw-shard",
11
+ "nbytes": 46881792,
12
+ "records": [
13
+ {
14
+ "name": "embeddings.word_embeddings.weight",
15
+ "shape": [
16
+ 30522,
17
+ 768
18
+ ],
19
+ "dtype": "bfloat16",
20
+ "format": "raw",
21
+ "nbytes": 46881792,
22
+ "byteOffset": 0
23
+ }
24
+ ],
25
+ "md5sum": "2eeaa439340fec525d791ca37a3dd753"
26
+ },
27
+ {
28
+ "dataPath": "params_shard_1.bin",
29
+ "format": "raw-shard",
30
+ "nbytes": 30332928,
31
+ "records": [
32
+ {
33
+ "name": "embeddings.LayerNorm.bias",
34
+ "shape": [
35
+ 768
36
+ ],
37
+ "dtype": "bfloat16",
38
+ "format": "raw",
39
+ "nbytes": 1536,
40
+ "byteOffset": 0
41
+ },
42
+ {
43
+ "name": "embeddings.LayerNorm.weight",
44
+ "shape": [
45
+ 768
46
+ ],
47
+ "dtype": "bfloat16",
48
+ "format": "raw",
49
+ "nbytes": 1536,
50
+ "byteOffset": 1536
51
+ },
52
+ {
53
+ "name": "embeddings.position_embeddings.weight",
54
+ "shape": [
55
+ 512,
56
+ 768
57
+ ],
58
+ "dtype": "bfloat16",
59
+ "format": "raw",
60
+ "nbytes": 786432,
61
+ "byteOffset": 3072
62
+ },
63
+ {
64
+ "name": "embeddings.token_type_embeddings.weight",
65
+ "shape": [
66
+ 2,
67
+ 768
68
+ ],
69
+ "dtype": "bfloat16",
70
+ "format": "raw",
71
+ "nbytes": 3072,
72
+ "byteOffset": 789504
73
+ },
74
+ {
75
+ "name": "encoder.layer.0.attention.output.LayerNorm.bias",
76
+ "shape": [
77
+ 768
78
+ ],
79
+ "dtype": "bfloat16",
80
+ "format": "raw",
81
+ "nbytes": 1536,
82
+ "byteOffset": 792576
83
+ },
84
+ {
85
+ "name": "encoder.layer.0.attention.output.LayerNorm.weight",
86
+ "shape": [
87
+ 768
88
+ ],
89
+ "dtype": "bfloat16",
90
+ "format": "raw",
91
+ "nbytes": 1536,
92
+ "byteOffset": 794112
93
+ },
94
+ {
95
+ "name": "encoder.layer.0.attention.output.dense.bias",
96
+ "shape": [
97
+ 768
98
+ ],
99
+ "dtype": "bfloat16",
100
+ "format": "raw",
101
+ "nbytes": 1536,
102
+ "byteOffset": 795648
103
+ },
104
+ {
105
+ "name": "encoder.layer.0.attention.output.dense.weight",
106
+ "shape": [
107
+ 768,
108
+ 768
109
+ ],
110
+ "dtype": "bfloat16",
111
+ "format": "raw",
112
+ "nbytes": 1179648,
113
+ "byteOffset": 797184
114
+ },
115
+ {
116
+ "name": "encoder.layer.0.attention.self.qkv.bias",
117
+ "shape": [
118
+ 2304
119
+ ],
120
+ "dtype": "bfloat16",
121
+ "format": "raw",
122
+ "nbytes": 4608,
123
+ "byteOffset": 1976832
124
+ },
125
+ {
126
+ "name": "encoder.layer.0.attention.self.qkv.weight",
127
+ "shape": [
128
+ 2304,
129
+ 768
130
+ ],
131
+ "dtype": "bfloat16",
132
+ "format": "raw",
133
+ "nbytes": 3538944,
134
+ "byteOffset": 1981440
135
+ },
136
+ {
137
+ "name": "encoder.layer.0.intermediate.dense.bias",
138
+ "shape": [
139
+ 3072
140
+ ],
141
+ "dtype": "bfloat16",
142
+ "format": "raw",
143
+ "nbytes": 6144,
144
+ "byteOffset": 5520384
145
+ },
146
+ {
147
+ "name": "encoder.layer.0.intermediate.dense.weight",
148
+ "shape": [
149
+ 3072,
150
+ 768
151
+ ],
152
+ "dtype": "bfloat16",
153
+ "format": "raw",
154
+ "nbytes": 4718592,
155
+ "byteOffset": 5526528
156
+ },
157
+ {
158
+ "name": "encoder.layer.0.output.LayerNorm.bias",
159
+ "shape": [
160
+ 768
161
+ ],
162
+ "dtype": "bfloat16",
163
+ "format": "raw",
164
+ "nbytes": 1536,
165
+ "byteOffset": 10245120
166
+ },
167
+ {
168
+ "name": "encoder.layer.0.output.LayerNorm.weight",
169
+ "shape": [
170
+ 768
171
+ ],
172
+ "dtype": "bfloat16",
173
+ "format": "raw",
174
+ "nbytes": 1536,
175
+ "byteOffset": 10246656
176
+ },
177
+ {
178
+ "name": "encoder.layer.0.output.dense.bias",
179
+ "shape": [
180
+ 768
181
+ ],
182
+ "dtype": "bfloat16",
183
+ "format": "raw",
184
+ "nbytes": 1536,
185
+ "byteOffset": 10248192
186
+ },
187
+ {
188
+ "name": "encoder.layer.0.output.dense.weight",
189
+ "shape": [
190
+ 768,
191
+ 3072
192
+ ],
193
+ "dtype": "bfloat16",
194
+ "format": "raw",
195
+ "nbytes": 4718592,
196
+ "byteOffset": 10249728
197
+ },
198
+ {
199
+ "name": "encoder.layer.1.attention.output.LayerNorm.bias",
200
+ "shape": [
201
+ 768
202
+ ],
203
+ "dtype": "bfloat16",
204
+ "format": "raw",
205
+ "nbytes": 1536,
206
+ "byteOffset": 14968320
207
+ },
208
+ {
209
+ "name": "encoder.layer.1.attention.output.LayerNorm.weight",
210
+ "shape": [
211
+ 768
212
+ ],
213
+ "dtype": "bfloat16",
214
+ "format": "raw",
215
+ "nbytes": 1536,
216
+ "byteOffset": 14969856
217
+ },
218
+ {
219
+ "name": "encoder.layer.1.attention.output.dense.bias",
220
+ "shape": [
221
+ 768
222
+ ],
223
+ "dtype": "bfloat16",
224
+ "format": "raw",
225
+ "nbytes": 1536,
226
+ "byteOffset": 14971392
227
+ },
228
+ {
229
+ "name": "encoder.layer.1.attention.output.dense.weight",
230
+ "shape": [
231
+ 768,
232
+ 768
233
+ ],
234
+ "dtype": "bfloat16",
235
+ "format": "raw",
236
+ "nbytes": 1179648,
237
+ "byteOffset": 14972928
238
+ },
239
+ {
240
+ "name": "encoder.layer.1.attention.self.qkv.bias",
241
+ "shape": [
242
+ 2304
243
+ ],
244
+ "dtype": "bfloat16",
245
+ "format": "raw",
246
+ "nbytes": 4608,
247
+ "byteOffset": 16152576
248
+ },
249
+ {
250
+ "name": "encoder.layer.1.attention.self.qkv.weight",
251
+ "shape": [
252
+ 2304,
253
+ 768
254
+ ],
255
+ "dtype": "bfloat16",
256
+ "format": "raw",
257
+ "nbytes": 3538944,
258
+ "byteOffset": 16157184
259
+ },
260
+ {
261
+ "name": "encoder.layer.1.intermediate.dense.bias",
262
+ "shape": [
263
+ 3072
264
+ ],
265
+ "dtype": "bfloat16",
266
+ "format": "raw",
267
+ "nbytes": 6144,
268
+ "byteOffset": 19696128
269
+ },
270
+ {
271
+ "name": "encoder.layer.1.intermediate.dense.weight",
272
+ "shape": [
273
+ 3072,
274
+ 768
275
+ ],
276
+ "dtype": "bfloat16",
277
+ "format": "raw",
278
+ "nbytes": 4718592,
279
+ "byteOffset": 19702272
280
+ },
281
+ {
282
+ "name": "encoder.layer.1.output.LayerNorm.bias",
283
+ "shape": [
284
+ 768
285
+ ],
286
+ "dtype": "bfloat16",
287
+ "format": "raw",
288
+ "nbytes": 1536,
289
+ "byteOffset": 24420864
290
+ },
291
+ {
292
+ "name": "encoder.layer.1.output.LayerNorm.weight",
293
+ "shape": [
294
+ 768
295
+ ],
296
+ "dtype": "bfloat16",
297
+ "format": "raw",
298
+ "nbytes": 1536,
299
+ "byteOffset": 24422400
300
+ },
301
+ {
302
+ "name": "encoder.layer.1.output.dense.bias",
303
+ "shape": [
304
+ 768
305
+ ],
306
+ "dtype": "bfloat16",
307
+ "format": "raw",
308
+ "nbytes": 1536,
309
+ "byteOffset": 24423936
310
+ },
311
+ {
312
+ "name": "encoder.layer.1.output.dense.weight",
313
+ "shape": [
314
+ 768,
315
+ 3072
316
+ ],
317
+ "dtype": "bfloat16",
318
+ "format": "raw",
319
+ "nbytes": 4718592,
320
+ "byteOffset": 24425472
321
+ },
322
+ {
323
+ "name": "encoder.layer.10.attention.output.LayerNorm.bias",
324
+ "shape": [
325
+ 768
326
+ ],
327
+ "dtype": "bfloat16",
328
+ "format": "raw",
329
+ "nbytes": 1536,
330
+ "byteOffset": 29144064
331
+ },
332
+ {
333
+ "name": "encoder.layer.10.attention.output.LayerNorm.weight",
334
+ "shape": [
335
+ 768
336
+ ],
337
+ "dtype": "bfloat16",
338
+ "format": "raw",
339
+ "nbytes": 1536,
340
+ "byteOffset": 29145600
341
+ },
342
+ {
343
+ "name": "encoder.layer.10.attention.output.dense.bias",
344
+ "shape": [
345
+ 768
346
+ ],
347
+ "dtype": "bfloat16",
348
+ "format": "raw",
349
+ "nbytes": 1536,
350
+ "byteOffset": 29147136
351
+ },
352
+ {
353
+ "name": "encoder.layer.10.attention.output.dense.weight",
354
+ "shape": [
355
+ 768,
356
+ 768
357
+ ],
358
+ "dtype": "bfloat16",
359
+ "format": "raw",
360
+ "nbytes": 1179648,
361
+ "byteOffset": 29148672
362
+ },
363
+ {
364
+ "name": "encoder.layer.10.attention.self.qkv.bias",
365
+ "shape": [
366
+ 2304
367
+ ],
368
+ "dtype": "bfloat16",
369
+ "format": "raw",
370
+ "nbytes": 4608,
371
+ "byteOffset": 30328320
372
+ }
373
+ ],
374
+ "md5sum": "d19301ea1b244630109761e9a47e8c0f"
375
+ },
376
+ {
377
+ "dataPath": "params_shard_2.bin",
378
+ "format": "raw-shard",
379
+ "nbytes": 31896576,
380
+ "records": [
381
+ {
382
+ "name": "encoder.layer.10.attention.self.qkv.weight",
383
+ "shape": [
384
+ 2304,
385
+ 768
386
+ ],
387
+ "dtype": "bfloat16",
388
+ "format": "raw",
389
+ "nbytes": 3538944,
390
+ "byteOffset": 0
391
+ },
392
+ {
393
+ "name": "encoder.layer.10.intermediate.dense.bias",
394
+ "shape": [
395
+ 3072
396
+ ],
397
+ "dtype": "bfloat16",
398
+ "format": "raw",
399
+ "nbytes": 6144,
400
+ "byteOffset": 3538944
401
+ },
402
+ {
403
+ "name": "encoder.layer.10.intermediate.dense.weight",
404
+ "shape": [
405
+ 3072,
406
+ 768
407
+ ],
408
+ "dtype": "bfloat16",
409
+ "format": "raw",
410
+ "nbytes": 4718592,
411
+ "byteOffset": 3545088
412
+ },
413
+ {
414
+ "name": "encoder.layer.10.output.LayerNorm.bias",
415
+ "shape": [
416
+ 768
417
+ ],
418
+ "dtype": "bfloat16",
419
+ "format": "raw",
420
+ "nbytes": 1536,
421
+ "byteOffset": 8263680
422
+ },
423
+ {
424
+ "name": "encoder.layer.10.output.LayerNorm.weight",
425
+ "shape": [
426
+ 768
427
+ ],
428
+ "dtype": "bfloat16",
429
+ "format": "raw",
430
+ "nbytes": 1536,
431
+ "byteOffset": 8265216
432
+ },
433
+ {
434
+ "name": "encoder.layer.10.output.dense.bias",
435
+ "shape": [
436
+ 768
437
+ ],
438
+ "dtype": "bfloat16",
439
+ "format": "raw",
440
+ "nbytes": 1536,
441
+ "byteOffset": 8266752
442
+ },
443
+ {
444
+ "name": "encoder.layer.10.output.dense.weight",
445
+ "shape": [
446
+ 768,
447
+ 3072
448
+ ],
449
+ "dtype": "bfloat16",
450
+ "format": "raw",
451
+ "nbytes": 4718592,
452
+ "byteOffset": 8268288
453
+ },
454
+ {
455
+ "name": "encoder.layer.11.attention.output.LayerNorm.bias",
456
+ "shape": [
457
+ 768
458
+ ],
459
+ "dtype": "bfloat16",
460
+ "format": "raw",
461
+ "nbytes": 1536,
462
+ "byteOffset": 12986880
463
+ },
464
+ {
465
+ "name": "encoder.layer.11.attention.output.LayerNorm.weight",
466
+ "shape": [
467
+ 768
468
+ ],
469
+ "dtype": "bfloat16",
470
+ "format": "raw",
471
+ "nbytes": 1536,
472
+ "byteOffset": 12988416
473
+ },
474
+ {
475
+ "name": "encoder.layer.11.attention.output.dense.bias",
476
+ "shape": [
477
+ 768
478
+ ],
479
+ "dtype": "bfloat16",
480
+ "format": "raw",
481
+ "nbytes": 1536,
482
+ "byteOffset": 12989952
483
+ },
484
+ {
485
+ "name": "encoder.layer.11.attention.output.dense.weight",
486
+ "shape": [
487
+ 768,
488
+ 768
489
+ ],
490
+ "dtype": "bfloat16",
491
+ "format": "raw",
492
+ "nbytes": 1179648,
493
+ "byteOffset": 12991488
494
+ },
495
+ {
496
+ "name": "encoder.layer.11.attention.self.qkv.bias",
497
+ "shape": [
498
+ 2304
499
+ ],
500
+ "dtype": "bfloat16",
501
+ "format": "raw",
502
+ "nbytes": 4608,
503
+ "byteOffset": 14171136
504
+ },
505
+ {
506
+ "name": "encoder.layer.11.attention.self.qkv.weight",
507
+ "shape": [
508
+ 2304,
509
+ 768
510
+ ],
511
+ "dtype": "bfloat16",
512
+ "format": "raw",
513
+ "nbytes": 3538944,
514
+ "byteOffset": 14175744
515
+ },
516
+ {
517
+ "name": "encoder.layer.11.intermediate.dense.bias",
518
+ "shape": [
519
+ 3072
520
+ ],
521
+ "dtype": "bfloat16",
522
+ "format": "raw",
523
+ "nbytes": 6144,
524
+ "byteOffset": 17714688
525
+ },
526
+ {
527
+ "name": "encoder.layer.11.intermediate.dense.weight",
528
+ "shape": [
529
+ 3072,
530
+ 768
531
+ ],
532
+ "dtype": "bfloat16",
533
+ "format": "raw",
534
+ "nbytes": 4718592,
535
+ "byteOffset": 17720832
536
+ },
537
+ {
538
+ "name": "encoder.layer.11.output.LayerNorm.bias",
539
+ "shape": [
540
+ 768
541
+ ],
542
+ "dtype": "bfloat16",
543
+ "format": "raw",
544
+ "nbytes": 1536,
545
+ "byteOffset": 22439424
546
+ },
547
+ {
548
+ "name": "encoder.layer.11.output.LayerNorm.weight",
549
+ "shape": [
550
+ 768
551
+ ],
552
+ "dtype": "bfloat16",
553
+ "format": "raw",
554
+ "nbytes": 1536,
555
+ "byteOffset": 22440960
556
+ },
557
+ {
558
+ "name": "encoder.layer.11.output.dense.bias",
559
+ "shape": [
560
+ 768
561
+ ],
562
+ "dtype": "bfloat16",
563
+ "format": "raw",
564
+ "nbytes": 1536,
565
+ "byteOffset": 22442496
566
+ },
567
+ {
568
+ "name": "encoder.layer.11.output.dense.weight",
569
+ "shape": [
570
+ 768,
571
+ 3072
572
+ ],
573
+ "dtype": "bfloat16",
574
+ "format": "raw",
575
+ "nbytes": 4718592,
576
+ "byteOffset": 22444032
577
+ },
578
+ {
579
+ "name": "encoder.layer.2.attention.output.LayerNorm.bias",
580
+ "shape": [
581
+ 768
582
+ ],
583
+ "dtype": "bfloat16",
584
+ "format": "raw",
585
+ "nbytes": 1536,
586
+ "byteOffset": 27162624
587
+ },
588
+ {
589
+ "name": "encoder.layer.2.attention.output.LayerNorm.weight",
590
+ "shape": [
591
+ 768
592
+ ],
593
+ "dtype": "bfloat16",
594
+ "format": "raw",
595
+ "nbytes": 1536,
596
+ "byteOffset": 27164160
597
+ },
598
+ {
599
+ "name": "encoder.layer.2.attention.output.dense.bias",
600
+ "shape": [
601
+ 768
602
+ ],
603
+ "dtype": "bfloat16",
604
+ "format": "raw",
605
+ "nbytes": 1536,
606
+ "byteOffset": 27165696
607
+ },
608
+ {
609
+ "name": "encoder.layer.2.attention.output.dense.weight",
610
+ "shape": [
611
+ 768,
612
+ 768
613
+ ],
614
+ "dtype": "bfloat16",
615
+ "format": "raw",
616
+ "nbytes": 1179648,
617
+ "byteOffset": 27167232
618
+ },
619
+ {
620
+ "name": "encoder.layer.2.attention.self.qkv.bias",
621
+ "shape": [
622
+ 2304
623
+ ],
624
+ "dtype": "bfloat16",
625
+ "format": "raw",
626
+ "nbytes": 4608,
627
+ "byteOffset": 28346880
628
+ },
629
+ {
630
+ "name": "encoder.layer.2.attention.self.qkv.weight",
631
+ "shape": [
632
+ 2304,
633
+ 768
634
+ ],
635
+ "dtype": "bfloat16",
636
+ "format": "raw",
637
+ "nbytes": 3538944,
638
+ "byteOffset": 28351488
639
+ },
640
+ {
641
+ "name": "encoder.layer.2.intermediate.dense.bias",
642
+ "shape": [
643
+ 3072
644
+ ],
645
+ "dtype": "bfloat16",
646
+ "format": "raw",
647
+ "nbytes": 6144,
648
+ "byteOffset": 31890432
649
+ }
650
+ ],
651
+ "md5sum": "97adefcd2277d459f53c9bf2d25bf264"
652
+ },
653
+ {
654
+ "dataPath": "params_shard_3.bin",
655
+ "format": "raw-shard",
656
+ "nbytes": 33074688,
657
+ "records": [
658
+ {
659
+ "name": "encoder.layer.2.intermediate.dense.weight",
660
+ "shape": [
661
+ 3072,
662
+ 768
663
+ ],
664
+ "dtype": "bfloat16",
665
+ "format": "raw",
666
+ "nbytes": 4718592,
667
+ "byteOffset": 0
668
+ },
669
+ {
670
+ "name": "encoder.layer.2.output.LayerNorm.bias",
671
+ "shape": [
672
+ 768
673
+ ],
674
+ "dtype": "bfloat16",
675
+ "format": "raw",
676
+ "nbytes": 1536,
677
+ "byteOffset": 4718592
678
+ },
679
+ {
680
+ "name": "encoder.layer.2.output.LayerNorm.weight",
681
+ "shape": [
682
+ 768
683
+ ],
684
+ "dtype": "bfloat16",
685
+ "format": "raw",
686
+ "nbytes": 1536,
687
+ "byteOffset": 4720128
688
+ },
689
+ {
690
+ "name": "encoder.layer.2.output.dense.bias",
691
+ "shape": [
692
+ 768
693
+ ],
694
+ "dtype": "bfloat16",
695
+ "format": "raw",
696
+ "nbytes": 1536,
697
+ "byteOffset": 4721664
698
+ },
699
+ {
700
+ "name": "encoder.layer.2.output.dense.weight",
701
+ "shape": [
702
+ 768,
703
+ 3072
704
+ ],
705
+ "dtype": "bfloat16",
706
+ "format": "raw",
707
+ "nbytes": 4718592,
708
+ "byteOffset": 4723200
709
+ },
710
+ {
711
+ "name": "encoder.layer.3.attention.output.LayerNorm.bias",
712
+ "shape": [
713
+ 768
714
+ ],
715
+ "dtype": "bfloat16",
716
+ "format": "raw",
717
+ "nbytes": 1536,
718
+ "byteOffset": 9441792
719
+ },
720
+ {
721
+ "name": "encoder.layer.3.attention.output.LayerNorm.weight",
722
+ "shape": [
723
+ 768
724
+ ],
725
+ "dtype": "bfloat16",
726
+ "format": "raw",
727
+ "nbytes": 1536,
728
+ "byteOffset": 9443328
729
+ },
730
+ {
731
+ "name": "encoder.layer.3.attention.output.dense.bias",
732
+ "shape": [
733
+ 768
734
+ ],
735
+ "dtype": "bfloat16",
736
+ "format": "raw",
737
+ "nbytes": 1536,
738
+ "byteOffset": 9444864
739
+ },
740
+ {
741
+ "name": "encoder.layer.3.attention.output.dense.weight",
742
+ "shape": [
743
+ 768,
744
+ 768
745
+ ],
746
+ "dtype": "bfloat16",
747
+ "format": "raw",
748
+ "nbytes": 1179648,
749
+ "byteOffset": 9446400
750
+ },
751
+ {
752
+ "name": "encoder.layer.3.attention.self.qkv.bias",
753
+ "shape": [
754
+ 2304
755
+ ],
756
+ "dtype": "bfloat16",
757
+ "format": "raw",
758
+ "nbytes": 4608,
759
+ "byteOffset": 10626048
760
+ },
761
+ {
762
+ "name": "encoder.layer.3.attention.self.qkv.weight",
763
+ "shape": [
764
+ 2304,
765
+ 768
766
+ ],
767
+ "dtype": "bfloat16",
768
+ "format": "raw",
769
+ "nbytes": 3538944,
770
+ "byteOffset": 10630656
771
+ },
772
+ {
773
+ "name": "encoder.layer.3.intermediate.dense.bias",
774
+ "shape": [
775
+ 3072
776
+ ],
777
+ "dtype": "bfloat16",
778
+ "format": "raw",
779
+ "nbytes": 6144,
780
+ "byteOffset": 14169600
781
+ },
782
+ {
783
+ "name": "encoder.layer.3.intermediate.dense.weight",
784
+ "shape": [
785
+ 3072,
786
+ 768
787
+ ],
788
+ "dtype": "bfloat16",
789
+ "format": "raw",
790
+ "nbytes": 4718592,
791
+ "byteOffset": 14175744
792
+ },
793
+ {
794
+ "name": "encoder.layer.3.output.LayerNorm.bias",
795
+ "shape": [
796
+ 768
797
+ ],
798
+ "dtype": "bfloat16",
799
+ "format": "raw",
800
+ "nbytes": 1536,
801
+ "byteOffset": 18894336
802
+ },
803
+ {
804
+ "name": "encoder.layer.3.output.LayerNorm.weight",
805
+ "shape": [
806
+ 768
807
+ ],
808
+ "dtype": "bfloat16",
809
+ "format": "raw",
810
+ "nbytes": 1536,
811
+ "byteOffset": 18895872
812
+ },
813
+ {
814
+ "name": "encoder.layer.3.output.dense.bias",
815
+ "shape": [
816
+ 768
817
+ ],
818
+ "dtype": "bfloat16",
819
+ "format": "raw",
820
+ "nbytes": 1536,
821
+ "byteOffset": 18897408
822
+ },
823
+ {
824
+ "name": "encoder.layer.3.output.dense.weight",
825
+ "shape": [
826
+ 768,
827
+ 3072
828
+ ],
829
+ "dtype": "bfloat16",
830
+ "format": "raw",
831
+ "nbytes": 4718592,
832
+ "byteOffset": 18898944
833
+ },
834
+ {
835
+ "name": "encoder.layer.4.attention.output.LayerNorm.bias",
836
+ "shape": [
837
+ 768
838
+ ],
839
+ "dtype": "bfloat16",
840
+ "format": "raw",
841
+ "nbytes": 1536,
842
+ "byteOffset": 23617536
843
+ },
844
+ {
845
+ "name": "encoder.layer.4.attention.output.LayerNorm.weight",
846
+ "shape": [
847
+ 768
848
+ ],
849
+ "dtype": "bfloat16",
850
+ "format": "raw",
851
+ "nbytes": 1536,
852
+ "byteOffset": 23619072
853
+ },
854
+ {
855
+ "name": "encoder.layer.4.attention.output.dense.bias",
856
+ "shape": [
857
+ 768
858
+ ],
859
+ "dtype": "bfloat16",
860
+ "format": "raw",
861
+ "nbytes": 1536,
862
+ "byteOffset": 23620608
863
+ },
864
+ {
865
+ "name": "encoder.layer.4.attention.output.dense.weight",
866
+ "shape": [
867
+ 768,
868
+ 768
869
+ ],
870
+ "dtype": "bfloat16",
871
+ "format": "raw",
872
+ "nbytes": 1179648,
873
+ "byteOffset": 23622144
874
+ },
875
+ {
876
+ "name": "encoder.layer.4.attention.self.qkv.bias",
877
+ "shape": [
878
+ 2304
879
+ ],
880
+ "dtype": "bfloat16",
881
+ "format": "raw",
882
+ "nbytes": 4608,
883
+ "byteOffset": 24801792
884
+ },
885
+ {
886
+ "name": "encoder.layer.4.attention.self.qkv.weight",
887
+ "shape": [
888
+ 2304,
889
+ 768
890
+ ],
891
+ "dtype": "bfloat16",
892
+ "format": "raw",
893
+ "nbytes": 3538944,
894
+ "byteOffset": 24806400
895
+ },
896
+ {
897
+ "name": "encoder.layer.4.intermediate.dense.bias",
898
+ "shape": [
899
+ 3072
900
+ ],
901
+ "dtype": "bfloat16",
902
+ "format": "raw",
903
+ "nbytes": 6144,
904
+ "byteOffset": 28345344
905
+ },
906
+ {
907
+ "name": "encoder.layer.4.intermediate.dense.weight",
908
+ "shape": [
909
+ 3072,
910
+ 768
911
+ ],
912
+ "dtype": "bfloat16",
913
+ "format": "raw",
914
+ "nbytes": 4718592,
915
+ "byteOffset": 28351488
916
+ },
917
+ {
918
+ "name": "encoder.layer.4.output.LayerNorm.bias",
919
+ "shape": [
920
+ 768
921
+ ],
922
+ "dtype": "bfloat16",
923
+ "format": "raw",
924
+ "nbytes": 1536,
925
+ "byteOffset": 33070080
926
+ },
927
+ {
928
+ "name": "encoder.layer.4.output.LayerNorm.weight",
929
+ "shape": [
930
+ 768
931
+ ],
932
+ "dtype": "bfloat16",
933
+ "format": "raw",
934
+ "nbytes": 1536,
935
+ "byteOffset": 33071616
936
+ },
937
+ {
938
+ "name": "encoder.layer.4.output.dense.bias",
939
+ "shape": [
940
+ 768
941
+ ],
942
+ "dtype": "bfloat16",
943
+ "format": "raw",
944
+ "nbytes": 1536,
945
+ "byteOffset": 33073152
946
+ }
947
+ ],
948
+ "md5sum": "e9dd727b06f09c2a5284809b08e3eff9"
949
+ },
950
+ {
951
+ "dataPath": "params_shard_4.bin",
952
+ "format": "raw-shard",
953
+ "nbytes": 33074688,
954
+ "records": [
955
+ {
956
+ "name": "encoder.layer.4.output.dense.weight",
957
+ "shape": [
958
+ 768,
959
+ 3072
960
+ ],
961
+ "dtype": "bfloat16",
962
+ "format": "raw",
963
+ "nbytes": 4718592,
964
+ "byteOffset": 0
965
+ },
966
+ {
967
+ "name": "encoder.layer.5.attention.output.LayerNorm.bias",
968
+ "shape": [
969
+ 768
970
+ ],
971
+ "dtype": "bfloat16",
972
+ "format": "raw",
973
+ "nbytes": 1536,
974
+ "byteOffset": 4718592
975
+ },
976
+ {
977
+ "name": "encoder.layer.5.attention.output.LayerNorm.weight",
978
+ "shape": [
979
+ 768
980
+ ],
981
+ "dtype": "bfloat16",
982
+ "format": "raw",
983
+ "nbytes": 1536,
984
+ "byteOffset": 4720128
985
+ },
986
+ {
987
+ "name": "encoder.layer.5.attention.output.dense.bias",
988
+ "shape": [
989
+ 768
990
+ ],
991
+ "dtype": "bfloat16",
992
+ "format": "raw",
993
+ "nbytes": 1536,
994
+ "byteOffset": 4721664
995
+ },
996
+ {
997
+ "name": "encoder.layer.5.attention.output.dense.weight",
998
+ "shape": [
999
+ 768,
1000
+ 768
1001
+ ],
1002
+ "dtype": "bfloat16",
1003
+ "format": "raw",
1004
+ "nbytes": 1179648,
1005
+ "byteOffset": 4723200
1006
+ },
1007
+ {
1008
+ "name": "encoder.layer.5.attention.self.qkv.bias",
1009
+ "shape": [
1010
+ 2304
1011
+ ],
1012
+ "dtype": "bfloat16",
1013
+ "format": "raw",
1014
+ "nbytes": 4608,
1015
+ "byteOffset": 5902848
1016
+ },
1017
+ {
1018
+ "name": "encoder.layer.5.attention.self.qkv.weight",
1019
+ "shape": [
1020
+ 2304,
1021
+ 768
1022
+ ],
1023
+ "dtype": "bfloat16",
1024
+ "format": "raw",
1025
+ "nbytes": 3538944,
1026
+ "byteOffset": 5907456
1027
+ },
1028
+ {
1029
+ "name": "encoder.layer.5.intermediate.dense.bias",
1030
+ "shape": [
1031
+ 3072
1032
+ ],
1033
+ "dtype": "bfloat16",
1034
+ "format": "raw",
1035
+ "nbytes": 6144,
1036
+ "byteOffset": 9446400
1037
+ },
1038
+ {
1039
+ "name": "encoder.layer.5.intermediate.dense.weight",
1040
+ "shape": [
1041
+ 3072,
1042
+ 768
1043
+ ],
1044
+ "dtype": "bfloat16",
1045
+ "format": "raw",
1046
+ "nbytes": 4718592,
1047
+ "byteOffset": 9452544
1048
+ },
1049
+ {
1050
+ "name": "encoder.layer.5.output.LayerNorm.bias",
1051
+ "shape": [
1052
+ 768
1053
+ ],
1054
+ "dtype": "bfloat16",
1055
+ "format": "raw",
1056
+ "nbytes": 1536,
1057
+ "byteOffset": 14171136
1058
+ },
1059
+ {
1060
+ "name": "encoder.layer.5.output.LayerNorm.weight",
1061
+ "shape": [
1062
+ 768
1063
+ ],
1064
+ "dtype": "bfloat16",
1065
+ "format": "raw",
1066
+ "nbytes": 1536,
1067
+ "byteOffset": 14172672
1068
+ },
1069
+ {
1070
+ "name": "encoder.layer.5.output.dense.bias",
1071
+ "shape": [
1072
+ 768
1073
+ ],
1074
+ "dtype": "bfloat16",
1075
+ "format": "raw",
1076
+ "nbytes": 1536,
1077
+ "byteOffset": 14174208
1078
+ },
1079
+ {
1080
+ "name": "encoder.layer.5.output.dense.weight",
1081
+ "shape": [
1082
+ 768,
1083
+ 3072
1084
+ ],
1085
+ "dtype": "bfloat16",
1086
+ "format": "raw",
1087
+ "nbytes": 4718592,
1088
+ "byteOffset": 14175744
1089
+ },
1090
+ {
1091
+ "name": "encoder.layer.6.attention.output.LayerNorm.bias",
1092
+ "shape": [
1093
+ 768
1094
+ ],
1095
+ "dtype": "bfloat16",
1096
+ "format": "raw",
1097
+ "nbytes": 1536,
1098
+ "byteOffset": 18894336
1099
+ },
1100
+ {
1101
+ "name": "encoder.layer.6.attention.output.LayerNorm.weight",
1102
+ "shape": [
1103
+ 768
1104
+ ],
1105
+ "dtype": "bfloat16",
1106
+ "format": "raw",
1107
+ "nbytes": 1536,
1108
+ "byteOffset": 18895872
1109
+ },
1110
+ {
1111
+ "name": "encoder.layer.6.attention.output.dense.bias",
1112
+ "shape": [
1113
+ 768
1114
+ ],
1115
+ "dtype": "bfloat16",
1116
+ "format": "raw",
1117
+ "nbytes": 1536,
1118
+ "byteOffset": 18897408
1119
+ },
1120
+ {
1121
+ "name": "encoder.layer.6.attention.output.dense.weight",
1122
+ "shape": [
1123
+ 768,
1124
+ 768
1125
+ ],
1126
+ "dtype": "bfloat16",
1127
+ "format": "raw",
1128
+ "nbytes": 1179648,
1129
+ "byteOffset": 18898944
1130
+ },
1131
+ {
1132
+ "name": "encoder.layer.6.attention.self.qkv.bias",
1133
+ "shape": [
1134
+ 2304
1135
+ ],
1136
+ "dtype": "bfloat16",
1137
+ "format": "raw",
1138
+ "nbytes": 4608,
1139
+ "byteOffset": 20078592
1140
+ },
1141
+ {
1142
+ "name": "encoder.layer.6.attention.self.qkv.weight",
1143
+ "shape": [
1144
+ 2304,
1145
+ 768
1146
+ ],
1147
+ "dtype": "bfloat16",
1148
+ "format": "raw",
1149
+ "nbytes": 3538944,
1150
+ "byteOffset": 20083200
1151
+ },
1152
+ {
1153
+ "name": "encoder.layer.6.intermediate.dense.bias",
1154
+ "shape": [
1155
+ 3072
1156
+ ],
1157
+ "dtype": "bfloat16",
1158
+ "format": "raw",
1159
+ "nbytes": 6144,
1160
+ "byteOffset": 23622144
1161
+ },
1162
+ {
1163
+ "name": "encoder.layer.6.intermediate.dense.weight",
1164
+ "shape": [
1165
+ 3072,
1166
+ 768
1167
+ ],
1168
+ "dtype": "bfloat16",
1169
+ "format": "raw",
1170
+ "nbytes": 4718592,
1171
+ "byteOffset": 23628288
1172
+ },
1173
+ {
1174
+ "name": "encoder.layer.6.output.LayerNorm.bias",
1175
+ "shape": [
1176
+ 768
1177
+ ],
1178
+ "dtype": "bfloat16",
1179
+ "format": "raw",
1180
+ "nbytes": 1536,
1181
+ "byteOffset": 28346880
1182
+ },
1183
+ {
1184
+ "name": "encoder.layer.6.output.LayerNorm.weight",
1185
+ "shape": [
1186
+ 768
1187
+ ],
1188
+ "dtype": "bfloat16",
1189
+ "format": "raw",
1190
+ "nbytes": 1536,
1191
+ "byteOffset": 28348416
1192
+ },
1193
+ {
1194
+ "name": "encoder.layer.6.output.dense.bias",
1195
+ "shape": [
1196
+ 768
1197
+ ],
1198
+ "dtype": "bfloat16",
1199
+ "format": "raw",
1200
+ "nbytes": 1536,
1201
+ "byteOffset": 28349952
1202
+ },
1203
+ {
1204
+ "name": "encoder.layer.6.output.dense.weight",
1205
+ "shape": [
1206
+ 768,
1207
+ 3072
1208
+ ],
1209
+ "dtype": "bfloat16",
1210
+ "format": "raw",
1211
+ "nbytes": 4718592,
1212
+ "byteOffset": 28351488
1213
+ },
1214
+ {
1215
+ "name": "encoder.layer.7.attention.output.LayerNorm.bias",
1216
+ "shape": [
1217
+ 768
1218
+ ],
1219
+ "dtype": "bfloat16",
1220
+ "format": "raw",
1221
+ "nbytes": 1536,
1222
+ "byteOffset": 33070080
1223
+ },
1224
+ {
1225
+ "name": "encoder.layer.7.attention.output.LayerNorm.weight",
1226
+ "shape": [
1227
+ 768
1228
+ ],
1229
+ "dtype": "bfloat16",
1230
+ "format": "raw",
1231
+ "nbytes": 1536,
1232
+ "byteOffset": 33071616
1233
+ },
1234
+ {
1235
+ "name": "encoder.layer.7.attention.output.dense.bias",
1236
+ "shape": [
1237
+ 768
1238
+ ],
1239
+ "dtype": "bfloat16",
1240
+ "format": "raw",
1241
+ "nbytes": 1536,
1242
+ "byteOffset": 33073152
1243
+ }
1244
+ ],
1245
+ "md5sum": "9fbad31fed16b934acb1158c464c9774"
1246
+ },
1247
+ {
1248
+ "dataPath": "params_shard_5.bin",
1249
+ "format": "raw-shard",
1250
+ "nbytes": 33080832,
1251
+ "records": [
1252
+ {
1253
+ "name": "encoder.layer.7.attention.output.dense.weight",
1254
+ "shape": [
1255
+ 768,
1256
+ 768
1257
+ ],
1258
+ "dtype": "bfloat16",
1259
+ "format": "raw",
1260
+ "nbytes": 1179648,
1261
+ "byteOffset": 0
1262
+ },
1263
+ {
1264
+ "name": "encoder.layer.7.attention.self.qkv.bias",
1265
+ "shape": [
1266
+ 2304
1267
+ ],
1268
+ "dtype": "bfloat16",
1269
+ "format": "raw",
1270
+ "nbytes": 4608,
1271
+ "byteOffset": 1179648
1272
+ },
1273
+ {
1274
+ "name": "encoder.layer.7.attention.self.qkv.weight",
1275
+ "shape": [
1276
+ 2304,
1277
+ 768
1278
+ ],
1279
+ "dtype": "bfloat16",
1280
+ "format": "raw",
1281
+ "nbytes": 3538944,
1282
+ "byteOffset": 1184256
1283
+ },
1284
+ {
1285
+ "name": "encoder.layer.7.intermediate.dense.bias",
1286
+ "shape": [
1287
+ 3072
1288
+ ],
1289
+ "dtype": "bfloat16",
1290
+ "format": "raw",
1291
+ "nbytes": 6144,
1292
+ "byteOffset": 4723200
1293
+ },
1294
+ {
1295
+ "name": "encoder.layer.7.intermediate.dense.weight",
1296
+ "shape": [
1297
+ 3072,
1298
+ 768
1299
+ ],
1300
+ "dtype": "bfloat16",
1301
+ "format": "raw",
1302
+ "nbytes": 4718592,
1303
+ "byteOffset": 4729344
1304
+ },
1305
+ {
1306
+ "name": "encoder.layer.7.output.LayerNorm.bias",
1307
+ "shape": [
1308
+ 768
1309
+ ],
1310
+ "dtype": "bfloat16",
1311
+ "format": "raw",
1312
+ "nbytes": 1536,
1313
+ "byteOffset": 9447936
1314
+ },
1315
+ {
1316
+ "name": "encoder.layer.7.output.LayerNorm.weight",
1317
+ "shape": [
1318
+ 768
1319
+ ],
1320
+ "dtype": "bfloat16",
1321
+ "format": "raw",
1322
+ "nbytes": 1536,
1323
+ "byteOffset": 9449472
1324
+ },
1325
+ {
1326
+ "name": "encoder.layer.7.output.dense.bias",
1327
+ "shape": [
1328
+ 768
1329
+ ],
1330
+ "dtype": "bfloat16",
1331
+ "format": "raw",
1332
+ "nbytes": 1536,
1333
+ "byteOffset": 9451008
1334
+ },
1335
+ {
1336
+ "name": "encoder.layer.7.output.dense.weight",
1337
+ "shape": [
1338
+ 768,
1339
+ 3072
1340
+ ],
1341
+ "dtype": "bfloat16",
1342
+ "format": "raw",
1343
+ "nbytes": 4718592,
1344
+ "byteOffset": 9452544
1345
+ },
1346
+ {
1347
+ "name": "encoder.layer.8.attention.output.LayerNorm.bias",
1348
+ "shape": [
1349
+ 768
1350
+ ],
1351
+ "dtype": "bfloat16",
1352
+ "format": "raw",
1353
+ "nbytes": 1536,
1354
+ "byteOffset": 14171136
1355
+ },
1356
+ {
1357
+ "name": "encoder.layer.8.attention.output.LayerNorm.weight",
1358
+ "shape": [
1359
+ 768
1360
+ ],
1361
+ "dtype": "bfloat16",
1362
+ "format": "raw",
1363
+ "nbytes": 1536,
1364
+ "byteOffset": 14172672
1365
+ },
1366
+ {
1367
+ "name": "encoder.layer.8.attention.output.dense.bias",
1368
+ "shape": [
1369
+ 768
1370
+ ],
1371
+ "dtype": "bfloat16",
1372
+ "format": "raw",
1373
+ "nbytes": 1536,
1374
+ "byteOffset": 14174208
1375
+ },
1376
+ {
1377
+ "name": "encoder.layer.8.attention.output.dense.weight",
1378
+ "shape": [
1379
+ 768,
1380
+ 768
1381
+ ],
1382
+ "dtype": "bfloat16",
1383
+ "format": "raw",
1384
+ "nbytes": 1179648,
1385
+ "byteOffset": 14175744
1386
+ },
1387
+ {
1388
+ "name": "encoder.layer.8.attention.self.qkv.bias",
1389
+ "shape": [
1390
+ 2304
1391
+ ],
1392
+ "dtype": "bfloat16",
1393
+ "format": "raw",
1394
+ "nbytes": 4608,
1395
+ "byteOffset": 15355392
1396
+ },
1397
+ {
1398
+ "name": "encoder.layer.8.attention.self.qkv.weight",
1399
+ "shape": [
1400
+ 2304,
1401
+ 768
1402
+ ],
1403
+ "dtype": "bfloat16",
1404
+ "format": "raw",
1405
+ "nbytes": 3538944,
1406
+ "byteOffset": 15360000
1407
+ },
1408
+ {
1409
+ "name": "encoder.layer.8.intermediate.dense.bias",
1410
+ "shape": [
1411
+ 3072
1412
+ ],
1413
+ "dtype": "bfloat16",
1414
+ "format": "raw",
1415
+ "nbytes": 6144,
1416
+ "byteOffset": 18898944
1417
+ },
1418
+ {
1419
+ "name": "encoder.layer.8.intermediate.dense.weight",
1420
+ "shape": [
1421
+ 3072,
1422
+ 768
1423
+ ],
1424
+ "dtype": "bfloat16",
1425
+ "format": "raw",
1426
+ "nbytes": 4718592,
1427
+ "byteOffset": 18905088
1428
+ },
1429
+ {
1430
+ "name": "encoder.layer.8.output.LayerNorm.bias",
1431
+ "shape": [
1432
+ 768
1433
+ ],
1434
+ "dtype": "bfloat16",
1435
+ "format": "raw",
1436
+ "nbytes": 1536,
1437
+ "byteOffset": 23623680
1438
+ },
1439
+ {
1440
+ "name": "encoder.layer.8.output.LayerNorm.weight",
1441
+ "shape": [
1442
+ 768
1443
+ ],
1444
+ "dtype": "bfloat16",
1445
+ "format": "raw",
1446
+ "nbytes": 1536,
1447
+ "byteOffset": 23625216
1448
+ },
1449
+ {
1450
+ "name": "encoder.layer.8.output.dense.bias",
1451
+ "shape": [
1452
+ 768
1453
+ ],
1454
+ "dtype": "bfloat16",
1455
+ "format": "raw",
1456
+ "nbytes": 1536,
1457
+ "byteOffset": 23626752
1458
+ },
1459
+ {
1460
+ "name": "encoder.layer.8.output.dense.weight",
1461
+ "shape": [
1462
+ 768,
1463
+ 3072
1464
+ ],
1465
+ "dtype": "bfloat16",
1466
+ "format": "raw",
1467
+ "nbytes": 4718592,
1468
+ "byteOffset": 23628288
1469
+ },
1470
+ {
1471
+ "name": "encoder.layer.9.attention.output.LayerNorm.bias",
1472
+ "shape": [
1473
+ 768
1474
+ ],
1475
+ "dtype": "bfloat16",
1476
+ "format": "raw",
1477
+ "nbytes": 1536,
1478
+ "byteOffset": 28346880
1479
+ },
1480
+ {
1481
+ "name": "encoder.layer.9.attention.output.LayerNorm.weight",
1482
+ "shape": [
1483
+ 768
1484
+ ],
1485
+ "dtype": "bfloat16",
1486
+ "format": "raw",
1487
+ "nbytes": 1536,
1488
+ "byteOffset": 28348416
1489
+ },
1490
+ {
1491
+ "name": "encoder.layer.9.attention.output.dense.bias",
1492
+ "shape": [
1493
+ 768
1494
+ ],
1495
+ "dtype": "bfloat16",
1496
+ "format": "raw",
1497
+ "nbytes": 1536,
1498
+ "byteOffset": 28349952
1499
+ },
1500
+ {
1501
+ "name": "encoder.layer.9.attention.output.dense.weight",
1502
+ "shape": [
1503
+ 768,
1504
+ 768
1505
+ ],
1506
+ "dtype": "bfloat16",
1507
+ "format": "raw",
1508
+ "nbytes": 1179648,
1509
+ "byteOffset": 28351488
1510
+ },
1511
+ {
1512
+ "name": "encoder.layer.9.attention.self.qkv.bias",
1513
+ "shape": [
1514
+ 2304
1515
+ ],
1516
+ "dtype": "bfloat16",
1517
+ "format": "raw",
1518
+ "nbytes": 4608,
1519
+ "byteOffset": 29531136
1520
+ },
1521
+ {
1522
+ "name": "encoder.layer.9.attention.self.qkv.weight",
1523
+ "shape": [
1524
+ 2304,
1525
+ 768
1526
+ ],
1527
+ "dtype": "bfloat16",
1528
+ "format": "raw",
1529
+ "nbytes": 3538944,
1530
+ "byteOffset": 29535744
1531
+ },
1532
+ {
1533
+ "name": "encoder.layer.9.intermediate.dense.bias",
1534
+ "shape": [
1535
+ 3072
1536
+ ],
1537
+ "dtype": "bfloat16",
1538
+ "format": "raw",
1539
+ "nbytes": 6144,
1540
+ "byteOffset": 33074688
1541
+ }
1542
+ ],
1543
+ "md5sum": "7a49f1f4cfd562bb32cb568fc622b50e"
1544
+ },
1545
+ {
1546
+ "dataPath": "params_shard_6.bin",
1547
+ "format": "raw-shard",
1548
+ "nbytes": 9441792,
1549
+ "records": [
1550
+ {
1551
+ "name": "encoder.layer.9.intermediate.dense.weight",
1552
+ "shape": [
1553
+ 3072,
1554
+ 768
1555
+ ],
1556
+ "dtype": "bfloat16",
1557
+ "format": "raw",
1558
+ "nbytes": 4718592,
1559
+ "byteOffset": 0
1560
+ },
1561
+ {
1562
+ "name": "encoder.layer.9.output.LayerNorm.bias",
1563
+ "shape": [
1564
+ 768
1565
+ ],
1566
+ "dtype": "bfloat16",
1567
+ "format": "raw",
1568
+ "nbytes": 1536,
1569
+ "byteOffset": 4718592
1570
+ },
1571
+ {
1572
+ "name": "encoder.layer.9.output.LayerNorm.weight",
1573
+ "shape": [
1574
+ 768
1575
+ ],
1576
+ "dtype": "bfloat16",
1577
+ "format": "raw",
1578
+ "nbytes": 1536,
1579
+ "byteOffset": 4720128
1580
+ },
1581
+ {
1582
+ "name": "encoder.layer.9.output.dense.bias",
1583
+ "shape": [
1584
+ 768
1585
+ ],
1586
+ "dtype": "bfloat16",
1587
+ "format": "raw",
1588
+ "nbytes": 1536,
1589
+ "byteOffset": 4721664
1590
+ },
1591
+ {
1592
+ "name": "encoder.layer.9.output.dense.weight",
1593
+ "shape": [
1594
+ 768,
1595
+ 3072
1596
+ ],
1597
+ "dtype": "bfloat16",
1598
+ "format": "raw",
1599
+ "nbytes": 4718592,
1600
+ "byteOffset": 4723200
1601
+ }
1602
+ ],
1603
+ "md5sum": "2d4a5c41b3ca6eab2b402b5731cbc3cf"
1604
+ }
1605
+ ]
1606
+ }
ndarray-cache.json ADDED
@@ -0,0 +1,1606 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "ParamSize": 149,
4
+ "ParamBytes": 435566592.0,
5
+ "BitsPerParam": 32.0
6
+ },
7
+ "records": [
8
+ {
9
+ "dataPath": "params_shard_0.bin",
10
+ "format": "raw-shard",
11
+ "nbytes": 46881792,
12
+ "records": [
13
+ {
14
+ "name": "embeddings.word_embeddings.weight",
15
+ "shape": [
16
+ 30522,
17
+ 768
18
+ ],
19
+ "dtype": "float32",
20
+ "format": "f32-to-bf16",
21
+ "nbytes": 46881792,
22
+ "byteOffset": 0
23
+ }
24
+ ],
25
+ "md5sum": "2eeaa439340fec525d791ca37a3dd753"
26
+ },
27
+ {
28
+ "dataPath": "params_shard_1.bin",
29
+ "format": "raw-shard",
30
+ "nbytes": 30332928,
31
+ "records": [
32
+ {
33
+ "name": "embeddings.LayerNorm.bias",
34
+ "shape": [
35
+ 768
36
+ ],
37
+ "dtype": "float32",
38
+ "format": "f32-to-bf16",
39
+ "nbytes": 1536,
40
+ "byteOffset": 0
41
+ },
42
+ {
43
+ "name": "embeddings.LayerNorm.weight",
44
+ "shape": [
45
+ 768
46
+ ],
47
+ "dtype": "float32",
48
+ "format": "f32-to-bf16",
49
+ "nbytes": 1536,
50
+ "byteOffset": 1536
51
+ },
52
+ {
53
+ "name": "embeddings.position_embeddings.weight",
54
+ "shape": [
55
+ 512,
56
+ 768
57
+ ],
58
+ "dtype": "float32",
59
+ "format": "f32-to-bf16",
60
+ "nbytes": 786432,
61
+ "byteOffset": 3072
62
+ },
63
+ {
64
+ "name": "embeddings.token_type_embeddings.weight",
65
+ "shape": [
66
+ 2,
67
+ 768
68
+ ],
69
+ "dtype": "float32",
70
+ "format": "f32-to-bf16",
71
+ "nbytes": 3072,
72
+ "byteOffset": 789504
73
+ },
74
+ {
75
+ "name": "encoder.layer.0.attention.output.LayerNorm.bias",
76
+ "shape": [
77
+ 768
78
+ ],
79
+ "dtype": "float32",
80
+ "format": "f32-to-bf16",
81
+ "nbytes": 1536,
82
+ "byteOffset": 792576
83
+ },
84
+ {
85
+ "name": "encoder.layer.0.attention.output.LayerNorm.weight",
86
+ "shape": [
87
+ 768
88
+ ],
89
+ "dtype": "float32",
90
+ "format": "f32-to-bf16",
91
+ "nbytes": 1536,
92
+ "byteOffset": 794112
93
+ },
94
+ {
95
+ "name": "encoder.layer.0.attention.output.dense.bias",
96
+ "shape": [
97
+ 768
98
+ ],
99
+ "dtype": "float32",
100
+ "format": "f32-to-bf16",
101
+ "nbytes": 1536,
102
+ "byteOffset": 795648
103
+ },
104
+ {
105
+ "name": "encoder.layer.0.attention.output.dense.weight",
106
+ "shape": [
107
+ 768,
108
+ 768
109
+ ],
110
+ "dtype": "float32",
111
+ "format": "f32-to-bf16",
112
+ "nbytes": 1179648,
113
+ "byteOffset": 797184
114
+ },
115
+ {
116
+ "name": "encoder.layer.0.attention.self.qkv.bias",
117
+ "shape": [
118
+ 2304
119
+ ],
120
+ "dtype": "float32",
121
+ "format": "f32-to-bf16",
122
+ "nbytes": 4608,
123
+ "byteOffset": 1976832
124
+ },
125
+ {
126
+ "name": "encoder.layer.0.attention.self.qkv.weight",
127
+ "shape": [
128
+ 2304,
129
+ 768
130
+ ],
131
+ "dtype": "float32",
132
+ "format": "f32-to-bf16",
133
+ "nbytes": 3538944,
134
+ "byteOffset": 1981440
135
+ },
136
+ {
137
+ "name": "encoder.layer.0.intermediate.dense.bias",
138
+ "shape": [
139
+ 3072
140
+ ],
141
+ "dtype": "float32",
142
+ "format": "f32-to-bf16",
143
+ "nbytes": 6144,
144
+ "byteOffset": 5520384
145
+ },
146
+ {
147
+ "name": "encoder.layer.0.intermediate.dense.weight",
148
+ "shape": [
149
+ 3072,
150
+ 768
151
+ ],
152
+ "dtype": "float32",
153
+ "format": "f32-to-bf16",
154
+ "nbytes": 4718592,
155
+ "byteOffset": 5526528
156
+ },
157
+ {
158
+ "name": "encoder.layer.0.output.LayerNorm.bias",
159
+ "shape": [
160
+ 768
161
+ ],
162
+ "dtype": "float32",
163
+ "format": "f32-to-bf16",
164
+ "nbytes": 1536,
165
+ "byteOffset": 10245120
166
+ },
167
+ {
168
+ "name": "encoder.layer.0.output.LayerNorm.weight",
169
+ "shape": [
170
+ 768
171
+ ],
172
+ "dtype": "float32",
173
+ "format": "f32-to-bf16",
174
+ "nbytes": 1536,
175
+ "byteOffset": 10246656
176
+ },
177
+ {
178
+ "name": "encoder.layer.0.output.dense.bias",
179
+ "shape": [
180
+ 768
181
+ ],
182
+ "dtype": "float32",
183
+ "format": "f32-to-bf16",
184
+ "nbytes": 1536,
185
+ "byteOffset": 10248192
186
+ },
187
+ {
188
+ "name": "encoder.layer.0.output.dense.weight",
189
+ "shape": [
190
+ 768,
191
+ 3072
192
+ ],
193
+ "dtype": "float32",
194
+ "format": "f32-to-bf16",
195
+ "nbytes": 4718592,
196
+ "byteOffset": 10249728
197
+ },
198
+ {
199
+ "name": "encoder.layer.1.attention.output.LayerNorm.bias",
200
+ "shape": [
201
+ 768
202
+ ],
203
+ "dtype": "float32",
204
+ "format": "f32-to-bf16",
205
+ "nbytes": 1536,
206
+ "byteOffset": 14968320
207
+ },
208
+ {
209
+ "name": "encoder.layer.1.attention.output.LayerNorm.weight",
210
+ "shape": [
211
+ 768
212
+ ],
213
+ "dtype": "float32",
214
+ "format": "f32-to-bf16",
215
+ "nbytes": 1536,
216
+ "byteOffset": 14969856
217
+ },
218
+ {
219
+ "name": "encoder.layer.1.attention.output.dense.bias",
220
+ "shape": [
221
+ 768
222
+ ],
223
+ "dtype": "float32",
224
+ "format": "f32-to-bf16",
225
+ "nbytes": 1536,
226
+ "byteOffset": 14971392
227
+ },
228
+ {
229
+ "name": "encoder.layer.1.attention.output.dense.weight",
230
+ "shape": [
231
+ 768,
232
+ 768
233
+ ],
234
+ "dtype": "float32",
235
+ "format": "f32-to-bf16",
236
+ "nbytes": 1179648,
237
+ "byteOffset": 14972928
238
+ },
239
+ {
240
+ "name": "encoder.layer.1.attention.self.qkv.bias",
241
+ "shape": [
242
+ 2304
243
+ ],
244
+ "dtype": "float32",
245
+ "format": "f32-to-bf16",
246
+ "nbytes": 4608,
247
+ "byteOffset": 16152576
248
+ },
249
+ {
250
+ "name": "encoder.layer.1.attention.self.qkv.weight",
251
+ "shape": [
252
+ 2304,
253
+ 768
254
+ ],
255
+ "dtype": "float32",
256
+ "format": "f32-to-bf16",
257
+ "nbytes": 3538944,
258
+ "byteOffset": 16157184
259
+ },
260
+ {
261
+ "name": "encoder.layer.1.intermediate.dense.bias",
262
+ "shape": [
263
+ 3072
264
+ ],
265
+ "dtype": "float32",
266
+ "format": "f32-to-bf16",
267
+ "nbytes": 6144,
268
+ "byteOffset": 19696128
269
+ },
270
+ {
271
+ "name": "encoder.layer.1.intermediate.dense.weight",
272
+ "shape": [
273
+ 3072,
274
+ 768
275
+ ],
276
+ "dtype": "float32",
277
+ "format": "f32-to-bf16",
278
+ "nbytes": 4718592,
279
+ "byteOffset": 19702272
280
+ },
281
+ {
282
+ "name": "encoder.layer.1.output.LayerNorm.bias",
283
+ "shape": [
284
+ 768
285
+ ],
286
+ "dtype": "float32",
287
+ "format": "f32-to-bf16",
288
+ "nbytes": 1536,
289
+ "byteOffset": 24420864
290
+ },
291
+ {
292
+ "name": "encoder.layer.1.output.LayerNorm.weight",
293
+ "shape": [
294
+ 768
295
+ ],
296
+ "dtype": "float32",
297
+ "format": "f32-to-bf16",
298
+ "nbytes": 1536,
299
+ "byteOffset": 24422400
300
+ },
301
+ {
302
+ "name": "encoder.layer.1.output.dense.bias",
303
+ "shape": [
304
+ 768
305
+ ],
306
+ "dtype": "float32",
307
+ "format": "f32-to-bf16",
308
+ "nbytes": 1536,
309
+ "byteOffset": 24423936
310
+ },
311
+ {
312
+ "name": "encoder.layer.1.output.dense.weight",
313
+ "shape": [
314
+ 768,
315
+ 3072
316
+ ],
317
+ "dtype": "float32",
318
+ "format": "f32-to-bf16",
319
+ "nbytes": 4718592,
320
+ "byteOffset": 24425472
321
+ },
322
+ {
323
+ "name": "encoder.layer.10.attention.output.LayerNorm.bias",
324
+ "shape": [
325
+ 768
326
+ ],
327
+ "dtype": "float32",
328
+ "format": "f32-to-bf16",
329
+ "nbytes": 1536,
330
+ "byteOffset": 29144064
331
+ },
332
+ {
333
+ "name": "encoder.layer.10.attention.output.LayerNorm.weight",
334
+ "shape": [
335
+ 768
336
+ ],
337
+ "dtype": "float32",
338
+ "format": "f32-to-bf16",
339
+ "nbytes": 1536,
340
+ "byteOffset": 29145600
341
+ },
342
+ {
343
+ "name": "encoder.layer.10.attention.output.dense.bias",
344
+ "shape": [
345
+ 768
346
+ ],
347
+ "dtype": "float32",
348
+ "format": "f32-to-bf16",
349
+ "nbytes": 1536,
350
+ "byteOffset": 29147136
351
+ },
352
+ {
353
+ "name": "encoder.layer.10.attention.output.dense.weight",
354
+ "shape": [
355
+ 768,
356
+ 768
357
+ ],
358
+ "dtype": "float32",
359
+ "format": "f32-to-bf16",
360
+ "nbytes": 1179648,
361
+ "byteOffset": 29148672
362
+ },
363
+ {
364
+ "name": "encoder.layer.10.attention.self.qkv.bias",
365
+ "shape": [
366
+ 2304
367
+ ],
368
+ "dtype": "float32",
369
+ "format": "f32-to-bf16",
370
+ "nbytes": 4608,
371
+ "byteOffset": 30328320
372
+ }
373
+ ],
374
+ "md5sum": "d19301ea1b244630109761e9a47e8c0f"
375
+ },
376
+ {
377
+ "dataPath": "params_shard_2.bin",
378
+ "format": "raw-shard",
379
+ "nbytes": 31896576,
380
+ "records": [
381
+ {
382
+ "name": "encoder.layer.10.attention.self.qkv.weight",
383
+ "shape": [
384
+ 2304,
385
+ 768
386
+ ],
387
+ "dtype": "float32",
388
+ "format": "f32-to-bf16",
389
+ "nbytes": 3538944,
390
+ "byteOffset": 0
391
+ },
392
+ {
393
+ "name": "encoder.layer.10.intermediate.dense.bias",
394
+ "shape": [
395
+ 3072
396
+ ],
397
+ "dtype": "float32",
398
+ "format": "f32-to-bf16",
399
+ "nbytes": 6144,
400
+ "byteOffset": 3538944
401
+ },
402
+ {
403
+ "name": "encoder.layer.10.intermediate.dense.weight",
404
+ "shape": [
405
+ 3072,
406
+ 768
407
+ ],
408
+ "dtype": "float32",
409
+ "format": "f32-to-bf16",
410
+ "nbytes": 4718592,
411
+ "byteOffset": 3545088
412
+ },
413
+ {
414
+ "name": "encoder.layer.10.output.LayerNorm.bias",
415
+ "shape": [
416
+ 768
417
+ ],
418
+ "dtype": "float32",
419
+ "format": "f32-to-bf16",
420
+ "nbytes": 1536,
421
+ "byteOffset": 8263680
422
+ },
423
+ {
424
+ "name": "encoder.layer.10.output.LayerNorm.weight",
425
+ "shape": [
426
+ 768
427
+ ],
428
+ "dtype": "float32",
429
+ "format": "f32-to-bf16",
430
+ "nbytes": 1536,
431
+ "byteOffset": 8265216
432
+ },
433
+ {
434
+ "name": "encoder.layer.10.output.dense.bias",
435
+ "shape": [
436
+ 768
437
+ ],
438
+ "dtype": "float32",
439
+ "format": "f32-to-bf16",
440
+ "nbytes": 1536,
441
+ "byteOffset": 8266752
442
+ },
443
+ {
444
+ "name": "encoder.layer.10.output.dense.weight",
445
+ "shape": [
446
+ 768,
447
+ 3072
448
+ ],
449
+ "dtype": "float32",
450
+ "format": "f32-to-bf16",
451
+ "nbytes": 4718592,
452
+ "byteOffset": 8268288
453
+ },
454
+ {
455
+ "name": "encoder.layer.11.attention.output.LayerNorm.bias",
456
+ "shape": [
457
+ 768
458
+ ],
459
+ "dtype": "float32",
460
+ "format": "f32-to-bf16",
461
+ "nbytes": 1536,
462
+ "byteOffset": 12986880
463
+ },
464
+ {
465
+ "name": "encoder.layer.11.attention.output.LayerNorm.weight",
466
+ "shape": [
467
+ 768
468
+ ],
469
+ "dtype": "float32",
470
+ "format": "f32-to-bf16",
471
+ "nbytes": 1536,
472
+ "byteOffset": 12988416
473
+ },
474
+ {
475
+ "name": "encoder.layer.11.attention.output.dense.bias",
476
+ "shape": [
477
+ 768
478
+ ],
479
+ "dtype": "float32",
480
+ "format": "f32-to-bf16",
481
+ "nbytes": 1536,
482
+ "byteOffset": 12989952
483
+ },
484
+ {
485
+ "name": "encoder.layer.11.attention.output.dense.weight",
486
+ "shape": [
487
+ 768,
488
+ 768
489
+ ],
490
+ "dtype": "float32",
491
+ "format": "f32-to-bf16",
492
+ "nbytes": 1179648,
493
+ "byteOffset": 12991488
494
+ },
495
+ {
496
+ "name": "encoder.layer.11.attention.self.qkv.bias",
497
+ "shape": [
498
+ 2304
499
+ ],
500
+ "dtype": "float32",
501
+ "format": "f32-to-bf16",
502
+ "nbytes": 4608,
503
+ "byteOffset": 14171136
504
+ },
505
+ {
506
+ "name": "encoder.layer.11.attention.self.qkv.weight",
507
+ "shape": [
508
+ 2304,
509
+ 768
510
+ ],
511
+ "dtype": "float32",
512
+ "format": "f32-to-bf16",
513
+ "nbytes": 3538944,
514
+ "byteOffset": 14175744
515
+ },
516
+ {
517
+ "name": "encoder.layer.11.intermediate.dense.bias",
518
+ "shape": [
519
+ 3072
520
+ ],
521
+ "dtype": "float32",
522
+ "format": "f32-to-bf16",
523
+ "nbytes": 6144,
524
+ "byteOffset": 17714688
525
+ },
526
+ {
527
+ "name": "encoder.layer.11.intermediate.dense.weight",
528
+ "shape": [
529
+ 3072,
530
+ 768
531
+ ],
532
+ "dtype": "float32",
533
+ "format": "f32-to-bf16",
534
+ "nbytes": 4718592,
535
+ "byteOffset": 17720832
536
+ },
537
+ {
538
+ "name": "encoder.layer.11.output.LayerNorm.bias",
539
+ "shape": [
540
+ 768
541
+ ],
542
+ "dtype": "float32",
543
+ "format": "f32-to-bf16",
544
+ "nbytes": 1536,
545
+ "byteOffset": 22439424
546
+ },
547
+ {
548
+ "name": "encoder.layer.11.output.LayerNorm.weight",
549
+ "shape": [
550
+ 768
551
+ ],
552
+ "dtype": "float32",
553
+ "format": "f32-to-bf16",
554
+ "nbytes": 1536,
555
+ "byteOffset": 22440960
556
+ },
557
+ {
558
+ "name": "encoder.layer.11.output.dense.bias",
559
+ "shape": [
560
+ 768
561
+ ],
562
+ "dtype": "float32",
563
+ "format": "f32-to-bf16",
564
+ "nbytes": 1536,
565
+ "byteOffset": 22442496
566
+ },
567
+ {
568
+ "name": "encoder.layer.11.output.dense.weight",
569
+ "shape": [
570
+ 768,
571
+ 3072
572
+ ],
573
+ "dtype": "float32",
574
+ "format": "f32-to-bf16",
575
+ "nbytes": 4718592,
576
+ "byteOffset": 22444032
577
+ },
578
+ {
579
+ "name": "encoder.layer.2.attention.output.LayerNorm.bias",
580
+ "shape": [
581
+ 768
582
+ ],
583
+ "dtype": "float32",
584
+ "format": "f32-to-bf16",
585
+ "nbytes": 1536,
586
+ "byteOffset": 27162624
587
+ },
588
+ {
589
+ "name": "encoder.layer.2.attention.output.LayerNorm.weight",
590
+ "shape": [
591
+ 768
592
+ ],
593
+ "dtype": "float32",
594
+ "format": "f32-to-bf16",
595
+ "nbytes": 1536,
596
+ "byteOffset": 27164160
597
+ },
598
+ {
599
+ "name": "encoder.layer.2.attention.output.dense.bias",
600
+ "shape": [
601
+ 768
602
+ ],
603
+ "dtype": "float32",
604
+ "format": "f32-to-bf16",
605
+ "nbytes": 1536,
606
+ "byteOffset": 27165696
607
+ },
608
+ {
609
+ "name": "encoder.layer.2.attention.output.dense.weight",
610
+ "shape": [
611
+ 768,
612
+ 768
613
+ ],
614
+ "dtype": "float32",
615
+ "format": "f32-to-bf16",
616
+ "nbytes": 1179648,
617
+ "byteOffset": 27167232
618
+ },
619
+ {
620
+ "name": "encoder.layer.2.attention.self.qkv.bias",
621
+ "shape": [
622
+ 2304
623
+ ],
624
+ "dtype": "float32",
625
+ "format": "f32-to-bf16",
626
+ "nbytes": 4608,
627
+ "byteOffset": 28346880
628
+ },
629
+ {
630
+ "name": "encoder.layer.2.attention.self.qkv.weight",
631
+ "shape": [
632
+ 2304,
633
+ 768
634
+ ],
635
+ "dtype": "float32",
636
+ "format": "f32-to-bf16",
637
+ "nbytes": 3538944,
638
+ "byteOffset": 28351488
639
+ },
640
+ {
641
+ "name": "encoder.layer.2.intermediate.dense.bias",
642
+ "shape": [
643
+ 3072
644
+ ],
645
+ "dtype": "float32",
646
+ "format": "f32-to-bf16",
647
+ "nbytes": 6144,
648
+ "byteOffset": 31890432
649
+ }
650
+ ],
651
+ "md5sum": "97adefcd2277d459f53c9bf2d25bf264"
652
+ },
653
+ {
654
+ "dataPath": "params_shard_3.bin",
655
+ "format": "raw-shard",
656
+ "nbytes": 33074688,
657
+ "records": [
658
+ {
659
+ "name": "encoder.layer.2.intermediate.dense.weight",
660
+ "shape": [
661
+ 3072,
662
+ 768
663
+ ],
664
+ "dtype": "float32",
665
+ "format": "f32-to-bf16",
666
+ "nbytes": 4718592,
667
+ "byteOffset": 0
668
+ },
669
+ {
670
+ "name": "encoder.layer.2.output.LayerNorm.bias",
671
+ "shape": [
672
+ 768
673
+ ],
674
+ "dtype": "float32",
675
+ "format": "f32-to-bf16",
676
+ "nbytes": 1536,
677
+ "byteOffset": 4718592
678
+ },
679
+ {
680
+ "name": "encoder.layer.2.output.LayerNorm.weight",
681
+ "shape": [
682
+ 768
683
+ ],
684
+ "dtype": "float32",
685
+ "format": "f32-to-bf16",
686
+ "nbytes": 1536,
687
+ "byteOffset": 4720128
688
+ },
689
+ {
690
+ "name": "encoder.layer.2.output.dense.bias",
691
+ "shape": [
692
+ 768
693
+ ],
694
+ "dtype": "float32",
695
+ "format": "f32-to-bf16",
696
+ "nbytes": 1536,
697
+ "byteOffset": 4721664
698
+ },
699
+ {
700
+ "name": "encoder.layer.2.output.dense.weight",
701
+ "shape": [
702
+ 768,
703
+ 3072
704
+ ],
705
+ "dtype": "float32",
706
+ "format": "f32-to-bf16",
707
+ "nbytes": 4718592,
708
+ "byteOffset": 4723200
709
+ },
710
+ {
711
+ "name": "encoder.layer.3.attention.output.LayerNorm.bias",
712
+ "shape": [
713
+ 768
714
+ ],
715
+ "dtype": "float32",
716
+ "format": "f32-to-bf16",
717
+ "nbytes": 1536,
718
+ "byteOffset": 9441792
719
+ },
720
+ {
721
+ "name": "encoder.layer.3.attention.output.LayerNorm.weight",
722
+ "shape": [
723
+ 768
724
+ ],
725
+ "dtype": "float32",
726
+ "format": "f32-to-bf16",
727
+ "nbytes": 1536,
728
+ "byteOffset": 9443328
729
+ },
730
+ {
731
+ "name": "encoder.layer.3.attention.output.dense.bias",
732
+ "shape": [
733
+ 768
734
+ ],
735
+ "dtype": "float32",
736
+ "format": "f32-to-bf16",
737
+ "nbytes": 1536,
738
+ "byteOffset": 9444864
739
+ },
740
+ {
741
+ "name": "encoder.layer.3.attention.output.dense.weight",
742
+ "shape": [
743
+ 768,
744
+ 768
745
+ ],
746
+ "dtype": "float32",
747
+ "format": "f32-to-bf16",
748
+ "nbytes": 1179648,
749
+ "byteOffset": 9446400
750
+ },
751
+ {
752
+ "name": "encoder.layer.3.attention.self.qkv.bias",
753
+ "shape": [
754
+ 2304
755
+ ],
756
+ "dtype": "float32",
757
+ "format": "f32-to-bf16",
758
+ "nbytes": 4608,
759
+ "byteOffset": 10626048
760
+ },
761
+ {
762
+ "name": "encoder.layer.3.attention.self.qkv.weight",
763
+ "shape": [
764
+ 2304,
765
+ 768
766
+ ],
767
+ "dtype": "float32",
768
+ "format": "f32-to-bf16",
769
+ "nbytes": 3538944,
770
+ "byteOffset": 10630656
771
+ },
772
+ {
773
+ "name": "encoder.layer.3.intermediate.dense.bias",
774
+ "shape": [
775
+ 3072
776
+ ],
777
+ "dtype": "float32",
778
+ "format": "f32-to-bf16",
779
+ "nbytes": 6144,
780
+ "byteOffset": 14169600
781
+ },
782
+ {
783
+ "name": "encoder.layer.3.intermediate.dense.weight",
784
+ "shape": [
785
+ 3072,
786
+ 768
787
+ ],
788
+ "dtype": "float32",
789
+ "format": "f32-to-bf16",
790
+ "nbytes": 4718592,
791
+ "byteOffset": 14175744
792
+ },
793
+ {
794
+ "name": "encoder.layer.3.output.LayerNorm.bias",
795
+ "shape": [
796
+ 768
797
+ ],
798
+ "dtype": "float32",
799
+ "format": "f32-to-bf16",
800
+ "nbytes": 1536,
801
+ "byteOffset": 18894336
802
+ },
803
+ {
804
+ "name": "encoder.layer.3.output.LayerNorm.weight",
805
+ "shape": [
806
+ 768
807
+ ],
808
+ "dtype": "float32",
809
+ "format": "f32-to-bf16",
810
+ "nbytes": 1536,
811
+ "byteOffset": 18895872
812
+ },
813
+ {
814
+ "name": "encoder.layer.3.output.dense.bias",
815
+ "shape": [
816
+ 768
817
+ ],
818
+ "dtype": "float32",
819
+ "format": "f32-to-bf16",
820
+ "nbytes": 1536,
821
+ "byteOffset": 18897408
822
+ },
823
+ {
824
+ "name": "encoder.layer.3.output.dense.weight",
825
+ "shape": [
826
+ 768,
827
+ 3072
828
+ ],
829
+ "dtype": "float32",
830
+ "format": "f32-to-bf16",
831
+ "nbytes": 4718592,
832
+ "byteOffset": 18898944
833
+ },
834
+ {
835
+ "name": "encoder.layer.4.attention.output.LayerNorm.bias",
836
+ "shape": [
837
+ 768
838
+ ],
839
+ "dtype": "float32",
840
+ "format": "f32-to-bf16",
841
+ "nbytes": 1536,
842
+ "byteOffset": 23617536
843
+ },
844
+ {
845
+ "name": "encoder.layer.4.attention.output.LayerNorm.weight",
846
+ "shape": [
847
+ 768
848
+ ],
849
+ "dtype": "float32",
850
+ "format": "f32-to-bf16",
851
+ "nbytes": 1536,
852
+ "byteOffset": 23619072
853
+ },
854
+ {
855
+ "name": "encoder.layer.4.attention.output.dense.bias",
856
+ "shape": [
857
+ 768
858
+ ],
859
+ "dtype": "float32",
860
+ "format": "f32-to-bf16",
861
+ "nbytes": 1536,
862
+ "byteOffset": 23620608
863
+ },
864
+ {
865
+ "name": "encoder.layer.4.attention.output.dense.weight",
866
+ "shape": [
867
+ 768,
868
+ 768
869
+ ],
870
+ "dtype": "float32",
871
+ "format": "f32-to-bf16",
872
+ "nbytes": 1179648,
873
+ "byteOffset": 23622144
874
+ },
875
+ {
876
+ "name": "encoder.layer.4.attention.self.qkv.bias",
877
+ "shape": [
878
+ 2304
879
+ ],
880
+ "dtype": "float32",
881
+ "format": "f32-to-bf16",
882
+ "nbytes": 4608,
883
+ "byteOffset": 24801792
884
+ },
885
+ {
886
+ "name": "encoder.layer.4.attention.self.qkv.weight",
887
+ "shape": [
888
+ 2304,
889
+ 768
890
+ ],
891
+ "dtype": "float32",
892
+ "format": "f32-to-bf16",
893
+ "nbytes": 3538944,
894
+ "byteOffset": 24806400
895
+ },
896
+ {
897
+ "name": "encoder.layer.4.intermediate.dense.bias",
898
+ "shape": [
899
+ 3072
900
+ ],
901
+ "dtype": "float32",
902
+ "format": "f32-to-bf16",
903
+ "nbytes": 6144,
904
+ "byteOffset": 28345344
905
+ },
906
+ {
907
+ "name": "encoder.layer.4.intermediate.dense.weight",
908
+ "shape": [
909
+ 3072,
910
+ 768
911
+ ],
912
+ "dtype": "float32",
913
+ "format": "f32-to-bf16",
914
+ "nbytes": 4718592,
915
+ "byteOffset": 28351488
916
+ },
917
+ {
918
+ "name": "encoder.layer.4.output.LayerNorm.bias",
919
+ "shape": [
920
+ 768
921
+ ],
922
+ "dtype": "float32",
923
+ "format": "f32-to-bf16",
924
+ "nbytes": 1536,
925
+ "byteOffset": 33070080
926
+ },
927
+ {
928
+ "name": "encoder.layer.4.output.LayerNorm.weight",
929
+ "shape": [
930
+ 768
931
+ ],
932
+ "dtype": "float32",
933
+ "format": "f32-to-bf16",
934
+ "nbytes": 1536,
935
+ "byteOffset": 33071616
936
+ },
937
+ {
938
+ "name": "encoder.layer.4.output.dense.bias",
939
+ "shape": [
940
+ 768
941
+ ],
942
+ "dtype": "float32",
943
+ "format": "f32-to-bf16",
944
+ "nbytes": 1536,
945
+ "byteOffset": 33073152
946
+ }
947
+ ],
948
+ "md5sum": "e9dd727b06f09c2a5284809b08e3eff9"
949
+ },
950
+ {
951
+ "dataPath": "params_shard_4.bin",
952
+ "format": "raw-shard",
953
+ "nbytes": 33074688,
954
+ "records": [
955
+ {
956
+ "name": "encoder.layer.4.output.dense.weight",
957
+ "shape": [
958
+ 768,
959
+ 3072
960
+ ],
961
+ "dtype": "float32",
962
+ "format": "f32-to-bf16",
963
+ "nbytes": 4718592,
964
+ "byteOffset": 0
965
+ },
966
+ {
967
+ "name": "encoder.layer.5.attention.output.LayerNorm.bias",
968
+ "shape": [
969
+ 768
970
+ ],
971
+ "dtype": "float32",
972
+ "format": "f32-to-bf16",
973
+ "nbytes": 1536,
974
+ "byteOffset": 4718592
975
+ },
976
+ {
977
+ "name": "encoder.layer.5.attention.output.LayerNorm.weight",
978
+ "shape": [
979
+ 768
980
+ ],
981
+ "dtype": "float32",
982
+ "format": "f32-to-bf16",
983
+ "nbytes": 1536,
984
+ "byteOffset": 4720128
985
+ },
986
+ {
987
+ "name": "encoder.layer.5.attention.output.dense.bias",
988
+ "shape": [
989
+ 768
990
+ ],
991
+ "dtype": "float32",
992
+ "format": "f32-to-bf16",
993
+ "nbytes": 1536,
994
+ "byteOffset": 4721664
995
+ },
996
+ {
997
+ "name": "encoder.layer.5.attention.output.dense.weight",
998
+ "shape": [
999
+ 768,
1000
+ 768
1001
+ ],
1002
+ "dtype": "float32",
1003
+ "format": "f32-to-bf16",
1004
+ "nbytes": 1179648,
1005
+ "byteOffset": 4723200
1006
+ },
1007
+ {
1008
+ "name": "encoder.layer.5.attention.self.qkv.bias",
1009
+ "shape": [
1010
+ 2304
1011
+ ],
1012
+ "dtype": "float32",
1013
+ "format": "f32-to-bf16",
1014
+ "nbytes": 4608,
1015
+ "byteOffset": 5902848
1016
+ },
1017
+ {
1018
+ "name": "encoder.layer.5.attention.self.qkv.weight",
1019
+ "shape": [
1020
+ 2304,
1021
+ 768
1022
+ ],
1023
+ "dtype": "float32",
1024
+ "format": "f32-to-bf16",
1025
+ "nbytes": 3538944,
1026
+ "byteOffset": 5907456
1027
+ },
1028
+ {
1029
+ "name": "encoder.layer.5.intermediate.dense.bias",
1030
+ "shape": [
1031
+ 3072
1032
+ ],
1033
+ "dtype": "float32",
1034
+ "format": "f32-to-bf16",
1035
+ "nbytes": 6144,
1036
+ "byteOffset": 9446400
1037
+ },
1038
+ {
1039
+ "name": "encoder.layer.5.intermediate.dense.weight",
1040
+ "shape": [
1041
+ 3072,
1042
+ 768
1043
+ ],
1044
+ "dtype": "float32",
1045
+ "format": "f32-to-bf16",
1046
+ "nbytes": 4718592,
1047
+ "byteOffset": 9452544
1048
+ },
1049
+ {
1050
+ "name": "encoder.layer.5.output.LayerNorm.bias",
1051
+ "shape": [
1052
+ 768
1053
+ ],
1054
+ "dtype": "float32",
1055
+ "format": "f32-to-bf16",
1056
+ "nbytes": 1536,
1057
+ "byteOffset": 14171136
1058
+ },
1059
+ {
1060
+ "name": "encoder.layer.5.output.LayerNorm.weight",
1061
+ "shape": [
1062
+ 768
1063
+ ],
1064
+ "dtype": "float32",
1065
+ "format": "f32-to-bf16",
1066
+ "nbytes": 1536,
1067
+ "byteOffset": 14172672
1068
+ },
1069
+ {
1070
+ "name": "encoder.layer.5.output.dense.bias",
1071
+ "shape": [
1072
+ 768
1073
+ ],
1074
+ "dtype": "float32",
1075
+ "format": "f32-to-bf16",
1076
+ "nbytes": 1536,
1077
+ "byteOffset": 14174208
1078
+ },
1079
+ {
1080
+ "name": "encoder.layer.5.output.dense.weight",
1081
+ "shape": [
1082
+ 768,
1083
+ 3072
1084
+ ],
1085
+ "dtype": "float32",
1086
+ "format": "f32-to-bf16",
1087
+ "nbytes": 4718592,
1088
+ "byteOffset": 14175744
1089
+ },
1090
+ {
1091
+ "name": "encoder.layer.6.attention.output.LayerNorm.bias",
1092
+ "shape": [
1093
+ 768
1094
+ ],
1095
+ "dtype": "float32",
1096
+ "format": "f32-to-bf16",
1097
+ "nbytes": 1536,
1098
+ "byteOffset": 18894336
1099
+ },
1100
+ {
1101
+ "name": "encoder.layer.6.attention.output.LayerNorm.weight",
1102
+ "shape": [
1103
+ 768
1104
+ ],
1105
+ "dtype": "float32",
1106
+ "format": "f32-to-bf16",
1107
+ "nbytes": 1536,
1108
+ "byteOffset": 18895872
1109
+ },
1110
+ {
1111
+ "name": "encoder.layer.6.attention.output.dense.bias",
1112
+ "shape": [
1113
+ 768
1114
+ ],
1115
+ "dtype": "float32",
1116
+ "format": "f32-to-bf16",
1117
+ "nbytes": 1536,
1118
+ "byteOffset": 18897408
1119
+ },
1120
+ {
1121
+ "name": "encoder.layer.6.attention.output.dense.weight",
1122
+ "shape": [
1123
+ 768,
1124
+ 768
1125
+ ],
1126
+ "dtype": "float32",
1127
+ "format": "f32-to-bf16",
1128
+ "nbytes": 1179648,
1129
+ "byteOffset": 18898944
1130
+ },
1131
+ {
1132
+ "name": "encoder.layer.6.attention.self.qkv.bias",
1133
+ "shape": [
1134
+ 2304
1135
+ ],
1136
+ "dtype": "float32",
1137
+ "format": "f32-to-bf16",
1138
+ "nbytes": 4608,
1139
+ "byteOffset": 20078592
1140
+ },
1141
+ {
1142
+ "name": "encoder.layer.6.attention.self.qkv.weight",
1143
+ "shape": [
1144
+ 2304,
1145
+ 768
1146
+ ],
1147
+ "dtype": "float32",
1148
+ "format": "f32-to-bf16",
1149
+ "nbytes": 3538944,
1150
+ "byteOffset": 20083200
1151
+ },
1152
+ {
1153
+ "name": "encoder.layer.6.intermediate.dense.bias",
1154
+ "shape": [
1155
+ 3072
1156
+ ],
1157
+ "dtype": "float32",
1158
+ "format": "f32-to-bf16",
1159
+ "nbytes": 6144,
1160
+ "byteOffset": 23622144
1161
+ },
1162
+ {
1163
+ "name": "encoder.layer.6.intermediate.dense.weight",
1164
+ "shape": [
1165
+ 3072,
1166
+ 768
1167
+ ],
1168
+ "dtype": "float32",
1169
+ "format": "f32-to-bf16",
1170
+ "nbytes": 4718592,
1171
+ "byteOffset": 23628288
1172
+ },
1173
+ {
1174
+ "name": "encoder.layer.6.output.LayerNorm.bias",
1175
+ "shape": [
1176
+ 768
1177
+ ],
1178
+ "dtype": "float32",
1179
+ "format": "f32-to-bf16",
1180
+ "nbytes": 1536,
1181
+ "byteOffset": 28346880
1182
+ },
1183
+ {
1184
+ "name": "encoder.layer.6.output.LayerNorm.weight",
1185
+ "shape": [
1186
+ 768
1187
+ ],
1188
+ "dtype": "float32",
1189
+ "format": "f32-to-bf16",
1190
+ "nbytes": 1536,
1191
+ "byteOffset": 28348416
1192
+ },
1193
+ {
1194
+ "name": "encoder.layer.6.output.dense.bias",
1195
+ "shape": [
1196
+ 768
1197
+ ],
1198
+ "dtype": "float32",
1199
+ "format": "f32-to-bf16",
1200
+ "nbytes": 1536,
1201
+ "byteOffset": 28349952
1202
+ },
1203
+ {
1204
+ "name": "encoder.layer.6.output.dense.weight",
1205
+ "shape": [
1206
+ 768,
1207
+ 3072
1208
+ ],
1209
+ "dtype": "float32",
1210
+ "format": "f32-to-bf16",
1211
+ "nbytes": 4718592,
1212
+ "byteOffset": 28351488
1213
+ },
1214
+ {
1215
+ "name": "encoder.layer.7.attention.output.LayerNorm.bias",
1216
+ "shape": [
1217
+ 768
1218
+ ],
1219
+ "dtype": "float32",
1220
+ "format": "f32-to-bf16",
1221
+ "nbytes": 1536,
1222
+ "byteOffset": 33070080
1223
+ },
1224
+ {
1225
+ "name": "encoder.layer.7.attention.output.LayerNorm.weight",
1226
+ "shape": [
1227
+ 768
1228
+ ],
1229
+ "dtype": "float32",
1230
+ "format": "f32-to-bf16",
1231
+ "nbytes": 1536,
1232
+ "byteOffset": 33071616
1233
+ },
1234
+ {
1235
+ "name": "encoder.layer.7.attention.output.dense.bias",
1236
+ "shape": [
1237
+ 768
1238
+ ],
1239
+ "dtype": "float32",
1240
+ "format": "f32-to-bf16",
1241
+ "nbytes": 1536,
1242
+ "byteOffset": 33073152
1243
+ }
1244
+ ],
1245
+ "md5sum": "9fbad31fed16b934acb1158c464c9774"
1246
+ },
1247
+ {
1248
+ "dataPath": "params_shard_5.bin",
1249
+ "format": "raw-shard",
1250
+ "nbytes": 33080832,
1251
+ "records": [
1252
+ {
1253
+ "name": "encoder.layer.7.attention.output.dense.weight",
1254
+ "shape": [
1255
+ 768,
1256
+ 768
1257
+ ],
1258
+ "dtype": "float32",
1259
+ "format": "f32-to-bf16",
1260
+ "nbytes": 1179648,
1261
+ "byteOffset": 0
1262
+ },
1263
+ {
1264
+ "name": "encoder.layer.7.attention.self.qkv.bias",
1265
+ "shape": [
1266
+ 2304
1267
+ ],
1268
+ "dtype": "float32",
1269
+ "format": "f32-to-bf16",
1270
+ "nbytes": 4608,
1271
+ "byteOffset": 1179648
1272
+ },
1273
+ {
1274
+ "name": "encoder.layer.7.attention.self.qkv.weight",
1275
+ "shape": [
1276
+ 2304,
1277
+ 768
1278
+ ],
1279
+ "dtype": "float32",
1280
+ "format": "f32-to-bf16",
1281
+ "nbytes": 3538944,
1282
+ "byteOffset": 1184256
1283
+ },
1284
+ {
1285
+ "name": "encoder.layer.7.intermediate.dense.bias",
1286
+ "shape": [
1287
+ 3072
1288
+ ],
1289
+ "dtype": "float32",
1290
+ "format": "f32-to-bf16",
1291
+ "nbytes": 6144,
1292
+ "byteOffset": 4723200
1293
+ },
1294
+ {
1295
+ "name": "encoder.layer.7.intermediate.dense.weight",
1296
+ "shape": [
1297
+ 3072,
1298
+ 768
1299
+ ],
1300
+ "dtype": "float32",
1301
+ "format": "f32-to-bf16",
1302
+ "nbytes": 4718592,
1303
+ "byteOffset": 4729344
1304
+ },
1305
+ {
1306
+ "name": "encoder.layer.7.output.LayerNorm.bias",
1307
+ "shape": [
1308
+ 768
1309
+ ],
1310
+ "dtype": "float32",
1311
+ "format": "f32-to-bf16",
1312
+ "nbytes": 1536,
1313
+ "byteOffset": 9447936
1314
+ },
1315
+ {
1316
+ "name": "encoder.layer.7.output.LayerNorm.weight",
1317
+ "shape": [
1318
+ 768
1319
+ ],
1320
+ "dtype": "float32",
1321
+ "format": "f32-to-bf16",
1322
+ "nbytes": 1536,
1323
+ "byteOffset": 9449472
1324
+ },
1325
+ {
1326
+ "name": "encoder.layer.7.output.dense.bias",
1327
+ "shape": [
1328
+ 768
1329
+ ],
1330
+ "dtype": "float32",
1331
+ "format": "f32-to-bf16",
1332
+ "nbytes": 1536,
1333
+ "byteOffset": 9451008
1334
+ },
1335
+ {
1336
+ "name": "encoder.layer.7.output.dense.weight",
1337
+ "shape": [
1338
+ 768,
1339
+ 3072
1340
+ ],
1341
+ "dtype": "float32",
1342
+ "format": "f32-to-bf16",
1343
+ "nbytes": 4718592,
1344
+ "byteOffset": 9452544
1345
+ },
1346
+ {
1347
+ "name": "encoder.layer.8.attention.output.LayerNorm.bias",
1348
+ "shape": [
1349
+ 768
1350
+ ],
1351
+ "dtype": "float32",
1352
+ "format": "f32-to-bf16",
1353
+ "nbytes": 1536,
1354
+ "byteOffset": 14171136
1355
+ },
1356
+ {
1357
+ "name": "encoder.layer.8.attention.output.LayerNorm.weight",
1358
+ "shape": [
1359
+ 768
1360
+ ],
1361
+ "dtype": "float32",
1362
+ "format": "f32-to-bf16",
1363
+ "nbytes": 1536,
1364
+ "byteOffset": 14172672
1365
+ },
1366
+ {
1367
+ "name": "encoder.layer.8.attention.output.dense.bias",
1368
+ "shape": [
1369
+ 768
1370
+ ],
1371
+ "dtype": "float32",
1372
+ "format": "f32-to-bf16",
1373
+ "nbytes": 1536,
1374
+ "byteOffset": 14174208
1375
+ },
1376
+ {
1377
+ "name": "encoder.layer.8.attention.output.dense.weight",
1378
+ "shape": [
1379
+ 768,
1380
+ 768
1381
+ ],
1382
+ "dtype": "float32",
1383
+ "format": "f32-to-bf16",
1384
+ "nbytes": 1179648,
1385
+ "byteOffset": 14175744
1386
+ },
1387
+ {
1388
+ "name": "encoder.layer.8.attention.self.qkv.bias",
1389
+ "shape": [
1390
+ 2304
1391
+ ],
1392
+ "dtype": "float32",
1393
+ "format": "f32-to-bf16",
1394
+ "nbytes": 4608,
1395
+ "byteOffset": 15355392
1396
+ },
1397
+ {
1398
+ "name": "encoder.layer.8.attention.self.qkv.weight",
1399
+ "shape": [
1400
+ 2304,
1401
+ 768
1402
+ ],
1403
+ "dtype": "float32",
1404
+ "format": "f32-to-bf16",
1405
+ "nbytes": 3538944,
1406
+ "byteOffset": 15360000
1407
+ },
1408
+ {
1409
+ "name": "encoder.layer.8.intermediate.dense.bias",
1410
+ "shape": [
1411
+ 3072
1412
+ ],
1413
+ "dtype": "float32",
1414
+ "format": "f32-to-bf16",
1415
+ "nbytes": 6144,
1416
+ "byteOffset": 18898944
1417
+ },
1418
+ {
1419
+ "name": "encoder.layer.8.intermediate.dense.weight",
1420
+ "shape": [
1421
+ 3072,
1422
+ 768
1423
+ ],
1424
+ "dtype": "float32",
1425
+ "format": "f32-to-bf16",
1426
+ "nbytes": 4718592,
1427
+ "byteOffset": 18905088
1428
+ },
1429
+ {
1430
+ "name": "encoder.layer.8.output.LayerNorm.bias",
1431
+ "shape": [
1432
+ 768
1433
+ ],
1434
+ "dtype": "float32",
1435
+ "format": "f32-to-bf16",
1436
+ "nbytes": 1536,
1437
+ "byteOffset": 23623680
1438
+ },
1439
+ {
1440
+ "name": "encoder.layer.8.output.LayerNorm.weight",
1441
+ "shape": [
1442
+ 768
1443
+ ],
1444
+ "dtype": "float32",
1445
+ "format": "f32-to-bf16",
1446
+ "nbytes": 1536,
1447
+ "byteOffset": 23625216
1448
+ },
1449
+ {
1450
+ "name": "encoder.layer.8.output.dense.bias",
1451
+ "shape": [
1452
+ 768
1453
+ ],
1454
+ "dtype": "float32",
1455
+ "format": "f32-to-bf16",
1456
+ "nbytes": 1536,
1457
+ "byteOffset": 23626752
1458
+ },
1459
+ {
1460
+ "name": "encoder.layer.8.output.dense.weight",
1461
+ "shape": [
1462
+ 768,
1463
+ 3072
1464
+ ],
1465
+ "dtype": "float32",
1466
+ "format": "f32-to-bf16",
1467
+ "nbytes": 4718592,
1468
+ "byteOffset": 23628288
1469
+ },
1470
+ {
1471
+ "name": "encoder.layer.9.attention.output.LayerNorm.bias",
1472
+ "shape": [
1473
+ 768
1474
+ ],
1475
+ "dtype": "float32",
1476
+ "format": "f32-to-bf16",
1477
+ "nbytes": 1536,
1478
+ "byteOffset": 28346880
1479
+ },
1480
+ {
1481
+ "name": "encoder.layer.9.attention.output.LayerNorm.weight",
1482
+ "shape": [
1483
+ 768
1484
+ ],
1485
+ "dtype": "float32",
1486
+ "format": "f32-to-bf16",
1487
+ "nbytes": 1536,
1488
+ "byteOffset": 28348416
1489
+ },
1490
+ {
1491
+ "name": "encoder.layer.9.attention.output.dense.bias",
1492
+ "shape": [
1493
+ 768
1494
+ ],
1495
+ "dtype": "float32",
1496
+ "format": "f32-to-bf16",
1497
+ "nbytes": 1536,
1498
+ "byteOffset": 28349952
1499
+ },
1500
+ {
1501
+ "name": "encoder.layer.9.attention.output.dense.weight",
1502
+ "shape": [
1503
+ 768,
1504
+ 768
1505
+ ],
1506
+ "dtype": "float32",
1507
+ "format": "f32-to-bf16",
1508
+ "nbytes": 1179648,
1509
+ "byteOffset": 28351488
1510
+ },
1511
+ {
1512
+ "name": "encoder.layer.9.attention.self.qkv.bias",
1513
+ "shape": [
1514
+ 2304
1515
+ ],
1516
+ "dtype": "float32",
1517
+ "format": "f32-to-bf16",
1518
+ "nbytes": 4608,
1519
+ "byteOffset": 29531136
1520
+ },
1521
+ {
1522
+ "name": "encoder.layer.9.attention.self.qkv.weight",
1523
+ "shape": [
1524
+ 2304,
1525
+ 768
1526
+ ],
1527
+ "dtype": "float32",
1528
+ "format": "f32-to-bf16",
1529
+ "nbytes": 3538944,
1530
+ "byteOffset": 29535744
1531
+ },
1532
+ {
1533
+ "name": "encoder.layer.9.intermediate.dense.bias",
1534
+ "shape": [
1535
+ 3072
1536
+ ],
1537
+ "dtype": "float32",
1538
+ "format": "f32-to-bf16",
1539
+ "nbytes": 6144,
1540
+ "byteOffset": 33074688
1541
+ }
1542
+ ],
1543
+ "md5sum": "7a49f1f4cfd562bb32cb568fc622b50e"
1544
+ },
1545
+ {
1546
+ "dataPath": "params_shard_6.bin",
1547
+ "format": "raw-shard",
1548
+ "nbytes": 9441792,
1549
+ "records": [
1550
+ {
1551
+ "name": "encoder.layer.9.intermediate.dense.weight",
1552
+ "shape": [
1553
+ 3072,
1554
+ 768
1555
+ ],
1556
+ "dtype": "float32",
1557
+ "format": "f32-to-bf16",
1558
+ "nbytes": 4718592,
1559
+ "byteOffset": 0
1560
+ },
1561
+ {
1562
+ "name": "encoder.layer.9.output.LayerNorm.bias",
1563
+ "shape": [
1564
+ 768
1565
+ ],
1566
+ "dtype": "float32",
1567
+ "format": "f32-to-bf16",
1568
+ "nbytes": 1536,
1569
+ "byteOffset": 4718592
1570
+ },
1571
+ {
1572
+ "name": "encoder.layer.9.output.LayerNorm.weight",
1573
+ "shape": [
1574
+ 768
1575
+ ],
1576
+ "dtype": "float32",
1577
+ "format": "f32-to-bf16",
1578
+ "nbytes": 1536,
1579
+ "byteOffset": 4720128
1580
+ },
1581
+ {
1582
+ "name": "encoder.layer.9.output.dense.bias",
1583
+ "shape": [
1584
+ 768
1585
+ ],
1586
+ "dtype": "float32",
1587
+ "format": "f32-to-bf16",
1588
+ "nbytes": 1536,
1589
+ "byteOffset": 4721664
1590
+ },
1591
+ {
1592
+ "name": "encoder.layer.9.output.dense.weight",
1593
+ "shape": [
1594
+ 768,
1595
+ 3072
1596
+ ],
1597
+ "dtype": "float32",
1598
+ "format": "f32-to-bf16",
1599
+ "nbytes": 4718592,
1600
+ "byteOffset": 4723200
1601
+ }
1602
+ ],
1603
+ "md5sum": "2d4a5c41b3ca6eab2b402b5731cbc3cf"
1604
+ }
1605
+ ]
1606
+ }
params_shard_0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f69c47a44fe73a278da2f4831922de8d6affd8817ca93599feff02924256a396
3
+ size 46881792
params_shard_1.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08e793328340075cf0c1fb67a07705609daa428bdefba3b89a9b6440efa23bd3
3
+ size 30332928
params_shard_2.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7de55b0183d1b0d51731962008ad0c529e51b42e4f1459c4a8328904c5361f3d
3
+ size 31896576
params_shard_3.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2dc3b5620241e1310a88f0f21f1eba0e866b75995b763718684b4dcb5c0b3cb
3
+ size 33074688
params_shard_4.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f9c5f6b075ed33870a4e9f833a960e1368ae57c4a36c4b45ca324ce4ad60b5e
3
+ size 33074688
params_shard_5.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ee8bf4b86c3ee6bcf2e63aed4ee09ed5ebb874193264d2d25606ac017447576
3
+ size 33080832
params_shard_6.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec281311e092824063785b1d51fb6a36d7b39dd6df1a9aea8f1a135a87b48e0d
3
+ size 9441792
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": true,
47
+ "mask_token": "[MASK]",
48
+ "max_length": 512,
49
+ "model_max_length": 512,
50
+ "pad_to_multiple_of": null,
51
+ "pad_token": "[PAD]",
52
+ "pad_token_type_id": 0,
53
+ "padding_side": "right",
54
+ "sep_token": "[SEP]",
55
+ "stride": 0,
56
+ "strip_accents": null,
57
+ "tokenize_chinese_chars": true,
58
+ "tokenizer_class": "BertTokenizer",
59
+ "truncation_side": "right",
60
+ "truncation_strategy": "longest_first",
61
+ "unk_token": "[UNK]"
62
+ }