Lumi-node committed on
Commit
755cb78
·
verified ·
1 Parent(s): 1370bb3

Upload spec.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. spec.json +715 -0
spec.json ADDED
@@ -0,0 +1,715 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_id": "gpt2",
3
+ "family": "gpt2",
4
+ "hidden_dim": 768,
5
+ "num_layers": 12,
6
+ "num_heads": 12,
7
+ "vocab_size": 50257,
8
+ "max_seq_len": 1024,
9
+ "parts": {
10
+ "embedding": {
11
+ "part_type": "embedding",
12
+ "layer_idx": null,
13
+ "module_path": "transformer.wte",
14
+ "input_dim": 50257,
15
+ "output_dim": 768,
16
+ "num_heads": null,
17
+ "head_dim": null,
18
+ "intermediate_dim": null,
19
+ "extra_info": {}
20
+ },
21
+ "pos_embedding": {
22
+ "part_type": "embedding",
23
+ "layer_idx": null,
24
+ "module_path": "transformer.wpe",
25
+ "input_dim": 1024,
26
+ "output_dim": 768,
27
+ "num_heads": null,
28
+ "head_dim": null,
29
+ "intermediate_dim": null,
30
+ "extra_info": {}
31
+ },
32
+ "attention_0": {
33
+ "part_type": "attention",
34
+ "layer_idx": 0,
35
+ "module_path": "transformer.h.0.attn",
36
+ "input_dim": 768,
37
+ "output_dim": 768,
38
+ "num_heads": 12,
39
+ "head_dim": 64,
40
+ "intermediate_dim": null,
41
+ "extra_info": {}
42
+ },
43
+ "ffn_0": {
44
+ "part_type": "ffn",
45
+ "layer_idx": 0,
46
+ "module_path": "transformer.h.0.mlp",
47
+ "input_dim": 768,
48
+ "output_dim": 768,
49
+ "num_heads": null,
50
+ "head_dim": null,
51
+ "intermediate_dim": 3072,
52
+ "extra_info": {}
53
+ },
54
+ "ln_1_0": {
55
+ "part_type": "layer_norm",
56
+ "layer_idx": 0,
57
+ "module_path": "transformer.h.0.ln_1",
58
+ "input_dim": 768,
59
+ "output_dim": 768,
60
+ "num_heads": null,
61
+ "head_dim": null,
62
+ "intermediate_dim": null,
63
+ "extra_info": {}
64
+ },
65
+ "ln_2_0": {
66
+ "part_type": "layer_norm",
67
+ "layer_idx": 0,
68
+ "module_path": "transformer.h.0.ln_2",
69
+ "input_dim": 768,
70
+ "output_dim": 768,
71
+ "num_heads": null,
72
+ "head_dim": null,
73
+ "intermediate_dim": null,
74
+ "extra_info": {}
75
+ },
76
+ "layer_0": {
77
+ "part_type": "full_layer",
78
+ "layer_idx": 0,
79
+ "module_path": "transformer.h.0",
80
+ "input_dim": 768,
81
+ "output_dim": 768,
82
+ "num_heads": 12,
83
+ "head_dim": null,
84
+ "intermediate_dim": 3072,
85
+ "extra_info": {}
86
+ },
87
+ "attention_1": {
88
+ "part_type": "attention",
89
+ "layer_idx": 1,
90
+ "module_path": "transformer.h.1.attn",
91
+ "input_dim": 768,
92
+ "output_dim": 768,
93
+ "num_heads": 12,
94
+ "head_dim": 64,
95
+ "intermediate_dim": null,
96
+ "extra_info": {}
97
+ },
98
+ "ffn_1": {
99
+ "part_type": "ffn",
100
+ "layer_idx": 1,
101
+ "module_path": "transformer.h.1.mlp",
102
+ "input_dim": 768,
103
+ "output_dim": 768,
104
+ "num_heads": null,
105
+ "head_dim": null,
106
+ "intermediate_dim": 3072,
107
+ "extra_info": {}
108
+ },
109
+ "ln_1_1": {
110
+ "part_type": "layer_norm",
111
+ "layer_idx": 1,
112
+ "module_path": "transformer.h.1.ln_1",
113
+ "input_dim": 768,
114
+ "output_dim": 768,
115
+ "num_heads": null,
116
+ "head_dim": null,
117
+ "intermediate_dim": null,
118
+ "extra_info": {}
119
+ },
120
+ "ln_2_1": {
121
+ "part_type": "layer_norm",
122
+ "layer_idx": 1,
123
+ "module_path": "transformer.h.1.ln_2",
124
+ "input_dim": 768,
125
+ "output_dim": 768,
126
+ "num_heads": null,
127
+ "head_dim": null,
128
+ "intermediate_dim": null,
129
+ "extra_info": {}
130
+ },
131
+ "layer_1": {
132
+ "part_type": "full_layer",
133
+ "layer_idx": 1,
134
+ "module_path": "transformer.h.1",
135
+ "input_dim": 768,
136
+ "output_dim": 768,
137
+ "num_heads": 12,
138
+ "head_dim": null,
139
+ "intermediate_dim": 3072,
140
+ "extra_info": {}
141
+ },
142
+ "attention_2": {
143
+ "part_type": "attention",
144
+ "layer_idx": 2,
145
+ "module_path": "transformer.h.2.attn",
146
+ "input_dim": 768,
147
+ "output_dim": 768,
148
+ "num_heads": 12,
149
+ "head_dim": 64,
150
+ "intermediate_dim": null,
151
+ "extra_info": {}
152
+ },
153
+ "ffn_2": {
154
+ "part_type": "ffn",
155
+ "layer_idx": 2,
156
+ "module_path": "transformer.h.2.mlp",
157
+ "input_dim": 768,
158
+ "output_dim": 768,
159
+ "num_heads": null,
160
+ "head_dim": null,
161
+ "intermediate_dim": 3072,
162
+ "extra_info": {}
163
+ },
164
+ "ln_1_2": {
165
+ "part_type": "layer_norm",
166
+ "layer_idx": 2,
167
+ "module_path": "transformer.h.2.ln_1",
168
+ "input_dim": 768,
169
+ "output_dim": 768,
170
+ "num_heads": null,
171
+ "head_dim": null,
172
+ "intermediate_dim": null,
173
+ "extra_info": {}
174
+ },
175
+ "ln_2_2": {
176
+ "part_type": "layer_norm",
177
+ "layer_idx": 2,
178
+ "module_path": "transformer.h.2.ln_2",
179
+ "input_dim": 768,
180
+ "output_dim": 768,
181
+ "num_heads": null,
182
+ "head_dim": null,
183
+ "intermediate_dim": null,
184
+ "extra_info": {}
185
+ },
186
+ "layer_2": {
187
+ "part_type": "full_layer",
188
+ "layer_idx": 2,
189
+ "module_path": "transformer.h.2",
190
+ "input_dim": 768,
191
+ "output_dim": 768,
192
+ "num_heads": 12,
193
+ "head_dim": null,
194
+ "intermediate_dim": 3072,
195
+ "extra_info": {}
196
+ },
197
+ "attention_3": {
198
+ "part_type": "attention",
199
+ "layer_idx": 3,
200
+ "module_path": "transformer.h.3.attn",
201
+ "input_dim": 768,
202
+ "output_dim": 768,
203
+ "num_heads": 12,
204
+ "head_dim": 64,
205
+ "intermediate_dim": null,
206
+ "extra_info": {}
207
+ },
208
+ "ffn_3": {
209
+ "part_type": "ffn",
210
+ "layer_idx": 3,
211
+ "module_path": "transformer.h.3.mlp",
212
+ "input_dim": 768,
213
+ "output_dim": 768,
214
+ "num_heads": null,
215
+ "head_dim": null,
216
+ "intermediate_dim": 3072,
217
+ "extra_info": {}
218
+ },
219
+ "ln_1_3": {
220
+ "part_type": "layer_norm",
221
+ "layer_idx": 3,
222
+ "module_path": "transformer.h.3.ln_1",
223
+ "input_dim": 768,
224
+ "output_dim": 768,
225
+ "num_heads": null,
226
+ "head_dim": null,
227
+ "intermediate_dim": null,
228
+ "extra_info": {}
229
+ },
230
+ "ln_2_3": {
231
+ "part_type": "layer_norm",
232
+ "layer_idx": 3,
233
+ "module_path": "transformer.h.3.ln_2",
234
+ "input_dim": 768,
235
+ "output_dim": 768,
236
+ "num_heads": null,
237
+ "head_dim": null,
238
+ "intermediate_dim": null,
239
+ "extra_info": {}
240
+ },
241
+ "layer_3": {
242
+ "part_type": "full_layer",
243
+ "layer_idx": 3,
244
+ "module_path": "transformer.h.3",
245
+ "input_dim": 768,
246
+ "output_dim": 768,
247
+ "num_heads": 12,
248
+ "head_dim": null,
249
+ "intermediate_dim": 3072,
250
+ "extra_info": {}
251
+ },
252
+ "attention_4": {
253
+ "part_type": "attention",
254
+ "layer_idx": 4,
255
+ "module_path": "transformer.h.4.attn",
256
+ "input_dim": 768,
257
+ "output_dim": 768,
258
+ "num_heads": 12,
259
+ "head_dim": 64,
260
+ "intermediate_dim": null,
261
+ "extra_info": {}
262
+ },
263
+ "ffn_4": {
264
+ "part_type": "ffn",
265
+ "layer_idx": 4,
266
+ "module_path": "transformer.h.4.mlp",
267
+ "input_dim": 768,
268
+ "output_dim": 768,
269
+ "num_heads": null,
270
+ "head_dim": null,
271
+ "intermediate_dim": 3072,
272
+ "extra_info": {}
273
+ },
274
+ "ln_1_4": {
275
+ "part_type": "layer_norm",
276
+ "layer_idx": 4,
277
+ "module_path": "transformer.h.4.ln_1",
278
+ "input_dim": 768,
279
+ "output_dim": 768,
280
+ "num_heads": null,
281
+ "head_dim": null,
282
+ "intermediate_dim": null,
283
+ "extra_info": {}
284
+ },
285
+ "ln_2_4": {
286
+ "part_type": "layer_norm",
287
+ "layer_idx": 4,
288
+ "module_path": "transformer.h.4.ln_2",
289
+ "input_dim": 768,
290
+ "output_dim": 768,
291
+ "num_heads": null,
292
+ "head_dim": null,
293
+ "intermediate_dim": null,
294
+ "extra_info": {}
295
+ },
296
+ "layer_4": {
297
+ "part_type": "full_layer",
298
+ "layer_idx": 4,
299
+ "module_path": "transformer.h.4",
300
+ "input_dim": 768,
301
+ "output_dim": 768,
302
+ "num_heads": 12,
303
+ "head_dim": null,
304
+ "intermediate_dim": 3072,
305
+ "extra_info": {}
306
+ },
307
+ "attention_5": {
308
+ "part_type": "attention",
309
+ "layer_idx": 5,
310
+ "module_path": "transformer.h.5.attn",
311
+ "input_dim": 768,
312
+ "output_dim": 768,
313
+ "num_heads": 12,
314
+ "head_dim": 64,
315
+ "intermediate_dim": null,
316
+ "extra_info": {}
317
+ },
318
+ "ffn_5": {
319
+ "part_type": "ffn",
320
+ "layer_idx": 5,
321
+ "module_path": "transformer.h.5.mlp",
322
+ "input_dim": 768,
323
+ "output_dim": 768,
324
+ "num_heads": null,
325
+ "head_dim": null,
326
+ "intermediate_dim": 3072,
327
+ "extra_info": {}
328
+ },
329
+ "ln_1_5": {
330
+ "part_type": "layer_norm",
331
+ "layer_idx": 5,
332
+ "module_path": "transformer.h.5.ln_1",
333
+ "input_dim": 768,
334
+ "output_dim": 768,
335
+ "num_heads": null,
336
+ "head_dim": null,
337
+ "intermediate_dim": null,
338
+ "extra_info": {}
339
+ },
340
+ "ln_2_5": {
341
+ "part_type": "layer_norm",
342
+ "layer_idx": 5,
343
+ "module_path": "transformer.h.5.ln_2",
344
+ "input_dim": 768,
345
+ "output_dim": 768,
346
+ "num_heads": null,
347
+ "head_dim": null,
348
+ "intermediate_dim": null,
349
+ "extra_info": {}
350
+ },
351
+ "layer_5": {
352
+ "part_type": "full_layer",
353
+ "layer_idx": 5,
354
+ "module_path": "transformer.h.5",
355
+ "input_dim": 768,
356
+ "output_dim": 768,
357
+ "num_heads": 12,
358
+ "head_dim": null,
359
+ "intermediate_dim": 3072,
360
+ "extra_info": {}
361
+ },
362
+ "attention_6": {
363
+ "part_type": "attention",
364
+ "layer_idx": 6,
365
+ "module_path": "transformer.h.6.attn",
366
+ "input_dim": 768,
367
+ "output_dim": 768,
368
+ "num_heads": 12,
369
+ "head_dim": 64,
370
+ "intermediate_dim": null,
371
+ "extra_info": {}
372
+ },
373
+ "ffn_6": {
374
+ "part_type": "ffn",
375
+ "layer_idx": 6,
376
+ "module_path": "transformer.h.6.mlp",
377
+ "input_dim": 768,
378
+ "output_dim": 768,
379
+ "num_heads": null,
380
+ "head_dim": null,
381
+ "intermediate_dim": 3072,
382
+ "extra_info": {}
383
+ },
384
+ "ln_1_6": {
385
+ "part_type": "layer_norm",
386
+ "layer_idx": 6,
387
+ "module_path": "transformer.h.6.ln_1",
388
+ "input_dim": 768,
389
+ "output_dim": 768,
390
+ "num_heads": null,
391
+ "head_dim": null,
392
+ "intermediate_dim": null,
393
+ "extra_info": {}
394
+ },
395
+ "ln_2_6": {
396
+ "part_type": "layer_norm",
397
+ "layer_idx": 6,
398
+ "module_path": "transformer.h.6.ln_2",
399
+ "input_dim": 768,
400
+ "output_dim": 768,
401
+ "num_heads": null,
402
+ "head_dim": null,
403
+ "intermediate_dim": null,
404
+ "extra_info": {}
405
+ },
406
+ "layer_6": {
407
+ "part_type": "full_layer",
408
+ "layer_idx": 6,
409
+ "module_path": "transformer.h.6",
410
+ "input_dim": 768,
411
+ "output_dim": 768,
412
+ "num_heads": 12,
413
+ "head_dim": null,
414
+ "intermediate_dim": 3072,
415
+ "extra_info": {}
416
+ },
417
+ "attention_7": {
418
+ "part_type": "attention",
419
+ "layer_idx": 7,
420
+ "module_path": "transformer.h.7.attn",
421
+ "input_dim": 768,
422
+ "output_dim": 768,
423
+ "num_heads": 12,
424
+ "head_dim": 64,
425
+ "intermediate_dim": null,
426
+ "extra_info": {}
427
+ },
428
+ "ffn_7": {
429
+ "part_type": "ffn",
430
+ "layer_idx": 7,
431
+ "module_path": "transformer.h.7.mlp",
432
+ "input_dim": 768,
433
+ "output_dim": 768,
434
+ "num_heads": null,
435
+ "head_dim": null,
436
+ "intermediate_dim": 3072,
437
+ "extra_info": {}
438
+ },
439
+ "ln_1_7": {
440
+ "part_type": "layer_norm",
441
+ "layer_idx": 7,
442
+ "module_path": "transformer.h.7.ln_1",
443
+ "input_dim": 768,
444
+ "output_dim": 768,
445
+ "num_heads": null,
446
+ "head_dim": null,
447
+ "intermediate_dim": null,
448
+ "extra_info": {}
449
+ },
450
+ "ln_2_7": {
451
+ "part_type": "layer_norm",
452
+ "layer_idx": 7,
453
+ "module_path": "transformer.h.7.ln_2",
454
+ "input_dim": 768,
455
+ "output_dim": 768,
456
+ "num_heads": null,
457
+ "head_dim": null,
458
+ "intermediate_dim": null,
459
+ "extra_info": {}
460
+ },
461
+ "layer_7": {
462
+ "part_type": "full_layer",
463
+ "layer_idx": 7,
464
+ "module_path": "transformer.h.7",
465
+ "input_dim": 768,
466
+ "output_dim": 768,
467
+ "num_heads": 12,
468
+ "head_dim": null,
469
+ "intermediate_dim": 3072,
470
+ "extra_info": {}
471
+ },
472
+ "attention_8": {
473
+ "part_type": "attention",
474
+ "layer_idx": 8,
475
+ "module_path": "transformer.h.8.attn",
476
+ "input_dim": 768,
477
+ "output_dim": 768,
478
+ "num_heads": 12,
479
+ "head_dim": 64,
480
+ "intermediate_dim": null,
481
+ "extra_info": {}
482
+ },
483
+ "ffn_8": {
484
+ "part_type": "ffn",
485
+ "layer_idx": 8,
486
+ "module_path": "transformer.h.8.mlp",
487
+ "input_dim": 768,
488
+ "output_dim": 768,
489
+ "num_heads": null,
490
+ "head_dim": null,
491
+ "intermediate_dim": 3072,
492
+ "extra_info": {}
493
+ },
494
+ "ln_1_8": {
495
+ "part_type": "layer_norm",
496
+ "layer_idx": 8,
497
+ "module_path": "transformer.h.8.ln_1",
498
+ "input_dim": 768,
499
+ "output_dim": 768,
500
+ "num_heads": null,
501
+ "head_dim": null,
502
+ "intermediate_dim": null,
503
+ "extra_info": {}
504
+ },
505
+ "ln_2_8": {
506
+ "part_type": "layer_norm",
507
+ "layer_idx": 8,
508
+ "module_path": "transformer.h.8.ln_2",
509
+ "input_dim": 768,
510
+ "output_dim": 768,
511
+ "num_heads": null,
512
+ "head_dim": null,
513
+ "intermediate_dim": null,
514
+ "extra_info": {}
515
+ },
516
+ "layer_8": {
517
+ "part_type": "full_layer",
518
+ "layer_idx": 8,
519
+ "module_path": "transformer.h.8",
520
+ "input_dim": 768,
521
+ "output_dim": 768,
522
+ "num_heads": 12,
523
+ "head_dim": null,
524
+ "intermediate_dim": 3072,
525
+ "extra_info": {}
526
+ },
527
+ "attention_9": {
528
+ "part_type": "attention",
529
+ "layer_idx": 9,
530
+ "module_path": "transformer.h.9.attn",
531
+ "input_dim": 768,
532
+ "output_dim": 768,
533
+ "num_heads": 12,
534
+ "head_dim": 64,
535
+ "intermediate_dim": null,
536
+ "extra_info": {}
537
+ },
538
+ "ffn_9": {
539
+ "part_type": "ffn",
540
+ "layer_idx": 9,
541
+ "module_path": "transformer.h.9.mlp",
542
+ "input_dim": 768,
543
+ "output_dim": 768,
544
+ "num_heads": null,
545
+ "head_dim": null,
546
+ "intermediate_dim": 3072,
547
+ "extra_info": {}
548
+ },
549
+ "ln_1_9": {
550
+ "part_type": "layer_norm",
551
+ "layer_idx": 9,
552
+ "module_path": "transformer.h.9.ln_1",
553
+ "input_dim": 768,
554
+ "output_dim": 768,
555
+ "num_heads": null,
556
+ "head_dim": null,
557
+ "intermediate_dim": null,
558
+ "extra_info": {}
559
+ },
560
+ "ln_2_9": {
561
+ "part_type": "layer_norm",
562
+ "layer_idx": 9,
563
+ "module_path": "transformer.h.9.ln_2",
564
+ "input_dim": 768,
565
+ "output_dim": 768,
566
+ "num_heads": null,
567
+ "head_dim": null,
568
+ "intermediate_dim": null,
569
+ "extra_info": {}
570
+ },
571
+ "layer_9": {
572
+ "part_type": "full_layer",
573
+ "layer_idx": 9,
574
+ "module_path": "transformer.h.9",
575
+ "input_dim": 768,
576
+ "output_dim": 768,
577
+ "num_heads": 12,
578
+ "head_dim": null,
579
+ "intermediate_dim": 3072,
580
+ "extra_info": {}
581
+ },
582
+ "attention_10": {
583
+ "part_type": "attention",
584
+ "layer_idx": 10,
585
+ "module_path": "transformer.h.10.attn",
586
+ "input_dim": 768,
587
+ "output_dim": 768,
588
+ "num_heads": 12,
589
+ "head_dim": 64,
590
+ "intermediate_dim": null,
591
+ "extra_info": {}
592
+ },
593
+ "ffn_10": {
594
+ "part_type": "ffn",
595
+ "layer_idx": 10,
596
+ "module_path": "transformer.h.10.mlp",
597
+ "input_dim": 768,
598
+ "output_dim": 768,
599
+ "num_heads": null,
600
+ "head_dim": null,
601
+ "intermediate_dim": 3072,
602
+ "extra_info": {}
603
+ },
604
+ "ln_1_10": {
605
+ "part_type": "layer_norm",
606
+ "layer_idx": 10,
607
+ "module_path": "transformer.h.10.ln_1",
608
+ "input_dim": 768,
609
+ "output_dim": 768,
610
+ "num_heads": null,
611
+ "head_dim": null,
612
+ "intermediate_dim": null,
613
+ "extra_info": {}
614
+ },
615
+ "ln_2_10": {
616
+ "part_type": "layer_norm",
617
+ "layer_idx": 10,
618
+ "module_path": "transformer.h.10.ln_2",
619
+ "input_dim": 768,
620
+ "output_dim": 768,
621
+ "num_heads": null,
622
+ "head_dim": null,
623
+ "intermediate_dim": null,
624
+ "extra_info": {}
625
+ },
626
+ "layer_10": {
627
+ "part_type": "full_layer",
628
+ "layer_idx": 10,
629
+ "module_path": "transformer.h.10",
630
+ "input_dim": 768,
631
+ "output_dim": 768,
632
+ "num_heads": 12,
633
+ "head_dim": null,
634
+ "intermediate_dim": 3072,
635
+ "extra_info": {}
636
+ },
637
+ "attention_11": {
638
+ "part_type": "attention",
639
+ "layer_idx": 11,
640
+ "module_path": "transformer.h.11.attn",
641
+ "input_dim": 768,
642
+ "output_dim": 768,
643
+ "num_heads": 12,
644
+ "head_dim": 64,
645
+ "intermediate_dim": null,
646
+ "extra_info": {}
647
+ },
648
+ "ffn_11": {
649
+ "part_type": "ffn",
650
+ "layer_idx": 11,
651
+ "module_path": "transformer.h.11.mlp",
652
+ "input_dim": 768,
653
+ "output_dim": 768,
654
+ "num_heads": null,
655
+ "head_dim": null,
656
+ "intermediate_dim": 3072,
657
+ "extra_info": {}
658
+ },
659
+ "ln_1_11": {
660
+ "part_type": "layer_norm",
661
+ "layer_idx": 11,
662
+ "module_path": "transformer.h.11.ln_1",
663
+ "input_dim": 768,
664
+ "output_dim": 768,
665
+ "num_heads": null,
666
+ "head_dim": null,
667
+ "intermediate_dim": null,
668
+ "extra_info": {}
669
+ },
670
+ "ln_2_11": {
671
+ "part_type": "layer_norm",
672
+ "layer_idx": 11,
673
+ "module_path": "transformer.h.11.ln_2",
674
+ "input_dim": 768,
675
+ "output_dim": 768,
676
+ "num_heads": null,
677
+ "head_dim": null,
678
+ "intermediate_dim": null,
679
+ "extra_info": {}
680
+ },
681
+ "layer_11": {
682
+ "part_type": "full_layer",
683
+ "layer_idx": 11,
684
+ "module_path": "transformer.h.11",
685
+ "input_dim": 768,
686
+ "output_dim": 768,
687
+ "num_heads": 12,
688
+ "head_dim": null,
689
+ "intermediate_dim": 3072,
690
+ "extra_info": {}
691
+ },
692
+ "ln_f": {
693
+ "part_type": "layer_norm",
694
+ "layer_idx": null,
695
+ "module_path": "transformer.ln_f",
696
+ "input_dim": 768,
697
+ "output_dim": 768,
698
+ "num_heads": null,
699
+ "head_dim": null,
700
+ "intermediate_dim": null,
701
+ "extra_info": {}
702
+ },
703
+ "output_head": {
704
+ "part_type": "output_head",
705
+ "layer_idx": null,
706
+ "module_path": "lm_head",
707
+ "input_dim": 768,
708
+ "output_dim": 50257,
709
+ "num_heads": null,
710
+ "head_dim": null,
711
+ "intermediate_dim": null,
712
+ "extra_info": {}
713
+ }
714
+ }
715
+ }