File size: 5,088 Bytes
c25c3d2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 |
,linear_id,shape,param_count,nnz_count
0,bert.encoder.layer.0.attention.self.query,"[768, 768]",589824,1757
1,bert.encoder.layer.0.attention.self.key,"[768, 768]",589824,1800
2,bert.encoder.layer.0.attention.self.value,"[768, 768]",589824,5416
3,bert.encoder.layer.0.attention.output.dense,"[768, 768]",589824,7845
4,bert.encoder.layer.0.intermediate.dense,"[3072, 768]",2359296,701532
5,bert.encoder.layer.0.output.dense,"[768, 3072]",2359296,591365
6,bert.encoder.layer.1.attention.self.query,"[768, 768]",589824,27422
7,bert.encoder.layer.1.attention.self.key,"[768, 768]",589824,27291
8,bert.encoder.layer.1.attention.self.value,"[768, 768]",589824,22259
9,bert.encoder.layer.1.attention.output.dense,"[768, 768]",589824,27236
10,bert.encoder.layer.1.intermediate.dense,"[3072, 768]",2359296,775297
11,bert.encoder.layer.1.output.dense,"[768, 3072]",2359296,599687
12,bert.encoder.layer.2.attention.self.query,"[768, 768]",589824,31292
13,bert.encoder.layer.2.attention.self.key,"[768, 768]",589824,30911
14,bert.encoder.layer.2.attention.self.value,"[768, 768]",589824,24690
15,bert.encoder.layer.2.attention.output.dense,"[768, 768]",589824,24902
16,bert.encoder.layer.2.intermediate.dense,"[3072, 768]",2359296,804492
17,bert.encoder.layer.2.output.dense,"[768, 3072]",2359296,606736
18,bert.encoder.layer.3.attention.self.query,"[768, 768]",589824,35285
19,bert.encoder.layer.3.attention.self.key,"[768, 768]",589824,38990
20,bert.encoder.layer.3.attention.self.value,"[768, 768]",589824,38379
21,bert.encoder.layer.3.attention.output.dense,"[768, 768]",589824,39416
22,bert.encoder.layer.3.intermediate.dense,"[3072, 768]",2359296,803280
23,bert.encoder.layer.3.output.dense,"[768, 3072]",2359296,592789
24,bert.encoder.layer.4.attention.self.query,"[768, 768]",589824,28239
25,bert.encoder.layer.4.attention.self.key,"[768, 768]",589824,30768
26,bert.encoder.layer.4.attention.self.value,"[768, 768]",589824,39479
27,bert.encoder.layer.4.attention.output.dense,"[768, 768]",589824,39703
28,bert.encoder.layer.4.intermediate.dense,"[3072, 768]",2359296,784797
29,bert.encoder.layer.4.output.dense,"[768, 3072]",2359296,550626
30,bert.encoder.layer.5.attention.self.query,"[768, 768]",589824,16702
31,bert.encoder.layer.5.attention.self.key,"[768, 768]",589824,21640
32,bert.encoder.layer.5.attention.self.value,"[768, 768]",589824,35016
33,bert.encoder.layer.5.attention.output.dense,"[768, 768]",589824,33592
34,bert.encoder.layer.5.intermediate.dense,"[3072, 768]",2359296,785249
35,bert.encoder.layer.5.output.dense,"[768, 3072]",2359296,530621
36,bert.encoder.layer.6.attention.self.query,"[768, 768]",589824,16901
37,bert.encoder.layer.6.attention.self.key,"[768, 768]",589824,20927
38,bert.encoder.layer.6.attention.self.value,"[768, 768]",589824,29547
39,bert.encoder.layer.6.attention.output.dense,"[768, 768]",589824,26602
40,bert.encoder.layer.6.intermediate.dense,"[3072, 768]",2359296,706133
41,bert.encoder.layer.6.output.dense,"[768, 3072]",2359296,458484
42,bert.encoder.layer.7.attention.self.query,"[768, 768]",589824,15264
43,bert.encoder.layer.7.attention.self.key,"[768, 768]",589824,21032
44,bert.encoder.layer.7.attention.self.value,"[768, 768]",589824,34524
45,bert.encoder.layer.7.attention.output.dense,"[768, 768]",589824,26950
46,bert.encoder.layer.7.intermediate.dense,"[3072, 768]",2359296,558548
47,bert.encoder.layer.7.output.dense,"[768, 3072]",2359296,366404
48,bert.encoder.layer.8.attention.self.query,"[768, 768]",589824,14420
49,bert.encoder.layer.8.attention.self.key,"[768, 768]",589824,18006
50,bert.encoder.layer.8.attention.self.value,"[768, 768]",589824,27492
51,bert.encoder.layer.8.attention.output.dense,"[768, 768]",589824,21056
52,bert.encoder.layer.8.intermediate.dense,"[3072, 768]",2359296,388041
53,bert.encoder.layer.8.output.dense,"[768, 3072]",2359296,246678
54,bert.encoder.layer.9.attention.self.query,"[768, 768]",589824,32843
55,bert.encoder.layer.9.attention.self.key,"[768, 768]",589824,31522
56,bert.encoder.layer.9.attention.self.value,"[768, 768]",589824,8125
57,bert.encoder.layer.9.attention.output.dense,"[768, 768]",589824,6000
58,bert.encoder.layer.9.intermediate.dense,"[3072, 768]",2359296,148663
59,bert.encoder.layer.9.output.dense,"[768, 3072]",2359296,82095
60,bert.encoder.layer.10.attention.self.query,"[768, 768]",589824,19588
61,bert.encoder.layer.10.attention.self.key,"[768, 768]",589824,19232
62,bert.encoder.layer.10.attention.self.value,"[768, 768]",589824,5844
63,bert.encoder.layer.10.attention.output.dense,"[768, 768]",589824,4472
64,bert.encoder.layer.10.intermediate.dense,"[3072, 768]",2359296,95918
65,bert.encoder.layer.10.output.dense,"[768, 3072]",2359296,40352
66,bert.encoder.layer.11.attention.self.query,"[768, 768]",589824,4707
67,bert.encoder.layer.11.attention.self.key,"[768, 768]",589824,5807
68,bert.encoder.layer.11.attention.self.value,"[768, 768]",589824,2160
69,bert.encoder.layer.11.attention.output.dense,"[768, 768]",589824,1310
70,bert.encoder.layer.11.intermediate.dense,"[3072, 768]",2359296,71477
71,bert.encoder.layer.11.output.dense,"[768, 3072]",2359296,26152
|