bert-base-uncased-squadv1-59.6-sparse
/
linear_layer_sparse_stats_total_34M_59.6_relative_sparsity.csv
,linear_id,shape,param_count,nnz_count | |
0,bert.encoder.layer.0.attention.self.query,"[768, 768]",589824,55856 | |
1,bert.encoder.layer.0.attention.self.key,"[768, 768]",589824,43957 | |
2,bert.encoder.layer.0.attention.self.value,"[768, 768]",589824,89632 | |
3,bert.encoder.layer.0.attention.output.dense,"[768, 768]",589824,110940 | |
4,bert.encoder.layer.0.intermediate.dense,"[3072, 768]",2359296,1531180 | |
5,bert.encoder.layer.0.output.dense,"[768, 3072]",2359296,1452615 | |
6,bert.encoder.layer.1.attention.self.query,"[768, 768]",589824,115146 | |
7,bert.encoder.layer.1.attention.self.key,"[768, 768]",589824,102899 | |
8,bert.encoder.layer.1.attention.self.value,"[768, 768]",589824,116354 | |
9,bert.encoder.layer.1.attention.output.dense,"[768, 768]",589824,126468 | |
10,bert.encoder.layer.1.intermediate.dense,"[3072, 768]",2359296,1580627 | |
11,bert.encoder.layer.1.output.dense,"[768, 3072]",2359296,1445362 | |
12,bert.encoder.layer.2.attention.self.query,"[768, 768]",589824,140857 | |
13,bert.encoder.layer.2.attention.self.key,"[768, 768]",589824,133268 | |
14,bert.encoder.layer.2.attention.self.value,"[768, 768]",589824,141184 | |
15,bert.encoder.layer.2.attention.output.dense,"[768, 768]",589824,140893 | |
16,bert.encoder.layer.2.intermediate.dense,"[3072, 768]",2359296,1608609 | |
17,bert.encoder.layer.2.output.dense,"[768, 3072]",2359296,1471410 | |
18,bert.encoder.layer.3.attention.self.query,"[768, 768]",589824,150136 | |
19,bert.encoder.layer.3.attention.self.key,"[768, 768]",589824,150781 | |
20,bert.encoder.layer.3.attention.self.value,"[768, 768]",589824,180987 | |
21,bert.encoder.layer.3.attention.output.dense,"[768, 768]",589824,175778 | |
22,bert.encoder.layer.3.intermediate.dense,"[3072, 768]",2359296,1606543 | |
23,bert.encoder.layer.3.output.dense,"[768, 3072]",2359296,1456896 | |
24,bert.encoder.layer.4.attention.self.query,"[768, 768]",589824,147969 | |
25,bert.encoder.layer.4.attention.self.key,"[768, 768]",589824,145851 | |
26,bert.encoder.layer.4.attention.self.value,"[768, 768]",589824,191670 | |
27,bert.encoder.layer.4.attention.output.dense,"[768, 768]",589824,190871 | |
28,bert.encoder.layer.4.intermediate.dense,"[3072, 768]",2359296,1597771 | |
29,bert.encoder.layer.4.output.dense,"[768, 3072]",2359296,1428619 | |
30,bert.encoder.layer.5.attention.self.query,"[768, 768]",589824,131102 | |
31,bert.encoder.layer.5.attention.self.key,"[768, 768]",589824,139321 | |
32,bert.encoder.layer.5.attention.self.value,"[768, 768]",589824,192204 | |
33,bert.encoder.layer.5.attention.output.dense,"[768, 768]",589824,185803 | |
34,bert.encoder.layer.5.intermediate.dense,"[3072, 768]",2359296,1616886 | |
35,bert.encoder.layer.5.output.dense,"[768, 3072]",2359296,1426761 | |
36,bert.encoder.layer.6.attention.self.query,"[768, 768]",589824,132493 | |
37,bert.encoder.layer.6.attention.self.key,"[768, 768]",589824,142601 | |
38,bert.encoder.layer.6.attention.self.value,"[768, 768]",589824,193504 | |
39,bert.encoder.layer.6.attention.output.dense,"[768, 768]",589824,179860 | |
40,bert.encoder.layer.6.intermediate.dense,"[3072, 768]",2359296,1557741 | |
41,bert.encoder.layer.6.output.dense,"[768, 3072]",2359296,1345610 | |
42,bert.encoder.layer.7.attention.self.query,"[768, 768]",589824,98196 | |
43,bert.encoder.layer.7.attention.self.key,"[768, 768]",589824,112797 | |
44,bert.encoder.layer.7.attention.self.value,"[768, 768]",589824,166399 | |
45,bert.encoder.layer.7.attention.output.dense,"[768, 768]",589824,150757 | |
46,bert.encoder.layer.7.intermediate.dense,"[3072, 768]",2359296,1395257 | |
47,bert.encoder.layer.7.output.dense,"[768, 3072]",2359296,1209690 | |
48,bert.encoder.layer.8.attention.self.query,"[768, 768]",589824,119328 | |
49,bert.encoder.layer.8.attention.self.key,"[768, 768]",589824,127164 | |
50,bert.encoder.layer.8.attention.self.value,"[768, 768]",589824,189418 | |
51,bert.encoder.layer.8.attention.output.dense,"[768, 768]",589824,161706 | |
52,bert.encoder.layer.8.intermediate.dense,"[3072, 768]",2359296,1154515 | |
53,bert.encoder.layer.8.output.dense,"[768, 3072]",2359296,974995 | |
54,bert.encoder.layer.9.attention.self.query,"[768, 768]",589824,113893 | |
55,bert.encoder.layer.9.attention.self.key,"[768, 768]",589824,114749 | |
56,bert.encoder.layer.9.attention.self.value,"[768, 768]",589824,81969 | |
57,bert.encoder.layer.9.attention.output.dense,"[768, 768]",589824,68461 | |
58,bert.encoder.layer.9.intermediate.dense,"[3072, 768]",2359296,702337 | |
59,bert.encoder.layer.9.output.dense,"[768, 3072]",2359296,576653 | |
60,bert.encoder.layer.10.attention.self.query,"[768, 768]",589824,76831 | |
61,bert.encoder.layer.10.attention.self.key,"[768, 768]",589824,78612 | |
62,bert.encoder.layer.10.attention.self.value,"[768, 768]",589824,43956 | |
63,bert.encoder.layer.10.attention.output.dense,"[768, 768]",589824,32761 | |
64,bert.encoder.layer.10.intermediate.dense,"[3072, 768]",2359296,472830 | |
65,bert.encoder.layer.10.output.dense,"[768, 3072]",2359296,346292 | |
66,bert.encoder.layer.11.attention.self.query,"[768, 768]",589824,30540 | |
67,bert.encoder.layer.11.attention.self.key,"[768, 768]",589824,36104 | |
68,bert.encoder.layer.11.attention.self.value,"[768, 768]",589824,23388 | |
69,bert.encoder.layer.11.attention.output.dense,"[768, 768]",589824,12039 | |
70,bert.encoder.layer.11.intermediate.dense,"[3072, 768]",2359296,386672 | |
71,bert.encoder.layer.11.output.dense,"[768, 3072]",2359296,167064 | |