bert-base-uncased-squadv1-59.6-sparse / linear_layer_sparse_stats_total_34M_59.6_relative_sparsity.csv
Chua, Vui Seng
Initial model commit
6094899
,linear_id,shape,param_count,nnz_count
0,bert.encoder.layer.0.attention.self.query,"[768, 768]",589824,55856
1,bert.encoder.layer.0.attention.self.key,"[768, 768]",589824,43957
2,bert.encoder.layer.0.attention.self.value,"[768, 768]",589824,89632
3,bert.encoder.layer.0.attention.output.dense,"[768, 768]",589824,110940
4,bert.encoder.layer.0.intermediate.dense,"[3072, 768]",2359296,1531180
5,bert.encoder.layer.0.output.dense,"[768, 3072]",2359296,1452615
6,bert.encoder.layer.1.attention.self.query,"[768, 768]",589824,115146
7,bert.encoder.layer.1.attention.self.key,"[768, 768]",589824,102899
8,bert.encoder.layer.1.attention.self.value,"[768, 768]",589824,116354
9,bert.encoder.layer.1.attention.output.dense,"[768, 768]",589824,126468
10,bert.encoder.layer.1.intermediate.dense,"[3072, 768]",2359296,1580627
11,bert.encoder.layer.1.output.dense,"[768, 3072]",2359296,1445362
12,bert.encoder.layer.2.attention.self.query,"[768, 768]",589824,140857
13,bert.encoder.layer.2.attention.self.key,"[768, 768]",589824,133268
14,bert.encoder.layer.2.attention.self.value,"[768, 768]",589824,141184
15,bert.encoder.layer.2.attention.output.dense,"[768, 768]",589824,140893
16,bert.encoder.layer.2.intermediate.dense,"[3072, 768]",2359296,1608609
17,bert.encoder.layer.2.output.dense,"[768, 3072]",2359296,1471410
18,bert.encoder.layer.3.attention.self.query,"[768, 768]",589824,150136
19,bert.encoder.layer.3.attention.self.key,"[768, 768]",589824,150781
20,bert.encoder.layer.3.attention.self.value,"[768, 768]",589824,180987
21,bert.encoder.layer.3.attention.output.dense,"[768, 768]",589824,175778
22,bert.encoder.layer.3.intermediate.dense,"[3072, 768]",2359296,1606543
23,bert.encoder.layer.3.output.dense,"[768, 3072]",2359296,1456896
24,bert.encoder.layer.4.attention.self.query,"[768, 768]",589824,147969
25,bert.encoder.layer.4.attention.self.key,"[768, 768]",589824,145851
26,bert.encoder.layer.4.attention.self.value,"[768, 768]",589824,191670
27,bert.encoder.layer.4.attention.output.dense,"[768, 768]",589824,190871
28,bert.encoder.layer.4.intermediate.dense,"[3072, 768]",2359296,1597771
29,bert.encoder.layer.4.output.dense,"[768, 3072]",2359296,1428619
30,bert.encoder.layer.5.attention.self.query,"[768, 768]",589824,131102
31,bert.encoder.layer.5.attention.self.key,"[768, 768]",589824,139321
32,bert.encoder.layer.5.attention.self.value,"[768, 768]",589824,192204
33,bert.encoder.layer.5.attention.output.dense,"[768, 768]",589824,185803
34,bert.encoder.layer.5.intermediate.dense,"[3072, 768]",2359296,1616886
35,bert.encoder.layer.5.output.dense,"[768, 3072]",2359296,1426761
36,bert.encoder.layer.6.attention.self.query,"[768, 768]",589824,132493
37,bert.encoder.layer.6.attention.self.key,"[768, 768]",589824,142601
38,bert.encoder.layer.6.attention.self.value,"[768, 768]",589824,193504
39,bert.encoder.layer.6.attention.output.dense,"[768, 768]",589824,179860
40,bert.encoder.layer.6.intermediate.dense,"[3072, 768]",2359296,1557741
41,bert.encoder.layer.6.output.dense,"[768, 3072]",2359296,1345610
42,bert.encoder.layer.7.attention.self.query,"[768, 768]",589824,98196
43,bert.encoder.layer.7.attention.self.key,"[768, 768]",589824,112797
44,bert.encoder.layer.7.attention.self.value,"[768, 768]",589824,166399
45,bert.encoder.layer.7.attention.output.dense,"[768, 768]",589824,150757
46,bert.encoder.layer.7.intermediate.dense,"[3072, 768]",2359296,1395257
47,bert.encoder.layer.7.output.dense,"[768, 3072]",2359296,1209690
48,bert.encoder.layer.8.attention.self.query,"[768, 768]",589824,119328
49,bert.encoder.layer.8.attention.self.key,"[768, 768]",589824,127164
50,bert.encoder.layer.8.attention.self.value,"[768, 768]",589824,189418
51,bert.encoder.layer.8.attention.output.dense,"[768, 768]",589824,161706
52,bert.encoder.layer.8.intermediate.dense,"[3072, 768]",2359296,1154515
53,bert.encoder.layer.8.output.dense,"[768, 3072]",2359296,974995
54,bert.encoder.layer.9.attention.self.query,"[768, 768]",589824,113893
55,bert.encoder.layer.9.attention.self.key,"[768, 768]",589824,114749
56,bert.encoder.layer.9.attention.self.value,"[768, 768]",589824,81969
57,bert.encoder.layer.9.attention.output.dense,"[768, 768]",589824,68461
58,bert.encoder.layer.9.intermediate.dense,"[3072, 768]",2359296,702337
59,bert.encoder.layer.9.output.dense,"[768, 3072]",2359296,576653
60,bert.encoder.layer.10.attention.self.query,"[768, 768]",589824,76831
61,bert.encoder.layer.10.attention.self.key,"[768, 768]",589824,78612
62,bert.encoder.layer.10.attention.self.value,"[768, 768]",589824,43956
63,bert.encoder.layer.10.attention.output.dense,"[768, 768]",589824,32761
64,bert.encoder.layer.10.intermediate.dense,"[3072, 768]",2359296,472830
65,bert.encoder.layer.10.output.dense,"[768, 3072]",2359296,346292
66,bert.encoder.layer.11.attention.self.query,"[768, 768]",589824,30540
67,bert.encoder.layer.11.attention.self.key,"[768, 768]",589824,36104
68,bert.encoder.layer.11.attention.self.value,"[768, 768]",589824,23388
69,bert.encoder.layer.11.attention.output.dense,"[768, 768]",589824,12039
70,bert.encoder.layer.11.intermediate.dense,"[3072, 768]",2359296,386672
71,bert.encoder.layer.11.output.dense,"[768, 3072]",2359296,167064