bert-base-uncased-squadv1-72.9-sparse / linear_layer_sparse_stats_total_23M_72.9_relative_sparsity.csv
Chua, Vui Seng
Initial model commit
663f02a
,linear_id,shape,param_count,nnz_count
0,bert.encoder.layer.0.attention.self.query,"[768, 768]",589824,16252
1,bert.encoder.layer.0.attention.self.key,"[768, 768]",589824,13054
2,bert.encoder.layer.0.attention.self.value,"[768, 768]",589824,31394
3,bert.encoder.layer.0.attention.output.dense,"[768, 768]",589824,44074
4,bert.encoder.layer.0.intermediate.dense,"[3072, 768]",2359296,1159424
5,bert.encoder.layer.0.output.dense,"[768, 3072]",2359296,1061306
6,bert.encoder.layer.1.attention.self.query,"[768, 768]",589824,58240
7,bert.encoder.layer.1.attention.self.key,"[768, 768]",589824,54729
8,bert.encoder.layer.1.attention.self.value,"[768, 768]",589824,57604
9,bert.encoder.layer.1.attention.output.dense,"[768, 768]",589824,65732
10,bert.encoder.layer.1.intermediate.dense,"[3072, 768]",2359296,1230849
11,bert.encoder.layer.1.output.dense,"[768, 3072]",2359296,1063370
12,bert.encoder.layer.2.attention.self.query,"[768, 768]",589824,76074
13,bert.encoder.layer.2.attention.self.key,"[768, 768]",589824,72798
14,bert.encoder.layer.2.attention.self.value,"[768, 768]",589824,69367
15,bert.encoder.layer.2.attention.output.dense,"[768, 768]",589824,68078
16,bert.encoder.layer.2.intermediate.dense,"[3072, 768]",2359296,1260119
17,bert.encoder.layer.2.output.dense,"[768, 3072]",2359296,1081396
18,bert.encoder.layer.3.attention.self.query,"[768, 768]",589824,79954
19,bert.encoder.layer.3.attention.self.key,"[768, 768]",589824,84045
20,bert.encoder.layer.3.attention.self.value,"[768, 768]",589824,97323
21,bert.encoder.layer.3.attention.output.dense,"[768, 768]",589824,95918
22,bert.encoder.layer.3.intermediate.dense,"[3072, 768]",2359296,1263350
23,bert.encoder.layer.3.output.dense,"[768, 3072]",2359296,1069440
24,bert.encoder.layer.4.attention.self.query,"[768, 768]",589824,77306
25,bert.encoder.layer.4.attention.self.key,"[768, 768]",589824,77738
26,bert.encoder.layer.4.attention.self.value,"[768, 768]",589824,103253
27,bert.encoder.layer.4.attention.output.dense,"[768, 768]",589824,103673
28,bert.encoder.layer.4.intermediate.dense,"[3072, 768]",2359296,1253106
29,bert.encoder.layer.4.output.dense,"[768, 3072]",2359296,1031342
30,bert.encoder.layer.5.attention.self.query,"[768, 768]",589824,59269
31,bert.encoder.layer.5.attention.self.key,"[768, 768]",589824,66896
32,bert.encoder.layer.5.attention.self.value,"[768, 768]",589824,97790
33,bert.encoder.layer.5.attention.output.dense,"[768, 768]",589824,93833
34,bert.encoder.layer.5.intermediate.dense,"[3072, 768]",2359296,1263747
35,bert.encoder.layer.5.output.dense,"[768, 3072]",2359296,1014643
36,bert.encoder.layer.6.attention.self.query,"[768, 768]",589824,59641
37,bert.encoder.layer.6.attention.self.key,"[768, 768]",589824,67416
38,bert.encoder.layer.6.attention.self.value,"[768, 768]",589824,95731
39,bert.encoder.layer.6.attention.output.dense,"[768, 768]",589824,85998
40,bert.encoder.layer.6.intermediate.dense,"[3072, 768]",2359296,1181526
41,bert.encoder.layer.6.output.dense,"[768, 3072]",2359296,920703
42,bert.encoder.layer.7.attention.self.query,"[768, 768]",589824,44935
43,bert.encoder.layer.7.attention.self.key,"[768, 768]",589824,56868
44,bert.encoder.layer.7.attention.self.value,"[768, 768]",589824,89913
45,bert.encoder.layer.7.attention.output.dense,"[768, 768]",589824,74029
46,bert.encoder.layer.7.intermediate.dense,"[3072, 768]",2359296,995977
47,bert.encoder.layer.7.output.dense,"[768, 3072]",2359296,777939
48,bert.encoder.layer.8.attention.self.query,"[768, 768]",589824,49884
49,bert.encoder.layer.8.attention.self.key,"[768, 768]",589824,57649
50,bert.encoder.layer.8.attention.self.value,"[768, 768]",589824,92159
51,bert.encoder.layer.8.attention.output.dense,"[768, 768]",589824,71883
52,bert.encoder.layer.8.intermediate.dense,"[3072, 768]",2359296,757196
53,bert.encoder.layer.8.output.dense,"[768, 3072]",2359296,568698
54,bert.encoder.layer.9.attention.self.query,"[768, 768]",589824,68469
55,bert.encoder.layer.9.attention.self.key,"[768, 768]",589824,69557
56,bert.encoder.layer.9.attention.self.value,"[768, 768]",589824,32458
57,bert.encoder.layer.9.attention.output.dense,"[768, 768]",589824,23895
58,bert.encoder.layer.9.intermediate.dense,"[3072, 768]",2359296,359909
59,bert.encoder.layer.9.output.dense,"[768, 3072]",2359296,245729
60,bert.encoder.layer.10.attention.self.query,"[768, 768]",589824,42730
61,bert.encoder.layer.10.attention.self.key,"[768, 768]",589824,44139
62,bert.encoder.layer.10.attention.self.value,"[768, 768]",589824,17533
63,bert.encoder.layer.10.attention.output.dense,"[768, 768]",589824,12605
64,bert.encoder.layer.10.intermediate.dense,"[3072, 768]",2359296,233883
65,bert.encoder.layer.10.output.dense,"[768, 3072]",2359296,130882
66,bert.encoder.layer.11.attention.self.query,"[768, 768]",589824,11427
67,bert.encoder.layer.11.attention.self.key,"[768, 768]",589824,14775
68,bert.encoder.layer.11.attention.self.value,"[768, 768]",589824,6865
69,bert.encoder.layer.11.attention.output.dense,"[768, 768]",589824,3223
70,bert.encoder.layer.11.intermediate.dense,"[3072, 768]",2359296,190784
71,bert.encoder.layer.11.output.dense,"[768, 3072]",2359296,71760