bert-base-uncased-squadv1-65.1-sparse / linear_layer_sparse_stats_total_30M_65.1_relative_sparsity.csv
Chua, Vui Seng
Initial model commit
7e13174
,linear_id,shape,param_count,nnz_count
0,bert.encoder.layer.0.attention.self.query,"[768, 768]",589824,36266
1,bert.encoder.layer.0.attention.self.key,"[768, 768]",589824,28736
2,bert.encoder.layer.0.attention.self.value,"[768, 768]",589824,62217
3,bert.encoder.layer.0.attention.output.dense,"[768, 768]",589824,80877
4,bert.encoder.layer.0.intermediate.dense,"[3072, 768]",2359296,1391950
5,bert.encoder.layer.0.output.dense,"[768, 3072]",2359296,1304418
6,bert.encoder.layer.1.attention.self.query,"[768, 768]",589824,88856
7,bert.encoder.layer.1.attention.self.key,"[768, 768]",589824,80852
8,bert.encoder.layer.1.attention.self.value,"[768, 768]",589824,89520
9,bert.encoder.layer.1.attention.output.dense,"[768, 768]",589824,99229
10,bert.encoder.layer.1.intermediate.dense,"[3072, 768]",2359296,1448241
11,bert.encoder.layer.1.output.dense,"[768, 3072]",2359296,1298451
12,bert.encoder.layer.2.attention.self.query,"[768, 768]",589824,111820
13,bert.encoder.layer.2.attention.self.key,"[768, 768]",589824,105781
14,bert.encoder.layer.2.attention.self.value,"[768, 768]",589824,107982
15,bert.encoder.layer.2.attention.output.dense,"[768, 768]",589824,107182
16,bert.encoder.layer.2.intermediate.dense,"[3072, 768]",2359296,1478488
17,bert.encoder.layer.2.output.dense,"[768, 3072]",2359296,1324632
18,bert.encoder.layer.3.attention.self.query,"[768, 768]",589824,119244
19,bert.encoder.layer.3.attention.self.key,"[768, 768]",589824,121415
20,bert.encoder.layer.3.attention.self.value,"[768, 768]",589824,144196
21,bert.encoder.layer.3.attention.output.dense,"[768, 768]",589824,141057
22,bert.encoder.layer.3.intermediate.dense,"[3072, 768]",2359296,1477159
23,bert.encoder.layer.3.output.dense,"[768, 3072]",2359296,1308149
24,bert.encoder.layer.4.attention.self.query,"[768, 768]",589824,116629
25,bert.encoder.layer.4.attention.self.key,"[768, 768]",589824,115443
26,bert.encoder.layer.4.attention.self.value,"[768, 768]",589824,153857
27,bert.encoder.layer.4.attention.output.dense,"[768, 768]",589824,153726
28,bert.encoder.layer.4.intermediate.dense,"[3072, 768]",2359296,1467353
29,bert.encoder.layer.4.output.dense,"[768, 3072]",2359296,1275091
30,bert.encoder.layer.5.attention.self.query,"[768, 768]",589824,99164
31,bert.encoder.layer.5.attention.self.key,"[768, 768]",589824,106720
32,bert.encoder.layer.5.attention.self.value,"[768, 768]",589824,151873
33,bert.encoder.layer.5.attention.output.dense,"[768, 768]",589824,146952
34,bert.encoder.layer.5.intermediate.dense,"[3072, 768]",2359296,1483651
35,bert.encoder.layer.5.output.dense,"[768, 3072]",2359296,1268211
36,bert.encoder.layer.6.attention.self.query,"[768, 768]",589824,99027
37,bert.encoder.layer.6.attention.self.key,"[768, 768]",589824,108498
38,bert.encoder.layer.6.attention.self.value,"[768, 768]",589824,151369
39,bert.encoder.layer.6.attention.output.dense,"[768, 768]",589824,138662
40,bert.encoder.layer.6.intermediate.dense,"[3072, 768]",2359296,1413626
41,bert.encoder.layer.6.output.dense,"[768, 3072]",2359296,1178877
42,bert.encoder.layer.7.attention.self.query,"[768, 768]",589824,72908
43,bert.encoder.layer.7.attention.self.key,"[768, 768]",589824,87122
44,bert.encoder.layer.7.attention.self.value,"[768, 768]",589824,134079
45,bert.encoder.layer.7.attention.output.dense,"[768, 768]",589824,117207
46,bert.encoder.layer.7.intermediate.dense,"[3072, 768]",2359296,1240812
47,bert.encoder.layer.7.output.dense,"[768, 3072]",2359296,1039716
48,bert.encoder.layer.8.attention.self.query,"[768, 768]",589824,87546
49,bert.encoder.layer.8.attention.self.key,"[768, 768]",589824,95542
50,bert.encoder.layer.8.attention.self.value,"[768, 768]",589824,147149
51,bert.encoder.layer.8.attention.output.dense,"[768, 768]",589824,121221
52,bert.encoder.layer.8.intermediate.dense,"[3072, 768]",2359296,991806
53,bert.encoder.layer.8.output.dense,"[768, 3072]",2359296,805916
54,bert.encoder.layer.9.attention.self.query,"[768, 768]",589824,94420
55,bert.encoder.layer.9.attention.self.key,"[768, 768]",589824,95694
56,bert.encoder.layer.9.attention.self.value,"[768, 768]",589824,58481
57,bert.encoder.layer.9.attention.output.dense,"[768, 768]",589824,46748
58,bert.encoder.layer.9.intermediate.dense,"[3072, 768]",2359296,550773
59,bert.encoder.layer.9.output.dense,"[768, 3072]",2359296,425540
60,bert.encoder.layer.10.attention.self.query,"[768, 768]",589824,62325
61,bert.encoder.layer.10.attention.self.key,"[768, 768]",589824,64069
62,bert.encoder.layer.10.attention.self.value,"[768, 768]",589824,31075
63,bert.encoder.layer.10.attention.output.dense,"[768, 768]",589824,22579
64,bert.encoder.layer.10.intermediate.dense,"[3072, 768]",2359296,361008
65,bert.encoder.layer.10.output.dense,"[768, 3072]",2359296,240192
66,bert.encoder.layer.11.attention.self.query,"[768, 768]",589824,20394
67,bert.encoder.layer.11.attention.self.key,"[768, 768]",589824,25082
68,bert.encoder.layer.11.attention.self.value,"[768, 768]",589824,14505
69,bert.encoder.layer.11.attention.output.dense,"[768, 768]",589824,6766
70,bert.encoder.layer.11.intermediate.dense,"[3072, 768]",2359296,296386
71,bert.encoder.layer.11.output.dense,"[768, 3072]",2359296,119879