bert-base-uncased-squadv1-65.1-sparse
/
linear_layer_sparse_stats_total_30M_65.1_relative_sparsity.csv
,linear_id,shape,param_count,nnz_count | |
0,bert.encoder.layer.0.attention.self.query,"[768, 768]",589824,36266 | |
1,bert.encoder.layer.0.attention.self.key,"[768, 768]",589824,28736 | |
2,bert.encoder.layer.0.attention.self.value,"[768, 768]",589824,62217 | |
3,bert.encoder.layer.0.attention.output.dense,"[768, 768]",589824,80877 | |
4,bert.encoder.layer.0.intermediate.dense,"[3072, 768]",2359296,1391950 | |
5,bert.encoder.layer.0.output.dense,"[768, 3072]",2359296,1304418 | |
6,bert.encoder.layer.1.attention.self.query,"[768, 768]",589824,88856 | |
7,bert.encoder.layer.1.attention.self.key,"[768, 768]",589824,80852 | |
8,bert.encoder.layer.1.attention.self.value,"[768, 768]",589824,89520 | |
9,bert.encoder.layer.1.attention.output.dense,"[768, 768]",589824,99229 | |
10,bert.encoder.layer.1.intermediate.dense,"[3072, 768]",2359296,1448241 | |
11,bert.encoder.layer.1.output.dense,"[768, 3072]",2359296,1298451 | |
12,bert.encoder.layer.2.attention.self.query,"[768, 768]",589824,111820 | |
13,bert.encoder.layer.2.attention.self.key,"[768, 768]",589824,105781 | |
14,bert.encoder.layer.2.attention.self.value,"[768, 768]",589824,107982 | |
15,bert.encoder.layer.2.attention.output.dense,"[768, 768]",589824,107182 | |
16,bert.encoder.layer.2.intermediate.dense,"[3072, 768]",2359296,1478488 | |
17,bert.encoder.layer.2.output.dense,"[768, 3072]",2359296,1324632 | |
18,bert.encoder.layer.3.attention.self.query,"[768, 768]",589824,119244 | |
19,bert.encoder.layer.3.attention.self.key,"[768, 768]",589824,121415 | |
20,bert.encoder.layer.3.attention.self.value,"[768, 768]",589824,144196 | |
21,bert.encoder.layer.3.attention.output.dense,"[768, 768]",589824,141057 | |
22,bert.encoder.layer.3.intermediate.dense,"[3072, 768]",2359296,1477159 | |
23,bert.encoder.layer.3.output.dense,"[768, 3072]",2359296,1308149 | |
24,bert.encoder.layer.4.attention.self.query,"[768, 768]",589824,116629 | |
25,bert.encoder.layer.4.attention.self.key,"[768, 768]",589824,115443 | |
26,bert.encoder.layer.4.attention.self.value,"[768, 768]",589824,153857 | |
27,bert.encoder.layer.4.attention.output.dense,"[768, 768]",589824,153726 | |
28,bert.encoder.layer.4.intermediate.dense,"[3072, 768]",2359296,1467353 | |
29,bert.encoder.layer.4.output.dense,"[768, 3072]",2359296,1275091 | |
30,bert.encoder.layer.5.attention.self.query,"[768, 768]",589824,99164 | |
31,bert.encoder.layer.5.attention.self.key,"[768, 768]",589824,106720 | |
32,bert.encoder.layer.5.attention.self.value,"[768, 768]",589824,151873 | |
33,bert.encoder.layer.5.attention.output.dense,"[768, 768]",589824,146952 | |
34,bert.encoder.layer.5.intermediate.dense,"[3072, 768]",2359296,1483651 | |
35,bert.encoder.layer.5.output.dense,"[768, 3072]",2359296,1268211 | |
36,bert.encoder.layer.6.attention.self.query,"[768, 768]",589824,99027 | |
37,bert.encoder.layer.6.attention.self.key,"[768, 768]",589824,108498 | |
38,bert.encoder.layer.6.attention.self.value,"[768, 768]",589824,151369 | |
39,bert.encoder.layer.6.attention.output.dense,"[768, 768]",589824,138662 | |
40,bert.encoder.layer.6.intermediate.dense,"[3072, 768]",2359296,1413626 | |
41,bert.encoder.layer.6.output.dense,"[768, 3072]",2359296,1178877 | |
42,bert.encoder.layer.7.attention.self.query,"[768, 768]",589824,72908 | |
43,bert.encoder.layer.7.attention.self.key,"[768, 768]",589824,87122 | |
44,bert.encoder.layer.7.attention.self.value,"[768, 768]",589824,134079 | |
45,bert.encoder.layer.7.attention.output.dense,"[768, 768]",589824,117207 | |
46,bert.encoder.layer.7.intermediate.dense,"[3072, 768]",2359296,1240812 | |
47,bert.encoder.layer.7.output.dense,"[768, 3072]",2359296,1039716 | |
48,bert.encoder.layer.8.attention.self.query,"[768, 768]",589824,87546 | |
49,bert.encoder.layer.8.attention.self.key,"[768, 768]",589824,95542 | |
50,bert.encoder.layer.8.attention.self.value,"[768, 768]",589824,147149 | |
51,bert.encoder.layer.8.attention.output.dense,"[768, 768]",589824,121221 | |
52,bert.encoder.layer.8.intermediate.dense,"[3072, 768]",2359296,991806 | |
53,bert.encoder.layer.8.output.dense,"[768, 3072]",2359296,805916 | |
54,bert.encoder.layer.9.attention.self.query,"[768, 768]",589824,94420 | |
55,bert.encoder.layer.9.attention.self.key,"[768, 768]",589824,95694 | |
56,bert.encoder.layer.9.attention.self.value,"[768, 768]",589824,58481 | |
57,bert.encoder.layer.9.attention.output.dense,"[768, 768]",589824,46748 | |
58,bert.encoder.layer.9.intermediate.dense,"[3072, 768]",2359296,550773 | |
59,bert.encoder.layer.9.output.dense,"[768, 3072]",2359296,425540 | |
60,bert.encoder.layer.10.attention.self.query,"[768, 768]",589824,62325 | |
61,bert.encoder.layer.10.attention.self.key,"[768, 768]",589824,64069 | |
62,bert.encoder.layer.10.attention.self.value,"[768, 768]",589824,31075 | |
63,bert.encoder.layer.10.attention.output.dense,"[768, 768]",589824,22579 | |
64,bert.encoder.layer.10.intermediate.dense,"[3072, 768]",2359296,361008 | |
65,bert.encoder.layer.10.output.dense,"[768, 3072]",2359296,240192 | |
66,bert.encoder.layer.11.attention.self.query,"[768, 768]",589824,20394 | |
67,bert.encoder.layer.11.attention.self.key,"[768, 768]",589824,25082 | |
68,bert.encoder.layer.11.attention.self.value,"[768, 768]",589824,14505 | |
69,bert.encoder.layer.11.attention.output.dense,"[768, 768]",589824,6766 | |
70,bert.encoder.layer.11.intermediate.dense,"[3072, 768]",2359296,296386 | |
71,bert.encoder.layer.11.output.dense,"[768, 3072]",2359296,119879 | |