bert-base-squadv1-block-pruning-hybrid / XP_linear_layer_sparsity_20M_params_33.64_sparsity.md
Chua, Vui Seng
Update readme and model analysis
ac8897e
layer_id layer_type param_type shape nparam nnz sparsity
5 bert.encoder.layer.0.attention.self.query Linear weight [320, 768] 245760 135168 0.45
7 bert.encoder.layer.0.attention.self.key Linear weight [320, 768] 245760 149504 0.391667
9 bert.encoder.layer.0.attention.self.value Linear weight [320, 768] 245760 173056 0.295833
11 bert.encoder.layer.0.attention.output.dense Linear weight [768, 320] 245760 181248 0.2625
15 bert.encoder.layer.0.intermediate.dense Linear weight [185, 768] 142080 142080 0
17 bert.encoder.layer.0.output.dense Linear weight [768, 185] 142080 142080 0
21 bert.encoder.layer.1.attention.self.query Linear weight [320, 768] 245760 175104 0.2875
23 bert.encoder.layer.1.attention.self.key Linear weight [320, 768] 245760 177152 0.279167
25 bert.encoder.layer.1.attention.self.value Linear weight [320, 768] 245760 166912 0.320833
27 bert.encoder.layer.1.attention.output.dense Linear weight [768, 320] 245760 167936 0.316667
31 bert.encoder.layer.1.intermediate.dense Linear weight [315, 768] 241920 241920 0
33 bert.encoder.layer.1.output.dense Linear weight [768, 315] 241920 241920 0
37 bert.encoder.layer.2.attention.self.query Linear weight [576, 768] 442368 285696 0.354167
39 bert.encoder.layer.2.attention.self.key Linear weight [576, 768] 442368 297984 0.326389
41 bert.encoder.layer.2.attention.self.value Linear weight [576, 768] 442368 226304 0.488426
43 bert.encoder.layer.2.attention.output.dense Linear weight [768, 576] 442368 237568 0.462963
47 bert.encoder.layer.2.intermediate.dense Linear weight [339, 768] 260352 260352 0
49 bert.encoder.layer.2.output.dense Linear weight [768, 339] 260352 260352 0
53 bert.encoder.layer.3.attention.self.query Linear weight [576, 768] 442368 277504 0.372685
55 bert.encoder.layer.3.attention.self.key Linear weight [576, 768] 442368 303104 0.314815
57 bert.encoder.layer.3.attention.self.value Linear weight [576, 768] 442368 297984 0.326389
59 bert.encoder.layer.3.attention.output.dense Linear weight [768, 576] 442368 308224 0.303241
63 bert.encoder.layer.3.intermediate.dense Linear weight [368, 768] 282624 282624 0
65 bert.encoder.layer.3.output.dense Linear weight [768, 368] 282624 282624 0
69 bert.encoder.layer.4.attention.self.query Linear weight [576, 768] 442368 291840 0.340278
71 bert.encoder.layer.4.attention.self.key Linear weight [576, 768] 442368 310272 0.298611
73 bert.encoder.layer.4.attention.self.value Linear weight [576, 768] 442368 272384 0.384259
75 bert.encoder.layer.4.attention.output.dense Linear weight [768, 576] 442368 263168 0.405093
79 bert.encoder.layer.4.intermediate.dense Linear weight [386, 768] 296448 296448 0
81 bert.encoder.layer.4.output.dense Linear weight [768, 386] 296448 296448 0
85 bert.encoder.layer.5.attention.self.query Linear weight [384, 768] 294912 171008 0.420139
87 bert.encoder.layer.5.attention.self.key Linear weight [384, 768] 294912 205824 0.302083
89 bert.encoder.layer.5.attention.self.value Linear weight [384, 768] 294912 217088 0.263889
91 bert.encoder.layer.5.attention.output.dense Linear weight [768, 384] 294912 223232 0.243056
95 bert.encoder.layer.5.intermediate.dense Linear weight [336, 768] 258048 258048 0
97 bert.encoder.layer.5.output.dense Linear weight [768, 336] 258048 258048 0
101 bert.encoder.layer.6.attention.self.query Linear weight [448, 768] 344064 192512 0.440476
103 bert.encoder.layer.6.attention.self.key Linear weight [448, 768] 344064 224256 0.348214
105 bert.encoder.layer.6.attention.self.value Linear weight [448, 768] 344064 209920 0.389881
107 bert.encoder.layer.6.attention.output.dense Linear weight [768, 448] 344064 199680 0.419643
111 bert.encoder.layer.6.intermediate.dense Linear weight [280, 768] 215040 215040 0
113 bert.encoder.layer.6.output.dense Linear weight [768, 280] 215040 215040 0
117 bert.encoder.layer.7.attention.self.query Linear weight [448, 768] 344064 201728 0.41369
119 bert.encoder.layer.7.attention.self.key Linear weight [448, 768] 344064 237568 0.309524
121 bert.encoder.layer.7.attention.self.value Linear weight [448, 768] 344064 218112 0.366071
123 bert.encoder.layer.7.attention.output.dense Linear weight [768, 448] 344064 202752 0.410714
127 bert.encoder.layer.7.intermediate.dense Linear weight [211, 768] 162048 162048 0
129 bert.encoder.layer.7.output.dense Linear weight [768, 211] 162048 162048 0
133 bert.encoder.layer.8.attention.self.query Linear weight [448, 768] 344064 186368 0.458333
135 bert.encoder.layer.8.attention.self.key Linear weight [448, 768] 344064 197632 0.425595
137 bert.encoder.layer.8.attention.self.value Linear weight [448, 768] 344064 154624 0.550595
139 bert.encoder.layer.8.attention.output.dense Linear weight [768, 448] 344064 148480 0.568452
143 bert.encoder.layer.8.intermediate.dense Linear weight [108, 768] 82944 82944 0
145 bert.encoder.layer.8.output.dense Linear weight [768, 108] 82944 82944 0
149 bert.encoder.layer.9.attention.self.query Linear weight [320, 768] 245760 144384 0.4125
151 bert.encoder.layer.9.attention.self.key Linear weight [320, 768] 245760 155648 0.366667
153 bert.encoder.layer.9.attention.self.value Linear weight [320, 768] 245760 63488 0.741667
155 bert.encoder.layer.9.attention.output.dense Linear weight [768, 320] 245760 65536 0.733333
159 bert.encoder.layer.9.intermediate.dense Linear weight [53, 768] 40704 40704 0
161 bert.encoder.layer.9.output.dense Linear weight [768, 53] 40704 40704 0
165 bert.encoder.layer.10.attention.self.query Linear weight [384, 768] 294912 158720 0.461806
167 bert.encoder.layer.10.attention.self.key Linear weight [384, 768] 294912 158720 0.461806
169 bert.encoder.layer.10.attention.self.value Linear weight [384, 768] 294912 77824 0.736111
171 bert.encoder.layer.10.attention.output.dense Linear weight [768, 384] 294912 78848 0.732639
175 bert.encoder.layer.10.intermediate.dense Linear weight [86, 768] 66048 66048 0
177 bert.encoder.layer.10.output.dense Linear weight [768, 86] 66048 66048 0
181 bert.encoder.layer.11.attention.self.query Linear weight [384, 768] 294912 107520 0.635417
183 bert.encoder.layer.11.attention.self.key Linear weight [384, 768] 294912 118784 0.597222
185 bert.encoder.layer.11.attention.self.value Linear weight [384, 768] 294912 62464 0.788194
187 bert.encoder.layer.11.attention.output.dense Linear weight [768, 384] 294912 54272 0.815972
191 bert.encoder.layer.11.intermediate.dense Linear weight [105, 768] 80640 80640 0
193 bert.encoder.layer.11.output.dense Linear weight [768, 105] 80640 80640 0