bert-base-squadv1-block-pruning-hybrid-filled-lt / XP_layer_wise_sparsity_global_rate_0.00.md
Chua, Vui Seng
Update readme, add collaterals and model analysis report
c83a9d5
layer_id layer_type param_type shape nparam nnz sparsity
0 bert.embeddings.word_embeddings Embedding weight [30522, 768] 23440896 23440896 0
1 bert.embeddings.position_embeddings Embedding weight [512, 768] 393216 393216 0
2 bert.embeddings.token_type_embeddings Embedding weight [2, 768] 1536 1536 0
3 bert.embeddings.LayerNorm LayerNorm weight [768] 768 768 0
4 bert.embeddings.LayerNorm LayerNorm bias [768] 768 768 0
5 bert.encoder.layer.0.attention.self.query Linear weight [320, 768] 245760 245760 0
6 bert.encoder.layer.0.attention.self.query Linear bias [320] 320 320 0
7 bert.encoder.layer.0.attention.self.key Linear weight [320, 768] 245760 245760 0
8 bert.encoder.layer.0.attention.self.key Linear bias [320] 320 320 0
9 bert.encoder.layer.0.attention.self.value Linear weight [320, 768] 245760 245760 0
10 bert.encoder.layer.0.attention.self.value Linear bias [320] 320 320 0
11 bert.encoder.layer.0.attention.output.dense Linear weight [768, 320] 245760 245760 0
12 bert.encoder.layer.0.attention.output.dense Linear bias [768] 768 768 0
13 bert.encoder.layer.0.attention.output.LayerNorm LayerNorm weight [768] 768 768 0
14 bert.encoder.layer.0.attention.output.LayerNorm LayerNorm bias [768] 768 768 0
15 bert.encoder.layer.0.intermediate.dense Linear weight [185, 768] 142080 142080 0
16 bert.encoder.layer.0.intermediate.dense Linear bias [185] 185 185 0
17 bert.encoder.layer.0.output.dense Linear weight [768, 185] 142080 142080 0
18 bert.encoder.layer.0.output.dense Linear bias [768] 768 768 0
19 bert.encoder.layer.0.output.LayerNorm LayerNorm weight [768] 768 768 0
20 bert.encoder.layer.0.output.LayerNorm LayerNorm bias [768] 768 768 0
21 bert.encoder.layer.1.attention.self.query Linear weight [320, 768] 245760 245760 0
22 bert.encoder.layer.1.attention.self.query Linear bias [320] 320 320 0
23 bert.encoder.layer.1.attention.self.key Linear weight [320, 768] 245760 245760 0
24 bert.encoder.layer.1.attention.self.key Linear bias [320] 320 320 0
25 bert.encoder.layer.1.attention.self.value Linear weight [320, 768] 245760 245760 0
26 bert.encoder.layer.1.attention.self.value Linear bias [320] 320 320 0
27 bert.encoder.layer.1.attention.output.dense Linear weight [768, 320] 245760 245760 0
28 bert.encoder.layer.1.attention.output.dense Linear bias [768] 768 768 0
29 bert.encoder.layer.1.attention.output.LayerNorm LayerNorm weight [768] 768 768 0
30 bert.encoder.layer.1.attention.output.LayerNorm LayerNorm bias [768] 768 768 0
31 bert.encoder.layer.1.intermediate.dense Linear weight [315, 768] 241920 241920 0
32 bert.encoder.layer.1.intermediate.dense Linear bias [315] 315 315 0
33 bert.encoder.layer.1.output.dense Linear weight [768, 315] 241920 241920 0
34 bert.encoder.layer.1.output.dense Linear bias [768] 768 768 0
35 bert.encoder.layer.1.output.LayerNorm LayerNorm weight [768] 768 768 0
36 bert.encoder.layer.1.output.LayerNorm LayerNorm bias [768] 768 768 0
37 bert.encoder.layer.2.attention.self.query Linear weight [576, 768] 442368 442368 0
38 bert.encoder.layer.2.attention.self.query Linear bias [576] 576 576 0
39 bert.encoder.layer.2.attention.self.key Linear weight [576, 768] 442368 442368 0
40 bert.encoder.layer.2.attention.self.key Linear bias [576] 576 576 0
41 bert.encoder.layer.2.attention.self.value Linear weight [576, 768] 442368 442368 0
42 bert.encoder.layer.2.attention.self.value Linear bias [576] 576 576 0
43 bert.encoder.layer.2.attention.output.dense Linear weight [768, 576] 442368 442368 0
44 bert.encoder.layer.2.attention.output.dense Linear bias [768] 768 768 0
45 bert.encoder.layer.2.attention.output.LayerNorm LayerNorm weight [768] 768 768 0
46 bert.encoder.layer.2.attention.output.LayerNorm LayerNorm bias [768] 768 768 0
47 bert.encoder.layer.2.intermediate.dense Linear weight [339, 768] 260352 260352 0
48 bert.encoder.layer.2.intermediate.dense Linear bias [339] 339 339 0
49 bert.encoder.layer.2.output.dense Linear weight [768, 339] 260352 260352 0
50 bert.encoder.layer.2.output.dense Linear bias [768] 768 768 0
51 bert.encoder.layer.2.output.LayerNorm LayerNorm weight [768] 768 768 0
52 bert.encoder.layer.2.output.LayerNorm LayerNorm bias [768] 768 768 0
53 bert.encoder.layer.3.attention.self.query Linear weight [576, 768] 442368 442368 0
54 bert.encoder.layer.3.attention.self.query Linear bias [576] 576 576 0
55 bert.encoder.layer.3.attention.self.key Linear weight [576, 768] 442368 442368 0
56 bert.encoder.layer.3.attention.self.key Linear bias [576] 576 576 0
57 bert.encoder.layer.3.attention.self.value Linear weight [576, 768] 442368 442368 0
58 bert.encoder.layer.3.attention.self.value Linear bias [576] 576 576 0
59 bert.encoder.layer.3.attention.output.dense Linear weight [768, 576] 442368 442368 0
60 bert.encoder.layer.3.attention.output.dense Linear bias [768] 768 768 0
61 bert.encoder.layer.3.attention.output.LayerNorm LayerNorm weight [768] 768 768 0
62 bert.encoder.layer.3.attention.output.LayerNorm LayerNorm bias [768] 768 768 0
63 bert.encoder.layer.3.intermediate.dense Linear weight [368, 768] 282624 282624 0
64 bert.encoder.layer.3.intermediate.dense Linear bias [368] 368 368 0
65 bert.encoder.layer.3.output.dense Linear weight [768, 368] 282624 282624 0
66 bert.encoder.layer.3.output.dense Linear bias [768] 768 768 0
67 bert.encoder.layer.3.output.LayerNorm LayerNorm weight [768] 768 768 0
68 bert.encoder.layer.3.output.LayerNorm LayerNorm bias [768] 768 768 0
69 bert.encoder.layer.4.attention.self.query Linear weight [576, 768] 442368 442368 0
70 bert.encoder.layer.4.attention.self.query Linear bias [576] 576 576 0
71 bert.encoder.layer.4.attention.self.key Linear weight [576, 768] 442368 442368 0
72 bert.encoder.layer.4.attention.self.key Linear bias [576] 576 576 0
73 bert.encoder.layer.4.attention.self.value Linear weight [576, 768] 442368 442368 0
74 bert.encoder.layer.4.attention.self.value Linear bias [576] 576 576 0
75 bert.encoder.layer.4.attention.output.dense Linear weight [768, 576] 442368 442368 0
76 bert.encoder.layer.4.attention.output.dense Linear bias [768] 768 768 0
77 bert.encoder.layer.4.attention.output.LayerNorm LayerNorm weight [768] 768 768 0
78 bert.encoder.layer.4.attention.output.LayerNorm LayerNorm bias [768] 768 768 0
79 bert.encoder.layer.4.intermediate.dense Linear weight [386, 768] 296448 296448 0
80 bert.encoder.layer.4.intermediate.dense Linear bias [386] 386 386 0
81 bert.encoder.layer.4.output.dense Linear weight [768, 386] 296448 296448 0
82 bert.encoder.layer.4.output.dense Linear bias [768] 768 768 0
83 bert.encoder.layer.4.output.LayerNorm LayerNorm weight [768] 768 768 0
84 bert.encoder.layer.4.output.LayerNorm LayerNorm bias [768] 768 768 0
85 bert.encoder.layer.5.attention.self.query Linear weight [384, 768] 294912 294912 0
86 bert.encoder.layer.5.attention.self.query Linear bias [384] 384 384 0
87 bert.encoder.layer.5.attention.self.key Linear weight [384, 768] 294912 294912 0
88 bert.encoder.layer.5.attention.self.key Linear bias [384] 384 384 0
89 bert.encoder.layer.5.attention.self.value Linear weight [384, 768] 294912 294912 0
90 bert.encoder.layer.5.attention.self.value Linear bias [384] 384 384 0
91 bert.encoder.layer.5.attention.output.dense Linear weight [768, 384] 294912 294912 0
92 bert.encoder.layer.5.attention.output.dense Linear bias [768] 768 768 0
93 bert.encoder.layer.5.attention.output.LayerNorm LayerNorm weight [768] 768 768 0
94 bert.encoder.layer.5.attention.output.LayerNorm LayerNorm bias [768] 768 768 0
95 bert.encoder.layer.5.intermediate.dense Linear weight [336, 768] 258048 258048 0
96 bert.encoder.layer.5.intermediate.dense Linear bias [336] 336 336 0
97 bert.encoder.layer.5.output.dense Linear weight [768, 336] 258048 258048 0
98 bert.encoder.layer.5.output.dense Linear bias [768] 768 768 0
99 bert.encoder.layer.5.output.LayerNorm LayerNorm weight [768] 768 768 0
100 bert.encoder.layer.5.output.LayerNorm LayerNorm bias [768] 768 768 0
101 bert.encoder.layer.6.attention.self.query Linear weight [448, 768] 344064 344064 0
102 bert.encoder.layer.6.attention.self.query Linear bias [448] 448 448 0
103 bert.encoder.layer.6.attention.self.key Linear weight [448, 768] 344064 344064 0
104 bert.encoder.layer.6.attention.self.key Linear bias [448] 448 448 0
105 bert.encoder.layer.6.attention.self.value Linear weight [448, 768] 344064 344064 0
106 bert.encoder.layer.6.attention.self.value Linear bias [448] 448 448 0
107 bert.encoder.layer.6.attention.output.dense Linear weight [768, 448] 344064 344064 0
108 bert.encoder.layer.6.attention.output.dense Linear bias [768] 768 768 0
109 bert.encoder.layer.6.attention.output.LayerNorm LayerNorm weight [768] 768 768 0
110 bert.encoder.layer.6.attention.output.LayerNorm LayerNorm bias [768] 768 768 0
111 bert.encoder.layer.6.intermediate.dense Linear weight [280, 768] 215040 215040 0
112 bert.encoder.layer.6.intermediate.dense Linear bias [280] 280 280 0
113 bert.encoder.layer.6.output.dense Linear weight [768, 280] 215040 215040 0
114 bert.encoder.layer.6.output.dense Linear bias [768] 768 768 0
115 bert.encoder.layer.6.output.LayerNorm LayerNorm weight [768] 768 768 0
116 bert.encoder.layer.6.output.LayerNorm LayerNorm bias [768] 768 768 0
117 bert.encoder.layer.7.attention.self.query Linear weight [448, 768] 344064 344064 0
118 bert.encoder.layer.7.attention.self.query Linear bias [448] 448 448 0
119 bert.encoder.layer.7.attention.self.key Linear weight [448, 768] 344064 344064 0
120 bert.encoder.layer.7.attention.self.key Linear bias [448] 448 448 0
121 bert.encoder.layer.7.attention.self.value Linear weight [448, 768] 344064 344064 0
122 bert.encoder.layer.7.attention.self.value Linear bias [448] 448 448 0
123 bert.encoder.layer.7.attention.output.dense Linear weight [768, 448] 344064 344064 0
124 bert.encoder.layer.7.attention.output.dense Linear bias [768] 768 768 0
125 bert.encoder.layer.7.attention.output.LayerNorm LayerNorm weight [768] 768 768 0
126 bert.encoder.layer.7.attention.output.LayerNorm LayerNorm bias [768] 768 768 0
127 bert.encoder.layer.7.intermediate.dense Linear weight [211, 768] 162048 162048 0
128 bert.encoder.layer.7.intermediate.dense Linear bias [211] 211 211 0
129 bert.encoder.layer.7.output.dense Linear weight [768, 211] 162048 162048 0
130 bert.encoder.layer.7.output.dense Linear bias [768] 768 768 0
131 bert.encoder.layer.7.output.LayerNorm LayerNorm weight [768] 768 768 0
132 bert.encoder.layer.7.output.LayerNorm LayerNorm bias [768] 768 768 0
133 bert.encoder.layer.8.attention.self.query Linear weight [448, 768] 344064 344064 0
134 bert.encoder.layer.8.attention.self.query Linear bias [448] 448 448 0
135 bert.encoder.layer.8.attention.self.key Linear weight [448, 768] 344064 344064 0
136 bert.encoder.layer.8.attention.self.key Linear bias [448] 448 448 0
137 bert.encoder.layer.8.attention.self.value Linear weight [448, 768] 344064 344064 0
138 bert.encoder.layer.8.attention.self.value Linear bias [448] 448 448 0
139 bert.encoder.layer.8.attention.output.dense Linear weight [768, 448] 344064 344064 0
140 bert.encoder.layer.8.attention.output.dense Linear bias [768] 768 768 0
141 bert.encoder.layer.8.attention.output.LayerNorm LayerNorm weight [768] 768 768 0
142 bert.encoder.layer.8.attention.output.LayerNorm LayerNorm bias [768] 768 768 0
143 bert.encoder.layer.8.intermediate.dense Linear weight [108, 768] 82944 82944 0
144 bert.encoder.layer.8.intermediate.dense Linear bias [108] 108 108 0
145 bert.encoder.layer.8.output.dense Linear weight [768, 108] 82944 82944 0
146 bert.encoder.layer.8.output.dense Linear bias [768] 768 768 0
147 bert.encoder.layer.8.output.LayerNorm LayerNorm weight [768] 768 768 0
148 bert.encoder.layer.8.output.LayerNorm LayerNorm bias [768] 768 768 0
149 bert.encoder.layer.9.attention.self.query Linear weight [320, 768] 245760 245760 0
150 bert.encoder.layer.9.attention.self.query Linear bias [320] 320 320 0
151 bert.encoder.layer.9.attention.self.key Linear weight [320, 768] 245760 245760 0
152 bert.encoder.layer.9.attention.self.key Linear bias [320] 320 320 0
153 bert.encoder.layer.9.attention.self.value Linear weight [320, 768] 245760 245760 0
154 bert.encoder.layer.9.attention.self.value Linear bias [320] 320 320 0
155 bert.encoder.layer.9.attention.output.dense Linear weight [768, 320] 245760 245760 0
156 bert.encoder.layer.9.attention.output.dense Linear bias [768] 768 768 0
157 bert.encoder.layer.9.attention.output.LayerNorm LayerNorm weight [768] 768 768 0
158 bert.encoder.layer.9.attention.output.LayerNorm LayerNorm bias [768] 768 768 0
159 bert.encoder.layer.9.intermediate.dense Linear weight [53, 768] 40704 40704 0
160 bert.encoder.layer.9.intermediate.dense Linear bias [53] 53 53 0
161 bert.encoder.layer.9.output.dense Linear weight [768, 53] 40704 40704 0
162 bert.encoder.layer.9.output.dense Linear bias [768] 768 768 0
163 bert.encoder.layer.9.output.LayerNorm LayerNorm weight [768] 768 768 0
164 bert.encoder.layer.9.output.LayerNorm LayerNorm bias [768] 768 768 0
165 bert.encoder.layer.10.attention.self.query Linear weight [384, 768] 294912 294912 0
166 bert.encoder.layer.10.attention.self.query Linear bias [384] 384 384 0
167 bert.encoder.layer.10.attention.self.key Linear weight [384, 768] 294912 294912 0
168 bert.encoder.layer.10.attention.self.key Linear bias [384] 384 384 0
169 bert.encoder.layer.10.attention.self.value Linear weight [384, 768] 294912 294912 0
170 bert.encoder.layer.10.attention.self.value Linear bias [384] 384 384 0
171 bert.encoder.layer.10.attention.output.dense Linear weight [768, 384] 294912 294912 0
172 bert.encoder.layer.10.attention.output.dense Linear bias [768] 768 768 0
173 bert.encoder.layer.10.attention.output.LayerNorm LayerNorm weight [768] 768 768 0
174 bert.encoder.layer.10.attention.output.LayerNorm LayerNorm bias [768] 768 768 0
175 bert.encoder.layer.10.intermediate.dense Linear weight [86, 768] 66048 66048 0
176 bert.encoder.layer.10.intermediate.dense Linear bias [86] 86 86 0
177 bert.encoder.layer.10.output.dense Linear weight [768, 86] 66048 66048 0
178 bert.encoder.layer.10.output.dense Linear bias [768] 768 768 0
179 bert.encoder.layer.10.output.LayerNorm LayerNorm weight [768] 768 768 0
180 bert.encoder.layer.10.output.LayerNorm LayerNorm bias [768] 768 768 0
181 bert.encoder.layer.11.attention.self.query Linear weight [384, 768] 294912 294912 0
182 bert.encoder.layer.11.attention.self.query Linear bias [384] 384 384 0
183 bert.encoder.layer.11.attention.self.key Linear weight [384, 768] 294912 294912 0
184 bert.encoder.layer.11.attention.self.key Linear bias [384] 384 384 0
185 bert.encoder.layer.11.attention.self.value Linear weight [384, 768] 294912 294912 0
186 bert.encoder.layer.11.attention.self.value Linear bias [384] 384 384 0
187 bert.encoder.layer.11.attention.output.dense Linear weight [768, 384] 294912 294912 0
188 bert.encoder.layer.11.attention.output.dense Linear bias [768] 768 768 0
189 bert.encoder.layer.11.attention.output.LayerNorm LayerNorm weight [768] 768 768 0
190 bert.encoder.layer.11.attention.output.LayerNorm LayerNorm bias [768] 768 768 0
191 bert.encoder.layer.11.intermediate.dense Linear weight [105, 768] 80640 80640 0
192 bert.encoder.layer.11.intermediate.dense Linear bias [105] 105 105 0
193 bert.encoder.layer.11.output.dense Linear weight [768, 105] 80640 80640 0
194 bert.encoder.layer.11.output.dense Linear bias [768] 768 768 0
195 bert.encoder.layer.11.output.LayerNorm LayerNorm weight [768] 768 768 0
196 bert.encoder.layer.11.output.LayerNorm LayerNorm bias [768] 768 768 0
197 qa_outputs Linear weight [2, 768] 1536 1536 0
198 qa_outputs Linear bias [2] 2 2 0