bert-base-squadv1-pruneofa-90pc-bt / layer_wise_sparsity_global_rate_70.20.csv
Chua, Vui Seng
Add collaterals
437729a
,layer_id,layer_type,param_type,shape,nparam,nnz,sparsity
0,bert.embeddings.word_embeddings,Embedding,weight,"[30522, 768]",23440896,23440896,0.0
1,bert.embeddings.position_embeddings,Embedding,weight,"[512, 768]",393216,393216,0.0
2,bert.embeddings.token_type_embeddings,Embedding,weight,"[2, 768]",1536,1536,0.0
3,bert.embeddings.LayerNorm,LayerNorm,weight,[768],768,768,0.0
4,bert.embeddings.LayerNorm,LayerNorm,bias,[768],768,768,0.0
5,bert.encoder.layer.0.attention.self.query,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809
6,bert.encoder.layer.0.attention.self.query,Linear,bias,[768],768,768,0.0
7,bert.encoder.layer.0.attention.self.key,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809
8,bert.encoder.layer.0.attention.self.key,Linear,bias,[768],768,768,0.0
9,bert.encoder.layer.0.attention.self.value,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809
10,bert.encoder.layer.0.attention.self.value,Linear,bias,[768],768,768,0.0
11,bert.encoder.layer.0.attention.output.dense,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809
12,bert.encoder.layer.0.attention.output.dense,Linear,bias,[768],768,768,0.0
13,bert.encoder.layer.0.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
14,bert.encoder.layer.0.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
15,bert.encoder.layer.0.intermediate.dense,Linear,weight,"[3072, 768]",2359296,235930,0.8999998569488525
16,bert.encoder.layer.0.intermediate.dense,Linear,bias,[3072],3072,3072,0.0
17,bert.encoder.layer.0.output.dense,Linear,weight,"[768, 3072]",2359296,235930,0.8999998569488525
18,bert.encoder.layer.0.output.dense,Linear,bias,[768],768,768,0.0
19,bert.encoder.layer.0.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
20,bert.encoder.layer.0.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
21,bert.encoder.layer.1.attention.self.query,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809
22,bert.encoder.layer.1.attention.self.query,Linear,bias,[768],768,768,0.0
23,bert.encoder.layer.1.attention.self.key,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809
24,bert.encoder.layer.1.attention.self.key,Linear,bias,[768],768,768,0.0
25,bert.encoder.layer.1.attention.self.value,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809
26,bert.encoder.layer.1.attention.self.value,Linear,bias,[768],768,768,0.0
27,bert.encoder.layer.1.attention.output.dense,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809
28,bert.encoder.layer.1.attention.output.dense,Linear,bias,[768],768,768,0.0
29,bert.encoder.layer.1.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
30,bert.encoder.layer.1.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
31,bert.encoder.layer.1.intermediate.dense,Linear,weight,"[3072, 768]",2359296,235930,0.8999998569488525
32,bert.encoder.layer.1.intermediate.dense,Linear,bias,[3072],3072,3072,0.0
33,bert.encoder.layer.1.output.dense,Linear,weight,"[768, 3072]",2359296,235930,0.8999998569488525
34,bert.encoder.layer.1.output.dense,Linear,bias,[768],768,768,0.0
35,bert.encoder.layer.1.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
36,bert.encoder.layer.1.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
37,bert.encoder.layer.2.attention.self.query,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809
38,bert.encoder.layer.2.attention.self.query,Linear,bias,[768],768,768,0.0
39,bert.encoder.layer.2.attention.self.key,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809
40,bert.encoder.layer.2.attention.self.key,Linear,bias,[768],768,768,0.0
41,bert.encoder.layer.2.attention.self.value,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809
42,bert.encoder.layer.2.attention.self.value,Linear,bias,[768],768,768,0.0
43,bert.encoder.layer.2.attention.output.dense,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809
44,bert.encoder.layer.2.attention.output.dense,Linear,bias,[768],768,768,0.0
45,bert.encoder.layer.2.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
46,bert.encoder.layer.2.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
47,bert.encoder.layer.2.intermediate.dense,Linear,weight,"[3072, 768]",2359296,235930,0.8999998569488525
48,bert.encoder.layer.2.intermediate.dense,Linear,bias,[3072],3072,3072,0.0
49,bert.encoder.layer.2.output.dense,Linear,weight,"[768, 3072]",2359296,235930,0.8999998569488525
50,bert.encoder.layer.2.output.dense,Linear,bias,[768],768,768,0.0
51,bert.encoder.layer.2.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
52,bert.encoder.layer.2.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
53,bert.encoder.layer.3.attention.self.query,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809
54,bert.encoder.layer.3.attention.self.query,Linear,bias,[768],768,768,0.0
55,bert.encoder.layer.3.attention.self.key,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809
56,bert.encoder.layer.3.attention.self.key,Linear,bias,[768],768,768,0.0
57,bert.encoder.layer.3.attention.self.value,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809
58,bert.encoder.layer.3.attention.self.value,Linear,bias,[768],768,768,0.0
59,bert.encoder.layer.3.attention.output.dense,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809
60,bert.encoder.layer.3.attention.output.dense,Linear,bias,[768],768,768,0.0
61,bert.encoder.layer.3.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
62,bert.encoder.layer.3.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
63,bert.encoder.layer.3.intermediate.dense,Linear,weight,"[3072, 768]",2359296,235930,0.8999998569488525
64,bert.encoder.layer.3.intermediate.dense,Linear,bias,[3072],3072,3072,0.0
65,bert.encoder.layer.3.output.dense,Linear,weight,"[768, 3072]",2359296,235930,0.8999998569488525
66,bert.encoder.layer.3.output.dense,Linear,bias,[768],768,768,0.0
67,bert.encoder.layer.3.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
68,bert.encoder.layer.3.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
69,bert.encoder.layer.4.attention.self.query,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809
70,bert.encoder.layer.4.attention.self.query,Linear,bias,[768],768,768,0.0
71,bert.encoder.layer.4.attention.self.key,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809
72,bert.encoder.layer.4.attention.self.key,Linear,bias,[768],768,768,0.0
73,bert.encoder.layer.4.attention.self.value,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809
74,bert.encoder.layer.4.attention.self.value,Linear,bias,[768],768,768,0.0
75,bert.encoder.layer.4.attention.output.dense,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809
76,bert.encoder.layer.4.attention.output.dense,Linear,bias,[768],768,768,0.0
77,bert.encoder.layer.4.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
78,bert.encoder.layer.4.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
79,bert.encoder.layer.4.intermediate.dense,Linear,weight,"[3072, 768]",2359296,235930,0.8999998569488525
80,bert.encoder.layer.4.intermediate.dense,Linear,bias,[3072],3072,3072,0.0
81,bert.encoder.layer.4.output.dense,Linear,weight,"[768, 3072]",2359296,235930,0.8999998569488525
82,bert.encoder.layer.4.output.dense,Linear,bias,[768],768,768,0.0
83,bert.encoder.layer.4.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
84,bert.encoder.layer.4.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
85,bert.encoder.layer.5.attention.self.query,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809
86,bert.encoder.layer.5.attention.self.query,Linear,bias,[768],768,768,0.0
87,bert.encoder.layer.5.attention.self.key,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809
88,bert.encoder.layer.5.attention.self.key,Linear,bias,[768],768,768,0.0
89,bert.encoder.layer.5.attention.self.value,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809
90,bert.encoder.layer.5.attention.self.value,Linear,bias,[768],768,768,0.0
91,bert.encoder.layer.5.attention.output.dense,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809
92,bert.encoder.layer.5.attention.output.dense,Linear,bias,[768],768,768,0.0
93,bert.encoder.layer.5.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
94,bert.encoder.layer.5.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
95,bert.encoder.layer.5.intermediate.dense,Linear,weight,"[3072, 768]",2359296,235930,0.8999998569488525
96,bert.encoder.layer.5.intermediate.dense,Linear,bias,[3072],3072,3072,0.0
97,bert.encoder.layer.5.output.dense,Linear,weight,"[768, 3072]",2359296,235930,0.8999998569488525
98,bert.encoder.layer.5.output.dense,Linear,bias,[768],768,768,0.0
99,bert.encoder.layer.5.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
100,bert.encoder.layer.5.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
101,bert.encoder.layer.6.attention.self.query,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809
102,bert.encoder.layer.6.attention.self.query,Linear,bias,[768],768,768,0.0
103,bert.encoder.layer.6.attention.self.key,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809
104,bert.encoder.layer.6.attention.self.key,Linear,bias,[768],768,768,0.0
105,bert.encoder.layer.6.attention.self.value,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809
106,bert.encoder.layer.6.attention.self.value,Linear,bias,[768],768,768,0.0
107,bert.encoder.layer.6.attention.output.dense,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809
108,bert.encoder.layer.6.attention.output.dense,Linear,bias,[768],768,768,0.0
109,bert.encoder.layer.6.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
110,bert.encoder.layer.6.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
111,bert.encoder.layer.6.intermediate.dense,Linear,weight,"[3072, 768]",2359296,235930,0.8999998569488525
112,bert.encoder.layer.6.intermediate.dense,Linear,bias,[3072],3072,3072,0.0
113,bert.encoder.layer.6.output.dense,Linear,weight,"[768, 3072]",2359296,235930,0.8999998569488525
114,bert.encoder.layer.6.output.dense,Linear,bias,[768],768,768,0.0
115,bert.encoder.layer.6.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
116,bert.encoder.layer.6.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
117,bert.encoder.layer.7.attention.self.query,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809
118,bert.encoder.layer.7.attention.self.query,Linear,bias,[768],768,768,0.0
119,bert.encoder.layer.7.attention.self.key,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809
120,bert.encoder.layer.7.attention.self.key,Linear,bias,[768],768,768,0.0
121,bert.encoder.layer.7.attention.self.value,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809
122,bert.encoder.layer.7.attention.self.value,Linear,bias,[768],768,768,0.0
123,bert.encoder.layer.7.attention.output.dense,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809
124,bert.encoder.layer.7.attention.output.dense,Linear,bias,[768],768,768,0.0
125,bert.encoder.layer.7.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
126,bert.encoder.layer.7.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
127,bert.encoder.layer.7.intermediate.dense,Linear,weight,"[3072, 768]",2359296,235930,0.8999998569488525
128,bert.encoder.layer.7.intermediate.dense,Linear,bias,[3072],3072,3072,0.0
129,bert.encoder.layer.7.output.dense,Linear,weight,"[768, 3072]",2359296,235929,0.900000274181366
130,bert.encoder.layer.7.output.dense,Linear,bias,[768],768,768,0.0
131,bert.encoder.layer.7.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
132,bert.encoder.layer.7.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
133,bert.encoder.layer.8.attention.self.query,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809
134,bert.encoder.layer.8.attention.self.query,Linear,bias,[768],768,768,0.0
135,bert.encoder.layer.8.attention.self.key,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809
136,bert.encoder.layer.8.attention.self.key,Linear,bias,[768],768,768,0.0
137,bert.encoder.layer.8.attention.self.value,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809
138,bert.encoder.layer.8.attention.self.value,Linear,bias,[768],768,768,0.0
139,bert.encoder.layer.8.attention.output.dense,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809
140,bert.encoder.layer.8.attention.output.dense,Linear,bias,[768],768,768,0.0
141,bert.encoder.layer.8.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
142,bert.encoder.layer.8.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
143,bert.encoder.layer.8.intermediate.dense,Linear,weight,"[3072, 768]",2359296,235930,0.8999998569488525
144,bert.encoder.layer.8.intermediate.dense,Linear,bias,[3072],3072,3072,0.0
145,bert.encoder.layer.8.output.dense,Linear,weight,"[768, 3072]",2359296,235930,0.8999998569488525
146,bert.encoder.layer.8.output.dense,Linear,bias,[768],768,768,0.0
147,bert.encoder.layer.8.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
148,bert.encoder.layer.8.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
149,bert.encoder.layer.9.attention.self.query,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809
150,bert.encoder.layer.9.attention.self.query,Linear,bias,[768],768,768,0.0
151,bert.encoder.layer.9.attention.self.key,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809
152,bert.encoder.layer.9.attention.self.key,Linear,bias,[768],768,768,0.0
153,bert.encoder.layer.9.attention.self.value,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809
154,bert.encoder.layer.9.attention.self.value,Linear,bias,[768],768,768,0.0
155,bert.encoder.layer.9.attention.output.dense,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809
156,bert.encoder.layer.9.attention.output.dense,Linear,bias,[768],768,768,0.0
157,bert.encoder.layer.9.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
158,bert.encoder.layer.9.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
159,bert.encoder.layer.9.intermediate.dense,Linear,weight,"[3072, 768]",2359296,235930,0.8999998569488525
160,bert.encoder.layer.9.intermediate.dense,Linear,bias,[3072],3072,3072,0.0
161,bert.encoder.layer.9.output.dense,Linear,weight,"[768, 3072]",2359296,235930,0.8999998569488525
162,bert.encoder.layer.9.output.dense,Linear,bias,[768],768,768,0.0
163,bert.encoder.layer.9.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
164,bert.encoder.layer.9.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
165,bert.encoder.layer.10.attention.self.query,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809
166,bert.encoder.layer.10.attention.self.query,Linear,bias,[768],768,768,0.0
167,bert.encoder.layer.10.attention.self.key,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809
168,bert.encoder.layer.10.attention.self.key,Linear,bias,[768],768,768,0.0
169,bert.encoder.layer.10.attention.self.value,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809
170,bert.encoder.layer.10.attention.self.value,Linear,bias,[768],768,768,0.0
171,bert.encoder.layer.10.attention.output.dense,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809
172,bert.encoder.layer.10.attention.output.dense,Linear,bias,[768],768,768,0.0
173,bert.encoder.layer.10.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
174,bert.encoder.layer.10.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
175,bert.encoder.layer.10.intermediate.dense,Linear,weight,"[3072, 768]",2359296,235930,0.8999998569488525
176,bert.encoder.layer.10.intermediate.dense,Linear,bias,[3072],3072,3072,0.0
177,bert.encoder.layer.10.output.dense,Linear,weight,"[768, 3072]",2359296,235929,0.900000274181366
178,bert.encoder.layer.10.output.dense,Linear,bias,[768],768,768,0.0
179,bert.encoder.layer.10.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
180,bert.encoder.layer.10.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
181,bert.encoder.layer.11.attention.self.query,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809
182,bert.encoder.layer.11.attention.self.query,Linear,bias,[768],768,768,0.0
183,bert.encoder.layer.11.attention.self.key,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809
184,bert.encoder.layer.11.attention.self.key,Linear,bias,[768],768,768,0.0
185,bert.encoder.layer.11.attention.self.value,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809
186,bert.encoder.layer.11.attention.self.value,Linear,bias,[768],768,768,0.0
187,bert.encoder.layer.11.attention.output.dense,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809
188,bert.encoder.layer.11.attention.output.dense,Linear,bias,[768],768,768,0.0
189,bert.encoder.layer.11.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
190,bert.encoder.layer.11.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
191,bert.encoder.layer.11.intermediate.dense,Linear,weight,"[3072, 768]",2359296,235930,0.8999998569488525
192,bert.encoder.layer.11.intermediate.dense,Linear,bias,[3072],3072,3072,0.0
193,bert.encoder.layer.11.output.dense,Linear,weight,"[768, 3072]",2359296,235930,0.8999998569488525
194,bert.encoder.layer.11.output.dense,Linear,bias,[768],768,768,0.0
195,bert.encoder.layer.11.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
196,bert.encoder.layer.11.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
197,qa_outputs,Linear,weight,"[2, 768]",1536,1536,0.0
198,qa_outputs,Linear,bias,[2],2,2,0.0