bert-base-squadv1-block-pruning-hybrid / XP_linear_layer_sparsity_20M_params_33.64_sparsity.md
Chua, Vui Seng
Update readme and model analysis
ac8897e
layer_id layer_type param_type shape nparam nnz sparsity
5 bert.encoder.layer.0.attention.self.query Linear weight [320, 768] 245760 135168 0.45
7 bert.encoder.layer.0.attention.self.key Linear weight [320, 768] 245760 149504 0.391667
9 bert.encoder.layer.0.attention.self.value Linear weight [320, 768] 245760 173056 0.295833
11 bert.encoder.layer.0.attention.output.dense Linear weight [768, 320] 245760 181248 0.2625
15 bert.encoder.layer.0.intermediate.dense Linear weight [185, 768] 142080 142080 0
17 bert.encoder.layer.0.output.dense Linear weight [768, 185] 142080 142080 0
21 bert.encoder.layer.1.attention.self.query Linear weight [320, 768] 245760 175104 0.2875
23 bert.encoder.layer.1.attention.self.key Linear weight [320, 768] 245760 177152 0.279167
25 bert.encoder.layer.1.attention.self.value Linear weight [320, 768] 245760 166912 0.320833
27 bert.encoder.layer.1.attention.output.dense Linear weight [768, 320] 245760 167936 0.316667
31 bert.encoder.layer.1.intermediate.dense Linear weight [315, 768] 241920 241920 0
33 bert.encoder.layer.1.output.dense Linear weight [768, 315] 241920 241920 0
37 bert.encoder.layer.2.attention.self.query Linear weight [576, 768] 442368 285696 0.354167
39 bert.encoder.layer.2.attention.self.key Linear weight [576, 768] 442368 297984 0.326389
41 bert.encoder.layer.2.attention.self.value Linear weight [576, 768] 442368 226304 0.488426
43 bert.encoder.layer.2.attention.output.dense Linear weight [768, 576] 442368 237568 0.462963
47 bert.encoder.layer.2.intermediate.dense Linear weight [339, 768] 260352 260352 0
49 bert.encoder.layer.2.output.dense Linear weight [768, 339] 260352 260352 0
53 bert.encoder.layer.3.attention.self.query Linear weight [576, 768] 442368 277504 0.372685
55 bert.encoder.layer.3.attention.self.key Linear weight [576, 768] 442368 303104 0.314815
57 bert.encoder.layer.3.attention.self.value Linear weight [576, 768] 442368 297984 0.326389
59 bert.encoder.layer.3.attention.output.dense Linear weight [768, 576] 442368 308224 0.303241
63 bert.encoder.layer.3.intermediate.dense Linear weight [368, 768] 282624 282624 0
65 bert.encoder.layer.3.output.dense Linear weight [768, 368] 282624 282624 0
69 bert.encoder.layer.4.attention.self.query Linear weight [576, 768] 442368 291840 0.340278
71 bert.encoder.layer.4.attention.self.key Linear weight [576, 768] 442368 310272 0.298611
73 bert.encoder.layer.4.attention.self.value Linear weight [576, 768] 442368 272384 0.384259
75 bert.encoder.layer.4.attention.output.dense Linear weight [768, 576] 442368 263168 0.405093
79 bert.encoder.layer.4.intermediate.dense Linear weight [386, 768] 296448 296448 0
81 bert.encoder.layer.4.output.dense Linear weight [768, 386] 296448 296448 0
85 bert.encoder.layer.5.attention.self.query Linear weight [384, 768] 294912 171008 0.420139
87 bert.encoder.layer.5.attention.self.key Linear weight [384, 768] 294912 205824 0.302083
89 bert.encoder.layer.5.attention.self.value Linear weight [384, 768] 294912 217088 0.263889
91 bert.encoder.layer.5.attention.output.dense Linear weight [768, 384] 294912 223232 0.243056
95 bert.encoder.layer.5.intermediate.dense Linear weight [336, 768] 258048 258048 0
97 bert.encoder.layer.5.output.dense Linear weight [768, 336] 258048 258048 0
101 bert.encoder.layer.6.attention.self.query Linear weight [448, 768] 344064 192512 0.440476
103 bert.encoder.layer.6.attention.self.key Linear weight [448, 768] 344064 224256 0.348214
105 bert.encoder.layer.6.attention.self.value Linear weight [448, 768] 344064 209920 0.389881
107 bert.encoder.layer.6.attention.output.dense Linear weight [768, 448] 344064 199680 0.419643
111 bert.encoder.layer.6.intermediate.dense Linear weight [280, 768] 215040 215040 0
113 bert.encoder.layer.6.output.dense Linear weight [768, 280] 215040 215040 0
117 bert.encoder.layer.7.attention.self.query Linear weight [448, 768] 344064 201728 0.41369
119 bert.encoder.layer.7.attention.self.key Linear weight [448, 768] 344064 237568 0.309524
121 bert.encoder.layer.7.attention.self.value Linear weight [448, 768] 344064 218112 0.366071
123 bert.encoder.layer.7.attention.output.dense Linear weight [768, 448] 344064 202752 0.410714
127 bert.encoder.layer.7.intermediate.dense Linear weight [211, 768] 162048 162048 0
129 bert.encoder.layer.7.output.dense Linear weight [768, 211] 162048 162048 0
133 bert.encoder.layer.8.attention.self.query Linear weight [448, 768] 344064 186368 0.458333
135 bert.encoder.layer.8.attention.self.key Linear weight [448, 768] 344064 197632 0.425595
137 bert.encoder.layer.8.attention.self.value Linear weight [448, 768] 344064 154624 0.550595
139 bert.encoder.layer.8.attention.output.dense Linear weight [768, 448] 344064 148480 0.568452
143 bert.encoder.layer.8.intermediate.dense Linear weight [108, 768] 82944 82944 0
145 bert.encoder.layer.8.output.dense Linear weight [768, 108] 82944 82944 0
149 bert.encoder.layer.9.attention.self.query Linear weight [320, 768] 245760 144384 0.4125
151 bert.encoder.layer.9.attention.self.key Linear weight [320, 768] 245760 155648 0.366667
153 bert.encoder.layer.9.attention.self.value Linear weight [320, 768] 245760 63488 0.741667
155 bert.encoder.layer.9.attention.output.dense Linear weight [768, 320] 245760 65536 0.733333
159 bert.encoder.layer.9.intermediate.dense Linear weight [53, 768] 40704 40704 0
161 bert.encoder.layer.9.output.dense Linear weight [768, 53] 40704 40704 0
165 bert.encoder.layer.10.attention.self.query Linear weight [384, 768] 294912 158720 0.461806
167 bert.encoder.layer.10.attention.self.key Linear weight [384, 768] 294912 158720 0.461806
169 bert.encoder.layer.10.attention.self.value Linear weight [384, 768] 294912 77824 0.736111
171 bert.encoder.layer.10.attention.output.dense Linear weight [768, 384] 294912 78848 0.732639
175 bert.encoder.layer.10.intermediate.dense Linear weight [86, 768] 66048 66048 0
177 bert.encoder.layer.10.output.dense Linear weight [768, 86] 66048 66048 0
181 bert.encoder.layer.11.attention.self.query Linear weight [384, 768] 294912 107520 0.635417
183 bert.encoder.layer.11.attention.self.key Linear weight [384, 768] 294912 118784 0.597222
185 bert.encoder.layer.11.attention.self.value Linear weight [384, 768] 294912 62464 0.788194
187 bert.encoder.layer.11.attention.output.dense Linear weight [768, 384] 294912 54272 0.815972
191 bert.encoder.layer.11.intermediate.dense Linear weight [105, 768] 80640 80640 0
193 bert.encoder.layer.11.output.dense Linear weight [768, 105] 80640 80640 0