bert-base-squadv1-pruneofa-90pc-bt-qat-lt / layer_wise_sparsity_global_rate_70.20.csv
Chua, Vui Seng
Add collaterals
0e66ada
,layer_id,layer_type,param_type,shape,nparam,nnz,sparsity
0,nncf_module.bert.embeddings.word_embeddings,NNCFEmbedding,weight,"[30522, 768]",23440896,23440896,0.0
1,nncf_module.bert.embeddings.position_embeddings,NNCFEmbedding,weight,"[512, 768]",393216,393216,0.0
2,nncf_module.bert.embeddings.token_type_embeddings,NNCFEmbedding,weight,"[2, 768]",1536,1536,0.0
3,nncf_module.bert.embeddings.LayerNorm,LayerNorm,weight,[768],768,768,0.0
4,nncf_module.bert.embeddings.LayerNorm,LayerNorm,bias,[768],768,768,0.0
5,nncf_module.bert.encoder.layer.0.attention.self.query,NNCFLinear,weight,"[768, 768]",589824,58983,0.8999989628791809
6,nncf_module.bert.encoder.layer.0.attention.self.query,NNCFLinear,bias,[768],768,768,0.0
7,nncf_module.bert.encoder.layer.0.attention.self.key,NNCFLinear,weight,"[768, 768]",589824,58983,0.8999989628791809
8,nncf_module.bert.encoder.layer.0.attention.self.key,NNCFLinear,bias,[768],768,768,0.0
9,nncf_module.bert.encoder.layer.0.attention.self.value,NNCFLinear,weight,"[768, 768]",589824,58983,0.8999989628791809
10,nncf_module.bert.encoder.layer.0.attention.self.value,NNCFLinear,bias,[768],768,768,0.0
11,nncf_module.bert.encoder.layer.0.attention.output.dense,NNCFLinear,weight,"[768, 768]",589824,58983,0.8999989628791809
12,nncf_module.bert.encoder.layer.0.attention.output.dense,NNCFLinear,bias,[768],768,768,0.0
13,nncf_module.bert.encoder.layer.0.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
14,nncf_module.bert.encoder.layer.0.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
15,nncf_module.bert.encoder.layer.0.intermediate.dense,NNCFLinear,weight,"[3072, 768]",2359296,235930,0.8999998569488525
16,nncf_module.bert.encoder.layer.0.intermediate.dense,NNCFLinear,bias,[3072],3072,3072,0.0
17,nncf_module.bert.encoder.layer.0.output.dense,NNCFLinear,weight,"[768, 3072]",2359296,235930,0.8999998569488525
18,nncf_module.bert.encoder.layer.0.output.dense,NNCFLinear,bias,[768],768,768,0.0
19,nncf_module.bert.encoder.layer.0.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
20,nncf_module.bert.encoder.layer.0.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
21,nncf_module.bert.encoder.layer.1.attention.self.query,NNCFLinear,weight,"[768, 768]",589824,58983,0.8999989628791809
22,nncf_module.bert.encoder.layer.1.attention.self.query,NNCFLinear,bias,[768],768,768,0.0
23,nncf_module.bert.encoder.layer.1.attention.self.key,NNCFLinear,weight,"[768, 768]",589824,58983,0.8999989628791809
24,nncf_module.bert.encoder.layer.1.attention.self.key,NNCFLinear,bias,[768],768,768,0.0
25,nncf_module.bert.encoder.layer.1.attention.self.value,NNCFLinear,weight,"[768, 768]",589824,58983,0.8999989628791809
26,nncf_module.bert.encoder.layer.1.attention.self.value,NNCFLinear,bias,[768],768,768,0.0
27,nncf_module.bert.encoder.layer.1.attention.output.dense,NNCFLinear,weight,"[768, 768]",589824,58983,0.8999989628791809
28,nncf_module.bert.encoder.layer.1.attention.output.dense,NNCFLinear,bias,[768],768,768,0.0
29,nncf_module.bert.encoder.layer.1.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
30,nncf_module.bert.encoder.layer.1.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
31,nncf_module.bert.encoder.layer.1.intermediate.dense,NNCFLinear,weight,"[3072, 768]",2359296,235930,0.8999998569488525
32,nncf_module.bert.encoder.layer.1.intermediate.dense,NNCFLinear,bias,[3072],3072,3072,0.0
33,nncf_module.bert.encoder.layer.1.output.dense,NNCFLinear,weight,"[768, 3072]",2359296,235930,0.8999998569488525
34,nncf_module.bert.encoder.layer.1.output.dense,NNCFLinear,bias,[768],768,768,0.0
35,nncf_module.bert.encoder.layer.1.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
36,nncf_module.bert.encoder.layer.1.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
37,nncf_module.bert.encoder.layer.2.attention.self.query,NNCFLinear,weight,"[768, 768]",589824,58983,0.8999989628791809
38,nncf_module.bert.encoder.layer.2.attention.self.query,NNCFLinear,bias,[768],768,768,0.0
39,nncf_module.bert.encoder.layer.2.attention.self.key,NNCFLinear,weight,"[768, 768]",589824,58983,0.8999989628791809
40,nncf_module.bert.encoder.layer.2.attention.self.key,NNCFLinear,bias,[768],768,768,0.0
41,nncf_module.bert.encoder.layer.2.attention.self.value,NNCFLinear,weight,"[768, 768]",589824,58983,0.8999989628791809
42,nncf_module.bert.encoder.layer.2.attention.self.value,NNCFLinear,bias,[768],768,768,0.0
43,nncf_module.bert.encoder.layer.2.attention.output.dense,NNCFLinear,weight,"[768, 768]",589824,58983,0.8999989628791809
44,nncf_module.bert.encoder.layer.2.attention.output.dense,NNCFLinear,bias,[768],768,768,0.0
45,nncf_module.bert.encoder.layer.2.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
46,nncf_module.bert.encoder.layer.2.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
47,nncf_module.bert.encoder.layer.2.intermediate.dense,NNCFLinear,weight,"[3072, 768]",2359296,235930,0.8999998569488525
48,nncf_module.bert.encoder.layer.2.intermediate.dense,NNCFLinear,bias,[3072],3072,3072,0.0
49,nncf_module.bert.encoder.layer.2.output.dense,NNCFLinear,weight,"[768, 3072]",2359296,235930,0.8999998569488525
50,nncf_module.bert.encoder.layer.2.output.dense,NNCFLinear,bias,[768],768,768,0.0
51,nncf_module.bert.encoder.layer.2.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
52,nncf_module.bert.encoder.layer.2.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
53,nncf_module.bert.encoder.layer.3.attention.self.query,NNCFLinear,weight,"[768, 768]",589824,58983,0.8999989628791809
54,nncf_module.bert.encoder.layer.3.attention.self.query,NNCFLinear,bias,[768],768,768,0.0
55,nncf_module.bert.encoder.layer.3.attention.self.key,NNCFLinear,weight,"[768, 768]",589824,58983,0.8999989628791809
56,nncf_module.bert.encoder.layer.3.attention.self.key,NNCFLinear,bias,[768],768,768,0.0
57,nncf_module.bert.encoder.layer.3.attention.self.value,NNCFLinear,weight,"[768, 768]",589824,58983,0.8999989628791809
58,nncf_module.bert.encoder.layer.3.attention.self.value,NNCFLinear,bias,[768],768,768,0.0
59,nncf_module.bert.encoder.layer.3.attention.output.dense,NNCFLinear,weight,"[768, 768]",589824,58983,0.8999989628791809
60,nncf_module.bert.encoder.layer.3.attention.output.dense,NNCFLinear,bias,[768],768,768,0.0
61,nncf_module.bert.encoder.layer.3.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
62,nncf_module.bert.encoder.layer.3.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
63,nncf_module.bert.encoder.layer.3.intermediate.dense,NNCFLinear,weight,"[3072, 768]",2359296,235930,0.8999998569488525
64,nncf_module.bert.encoder.layer.3.intermediate.dense,NNCFLinear,bias,[3072],3072,3072,0.0
65,nncf_module.bert.encoder.layer.3.output.dense,NNCFLinear,weight,"[768, 3072]",2359296,235930,0.8999998569488525
66,nncf_module.bert.encoder.layer.3.output.dense,NNCFLinear,bias,[768],768,768,0.0
67,nncf_module.bert.encoder.layer.3.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
68,nncf_module.bert.encoder.layer.3.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
69,nncf_module.bert.encoder.layer.4.attention.self.query,NNCFLinear,weight,"[768, 768]",589824,58983,0.8999989628791809
70,nncf_module.bert.encoder.layer.4.attention.self.query,NNCFLinear,bias,[768],768,768,0.0
71,nncf_module.bert.encoder.layer.4.attention.self.key,NNCFLinear,weight,"[768, 768]",589824,58983,0.8999989628791809
72,nncf_module.bert.encoder.layer.4.attention.self.key,NNCFLinear,bias,[768],768,768,0.0
73,nncf_module.bert.encoder.layer.4.attention.self.value,NNCFLinear,weight,"[768, 768]",589824,58983,0.8999989628791809
74,nncf_module.bert.encoder.layer.4.attention.self.value,NNCFLinear,bias,[768],768,768,0.0
75,nncf_module.bert.encoder.layer.4.attention.output.dense,NNCFLinear,weight,"[768, 768]",589824,58983,0.8999989628791809
76,nncf_module.bert.encoder.layer.4.attention.output.dense,NNCFLinear,bias,[768],768,768,0.0
77,nncf_module.bert.encoder.layer.4.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
78,nncf_module.bert.encoder.layer.4.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
79,nncf_module.bert.encoder.layer.4.intermediate.dense,NNCFLinear,weight,"[3072, 768]",2359296,235930,0.8999998569488525
80,nncf_module.bert.encoder.layer.4.intermediate.dense,NNCFLinear,bias,[3072],3072,3072,0.0
81,nncf_module.bert.encoder.layer.4.output.dense,NNCFLinear,weight,"[768, 3072]",2359296,235930,0.8999998569488525
82,nncf_module.bert.encoder.layer.4.output.dense,NNCFLinear,bias,[768],768,768,0.0
83,nncf_module.bert.encoder.layer.4.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
84,nncf_module.bert.encoder.layer.4.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
85,nncf_module.bert.encoder.layer.5.attention.self.query,NNCFLinear,weight,"[768, 768]",589824,58983,0.8999989628791809
86,nncf_module.bert.encoder.layer.5.attention.self.query,NNCFLinear,bias,[768],768,768,0.0
87,nncf_module.bert.encoder.layer.5.attention.self.key,NNCFLinear,weight,"[768, 768]",589824,58983,0.8999989628791809
88,nncf_module.bert.encoder.layer.5.attention.self.key,NNCFLinear,bias,[768],768,768,0.0
89,nncf_module.bert.encoder.layer.5.attention.self.value,NNCFLinear,weight,"[768, 768]",589824,58983,0.8999989628791809
90,nncf_module.bert.encoder.layer.5.attention.self.value,NNCFLinear,bias,[768],768,768,0.0
91,nncf_module.bert.encoder.layer.5.attention.output.dense,NNCFLinear,weight,"[768, 768]",589824,58983,0.8999989628791809
92,nncf_module.bert.encoder.layer.5.attention.output.dense,NNCFLinear,bias,[768],768,768,0.0
93,nncf_module.bert.encoder.layer.5.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
94,nncf_module.bert.encoder.layer.5.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
95,nncf_module.bert.encoder.layer.5.intermediate.dense,NNCFLinear,weight,"[3072, 768]",2359296,235930,0.8999998569488525
96,nncf_module.bert.encoder.layer.5.intermediate.dense,NNCFLinear,bias,[3072],3072,3072,0.0
97,nncf_module.bert.encoder.layer.5.output.dense,NNCFLinear,weight,"[768, 3072]",2359296,235930,0.8999998569488525
98,nncf_module.bert.encoder.layer.5.output.dense,NNCFLinear,bias,[768],768,768,0.0
99,nncf_module.bert.encoder.layer.5.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
100,nncf_module.bert.encoder.layer.5.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
101,nncf_module.bert.encoder.layer.6.attention.self.query,NNCFLinear,weight,"[768, 768]",589824,58983,0.8999989628791809
102,nncf_module.bert.encoder.layer.6.attention.self.query,NNCFLinear,bias,[768],768,768,0.0
103,nncf_module.bert.encoder.layer.6.attention.self.key,NNCFLinear,weight,"[768, 768]",589824,58983,0.8999989628791809
104,nncf_module.bert.encoder.layer.6.attention.self.key,NNCFLinear,bias,[768],768,768,0.0
105,nncf_module.bert.encoder.layer.6.attention.self.value,NNCFLinear,weight,"[768, 768]",589824,58983,0.8999989628791809
106,nncf_module.bert.encoder.layer.6.attention.self.value,NNCFLinear,bias,[768],768,768,0.0
107,nncf_module.bert.encoder.layer.6.attention.output.dense,NNCFLinear,weight,"[768, 768]",589824,58983,0.8999989628791809
108,nncf_module.bert.encoder.layer.6.attention.output.dense,NNCFLinear,bias,[768],768,768,0.0
109,nncf_module.bert.encoder.layer.6.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
110,nncf_module.bert.encoder.layer.6.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
111,nncf_module.bert.encoder.layer.6.intermediate.dense,NNCFLinear,weight,"[3072, 768]",2359296,235930,0.8999998569488525
112,nncf_module.bert.encoder.layer.6.intermediate.dense,NNCFLinear,bias,[3072],3072,3072,0.0
113,nncf_module.bert.encoder.layer.6.output.dense,NNCFLinear,weight,"[768, 3072]",2359296,235930,0.8999998569488525
114,nncf_module.bert.encoder.layer.6.output.dense,NNCFLinear,bias,[768],768,768,0.0
115,nncf_module.bert.encoder.layer.6.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
116,nncf_module.bert.encoder.layer.6.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
117,nncf_module.bert.encoder.layer.7.attention.self.query,NNCFLinear,weight,"[768, 768]",589824,58983,0.8999989628791809
118,nncf_module.bert.encoder.layer.7.attention.self.query,NNCFLinear,bias,[768],768,768,0.0
119,nncf_module.bert.encoder.layer.7.attention.self.key,NNCFLinear,weight,"[768, 768]",589824,58983,0.8999989628791809
120,nncf_module.bert.encoder.layer.7.attention.self.key,NNCFLinear,bias,[768],768,768,0.0
121,nncf_module.bert.encoder.layer.7.attention.self.value,NNCFLinear,weight,"[768, 768]",589824,58983,0.8999989628791809
122,nncf_module.bert.encoder.layer.7.attention.self.value,NNCFLinear,bias,[768],768,768,0.0
123,nncf_module.bert.encoder.layer.7.attention.output.dense,NNCFLinear,weight,"[768, 768]",589824,58983,0.8999989628791809
124,nncf_module.bert.encoder.layer.7.attention.output.dense,NNCFLinear,bias,[768],768,768,0.0
125,nncf_module.bert.encoder.layer.7.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
126,nncf_module.bert.encoder.layer.7.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
127,nncf_module.bert.encoder.layer.7.intermediate.dense,NNCFLinear,weight,"[3072, 768]",2359296,235930,0.8999998569488525
128,nncf_module.bert.encoder.layer.7.intermediate.dense,NNCFLinear,bias,[3072],3072,3072,0.0
129,nncf_module.bert.encoder.layer.7.output.dense,NNCFLinear,weight,"[768, 3072]",2359296,235929,0.900000274181366
130,nncf_module.bert.encoder.layer.7.output.dense,NNCFLinear,bias,[768],768,768,0.0
131,nncf_module.bert.encoder.layer.7.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
132,nncf_module.bert.encoder.layer.7.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
133,nncf_module.bert.encoder.layer.8.attention.self.query,NNCFLinear,weight,"[768, 768]",589824,58983,0.8999989628791809
134,nncf_module.bert.encoder.layer.8.attention.self.query,NNCFLinear,bias,[768],768,768,0.0
135,nncf_module.bert.encoder.layer.8.attention.self.key,NNCFLinear,weight,"[768, 768]",589824,58983,0.8999989628791809
136,nncf_module.bert.encoder.layer.8.attention.self.key,NNCFLinear,bias,[768],768,768,0.0
137,nncf_module.bert.encoder.layer.8.attention.self.value,NNCFLinear,weight,"[768, 768]",589824,58983,0.8999989628791809
138,nncf_module.bert.encoder.layer.8.attention.self.value,NNCFLinear,bias,[768],768,768,0.0
139,nncf_module.bert.encoder.layer.8.attention.output.dense,NNCFLinear,weight,"[768, 768]",589824,58983,0.8999989628791809
140,nncf_module.bert.encoder.layer.8.attention.output.dense,NNCFLinear,bias,[768],768,768,0.0
141,nncf_module.bert.encoder.layer.8.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
142,nncf_module.bert.encoder.layer.8.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
143,nncf_module.bert.encoder.layer.8.intermediate.dense,NNCFLinear,weight,"[3072, 768]",2359296,235930,0.8999998569488525
144,nncf_module.bert.encoder.layer.8.intermediate.dense,NNCFLinear,bias,[3072],3072,3072,0.0
145,nncf_module.bert.encoder.layer.8.output.dense,NNCFLinear,weight,"[768, 3072]",2359296,235930,0.8999998569488525
146,nncf_module.bert.encoder.layer.8.output.dense,NNCFLinear,bias,[768],768,768,0.0
147,nncf_module.bert.encoder.layer.8.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
148,nncf_module.bert.encoder.layer.8.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
149,nncf_module.bert.encoder.layer.9.attention.self.query,NNCFLinear,weight,"[768, 768]",589824,58983,0.8999989628791809
150,nncf_module.bert.encoder.layer.9.attention.self.query,NNCFLinear,bias,[768],768,768,0.0
151,nncf_module.bert.encoder.layer.9.attention.self.key,NNCFLinear,weight,"[768, 768]",589824,58983,0.8999989628791809
152,nncf_module.bert.encoder.layer.9.attention.self.key,NNCFLinear,bias,[768],768,768,0.0
153,nncf_module.bert.encoder.layer.9.attention.self.value,NNCFLinear,weight,"[768, 768]",589824,58983,0.8999989628791809
154,nncf_module.bert.encoder.layer.9.attention.self.value,NNCFLinear,bias,[768],768,768,0.0
155,nncf_module.bert.encoder.layer.9.attention.output.dense,NNCFLinear,weight,"[768, 768]",589824,58983,0.8999989628791809
156,nncf_module.bert.encoder.layer.9.attention.output.dense,NNCFLinear,bias,[768],768,768,0.0
157,nncf_module.bert.encoder.layer.9.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
158,nncf_module.bert.encoder.layer.9.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
159,nncf_module.bert.encoder.layer.9.intermediate.dense,NNCFLinear,weight,"[3072, 768]",2359296,235930,0.8999998569488525
160,nncf_module.bert.encoder.layer.9.intermediate.dense,NNCFLinear,bias,[3072],3072,3072,0.0
161,nncf_module.bert.encoder.layer.9.output.dense,NNCFLinear,weight,"[768, 3072]",2359296,235930,0.8999998569488525
162,nncf_module.bert.encoder.layer.9.output.dense,NNCFLinear,bias,[768],768,768,0.0
163,nncf_module.bert.encoder.layer.9.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
164,nncf_module.bert.encoder.layer.9.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
165,nncf_module.bert.encoder.layer.10.attention.self.query,NNCFLinear,weight,"[768, 768]",589824,58983,0.8999989628791809
166,nncf_module.bert.encoder.layer.10.attention.self.query,NNCFLinear,bias,[768],768,768,0.0
167,nncf_module.bert.encoder.layer.10.attention.self.key,NNCFLinear,weight,"[768, 768]",589824,58983,0.8999989628791809
168,nncf_module.bert.encoder.layer.10.attention.self.key,NNCFLinear,bias,[768],768,768,0.0
169,nncf_module.bert.encoder.layer.10.attention.self.value,NNCFLinear,weight,"[768, 768]",589824,58983,0.8999989628791809
170,nncf_module.bert.encoder.layer.10.attention.self.value,NNCFLinear,bias,[768],768,768,0.0
171,nncf_module.bert.encoder.layer.10.attention.output.dense,NNCFLinear,weight,"[768, 768]",589824,58983,0.8999989628791809
172,nncf_module.bert.encoder.layer.10.attention.output.dense,NNCFLinear,bias,[768],768,768,0.0
173,nncf_module.bert.encoder.layer.10.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
174,nncf_module.bert.encoder.layer.10.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
175,nncf_module.bert.encoder.layer.10.intermediate.dense,NNCFLinear,weight,"[3072, 768]",2359296,235930,0.8999998569488525
176,nncf_module.bert.encoder.layer.10.intermediate.dense,NNCFLinear,bias,[3072],3072,3072,0.0
177,nncf_module.bert.encoder.layer.10.output.dense,NNCFLinear,weight,"[768, 3072]",2359296,235929,0.900000274181366
178,nncf_module.bert.encoder.layer.10.output.dense,NNCFLinear,bias,[768],768,768,0.0
179,nncf_module.bert.encoder.layer.10.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
180,nncf_module.bert.encoder.layer.10.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
181,nncf_module.bert.encoder.layer.11.attention.self.query,NNCFLinear,weight,"[768, 768]",589824,58983,0.8999989628791809
182,nncf_module.bert.encoder.layer.11.attention.self.query,NNCFLinear,bias,[768],768,768,0.0
183,nncf_module.bert.encoder.layer.11.attention.self.key,NNCFLinear,weight,"[768, 768]",589824,58983,0.8999989628791809
184,nncf_module.bert.encoder.layer.11.attention.self.key,NNCFLinear,bias,[768],768,768,0.0
185,nncf_module.bert.encoder.layer.11.attention.self.value,NNCFLinear,weight,"[768, 768]",589824,58983,0.8999989628791809
186,nncf_module.bert.encoder.layer.11.attention.self.value,NNCFLinear,bias,[768],768,768,0.0
187,nncf_module.bert.encoder.layer.11.attention.output.dense,NNCFLinear,weight,"[768, 768]",589824,58983,0.8999989628791809
188,nncf_module.bert.encoder.layer.11.attention.output.dense,NNCFLinear,bias,[768],768,768,0.0
189,nncf_module.bert.encoder.layer.11.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
190,nncf_module.bert.encoder.layer.11.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
191,nncf_module.bert.encoder.layer.11.intermediate.dense,NNCFLinear,weight,"[3072, 768]",2359296,235930,0.8999998569488525
192,nncf_module.bert.encoder.layer.11.intermediate.dense,NNCFLinear,bias,[3072],3072,3072,0.0
193,nncf_module.bert.encoder.layer.11.output.dense,NNCFLinear,weight,"[768, 3072]",2359296,235930,0.8999998569488525
194,nncf_module.bert.encoder.layer.11.output.dense,NNCFLinear,bias,[768],768,768,0.0
195,nncf_module.bert.encoder.layer.11.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0
196,nncf_module.bert.encoder.layer.11.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0
197,nncf_module.qa_outputs,NNCFLinear,weight,"[2, 768]",1536,1536,0.0
198,nncf_module.qa_outputs,NNCFLinear,bias,[2],2,2,0.0