|
,layer_id,layer_type,param_type,shape,nparam,nnz,sparsity |
|
0,bert.embeddings.word_embeddings,Embedding,weight,"[30522, 768]",23440896,23440896,0.0 |
|
1,bert.embeddings.position_embeddings,Embedding,weight,"[512, 768]",393216,393216,0.0 |
|
2,bert.embeddings.token_type_embeddings,Embedding,weight,"[2, 768]",1536,1536,0.0 |
|
3,bert.embeddings.LayerNorm,LayerNorm,weight,[768],768,768,0.0 |
|
4,bert.embeddings.LayerNorm,LayerNorm,bias,[768],768,768,0.0 |
|
5,bert.encoder.layer.0.attention.self.query,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809 |
|
6,bert.encoder.layer.0.attention.self.query,Linear,bias,[768],768,768,0.0 |
|
7,bert.encoder.layer.0.attention.self.key,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809 |
|
8,bert.encoder.layer.0.attention.self.key,Linear,bias,[768],768,768,0.0 |
|
9,bert.encoder.layer.0.attention.self.value,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809 |
|
10,bert.encoder.layer.0.attention.self.value,Linear,bias,[768],768,768,0.0 |
|
11,bert.encoder.layer.0.attention.output.dense,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809 |
|
12,bert.encoder.layer.0.attention.output.dense,Linear,bias,[768],768,768,0.0 |
|
13,bert.encoder.layer.0.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 |
|
14,bert.encoder.layer.0.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 |
|
15,bert.encoder.layer.0.intermediate.dense,Linear,weight,"[3072, 768]",2359296,235930,0.8999998569488525 |
|
16,bert.encoder.layer.0.intermediate.dense,Linear,bias,[3072],3072,3072,0.0 |
|
17,bert.encoder.layer.0.output.dense,Linear,weight,"[768, 3072]",2359296,235930,0.8999998569488525 |
|
18,bert.encoder.layer.0.output.dense,Linear,bias,[768],768,768,0.0 |
|
19,bert.encoder.layer.0.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 |
|
20,bert.encoder.layer.0.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 |
|
21,bert.encoder.layer.1.attention.self.query,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809 |
|
22,bert.encoder.layer.1.attention.self.query,Linear,bias,[768],768,768,0.0 |
|
23,bert.encoder.layer.1.attention.self.key,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809 |
|
24,bert.encoder.layer.1.attention.self.key,Linear,bias,[768],768,768,0.0 |
|
25,bert.encoder.layer.1.attention.self.value,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809 |
|
26,bert.encoder.layer.1.attention.self.value,Linear,bias,[768],768,768,0.0 |
|
27,bert.encoder.layer.1.attention.output.dense,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809 |
|
28,bert.encoder.layer.1.attention.output.dense,Linear,bias,[768],768,768,0.0 |
|
29,bert.encoder.layer.1.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 |
|
30,bert.encoder.layer.1.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 |
|
31,bert.encoder.layer.1.intermediate.dense,Linear,weight,"[3072, 768]",2359296,235930,0.8999998569488525 |
|
32,bert.encoder.layer.1.intermediate.dense,Linear,bias,[3072],3072,3072,0.0 |
|
33,bert.encoder.layer.1.output.dense,Linear,weight,"[768, 3072]",2359296,235930,0.8999998569488525 |
|
34,bert.encoder.layer.1.output.dense,Linear,bias,[768],768,768,0.0 |
|
35,bert.encoder.layer.1.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 |
|
36,bert.encoder.layer.1.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 |
|
37,bert.encoder.layer.2.attention.self.query,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809 |
|
38,bert.encoder.layer.2.attention.self.query,Linear,bias,[768],768,768,0.0 |
|
39,bert.encoder.layer.2.attention.self.key,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809 |
|
40,bert.encoder.layer.2.attention.self.key,Linear,bias,[768],768,768,0.0 |
|
41,bert.encoder.layer.2.attention.self.value,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809 |
|
42,bert.encoder.layer.2.attention.self.value,Linear,bias,[768],768,768,0.0 |
|
43,bert.encoder.layer.2.attention.output.dense,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809 |
|
44,bert.encoder.layer.2.attention.output.dense,Linear,bias,[768],768,768,0.0 |
|
45,bert.encoder.layer.2.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 |
|
46,bert.encoder.layer.2.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 |
|
47,bert.encoder.layer.2.intermediate.dense,Linear,weight,"[3072, 768]",2359296,235930,0.8999998569488525 |
|
48,bert.encoder.layer.2.intermediate.dense,Linear,bias,[3072],3072,3072,0.0 |
|
49,bert.encoder.layer.2.output.dense,Linear,weight,"[768, 3072]",2359296,235930,0.8999998569488525 |
|
50,bert.encoder.layer.2.output.dense,Linear,bias,[768],768,768,0.0 |
|
51,bert.encoder.layer.2.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 |
|
52,bert.encoder.layer.2.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 |
|
53,bert.encoder.layer.3.attention.self.query,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809 |
|
54,bert.encoder.layer.3.attention.self.query,Linear,bias,[768],768,768,0.0 |
|
55,bert.encoder.layer.3.attention.self.key,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809 |
|
56,bert.encoder.layer.3.attention.self.key,Linear,bias,[768],768,768,0.0 |
|
57,bert.encoder.layer.3.attention.self.value,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809 |
|
58,bert.encoder.layer.3.attention.self.value,Linear,bias,[768],768,768,0.0 |
|
59,bert.encoder.layer.3.attention.output.dense,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809 |
|
60,bert.encoder.layer.3.attention.output.dense,Linear,bias,[768],768,768,0.0 |
|
61,bert.encoder.layer.3.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 |
|
62,bert.encoder.layer.3.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 |
|
63,bert.encoder.layer.3.intermediate.dense,Linear,weight,"[3072, 768]",2359296,235930,0.8999998569488525 |
|
64,bert.encoder.layer.3.intermediate.dense,Linear,bias,[3072],3072,3072,0.0 |
|
65,bert.encoder.layer.3.output.dense,Linear,weight,"[768, 3072]",2359296,235930,0.8999998569488525 |
|
66,bert.encoder.layer.3.output.dense,Linear,bias,[768],768,768,0.0 |
|
67,bert.encoder.layer.3.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 |
|
68,bert.encoder.layer.3.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 |
|
69,bert.encoder.layer.4.attention.self.query,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809 |
|
70,bert.encoder.layer.4.attention.self.query,Linear,bias,[768],768,768,0.0 |
|
71,bert.encoder.layer.4.attention.self.key,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809 |
|
72,bert.encoder.layer.4.attention.self.key,Linear,bias,[768],768,768,0.0 |
|
73,bert.encoder.layer.4.attention.self.value,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809 |
|
74,bert.encoder.layer.4.attention.self.value,Linear,bias,[768],768,768,0.0 |
|
75,bert.encoder.layer.4.attention.output.dense,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809 |
|
76,bert.encoder.layer.4.attention.output.dense,Linear,bias,[768],768,768,0.0 |
|
77,bert.encoder.layer.4.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 |
|
78,bert.encoder.layer.4.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 |
|
79,bert.encoder.layer.4.intermediate.dense,Linear,weight,"[3072, 768]",2359296,235930,0.8999998569488525 |
|
80,bert.encoder.layer.4.intermediate.dense,Linear,bias,[3072],3072,3072,0.0 |
|
81,bert.encoder.layer.4.output.dense,Linear,weight,"[768, 3072]",2359296,235930,0.8999998569488525 |
|
82,bert.encoder.layer.4.output.dense,Linear,bias,[768],768,768,0.0 |
|
83,bert.encoder.layer.4.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 |
|
84,bert.encoder.layer.4.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 |
|
85,bert.encoder.layer.5.attention.self.query,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809 |
|
86,bert.encoder.layer.5.attention.self.query,Linear,bias,[768],768,768,0.0 |
|
87,bert.encoder.layer.5.attention.self.key,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809 |
|
88,bert.encoder.layer.5.attention.self.key,Linear,bias,[768],768,768,0.0 |
|
89,bert.encoder.layer.5.attention.self.value,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809 |
|
90,bert.encoder.layer.5.attention.self.value,Linear,bias,[768],768,768,0.0 |
|
91,bert.encoder.layer.5.attention.output.dense,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809 |
|
92,bert.encoder.layer.5.attention.output.dense,Linear,bias,[768],768,768,0.0 |
|
93,bert.encoder.layer.5.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 |
|
94,bert.encoder.layer.5.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 |
|
95,bert.encoder.layer.5.intermediate.dense,Linear,weight,"[3072, 768]",2359296,235930,0.8999998569488525 |
|
96,bert.encoder.layer.5.intermediate.dense,Linear,bias,[3072],3072,3072,0.0 |
|
97,bert.encoder.layer.5.output.dense,Linear,weight,"[768, 3072]",2359296,235930,0.8999998569488525 |
|
98,bert.encoder.layer.5.output.dense,Linear,bias,[768],768,768,0.0 |
|
99,bert.encoder.layer.5.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 |
|
100,bert.encoder.layer.5.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 |
|
101,bert.encoder.layer.6.attention.self.query,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809 |
|
102,bert.encoder.layer.6.attention.self.query,Linear,bias,[768],768,768,0.0 |
|
103,bert.encoder.layer.6.attention.self.key,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809 |
|
104,bert.encoder.layer.6.attention.self.key,Linear,bias,[768],768,768,0.0 |
|
105,bert.encoder.layer.6.attention.self.value,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809 |
|
106,bert.encoder.layer.6.attention.self.value,Linear,bias,[768],768,768,0.0 |
|
107,bert.encoder.layer.6.attention.output.dense,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809 |
|
108,bert.encoder.layer.6.attention.output.dense,Linear,bias,[768],768,768,0.0 |
|
109,bert.encoder.layer.6.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 |
|
110,bert.encoder.layer.6.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 |
|
111,bert.encoder.layer.6.intermediate.dense,Linear,weight,"[3072, 768]",2359296,235930,0.8999998569488525 |
|
112,bert.encoder.layer.6.intermediate.dense,Linear,bias,[3072],3072,3072,0.0 |
|
113,bert.encoder.layer.6.output.dense,Linear,weight,"[768, 3072]",2359296,235930,0.8999998569488525 |
|
114,bert.encoder.layer.6.output.dense,Linear,bias,[768],768,768,0.0 |
|
115,bert.encoder.layer.6.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 |
|
116,bert.encoder.layer.6.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 |
|
117,bert.encoder.layer.7.attention.self.query,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809 |
|
118,bert.encoder.layer.7.attention.self.query,Linear,bias,[768],768,768,0.0 |
|
119,bert.encoder.layer.7.attention.self.key,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809 |
|
120,bert.encoder.layer.7.attention.self.key,Linear,bias,[768],768,768,0.0 |
|
121,bert.encoder.layer.7.attention.self.value,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809 |
|
122,bert.encoder.layer.7.attention.self.value,Linear,bias,[768],768,768,0.0 |
|
123,bert.encoder.layer.7.attention.output.dense,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809 |
|
124,bert.encoder.layer.7.attention.output.dense,Linear,bias,[768],768,768,0.0 |
|
125,bert.encoder.layer.7.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 |
|
126,bert.encoder.layer.7.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 |
|
127,bert.encoder.layer.7.intermediate.dense,Linear,weight,"[3072, 768]",2359296,235930,0.8999998569488525 |
|
128,bert.encoder.layer.7.intermediate.dense,Linear,bias,[3072],3072,3072,0.0 |
|
129,bert.encoder.layer.7.output.dense,Linear,weight,"[768, 3072]",2359296,235929,0.900000274181366 |
|
130,bert.encoder.layer.7.output.dense,Linear,bias,[768],768,768,0.0 |
|
131,bert.encoder.layer.7.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 |
|
132,bert.encoder.layer.7.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 |
|
133,bert.encoder.layer.8.attention.self.query,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809 |
|
134,bert.encoder.layer.8.attention.self.query,Linear,bias,[768],768,768,0.0 |
|
135,bert.encoder.layer.8.attention.self.key,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809 |
|
136,bert.encoder.layer.8.attention.self.key,Linear,bias,[768],768,768,0.0 |
|
137,bert.encoder.layer.8.attention.self.value,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809 |
|
138,bert.encoder.layer.8.attention.self.value,Linear,bias,[768],768,768,0.0 |
|
139,bert.encoder.layer.8.attention.output.dense,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809 |
|
140,bert.encoder.layer.8.attention.output.dense,Linear,bias,[768],768,768,0.0 |
|
141,bert.encoder.layer.8.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 |
|
142,bert.encoder.layer.8.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 |
|
143,bert.encoder.layer.8.intermediate.dense,Linear,weight,"[3072, 768]",2359296,235930,0.8999998569488525 |
|
144,bert.encoder.layer.8.intermediate.dense,Linear,bias,[3072],3072,3072,0.0 |
|
145,bert.encoder.layer.8.output.dense,Linear,weight,"[768, 3072]",2359296,235930,0.8999998569488525 |
|
146,bert.encoder.layer.8.output.dense,Linear,bias,[768],768,768,0.0 |
|
147,bert.encoder.layer.8.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 |
|
148,bert.encoder.layer.8.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 |
|
149,bert.encoder.layer.9.attention.self.query,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809 |
|
150,bert.encoder.layer.9.attention.self.query,Linear,bias,[768],768,768,0.0 |
|
151,bert.encoder.layer.9.attention.self.key,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809 |
|
152,bert.encoder.layer.9.attention.self.key,Linear,bias,[768],768,768,0.0 |
|
153,bert.encoder.layer.9.attention.self.value,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809 |
|
154,bert.encoder.layer.9.attention.self.value,Linear,bias,[768],768,768,0.0 |
|
155,bert.encoder.layer.9.attention.output.dense,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809 |
|
156,bert.encoder.layer.9.attention.output.dense,Linear,bias,[768],768,768,0.0 |
|
157,bert.encoder.layer.9.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 |
|
158,bert.encoder.layer.9.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 |
|
159,bert.encoder.layer.9.intermediate.dense,Linear,weight,"[3072, 768]",2359296,235930,0.8999998569488525 |
|
160,bert.encoder.layer.9.intermediate.dense,Linear,bias,[3072],3072,3072,0.0 |
|
161,bert.encoder.layer.9.output.dense,Linear,weight,"[768, 3072]",2359296,235930,0.8999998569488525 |
|
162,bert.encoder.layer.9.output.dense,Linear,bias,[768],768,768,0.0 |
|
163,bert.encoder.layer.9.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 |
|
164,bert.encoder.layer.9.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 |
|
165,bert.encoder.layer.10.attention.self.query,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809 |
|
166,bert.encoder.layer.10.attention.self.query,Linear,bias,[768],768,768,0.0 |
|
167,bert.encoder.layer.10.attention.self.key,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809 |
|
168,bert.encoder.layer.10.attention.self.key,Linear,bias,[768],768,768,0.0 |
|
169,bert.encoder.layer.10.attention.self.value,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809 |
|
170,bert.encoder.layer.10.attention.self.value,Linear,bias,[768],768,768,0.0 |
|
171,bert.encoder.layer.10.attention.output.dense,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809 |
|
172,bert.encoder.layer.10.attention.output.dense,Linear,bias,[768],768,768,0.0 |
|
173,bert.encoder.layer.10.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 |
|
174,bert.encoder.layer.10.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 |
|
175,bert.encoder.layer.10.intermediate.dense,Linear,weight,"[3072, 768]",2359296,235930,0.8999998569488525 |
|
176,bert.encoder.layer.10.intermediate.dense,Linear,bias,[3072],3072,3072,0.0 |
|
177,bert.encoder.layer.10.output.dense,Linear,weight,"[768, 3072]",2359296,235929,0.900000274181366 |
|
178,bert.encoder.layer.10.output.dense,Linear,bias,[768],768,768,0.0 |
|
179,bert.encoder.layer.10.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 |
|
180,bert.encoder.layer.10.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 |
|
181,bert.encoder.layer.11.attention.self.query,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809 |
|
182,bert.encoder.layer.11.attention.self.query,Linear,bias,[768],768,768,0.0 |
|
183,bert.encoder.layer.11.attention.self.key,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809 |
|
184,bert.encoder.layer.11.attention.self.key,Linear,bias,[768],768,768,0.0 |
|
185,bert.encoder.layer.11.attention.self.value,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809 |
|
186,bert.encoder.layer.11.attention.self.value,Linear,bias,[768],768,768,0.0 |
|
187,bert.encoder.layer.11.attention.output.dense,Linear,weight,"[768, 768]",589824,58983,0.8999989628791809 |
|
188,bert.encoder.layer.11.attention.output.dense,Linear,bias,[768],768,768,0.0 |
|
189,bert.encoder.layer.11.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 |
|
190,bert.encoder.layer.11.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 |
|
191,bert.encoder.layer.11.intermediate.dense,Linear,weight,"[3072, 768]",2359296,235930,0.8999998569488525 |
|
192,bert.encoder.layer.11.intermediate.dense,Linear,bias,[3072],3072,3072,0.0 |
|
193,bert.encoder.layer.11.output.dense,Linear,weight,"[768, 3072]",2359296,235930,0.8999998569488525 |
|
194,bert.encoder.layer.11.output.dense,Linear,bias,[768],768,768,0.0 |
|
195,bert.encoder.layer.11.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 |
|
196,bert.encoder.layer.11.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 |
|
197,qa_outputs,Linear,weight,"[2, 768]",1536,1536,0.0 |
|
198,qa_outputs,Linear,bias,[2],2,2,0.0 |
|
|