,linear_id,shape,param_count,nnz_count 0,bert.encoder.layer.0.attention.self.query,"[768, 768]",589824,36266 1,bert.encoder.layer.0.attention.self.key,"[768, 768]",589824,28736 2,bert.encoder.layer.0.attention.self.value,"[768, 768]",589824,62217 3,bert.encoder.layer.0.attention.output.dense,"[768, 768]",589824,80877 4,bert.encoder.layer.0.intermediate.dense,"[3072, 768]",2359296,1391950 5,bert.encoder.layer.0.output.dense,"[768, 3072]",2359296,1304418 6,bert.encoder.layer.1.attention.self.query,"[768, 768]",589824,88856 7,bert.encoder.layer.1.attention.self.key,"[768, 768]",589824,80852 8,bert.encoder.layer.1.attention.self.value,"[768, 768]",589824,89520 9,bert.encoder.layer.1.attention.output.dense,"[768, 768]",589824,99229 10,bert.encoder.layer.1.intermediate.dense,"[3072, 768]",2359296,1448241 11,bert.encoder.layer.1.output.dense,"[768, 3072]",2359296,1298451 12,bert.encoder.layer.2.attention.self.query,"[768, 768]",589824,111820 13,bert.encoder.layer.2.attention.self.key,"[768, 768]",589824,105781 14,bert.encoder.layer.2.attention.self.value,"[768, 768]",589824,107982 15,bert.encoder.layer.2.attention.output.dense,"[768, 768]",589824,107182 16,bert.encoder.layer.2.intermediate.dense,"[3072, 768]",2359296,1478488 17,bert.encoder.layer.2.output.dense,"[768, 3072]",2359296,1324632 18,bert.encoder.layer.3.attention.self.query,"[768, 768]",589824,119244 19,bert.encoder.layer.3.attention.self.key,"[768, 768]",589824,121415 20,bert.encoder.layer.3.attention.self.value,"[768, 768]",589824,144196 21,bert.encoder.layer.3.attention.output.dense,"[768, 768]",589824,141057 22,bert.encoder.layer.3.intermediate.dense,"[3072, 768]",2359296,1477159 23,bert.encoder.layer.3.output.dense,"[768, 3072]",2359296,1308149 24,bert.encoder.layer.4.attention.self.query,"[768, 768]",589824,116629 25,bert.encoder.layer.4.attention.self.key,"[768, 768]",589824,115443 26,bert.encoder.layer.4.attention.self.value,"[768, 768]",589824,153857 27,bert.encoder.layer.4.attention.output.dense,"[768, 768]",589824,153726 28,bert.encoder.layer.4.intermediate.dense,"[3072, 768]",2359296,1467353 29,bert.encoder.layer.4.output.dense,"[768, 3072]",2359296,1275091 30,bert.encoder.layer.5.attention.self.query,"[768, 768]",589824,99164 31,bert.encoder.layer.5.attention.self.key,"[768, 768]",589824,106720 32,bert.encoder.layer.5.attention.self.value,"[768, 768]",589824,151873 33,bert.encoder.layer.5.attention.output.dense,"[768, 768]",589824,146952 34,bert.encoder.layer.5.intermediate.dense,"[3072, 768]",2359296,1483651 35,bert.encoder.layer.5.output.dense,"[768, 3072]",2359296,1268211 36,bert.encoder.layer.6.attention.self.query,"[768, 768]",589824,99027 37,bert.encoder.layer.6.attention.self.key,"[768, 768]",589824,108498 38,bert.encoder.layer.6.attention.self.value,"[768, 768]",589824,151369 39,bert.encoder.layer.6.attention.output.dense,"[768, 768]",589824,138662 40,bert.encoder.layer.6.intermediate.dense,"[3072, 768]",2359296,1413626 41,bert.encoder.layer.6.output.dense,"[768, 3072]",2359296,1178877 42,bert.encoder.layer.7.attention.self.query,"[768, 768]",589824,72908 43,bert.encoder.layer.7.attention.self.key,"[768, 768]",589824,87122 44,bert.encoder.layer.7.attention.self.value,"[768, 768]",589824,134079 45,bert.encoder.layer.7.attention.output.dense,"[768, 768]",589824,117207 46,bert.encoder.layer.7.intermediate.dense,"[3072, 768]",2359296,1240812 47,bert.encoder.layer.7.output.dense,"[768, 3072]",2359296,1039716 48,bert.encoder.layer.8.attention.self.query,"[768, 768]",589824,87546 49,bert.encoder.layer.8.attention.self.key,"[768, 768]",589824,95542 50,bert.encoder.layer.8.attention.self.value,"[768, 768]",589824,147149 51,bert.encoder.layer.8.attention.output.dense,"[768, 768]",589824,121221 52,bert.encoder.layer.8.intermediate.dense,"[3072, 768]",2359296,991806 53,bert.encoder.layer.8.output.dense,"[768, 3072]",2359296,805916 54,bert.encoder.layer.9.attention.self.query,"[768, 768]",589824,94420 55,bert.encoder.layer.9.attention.self.key,"[768, 768]",589824,95694 56,bert.encoder.layer.9.attention.self.value,"[768, 768]",589824,58481 57,bert.encoder.layer.9.attention.output.dense,"[768, 768]",589824,46748 58,bert.encoder.layer.9.intermediate.dense,"[3072, 768]",2359296,550773 59,bert.encoder.layer.9.output.dense,"[768, 3072]",2359296,425540 60,bert.encoder.layer.10.attention.self.query,"[768, 768]",589824,62325 61,bert.encoder.layer.10.attention.self.key,"[768, 768]",589824,64069 62,bert.encoder.layer.10.attention.self.value,"[768, 768]",589824,31075 63,bert.encoder.layer.10.attention.output.dense,"[768, 768]",589824,22579 64,bert.encoder.layer.10.intermediate.dense,"[3072, 768]",2359296,361008 65,bert.encoder.layer.10.output.dense,"[768, 3072]",2359296,240192 66,bert.encoder.layer.11.attention.self.query,"[768, 768]",589824,20394 67,bert.encoder.layer.11.attention.self.key,"[768, 768]",589824,25082 68,bert.encoder.layer.11.attention.self.value,"[768, 768]",589824,14505 69,bert.encoder.layer.11.attention.output.dense,"[768, 768]",589824,6766 70,bert.encoder.layer.11.intermediate.dense,"[3072, 768]",2359296,296386 71,bert.encoder.layer.11.output.dense,"[768, 3072]",2359296,119879