,linear_id,shape,param_count,nnz_count 0,bert.encoder.layer.0.attention.self.query,"[768, 768]",589824,1757 1,bert.encoder.layer.0.attention.self.key,"[768, 768]",589824,1800 2,bert.encoder.layer.0.attention.self.value,"[768, 768]",589824,5416 3,bert.encoder.layer.0.attention.output.dense,"[768, 768]",589824,7845 4,bert.encoder.layer.0.intermediate.dense,"[3072, 768]",2359296,701532 5,bert.encoder.layer.0.output.dense,"[768, 3072]",2359296,591365 6,bert.encoder.layer.1.attention.self.query,"[768, 768]",589824,27422 7,bert.encoder.layer.1.attention.self.key,"[768, 768]",589824,27291 8,bert.encoder.layer.1.attention.self.value,"[768, 768]",589824,22259 9,bert.encoder.layer.1.attention.output.dense,"[768, 768]",589824,27236 10,bert.encoder.layer.1.intermediate.dense,"[3072, 768]",2359296,775297 11,bert.encoder.layer.1.output.dense,"[768, 3072]",2359296,599687 12,bert.encoder.layer.2.attention.self.query,"[768, 768]",589824,31292 13,bert.encoder.layer.2.attention.self.key,"[768, 768]",589824,30911 14,bert.encoder.layer.2.attention.self.value,"[768, 768]",589824,24690 15,bert.encoder.layer.2.attention.output.dense,"[768, 768]",589824,24902 16,bert.encoder.layer.2.intermediate.dense,"[3072, 768]",2359296,804492 17,bert.encoder.layer.2.output.dense,"[768, 3072]",2359296,606736 18,bert.encoder.layer.3.attention.self.query,"[768, 768]",589824,35285 19,bert.encoder.layer.3.attention.self.key,"[768, 768]",589824,38990 20,bert.encoder.layer.3.attention.self.value,"[768, 768]",589824,38379 21,bert.encoder.layer.3.attention.output.dense,"[768, 768]",589824,39416 22,bert.encoder.layer.3.intermediate.dense,"[3072, 768]",2359296,803280 23,bert.encoder.layer.3.output.dense,"[768, 3072]",2359296,592789 24,bert.encoder.layer.4.attention.self.query,"[768, 768]",589824,28239 25,bert.encoder.layer.4.attention.self.key,"[768, 768]",589824,30768 26,bert.encoder.layer.4.attention.self.value,"[768, 768]",589824,39479 27,bert.encoder.layer.4.attention.output.dense,"[768, 768]",589824,39703 28,bert.encoder.layer.4.intermediate.dense,"[3072, 768]",2359296,784797 29,bert.encoder.layer.4.output.dense,"[768, 3072]",2359296,550626 30,bert.encoder.layer.5.attention.self.query,"[768, 768]",589824,16702 31,bert.encoder.layer.5.attention.self.key,"[768, 768]",589824,21640 32,bert.encoder.layer.5.attention.self.value,"[768, 768]",589824,35016 33,bert.encoder.layer.5.attention.output.dense,"[768, 768]",589824,33592 34,bert.encoder.layer.5.intermediate.dense,"[3072, 768]",2359296,785249 35,bert.encoder.layer.5.output.dense,"[768, 3072]",2359296,530621 36,bert.encoder.layer.6.attention.self.query,"[768, 768]",589824,16901 37,bert.encoder.layer.6.attention.self.key,"[768, 768]",589824,20927 38,bert.encoder.layer.6.attention.self.value,"[768, 768]",589824,29547 39,bert.encoder.layer.6.attention.output.dense,"[768, 768]",589824,26602 40,bert.encoder.layer.6.intermediate.dense,"[3072, 768]",2359296,706133 41,bert.encoder.layer.6.output.dense,"[768, 3072]",2359296,458484 42,bert.encoder.layer.7.attention.self.query,"[768, 768]",589824,15264 43,bert.encoder.layer.7.attention.self.key,"[768, 768]",589824,21032 44,bert.encoder.layer.7.attention.self.value,"[768, 768]",589824,34524 45,bert.encoder.layer.7.attention.output.dense,"[768, 768]",589824,26950 46,bert.encoder.layer.7.intermediate.dense,"[3072, 768]",2359296,558548 47,bert.encoder.layer.7.output.dense,"[768, 3072]",2359296,366404 48,bert.encoder.layer.8.attention.self.query,"[768, 768]",589824,14420 49,bert.encoder.layer.8.attention.self.key,"[768, 768]",589824,18006 50,bert.encoder.layer.8.attention.self.value,"[768, 768]",589824,27492 51,bert.encoder.layer.8.attention.output.dense,"[768, 768]",589824,21056 52,bert.encoder.layer.8.intermediate.dense,"[3072, 768]",2359296,388041 53,bert.encoder.layer.8.output.dense,"[768, 3072]",2359296,246678 54,bert.encoder.layer.9.attention.self.query,"[768, 768]",589824,32843 55,bert.encoder.layer.9.attention.self.key,"[768, 768]",589824,31522 56,bert.encoder.layer.9.attention.self.value,"[768, 768]",589824,8125 57,bert.encoder.layer.9.attention.output.dense,"[768, 768]",589824,6000 58,bert.encoder.layer.9.intermediate.dense,"[3072, 768]",2359296,148663 59,bert.encoder.layer.9.output.dense,"[768, 3072]",2359296,82095 60,bert.encoder.layer.10.attention.self.query,"[768, 768]",589824,19588 61,bert.encoder.layer.10.attention.self.key,"[768, 768]",589824,19232 62,bert.encoder.layer.10.attention.self.value,"[768, 768]",589824,5844 63,bert.encoder.layer.10.attention.output.dense,"[768, 768]",589824,4472 64,bert.encoder.layer.10.intermediate.dense,"[3072, 768]",2359296,95918 65,bert.encoder.layer.10.output.dense,"[768, 3072]",2359296,40352 66,bert.encoder.layer.11.attention.self.query,"[768, 768]",589824,4707 67,bert.encoder.layer.11.attention.self.key,"[768, 768]",589824,5807 68,bert.encoder.layer.11.attention.self.value,"[768, 768]",589824,2160 69,bert.encoder.layer.11.attention.output.dense,"[768, 768]",589824,1310 70,bert.encoder.layer.11.intermediate.dense,"[3072, 768]",2359296,71477 71,bert.encoder.layer.11.output.dense,"[768, 3072]",2359296,26152