,linear_id,shape,param_count,nnz_count 0,bert.encoder.layer.0.attention.self.query,"[768, 768]",589824,93964 1,bert.encoder.layer.0.attention.self.key,"[768, 768]",589824,73700 2,bert.encoder.layer.0.attention.self.value,"[768, 768]",589824,137467 3,bert.encoder.layer.0.attention.output.dense,"[768, 768]",589824,160390 4,bert.encoder.layer.0.intermediate.dense,"[3072, 768]",2359296,1709314 5,bert.encoder.layer.0.output.dense,"[768, 3072]",2359296,1641896 6,bert.encoder.layer.1.attention.self.query,"[768, 768]",589824,159970 7,bert.encoder.layer.1.attention.self.key,"[768, 768]",589824,141738 8,bert.encoder.layer.1.attention.self.value,"[768, 768]",589824,162459 9,bert.encoder.layer.1.attention.output.dense,"[768, 768]",589824,172052 10,bert.encoder.layer.1.intermediate.dense,"[3072, 768]",2359296,1741238 11,bert.encoder.layer.1.output.dense,"[768, 3072]",2359296,1626278 12,bert.encoder.layer.2.attention.self.query,"[768, 768]",589824,186643 13,bert.encoder.layer.2.attention.self.key,"[768, 768]",589824,178576 14,bert.encoder.layer.2.attention.self.value,"[768, 768]",589824,192860 15,bert.encoder.layer.2.attention.output.dense,"[768, 768]",589824,192555 16,bert.encoder.layer.2.intermediate.dense,"[3072, 768]",2359296,1768328 17,bert.encoder.layer.2.output.dense,"[768, 3072]",2359296,1655075 18,bert.encoder.layer.3.attention.self.query,"[768, 768]",589824,198697 19,bert.encoder.layer.3.attention.self.key,"[768, 768]",589824,196134 20,bert.encoder.layer.3.attention.self.value,"[768, 768]",589824,237622 21,bert.encoder.layer.3.attention.output.dense,"[768, 768]",589824,230620 22,bert.encoder.layer.3.intermediate.dense,"[3072, 768]",2359296,1766508 23,bert.encoder.layer.3.output.dense,"[768, 3072]",2359296,1642235 24,bert.encoder.layer.4.attention.self.query,"[768, 768]",589824,197935 25,bert.encoder.layer.4.attention.self.key,"[768, 768]",589824,194664 26,bert.encoder.layer.4.attention.self.value,"[768, 768]",589824,249031 27,bert.encoder.layer.4.attention.output.dense,"[768, 768]",589824,246498 28,bert.encoder.layer.4.intermediate.dense,"[3072, 768]",2359296,1760924 29,bert.encoder.layer.4.output.dense,"[768, 3072]",2359296,1621864 30,bert.encoder.layer.5.attention.self.query,"[768, 768]",589824,183234 31,bert.encoder.layer.5.attention.self.key,"[768, 768]",589824,190974 32,bert.encoder.layer.5.attention.self.value,"[768, 768]",589824,252216 33,bert.encoder.layer.5.attention.output.dense,"[768, 768]",589824,244656 34,bert.encoder.layer.5.intermediate.dense,"[3072, 768]",2359296,1779234 35,bert.encoder.layer.5.output.dense,"[768, 3072]",2359296,1623945 36,bert.encoder.layer.6.attention.self.query,"[768, 768]",589824,185037 37,bert.encoder.layer.6.attention.self.key,"[768, 768]",589824,195911 38,bert.encoder.layer.6.attention.self.value,"[768, 768]",589824,253876 39,bert.encoder.layer.6.attention.output.dense,"[768, 768]",589824,239963 40,bert.encoder.layer.6.intermediate.dense,"[3072, 768]",2359296,1729192 41,bert.encoder.layer.6.output.dense,"[768, 3072]",2359296,1551791 42,bert.encoder.layer.7.attention.self.query,"[768, 768]",589824,141144 43,bert.encoder.layer.7.attention.self.key,"[768, 768]",589824,154678 44,bert.encoder.layer.7.attention.self.value,"[768, 768]",589824,216034 45,bert.encoder.layer.7.attention.output.dense,"[768, 768]",589824,203449 46,bert.encoder.layer.7.intermediate.dense,"[3072, 768]",2359296,1588111 47,bert.encoder.layer.7.output.dense,"[768, 3072]",2359296,1430335 48,bert.encoder.layer.8.attention.self.query,"[768, 768]",589824,171025 49,bert.encoder.layer.8.attention.self.key,"[768, 768]",589824,176631 50,bert.encoder.layer.8.attention.self.value,"[768, 768]",589824,249792 51,bert.encoder.layer.8.attention.output.dense,"[768, 768]",589824,221322 52,bert.encoder.layer.8.intermediate.dense,"[3072, 768]",2359296,1369543 53,bert.encoder.layer.8.output.dense,"[768, 3072]",2359296,1208818 54,bert.encoder.layer.9.attention.self.query,"[768, 768]",589824,143797 55,bert.encoder.layer.9.attention.self.key,"[768, 768]",589824,143411 56,bert.encoder.layer.9.attention.self.value,"[768, 768]",589824,122373 57,bert.encoder.layer.9.attention.output.dense,"[768, 768]",589824,107006 58,bert.encoder.layer.9.intermediate.dense,"[3072, 768]",2359296,918887 59,bert.encoder.layer.9.output.dense,"[768, 3072]",2359296,802323 60,bert.encoder.layer.10.attention.self.query,"[768, 768]",589824,98505 61,bert.encoder.layer.10.attention.self.key,"[768, 768]",589824,99921 62,bert.encoder.layer.10.attention.self.value,"[768, 768]",589824,66491 63,bert.encoder.layer.10.attention.output.dense,"[768, 768]",589824,51630 64,bert.encoder.layer.10.intermediate.dense,"[3072, 768]",2359296,652757 65,bert.encoder.layer.10.output.dense,"[768, 3072]",2359296,533202 66,bert.encoder.layer.11.attention.self.query,"[768, 768]",589824,50457 67,bert.encoder.layer.11.attention.self.key,"[768, 768]",589824,55196 68,bert.encoder.layer.11.attention.self.value,"[768, 768]",589824,40581 69,bert.encoder.layer.11.attention.output.dense,"[768, 768]",589824,23933 70,bert.encoder.layer.11.intermediate.dense,"[3072, 768]",2359296,535793 71,bert.encoder.layer.11.output.dense,"[768, 3072]",2359296,255106