,linear_id,shape,param_count,nnz_count 0,bert.encoder.layer.0.attention.self.query,"[768, 768]",589824,16252 1,bert.encoder.layer.0.attention.self.key,"[768, 768]",589824,13054 2,bert.encoder.layer.0.attention.self.value,"[768, 768]",589824,31394 3,bert.encoder.layer.0.attention.output.dense,"[768, 768]",589824,44074 4,bert.encoder.layer.0.intermediate.dense,"[3072, 768]",2359296,1159424 5,bert.encoder.layer.0.output.dense,"[768, 3072]",2359296,1061306 6,bert.encoder.layer.1.attention.self.query,"[768, 768]",589824,58240 7,bert.encoder.layer.1.attention.self.key,"[768, 768]",589824,54729 8,bert.encoder.layer.1.attention.self.value,"[768, 768]",589824,57604 9,bert.encoder.layer.1.attention.output.dense,"[768, 768]",589824,65732 10,bert.encoder.layer.1.intermediate.dense,"[3072, 768]",2359296,1230849 11,bert.encoder.layer.1.output.dense,"[768, 3072]",2359296,1063370 12,bert.encoder.layer.2.attention.self.query,"[768, 768]",589824,76074 13,bert.encoder.layer.2.attention.self.key,"[768, 768]",589824,72798 14,bert.encoder.layer.2.attention.self.value,"[768, 768]",589824,69367 15,bert.encoder.layer.2.attention.output.dense,"[768, 768]",589824,68078 16,bert.encoder.layer.2.intermediate.dense,"[3072, 768]",2359296,1260119 17,bert.encoder.layer.2.output.dense,"[768, 3072]",2359296,1081396 18,bert.encoder.layer.3.attention.self.query,"[768, 768]",589824,79954 19,bert.encoder.layer.3.attention.self.key,"[768, 768]",589824,84045 20,bert.encoder.layer.3.attention.self.value,"[768, 768]",589824,97323 21,bert.encoder.layer.3.attention.output.dense,"[768, 768]",589824,95918 22,bert.encoder.layer.3.intermediate.dense,"[3072, 768]",2359296,1263350 23,bert.encoder.layer.3.output.dense,"[768, 3072]",2359296,1069440 24,bert.encoder.layer.4.attention.self.query,"[768, 768]",589824,77306 25,bert.encoder.layer.4.attention.self.key,"[768, 768]",589824,77738 26,bert.encoder.layer.4.attention.self.value,"[768, 768]",589824,103253 27,bert.encoder.layer.4.attention.output.dense,"[768, 768]",589824,103673 28,bert.encoder.layer.4.intermediate.dense,"[3072, 768]",2359296,1253106 29,bert.encoder.layer.4.output.dense,"[768, 3072]",2359296,1031342 30,bert.encoder.layer.5.attention.self.query,"[768, 768]",589824,59269 31,bert.encoder.layer.5.attention.self.key,"[768, 768]",589824,66896 32,bert.encoder.layer.5.attention.self.value,"[768, 768]",589824,97790 33,bert.encoder.layer.5.attention.output.dense,"[768, 768]",589824,93833 34,bert.encoder.layer.5.intermediate.dense,"[3072, 768]",2359296,1263747 35,bert.encoder.layer.5.output.dense,"[768, 3072]",2359296,1014643 36,bert.encoder.layer.6.attention.self.query,"[768, 768]",589824,59641 37,bert.encoder.layer.6.attention.self.key,"[768, 768]",589824,67416 38,bert.encoder.layer.6.attention.self.value,"[768, 768]",589824,95731 39,bert.encoder.layer.6.attention.output.dense,"[768, 768]",589824,85998 40,bert.encoder.layer.6.intermediate.dense,"[3072, 768]",2359296,1181526 41,bert.encoder.layer.6.output.dense,"[768, 3072]",2359296,920703 42,bert.encoder.layer.7.attention.self.query,"[768, 768]",589824,44935 43,bert.encoder.layer.7.attention.self.key,"[768, 768]",589824,56868 44,bert.encoder.layer.7.attention.self.value,"[768, 768]",589824,89913 45,bert.encoder.layer.7.attention.output.dense,"[768, 768]",589824,74029 46,bert.encoder.layer.7.intermediate.dense,"[3072, 768]",2359296,995977 47,bert.encoder.layer.7.output.dense,"[768, 3072]",2359296,777939 48,bert.encoder.layer.8.attention.self.query,"[768, 768]",589824,49884 49,bert.encoder.layer.8.attention.self.key,"[768, 768]",589824,57649 50,bert.encoder.layer.8.attention.self.value,"[768, 768]",589824,92159 51,bert.encoder.layer.8.attention.output.dense,"[768, 768]",589824,71883 52,bert.encoder.layer.8.intermediate.dense,"[3072, 768]",2359296,757196 53,bert.encoder.layer.8.output.dense,"[768, 3072]",2359296,568698 54,bert.encoder.layer.9.attention.self.query,"[768, 768]",589824,68469 55,bert.encoder.layer.9.attention.self.key,"[768, 768]",589824,69557 56,bert.encoder.layer.9.attention.self.value,"[768, 768]",589824,32458 57,bert.encoder.layer.9.attention.output.dense,"[768, 768]",589824,23895 58,bert.encoder.layer.9.intermediate.dense,"[3072, 768]",2359296,359909 59,bert.encoder.layer.9.output.dense,"[768, 3072]",2359296,245729 60,bert.encoder.layer.10.attention.self.query,"[768, 768]",589824,42730 61,bert.encoder.layer.10.attention.self.key,"[768, 768]",589824,44139 62,bert.encoder.layer.10.attention.self.value,"[768, 768]",589824,17533 63,bert.encoder.layer.10.attention.output.dense,"[768, 768]",589824,12605 64,bert.encoder.layer.10.intermediate.dense,"[3072, 768]",2359296,233883 65,bert.encoder.layer.10.output.dense,"[768, 3072]",2359296,130882 66,bert.encoder.layer.11.attention.self.query,"[768, 768]",589824,11427 67,bert.encoder.layer.11.attention.self.key,"[768, 768]",589824,14775 68,bert.encoder.layer.11.attention.self.value,"[768, 768]",589824,6865 69,bert.encoder.layer.11.attention.output.dense,"[768, 768]",589824,3223 70,bert.encoder.layer.11.intermediate.dense,"[3072, 768]",2359296,190784 71,bert.encoder.layer.11.output.dense,"[768, 3072]",2359296,71760