,linear_id,shape,param_count,nnz_count 0,bert.encoder.layer.0.attention.self.query,"[768, 768]",589824,55856 1,bert.encoder.layer.0.attention.self.key,"[768, 768]",589824,43957 2,bert.encoder.layer.0.attention.self.value,"[768, 768]",589824,89632 3,bert.encoder.layer.0.attention.output.dense,"[768, 768]",589824,110940 4,bert.encoder.layer.0.intermediate.dense,"[3072, 768]",2359296,1531180 5,bert.encoder.layer.0.output.dense,"[768, 3072]",2359296,1452615 6,bert.encoder.layer.1.attention.self.query,"[768, 768]",589824,115146 7,bert.encoder.layer.1.attention.self.key,"[768, 768]",589824,102899 8,bert.encoder.layer.1.attention.self.value,"[768, 768]",589824,116354 9,bert.encoder.layer.1.attention.output.dense,"[768, 768]",589824,126468 10,bert.encoder.layer.1.intermediate.dense,"[3072, 768]",2359296,1580627 11,bert.encoder.layer.1.output.dense,"[768, 3072]",2359296,1445362 12,bert.encoder.layer.2.attention.self.query,"[768, 768]",589824,140857 13,bert.encoder.layer.2.attention.self.key,"[768, 768]",589824,133268 14,bert.encoder.layer.2.attention.self.value,"[768, 768]",589824,141184 15,bert.encoder.layer.2.attention.output.dense,"[768, 768]",589824,140893 16,bert.encoder.layer.2.intermediate.dense,"[3072, 768]",2359296,1608609 17,bert.encoder.layer.2.output.dense,"[768, 3072]",2359296,1471410 18,bert.encoder.layer.3.attention.self.query,"[768, 768]",589824,150136 19,bert.encoder.layer.3.attention.self.key,"[768, 768]",589824,150781 20,bert.encoder.layer.3.attention.self.value,"[768, 768]",589824,180987 21,bert.encoder.layer.3.attention.output.dense,"[768, 768]",589824,175778 22,bert.encoder.layer.3.intermediate.dense,"[3072, 768]",2359296,1606543 23,bert.encoder.layer.3.output.dense,"[768, 3072]",2359296,1456896 24,bert.encoder.layer.4.attention.self.query,"[768, 768]",589824,147969 25,bert.encoder.layer.4.attention.self.key,"[768, 768]",589824,145851 26,bert.encoder.layer.4.attention.self.value,"[768, 768]",589824,191670 27,bert.encoder.layer.4.attention.output.dense,"[768, 768]",589824,190871 28,bert.encoder.layer.4.intermediate.dense,"[3072, 768]",2359296,1597771 29,bert.encoder.layer.4.output.dense,"[768, 3072]",2359296,1428619 30,bert.encoder.layer.5.attention.self.query,"[768, 768]",589824,131102 31,bert.encoder.layer.5.attention.self.key,"[768, 768]",589824,139321 32,bert.encoder.layer.5.attention.self.value,"[768, 768]",589824,192204 33,bert.encoder.layer.5.attention.output.dense,"[768, 768]",589824,185803 34,bert.encoder.layer.5.intermediate.dense,"[3072, 768]",2359296,1616886 35,bert.encoder.layer.5.output.dense,"[768, 3072]",2359296,1426761 36,bert.encoder.layer.6.attention.self.query,"[768, 768]",589824,132493 37,bert.encoder.layer.6.attention.self.key,"[768, 768]",589824,142601 38,bert.encoder.layer.6.attention.self.value,"[768, 768]",589824,193504 39,bert.encoder.layer.6.attention.output.dense,"[768, 768]",589824,179860 40,bert.encoder.layer.6.intermediate.dense,"[3072, 768]",2359296,1557741 41,bert.encoder.layer.6.output.dense,"[768, 3072]",2359296,1345610 42,bert.encoder.layer.7.attention.self.query,"[768, 768]",589824,98196 43,bert.encoder.layer.7.attention.self.key,"[768, 768]",589824,112797 44,bert.encoder.layer.7.attention.self.value,"[768, 768]",589824,166399 45,bert.encoder.layer.7.attention.output.dense,"[768, 768]",589824,150757 46,bert.encoder.layer.7.intermediate.dense,"[3072, 768]",2359296,1395257 47,bert.encoder.layer.7.output.dense,"[768, 3072]",2359296,1209690 48,bert.encoder.layer.8.attention.self.query,"[768, 768]",589824,119328 49,bert.encoder.layer.8.attention.self.key,"[768, 768]",589824,127164 50,bert.encoder.layer.8.attention.self.value,"[768, 768]",589824,189418 51,bert.encoder.layer.8.attention.output.dense,"[768, 768]",589824,161706 52,bert.encoder.layer.8.intermediate.dense,"[3072, 768]",2359296,1154515 53,bert.encoder.layer.8.output.dense,"[768, 3072]",2359296,974995 54,bert.encoder.layer.9.attention.self.query,"[768, 768]",589824,113893 55,bert.encoder.layer.9.attention.self.key,"[768, 768]",589824,114749 56,bert.encoder.layer.9.attention.self.value,"[768, 768]",589824,81969 57,bert.encoder.layer.9.attention.output.dense,"[768, 768]",589824,68461 58,bert.encoder.layer.9.intermediate.dense,"[3072, 768]",2359296,702337 59,bert.encoder.layer.9.output.dense,"[768, 3072]",2359296,576653 60,bert.encoder.layer.10.attention.self.query,"[768, 768]",589824,76831 61,bert.encoder.layer.10.attention.self.key,"[768, 768]",589824,78612 62,bert.encoder.layer.10.attention.self.value,"[768, 768]",589824,43956 63,bert.encoder.layer.10.attention.output.dense,"[768, 768]",589824,32761 64,bert.encoder.layer.10.intermediate.dense,"[3072, 768]",2359296,472830 65,bert.encoder.layer.10.output.dense,"[768, 3072]",2359296,346292 66,bert.encoder.layer.11.attention.self.query,"[768, 768]",589824,30540 67,bert.encoder.layer.11.attention.self.key,"[768, 768]",589824,36104 68,bert.encoder.layer.11.attention.self.value,"[768, 768]",589824,23388 69,bert.encoder.layer.11.attention.output.dense,"[768, 768]",589824,12039 70,bert.encoder.layer.11.intermediate.dense,"[3072, 768]",2359296,386672 71,bert.encoder.layer.11.output.dense,"[768, 3072]",2359296,167064