,layer_id,layer_type,param_type,shape,nparam,nnz,sparsity 0,bert.embeddings.word_embeddings,Embedding,weight,"[30522, 768]",23440896,23440896,0.0 1,bert.embeddings.position_embeddings,Embedding,weight,"[512, 768]",393216,393216,0.0 2,bert.embeddings.token_type_embeddings,Embedding,weight,"[2, 768]",1536,1536,0.0 3,bert.embeddings.LayerNorm,LayerNorm,weight,[768],768,768,0.0 4,bert.embeddings.LayerNorm,LayerNorm,bias,[768],768,768,0.0 5,bert.encoder.layer.0.attention.self.query,Linear,weight,"[768, 768]",589824,589824,0.0 6,bert.encoder.layer.0.attention.self.query,Linear,bias,[768],768,768,0.0 7,bert.encoder.layer.0.attention.self.key,Linear,weight,"[768, 768]",589824,589824,0.0 8,bert.encoder.layer.0.attention.self.key,Linear,bias,[768],768,768,0.0 9,bert.encoder.layer.0.attention.self.value,Linear,weight,"[768, 768]",589824,589824,0.0 10,bert.encoder.layer.0.attention.self.value,Linear,bias,[768],768,768,0.0 11,bert.encoder.layer.0.attention.output.dense,Linear,weight,"[768, 768]",589824,589824,0.0 12,bert.encoder.layer.0.attention.output.dense,Linear,bias,[768],768,768,0.0 13,bert.encoder.layer.0.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 14,bert.encoder.layer.0.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 15,bert.encoder.layer.0.intermediate.dense,Linear,weight,"[3072, 768]",2359296,2359296,0.0 16,bert.encoder.layer.0.intermediate.dense,Linear,bias,[3072],3072,3072,0.0 17,bert.encoder.layer.0.output.dense,Linear,weight,"[768, 3072]",2359296,2359296,0.0 18,bert.encoder.layer.0.output.dense,Linear,bias,[768],768,768,0.0 19,bert.encoder.layer.0.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 20,bert.encoder.layer.0.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 21,bert.encoder.layer.1.attention.self.query,Linear,weight,"[768, 768]",589824,589824,0.0 22,bert.encoder.layer.1.attention.self.query,Linear,bias,[768],768,768,0.0 23,bert.encoder.layer.1.attention.self.key,Linear,weight,"[768, 768]",589824,589824,0.0 24,bert.encoder.layer.1.attention.self.key,Linear,bias,[768],768,768,0.0 25,bert.encoder.layer.1.attention.self.value,Linear,weight,"[768, 768]",589824,589824,0.0 26,bert.encoder.layer.1.attention.self.value,Linear,bias,[768],768,768,0.0 27,bert.encoder.layer.1.attention.output.dense,Linear,weight,"[768, 768]",589824,589824,0.0 28,bert.encoder.layer.1.attention.output.dense,Linear,bias,[768],768,768,0.0 29,bert.encoder.layer.1.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 30,bert.encoder.layer.1.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 31,bert.encoder.layer.1.intermediate.dense,Linear,weight,"[3072, 768]",2359296,2359296,0.0 32,bert.encoder.layer.1.intermediate.dense,Linear,bias,[3072],3072,3072,0.0 33,bert.encoder.layer.1.output.dense,Linear,weight,"[768, 3072]",2359296,2359296,0.0 34,bert.encoder.layer.1.output.dense,Linear,bias,[768],768,768,0.0 35,bert.encoder.layer.1.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 36,bert.encoder.layer.1.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 37,bert.encoder.layer.2.attention.self.query,Linear,weight,"[768, 768]",589824,589824,0.0 38,bert.encoder.layer.2.attention.self.query,Linear,bias,[768],768,768,0.0 39,bert.encoder.layer.2.attention.self.key,Linear,weight,"[768, 768]",589824,589824,0.0 40,bert.encoder.layer.2.attention.self.key,Linear,bias,[768],768,768,0.0 41,bert.encoder.layer.2.attention.self.value,Linear,weight,"[768, 768]",589824,589824,0.0 42,bert.encoder.layer.2.attention.self.value,Linear,bias,[768],768,768,0.0 43,bert.encoder.layer.2.attention.output.dense,Linear,weight,"[768, 768]",589824,589824,0.0 44,bert.encoder.layer.2.attention.output.dense,Linear,bias,[768],768,768,0.0 45,bert.encoder.layer.2.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 46,bert.encoder.layer.2.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 47,bert.encoder.layer.2.intermediate.dense,Linear,weight,"[3072, 768]",2359296,2359296,0.0 48,bert.encoder.layer.2.intermediate.dense,Linear,bias,[3072],3072,3072,0.0 49,bert.encoder.layer.2.output.dense,Linear,weight,"[768, 3072]",2359296,2359296,0.0 50,bert.encoder.layer.2.output.dense,Linear,bias,[768],768,768,0.0 51,bert.encoder.layer.2.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 52,bert.encoder.layer.2.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 53,bert.encoder.layer.3.attention.self.query,Linear,weight,"[768, 768]",589824,589824,0.0 54,bert.encoder.layer.3.attention.self.query,Linear,bias,[768],768,768,0.0 55,bert.encoder.layer.3.attention.self.key,Linear,weight,"[768, 768]",589824,589824,0.0 56,bert.encoder.layer.3.attention.self.key,Linear,bias,[768],768,768,0.0 57,bert.encoder.layer.3.attention.self.value,Linear,weight,"[768, 768]",589824,589824,0.0 58,bert.encoder.layer.3.attention.self.value,Linear,bias,[768],768,768,0.0 59,bert.encoder.layer.3.attention.output.dense,Linear,weight,"[768, 768]",589824,589824,0.0 60,bert.encoder.layer.3.attention.output.dense,Linear,bias,[768],768,768,0.0 61,bert.encoder.layer.3.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 62,bert.encoder.layer.3.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 63,bert.encoder.layer.3.intermediate.dense,Linear,weight,"[3072, 768]",2359296,2359296,0.0 64,bert.encoder.layer.3.intermediate.dense,Linear,bias,[3072],3072,3072,0.0 65,bert.encoder.layer.3.output.dense,Linear,weight,"[768, 3072]",2359296,2359296,0.0 66,bert.encoder.layer.3.output.dense,Linear,bias,[768],768,768,0.0 67,bert.encoder.layer.3.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 68,bert.encoder.layer.3.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 69,bert.encoder.layer.4.attention.self.query,Linear,weight,"[768, 768]",589824,589824,0.0 70,bert.encoder.layer.4.attention.self.query,Linear,bias,[768],768,768,0.0 71,bert.encoder.layer.4.attention.self.key,Linear,weight,"[768, 768]",589824,589824,0.0 72,bert.encoder.layer.4.attention.self.key,Linear,bias,[768],768,768,0.0 73,bert.encoder.layer.4.attention.self.value,Linear,weight,"[768, 768]",589824,589824,0.0 74,bert.encoder.layer.4.attention.self.value,Linear,bias,[768],768,768,0.0 75,bert.encoder.layer.4.attention.output.dense,Linear,weight,"[768, 768]",589824,589824,0.0 76,bert.encoder.layer.4.attention.output.dense,Linear,bias,[768],768,768,0.0 77,bert.encoder.layer.4.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 78,bert.encoder.layer.4.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 79,bert.encoder.layer.4.intermediate.dense,Linear,weight,"[3072, 768]",2359296,2359296,0.0 80,bert.encoder.layer.4.intermediate.dense,Linear,bias,[3072],3072,3072,0.0 81,bert.encoder.layer.4.output.dense,Linear,weight,"[768, 3072]",2359296,2359296,0.0 82,bert.encoder.layer.4.output.dense,Linear,bias,[768],768,768,0.0 83,bert.encoder.layer.4.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 84,bert.encoder.layer.4.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 85,bert.encoder.layer.5.attention.self.query,Linear,weight,"[768, 768]",589824,589824,0.0 86,bert.encoder.layer.5.attention.self.query,Linear,bias,[768],768,768,0.0 87,bert.encoder.layer.5.attention.self.key,Linear,weight,"[768, 768]",589824,589824,0.0 88,bert.encoder.layer.5.attention.self.key,Linear,bias,[768],768,768,0.0 89,bert.encoder.layer.5.attention.self.value,Linear,weight,"[768, 768]",589824,589824,0.0 90,bert.encoder.layer.5.attention.self.value,Linear,bias,[768],768,768,0.0 91,bert.encoder.layer.5.attention.output.dense,Linear,weight,"[768, 768]",589824,589824,0.0 92,bert.encoder.layer.5.attention.output.dense,Linear,bias,[768],768,768,0.0 93,bert.encoder.layer.5.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 94,bert.encoder.layer.5.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 95,bert.encoder.layer.5.intermediate.dense,Linear,weight,"[3072, 768]",2359296,2359296,0.0 96,bert.encoder.layer.5.intermediate.dense,Linear,bias,[3072],3072,3072,0.0 97,bert.encoder.layer.5.output.dense,Linear,weight,"[768, 3072]",2359296,2359296,0.0 98,bert.encoder.layer.5.output.dense,Linear,bias,[768],768,768,0.0 99,bert.encoder.layer.5.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 100,bert.encoder.layer.5.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 101,bert.encoder.layer.6.attention.self.query,Linear,weight,"[768, 768]",589824,589824,0.0 102,bert.encoder.layer.6.attention.self.query,Linear,bias,[768],768,768,0.0 103,bert.encoder.layer.6.attention.self.key,Linear,weight,"[768, 768]",589824,589824,0.0 104,bert.encoder.layer.6.attention.self.key,Linear,bias,[768],768,768,0.0 105,bert.encoder.layer.6.attention.self.value,Linear,weight,"[768, 768]",589824,589824,0.0 106,bert.encoder.layer.6.attention.self.value,Linear,bias,[768],768,768,0.0 107,bert.encoder.layer.6.attention.output.dense,Linear,weight,"[768, 768]",589824,589824,0.0 108,bert.encoder.layer.6.attention.output.dense,Linear,bias,[768],768,768,0.0 109,bert.encoder.layer.6.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 110,bert.encoder.layer.6.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 111,bert.encoder.layer.6.intermediate.dense,Linear,weight,"[3072, 768]",2359296,2359296,0.0 112,bert.encoder.layer.6.intermediate.dense,Linear,bias,[3072],3072,3072,0.0 113,bert.encoder.layer.6.output.dense,Linear,weight,"[768, 3072]",2359296,2359296,0.0 114,bert.encoder.layer.6.output.dense,Linear,bias,[768],768,768,0.0 115,bert.encoder.layer.6.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 116,bert.encoder.layer.6.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 117,bert.encoder.layer.7.attention.self.query,Linear,weight,"[768, 768]",589824,589824,0.0 118,bert.encoder.layer.7.attention.self.query,Linear,bias,[768],768,768,0.0 119,bert.encoder.layer.7.attention.self.key,Linear,weight,"[768, 768]",589824,589824,0.0 120,bert.encoder.layer.7.attention.self.key,Linear,bias,[768],768,768,0.0 121,bert.encoder.layer.7.attention.self.value,Linear,weight,"[768, 768]",589824,589824,0.0 122,bert.encoder.layer.7.attention.self.value,Linear,bias,[768],768,768,0.0 123,bert.encoder.layer.7.attention.output.dense,Linear,weight,"[768, 768]",589824,589824,0.0 124,bert.encoder.layer.7.attention.output.dense,Linear,bias,[768],768,768,0.0 125,bert.encoder.layer.7.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 126,bert.encoder.layer.7.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 127,bert.encoder.layer.7.intermediate.dense,Linear,weight,"[3072, 768]",2359296,2359296,0.0 128,bert.encoder.layer.7.intermediate.dense,Linear,bias,[3072],3072,3072,0.0 129,bert.encoder.layer.7.output.dense,Linear,weight,"[768, 3072]",2359296,2359296,0.0 130,bert.encoder.layer.7.output.dense,Linear,bias,[768],768,768,0.0 131,bert.encoder.layer.7.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 132,bert.encoder.layer.7.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 133,bert.encoder.layer.8.attention.self.query,Linear,weight,"[768, 768]",589824,589824,0.0 134,bert.encoder.layer.8.attention.self.query,Linear,bias,[768],768,768,0.0 135,bert.encoder.layer.8.attention.self.key,Linear,weight,"[768, 768]",589824,589824,0.0 136,bert.encoder.layer.8.attention.self.key,Linear,bias,[768],768,768,0.0 137,bert.encoder.layer.8.attention.self.value,Linear,weight,"[768, 768]",589824,589824,0.0 138,bert.encoder.layer.8.attention.self.value,Linear,bias,[768],768,768,0.0 139,bert.encoder.layer.8.attention.output.dense,Linear,weight,"[768, 768]",589824,589824,0.0 140,bert.encoder.layer.8.attention.output.dense,Linear,bias,[768],768,768,0.0 141,bert.encoder.layer.8.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 142,bert.encoder.layer.8.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 143,bert.encoder.layer.8.intermediate.dense,Linear,weight,"[3072, 768]",2359296,2359296,0.0 144,bert.encoder.layer.8.intermediate.dense,Linear,bias,[3072],3072,3072,0.0 145,bert.encoder.layer.8.output.dense,Linear,weight,"[768, 3072]",2359296,2359296,0.0 146,bert.encoder.layer.8.output.dense,Linear,bias,[768],768,768,0.0 147,bert.encoder.layer.8.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 148,bert.encoder.layer.8.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 149,bert.encoder.layer.9.attention.self.query,Linear,weight,"[768, 768]",589824,589824,0.0 150,bert.encoder.layer.9.attention.self.query,Linear,bias,[768],768,768,0.0 151,bert.encoder.layer.9.attention.self.key,Linear,weight,"[768, 768]",589824,589824,0.0 152,bert.encoder.layer.9.attention.self.key,Linear,bias,[768],768,768,0.0 153,bert.encoder.layer.9.attention.self.value,Linear,weight,"[768, 768]",589824,589824,0.0 154,bert.encoder.layer.9.attention.self.value,Linear,bias,[768],768,768,0.0 155,bert.encoder.layer.9.attention.output.dense,Linear,weight,"[768, 768]",589824,589824,0.0 156,bert.encoder.layer.9.attention.output.dense,Linear,bias,[768],768,768,0.0 157,bert.encoder.layer.9.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 158,bert.encoder.layer.9.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 159,bert.encoder.layer.9.intermediate.dense,Linear,weight,"[3072, 768]",2359296,2359296,0.0 160,bert.encoder.layer.9.intermediate.dense,Linear,bias,[3072],3072,3072,0.0 161,bert.encoder.layer.9.output.dense,Linear,weight,"[768, 3072]",2359296,2359296,0.0 162,bert.encoder.layer.9.output.dense,Linear,bias,[768],768,768,0.0 163,bert.encoder.layer.9.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 164,bert.encoder.layer.9.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 165,bert.encoder.layer.10.attention.self.query,Linear,weight,"[768, 768]",589824,589824,0.0 166,bert.encoder.layer.10.attention.self.query,Linear,bias,[768],768,768,0.0 167,bert.encoder.layer.10.attention.self.key,Linear,weight,"[768, 768]",589824,589824,0.0 168,bert.encoder.layer.10.attention.self.key,Linear,bias,[768],768,768,0.0 169,bert.encoder.layer.10.attention.self.value,Linear,weight,"[768, 768]",589824,589824,0.0 170,bert.encoder.layer.10.attention.self.value,Linear,bias,[768],768,768,0.0 171,bert.encoder.layer.10.attention.output.dense,Linear,weight,"[768, 768]",589824,589824,0.0 172,bert.encoder.layer.10.attention.output.dense,Linear,bias,[768],768,768,0.0 173,bert.encoder.layer.10.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 174,bert.encoder.layer.10.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 175,bert.encoder.layer.10.intermediate.dense,Linear,weight,"[3072, 768]",2359296,2359296,0.0 176,bert.encoder.layer.10.intermediate.dense,Linear,bias,[3072],3072,3072,0.0 177,bert.encoder.layer.10.output.dense,Linear,weight,"[768, 3072]",2359296,2359296,0.0 178,bert.encoder.layer.10.output.dense,Linear,bias,[768],768,768,0.0 179,bert.encoder.layer.10.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 180,bert.encoder.layer.10.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 181,bert.encoder.layer.11.attention.self.query,Linear,weight,"[768, 768]",589824,589824,0.0 182,bert.encoder.layer.11.attention.self.query,Linear,bias,[768],768,768,0.0 183,bert.encoder.layer.11.attention.self.key,Linear,weight,"[768, 768]",589824,589824,0.0 184,bert.encoder.layer.11.attention.self.key,Linear,bias,[768],768,768,0.0 185,bert.encoder.layer.11.attention.self.value,Linear,weight,"[768, 768]",589824,589824,0.0 186,bert.encoder.layer.11.attention.self.value,Linear,bias,[768],768,768,0.0 187,bert.encoder.layer.11.attention.output.dense,Linear,weight,"[768, 768]",589824,589824,0.0 188,bert.encoder.layer.11.attention.output.dense,Linear,bias,[768],768,768,0.0 189,bert.encoder.layer.11.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 190,bert.encoder.layer.11.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 191,bert.encoder.layer.11.intermediate.dense,Linear,weight,"[3072, 768]",2359296,2359296,0.0 192,bert.encoder.layer.11.intermediate.dense,Linear,bias,[3072],3072,3072,0.0 193,bert.encoder.layer.11.output.dense,Linear,weight,"[768, 3072]",2359296,2359296,0.0 194,bert.encoder.layer.11.output.dense,Linear,bias,[768],768,768,0.0 195,bert.encoder.layer.11.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 196,bert.encoder.layer.11.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 197,qa_outputs,Linear,weight,"[2, 768]",1536,1536,0.0 198,qa_outputs,Linear,bias,[2],2,2,0.0