[{"layer": 1, "module": "self_attention.query_key_value", "avg_loss": "0.0456", "time": "1.2718"}, {"layer": 1, "module": "self_attention.dense", "avg_loss": "0.0000", "time": "1.0337"}, {"layer": 1, "module": "mlp.dense_h_to_4h", "avg_loss": "0.1571", "time": "1.0908"}, {"layer": 1, "module": "mlp.dense_4h_to_h", "avg_loss": "0.0001", "time": "3.7028"}, {"layer": 2, "module": "self_attention.query_key_value", "avg_loss": "0.1605", "time": "1.0389"}, {"layer": 2, "module": "self_attention.dense", "avg_loss": "0.0000", "time": "1.0316"}, {"layer": 2, "module": "mlp.dense_h_to_4h", "avg_loss": "0.2443", "time": "1.0796"}, {"layer": 2, "module": "mlp.dense_4h_to_h", "avg_loss": "0.0001", "time": "3.6819"}, {"layer": 3, "module": "self_attention.query_key_value", "avg_loss": "0.3786", "time": "1.0382"}, {"layer": 3, "module": "self_attention.dense", "avg_loss": "0.0000", "time": "1.0262"}, {"layer": 3, "module": "mlp.dense_h_to_4h", "avg_loss": "0.3879", "time": "1.0717"}, {"layer": 3, "module": "mlp.dense_4h_to_h", "avg_loss": "0.0002", "time": "3.6723"}, {"layer": 4, "module": "self_attention.query_key_value", "avg_loss": "0.6848", "time": "1.0302"}, {"layer": 4, "module": "self_attention.dense", "avg_loss": "0.0001", "time": "1.0267"}, {"layer": 4, "module": "mlp.dense_h_to_4h", "avg_loss": "0.7047", "time": "1.0677"}, {"layer": 4, "module": "mlp.dense_4h_to_h", "avg_loss": "0.0003", "time": "3.6295"}, {"layer": 5, "module": "self_attention.query_key_value", "avg_loss": "0.4612", "time": "1.0190"}, {"layer": 5, "module": "self_attention.dense", "avg_loss": "0.0001", "time": "1.0164"}, {"layer": 5, "module": "mlp.dense_h_to_4h", "avg_loss": "1.1225", "time": "1.0964"}, {"layer": 5, "module": "mlp.dense_4h_to_h", "avg_loss": "0.0019", "time": "3.6779"}, {"layer": 6, "module": "self_attention.query_key_value", "avg_loss": "0.7061", "time": "1.0459"}, {"layer": 6, "module": "self_attention.dense", "avg_loss": "0.0001", "time": "1.0392"}, {"layer": 6, "module": "mlp.dense_h_to_4h", "avg_loss": "1.3170", "time": "1.0728"}, {"layer": 6, "module": "mlp.dense_4h_to_h", "avg_loss": "0.0008", "time": "3.6625"}, {"layer": 7, "module": "self_attention.query_key_value", "avg_loss": "1.0826", "time": "1.0185"}, {"layer": 7, "module": "self_attention.dense", "avg_loss": "0.0002", "time": "1.0363"}, {"layer": 7, "module": "mlp.dense_h_to_4h", "avg_loss": "1.5331", "time": "1.0850"}, {"layer": 7, "module": "mlp.dense_4h_to_h", "avg_loss": "0.0010", "time": "3.6696"}, {"layer": 8, "module": "self_attention.query_key_value", "avg_loss": "0.8504", "time": "1.0187"}, {"layer": 8, "module": "self_attention.dense", "avg_loss": "0.0002", "time": "1.0309"}, {"layer": 8, "module": "mlp.dense_h_to_4h", "avg_loss": "1.6909", "time": "1.0833"}, {"layer": 8, "module": "mlp.dense_4h_to_h", "avg_loss": "0.0014", "time": "3.5659"}, {"layer": 9, "module": "self_attention.query_key_value", "avg_loss": "0.8786", "time": "1.0437"}, {"layer": 9, "module": "self_attention.dense", "avg_loss": "0.0004", "time": "1.0418"}, {"layer": 9, "module": "mlp.dense_h_to_4h", "avg_loss": "1.8117", "time": "1.0881"}, {"layer": 9, "module": "mlp.dense_4h_to_h", "avg_loss": "0.0017", "time": "3.6858"}, {"layer": 10, "module": "self_attention.query_key_value", "avg_loss": "1.1535", "time": "1.0385"}, {"layer": 10, "module": "self_attention.dense", "avg_loss": "0.0006", "time": "1.0380"}, {"layer": 10, "module": "mlp.dense_h_to_4h", "avg_loss": "2.1952", "time": "1.1213"}, {"layer": 10, "module": "mlp.dense_4h_to_h", "avg_loss": "0.0021", "time": "3.8092"}, {"layer": 11, "module": "self_attention.query_key_value", "avg_loss": "0.9847", "time": "1.0402"}, {"layer": 11, "module": "self_attention.dense", "avg_loss": "0.0005", "time": "1.0221"}, {"layer": 11, "module": "mlp.dense_h_to_4h", "avg_loss": "2.2434", "time": "1.0912"}, {"layer": 11, "module": "mlp.dense_4h_to_h", "avg_loss": "0.0021", "time": "3.8060"}, {"layer": 12, "module": "self_attention.query_key_value", "avg_loss": "1.0150", "time": "1.0659"}, {"layer": 12, "module": "self_attention.dense", "avg_loss": "0.0005", "time": "1.0092"}, {"layer": 12, "module": "mlp.dense_h_to_4h", "avg_loss": "2.3823", "time": "1.0648"}, {"layer": 12, "module": "mlp.dense_4h_to_h", "avg_loss": "0.0022", "time": "3.6541"}, {"layer": 13, "module": "self_attention.query_key_value", "avg_loss": "1.1467", "time": "1.0281"}, {"layer": 13, "module": "self_attention.dense", "avg_loss": "0.0008", "time": "1.0230"}, {"layer": 13, "module": "mlp.dense_h_to_4h", "avg_loss": "2.4821", "time": "1.0655"}, {"layer": 13, "module": "mlp.dense_4h_to_h", "avg_loss": "0.0025", "time": "3.6630"}, {"layer": 14, "module": "self_attention.query_key_value", "avg_loss": "1.1363", "time": "1.1818"}, {"layer": 14, "module": "self_attention.dense", "avg_loss": "0.0006", "time": "1.0310"}, {"layer": 14, "module": "mlp.dense_h_to_4h", "avg_loss": "2.5727", "time": "1.1022"}, {"layer": 14, "module": "mlp.dense_4h_to_h", "avg_loss": "0.0028", "time": "3.6890"}, {"layer": 15, "module": "self_attention.query_key_value", "avg_loss": "1.1485", "time": "1.0339"}, {"layer": 15, "module": "self_attention.dense", "avg_loss": "0.0009", "time": "1.0220"}, {"layer": 15, "module": "mlp.dense_h_to_4h", "avg_loss": "2.5301", "time": "1.2172"}, {"layer": 15, "module": "mlp.dense_4h_to_h", "avg_loss": "0.0033", "time": "3.6426"}, {"layer": 16, "module": "self_attention.query_key_value", "avg_loss": "1.2379", "time": "1.0446"}, {"layer": 16, "module": "self_attention.dense", "avg_loss": "0.0011", "time": "1.0271"}, {"layer": 16, "module": "mlp.dense_h_to_4h", "avg_loss": "2.6314", "time": "1.0678"}, {"layer": 16, "module": "mlp.dense_4h_to_h", "avg_loss": "0.0039", "time": "3.7884"}, {"layer": 17, "module": "self_attention.query_key_value", "avg_loss": "1.3248", "time": "1.0270"}, {"layer": 17, "module": "self_attention.dense", "avg_loss": "0.0010", "time": "1.0212"}, {"layer": 17, "module": "mlp.dense_h_to_4h", "avg_loss": "2.6900", "time": "1.0614"}, {"layer": 17, "module": "mlp.dense_4h_to_h", "avg_loss": "0.0047", "time": "3.6395"}, {"layer": 18, "module": "self_attention.query_key_value", "avg_loss": "1.1093", "time": "1.0437"}, {"layer": 18, "module": "self_attention.dense", "avg_loss": "0.0014", "time": "1.0274"}, {"layer": 18, "module": "mlp.dense_h_to_4h", "avg_loss": "2.8140", "time": "1.0665"}, {"layer": 18, "module": "mlp.dense_4h_to_h", "avg_loss": "0.0057", "time": "3.6337"}, {"layer": 19, "module": "self_attention.query_key_value", "avg_loss": "2.6255", "time": "1.1932"}, {"layer": 19, "module": "self_attention.dense", "avg_loss": "0.0021", "time": "1.0432"}, {"layer": 19, "module": "mlp.dense_h_to_4h", "avg_loss": "2.7111", "time": "1.1013"}, {"layer": 19, "module": "mlp.dense_4h_to_h", "avg_loss": "0.0064", "time": "3.7450"}, {"layer": 20, "module": "self_attention.query_key_value", "avg_loss": "1.3077", "time": "1.0106"}, {"layer": 20, "module": "self_attention.dense", "avg_loss": "0.0024", "time": "1.0282"}, {"layer": 20, "module": "mlp.dense_h_to_4h", "avg_loss": "2.9936", "time": "1.0913"}, {"layer": 20, "module": "mlp.dense_4h_to_h", "avg_loss": "0.0076", "time": "3.6862"}, {"layer": 21, "module": "self_attention.query_key_value", "avg_loss": "1.2520", "time": "1.0366"}, {"layer": 21, "module": "self_attention.dense", "avg_loss": "0.0020", "time": "1.1751"}, {"layer": 21, "module": "mlp.dense_h_to_4h", "avg_loss": "3.4393", "time": "1.0965"}, {"layer": 21, "module": "mlp.dense_4h_to_h", "avg_loss": "0.0105", "time": "3.7824"}, {"layer": 22, "module": "self_attention.query_key_value", "avg_loss": "1.2995", "time": "1.1574"}, {"layer": 22, "module": "self_attention.dense", "avg_loss": "0.0029", "time": "1.0305"}, {"layer": 22, "module": "mlp.dense_h_to_4h", "avg_loss": "4.0799", "time": "1.0939"}, {"layer": 22, "module": "mlp.dense_4h_to_h", "avg_loss": "0.0155", "time": "3.6276"}, {"layer": 23, "module": "self_attention.query_key_value", "avg_loss": "2.1874", "time": "1.0271"}, {"layer": 23, "module": "self_attention.dense", "avg_loss": "0.0043", "time": "1.0270"}, {"layer": 23, "module": "mlp.dense_h_to_4h", "avg_loss": "4.5313", "time": "1.2096"}, {"layer": 23, "module": "mlp.dense_4h_to_h", "avg_loss": "0.0254", "time": "3.6722"}, {"layer": 24, "module": "self_attention.query_key_value", "avg_loss": "1.6847", "time": "1.0284"}, {"layer": 24, "module": "self_attention.dense", "avg_loss": "0.0070", "time": "1.0388"}, {"layer": 24, "module": "mlp.dense_h_to_4h", "avg_loss": "5.6362", "time": "1.0774"}, {"layer": 24, "module": "mlp.dense_4h_to_h", "avg_loss": "0.0353", "time": "3.6687"}, {"layer": 25, "module": "self_attention.query_key_value", "avg_loss": "2.1510", "time": "1.0317"}, {"layer": 25, "module": "self_attention.dense", "avg_loss": "0.0060", "time": "1.0542"}, {"layer": 25, "module": "mlp.dense_h_to_4h", "avg_loss": "6.8955", "time": "1.0717"}, {"layer": 25, "module": "mlp.dense_4h_to_h", "avg_loss": "0.0572", "time": "3.6365"}, {"layer": 26, "module": "self_attention.query_key_value", "avg_loss": "1.9770", "time": "1.1554"}, {"layer": 26, "module": "self_attention.dense", "avg_loss": "0.0075", "time": "1.0439"}, {"layer": 26, "module": "mlp.dense_h_to_4h", "avg_loss": "8.2434", "time": "1.0673"}, {"layer": 26, "module": "mlp.dense_4h_to_h", "avg_loss": "0.0837", "time": "3.6594"}, {"layer": 27, "module": "self_attention.query_key_value", "avg_loss": "2.5353", "time": "1.0217"}, {"layer": 27, "module": "self_attention.dense", "avg_loss": "0.0122", "time": "1.0433"}, {"layer": 27, "module": "mlp.dense_h_to_4h", "avg_loss": "9.7311", "time": "1.0844"}, {"layer": 27, "module": "mlp.dense_4h_to_h", "avg_loss": "0.1003", "time": "3.6338"}, {"layer": 28, "module": "self_attention.query_key_value", "avg_loss": "2.3269", "time": "1.0255"}, {"layer": 28, "module": "self_attention.dense", "avg_loss": "0.0095", "time": "1.0178"}, {"layer": 28, "module": "mlp.dense_h_to_4h", "avg_loss": "10.3435", "time": "1.0926"}, {"layer": 28, "module": "mlp.dense_4h_to_h", "avg_loss": "0.1182", "time": "3.7859"}, {"layer": 29, "module": "self_attention.query_key_value", "avg_loss": "2.9694", "time": "1.0350"}, {"layer": 29, "module": "self_attention.dense", "avg_loss": "0.0092", "time": "1.0313"}, {"layer": 29, "module": "mlp.dense_h_to_4h", "avg_loss": "11.6591", "time": "1.0561"}, {"layer": 29, "module": "mlp.dense_4h_to_h", "avg_loss": "0.1558", "time": "3.6218"}, {"layer": 30, "module": "self_attention.query_key_value", "avg_loss": "3.8364", "time": "1.0528"}, {"layer": 30, "module": "self_attention.dense", "avg_loss": "0.0182", "time": "1.0120"}, {"layer": 30, "module": "mlp.dense_h_to_4h", "avg_loss": "13.0933", "time": "1.0543"}, {"layer": 30, "module": "mlp.dense_4h_to_h", "avg_loss": "0.1781", "time": "3.7370"}, {"layer": 31, "module": "self_attention.query_key_value", "avg_loss": "2.9439", "time": "1.0055"}, {"layer": 31, "module": "self_attention.dense", "avg_loss": "0.0099", "time": "1.0255"}, {"layer": 31, "module": "mlp.dense_h_to_4h", "avg_loss": "13.6603", "time": "1.0676"}, {"layer": 31, "module": "mlp.dense_4h_to_h", "avg_loss": "0.2035", "time": "3.6714"}, {"layer": 32, "module": "self_attention.query_key_value", "avg_loss": "3.2459", "time": "1.0326"}, {"layer": 32, "module": "self_attention.dense", "avg_loss": "0.0181", "time": "1.0310"}, {"layer": 32, "module": "mlp.dense_h_to_4h", "avg_loss": "14.5814", "time": "1.2273"}, {"layer": 32, "module": "mlp.dense_4h_to_h", "avg_loss": "0.2260", "time": "3.7970"}, {"layer": 33, "module": "self_attention.query_key_value", "avg_loss": "4.4187", "time": "1.0329"}, {"layer": 33, "module": "self_attention.dense", "avg_loss": "0.0247", "time": "1.0081"}, {"layer": 33, "module": "mlp.dense_h_to_4h", "avg_loss": "16.2320", "time": "1.0881"}, {"layer": 33, "module": "mlp.dense_4h_to_h", "avg_loss": "0.2459", "time": "3.6692"}, {"layer": 34, "module": "self_attention.query_key_value", "avg_loss": "5.4903", "time": "1.0327"}, {"layer": 34, "module": "self_attention.dense", "avg_loss": "0.0282", "time": "1.0419"}, {"layer": 34, "module": "mlp.dense_h_to_4h", "avg_loss": "17.7958", "time": "1.2209"}, {"layer": 34, "module": "mlp.dense_4h_to_h", "avg_loss": "0.2842", "time": "3.6758"}, {"layer": 35, "module": "self_attention.query_key_value", "avg_loss": "5.2416", "time": "1.0220"}, {"layer": 35, "module": "self_attention.dense", "avg_loss": "0.0234", "time": "1.0434"}, {"layer": 35, "module": "mlp.dense_h_to_4h", "avg_loss": "19.8502", "time": "1.0698"}, {"layer": 35, "module": "mlp.dense_4h_to_h", "avg_loss": "0.3882", "time": "3.6188"}, {"layer": 36, "module": "self_attention.query_key_value", "avg_loss": "5.6446", "time": "1.1030"}, {"layer": 36, "module": "self_attention.dense", "avg_loss": "0.0530", "time": "1.0372"}, {"layer": 36, "module": "mlp.dense_h_to_4h", "avg_loss": "23.7365", "time": "1.0678"}, {"layer": 36, "module": "mlp.dense_4h_to_h", "avg_loss": "0.5185", "time": "3.6438"}, {"layer": 37, "module": "self_attention.query_key_value", "avg_loss": "4.4728", "time": "1.0244"}, {"layer": 37, "module": "self_attention.dense", "avg_loss": "0.0551", "time": "1.0570"}, {"layer": 37, "module": "mlp.dense_h_to_4h", "avg_loss": "24.7346", "time": "1.0660"}, {"layer": 37, "module": "mlp.dense_4h_to_h", "avg_loss": "0.6423", "time": "3.6397"}, {"layer": 38, "module": "self_attention.query_key_value", "avg_loss": "4.3039", "time": "1.0206"}, {"layer": 38, "module": "self_attention.dense", "avg_loss": "0.0528", "time": "1.0793"}, {"layer": 38, "module": "mlp.dense_h_to_4h", "avg_loss": "26.6922", "time": "1.0740"}, {"layer": 38, "module": "mlp.dense_4h_to_h", "avg_loss": "0.8575", "time": "3.7764"}, {"layer": 39, "module": "self_attention.query_key_value", "avg_loss": "3.1431", "time": "1.0221"}, {"layer": 39, "module": "self_attention.dense", "avg_loss": "0.0245", "time": "1.1827"}, {"layer": 39, "module": "mlp.dense_h_to_4h", "avg_loss": "47.0566", "time": "1.0892"}, {"layer": 39, "module": "mlp.dense_4h_to_h", "avg_loss": "6.1427", "time": "3.7740"}, {"layer": 40, "module": "self_attention.query_key_value", "avg_loss": "3.5401", "time": "1.0457"}, {"layer": 40, "module": "self_attention.dense", "avg_loss": "0.0395", "time": "1.0141"}, {"layer": 40, "module": "mlp.dense_h_to_4h", "avg_loss": "66.8964", "time": "1.2261"}, {"layer": 40, "module": "mlp.dense_4h_to_h", "avg_loss": "4.1432", "time": "3.7793"}]