diff --git "a/last-checkpoint/trainer_state.json" "b/last-checkpoint/trainer_state.json" --- "a/last-checkpoint/trainer_state.json" +++ "b/last-checkpoint/trainer_state.json" @@ -1,18094 +1,1229 @@ { - "best_metric": 2.0400795936584473, - "best_model_checkpoint": "output/checkpoint-3000", - "epoch": 0.5, - "global_step": 3000, + "best_metric": 2.1868629455566406, + "best_model_checkpoint": "output/checkpoint-200", + "epoch": 0.1, + "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, - "learning_rate": 8.000000000000001e-06, - "loss": 5.1377, + "learning_rate": 4.000000000000001e-06, + "loss": 2.7847, "step": 1 }, { "epoch": 0.0, - "learning_rate": 1.6000000000000003e-05, - "loss": 5.1144, + "learning_rate": 8.000000000000001e-06, + "loss": 2.789, "step": 2 }, { "epoch": 0.0, - "learning_rate": 2.4e-05, - "loss": 5.2944, + "learning_rate": 1.2e-05, + "loss": 2.7653, "step": 3 }, { "epoch": 0.0, - "learning_rate": 3.2000000000000005e-05, - "loss": 5.277, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.7217, "step": 4 }, { "epoch": 0.0, - "learning_rate": 4e-05, - "loss": 4.7688, + "learning_rate": 2e-05, + "loss": 2.7964, "step": 5 }, { "epoch": 0.0, - "learning_rate": 4.8e-05, - "loss": 4.7068, + "learning_rate": 2.4e-05, + "loss": 2.7433, "step": 6 }, { "epoch": 0.0, - "learning_rate": 5.6000000000000006e-05, - "loss": 4.905, + "learning_rate": 2.8000000000000003e-05, + "loss": 2.6567, "step": 7 }, { "epoch": 0.0, - "learning_rate": 6.400000000000001e-05, - "loss": 4.6467, + "learning_rate": 3.2000000000000005e-05, + "loss": 2.7926, "step": 8 }, { "epoch": 0.0, - "learning_rate": 7.2e-05, - "loss": 4.4809, + "learning_rate": 3.6e-05, + "loss": 2.7431, "step": 9 }, { - "epoch": 0.0, - "learning_rate": 8e-05, - "loss": 4.4408, + "epoch": 0.01, + "learning_rate": 4e-05, + "loss": 2.7118, "step": 10 }, { - "epoch": 0.0, - "learning_rate": 8.800000000000001e-05, - "loss": 4.2903, + "epoch": 0.01, + "learning_rate": 4.4000000000000006e-05, + "loss": 2.7353, "step": 11 }, { - "epoch": 0.0, - "learning_rate": 9.6e-05, - "loss": 4.2546, + "epoch": 0.01, + "learning_rate": 4.8e-05, + "loss": 2.7365, "step": 12 }, { - "epoch": 0.0, - "learning_rate": 0.00010400000000000001, - "loss": 4.1372, + "epoch": 0.01, + "learning_rate": 5.2000000000000004e-05, + "loss": 2.6999, "step": 13 }, { - "epoch": 0.0, - "learning_rate": 0.00011200000000000001, - "loss": 4.1673, + "epoch": 0.01, + "learning_rate": 5.6000000000000006e-05, + "loss": 2.6944, "step": 14 }, { - "epoch": 0.0, - "learning_rate": 0.00012, - "loss": 3.8483, + "epoch": 0.01, + "learning_rate": 6e-05, + "loss": 2.6036, "step": 15 }, { - "epoch": 0.0, - "learning_rate": 0.00012800000000000002, - "loss": 3.7866, + "epoch": 0.01, + "learning_rate": 6.400000000000001e-05, + "loss": 2.6631, "step": 16 }, { - "epoch": 0.0, - "learning_rate": 0.00013600000000000003, - "loss": 3.7765, + "epoch": 0.01, + "learning_rate": 6.800000000000001e-05, + "loss": 2.6435, "step": 17 }, { - "epoch": 0.0, - "learning_rate": 0.000144, - "loss": 3.61, + "epoch": 0.01, + "learning_rate": 7.2e-05, + "loss": 2.6806, "step": 18 }, { - "epoch": 0.0, - "learning_rate": 0.000152, - "loss": 3.5922, + "epoch": 0.01, + "learning_rate": 7.6e-05, + "loss": 2.5884, "step": 19 }, { - "epoch": 0.0, - "learning_rate": 0.00016, - "loss": 3.5748, + "epoch": 0.01, + "learning_rate": 8e-05, + "loss": 2.6082, "step": 20 }, { - "epoch": 0.0, - "learning_rate": 0.000168, - "loss": 3.462, + "epoch": 0.01, + "learning_rate": 8.4e-05, + "loss": 2.5936, "step": 21 }, { - "epoch": 0.0, - "learning_rate": 0.00017600000000000002, - "loss": 3.5482, + "epoch": 0.01, + "learning_rate": 8.800000000000001e-05, + "loss": 2.5439, "step": 22 }, { - "epoch": 0.0, - "learning_rate": 0.00018400000000000003, - "loss": 3.3806, + "epoch": 0.01, + "learning_rate": 9.200000000000001e-05, + "loss": 2.5566, "step": 23 }, { - "epoch": 0.0, - "learning_rate": 0.000192, - "loss": 3.375, + "epoch": 0.01, + "learning_rate": 9.6e-05, + "loss": 2.5111, "step": 24 }, { - "epoch": 0.0, - "learning_rate": 0.0002, - "loss": 3.4461, + "epoch": 0.01, + "learning_rate": 0.0001, + "loss": 2.5215, "step": 25 }, { - "epoch": 0.0, - "learning_rate": 0.00020800000000000001, - "loss": 3.3302, + "epoch": 0.01, + "learning_rate": 0.00010400000000000001, + "loss": 2.4623, "step": 26 }, { - "epoch": 0.0, - "learning_rate": 0.00021600000000000002, - "loss": 3.2568, + "epoch": 0.01, + "learning_rate": 0.00010800000000000001, + "loss": 2.4546, "step": 27 }, { - "epoch": 0.0, - "learning_rate": 0.00022400000000000002, - "loss": 3.1755, + "epoch": 0.01, + "learning_rate": 0.00011200000000000001, + "loss": 2.5861, "step": 28 }, { - "epoch": 0.0, - "learning_rate": 0.000232, - "loss": 3.1303, + "epoch": 0.01, + "learning_rate": 0.000116, + "loss": 2.5458, "step": 29 }, { "epoch": 0.01, - "learning_rate": 0.00024, - "loss": 3.1867, + "learning_rate": 0.00012, + "loss": 2.473, "step": 30 }, { - "epoch": 0.01, - "learning_rate": 0.000248, - "loss": 3.086, + "epoch": 0.02, + "learning_rate": 0.000124, + "loss": 2.44, "step": 31 }, { - "epoch": 0.01, - "learning_rate": 0.00025600000000000004, - "loss": 3.0142, + "epoch": 0.02, + "learning_rate": 0.00012800000000000002, + "loss": 2.4321, "step": 32 }, { - "epoch": 0.01, - "learning_rate": 0.000264, - "loss": 2.9919, + "epoch": 0.02, + "learning_rate": 0.000132, + "loss": 2.4441, "step": 33 }, { - "epoch": 0.01, - "learning_rate": 0.00027200000000000005, - "loss": 2.9977, + "epoch": 0.02, + "learning_rate": 0.00013600000000000003, + "loss": 2.4543, "step": 34 }, { - "epoch": 0.01, - "learning_rate": 0.00028, - "loss": 3.0582, + "epoch": 0.02, + "learning_rate": 0.00014, + "loss": 2.4442, "step": 35 }, { - "epoch": 0.01, - "learning_rate": 0.000288, - "loss": 3.0244, + "epoch": 0.02, + "learning_rate": 0.000144, + "loss": 2.3461, "step": 36 }, { - "epoch": 0.01, - "learning_rate": 0.000296, - "loss": 3.0201, + "epoch": 0.02, + "learning_rate": 0.000148, + "loss": 2.2818, "step": 37 }, { - "epoch": 0.01, - "learning_rate": 0.000304, - "loss": 3.0081, + "epoch": 0.02, + "learning_rate": 0.000152, + "loss": 2.3907, "step": 38 }, { - "epoch": 0.01, - "learning_rate": 0.00031200000000000005, - "loss": 3.0072, + "epoch": 0.02, + "learning_rate": 0.00015600000000000002, + "loss": 2.3692, "step": 39 }, { - "epoch": 0.01, - "learning_rate": 0.00032, - "loss": 3.0163, + "epoch": 0.02, + "learning_rate": 0.00016, + "loss": 2.3582, "step": 40 }, { - "epoch": 0.01, - "learning_rate": 0.000328, - "loss": 2.9476, + "epoch": 0.02, + "learning_rate": 0.000164, + "loss": 2.3354, "step": 41 }, { - "epoch": 0.01, - "learning_rate": 0.000336, - "loss": 2.9289, + "epoch": 0.02, + "learning_rate": 0.000168, + "loss": 2.4426, "step": 42 }, { - "epoch": 0.01, - "learning_rate": 0.000344, - "loss": 2.8889, + "epoch": 0.02, + "learning_rate": 0.000172, + "loss": 2.2715, "step": 43 }, { - "epoch": 0.01, - "learning_rate": 0.00035200000000000005, - "loss": 2.9479, + "epoch": 0.02, + "learning_rate": 0.00017600000000000002, + "loss": 2.3155, "step": 44 }, { - "epoch": 0.01, - "learning_rate": 0.00036, - "loss": 2.913, + "epoch": 0.02, + "learning_rate": 0.00018, + "loss": 2.243, "step": 45 }, { - "epoch": 0.01, - "learning_rate": 0.00036800000000000005, - "loss": 2.8848, + "epoch": 0.02, + "learning_rate": 0.00018400000000000003, + "loss": 2.3097, "step": 46 }, { - "epoch": 0.01, - "learning_rate": 0.000376, - "loss": 2.9424, + "epoch": 0.02, + "learning_rate": 0.000188, + "loss": 2.2454, "step": 47 }, { - "epoch": 0.01, - "learning_rate": 0.000384, - "loss": 2.8967, + "epoch": 0.02, + "learning_rate": 0.000192, + "loss": 2.3081, "step": 48 }, { - "epoch": 0.01, - "learning_rate": 0.000392, - "loss": 2.8882, + "epoch": 0.02, + "learning_rate": 0.000196, + "loss": 2.3393, "step": 49 }, { - "epoch": 0.01, - "learning_rate": 0.0004, - "loss": 2.783, + "epoch": 0.03, + "learning_rate": 0.0002, + "loss": 2.3029, "step": 50 }, { - "epoch": 0.01, - "learning_rate": 0.00040800000000000005, - "loss": 2.9005, + "epoch": 0.03, + "learning_rate": 0.00020400000000000003, + "loss": 2.2424, "step": 51 }, { - "epoch": 0.01, - "learning_rate": 0.00041600000000000003, - "loss": 2.8624, + "epoch": 0.03, + "learning_rate": 0.00020800000000000001, + "loss": 2.2761, "step": 52 }, { - "epoch": 0.01, - "learning_rate": 0.00042400000000000006, - "loss": 2.8651, + "epoch": 0.03, + "learning_rate": 0.00021200000000000003, + "loss": 2.355, "step": 53 }, { - "epoch": 0.01, - "learning_rate": 0.00043200000000000004, - "loss": 2.9019, + "epoch": 0.03, + "learning_rate": 0.00021600000000000002, + "loss": 2.3057, "step": 54 }, { - "epoch": 0.01, - "learning_rate": 0.00044000000000000007, - "loss": 2.9397, + "epoch": 0.03, + "learning_rate": 0.00022000000000000003, + "loss": 2.3098, "step": 55 }, { - "epoch": 0.01, - "learning_rate": 0.00044800000000000005, - "loss": 2.9143, + "epoch": 0.03, + "learning_rate": 0.00022400000000000002, + "loss": 2.2867, "step": 56 }, { - "epoch": 0.01, - "learning_rate": 0.00045599999999999997, - "loss": 2.816, + "epoch": 0.03, + "learning_rate": 0.00022799999999999999, + "loss": 2.3558, "step": 57 }, { - "epoch": 0.01, - "learning_rate": 0.000464, - "loss": 2.7941, + "epoch": 0.03, + "learning_rate": 0.000232, + "loss": 2.3114, "step": 58 }, { - "epoch": 0.01, - "learning_rate": 0.000472, - "loss": 2.7889, + "epoch": 0.03, + "learning_rate": 0.000236, + "loss": 2.2818, "step": 59 }, { - "epoch": 0.01, - "learning_rate": 0.00048, - "loss": 2.816, + "epoch": 0.03, + "learning_rate": 0.00024, + "loss": 2.3194, "step": 60 }, { - "epoch": 0.01, - "learning_rate": 0.000488, - "loss": 2.7483, + "epoch": 0.03, + "learning_rate": 0.000244, + "loss": 2.2405, "step": 61 }, { - "epoch": 0.01, - "learning_rate": 0.000496, - "loss": 2.8676, + "epoch": 0.03, + "learning_rate": 0.000248, + "loss": 2.31, "step": 62 }, { - "epoch": 0.01, - "learning_rate": 0.000504, - "loss": 2.7342, + "epoch": 0.03, + "learning_rate": 0.000252, + "loss": 2.3165, "step": 63 }, { - "epoch": 0.01, - "learning_rate": 0.0005120000000000001, - "loss": 2.8743, + "epoch": 0.03, + "learning_rate": 0.00025600000000000004, + "loss": 2.3839, "step": 64 }, { - "epoch": 0.01, - "learning_rate": 0.0005200000000000001, - "loss": 2.7635, + "epoch": 0.03, + "learning_rate": 0.00026000000000000003, + "loss": 2.3172, "step": 65 }, { - "epoch": 0.01, - "learning_rate": 0.000528, - "loss": 2.8303, + "epoch": 0.03, + "learning_rate": 0.000264, + "loss": 2.2911, "step": 66 }, { - "epoch": 0.01, - "learning_rate": 0.000536, - "loss": 2.8751, + "epoch": 0.03, + "learning_rate": 0.000268, + "loss": 2.2946, "step": 67 }, { - "epoch": 0.01, - "learning_rate": 0.0005440000000000001, - "loss": 2.7702, + "epoch": 0.03, + "learning_rate": 0.00027200000000000005, + "loss": 2.3343, "step": 68 }, { - "epoch": 0.01, - "learning_rate": 0.000552, - "loss": 2.7697, + "epoch": 0.03, + "learning_rate": 0.000276, + "loss": 2.3077, "step": 69 }, { - "epoch": 0.01, - "learning_rate": 0.00056, - "loss": 2.7656, + "epoch": 0.04, + "learning_rate": 0.00028, + "loss": 2.3141, "step": 70 }, { - "epoch": 0.01, - "learning_rate": 0.000568, - "loss": 2.6676, + "epoch": 0.04, + "learning_rate": 0.000284, + "loss": 2.3114, "step": 71 }, { - "epoch": 0.01, - "learning_rate": 0.000576, - "loss": 2.7541, - "step": 72 + "epoch": 0.04, + "learning_rate": 0.000288, + "loss": 2.2935, + "step": 72 }, { - "epoch": 0.01, - "learning_rate": 0.000584, - "loss": 2.7413, + "epoch": 0.04, + "learning_rate": 0.000292, + "loss": 2.375, "step": 73 }, { - "epoch": 0.01, - "learning_rate": 0.000592, - "loss": 2.655, + "epoch": 0.04, + "learning_rate": 0.000296, + "loss": 2.3633, "step": 74 }, { - "epoch": 0.01, - "learning_rate": 0.0006000000000000001, - "loss": 2.7992, + "epoch": 0.04, + "learning_rate": 0.00030000000000000003, + "loss": 2.2862, "step": 75 }, { - "epoch": 0.01, - "learning_rate": 0.000608, - "loss": 2.7969, + "epoch": 0.04, + "learning_rate": 0.000304, + "loss": 2.2846, "step": 76 }, { - "epoch": 0.01, - "learning_rate": 0.000616, - "loss": 2.8561, + "epoch": 0.04, + "learning_rate": 0.000308, + "loss": 2.2553, "step": 77 }, { - "epoch": 0.01, - "learning_rate": 0.0006240000000000001, - "loss": 2.7182, + "epoch": 0.04, + "learning_rate": 0.00031200000000000005, + "loss": 2.2767, "step": 78 }, { - "epoch": 0.01, - "learning_rate": 0.0006320000000000001, - "loss": 2.7121, + "epoch": 0.04, + "learning_rate": 0.00031600000000000004, + "loss": 2.3474, "step": 79 }, { - "epoch": 0.01, - "learning_rate": 0.00064, - "loss": 2.7505, + "epoch": 0.04, + "learning_rate": 0.00032, + "loss": 2.3817, "step": 80 }, { - "epoch": 0.01, - "learning_rate": 0.000648, - "loss": 2.7664, + "epoch": 0.04, + "learning_rate": 0.000324, + "loss": 2.345, "step": 81 }, { - "epoch": 0.01, - "learning_rate": 0.000656, - "loss": 2.7699, + "epoch": 0.04, + "learning_rate": 0.000328, + "loss": 2.3573, "step": 82 }, { - "epoch": 0.01, - "learning_rate": 0.000664, - "loss": 2.687, + "epoch": 0.04, + "learning_rate": 0.000332, + "loss": 2.3012, "step": 83 }, { - "epoch": 0.01, - "learning_rate": 0.000672, - "loss": 2.6854, + "epoch": 0.04, + "learning_rate": 0.000336, + "loss": 2.2964, "step": 84 }, { - "epoch": 0.01, - "learning_rate": 0.00068, - "loss": 2.7284, + "epoch": 0.04, + "learning_rate": 0.00034, + "loss": 2.3411, "step": 85 }, { - "epoch": 0.01, - "learning_rate": 0.000688, - "loss": 2.7535, + "epoch": 0.04, + "learning_rate": 0.000344, + "loss": 2.2456, "step": 86 }, { - "epoch": 0.01, - "learning_rate": 0.000696, - "loss": 2.7291, + "epoch": 0.04, + "learning_rate": 0.000348, + "loss": 2.2561, "step": 87 }, { - "epoch": 0.01, - "learning_rate": 0.0007040000000000001, - "loss": 2.6795, + "epoch": 0.04, + "learning_rate": 0.00035200000000000005, + "loss": 2.2142, "step": 88 }, { - "epoch": 0.01, - "learning_rate": 0.0007120000000000001, - "loss": 2.6847, + "epoch": 0.04, + "learning_rate": 0.00035600000000000003, + "loss": 2.2198, "step": 89 }, { - "epoch": 0.01, - "learning_rate": 0.00072, - "loss": 2.8037, + "epoch": 0.04, + "learning_rate": 0.00036, + "loss": 2.2613, "step": 90 }, { - "epoch": 0.02, - "learning_rate": 0.000728, - "loss": 2.7104, + "epoch": 0.05, + "learning_rate": 0.000364, + "loss": 2.3107, "step": 91 }, { - "epoch": 0.02, - "learning_rate": 0.0007360000000000001, - "loss": 2.7022, + "epoch": 0.05, + "learning_rate": 0.00036800000000000005, + "loss": 2.2593, "step": 92 }, { - "epoch": 0.02, - "learning_rate": 0.0007440000000000001, - "loss": 2.6876, + "epoch": 0.05, + "learning_rate": 0.00037200000000000004, + "loss": 2.2547, "step": 93 }, { - "epoch": 0.02, - "learning_rate": 0.000752, - "loss": 2.7657, + "epoch": 0.05, + "learning_rate": 0.000376, + "loss": 2.3041, "step": 94 }, { - "epoch": 0.02, - "learning_rate": 0.00076, - "loss": 2.6784, + "epoch": 0.05, + "learning_rate": 0.00038, + "loss": 2.3346, "step": 95 }, { - "epoch": 0.02, - "learning_rate": 0.000768, - "loss": 2.7005, + "epoch": 0.05, + "learning_rate": 0.000384, + "loss": 2.338, "step": 96 }, { - "epoch": 0.02, - "learning_rate": 0.000776, - "loss": 2.6969, + "epoch": 0.05, + "learning_rate": 0.000388, + "loss": 2.2237, "step": 97 }, { - "epoch": 0.02, - "learning_rate": 0.000784, - "loss": 2.727, + "epoch": 0.05, + "learning_rate": 0.000392, + "loss": 2.2255, "step": 98 }, { - "epoch": 0.02, - "learning_rate": 0.0007920000000000001, - "loss": 2.6475, + "epoch": 0.05, + "learning_rate": 0.00039600000000000003, + "loss": 2.2983, "step": 99 }, { - "epoch": 0.02, - "learning_rate": 0.0008, - "loss": 2.7626, + "epoch": 0.05, + "learning_rate": 0.0004, + "loss": 2.2828, "step": 100 }, { - "epoch": 0.02, - "learning_rate": 0.000799864406779661, - "loss": 2.7642, + "epoch": 0.05, + "learning_rate": 0.000404, + "loss": 2.3934, "step": 101 }, { - "epoch": 0.02, - "learning_rate": 0.0007997288135593221, - "loss": 2.6881, + "epoch": 0.05, + "learning_rate": 0.00040800000000000005, + "loss": 2.3819, "step": 102 }, { - "epoch": 0.02, - "learning_rate": 0.0007995932203389831, - "loss": 2.722, + "epoch": 0.05, + "learning_rate": 0.00041200000000000004, + "loss": 2.3157, "step": 103 }, { - "epoch": 0.02, - "learning_rate": 0.0007994576271186441, - "loss": 2.761, + "epoch": 0.05, + "learning_rate": 0.00041600000000000003, + "loss": 2.2675, "step": 104 }, { - "epoch": 0.02, - "learning_rate": 0.000799322033898305, - "loss": 2.6431, + "epoch": 0.05, + "learning_rate": 0.00042, + "loss": 2.2912, "step": 105 }, { - "epoch": 0.02, - "learning_rate": 0.0007991864406779661, - "loss": 2.6869, + "epoch": 0.05, + "learning_rate": 0.00042400000000000006, + "loss": 2.2761, "step": 106 }, { - "epoch": 0.02, - "learning_rate": 0.0007990508474576271, - "loss": 2.6963, + "epoch": 0.05, + "learning_rate": 0.00042800000000000005, + "loss": 2.3368, "step": 107 }, { - "epoch": 0.02, - "learning_rate": 0.0007989152542372882, - "loss": 2.8242, + "epoch": 0.05, + "learning_rate": 0.00043200000000000004, + "loss": 2.2968, "step": 108 }, { - "epoch": 0.02, - "learning_rate": 0.0007987796610169492, - "loss": 2.6418, + "epoch": 0.05, + "learning_rate": 0.000436, + "loss": 2.3208, "step": 109 }, { - "epoch": 0.02, - "learning_rate": 0.0007986440677966103, - "loss": 2.7597, + "epoch": 0.06, + "learning_rate": 0.00044000000000000007, + "loss": 2.2038, "step": 110 }, { - "epoch": 0.02, - "learning_rate": 0.0007985084745762711, - "loss": 2.7569, + "epoch": 0.06, + "learning_rate": 0.00044400000000000006, + "loss": 2.3589, "step": 111 }, { - "epoch": 0.02, - "learning_rate": 0.0007983728813559322, - "loss": 2.7329, + "epoch": 0.06, + "learning_rate": 0.00044800000000000005, + "loss": 2.3036, "step": 112 }, { - "epoch": 0.02, - "learning_rate": 0.0007982372881355932, - "loss": 2.6758, + "epoch": 0.06, + "learning_rate": 0.000452, + "loss": 2.2948, "step": 113 }, { - "epoch": 0.02, - "learning_rate": 0.0007981016949152543, - "loss": 2.7202, + "epoch": 0.06, + "learning_rate": 0.00045599999999999997, + "loss": 2.1675, "step": 114 }, { - "epoch": 0.02, - "learning_rate": 0.0007979661016949153, - "loss": 2.6658, + "epoch": 0.06, + "learning_rate": 0.00045999999999999996, + "loss": 2.3433, "step": 115 }, { - "epoch": 0.02, - "learning_rate": 0.0007978305084745764, - "loss": 2.6372, + "epoch": 0.06, + "learning_rate": 0.000464, + "loss": 2.2702, "step": 116 }, { - "epoch": 0.02, - "learning_rate": 0.0007976949152542374, - "loss": 2.665, + "epoch": 0.06, + "learning_rate": 0.000468, + "loss": 2.3075, "step": 117 }, { - "epoch": 0.02, - "learning_rate": 0.0007975593220338983, - "loss": 2.717, + "epoch": 0.06, + "learning_rate": 0.000472, + "loss": 2.1956, "step": 118 }, { - "epoch": 0.02, - "learning_rate": 0.0007974237288135593, - "loss": 2.8033, + "epoch": 0.06, + "learning_rate": 0.000476, + "loss": 2.234, "step": 119 }, { - "epoch": 0.02, - "learning_rate": 0.0007972881355932204, - "loss": 2.6871, + "epoch": 0.06, + "learning_rate": 0.00048, + "loss": 2.2742, "step": 120 }, { - "epoch": 0.02, - "learning_rate": 0.0007971525423728814, - "loss": 2.7634, + "epoch": 0.06, + "learning_rate": 0.000484, + "loss": 2.3237, "step": 121 }, { - "epoch": 0.02, - "learning_rate": 0.0007970169491525424, - "loss": 2.6665, + "epoch": 0.06, + "learning_rate": 0.000488, + "loss": 2.3122, "step": 122 }, { - "epoch": 0.02, - "learning_rate": 0.0007968813559322035, - "loss": 2.6286, + "epoch": 0.06, + "learning_rate": 0.000492, + "loss": 2.2578, "step": 123 }, { - "epoch": 0.02, - "learning_rate": 0.0007967457627118644, - "loss": 2.7076, + "epoch": 0.06, + "learning_rate": 0.000496, + "loss": 2.3176, "step": 124 }, { - "epoch": 0.02, - "learning_rate": 0.0007966101694915254, - "loss": 2.6828, + "epoch": 0.06, + "learning_rate": 0.0005, + "loss": 2.2668, "step": 125 }, { - "epoch": 0.02, - "learning_rate": 0.0007964745762711864, - "loss": 2.6324, + "epoch": 0.06, + "learning_rate": 0.000504, + "loss": 2.3111, "step": 126 }, { - "epoch": 0.02, - "learning_rate": 0.0007963389830508475, - "loss": 2.6649, + "epoch": 0.06, + "learning_rate": 0.000508, + "loss": 2.2754, "step": 127 }, { - "epoch": 0.02, - "learning_rate": 0.0007962033898305085, - "loss": 2.692, + "epoch": 0.06, + "learning_rate": 0.0005120000000000001, + "loss": 2.2973, "step": 128 }, { - "epoch": 0.02, - "learning_rate": 0.0007960677966101696, - "loss": 2.6541, + "epoch": 0.06, + "learning_rate": 0.0005160000000000001, + "loss": 2.3103, "step": 129 }, { - "epoch": 0.02, - "learning_rate": 0.0007959322033898305, - "loss": 2.6768, + "epoch": 0.07, + "learning_rate": 0.0005200000000000001, + "loss": 2.2599, "step": 130 }, { - "epoch": 0.02, - "learning_rate": 0.0007957966101694916, - "loss": 2.7182, + "epoch": 0.07, + "learning_rate": 0.000524, + "loss": 2.2084, "step": 131 }, { - "epoch": 0.02, - "learning_rate": 0.0007956610169491526, - "loss": 2.7264, + "epoch": 0.07, + "learning_rate": 0.000528, + "loss": 2.3057, "step": 132 }, { - "epoch": 0.02, - "learning_rate": 0.0007955254237288136, - "loss": 2.6101, + "epoch": 0.07, + "learning_rate": 0.000532, + "loss": 2.2949, "step": 133 }, { - "epoch": 0.02, - "learning_rate": 0.0007953898305084746, - "loss": 2.7315, + "epoch": 0.07, + "learning_rate": 0.000536, + "loss": 2.3372, "step": 134 }, { - "epoch": 0.02, - "learning_rate": 0.0007952542372881357, - "loss": 2.7184, + "epoch": 0.07, + "learning_rate": 0.0005400000000000001, + "loss": 2.3106, "step": 135 }, { - "epoch": 0.02, - "learning_rate": 0.0007951186440677966, - "loss": 2.7155, + "epoch": 0.07, + "learning_rate": 0.0005440000000000001, + "loss": 2.3088, "step": 136 }, { - "epoch": 0.02, - "learning_rate": 0.0007949830508474577, - "loss": 2.6894, + "epoch": 0.07, + "learning_rate": 0.0005480000000000001, + "loss": 2.4012, "step": 137 }, { - "epoch": 0.02, - "learning_rate": 0.0007948474576271187, - "loss": 2.6394, + "epoch": 0.07, + "learning_rate": 0.000552, + "loss": 2.2033, "step": 138 }, { - "epoch": 0.02, - "learning_rate": 0.0007947118644067797, - "loss": 2.6448, + "epoch": 0.07, + "learning_rate": 0.000556, + "loss": 2.2326, "step": 139 }, { - "epoch": 0.02, - "learning_rate": 0.0007945762711864407, - "loss": 2.6805, + "epoch": 0.07, + "learning_rate": 0.00056, + "loss": 2.2218, "step": 140 }, { - "epoch": 0.02, - "learning_rate": 0.0007944406779661018, - "loss": 2.6213, + "epoch": 0.07, + "learning_rate": 0.000564, + "loss": 2.2605, "step": 141 }, { - "epoch": 0.02, - "learning_rate": 0.0007943050847457627, - "loss": 2.6665, + "epoch": 0.07, + "learning_rate": 0.000568, + "loss": 2.2576, "step": 142 }, { - "epoch": 0.02, - "learning_rate": 0.0007941694915254237, - "loss": 2.5491, + "epoch": 0.07, + "learning_rate": 0.000572, + "loss": 2.2998, "step": 143 }, { - "epoch": 0.02, - "learning_rate": 0.0007940338983050848, - "loss": 2.7293, + "epoch": 0.07, + "learning_rate": 0.000576, + "loss": 2.3869, "step": 144 }, { - "epoch": 0.02, - "learning_rate": 0.0007938983050847458, - "loss": 2.6627, + "epoch": 0.07, + "learning_rate": 0.00058, + "loss": 2.2979, "step": 145 }, { - "epoch": 0.02, - "learning_rate": 0.0007937627118644069, - "loss": 2.6717, + "epoch": 0.07, + "learning_rate": 0.000584, + "loss": 2.2765, "step": 146 }, { - "epoch": 0.02, - "learning_rate": 0.0007936271186440678, - "loss": 2.6122, + "epoch": 0.07, + "learning_rate": 0.000588, + "loss": 2.3446, "step": 147 }, { - "epoch": 0.02, - "learning_rate": 0.0007934915254237288, - "loss": 2.6305, + "epoch": 0.07, + "learning_rate": 0.000592, + "loss": 2.2205, "step": 148 }, { - "epoch": 0.02, - "learning_rate": 0.0007933559322033898, - "loss": 2.6266, + "epoch": 0.07, + "learning_rate": 0.0005960000000000001, + "loss": 2.3164, "step": 149 }, { - "epoch": 0.03, - "learning_rate": 0.0007932203389830509, - "loss": 2.6781, + "epoch": 0.07, + "learning_rate": 0.0006000000000000001, + "loss": 2.2707, "step": 150 }, { - "epoch": 0.03, - "learning_rate": 0.0007930847457627119, - "loss": 2.6851, + "epoch": 0.08, + "learning_rate": 0.000604, + "loss": 2.3147, "step": 151 }, { - "epoch": 0.03, - "learning_rate": 0.000792949152542373, - "loss": 2.6379, + "epoch": 0.08, + "learning_rate": 0.000608, + "loss": 2.2527, "step": 152 }, { - "epoch": 0.03, - "learning_rate": 0.000792813559322034, - "loss": 2.6714, + "epoch": 0.08, + "learning_rate": 0.000612, + "loss": 2.27, "step": 153 }, { - "epoch": 0.03, - "learning_rate": 0.0007926779661016949, - "loss": 2.7392, + "epoch": 0.08, + "learning_rate": 0.000616, + "loss": 2.2256, "step": 154 }, { - "epoch": 0.03, - "learning_rate": 0.0007925423728813559, - "loss": 2.6727, + "epoch": 0.08, + "learning_rate": 0.00062, + "loss": 2.2865, "step": 155 }, { - "epoch": 0.03, - "learning_rate": 0.000792406779661017, - "loss": 2.7512, + "epoch": 0.08, + "learning_rate": 0.0006240000000000001, + "loss": 2.2749, "step": 156 }, { - "epoch": 0.03, - "learning_rate": 0.000792271186440678, - "loss": 2.731, + "epoch": 0.08, + "learning_rate": 0.0006280000000000001, + "loss": 2.3338, "step": 157 }, { - "epoch": 0.03, - "learning_rate": 0.0007921355932203391, - "loss": 2.7441, + "epoch": 0.08, + "learning_rate": 0.0006320000000000001, + "loss": 2.3202, "step": 158 }, { - "epoch": 0.03, - "learning_rate": 0.0007920000000000001, - "loss": 2.638, + "epoch": 0.08, + "learning_rate": 0.0006360000000000001, + "loss": 2.271, "step": 159 }, { - "epoch": 0.03, - "learning_rate": 0.000791864406779661, - "loss": 2.6419, + "epoch": 0.08, + "learning_rate": 0.00064, + "loss": 2.2443, "step": 160 }, { - "epoch": 0.03, - "learning_rate": 0.000791728813559322, - "loss": 2.582, + "epoch": 0.08, + "learning_rate": 0.000644, + "loss": 2.2537, "step": 161 }, { - "epoch": 0.03, - "learning_rate": 0.0007915932203389831, - "loss": 2.724, + "epoch": 0.08, + "learning_rate": 0.000648, + "loss": 2.2206, "step": 162 }, { - "epoch": 0.03, - "learning_rate": 0.0007914576271186441, - "loss": 2.7238, + "epoch": 0.08, + "learning_rate": 0.000652, + "loss": 2.2687, "step": 163 }, { - "epoch": 0.03, - "learning_rate": 0.0007913220338983051, - "loss": 2.7242, + "epoch": 0.08, + "learning_rate": 0.000656, + "loss": 2.2745, "step": 164 }, { - "epoch": 0.03, - "learning_rate": 0.0007911864406779662, - "loss": 2.6581, + "epoch": 0.08, + "learning_rate": 0.00066, + "loss": 2.283, "step": 165 }, { - "epoch": 0.03, - "learning_rate": 0.0007910508474576271, - "loss": 2.6638, + "epoch": 0.08, + "learning_rate": 0.000664, + "loss": 2.2333, "step": 166 }, { - "epoch": 0.03, - "learning_rate": 0.0007909152542372882, - "loss": 2.6857, + "epoch": 0.08, + "learning_rate": 0.000668, + "loss": 2.2706, "step": 167 }, { - "epoch": 0.03, - "learning_rate": 0.0007907796610169492, - "loss": 2.6805, + "epoch": 0.08, + "learning_rate": 0.000672, + "loss": 2.2628, "step": 168 }, { - "epoch": 0.03, - "learning_rate": 0.0007906440677966102, - "loss": 2.5892, + "epoch": 0.08, + "learning_rate": 0.0006760000000000001, + "loss": 2.3193, "step": 169 }, { - "epoch": 0.03, - "learning_rate": 0.0007905084745762712, - "loss": 2.6294, + "epoch": 0.09, + "learning_rate": 0.00068, + "loss": 2.318, "step": 170 }, { - "epoch": 0.03, - "learning_rate": 0.0007903728813559323, - "loss": 2.7016, + "epoch": 0.09, + "learning_rate": 0.000684, + "loss": 2.2414, "step": 171 }, { - "epoch": 0.03, - "learning_rate": 0.0007902372881355932, - "loss": 2.6009, + "epoch": 0.09, + "learning_rate": 0.000688, + "loss": 2.3725, "step": 172 }, { - "epoch": 0.03, - "learning_rate": 0.0007901016949152543, - "loss": 2.6274, + "epoch": 0.09, + "learning_rate": 0.000692, + "loss": 2.3124, "step": 173 }, { - "epoch": 0.03, - "learning_rate": 0.0007899661016949153, - "loss": 2.6633, + "epoch": 0.09, + "learning_rate": 0.000696, + "loss": 2.2374, "step": 174 }, { - "epoch": 0.03, - "learning_rate": 0.0007898305084745763, - "loss": 2.6486, + "epoch": 0.09, + "learning_rate": 0.0007, + "loss": 2.2959, "step": 175 }, { - "epoch": 0.03, - "learning_rate": 0.0007896949152542373, - "loss": 2.5538, + "epoch": 0.09, + "learning_rate": 0.0007040000000000001, + "loss": 2.2835, "step": 176 }, { - "epoch": 0.03, - "learning_rate": 0.0007895593220338984, - "loss": 2.6583, + "epoch": 0.09, + "learning_rate": 0.0007080000000000001, + "loss": 2.3138, "step": 177 }, { - "epoch": 0.03, - "learning_rate": 0.0007894237288135593, - "loss": 2.6365, + "epoch": 0.09, + "learning_rate": 0.0007120000000000001, + "loss": 2.3125, "step": 178 }, { - "epoch": 0.03, - "learning_rate": 0.0007892881355932204, - "loss": 2.6217, + "epoch": 0.09, + "learning_rate": 0.0007160000000000001, + "loss": 2.2884, "step": 179 }, { - "epoch": 0.03, - "learning_rate": 0.0007891525423728814, - "loss": 2.6431, + "epoch": 0.09, + "learning_rate": 0.00072, + "loss": 2.3444, "step": 180 }, { - "epoch": 0.03, - "learning_rate": 0.0007890169491525425, - "loss": 2.6859, + "epoch": 0.09, + "learning_rate": 0.000724, + "loss": 2.3208, "step": 181 }, { - "epoch": 0.03, - "learning_rate": 0.0007888813559322034, - "loss": 2.5771, + "epoch": 0.09, + "learning_rate": 0.000728, + "loss": 2.2983, "step": 182 }, { - "epoch": 0.03, - "learning_rate": 0.0007887457627118645, - "loss": 2.6483, + "epoch": 0.09, + "learning_rate": 0.000732, + "loss": 2.2733, "step": 183 }, { - "epoch": 0.03, - "learning_rate": 0.0007886101694915254, - "loss": 2.6999, + "epoch": 0.09, + "learning_rate": 0.0007360000000000001, + "loss": 2.3119, "step": 184 }, { - "epoch": 0.03, - "learning_rate": 0.0007884745762711864, - "loss": 2.7057, + "epoch": 0.09, + "learning_rate": 0.0007400000000000001, + "loss": 2.3803, "step": 185 }, { - "epoch": 0.03, - "learning_rate": 0.0007883389830508475, - "loss": 2.6446, + "epoch": 0.09, + "learning_rate": 0.0007440000000000001, + "loss": 2.2776, "step": 186 }, { - "epoch": 0.03, - "learning_rate": 0.0007882033898305085, - "loss": 2.7161, + "epoch": 0.09, + "learning_rate": 0.0007480000000000001, + "loss": 2.241, "step": 187 }, { - "epoch": 0.03, - "learning_rate": 0.0007880677966101696, - "loss": 2.6047, + "epoch": 0.09, + "learning_rate": 0.000752, + "loss": 2.2678, "step": 188 }, { - "epoch": 0.03, - "learning_rate": 0.0007879322033898306, - "loss": 2.6412, + "epoch": 0.09, + "learning_rate": 0.0007559999999999999, + "loss": 2.2228, "step": 189 }, { - "epoch": 0.03, - "learning_rate": 0.0007877966101694915, - "loss": 2.631, + "epoch": 0.1, + "learning_rate": 0.00076, + "loss": 2.3228, "step": 190 }, { - "epoch": 0.03, - "learning_rate": 0.0007876610169491525, - "loss": 2.6807, + "epoch": 0.1, + "learning_rate": 0.000764, + "loss": 2.2546, "step": 191 }, { - "epoch": 0.03, - "learning_rate": 0.0007875254237288136, - "loss": 2.6596, + "epoch": 0.1, + "learning_rate": 0.000768, + "loss": 2.3122, "step": 192 }, { - "epoch": 0.03, - "learning_rate": 0.0007873898305084746, - "loss": 2.6051, + "epoch": 0.1, + "learning_rate": 0.000772, + "loss": 2.3318, "step": 193 }, { - "epoch": 0.03, - "learning_rate": 0.0007872542372881357, - "loss": 2.6938, + "epoch": 0.1, + "learning_rate": 0.000776, + "loss": 2.369, "step": 194 }, { - "epoch": 0.03, - "learning_rate": 0.0007871186440677967, - "loss": 2.7125, + "epoch": 0.1, + "learning_rate": 0.00078, + "loss": 2.2685, "step": 195 }, { - "epoch": 0.03, - "learning_rate": 0.0007869830508474576, - "loss": 2.664, + "epoch": 0.1, + "learning_rate": 0.000784, + "loss": 2.275, "step": 196 }, { - "epoch": 0.03, - "learning_rate": 0.0007868474576271186, - "loss": 2.6355, + "epoch": 0.1, + "learning_rate": 0.0007880000000000001, + "loss": 2.2667, "step": 197 }, { - "epoch": 0.03, - "learning_rate": 0.0007867118644067797, - "loss": 2.6469, + "epoch": 0.1, + "learning_rate": 0.0007920000000000001, + "loss": 2.2844, "step": 198 }, { - "epoch": 0.03, - "learning_rate": 0.0007865762711864407, - "loss": 2.6451, + "epoch": 0.1, + "learning_rate": 0.000796, + "loss": 2.3124, "step": 199 }, { - "epoch": 0.03, - "learning_rate": 0.0007864406779661018, - "loss": 2.6577, + "epoch": 0.1, + "learning_rate": 0.0008, + "loss": 2.3202, "step": 200 }, { - "epoch": 0.03, - "learning_rate": 0.0007863050847457628, - "loss": 2.6677, - "step": 201 - }, - { - "epoch": 0.03, - "learning_rate": 0.0007861694915254239, - "loss": 2.6208, - "step": 202 - }, - { - "epoch": 0.03, - "learning_rate": 0.0007860338983050848, - "loss": 2.5922, - "step": 203 - }, - { - "epoch": 0.03, - "learning_rate": 0.0007858983050847457, - "loss": 2.633, - "step": 204 - }, - { - "epoch": 0.03, - "learning_rate": 0.0007857627118644068, - "loss": 2.6587, - "step": 205 - }, - { - "epoch": 0.03, - "learning_rate": 0.0007856271186440678, - "loss": 2.6327, - "step": 206 - }, - { - "epoch": 0.03, - "learning_rate": 0.0007854915254237289, - "loss": 2.6015, - "step": 207 - }, - { - "epoch": 0.03, - "learning_rate": 0.0007853559322033898, - "loss": 2.6667, - "step": 208 - }, - { - "epoch": 0.03, - "learning_rate": 0.0007852203389830509, - "loss": 2.6262, - "step": 209 - }, - { - "epoch": 0.04, - "learning_rate": 0.0007850847457627119, - "loss": 2.7435, - "step": 210 - }, - { - "epoch": 0.04, - "learning_rate": 0.0007849491525423729, - "loss": 2.4698, - "step": 211 - }, - { - "epoch": 0.04, - "learning_rate": 0.0007848135593220339, - "loss": 2.5554, - "step": 212 - }, - { - "epoch": 0.04, - "learning_rate": 0.000784677966101695, - "loss": 2.5718, - "step": 213 - }, - { - "epoch": 0.04, - "learning_rate": 0.0007845423728813559, - "loss": 2.5544, - "step": 214 - }, - { - "epoch": 0.04, - "learning_rate": 0.000784406779661017, - "loss": 2.6508, - "step": 215 - }, - { - "epoch": 0.04, - "learning_rate": 0.000784271186440678, - "loss": 2.613, - "step": 216 - }, - { - "epoch": 0.04, - "learning_rate": 0.0007841355932203391, - "loss": 2.5898, - "step": 217 - }, - { - "epoch": 0.04, - "learning_rate": 0.000784, - "loss": 2.5876, - "step": 218 - }, - { - "epoch": 0.04, - "learning_rate": 0.0007838644067796611, - "loss": 2.5573, - "step": 219 - }, - { - "epoch": 0.04, - "learning_rate": 0.000783728813559322, - "loss": 2.624, - "step": 220 - }, - { - "epoch": 0.04, - "learning_rate": 0.0007835932203389831, - "loss": 2.5814, - "step": 221 - }, - { - "epoch": 0.04, - "learning_rate": 0.0007834576271186441, - "loss": 2.6656, - "step": 222 - }, - { - "epoch": 0.04, - "learning_rate": 0.0007833220338983052, - "loss": 2.6862, - "step": 223 - }, - { - "epoch": 0.04, - "learning_rate": 0.0007831864406779662, - "loss": 2.8027, - "step": 224 - }, - { - "epoch": 0.04, - "learning_rate": 0.0007830508474576272, - "loss": 2.8105, - "step": 225 - }, - { - "epoch": 0.04, - "learning_rate": 0.0007829152542372881, - "loss": 2.7149, - "step": 226 - }, - { - "epoch": 0.04, - "learning_rate": 0.0007827796610169491, - "loss": 2.8449, - "step": 227 - }, - { - "epoch": 0.04, - "learning_rate": 0.0007826440677966102, - "loss": 2.9684, - "step": 228 - }, - { - "epoch": 0.04, - "learning_rate": 0.0007825084745762712, - "loss": 2.9016, - "step": 229 - }, - { - "epoch": 0.04, - "learning_rate": 0.0007823728813559323, - "loss": 2.8576, - "step": 230 - }, - { - "epoch": 0.04, - "learning_rate": 0.0007822372881355933, - "loss": 2.9106, - "step": 231 - }, - { - "epoch": 0.04, - "learning_rate": 0.0007821016949152542, - "loss": 2.826, - "step": 232 - }, - { - "epoch": 0.04, - "learning_rate": 0.0007819661016949152, - "loss": 2.9042, - "step": 233 - }, - { - "epoch": 0.04, - "learning_rate": 0.0007818305084745763, - "loss": 2.8738, - "step": 234 - }, - { - "epoch": 0.04, - "learning_rate": 0.0007816949152542373, - "loss": 2.9855, - "step": 235 - }, - { - "epoch": 0.04, - "learning_rate": 0.0007815593220338984, - "loss": 2.9692, - "step": 236 - }, - { - "epoch": 0.04, - "learning_rate": 0.0007814237288135594, - "loss": 3.199, - "step": 237 - }, - { - "epoch": 0.04, - "learning_rate": 0.0007812881355932205, - "loss": 3.0884, - "step": 238 - }, - { - "epoch": 0.04, - "learning_rate": 0.0007811525423728813, - "loss": 3.042, - "step": 239 - }, - { - "epoch": 0.04, - "learning_rate": 0.0007810169491525424, - "loss": 3.1018, - "step": 240 - }, - { - "epoch": 0.04, - "learning_rate": 0.0007808813559322034, - "loss": 3.0346, - "step": 241 - }, - { - "epoch": 0.04, - "learning_rate": 0.0007807457627118645, - "loss": 3.0977, - "step": 242 - }, - { - "epoch": 0.04, - "learning_rate": 0.0007806101694915255, - "loss": 3.115, - "step": 243 - }, - { - "epoch": 0.04, - "learning_rate": 0.0007804745762711866, - "loss": 3.2372, - "step": 244 - }, - { - "epoch": 0.04, - "learning_rate": 0.0007803389830508475, - "loss": 3.2689, - "step": 245 - }, - { - "epoch": 0.04, - "learning_rate": 0.0007802033898305085, - "loss": 3.1267, - "step": 246 - }, - { - "epoch": 0.04, - "learning_rate": 0.0007800677966101695, - "loss": 3.0774, - "step": 247 - }, - { - "epoch": 0.04, - "learning_rate": 0.0007799322033898305, - "loss": 3.0283, - "step": 248 - }, - { - "epoch": 0.04, - "learning_rate": 0.0007797966101694916, - "loss": 3.0163, - "step": 249 - }, - { - "epoch": 0.04, - "learning_rate": 0.0007796610169491525, - "loss": 2.9164, - "step": 250 - }, - { - "epoch": 0.04, - "learning_rate": 0.0007795254237288136, - "loss": 2.8825, - "step": 251 - }, - { - "epoch": 0.04, - "learning_rate": 0.0007793898305084746, - "loss": 2.9542, - "step": 252 - }, - { - "epoch": 0.04, - "learning_rate": 0.0007792542372881356, - "loss": 2.9018, - "step": 253 - }, - { - "epoch": 0.04, - "learning_rate": 0.0007791186440677966, - "loss": 2.9929, - "step": 254 - }, - { - "epoch": 0.04, - "learning_rate": 0.0007789830508474577, - "loss": 3.0326, - "step": 255 - }, - { - "epoch": 0.04, - "learning_rate": 0.0007788474576271186, - "loss": 2.9234, - "step": 256 - }, - { - "epoch": 0.04, - "learning_rate": 0.0007787118644067797, - "loss": 3.0114, - "step": 257 - }, - { - "epoch": 0.04, - "learning_rate": 0.0007785762711864407, - "loss": 3.0775, - "step": 258 - }, - { - "epoch": 0.04, - "learning_rate": 0.0007784406779661018, - "loss": 2.9171, - "step": 259 - }, - { - "epoch": 0.04, - "learning_rate": 0.0007783050847457628, - "loss": 3.0288, - "step": 260 - }, - { - "epoch": 0.04, - "learning_rate": 0.0007781694915254238, - "loss": 2.9062, - "step": 261 - }, - { - "epoch": 0.04, - "learning_rate": 0.0007780338983050847, - "loss": 3.092, - "step": 262 - }, - { - "epoch": 0.04, - "learning_rate": 0.0007778983050847458, - "loss": 2.9368, - "step": 263 - }, - { - "epoch": 0.04, - "learning_rate": 0.0007777627118644068, - "loss": 2.9572, - "step": 264 - }, - { - "epoch": 0.04, - "learning_rate": 0.0007776271186440679, - "loss": 2.936, - "step": 265 - }, - { - "epoch": 0.04, - "learning_rate": 0.0007774915254237289, - "loss": 2.9461, - "step": 266 - }, - { - "epoch": 0.04, - "learning_rate": 0.0007773559322033899, - "loss": 3.0886, - "step": 267 - }, - { - "epoch": 0.04, - "learning_rate": 0.0007772203389830508, - "loss": 2.8941, - "step": 268 - }, - { - "epoch": 0.04, - "learning_rate": 0.0007770847457627118, - "loss": 2.8142, - "step": 269 - }, - { - "epoch": 0.04, - "learning_rate": 0.0007769491525423729, - "loss": 2.777, - "step": 270 - }, - { - "epoch": 0.05, - "learning_rate": 0.0007768135593220339, - "loss": 2.8028, - "step": 271 - }, - { - "epoch": 0.05, - "learning_rate": 0.000776677966101695, - "loss": 2.6842, - "step": 272 - }, - { - "epoch": 0.05, - "learning_rate": 0.000776542372881356, - "loss": 2.7688, - "step": 273 - }, - { - "epoch": 0.05, - "learning_rate": 0.0007764067796610171, - "loss": 2.6817, - "step": 274 - }, - { - "epoch": 0.05, - "learning_rate": 0.0007762711864406779, - "loss": 2.6677, - "step": 275 - }, - { - "epoch": 0.05, - "learning_rate": 0.000776135593220339, - "loss": 2.6759, - "step": 276 - }, - { - "epoch": 0.05, - "learning_rate": 0.000776, - "loss": 2.7113, - "step": 277 - }, - { - "epoch": 0.05, - "learning_rate": 0.0007758644067796611, - "loss": 2.5817, - "step": 278 - }, - { - "epoch": 0.05, - "learning_rate": 0.0007757288135593221, - "loss": 2.6208, - "step": 279 - }, - { - "epoch": 0.05, - "learning_rate": 0.0007755932203389832, - "loss": 2.6325, - "step": 280 - }, - { - "epoch": 0.05, - "learning_rate": 0.0007754576271186441, - "loss": 2.7245, - "step": 281 - }, - { - "epoch": 0.05, - "learning_rate": 0.0007753220338983051, - "loss": 2.7362, - "step": 282 - }, - { - "epoch": 0.05, - "learning_rate": 0.0007751864406779661, - "loss": 2.5645, - "step": 283 - }, - { - "epoch": 0.05, - "learning_rate": 0.0007750508474576272, - "loss": 2.6167, - "step": 284 - }, - { - "epoch": 0.05, - "learning_rate": 0.0007749152542372882, - "loss": 2.6517, - "step": 285 - }, - { - "epoch": 0.05, - "learning_rate": 0.0007747796610169493, - "loss": 2.6042, - "step": 286 - }, - { - "epoch": 0.05, - "learning_rate": 0.0007746440677966102, - "loss": 2.652, - "step": 287 - }, - { - "epoch": 0.05, - "learning_rate": 0.0007745084745762712, - "loss": 2.619, - "step": 288 - }, - { - "epoch": 0.05, - "learning_rate": 0.0007743728813559322, - "loss": 2.5819, - "step": 289 - }, - { - "epoch": 0.05, - "learning_rate": 0.0007742372881355932, - "loss": 2.6991, - "step": 290 - }, - { - "epoch": 0.05, - "learning_rate": 0.0007741016949152543, - "loss": 2.6854, - "step": 291 - }, - { - "epoch": 0.05, - "learning_rate": 0.0007739661016949152, - "loss": 2.6002, - "step": 292 - }, - { - "epoch": 0.05, - "learning_rate": 0.0007738305084745763, - "loss": 2.6408, - "step": 293 - }, - { - "epoch": 0.05, - "learning_rate": 0.0007736949152542373, - "loss": 2.6261, - "step": 294 - }, - { - "epoch": 0.05, - "learning_rate": 0.0007735593220338984, - "loss": 2.5894, - "step": 295 - }, - { - "epoch": 0.05, - "learning_rate": 0.0007734237288135594, - "loss": 2.6171, - "step": 296 - }, - { - "epoch": 0.05, - "learning_rate": 0.0007732881355932204, - "loss": 2.6116, - "step": 297 - }, - { - "epoch": 0.05, - "learning_rate": 0.0007731525423728813, - "loss": 2.6656, - "step": 298 - }, - { - "epoch": 0.05, - "learning_rate": 0.0007730169491525424, - "loss": 2.6228, - "step": 299 - }, - { - "epoch": 0.05, - "learning_rate": 0.0007728813559322034, - "loss": 2.608, - "step": 300 - }, - { - "epoch": 0.05, - "learning_rate": 0.0007727457627118645, - "loss": 2.5309, - "step": 301 - }, - { - "epoch": 0.05, - "learning_rate": 0.0007726101694915255, - "loss": 2.6226, - "step": 302 - }, - { - "epoch": 0.05, - "learning_rate": 0.0007724745762711865, - "loss": 2.5953, - "step": 303 - }, - { - "epoch": 0.05, - "learning_rate": 0.0007723389830508474, - "loss": 2.6025, - "step": 304 - }, - { - "epoch": 0.05, - "learning_rate": 0.0007722033898305085, - "loss": 2.5493, - "step": 305 - }, - { - "epoch": 0.05, - "learning_rate": 0.0007720677966101695, - "loss": 2.6022, - "step": 306 - }, - { - "epoch": 0.05, - "learning_rate": 0.0007719322033898306, - "loss": 2.5399, - "step": 307 - }, - { - "epoch": 0.05, - "learning_rate": 0.0007717966101694916, - "loss": 2.5497, - "step": 308 - }, - { - "epoch": 0.05, - "learning_rate": 0.0007716610169491526, - "loss": 2.6287, - "step": 309 - }, - { - "epoch": 0.05, - "learning_rate": 0.0007715254237288135, - "loss": 2.6215, - "step": 310 - }, - { - "epoch": 0.05, - "learning_rate": 0.0007713898305084745, - "loss": 2.6294, - "step": 311 - }, - { - "epoch": 0.05, - "learning_rate": 0.0007712542372881356, - "loss": 2.6212, - "step": 312 - }, - { - "epoch": 0.05, - "learning_rate": 0.0007711186440677966, - "loss": 2.5755, - "step": 313 - }, - { - "epoch": 0.05, - "learning_rate": 0.0007709830508474577, - "loss": 2.5396, - "step": 314 - }, - { - "epoch": 0.05, - "learning_rate": 0.0007708474576271187, - "loss": 2.642, - "step": 315 - }, - { - "epoch": 0.05, - "learning_rate": 0.0007707118644067798, - "loss": 2.5289, - "step": 316 - }, - { - "epoch": 0.05, - "learning_rate": 0.0007705762711864407, - "loss": 2.5519, - "step": 317 - }, - { - "epoch": 0.05, - "learning_rate": 0.0007704406779661017, - "loss": 2.586, - "step": 318 - }, - { - "epoch": 0.05, - "learning_rate": 0.0007703050847457627, - "loss": 2.594, - "step": 319 - }, - { - "epoch": 0.05, - "learning_rate": 0.0007701694915254238, - "loss": 2.5997, - "step": 320 - }, - { - "epoch": 0.05, - "learning_rate": 0.0007700338983050848, - "loss": 2.5599, - "step": 321 - }, - { - "epoch": 0.05, - "learning_rate": 0.0007698983050847459, - "loss": 2.603, - "step": 322 - }, - { - "epoch": 0.05, - "learning_rate": 0.0007697627118644068, - "loss": 2.5505, - "step": 323 - }, - { - "epoch": 0.05, - "learning_rate": 0.0007696271186440678, - "loss": 2.5184, - "step": 324 - }, - { - "epoch": 0.05, - "learning_rate": 0.0007694915254237288, - "loss": 2.6093, - "step": 325 - }, - { - "epoch": 0.05, - "learning_rate": 0.0007693559322033899, - "loss": 2.6121, - "step": 326 - }, - { - "epoch": 0.05, - "learning_rate": 0.0007692203389830509, - "loss": 2.587, - "step": 327 - }, - { - "epoch": 0.05, - "learning_rate": 0.000769084745762712, - "loss": 2.555, - "step": 328 - }, - { - "epoch": 0.05, - "learning_rate": 0.0007689491525423729, - "loss": 2.5838, - "step": 329 - }, - { - "epoch": 0.06, - "learning_rate": 0.0007688135593220339, - "loss": 2.4801, - "step": 330 - }, - { - "epoch": 0.06, - "learning_rate": 0.000768677966101695, - "loss": 2.6102, - "step": 331 - }, - { - "epoch": 0.06, - "learning_rate": 0.0007685423728813559, - "loss": 2.5922, - "step": 332 - }, - { - "epoch": 0.06, - "learning_rate": 0.000768406779661017, - "loss": 2.5824, - "step": 333 - }, - { - "epoch": 0.06, - "learning_rate": 0.000768271186440678, - "loss": 2.6137, - "step": 334 - }, - { - "epoch": 0.06, - "learning_rate": 0.000768135593220339, - "loss": 2.5511, - "step": 335 - }, - { - "epoch": 0.06, - "learning_rate": 0.000768, - "loss": 2.5406, - "step": 336 - }, - { - "epoch": 0.06, - "learning_rate": 0.0007678644067796611, - "loss": 2.5357, - "step": 337 - }, - { - "epoch": 0.06, - "learning_rate": 0.0007677288135593221, - "loss": 2.4895, - "step": 338 - }, - { - "epoch": 0.06, - "learning_rate": 0.0007675932203389831, - "loss": 2.6371, - "step": 339 - }, - { - "epoch": 0.06, - "learning_rate": 0.000767457627118644, - "loss": 2.5476, - "step": 340 - }, - { - "epoch": 0.06, - "learning_rate": 0.0007673220338983051, - "loss": 2.5362, - "step": 341 - }, - { - "epoch": 0.06, - "learning_rate": 0.0007671864406779661, - "loss": 2.5852, - "step": 342 - }, - { - "epoch": 0.06, - "learning_rate": 0.0007670508474576272, - "loss": 2.5826, - "step": 343 - }, - { - "epoch": 0.06, - "learning_rate": 0.0007669152542372882, - "loss": 2.4969, - "step": 344 - }, - { - "epoch": 0.06, - "learning_rate": 0.0007667796610169493, - "loss": 2.5199, - "step": 345 - }, - { - "epoch": 0.06, - "learning_rate": 0.0007666440677966102, - "loss": 2.5435, - "step": 346 - }, - { - "epoch": 0.06, - "learning_rate": 0.0007665084745762712, - "loss": 2.5403, - "step": 347 - }, - { - "epoch": 0.06, - "learning_rate": 0.0007663728813559322, - "loss": 2.6631, - "step": 348 - }, - { - "epoch": 0.06, - "learning_rate": 0.0007662372881355933, - "loss": 2.6509, - "step": 349 - }, - { - "epoch": 0.06, - "learning_rate": 0.0007661016949152543, - "loss": 2.5466, - "step": 350 - }, - { - "epoch": 0.06, - "learning_rate": 0.0007659661016949153, - "loss": 2.6346, - "step": 351 - }, - { - "epoch": 0.06, - "learning_rate": 0.0007658305084745764, - "loss": 2.6195, - "step": 352 - }, - { - "epoch": 0.06, - "learning_rate": 0.0007656949152542373, - "loss": 2.5258, - "step": 353 - }, - { - "epoch": 0.06, - "learning_rate": 0.0007655593220338983, - "loss": 2.562, - "step": 354 - }, - { - "epoch": 0.06, - "learning_rate": 0.0007654237288135593, - "loss": 2.5812, - "step": 355 - }, - { - "epoch": 0.06, - "learning_rate": 0.0007652881355932204, - "loss": 2.5627, - "step": 356 - }, - { - "epoch": 0.06, - "learning_rate": 0.0007651525423728814, - "loss": 2.5146, - "step": 357 - }, - { - "epoch": 0.06, - "learning_rate": 0.0007650169491525425, - "loss": 2.518, - "step": 358 - }, - { - "epoch": 0.06, - "learning_rate": 0.0007648813559322034, - "loss": 2.5651, - "step": 359 - }, - { - "epoch": 0.06, - "learning_rate": 0.0007647457627118644, - "loss": 2.5486, - "step": 360 - }, - { - "epoch": 0.06, - "learning_rate": 0.0007646101694915254, - "loss": 2.5635, - "step": 361 - }, - { - "epoch": 0.06, - "learning_rate": 0.0007644745762711865, - "loss": 2.5231, - "step": 362 - }, - { - "epoch": 0.06, - "learning_rate": 0.0007643389830508475, - "loss": 2.5416, - "step": 363 - }, - { - "epoch": 0.06, - "learning_rate": 0.0007642033898305086, - "loss": 2.4828, - "step": 364 - }, - { - "epoch": 0.06, - "learning_rate": 0.0007640677966101695, - "loss": 2.5094, - "step": 365 - }, - { - "epoch": 0.06, - "learning_rate": 0.0007639322033898306, - "loss": 2.5963, - "step": 366 - }, - { - "epoch": 0.06, - "learning_rate": 0.0007637966101694915, - "loss": 2.5987, - "step": 367 - }, - { - "epoch": 0.06, - "learning_rate": 0.0007636610169491526, - "loss": 2.4665, - "step": 368 - }, - { - "epoch": 0.06, - "learning_rate": 0.0007635254237288136, - "loss": 2.5982, - "step": 369 - }, - { - "epoch": 0.06, - "learning_rate": 0.0007633898305084747, - "loss": 2.4934, - "step": 370 - }, - { - "epoch": 0.06, - "learning_rate": 0.0007632542372881356, - "loss": 2.5585, - "step": 371 - }, - { - "epoch": 0.06, - "learning_rate": 0.0007631186440677966, - "loss": 2.595, - "step": 372 - }, - { - "epoch": 0.06, - "learning_rate": 0.0007629830508474577, - "loss": 2.5229, - "step": 373 - }, - { - "epoch": 0.06, - "learning_rate": 0.0007628474576271187, - "loss": 2.5127, - "step": 374 - }, - { - "epoch": 0.06, - "learning_rate": 0.0007627118644067797, - "loss": 2.5398, - "step": 375 - }, - { - "epoch": 0.06, - "learning_rate": 0.0007625762711864407, - "loss": 2.4711, - "step": 376 - }, - { - "epoch": 0.06, - "learning_rate": 0.0007624406779661017, - "loss": 2.5616, - "step": 377 - }, - { - "epoch": 0.06, - "learning_rate": 0.0007623050847457627, - "loss": 2.4629, - "step": 378 - }, - { - "epoch": 0.06, - "learning_rate": 0.0007621694915254238, - "loss": 2.4795, - "step": 379 - }, - { - "epoch": 0.06, - "learning_rate": 0.0007620338983050848, - "loss": 2.4815, - "step": 380 - }, - { - "epoch": 0.06, - "learning_rate": 0.0007618983050847458, - "loss": 2.5178, - "step": 381 - }, - { - "epoch": 0.06, - "learning_rate": 0.0007617627118644068, - "loss": 2.535, - "step": 382 - }, - { - "epoch": 0.06, - "learning_rate": 0.0007616271186440678, - "loss": 2.5254, - "step": 383 - }, - { - "epoch": 0.06, - "learning_rate": 0.0007614915254237288, - "loss": 2.543, - "step": 384 - }, - { - "epoch": 0.06, - "learning_rate": 0.0007613559322033899, - "loss": 2.5363, - "step": 385 - }, - { - "epoch": 0.06, - "learning_rate": 0.0007612203389830509, - "loss": 2.5608, - "step": 386 - }, - { - "epoch": 0.06, - "learning_rate": 0.000761084745762712, - "loss": 2.5059, - "step": 387 - }, - { - "epoch": 0.06, - "learning_rate": 0.000760949152542373, - "loss": 2.4691, - "step": 388 - }, - { - "epoch": 0.06, - "learning_rate": 0.0007608135593220339, - "loss": 2.5462, - "step": 389 - }, - { - "epoch": 0.07, - "learning_rate": 0.0007606779661016949, - "loss": 2.5665, - "step": 390 - }, - { - "epoch": 0.07, - "learning_rate": 0.0007605423728813559, - "loss": 2.4889, - "step": 391 - }, - { - "epoch": 0.07, - "learning_rate": 0.000760406779661017, - "loss": 2.4813, - "step": 392 - }, - { - "epoch": 0.07, - "learning_rate": 0.000760271186440678, - "loss": 2.5234, - "step": 393 - }, - { - "epoch": 0.07, - "learning_rate": 0.0007601355932203391, - "loss": 2.4285, - "step": 394 - }, - { - "epoch": 0.07, - "learning_rate": 0.00076, - "loss": 2.484, - "step": 395 - }, - { - "epoch": 0.07, - "learning_rate": 0.000759864406779661, - "loss": 2.517, - "step": 396 - }, - { - "epoch": 0.07, - "learning_rate": 0.000759728813559322, - "loss": 2.5167, - "step": 397 - }, - { - "epoch": 0.07, - "learning_rate": 0.0007595932203389831, - "loss": 2.5136, - "step": 398 - }, - { - "epoch": 0.07, - "learning_rate": 0.0007594576271186441, - "loss": 2.4747, - "step": 399 - }, - { - "epoch": 0.07, - "learning_rate": 0.0007593220338983052, - "loss": 2.4998, - "step": 400 - }, - { - "epoch": 0.07, - "learning_rate": 0.0007591864406779661, - "loss": 2.5182, - "step": 401 - }, - { - "epoch": 0.07, - "learning_rate": 0.0007590508474576272, - "loss": 2.4585, - "step": 402 - }, - { - "epoch": 0.07, - "learning_rate": 0.0007589152542372881, - "loss": 2.5584, - "step": 403 - }, - { - "epoch": 0.07, - "learning_rate": 0.0007587796610169492, - "loss": 2.4856, - "step": 404 - }, - { - "epoch": 0.07, - "learning_rate": 0.0007586440677966102, - "loss": 2.4919, - "step": 405 - }, - { - "epoch": 0.07, - "learning_rate": 0.0007585084745762713, - "loss": 2.5149, - "step": 406 - }, - { - "epoch": 0.07, - "learning_rate": 0.0007583728813559322, - "loss": 2.522, - "step": 407 - }, - { - "epoch": 0.07, - "learning_rate": 0.0007582372881355933, - "loss": 2.4943, - "step": 408 - }, - { - "epoch": 0.07, - "learning_rate": 0.0007581016949152543, - "loss": 2.5093, - "step": 409 - }, - { - "epoch": 0.07, - "learning_rate": 0.0007579661016949153, - "loss": 2.517, - "step": 410 - }, - { - "epoch": 0.07, - "learning_rate": 0.0007578305084745763, - "loss": 2.4795, - "step": 411 - }, - { - "epoch": 0.07, - "learning_rate": 0.0007576949152542373, - "loss": 2.5078, - "step": 412 - }, - { - "epoch": 0.07, - "learning_rate": 0.0007575593220338983, - "loss": 2.5792, - "step": 413 - }, - { - "epoch": 0.07, - "learning_rate": 0.0007574237288135593, - "loss": 2.5151, - "step": 414 - }, - { - "epoch": 0.07, - "learning_rate": 0.0007572881355932204, - "loss": 2.4612, - "step": 415 - }, - { - "epoch": 0.07, - "learning_rate": 0.0007571525423728814, - "loss": 2.4582, - "step": 416 - }, - { - "epoch": 0.07, - "learning_rate": 0.0007570169491525424, - "loss": 2.5092, - "step": 417 - }, - { - "epoch": 0.07, - "learning_rate": 0.0007568813559322034, - "loss": 2.4834, - "step": 418 - }, - { - "epoch": 0.07, - "learning_rate": 0.0007567457627118644, - "loss": 2.5014, - "step": 419 - }, - { - "epoch": 0.07, - "learning_rate": 0.0007566101694915254, - "loss": 2.4331, - "step": 420 - }, - { - "epoch": 0.07, - "learning_rate": 0.0007564745762711865, - "loss": 2.5328, - "step": 421 - }, - { - "epoch": 0.07, - "learning_rate": 0.0007563389830508475, - "loss": 2.4425, - "step": 422 - }, - { - "epoch": 0.07, - "learning_rate": 0.0007562033898305086, - "loss": 2.4065, - "step": 423 - }, - { - "epoch": 0.07, - "learning_rate": 0.0007560677966101696, - "loss": 2.4847, - "step": 424 - }, - { - "epoch": 0.07, - "learning_rate": 0.0007559322033898305, - "loss": 2.5003, - "step": 425 - }, - { - "epoch": 0.07, - "learning_rate": 0.0007557966101694915, - "loss": 2.5485, - "step": 426 - }, - { - "epoch": 0.07, - "learning_rate": 0.0007556610169491526, - "loss": 2.5025, - "step": 427 - }, - { - "epoch": 0.07, - "learning_rate": 0.0007555254237288136, - "loss": 2.4961, - "step": 428 - }, - { - "epoch": 0.07, - "learning_rate": 0.0007553898305084747, - "loss": 2.5246, - "step": 429 - }, - { - "epoch": 0.07, - "learning_rate": 0.0007552542372881357, - "loss": 2.4668, - "step": 430 - }, - { - "epoch": 0.07, - "learning_rate": 0.0007551186440677966, - "loss": 2.5051, - "step": 431 - }, - { - "epoch": 0.07, - "learning_rate": 0.0007549830508474576, - "loss": 2.4929, - "step": 432 - }, - { - "epoch": 0.07, - "learning_rate": 0.0007548474576271186, - "loss": 2.4197, - "step": 433 - }, - { - "epoch": 0.07, - "learning_rate": 0.0007547118644067797, - "loss": 2.5754, - "step": 434 - }, - { - "epoch": 0.07, - "learning_rate": 0.0007545762711864407, - "loss": 2.4829, - "step": 435 - }, - { - "epoch": 0.07, - "learning_rate": 0.0007544406779661018, - "loss": 2.4313, - "step": 436 - }, - { - "epoch": 0.07, - "learning_rate": 0.0007543050847457627, - "loss": 2.4745, - "step": 437 - }, - { - "epoch": 0.07, - "learning_rate": 0.0007541694915254237, - "loss": 2.4724, - "step": 438 - }, - { - "epoch": 0.07, - "learning_rate": 0.0007540338983050847, - "loss": 2.4648, - "step": 439 - }, - { - "epoch": 0.07, - "learning_rate": 0.0007538983050847458, - "loss": 2.4234, - "step": 440 - }, - { - "epoch": 0.07, - "learning_rate": 0.0007537627118644068, - "loss": 2.4681, - "step": 441 - }, - { - "epoch": 0.07, - "learning_rate": 0.0007536271186440679, - "loss": 2.4547, - "step": 442 - }, - { - "epoch": 0.07, - "learning_rate": 0.0007534915254237288, - "loss": 2.4622, - "step": 443 - }, - { - "epoch": 0.07, - "learning_rate": 0.0007533559322033899, - "loss": 2.5777, - "step": 444 - }, - { - "epoch": 0.07, - "learning_rate": 0.0007532203389830509, - "loss": 2.4368, - "step": 445 - }, - { - "epoch": 0.07, - "learning_rate": 0.0007530847457627119, - "loss": 2.5213, - "step": 446 - }, - { - "epoch": 0.07, - "learning_rate": 0.0007529491525423729, - "loss": 2.4985, - "step": 447 - }, - { - "epoch": 0.07, - "learning_rate": 0.000752813559322034, - "loss": 2.4489, - "step": 448 - }, - { - "epoch": 0.07, - "learning_rate": 0.0007526779661016949, - "loss": 2.5418, - "step": 449 - }, - { - "epoch": 0.07, - "learning_rate": 0.000752542372881356, - "loss": 2.4771, - "step": 450 - }, - { - "epoch": 0.08, - "learning_rate": 0.000752406779661017, - "loss": 2.5463, - "step": 451 - }, - { - "epoch": 0.08, - "learning_rate": 0.000752271186440678, - "loss": 2.4252, - "step": 452 - }, - { - "epoch": 0.08, - "learning_rate": 0.000752135593220339, - "loss": 2.5508, - "step": 453 - }, - { - "epoch": 0.08, - "learning_rate": 0.000752, - "loss": 2.506, - "step": 454 - }, - { - "epoch": 0.08, - "learning_rate": 0.000751864406779661, - "loss": 2.484, - "step": 455 - }, - { - "epoch": 0.08, - "learning_rate": 0.000751728813559322, - "loss": 2.4426, - "step": 456 - }, - { - "epoch": 0.08, - "learning_rate": 0.0007515932203389831, - "loss": 2.5205, - "step": 457 - }, - { - "epoch": 0.08, - "learning_rate": 0.0007514576271186441, - "loss": 2.4948, - "step": 458 - }, - { - "epoch": 0.08, - "learning_rate": 0.0007513220338983052, - "loss": 2.4423, - "step": 459 - }, - { - "epoch": 0.08, - "learning_rate": 0.0007511864406779661, - "loss": 2.522, - "step": 460 - }, - { - "epoch": 0.08, - "learning_rate": 0.0007510508474576271, - "loss": 2.5116, - "step": 461 - }, - { - "epoch": 0.08, - "learning_rate": 0.0007509152542372881, - "loss": 2.4785, - "step": 462 - }, - { - "epoch": 0.08, - "learning_rate": 0.0007507796610169492, - "loss": 2.5521, - "step": 463 - }, - { - "epoch": 0.08, - "learning_rate": 0.0007506440677966102, - "loss": 2.52, - "step": 464 - }, - { - "epoch": 0.08, - "learning_rate": 0.0007505084745762713, - "loss": 2.5449, - "step": 465 - }, - { - "epoch": 0.08, - "learning_rate": 0.0007503728813559323, - "loss": 2.4744, - "step": 466 - }, - { - "epoch": 0.08, - "learning_rate": 0.0007502372881355932, - "loss": 2.5192, - "step": 467 - }, - { - "epoch": 0.08, - "learning_rate": 0.0007501016949152542, - "loss": 2.4494, - "step": 468 - }, - { - "epoch": 0.08, - "learning_rate": 0.0007499661016949153, - "loss": 2.5145, - "step": 469 - }, - { - "epoch": 0.08, - "learning_rate": 0.0007498305084745763, - "loss": 2.4541, - "step": 470 - }, - { - "epoch": 0.08, - "learning_rate": 0.0007496949152542374, - "loss": 2.563, - "step": 471 - }, - { - "epoch": 0.08, - "learning_rate": 0.0007495593220338984, - "loss": 2.4924, - "step": 472 - }, - { - "epoch": 0.08, - "learning_rate": 0.0007494237288135595, - "loss": 2.5099, - "step": 473 - }, - { - "epoch": 0.08, - "learning_rate": 0.0007492881355932203, - "loss": 2.6177, - "step": 474 - }, - { - "epoch": 0.08, - "learning_rate": 0.0007491525423728813, - "loss": 2.5639, - "step": 475 - }, - { - "epoch": 0.08, - "learning_rate": 0.0007490169491525424, - "loss": 2.4748, - "step": 476 - }, - { - "epoch": 0.08, - "learning_rate": 0.0007488813559322034, - "loss": 2.4668, - "step": 477 - }, - { - "epoch": 0.08, - "learning_rate": 0.0007487457627118645, - "loss": 2.6352, - "step": 478 - }, - { - "epoch": 0.08, - "learning_rate": 0.0007486101694915254, - "loss": 2.5874, - "step": 479 - }, - { - "epoch": 0.08, - "learning_rate": 0.0007484745762711865, - "loss": 2.5707, - "step": 480 - }, - { - "epoch": 0.08, - "learning_rate": 0.0007483389830508475, - "loss": 2.5545, - "step": 481 - }, - { - "epoch": 0.08, - "learning_rate": 0.0007482033898305085, - "loss": 2.5301, - "step": 482 - }, - { - "epoch": 0.08, - "learning_rate": 0.0007480677966101695, - "loss": 2.5508, - "step": 483 - }, - { - "epoch": 0.08, - "learning_rate": 0.0007479322033898306, - "loss": 2.5526, - "step": 484 - }, - { - "epoch": 0.08, - "learning_rate": 0.0007477966101694915, - "loss": 2.4831, - "step": 485 - }, - { - "epoch": 0.08, - "learning_rate": 0.0007476610169491526, - "loss": 2.4991, - "step": 486 - }, - { - "epoch": 0.08, - "learning_rate": 0.0007475254237288136, - "loss": 2.4024, - "step": 487 - }, - { - "epoch": 0.08, - "learning_rate": 0.0007473898305084746, - "loss": 2.5339, - "step": 488 - }, - { - "epoch": 0.08, - "learning_rate": 0.0007472542372881356, - "loss": 2.5592, - "step": 489 - }, - { - "epoch": 0.08, - "learning_rate": 0.0007471186440677967, - "loss": 2.4575, - "step": 490 - }, - { - "epoch": 0.08, - "learning_rate": 0.0007469830508474576, - "loss": 2.4742, - "step": 491 - }, - { - "epoch": 0.08, - "learning_rate": 0.0007468474576271187, - "loss": 2.4736, - "step": 492 - }, - { - "epoch": 0.08, - "learning_rate": 0.0007467118644067797, - "loss": 2.4658, - "step": 493 - }, - { - "epoch": 0.08, - "learning_rate": 0.0007465762711864408, - "loss": 2.5223, - "step": 494 - }, - { - "epoch": 0.08, - "learning_rate": 0.0007464406779661017, - "loss": 2.4854, - "step": 495 - }, - { - "epoch": 0.08, - "learning_rate": 0.0007463050847457627, - "loss": 2.4874, - "step": 496 - }, - { - "epoch": 0.08, - "learning_rate": 0.0007461694915254237, - "loss": 2.5765, - "step": 497 - }, - { - "epoch": 0.08, - "learning_rate": 0.0007460338983050847, - "loss": 2.5211, - "step": 498 - }, - { - "epoch": 0.08, - "learning_rate": 0.0007458983050847458, - "loss": 2.5138, - "step": 499 - }, - { - "epoch": 0.08, - "learning_rate": 0.0007457627118644068, - "loss": 2.4739, - "step": 500 - }, - { - "epoch": 0.08, - "eval_gen_len": 19.0, - "eval_loss": 2.2723255157470703, - "eval_rouge1": 0.2537, - "eval_rouge2": 0.0882, - "eval_rougeL": 0.2125, - "eval_rougeLsum": 0.2111, - "eval_runtime": 18.6819, - "eval_samples_per_second": 2.676, - "eval_steps_per_second": 0.375, - "step": 500 - }, - { - "epoch": 0.08, - "learning_rate": 0.0007456271186440679, - "loss": 2.5046, - "step": 501 - }, - { - "epoch": 0.08, - "learning_rate": 0.0007454915254237289, - "loss": 2.5441, - "step": 502 - }, - { - "epoch": 0.08, - "learning_rate": 0.0007453559322033898, - "loss": 2.522, - "step": 503 - }, - { - "epoch": 0.08, - "learning_rate": 0.0007452203389830508, - "loss": 2.4973, - "step": 504 - }, - { - "epoch": 0.08, - "learning_rate": 0.0007450847457627119, - "loss": 2.5123, - "step": 505 - }, - { - "epoch": 0.08, - "learning_rate": 0.0007449491525423729, - "loss": 2.5534, - "step": 506 - }, - { - "epoch": 0.08, - "learning_rate": 0.000744813559322034, - "loss": 2.5219, - "step": 507 - }, - { - "epoch": 0.08, - "learning_rate": 0.000744677966101695, - "loss": 2.5597, - "step": 508 - }, - { - "epoch": 0.08, - "learning_rate": 0.000744542372881356, - "loss": 2.4751, - "step": 509 - }, - { - "epoch": 0.09, - "learning_rate": 0.0007444067796610169, - "loss": 2.4678, - "step": 510 - }, - { - "epoch": 0.09, - "learning_rate": 0.000744271186440678, - "loss": 2.5362, - "step": 511 - }, - { - "epoch": 0.09, - "learning_rate": 0.000744135593220339, - "loss": 2.4245, - "step": 512 - }, - { - "epoch": 0.09, - "learning_rate": 0.0007440000000000001, - "loss": 2.5493, - "step": 513 - }, - { - "epoch": 0.09, - "learning_rate": 0.0007438644067796611, - "loss": 2.4641, - "step": 514 - }, - { - "epoch": 0.09, - "learning_rate": 0.0007437288135593222, - "loss": 2.5316, - "step": 515 - }, - { - "epoch": 0.09, - "learning_rate": 0.0007435932203389831, - "loss": 2.4449, - "step": 516 - }, - { - "epoch": 0.09, - "learning_rate": 0.000743457627118644, - "loss": 2.441, - "step": 517 - }, - { - "epoch": 0.09, - "learning_rate": 0.0007433220338983051, - "loss": 2.399, - "step": 518 - }, - { - "epoch": 0.09, - "learning_rate": 0.0007431864406779661, - "loss": 2.5141, - "step": 519 - }, - { - "epoch": 0.09, - "learning_rate": 0.0007430508474576272, - "loss": 2.4477, - "step": 520 - }, - { - "epoch": 0.09, - "learning_rate": 0.0007429152542372881, - "loss": 2.5378, - "step": 521 - }, - { - "epoch": 0.09, - "learning_rate": 0.0007427796610169492, - "loss": 2.4214, - "step": 522 - }, - { - "epoch": 0.09, - "learning_rate": 0.0007426440677966102, - "loss": 2.4254, - "step": 523 - }, - { - "epoch": 0.09, - "learning_rate": 0.0007425084745762712, - "loss": 2.4235, - "step": 524 - }, - { - "epoch": 0.09, - "learning_rate": 0.0007423728813559322, - "loss": 2.4348, - "step": 525 - }, - { - "epoch": 0.09, - "learning_rate": 0.0007422372881355933, - "loss": 2.4923, - "step": 526 - }, - { - "epoch": 0.09, - "learning_rate": 0.0007421016949152542, - "loss": 2.4928, - "step": 527 - }, - { - "epoch": 0.09, - "learning_rate": 0.0007419661016949153, - "loss": 2.4542, - "step": 528 - }, - { - "epoch": 0.09, - "learning_rate": 0.0007418305084745763, - "loss": 2.4554, - "step": 529 - }, - { - "epoch": 0.09, - "learning_rate": 0.0007416949152542374, - "loss": 2.4169, - "step": 530 - }, - { - "epoch": 0.09, - "learning_rate": 0.0007415593220338983, - "loss": 2.4257, - "step": 531 - }, - { - "epoch": 0.09, - "learning_rate": 0.0007414237288135594, - "loss": 2.4808, - "step": 532 - }, - { - "epoch": 0.09, - "learning_rate": 0.0007412881355932203, - "loss": 2.4229, - "step": 533 - }, - { - "epoch": 0.09, - "learning_rate": 0.0007411525423728814, - "loss": 2.4579, - "step": 534 - }, - { - "epoch": 0.09, - "learning_rate": 0.0007410169491525424, - "loss": 2.4403, - "step": 535 - }, - { - "epoch": 0.09, - "learning_rate": 0.0007408813559322035, - "loss": 2.4362, - "step": 536 - }, - { - "epoch": 0.09, - "learning_rate": 0.0007407457627118645, - "loss": 2.5626, - "step": 537 - }, - { - "epoch": 0.09, - "learning_rate": 0.0007406101694915255, - "loss": 2.3912, - "step": 538 - }, - { - "epoch": 0.09, - "learning_rate": 0.0007404745762711864, - "loss": 2.4285, - "step": 539 - }, - { - "epoch": 0.09, - "learning_rate": 0.0007403389830508474, - "loss": 2.4435, - "step": 540 - }, - { - "epoch": 0.09, - "learning_rate": 0.0007402033898305085, - "loss": 2.3989, - "step": 541 - }, - { - "epoch": 0.09, - "learning_rate": 0.0007400677966101695, - "loss": 2.5263, - "step": 542 - }, - { - "epoch": 0.09, - "learning_rate": 0.0007399322033898306, - "loss": 2.506, - "step": 543 - }, - { - "epoch": 0.09, - "learning_rate": 0.0007397966101694916, - "loss": 2.4952, - "step": 544 - }, - { - "epoch": 0.09, - "learning_rate": 0.0007396610169491525, - "loss": 2.4743, - "step": 545 - }, - { - "epoch": 0.09, - "learning_rate": 0.0007395254237288135, - "loss": 2.4568, - "step": 546 - }, - { - "epoch": 0.09, - "learning_rate": 0.0007393898305084746, - "loss": 2.5622, - "step": 547 - }, - { - "epoch": 0.09, - "learning_rate": 0.0007392542372881356, - "loss": 2.4803, - "step": 548 - }, - { - "epoch": 0.09, - "learning_rate": 0.0007391186440677967, - "loss": 2.4706, - "step": 549 - }, - { - "epoch": 0.09, - "learning_rate": 0.0007389830508474577, - "loss": 2.4296, - "step": 550 - }, - { - "epoch": 0.09, - "learning_rate": 0.0007388474576271188, - "loss": 2.5034, - "step": 551 - }, - { - "epoch": 0.09, - "learning_rate": 0.0007387118644067797, - "loss": 2.4217, - "step": 552 - }, - { - "epoch": 0.09, - "learning_rate": 0.0007385762711864407, - "loss": 2.5087, - "step": 553 - }, - { - "epoch": 0.09, - "learning_rate": 0.0007384406779661017, - "loss": 2.4901, - "step": 554 - }, - { - "epoch": 0.09, - "learning_rate": 0.0007383050847457628, - "loss": 2.4327, - "step": 555 - }, - { - "epoch": 0.09, - "learning_rate": 0.0007381694915254238, - "loss": 2.4259, - "step": 556 - }, - { - "epoch": 0.09, - "learning_rate": 0.0007380338983050849, - "loss": 2.5199, - "step": 557 - }, - { - "epoch": 0.09, - "learning_rate": 0.0007378983050847458, - "loss": 2.4956, - "step": 558 - }, - { - "epoch": 0.09, - "learning_rate": 0.0007377627118644068, - "loss": 2.4722, - "step": 559 - }, - { - "epoch": 0.09, - "learning_rate": 0.0007376271186440678, - "loss": 2.4604, - "step": 560 - }, - { - "epoch": 0.09, - "learning_rate": 0.0007374915254237288, - "loss": 2.5281, - "step": 561 - }, - { - "epoch": 0.09, - "learning_rate": 0.0007373559322033899, - "loss": 2.482, - "step": 562 - }, - { - "epoch": 0.09, - "learning_rate": 0.0007372203389830509, - "loss": 2.5249, - "step": 563 - }, - { - "epoch": 0.09, - "learning_rate": 0.0007370847457627119, - "loss": 2.4636, - "step": 564 - }, - { - "epoch": 0.09, - "learning_rate": 0.0007369491525423729, - "loss": 2.4367, - "step": 565 - }, - { - "epoch": 0.09, - "learning_rate": 0.0007368135593220339, - "loss": 2.5058, - "step": 566 - }, - { - "epoch": 0.09, - "learning_rate": 0.0007366779661016949, - "loss": 2.5498, - "step": 567 - }, - { - "epoch": 0.09, - "learning_rate": 0.000736542372881356, - "loss": 2.5013, - "step": 568 - }, - { - "epoch": 0.09, - "learning_rate": 0.000736406779661017, - "loss": 2.4334, - "step": 569 - }, - { - "epoch": 0.1, - "learning_rate": 0.000736271186440678, - "loss": 2.4838, - "step": 570 - }, - { - "epoch": 0.1, - "learning_rate": 0.000736135593220339, - "loss": 2.4513, - "step": 571 - }, - { - "epoch": 0.1, - "learning_rate": 0.0007360000000000001, - "loss": 2.3977, - "step": 572 - }, - { - "epoch": 0.1, - "learning_rate": 0.0007358644067796611, - "loss": 2.5123, - "step": 573 - }, - { - "epoch": 0.1, - "learning_rate": 0.0007357288135593221, - "loss": 2.4497, - "step": 574 - }, - { - "epoch": 0.1, - "learning_rate": 0.000735593220338983, - "loss": 2.5084, - "step": 575 - }, - { - "epoch": 0.1, - "learning_rate": 0.0007354576271186441, - "loss": 2.4539, - "step": 576 - }, - { - "epoch": 0.1, - "learning_rate": 0.0007353220338983051, - "loss": 2.5248, - "step": 577 - }, - { - "epoch": 0.1, - "learning_rate": 0.0007351864406779661, - "loss": 2.4247, - "step": 578 - }, - { - "epoch": 0.1, - "learning_rate": 0.0007350508474576272, - "loss": 2.4898, - "step": 579 - }, - { - "epoch": 0.1, - "learning_rate": 0.0007349152542372882, - "loss": 2.4899, - "step": 580 - }, - { - "epoch": 0.1, - "learning_rate": 0.0007347796610169492, - "loss": 2.3781, - "step": 581 - }, - { - "epoch": 0.1, - "learning_rate": 0.0007346440677966101, - "loss": 2.4413, - "step": 582 - }, - { - "epoch": 0.1, - "learning_rate": 0.0007345084745762712, - "loss": 2.4239, - "step": 583 - }, - { - "epoch": 0.1, - "learning_rate": 0.0007343728813559322, - "loss": 2.482, - "step": 584 - }, - { - "epoch": 0.1, - "learning_rate": 0.0007342372881355933, - "loss": 2.522, - "step": 585 - }, - { - "epoch": 0.1, - "learning_rate": 0.0007341016949152543, - "loss": 2.516, - "step": 586 - }, - { - "epoch": 0.1, - "learning_rate": 0.0007339661016949154, - "loss": 2.4424, - "step": 587 - }, - { - "epoch": 0.1, - "learning_rate": 0.0007338305084745762, - "loss": 2.4529, - "step": 588 - }, - { - "epoch": 0.1, - "learning_rate": 0.0007336949152542373, - "loss": 2.4577, - "step": 589 - }, - { - "epoch": 0.1, - "learning_rate": 0.0007335593220338983, - "loss": 2.5327, - "step": 590 - }, - { - "epoch": 0.1, - "learning_rate": 0.0007334237288135594, - "loss": 2.5172, - "step": 591 - }, - { - "epoch": 0.1, - "learning_rate": 0.0007332881355932204, - "loss": 2.3609, - "step": 592 - }, - { - "epoch": 0.1, - "learning_rate": 0.0007331525423728815, - "loss": 2.5082, - "step": 593 - }, - { - "epoch": 0.1, - "learning_rate": 0.0007330169491525424, - "loss": 2.4528, - "step": 594 - }, - { - "epoch": 0.1, - "learning_rate": 0.0007328813559322034, - "loss": 2.435, - "step": 595 - }, - { - "epoch": 0.1, - "learning_rate": 0.0007327457627118644, - "loss": 2.4484, - "step": 596 - }, - { - "epoch": 0.1, - "learning_rate": 0.0007326101694915255, - "loss": 2.4585, - "step": 597 - }, - { - "epoch": 0.1, - "learning_rate": 0.0007324745762711865, - "loss": 2.4744, - "step": 598 - }, - { - "epoch": 0.1, - "learning_rate": 0.0007323389830508475, - "loss": 2.4258, - "step": 599 - }, - { - "epoch": 0.1, - "learning_rate": 0.0007322033898305085, - "loss": 2.4337, - "step": 600 - }, - { - "epoch": 0.1, - "learning_rate": 0.0007320677966101695, - "loss": 2.393, - "step": 601 - }, - { - "epoch": 0.1, - "learning_rate": 0.0007319322033898305, - "loss": 2.3678, - "step": 602 - }, - { - "epoch": 0.1, - "learning_rate": 0.0007317966101694915, - "loss": 2.3684, - "step": 603 - }, - { - "epoch": 0.1, - "learning_rate": 0.0007316610169491526, - "loss": 2.4779, - "step": 604 - }, - { - "epoch": 0.1, - "learning_rate": 0.0007315254237288136, - "loss": 2.3867, - "step": 605 - }, - { - "epoch": 0.1, - "learning_rate": 0.0007313898305084746, - "loss": 2.415, - "step": 606 - }, - { - "epoch": 0.1, - "learning_rate": 0.0007312542372881356, - "loss": 2.4105, - "step": 607 - }, - { - "epoch": 0.1, - "learning_rate": 0.0007311186440677967, - "loss": 2.4479, - "step": 608 - }, - { - "epoch": 0.1, - "learning_rate": 0.0007309830508474577, - "loss": 2.4101, - "step": 609 - }, - { - "epoch": 0.1, - "learning_rate": 0.0007308474576271187, - "loss": 2.4014, - "step": 610 - }, - { - "epoch": 0.1, - "learning_rate": 0.0007307118644067797, - "loss": 2.3743, - "step": 611 - }, - { - "epoch": 0.1, - "learning_rate": 0.0007305762711864407, - "loss": 2.4368, - "step": 612 - }, - { - "epoch": 0.1, - "learning_rate": 0.0007304406779661017, - "loss": 2.4901, - "step": 613 - }, - { - "epoch": 0.1, - "learning_rate": 0.0007303050847457628, - "loss": 2.4855, - "step": 614 - }, - { - "epoch": 0.1, - "learning_rate": 0.0007301694915254238, - "loss": 2.4016, - "step": 615 - }, - { - "epoch": 0.1, - "learning_rate": 0.0007300338983050848, - "loss": 2.4778, - "step": 616 - }, - { - "epoch": 0.1, - "learning_rate": 0.0007298983050847458, - "loss": 2.4468, - "step": 617 - }, - { - "epoch": 0.1, - "learning_rate": 0.0007297627118644068, - "loss": 2.4157, - "step": 618 - }, - { - "epoch": 0.1, - "learning_rate": 0.0007296271186440678, - "loss": 2.4675, - "step": 619 - }, - { - "epoch": 0.1, - "learning_rate": 0.0007294915254237288, - "loss": 2.4413, - "step": 620 - }, - { - "epoch": 0.1, - "learning_rate": 0.0007293559322033899, - "loss": 2.4071, - "step": 621 - }, - { - "epoch": 0.1, - "learning_rate": 0.0007292203389830509, - "loss": 2.4702, - "step": 622 - }, - { - "epoch": 0.1, - "learning_rate": 0.0007290847457627119, - "loss": 2.4494, - "step": 623 - }, - { - "epoch": 0.1, - "learning_rate": 0.0007289491525423728, - "loss": 2.4599, - "step": 624 - }, - { - "epoch": 0.1, - "learning_rate": 0.0007288135593220339, - "loss": 2.5185, - "step": 625 - }, - { - "epoch": 0.1, - "learning_rate": 0.0007286779661016949, - "loss": 2.5081, - "step": 626 - }, - { - "epoch": 0.1, - "learning_rate": 0.000728542372881356, - "loss": 2.4801, - "step": 627 - }, - { - "epoch": 0.1, - "learning_rate": 0.000728406779661017, - "loss": 2.4282, - "step": 628 - }, - { - "epoch": 0.1, - "learning_rate": 0.0007282711864406781, - "loss": 2.4096, - "step": 629 - }, - { - "epoch": 0.1, - "learning_rate": 0.000728135593220339, - "loss": 2.5748, - "step": 630 - }, - { - "epoch": 0.11, - "learning_rate": 0.000728, - "loss": 2.423, - "step": 631 - }, - { - "epoch": 0.11, - "learning_rate": 0.000727864406779661, - "loss": 2.453, - "step": 632 - }, - { - "epoch": 0.11, - "learning_rate": 0.0007277288135593221, - "loss": 2.3775, - "step": 633 - }, - { - "epoch": 0.11, - "learning_rate": 0.0007275932203389831, - "loss": 2.44, - "step": 634 - }, - { - "epoch": 0.11, - "learning_rate": 0.0007274576271186442, - "loss": 2.4774, - "step": 635 - }, - { - "epoch": 0.11, - "learning_rate": 0.0007273220338983051, - "loss": 2.4481, - "step": 636 - }, - { - "epoch": 0.11, - "learning_rate": 0.0007271864406779661, - "loss": 2.4393, - "step": 637 - }, - { - "epoch": 0.11, - "learning_rate": 0.0007270508474576271, - "loss": 2.3175, - "step": 638 - }, - { - "epoch": 0.11, - "learning_rate": 0.0007269152542372882, - "loss": 2.3686, - "step": 639 - }, - { - "epoch": 0.11, - "learning_rate": 0.0007267796610169492, - "loss": 2.5126, - "step": 640 - }, - { - "epoch": 0.11, - "learning_rate": 0.0007266440677966102, - "loss": 2.4521, - "step": 641 - }, - { - "epoch": 0.11, - "learning_rate": 0.0007265084745762712, - "loss": 2.4399, - "step": 642 - }, - { - "epoch": 0.11, - "learning_rate": 0.0007263728813559322, - "loss": 2.5264, - "step": 643 - }, - { - "epoch": 0.11, - "learning_rate": 0.0007262372881355933, - "loss": 2.4922, - "step": 644 - }, - { - "epoch": 0.11, - "learning_rate": 0.0007261016949152542, - "loss": 2.4339, - "step": 645 - }, - { - "epoch": 0.11, - "learning_rate": 0.0007259661016949153, - "loss": 2.4135, - "step": 646 - }, - { - "epoch": 0.11, - "learning_rate": 0.0007258305084745763, - "loss": 2.4049, - "step": 647 - }, - { - "epoch": 0.11, - "learning_rate": 0.0007256949152542373, - "loss": 2.3873, - "step": 648 - }, - { - "epoch": 0.11, - "learning_rate": 0.0007255593220338983, - "loss": 2.3503, - "step": 649 - }, - { - "epoch": 0.11, - "learning_rate": 0.0007254237288135594, - "loss": 2.4078, - "step": 650 - }, - { - "epoch": 0.11, - "learning_rate": 0.0007252881355932204, - "loss": 2.4081, - "step": 651 - }, - { - "epoch": 0.11, - "learning_rate": 0.0007251525423728814, - "loss": 2.3654, - "step": 652 - }, - { - "epoch": 0.11, - "learning_rate": 0.0007250169491525424, - "loss": 2.4208, - "step": 653 - }, - { - "epoch": 0.11, - "learning_rate": 0.0007248813559322034, - "loss": 2.388, - "step": 654 - }, - { - "epoch": 0.11, - "learning_rate": 0.0007247457627118644, - "loss": 2.4752, - "step": 655 - }, - { - "epoch": 0.11, - "learning_rate": 0.0007246101694915255, - "loss": 2.3668, - "step": 656 - }, - { - "epoch": 0.11, - "learning_rate": 0.0007244745762711865, - "loss": 2.4961, - "step": 657 - }, - { - "epoch": 0.11, - "learning_rate": 0.0007243389830508476, - "loss": 2.4887, - "step": 658 - }, - { - "epoch": 0.11, - "learning_rate": 0.0007242033898305085, - "loss": 2.3961, - "step": 659 - }, - { - "epoch": 0.11, - "learning_rate": 0.0007240677966101695, - "loss": 2.3916, - "step": 660 - }, - { - "epoch": 0.11, - "learning_rate": 0.0007239322033898305, - "loss": 2.4099, - "step": 661 - }, - { - "epoch": 0.11, - "learning_rate": 0.0007237966101694915, - "loss": 2.4145, - "step": 662 - }, - { - "epoch": 0.11, - "learning_rate": 0.0007236610169491526, - "loss": 2.436, - "step": 663 - }, - { - "epoch": 0.11, - "learning_rate": 0.0007235254237288136, - "loss": 2.459, - "step": 664 - }, - { - "epoch": 0.11, - "learning_rate": 0.0007233898305084747, - "loss": 2.4249, - "step": 665 - }, - { - "epoch": 0.11, - "learning_rate": 0.0007232542372881356, - "loss": 2.4617, - "step": 666 - }, - { - "epoch": 0.11, - "learning_rate": 0.0007231186440677966, - "loss": 2.3923, - "step": 667 - }, - { - "epoch": 0.11, - "learning_rate": 0.0007229830508474576, - "loss": 2.4061, - "step": 668 - }, - { - "epoch": 0.11, - "learning_rate": 0.0007228474576271187, - "loss": 2.3594, - "step": 669 - }, - { - "epoch": 0.11, - "learning_rate": 0.0007227118644067797, - "loss": 2.3783, - "step": 670 - }, - { - "epoch": 0.11, - "learning_rate": 0.0007225762711864408, - "loss": 2.4546, - "step": 671 - }, - { - "epoch": 0.11, - "learning_rate": 0.0007224406779661017, - "loss": 2.3831, - "step": 672 - }, - { - "epoch": 0.11, - "learning_rate": 0.0007223050847457627, - "loss": 2.4368, - "step": 673 - }, - { - "epoch": 0.11, - "learning_rate": 0.0007221694915254237, - "loss": 2.4975, - "step": 674 - }, - { - "epoch": 0.11, - "learning_rate": 0.0007220338983050848, - "loss": 2.332, - "step": 675 - }, - { - "epoch": 0.11, - "learning_rate": 0.0007218983050847458, - "loss": 2.4446, - "step": 676 - }, - { - "epoch": 0.11, - "learning_rate": 0.0007217627118644069, - "loss": 2.4593, - "step": 677 - }, - { - "epoch": 0.11, - "learning_rate": 0.0007216271186440678, - "loss": 2.4075, - "step": 678 - }, - { - "epoch": 0.11, - "learning_rate": 0.0007214915254237289, - "loss": 2.3628, - "step": 679 - }, - { - "epoch": 0.11, - "learning_rate": 0.0007213559322033899, - "loss": 2.3749, - "step": 680 - }, - { - "epoch": 0.11, - "learning_rate": 0.0007212203389830509, - "loss": 2.4293, - "step": 681 - }, - { - "epoch": 0.11, - "learning_rate": 0.0007210847457627119, - "loss": 2.4039, - "step": 682 - }, - { - "epoch": 0.11, - "learning_rate": 0.0007209491525423729, - "loss": 2.455, - "step": 683 - }, - { - "epoch": 0.11, - "learning_rate": 0.0007208135593220339, - "loss": 2.3378, - "step": 684 - }, - { - "epoch": 0.11, - "learning_rate": 0.0007206779661016949, - "loss": 2.4671, - "step": 685 - }, - { - "epoch": 0.11, - "learning_rate": 0.000720542372881356, - "loss": 2.4593, - "step": 686 - }, - { - "epoch": 0.11, - "learning_rate": 0.000720406779661017, - "loss": 2.3744, - "step": 687 - }, - { - "epoch": 0.11, - "learning_rate": 0.000720271186440678, - "loss": 2.4363, - "step": 688 - }, - { - "epoch": 0.11, - "learning_rate": 0.000720135593220339, - "loss": 2.3742, - "step": 689 - }, - { - "epoch": 0.12, - "learning_rate": 0.00072, - "loss": 2.3959, - "step": 690 - }, - { - "epoch": 0.12, - "learning_rate": 0.000719864406779661, - "loss": 2.4935, - "step": 691 - }, - { - "epoch": 0.12, - "learning_rate": 0.0007197288135593221, - "loss": 2.384, - "step": 692 - }, - { - "epoch": 0.12, - "learning_rate": 0.0007195932203389831, - "loss": 2.3549, - "step": 693 - }, - { - "epoch": 0.12, - "learning_rate": 0.0007194576271186441, - "loss": 2.387, - "step": 694 - }, - { - "epoch": 0.12, - "learning_rate": 0.0007193220338983051, - "loss": 2.4297, - "step": 695 - }, - { - "epoch": 0.12, - "learning_rate": 0.0007191864406779661, - "loss": 2.3856, - "step": 696 - }, - { - "epoch": 0.12, - "learning_rate": 0.0007190508474576271, - "loss": 2.4242, - "step": 697 - }, - { - "epoch": 0.12, - "learning_rate": 0.0007189152542372882, - "loss": 2.4865, - "step": 698 - }, - { - "epoch": 0.12, - "learning_rate": 0.0007187796610169492, - "loss": 2.4889, - "step": 699 - }, - { - "epoch": 0.12, - "learning_rate": 0.0007186440677966103, - "loss": 2.4123, - "step": 700 - }, - { - "epoch": 0.12, - "learning_rate": 0.0007185084745762713, - "loss": 2.4153, - "step": 701 - }, - { - "epoch": 0.12, - "learning_rate": 0.0007183728813559322, - "loss": 2.3682, - "step": 702 - }, - { - "epoch": 0.12, - "learning_rate": 0.0007182372881355932, - "loss": 2.375, - "step": 703 - }, - { - "epoch": 0.12, - "learning_rate": 0.0007181016949152542, - "loss": 2.4435, - "step": 704 - }, - { - "epoch": 0.12, - "learning_rate": 0.0007179661016949153, - "loss": 2.3552, - "step": 705 - }, - { - "epoch": 0.12, - "learning_rate": 0.0007178305084745763, - "loss": 2.4371, - "step": 706 - }, - { - "epoch": 0.12, - "learning_rate": 0.0007176949152542374, - "loss": 2.4124, - "step": 707 - }, - { - "epoch": 0.12, - "learning_rate": 0.0007175593220338983, - "loss": 2.4131, - "step": 708 - }, - { - "epoch": 0.12, - "learning_rate": 0.0007174237288135593, - "loss": 2.4439, - "step": 709 - }, - { - "epoch": 0.12, - "learning_rate": 0.0007172881355932203, - "loss": 2.4884, - "step": 710 - }, - { - "epoch": 0.12, - "learning_rate": 0.0007171525423728814, - "loss": 2.4471, - "step": 711 - }, - { - "epoch": 0.12, - "learning_rate": 0.0007170169491525424, - "loss": 2.4386, - "step": 712 - }, - { - "epoch": 0.12, - "learning_rate": 0.0007168813559322035, - "loss": 2.3794, - "step": 713 - }, - { - "epoch": 0.12, - "learning_rate": 0.0007167457627118644, - "loss": 2.4575, - "step": 714 - }, - { - "epoch": 0.12, - "learning_rate": 0.0007166101694915255, - "loss": 2.4469, - "step": 715 - }, - { - "epoch": 0.12, - "learning_rate": 0.0007164745762711864, - "loss": 2.4556, - "step": 716 - }, - { - "epoch": 0.12, - "learning_rate": 0.0007163389830508475, - "loss": 2.3753, - "step": 717 - }, - { - "epoch": 0.12, - "learning_rate": 0.0007162033898305085, - "loss": 2.3756, - "step": 718 - }, - { - "epoch": 0.12, - "learning_rate": 0.0007160677966101696, - "loss": 2.3641, - "step": 719 - }, - { - "epoch": 0.12, - "learning_rate": 0.0007159322033898305, - "loss": 2.3722, - "step": 720 - }, - { - "epoch": 0.12, - "learning_rate": 0.0007157966101694916, - "loss": 2.3381, - "step": 721 - }, - { - "epoch": 0.12, - "learning_rate": 0.0007156610169491526, - "loss": 2.454, - "step": 722 - }, - { - "epoch": 0.12, - "learning_rate": 0.0007155254237288136, - "loss": 2.3708, - "step": 723 - }, - { - "epoch": 0.12, - "learning_rate": 0.0007153898305084746, - "loss": 2.357, - "step": 724 - }, - { - "epoch": 0.12, - "learning_rate": 0.0007152542372881356, - "loss": 2.4356, - "step": 725 - }, - { - "epoch": 0.12, - "learning_rate": 0.0007151186440677966, - "loss": 2.4606, - "step": 726 - }, - { - "epoch": 0.12, - "learning_rate": 0.0007149830508474576, - "loss": 2.3936, - "step": 727 - }, - { - "epoch": 0.12, - "learning_rate": 0.0007148474576271187, - "loss": 2.3254, - "step": 728 - }, - { - "epoch": 0.12, - "learning_rate": 0.0007147118644067797, - "loss": 2.4692, - "step": 729 - }, - { - "epoch": 0.12, - "learning_rate": 0.0007145762711864407, - "loss": 2.3318, - "step": 730 - }, - { - "epoch": 0.12, - "learning_rate": 0.0007144406779661017, - "loss": 2.3905, - "step": 731 - }, - { - "epoch": 0.12, - "learning_rate": 0.0007143050847457627, - "loss": 2.4384, - "step": 732 - }, - { - "epoch": 0.12, - "learning_rate": 0.0007141694915254237, - "loss": 2.3551, - "step": 733 - }, - { - "epoch": 0.12, - "learning_rate": 0.0007140338983050848, - "loss": 2.3301, - "step": 734 - }, - { - "epoch": 0.12, - "learning_rate": 0.0007138983050847458, - "loss": 2.3521, - "step": 735 - }, - { - "epoch": 0.12, - "learning_rate": 0.0007137627118644069, - "loss": 2.4259, - "step": 736 - }, - { - "epoch": 0.12, - "learning_rate": 0.0007136271186440679, - "loss": 2.3472, - "step": 737 - }, - { - "epoch": 0.12, - "learning_rate": 0.0007134915254237288, - "loss": 2.449, - "step": 738 - }, - { - "epoch": 0.12, - "learning_rate": 0.0007133559322033898, - "loss": 2.3725, - "step": 739 - }, - { - "epoch": 0.12, - "learning_rate": 0.0007132203389830509, - "loss": 2.3895, - "step": 740 - }, - { - "epoch": 0.12, - "learning_rate": 0.0007130847457627119, - "loss": 2.3937, - "step": 741 - }, - { - "epoch": 0.12, - "learning_rate": 0.000712949152542373, - "loss": 2.4303, - "step": 742 - }, - { - "epoch": 0.12, - "learning_rate": 0.000712813559322034, - "loss": 2.4873, - "step": 743 - }, - { - "epoch": 0.12, - "learning_rate": 0.000712677966101695, - "loss": 2.4086, - "step": 744 - }, - { - "epoch": 0.12, - "learning_rate": 0.0007125423728813559, - "loss": 2.4093, - "step": 745 - }, - { - "epoch": 0.12, - "learning_rate": 0.0007124067796610169, - "loss": 2.3518, - "step": 746 - }, - { - "epoch": 0.12, - "learning_rate": 0.000712271186440678, - "loss": 2.3995, - "step": 747 - }, - { - "epoch": 0.12, - "learning_rate": 0.000712135593220339, - "loss": 2.3846, - "step": 748 - }, - { - "epoch": 0.12, - "learning_rate": 0.0007120000000000001, - "loss": 2.3605, - "step": 749 - }, - { - "epoch": 0.12, - "learning_rate": 0.000711864406779661, - "loss": 2.3471, - "step": 750 - }, - { - "epoch": 0.13, - "learning_rate": 0.000711728813559322, - "loss": 2.4907, - "step": 751 - }, - { - "epoch": 0.13, - "learning_rate": 0.000711593220338983, - "loss": 2.4028, - "step": 752 - }, - { - "epoch": 0.13, - "learning_rate": 0.0007114576271186441, - "loss": 2.4003, - "step": 753 - }, - { - "epoch": 0.13, - "learning_rate": 0.0007113220338983051, - "loss": 2.3622, - "step": 754 - }, - { - "epoch": 0.13, - "learning_rate": 0.0007111864406779662, - "loss": 2.3832, - "step": 755 - }, - { - "epoch": 0.13, - "learning_rate": 0.0007110508474576271, - "loss": 2.3646, - "step": 756 - }, - { - "epoch": 0.13, - "learning_rate": 0.0007109152542372882, - "loss": 2.3603, - "step": 757 - }, - { - "epoch": 0.13, - "learning_rate": 0.0007107796610169492, - "loss": 2.3926, - "step": 758 - }, - { - "epoch": 0.13, - "learning_rate": 0.0007106440677966102, - "loss": 2.3649, - "step": 759 - }, - { - "epoch": 0.13, - "learning_rate": 0.0007105084745762712, - "loss": 2.3249, - "step": 760 - }, - { - "epoch": 0.13, - "learning_rate": 0.0007103728813559323, - "loss": 2.4089, - "step": 761 - }, - { - "epoch": 0.13, - "learning_rate": 0.0007102372881355932, - "loss": 2.4467, - "step": 762 - }, - { - "epoch": 0.13, - "learning_rate": 0.0007101016949152543, - "loss": 2.3949, - "step": 763 - }, - { - "epoch": 0.13, - "learning_rate": 0.0007099661016949153, - "loss": 2.5011, - "step": 764 - }, - { - "epoch": 0.13, - "learning_rate": 0.0007098305084745763, - "loss": 2.3289, - "step": 765 - }, - { - "epoch": 0.13, - "learning_rate": 0.0007096949152542373, - "loss": 2.3052, - "step": 766 - }, - { - "epoch": 0.13, - "learning_rate": 0.0007095593220338983, - "loss": 2.4367, - "step": 767 - }, - { - "epoch": 0.13, - "learning_rate": 0.0007094237288135593, - "loss": 2.3933, - "step": 768 - }, - { - "epoch": 0.13, - "learning_rate": 0.0007092881355932203, - "loss": 2.3617, - "step": 769 - }, - { - "epoch": 0.13, - "learning_rate": 0.0007091525423728814, - "loss": 2.3929, - "step": 770 - }, - { - "epoch": 0.13, - "learning_rate": 0.0007090169491525424, - "loss": 2.3905, - "step": 771 - }, - { - "epoch": 0.13, - "learning_rate": 0.0007088813559322035, - "loss": 2.4145, - "step": 772 - }, - { - "epoch": 0.13, - "learning_rate": 0.0007087457627118644, - "loss": 2.3981, - "step": 773 - }, - { - "epoch": 0.13, - "learning_rate": 0.0007086101694915254, - "loss": 2.4041, - "step": 774 - }, - { - "epoch": 0.13, - "learning_rate": 0.0007084745762711864, - "loss": 2.3884, - "step": 775 - }, - { - "epoch": 0.13, - "learning_rate": 0.0007083389830508475, - "loss": 2.3439, - "step": 776 - }, - { - "epoch": 0.13, - "learning_rate": 0.0007082033898305085, - "loss": 2.4176, - "step": 777 - }, - { - "epoch": 0.13, - "learning_rate": 0.0007080677966101696, - "loss": 2.3962, - "step": 778 - }, - { - "epoch": 0.13, - "learning_rate": 0.0007079322033898306, - "loss": 2.3408, - "step": 779 - }, - { - "epoch": 0.13, - "learning_rate": 0.0007077966101694915, - "loss": 2.4429, - "step": 780 - }, - { - "epoch": 0.13, - "learning_rate": 0.0007076610169491525, - "loss": 2.3448, - "step": 781 - }, - { - "epoch": 0.13, - "learning_rate": 0.0007075254237288136, - "loss": 2.3651, - "step": 782 - }, - { - "epoch": 0.13, - "learning_rate": 0.0007073898305084746, - "loss": 2.3302, - "step": 783 - }, - { - "epoch": 0.13, - "learning_rate": 0.0007072542372881357, - "loss": 2.3918, - "step": 784 - }, - { - "epoch": 0.13, - "learning_rate": 0.0007071186440677967, - "loss": 2.2957, - "step": 785 - }, - { - "epoch": 0.13, - "learning_rate": 0.0007069830508474577, - "loss": 2.3282, - "step": 786 - }, - { - "epoch": 0.13, - "learning_rate": 0.0007068474576271186, - "loss": 2.4072, - "step": 787 - }, - { - "epoch": 0.13, - "learning_rate": 0.0007067118644067796, - "loss": 2.379, - "step": 788 - }, - { - "epoch": 0.13, - "learning_rate": 0.0007065762711864407, - "loss": 2.345, - "step": 789 - }, - { - "epoch": 0.13, - "learning_rate": 0.0007064406779661017, - "loss": 2.399, - "step": 790 - }, - { - "epoch": 0.13, - "learning_rate": 0.0007063050847457628, - "loss": 2.2841, - "step": 791 - }, - { - "epoch": 0.13, - "learning_rate": 0.0007061694915254238, - "loss": 2.4271, - "step": 792 - }, - { - "epoch": 0.13, - "learning_rate": 0.0007060338983050848, - "loss": 2.3763, - "step": 793 - }, - { - "epoch": 0.13, - "learning_rate": 0.0007058983050847458, - "loss": 2.3499, - "step": 794 - }, - { - "epoch": 0.13, - "learning_rate": 0.0007057627118644068, - "loss": 2.4007, - "step": 795 - }, - { - "epoch": 0.13, - "learning_rate": 0.0007056271186440678, - "loss": 2.4998, - "step": 796 - }, - { - "epoch": 0.13, - "learning_rate": 0.0007054915254237289, - "loss": 2.4022, - "step": 797 - }, - { - "epoch": 0.13, - "learning_rate": 0.0007053559322033899, - "loss": 2.366, - "step": 798 - }, - { - "epoch": 0.13, - "learning_rate": 0.0007052203389830509, - "loss": 2.4443, - "step": 799 - }, - { - "epoch": 0.13, - "learning_rate": 0.0007050847457627119, - "loss": 2.3921, - "step": 800 - }, - { - "epoch": 0.13, - "learning_rate": 0.0007049491525423729, - "loss": 2.3496, - "step": 801 - }, - { - "epoch": 0.13, - "learning_rate": 0.0007048135593220339, - "loss": 2.4019, - "step": 802 - }, - { - "epoch": 0.13, - "learning_rate": 0.000704677966101695, - "loss": 2.3823, - "step": 803 - }, - { - "epoch": 0.13, - "learning_rate": 0.000704542372881356, - "loss": 2.2986, - "step": 804 - }, - { - "epoch": 0.13, - "learning_rate": 0.000704406779661017, - "loss": 2.4475, - "step": 805 - }, - { - "epoch": 0.13, - "learning_rate": 0.000704271186440678, - "loss": 2.3688, - "step": 806 - }, - { - "epoch": 0.13, - "learning_rate": 0.000704135593220339, - "loss": 2.4812, - "step": 807 - }, - { - "epoch": 0.13, - "learning_rate": 0.0007040000000000001, - "loss": 2.3807, - "step": 808 - }, - { - "epoch": 0.13, - "learning_rate": 0.000703864406779661, - "loss": 2.4659, - "step": 809 - }, - { - "epoch": 0.14, - "learning_rate": 0.000703728813559322, - "loss": 2.3567, - "step": 810 - }, - { - "epoch": 0.14, - "learning_rate": 0.000703593220338983, - "loss": 2.3751, - "step": 811 - }, - { - "epoch": 0.14, - "learning_rate": 0.0007034576271186441, - "loss": 2.3882, - "step": 812 - }, - { - "epoch": 0.14, - "learning_rate": 0.0007033220338983051, - "loss": 2.3168, - "step": 813 - }, - { - "epoch": 0.14, - "learning_rate": 0.0007031864406779662, - "loss": 2.4714, - "step": 814 - }, - { - "epoch": 0.14, - "learning_rate": 0.0007030508474576272, - "loss": 2.3288, - "step": 815 - }, - { - "epoch": 0.14, - "learning_rate": 0.0007029152542372882, - "loss": 2.4024, - "step": 816 - }, - { - "epoch": 0.14, - "learning_rate": 0.0007027796610169491, - "loss": 2.3408, - "step": 817 - }, - { - "epoch": 0.14, - "learning_rate": 0.0007026440677966102, - "loss": 2.4105, - "step": 818 - }, - { - "epoch": 0.14, - "learning_rate": 0.0007025084745762712, - "loss": 2.3791, - "step": 819 - }, - { - "epoch": 0.14, - "learning_rate": 0.0007023728813559323, - "loss": 2.4346, - "step": 820 - }, - { - "epoch": 0.14, - "learning_rate": 0.0007022372881355933, - "loss": 2.4417, - "step": 821 - }, - { - "epoch": 0.14, - "learning_rate": 0.0007021016949152543, - "loss": 2.4494, - "step": 822 - }, - { - "epoch": 0.14, - "learning_rate": 0.0007019661016949152, - "loss": 2.3457, - "step": 823 - }, - { - "epoch": 0.14, - "learning_rate": 0.0007018305084745763, - "loss": 2.451, - "step": 824 - }, - { - "epoch": 0.14, - "learning_rate": 0.0007016949152542373, - "loss": 2.3142, - "step": 825 - }, - { - "epoch": 0.14, - "learning_rate": 0.0007015593220338984, - "loss": 2.3863, - "step": 826 - }, - { - "epoch": 0.14, - "learning_rate": 0.0007014237288135594, - "loss": 2.3904, - "step": 827 - }, - { - "epoch": 0.14, - "learning_rate": 0.0007012881355932204, - "loss": 2.3847, - "step": 828 - }, - { - "epoch": 0.14, - "learning_rate": 0.0007011525423728814, - "loss": 2.3727, - "step": 829 - }, - { - "epoch": 0.14, - "learning_rate": 0.0007010169491525424, - "loss": 2.381, - "step": 830 - }, - { - "epoch": 0.14, - "learning_rate": 0.0007008813559322034, - "loss": 2.4223, - "step": 831 - }, - { - "epoch": 0.14, - "learning_rate": 0.0007007457627118644, - "loss": 2.4274, - "step": 832 - }, - { - "epoch": 0.14, - "learning_rate": 0.0007006101694915255, - "loss": 2.4367, - "step": 833 - }, - { - "epoch": 0.14, - "learning_rate": 0.0007004745762711865, - "loss": 2.3457, - "step": 834 - }, - { - "epoch": 0.14, - "learning_rate": 0.0007003389830508475, - "loss": 2.3294, - "step": 835 - }, - { - "epoch": 0.14, - "learning_rate": 0.0007002033898305085, - "loss": 2.3088, - "step": 836 - }, - { - "epoch": 0.14, - "learning_rate": 0.0007000677966101695, - "loss": 2.3461, - "step": 837 - }, - { - "epoch": 0.14, - "learning_rate": 0.0006999322033898305, - "loss": 2.2568, - "step": 838 - }, - { - "epoch": 0.14, - "learning_rate": 0.0006997966101694916, - "loss": 2.3312, - "step": 839 - }, - { - "epoch": 0.14, - "learning_rate": 0.0006996610169491526, - "loss": 2.3209, - "step": 840 - }, - { - "epoch": 0.14, - "learning_rate": 0.0006995254237288136, - "loss": 2.3644, - "step": 841 - }, - { - "epoch": 0.14, - "learning_rate": 0.0006993898305084746, - "loss": 2.3074, - "step": 842 - }, - { - "epoch": 0.14, - "learning_rate": 0.0006992542372881357, - "loss": 2.3111, - "step": 843 - }, - { - "epoch": 0.14, - "learning_rate": 0.0006991186440677966, - "loss": 2.3602, - "step": 844 - }, - { - "epoch": 0.14, - "learning_rate": 0.0006989830508474577, - "loss": 2.3461, - "step": 845 - }, - { - "epoch": 0.14, - "learning_rate": 0.0006988474576271187, - "loss": 2.3739, - "step": 846 - }, - { - "epoch": 0.14, - "learning_rate": 0.0006987118644067797, - "loss": 2.3962, - "step": 847 - }, - { - "epoch": 0.14, - "learning_rate": 0.0006985762711864407, - "loss": 2.4721, - "step": 848 - }, - { - "epoch": 0.14, - "learning_rate": 0.0006984406779661017, - "loss": 2.3466, - "step": 849 - }, - { - "epoch": 0.14, - "learning_rate": 0.0006983050847457628, - "loss": 2.4464, - "step": 850 - }, - { - "epoch": 0.14, - "learning_rate": 0.0006981694915254238, - "loss": 2.3719, - "step": 851 - }, - { - "epoch": 0.14, - "learning_rate": 0.0006980338983050848, - "loss": 2.3954, - "step": 852 - }, - { - "epoch": 0.14, - "learning_rate": 0.0006978983050847457, - "loss": 2.492, - "step": 853 - }, - { - "epoch": 0.14, - "learning_rate": 0.0006977627118644068, - "loss": 2.4074, - "step": 854 - }, - { - "epoch": 0.14, - "learning_rate": 0.0006976271186440678, - "loss": 2.3668, - "step": 855 - }, - { - "epoch": 0.14, - "learning_rate": 0.0006974915254237289, - "loss": 2.3896, - "step": 856 - }, - { - "epoch": 0.14, - "learning_rate": 0.0006973559322033899, - "loss": 2.4036, - "step": 857 - }, - { - "epoch": 0.14, - "learning_rate": 0.0006972203389830509, - "loss": 2.3175, - "step": 858 - }, - { - "epoch": 0.14, - "learning_rate": 0.0006970847457627118, - "loss": 2.466, - "step": 859 - }, - { - "epoch": 0.14, - "learning_rate": 0.0006969491525423729, - "loss": 2.3603, - "step": 860 - }, - { - "epoch": 0.14, - "learning_rate": 0.0006968135593220339, - "loss": 2.3816, - "step": 861 - }, - { - "epoch": 0.14, - "learning_rate": 0.000696677966101695, - "loss": 2.3338, - "step": 862 - }, - { - "epoch": 0.14, - "learning_rate": 0.000696542372881356, - "loss": 2.3856, - "step": 863 - }, - { - "epoch": 0.14, - "learning_rate": 0.0006964067796610171, - "loss": 2.3683, - "step": 864 - }, - { - "epoch": 0.14, - "learning_rate": 0.000696271186440678, - "loss": 2.4683, - "step": 865 - }, - { - "epoch": 0.14, - "learning_rate": 0.000696135593220339, - "loss": 2.3859, - "step": 866 - }, - { - "epoch": 0.14, - "learning_rate": 0.000696, - "loss": 2.3508, - "step": 867 - }, - { - "epoch": 0.14, - "learning_rate": 0.0006958644067796611, - "loss": 2.4109, - "step": 868 - }, - { - "epoch": 0.14, - "learning_rate": 0.0006957288135593221, - "loss": 2.3429, - "step": 869 - }, - { - "epoch": 0.14, - "learning_rate": 0.0006955932203389831, - "loss": 2.3447, - "step": 870 - }, - { - "epoch": 0.15, - "learning_rate": 0.0006954576271186441, - "loss": 2.2626, - "step": 871 - }, - { - "epoch": 0.15, - "learning_rate": 0.0006953220338983051, - "loss": 2.4016, - "step": 872 - }, - { - "epoch": 0.15, - "learning_rate": 0.0006951864406779661, - "loss": 2.3756, - "step": 873 - }, - { - "epoch": 0.15, - "learning_rate": 0.0006950508474576271, - "loss": 2.3993, - "step": 874 - }, - { - "epoch": 0.15, - "learning_rate": 0.0006949152542372882, - "loss": 2.4407, - "step": 875 - }, - { - "epoch": 0.15, - "learning_rate": 0.0006947796610169492, - "loss": 2.3792, - "step": 876 - }, - { - "epoch": 0.15, - "learning_rate": 0.0006946440677966102, - "loss": 2.3645, - "step": 877 - }, - { - "epoch": 0.15, - "learning_rate": 0.0006945084745762712, - "loss": 2.3616, - "step": 878 - }, - { - "epoch": 0.15, - "learning_rate": 0.0006943728813559322, - "loss": 2.305, - "step": 879 - }, - { - "epoch": 0.15, - "learning_rate": 0.0006942372881355932, - "loss": 2.4369, - "step": 880 - }, - { - "epoch": 0.15, - "learning_rate": 0.0006941016949152543, - "loss": 2.3569, - "step": 881 - }, - { - "epoch": 0.15, - "learning_rate": 0.0006939661016949153, - "loss": 2.3845, - "step": 882 - }, - { - "epoch": 0.15, - "learning_rate": 0.0006938305084745763, - "loss": 2.3202, - "step": 883 - }, - { - "epoch": 0.15, - "learning_rate": 0.0006936949152542373, - "loss": 2.2872, - "step": 884 - }, - { - "epoch": 0.15, - "learning_rate": 0.0006935593220338984, - "loss": 2.2987, - "step": 885 - }, - { - "epoch": 0.15, - "learning_rate": 0.0006934237288135594, - "loss": 2.3952, - "step": 886 - }, - { - "epoch": 0.15, - "learning_rate": 0.0006932881355932204, - "loss": 2.397, - "step": 887 - }, - { - "epoch": 0.15, - "learning_rate": 0.0006931525423728814, - "loss": 2.388, - "step": 888 - }, - { - "epoch": 0.15, - "learning_rate": 0.0006930169491525424, - "loss": 2.3385, - "step": 889 - }, - { - "epoch": 0.15, - "learning_rate": 0.0006928813559322034, - "loss": 2.2949, - "step": 890 - }, - { - "epoch": 0.15, - "learning_rate": 0.0006927457627118644, - "loss": 2.3929, - "step": 891 - }, - { - "epoch": 0.15, - "learning_rate": 0.0006926101694915255, - "loss": 2.3241, - "step": 892 - }, - { - "epoch": 0.15, - "learning_rate": 0.0006924745762711865, - "loss": 2.2892, - "step": 893 - }, - { - "epoch": 0.15, - "learning_rate": 0.0006923389830508475, - "loss": 2.3933, - "step": 894 - }, - { - "epoch": 0.15, - "learning_rate": 0.0006922033898305084, - "loss": 2.4072, - "step": 895 - }, - { - "epoch": 0.15, - "learning_rate": 0.0006920677966101695, - "loss": 2.3719, - "step": 896 - }, - { - "epoch": 0.15, - "learning_rate": 0.0006919322033898305, - "loss": 2.35, - "step": 897 - }, - { - "epoch": 0.15, - "learning_rate": 0.0006917966101694916, - "loss": 2.325, - "step": 898 - }, - { - "epoch": 0.15, - "learning_rate": 0.0006916610169491526, - "loss": 2.4605, - "step": 899 - }, - { - "epoch": 0.15, - "learning_rate": 0.0006915254237288137, - "loss": 2.3272, - "step": 900 - }, - { - "epoch": 0.15, - "learning_rate": 0.0006913898305084745, - "loss": 2.3242, - "step": 901 - }, - { - "epoch": 0.15, - "learning_rate": 0.0006912542372881356, - "loss": 2.3806, - "step": 902 - }, - { - "epoch": 0.15, - "learning_rate": 0.0006911186440677966, - "loss": 2.3183, - "step": 903 - }, - { - "epoch": 0.15, - "learning_rate": 0.0006909830508474577, - "loss": 2.3502, - "step": 904 - }, - { - "epoch": 0.15, - "learning_rate": 0.0006908474576271187, - "loss": 2.3661, - "step": 905 - }, - { - "epoch": 0.15, - "learning_rate": 0.0006907118644067798, - "loss": 2.3906, - "step": 906 - }, - { - "epoch": 0.15, - "learning_rate": 0.0006905762711864407, - "loss": 2.4053, - "step": 907 - }, - { - "epoch": 0.15, - "learning_rate": 0.0006904406779661017, - "loss": 2.3636, - "step": 908 - }, - { - "epoch": 0.15, - "learning_rate": 0.0006903050847457627, - "loss": 2.2655, - "step": 909 - }, - { - "epoch": 0.15, - "learning_rate": 0.0006901694915254238, - "loss": 2.4222, - "step": 910 - }, - { - "epoch": 0.15, - "learning_rate": 0.0006900338983050848, - "loss": 2.3432, - "step": 911 - }, - { - "epoch": 0.15, - "learning_rate": 0.0006898983050847458, - "loss": 2.2854, - "step": 912 - }, - { - "epoch": 0.15, - "learning_rate": 0.0006897627118644068, - "loss": 2.303, - "step": 913 - }, - { - "epoch": 0.15, - "learning_rate": 0.0006896271186440678, - "loss": 2.2797, - "step": 914 - }, - { - "epoch": 0.15, - "learning_rate": 0.0006894915254237288, - "loss": 2.4155, - "step": 915 - }, - { - "epoch": 0.15, - "learning_rate": 0.0006893559322033898, - "loss": 2.2832, - "step": 916 - }, - { - "epoch": 0.15, - "learning_rate": 0.0006892203389830509, - "loss": 2.3637, - "step": 917 - }, - { - "epoch": 0.15, - "learning_rate": 0.0006890847457627119, - "loss": 2.402, - "step": 918 - }, - { - "epoch": 0.15, - "learning_rate": 0.0006889491525423729, - "loss": 2.3481, - "step": 919 - }, - { - "epoch": 0.15, - "learning_rate": 0.0006888135593220339, - "loss": 2.4507, - "step": 920 - }, - { - "epoch": 0.15, - "learning_rate": 0.000688677966101695, - "loss": 2.3685, - "step": 921 - }, - { - "epoch": 0.15, - "learning_rate": 0.000688542372881356, - "loss": 2.3277, - "step": 922 - }, - { - "epoch": 0.15, - "learning_rate": 0.000688406779661017, - "loss": 2.4022, - "step": 923 - }, - { - "epoch": 0.15, - "learning_rate": 0.000688271186440678, - "loss": 2.3499, - "step": 924 - }, - { - "epoch": 0.15, - "learning_rate": 0.000688135593220339, - "loss": 2.3347, - "step": 925 - }, - { - "epoch": 0.15, - "learning_rate": 0.000688, - "loss": 2.3399, - "step": 926 - }, - { - "epoch": 0.15, - "learning_rate": 0.0006878644067796611, - "loss": 2.3295, - "step": 927 - }, - { - "epoch": 0.15, - "learning_rate": 0.0006877288135593221, - "loss": 2.3994, - "step": 928 - }, - { - "epoch": 0.15, - "learning_rate": 0.0006875932203389831, - "loss": 2.3778, - "step": 929 - }, - { - "epoch": 0.15, - "learning_rate": 0.0006874576271186441, - "loss": 2.3528, - "step": 930 - }, - { - "epoch": 0.16, - "learning_rate": 0.000687322033898305, - "loss": 2.3533, - "step": 931 - }, - { - "epoch": 0.16, - "learning_rate": 0.0006871864406779661, - "loss": 2.4151, - "step": 932 - }, - { - "epoch": 0.16, - "learning_rate": 0.0006870508474576271, - "loss": 2.4516, - "step": 933 - }, - { - "epoch": 0.16, - "learning_rate": 0.0006869152542372882, - "loss": 2.4132, - "step": 934 - }, - { - "epoch": 0.16, - "learning_rate": 0.0006867796610169492, - "loss": 2.3986, - "step": 935 - }, - { - "epoch": 0.16, - "learning_rate": 0.0006866440677966103, - "loss": 2.3576, - "step": 936 - }, - { - "epoch": 0.16, - "learning_rate": 0.0006865084745762711, - "loss": 2.3756, - "step": 937 - }, - { - "epoch": 0.16, - "learning_rate": 0.0006863728813559322, - "loss": 2.4238, - "step": 938 - }, - { - "epoch": 0.16, - "learning_rate": 0.0006862372881355932, - "loss": 2.2522, - "step": 939 - }, - { - "epoch": 0.16, - "learning_rate": 0.0006861016949152543, - "loss": 2.4234, - "step": 940 - }, - { - "epoch": 0.16, - "learning_rate": 0.0006859661016949153, - "loss": 2.2996, - "step": 941 - }, - { - "epoch": 0.16, - "learning_rate": 0.0006858305084745764, - "loss": 2.3316, - "step": 942 - }, - { - "epoch": 0.16, - "learning_rate": 0.0006856949152542373, - "loss": 2.3349, - "step": 943 - }, - { - "epoch": 0.16, - "learning_rate": 0.0006855593220338983, - "loss": 2.2936, - "step": 944 - }, - { - "epoch": 0.16, - "learning_rate": 0.0006854237288135593, - "loss": 2.3265, - "step": 945 - }, - { - "epoch": 0.16, - "learning_rate": 0.0006852881355932204, - "loss": 2.3186, - "step": 946 - }, - { - "epoch": 0.16, - "learning_rate": 0.0006851525423728814, - "loss": 2.3746, - "step": 947 - }, - { - "epoch": 0.16, - "learning_rate": 0.0006850169491525425, - "loss": 2.3862, - "step": 948 - }, - { - "epoch": 0.16, - "learning_rate": 0.0006848813559322034, - "loss": 2.3719, - "step": 949 - }, - { - "epoch": 0.16, - "learning_rate": 0.0006847457627118644, - "loss": 2.4512, - "step": 950 - }, - { - "epoch": 0.16, - "learning_rate": 0.0006846101694915254, - "loss": 2.3664, - "step": 951 - }, - { - "epoch": 0.16, - "learning_rate": 0.0006844745762711864, - "loss": 2.3729, - "step": 952 - }, - { - "epoch": 0.16, - "learning_rate": 0.0006843389830508475, - "loss": 2.4321, - "step": 953 - }, - { - "epoch": 0.16, - "learning_rate": 0.0006842033898305085, - "loss": 2.4113, - "step": 954 - }, - { - "epoch": 0.16, - "learning_rate": 0.0006840677966101695, - "loss": 2.3098, - "step": 955 - }, - { - "epoch": 0.16, - "learning_rate": 0.0006839322033898305, - "loss": 2.3412, - "step": 956 - }, - { - "epoch": 0.16, - "learning_rate": 0.0006837966101694916, - "loss": 2.3755, - "step": 957 - }, - { - "epoch": 0.16, - "learning_rate": 0.0006836610169491526, - "loss": 2.3092, - "step": 958 - }, - { - "epoch": 0.16, - "learning_rate": 0.0006835254237288136, - "loss": 2.2964, - "step": 959 - }, - { - "epoch": 0.16, - "learning_rate": 0.0006833898305084746, - "loss": 2.256, - "step": 960 - }, - { - "epoch": 0.16, - "learning_rate": 0.0006832542372881356, - "loss": 2.2659, - "step": 961 - }, - { - "epoch": 0.16, - "learning_rate": 0.0006831186440677966, - "loss": 2.3019, - "step": 962 - }, - { - "epoch": 0.16, - "learning_rate": 0.0006829830508474577, - "loss": 2.3724, - "step": 963 - }, - { - "epoch": 0.16, - "learning_rate": 0.0006828474576271187, - "loss": 2.3187, - "step": 964 - }, - { - "epoch": 0.16, - "learning_rate": 0.0006827118644067797, - "loss": 2.2647, - "step": 965 - }, - { - "epoch": 0.16, - "learning_rate": 0.0006825762711864407, - "loss": 2.3082, - "step": 966 - }, - { - "epoch": 0.16, - "learning_rate": 0.0006824406779661017, - "loss": 2.3569, - "step": 967 - }, - { - "epoch": 0.16, - "learning_rate": 0.0006823050847457627, - "loss": 2.4431, - "step": 968 - }, - { - "epoch": 0.16, - "learning_rate": 0.0006821694915254238, - "loss": 2.3198, - "step": 969 - }, - { - "epoch": 0.16, - "learning_rate": 0.0006820338983050848, - "loss": 2.3731, - "step": 970 - }, - { - "epoch": 0.16, - "learning_rate": 0.0006818983050847459, - "loss": 2.3296, - "step": 971 - }, - { - "epoch": 0.16, - "learning_rate": 0.0006817627118644068, - "loss": 2.3212, - "step": 972 - }, - { - "epoch": 0.16, - "learning_rate": 0.0006816271186440677, - "loss": 2.3171, - "step": 973 - }, - { - "epoch": 0.16, - "learning_rate": 0.0006814915254237288, - "loss": 2.2825, - "step": 974 - }, - { - "epoch": 0.16, - "learning_rate": 0.0006813559322033898, - "loss": 2.2633, - "step": 975 - }, - { - "epoch": 0.16, - "learning_rate": 0.0006812203389830509, - "loss": 2.4003, - "step": 976 - }, - { - "epoch": 0.16, - "learning_rate": 0.0006810847457627119, - "loss": 2.3389, - "step": 977 - }, - { - "epoch": 0.16, - "learning_rate": 0.000680949152542373, - "loss": 2.36, - "step": 978 - }, - { - "epoch": 0.16, - "learning_rate": 0.000680813559322034, - "loss": 2.3114, - "step": 979 - }, - { - "epoch": 0.16, - "learning_rate": 0.0006806779661016949, - "loss": 2.3598, - "step": 980 - }, - { - "epoch": 0.16, - "learning_rate": 0.0006805423728813559, - "loss": 2.3936, - "step": 981 - }, - { - "epoch": 0.16, - "learning_rate": 0.000680406779661017, - "loss": 2.3669, - "step": 982 - }, - { - "epoch": 0.16, - "learning_rate": 0.000680271186440678, - "loss": 2.3693, - "step": 983 - }, - { - "epoch": 0.16, - "learning_rate": 0.0006801355932203391, - "loss": 2.3104, - "step": 984 - }, - { - "epoch": 0.16, - "learning_rate": 0.00068, - "loss": 2.3907, - "step": 985 - }, - { - "epoch": 0.16, - "learning_rate": 0.000679864406779661, - "loss": 2.3522, - "step": 986 - }, - { - "epoch": 0.16, - "learning_rate": 0.000679728813559322, - "loss": 2.3814, - "step": 987 - }, - { - "epoch": 0.16, - "learning_rate": 0.0006795932203389831, - "loss": 2.3801, - "step": 988 - }, - { - "epoch": 0.16, - "learning_rate": 0.0006794576271186441, - "loss": 2.3241, - "step": 989 - }, - { - "epoch": 0.17, - "learning_rate": 0.0006793220338983052, - "loss": 2.2991, - "step": 990 - }, - { - "epoch": 0.17, - "learning_rate": 0.0006791864406779661, - "loss": 2.3359, - "step": 991 - }, - { - "epoch": 0.17, - "learning_rate": 0.0006790508474576272, - "loss": 2.2802, - "step": 992 - }, - { - "epoch": 0.17, - "learning_rate": 0.0006789152542372882, - "loss": 2.2845, - "step": 993 - }, - { - "epoch": 0.17, - "learning_rate": 0.0006787796610169491, - "loss": 2.3241, - "step": 994 - }, - { - "epoch": 0.17, - "learning_rate": 0.0006786440677966102, - "loss": 2.3858, - "step": 995 - }, - { - "epoch": 0.17, - "learning_rate": 0.0006785084745762712, - "loss": 2.3619, - "step": 996 - }, - { - "epoch": 0.17, - "learning_rate": 0.0006783728813559322, - "loss": 2.3134, - "step": 997 - }, - { - "epoch": 0.17, - "learning_rate": 0.0006782372881355932, - "loss": 2.3958, - "step": 998 - }, - { - "epoch": 0.17, - "learning_rate": 0.0006781016949152543, - "loss": 2.3529, - "step": 999 - }, - { - "epoch": 0.17, - "learning_rate": 0.0006779661016949153, - "loss": 2.3145, - "step": 1000 - }, - { - "epoch": 0.17, - "eval_gen_len": 19.0, - "eval_loss": 2.1640422344207764, - "eval_rouge1": 0.2446, - "eval_rouge2": 0.0882, - "eval_rougeL": 0.2042, - "eval_rougeLsum": 0.2032, - "eval_runtime": 25.7428, - "eval_samples_per_second": 1.942, - "eval_steps_per_second": 0.272, - "step": 1000 - }, - { - "epoch": 0.17, - "learning_rate": 0.0006778305084745763, - "loss": 2.377, - "step": 1001 - }, - { - "epoch": 0.17, - "learning_rate": 0.0006776949152542373, - "loss": 2.3483, - "step": 1002 - }, - { - "epoch": 0.17, - "learning_rate": 0.0006775593220338983, - "loss": 2.2529, - "step": 1003 - }, - { - "epoch": 0.17, - "learning_rate": 0.0006774237288135593, - "loss": 2.3304, - "step": 1004 - }, - { - "epoch": 0.17, - "learning_rate": 0.0006772881355932204, - "loss": 2.3087, - "step": 1005 - }, - { - "epoch": 0.17, - "learning_rate": 0.0006771525423728814, - "loss": 2.3364, - "step": 1006 - }, - { - "epoch": 0.17, - "learning_rate": 0.0006770169491525424, - "loss": 2.3916, - "step": 1007 - }, - { - "epoch": 0.17, - "learning_rate": 0.0006768813559322034, - "loss": 2.2666, - "step": 1008 - }, - { - "epoch": 0.17, - "learning_rate": 0.0006767457627118645, - "loss": 2.4, - "step": 1009 - }, - { - "epoch": 0.17, - "learning_rate": 0.0006766101694915254, - "loss": 2.2813, - "step": 1010 - }, - { - "epoch": 0.17, - "learning_rate": 0.0006764745762711865, - "loss": 2.2978, - "step": 1011 - }, - { - "epoch": 0.17, - "learning_rate": 0.0006763389830508475, - "loss": 2.3028, - "step": 1012 - }, - { - "epoch": 0.17, - "learning_rate": 0.0006762033898305086, - "loss": 2.2557, - "step": 1013 - }, - { - "epoch": 0.17, - "learning_rate": 0.0006760677966101696, - "loss": 2.299, - "step": 1014 - }, - { - "epoch": 0.17, - "learning_rate": 0.0006759322033898306, - "loss": 2.3389, - "step": 1015 - }, - { - "epoch": 0.17, - "learning_rate": 0.0006757966101694915, - "loss": 2.2909, - "step": 1016 - }, - { - "epoch": 0.17, - "learning_rate": 0.0006756610169491525, - "loss": 2.3701, - "step": 1017 - }, - { - "epoch": 0.17, - "learning_rate": 0.0006755254237288136, - "loss": 2.2788, - "step": 1018 - }, - { - "epoch": 0.17, - "learning_rate": 0.0006753898305084746, - "loss": 2.3686, - "step": 1019 - }, - { - "epoch": 0.17, - "learning_rate": 0.0006752542372881357, - "loss": 2.2988, - "step": 1020 - }, - { - "epoch": 0.17, - "learning_rate": 0.0006751186440677967, - "loss": 2.3731, - "step": 1021 - }, - { - "epoch": 0.17, - "learning_rate": 0.0006749830508474576, - "loss": 2.3253, - "step": 1022 - }, - { - "epoch": 0.17, - "learning_rate": 0.0006748474576271186, - "loss": 2.3429, - "step": 1023 - }, - { - "epoch": 0.17, - "learning_rate": 0.0006747118644067797, - "loss": 2.285, - "step": 1024 - }, - { - "epoch": 0.17, - "learning_rate": 0.0006745762711864407, - "loss": 2.3053, - "step": 1025 - }, - { - "epoch": 0.17, - "learning_rate": 0.0006744406779661018, - "loss": 2.3202, - "step": 1026 - }, - { - "epoch": 0.17, - "learning_rate": 0.0006743050847457628, - "loss": 2.3234, - "step": 1027 - }, - { - "epoch": 0.17, - "learning_rate": 0.0006741694915254238, - "loss": 2.2927, - "step": 1028 - }, - { - "epoch": 0.17, - "learning_rate": 0.0006740338983050847, - "loss": 2.329, - "step": 1029 - }, - { - "epoch": 0.17, - "learning_rate": 0.0006738983050847458, - "loss": 2.3812, - "step": 1030 - }, - { - "epoch": 0.17, - "learning_rate": 0.0006737627118644068, - "loss": 2.3443, - "step": 1031 - }, - { - "epoch": 0.17, - "learning_rate": 0.0006736271186440679, - "loss": 2.414, - "step": 1032 - }, - { - "epoch": 0.17, - "learning_rate": 0.0006734915254237289, - "loss": 2.4363, - "step": 1033 - }, - { - "epoch": 0.17, - "learning_rate": 0.0006733559322033899, - "loss": 2.3253, - "step": 1034 - }, - { - "epoch": 0.17, - "learning_rate": 0.0006732203389830509, - "loss": 2.3082, - "step": 1035 - }, - { - "epoch": 0.17, - "learning_rate": 0.0006730847457627119, - "loss": 2.3822, - "step": 1036 - }, - { - "epoch": 0.17, - "learning_rate": 0.0006729491525423729, - "loss": 2.3264, - "step": 1037 - }, - { - "epoch": 0.17, - "learning_rate": 0.0006728135593220339, - "loss": 2.3685, - "step": 1038 - }, - { - "epoch": 0.17, - "learning_rate": 0.000672677966101695, - "loss": 2.2532, - "step": 1039 - }, - { - "epoch": 0.17, - "learning_rate": 0.0006725423728813559, - "loss": 2.3283, - "step": 1040 - }, - { - "epoch": 0.17, - "learning_rate": 0.000672406779661017, - "loss": 2.3832, - "step": 1041 - }, - { - "epoch": 0.17, - "learning_rate": 0.000672271186440678, - "loss": 2.3535, - "step": 1042 - }, - { - "epoch": 0.17, - "learning_rate": 0.000672135593220339, - "loss": 2.3756, - "step": 1043 - }, - { - "epoch": 0.17, - "learning_rate": 0.000672, - "loss": 2.318, - "step": 1044 - }, - { - "epoch": 0.17, - "learning_rate": 0.000671864406779661, - "loss": 2.3605, - "step": 1045 - }, - { - "epoch": 0.17, - "learning_rate": 0.000671728813559322, - "loss": 2.3518, - "step": 1046 - }, - { - "epoch": 0.17, - "learning_rate": 0.0006715932203389831, - "loss": 2.3477, - "step": 1047 - }, - { - "epoch": 0.17, - "learning_rate": 0.0006714576271186441, - "loss": 2.231, - "step": 1048 - }, - { - "epoch": 0.17, - "learning_rate": 0.0006713220338983052, - "loss": 2.2736, - "step": 1049 - }, - { - "epoch": 0.17, - "learning_rate": 0.0006711864406779662, - "loss": 2.2304, - "step": 1050 - }, - { - "epoch": 0.18, - "learning_rate": 0.0006710508474576272, - "loss": 2.292, - "step": 1051 - }, - { - "epoch": 0.18, - "learning_rate": 0.0006709152542372881, - "loss": 2.3045, - "step": 1052 - }, - { - "epoch": 0.18, - "learning_rate": 0.0006707796610169492, - "loss": 2.2952, - "step": 1053 - }, - { - "epoch": 0.18, - "learning_rate": 0.0006706440677966102, - "loss": 2.3338, - "step": 1054 - }, - { - "epoch": 0.18, - "learning_rate": 0.0006705084745762713, - "loss": 2.3101, - "step": 1055 - }, - { - "epoch": 0.18, - "learning_rate": 0.0006703728813559323, - "loss": 2.3126, - "step": 1056 - }, - { - "epoch": 0.18, - "learning_rate": 0.0006702372881355933, - "loss": 2.286, - "step": 1057 - }, - { - "epoch": 0.18, - "learning_rate": 0.0006701016949152542, - "loss": 2.293, - "step": 1058 - }, - { - "epoch": 0.18, - "learning_rate": 0.0006699661016949152, - "loss": 2.3577, - "step": 1059 - }, - { - "epoch": 0.18, - "learning_rate": 0.0006698305084745763, - "loss": 2.3265, - "step": 1060 - }, - { - "epoch": 0.18, - "learning_rate": 0.0006696949152542373, - "loss": 2.349, - "step": 1061 - }, - { - "epoch": 0.18, - "learning_rate": 0.0006695593220338984, - "loss": 2.3001, - "step": 1062 - }, - { - "epoch": 0.18, - "learning_rate": 0.0006694237288135594, - "loss": 2.1828, - "step": 1063 - }, - { - "epoch": 0.18, - "learning_rate": 0.0006692881355932204, - "loss": 2.3249, - "step": 1064 - }, - { - "epoch": 0.18, - "learning_rate": 0.0006691525423728813, - "loss": 2.3134, - "step": 1065 - }, - { - "epoch": 0.18, - "learning_rate": 0.0006690169491525424, - "loss": 2.2952, - "step": 1066 - }, - { - "epoch": 0.18, - "learning_rate": 0.0006688813559322034, - "loss": 2.337, - "step": 1067 - }, - { - "epoch": 0.18, - "learning_rate": 0.0006687457627118645, - "loss": 2.2099, - "step": 1068 - }, - { - "epoch": 0.18, - "learning_rate": 0.0006686101694915255, - "loss": 2.2636, - "step": 1069 - }, - { - "epoch": 0.18, - "learning_rate": 0.0006684745762711865, - "loss": 2.3492, - "step": 1070 - }, - { - "epoch": 0.18, - "learning_rate": 0.0006683389830508475, - "loss": 2.3414, - "step": 1071 - }, - { - "epoch": 0.18, - "learning_rate": 0.0006682033898305085, - "loss": 2.3097, - "step": 1072 - }, - { - "epoch": 0.18, - "learning_rate": 0.0006680677966101695, - "loss": 2.2535, - "step": 1073 - }, - { - "epoch": 0.18, - "learning_rate": 0.0006679322033898306, - "loss": 2.2855, - "step": 1074 - }, - { - "epoch": 0.18, - "learning_rate": 0.0006677966101694916, - "loss": 2.2845, - "step": 1075 - }, - { - "epoch": 0.18, - "learning_rate": 0.0006676610169491526, - "loss": 2.3052, - "step": 1076 - }, - { - "epoch": 0.18, - "learning_rate": 0.0006675254237288136, - "loss": 2.3813, - "step": 1077 - }, - { - "epoch": 0.18, - "learning_rate": 0.0006673898305084746, - "loss": 2.3692, - "step": 1078 - }, - { - "epoch": 0.18, - "learning_rate": 0.0006672542372881356, - "loss": 2.2696, - "step": 1079 - }, - { - "epoch": 0.18, - "learning_rate": 0.0006671186440677966, - "loss": 2.3074, - "step": 1080 - }, - { - "epoch": 0.18, - "learning_rate": 0.0006669830508474577, - "loss": 2.3857, - "step": 1081 - }, - { - "epoch": 0.18, - "learning_rate": 0.0006668474576271186, - "loss": 2.2961, - "step": 1082 - }, - { - "epoch": 0.18, - "learning_rate": 0.0006667118644067797, - "loss": 2.2708, - "step": 1083 - }, - { - "epoch": 0.18, - "learning_rate": 0.0006665762711864407, - "loss": 2.3788, - "step": 1084 - }, - { - "epoch": 0.18, - "learning_rate": 0.0006664406779661018, - "loss": 2.3141, - "step": 1085 - }, - { - "epoch": 0.18, - "learning_rate": 0.0006663050847457628, - "loss": 2.3503, - "step": 1086 - }, - { - "epoch": 0.18, - "learning_rate": 0.0006661694915254238, - "loss": 2.371, - "step": 1087 - }, - { - "epoch": 0.18, - "learning_rate": 0.0006660338983050847, - "loss": 2.3156, - "step": 1088 - }, - { - "epoch": 0.18, - "learning_rate": 0.0006658983050847458, - "loss": 2.283, - "step": 1089 - }, - { - "epoch": 0.18, - "learning_rate": 0.0006657627118644068, - "loss": 2.3366, - "step": 1090 - }, - { - "epoch": 0.18, - "learning_rate": 0.0006656271186440679, - "loss": 2.2843, - "step": 1091 - }, - { - "epoch": 0.18, - "learning_rate": 0.0006654915254237289, - "loss": 2.3635, - "step": 1092 - }, - { - "epoch": 0.18, - "learning_rate": 0.0006653559322033899, - "loss": 2.2889, - "step": 1093 - }, - { - "epoch": 0.18, - "learning_rate": 0.0006652203389830508, - "loss": 2.3219, - "step": 1094 - }, - { - "epoch": 0.18, - "learning_rate": 0.0006650847457627119, - "loss": 2.2796, - "step": 1095 - }, - { - "epoch": 0.18, - "learning_rate": 0.0006649491525423729, - "loss": 2.3092, - "step": 1096 - }, - { - "epoch": 0.18, - "learning_rate": 0.000664813559322034, - "loss": 2.2551, - "step": 1097 - }, - { - "epoch": 0.18, - "learning_rate": 0.000664677966101695, - "loss": 2.3118, - "step": 1098 - }, - { - "epoch": 0.18, - "learning_rate": 0.000664542372881356, - "loss": 2.3155, - "step": 1099 - }, - { - "epoch": 0.18, - "learning_rate": 0.0006644067796610169, - "loss": 2.382, - "step": 1100 - }, - { - "epoch": 0.18, - "learning_rate": 0.0006642711864406779, - "loss": 2.3322, - "step": 1101 - }, - { - "epoch": 0.18, - "learning_rate": 0.000664135593220339, - "loss": 2.2848, - "step": 1102 - }, - { - "epoch": 0.18, - "learning_rate": 0.000664, - "loss": 2.3445, - "step": 1103 - }, - { - "epoch": 0.18, - "learning_rate": 0.0006638644067796611, - "loss": 2.386, - "step": 1104 - }, - { - "epoch": 0.18, - "learning_rate": 0.0006637288135593221, - "loss": 2.2992, - "step": 1105 - }, - { - "epoch": 0.18, - "learning_rate": 0.0006635932203389831, - "loss": 2.2894, - "step": 1106 - }, - { - "epoch": 0.18, - "learning_rate": 0.0006634576271186441, - "loss": 2.2807, - "step": 1107 - }, - { - "epoch": 0.18, - "learning_rate": 0.0006633220338983051, - "loss": 2.3253, - "step": 1108 - }, - { - "epoch": 0.18, - "learning_rate": 0.0006631864406779661, - "loss": 2.2242, - "step": 1109 - }, - { - "epoch": 0.18, - "learning_rate": 0.0006630508474576272, - "loss": 2.2379, - "step": 1110 - }, - { - "epoch": 0.19, - "learning_rate": 0.0006629152542372882, - "loss": 2.3186, - "step": 1111 - }, - { - "epoch": 0.19, - "learning_rate": 0.0006627796610169492, - "loss": 2.2563, - "step": 1112 - }, - { - "epoch": 0.19, - "learning_rate": 0.0006626440677966102, - "loss": 2.3138, - "step": 1113 - }, - { - "epoch": 0.19, - "learning_rate": 0.0006625084745762712, - "loss": 2.3805, - "step": 1114 - }, - { - "epoch": 0.19, - "learning_rate": 0.0006623728813559322, - "loss": 2.3126, - "step": 1115 - }, - { - "epoch": 0.19, - "learning_rate": 0.0006622372881355933, - "loss": 2.2741, - "step": 1116 - }, - { - "epoch": 0.19, - "learning_rate": 0.0006621016949152543, - "loss": 2.2884, - "step": 1117 - }, - { - "epoch": 0.19, - "learning_rate": 0.0006619661016949152, - "loss": 2.3752, - "step": 1118 - }, - { - "epoch": 0.19, - "learning_rate": 0.0006618305084745763, - "loss": 2.2988, - "step": 1119 - }, - { - "epoch": 0.19, - "learning_rate": 0.0006616949152542373, - "loss": 2.3493, - "step": 1120 - }, - { - "epoch": 0.19, - "learning_rate": 0.0006615593220338984, - "loss": 2.3622, - "step": 1121 - }, - { - "epoch": 0.19, - "learning_rate": 0.0006614237288135593, - "loss": 2.3941, - "step": 1122 - }, - { - "epoch": 0.19, - "learning_rate": 0.0006612881355932204, - "loss": 2.2981, - "step": 1123 - }, - { - "epoch": 0.19, - "learning_rate": 0.0006611525423728813, - "loss": 2.3175, - "step": 1124 - }, - { - "epoch": 0.19, - "learning_rate": 0.0006610169491525424, - "loss": 2.2746, - "step": 1125 - }, - { - "epoch": 0.19, - "learning_rate": 0.0006608813559322034, - "loss": 2.3479, - "step": 1126 - }, - { - "epoch": 0.19, - "learning_rate": 0.0006607457627118645, - "loss": 2.3103, - "step": 1127 - }, - { - "epoch": 0.19, - "learning_rate": 0.0006606101694915255, - "loss": 2.3344, - "step": 1128 - }, - { - "epoch": 0.19, - "learning_rate": 0.0006604745762711865, - "loss": 2.3104, - "step": 1129 - }, - { - "epoch": 0.19, - "learning_rate": 0.0006603389830508474, - "loss": 2.3765, - "step": 1130 - }, - { - "epoch": 0.19, - "learning_rate": 0.0006602033898305085, - "loss": 2.2659, - "step": 1131 - }, - { - "epoch": 0.19, - "learning_rate": 0.0006600677966101695, - "loss": 2.3057, - "step": 1132 - }, - { - "epoch": 0.19, - "learning_rate": 0.0006599322033898306, - "loss": 2.32, - "step": 1133 - }, - { - "epoch": 0.19, - "learning_rate": 0.0006597966101694916, - "loss": 2.301, - "step": 1134 - }, - { - "epoch": 0.19, - "learning_rate": 0.0006596610169491526, - "loss": 2.3409, - "step": 1135 - }, - { - "epoch": 0.19, - "learning_rate": 0.0006595254237288135, - "loss": 2.3385, - "step": 1136 - }, - { - "epoch": 0.19, - "learning_rate": 0.0006593898305084746, - "loss": 2.369, - "step": 1137 - }, - { - "epoch": 0.19, - "learning_rate": 0.0006592542372881356, - "loss": 2.3338, - "step": 1138 - }, - { - "epoch": 0.19, - "learning_rate": 0.0006591186440677966, - "loss": 2.338, - "step": 1139 - }, - { - "epoch": 0.19, - "learning_rate": 0.0006589830508474577, - "loss": 2.2963, - "step": 1140 - }, - { - "epoch": 0.19, - "learning_rate": 0.0006588474576271187, - "loss": 2.3339, - "step": 1141 - }, - { - "epoch": 0.19, - "learning_rate": 0.0006587118644067797, - "loss": 2.3117, - "step": 1142 - }, - { - "epoch": 0.19, - "learning_rate": 0.0006585762711864407, - "loss": 2.2809, - "step": 1143 - }, - { - "epoch": 0.19, - "learning_rate": 0.0006584406779661017, - "loss": 2.3173, - "step": 1144 - }, - { - "epoch": 0.19, - "learning_rate": 0.0006583050847457627, - "loss": 2.2874, - "step": 1145 - }, - { - "epoch": 0.19, - "learning_rate": 0.0006581694915254238, - "loss": 2.2963, - "step": 1146 - }, - { - "epoch": 0.19, - "learning_rate": 0.0006580338983050848, - "loss": 2.2139, - "step": 1147 - }, - { - "epoch": 0.19, - "learning_rate": 0.0006578983050847458, - "loss": 2.3325, - "step": 1148 - }, - { - "epoch": 0.19, - "learning_rate": 0.0006577627118644068, - "loss": 2.3339, - "step": 1149 - }, - { - "epoch": 0.19, - "learning_rate": 0.0006576271186440678, - "loss": 2.2418, - "step": 1150 - }, - { - "epoch": 0.19, - "learning_rate": 0.0006574915254237288, - "loss": 2.2509, - "step": 1151 - }, - { - "epoch": 0.19, - "learning_rate": 0.0006573559322033899, - "loss": 2.4094, - "step": 1152 - }, - { - "epoch": 0.19, - "learning_rate": 0.0006572203389830509, - "loss": 2.2751, - "step": 1153 - }, - { - "epoch": 0.19, - "learning_rate": 0.000657084745762712, - "loss": 2.2571, - "step": 1154 - }, - { - "epoch": 0.19, - "learning_rate": 0.0006569491525423729, - "loss": 2.3831, - "step": 1155 - }, - { - "epoch": 0.19, - "learning_rate": 0.000656813559322034, - "loss": 2.3076, - "step": 1156 - }, - { - "epoch": 0.19, - "learning_rate": 0.0006566779661016949, - "loss": 2.36, - "step": 1157 - }, - { - "epoch": 0.19, - "learning_rate": 0.000656542372881356, - "loss": 2.3294, - "step": 1158 - }, - { - "epoch": 0.19, - "learning_rate": 0.000656406779661017, - "loss": 2.2241, - "step": 1159 - }, - { - "epoch": 0.19, - "learning_rate": 0.0006562711864406779, - "loss": 2.2931, - "step": 1160 - }, - { - "epoch": 0.19, - "learning_rate": 0.000656135593220339, - "loss": 2.3729, - "step": 1161 - }, - { - "epoch": 0.19, - "learning_rate": 0.000656, - "loss": 2.3227, - "step": 1162 - }, - { - "epoch": 0.19, - "learning_rate": 0.0006558644067796611, - "loss": 2.2542, - "step": 1163 - }, - { - "epoch": 0.19, - "learning_rate": 0.0006557288135593221, - "loss": 2.3173, - "step": 1164 - }, - { - "epoch": 0.19, - "learning_rate": 0.0006555932203389831, - "loss": 2.3202, - "step": 1165 - }, - { - "epoch": 0.19, - "learning_rate": 0.000655457627118644, - "loss": 2.3035, - "step": 1166 - }, - { - "epoch": 0.19, - "learning_rate": 0.0006553220338983051, - "loss": 2.2722, - "step": 1167 - }, - { - "epoch": 0.19, - "learning_rate": 0.0006551864406779661, - "loss": 2.3094, - "step": 1168 - }, - { - "epoch": 0.19, - "learning_rate": 0.0006550508474576272, - "loss": 2.3518, - "step": 1169 - }, - { - "epoch": 0.2, - "learning_rate": 0.0006549152542372882, - "loss": 2.3108, - "step": 1170 - }, - { - "epoch": 0.2, - "learning_rate": 0.0006547796610169492, - "loss": 2.3706, - "step": 1171 - }, - { - "epoch": 0.2, - "learning_rate": 0.0006546440677966101, - "loss": 2.3429, - "step": 1172 - }, - { - "epoch": 0.2, - "learning_rate": 0.0006545084745762712, - "loss": 2.3199, - "step": 1173 - }, - { - "epoch": 0.2, - "learning_rate": 0.0006543728813559322, - "loss": 2.3565, - "step": 1174 - }, - { - "epoch": 0.2, - "learning_rate": 0.0006542372881355933, - "loss": 2.3244, - "step": 1175 - }, - { - "epoch": 0.2, - "learning_rate": 0.0006541016949152543, - "loss": 2.3271, - "step": 1176 - }, - { - "epoch": 0.2, - "learning_rate": 0.0006539661016949154, - "loss": 2.2406, - "step": 1177 - }, - { - "epoch": 0.2, - "learning_rate": 0.0006538305084745763, - "loss": 2.307, - "step": 1178 - }, - { - "epoch": 0.2, - "learning_rate": 0.0006536949152542373, - "loss": 2.3372, - "step": 1179 - }, - { - "epoch": 0.2, - "learning_rate": 0.0006535593220338983, - "loss": 2.2834, - "step": 1180 - }, - { - "epoch": 0.2, - "learning_rate": 0.0006534237288135593, - "loss": 2.3696, - "step": 1181 - }, - { - "epoch": 0.2, - "learning_rate": 0.0006532881355932204, - "loss": 2.2829, - "step": 1182 - }, - { - "epoch": 0.2, - "learning_rate": 0.0006531525423728814, - "loss": 2.3215, - "step": 1183 - }, - { - "epoch": 0.2, - "learning_rate": 0.0006530169491525424, - "loss": 2.2907, - "step": 1184 - }, - { - "epoch": 0.2, - "learning_rate": 0.0006528813559322034, - "loss": 2.2163, - "step": 1185 - }, - { - "epoch": 0.2, - "learning_rate": 0.0006527457627118644, - "loss": 2.3083, - "step": 1186 - }, - { - "epoch": 0.2, - "learning_rate": 0.0006526101694915254, - "loss": 2.3645, - "step": 1187 - }, - { - "epoch": 0.2, - "learning_rate": 0.0006524745762711865, - "loss": 2.3416, - "step": 1188 - }, - { - "epoch": 0.2, - "learning_rate": 0.0006523389830508475, - "loss": 2.3527, - "step": 1189 - }, - { - "epoch": 0.2, - "learning_rate": 0.0006522033898305085, - "loss": 2.3362, - "step": 1190 - }, - { - "epoch": 0.2, - "learning_rate": 0.0006520677966101695, - "loss": 2.2793, - "step": 1191 - }, - { - "epoch": 0.2, - "learning_rate": 0.0006519322033898306, - "loss": 2.3274, - "step": 1192 - }, - { - "epoch": 0.2, - "learning_rate": 0.0006517966101694915, - "loss": 2.307, - "step": 1193 - }, - { - "epoch": 0.2, - "learning_rate": 0.0006516610169491526, - "loss": 2.247, - "step": 1194 - }, - { - "epoch": 0.2, - "learning_rate": 0.0006515254237288136, - "loss": 2.3078, - "step": 1195 - }, - { - "epoch": 0.2, - "learning_rate": 0.0006513898305084746, - "loss": 2.4114, - "step": 1196 - }, - { - "epoch": 0.2, - "learning_rate": 0.0006512542372881356, - "loss": 2.3949, - "step": 1197 - }, - { - "epoch": 0.2, - "learning_rate": 0.0006511186440677967, - "loss": 2.3268, - "step": 1198 - }, - { - "epoch": 0.2, - "learning_rate": 0.0006509830508474577, - "loss": 2.3292, - "step": 1199 - }, - { - "epoch": 0.2, - "learning_rate": 0.0006508474576271187, - "loss": 2.3196, - "step": 1200 - }, - { - "epoch": 0.2, - "learning_rate": 0.0006507118644067797, - "loss": 2.3994, - "step": 1201 - }, - { - "epoch": 0.2, - "learning_rate": 0.0006505762711864406, - "loss": 2.3012, - "step": 1202 - }, - { - "epoch": 0.2, - "learning_rate": 0.0006504406779661017, - "loss": 2.3276, - "step": 1203 - }, - { - "epoch": 0.2, - "learning_rate": 0.0006503050847457627, - "loss": 2.2992, - "step": 1204 - }, - { - "epoch": 0.2, - "learning_rate": 0.0006501694915254238, - "loss": 2.3586, - "step": 1205 - }, - { - "epoch": 0.2, - "learning_rate": 0.0006500338983050848, - "loss": 2.2607, - "step": 1206 - }, - { - "epoch": 0.2, - "learning_rate": 0.0006498983050847458, - "loss": 2.3227, - "step": 1207 - }, - { - "epoch": 0.2, - "learning_rate": 0.0006497627118644067, - "loss": 2.3455, - "step": 1208 - }, - { - "epoch": 0.2, - "learning_rate": 0.0006496271186440678, - "loss": 2.2958, - "step": 1209 - }, - { - "epoch": 0.2, - "learning_rate": 0.0006494915254237288, - "loss": 2.3023, - "step": 1210 - }, - { - "epoch": 0.2, - "learning_rate": 0.0006493559322033899, - "loss": 2.3212, - "step": 1211 - }, - { - "epoch": 0.2, - "learning_rate": 0.0006492203389830509, - "loss": 2.3067, - "step": 1212 - }, - { - "epoch": 0.2, - "learning_rate": 0.000649084745762712, - "loss": 2.3165, - "step": 1213 - }, - { - "epoch": 0.2, - "learning_rate": 0.000648949152542373, - "loss": 2.3216, - "step": 1214 - }, - { - "epoch": 0.2, - "learning_rate": 0.0006488135593220339, - "loss": 2.3001, - "step": 1215 - }, - { - "epoch": 0.2, - "learning_rate": 0.0006486779661016949, - "loss": 2.3818, - "step": 1216 - }, - { - "epoch": 0.2, - "learning_rate": 0.000648542372881356, - "loss": 2.3252, - "step": 1217 - }, - { - "epoch": 0.2, - "learning_rate": 0.000648406779661017, - "loss": 2.3161, - "step": 1218 - }, - { - "epoch": 0.2, - "learning_rate": 0.0006482711864406781, - "loss": 2.3293, - "step": 1219 - }, - { - "epoch": 0.2, - "learning_rate": 0.000648135593220339, - "loss": 2.249, - "step": 1220 - }, - { - "epoch": 0.2, - "learning_rate": 0.000648, - "loss": 2.3195, - "step": 1221 - }, - { - "epoch": 0.2, - "learning_rate": 0.000647864406779661, - "loss": 2.2336, - "step": 1222 - }, - { - "epoch": 0.2, - "learning_rate": 0.000647728813559322, - "loss": 2.2654, - "step": 1223 - }, - { - "epoch": 0.2, - "learning_rate": 0.0006475932203389831, - "loss": 2.2631, - "step": 1224 - }, - { - "epoch": 0.2, - "learning_rate": 0.0006474576271186441, - "loss": 2.3097, - "step": 1225 - }, - { - "epoch": 0.2, - "learning_rate": 0.0006473220338983051, - "loss": 2.3247, - "step": 1226 - }, - { - "epoch": 0.2, - "learning_rate": 0.0006471864406779661, - "loss": 2.3492, - "step": 1227 - }, - { - "epoch": 0.2, - "learning_rate": 0.0006470508474576271, - "loss": 2.2584, - "step": 1228 - }, - { - "epoch": 0.2, - "learning_rate": 0.0006469152542372881, - "loss": 2.2826, - "step": 1229 - }, - { - "epoch": 0.2, - "learning_rate": 0.0006467796610169492, - "loss": 2.3088, - "step": 1230 - }, - { - "epoch": 0.21, - "learning_rate": 0.0006466440677966102, - "loss": 2.3537, - "step": 1231 - }, - { - "epoch": 0.21, - "learning_rate": 0.0006465084745762712, - "loss": 2.2713, - "step": 1232 - }, - { - "epoch": 0.21, - "learning_rate": 0.0006463728813559322, - "loss": 2.2859, - "step": 1233 - }, - { - "epoch": 0.21, - "learning_rate": 0.0006462372881355933, - "loss": 2.3003, - "step": 1234 - }, - { - "epoch": 0.21, - "learning_rate": 0.0006461016949152543, - "loss": 2.2479, - "step": 1235 - }, - { - "epoch": 0.21, - "learning_rate": 0.0006459661016949153, - "loss": 2.2011, - "step": 1236 - }, - { - "epoch": 0.21, - "learning_rate": 0.0006458305084745763, - "loss": 2.3448, - "step": 1237 - }, - { - "epoch": 0.21, - "learning_rate": 0.0006456949152542374, - "loss": 2.2286, - "step": 1238 - }, - { - "epoch": 0.21, - "learning_rate": 0.0006455593220338983, - "loss": 2.3119, - "step": 1239 - }, - { - "epoch": 0.21, - "learning_rate": 0.0006454237288135594, - "loss": 2.2973, - "step": 1240 - }, - { - "epoch": 0.21, - "learning_rate": 0.0006452881355932204, - "loss": 2.2793, - "step": 1241 - }, - { - "epoch": 0.21, - "learning_rate": 0.0006451525423728814, - "loss": 2.3172, - "step": 1242 - }, - { - "epoch": 0.21, - "learning_rate": 0.0006450169491525424, - "loss": 2.3227, - "step": 1243 - }, - { - "epoch": 0.21, - "learning_rate": 0.0006448813559322033, - "loss": 2.3149, - "step": 1244 - }, - { - "epoch": 0.21, - "learning_rate": 0.0006447457627118644, - "loss": 2.3115, - "step": 1245 - }, - { - "epoch": 0.21, - "learning_rate": 0.0006446101694915254, - "loss": 2.3314, - "step": 1246 - }, - { - "epoch": 0.21, - "learning_rate": 0.0006444745762711865, - "loss": 2.3441, - "step": 1247 - }, - { - "epoch": 0.21, - "learning_rate": 0.0006443389830508475, - "loss": 2.327, - "step": 1248 - }, - { - "epoch": 0.21, - "learning_rate": 0.0006442033898305086, - "loss": 2.3801, - "step": 1249 - }, - { - "epoch": 0.21, - "learning_rate": 0.0006440677966101694, - "loss": 2.3271, - "step": 1250 - }, - { - "epoch": 0.21, - "learning_rate": 0.0006439322033898305, - "loss": 2.4136, - "step": 1251 - }, - { - "epoch": 0.21, - "learning_rate": 0.0006437966101694915, - "loss": 2.4318, - "step": 1252 - }, - { - "epoch": 0.21, - "learning_rate": 0.0006436610169491526, - "loss": 2.3519, - "step": 1253 - }, - { - "epoch": 0.21, - "learning_rate": 0.0006435254237288136, - "loss": 2.3013, - "step": 1254 - }, - { - "epoch": 0.21, - "learning_rate": 0.0006433898305084747, - "loss": 2.3699, - "step": 1255 - }, - { - "epoch": 0.21, - "learning_rate": 0.0006432542372881357, - "loss": 2.3568, - "step": 1256 - }, - { - "epoch": 0.21, - "learning_rate": 0.0006431186440677966, - "loss": 2.3688, - "step": 1257 - }, - { - "epoch": 0.21, - "learning_rate": 0.0006429830508474576, - "loss": 2.2902, - "step": 1258 - }, - { - "epoch": 0.21, - "learning_rate": 0.0006428474576271187, - "loss": 2.2984, - "step": 1259 - }, - { - "epoch": 0.21, - "learning_rate": 0.0006427118644067797, - "loss": 2.3681, - "step": 1260 - }, - { - "epoch": 0.21, - "learning_rate": 0.0006425762711864408, - "loss": 2.3983, - "step": 1261 - }, - { - "epoch": 0.21, - "learning_rate": 0.0006424406779661018, - "loss": 2.3916, - "step": 1262 - }, - { - "epoch": 0.21, - "learning_rate": 0.0006423050847457627, - "loss": 2.4085, - "step": 1263 - }, - { - "epoch": 0.21, - "learning_rate": 0.0006421694915254237, - "loss": 2.2792, - "step": 1264 - }, - { - "epoch": 0.21, - "learning_rate": 0.0006420338983050847, - "loss": 2.2792, - "step": 1265 - }, - { - "epoch": 0.21, - "learning_rate": 0.0006418983050847458, - "loss": 2.2988, - "step": 1266 - }, - { - "epoch": 0.21, - "learning_rate": 0.0006417627118644068, - "loss": 2.4064, - "step": 1267 - }, - { - "epoch": 0.21, - "learning_rate": 0.0006416271186440679, - "loss": 2.3667, - "step": 1268 - }, - { - "epoch": 0.21, - "learning_rate": 0.0006414915254237288, - "loss": 2.2916, - "step": 1269 - }, - { - "epoch": 0.21, - "learning_rate": 0.0006413559322033899, - "loss": 2.2605, - "step": 1270 - }, - { - "epoch": 0.21, - "learning_rate": 0.0006412203389830509, - "loss": 2.2376, - "step": 1271 - }, - { - "epoch": 0.21, - "learning_rate": 0.0006410847457627119, - "loss": 2.2654, - "step": 1272 - }, - { - "epoch": 0.21, - "learning_rate": 0.0006409491525423729, - "loss": 2.3242, - "step": 1273 - }, - { - "epoch": 0.21, - "learning_rate": 0.000640813559322034, - "loss": 2.2574, - "step": 1274 - }, - { - "epoch": 0.21, - "learning_rate": 0.0006406779661016949, - "loss": 2.3162, - "step": 1275 - }, - { - "epoch": 0.21, - "learning_rate": 0.000640542372881356, - "loss": 2.2778, - "step": 1276 - }, - { - "epoch": 0.21, - "learning_rate": 0.000640406779661017, - "loss": 2.3337, - "step": 1277 - }, - { - "epoch": 0.21, - "learning_rate": 0.000640271186440678, - "loss": 2.2377, - "step": 1278 - }, - { - "epoch": 0.21, - "learning_rate": 0.000640135593220339, - "loss": 2.2721, - "step": 1279 - }, - { - "epoch": 0.21, - "learning_rate": 0.00064, - "loss": 2.3133, - "step": 1280 - }, - { - "epoch": 0.21, - "learning_rate": 0.000639864406779661, - "loss": 2.3427, - "step": 1281 - }, - { - "epoch": 0.21, - "learning_rate": 0.0006397288135593221, - "loss": 2.3131, - "step": 1282 - }, - { - "epoch": 0.21, - "learning_rate": 0.0006395932203389831, - "loss": 2.2986, - "step": 1283 - }, - { - "epoch": 0.21, - "learning_rate": 0.0006394576271186442, - "loss": 2.4004, - "step": 1284 - }, - { - "epoch": 0.21, - "learning_rate": 0.0006393220338983051, - "loss": 2.2196, - "step": 1285 - }, - { - "epoch": 0.21, - "learning_rate": 0.000639186440677966, - "loss": 2.302, - "step": 1286 - }, - { - "epoch": 0.21, - "learning_rate": 0.0006390508474576271, - "loss": 2.2378, - "step": 1287 - }, - { - "epoch": 0.21, - "learning_rate": 0.0006389152542372881, - "loss": 2.3249, - "step": 1288 - }, - { - "epoch": 0.21, - "learning_rate": 0.0006387796610169492, - "loss": 2.2661, - "step": 1289 - }, - { - "epoch": 0.21, - "learning_rate": 0.0006386440677966102, - "loss": 2.3318, - "step": 1290 - }, - { - "epoch": 0.22, - "learning_rate": 0.0006385084745762713, - "loss": 2.3068, - "step": 1291 - }, - { - "epoch": 0.22, - "learning_rate": 0.0006383728813559323, - "loss": 2.261, - "step": 1292 - }, - { - "epoch": 0.22, - "learning_rate": 0.0006382372881355932, - "loss": 2.3461, - "step": 1293 - }, - { - "epoch": 0.22, - "learning_rate": 0.0006381016949152542, - "loss": 2.358, - "step": 1294 - }, - { - "epoch": 0.22, - "learning_rate": 0.0006379661016949153, - "loss": 2.2284, - "step": 1295 - }, - { - "epoch": 0.22, - "learning_rate": 0.0006378305084745763, - "loss": 2.3474, - "step": 1296 - }, - { - "epoch": 0.22, - "learning_rate": 0.0006376949152542374, - "loss": 2.3601, - "step": 1297 - }, - { - "epoch": 0.22, - "learning_rate": 0.0006375593220338984, - "loss": 2.2939, - "step": 1298 - }, - { - "epoch": 0.22, - "learning_rate": 0.0006374237288135593, - "loss": 2.2557, - "step": 1299 - }, - { - "epoch": 0.22, - "learning_rate": 0.0006372881355932203, - "loss": 2.2704, - "step": 1300 - }, - { - "epoch": 0.22, - "learning_rate": 0.0006371525423728814, - "loss": 2.359, - "step": 1301 - }, - { - "epoch": 0.22, - "learning_rate": 0.0006370169491525424, - "loss": 2.3993, - "step": 1302 - }, - { - "epoch": 0.22, - "learning_rate": 0.0006368813559322035, - "loss": 2.3153, - "step": 1303 - }, - { - "epoch": 0.22, - "learning_rate": 0.0006367457627118645, - "loss": 2.3549, - "step": 1304 - }, - { - "epoch": 0.22, - "learning_rate": 0.0006366101694915254, - "loss": 2.2743, - "step": 1305 - }, - { - "epoch": 0.22, - "learning_rate": 0.0006364745762711865, - "loss": 2.3129, - "step": 1306 - }, - { - "epoch": 0.22, - "learning_rate": 0.0006363389830508474, - "loss": 2.2605, - "step": 1307 - }, - { - "epoch": 0.22, - "learning_rate": 0.0006362033898305085, - "loss": 2.3037, - "step": 1308 - }, - { - "epoch": 0.22, - "learning_rate": 0.0006360677966101695, - "loss": 2.2953, - "step": 1309 - }, - { - "epoch": 0.22, - "learning_rate": 0.0006359322033898306, - "loss": 2.2559, - "step": 1310 - }, - { - "epoch": 0.22, - "learning_rate": 0.0006357966101694915, - "loss": 2.3459, - "step": 1311 - }, - { - "epoch": 0.22, - "learning_rate": 0.0006356610169491526, - "loss": 2.3355, - "step": 1312 - }, - { - "epoch": 0.22, - "learning_rate": 0.0006355254237288136, - "loss": 2.2384, - "step": 1313 - }, - { - "epoch": 0.22, - "learning_rate": 0.0006353898305084746, - "loss": 2.3426, - "step": 1314 - }, - { - "epoch": 0.22, - "learning_rate": 0.0006352542372881356, - "loss": 2.3322, - "step": 1315 - }, - { - "epoch": 0.22, - "learning_rate": 0.0006351186440677967, - "loss": 2.2829, - "step": 1316 - }, - { - "epoch": 0.22, - "learning_rate": 0.0006349830508474576, - "loss": 2.303, - "step": 1317 - }, - { - "epoch": 0.22, - "learning_rate": 0.0006348474576271187, - "loss": 2.3826, - "step": 1318 - }, - { - "epoch": 0.22, - "learning_rate": 0.0006347118644067797, - "loss": 2.2523, - "step": 1319 - }, - { - "epoch": 0.22, - "learning_rate": 0.0006345762711864408, - "loss": 2.3038, - "step": 1320 - }, - { - "epoch": 0.22, - "learning_rate": 0.0006344406779661017, - "loss": 2.3812, - "step": 1321 - }, - { - "epoch": 0.22, - "learning_rate": 0.0006343050847457628, - "loss": 2.2953, - "step": 1322 - }, - { - "epoch": 0.22, - "learning_rate": 0.0006341694915254237, - "loss": 2.2604, - "step": 1323 - }, - { - "epoch": 0.22, - "learning_rate": 0.0006340338983050848, - "loss": 2.3989, - "step": 1324 - }, - { - "epoch": 0.22, - "learning_rate": 0.0006338983050847458, - "loss": 2.2752, - "step": 1325 - }, - { - "epoch": 0.22, - "learning_rate": 0.0006337627118644068, - "loss": 2.425, - "step": 1326 - }, - { - "epoch": 0.22, - "learning_rate": 0.0006336271186440679, - "loss": 2.2434, - "step": 1327 - }, - { - "epoch": 0.22, - "learning_rate": 0.0006334915254237289, - "loss": 2.2774, - "step": 1328 - }, - { - "epoch": 0.22, - "learning_rate": 0.0006333559322033898, - "loss": 2.3416, - "step": 1329 - }, - { - "epoch": 0.22, - "learning_rate": 0.0006332203389830508, - "loss": 2.2928, - "step": 1330 - }, - { - "epoch": 0.22, - "learning_rate": 0.0006330847457627119, - "loss": 2.3543, - "step": 1331 - }, - { - "epoch": 0.22, - "learning_rate": 0.0006329491525423729, - "loss": 2.3618, - "step": 1332 - }, - { - "epoch": 0.22, - "learning_rate": 0.000632813559322034, - "loss": 2.2387, - "step": 1333 - }, - { - "epoch": 0.22, - "learning_rate": 0.000632677966101695, - "loss": 2.2274, - "step": 1334 - }, - { - "epoch": 0.22, - "learning_rate": 0.0006325423728813559, - "loss": 2.2839, - "step": 1335 - }, - { - "epoch": 0.22, - "learning_rate": 0.0006324067796610169, - "loss": 2.2827, - "step": 1336 - }, - { - "epoch": 0.22, - "learning_rate": 0.000632271186440678, - "loss": 2.2796, - "step": 1337 - }, - { - "epoch": 0.22, - "learning_rate": 0.000632135593220339, - "loss": 2.3939, - "step": 1338 - }, - { - "epoch": 0.22, - "learning_rate": 0.0006320000000000001, - "loss": 2.283, - "step": 1339 - }, - { - "epoch": 0.22, - "learning_rate": 0.0006318644067796611, - "loss": 2.2701, - "step": 1340 - }, - { - "epoch": 0.22, - "learning_rate": 0.0006317288135593221, - "loss": 2.2915, - "step": 1341 - }, - { - "epoch": 0.22, - "learning_rate": 0.0006315932203389831, - "loss": 2.3448, - "step": 1342 - }, - { - "epoch": 0.22, - "learning_rate": 0.0006314576271186441, - "loss": 2.3078, - "step": 1343 - }, - { - "epoch": 0.22, - "learning_rate": 0.0006313220338983051, - "loss": 2.2813, - "step": 1344 - }, - { - "epoch": 0.22, - "learning_rate": 0.0006311864406779662, - "loss": 2.2869, - "step": 1345 - }, - { - "epoch": 0.22, - "learning_rate": 0.0006310508474576272, - "loss": 2.2379, - "step": 1346 - }, - { - "epoch": 0.22, - "learning_rate": 0.0006309152542372881, - "loss": 2.2619, - "step": 1347 - }, - { - "epoch": 0.22, - "learning_rate": 0.0006307796610169492, - "loss": 2.2345, - "step": 1348 - }, - { - "epoch": 0.22, - "learning_rate": 0.0006306440677966102, - "loss": 2.2686, - "step": 1349 - }, - { - "epoch": 0.23, - "learning_rate": 0.0006305084745762712, - "loss": 2.2314, - "step": 1350 - }, - { - "epoch": 0.23, - "learning_rate": 0.0006303728813559322, - "loss": 2.383, - "step": 1351 - }, - { - "epoch": 0.23, - "learning_rate": 0.0006302372881355933, - "loss": 2.3418, - "step": 1352 - }, - { - "epoch": 0.23, - "learning_rate": 0.0006301016949152542, - "loss": 2.2665, - "step": 1353 - }, - { - "epoch": 0.23, - "learning_rate": 0.0006299661016949153, - "loss": 2.3437, - "step": 1354 - }, - { - "epoch": 0.23, - "learning_rate": 0.0006298305084745763, - "loss": 2.3268, - "step": 1355 - }, - { - "epoch": 0.23, - "learning_rate": 0.0006296949152542373, - "loss": 2.293, - "step": 1356 - }, - { - "epoch": 0.23, - "learning_rate": 0.0006295593220338983, - "loss": 2.3424, - "step": 1357 - }, - { - "epoch": 0.23, - "learning_rate": 0.0006294237288135594, - "loss": 2.3127, - "step": 1358 - }, - { - "epoch": 0.23, - "learning_rate": 0.0006292881355932203, - "loss": 2.1553, - "step": 1359 - }, - { - "epoch": 0.23, - "learning_rate": 0.0006291525423728814, - "loss": 2.2669, - "step": 1360 - }, - { - "epoch": 0.23, - "learning_rate": 0.0006290169491525424, - "loss": 2.3763, - "step": 1361 - }, - { - "epoch": 0.23, - "learning_rate": 0.0006288813559322035, - "loss": 2.2552, - "step": 1362 - }, - { - "epoch": 0.23, - "learning_rate": 0.0006287457627118645, - "loss": 2.2831, - "step": 1363 - }, - { - "epoch": 0.23, - "learning_rate": 0.0006286101694915255, - "loss": 2.2859, - "step": 1364 - }, - { - "epoch": 0.23, - "learning_rate": 0.0006284745762711864, - "loss": 2.1963, - "step": 1365 - }, - { - "epoch": 0.23, - "learning_rate": 0.0006283389830508475, - "loss": 2.3546, - "step": 1366 - }, - { - "epoch": 0.23, - "learning_rate": 0.0006282033898305085, - "loss": 2.2664, - "step": 1367 - }, - { - "epoch": 0.23, - "learning_rate": 0.0006280677966101695, - "loss": 2.2598, - "step": 1368 - }, - { - "epoch": 0.23, - "learning_rate": 0.0006279322033898306, - "loss": 2.3386, - "step": 1369 - }, - { - "epoch": 0.23, - "learning_rate": 0.0006277966101694916, - "loss": 2.2986, - "step": 1370 - }, - { - "epoch": 0.23, - "learning_rate": 0.0006276610169491525, - "loss": 2.2986, - "step": 1371 - }, - { - "epoch": 0.23, - "learning_rate": 0.0006275254237288135, - "loss": 2.3169, - "step": 1372 - }, - { - "epoch": 0.23, - "learning_rate": 0.0006273898305084746, - "loss": 2.1821, - "step": 1373 - }, - { - "epoch": 0.23, - "learning_rate": 0.0006272542372881356, - "loss": 2.2721, - "step": 1374 - }, - { - "epoch": 0.23, - "learning_rate": 0.0006271186440677967, - "loss": 2.228, - "step": 1375 - }, - { - "epoch": 0.23, - "learning_rate": 0.0006269830508474577, - "loss": 2.2645, - "step": 1376 - }, - { - "epoch": 0.23, - "learning_rate": 0.0006268474576271187, - "loss": 2.2406, - "step": 1377 - }, - { - "epoch": 0.23, - "learning_rate": 0.0006267118644067796, - "loss": 2.283, - "step": 1378 - }, - { - "epoch": 0.23, - "learning_rate": 0.0006265762711864407, - "loss": 2.3808, - "step": 1379 - }, - { - "epoch": 0.23, - "learning_rate": 0.0006264406779661017, - "loss": 2.254, - "step": 1380 - }, - { - "epoch": 0.23, - "learning_rate": 0.0006263050847457628, - "loss": 2.2514, - "step": 1381 - }, - { - "epoch": 0.23, - "learning_rate": 0.0006261694915254238, - "loss": 2.2144, - "step": 1382 - }, - { - "epoch": 0.23, - "learning_rate": 0.0006260338983050848, - "loss": 2.291, - "step": 1383 - }, - { - "epoch": 0.23, - "learning_rate": 0.0006258983050847458, - "loss": 2.3093, - "step": 1384 - }, - { - "epoch": 0.23, - "learning_rate": 0.0006257627118644068, - "loss": 2.3434, - "step": 1385 - }, - { - "epoch": 0.23, - "learning_rate": 0.0006256271186440678, - "loss": 2.3041, - "step": 1386 - }, - { - "epoch": 0.23, - "learning_rate": 0.0006254915254237289, - "loss": 2.2687, - "step": 1387 - }, - { - "epoch": 0.23, - "learning_rate": 0.0006253559322033899, - "loss": 2.2695, - "step": 1388 - }, - { - "epoch": 0.23, - "learning_rate": 0.0006252203389830508, - "loss": 2.2774, - "step": 1389 - }, - { - "epoch": 0.23, - "learning_rate": 0.0006250847457627119, - "loss": 2.3294, - "step": 1390 - }, - { - "epoch": 0.23, - "learning_rate": 0.0006249491525423729, - "loss": 2.3055, - "step": 1391 - }, - { - "epoch": 0.23, - "learning_rate": 0.0006248135593220339, - "loss": 2.2955, - "step": 1392 - }, - { - "epoch": 0.23, - "learning_rate": 0.0006246779661016949, - "loss": 2.3546, - "step": 1393 - }, - { - "epoch": 0.23, - "learning_rate": 0.000624542372881356, - "loss": 2.2937, - "step": 1394 - }, - { - "epoch": 0.23, - "learning_rate": 0.0006244067796610169, - "loss": 2.2788, - "step": 1395 - }, - { - "epoch": 0.23, - "learning_rate": 0.000624271186440678, - "loss": 2.3287, - "step": 1396 - }, - { - "epoch": 0.23, - "learning_rate": 0.000624135593220339, - "loss": 2.2856, - "step": 1397 - }, - { - "epoch": 0.23, - "learning_rate": 0.0006240000000000001, - "loss": 2.3113, - "step": 1398 - }, - { - "epoch": 0.23, - "learning_rate": 0.0006238644067796611, - "loss": 2.2799, - "step": 1399 - }, - { - "epoch": 0.23, - "learning_rate": 0.0006237288135593221, - "loss": 2.3728, - "step": 1400 - }, - { - "epoch": 0.23, - "learning_rate": 0.000623593220338983, - "loss": 2.259, - "step": 1401 - }, - { - "epoch": 0.23, - "learning_rate": 0.0006234576271186441, - "loss": 2.3014, - "step": 1402 - }, - { - "epoch": 0.23, - "learning_rate": 0.0006233220338983051, - "loss": 2.311, - "step": 1403 - }, - { - "epoch": 0.23, - "learning_rate": 0.0006231864406779662, - "loss": 2.368, - "step": 1404 - }, - { - "epoch": 0.23, - "learning_rate": 0.0006230508474576272, - "loss": 2.2718, - "step": 1405 - }, - { - "epoch": 0.23, - "learning_rate": 0.0006229152542372882, - "loss": 2.4123, - "step": 1406 - }, - { - "epoch": 0.23, - "learning_rate": 0.0006227796610169491, - "loss": 2.2875, - "step": 1407 - }, - { - "epoch": 0.23, - "learning_rate": 0.0006226440677966102, - "loss": 2.3094, - "step": 1408 - }, - { - "epoch": 0.23, - "learning_rate": 0.0006225084745762712, - "loss": 2.2793, - "step": 1409 - }, - { - "epoch": 0.23, - "learning_rate": 0.0006223728813559322, - "loss": 2.3255, - "step": 1410 - }, - { - "epoch": 0.24, - "learning_rate": 0.0006222372881355933, - "loss": 2.2764, - "step": 1411 - }, - { - "epoch": 0.24, - "learning_rate": 0.0006221016949152543, - "loss": 2.3127, - "step": 1412 - }, - { - "epoch": 0.24, - "learning_rate": 0.0006219661016949152, - "loss": 2.2667, - "step": 1413 - }, - { - "epoch": 0.24, - "learning_rate": 0.0006218305084745762, - "loss": 2.2738, - "step": 1414 - }, - { - "epoch": 0.24, - "learning_rate": 0.0006216949152542373, - "loss": 2.2272, - "step": 1415 - }, - { - "epoch": 0.24, - "learning_rate": 0.0006215593220338983, - "loss": 2.2777, - "step": 1416 - }, - { - "epoch": 0.24, - "learning_rate": 0.0006214237288135594, - "loss": 2.3331, - "step": 1417 - }, - { - "epoch": 0.24, - "learning_rate": 0.0006212881355932204, - "loss": 2.2846, - "step": 1418 - }, - { - "epoch": 0.24, - "learning_rate": 0.0006211525423728814, - "loss": 2.2315, - "step": 1419 - }, - { - "epoch": 0.24, - "learning_rate": 0.0006210169491525424, - "loss": 2.3674, - "step": 1420 - }, - { - "epoch": 0.24, - "learning_rate": 0.0006208813559322034, - "loss": 2.4168, - "step": 1421 - }, - { - "epoch": 0.24, - "learning_rate": 0.0006207457627118644, - "loss": 2.2196, - "step": 1422 - }, - { - "epoch": 0.24, - "learning_rate": 0.0006206101694915255, - "loss": 2.3477, - "step": 1423 - }, - { - "epoch": 0.24, - "learning_rate": 0.0006204745762711865, - "loss": 2.3572, - "step": 1424 - }, - { - "epoch": 0.24, - "learning_rate": 0.0006203389830508475, - "loss": 2.4283, - "step": 1425 - }, - { - "epoch": 0.24, - "learning_rate": 0.0006202033898305085, - "loss": 2.4598, - "step": 1426 - }, - { - "epoch": 0.24, - "learning_rate": 0.0006200677966101695, - "loss": 2.3063, - "step": 1427 - }, - { - "epoch": 0.24, - "learning_rate": 0.0006199322033898305, - "loss": 2.3396, - "step": 1428 - }, - { - "epoch": 0.24, - "learning_rate": 0.0006197966101694916, - "loss": 2.3948, - "step": 1429 - }, - { - "epoch": 0.24, - "learning_rate": 0.0006196610169491526, - "loss": 2.2976, - "step": 1430 - }, - { - "epoch": 0.24, - "learning_rate": 0.0006195254237288135, - "loss": 2.3941, - "step": 1431 - }, - { - "epoch": 0.24, - "learning_rate": 0.0006193898305084746, - "loss": 2.3908, - "step": 1432 - }, - { - "epoch": 0.24, - "learning_rate": 0.0006192542372881356, - "loss": 2.3348, - "step": 1433 - }, - { - "epoch": 0.24, - "learning_rate": 0.0006191186440677967, - "loss": 2.2954, - "step": 1434 - }, - { - "epoch": 0.24, - "learning_rate": 0.0006189830508474576, - "loss": 2.3364, - "step": 1435 - }, - { - "epoch": 0.24, - "learning_rate": 0.0006188474576271187, - "loss": 2.3149, - "step": 1436 - }, - { - "epoch": 0.24, - "learning_rate": 0.0006187118644067796, - "loss": 2.3592, - "step": 1437 - }, - { - "epoch": 0.24, - "learning_rate": 0.0006185762711864407, - "loss": 2.3256, - "step": 1438 - }, - { - "epoch": 0.24, - "learning_rate": 0.0006184406779661017, - "loss": 2.2592, - "step": 1439 - }, - { - "epoch": 0.24, - "learning_rate": 0.0006183050847457628, - "loss": 2.3998, - "step": 1440 - }, - { - "epoch": 0.24, - "learning_rate": 0.0006181694915254238, - "loss": 2.3471, - "step": 1441 - }, - { - "epoch": 0.24, - "learning_rate": 0.0006180338983050848, - "loss": 2.3034, - "step": 1442 - }, - { - "epoch": 0.24, - "learning_rate": 0.0006178983050847457, - "loss": 2.3124, - "step": 1443 - }, - { - "epoch": 0.24, - "learning_rate": 0.0006177627118644068, - "loss": 2.3166, - "step": 1444 - }, - { - "epoch": 0.24, - "learning_rate": 0.0006176271186440678, - "loss": 2.2621, - "step": 1445 - }, - { - "epoch": 0.24, - "learning_rate": 0.0006174915254237289, - "loss": 2.4386, - "step": 1446 - }, - { - "epoch": 0.24, - "learning_rate": 0.0006173559322033899, - "loss": 2.2886, - "step": 1447 - }, - { - "epoch": 0.24, - "learning_rate": 0.000617220338983051, - "loss": 2.3839, - "step": 1448 - }, - { - "epoch": 0.24, - "learning_rate": 0.0006170847457627118, - "loss": 2.2339, - "step": 1449 - }, - { - "epoch": 0.24, - "learning_rate": 0.0006169491525423729, - "loss": 2.3, - "step": 1450 - }, - { - "epoch": 0.24, - "learning_rate": 0.0006168135593220339, - "loss": 2.3163, - "step": 1451 - }, - { - "epoch": 0.24, - "learning_rate": 0.0006166779661016949, - "loss": 2.4092, - "step": 1452 - }, - { - "epoch": 0.24, - "learning_rate": 0.000616542372881356, - "loss": 2.3008, - "step": 1453 - }, - { - "epoch": 0.24, - "learning_rate": 0.000616406779661017, - "loss": 2.3012, - "step": 1454 - }, - { - "epoch": 0.24, - "learning_rate": 0.000616271186440678, - "loss": 2.3081, - "step": 1455 - }, - { - "epoch": 0.24, - "learning_rate": 0.000616135593220339, - "loss": 2.3407, - "step": 1456 - }, - { - "epoch": 0.24, - "learning_rate": 0.000616, - "loss": 2.2854, - "step": 1457 - }, - { - "epoch": 0.24, - "learning_rate": 0.000615864406779661, - "loss": 2.2489, - "step": 1458 - }, - { - "epoch": 0.24, - "learning_rate": 0.0006157288135593221, - "loss": 2.2157, - "step": 1459 - }, - { - "epoch": 0.24, - "learning_rate": 0.0006155932203389831, - "loss": 2.3015, - "step": 1460 - }, - { - "epoch": 0.24, - "learning_rate": 0.0006154576271186442, - "loss": 2.2659, - "step": 1461 - }, - { - "epoch": 0.24, - "learning_rate": 0.0006153220338983051, - "loss": 2.3238, - "step": 1462 - }, - { - "epoch": 0.24, - "learning_rate": 0.0006151864406779661, - "loss": 2.2884, - "step": 1463 - }, - { - "epoch": 0.24, - "learning_rate": 0.0006150508474576271, - "loss": 2.3412, - "step": 1464 - }, - { - "epoch": 0.24, - "learning_rate": 0.0006149152542372882, - "loss": 2.3517, - "step": 1465 - }, - { - "epoch": 0.24, - "learning_rate": 0.0006147796610169492, - "loss": 2.2742, - "step": 1466 - }, - { - "epoch": 0.24, - "learning_rate": 0.0006146440677966103, - "loss": 2.2885, - "step": 1467 - }, - { - "epoch": 0.24, - "learning_rate": 0.0006145084745762712, - "loss": 2.3107, - "step": 1468 - }, - { - "epoch": 0.24, - "learning_rate": 0.0006143728813559323, - "loss": 2.2957, - "step": 1469 - }, - { - "epoch": 0.24, - "learning_rate": 0.0006142372881355933, - "loss": 2.2977, - "step": 1470 - }, - { - "epoch": 0.25, - "learning_rate": 0.0006141016949152543, - "loss": 2.2358, - "step": 1471 - }, - { - "epoch": 0.25, - "learning_rate": 0.0006139661016949153, - "loss": 2.3502, - "step": 1472 - }, - { - "epoch": 0.25, - "learning_rate": 0.0006138305084745762, - "loss": 2.3496, - "step": 1473 - }, - { - "epoch": 0.25, - "learning_rate": 0.0006136949152542373, - "loss": 2.3138, - "step": 1474 - }, - { - "epoch": 0.25, - "learning_rate": 0.0006135593220338983, - "loss": 2.2705, - "step": 1475 - }, - { - "epoch": 0.25, - "learning_rate": 0.0006134237288135594, - "loss": 2.3264, - "step": 1476 - }, - { - "epoch": 0.25, - "learning_rate": 0.0006132881355932204, - "loss": 2.2809, - "step": 1477 - }, - { - "epoch": 0.25, - "learning_rate": 0.0006131525423728814, - "loss": 2.2642, - "step": 1478 - }, - { - "epoch": 0.25, - "learning_rate": 0.0006130169491525423, - "loss": 2.3581, - "step": 1479 - }, - { - "epoch": 0.25, - "learning_rate": 0.0006128813559322034, - "loss": 2.2181, - "step": 1480 - }, - { - "epoch": 0.25, - "learning_rate": 0.0006127457627118644, - "loss": 2.3564, - "step": 1481 - }, - { - "epoch": 0.25, - "learning_rate": 0.0006126101694915255, - "loss": 2.2418, - "step": 1482 - }, - { - "epoch": 0.25, - "learning_rate": 0.0006124745762711865, - "loss": 2.2574, - "step": 1483 - }, - { - "epoch": 0.25, - "learning_rate": 0.0006123389830508475, - "loss": 2.2482, - "step": 1484 - }, - { - "epoch": 0.25, - "learning_rate": 0.0006122033898305084, - "loss": 2.3216, - "step": 1485 - }, - { - "epoch": 0.25, - "learning_rate": 0.0006120677966101695, - "loss": 2.3377, - "step": 1486 - }, - { - "epoch": 0.25, - "learning_rate": 0.0006119322033898305, - "loss": 2.2732, - "step": 1487 - }, - { - "epoch": 0.25, - "learning_rate": 0.0006117966101694916, - "loss": 2.3531, - "step": 1488 - }, - { - "epoch": 0.25, - "learning_rate": 0.0006116610169491526, - "loss": 2.2955, - "step": 1489 - }, - { - "epoch": 0.25, - "learning_rate": 0.0006115254237288137, - "loss": 2.2086, - "step": 1490 - }, - { - "epoch": 0.25, - "learning_rate": 0.0006113898305084747, - "loss": 2.2781, - "step": 1491 - }, - { - "epoch": 0.25, - "learning_rate": 0.0006112542372881356, - "loss": 2.3606, - "step": 1492 - }, - { - "epoch": 0.25, - "learning_rate": 0.0006111186440677966, - "loss": 2.2835, - "step": 1493 - }, - { - "epoch": 0.25, - "learning_rate": 0.0006109830508474576, - "loss": 2.2461, - "step": 1494 - }, - { - "epoch": 0.25, - "learning_rate": 0.0006108474576271187, - "loss": 2.3081, - "step": 1495 - }, - { - "epoch": 0.25, - "learning_rate": 0.0006107118644067797, - "loss": 2.3191, - "step": 1496 - }, - { - "epoch": 0.25, - "learning_rate": 0.0006105762711864408, - "loss": 2.3301, - "step": 1497 - }, - { - "epoch": 0.25, - "learning_rate": 0.0006104406779661017, - "loss": 2.2949, - "step": 1498 - }, - { - "epoch": 0.25, - "learning_rate": 0.0006103050847457627, - "loss": 2.2728, - "step": 1499 - }, - { - "epoch": 0.25, - "learning_rate": 0.0006101694915254237, - "loss": 2.2764, - "step": 1500 - }, - { - "epoch": 0.25, - "eval_gen_len": 19.0, - "eval_loss": 2.148979425430298, - "eval_rouge1": 0.2605, - "eval_rouge2": 0.0799, - "eval_rougeL": 0.2149, - "eval_rougeLsum": 0.2145, - "eval_runtime": 21.1486, - "eval_samples_per_second": 2.364, - "eval_steps_per_second": 0.331, - "step": 1500 - }, - { - "epoch": 0.25, - "learning_rate": 0.0006100338983050848, - "loss": 2.2922, - "step": 1501 - }, - { - "epoch": 0.25, - "learning_rate": 0.0006098983050847458, - "loss": 2.3739, - "step": 1502 - }, - { - "epoch": 0.25, - "learning_rate": 0.0006097627118644069, - "loss": 2.2985, - "step": 1503 - }, - { - "epoch": 0.25, - "learning_rate": 0.0006096271186440678, - "loss": 2.2795, - "step": 1504 - }, - { - "epoch": 0.25, - "learning_rate": 0.0006094915254237289, - "loss": 2.3223, - "step": 1505 - }, - { - "epoch": 0.25, - "learning_rate": 0.0006093559322033898, - "loss": 2.2481, - "step": 1506 - }, - { - "epoch": 0.25, - "learning_rate": 0.0006092203389830509, - "loss": 2.3285, - "step": 1507 - }, - { - "epoch": 0.25, - "learning_rate": 0.0006090847457627119, - "loss": 2.284, - "step": 1508 - }, - { - "epoch": 0.25, - "learning_rate": 0.000608949152542373, - "loss": 2.2725, - "step": 1509 - }, - { - "epoch": 0.25, - "learning_rate": 0.0006088135593220339, - "loss": 2.336, - "step": 1510 - }, - { - "epoch": 0.25, - "learning_rate": 0.000608677966101695, - "loss": 2.1859, - "step": 1511 - }, - { - "epoch": 0.25, - "learning_rate": 0.000608542372881356, - "loss": 2.2839, - "step": 1512 - }, - { - "epoch": 0.25, - "learning_rate": 0.000608406779661017, - "loss": 2.3752, - "step": 1513 - }, - { - "epoch": 0.25, - "learning_rate": 0.000608271186440678, - "loss": 2.2914, - "step": 1514 - }, - { - "epoch": 0.25, - "learning_rate": 0.0006081355932203389, - "loss": 2.3108, - "step": 1515 - }, - { - "epoch": 0.25, - "learning_rate": 0.000608, - "loss": 2.3215, - "step": 1516 - }, - { - "epoch": 0.25, - "learning_rate": 0.000607864406779661, - "loss": 2.2131, - "step": 1517 - }, - { - "epoch": 0.25, - "learning_rate": 0.0006077288135593221, - "loss": 2.2551, - "step": 1518 - }, - { - "epoch": 0.25, - "learning_rate": 0.0006075932203389831, - "loss": 2.2387, - "step": 1519 - }, - { - "epoch": 0.25, - "learning_rate": 0.0006074576271186441, - "loss": 2.2188, - "step": 1520 - }, - { - "epoch": 0.25, - "learning_rate": 0.000607322033898305, - "loss": 2.3111, - "step": 1521 - }, - { - "epoch": 0.25, - "learning_rate": 0.0006071864406779661, - "loss": 2.3208, - "step": 1522 - }, - { - "epoch": 0.25, - "learning_rate": 0.0006070508474576271, - "loss": 2.3296, - "step": 1523 - }, - { - "epoch": 0.25, - "learning_rate": 0.0006069152542372882, - "loss": 2.2573, - "step": 1524 - }, - { - "epoch": 0.25, - "learning_rate": 0.0006067796610169492, - "loss": 2.291, - "step": 1525 - }, - { - "epoch": 0.25, - "learning_rate": 0.0006066440677966103, - "loss": 2.2959, - "step": 1526 - }, - { - "epoch": 0.25, - "learning_rate": 0.0006065084745762713, - "loss": 2.2441, - "step": 1527 - }, - { - "epoch": 0.25, - "learning_rate": 0.0006063728813559322, - "loss": 2.2261, - "step": 1528 - }, - { - "epoch": 0.25, - "learning_rate": 0.0006062372881355932, - "loss": 2.3545, - "step": 1529 - }, - { - "epoch": 0.26, - "learning_rate": 0.0006061016949152543, - "loss": 2.3121, - "step": 1530 - }, - { - "epoch": 0.26, - "learning_rate": 0.0006059661016949153, - "loss": 2.2171, - "step": 1531 - }, - { - "epoch": 0.26, - "learning_rate": 0.0006058305084745764, - "loss": 2.2224, - "step": 1532 - }, - { - "epoch": 0.26, - "learning_rate": 0.0006056949152542374, - "loss": 2.2228, - "step": 1533 - }, - { - "epoch": 0.26, - "learning_rate": 0.0006055593220338983, - "loss": 2.3128, - "step": 1534 - }, - { - "epoch": 0.26, - "learning_rate": 0.0006054237288135593, - "loss": 2.2306, - "step": 1535 - }, - { - "epoch": 0.26, - "learning_rate": 0.0006052881355932203, - "loss": 2.2712, - "step": 1536 - }, - { - "epoch": 0.26, - "learning_rate": 0.0006051525423728814, - "loss": 2.2482, - "step": 1537 - }, - { - "epoch": 0.26, - "learning_rate": 0.0006050169491525424, - "loss": 2.2845, - "step": 1538 - }, - { - "epoch": 0.26, - "learning_rate": 0.0006048813559322035, - "loss": 2.2315, - "step": 1539 - }, - { - "epoch": 0.26, - "learning_rate": 0.0006047457627118644, - "loss": 2.3159, - "step": 1540 - }, - { - "epoch": 0.26, - "learning_rate": 0.0006046101694915254, - "loss": 2.2382, - "step": 1541 - }, - { - "epoch": 0.26, - "learning_rate": 0.0006044745762711864, - "loss": 2.292, - "step": 1542 - }, - { - "epoch": 0.26, - "learning_rate": 0.0006043389830508475, - "loss": 2.361, - "step": 1543 - }, - { - "epoch": 0.26, - "learning_rate": 0.0006042033898305085, - "loss": 2.2447, - "step": 1544 - }, - { - "epoch": 0.26, - "learning_rate": 0.0006040677966101696, - "loss": 2.2829, - "step": 1545 - }, - { - "epoch": 0.26, - "learning_rate": 0.0006039322033898305, - "loss": 2.2398, - "step": 1546 - }, - { - "epoch": 0.26, - "learning_rate": 0.0006037966101694916, - "loss": 2.3102, - "step": 1547 - }, - { - "epoch": 0.26, - "learning_rate": 0.0006036610169491526, - "loss": 2.2573, - "step": 1548 - }, - { - "epoch": 0.26, - "learning_rate": 0.0006035254237288136, - "loss": 2.2714, - "step": 1549 - }, - { - "epoch": 0.26, - "learning_rate": 0.0006033898305084746, - "loss": 2.3052, - "step": 1550 - }, - { - "epoch": 0.26, - "learning_rate": 0.0006032542372881357, - "loss": 2.362, - "step": 1551 - }, - { - "epoch": 0.26, - "learning_rate": 0.0006031186440677966, - "loss": 2.2517, - "step": 1552 - }, - { - "epoch": 0.26, - "learning_rate": 0.0006029830508474577, - "loss": 2.2917, - "step": 1553 - }, - { - "epoch": 0.26, - "learning_rate": 0.0006028474576271187, - "loss": 2.2695, - "step": 1554 - }, - { - "epoch": 0.26, - "learning_rate": 0.0006027118644067797, - "loss": 2.275, - "step": 1555 - }, - { - "epoch": 0.26, - "learning_rate": 0.0006025762711864407, - "loss": 2.3191, - "step": 1556 - }, - { - "epoch": 0.26, - "learning_rate": 0.0006024406779661016, - "loss": 2.2158, - "step": 1557 - }, - { - "epoch": 0.26, - "learning_rate": 0.0006023050847457627, - "loss": 2.2693, - "step": 1558 - }, - { - "epoch": 0.26, - "learning_rate": 0.0006021694915254237, - "loss": 2.2978, - "step": 1559 - }, - { - "epoch": 0.26, - "learning_rate": 0.0006020338983050848, - "loss": 2.2901, - "step": 1560 - }, - { - "epoch": 0.26, - "learning_rate": 0.0006018983050847458, - "loss": 2.2971, - "step": 1561 - }, - { - "epoch": 0.26, - "learning_rate": 0.0006017627118644069, - "loss": 2.3085, - "step": 1562 - }, - { - "epoch": 0.26, - "learning_rate": 0.0006016271186440677, - "loss": 2.1701, - "step": 1563 - }, - { - "epoch": 0.26, - "learning_rate": 0.0006014915254237288, - "loss": 2.2849, - "step": 1564 - }, - { - "epoch": 0.26, - "learning_rate": 0.0006013559322033898, - "loss": 2.3691, - "step": 1565 - }, - { - "epoch": 0.26, - "learning_rate": 0.0006012203389830509, - "loss": 2.2521, - "step": 1566 - }, - { - "epoch": 0.26, - "learning_rate": 0.0006010847457627119, - "loss": 2.287, - "step": 1567 - }, - { - "epoch": 0.26, - "learning_rate": 0.000600949152542373, - "loss": 2.3416, - "step": 1568 - }, - { - "epoch": 0.26, - "learning_rate": 0.000600813559322034, - "loss": 2.2519, - "step": 1569 - }, - { - "epoch": 0.26, - "learning_rate": 0.0006006779661016949, - "loss": 2.3172, - "step": 1570 - }, - { - "epoch": 0.26, - "learning_rate": 0.0006005423728813559, - "loss": 2.2536, - "step": 1571 - }, - { - "epoch": 0.26, - "learning_rate": 0.000600406779661017, - "loss": 2.2497, - "step": 1572 - }, - { - "epoch": 0.26, - "learning_rate": 0.000600271186440678, - "loss": 2.2123, - "step": 1573 - }, - { - "epoch": 0.26, - "learning_rate": 0.0006001355932203391, - "loss": 2.2607, - "step": 1574 - }, - { - "epoch": 0.26, - "learning_rate": 0.0006000000000000001, - "loss": 2.2425, - "step": 1575 - }, - { - "epoch": 0.26, - "learning_rate": 0.000599864406779661, - "loss": 2.303, - "step": 1576 - }, - { - "epoch": 0.26, - "learning_rate": 0.000599728813559322, - "loss": 2.1978, - "step": 1577 - }, - { - "epoch": 0.26, - "learning_rate": 0.000599593220338983, - "loss": 2.3545, - "step": 1578 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005994576271186441, - "loss": 2.2135, - "step": 1579 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005993220338983051, - "loss": 2.2175, - "step": 1580 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005991864406779662, - "loss": 2.1776, - "step": 1581 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005990508474576271, - "loss": 2.2385, - "step": 1582 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005989152542372882, - "loss": 2.2406, - "step": 1583 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005987796610169492, - "loss": 2.2369, - "step": 1584 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005986440677966102, - "loss": 2.259, - "step": 1585 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005985084745762712, - "loss": 2.1827, - "step": 1586 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005983728813559323, - "loss": 2.2983, - "step": 1587 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005982372881355932, - "loss": 2.221, - "step": 1588 - }, - { - "epoch": 0.26, - "learning_rate": 0.0005981016949152543, - "loss": 2.2862, - "step": 1589 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005979661016949153, - "loss": 2.2694, - "step": 1590 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005978305084745763, - "loss": 2.3226, - "step": 1591 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005976949152542373, - "loss": 2.3082, - "step": 1592 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005975593220338984, - "loss": 2.2479, - "step": 1593 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005974237288135593, - "loss": 2.2975, - "step": 1594 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005972881355932204, - "loss": 2.2274, - "step": 1595 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005971525423728814, - "loss": 2.2348, - "step": 1596 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005970169491525424, - "loss": 2.1944, - "step": 1597 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005968813559322035, - "loss": 2.2425, - "step": 1598 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005967457627118643, - "loss": 2.2994, - "step": 1599 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005966101694915254, - "loss": 2.2142, - "step": 1600 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005964745762711864, - "loss": 2.3281, - "step": 1601 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005963389830508475, - "loss": 2.2567, - "step": 1602 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005962033898305085, - "loss": 2.3205, - "step": 1603 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005960677966101696, - "loss": 2.2151, - "step": 1604 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005959322033898306, - "loss": 2.2991, - "step": 1605 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005957966101694915, - "loss": 2.2718, - "step": 1606 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005956610169491525, - "loss": 2.2693, - "step": 1607 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005955254237288136, - "loss": 2.2874, - "step": 1608 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005953898305084746, - "loss": 2.2982, - "step": 1609 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005952542372881357, - "loss": 2.3516, - "step": 1610 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005951186440677967, - "loss": 2.3024, - "step": 1611 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005949830508474576, - "loss": 2.2145, - "step": 1612 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005948474576271186, - "loss": 2.2724, - "step": 1613 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005947118644067797, - "loss": 2.3571, - "step": 1614 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005945762711864407, - "loss": 2.2689, - "step": 1615 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005944406779661018, - "loss": 2.1719, - "step": 1616 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005943050847457628, - "loss": 2.1941, - "step": 1617 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005941694915254237, - "loss": 2.2625, - "step": 1618 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005940338983050848, - "loss": 2.2496, - "step": 1619 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005938983050847458, - "loss": 2.2635, - "step": 1620 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005937627118644068, - "loss": 2.2381, - "step": 1621 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005936271186440678, - "loss": 2.3002, - "step": 1622 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005934915254237289, - "loss": 2.3376, - "step": 1623 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005933559322033898, - "loss": 2.2623, - "step": 1624 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005932203389830509, - "loss": 2.2539, - "step": 1625 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005930847457627119, - "loss": 2.2382, - "step": 1626 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005929491525423729, - "loss": 2.2741, - "step": 1627 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005928135593220339, - "loss": 2.2915, - "step": 1628 - }, - { - "epoch": 0.27, - "learning_rate": 0.000592677966101695, - "loss": 2.2799, - "step": 1629 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005925423728813559, - "loss": 2.1959, - "step": 1630 - }, - { - "epoch": 0.27, - "learning_rate": 0.000592406779661017, - "loss": 2.1868, - "step": 1631 - }, - { - "epoch": 0.27, - "learning_rate": 0.000592271186440678, - "loss": 2.2574, - "step": 1632 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005921355932203391, - "loss": 2.2274, - "step": 1633 - }, - { - "epoch": 0.27, - "learning_rate": 0.000592, - "loss": 2.3829, - "step": 1634 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005918644067796611, - "loss": 2.3237, - "step": 1635 - }, - { - "epoch": 0.27, - "learning_rate": 0.000591728813559322, - "loss": 2.3179, - "step": 1636 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005915932203389831, - "loss": 2.3098, - "step": 1637 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005914576271186441, - "loss": 2.2883, - "step": 1638 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005913220338983051, - "loss": 2.3284, - "step": 1639 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005911864406779662, - "loss": 2.2887, - "step": 1640 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005910508474576272, - "loss": 2.2821, - "step": 1641 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005909152542372881, - "loss": 2.2241, - "step": 1642 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005907796610169491, - "loss": 2.3224, - "step": 1643 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005906440677966102, - "loss": 2.2384, - "step": 1644 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005905084745762712, - "loss": 2.2738, - "step": 1645 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005903728813559323, - "loss": 2.2699, - "step": 1646 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005902372881355933, - "loss": 2.3547, - "step": 1647 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005901016949152542, - "loss": 2.2796, - "step": 1648 - }, - { - "epoch": 0.27, - "learning_rate": 0.0005899661016949152, - "loss": 2.2808, - "step": 1649 - }, - { - "epoch": 0.28, - "learning_rate": 0.0005898305084745763, - "loss": 2.3383, - "step": 1650 - }, - { - "epoch": 0.28, - "learning_rate": 0.0005896949152542373, - "loss": 2.2917, - "step": 1651 - }, - { - "epoch": 0.28, - "learning_rate": 0.0005895593220338984, - "loss": 2.3558, - "step": 1652 - }, - { - "epoch": 0.28, - "learning_rate": 0.0005894237288135594, - "loss": 2.3203, - "step": 1653 - }, - { - "epoch": 0.28, - "learning_rate": 0.0005892881355932204, - "loss": 2.2349, - "step": 1654 - }, - { - "epoch": 0.28, - "learning_rate": 0.0005891525423728814, - "loss": 2.3092, - "step": 1655 - }, - { - "epoch": 0.28, - "learning_rate": 0.0005890169491525424, - "loss": 2.2656, - "step": 1656 - }, - { - "epoch": 0.28, - "learning_rate": 0.0005888813559322034, - "loss": 2.283, - "step": 1657 - }, - { - "epoch": 0.28, - "learning_rate": 0.0005887457627118645, - "loss": 2.2651, - "step": 1658 - }, - { - "epoch": 0.28, - "learning_rate": 0.0005886101694915255, - "loss": 2.2562, - "step": 1659 - }, - { - "epoch": 0.28, - "learning_rate": 0.0005884745762711864, - "loss": 2.2264, - "step": 1660 - }, - { - "epoch": 0.28, - "learning_rate": 0.0005883389830508475, - "loss": 2.2421, - "step": 1661 - }, - { - "epoch": 0.28, - "learning_rate": 0.0005882033898305085, - "loss": 2.2191, - "step": 1662 - }, - { - "epoch": 0.28, - "learning_rate": 0.0005880677966101695, - "loss": 2.2624, - "step": 1663 - }, - { - "epoch": 0.28, - "learning_rate": 0.0005879322033898305, - "loss": 2.2846, - "step": 1664 - }, - { - "epoch": 0.28, - "learning_rate": 0.0005877966101694916, - "loss": 2.285, - "step": 1665 - }, - { - "epoch": 0.28, - "learning_rate": 0.0005876610169491525, - "loss": 2.2081, - "step": 1666 - }, - { - "epoch": 0.28, - "learning_rate": 0.0005875254237288136, - "loss": 2.2211, - "step": 1667 - }, - { - "epoch": 0.28, - "learning_rate": 0.0005873898305084746, - "loss": 2.3023, - "step": 1668 - }, - { - "epoch": 0.28, - "learning_rate": 0.0005872542372881356, - "loss": 2.2705, - "step": 1669 - }, - { - "epoch": 0.28, - "learning_rate": 0.0005871186440677966, - "loss": 2.3565, - "step": 1670 - }, - { - "epoch": 0.28, - "learning_rate": 0.0005869830508474577, - "loss": 2.3122, - "step": 1671 - }, - { - "epoch": 0.28, - "learning_rate": 0.0005868474576271186, - "loss": 2.275, - "step": 1672 - }, - { - "epoch": 0.28, - "learning_rate": 0.0005867118644067797, - "loss": 2.2983, - "step": 1673 - }, - { - "epoch": 0.28, - "learning_rate": 0.0005865762711864407, - "loss": 2.2642, - "step": 1674 - }, - { - "epoch": 0.28, - "learning_rate": 0.0005864406779661018, - "loss": 2.2097, - "step": 1675 - }, - { - "epoch": 0.28, - "learning_rate": 0.0005863050847457628, - "loss": 2.3059, - "step": 1676 - }, - { - "epoch": 0.28, - "learning_rate": 0.0005861694915254238, - "loss": 2.3457, - "step": 1677 - }, - { - "epoch": 0.28, - "learning_rate": 0.0005860338983050847, - "loss": 2.2781, - "step": 1678 - }, - { - "epoch": 0.28, - "learning_rate": 0.0005858983050847457, - "loss": 2.2928, - "step": 1679 - }, - { - "epoch": 0.28, - "learning_rate": 0.0005857627118644068, - "loss": 2.2637, - "step": 1680 - }, - { - "epoch": 0.28, - "learning_rate": 0.0005856271186440678, - "loss": 2.205, - "step": 1681 - }, - { - "epoch": 0.28, - "learning_rate": 0.0005854915254237289, - "loss": 2.2447, - "step": 1682 - }, - { - "epoch": 0.28, - "learning_rate": 0.0005853559322033899, - "loss": 2.2343, - "step": 1683 - }, - { - "epoch": 0.28, - "learning_rate": 0.0005852203389830508, - "loss": 2.2657, - "step": 1684 - }, - { - "epoch": 0.28, - "learning_rate": 0.0005850847457627118, - "loss": 2.2203, - "step": 1685 - }, - { - "epoch": 0.28, - "learning_rate": 0.0005849491525423729, - "loss": 2.2826, - "step": 1686 - }, - { - "epoch": 0.28, - "learning_rate": 0.0005848135593220339, - "loss": 2.2752, - "step": 1687 - }, - { - "epoch": 0.28, - "learning_rate": 0.000584677966101695, - "loss": 2.2477, - "step": 1688 - }, - { - "epoch": 0.28, - "learning_rate": 0.000584542372881356, - "loss": 2.276, - "step": 1689 - }, - { - "epoch": 0.28, - "learning_rate": 0.000584406779661017, - "loss": 2.2349, - "step": 1690 - }, - { - "epoch": 0.28, - "learning_rate": 0.0005842711864406779, - "loss": 2.1555, - "step": 1691 - }, - { - "epoch": 0.28, - "learning_rate": 0.000584135593220339, - "loss": 2.2925, - "step": 1692 - }, - { - "epoch": 0.28, - "learning_rate": 0.000584, - "loss": 2.2445, - "step": 1693 - }, - { - "epoch": 0.28, - "learning_rate": 0.0005838644067796611, - "loss": 2.3157, - "step": 1694 - }, - { - "epoch": 0.28, - "learning_rate": 0.0005837288135593221, - "loss": 2.2806, - "step": 1695 - }, - { - "epoch": 0.28, - "learning_rate": 0.0005835932203389832, - "loss": 2.23, - "step": 1696 - }, - { - "epoch": 0.28, - "learning_rate": 0.0005834576271186441, - "loss": 2.2634, - "step": 1697 - }, - { - "epoch": 0.28, - "learning_rate": 0.0005833220338983051, - "loss": 2.2522, - "step": 1698 - }, - { - "epoch": 0.28, - "learning_rate": 0.0005831864406779661, - "loss": 2.284, - "step": 1699 - }, - { - "epoch": 0.28, - "learning_rate": 0.0005830508474576271, - "loss": 2.2152, - "step": 1700 - }, - { - "epoch": 0.28, - "learning_rate": 0.0005829152542372882, - "loss": 2.2938, - "step": 1701 - }, - { - "epoch": 0.28, - "learning_rate": 0.0005827796610169491, - "loss": 2.2496, - "step": 1702 - }, - { - "epoch": 0.28, - "learning_rate": 0.0005826440677966102, - "loss": 2.3019, - "step": 1703 - }, - { - "epoch": 0.28, - "learning_rate": 0.0005825084745762712, - "loss": 2.2104, - "step": 1704 - }, - { - "epoch": 0.28, - "learning_rate": 0.0005823728813559322, - "loss": 2.2823, - "step": 1705 - }, - { - "epoch": 0.28, - "learning_rate": 0.0005822372881355932, - "loss": 2.1698, - "step": 1706 - }, - { - "epoch": 0.28, - "learning_rate": 0.0005821016949152543, - "loss": 2.2245, - "step": 1707 - }, - { - "epoch": 0.28, - "learning_rate": 0.0005819661016949152, - "loss": 2.2254, - "step": 1708 - }, - { - "epoch": 0.28, - "learning_rate": 0.0005818305084745763, - "loss": 2.1787, - "step": 1709 - }, - { - "epoch": 0.28, - "learning_rate": 0.0005816949152542373, - "loss": 2.3148, - "step": 1710 - }, - { - "epoch": 0.29, - "learning_rate": 0.0005815593220338984, - "loss": 2.2259, - "step": 1711 - }, - { - "epoch": 0.29, - "learning_rate": 0.0005814237288135594, - "loss": 2.1637, - "step": 1712 - }, - { - "epoch": 0.29, - "learning_rate": 0.0005812881355932204, - "loss": 2.251, - "step": 1713 - }, - { - "epoch": 0.29, - "learning_rate": 0.0005811525423728813, - "loss": 2.235, - "step": 1714 - }, - { - "epoch": 0.29, - "learning_rate": 0.0005810169491525424, - "loss": 2.2339, - "step": 1715 - }, - { - "epoch": 0.29, - "learning_rate": 0.0005808813559322034, - "loss": 2.3465, - "step": 1716 - }, - { - "epoch": 0.29, - "learning_rate": 0.0005807457627118645, - "loss": 2.2735, - "step": 1717 - }, - { - "epoch": 0.29, - "learning_rate": 0.0005806101694915255, - "loss": 2.3128, - "step": 1718 - }, - { - "epoch": 0.29, - "learning_rate": 0.0005804745762711865, - "loss": 2.3205, - "step": 1719 - }, - { - "epoch": 0.29, - "learning_rate": 0.0005803389830508474, - "loss": 2.3429, - "step": 1720 - }, - { - "epoch": 0.29, - "learning_rate": 0.0005802033898305084, - "loss": 2.2108, - "step": 1721 - }, - { - "epoch": 0.29, - "learning_rate": 0.0005800677966101695, - "loss": 2.2866, - "step": 1722 - }, - { - "epoch": 0.29, - "learning_rate": 0.0005799322033898305, - "loss": 2.2541, - "step": 1723 - }, - { - "epoch": 0.29, - "learning_rate": 0.0005797966101694916, - "loss": 2.3363, - "step": 1724 - }, - { - "epoch": 0.29, - "learning_rate": 0.0005796610169491526, - "loss": 2.2264, - "step": 1725 - }, - { - "epoch": 0.29, - "learning_rate": 0.0005795254237288137, - "loss": 2.3255, - "step": 1726 - }, - { - "epoch": 0.29, - "learning_rate": 0.0005793898305084745, - "loss": 2.2939, - "step": 1727 - }, - { - "epoch": 0.29, - "learning_rate": 0.0005792542372881356, - "loss": 2.1931, - "step": 1728 - }, - { - "epoch": 0.29, - "learning_rate": 0.0005791186440677966, - "loss": 2.2382, - "step": 1729 - }, - { - "epoch": 0.29, - "learning_rate": 0.0005789830508474577, - "loss": 2.2539, - "step": 1730 - }, - { - "epoch": 0.29, - "learning_rate": 0.0005788474576271187, - "loss": 2.2506, - "step": 1731 - }, - { - "epoch": 0.29, - "learning_rate": 0.0005787118644067798, - "loss": 2.2582, - "step": 1732 - }, - { - "epoch": 0.29, - "learning_rate": 0.0005785762711864407, - "loss": 2.2071, - "step": 1733 - }, - { - "epoch": 0.29, - "learning_rate": 0.0005784406779661017, - "loss": 2.2225, - "step": 1734 - }, - { - "epoch": 0.29, - "learning_rate": 0.0005783050847457627, - "loss": 2.291, - "step": 1735 - }, - { - "epoch": 0.29, - "learning_rate": 0.0005781694915254238, - "loss": 2.2698, - "step": 1736 - }, - { - "epoch": 0.29, - "learning_rate": 0.0005780338983050848, - "loss": 2.2516, - "step": 1737 - }, - { - "epoch": 0.29, - "learning_rate": 0.0005778983050847459, - "loss": 2.2068, - "step": 1738 - }, - { - "epoch": 0.29, - "learning_rate": 0.0005777627118644068, - "loss": 2.2431, - "step": 1739 - }, - { - "epoch": 0.29, - "learning_rate": 0.0005776271186440678, - "loss": 2.3724, - "step": 1740 - }, - { - "epoch": 0.29, - "learning_rate": 0.0005774915254237288, - "loss": 2.2127, - "step": 1741 - }, - { - "epoch": 0.29, - "learning_rate": 0.0005773559322033898, - "loss": 2.3207, - "step": 1742 - }, - { - "epoch": 0.29, - "learning_rate": 0.0005772203389830509, - "loss": 2.2464, - "step": 1743 - }, - { - "epoch": 0.29, - "learning_rate": 0.0005770847457627118, - "loss": 2.2019, - "step": 1744 - }, - { - "epoch": 0.29, - "learning_rate": 0.0005769491525423729, - "loss": 2.2422, - "step": 1745 - }, - { - "epoch": 0.29, - "learning_rate": 0.0005768135593220339, - "loss": 2.2471, - "step": 1746 - }, - { - "epoch": 0.29, - "learning_rate": 0.000576677966101695, - "loss": 2.2613, - "step": 1747 - }, - { - "epoch": 0.29, - "learning_rate": 0.000576542372881356, - "loss": 2.1736, - "step": 1748 - }, - { - "epoch": 0.29, - "learning_rate": 0.000576406779661017, - "loss": 2.2236, - "step": 1749 - }, - { - "epoch": 0.29, - "learning_rate": 0.0005762711864406779, - "loss": 2.248, - "step": 1750 - }, - { - "epoch": 0.29, - "learning_rate": 0.000576135593220339, - "loss": 2.1692, - "step": 1751 - }, - { - "epoch": 0.29, - "learning_rate": 0.000576, - "loss": 2.2738, - "step": 1752 - }, - { - "epoch": 0.29, - "learning_rate": 0.0005758644067796611, - "loss": 2.2831, - "step": 1753 - }, - { - "epoch": 0.29, - "learning_rate": 0.0005757288135593221, - "loss": 2.2239, - "step": 1754 - }, - { - "epoch": 0.29, - "learning_rate": 0.0005755932203389831, - "loss": 2.3561, - "step": 1755 - }, - { - "epoch": 0.29, - "learning_rate": 0.000575457627118644, - "loss": 2.2142, - "step": 1756 - }, - { - "epoch": 0.29, - "learning_rate": 0.0005753220338983051, - "loss": 2.2602, - "step": 1757 - }, - { - "epoch": 0.29, - "learning_rate": 0.0005751864406779661, - "loss": 2.2695, - "step": 1758 - }, - { - "epoch": 0.29, - "learning_rate": 0.0005750508474576272, - "loss": 2.2663, - "step": 1759 - }, - { - "epoch": 0.29, - "learning_rate": 0.0005749152542372882, - "loss": 2.2889, - "step": 1760 - }, - { - "epoch": 0.29, - "learning_rate": 0.0005747796610169493, - "loss": 2.2365, - "step": 1761 - }, - { - "epoch": 0.29, - "learning_rate": 0.0005746440677966101, - "loss": 2.2276, - "step": 1762 - }, - { - "epoch": 0.29, - "learning_rate": 0.0005745084745762711, - "loss": 2.3215, - "step": 1763 - }, - { - "epoch": 0.29, - "learning_rate": 0.0005743728813559322, - "loss": 2.4086, - "step": 1764 - }, - { - "epoch": 0.29, - "learning_rate": 0.0005742372881355932, - "loss": 2.2443, - "step": 1765 - }, - { - "epoch": 0.29, - "learning_rate": 0.0005741016949152543, - "loss": 2.2406, - "step": 1766 - }, - { - "epoch": 0.29, - "learning_rate": 0.0005739661016949153, - "loss": 2.2844, - "step": 1767 - }, - { - "epoch": 0.29, - "learning_rate": 0.0005738305084745764, - "loss": 2.2775, - "step": 1768 - }, - { - "epoch": 0.29, - "learning_rate": 0.0005736949152542373, - "loss": 2.3222, - "step": 1769 - }, - { - "epoch": 0.29, - "learning_rate": 0.0005735593220338983, - "loss": 2.2159, - "step": 1770 - }, - { - "epoch": 0.3, - "learning_rate": 0.0005734237288135593, - "loss": 2.2728, - "step": 1771 - }, - { - "epoch": 0.3, - "learning_rate": 0.0005732881355932204, - "loss": 2.2308, - "step": 1772 - }, - { - "epoch": 0.3, - "learning_rate": 0.0005731525423728814, - "loss": 2.2242, - "step": 1773 - }, - { - "epoch": 0.3, - "learning_rate": 0.0005730169491525425, - "loss": 2.2431, - "step": 1774 - }, - { - "epoch": 0.3, - "learning_rate": 0.0005728813559322034, - "loss": 2.2575, - "step": 1775 - }, - { - "epoch": 0.3, - "learning_rate": 0.0005727457627118644, - "loss": 2.245, - "step": 1776 - }, - { - "epoch": 0.3, - "learning_rate": 0.0005726101694915254, - "loss": 2.3637, - "step": 1777 - }, - { - "epoch": 0.3, - "learning_rate": 0.0005724745762711865, - "loss": 2.2815, - "step": 1778 - }, - { - "epoch": 0.3, - "learning_rate": 0.0005723389830508475, - "loss": 2.2532, - "step": 1779 - }, - { - "epoch": 0.3, - "learning_rate": 0.0005722033898305086, - "loss": 2.1812, - "step": 1780 - }, - { - "epoch": 0.3, - "learning_rate": 0.0005720677966101695, - "loss": 2.2817, - "step": 1781 - }, - { - "epoch": 0.3, - "learning_rate": 0.0005719322033898306, - "loss": 2.2065, - "step": 1782 - }, - { - "epoch": 0.3, - "learning_rate": 0.0005717966101694916, - "loss": 2.2099, - "step": 1783 - }, - { - "epoch": 0.3, - "learning_rate": 0.0005716610169491525, - "loss": 2.2501, - "step": 1784 - }, - { - "epoch": 0.3, - "learning_rate": 0.0005715254237288136, - "loss": 2.2381, - "step": 1785 - }, - { - "epoch": 0.3, - "learning_rate": 0.0005713898305084745, - "loss": 2.2935, - "step": 1786 - }, - { - "epoch": 0.3, - "learning_rate": 0.0005712542372881356, - "loss": 2.2428, - "step": 1787 - }, - { - "epoch": 0.3, - "learning_rate": 0.0005711186440677966, - "loss": 2.2742, - "step": 1788 - }, - { - "epoch": 0.3, - "learning_rate": 0.0005709830508474577, - "loss": 2.2361, - "step": 1789 - }, - { - "epoch": 0.3, - "learning_rate": 0.0005708474576271187, - "loss": 2.2956, - "step": 1790 - }, - { - "epoch": 0.3, - "learning_rate": 0.0005707118644067797, - "loss": 2.36, - "step": 1791 - }, - { - "epoch": 0.3, - "learning_rate": 0.0005705762711864406, - "loss": 2.2594, - "step": 1792 - }, - { - "epoch": 0.3, - "learning_rate": 0.0005704406779661017, - "loss": 2.35, - "step": 1793 - }, - { - "epoch": 0.3, - "learning_rate": 0.0005703050847457627, - "loss": 2.2326, - "step": 1794 - }, - { - "epoch": 0.3, - "learning_rate": 0.0005701694915254238, - "loss": 2.2605, - "step": 1795 - }, - { - "epoch": 0.3, - "learning_rate": 0.0005700338983050848, - "loss": 2.2422, - "step": 1796 - }, - { - "epoch": 0.3, - "learning_rate": 0.0005698983050847458, - "loss": 2.3085, - "step": 1797 - }, - { - "epoch": 0.3, - "learning_rate": 0.0005697627118644067, - "loss": 2.2205, - "step": 1798 - }, - { - "epoch": 0.3, - "learning_rate": 0.0005696271186440678, - "loss": 2.2499, - "step": 1799 - }, - { - "epoch": 0.3, - "learning_rate": 0.0005694915254237288, - "loss": 2.2759, - "step": 1800 - }, - { - "epoch": 0.3, - "learning_rate": 0.0005693559322033899, - "loss": 2.1906, - "step": 1801 - }, - { - "epoch": 0.3, - "learning_rate": 0.0005692203389830509, - "loss": 2.256, - "step": 1802 - }, - { - "epoch": 0.3, - "learning_rate": 0.000569084745762712, - "loss": 2.2567, - "step": 1803 - }, - { - "epoch": 0.3, - "learning_rate": 0.000568949152542373, - "loss": 2.2046, - "step": 1804 - }, - { - "epoch": 0.3, - "learning_rate": 0.0005688135593220339, - "loss": 2.3697, - "step": 1805 - }, - { - "epoch": 0.3, - "learning_rate": 0.0005686779661016949, - "loss": 2.2283, - "step": 1806 - }, - { - "epoch": 0.3, - "learning_rate": 0.0005685423728813559, - "loss": 2.2904, - "step": 1807 - }, - { - "epoch": 0.3, - "learning_rate": 0.000568406779661017, - "loss": 2.2623, - "step": 1808 - }, - { - "epoch": 0.3, - "learning_rate": 0.000568271186440678, - "loss": 2.3749, - "step": 1809 - }, - { - "epoch": 0.3, - "learning_rate": 0.0005681355932203391, - "loss": 2.2413, - "step": 1810 - }, - { - "epoch": 0.3, - "learning_rate": 0.000568, - "loss": 2.2991, - "step": 1811 - }, - { - "epoch": 0.3, - "learning_rate": 0.000567864406779661, - "loss": 2.248, - "step": 1812 - }, - { - "epoch": 0.3, - "learning_rate": 0.000567728813559322, - "loss": 2.2004, - "step": 1813 - }, - { - "epoch": 0.3, - "learning_rate": 0.0005675932203389831, - "loss": 2.1595, - "step": 1814 - }, - { - "epoch": 0.3, - "learning_rate": 0.0005674576271186441, - "loss": 2.2832, - "step": 1815 - }, - { - "epoch": 0.3, - "learning_rate": 0.0005673220338983052, - "loss": 2.3049, - "step": 1816 - }, - { - "epoch": 0.3, - "learning_rate": 0.0005671864406779661, - "loss": 2.2726, - "step": 1817 - }, - { - "epoch": 0.3, - "learning_rate": 0.0005670508474576272, - "loss": 2.2321, - "step": 1818 - }, - { - "epoch": 0.3, - "learning_rate": 0.0005669152542372881, - "loss": 2.3435, - "step": 1819 - }, - { - "epoch": 0.3, - "learning_rate": 0.0005667796610169492, - "loss": 2.24, - "step": 1820 - }, - { - "epoch": 0.3, - "learning_rate": 0.0005666440677966102, - "loss": 2.2448, - "step": 1821 - }, - { - "epoch": 0.3, - "learning_rate": 0.0005665084745762713, - "loss": 2.2783, - "step": 1822 - }, - { - "epoch": 0.3, - "learning_rate": 0.0005663728813559322, - "loss": 2.2078, - "step": 1823 - }, - { - "epoch": 0.3, - "learning_rate": 0.0005662372881355933, - "loss": 2.1518, - "step": 1824 - }, - { - "epoch": 0.3, - "learning_rate": 0.0005661016949152543, - "loss": 2.2287, - "step": 1825 - }, - { - "epoch": 0.3, - "learning_rate": 0.0005659661016949153, - "loss": 2.1821, - "step": 1826 - }, - { - "epoch": 0.3, - "learning_rate": 0.0005658305084745763, - "loss": 2.196, - "step": 1827 - }, - { - "epoch": 0.3, - "learning_rate": 0.0005656949152542372, - "loss": 2.2027, - "step": 1828 - }, - { - "epoch": 0.3, - "learning_rate": 0.0005655593220338983, - "loss": 2.2573, - "step": 1829 - }, - { - "epoch": 0.3, - "learning_rate": 0.0005654237288135593, - "loss": 2.2172, - "step": 1830 - }, - { - "epoch": 0.31, - "learning_rate": 0.0005652881355932204, - "loss": 2.2454, - "step": 1831 - }, - { - "epoch": 0.31, - "learning_rate": 0.0005651525423728814, - "loss": 2.2641, - "step": 1832 - }, - { - "epoch": 0.31, - "learning_rate": 0.0005650169491525424, - "loss": 2.2851, - "step": 1833 - }, - { - "epoch": 0.31, - "learning_rate": 0.0005648813559322033, - "loss": 2.2254, - "step": 1834 - }, - { - "epoch": 0.31, - "learning_rate": 0.0005647457627118644, - "loss": 2.2108, - "step": 1835 - }, - { - "epoch": 0.31, - "learning_rate": 0.0005646101694915254, - "loss": 2.2266, - "step": 1836 - }, - { - "epoch": 0.31, - "learning_rate": 0.0005644745762711865, - "loss": 2.2147, - "step": 1837 - }, - { - "epoch": 0.31, - "learning_rate": 0.0005643389830508475, - "loss": 2.2707, - "step": 1838 - }, - { - "epoch": 0.31, - "learning_rate": 0.0005642033898305086, - "loss": 2.2879, - "step": 1839 - }, - { - "epoch": 0.31, - "learning_rate": 0.0005640677966101696, - "loss": 2.226, - "step": 1840 - }, - { - "epoch": 0.31, - "learning_rate": 0.0005639322033898305, - "loss": 2.1889, - "step": 1841 - }, - { - "epoch": 0.31, - "learning_rate": 0.0005637966101694915, - "loss": 2.1247, - "step": 1842 - }, - { - "epoch": 0.31, - "learning_rate": 0.0005636610169491526, - "loss": 2.1824, - "step": 1843 - }, - { - "epoch": 0.31, - "learning_rate": 0.0005635254237288136, - "loss": 2.2536, - "step": 1844 - }, - { - "epoch": 0.31, - "learning_rate": 0.0005633898305084747, - "loss": 2.1632, - "step": 1845 - }, - { - "epoch": 0.31, - "learning_rate": 0.0005632542372881357, - "loss": 2.2505, - "step": 1846 - }, - { - "epoch": 0.31, - "learning_rate": 0.0005631186440677966, - "loss": 2.2527, - "step": 1847 - }, - { - "epoch": 0.31, - "learning_rate": 0.0005629830508474576, - "loss": 2.2886, - "step": 1848 - }, - { - "epoch": 0.31, - "learning_rate": 0.0005628474576271186, - "loss": 2.2531, - "step": 1849 - }, - { - "epoch": 0.31, - "learning_rate": 0.0005627118644067797, - "loss": 2.2327, - "step": 1850 - }, - { - "epoch": 0.31, - "learning_rate": 0.0005625762711864407, - "loss": 2.3173, - "step": 1851 - }, - { - "epoch": 0.31, - "learning_rate": 0.0005624406779661018, - "loss": 2.2248, - "step": 1852 - }, - { - "epoch": 0.31, - "learning_rate": 0.0005623050847457627, - "loss": 2.2318, - "step": 1853 - }, - { - "epoch": 0.31, - "learning_rate": 0.0005621694915254238, - "loss": 2.1786, - "step": 1854 - }, - { - "epoch": 0.31, - "learning_rate": 0.0005620338983050847, - "loss": 2.2006, - "step": 1855 - }, - { - "epoch": 0.31, - "learning_rate": 0.0005618983050847458, - "loss": 2.2385, - "step": 1856 - }, - { - "epoch": 0.31, - "learning_rate": 0.0005617627118644068, - "loss": 2.2012, - "step": 1857 - }, - { - "epoch": 0.31, - "learning_rate": 0.0005616271186440679, - "loss": 2.2171, - "step": 1858 - }, - { - "epoch": 0.31, - "learning_rate": 0.0005614915254237288, - "loss": 2.2376, - "step": 1859 - }, - { - "epoch": 0.31, - "learning_rate": 0.0005613559322033899, - "loss": 2.2803, - "step": 1860 - }, - { - "epoch": 0.31, - "learning_rate": 0.0005612203389830509, - "loss": 2.2262, - "step": 1861 - }, - { - "epoch": 0.31, - "learning_rate": 0.0005610847457627119, - "loss": 2.3109, - "step": 1862 - }, - { - "epoch": 0.31, - "learning_rate": 0.0005609491525423729, - "loss": 2.3136, - "step": 1863 - }, - { - "epoch": 0.31, - "learning_rate": 0.000560813559322034, - "loss": 2.2245, - "step": 1864 - }, - { - "epoch": 0.31, - "learning_rate": 0.0005606779661016949, - "loss": 2.2109, - "step": 1865 - }, - { - "epoch": 0.31, - "learning_rate": 0.0005605423728813559, - "loss": 2.279, - "step": 1866 - }, - { - "epoch": 0.31, - "learning_rate": 0.000560406779661017, - "loss": 2.2449, - "step": 1867 - }, - { - "epoch": 0.31, - "learning_rate": 0.000560271186440678, - "loss": 2.2492, - "step": 1868 - }, - { - "epoch": 0.31, - "learning_rate": 0.000560135593220339, - "loss": 2.2539, - "step": 1869 - }, - { - "epoch": 0.31, - "learning_rate": 0.00056, - "loss": 2.2025, - "step": 1870 - }, - { - "epoch": 0.31, - "learning_rate": 0.000559864406779661, - "loss": 2.2705, - "step": 1871 - }, - { - "epoch": 0.31, - "learning_rate": 0.000559728813559322, - "loss": 2.3171, - "step": 1872 - }, - { - "epoch": 0.31, - "learning_rate": 0.0005595932203389831, - "loss": 2.2708, - "step": 1873 - }, - { - "epoch": 0.31, - "learning_rate": 0.0005594576271186441, - "loss": 2.2563, - "step": 1874 - }, - { - "epoch": 0.31, - "learning_rate": 0.0005593220338983052, - "loss": 2.2498, - "step": 1875 - }, - { - "epoch": 0.31, - "learning_rate": 0.0005591864406779662, - "loss": 2.2136, - "step": 1876 - }, - { - "epoch": 0.31, - "learning_rate": 0.0005590508474576271, - "loss": 2.2153, - "step": 1877 - }, - { - "epoch": 0.31, - "learning_rate": 0.0005589152542372881, - "loss": 2.2101, - "step": 1878 - }, - { - "epoch": 0.31, - "learning_rate": 0.0005587796610169492, - "loss": 2.2666, - "step": 1879 - }, - { - "epoch": 0.31, - "learning_rate": 0.0005586440677966102, - "loss": 2.256, - "step": 1880 - }, - { - "epoch": 0.31, - "learning_rate": 0.0005585084745762713, - "loss": 2.2624, - "step": 1881 - }, - { - "epoch": 0.31, - "learning_rate": 0.0005583728813559323, - "loss": 2.2582, - "step": 1882 - }, - { - "epoch": 0.31, - "learning_rate": 0.0005582372881355932, - "loss": 2.3003, - "step": 1883 - }, - { - "epoch": 0.31, - "learning_rate": 0.0005581016949152542, - "loss": 2.2185, - "step": 1884 - }, - { - "epoch": 0.31, - "learning_rate": 0.0005579661016949153, - "loss": 2.2527, - "step": 1885 - }, - { - "epoch": 0.31, - "learning_rate": 0.0005578305084745763, - "loss": 2.2443, - "step": 1886 - }, - { - "epoch": 0.31, - "learning_rate": 0.0005576949152542373, - "loss": 2.2173, - "step": 1887 - }, - { - "epoch": 0.31, - "learning_rate": 0.0005575593220338984, - "loss": 2.3005, - "step": 1888 - }, - { - "epoch": 0.31, - "learning_rate": 0.0005574237288135593, - "loss": 2.2442, - "step": 1889 - }, - { - "epoch": 0.32, - "learning_rate": 0.0005572881355932203, - "loss": 2.2488, - "step": 1890 - }, - { - "epoch": 0.32, - "learning_rate": 0.0005571525423728813, - "loss": 2.2477, - "step": 1891 - }, - { - "epoch": 0.32, - "learning_rate": 0.0005570169491525424, - "loss": 2.2855, - "step": 1892 - }, - { - "epoch": 0.32, - "learning_rate": 0.0005568813559322034, - "loss": 2.2664, - "step": 1893 - }, - { - "epoch": 0.32, - "learning_rate": 0.0005567457627118645, - "loss": 2.2804, - "step": 1894 - }, - { - "epoch": 0.32, - "learning_rate": 0.0005566101694915254, - "loss": 2.268, - "step": 1895 - }, - { - "epoch": 0.32, - "learning_rate": 0.0005564745762711865, - "loss": 2.1828, - "step": 1896 - }, - { - "epoch": 0.32, - "learning_rate": 0.0005563389830508475, - "loss": 2.1813, - "step": 1897 - }, - { - "epoch": 0.32, - "learning_rate": 0.0005562033898305085, - "loss": 2.2428, - "step": 1898 - }, - { - "epoch": 0.32, - "learning_rate": 0.0005560677966101695, - "loss": 2.2445, - "step": 1899 - }, - { - "epoch": 0.32, - "learning_rate": 0.0005559322033898306, - "loss": 2.1926, - "step": 1900 - }, - { - "epoch": 0.32, - "learning_rate": 0.0005557966101694915, - "loss": 2.2152, - "step": 1901 - }, - { - "epoch": 0.32, - "learning_rate": 0.0005556610169491526, - "loss": 2.2672, - "step": 1902 - }, - { - "epoch": 0.32, - "learning_rate": 0.0005555254237288136, - "loss": 2.2184, - "step": 1903 - }, - { - "epoch": 0.32, - "learning_rate": 0.0005553898305084746, - "loss": 2.3173, - "step": 1904 - }, - { - "epoch": 0.32, - "learning_rate": 0.0005552542372881356, - "loss": 2.2639, - "step": 1905 - }, - { - "epoch": 0.32, - "learning_rate": 0.0005551186440677967, - "loss": 2.2001, - "step": 1906 - }, - { - "epoch": 0.32, - "learning_rate": 0.0005549830508474576, - "loss": 2.1717, - "step": 1907 - }, - { - "epoch": 0.32, - "learning_rate": 0.0005548474576271186, - "loss": 2.1892, - "step": 1908 - }, - { - "epoch": 0.32, - "learning_rate": 0.0005547118644067797, - "loss": 2.2474, - "step": 1909 - }, - { - "epoch": 0.32, - "learning_rate": 0.0005545762711864407, - "loss": 2.2659, - "step": 1910 - }, - { - "epoch": 0.32, - "learning_rate": 0.0005544406779661018, - "loss": 2.1912, - "step": 1911 - }, - { - "epoch": 0.32, - "learning_rate": 0.0005543050847457627, - "loss": 2.1801, - "step": 1912 - }, - { - "epoch": 0.32, - "learning_rate": 0.0005541694915254237, - "loss": 2.235, - "step": 1913 - }, - { - "epoch": 0.32, - "learning_rate": 0.0005540338983050847, - "loss": 2.2063, - "step": 1914 - }, - { - "epoch": 0.32, - "learning_rate": 0.0005538983050847458, - "loss": 2.2463, - "step": 1915 - }, - { - "epoch": 0.32, - "learning_rate": 0.0005537627118644068, - "loss": 2.2131, - "step": 1916 - }, - { - "epoch": 0.32, - "learning_rate": 0.0005536271186440679, - "loss": 2.2529, - "step": 1917 - }, - { - "epoch": 0.32, - "learning_rate": 0.0005534915254237289, - "loss": 2.2658, - "step": 1918 - }, - { - "epoch": 0.32, - "learning_rate": 0.0005533559322033898, - "loss": 2.232, - "step": 1919 - }, - { - "epoch": 0.32, - "learning_rate": 0.0005532203389830508, - "loss": 2.3283, - "step": 1920 - }, - { - "epoch": 0.32, - "learning_rate": 0.0005530847457627119, - "loss": 2.3031, - "step": 1921 - }, - { - "epoch": 0.32, - "learning_rate": 0.0005529491525423729, - "loss": 2.2411, - "step": 1922 - }, - { - "epoch": 0.32, - "learning_rate": 0.000552813559322034, - "loss": 2.2045, - "step": 1923 - }, - { - "epoch": 0.32, - "learning_rate": 0.000552677966101695, - "loss": 2.2901, - "step": 1924 - }, - { - "epoch": 0.32, - "learning_rate": 0.0005525423728813559, - "loss": 2.217, - "step": 1925 - }, - { - "epoch": 0.32, - "learning_rate": 0.0005524067796610169, - "loss": 2.2293, - "step": 1926 - }, - { - "epoch": 0.32, - "learning_rate": 0.000552271186440678, - "loss": 2.2648, - "step": 1927 - }, - { - "epoch": 0.32, - "learning_rate": 0.000552135593220339, - "loss": 2.2779, - "step": 1928 - }, - { - "epoch": 0.32, - "learning_rate": 0.000552, - "loss": 2.2523, - "step": 1929 - }, - { - "epoch": 0.32, - "learning_rate": 0.0005518644067796611, - "loss": 2.1951, - "step": 1930 - }, - { - "epoch": 0.32, - "learning_rate": 0.000551728813559322, - "loss": 2.2148, - "step": 1931 - }, - { - "epoch": 0.32, - "learning_rate": 0.0005515932203389831, - "loss": 2.2874, - "step": 1932 - }, - { - "epoch": 0.32, - "learning_rate": 0.0005514576271186441, - "loss": 2.2205, - "step": 1933 - }, - { - "epoch": 0.32, - "learning_rate": 0.0005513220338983051, - "loss": 2.2636, - "step": 1934 - }, - { - "epoch": 0.32, - "learning_rate": 0.0005511864406779661, - "loss": 2.2324, - "step": 1935 - }, - { - "epoch": 0.32, - "learning_rate": 0.0005510508474576272, - "loss": 2.2693, - "step": 1936 - }, - { - "epoch": 0.32, - "learning_rate": 0.0005509152542372881, - "loss": 2.3353, - "step": 1937 - }, - { - "epoch": 0.32, - "learning_rate": 0.0005507796610169492, - "loss": 2.1534, - "step": 1938 - }, - { - "epoch": 0.32, - "learning_rate": 0.0005506440677966102, - "loss": 2.2335, - "step": 1939 - }, - { - "epoch": 0.32, - "learning_rate": 0.0005505084745762712, - "loss": 2.2108, - "step": 1940 - }, - { - "epoch": 0.32, - "learning_rate": 0.0005503728813559322, - "loss": 2.1601, - "step": 1941 - }, - { - "epoch": 0.32, - "learning_rate": 0.0005502372881355933, - "loss": 2.2934, - "step": 1942 - }, - { - "epoch": 0.32, - "learning_rate": 0.0005501016949152542, - "loss": 2.2175, - "step": 1943 - }, - { - "epoch": 0.32, - "learning_rate": 0.0005499661016949153, - "loss": 2.1822, - "step": 1944 - }, - { - "epoch": 0.32, - "learning_rate": 0.0005498305084745763, - "loss": 2.219, - "step": 1945 - }, - { - "epoch": 0.32, - "learning_rate": 0.0005496949152542374, - "loss": 2.3195, - "step": 1946 - }, - { - "epoch": 0.32, - "learning_rate": 0.0005495593220338983, - "loss": 2.1704, - "step": 1947 - }, - { - "epoch": 0.32, - "learning_rate": 0.0005494237288135594, - "loss": 2.1816, - "step": 1948 - }, - { - "epoch": 0.32, - "learning_rate": 0.0005492881355932203, - "loss": 2.193, - "step": 1949 - }, - { - "epoch": 0.33, - "learning_rate": 0.0005491525423728813, - "loss": 2.3052, - "step": 1950 - }, - { - "epoch": 0.33, - "learning_rate": 0.0005490169491525424, - "loss": 2.2274, - "step": 1951 - }, - { - "epoch": 0.33, - "learning_rate": 0.0005488813559322034, - "loss": 2.253, - "step": 1952 - }, - { - "epoch": 0.33, - "learning_rate": 0.0005487457627118645, - "loss": 2.2893, - "step": 1953 - }, - { - "epoch": 0.33, - "learning_rate": 0.0005486101694915255, - "loss": 2.2413, - "step": 1954 - }, - { - "epoch": 0.33, - "learning_rate": 0.0005484745762711864, - "loss": 2.1433, - "step": 1955 - }, - { - "epoch": 0.33, - "learning_rate": 0.0005483389830508474, - "loss": 2.2613, - "step": 1956 - }, - { - "epoch": 0.33, - "learning_rate": 0.0005482033898305085, - "loss": 2.1874, - "step": 1957 - }, - { - "epoch": 0.33, - "learning_rate": 0.0005480677966101695, - "loss": 2.2894, - "step": 1958 - }, - { - "epoch": 0.33, - "learning_rate": 0.0005479322033898306, - "loss": 2.2443, - "step": 1959 - }, - { - "epoch": 0.33, - "learning_rate": 0.0005477966101694916, - "loss": 2.2716, - "step": 1960 - }, - { - "epoch": 0.33, - "learning_rate": 0.0005476610169491525, - "loss": 2.235, - "step": 1961 - }, - { - "epoch": 0.33, - "learning_rate": 0.0005475254237288135, - "loss": 2.2383, - "step": 1962 - }, - { - "epoch": 0.33, - "learning_rate": 0.0005473898305084746, - "loss": 2.2099, - "step": 1963 - }, - { - "epoch": 0.33, - "learning_rate": 0.0005472542372881356, - "loss": 2.2909, - "step": 1964 - }, - { - "epoch": 0.33, - "learning_rate": 0.0005471186440677967, - "loss": 2.2822, - "step": 1965 - }, - { - "epoch": 0.33, - "learning_rate": 0.0005469830508474577, - "loss": 2.2492, - "step": 1966 - }, - { - "epoch": 0.33, - "learning_rate": 0.0005468474576271188, - "loss": 2.199, - "step": 1967 - }, - { - "epoch": 0.33, - "learning_rate": 0.0005467118644067797, - "loss": 2.2648, - "step": 1968 - }, - { - "epoch": 0.33, - "learning_rate": 0.0005465762711864407, - "loss": 2.233, - "step": 1969 - }, - { - "epoch": 0.33, - "learning_rate": 0.0005464406779661017, - "loss": 2.258, - "step": 1970 - }, - { - "epoch": 0.33, - "learning_rate": 0.0005463050847457627, - "loss": 2.2418, - "step": 1971 - }, - { - "epoch": 0.33, - "learning_rate": 0.0005461694915254238, - "loss": 2.2845, - "step": 1972 - }, - { - "epoch": 0.33, - "learning_rate": 0.0005460338983050847, - "loss": 2.2586, - "step": 1973 - }, - { - "epoch": 0.33, - "learning_rate": 0.0005458983050847458, - "loss": 2.1895, - "step": 1974 - }, - { - "epoch": 0.33, - "learning_rate": 0.0005457627118644068, - "loss": 2.2908, - "step": 1975 - }, - { - "epoch": 0.33, - "learning_rate": 0.0005456271186440678, - "loss": 2.2354, - "step": 1976 - }, - { - "epoch": 0.33, - "learning_rate": 0.0005454915254237288, - "loss": 2.2411, - "step": 1977 - }, - { - "epoch": 0.33, - "learning_rate": 0.0005453559322033899, - "loss": 2.2381, - "step": 1978 - }, - { - "epoch": 0.33, - "learning_rate": 0.0005452203389830508, - "loss": 2.1637, - "step": 1979 - }, - { - "epoch": 0.33, - "learning_rate": 0.0005450847457627119, - "loss": 2.2353, - "step": 1980 - }, - { - "epoch": 0.33, - "learning_rate": 0.0005449491525423729, - "loss": 2.2245, - "step": 1981 - }, - { - "epoch": 0.33, - "learning_rate": 0.000544813559322034, - "loss": 2.2284, - "step": 1982 - }, - { - "epoch": 0.33, - "learning_rate": 0.0005446779661016949, - "loss": 2.142, - "step": 1983 - }, - { - "epoch": 0.33, - "learning_rate": 0.000544542372881356, - "loss": 2.1417, - "step": 1984 - }, - { - "epoch": 0.33, - "learning_rate": 0.0005444067796610169, - "loss": 2.3004, - "step": 1985 - }, - { - "epoch": 0.33, - "learning_rate": 0.000544271186440678, - "loss": 2.262, - "step": 1986 - }, - { - "epoch": 0.33, - "learning_rate": 0.000544135593220339, - "loss": 2.2392, - "step": 1987 - }, - { - "epoch": 0.33, - "learning_rate": 0.0005440000000000001, - "loss": 2.232, - "step": 1988 - }, - { - "epoch": 0.33, - "learning_rate": 0.0005438644067796611, - "loss": 2.2268, - "step": 1989 - }, - { - "epoch": 0.33, - "learning_rate": 0.0005437288135593221, - "loss": 2.2566, - "step": 1990 - }, - { - "epoch": 0.33, - "learning_rate": 0.000543593220338983, - "loss": 2.2244, - "step": 1991 - }, - { - "epoch": 0.33, - "learning_rate": 0.000543457627118644, - "loss": 2.2398, - "step": 1992 - }, - { - "epoch": 0.33, - "learning_rate": 0.0005433220338983051, - "loss": 2.2071, - "step": 1993 - }, - { - "epoch": 0.33, - "learning_rate": 0.0005431864406779661, - "loss": 2.2387, - "step": 1994 - }, - { - "epoch": 0.33, - "learning_rate": 0.0005430508474576272, - "loss": 2.1563, - "step": 1995 - }, - { - "epoch": 0.33, - "learning_rate": 0.0005429152542372882, - "loss": 2.2312, - "step": 1996 - }, - { - "epoch": 0.33, - "learning_rate": 0.0005427796610169491, - "loss": 2.2368, - "step": 1997 - }, - { - "epoch": 0.33, - "learning_rate": 0.0005426440677966101, - "loss": 2.1847, - "step": 1998 - }, - { - "epoch": 0.33, - "learning_rate": 0.0005425084745762712, - "loss": 2.302, - "step": 1999 - }, - { - "epoch": 0.33, - "learning_rate": 0.0005423728813559322, - "loss": 2.2601, - "step": 2000 - }, - { - "epoch": 0.33, - "eval_gen_len": 19.0, - "eval_loss": 2.1167054176330566, - "eval_rouge1": 0.2712, - "eval_rouge2": 0.1025, - "eval_rougeL": 0.2298, - "eval_rougeLsum": 0.2283, - "eval_runtime": 19.1674, - "eval_samples_per_second": 2.609, - "eval_steps_per_second": 0.365, - "step": 2000 - }, - { - "epoch": 0.33, - "learning_rate": 0.0005422372881355933, - "loss": 2.2767, - "step": 2001 - }, - { - "epoch": 0.33, - "learning_rate": 0.0005421016949152543, - "loss": 2.2243, - "step": 2002 - }, - { - "epoch": 0.33, - "learning_rate": 0.0005419661016949154, - "loss": 2.2013, - "step": 2003 - }, - { - "epoch": 0.33, - "learning_rate": 0.0005418305084745763, - "loss": 2.3124, - "step": 2004 - }, - { - "epoch": 0.33, - "learning_rate": 0.0005416949152542373, - "loss": 2.2031, - "step": 2005 - }, - { - "epoch": 0.33, - "learning_rate": 0.0005415593220338983, - "loss": 2.237, - "step": 2006 - }, - { - "epoch": 0.33, - "learning_rate": 0.0005414237288135594, - "loss": 2.2048, - "step": 2007 - }, - { - "epoch": 0.33, - "learning_rate": 0.0005412881355932204, - "loss": 2.2688, - "step": 2008 - }, - { - "epoch": 0.33, - "learning_rate": 0.0005411525423728815, - "loss": 2.2177, - "step": 2009 - }, - { - "epoch": 0.34, - "learning_rate": 0.0005410169491525424, - "loss": 2.2792, - "step": 2010 - }, - { - "epoch": 0.34, - "learning_rate": 0.0005408813559322034, - "loss": 2.2995, - "step": 2011 - }, - { - "epoch": 0.34, - "learning_rate": 0.0005407457627118644, - "loss": 2.1763, - "step": 2012 - }, - { - "epoch": 0.34, - "learning_rate": 0.0005406101694915254, - "loss": 2.1862, - "step": 2013 - }, - { - "epoch": 0.34, - "learning_rate": 0.0005404745762711865, - "loss": 2.1728, - "step": 2014 - }, - { - "epoch": 0.34, - "learning_rate": 0.0005403389830508474, - "loss": 2.1935, - "step": 2015 - }, - { - "epoch": 0.34, - "learning_rate": 0.0005402033898305085, - "loss": 2.1291, - "step": 2016 - }, - { - "epoch": 0.34, - "learning_rate": 0.0005400677966101695, - "loss": 2.1921, - "step": 2017 - }, - { - "epoch": 0.34, - "learning_rate": 0.0005399322033898305, - "loss": 2.2846, - "step": 2018 - }, - { - "epoch": 0.34, - "learning_rate": 0.0005397966101694915, - "loss": 2.2618, - "step": 2019 - }, - { - "epoch": 0.34, - "learning_rate": 0.0005396610169491526, - "loss": 2.2725, - "step": 2020 - }, - { - "epoch": 0.34, - "learning_rate": 0.0005395254237288135, - "loss": 2.2848, - "step": 2021 - }, - { - "epoch": 0.34, - "learning_rate": 0.0005393898305084746, - "loss": 2.239, - "step": 2022 - }, - { - "epoch": 0.34, - "learning_rate": 0.0005392542372881356, - "loss": 2.2086, - "step": 2023 - }, - { - "epoch": 0.34, - "learning_rate": 0.0005391186440677967, - "loss": 2.2085, - "step": 2024 - }, - { - "epoch": 0.34, - "learning_rate": 0.0005389830508474577, - "loss": 2.2464, - "step": 2025 - }, - { - "epoch": 0.34, - "learning_rate": 0.0005388474576271187, - "loss": 2.2019, - "step": 2026 - }, - { - "epoch": 0.34, - "learning_rate": 0.0005387118644067796, - "loss": 2.2168, - "step": 2027 - }, - { - "epoch": 0.34, - "learning_rate": 0.0005385762711864407, - "loss": 2.2155, - "step": 2028 - }, - { - "epoch": 0.34, - "learning_rate": 0.0005384406779661017, - "loss": 2.2504, - "step": 2029 - }, - { - "epoch": 0.34, - "learning_rate": 0.0005383050847457628, - "loss": 2.1826, - "step": 2030 - }, - { - "epoch": 0.34, - "learning_rate": 0.0005381694915254238, - "loss": 2.2056, - "step": 2031 - }, - { - "epoch": 0.34, - "learning_rate": 0.0005380338983050848, - "loss": 2.2056, - "step": 2032 - }, - { - "epoch": 0.34, - "learning_rate": 0.0005378983050847457, - "loss": 2.2294, - "step": 2033 - }, - { - "epoch": 0.34, - "learning_rate": 0.0005377627118644067, - "loss": 2.2824, - "step": 2034 - }, - { - "epoch": 0.34, - "learning_rate": 0.0005376271186440678, - "loss": 2.2414, - "step": 2035 - }, - { - "epoch": 0.34, - "learning_rate": 0.0005374915254237288, - "loss": 2.2554, - "step": 2036 - }, - { - "epoch": 0.34, - "learning_rate": 0.0005373559322033899, - "loss": 2.175, - "step": 2037 - }, - { - "epoch": 0.34, - "learning_rate": 0.0005372203389830509, - "loss": 2.2984, - "step": 2038 - }, - { - "epoch": 0.34, - "learning_rate": 0.000537084745762712, - "loss": 2.1961, - "step": 2039 - }, - { - "epoch": 0.34, - "learning_rate": 0.0005369491525423728, - "loss": 2.1751, - "step": 2040 - }, - { - "epoch": 0.34, - "learning_rate": 0.0005368135593220339, - "loss": 2.2745, - "step": 2041 - }, - { - "epoch": 0.34, - "learning_rate": 0.0005366779661016949, - "loss": 2.2797, - "step": 2042 - }, - { - "epoch": 0.34, - "learning_rate": 0.000536542372881356, - "loss": 2.282, - "step": 2043 - }, - { - "epoch": 0.34, - "learning_rate": 0.000536406779661017, - "loss": 2.2882, - "step": 2044 - }, - { - "epoch": 0.34, - "learning_rate": 0.0005362711864406781, - "loss": 2.2505, - "step": 2045 - }, - { - "epoch": 0.34, - "learning_rate": 0.000536135593220339, - "loss": 2.1641, - "step": 2046 - }, - { - "epoch": 0.34, - "learning_rate": 0.000536, - "loss": 2.2599, - "step": 2047 - }, - { - "epoch": 0.34, - "learning_rate": 0.000535864406779661, - "loss": 2.2801, - "step": 2048 - }, - { - "epoch": 0.34, - "learning_rate": 0.0005357288135593221, - "loss": 2.2227, - "step": 2049 - }, - { - "epoch": 0.34, - "learning_rate": 0.0005355932203389831, - "loss": 2.1633, - "step": 2050 - }, - { - "epoch": 0.34, - "learning_rate": 0.0005354576271186442, - "loss": 2.1906, - "step": 2051 - }, - { - "epoch": 0.34, - "learning_rate": 0.0005353220338983051, - "loss": 2.2234, - "step": 2052 - }, - { - "epoch": 0.34, - "learning_rate": 0.0005351864406779661, - "loss": 2.2721, - "step": 2053 - }, - { - "epoch": 0.34, - "learning_rate": 0.0005350508474576271, - "loss": 2.2584, - "step": 2054 - }, - { - "epoch": 0.34, - "learning_rate": 0.0005349152542372881, - "loss": 2.2241, - "step": 2055 - }, - { - "epoch": 0.34, - "learning_rate": 0.0005347796610169492, - "loss": 2.1697, - "step": 2056 - }, - { - "epoch": 0.34, - "learning_rate": 0.0005346440677966101, - "loss": 2.2947, - "step": 2057 - }, - { - "epoch": 0.34, - "learning_rate": 0.0005345084745762712, - "loss": 2.2233, - "step": 2058 - }, - { - "epoch": 0.34, - "learning_rate": 0.0005343728813559322, - "loss": 2.1829, - "step": 2059 - }, - { - "epoch": 0.34, - "learning_rate": 0.0005342372881355933, - "loss": 2.2436, - "step": 2060 - }, - { - "epoch": 0.34, - "learning_rate": 0.0005341016949152543, - "loss": 2.3029, - "step": 2061 - }, - { - "epoch": 0.34, - "learning_rate": 0.0005339661016949153, - "loss": 2.1706, - "step": 2062 - }, - { - "epoch": 0.34, - "learning_rate": 0.0005338305084745762, - "loss": 2.197, - "step": 2063 - }, - { - "epoch": 0.34, - "learning_rate": 0.0005336949152542373, - "loss": 2.1982, - "step": 2064 - }, - { - "epoch": 0.34, - "learning_rate": 0.0005335593220338983, - "loss": 2.1874, - "step": 2065 - }, - { - "epoch": 0.34, - "learning_rate": 0.0005334237288135594, - "loss": 2.2681, - "step": 2066 - }, - { - "epoch": 0.34, - "learning_rate": 0.0005332881355932204, - "loss": 2.2546, - "step": 2067 - }, - { - "epoch": 0.34, - "learning_rate": 0.0005331525423728814, - "loss": 2.2767, - "step": 2068 - }, - { - "epoch": 0.34, - "learning_rate": 0.0005330169491525423, - "loss": 2.2514, - "step": 2069 - }, - { - "epoch": 0.34, - "learning_rate": 0.0005328813559322034, - "loss": 2.2355, - "step": 2070 - }, - { - "epoch": 0.35, - "learning_rate": 0.0005327457627118644, - "loss": 2.2754, - "step": 2071 - }, - { - "epoch": 0.35, - "learning_rate": 0.0005326101694915255, - "loss": 2.2637, - "step": 2072 - }, - { - "epoch": 0.35, - "learning_rate": 0.0005324745762711865, - "loss": 2.2562, - "step": 2073 - }, - { - "epoch": 0.35, - "learning_rate": 0.0005323389830508475, - "loss": 2.2038, - "step": 2074 - }, - { - "epoch": 0.35, - "learning_rate": 0.0005322033898305085, - "loss": 2.1592, - "step": 2075 - }, - { - "epoch": 0.35, - "learning_rate": 0.0005320677966101694, - "loss": 2.2284, - "step": 2076 - }, - { - "epoch": 0.35, - "learning_rate": 0.0005319322033898305, - "loss": 2.2083, - "step": 2077 - }, - { - "epoch": 0.35, - "learning_rate": 0.0005317966101694915, - "loss": 2.2206, - "step": 2078 - }, - { - "epoch": 0.35, - "learning_rate": 0.0005316610169491526, - "loss": 2.2037, - "step": 2079 - }, - { - "epoch": 0.35, - "learning_rate": 0.0005315254237288136, - "loss": 2.2805, - "step": 2080 - }, - { - "epoch": 0.35, - "learning_rate": 0.0005313898305084747, - "loss": 2.2187, - "step": 2081 - }, - { - "epoch": 0.35, - "learning_rate": 0.0005312542372881356, - "loss": 2.2614, - "step": 2082 - }, - { - "epoch": 0.35, - "learning_rate": 0.0005311186440677966, - "loss": 2.1532, - "step": 2083 - }, - { - "epoch": 0.35, - "learning_rate": 0.0005309830508474576, - "loss": 2.2433, - "step": 2084 - }, - { - "epoch": 0.35, - "learning_rate": 0.0005308474576271187, - "loss": 2.2771, - "step": 2085 - }, - { - "epoch": 0.35, - "learning_rate": 0.0005307118644067797, - "loss": 2.2803, - "step": 2086 - }, - { - "epoch": 0.35, - "learning_rate": 0.0005305762711864408, - "loss": 2.232, - "step": 2087 - }, - { - "epoch": 0.35, - "learning_rate": 0.0005304406779661017, - "loss": 2.2147, - "step": 2088 - }, - { - "epoch": 0.35, - "learning_rate": 0.0005303050847457627, - "loss": 2.2706, - "step": 2089 - }, - { - "epoch": 0.35, - "learning_rate": 0.0005301694915254237, - "loss": 2.2839, - "step": 2090 - }, - { - "epoch": 0.35, - "learning_rate": 0.0005300338983050848, - "loss": 2.2673, - "step": 2091 - }, - { - "epoch": 0.35, - "learning_rate": 0.0005298983050847458, - "loss": 2.2017, - "step": 2092 - }, - { - "epoch": 0.35, - "learning_rate": 0.0005297627118644069, - "loss": 2.2356, - "step": 2093 - }, - { - "epoch": 0.35, - "learning_rate": 0.0005296271186440678, - "loss": 2.2475, - "step": 2094 - }, - { - "epoch": 0.35, - "learning_rate": 0.0005294915254237288, - "loss": 2.1092, - "step": 2095 - }, - { - "epoch": 0.35, - "learning_rate": 0.0005293559322033899, - "loss": 2.1665, - "step": 2096 - }, - { - "epoch": 0.35, - "learning_rate": 0.0005292203389830508, - "loss": 2.245, - "step": 2097 - }, - { - "epoch": 0.35, - "learning_rate": 0.0005290847457627119, - "loss": 2.2889, - "step": 2098 - }, - { - "epoch": 0.35, - "learning_rate": 0.0005289491525423729, - "loss": 2.2003, - "step": 2099 - }, - { - "epoch": 0.35, - "learning_rate": 0.0005288135593220339, - "loss": 2.1893, - "step": 2100 - }, - { - "epoch": 0.35, - "learning_rate": 0.0005286779661016949, - "loss": 2.2507, - "step": 2101 - }, - { - "epoch": 0.35, - "learning_rate": 0.000528542372881356, - "loss": 2.2379, - "step": 2102 - }, - { - "epoch": 0.35, - "learning_rate": 0.000528406779661017, - "loss": 2.2303, - "step": 2103 - }, - { - "epoch": 0.35, - "learning_rate": 0.000528271186440678, - "loss": 2.1895, - "step": 2104 - }, - { - "epoch": 0.35, - "learning_rate": 0.000528135593220339, - "loss": 2.2644, - "step": 2105 - }, - { - "epoch": 0.35, - "learning_rate": 0.000528, - "loss": 2.2196, - "step": 2106 - }, - { - "epoch": 0.35, - "learning_rate": 0.000527864406779661, - "loss": 2.2162, - "step": 2107 - }, - { - "epoch": 0.35, - "learning_rate": 0.0005277288135593221, - "loss": 2.1485, - "step": 2108 - }, - { - "epoch": 0.35, - "learning_rate": 0.0005275932203389831, - "loss": 2.2499, - "step": 2109 - }, - { - "epoch": 0.35, - "learning_rate": 0.0005274576271186442, - "loss": 2.1435, - "step": 2110 - }, - { - "epoch": 0.35, - "learning_rate": 0.000527322033898305, - "loss": 2.2032, - "step": 2111 - }, - { - "epoch": 0.35, - "learning_rate": 0.0005271864406779661, - "loss": 2.3032, - "step": 2112 - }, - { - "epoch": 0.35, - "learning_rate": 0.0005270508474576271, - "loss": 2.2313, - "step": 2113 - }, - { - "epoch": 0.35, - "learning_rate": 0.0005269152542372882, - "loss": 2.2046, - "step": 2114 - }, - { - "epoch": 0.35, - "learning_rate": 0.0005267796610169492, - "loss": 2.2659, - "step": 2115 - }, - { - "epoch": 0.35, - "learning_rate": 0.0005266440677966102, - "loss": 2.2319, - "step": 2116 - }, - { - "epoch": 0.35, - "learning_rate": 0.0005265084745762713, - "loss": 2.2137, - "step": 2117 - }, - { - "epoch": 0.35, - "learning_rate": 0.0005263728813559322, - "loss": 2.2479, - "step": 2118 - }, - { - "epoch": 0.35, - "learning_rate": 0.0005262372881355932, - "loss": 2.2536, - "step": 2119 - }, - { - "epoch": 0.35, - "learning_rate": 0.0005261016949152542, - "loss": 2.2788, - "step": 2120 - }, - { - "epoch": 0.35, - "learning_rate": 0.0005259661016949153, - "loss": 2.2234, - "step": 2121 - }, - { - "epoch": 0.35, - "learning_rate": 0.0005258305084745763, - "loss": 2.2601, - "step": 2122 - }, - { - "epoch": 0.35, - "learning_rate": 0.0005256949152542374, - "loss": 2.2342, - "step": 2123 - }, - { - "epoch": 0.35, - "learning_rate": 0.0005255593220338983, - "loss": 2.1995, - "step": 2124 - }, - { - "epoch": 0.35, - "learning_rate": 0.0005254237288135593, - "loss": 2.1187, - "step": 2125 - }, - { - "epoch": 0.35, - "learning_rate": 0.0005252881355932203, - "loss": 2.1746, - "step": 2126 - }, - { - "epoch": 0.35, - "learning_rate": 0.0005251525423728814, - "loss": 2.2393, - "step": 2127 - }, - { - "epoch": 0.35, - "learning_rate": 0.0005250169491525424, - "loss": 2.2106, - "step": 2128 - }, - { - "epoch": 0.35, - "learning_rate": 0.0005248813559322035, - "loss": 2.2742, - "step": 2129 - }, - { - "epoch": 0.35, - "learning_rate": 0.0005247457627118644, - "loss": 2.1978, - "step": 2130 - }, - { - "epoch": 0.36, - "learning_rate": 0.0005246101694915255, - "loss": 2.3223, - "step": 2131 - }, - { - "epoch": 0.36, - "learning_rate": 0.0005244745762711865, - "loss": 2.2252, - "step": 2132 - }, - { - "epoch": 0.36, - "learning_rate": 0.0005243389830508475, - "loss": 2.2205, - "step": 2133 - }, - { - "epoch": 0.36, - "learning_rate": 0.0005242033898305085, - "loss": 2.2369, - "step": 2134 - }, - { - "epoch": 0.36, - "learning_rate": 0.0005240677966101696, - "loss": 2.2013, - "step": 2135 - }, - { - "epoch": 0.36, - "learning_rate": 0.0005239322033898305, - "loss": 2.162, - "step": 2136 - }, - { - "epoch": 0.36, - "learning_rate": 0.0005237966101694915, - "loss": 2.1918, - "step": 2137 - }, - { - "epoch": 0.36, - "learning_rate": 0.0005236610169491526, - "loss": 2.2387, - "step": 2138 - }, - { - "epoch": 0.36, - "learning_rate": 0.0005235254237288136, - "loss": 2.1656, - "step": 2139 - }, - { - "epoch": 0.36, - "learning_rate": 0.0005233898305084746, - "loss": 2.2499, - "step": 2140 - }, - { - "epoch": 0.36, - "learning_rate": 0.0005232542372881356, - "loss": 2.1819, - "step": 2141 - }, - { - "epoch": 0.36, - "learning_rate": 0.0005231186440677966, - "loss": 2.1959, - "step": 2142 - }, - { - "epoch": 0.36, - "learning_rate": 0.0005229830508474576, - "loss": 2.2345, - "step": 2143 - }, - { - "epoch": 0.36, - "learning_rate": 0.0005228474576271187, - "loss": 2.171, - "step": 2144 - }, - { - "epoch": 0.36, - "learning_rate": 0.0005227118644067797, - "loss": 2.2297, - "step": 2145 - }, - { - "epoch": 0.36, - "learning_rate": 0.0005225762711864407, - "loss": 2.194, - "step": 2146 - }, - { - "epoch": 0.36, - "learning_rate": 0.0005224406779661017, - "loss": 2.1664, - "step": 2147 - }, - { - "epoch": 0.36, - "learning_rate": 0.0005223050847457627, - "loss": 2.2464, - "step": 2148 - }, - { - "epoch": 0.36, - "learning_rate": 0.0005221694915254237, - "loss": 2.2283, - "step": 2149 - }, - { - "epoch": 0.36, - "learning_rate": 0.0005220338983050848, - "loss": 2.2012, - "step": 2150 - }, - { - "epoch": 0.36, - "learning_rate": 0.0005218983050847458, - "loss": 2.235, - "step": 2151 - }, - { - "epoch": 0.36, - "learning_rate": 0.0005217627118644069, - "loss": 2.1997, - "step": 2152 - }, - { - "epoch": 0.36, - "learning_rate": 0.0005216271186440679, - "loss": 2.2389, - "step": 2153 - }, - { - "epoch": 0.36, - "learning_rate": 0.0005214915254237288, - "loss": 2.2689, - "step": 2154 - }, - { - "epoch": 0.36, - "learning_rate": 0.0005213559322033898, - "loss": 2.1646, - "step": 2155 - }, - { - "epoch": 0.36, - "learning_rate": 0.0005212203389830509, - "loss": 2.2834, - "step": 2156 - }, - { - "epoch": 0.36, - "learning_rate": 0.0005210847457627119, - "loss": 2.216, - "step": 2157 - }, - { - "epoch": 0.36, - "learning_rate": 0.0005209491525423729, - "loss": 2.1607, - "step": 2158 - }, - { - "epoch": 0.36, - "learning_rate": 0.000520813559322034, - "loss": 2.2098, - "step": 2159 - }, - { - "epoch": 0.36, - "learning_rate": 0.0005206779661016949, - "loss": 2.2062, - "step": 2160 - }, - { - "epoch": 0.36, - "learning_rate": 0.0005205423728813559, - "loss": 2.1718, - "step": 2161 - }, - { - "epoch": 0.36, - "learning_rate": 0.0005204067796610169, - "loss": 2.2358, - "step": 2162 - }, - { - "epoch": 0.36, - "learning_rate": 0.000520271186440678, - "loss": 2.1673, - "step": 2163 - }, - { - "epoch": 0.36, - "learning_rate": 0.000520135593220339, - "loss": 2.2097, - "step": 2164 - }, - { - "epoch": 0.36, - "learning_rate": 0.0005200000000000001, - "loss": 2.1774, - "step": 2165 - }, - { - "epoch": 0.36, - "learning_rate": 0.000519864406779661, - "loss": 2.2993, - "step": 2166 - }, - { - "epoch": 0.36, - "learning_rate": 0.0005197288135593221, - "loss": 2.2407, - "step": 2167 - }, - { - "epoch": 0.36, - "learning_rate": 0.000519593220338983, - "loss": 2.2617, - "step": 2168 - }, - { - "epoch": 0.36, - "learning_rate": 0.0005194576271186441, - "loss": 2.1817, - "step": 2169 - }, - { - "epoch": 0.36, - "learning_rate": 0.0005193220338983051, - "loss": 2.1834, - "step": 2170 - }, - { - "epoch": 0.36, - "learning_rate": 0.0005191864406779662, - "loss": 2.2564, - "step": 2171 - }, - { - "epoch": 0.36, - "learning_rate": 0.0005190508474576271, - "loss": 2.2458, - "step": 2172 - }, - { - "epoch": 0.36, - "learning_rate": 0.0005189152542372882, - "loss": 2.1405, - "step": 2173 - }, - { - "epoch": 0.36, - "learning_rate": 0.0005187796610169492, - "loss": 2.2898, - "step": 2174 - }, - { - "epoch": 0.36, - "learning_rate": 0.0005186440677966102, - "loss": 2.1571, - "step": 2175 - }, - { - "epoch": 0.36, - "learning_rate": 0.0005185084745762712, - "loss": 2.2423, - "step": 2176 - }, - { - "epoch": 0.36, - "learning_rate": 0.0005183728813559323, - "loss": 2.2498, - "step": 2177 - }, - { - "epoch": 0.36, - "learning_rate": 0.0005182372881355932, - "loss": 2.2676, - "step": 2178 - }, - { - "epoch": 0.36, - "learning_rate": 0.0005181016949152542, - "loss": 2.2495, - "step": 2179 - }, - { - "epoch": 0.36, - "learning_rate": 0.0005179661016949153, - "loss": 2.2345, - "step": 2180 - }, - { - "epoch": 0.36, - "learning_rate": 0.0005178305084745763, - "loss": 2.1844, - "step": 2181 - }, - { - "epoch": 0.36, - "learning_rate": 0.0005176949152542373, - "loss": 2.2177, - "step": 2182 - }, - { - "epoch": 0.36, - "learning_rate": 0.0005175593220338983, - "loss": 2.2959, - "step": 2183 - }, - { - "epoch": 0.36, - "learning_rate": 0.0005174237288135593, - "loss": 2.226, - "step": 2184 - }, - { - "epoch": 0.36, - "learning_rate": 0.0005172881355932203, - "loss": 2.2416, - "step": 2185 - }, - { - "epoch": 0.36, - "learning_rate": 0.0005171525423728814, - "loss": 2.2242, - "step": 2186 - }, - { - "epoch": 0.36, - "learning_rate": 0.0005170169491525424, - "loss": 2.2822, - "step": 2187 - }, - { - "epoch": 0.36, - "learning_rate": 0.0005168813559322035, - "loss": 2.289, - "step": 2188 - }, - { - "epoch": 0.36, - "learning_rate": 0.0005167457627118645, - "loss": 2.2439, - "step": 2189 - }, - { - "epoch": 0.36, - "learning_rate": 0.0005166101694915254, - "loss": 2.2245, - "step": 2190 - }, - { - "epoch": 0.37, - "learning_rate": 0.0005164745762711864, - "loss": 2.2321, - "step": 2191 - }, - { - "epoch": 0.37, - "learning_rate": 0.0005163389830508475, - "loss": 2.2557, - "step": 2192 - }, - { - "epoch": 0.37, - "learning_rate": 0.0005162033898305085, - "loss": 2.2126, - "step": 2193 - }, - { - "epoch": 0.37, - "learning_rate": 0.0005160677966101696, - "loss": 2.2241, - "step": 2194 - }, - { - "epoch": 0.37, - "learning_rate": 0.0005159322033898306, - "loss": 2.1753, - "step": 2195 - }, - { - "epoch": 0.37, - "learning_rate": 0.0005157966101694915, - "loss": 2.1543, - "step": 2196 - }, - { - "epoch": 0.37, - "learning_rate": 0.0005156610169491525, - "loss": 2.1702, - "step": 2197 - }, - { - "epoch": 0.37, - "learning_rate": 0.0005155254237288136, - "loss": 2.1949, - "step": 2198 - }, - { - "epoch": 0.37, - "learning_rate": 0.0005153898305084746, - "loss": 2.1922, - "step": 2199 - }, - { - "epoch": 0.37, - "learning_rate": 0.0005152542372881356, - "loss": 2.2306, - "step": 2200 - }, - { - "epoch": 0.37, - "learning_rate": 0.0005151186440677967, - "loss": 2.2592, - "step": 2201 - }, - { - "epoch": 0.37, - "learning_rate": 0.0005149830508474576, - "loss": 2.1785, - "step": 2202 - }, - { - "epoch": 0.37, - "learning_rate": 0.0005148474576271186, - "loss": 2.2125, - "step": 2203 - }, - { - "epoch": 0.37, - "learning_rate": 0.0005147118644067796, - "loss": 2.3134, - "step": 2204 - }, - { - "epoch": 0.37, - "learning_rate": 0.0005145762711864407, - "loss": 2.1845, - "step": 2205 - }, - { - "epoch": 0.37, - "learning_rate": 0.0005144406779661017, - "loss": 2.2326, - "step": 2206 - }, - { - "epoch": 0.37, - "learning_rate": 0.0005143050847457628, - "loss": 2.1908, - "step": 2207 - }, - { - "epoch": 0.37, - "learning_rate": 0.0005141694915254237, - "loss": 2.1466, - "step": 2208 - }, - { - "epoch": 0.37, - "learning_rate": 0.0005140338983050848, - "loss": 2.241, - "step": 2209 - }, - { - "epoch": 0.37, - "learning_rate": 0.0005138983050847458, - "loss": 2.1842, - "step": 2210 - }, - { - "epoch": 0.37, - "learning_rate": 0.0005137627118644068, - "loss": 2.2242, - "step": 2211 - }, - { - "epoch": 0.37, - "learning_rate": 0.0005136271186440678, - "loss": 2.2216, - "step": 2212 - }, - { - "epoch": 0.37, - "learning_rate": 0.0005134915254237289, - "loss": 2.1317, - "step": 2213 - }, - { - "epoch": 0.37, - "learning_rate": 0.0005133559322033898, - "loss": 2.2244, - "step": 2214 - }, - { - "epoch": 0.37, - "learning_rate": 0.0005132203389830509, - "loss": 2.2579, - "step": 2215 - }, - { - "epoch": 0.37, - "learning_rate": 0.0005130847457627119, - "loss": 2.2232, - "step": 2216 - }, - { - "epoch": 0.37, - "learning_rate": 0.0005129491525423729, - "loss": 2.2094, - "step": 2217 - }, - { - "epoch": 0.37, - "learning_rate": 0.0005128135593220339, - "loss": 2.1305, - "step": 2218 - }, - { - "epoch": 0.37, - "learning_rate": 0.000512677966101695, - "loss": 2.1747, - "step": 2219 - }, - { - "epoch": 0.37, - "learning_rate": 0.000512542372881356, - "loss": 2.2544, - "step": 2220 - }, - { - "epoch": 0.37, - "learning_rate": 0.0005124067796610169, - "loss": 2.2425, - "step": 2221 - }, - { - "epoch": 0.37, - "learning_rate": 0.000512271186440678, - "loss": 2.209, - "step": 2222 - }, - { - "epoch": 0.37, - "learning_rate": 0.000512135593220339, - "loss": 2.1374, - "step": 2223 - }, - { - "epoch": 0.37, - "learning_rate": 0.0005120000000000001, - "loss": 2.2563, - "step": 2224 - }, - { - "epoch": 0.37, - "learning_rate": 0.000511864406779661, - "loss": 2.2221, - "step": 2225 - }, - { - "epoch": 0.37, - "learning_rate": 0.000511728813559322, - "loss": 2.139, - "step": 2226 - }, - { - "epoch": 0.37, - "learning_rate": 0.000511593220338983, - "loss": 2.3112, - "step": 2227 - }, - { - "epoch": 0.37, - "learning_rate": 0.0005114576271186441, - "loss": 2.2708, - "step": 2228 - }, - { - "epoch": 0.37, - "learning_rate": 0.0005113220338983051, - "loss": 2.2005, - "step": 2229 - }, - { - "epoch": 0.37, - "learning_rate": 0.0005111864406779662, - "loss": 2.2481, - "step": 2230 - }, - { - "epoch": 0.37, - "learning_rate": 0.0005110508474576272, - "loss": 2.1684, - "step": 2231 - }, - { - "epoch": 0.37, - "learning_rate": 0.0005109152542372881, - "loss": 2.221, - "step": 2232 - }, - { - "epoch": 0.37, - "learning_rate": 0.0005107796610169491, - "loss": 2.1821, - "step": 2233 - }, - { - "epoch": 0.37, - "learning_rate": 0.0005106440677966102, - "loss": 2.1595, - "step": 2234 - }, - { - "epoch": 0.37, - "learning_rate": 0.0005105084745762712, - "loss": 2.243, - "step": 2235 - }, - { - "epoch": 0.37, - "learning_rate": 0.0005103728813559323, - "loss": 2.2372, - "step": 2236 - }, - { - "epoch": 0.37, - "learning_rate": 0.0005102372881355933, - "loss": 2.1871, - "step": 2237 - }, - { - "epoch": 0.37, - "learning_rate": 0.0005101016949152544, - "loss": 2.2035, - "step": 2238 - }, - { - "epoch": 0.37, - "learning_rate": 0.0005099661016949152, - "loss": 2.1696, - "step": 2239 - }, - { - "epoch": 0.37, - "learning_rate": 0.0005098305084745762, - "loss": 2.2175, - "step": 2240 - }, - { - "epoch": 0.37, - "learning_rate": 0.0005096949152542373, - "loss": 2.2831, - "step": 2241 - }, - { - "epoch": 0.37, - "learning_rate": 0.0005095593220338983, - "loss": 2.2307, - "step": 2242 - }, - { - "epoch": 0.37, - "learning_rate": 0.0005094237288135594, - "loss": 2.1867, - "step": 2243 - }, - { - "epoch": 0.37, - "learning_rate": 0.0005092881355932203, - "loss": 2.1615, - "step": 2244 - }, - { - "epoch": 0.37, - "learning_rate": 0.0005091525423728814, - "loss": 2.1252, - "step": 2245 - }, - { - "epoch": 0.37, - "learning_rate": 0.0005090169491525424, - "loss": 2.3275, - "step": 2246 - }, - { - "epoch": 0.37, - "learning_rate": 0.0005088813559322034, - "loss": 2.1972, - "step": 2247 - }, - { - "epoch": 0.37, - "learning_rate": 0.0005087457627118644, - "loss": 2.2314, - "step": 2248 - }, - { - "epoch": 0.37, - "learning_rate": 0.0005086101694915255, - "loss": 2.2176, - "step": 2249 - }, - { - "epoch": 0.38, - "learning_rate": 0.0005084745762711864, - "loss": 2.1716, - "step": 2250 - }, - { - "epoch": 0.38, - "learning_rate": 0.0005083389830508475, - "loss": 2.277, - "step": 2251 - }, - { - "epoch": 0.38, - "learning_rate": 0.0005082033898305085, - "loss": 2.1561, - "step": 2252 - }, - { - "epoch": 0.38, - "learning_rate": 0.0005080677966101695, - "loss": 2.1564, - "step": 2253 - }, - { - "epoch": 0.38, - "learning_rate": 0.0005079322033898305, - "loss": 2.2456, - "step": 2254 - }, - { - "epoch": 0.38, - "learning_rate": 0.0005077966101694916, - "loss": 2.1773, - "step": 2255 - }, - { - "epoch": 0.38, - "learning_rate": 0.0005076610169491525, - "loss": 2.1676, - "step": 2256 - }, - { - "epoch": 0.38, - "learning_rate": 0.0005075254237288136, - "loss": 2.1209, - "step": 2257 - }, - { - "epoch": 0.38, - "learning_rate": 0.0005073898305084746, - "loss": 2.1412, - "step": 2258 - }, - { - "epoch": 0.38, - "learning_rate": 0.0005072542372881357, - "loss": 2.173, - "step": 2259 - }, - { - "epoch": 0.38, - "learning_rate": 0.0005071186440677967, - "loss": 2.1873, - "step": 2260 - }, - { - "epoch": 0.38, - "learning_rate": 0.0005069830508474576, - "loss": 2.1775, - "step": 2261 - }, - { - "epoch": 0.38, - "learning_rate": 0.0005068474576271186, - "loss": 2.1584, - "step": 2262 - }, - { - "epoch": 0.38, - "learning_rate": 0.0005067118644067796, - "loss": 2.1945, - "step": 2263 - }, - { - "epoch": 0.38, - "learning_rate": 0.0005065762711864407, - "loss": 2.1843, - "step": 2264 - }, - { - "epoch": 0.38, - "learning_rate": 0.0005064406779661017, - "loss": 2.2238, - "step": 2265 - }, - { - "epoch": 0.38, - "learning_rate": 0.0005063050847457628, - "loss": 2.1983, - "step": 2266 - }, - { - "epoch": 0.38, - "learning_rate": 0.0005061694915254238, - "loss": 2.2141, - "step": 2267 - }, - { - "epoch": 0.38, - "learning_rate": 0.0005060338983050847, - "loss": 2.1851, - "step": 2268 - }, - { - "epoch": 0.38, - "learning_rate": 0.0005058983050847457, - "loss": 2.1856, - "step": 2269 - }, - { - "epoch": 0.38, - "learning_rate": 0.0005057627118644068, - "loss": 2.2225, - "step": 2270 - }, - { - "epoch": 0.38, - "learning_rate": 0.0005056271186440678, - "loss": 2.196, - "step": 2271 - }, - { - "epoch": 0.38, - "learning_rate": 0.0005054915254237289, - "loss": 2.2258, - "step": 2272 - }, - { - "epoch": 0.38, - "learning_rate": 0.0005053559322033899, - "loss": 2.2658, - "step": 2273 - }, - { - "epoch": 0.38, - "learning_rate": 0.0005052203389830508, - "loss": 2.277, - "step": 2274 - }, - { - "epoch": 0.38, - "learning_rate": 0.0005050847457627118, - "loss": 2.1206, - "step": 2275 - }, - { - "epoch": 0.38, - "learning_rate": 0.0005049491525423729, - "loss": 2.2684, - "step": 2276 - }, - { - "epoch": 0.38, - "learning_rate": 0.0005048135593220339, - "loss": 2.2052, - "step": 2277 - }, - { - "epoch": 0.38, - "learning_rate": 0.000504677966101695, - "loss": 2.2753, - "step": 2278 - }, - { - "epoch": 0.38, - "learning_rate": 0.000504542372881356, - "loss": 2.2171, - "step": 2279 - }, - { - "epoch": 0.38, - "learning_rate": 0.0005044067796610171, - "loss": 2.161, - "step": 2280 - }, - { - "epoch": 0.38, - "learning_rate": 0.000504271186440678, - "loss": 2.1419, - "step": 2281 - }, - { - "epoch": 0.38, - "learning_rate": 0.0005041355932203389, - "loss": 2.1022, - "step": 2282 - }, - { - "epoch": 0.38, - "learning_rate": 0.000504, - "loss": 2.1815, - "step": 2283 - }, - { - "epoch": 0.38, - "learning_rate": 0.000503864406779661, - "loss": 2.1885, - "step": 2284 - }, - { - "epoch": 0.38, - "learning_rate": 0.0005037288135593221, - "loss": 2.2415, - "step": 2285 - }, - { - "epoch": 0.38, - "learning_rate": 0.000503593220338983, - "loss": 2.2571, - "step": 2286 - }, - { - "epoch": 0.38, - "learning_rate": 0.0005034576271186441, - "loss": 2.1438, - "step": 2287 - }, - { - "epoch": 0.38, - "learning_rate": 0.0005033220338983051, - "loss": 2.2036, - "step": 2288 - }, - { - "epoch": 0.38, - "learning_rate": 0.0005031864406779661, - "loss": 2.2178, - "step": 2289 - }, - { - "epoch": 0.38, - "learning_rate": 0.0005030508474576271, - "loss": 2.2438, - "step": 2290 - }, - { - "epoch": 0.38, - "learning_rate": 0.0005029152542372882, - "loss": 2.235, - "step": 2291 - }, - { - "epoch": 0.38, - "learning_rate": 0.0005027796610169491, - "loss": 2.2191, - "step": 2292 - }, - { - "epoch": 0.38, - "learning_rate": 0.0005026440677966102, - "loss": 2.2379, - "step": 2293 - }, - { - "epoch": 0.38, - "learning_rate": 0.0005025084745762712, - "loss": 2.251, - "step": 2294 - }, - { - "epoch": 0.38, - "learning_rate": 0.0005023728813559323, - "loss": 2.244, - "step": 2295 - }, - { - "epoch": 0.38, - "learning_rate": 0.0005022372881355932, - "loss": 2.1309, - "step": 2296 - }, - { - "epoch": 0.38, - "learning_rate": 0.0005021016949152543, - "loss": 2.1837, - "step": 2297 - }, - { - "epoch": 0.38, - "learning_rate": 0.0005019661016949152, - "loss": 2.2598, - "step": 2298 - }, - { - "epoch": 0.38, - "learning_rate": 0.0005018305084745763, - "loss": 2.1708, - "step": 2299 - }, - { - "epoch": 0.38, - "learning_rate": 0.0005016949152542373, - "loss": 2.2399, - "step": 2300 - }, - { - "epoch": 0.38, - "learning_rate": 0.0005015593220338984, - "loss": 2.196, - "step": 2301 - }, - { - "epoch": 0.38, - "learning_rate": 0.0005014237288135594, - "loss": 2.1254, - "step": 2302 - }, - { - "epoch": 0.38, - "learning_rate": 0.0005012881355932204, - "loss": 2.1076, - "step": 2303 - }, - { - "epoch": 0.38, - "learning_rate": 0.0005011525423728814, - "loss": 2.1358, - "step": 2304 - }, - { - "epoch": 0.38, - "learning_rate": 0.0005010169491525423, - "loss": 2.2038, - "step": 2305 - }, - { - "epoch": 0.38, - "learning_rate": 0.0005008813559322034, - "loss": 2.182, - "step": 2306 - }, - { - "epoch": 0.38, - "learning_rate": 0.0005007457627118644, - "loss": 2.235, - "step": 2307 - }, - { - "epoch": 0.38, - "learning_rate": 0.0005006101694915255, - "loss": 2.2541, - "step": 2308 - }, - { - "epoch": 0.38, - "learning_rate": 0.0005004745762711865, - "loss": 2.2962, - "step": 2309 - }, - { - "epoch": 0.39, - "learning_rate": 0.0005003389830508475, - "loss": 2.2009, - "step": 2310 - }, - { - "epoch": 0.39, - "learning_rate": 0.0005002033898305084, - "loss": 2.1533, - "step": 2311 - }, - { - "epoch": 0.39, - "learning_rate": 0.0005000677966101695, - "loss": 2.2516, - "step": 2312 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004999322033898305, - "loss": 2.1711, - "step": 2313 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004997966101694916, - "loss": 2.1623, - "step": 2314 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004996610169491526, - "loss": 2.2857, - "step": 2315 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004995254237288137, - "loss": 2.1518, - "step": 2316 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004993898305084746, - "loss": 2.1977, - "step": 2317 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004992542372881356, - "loss": 2.1834, - "step": 2318 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004991186440677966, - "loss": 2.2094, - "step": 2319 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004989830508474577, - "loss": 2.1771, - "step": 2320 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004988474576271187, - "loss": 2.1867, - "step": 2321 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004987118644067798, - "loss": 2.2691, - "step": 2322 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004985762711864407, - "loss": 2.1786, - "step": 2323 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004984406779661017, - "loss": 2.1774, - "step": 2324 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004983050847457627, - "loss": 2.2158, - "step": 2325 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004981694915254237, - "loss": 2.2291, - "step": 2326 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004980338983050848, - "loss": 2.296, - "step": 2327 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004978983050847458, - "loss": 2.1094, - "step": 2328 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004977627118644068, - "loss": 2.1932, - "step": 2329 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004976271186440678, - "loss": 2.2481, - "step": 2330 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004974915254237288, - "loss": 2.2036, - "step": 2331 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004973559322033898, - "loss": 2.2034, - "step": 2332 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004972203389830509, - "loss": 2.2023, - "step": 2333 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004970847457627119, - "loss": 2.2388, - "step": 2334 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004969491525423729, - "loss": 2.2445, - "step": 2335 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004968135593220339, - "loss": 2.2172, - "step": 2336 - }, - { - "epoch": 0.39, - "learning_rate": 0.000496677966101695, - "loss": 2.16, - "step": 2337 - }, - { - "epoch": 0.39, - "learning_rate": 0.000496542372881356, - "loss": 2.1865, - "step": 2338 - }, - { - "epoch": 0.39, - "learning_rate": 0.000496406779661017, - "loss": 2.2012, - "step": 2339 - }, - { - "epoch": 0.39, - "learning_rate": 0.000496271186440678, - "loss": 2.1537, - "step": 2340 - }, - { - "epoch": 0.39, - "learning_rate": 0.000496135593220339, - "loss": 2.1952, - "step": 2341 - }, - { - "epoch": 0.39, - "learning_rate": 0.000496, - "loss": 2.2181, - "step": 2342 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004958644067796611, - "loss": 2.2455, - "step": 2343 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004957288135593221, - "loss": 2.236, - "step": 2344 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004955932203389831, - "loss": 2.2591, - "step": 2345 - }, - { - "epoch": 0.39, - "learning_rate": 0.000495457627118644, - "loss": 2.169, - "step": 2346 - }, - { - "epoch": 0.39, - "learning_rate": 0.000495322033898305, - "loss": 2.1997, - "step": 2347 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004951864406779661, - "loss": 2.2095, - "step": 2348 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004950508474576271, - "loss": 2.281, - "step": 2349 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004949152542372882, - "loss": 2.238, - "step": 2350 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004947796610169492, - "loss": 2.2127, - "step": 2351 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004946440677966103, - "loss": 2.1367, - "step": 2352 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004945084745762711, - "loss": 2.2763, - "step": 2353 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004943728813559322, - "loss": 2.2816, - "step": 2354 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004942372881355932, - "loss": 2.1938, - "step": 2355 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004941016949152543, - "loss": 2.2263, - "step": 2356 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004939661016949153, - "loss": 2.138, - "step": 2357 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004938305084745764, - "loss": 2.2155, - "step": 2358 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004936949152542373, - "loss": 2.1569, - "step": 2359 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004935593220338983, - "loss": 2.2237, - "step": 2360 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004934237288135593, - "loss": 2.1799, - "step": 2361 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004932881355932204, - "loss": 2.217, - "step": 2362 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004931525423728814, - "loss": 2.1811, - "step": 2363 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004930169491525425, - "loss": 2.1791, - "step": 2364 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004928813559322034, - "loss": 2.1861, - "step": 2365 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004927457627118644, - "loss": 2.0613, - "step": 2366 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004926101694915254, - "loss": 2.2212, - "step": 2367 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004924745762711864, - "loss": 2.1791, - "step": 2368 - }, - { - "epoch": 0.39, - "learning_rate": 0.0004923389830508475, - "loss": 2.2189, - "step": 2369 - }, - { - "epoch": 0.4, - "learning_rate": 0.0004922033898305085, - "loss": 2.2945, - "step": 2370 - }, - { - "epoch": 0.4, - "learning_rate": 0.0004920677966101695, - "loss": 2.2479, - "step": 2371 - }, - { - "epoch": 0.4, - "learning_rate": 0.0004919322033898305, - "loss": 2.1627, - "step": 2372 - }, - { - "epoch": 0.4, - "learning_rate": 0.0004917966101694916, - "loss": 2.3009, - "step": 2373 - }, - { - "epoch": 0.4, - "learning_rate": 0.0004916610169491526, - "loss": 2.2497, - "step": 2374 - }, - { - "epoch": 0.4, - "learning_rate": 0.0004915254237288136, - "loss": 2.2158, - "step": 2375 - }, - { - "epoch": 0.4, - "learning_rate": 0.0004913898305084746, - "loss": 2.2436, - "step": 2376 - }, - { - "epoch": 0.4, - "learning_rate": 0.0004912542372881356, - "loss": 2.1664, - "step": 2377 - }, - { - "epoch": 0.4, - "learning_rate": 0.0004911186440677966, - "loss": 2.3082, - "step": 2378 - }, - { - "epoch": 0.4, - "learning_rate": 0.0004909830508474577, - "loss": 2.2136, - "step": 2379 - }, - { - "epoch": 0.4, - "learning_rate": 0.0004908474576271187, - "loss": 2.2039, - "step": 2380 - }, - { - "epoch": 0.4, - "learning_rate": 0.0004907118644067797, - "loss": 2.1691, - "step": 2381 - }, - { - "epoch": 0.4, - "learning_rate": 0.0004905762711864407, - "loss": 2.2393, - "step": 2382 - }, - { - "epoch": 0.4, - "learning_rate": 0.0004904406779661017, - "loss": 2.2057, - "step": 2383 - }, - { - "epoch": 0.4, - "learning_rate": 0.0004903050847457627, - "loss": 2.228, - "step": 2384 - }, - { - "epoch": 0.4, - "learning_rate": 0.0004901694915254238, - "loss": 2.2407, - "step": 2385 - }, - { - "epoch": 0.4, - "learning_rate": 0.0004900338983050848, - "loss": 2.2758, - "step": 2386 - }, - { - "epoch": 0.4, - "learning_rate": 0.0004898983050847458, - "loss": 2.2038, - "step": 2387 - }, - { - "epoch": 0.4, - "learning_rate": 0.0004897627118644069, - "loss": 2.1618, - "step": 2388 - }, - { - "epoch": 0.4, - "learning_rate": 0.0004896271186440677, - "loss": 2.1851, - "step": 2389 - }, - { - "epoch": 0.4, - "learning_rate": 0.0004894915254237288, - "loss": 2.2466, - "step": 2390 - }, - { - "epoch": 0.4, - "learning_rate": 0.0004893559322033898, - "loss": 2.2039, - "step": 2391 - }, - { - "epoch": 0.4, - "learning_rate": 0.0004892203389830509, - "loss": 2.2159, - "step": 2392 - }, - { - "epoch": 0.4, - "learning_rate": 0.0004890847457627119, - "loss": 2.1462, - "step": 2393 - }, - { - "epoch": 0.4, - "learning_rate": 0.000488949152542373, - "loss": 2.2526, - "step": 2394 - }, - { - "epoch": 0.4, - "learning_rate": 0.0004888135593220339, - "loss": 2.2435, - "step": 2395 - }, - { - "epoch": 0.4, - "learning_rate": 0.0004886779661016949, - "loss": 2.1436, - "step": 2396 - }, - { - "epoch": 0.4, - "learning_rate": 0.0004885423728813559, - "loss": 2.2096, - "step": 2397 - }, - { - "epoch": 0.4, - "learning_rate": 0.000488406779661017, - "loss": 2.1266, - "step": 2398 - }, - { - "epoch": 0.4, - "learning_rate": 0.000488271186440678, - "loss": 2.2289, - "step": 2399 - }, - { - "epoch": 0.4, - "learning_rate": 0.00048813559322033906, - "loss": 2.2934, - "step": 2400 - }, - { - "epoch": 0.4, - "learning_rate": 0.000488, - "loss": 2.2464, - "step": 2401 - }, - { - "epoch": 0.4, - "learning_rate": 0.0004878644067796611, - "loss": 2.2331, - "step": 2402 - }, - { - "epoch": 0.4, - "learning_rate": 0.00048772881355932206, - "loss": 2.1999, - "step": 2403 - }, - { - "epoch": 0.4, - "learning_rate": 0.0004875932203389831, - "loss": 2.0997, - "step": 2404 - }, - { - "epoch": 0.4, - "learning_rate": 0.0004874576271186441, - "loss": 2.1379, - "step": 2405 - }, - { - "epoch": 0.4, - "learning_rate": 0.00048732203389830506, - "loss": 2.1692, - "step": 2406 - }, - { - "epoch": 0.4, - "learning_rate": 0.00048718644067796614, - "loss": 2.2341, - "step": 2407 - }, - { - "epoch": 0.4, - "learning_rate": 0.0004870508474576271, - "loss": 2.1799, - "step": 2408 - }, - { - "epoch": 0.4, - "learning_rate": 0.00048691525423728816, - "loss": 2.2183, - "step": 2409 - }, - { - "epoch": 0.4, - "learning_rate": 0.00048677966101694914, - "loss": 2.1339, - "step": 2410 - }, - { - "epoch": 0.4, - "learning_rate": 0.00048664406779661023, - "loss": 2.2833, - "step": 2411 - }, - { - "epoch": 0.4, - "learning_rate": 0.00048650847457627116, - "loss": 2.1718, - "step": 2412 - }, - { - "epoch": 0.4, - "learning_rate": 0.00048637288135593224, - "loss": 2.126, - "step": 2413 - }, - { - "epoch": 0.4, - "learning_rate": 0.0004862372881355932, - "loss": 2.2404, - "step": 2414 - }, - { - "epoch": 0.4, - "learning_rate": 0.00048610169491525426, - "loss": 2.1648, - "step": 2415 - }, - { - "epoch": 0.4, - "learning_rate": 0.00048596610169491524, - "loss": 2.242, - "step": 2416 - }, - { - "epoch": 0.4, - "learning_rate": 0.00048583050847457633, - "loss": 2.196, - "step": 2417 - }, - { - "epoch": 0.4, - "learning_rate": 0.0004856949152542373, - "loss": 2.2558, - "step": 2418 - }, - { - "epoch": 0.4, - "learning_rate": 0.00048555932203389834, - "loss": 2.1538, - "step": 2419 - }, - { - "epoch": 0.4, - "learning_rate": 0.0004854237288135593, - "loss": 2.1532, - "step": 2420 - }, - { - "epoch": 0.4, - "learning_rate": 0.0004852881355932204, - "loss": 2.1805, - "step": 2421 - }, - { - "epoch": 0.4, - "learning_rate": 0.0004851525423728814, - "loss": 2.2063, - "step": 2422 - }, - { - "epoch": 0.4, - "learning_rate": 0.00048501694915254243, - "loss": 2.2193, - "step": 2423 - }, - { - "epoch": 0.4, - "learning_rate": 0.0004848813559322034, - "loss": 2.1982, - "step": 2424 - }, - { - "epoch": 0.4, - "learning_rate": 0.00048474576271186445, - "loss": 2.2539, - "step": 2425 - }, - { - "epoch": 0.4, - "learning_rate": 0.0004846101694915254, - "loss": 2.244, - "step": 2426 - }, - { - "epoch": 0.4, - "learning_rate": 0.0004844745762711864, - "loss": 2.2449, - "step": 2427 - }, - { - "epoch": 0.4, - "learning_rate": 0.0004843389830508475, - "loss": 2.2277, - "step": 2428 - }, - { - "epoch": 0.4, - "learning_rate": 0.0004842033898305085, - "loss": 2.2147, - "step": 2429 - }, - { - "epoch": 0.41, - "learning_rate": 0.0004840677966101695, - "loss": 2.1484, - "step": 2430 - }, - { - "epoch": 0.41, - "learning_rate": 0.0004839322033898305, - "loss": 2.1931, - "step": 2431 - }, - { - "epoch": 0.41, - "learning_rate": 0.0004837966101694916, - "loss": 2.1441, - "step": 2432 - }, - { - "epoch": 0.41, - "learning_rate": 0.00048366101694915256, - "loss": 2.1878, - "step": 2433 - }, - { - "epoch": 0.41, - "learning_rate": 0.0004835254237288136, - "loss": 2.2027, - "step": 2434 - }, - { - "epoch": 0.41, - "learning_rate": 0.0004833898305084746, - "loss": 2.1994, - "step": 2435 - }, - { - "epoch": 0.41, - "learning_rate": 0.0004832542372881356, - "loss": 2.122, - "step": 2436 - }, - { - "epoch": 0.41, - "learning_rate": 0.0004831186440677966, - "loss": 2.2057, - "step": 2437 - }, - { - "epoch": 0.41, - "learning_rate": 0.0004829830508474577, - "loss": 2.2433, - "step": 2438 - }, - { - "epoch": 0.41, - "learning_rate": 0.00048284745762711866, - "loss": 2.1907, - "step": 2439 - }, - { - "epoch": 0.41, - "learning_rate": 0.0004827118644067797, - "loss": 2.1868, - "step": 2440 - }, - { - "epoch": 0.41, - "learning_rate": 0.0004825762711864407, - "loss": 2.2322, - "step": 2441 - }, - { - "epoch": 0.41, - "learning_rate": 0.00048244067796610177, - "loss": 2.22, - "step": 2442 - }, - { - "epoch": 0.41, - "learning_rate": 0.00048230508474576275, - "loss": 2.1447, - "step": 2443 - }, - { - "epoch": 0.41, - "learning_rate": 0.0004821694915254238, - "loss": 2.2558, - "step": 2444 - }, - { - "epoch": 0.41, - "learning_rate": 0.00048203389830508476, - "loss": 2.1189, - "step": 2445 - }, - { - "epoch": 0.41, - "learning_rate": 0.00048189830508474585, - "loss": 2.1803, - "step": 2446 - }, - { - "epoch": 0.41, - "learning_rate": 0.0004817627118644068, - "loss": 2.229, - "step": 2447 - }, - { - "epoch": 0.41, - "learning_rate": 0.00048162711864406776, - "loss": 2.2312, - "step": 2448 - }, - { - "epoch": 0.41, - "learning_rate": 0.00048149152542372885, - "loss": 2.2365, - "step": 2449 - }, - { - "epoch": 0.41, - "learning_rate": 0.0004813559322033898, - "loss": 2.1939, - "step": 2450 - }, - { - "epoch": 0.41, - "learning_rate": 0.00048122033898305086, - "loss": 2.1652, - "step": 2451 - }, - { - "epoch": 0.41, - "learning_rate": 0.00048108474576271184, - "loss": 2.2152, - "step": 2452 - }, - { - "epoch": 0.41, - "learning_rate": 0.00048094915254237293, - "loss": 2.1912, - "step": 2453 - }, - { - "epoch": 0.41, - "learning_rate": 0.0004808135593220339, - "loss": 2.2335, - "step": 2454 - }, - { - "epoch": 0.41, - "learning_rate": 0.00048067796610169495, - "loss": 2.2079, - "step": 2455 - }, - { - "epoch": 0.41, - "learning_rate": 0.00048054237288135593, - "loss": 2.2502, - "step": 2456 - }, - { - "epoch": 0.41, - "learning_rate": 0.000480406779661017, - "loss": 2.2381, - "step": 2457 - }, - { - "epoch": 0.41, - "learning_rate": 0.00048027118644067794, - "loss": 2.1353, - "step": 2458 - }, - { - "epoch": 0.41, - "learning_rate": 0.00048013559322033903, - "loss": 2.1691, - "step": 2459 - }, - { - "epoch": 0.41, - "learning_rate": 0.00048, - "loss": 2.199, - "step": 2460 - }, - { - "epoch": 0.41, - "learning_rate": 0.00047986440677966105, - "loss": 2.2283, - "step": 2461 - }, - { - "epoch": 0.41, - "learning_rate": 0.00047972881355932203, - "loss": 2.2119, - "step": 2462 - }, - { - "epoch": 0.41, - "learning_rate": 0.0004795932203389831, - "loss": 2.2015, - "step": 2463 - }, - { - "epoch": 0.41, - "learning_rate": 0.0004794576271186441, - "loss": 2.2787, - "step": 2464 - }, - { - "epoch": 0.41, - "learning_rate": 0.00047932203389830513, - "loss": 2.1952, - "step": 2465 - }, - { - "epoch": 0.41, - "learning_rate": 0.0004791864406779661, - "loss": 2.2289, - "step": 2466 - }, - { - "epoch": 0.41, - "learning_rate": 0.0004790508474576272, - "loss": 2.1717, - "step": 2467 - }, - { - "epoch": 0.41, - "learning_rate": 0.0004789152542372882, - "loss": 2.2153, - "step": 2468 - }, - { - "epoch": 0.41, - "learning_rate": 0.0004787796610169491, - "loss": 2.2087, - "step": 2469 - }, - { - "epoch": 0.41, - "learning_rate": 0.0004786440677966102, - "loss": 2.2092, - "step": 2470 - }, - { - "epoch": 0.41, - "learning_rate": 0.0004785084745762712, - "loss": 2.2035, - "step": 2471 - }, - { - "epoch": 0.41, - "learning_rate": 0.0004783728813559322, - "loss": 2.2069, - "step": 2472 - }, - { - "epoch": 0.41, - "learning_rate": 0.0004782372881355932, - "loss": 2.1963, - "step": 2473 - }, - { - "epoch": 0.41, - "learning_rate": 0.0004781016949152543, - "loss": 2.2076, - "step": 2474 - }, - { - "epoch": 0.41, - "learning_rate": 0.00047796610169491526, - "loss": 2.1829, - "step": 2475 - }, - { - "epoch": 0.41, - "learning_rate": 0.0004778305084745763, - "loss": 2.1764, - "step": 2476 - }, - { - "epoch": 0.41, - "learning_rate": 0.0004776949152542373, - "loss": 2.2048, - "step": 2477 - }, - { - "epoch": 0.41, - "learning_rate": 0.00047755932203389837, - "loss": 2.2038, - "step": 2478 - }, - { - "epoch": 0.41, - "learning_rate": 0.00047742372881355935, - "loss": 2.2018, - "step": 2479 - }, - { - "epoch": 0.41, - "learning_rate": 0.0004772881355932204, - "loss": 2.225, - "step": 2480 - }, - { - "epoch": 0.41, - "learning_rate": 0.00047715254237288136, - "loss": 2.1708, - "step": 2481 - }, - { - "epoch": 0.41, - "learning_rate": 0.0004770169491525424, - "loss": 2.204, - "step": 2482 - }, - { - "epoch": 0.41, - "learning_rate": 0.0004768813559322034, - "loss": 2.2076, - "step": 2483 - }, - { - "epoch": 0.41, - "learning_rate": 0.00047674576271186447, - "loss": 2.2065, - "step": 2484 - }, - { - "epoch": 0.41, - "learning_rate": 0.00047661016949152545, - "loss": 2.2204, - "step": 2485 - }, - { - "epoch": 0.41, - "learning_rate": 0.0004764745762711865, - "loss": 2.2346, - "step": 2486 - }, - { - "epoch": 0.41, - "learning_rate": 0.00047633898305084746, - "loss": 2.2114, - "step": 2487 - }, - { - "epoch": 0.41, - "learning_rate": 0.00047620338983050855, - "loss": 2.2893, - "step": 2488 - }, - { - "epoch": 0.41, - "learning_rate": 0.00047606779661016953, - "loss": 2.1737, - "step": 2489 - }, - { - "epoch": 0.41, - "learning_rate": 0.0004759322033898305, - "loss": 2.2411, - "step": 2490 - }, - { - "epoch": 0.42, - "learning_rate": 0.00047579661016949155, - "loss": 2.2292, - "step": 2491 - }, - { - "epoch": 0.42, - "learning_rate": 0.00047566101694915253, - "loss": 2.1915, - "step": 2492 - }, - { - "epoch": 0.42, - "learning_rate": 0.00047552542372881356, - "loss": 2.2041, - "step": 2493 - }, - { - "epoch": 0.42, - "learning_rate": 0.00047538983050847454, - "loss": 2.2141, - "step": 2494 - }, - { - "epoch": 0.42, - "learning_rate": 0.00047525423728813563, - "loss": 2.2215, - "step": 2495 - }, - { - "epoch": 0.42, - "learning_rate": 0.0004751186440677966, - "loss": 2.201, - "step": 2496 - }, - { - "epoch": 0.42, - "learning_rate": 0.00047498305084745765, - "loss": 2.2049, - "step": 2497 - }, - { - "epoch": 0.42, - "learning_rate": 0.00047484745762711863, - "loss": 2.1216, - "step": 2498 - }, - { - "epoch": 0.42, - "learning_rate": 0.0004747118644067797, - "loss": 2.1727, - "step": 2499 - }, - { - "epoch": 0.42, - "learning_rate": 0.0004745762711864407, - "loss": 2.2388, - "step": 2500 - }, - { - "epoch": 0.42, - "eval_gen_len": 19.0, - "eval_loss": 2.074338674545288, - "eval_rouge1": 0.2622, - "eval_rouge2": 0.0951, - "eval_rougeL": 0.2241, - "eval_rougeLsum": 0.2236, - "eval_runtime": 42.0841, - "eval_samples_per_second": 1.188, - "eval_steps_per_second": 0.166, - "step": 2500 - }, - { - "epoch": 0.42, - "learning_rate": 0.00047444067796610173, - "loss": 2.207, - "step": 2501 - }, - { - "epoch": 0.42, - "learning_rate": 0.0004743050847457627, - "loss": 2.1369, - "step": 2502 - }, - { - "epoch": 0.42, - "learning_rate": 0.0004741694915254238, - "loss": 2.1234, - "step": 2503 - }, - { - "epoch": 0.42, - "learning_rate": 0.00047403389830508473, - "loss": 2.1835, - "step": 2504 - }, - { - "epoch": 0.42, - "learning_rate": 0.0004738983050847458, - "loss": 2.1962, - "step": 2505 - }, - { - "epoch": 0.42, - "learning_rate": 0.0004737627118644068, - "loss": 2.1814, - "step": 2506 - }, - { - "epoch": 0.42, - "learning_rate": 0.00047362711864406783, - "loss": 2.1553, - "step": 2507 - }, - { - "epoch": 0.42, - "learning_rate": 0.0004734915254237288, - "loss": 2.1694, - "step": 2508 - }, - { - "epoch": 0.42, - "learning_rate": 0.0004733559322033899, - "loss": 2.2564, - "step": 2509 - }, - { - "epoch": 0.42, - "learning_rate": 0.0004732203389830509, - "loss": 2.1611, - "step": 2510 - }, - { - "epoch": 0.42, - "learning_rate": 0.00047308474576271186, - "loss": 2.3156, - "step": 2511 - }, - { - "epoch": 0.42, - "learning_rate": 0.0004729491525423729, - "loss": 2.1023, - "step": 2512 - }, - { - "epoch": 0.42, - "learning_rate": 0.0004728135593220339, - "loss": 2.2861, - "step": 2513 - }, - { - "epoch": 0.42, - "learning_rate": 0.00047267796610169497, - "loss": 2.1733, - "step": 2514 - }, - { - "epoch": 0.42, - "learning_rate": 0.0004725423728813559, - "loss": 2.1, - "step": 2515 - }, - { - "epoch": 0.42, - "learning_rate": 0.000472406779661017, - "loss": 2.1805, - "step": 2516 - }, - { - "epoch": 0.42, - "learning_rate": 0.00047227118644067796, - "loss": 2.1485, - "step": 2517 - }, - { - "epoch": 0.42, - "learning_rate": 0.000472135593220339, - "loss": 2.2094, - "step": 2518 - }, - { - "epoch": 0.42, - "learning_rate": 0.000472, - "loss": 2.1898, - "step": 2519 - }, - { - "epoch": 0.42, - "learning_rate": 0.00047186440677966107, - "loss": 2.2167, - "step": 2520 - }, - { - "epoch": 0.42, - "learning_rate": 0.00047172881355932205, - "loss": 2.1117, - "step": 2521 - }, - { - "epoch": 0.42, - "learning_rate": 0.0004715932203389831, - "loss": 2.1604, - "step": 2522 - }, - { - "epoch": 0.42, - "learning_rate": 0.00047145762711864407, - "loss": 2.1861, - "step": 2523 - }, - { - "epoch": 0.42, - "learning_rate": 0.00047132203389830515, - "loss": 2.1883, - "step": 2524 - }, - { - "epoch": 0.42, - "learning_rate": 0.00047118644067796613, - "loss": 2.1772, - "step": 2525 - }, - { - "epoch": 0.42, - "learning_rate": 0.00047105084745762717, - "loss": 2.1937, - "step": 2526 - }, - { - "epoch": 0.42, - "learning_rate": 0.00047091525423728815, - "loss": 2.2126, - "step": 2527 - }, - { - "epoch": 0.42, - "learning_rate": 0.00047077966101694924, - "loss": 2.2554, - "step": 2528 - }, - { - "epoch": 0.42, - "learning_rate": 0.00047064406779661017, - "loss": 2.1951, - "step": 2529 - }, - { - "epoch": 0.42, - "learning_rate": 0.00047050847457627125, - "loss": 2.1638, - "step": 2530 - }, - { - "epoch": 0.42, - "learning_rate": 0.00047037288135593224, - "loss": 2.1608, - "step": 2531 - }, - { - "epoch": 0.42, - "learning_rate": 0.0004702372881355932, - "loss": 2.092, - "step": 2532 - }, - { - "epoch": 0.42, - "learning_rate": 0.00047010169491525425, - "loss": 2.1712, - "step": 2533 - }, - { - "epoch": 0.42, - "learning_rate": 0.00046996610169491523, - "loss": 2.2195, - "step": 2534 - }, - { - "epoch": 0.42, - "learning_rate": 0.0004698305084745763, - "loss": 2.1822, - "step": 2535 - }, - { - "epoch": 0.42, - "learning_rate": 0.0004696949152542373, - "loss": 2.1229, - "step": 2536 - }, - { - "epoch": 0.42, - "learning_rate": 0.00046955932203389834, - "loss": 2.191, - "step": 2537 - }, - { - "epoch": 0.42, - "learning_rate": 0.0004694237288135593, - "loss": 2.196, - "step": 2538 - }, - { - "epoch": 0.42, - "learning_rate": 0.0004692881355932204, - "loss": 2.2301, - "step": 2539 - }, - { - "epoch": 0.42, - "learning_rate": 0.00046915254237288133, - "loss": 2.1972, - "step": 2540 - }, - { - "epoch": 0.42, - "learning_rate": 0.0004690169491525424, - "loss": 2.2244, - "step": 2541 - }, - { - "epoch": 0.42, - "learning_rate": 0.0004688813559322034, - "loss": 2.1232, - "step": 2542 - }, - { - "epoch": 0.42, - "learning_rate": 0.00046874576271186444, - "loss": 2.2249, - "step": 2543 - }, - { - "epoch": 0.42, - "learning_rate": 0.0004686101694915254, - "loss": 2.1743, - "step": 2544 - }, - { - "epoch": 0.42, - "learning_rate": 0.0004684745762711865, - "loss": 2.186, - "step": 2545 - }, - { - "epoch": 0.42, - "learning_rate": 0.0004683389830508475, - "loss": 2.1699, - "step": 2546 - }, - { - "epoch": 0.42, - "learning_rate": 0.0004682033898305085, - "loss": 2.188, - "step": 2547 - }, - { - "epoch": 0.42, - "learning_rate": 0.0004680677966101695, - "loss": 2.1757, - "step": 2548 - }, - { - "epoch": 0.42, - "learning_rate": 0.0004679322033898306, - "loss": 2.1531, - "step": 2549 - }, - { - "epoch": 0.42, - "learning_rate": 0.00046779661016949157, - "loss": 2.2214, - "step": 2550 - }, - { - "epoch": 0.43, - "learning_rate": 0.0004676610169491526, - "loss": 2.1826, - "step": 2551 - }, - { - "epoch": 0.43, - "learning_rate": 0.0004675254237288136, - "loss": 2.2332, - "step": 2552 - }, - { - "epoch": 0.43, - "learning_rate": 0.00046738983050847457, - "loss": 2.1946, - "step": 2553 - }, - { - "epoch": 0.43, - "learning_rate": 0.0004672542372881356, - "loss": 2.1449, - "step": 2554 - }, - { - "epoch": 0.43, - "learning_rate": 0.0004671186440677966, - "loss": 2.1659, - "step": 2555 - }, - { - "epoch": 0.43, - "learning_rate": 0.00046698305084745767, - "loss": 2.2544, - "step": 2556 - }, - { - "epoch": 0.43, - "learning_rate": 0.00046684745762711865, - "loss": 2.1633, - "step": 2557 - }, - { - "epoch": 0.43, - "learning_rate": 0.0004667118644067797, - "loss": 2.169, - "step": 2558 - }, - { - "epoch": 0.43, - "learning_rate": 0.00046657627118644067, - "loss": 2.2566, - "step": 2559 - }, - { - "epoch": 0.43, - "learning_rate": 0.00046644067796610176, - "loss": 2.1982, - "step": 2560 - }, - { - "epoch": 0.43, - "learning_rate": 0.00046630508474576274, - "loss": 2.1657, - "step": 2561 - }, - { - "epoch": 0.43, - "learning_rate": 0.00046616949152542377, - "loss": 2.2471, - "step": 2562 - }, - { - "epoch": 0.43, - "learning_rate": 0.00046603389830508475, - "loss": 2.207, - "step": 2563 - }, - { - "epoch": 0.43, - "learning_rate": 0.0004658983050847458, - "loss": 2.1663, - "step": 2564 - }, - { - "epoch": 0.43, - "learning_rate": 0.00046576271186440677, - "loss": 2.087, - "step": 2565 - }, - { - "epoch": 0.43, - "learning_rate": 0.00046562711864406786, - "loss": 2.2147, - "step": 2566 - }, - { - "epoch": 0.43, - "learning_rate": 0.00046549152542372884, - "loss": 2.22, - "step": 2567 - }, - { - "epoch": 0.43, - "learning_rate": 0.00046535593220338987, - "loss": 2.2194, - "step": 2568 - }, - { - "epoch": 0.43, - "learning_rate": 0.00046522033898305085, - "loss": 2.1823, - "step": 2569 - }, - { - "epoch": 0.43, - "learning_rate": 0.00046508474576271194, - "loss": 2.1742, - "step": 2570 - }, - { - "epoch": 0.43, - "learning_rate": 0.0004649491525423729, - "loss": 2.2006, - "step": 2571 - }, - { - "epoch": 0.43, - "learning_rate": 0.00046481355932203396, - "loss": 2.2428, - "step": 2572 - }, - { - "epoch": 0.43, - "learning_rate": 0.00046467796610169494, - "loss": 2.1535, - "step": 2573 - }, - { - "epoch": 0.43, - "learning_rate": 0.0004645423728813559, - "loss": 2.1628, - "step": 2574 - }, - { - "epoch": 0.43, - "learning_rate": 0.00046440677966101695, - "loss": 2.1787, - "step": 2575 - }, - { - "epoch": 0.43, - "learning_rate": 0.00046427118644067793, - "loss": 2.1241, - "step": 2576 - }, - { - "epoch": 0.43, - "learning_rate": 0.000464135593220339, - "loss": 2.2536, - "step": 2577 - }, - { - "epoch": 0.43, - "learning_rate": 0.000464, - "loss": 2.0936, - "step": 2578 - }, - { - "epoch": 0.43, - "learning_rate": 0.00046386440677966104, - "loss": 2.2309, - "step": 2579 - }, - { - "epoch": 0.43, - "learning_rate": 0.000463728813559322, - "loss": 2.1871, - "step": 2580 - }, - { - "epoch": 0.43, - "learning_rate": 0.0004635932203389831, - "loss": 2.2268, - "step": 2581 - }, - { - "epoch": 0.43, - "learning_rate": 0.0004634576271186441, - "loss": 2.1735, - "step": 2582 - }, - { - "epoch": 0.43, - "learning_rate": 0.0004633220338983051, - "loss": 2.1171, - "step": 2583 - }, - { - "epoch": 0.43, - "learning_rate": 0.0004631864406779661, - "loss": 2.2784, - "step": 2584 - }, - { - "epoch": 0.43, - "learning_rate": 0.0004630508474576272, - "loss": 2.212, - "step": 2585 - }, - { - "epoch": 0.43, - "learning_rate": 0.0004629152542372881, - "loss": 2.3069, - "step": 2586 - }, - { - "epoch": 0.43, - "learning_rate": 0.0004627796610169492, - "loss": 2.2547, - "step": 2587 - }, - { - "epoch": 0.43, - "learning_rate": 0.0004626440677966102, - "loss": 2.1827, - "step": 2588 - }, - { - "epoch": 0.43, - "learning_rate": 0.0004625084745762712, - "loss": 2.1967, - "step": 2589 - }, - { - "epoch": 0.43, - "learning_rate": 0.0004623728813559322, - "loss": 2.1982, - "step": 2590 - }, - { - "epoch": 0.43, - "learning_rate": 0.0004622372881355933, - "loss": 2.2301, - "step": 2591 - }, - { - "epoch": 0.43, - "learning_rate": 0.00046210169491525427, - "loss": 2.1121, - "step": 2592 - }, - { - "epoch": 0.43, - "learning_rate": 0.00046196610169491525, - "loss": 2.1594, - "step": 2593 - }, - { - "epoch": 0.43, - "learning_rate": 0.0004618305084745763, - "loss": 2.2726, - "step": 2594 - }, - { - "epoch": 0.43, - "learning_rate": 0.00046169491525423727, - "loss": 2.1845, - "step": 2595 - }, - { - "epoch": 0.43, - "learning_rate": 0.00046155932203389836, - "loss": 2.131, - "step": 2596 - }, - { - "epoch": 0.43, - "learning_rate": 0.0004614237288135593, - "loss": 2.1921, - "step": 2597 - }, - { - "epoch": 0.43, - "learning_rate": 0.0004612881355932204, - "loss": 2.1942, - "step": 2598 - }, - { - "epoch": 0.43, - "learning_rate": 0.00046115254237288135, - "loss": 2.2077, - "step": 2599 - }, - { - "epoch": 0.43, - "learning_rate": 0.0004610169491525424, - "loss": 2.237, - "step": 2600 - }, - { - "epoch": 0.43, - "learning_rate": 0.00046088135593220337, - "loss": 2.1849, - "step": 2601 - }, - { - "epoch": 0.43, - "learning_rate": 0.00046074576271186446, - "loss": 2.2331, - "step": 2602 - }, - { - "epoch": 0.43, - "learning_rate": 0.00046061016949152544, - "loss": 2.1861, - "step": 2603 - }, - { - "epoch": 0.43, - "learning_rate": 0.0004604745762711865, - "loss": 2.1723, - "step": 2604 - }, - { - "epoch": 0.43, - "learning_rate": 0.00046033898305084745, - "loss": 2.2068, - "step": 2605 - }, - { - "epoch": 0.43, - "learning_rate": 0.00046020338983050854, - "loss": 2.1222, - "step": 2606 - }, - { - "epoch": 0.43, - "learning_rate": 0.0004600677966101695, - "loss": 2.1435, - "step": 2607 - }, - { - "epoch": 0.43, - "learning_rate": 0.00045993220338983056, - "loss": 2.2329, - "step": 2608 - }, - { - "epoch": 0.43, - "learning_rate": 0.00045979661016949154, - "loss": 2.1265, - "step": 2609 - }, - { - "epoch": 0.43, - "learning_rate": 0.0004596610169491526, - "loss": 2.2636, - "step": 2610 - }, - { - "epoch": 0.44, - "learning_rate": 0.00045952542372881355, - "loss": 2.2236, - "step": 2611 - }, - { - "epoch": 0.44, - "learning_rate": 0.00045938983050847464, - "loss": 2.2194, - "step": 2612 - }, - { - "epoch": 0.44, - "learning_rate": 0.0004592542372881356, - "loss": 2.1611, - "step": 2613 - }, - { - "epoch": 0.44, - "learning_rate": 0.0004591186440677966, - "loss": 2.1603, - "step": 2614 - }, - { - "epoch": 0.44, - "learning_rate": 0.00045898305084745764, - "loss": 2.1169, - "step": 2615 - }, - { - "epoch": 0.44, - "learning_rate": 0.0004588474576271186, - "loss": 2.2098, - "step": 2616 - }, - { - "epoch": 0.44, - "learning_rate": 0.0004587118644067797, - "loss": 2.237, - "step": 2617 - }, - { - "epoch": 0.44, - "learning_rate": 0.0004585762711864407, - "loss": 2.1146, - "step": 2618 - }, - { - "epoch": 0.44, - "learning_rate": 0.0004584406779661017, - "loss": 2.2139, - "step": 2619 - }, - { - "epoch": 0.44, - "learning_rate": 0.0004583050847457627, - "loss": 2.1968, - "step": 2620 - }, - { - "epoch": 0.44, - "learning_rate": 0.00045816949152542374, - "loss": 2.1989, - "step": 2621 - }, - { - "epoch": 0.44, - "learning_rate": 0.0004580338983050847, - "loss": 2.2338, - "step": 2622 - }, - { - "epoch": 0.44, - "learning_rate": 0.0004578983050847458, - "loss": 2.2344, - "step": 2623 - }, - { - "epoch": 0.44, - "learning_rate": 0.0004577627118644068, - "loss": 2.1605, - "step": 2624 - }, - { - "epoch": 0.44, - "learning_rate": 0.0004576271186440678, - "loss": 2.1599, - "step": 2625 - }, - { - "epoch": 0.44, - "learning_rate": 0.0004574915254237288, - "loss": 2.2193, - "step": 2626 - }, - { - "epoch": 0.44, - "learning_rate": 0.0004573559322033899, - "loss": 2.199, - "step": 2627 - }, - { - "epoch": 0.44, - "learning_rate": 0.0004572203389830509, - "loss": 2.1502, - "step": 2628 - }, - { - "epoch": 0.44, - "learning_rate": 0.0004570847457627119, - "loss": 2.2478, - "step": 2629 - }, - { - "epoch": 0.44, - "learning_rate": 0.0004569491525423729, - "loss": 2.1512, - "step": 2630 - }, - { - "epoch": 0.44, - "learning_rate": 0.000456813559322034, - "loss": 2.2605, - "step": 2631 - }, - { - "epoch": 0.44, - "learning_rate": 0.0004566779661016949, - "loss": 2.258, - "step": 2632 - }, - { - "epoch": 0.44, - "learning_rate": 0.000456542372881356, - "loss": 2.1609, - "step": 2633 - }, - { - "epoch": 0.44, - "learning_rate": 0.000456406779661017, - "loss": 2.2546, - "step": 2634 - }, - { - "epoch": 0.44, - "learning_rate": 0.00045627118644067796, - "loss": 2.1307, - "step": 2635 - }, - { - "epoch": 0.44, - "learning_rate": 0.000456135593220339, - "loss": 2.1853, - "step": 2636 - }, - { - "epoch": 0.44, - "learning_rate": 0.00045599999999999997, - "loss": 2.2144, - "step": 2637 - }, - { - "epoch": 0.44, - "learning_rate": 0.00045586440677966106, - "loss": 2.2212, - "step": 2638 - }, - { - "epoch": 0.44, - "learning_rate": 0.00045572881355932204, - "loss": 2.1757, - "step": 2639 - }, - { - "epoch": 0.44, - "learning_rate": 0.0004555932203389831, - "loss": 2.1837, - "step": 2640 - }, - { - "epoch": 0.44, - "learning_rate": 0.00045545762711864406, - "loss": 2.1359, - "step": 2641 - }, - { - "epoch": 0.44, - "learning_rate": 0.00045532203389830514, - "loss": 2.2074, - "step": 2642 - }, - { - "epoch": 0.44, - "learning_rate": 0.00045518644067796607, - "loss": 2.1438, - "step": 2643 - }, - { - "epoch": 0.44, - "learning_rate": 0.00045505084745762716, - "loss": 2.1324, - "step": 2644 - }, - { - "epoch": 0.44, - "learning_rate": 0.00045491525423728814, - "loss": 2.225, - "step": 2645 - }, - { - "epoch": 0.44, - "learning_rate": 0.0004547796610169492, - "loss": 2.1935, - "step": 2646 - }, - { - "epoch": 0.44, - "learning_rate": 0.00045464406779661016, - "loss": 2.1445, - "step": 2647 - }, - { - "epoch": 0.44, - "learning_rate": 0.00045450847457627125, - "loss": 2.2461, - "step": 2648 - }, - { - "epoch": 0.44, - "learning_rate": 0.0004543728813559322, - "loss": 2.256, - "step": 2649 - }, - { - "epoch": 0.44, - "learning_rate": 0.00045423728813559326, - "loss": 2.1491, - "step": 2650 - }, - { - "epoch": 0.44, - "learning_rate": 0.00045410169491525424, - "loss": 2.139, - "step": 2651 - }, - { - "epoch": 0.44, - "learning_rate": 0.00045396610169491533, - "loss": 2.1354, - "step": 2652 - }, - { - "epoch": 0.44, - "learning_rate": 0.0004538305084745763, - "loss": 2.2622, - "step": 2653 - }, - { - "epoch": 0.44, - "learning_rate": 0.00045369491525423735, - "loss": 2.2424, - "step": 2654 - }, - { - "epoch": 0.44, - "learning_rate": 0.0004535593220338983, - "loss": 2.3128, - "step": 2655 - }, - { - "epoch": 0.44, - "learning_rate": 0.0004534237288135593, - "loss": 2.2169, - "step": 2656 - }, - { - "epoch": 0.44, - "learning_rate": 0.00045328813559322034, - "loss": 2.2816, - "step": 2657 - }, - { - "epoch": 0.44, - "learning_rate": 0.0004531525423728813, - "loss": 2.2674, - "step": 2658 - }, - { - "epoch": 0.44, - "learning_rate": 0.0004530169491525424, - "loss": 2.1776, - "step": 2659 - }, - { - "epoch": 0.44, - "learning_rate": 0.0004528813559322034, - "loss": 2.2076, - "step": 2660 - }, - { - "epoch": 0.44, - "learning_rate": 0.0004527457627118644, - "loss": 2.1904, - "step": 2661 - }, - { - "epoch": 0.44, - "learning_rate": 0.0004526101694915254, - "loss": 2.175, - "step": 2662 - }, - { - "epoch": 0.44, - "learning_rate": 0.0004524745762711865, - "loss": 2.2094, - "step": 2663 - }, - { - "epoch": 0.44, - "learning_rate": 0.0004523389830508475, - "loss": 2.0975, - "step": 2664 - }, - { - "epoch": 0.44, - "learning_rate": 0.0004522033898305085, - "loss": 2.1961, - "step": 2665 - }, - { - "epoch": 0.44, - "learning_rate": 0.0004520677966101695, - "loss": 2.1559, - "step": 2666 - }, - { - "epoch": 0.44, - "learning_rate": 0.0004519322033898306, - "loss": 2.1439, - "step": 2667 - }, - { - "epoch": 0.44, - "learning_rate": 0.0004517966101694915, - "loss": 2.1496, - "step": 2668 - }, - { - "epoch": 0.44, - "learning_rate": 0.0004516610169491526, - "loss": 2.1898, - "step": 2669 - }, - { - "epoch": 0.45, - "learning_rate": 0.0004515254237288136, - "loss": 2.1843, - "step": 2670 - }, - { - "epoch": 0.45, - "learning_rate": 0.0004513898305084746, - "loss": 2.1139, - "step": 2671 - }, - { - "epoch": 0.45, - "learning_rate": 0.0004512542372881356, - "loss": 2.1933, - "step": 2672 - }, - { - "epoch": 0.45, - "learning_rate": 0.0004511186440677967, - "loss": 2.1813, - "step": 2673 - }, - { - "epoch": 0.45, - "learning_rate": 0.00045098305084745766, - "loss": 2.1483, - "step": 2674 - }, - { - "epoch": 0.45, - "learning_rate": 0.0004508474576271187, - "loss": 2.1978, - "step": 2675 - }, - { - "epoch": 0.45, - "learning_rate": 0.0004507118644067797, - "loss": 2.1171, - "step": 2676 - }, - { - "epoch": 0.45, - "learning_rate": 0.00045057627118644066, - "loss": 2.1211, - "step": 2677 - }, - { - "epoch": 0.45, - "learning_rate": 0.00045044067796610175, - "loss": 2.1748, - "step": 2678 - }, - { - "epoch": 0.45, - "learning_rate": 0.0004503050847457627, - "loss": 2.1855, - "step": 2679 - }, - { - "epoch": 0.45, - "learning_rate": 0.00045016949152542376, - "loss": 2.1501, - "step": 2680 - }, - { - "epoch": 0.45, - "learning_rate": 0.00045003389830508474, - "loss": 2.2428, - "step": 2681 - }, - { - "epoch": 0.45, - "learning_rate": 0.0004498983050847458, - "loss": 2.1357, - "step": 2682 - }, - { - "epoch": 0.45, - "learning_rate": 0.00044976271186440676, - "loss": 2.1712, - "step": 2683 - }, - { - "epoch": 0.45, - "learning_rate": 0.00044962711864406785, - "loss": 2.1691, - "step": 2684 - }, - { - "epoch": 0.45, - "learning_rate": 0.00044949152542372883, - "loss": 2.2532, - "step": 2685 - }, - { - "epoch": 0.45, - "learning_rate": 0.00044935593220338986, - "loss": 2.1859, - "step": 2686 - }, - { - "epoch": 0.45, - "learning_rate": 0.00044922033898305084, - "loss": 2.2379, - "step": 2687 - }, - { - "epoch": 0.45, - "learning_rate": 0.00044908474576271193, - "loss": 2.2257, - "step": 2688 - }, - { - "epoch": 0.45, - "learning_rate": 0.0004489491525423729, - "loss": 2.1396, - "step": 2689 - }, - { - "epoch": 0.45, - "learning_rate": 0.00044881355932203395, - "loss": 2.2046, - "step": 2690 - }, - { - "epoch": 0.45, - "learning_rate": 0.00044867796610169493, - "loss": 2.1902, - "step": 2691 - }, - { - "epoch": 0.45, - "learning_rate": 0.00044854237288135596, - "loss": 2.1787, - "step": 2692 - }, - { - "epoch": 0.45, - "learning_rate": 0.00044840677966101694, - "loss": 2.2116, - "step": 2693 - }, - { - "epoch": 0.45, - "learning_rate": 0.00044827118644067803, - "loss": 2.196, - "step": 2694 - }, - { - "epoch": 0.45, - "learning_rate": 0.000448135593220339, - "loss": 2.1512, - "step": 2695 - }, - { - "epoch": 0.45, - "learning_rate": 0.00044800000000000005, - "loss": 2.2, - "step": 2696 - }, - { - "epoch": 0.45, - "learning_rate": 0.00044786440677966103, - "loss": 2.2264, - "step": 2697 - }, - { - "epoch": 0.45, - "learning_rate": 0.000447728813559322, - "loss": 2.2349, - "step": 2698 - }, - { - "epoch": 0.45, - "learning_rate": 0.0004475932203389831, - "loss": 2.191, - "step": 2699 - }, - { - "epoch": 0.45, - "learning_rate": 0.0004474576271186441, - "loss": 2.1978, - "step": 2700 - }, - { - "epoch": 0.45, - "learning_rate": 0.0004473220338983051, - "loss": 2.2675, - "step": 2701 - }, - { - "epoch": 0.45, - "learning_rate": 0.0004471864406779661, - "loss": 2.178, - "step": 2702 - }, - { - "epoch": 0.45, - "learning_rate": 0.00044705084745762713, - "loss": 2.2941, - "step": 2703 - }, - { - "epoch": 0.45, - "learning_rate": 0.0004469152542372881, - "loss": 2.2429, - "step": 2704 - }, - { - "epoch": 0.45, - "learning_rate": 0.0004467796610169492, - "loss": 2.2242, - "step": 2705 - }, - { - "epoch": 0.45, - "learning_rate": 0.0004466440677966102, - "loss": 2.2783, - "step": 2706 - }, - { - "epoch": 0.45, - "learning_rate": 0.0004465084745762712, - "loss": 2.241, - "step": 2707 - }, - { - "epoch": 0.45, - "learning_rate": 0.0004463728813559322, - "loss": 2.2416, - "step": 2708 - }, - { - "epoch": 0.45, - "learning_rate": 0.0004462372881355933, - "loss": 2.1677, - "step": 2709 - }, - { - "epoch": 0.45, - "learning_rate": 0.00044610169491525426, - "loss": 2.2013, - "step": 2710 - }, - { - "epoch": 0.45, - "learning_rate": 0.0004459661016949153, - "loss": 2.1218, - "step": 2711 - }, - { - "epoch": 0.45, - "learning_rate": 0.0004458305084745763, - "loss": 2.1403, - "step": 2712 - }, - { - "epoch": 0.45, - "learning_rate": 0.00044569491525423737, - "loss": 2.2065, - "step": 2713 - }, - { - "epoch": 0.45, - "learning_rate": 0.0004455593220338983, - "loss": 2.1355, - "step": 2714 - }, - { - "epoch": 0.45, - "learning_rate": 0.0004454237288135594, - "loss": 2.1722, - "step": 2715 - }, - { - "epoch": 0.45, - "learning_rate": 0.00044528813559322036, - "loss": 2.2116, - "step": 2716 - }, - { - "epoch": 0.45, - "learning_rate": 0.0004451525423728814, - "loss": 2.1821, - "step": 2717 - }, - { - "epoch": 0.45, - "learning_rate": 0.0004450169491525424, - "loss": 2.2321, - "step": 2718 - }, - { - "epoch": 0.45, - "learning_rate": 0.00044488135593220336, - "loss": 2.1749, - "step": 2719 - }, - { - "epoch": 0.45, - "learning_rate": 0.00044474576271186445, - "loss": 2.219, - "step": 2720 - }, - { - "epoch": 0.45, - "learning_rate": 0.00044461016949152543, - "loss": 2.1305, - "step": 2721 - }, - { - "epoch": 0.45, - "learning_rate": 0.00044447457627118646, - "loss": 2.1545, - "step": 2722 - }, - { - "epoch": 0.45, - "learning_rate": 0.00044433898305084744, - "loss": 2.2897, - "step": 2723 - }, - { - "epoch": 0.45, - "learning_rate": 0.00044420338983050853, - "loss": 2.1997, - "step": 2724 - }, - { - "epoch": 0.45, - "learning_rate": 0.00044406779661016946, - "loss": 2.2596, - "step": 2725 - }, - { - "epoch": 0.45, - "learning_rate": 0.00044393220338983055, - "loss": 2.2346, - "step": 2726 - }, - { - "epoch": 0.45, - "learning_rate": 0.00044379661016949153, - "loss": 2.1678, - "step": 2727 - }, - { - "epoch": 0.45, - "learning_rate": 0.00044366101694915256, - "loss": 2.1903, - "step": 2728 - }, - { - "epoch": 0.45, - "learning_rate": 0.00044352542372881354, - "loss": 2.1976, - "step": 2729 - }, - { - "epoch": 0.46, - "learning_rate": 0.00044338983050847463, - "loss": 2.1003, - "step": 2730 - }, - { - "epoch": 0.46, - "learning_rate": 0.0004432542372881356, - "loss": 2.0941, - "step": 2731 - }, - { - "epoch": 0.46, - "learning_rate": 0.00044311864406779665, - "loss": 2.2205, - "step": 2732 - }, - { - "epoch": 0.46, - "learning_rate": 0.00044298305084745763, - "loss": 2.108, - "step": 2733 - }, - { - "epoch": 0.46, - "learning_rate": 0.0004428474576271187, - "loss": 2.1486, - "step": 2734 - }, - { - "epoch": 0.46, - "learning_rate": 0.0004427118644067797, - "loss": 2.1967, - "step": 2735 - }, - { - "epoch": 0.46, - "learning_rate": 0.00044257627118644073, - "loss": 2.2078, - "step": 2736 - }, - { - "epoch": 0.46, - "learning_rate": 0.0004424406779661017, - "loss": 2.2254, - "step": 2737 - }, - { - "epoch": 0.46, - "learning_rate": 0.00044230508474576275, - "loss": 2.2598, - "step": 2738 - }, - { - "epoch": 0.46, - "learning_rate": 0.00044216949152542373, - "loss": 2.2207, - "step": 2739 - }, - { - "epoch": 0.46, - "learning_rate": 0.0004420338983050847, - "loss": 2.1272, - "step": 2740 - }, - { - "epoch": 0.46, - "learning_rate": 0.0004418983050847458, - "loss": 2.2438, - "step": 2741 - }, - { - "epoch": 0.46, - "learning_rate": 0.0004417627118644068, - "loss": 2.1494, - "step": 2742 - }, - { - "epoch": 0.46, - "learning_rate": 0.0004416271186440678, - "loss": 2.2687, - "step": 2743 - }, - { - "epoch": 0.46, - "learning_rate": 0.0004414915254237288, - "loss": 2.1845, - "step": 2744 - }, - { - "epoch": 0.46, - "learning_rate": 0.0004413559322033899, - "loss": 2.1781, - "step": 2745 - }, - { - "epoch": 0.46, - "learning_rate": 0.00044122033898305087, - "loss": 2.1787, - "step": 2746 - }, - { - "epoch": 0.46, - "learning_rate": 0.0004410847457627119, - "loss": 2.1107, - "step": 2747 - }, - { - "epoch": 0.46, - "learning_rate": 0.0004409491525423729, - "loss": 2.1027, - "step": 2748 - }, - { - "epoch": 0.46, - "learning_rate": 0.0004408135593220339, - "loss": 2.2099, - "step": 2749 - }, - { - "epoch": 0.46, - "learning_rate": 0.0004406779661016949, - "loss": 2.1507, - "step": 2750 - }, - { - "epoch": 0.46, - "learning_rate": 0.000440542372881356, - "loss": 2.168, - "step": 2751 - }, - { - "epoch": 0.46, - "learning_rate": 0.00044040677966101697, - "loss": 2.1331, - "step": 2752 - }, - { - "epoch": 0.46, - "learning_rate": 0.000440271186440678, - "loss": 2.286, - "step": 2753 - }, - { - "epoch": 0.46, - "learning_rate": 0.000440135593220339, - "loss": 2.1822, - "step": 2754 - }, - { - "epoch": 0.46, - "learning_rate": 0.00044000000000000007, - "loss": 2.1692, - "step": 2755 - }, - { - "epoch": 0.46, - "learning_rate": 0.00043986440677966105, - "loss": 2.1917, - "step": 2756 - }, - { - "epoch": 0.46, - "learning_rate": 0.0004397288135593221, - "loss": 2.1979, - "step": 2757 - }, - { - "epoch": 0.46, - "learning_rate": 0.00043959322033898307, - "loss": 2.1971, - "step": 2758 - }, - { - "epoch": 0.46, - "learning_rate": 0.00043945762711864415, - "loss": 2.1208, - "step": 2759 - }, - { - "epoch": 0.46, - "learning_rate": 0.0004393220338983051, - "loss": 2.2626, - "step": 2760 - }, - { - "epoch": 0.46, - "learning_rate": 0.00043918644067796606, - "loss": 2.2041, - "step": 2761 - }, - { - "epoch": 0.46, - "learning_rate": 0.00043905084745762715, - "loss": 2.1519, - "step": 2762 - }, - { - "epoch": 0.46, - "learning_rate": 0.00043891525423728813, - "loss": 2.2102, - "step": 2763 - }, - { - "epoch": 0.46, - "learning_rate": 0.00043877966101694917, - "loss": 2.1933, - "step": 2764 - }, - { - "epoch": 0.46, - "learning_rate": 0.00043864406779661015, - "loss": 2.124, - "step": 2765 - }, - { - "epoch": 0.46, - "learning_rate": 0.00043850847457627124, - "loss": 2.1205, - "step": 2766 - }, - { - "epoch": 0.46, - "learning_rate": 0.0004383728813559322, - "loss": 2.306, - "step": 2767 - }, - { - "epoch": 0.46, - "learning_rate": 0.00043823728813559325, - "loss": 2.196, - "step": 2768 - }, - { - "epoch": 0.46, - "learning_rate": 0.00043810169491525423, - "loss": 2.2451, - "step": 2769 - }, - { - "epoch": 0.46, - "learning_rate": 0.0004379661016949153, - "loss": 2.1794, - "step": 2770 - }, - { - "epoch": 0.46, - "learning_rate": 0.00043783050847457625, - "loss": 2.1634, - "step": 2771 - }, - { - "epoch": 0.46, - "learning_rate": 0.00043769491525423734, - "loss": 2.1123, - "step": 2772 - }, - { - "epoch": 0.46, - "learning_rate": 0.0004375593220338983, - "loss": 2.2193, - "step": 2773 - }, - { - "epoch": 0.46, - "learning_rate": 0.00043742372881355935, - "loss": 2.1491, - "step": 2774 - }, - { - "epoch": 0.46, - "learning_rate": 0.00043728813559322033, - "loss": 2.2288, - "step": 2775 - }, - { - "epoch": 0.46, - "learning_rate": 0.0004371525423728814, - "loss": 2.1587, - "step": 2776 - }, - { - "epoch": 0.46, - "learning_rate": 0.0004370169491525424, - "loss": 2.2145, - "step": 2777 - }, - { - "epoch": 0.46, - "learning_rate": 0.00043688135593220344, - "loss": 2.1669, - "step": 2778 - }, - { - "epoch": 0.46, - "learning_rate": 0.0004367457627118644, - "loss": 2.1546, - "step": 2779 - }, - { - "epoch": 0.46, - "learning_rate": 0.0004366101694915254, - "loss": 2.1785, - "step": 2780 - }, - { - "epoch": 0.46, - "learning_rate": 0.0004364745762711865, - "loss": 2.1258, - "step": 2781 - }, - { - "epoch": 0.46, - "learning_rate": 0.0004363389830508474, - "loss": 2.2338, - "step": 2782 - }, - { - "epoch": 0.46, - "learning_rate": 0.0004362033898305085, - "loss": 2.1971, - "step": 2783 - }, - { - "epoch": 0.46, - "learning_rate": 0.0004360677966101695, - "loss": 2.1648, - "step": 2784 - }, - { - "epoch": 0.46, - "learning_rate": 0.0004359322033898305, - "loss": 2.1698, - "step": 2785 - }, - { - "epoch": 0.46, - "learning_rate": 0.0004357966101694915, - "loss": 2.1884, - "step": 2786 - }, - { - "epoch": 0.46, - "learning_rate": 0.0004356610169491526, - "loss": 2.1965, - "step": 2787 - }, - { - "epoch": 0.46, - "learning_rate": 0.00043552542372881357, - "loss": 2.263, - "step": 2788 - }, - { - "epoch": 0.46, - "learning_rate": 0.0004353898305084746, - "loss": 2.2093, - "step": 2789 - }, - { - "epoch": 0.47, - "learning_rate": 0.0004352542372881356, - "loss": 2.1458, - "step": 2790 - }, - { - "epoch": 0.47, - "learning_rate": 0.00043511864406779667, - "loss": 2.1612, - "step": 2791 - }, - { - "epoch": 0.47, - "learning_rate": 0.00043498305084745765, - "loss": 2.2038, - "step": 2792 - }, - { - "epoch": 0.47, - "learning_rate": 0.0004348474576271187, - "loss": 2.1496, - "step": 2793 - }, - { - "epoch": 0.47, - "learning_rate": 0.00043471186440677967, - "loss": 2.1516, - "step": 2794 - }, - { - "epoch": 0.47, - "learning_rate": 0.00043457627118644076, - "loss": 2.1516, - "step": 2795 - }, - { - "epoch": 0.47, - "learning_rate": 0.0004344406779661017, - "loss": 2.2392, - "step": 2796 - }, - { - "epoch": 0.47, - "learning_rate": 0.00043430508474576277, - "loss": 2.1482, - "step": 2797 - }, - { - "epoch": 0.47, - "learning_rate": 0.00043416949152542375, - "loss": 2.1994, - "step": 2798 - }, - { - "epoch": 0.47, - "learning_rate": 0.0004340338983050848, - "loss": 2.1625, - "step": 2799 - }, - { - "epoch": 0.47, - "learning_rate": 0.00043389830508474577, - "loss": 2.1373, - "step": 2800 - }, - { - "epoch": 0.47, - "learning_rate": 0.00043376271186440675, - "loss": 2.2308, - "step": 2801 - }, - { - "epoch": 0.47, - "learning_rate": 0.00043362711864406784, - "loss": 2.132, - "step": 2802 - }, - { - "epoch": 0.47, - "learning_rate": 0.0004334915254237288, - "loss": 2.1553, - "step": 2803 - }, - { - "epoch": 0.47, - "learning_rate": 0.00043335593220338985, - "loss": 2.1216, - "step": 2804 - }, - { - "epoch": 0.47, - "learning_rate": 0.00043322033898305083, - "loss": 2.1978, - "step": 2805 - }, - { - "epoch": 0.47, - "learning_rate": 0.0004330847457627119, - "loss": 2.2046, - "step": 2806 - }, - { - "epoch": 0.47, - "learning_rate": 0.00043294915254237285, - "loss": 2.1302, - "step": 2807 - }, - { - "epoch": 0.47, - "learning_rate": 0.00043281355932203394, - "loss": 2.1334, - "step": 2808 - }, - { - "epoch": 0.47, - "learning_rate": 0.0004326779661016949, - "loss": 2.1931, - "step": 2809 - }, - { - "epoch": 0.47, - "learning_rate": 0.00043254237288135595, - "loss": 2.0879, - "step": 2810 - }, - { - "epoch": 0.47, - "learning_rate": 0.00043240677966101693, - "loss": 2.1784, - "step": 2811 - }, - { - "epoch": 0.47, - "learning_rate": 0.000432271186440678, - "loss": 2.18, - "step": 2812 - }, - { - "epoch": 0.47, - "learning_rate": 0.000432135593220339, - "loss": 2.1735, - "step": 2813 - }, - { - "epoch": 0.47, - "learning_rate": 0.00043200000000000004, - "loss": 2.0985, - "step": 2814 - }, - { - "epoch": 0.47, - "learning_rate": 0.000431864406779661, - "loss": 2.1894, - "step": 2815 - }, - { - "epoch": 0.47, - "learning_rate": 0.0004317288135593221, - "loss": 2.1497, - "step": 2816 - }, - { - "epoch": 0.47, - "learning_rate": 0.0004315932203389831, - "loss": 2.1981, - "step": 2817 - }, - { - "epoch": 0.47, - "learning_rate": 0.0004314576271186441, - "loss": 2.2008, - "step": 2818 - }, - { - "epoch": 0.47, - "learning_rate": 0.0004313220338983051, - "loss": 2.1571, - "step": 2819 - }, - { - "epoch": 0.47, - "learning_rate": 0.00043118644067796614, - "loss": 2.2743, - "step": 2820 - }, - { - "epoch": 0.47, - "learning_rate": 0.0004310508474576271, - "loss": 2.1833, - "step": 2821 - }, - { - "epoch": 0.47, - "learning_rate": 0.0004309152542372881, - "loss": 2.1796, - "step": 2822 - }, - { - "epoch": 0.47, - "learning_rate": 0.0004307796610169492, - "loss": 2.2698, - "step": 2823 - }, - { - "epoch": 0.47, - "learning_rate": 0.00043064406779661017, - "loss": 2.2112, - "step": 2824 - }, - { - "epoch": 0.47, - "learning_rate": 0.0004305084745762712, - "loss": 2.1944, - "step": 2825 - }, - { - "epoch": 0.47, - "learning_rate": 0.0004303728813559322, - "loss": 2.1812, - "step": 2826 - }, - { - "epoch": 0.47, - "learning_rate": 0.0004302372881355933, - "loss": 2.2353, - "step": 2827 - }, - { - "epoch": 0.47, - "learning_rate": 0.00043010169491525425, - "loss": 2.2165, - "step": 2828 - }, - { - "epoch": 0.47, - "learning_rate": 0.0004299661016949153, - "loss": 2.1878, - "step": 2829 - }, - { - "epoch": 0.47, - "learning_rate": 0.00042983050847457627, - "loss": 2.1921, - "step": 2830 - }, - { - "epoch": 0.47, - "learning_rate": 0.0004296949152542373, - "loss": 2.1842, - "step": 2831 - }, - { - "epoch": 0.47, - "learning_rate": 0.0004295593220338983, - "loss": 2.2485, - "step": 2832 - }, - { - "epoch": 0.47, - "learning_rate": 0.0004294237288135594, - "loss": 2.1801, - "step": 2833 - }, - { - "epoch": 0.47, - "learning_rate": 0.00042928813559322035, - "loss": 2.2013, - "step": 2834 - }, - { - "epoch": 0.47, - "learning_rate": 0.0004291525423728814, - "loss": 2.2313, - "step": 2835 - }, - { - "epoch": 0.47, - "learning_rate": 0.00042901694915254237, - "loss": 2.1955, - "step": 2836 - }, - { - "epoch": 0.47, - "learning_rate": 0.00042888135593220346, - "loss": 2.1234, - "step": 2837 - }, - { - "epoch": 0.47, - "learning_rate": 0.00042874576271186444, - "loss": 2.1559, - "step": 2838 - }, - { - "epoch": 0.47, - "learning_rate": 0.0004286101694915255, - "loss": 2.1444, - "step": 2839 - }, - { - "epoch": 0.47, - "learning_rate": 0.00042847457627118645, - "loss": 2.1511, - "step": 2840 - }, - { - "epoch": 0.47, - "learning_rate": 0.00042833898305084754, - "loss": 2.1536, - "step": 2841 - }, - { - "epoch": 0.47, - "learning_rate": 0.00042820338983050847, - "loss": 2.2474, - "step": 2842 - }, - { - "epoch": 0.47, - "learning_rate": 0.00042806779661016945, - "loss": 2.1643, - "step": 2843 - }, - { - "epoch": 0.47, - "learning_rate": 0.00042793220338983054, - "loss": 2.1671, - "step": 2844 - }, - { - "epoch": 0.47, - "learning_rate": 0.0004277966101694915, - "loss": 2.1588, - "step": 2845 - }, - { - "epoch": 0.47, - "learning_rate": 0.00042766101694915255, - "loss": 2.1799, - "step": 2846 - }, - { - "epoch": 0.47, - "learning_rate": 0.00042752542372881354, - "loss": 2.1565, - "step": 2847 - }, - { - "epoch": 0.47, - "learning_rate": 0.0004273898305084746, - "loss": 2.1751, - "step": 2848 - }, - { - "epoch": 0.47, - "learning_rate": 0.0004272542372881356, - "loss": 2.1091, - "step": 2849 - }, - { - "epoch": 0.47, - "learning_rate": 0.00042711864406779664, - "loss": 2.2063, - "step": 2850 - }, - { - "epoch": 0.48, - "learning_rate": 0.0004269830508474576, - "loss": 2.0999, - "step": 2851 - }, - { - "epoch": 0.48, - "learning_rate": 0.0004268474576271187, - "loss": 2.1041, - "step": 2852 - }, - { - "epoch": 0.48, - "learning_rate": 0.00042671186440677964, - "loss": 2.1839, - "step": 2853 - }, - { - "epoch": 0.48, - "learning_rate": 0.0004265762711864407, - "loss": 2.0895, - "step": 2854 - }, - { - "epoch": 0.48, - "learning_rate": 0.0004264406779661017, - "loss": 2.1978, - "step": 2855 - }, - { - "epoch": 0.48, - "learning_rate": 0.00042630508474576274, - "loss": 2.1899, - "step": 2856 - }, - { - "epoch": 0.48, - "learning_rate": 0.0004261694915254237, - "loss": 2.1461, - "step": 2857 - }, - { - "epoch": 0.48, - "learning_rate": 0.0004260338983050848, - "loss": 2.2126, - "step": 2858 - }, - { - "epoch": 0.48, - "learning_rate": 0.0004258983050847458, - "loss": 2.1616, - "step": 2859 - }, - { - "epoch": 0.48, - "learning_rate": 0.0004257627118644068, - "loss": 2.1526, - "step": 2860 - }, - { - "epoch": 0.48, - "learning_rate": 0.0004256271186440678, - "loss": 2.143, - "step": 2861 - }, - { - "epoch": 0.48, - "learning_rate": 0.0004254915254237289, - "loss": 2.1868, - "step": 2862 - }, - { - "epoch": 0.48, - "learning_rate": 0.0004253559322033899, - "loss": 2.0951, - "step": 2863 - }, - { - "epoch": 0.48, - "learning_rate": 0.0004252203389830508, - "loss": 2.206, - "step": 2864 - }, - { - "epoch": 0.48, - "learning_rate": 0.0004250847457627119, - "loss": 2.1904, - "step": 2865 - }, - { - "epoch": 0.48, - "learning_rate": 0.00042494915254237287, - "loss": 2.2698, - "step": 2866 - }, - { - "epoch": 0.48, - "learning_rate": 0.0004248135593220339, - "loss": 2.1582, - "step": 2867 - }, - { - "epoch": 0.48, - "learning_rate": 0.0004246779661016949, - "loss": 2.1616, - "step": 2868 - }, - { - "epoch": 0.48, - "learning_rate": 0.000424542372881356, - "loss": 2.2028, - "step": 2869 - }, - { - "epoch": 0.48, - "learning_rate": 0.00042440677966101696, - "loss": 2.2078, - "step": 2870 - }, - { - "epoch": 0.48, - "learning_rate": 0.000424271186440678, - "loss": 2.1194, - "step": 2871 - }, - { - "epoch": 0.48, - "learning_rate": 0.00042413559322033897, - "loss": 2.1585, - "step": 2872 - }, - { - "epoch": 0.48, - "learning_rate": 0.00042400000000000006, - "loss": 2.1722, - "step": 2873 - }, - { - "epoch": 0.48, - "learning_rate": 0.00042386440677966104, - "loss": 2.1138, - "step": 2874 - }, - { - "epoch": 0.48, - "learning_rate": 0.0004237288135593221, - "loss": 2.118, - "step": 2875 - }, - { - "epoch": 0.48, - "learning_rate": 0.00042359322033898306, - "loss": 2.1648, - "step": 2876 - }, - { - "epoch": 0.48, - "learning_rate": 0.0004234576271186441, - "loss": 2.1642, - "step": 2877 - }, - { - "epoch": 0.48, - "learning_rate": 0.00042332203389830507, - "loss": 2.1489, - "step": 2878 - }, - { - "epoch": 0.48, - "learning_rate": 0.00042318644067796616, - "loss": 2.1387, - "step": 2879 - }, - { - "epoch": 0.48, - "learning_rate": 0.00042305084745762714, - "loss": 2.1737, - "step": 2880 - }, - { - "epoch": 0.48, - "learning_rate": 0.0004229152542372882, - "loss": 2.1702, - "step": 2881 - }, - { - "epoch": 0.48, - "learning_rate": 0.00042277966101694916, - "loss": 2.0842, - "step": 2882 - }, - { - "epoch": 0.48, - "learning_rate": 0.00042264406779661025, - "loss": 2.1522, - "step": 2883 - }, - { - "epoch": 0.48, - "learning_rate": 0.0004225084745762712, - "loss": 2.2546, - "step": 2884 - }, - { - "epoch": 0.48, - "learning_rate": 0.0004223728813559322, - "loss": 2.1756, - "step": 2885 - }, - { - "epoch": 0.48, - "learning_rate": 0.00042223728813559324, - "loss": 2.2049, - "step": 2886 - }, - { - "epoch": 0.48, - "learning_rate": 0.0004221016949152542, - "loss": 2.1915, - "step": 2887 - }, - { - "epoch": 0.48, - "learning_rate": 0.00042196610169491526, - "loss": 2.2158, - "step": 2888 - }, - { - "epoch": 0.48, - "learning_rate": 0.00042183050847457624, - "loss": 2.1971, - "step": 2889 - }, - { - "epoch": 0.48, - "learning_rate": 0.0004216949152542373, - "loss": 2.1891, - "step": 2890 - }, - { - "epoch": 0.48, - "learning_rate": 0.0004215593220338983, - "loss": 2.1599, - "step": 2891 - }, - { - "epoch": 0.48, - "learning_rate": 0.00042142372881355934, - "loss": 2.2192, - "step": 2892 - }, - { - "epoch": 0.48, - "learning_rate": 0.0004212881355932203, - "loss": 2.1888, - "step": 2893 - }, - { - "epoch": 0.48, - "learning_rate": 0.0004211525423728814, - "loss": 2.1549, - "step": 2894 - }, - { - "epoch": 0.48, - "learning_rate": 0.0004210169491525424, - "loss": 2.0905, - "step": 2895 - }, - { - "epoch": 0.48, - "learning_rate": 0.0004208813559322034, - "loss": 2.1428, - "step": 2896 - }, - { - "epoch": 0.48, - "learning_rate": 0.0004207457627118644, - "loss": 2.0789, - "step": 2897 - }, - { - "epoch": 0.48, - "learning_rate": 0.0004206101694915255, - "loss": 2.2274, - "step": 2898 - }, - { - "epoch": 0.48, - "learning_rate": 0.0004204745762711864, - "loss": 2.2216, - "step": 2899 - }, - { - "epoch": 0.48, - "learning_rate": 0.0004203389830508475, - "loss": 2.1846, - "step": 2900 - }, - { - "epoch": 0.48, - "learning_rate": 0.0004202033898305085, - "loss": 2.2441, - "step": 2901 - }, - { - "epoch": 0.48, - "learning_rate": 0.00042006779661016953, - "loss": 2.1295, - "step": 2902 - }, - { - "epoch": 0.48, - "learning_rate": 0.0004199322033898305, - "loss": 2.2114, - "step": 2903 - }, - { - "epoch": 0.48, - "learning_rate": 0.0004197966101694916, - "loss": 2.1641, - "step": 2904 - }, - { - "epoch": 0.48, - "learning_rate": 0.0004196610169491526, - "loss": 2.1816, - "step": 2905 - }, - { - "epoch": 0.48, - "learning_rate": 0.00041952542372881356, - "loss": 2.2033, - "step": 2906 - }, - { - "epoch": 0.48, - "learning_rate": 0.0004193898305084746, - "loss": 2.1715, - "step": 2907 - }, - { - "epoch": 0.48, - "learning_rate": 0.0004192542372881356, - "loss": 2.1575, - "step": 2908 - }, - { - "epoch": 0.48, - "learning_rate": 0.00041911864406779666, - "loss": 2.2248, - "step": 2909 - }, - { - "epoch": 0.48, - "learning_rate": 0.0004189830508474576, - "loss": 2.281, - "step": 2910 - }, - { - "epoch": 0.49, - "learning_rate": 0.0004188474576271187, - "loss": 2.2173, - "step": 2911 - }, - { - "epoch": 0.49, - "learning_rate": 0.00041871186440677966, - "loss": 2.1685, - "step": 2912 - }, - { - "epoch": 0.49, - "learning_rate": 0.0004185762711864407, - "loss": 2.1741, - "step": 2913 - }, - { - "epoch": 0.49, - "learning_rate": 0.0004184406779661017, - "loss": 2.1727, - "step": 2914 - }, - { - "epoch": 0.49, - "learning_rate": 0.00041830508474576276, - "loss": 2.1974, - "step": 2915 - }, - { - "epoch": 0.49, - "learning_rate": 0.00041816949152542374, - "loss": 2.1526, - "step": 2916 - }, - { - "epoch": 0.49, - "learning_rate": 0.0004180338983050848, - "loss": 2.2312, - "step": 2917 - }, - { - "epoch": 0.49, - "learning_rate": 0.00041789830508474576, - "loss": 2.1809, - "step": 2918 - }, - { - "epoch": 0.49, - "learning_rate": 0.00041776271186440685, - "loss": 2.2041, - "step": 2919 - }, - { - "epoch": 0.49, - "learning_rate": 0.00041762711864406783, - "loss": 2.1479, - "step": 2920 - }, - { - "epoch": 0.49, - "learning_rate": 0.00041749152542372886, - "loss": 2.1227, - "step": 2921 - }, - { - "epoch": 0.49, - "learning_rate": 0.00041735593220338984, - "loss": 2.2373, - "step": 2922 - }, - { - "epoch": 0.49, - "learning_rate": 0.00041722033898305093, - "loss": 2.1932, - "step": 2923 - }, - { - "epoch": 0.49, - "learning_rate": 0.00041708474576271186, - "loss": 2.1475, - "step": 2924 - }, - { - "epoch": 0.49, - "learning_rate": 0.00041694915254237295, - "loss": 2.229, - "step": 2925 - }, - { - "epoch": 0.49, - "learning_rate": 0.00041681355932203393, - "loss": 2.2022, - "step": 2926 - }, - { - "epoch": 0.49, - "learning_rate": 0.0004166779661016949, - "loss": 2.1027, - "step": 2927 - }, - { - "epoch": 0.49, - "learning_rate": 0.00041654237288135594, - "loss": 2.1726, - "step": 2928 - }, - { - "epoch": 0.49, - "learning_rate": 0.0004164067796610169, - "loss": 2.1762, - "step": 2929 - }, - { - "epoch": 0.49, - "learning_rate": 0.000416271186440678, - "loss": 2.1976, - "step": 2930 - }, - { - "epoch": 0.49, - "learning_rate": 0.000416135593220339, - "loss": 2.1517, - "step": 2931 - }, - { - "epoch": 0.49, - "learning_rate": 0.00041600000000000003, - "loss": 2.1961, - "step": 2932 - }, - { - "epoch": 0.49, - "learning_rate": 0.000415864406779661, - "loss": 2.1523, - "step": 2933 - }, - { - "epoch": 0.49, - "learning_rate": 0.0004157288135593221, - "loss": 2.1589, - "step": 2934 - }, - { - "epoch": 0.49, - "learning_rate": 0.000415593220338983, - "loss": 2.2447, - "step": 2935 - }, - { - "epoch": 0.49, - "learning_rate": 0.0004154576271186441, - "loss": 2.1304, - "step": 2936 - }, - { - "epoch": 0.49, - "learning_rate": 0.0004153220338983051, - "loss": 2.137, - "step": 2937 - }, - { - "epoch": 0.49, - "learning_rate": 0.00041518644067796613, - "loss": 2.1495, - "step": 2938 - }, - { - "epoch": 0.49, - "learning_rate": 0.0004150508474576271, - "loss": 2.1051, - "step": 2939 - }, - { - "epoch": 0.49, - "learning_rate": 0.0004149152542372882, - "loss": 2.1827, - "step": 2940 - }, - { - "epoch": 0.49, - "learning_rate": 0.0004147796610169492, - "loss": 2.2329, - "step": 2941 - }, - { - "epoch": 0.49, - "learning_rate": 0.0004146440677966102, - "loss": 2.1234, - "step": 2942 - }, - { - "epoch": 0.49, - "learning_rate": 0.0004145084745762712, - "loss": 2.1709, - "step": 2943 - }, - { - "epoch": 0.49, - "learning_rate": 0.0004143728813559323, - "loss": 2.1335, - "step": 2944 - }, - { - "epoch": 0.49, - "learning_rate": 0.00041423728813559326, - "loss": 2.1752, - "step": 2945 - }, - { - "epoch": 0.49, - "learning_rate": 0.0004141016949152543, - "loss": 2.1176, - "step": 2946 - }, - { - "epoch": 0.49, - "learning_rate": 0.0004139661016949153, - "loss": 2.1184, - "step": 2947 - }, - { - "epoch": 0.49, - "learning_rate": 0.00041383050847457626, - "loss": 2.0973, - "step": 2948 - }, - { - "epoch": 0.49, - "learning_rate": 0.0004136949152542373, - "loss": 2.1017, - "step": 2949 - }, - { - "epoch": 0.49, - "learning_rate": 0.0004135593220338983, - "loss": 2.1777, - "step": 2950 - }, - { - "epoch": 0.49, - "learning_rate": 0.00041342372881355936, - "loss": 2.1708, - "step": 2951 - }, - { - "epoch": 0.49, - "learning_rate": 0.00041328813559322034, - "loss": 2.2064, - "step": 2952 - }, - { - "epoch": 0.49, - "learning_rate": 0.0004131525423728814, - "loss": 2.1337, - "step": 2953 - }, - { - "epoch": 0.49, - "learning_rate": 0.00041301694915254236, - "loss": 2.1547, - "step": 2954 - }, - { - "epoch": 0.49, - "learning_rate": 0.00041288135593220345, - "loss": 2.1788, - "step": 2955 - }, - { - "epoch": 0.49, - "learning_rate": 0.00041274576271186443, - "loss": 2.2068, - "step": 2956 - }, - { - "epoch": 0.49, - "learning_rate": 0.00041261016949152546, - "loss": 2.1452, - "step": 2957 - }, - { - "epoch": 0.49, - "learning_rate": 0.00041247457627118645, - "loss": 2.1059, - "step": 2958 - }, - { - "epoch": 0.49, - "learning_rate": 0.0004123389830508475, - "loss": 2.1151, - "step": 2959 - }, - { - "epoch": 0.49, - "learning_rate": 0.00041220338983050846, - "loss": 2.2186, - "step": 2960 - }, - { - "epoch": 0.49, - "learning_rate": 0.00041206779661016955, - "loss": 2.2221, - "step": 2961 - }, - { - "epoch": 0.49, - "learning_rate": 0.00041193220338983053, - "loss": 2.1348, - "step": 2962 - }, - { - "epoch": 0.49, - "learning_rate": 0.00041179661016949156, - "loss": 2.1694, - "step": 2963 - }, - { - "epoch": 0.49, - "learning_rate": 0.00041166101694915255, - "loss": 2.1813, - "step": 2964 - }, - { - "epoch": 0.49, - "learning_rate": 0.00041152542372881363, - "loss": 2.2421, - "step": 2965 - }, - { - "epoch": 0.49, - "learning_rate": 0.0004113898305084746, - "loss": 2.2113, - "step": 2966 - }, - { - "epoch": 0.49, - "learning_rate": 0.0004112542372881356, - "loss": 2.146, - "step": 2967 - }, - { - "epoch": 0.49, - "learning_rate": 0.00041111864406779663, - "loss": 2.1209, - "step": 2968 - }, - { - "epoch": 0.49, - "learning_rate": 0.0004109830508474576, - "loss": 2.2126, - "step": 2969 - }, - { - "epoch": 0.49, - "learning_rate": 0.00041084745762711865, - "loss": 2.1748, - "step": 2970 - }, - { - "epoch": 0.5, - "learning_rate": 0.0004107118644067796, - "loss": 2.0626, - "step": 2971 - }, - { - "epoch": 0.5, - "learning_rate": 0.0004105762711864407, - "loss": 2.1513, - "step": 2972 - }, - { - "epoch": 0.5, - "learning_rate": 0.0004104406779661017, - "loss": 2.1045, - "step": 2973 - }, - { - "epoch": 0.5, - "learning_rate": 0.00041030508474576273, - "loss": 2.218, - "step": 2974 - }, - { - "epoch": 0.5, - "learning_rate": 0.0004101694915254237, - "loss": 2.1739, - "step": 2975 - }, - { - "epoch": 0.5, - "learning_rate": 0.0004100338983050848, - "loss": 2.1967, - "step": 2976 - }, - { - "epoch": 0.5, - "learning_rate": 0.0004098983050847458, - "loss": 2.1824, - "step": 2977 - }, - { - "epoch": 0.5, - "learning_rate": 0.0004097627118644068, - "loss": 2.1947, - "step": 2978 - }, - { - "epoch": 0.5, - "learning_rate": 0.0004096271186440678, - "loss": 2.1325, - "step": 2979 - }, - { - "epoch": 0.5, - "learning_rate": 0.0004094915254237289, - "loss": 2.2497, - "step": 2980 - }, - { - "epoch": 0.5, - "learning_rate": 0.0004093559322033898, - "loss": 2.1418, - "step": 2981 - }, - { - "epoch": 0.5, - "learning_rate": 0.0004092203389830509, - "loss": 2.1621, - "step": 2982 - }, - { - "epoch": 0.5, - "learning_rate": 0.0004090847457627119, - "loss": 2.1295, - "step": 2983 - }, - { - "epoch": 0.5, - "learning_rate": 0.0004089491525423729, - "loss": 2.1085, - "step": 2984 - }, - { - "epoch": 0.5, - "learning_rate": 0.0004088135593220339, - "loss": 2.1285, - "step": 2985 - }, - { - "epoch": 0.5, - "learning_rate": 0.000408677966101695, - "loss": 2.0908, - "step": 2986 - }, - { - "epoch": 0.5, - "learning_rate": 0.00040854237288135597, - "loss": 2.151, - "step": 2987 - }, - { - "epoch": 0.5, - "learning_rate": 0.00040840677966101695, - "loss": 2.2518, - "step": 2988 - }, - { - "epoch": 0.5, - "learning_rate": 0.000408271186440678, - "loss": 2.0897, - "step": 2989 - }, - { - "epoch": 0.5, - "learning_rate": 0.00040813559322033896, - "loss": 2.2226, - "step": 2990 - }, - { - "epoch": 0.5, - "learning_rate": 0.00040800000000000005, - "loss": 2.1714, - "step": 2991 - }, - { - "epoch": 0.5, - "learning_rate": 0.000407864406779661, - "loss": 2.1256, - "step": 2992 - }, - { - "epoch": 0.5, - "learning_rate": 0.00040772881355932207, - "loss": 2.2306, - "step": 2993 - }, - { - "epoch": 0.5, - "learning_rate": 0.00040759322033898305, - "loss": 2.18, - "step": 2994 - }, - { - "epoch": 0.5, - "learning_rate": 0.0004074576271186441, - "loss": 2.2124, - "step": 2995 - }, - { - "epoch": 0.5, - "learning_rate": 0.00040732203389830506, - "loss": 2.1893, - "step": 2996 - }, - { - "epoch": 0.5, - "learning_rate": 0.00040718644067796615, - "loss": 2.1891, - "step": 2997 - }, - { - "epoch": 0.5, - "learning_rate": 0.00040705084745762713, - "loss": 2.2257, - "step": 2998 - }, - { - "epoch": 0.5, - "learning_rate": 0.00040691525423728817, - "loss": 2.0973, - "step": 2999 - }, - { - "epoch": 0.5, - "learning_rate": 0.00040677966101694915, - "loss": 2.221, - "step": 3000 - }, - { - "epoch": 0.5, - "eval_gen_len": 19.0, - "eval_loss": 2.0400795936584473, - "eval_rouge1": 0.2657, - "eval_rouge2": 0.1001, - "eval_rougeL": 0.2227, - "eval_rougeLsum": 0.2226, - "eval_runtime": 21.6863, - "eval_samples_per_second": 2.306, - "eval_steps_per_second": 0.323, - "step": 3000 + "epoch": 0.1, + "eval_gen_len": 1023.0, + "eval_loss": 2.1868629455566406, + "eval_rouge1": 11.8686, + "eval_rouge2": 3.9117, + "eval_rougeL": 7.4858, + "eval_rougeLsum": 7.4779, + "eval_runtime": 8978.087, + "eval_samples_per_second": 0.056, + "eval_steps_per_second": 0.056, + "step": 200 } ], - "max_steps": 6000, + "max_steps": 2000, "num_train_epochs": 9223372036854775807, - "total_flos": 2.8570333609822003e+18, + "total_flos": 1.6036472259612058e+17, "trial_name": null, "trial_params": null }