diff --git "a/checkpoint-1000/trainer_state.json" "b/checkpoint-1000/trainer_state.json" new file mode 100644--- /dev/null +++ "b/checkpoint-1000/trainer_state.json" @@ -0,0 +1,6019 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.49975012493753124, + "eval_steps": 500, + "global_step": 1000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.000000000000001e-06, + "loss": 2.205, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 8.000000000000001e-06, + "loss": 2.1741, + "step": 2 + }, + { + "epoch": 0.0, + "learning_rate": 1.2e-05, + "loss": 2.3915, + "step": 3 + }, + { + "epoch": 0.0, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.2188, + "step": 4 + }, + { + "epoch": 0.0, + "learning_rate": 2e-05, + "loss": 2.2271, + "step": 5 + }, + { + "epoch": 0.0, + "learning_rate": 2.4e-05, + "loss": 2.1674, + "step": 6 + }, + { + "epoch": 0.0, + "learning_rate": 2.8000000000000003e-05, + "loss": 2.0138, + "step": 7 + }, + { + "epoch": 0.0, + "learning_rate": 3.2000000000000005e-05, + "loss": 1.8433, + "step": 8 + }, + { + "epoch": 0.0, + "learning_rate": 3.6e-05, + "loss": 2.0383, + "step": 9 + }, + { + "epoch": 0.0, + "learning_rate": 4e-05, + "loss": 2.1195, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 4.4000000000000006e-05, + "loss": 2.1695, + "step": 11 + }, + { + "epoch": 0.01, + "learning_rate": 4.8e-05, + "loss": 1.8903, + "step": 12 + }, + { + "epoch": 0.01, + "learning_rate": 5.2000000000000004e-05, + "loss": 1.729, + "step": 13 + }, + { + "epoch": 0.01, + "learning_rate": 5.6000000000000006e-05, + "loss": 2.0379, + "step": 14 + }, + { + "epoch": 0.01, + "learning_rate": 6e-05, + "loss": 1.9539, + "step": 15 + }, + { + "epoch": 0.01, + "learning_rate": 6.400000000000001e-05, + "loss": 2.0113, + "step": 16 + }, + { + "epoch": 0.01, + "learning_rate": 6.800000000000001e-05, + "loss": 1.6799, + "step": 17 + }, + { + "epoch": 0.01, + "learning_rate": 7.2e-05, + "loss": 2.0281, + "step": 18 + }, + { + "epoch": 0.01, + "learning_rate": 7.6e-05, + "loss": 1.8322, + "step": 19 + }, + { + "epoch": 0.01, + "learning_rate": 8e-05, + "loss": 1.8084, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 8.4e-05, + "loss": 1.9343, + "step": 21 + }, + { + "epoch": 0.01, + "learning_rate": 8.800000000000001e-05, + "loss": 1.9195, + "step": 22 + }, + { + "epoch": 0.01, + "learning_rate": 9.200000000000001e-05, + "loss": 1.8495, + "step": 23 + }, + { + "epoch": 0.01, + "learning_rate": 9.6e-05, + "loss": 1.8587, + "step": 24 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001, + "loss": 1.814, + "step": 25 + }, + { + "epoch": 0.01, + "learning_rate": 0.00010400000000000001, + "loss": 1.824, + "step": 26 + }, + { + "epoch": 0.01, + "learning_rate": 0.00010800000000000001, + "loss": 1.7464, + "step": 27 + }, + { + "epoch": 0.01, + "learning_rate": 0.00011200000000000001, + "loss": 1.8182, + "step": 28 + }, + { + "epoch": 0.01, + "learning_rate": 0.000116, + "loss": 1.6595, + "step": 29 + }, + { + "epoch": 0.01, + "learning_rate": 0.00012, + "loss": 1.8015, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 0.000124, + "loss": 1.825, + "step": 31 + }, + { + "epoch": 0.02, + "learning_rate": 0.00012800000000000002, + "loss": 1.8051, + "step": 32 + }, + { + "epoch": 0.02, + "learning_rate": 0.000132, + "loss": 1.7185, + "step": 33 + }, + { + "epoch": 0.02, + "learning_rate": 0.00013600000000000003, + "loss": 1.7766, + "step": 34 + }, + { + "epoch": 0.02, + "learning_rate": 0.00014, + "loss": 1.7529, + "step": 35 + }, + { + "epoch": 0.02, + "learning_rate": 0.000144, + "loss": 1.6643, + "step": 36 + }, + { + "epoch": 0.02, + "learning_rate": 0.000148, + "loss": 1.8265, + "step": 37 + }, + { + "epoch": 0.02, + "learning_rate": 0.000152, + "loss": 1.6598, + "step": 38 + }, + { + "epoch": 0.02, + "learning_rate": 0.00015600000000000002, + "loss": 1.5992, + "step": 39 + }, + { + "epoch": 0.02, + "learning_rate": 0.00016, + "loss": 1.9037, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 0.000164, + "loss": 1.7461, + "step": 41 + }, + { + "epoch": 0.02, + "learning_rate": 0.000168, + "loss": 1.7369, + "step": 42 + }, + { + "epoch": 0.02, + "learning_rate": 0.000172, + "loss": 1.78, + "step": 43 + }, + { + "epoch": 0.02, + "learning_rate": 0.00017600000000000002, + "loss": 1.8073, + "step": 44 + }, + { + "epoch": 0.02, + "learning_rate": 0.00018, + "loss": 1.6786, + "step": 45 + }, + { + "epoch": 0.02, + "learning_rate": 0.00018400000000000003, + "loss": 1.8868, + "step": 46 + }, + { + "epoch": 0.02, + "learning_rate": 0.000188, + "loss": 1.9341, + "step": 47 + }, + { + "epoch": 0.02, + "learning_rate": 0.000192, + "loss": 1.5922, + "step": 48 + }, + { + "epoch": 0.02, + "learning_rate": 0.000196, + "loss": 1.8224, + "step": 49 + }, + { + "epoch": 0.02, + "learning_rate": 0.0002, + "loss": 1.8133, + "step": 50 + }, + { + "epoch": 0.03, + "learning_rate": 0.00020400000000000003, + "loss": 1.8373, + "step": 51 + }, + { + "epoch": 0.03, + "learning_rate": 0.00020800000000000001, + "loss": 1.5649, + "step": 52 + }, + { + "epoch": 0.03, + "learning_rate": 0.00021200000000000003, + "loss": 1.7079, + "step": 53 + }, + { + "epoch": 0.03, + "learning_rate": 0.00021600000000000002, + "loss": 1.7249, + "step": 54 + }, + { + "epoch": 0.03, + "learning_rate": 0.00022000000000000003, + "loss": 1.7299, + "step": 55 + }, + { + "epoch": 0.03, + "learning_rate": 0.00022400000000000002, + "loss": 1.7068, + "step": 56 + }, + { + "epoch": 0.03, + "learning_rate": 0.00022799999999999999, + "loss": 1.8436, + "step": 57 + }, + { + "epoch": 0.03, + "learning_rate": 0.000232, + "loss": 1.7897, + "step": 58 + }, + { + "epoch": 0.03, + "learning_rate": 0.000236, + "loss": 1.8142, + "step": 59 + }, + { + "epoch": 0.03, + "learning_rate": 0.00024, + "loss": 1.8176, + "step": 60 + }, + { + "epoch": 0.03, + "learning_rate": 0.000244, + "loss": 1.7742, + "step": 61 + }, + { + "epoch": 0.03, + "learning_rate": 0.000248, + "loss": 1.6512, + "step": 62 + }, + { + "epoch": 0.03, + "learning_rate": 0.000252, + "loss": 1.806, + "step": 63 + }, + { + "epoch": 0.03, + "learning_rate": 0.00025600000000000004, + "loss": 1.8776, + "step": 64 + }, + { + "epoch": 0.03, + "learning_rate": 0.00026000000000000003, + "loss": 1.8309, + "step": 65 + }, + { + "epoch": 0.03, + "learning_rate": 0.000264, + "loss": 1.7217, + "step": 66 + }, + { + "epoch": 0.03, + "learning_rate": 0.000268, + "loss": 1.7578, + "step": 67 + }, + { + "epoch": 0.03, + "learning_rate": 0.00027200000000000005, + "loss": 1.7367, + "step": 68 + }, + { + "epoch": 0.03, + "learning_rate": 0.000276, + "loss": 1.4613, + "step": 69 + }, + { + "epoch": 0.03, + "learning_rate": 0.00028, + "loss": 1.85, + "step": 70 + }, + { + "epoch": 0.04, + "learning_rate": 0.000284, + "loss": 1.7672, + "step": 71 + }, + { + "epoch": 0.04, + "learning_rate": 0.000288, + "loss": 1.6758, + "step": 72 + }, + { + "epoch": 0.04, + "learning_rate": 0.000292, + "loss": 1.6693, + "step": 73 + }, + { + "epoch": 0.04, + "learning_rate": 0.000296, + "loss": 1.6831, + "step": 74 + }, + { + "epoch": 0.04, + "learning_rate": 0.00030000000000000003, + "loss": 1.7338, + "step": 75 + }, + { + "epoch": 0.04, + "learning_rate": 0.000304, + "loss": 1.6858, + "step": 76 + }, + { + "epoch": 0.04, + "learning_rate": 0.000308, + "loss": 1.7453, + "step": 77 + }, + { + "epoch": 0.04, + "learning_rate": 0.00031200000000000005, + "loss": 1.6362, + "step": 78 + }, + { + "epoch": 0.04, + "learning_rate": 0.00031600000000000004, + "loss": 1.875, + "step": 79 + }, + { + "epoch": 0.04, + "learning_rate": 0.00032, + "loss": 1.7411, + "step": 80 + }, + { + "epoch": 0.04, + "learning_rate": 0.000324, + "loss": 1.7638, + "step": 81 + }, + { + "epoch": 0.04, + "learning_rate": 0.000328, + "loss": 1.7488, + "step": 82 + }, + { + "epoch": 0.04, + "learning_rate": 0.000332, + "loss": 1.5624, + "step": 83 + }, + { + "epoch": 0.04, + "learning_rate": 0.000336, + "loss": 1.8976, + "step": 84 + }, + { + "epoch": 0.04, + "learning_rate": 0.00034, + "loss": 1.7016, + "step": 85 + }, + { + "epoch": 0.04, + "learning_rate": 0.000344, + "loss": 1.6986, + "step": 86 + }, + { + "epoch": 0.04, + "learning_rate": 0.000348, + "loss": 1.8508, + "step": 87 + }, + { + "epoch": 0.04, + "learning_rate": 0.00035200000000000005, + "loss": 1.7049, + "step": 88 + }, + { + "epoch": 0.04, + "learning_rate": 0.00035600000000000003, + "loss": 1.7912, + "step": 89 + }, + { + "epoch": 0.04, + "learning_rate": 0.00036, + "loss": 1.6776, + "step": 90 + }, + { + "epoch": 0.05, + "learning_rate": 0.000364, + "loss": 1.7725, + "step": 91 + }, + { + "epoch": 0.05, + "learning_rate": 0.00036800000000000005, + "loss": 1.734, + "step": 92 + }, + { + "epoch": 0.05, + "learning_rate": 0.00037200000000000004, + "loss": 1.8326, + "step": 93 + }, + { + "epoch": 0.05, + "learning_rate": 0.000376, + "loss": 1.7755, + "step": 94 + }, + { + "epoch": 0.05, + "learning_rate": 0.00038, + "loss": 1.5972, + "step": 95 + }, + { + "epoch": 0.05, + "learning_rate": 0.000384, + "loss": 1.7307, + "step": 96 + }, + { + "epoch": 0.05, + "learning_rate": 0.000388, + "loss": 1.7856, + "step": 97 + }, + { + "epoch": 0.05, + "learning_rate": 0.000392, + "loss": 1.7529, + "step": 98 + }, + { + "epoch": 0.05, + "learning_rate": 0.00039600000000000003, + "loss": 1.5194, + "step": 99 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004, + "loss": 1.7358, + "step": 100 + }, + { + "epoch": 0.05, + "learning_rate": 0.00039999972689137685, + "loss": 1.8458, + "step": 101 + }, + { + "epoch": 0.05, + "learning_rate": 0.00039999890756625326, + "loss": 1.7214, + "step": 102 + }, + { + "epoch": 0.05, + "learning_rate": 0.0003999975420268669, + "loss": 1.9563, + "step": 103 + }, + { + "epoch": 0.05, + "learning_rate": 0.0003999956302769471, + "loss": 1.9295, + "step": 104 + }, + { + "epoch": 0.05, + "learning_rate": 0.0003999931723217151, + "loss": 1.7522, + "step": 105 + }, + { + "epoch": 0.05, + "learning_rate": 0.0003999901681678838, + "loss": 1.6478, + "step": 106 + }, + { + "epoch": 0.05, + "learning_rate": 0.00039998661782365765, + "loss": 1.7303, + "step": 107 + }, + { + "epoch": 0.05, + "learning_rate": 0.00039998252129873314, + "loss": 1.7452, + "step": 108 + }, + { + "epoch": 0.05, + "learning_rate": 0.00039997787860429813, + "loss": 1.6586, + "step": 109 + }, + { + "epoch": 0.05, + "learning_rate": 0.0003999726897530322, + "loss": 1.5898, + "step": 110 + }, + { + "epoch": 0.06, + "learning_rate": 0.0003999669547591067, + "loss": 1.5924, + "step": 111 + }, + { + "epoch": 0.06, + "learning_rate": 0.0003999606736381842, + "loss": 1.5336, + "step": 112 + }, + { + "epoch": 0.06, + "learning_rate": 0.000399953846407419, + "loss": 1.6407, + "step": 113 + }, + { + "epoch": 0.06, + "learning_rate": 0.0003999464730854571, + "loss": 1.5072, + "step": 114 + }, + { + "epoch": 0.06, + "learning_rate": 0.00039993855369243534, + "loss": 1.8297, + "step": 115 + }, + { + "epoch": 0.06, + "learning_rate": 0.00039993008824998246, + "loss": 1.8099, + "step": 116 + }, + { + "epoch": 0.06, + "learning_rate": 0.0003999210767812183, + "loss": 1.5861, + "step": 117 + }, + { + "epoch": 0.06, + "learning_rate": 0.0003999115193107539, + "loss": 1.7694, + "step": 118 + }, + { + "epoch": 0.06, + "learning_rate": 0.0003999014158646916, + "loss": 1.8015, + "step": 119 + }, + { + "epoch": 0.06, + "learning_rate": 0.00039989076647062473, + "loss": 1.7514, + "step": 120 + }, + { + "epoch": 0.06, + "learning_rate": 0.0003998795711576378, + "loss": 1.8225, + "step": 121 + }, + { + "epoch": 0.06, + "learning_rate": 0.00039986782995630603, + "loss": 1.7047, + "step": 122 + }, + { + "epoch": 0.06, + "learning_rate": 0.00039985554289869574, + "loss": 1.5168, + "step": 123 + }, + { + "epoch": 0.06, + "learning_rate": 0.00039984271001836395, + "loss": 1.8128, + "step": 124 + }, + { + "epoch": 0.06, + "learning_rate": 0.0003998293313503583, + "loss": 1.8507, + "step": 125 + }, + { + "epoch": 0.06, + "learning_rate": 0.00039981540693121716, + "loss": 1.6675, + "step": 126 + }, + { + "epoch": 0.06, + "learning_rate": 0.0003998009367989693, + "loss": 1.6252, + "step": 127 + }, + { + "epoch": 0.06, + "learning_rate": 0.00039978592099313386, + "loss": 1.75, + "step": 128 + }, + { + "epoch": 0.06, + "learning_rate": 0.00039977035955472034, + "loss": 1.4685, + "step": 129 + }, + { + "epoch": 0.06, + "learning_rate": 0.0003997542525262284, + "loss": 1.6629, + "step": 130 + }, + { + "epoch": 0.07, + "learning_rate": 0.0003997375999516476, + "loss": 1.8121, + "step": 131 + }, + { + "epoch": 0.07, + "learning_rate": 0.0003997204018764577, + "loss": 1.8056, + "step": 132 + }, + { + "epoch": 0.07, + "learning_rate": 0.000399702658347628, + "loss": 1.6775, + "step": 133 + }, + { + "epoch": 0.07, + "learning_rate": 0.00039968436941361773, + "loss": 1.6369, + "step": 134 + }, + { + "epoch": 0.07, + "learning_rate": 0.0003996655351243755, + "loss": 1.6055, + "step": 135 + }, + { + "epoch": 0.07, + "learning_rate": 0.0003996461555313393, + "loss": 1.636, + "step": 136 + }, + { + "epoch": 0.07, + "learning_rate": 0.0003996262306874366, + "loss": 1.739, + "step": 137 + }, + { + "epoch": 0.07, + "learning_rate": 0.0003996057606470837, + "loss": 1.561, + "step": 138 + }, + { + "epoch": 0.07, + "learning_rate": 0.00039958474546618626, + "loss": 1.6892, + "step": 139 + }, + { + "epoch": 0.07, + "learning_rate": 0.00039956318520213837, + "loss": 1.8357, + "step": 140 + }, + { + "epoch": 0.07, + "learning_rate": 0.0003995410799138231, + "loss": 1.8147, + "step": 141 + }, + { + "epoch": 0.07, + "learning_rate": 0.00039951842966161176, + "loss": 1.6856, + "step": 142 + }, + { + "epoch": 0.07, + "learning_rate": 0.0003994952345073643, + "loss": 1.6191, + "step": 143 + }, + { + "epoch": 0.07, + "learning_rate": 0.0003994714945144286, + "loss": 1.5254, + "step": 144 + }, + { + "epoch": 0.07, + "learning_rate": 0.0003994472097476406, + "loss": 1.5864, + "step": 145 + }, + { + "epoch": 0.07, + "learning_rate": 0.0003994223802733241, + "loss": 1.7604, + "step": 146 + }, + { + "epoch": 0.07, + "learning_rate": 0.0003993970061592906, + "loss": 1.7338, + "step": 147 + }, + { + "epoch": 0.07, + "learning_rate": 0.00039937108747483893, + "loss": 1.8078, + "step": 148 + }, + { + "epoch": 0.07, + "learning_rate": 0.0003993446242907553, + "loss": 1.6927, + "step": 149 + }, + { + "epoch": 0.07, + "learning_rate": 0.00039931761667931287, + "loss": 1.6732, + "step": 150 + }, + { + "epoch": 0.08, + "learning_rate": 0.00039929006471427187, + "loss": 1.5369, + "step": 151 + }, + { + "epoch": 0.08, + "learning_rate": 0.00039926196847087905, + "loss": 1.5529, + "step": 152 + }, + { + "epoch": 0.08, + "learning_rate": 0.0003992333280258676, + "loss": 1.6886, + "step": 153 + }, + { + "epoch": 0.08, + "learning_rate": 0.0003992041434574572, + "loss": 1.7383, + "step": 154 + }, + { + "epoch": 0.08, + "learning_rate": 0.0003991744148453532, + "loss": 1.7943, + "step": 155 + }, + { + "epoch": 0.08, + "learning_rate": 0.0003991441422707472, + "loss": 1.6811, + "step": 156 + }, + { + "epoch": 0.08, + "learning_rate": 0.00039911332581631613, + "loss": 1.7442, + "step": 157 + }, + { + "epoch": 0.08, + "learning_rate": 0.0003990819655662224, + "loss": 1.5712, + "step": 158 + }, + { + "epoch": 0.08, + "learning_rate": 0.00039905006160611357, + "loss": 1.5422, + "step": 159 + }, + { + "epoch": 0.08, + "learning_rate": 0.00039901761402312205, + "loss": 1.6689, + "step": 160 + }, + { + "epoch": 0.08, + "learning_rate": 0.000398984622905865, + "loss": 1.8412, + "step": 161 + }, + { + "epoch": 0.08, + "learning_rate": 0.00039895108834444405, + "loss": 1.7318, + "step": 162 + }, + { + "epoch": 0.08, + "learning_rate": 0.00039891701043044496, + "loss": 1.4819, + "step": 163 + }, + { + "epoch": 0.08, + "learning_rate": 0.0003988823892569375, + "loss": 1.4612, + "step": 164 + }, + { + "epoch": 0.08, + "learning_rate": 0.00039884722491847504, + "loss": 1.5737, + "step": 165 + }, + { + "epoch": 0.08, + "learning_rate": 0.0003988115175110943, + "loss": 1.7666, + "step": 166 + }, + { + "epoch": 0.08, + "learning_rate": 0.0003987752671323155, + "loss": 1.4879, + "step": 167 + }, + { + "epoch": 0.08, + "learning_rate": 0.0003987384738811414, + "loss": 1.7151, + "step": 168 + }, + { + "epoch": 0.08, + "learning_rate": 0.0003987011378580576, + "loss": 1.751, + "step": 169 + }, + { + "epoch": 0.08, + "learning_rate": 0.00039866325916503205, + "loss": 1.7216, + "step": 170 + }, + { + "epoch": 0.09, + "learning_rate": 0.0003986248379055146, + "loss": 1.5863, + "step": 171 + }, + { + "epoch": 0.09, + "learning_rate": 0.00039858587418443715, + "loss": 1.793, + "step": 172 + }, + { + "epoch": 0.09, + "learning_rate": 0.00039854636810821286, + "loss": 1.6011, + "step": 173 + }, + { + "epoch": 0.09, + "learning_rate": 0.0003985063197847363, + "loss": 1.7418, + "step": 174 + }, + { + "epoch": 0.09, + "learning_rate": 0.0003984657293233829, + "loss": 1.6862, + "step": 175 + }, + { + "epoch": 0.09, + "learning_rate": 0.0003984245968350087, + "loss": 1.6857, + "step": 176 + }, + { + "epoch": 0.09, + "learning_rate": 0.00039838292243195013, + "loss": 1.7442, + "step": 177 + }, + { + "epoch": 0.09, + "learning_rate": 0.0003983407062280234, + "loss": 1.9183, + "step": 178 + }, + { + "epoch": 0.09, + "learning_rate": 0.0003982979483385249, + "loss": 1.592, + "step": 179 + }, + { + "epoch": 0.09, + "learning_rate": 0.0003982546488802298, + "loss": 1.777, + "step": 180 + }, + { + "epoch": 0.09, + "learning_rate": 0.00039821080797139283, + "loss": 1.7078, + "step": 181 + }, + { + "epoch": 0.09, + "learning_rate": 0.0003981664257317472, + "loss": 1.7803, + "step": 182 + }, + { + "epoch": 0.09, + "learning_rate": 0.00039812150228250474, + "loss": 1.7864, + "step": 183 + }, + { + "epoch": 0.09, + "learning_rate": 0.0003980760377463552, + "loss": 1.7185, + "step": 184 + }, + { + "epoch": 0.09, + "learning_rate": 0.0003980300322474662, + "loss": 1.7885, + "step": 185 + }, + { + "epoch": 0.09, + "learning_rate": 0.00039798348591148263, + "loss": 1.8994, + "step": 186 + }, + { + "epoch": 0.09, + "learning_rate": 0.00039793639886552665, + "loss": 1.713, + "step": 187 + }, + { + "epoch": 0.09, + "learning_rate": 0.000397888771238197, + "loss": 1.5899, + "step": 188 + }, + { + "epoch": 0.09, + "learning_rate": 0.0003978406031595688, + "loss": 1.6652, + "step": 189 + }, + { + "epoch": 0.09, + "learning_rate": 0.0003977918947611932, + "loss": 1.5935, + "step": 190 + }, + { + "epoch": 0.1, + "learning_rate": 0.0003977426461760972, + "loss": 1.6655, + "step": 191 + }, + { + "epoch": 0.1, + "learning_rate": 0.00039769285753878277, + "loss": 1.8054, + "step": 192 + }, + { + "epoch": 0.1, + "learning_rate": 0.000397642528985227, + "loss": 1.6275, + "step": 193 + }, + { + "epoch": 0.1, + "learning_rate": 0.00039759166065288167, + "loss": 1.7446, + "step": 194 + }, + { + "epoch": 0.1, + "learning_rate": 0.00039754025268067233, + "loss": 1.6488, + "step": 195 + }, + { + "epoch": 0.1, + "learning_rate": 0.00039748830520899874, + "loss": 1.7123, + "step": 196 + }, + { + "epoch": 0.1, + "learning_rate": 0.0003974358183797339, + "loss": 1.6823, + "step": 197 + }, + { + "epoch": 0.1, + "learning_rate": 0.00039738279233622386, + "loss": 1.4115, + "step": 198 + }, + { + "epoch": 0.1, + "learning_rate": 0.00039732922722328725, + "loss": 1.3705, + "step": 199 + }, + { + "epoch": 0.1, + "learning_rate": 0.00039727512318721514, + "loss": 1.776, + "step": 200 + }, + { + "epoch": 0.1, + "learning_rate": 0.00039722048037577024, + "loss": 1.7096, + "step": 201 + }, + { + "epoch": 0.1, + "learning_rate": 0.0003971652989381868, + "loss": 1.5852, + "step": 202 + }, + { + "epoch": 0.1, + "learning_rate": 0.0003971095790251701, + "loss": 1.8677, + "step": 203 + }, + { + "epoch": 0.1, + "learning_rate": 0.00039705332078889596, + "loss": 1.4359, + "step": 204 + }, + { + "epoch": 0.1, + "learning_rate": 0.00039699652438301053, + "loss": 1.7743, + "step": 205 + }, + { + "epoch": 0.1, + "learning_rate": 0.00039693918996262974, + "loss": 1.6298, + "step": 206 + }, + { + "epoch": 0.1, + "learning_rate": 0.0003968813176843387, + "loss": 1.7088, + "step": 207 + }, + { + "epoch": 0.1, + "learning_rate": 0.0003968229077061917, + "loss": 1.4698, + "step": 208 + }, + { + "epoch": 0.1, + "learning_rate": 0.00039676396018771147, + "loss": 1.6022, + "step": 209 + }, + { + "epoch": 0.1, + "learning_rate": 0.0003967044752898886, + "loss": 1.6729, + "step": 210 + }, + { + "epoch": 0.11, + "learning_rate": 0.0003966444531751816, + "loss": 1.5742, + "step": 211 + }, + { + "epoch": 0.11, + "learning_rate": 0.000396583894007516, + "loss": 1.437, + "step": 212 + }, + { + "epoch": 0.11, + "learning_rate": 0.0003965227979522842, + "loss": 1.615, + "step": 213 + }, + { + "epoch": 0.11, + "learning_rate": 0.00039646116517634463, + "loss": 1.6582, + "step": 214 + }, + { + "epoch": 0.11, + "learning_rate": 0.00039639899584802184, + "loss": 1.5987, + "step": 215 + }, + { + "epoch": 0.11, + "learning_rate": 0.00039633629013710554, + "loss": 1.8452, + "step": 216 + }, + { + "epoch": 0.11, + "learning_rate": 0.00039627304821485056, + "loss": 1.6249, + "step": 217 + }, + { + "epoch": 0.11, + "learning_rate": 0.0003962092702539759, + "loss": 1.7399, + "step": 218 + }, + { + "epoch": 0.11, + "learning_rate": 0.0003961449564286648, + "loss": 1.5969, + "step": 219 + }, + { + "epoch": 0.11, + "learning_rate": 0.00039608010691456367, + "loss": 1.5766, + "step": 220 + }, + { + "epoch": 0.11, + "learning_rate": 0.00039601472188878235, + "loss": 1.6199, + "step": 221 + }, + { + "epoch": 0.11, + "learning_rate": 0.0003959488015298929, + "loss": 1.773, + "step": 222 + }, + { + "epoch": 0.11, + "learning_rate": 0.00039588234601792944, + "loss": 1.8759, + "step": 223 + }, + { + "epoch": 0.11, + "learning_rate": 0.0003958153555343878, + "loss": 1.7133, + "step": 224 + }, + { + "epoch": 0.11, + "learning_rate": 0.00039574783026222475, + "loss": 1.5905, + "step": 225 + }, + { + "epoch": 0.11, + "learning_rate": 0.00039567977038585756, + "loss": 1.7087, + "step": 226 + }, + { + "epoch": 0.11, + "learning_rate": 0.0003956111760911637, + "loss": 1.5135, + "step": 227 + }, + { + "epoch": 0.11, + "learning_rate": 0.00039554204756548, + "loss": 1.4835, + "step": 228 + }, + { + "epoch": 0.11, + "learning_rate": 0.00039547238499760255, + "loss": 1.7274, + "step": 229 + }, + { + "epoch": 0.11, + "learning_rate": 0.00039540218857778576, + "loss": 1.568, + "step": 230 + }, + { + "epoch": 0.12, + "learning_rate": 0.0003953314584977421, + "loss": 1.5526, + "step": 231 + }, + { + "epoch": 0.12, + "learning_rate": 0.00039526019495064155, + "loss": 1.7895, + "step": 232 + }, + { + "epoch": 0.12, + "learning_rate": 0.000395188398131111, + "loss": 1.6583, + "step": 233 + }, + { + "epoch": 0.12, + "learning_rate": 0.00039511606823523375, + "loss": 1.7053, + "step": 234 + }, + { + "epoch": 0.12, + "learning_rate": 0.00039504320546054894, + "loss": 1.709, + "step": 235 + }, + { + "epoch": 0.12, + "learning_rate": 0.00039496981000605117, + "loss": 1.6779, + "step": 236 + }, + { + "epoch": 0.12, + "learning_rate": 0.0003948958820721897, + "loss": 1.6184, + "step": 237 + }, + { + "epoch": 0.12, + "learning_rate": 0.0003948214218608681, + "loss": 1.6131, + "step": 238 + }, + { + "epoch": 0.12, + "learning_rate": 0.00039474642957544365, + "loss": 1.6341, + "step": 239 + }, + { + "epoch": 0.12, + "learning_rate": 0.0003946709054207267, + "loss": 1.6489, + "step": 240 + }, + { + "epoch": 0.12, + "learning_rate": 0.00039459484960298026, + "loss": 1.6761, + "step": 241 + }, + { + "epoch": 0.12, + "learning_rate": 0.00039451826232991935, + "loss": 1.5096, + "step": 242 + }, + { + "epoch": 0.12, + "learning_rate": 0.0003944411438107104, + "loss": 1.736, + "step": 243 + }, + { + "epoch": 0.12, + "learning_rate": 0.0003943634942559708, + "loss": 1.7403, + "step": 244 + }, + { + "epoch": 0.12, + "learning_rate": 0.00039428531387776804, + "loss": 1.554, + "step": 245 + }, + { + "epoch": 0.12, + "learning_rate": 0.0003942066028896195, + "loss": 1.8338, + "step": 246 + }, + { + "epoch": 0.12, + "learning_rate": 0.0003941273615064918, + "loss": 1.7811, + "step": 247 + }, + { + "epoch": 0.12, + "learning_rate": 0.00039404758994479984, + "loss": 1.6816, + "step": 248 + }, + { + "epoch": 0.12, + "learning_rate": 0.00039396728842240673, + "loss": 1.6188, + "step": 249 + }, + { + "epoch": 0.12, + "learning_rate": 0.0003938864571586229, + "loss": 1.6212, + "step": 250 + }, + { + "epoch": 0.13, + "learning_rate": 0.00039380509637420533, + "loss": 1.8131, + "step": 251 + }, + { + "epoch": 0.13, + "learning_rate": 0.0003937232062913575, + "loss": 1.7621, + "step": 252 + }, + { + "epoch": 0.13, + "learning_rate": 0.00039364078713372816, + "loss": 1.7201, + "step": 253 + }, + { + "epoch": 0.13, + "learning_rate": 0.00039355783912641126, + "loss": 1.6273, + "step": 254 + }, + { + "epoch": 0.13, + "learning_rate": 0.0003934743624959449, + "loss": 1.812, + "step": 255 + }, + { + "epoch": 0.13, + "learning_rate": 0.0003933903574703109, + "loss": 1.7049, + "step": 256 + }, + { + "epoch": 0.13, + "learning_rate": 0.0003933058242789344, + "loss": 1.7132, + "step": 257 + }, + { + "epoch": 0.13, + "learning_rate": 0.00039322076315268266, + "loss": 1.5422, + "step": 258 + }, + { + "epoch": 0.13, + "learning_rate": 0.000393135174323865, + "loss": 1.6279, + "step": 259 + }, + { + "epoch": 0.13, + "learning_rate": 0.0003930490580262319, + "loss": 1.5107, + "step": 260 + }, + { + "epoch": 0.13, + "learning_rate": 0.00039296241449497443, + "loss": 1.6808, + "step": 261 + }, + { + "epoch": 0.13, + "learning_rate": 0.00039287524396672345, + "loss": 1.8189, + "step": 262 + }, + { + "epoch": 0.13, + "learning_rate": 0.00039278754667954936, + "loss": 1.6716, + "step": 263 + }, + { + "epoch": 0.13, + "learning_rate": 0.00039269932287296083, + "loss": 1.7371, + "step": 264 + }, + { + "epoch": 0.13, + "learning_rate": 0.00039261057278790483, + "loss": 1.5354, + "step": 265 + }, + { + "epoch": 0.13, + "learning_rate": 0.0003925212966667654, + "loss": 1.5848, + "step": 266 + }, + { + "epoch": 0.13, + "learning_rate": 0.0003924314947533633, + "loss": 1.6857, + "step": 267 + }, + { + "epoch": 0.13, + "learning_rate": 0.00039234116729295536, + "loss": 1.8029, + "step": 268 + }, + { + "epoch": 0.13, + "learning_rate": 0.00039225031453223367, + "loss": 1.6986, + "step": 269 + }, + { + "epoch": 0.13, + "learning_rate": 0.00039215893671932497, + "loss": 1.5044, + "step": 270 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003920670341037899, + "loss": 1.6712, + "step": 271 + }, + { + "epoch": 0.14, + "learning_rate": 0.00039197460693662245, + "loss": 1.5513, + "step": 272 + }, + { + "epoch": 0.14, + "learning_rate": 0.00039188165547024916, + "loss": 1.4925, + "step": 273 + }, + { + "epoch": 0.14, + "learning_rate": 0.00039178817995852856, + "loss": 1.6916, + "step": 274 + }, + { + "epoch": 0.14, + "learning_rate": 0.00039169418065675024, + "loss": 1.7875, + "step": 275 + }, + { + "epoch": 0.14, + "learning_rate": 0.00039159965782163453, + "loss": 1.5011, + "step": 276 + }, + { + "epoch": 0.14, + "learning_rate": 0.00039150461171133126, + "loss": 1.6323, + "step": 277 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003914090425854197, + "loss": 1.6161, + "step": 278 + }, + { + "epoch": 0.14, + "learning_rate": 0.00039131295070490727, + "loss": 1.7276, + "step": 279 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003912163363322293, + "loss": 1.6082, + "step": 280 + }, + { + "epoch": 0.14, + "learning_rate": 0.000391119199731248, + "loss": 1.732, + "step": 281 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003910215411672516, + "loss": 1.6163, + "step": 282 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003909233609069542, + "loss": 1.8471, + "step": 283 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003908246592184946, + "loss": 1.6584, + "step": 284 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003907254363714355, + "loss": 1.6216, + "step": 285 + }, + { + "epoch": 0.14, + "learning_rate": 0.00039062569263676307, + "loss": 1.6343, + "step": 286 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003905254282868861, + "loss": 1.682, + "step": 287 + }, + { + "epoch": 0.14, + "learning_rate": 0.00039042464359563523, + "loss": 1.7296, + "step": 288 + }, + { + "epoch": 0.14, + "learning_rate": 0.00039032333883826206, + "loss": 1.5631, + "step": 289 + }, + { + "epoch": 0.14, + "learning_rate": 0.00039022151429143865, + "loss": 1.7181, + "step": 290 + }, + { + "epoch": 0.15, + "learning_rate": 0.00039011917023325655, + "loss": 1.7264, + "step": 291 + }, + { + "epoch": 0.15, + "learning_rate": 0.0003900163069432263, + "loss": 1.815, + "step": 292 + }, + { + "epoch": 0.15, + "learning_rate": 0.00038991292470227636, + "loss": 1.5588, + "step": 293 + }, + { + "epoch": 0.15, + "learning_rate": 0.00038980902379275257, + "loss": 1.7129, + "step": 294 + }, + { + "epoch": 0.15, + "learning_rate": 0.00038970460449841725, + "loss": 1.6191, + "step": 295 + }, + { + "epoch": 0.15, + "learning_rate": 0.0003895996671044485, + "loss": 1.6827, + "step": 296 + }, + { + "epoch": 0.15, + "learning_rate": 0.0003894942118974394, + "loss": 1.5777, + "step": 297 + }, + { + "epoch": 0.15, + "learning_rate": 0.0003893882391653973, + "loss": 1.664, + "step": 298 + }, + { + "epoch": 0.15, + "learning_rate": 0.0003892817491977427, + "loss": 1.7806, + "step": 299 + }, + { + "epoch": 0.15, + "learning_rate": 0.000389174742285309, + "loss": 1.6249, + "step": 300 + }, + { + "epoch": 0.15, + "learning_rate": 0.0003890672187203413, + "loss": 1.7223, + "step": 301 + }, + { + "epoch": 0.15, + "learning_rate": 0.0003889591787964957, + "loss": 1.6471, + "step": 302 + }, + { + "epoch": 0.15, + "learning_rate": 0.0003888506228088385, + "loss": 1.5887, + "step": 303 + }, + { + "epoch": 0.15, + "learning_rate": 0.0003887415510538456, + "loss": 1.7015, + "step": 304 + }, + { + "epoch": 0.15, + "learning_rate": 0.00038863196382940123, + "loss": 1.6102, + "step": 305 + }, + { + "epoch": 0.15, + "learning_rate": 0.00038852186143479764, + "loss": 1.5874, + "step": 306 + }, + { + "epoch": 0.15, + "learning_rate": 0.0003884112441707339, + "loss": 1.6151, + "step": 307 + }, + { + "epoch": 0.15, + "learning_rate": 0.00038830011233931526, + "loss": 1.739, + "step": 308 + }, + { + "epoch": 0.15, + "learning_rate": 0.0003881884662440525, + "loss": 1.7366, + "step": 309 + }, + { + "epoch": 0.15, + "learning_rate": 0.00038807630618986063, + "loss": 1.6522, + "step": 310 + }, + { + "epoch": 0.16, + "learning_rate": 0.0003879636324830584, + "loss": 1.71, + "step": 311 + }, + { + "epoch": 0.16, + "learning_rate": 0.0003878504454313675, + "loss": 1.7334, + "step": 312 + }, + { + "epoch": 0.16, + "learning_rate": 0.00038773674534391144, + "loss": 1.613, + "step": 313 + }, + { + "epoch": 0.16, + "learning_rate": 0.000387622532531215, + "loss": 1.7322, + "step": 314 + }, + { + "epoch": 0.16, + "learning_rate": 0.00038750780730520325, + "loss": 1.5234, + "step": 315 + }, + { + "epoch": 0.16, + "learning_rate": 0.00038739256997920063, + "loss": 1.5423, + "step": 316 + }, + { + "epoch": 0.16, + "learning_rate": 0.0003872768208679302, + "loss": 1.6155, + "step": 317 + }, + { + "epoch": 0.16, + "learning_rate": 0.00038716056028751284, + "loss": 1.6176, + "step": 318 + }, + { + "epoch": 0.16, + "learning_rate": 0.00038704378855546615, + "loss": 1.696, + "step": 319 + }, + { + "epoch": 0.16, + "learning_rate": 0.00038692650599070393, + "loss": 1.5135, + "step": 320 + }, + { + "epoch": 0.16, + "learning_rate": 0.0003868087129135348, + "loss": 1.53, + "step": 321 + }, + { + "epoch": 0.16, + "learning_rate": 0.0003866904096456619, + "loss": 1.6036, + "step": 322 + }, + { + "epoch": 0.16, + "learning_rate": 0.00038657159651018163, + "loss": 1.8304, + "step": 323 + }, + { + "epoch": 0.16, + "learning_rate": 0.0003864522738315829, + "loss": 1.6235, + "step": 324 + }, + { + "epoch": 0.16, + "learning_rate": 0.0003863324419357463, + "loss": 1.5371, + "step": 325 + }, + { + "epoch": 0.16, + "learning_rate": 0.000386212101149943, + "loss": 1.6708, + "step": 326 + }, + { + "epoch": 0.16, + "learning_rate": 0.00038609125180283414, + "loss": 1.6137, + "step": 327 + }, + { + "epoch": 0.16, + "learning_rate": 0.00038596989422446954, + "loss": 1.546, + "step": 328 + }, + { + "epoch": 0.16, + "learning_rate": 0.0003858480287462874, + "loss": 1.5953, + "step": 329 + }, + { + "epoch": 0.16, + "learning_rate": 0.00038572565570111283, + "loss": 1.7084, + "step": 330 + }, + { + "epoch": 0.17, + "learning_rate": 0.0003856027754231571, + "loss": 1.6816, + "step": 331 + }, + { + "epoch": 0.17, + "learning_rate": 0.00038547938824801684, + "loss": 1.5167, + "step": 332 + }, + { + "epoch": 0.17, + "learning_rate": 0.00038535549451267315, + "loss": 1.4679, + "step": 333 + }, + { + "epoch": 0.17, + "learning_rate": 0.0003852310945554904, + "loss": 1.6548, + "step": 334 + }, + { + "epoch": 0.17, + "learning_rate": 0.0003851061887162156, + "loss": 1.9461, + "step": 335 + }, + { + "epoch": 0.17, + "learning_rate": 0.0003849807773359774, + "loss": 1.782, + "step": 336 + }, + { + "epoch": 0.17, + "learning_rate": 0.0003848548607572852, + "loss": 1.7077, + "step": 337 + }, + { + "epoch": 0.17, + "learning_rate": 0.0003847284393240279, + "loss": 1.5953, + "step": 338 + }, + { + "epoch": 0.17, + "learning_rate": 0.00038460151338147333, + "loss": 1.5885, + "step": 339 + }, + { + "epoch": 0.17, + "learning_rate": 0.00038447408327626733, + "loss": 1.5709, + "step": 340 + }, + { + "epoch": 0.17, + "learning_rate": 0.0003843461493564323, + "loss": 1.7463, + "step": 341 + }, + { + "epoch": 0.17, + "learning_rate": 0.00038421771197136696, + "loss": 1.7452, + "step": 342 + }, + { + "epoch": 0.17, + "learning_rate": 0.00038408877147184483, + "loss": 1.4542, + "step": 343 + }, + { + "epoch": 0.17, + "learning_rate": 0.00038395932821001354, + "loss": 1.4835, + "step": 344 + }, + { + "epoch": 0.17, + "learning_rate": 0.00038382938253939385, + "loss": 1.6311, + "step": 345 + }, + { + "epoch": 0.17, + "learning_rate": 0.00038369893481487847, + "loss": 1.6245, + "step": 346 + }, + { + "epoch": 0.17, + "learning_rate": 0.00038356798539273146, + "loss": 1.6264, + "step": 347 + }, + { + "epoch": 0.17, + "learning_rate": 0.00038343653463058705, + "loss": 1.5778, + "step": 348 + }, + { + "epoch": 0.17, + "learning_rate": 0.0003833045828874485, + "loss": 1.6097, + "step": 349 + }, + { + "epoch": 0.17, + "learning_rate": 0.00038317213052368744, + "loss": 1.67, + "step": 350 + }, + { + "epoch": 0.18, + "learning_rate": 0.00038303917790104264, + "loss": 1.5424, + "step": 351 + }, + { + "epoch": 0.18, + "learning_rate": 0.00038290572538261927, + "loss": 1.607, + "step": 352 + }, + { + "epoch": 0.18, + "learning_rate": 0.00038277177333288765, + "loss": 1.6334, + "step": 353 + }, + { + "epoch": 0.18, + "learning_rate": 0.0003826373221176823, + "loss": 1.5178, + "step": 354 + }, + { + "epoch": 0.18, + "learning_rate": 0.0003825023721042012, + "loss": 1.5779, + "step": 355 + }, + { + "epoch": 0.18, + "learning_rate": 0.0003823669236610044, + "loss": 1.5685, + "step": 356 + }, + { + "epoch": 0.18, + "learning_rate": 0.0003822309771580132, + "loss": 1.6973, + "step": 357 + }, + { + "epoch": 0.18, + "learning_rate": 0.00038209453296650944, + "loss": 1.8553, + "step": 358 + }, + { + "epoch": 0.18, + "learning_rate": 0.0003819575914591338, + "loss": 1.6619, + "step": 359 + }, + { + "epoch": 0.18, + "learning_rate": 0.0003818201530098853, + "loss": 1.7002, + "step": 360 + }, + { + "epoch": 0.18, + "learning_rate": 0.0003816822179941204, + "loss": 1.7676, + "step": 361 + }, + { + "epoch": 0.18, + "learning_rate": 0.0003815437867885514, + "loss": 1.5902, + "step": 362 + }, + { + "epoch": 0.18, + "learning_rate": 0.0003814048597712458, + "loss": 1.7975, + "step": 363 + }, + { + "epoch": 0.18, + "learning_rate": 0.0003812654373216254, + "loss": 1.5581, + "step": 364 + }, + { + "epoch": 0.18, + "learning_rate": 0.00038112551982046484, + "loss": 1.7062, + "step": 365 + }, + { + "epoch": 0.18, + "learning_rate": 0.00038098510764989087, + "loss": 1.7171, + "step": 366 + }, + { + "epoch": 0.18, + "learning_rate": 0.0003808442011933814, + "loss": 1.5299, + "step": 367 + }, + { + "epoch": 0.18, + "learning_rate": 0.0003807028008357638, + "loss": 1.5733, + "step": 368 + }, + { + "epoch": 0.18, + "learning_rate": 0.000380560906963215, + "loss": 1.8065, + "step": 369 + }, + { + "epoch": 0.18, + "learning_rate": 0.0003804185199632591, + "loss": 1.4458, + "step": 370 + }, + { + "epoch": 0.19, + "learning_rate": 0.0003802756402247674, + "loss": 1.5408, + "step": 371 + }, + { + "epoch": 0.19, + "learning_rate": 0.00038013226813795686, + "loss": 1.476, + "step": 372 + }, + { + "epoch": 0.19, + "learning_rate": 0.0003799884040943889, + "loss": 1.5259, + "step": 373 + }, + { + "epoch": 0.19, + "learning_rate": 0.00037984404848696873, + "loss": 1.6524, + "step": 374 + }, + { + "epoch": 0.19, + "learning_rate": 0.0003796992017099438, + "loss": 1.6925, + "step": 375 + }, + { + "epoch": 0.19, + "learning_rate": 0.0003795538641589033, + "loss": 1.5929, + "step": 376 + }, + { + "epoch": 0.19, + "learning_rate": 0.0003794080362307766, + "loss": 1.5452, + "step": 377 + }, + { + "epoch": 0.19, + "learning_rate": 0.00037926171832383226, + "loss": 1.6099, + "step": 378 + }, + { + "epoch": 0.19, + "learning_rate": 0.00037911491083767715, + "loss": 1.5971, + "step": 379 + }, + { + "epoch": 0.19, + "learning_rate": 0.00037896761417325524, + "loss": 1.8214, + "step": 380 + }, + { + "epoch": 0.19, + "learning_rate": 0.0003788198287328463, + "loss": 1.4613, + "step": 381 + }, + { + "epoch": 0.19, + "learning_rate": 0.00037867155492006516, + "loss": 1.6455, + "step": 382 + }, + { + "epoch": 0.19, + "learning_rate": 0.00037852279313986044, + "loss": 1.4235, + "step": 383 + }, + { + "epoch": 0.19, + "learning_rate": 0.0003783735437985133, + "loss": 1.6516, + "step": 384 + }, + { + "epoch": 0.19, + "learning_rate": 0.0003782238073036367, + "loss": 1.5719, + "step": 385 + }, + { + "epoch": 0.19, + "learning_rate": 0.00037807358406417374, + "loss": 1.5233, + "step": 386 + }, + { + "epoch": 0.19, + "learning_rate": 0.00037792287449039713, + "loss": 1.8381, + "step": 387 + }, + { + "epoch": 0.19, + "learning_rate": 0.00037777167899390776, + "loss": 1.5918, + "step": 388 + }, + { + "epoch": 0.19, + "learning_rate": 0.0003776199979876335, + "loss": 1.7216, + "step": 389 + }, + { + "epoch": 0.19, + "learning_rate": 0.00037746783188582827, + "loss": 1.8514, + "step": 390 + }, + { + "epoch": 0.2, + "learning_rate": 0.00037731518110407084, + "loss": 1.5811, + "step": 391 + }, + { + "epoch": 0.2, + "learning_rate": 0.00037716204605926367, + "loss": 1.6584, + "step": 392 + }, + { + "epoch": 0.2, + "learning_rate": 0.0003770084271696317, + "loss": 1.5818, + "step": 393 + }, + { + "epoch": 0.2, + "learning_rate": 0.00037685432485472145, + "loss": 1.7268, + "step": 394 + }, + { + "epoch": 0.2, + "learning_rate": 0.0003766997395353995, + "loss": 1.6397, + "step": 395 + }, + { + "epoch": 0.2, + "learning_rate": 0.0003765446716338518, + "loss": 1.7954, + "step": 396 + }, + { + "epoch": 0.2, + "learning_rate": 0.00037638912157358223, + "loss": 1.6724, + "step": 397 + }, + { + "epoch": 0.2, + "learning_rate": 0.00037623308977941124, + "loss": 1.5138, + "step": 398 + }, + { + "epoch": 0.2, + "learning_rate": 0.00037607657667747523, + "loss": 1.6065, + "step": 399 + }, + { + "epoch": 0.2, + "learning_rate": 0.000375919582695225, + "loss": 1.6297, + "step": 400 + }, + { + "epoch": 0.2, + "learning_rate": 0.0003757621082614245, + "loss": 1.5817, + "step": 401 + }, + { + "epoch": 0.2, + "learning_rate": 0.00037560415380615014, + "loss": 1.5774, + "step": 402 + }, + { + "epoch": 0.2, + "learning_rate": 0.00037544571976078913, + "loss": 1.6676, + "step": 403 + }, + { + "epoch": 0.2, + "learning_rate": 0.0003752868065580384, + "loss": 1.7233, + "step": 404 + }, + { + "epoch": 0.2, + "learning_rate": 0.00037512741463190374, + "loss": 1.6582, + "step": 405 + }, + { + "epoch": 0.2, + "learning_rate": 0.0003749675444176983, + "loss": 1.648, + "step": 406 + }, + { + "epoch": 0.2, + "learning_rate": 0.0003748071963520412, + "loss": 1.581, + "step": 407 + }, + { + "epoch": 0.2, + "learning_rate": 0.000374646370872857, + "loss": 1.7211, + "step": 408 + }, + { + "epoch": 0.2, + "learning_rate": 0.00037448506841937393, + "loss": 1.567, + "step": 409 + }, + { + "epoch": 0.2, + "learning_rate": 0.0003743232894321229, + "loss": 1.4671, + "step": 410 + }, + { + "epoch": 0.21, + "learning_rate": 0.00037416103435293616, + "loss": 1.6647, + "step": 411 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003739983036249465, + "loss": 1.5772, + "step": 412 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003738350976925854, + "loss": 1.5484, + "step": 413 + }, + { + "epoch": 0.21, + "learning_rate": 0.00037367141700158247, + "loss": 1.5484, + "step": 414 + }, + { + "epoch": 0.21, + "learning_rate": 0.00037350726199896384, + "loss": 1.6113, + "step": 415 + }, + { + "epoch": 0.21, + "learning_rate": 0.00037334263313305074, + "loss": 1.5764, + "step": 416 + }, + { + "epoch": 0.21, + "learning_rate": 0.000373177530853459, + "loss": 1.6048, + "step": 417 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003730119556110971, + "loss": 1.6771, + "step": 418 + }, + { + "epoch": 0.21, + "learning_rate": 0.00037284590785816534, + "loss": 1.6426, + "step": 419 + }, + { + "epoch": 0.21, + "learning_rate": 0.00037267938804815443, + "loss": 1.6309, + "step": 420 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003725123966358444, + "loss": 1.6995, + "step": 421 + }, + { + "epoch": 0.21, + "learning_rate": 0.00037234493407730307, + "loss": 1.8452, + "step": 422 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003721770008298852, + "loss": 1.6109, + "step": 423 + }, + { + "epoch": 0.21, + "learning_rate": 0.000372008597352231, + "loss": 1.6494, + "step": 424 + }, + { + "epoch": 0.21, + "learning_rate": 0.00037183972410426483, + "loss": 1.7364, + "step": 425 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003716703815471942, + "loss": 1.6014, + "step": 426 + }, + { + "epoch": 0.21, + "learning_rate": 0.00037150057014350796, + "loss": 1.6395, + "step": 427 + }, + { + "epoch": 0.21, + "learning_rate": 0.000371330290356976, + "loss": 1.6497, + "step": 428 + }, + { + "epoch": 0.21, + "learning_rate": 0.000371159542652647, + "loss": 1.4915, + "step": 429 + }, + { + "epoch": 0.21, + "learning_rate": 0.00037098832749684767, + "loss": 1.4835, + "step": 430 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003708166453571813, + "loss": 1.8236, + "step": 431 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003706444967025267, + "loss": 1.5515, + "step": 432 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003704718820030366, + "loss": 1.4684, + "step": 433 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003702988017301368, + "loss": 1.8934, + "step": 434 + }, + { + "epoch": 0.22, + "learning_rate": 0.00037012525635652424, + "loss": 1.7834, + "step": 435 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003699512463561664, + "loss": 1.6533, + "step": 436 + }, + { + "epoch": 0.22, + "learning_rate": 0.00036977677220429963, + "loss": 1.5941, + "step": 437 + }, + { + "epoch": 0.22, + "learning_rate": 0.00036960183437742783, + "loss": 1.6756, + "step": 438 + }, + { + "epoch": 0.22, + "learning_rate": 0.00036942643335332134, + "loss": 1.5123, + "step": 439 + }, + { + "epoch": 0.22, + "learning_rate": 0.00036925056961101537, + "loss": 1.5916, + "step": 440 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003690742436308091, + "loss": 1.4561, + "step": 441 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003688974558942639, + "loss": 1.7021, + "step": 442 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003687202068842024, + "loss": 1.4809, + "step": 443 + }, + { + "epoch": 0.22, + "learning_rate": 0.00036854249708470686, + "loss": 1.4896, + "step": 444 + }, + { + "epoch": 0.22, + "learning_rate": 0.00036836432698111806, + "loss": 1.5072, + "step": 445 + }, + { + "epoch": 0.22, + "learning_rate": 0.000368185697060034, + "loss": 1.5797, + "step": 446 + }, + { + "epoch": 0.22, + "learning_rate": 0.00036800660780930835, + "loss": 1.5629, + "step": 447 + }, + { + "epoch": 0.22, + "learning_rate": 0.00036782705971804923, + "loss": 1.64, + "step": 448 + }, + { + "epoch": 0.22, + "learning_rate": 0.00036764705327661806, + "loss": 1.6466, + "step": 449 + }, + { + "epoch": 0.22, + "learning_rate": 0.00036746658897662793, + "loss": 1.6695, + "step": 450 + }, + { + "epoch": 0.23, + "learning_rate": 0.00036728566731094236, + "loss": 1.8699, + "step": 451 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003671042887736741, + "loss": 1.7128, + "step": 452 + }, + { + "epoch": 0.23, + "learning_rate": 0.00036692245386018353, + "loss": 1.6109, + "step": 453 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003667401630670774, + "loss": 1.5786, + "step": 454 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003665574168922077, + "loss": 1.8162, + "step": 455 + }, + { + "epoch": 0.23, + "learning_rate": 0.00036637421583466995, + "loss": 1.7225, + "step": 456 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003661905603948021, + "loss": 1.7744, + "step": 457 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003660064510741829, + "loss": 1.7345, + "step": 458 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003658218883756308, + "loss": 1.6193, + "step": 459 + }, + { + "epoch": 0.23, + "learning_rate": 0.00036563687280320245, + "loss": 1.5931, + "step": 460 + }, + { + "epoch": 0.23, + "learning_rate": 0.00036545140486219133, + "loss": 1.6595, + "step": 461 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003652654850591264, + "loss": 1.7491, + "step": 462 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003650791139017707, + "loss": 1.4703, + "step": 463 + }, + { + "epoch": 0.23, + "learning_rate": 0.00036489229189911985, + "loss": 1.615, + "step": 464 + }, + { + "epoch": 0.23, + "learning_rate": 0.000364705019561401, + "loss": 1.6676, + "step": 465 + }, + { + "epoch": 0.23, + "learning_rate": 0.00036451729740007084, + "loss": 1.5476, + "step": 466 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003643291259278149, + "loss": 1.4557, + "step": 467 + }, + { + "epoch": 0.23, + "learning_rate": 0.00036414050565854574, + "loss": 1.7099, + "step": 468 + }, + { + "epoch": 0.23, + "learning_rate": 0.00036395143710740143, + "loss": 1.7883, + "step": 469 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003637619207907447, + "loss": 1.4658, + "step": 470 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003635719572261608, + "loss": 1.6454, + "step": 471 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003633815469324566, + "loss": 1.6372, + "step": 472 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003631906904296591, + "loss": 1.4782, + "step": 473 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003629993882390139, + "loss": 1.6954, + "step": 474 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003628076408829836, + "loss": 1.7706, + "step": 475 + }, + { + "epoch": 0.24, + "learning_rate": 0.00036261544888524695, + "loss": 1.7135, + "step": 476 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003624228127706968, + "loss": 1.6762, + "step": 477 + }, + { + "epoch": 0.24, + "learning_rate": 0.000362229733065439, + "loss": 1.697, + "step": 478 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003620362102967909, + "loss": 1.5524, + "step": 479 + }, + { + "epoch": 0.24, + "learning_rate": 0.00036184224499327976, + "loss": 1.488, + "step": 480 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003616478376846417, + "loss": 1.5274, + "step": 481 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003614529889018197, + "loss": 1.437, + "step": 482 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003612576991769627, + "loss": 1.6498, + "step": 483 + }, + { + "epoch": 0.24, + "learning_rate": 0.00036106196904342377, + "loss": 1.6727, + "step": 484 + }, + { + "epoch": 0.24, + "learning_rate": 0.00036086579903575866, + "loss": 1.5969, + "step": 485 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003606691896897248, + "loss": 1.6462, + "step": 486 + }, + { + "epoch": 0.24, + "learning_rate": 0.000360472141542279, + "loss": 1.5607, + "step": 487 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003602746551315769, + "loss": 1.687, + "step": 488 + }, + { + "epoch": 0.24, + "learning_rate": 0.000360076730996971, + "loss": 1.3288, + "step": 489 + }, + { + "epoch": 0.24, + "learning_rate": 0.000359878369679009, + "loss": 1.6054, + "step": 490 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003596795717194328, + "loss": 1.6685, + "step": 491 + }, + { + "epoch": 0.25, + "learning_rate": 0.00035948033766117687, + "loss": 1.5, + "step": 492 + }, + { + "epoch": 0.25, + "learning_rate": 0.00035928066804836653, + "loss": 1.7705, + "step": 493 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003590805634263167, + "loss": 1.4625, + "step": 494 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003588800243415304, + "loss": 1.438, + "step": 495 + }, + { + "epoch": 0.25, + "learning_rate": 0.00035867905134169716, + "loss": 1.7041, + "step": 496 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003584776449756915, + "loss": 1.5696, + "step": 497 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003582758057935717, + "loss": 1.571, + "step": 498 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003580735343465778, + "loss": 1.66, + "step": 499 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003578708311871308, + "loss": 1.4366, + "step": 500 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003576676968688303, + "loss": 1.5916, + "step": 501 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003574641319464537, + "loss": 1.5619, + "step": 502 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003572601369759544, + "loss": 1.7142, + "step": 503 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003570557125144602, + "loss": 1.7435, + "step": 504 + }, + { + "epoch": 0.25, + "learning_rate": 0.00035685085912027197, + "loss": 1.5314, + "step": 505 + }, + { + "epoch": 0.25, + "learning_rate": 0.00035664557735286197, + "loss": 1.8234, + "step": 506 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003564398677728724, + "loss": 1.7898, + "step": 507 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003562337309421139, + "loss": 1.5523, + "step": 508 + }, + { + "epoch": 0.25, + "learning_rate": 0.00035602716742356397, + "loss": 1.5225, + "step": 509 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003558201777813653, + "loss": 1.784, + "step": 510 + }, + { + "epoch": 0.26, + "learning_rate": 0.00035561276258082444, + "loss": 1.5623, + "step": 511 + }, + { + "epoch": 0.26, + "learning_rate": 0.00035540492238841025, + "loss": 1.6344, + "step": 512 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003551966577717522, + "loss": 1.7194, + "step": 513 + }, + { + "epoch": 0.26, + "learning_rate": 0.00035498796929963895, + "loss": 1.4878, + "step": 514 + }, + { + "epoch": 0.26, + "learning_rate": 0.00035477885754201666, + "loss": 1.5972, + "step": 515 + }, + { + "epoch": 0.26, + "learning_rate": 0.00035456932306998765, + "loss": 1.4085, + "step": 516 + }, + { + "epoch": 0.26, + "learning_rate": 0.00035435936645580846, + "loss": 1.4897, + "step": 517 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003541489882728889, + "loss": 1.5127, + "step": 518 + }, + { + "epoch": 0.26, + "learning_rate": 0.00035393818909578985, + "loss": 1.6159, + "step": 519 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003537269695002221, + "loss": 1.6481, + "step": 520 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003535153300630444, + "loss": 1.6057, + "step": 521 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003533032713622625, + "loss": 1.6588, + "step": 522 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003530907939770269, + "loss": 1.6406, + "step": 523 + }, + { + "epoch": 0.26, + "learning_rate": 0.00035287789848763166, + "loss": 1.4532, + "step": 524 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003526645854755128, + "loss": 1.4354, + "step": 525 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003524508555232464, + "loss": 1.6292, + "step": 526 + }, + { + "epoch": 0.26, + "learning_rate": 0.00035223670921454757, + "loss": 1.7245, + "step": 527 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003520221471342682, + "loss": 1.5872, + "step": 528 + }, + { + "epoch": 0.26, + "learning_rate": 0.000351807169868396, + "loss": 1.5689, + "step": 529 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003515917780040522, + "loss": 1.719, + "step": 530 + }, + { + "epoch": 0.27, + "learning_rate": 0.0003513759721294907, + "loss": 1.6705, + "step": 531 + }, + { + "epoch": 0.27, + "learning_rate": 0.00035115975283409593, + "loss": 1.6617, + "step": 532 + }, + { + "epoch": 0.27, + "learning_rate": 0.0003509431207083814, + "loss": 1.6712, + "step": 533 + }, + { + "epoch": 0.27, + "learning_rate": 0.0003507260763439882, + "loss": 1.5884, + "step": 534 + }, + { + "epoch": 0.27, + "learning_rate": 0.0003505086203336831, + "loss": 1.7172, + "step": 535 + }, + { + "epoch": 0.27, + "learning_rate": 0.0003502907532713573, + "loss": 1.6537, + "step": 536 + }, + { + "epoch": 0.27, + "learning_rate": 0.00035007247575202446, + "loss": 1.5581, + "step": 537 + }, + { + "epoch": 0.27, + "learning_rate": 0.0003498537883718194, + "loss": 1.632, + "step": 538 + }, + { + "epoch": 0.27, + "learning_rate": 0.00034963469172799615, + "loss": 1.5551, + "step": 539 + }, + { + "epoch": 0.27, + "learning_rate": 0.0003494151864189266, + "loss": 1.7205, + "step": 540 + }, + { + "epoch": 0.27, + "learning_rate": 0.00034919527304409857, + "loss": 1.5633, + "step": 541 + }, + { + "epoch": 0.27, + "learning_rate": 0.0003489749522041145, + "loss": 1.5343, + "step": 542 + }, + { + "epoch": 0.27, + "learning_rate": 0.00034875422450068963, + "loss": 1.6079, + "step": 543 + }, + { + "epoch": 0.27, + "learning_rate": 0.0003485330905366503, + "loss": 1.4977, + "step": 544 + }, + { + "epoch": 0.27, + "learning_rate": 0.0003483115509159325, + "loss": 1.6487, + "step": 545 + }, + { + "epoch": 0.27, + "learning_rate": 0.00034808960624358, + "loss": 1.6665, + "step": 546 + }, + { + "epoch": 0.27, + "learning_rate": 0.00034786725712574287, + "loss": 1.6199, + "step": 547 + }, + { + "epoch": 0.27, + "learning_rate": 0.0003476445041696757, + "loss": 1.5882, + "step": 548 + }, + { + "epoch": 0.27, + "learning_rate": 0.000347421347983736, + "loss": 1.6632, + "step": 549 + }, + { + "epoch": 0.27, + "learning_rate": 0.00034719778917738256, + "loss": 1.4553, + "step": 550 + }, + { + "epoch": 0.28, + "learning_rate": 0.0003469738283611738, + "loss": 1.5907, + "step": 551 + }, + { + "epoch": 0.28, + "learning_rate": 0.00034674946614676597, + "loss": 1.7215, + "step": 552 + }, + { + "epoch": 0.28, + "learning_rate": 0.0003465247031469117, + "loss": 1.6233, + "step": 553 + }, + { + "epoch": 0.28, + "learning_rate": 0.000346299539975458, + "loss": 1.6185, + "step": 554 + }, + { + "epoch": 0.28, + "learning_rate": 0.000346073977247345, + "loss": 1.4927, + "step": 555 + }, + { + "epoch": 0.28, + "learning_rate": 0.000345848015578604, + "loss": 1.6307, + "step": 556 + }, + { + "epoch": 0.28, + "learning_rate": 0.00034562165558635577, + "loss": 1.6981, + "step": 557 + }, + { + "epoch": 0.28, + "learning_rate": 0.00034539489788880883, + "loss": 1.5469, + "step": 558 + }, + { + "epoch": 0.28, + "learning_rate": 0.0003451677431052582, + "loss": 1.6071, + "step": 559 + }, + { + "epoch": 0.28, + "learning_rate": 0.0003449401918560831, + "loss": 1.658, + "step": 560 + }, + { + "epoch": 0.28, + "learning_rate": 0.0003447122447627456, + "loss": 1.6857, + "step": 561 + }, + { + "epoch": 0.28, + "learning_rate": 0.0003444839024477889, + "loss": 1.5205, + "step": 562 + }, + { + "epoch": 0.28, + "learning_rate": 0.0003442551655348355, + "loss": 1.5646, + "step": 563 + }, + { + "epoch": 0.28, + "learning_rate": 0.00034402603464858564, + "loss": 1.7056, + "step": 564 + }, + { + "epoch": 0.28, + "learning_rate": 0.0003437965104148156, + "loss": 1.6276, + "step": 565 + }, + { + "epoch": 0.28, + "learning_rate": 0.00034356659346037585, + "loss": 1.6262, + "step": 566 + }, + { + "epoch": 0.28, + "learning_rate": 0.00034333628441318936, + "loss": 1.6572, + "step": 567 + }, + { + "epoch": 0.28, + "learning_rate": 0.00034310558390225, + "loss": 1.6792, + "step": 568 + }, + { + "epoch": 0.28, + "learning_rate": 0.0003428744925576208, + "loss": 1.4858, + "step": 569 + }, + { + "epoch": 0.28, + "learning_rate": 0.0003426430110104321, + "loss": 1.318, + "step": 570 + }, + { + "epoch": 0.29, + "learning_rate": 0.00034241113989288003, + "loss": 1.3952, + "step": 571 + }, + { + "epoch": 0.29, + "learning_rate": 0.00034217887983822463, + "loss": 1.7068, + "step": 572 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003419462314807879, + "loss": 1.7115, + "step": 573 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003417131954559529, + "loss": 1.4274, + "step": 574 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003414797724001609, + "loss": 1.4477, + "step": 575 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003412459629509105, + "loss": 1.4958, + "step": 576 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003410117677467553, + "loss": 1.513, + "step": 577 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003407771874273028, + "loss": 1.6229, + "step": 578 + }, + { + "epoch": 0.29, + "learning_rate": 0.00034054222263321194, + "loss": 1.5662, + "step": 579 + }, + { + "epoch": 0.29, + "learning_rate": 0.000340306874006192, + "loss": 1.6515, + "step": 580 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003400711421890001, + "loss": 1.762, + "step": 581 + }, + { + "epoch": 0.29, + "learning_rate": 0.00033983502782544044, + "loss": 1.4794, + "step": 582 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003395985315603615, + "loss": 1.5187, + "step": 583 + }, + { + "epoch": 0.29, + "learning_rate": 0.00033936165403965516, + "loss": 1.5355, + "step": 584 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003391243959102542, + "loss": 1.6659, + "step": 585 + }, + { + "epoch": 0.29, + "learning_rate": 0.00033888675782013113, + "loss": 1.5111, + "step": 586 + }, + { + "epoch": 0.29, + "learning_rate": 0.000338648740418296, + "loss": 1.7099, + "step": 587 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003384103443547948, + "loss": 1.6258, + "step": 588 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003381715702807079, + "loss": 1.5812, + "step": 589 + }, + { + "epoch": 0.29, + "learning_rate": 0.00033793241884814783, + "loss": 1.5988, + "step": 590 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003376928907102578, + "loss": 1.4556, + "step": 591 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003374529865212097, + "loss": 1.4119, + "step": 592 + }, + { + "epoch": 0.3, + "learning_rate": 0.00033721270693620254, + "loss": 1.8002, + "step": 593 + }, + { + "epoch": 0.3, + "learning_rate": 0.00033697205261146076, + "loss": 1.6446, + "step": 594 + }, + { + "epoch": 0.3, + "learning_rate": 0.00033673102420423193, + "loss": 1.5731, + "step": 595 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003364896223727855, + "loss": 1.5963, + "step": 596 + }, + { + "epoch": 0.3, + "learning_rate": 0.00033624784777641067, + "loss": 1.603, + "step": 597 + }, + { + "epoch": 0.3, + "learning_rate": 0.00033600570107541463, + "loss": 1.7701, + "step": 598 + }, + { + "epoch": 0.3, + "learning_rate": 0.00033576318293112103, + "loss": 1.4142, + "step": 599 + }, + { + "epoch": 0.3, + "learning_rate": 0.00033552029400586773, + "loss": 1.6056, + "step": 600 + }, + { + "epoch": 0.3, + "learning_rate": 0.00033527703496300535, + "loss": 1.6508, + "step": 601 + }, + { + "epoch": 0.3, + "learning_rate": 0.00033503340646689534, + "loss": 1.6636, + "step": 602 + }, + { + "epoch": 0.3, + "learning_rate": 0.00033478940918290815, + "loss": 1.6291, + "step": 603 + }, + { + "epoch": 0.3, + "learning_rate": 0.00033454504377742136, + "loss": 1.4496, + "step": 604 + }, + { + "epoch": 0.3, + "learning_rate": 0.000334300310917818, + "loss": 1.5698, + "step": 605 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003340552112724845, + "loss": 1.6541, + "step": 606 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003338097455108093, + "loss": 1.5061, + "step": 607 + }, + { + "epoch": 0.3, + "learning_rate": 0.00033356391430318047, + "loss": 1.4854, + "step": 608 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003333177183209842, + "loss": 1.6627, + "step": 609 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003330711582366031, + "loss": 1.535, + "step": 610 + }, + { + "epoch": 0.31, + "learning_rate": 0.00033282423472341384, + "loss": 1.6511, + "step": 611 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003325769484557859, + "loss": 1.5739, + "step": 612 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003323293001090795, + "loss": 1.3858, + "step": 613 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003320812903596434, + "loss": 1.646, + "step": 614 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003318329198848138, + "loss": 1.7639, + "step": 615 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003315841893629118, + "loss": 1.447, + "step": 616 + }, + { + "epoch": 0.31, + "learning_rate": 0.000331335099473242, + "loss": 1.6622, + "step": 617 + }, + { + "epoch": 0.31, + "learning_rate": 0.00033108565089609034, + "loss": 1.5356, + "step": 618 + }, + { + "epoch": 0.31, + "learning_rate": 0.00033083584431272225, + "loss": 1.4241, + "step": 619 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003305856804053812, + "loss": 1.8722, + "step": 620 + }, + { + "epoch": 0.31, + "learning_rate": 0.00033033515985728633, + "loss": 1.4449, + "step": 621 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003300842833526309, + "loss": 1.8373, + "step": 622 + }, + { + "epoch": 0.31, + "learning_rate": 0.00032983305157658027, + "loss": 1.6631, + "step": 623 + }, + { + "epoch": 0.31, + "learning_rate": 0.00032958146521527, + "loss": 1.771, + "step": 624 + }, + { + "epoch": 0.31, + "learning_rate": 0.00032932952495580425, + "loss": 1.6418, + "step": 625 + }, + { + "epoch": 0.31, + "learning_rate": 0.00032907723148625355, + "loss": 1.4662, + "step": 626 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003288245854956531, + "loss": 1.655, + "step": 627 + }, + { + "epoch": 0.31, + "learning_rate": 0.00032857158767400104, + "loss": 1.4315, + "step": 628 + }, + { + "epoch": 0.31, + "learning_rate": 0.000328318238712256, + "loss": 1.716, + "step": 629 + }, + { + "epoch": 0.31, + "learning_rate": 0.00032806453930233595, + "loss": 1.6393, + "step": 630 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003278104901371159, + "loss": 1.7138, + "step": 631 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003275560919104259, + "loss": 1.6158, + "step": 632 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003273013453170496, + "loss": 1.5414, + "step": 633 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003270462510527218, + "loss": 1.5223, + "step": 634 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003267908098141271, + "loss": 1.6761, + "step": 635 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003265350222988972, + "loss": 1.443, + "step": 636 + }, + { + "epoch": 0.32, + "learning_rate": 0.00032627888920561024, + "loss": 1.756, + "step": 637 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003260224112337876, + "loss": 1.506, + "step": 638 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003257655890838927, + "loss": 1.6524, + "step": 639 + }, + { + "epoch": 0.32, + "learning_rate": 0.00032550842345732917, + "loss": 1.6236, + "step": 640 + }, + { + "epoch": 0.32, + "learning_rate": 0.00032525091505643825, + "loss": 1.578, + "step": 641 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003249930645844978, + "loss": 1.2744, + "step": 642 + }, + { + "epoch": 0.32, + "learning_rate": 0.00032473487274571963, + "loss": 1.7205, + "step": 643 + }, + { + "epoch": 0.32, + "learning_rate": 0.00032447634024524786, + "loss": 1.6393, + "step": 644 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003242174677891571, + "loss": 1.5814, + "step": 645 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003239582560844503, + "loss": 1.4664, + "step": 646 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003236987058390571, + "loss": 1.6065, + "step": 647 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003234388177618314, + "loss": 1.5789, + "step": 648 + }, + { + "epoch": 0.32, + "learning_rate": 0.00032317859256255016, + "loss": 1.4482, + "step": 649 + }, + { + "epoch": 0.32, + "learning_rate": 0.00032291803095191074, + "loss": 1.5858, + "step": 650 + }, + { + "epoch": 0.33, + "learning_rate": 0.00032265713364152933, + "loss": 1.6917, + "step": 651 + }, + { + "epoch": 0.33, + "learning_rate": 0.000322395901343939, + "loss": 1.5473, + "step": 652 + }, + { + "epoch": 0.33, + "learning_rate": 0.00032213433477258776, + "loss": 1.574, + "step": 653 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003218724346418364, + "loss": 1.5813, + "step": 654 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003216102016669568, + "loss": 1.4462, + "step": 655 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003213476365641298, + "loss": 1.5667, + "step": 656 + }, + { + "epoch": 0.33, + "learning_rate": 0.00032108474005044325, + "loss": 1.6149, + "step": 657 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003208215128438904, + "loss": 1.5657, + "step": 658 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003205579556633673, + "loss": 1.5153, + "step": 659 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003202940692286714, + "loss": 1.5941, + "step": 660 + }, + { + "epoch": 0.33, + "learning_rate": 0.00032002985426049925, + "loss": 1.6282, + "step": 661 + }, + { + "epoch": 0.33, + "learning_rate": 0.00031976531148044475, + "loss": 1.478, + "step": 662 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003195004416109971, + "loss": 1.4623, + "step": 663 + }, + { + "epoch": 0.33, + "learning_rate": 0.00031923524537553864, + "loss": 1.5935, + "step": 664 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003189697234983432, + "loss": 1.5293, + "step": 665 + }, + { + "epoch": 0.33, + "learning_rate": 0.000318703876704574, + "loss": 1.4841, + "step": 666 + }, + { + "epoch": 0.33, + "learning_rate": 0.00031843770572028145, + "loss": 1.4985, + "step": 667 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003181712112724015, + "loss": 1.604, + "step": 668 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003179043940887535, + "loss": 1.5311, + "step": 669 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003176372548980381, + "loss": 1.4669, + "step": 670 + }, + { + "epoch": 0.34, + "learning_rate": 0.00031736979442983557, + "loss": 1.4565, + "step": 671 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003171020134146035, + "loss": 1.6435, + "step": 672 + }, + { + "epoch": 0.34, + "learning_rate": 0.00031683391258367484, + "loss": 1.6284, + "step": 673 + }, + { + "epoch": 0.34, + "learning_rate": 0.00031656549266925613, + "loss": 1.4404, + "step": 674 + }, + { + "epoch": 0.34, + "learning_rate": 0.00031629675440442536, + "loss": 1.5517, + "step": 675 + }, + { + "epoch": 0.34, + "learning_rate": 0.00031602769852312983, + "loss": 1.6271, + "step": 676 + }, + { + "epoch": 0.34, + "learning_rate": 0.00031575832576018437, + "loss": 1.6055, + "step": 677 + }, + { + "epoch": 0.34, + "learning_rate": 0.00031548863685126926, + "loss": 1.7491, + "step": 678 + }, + { + "epoch": 0.34, + "learning_rate": 0.00031521863253292814, + "loss": 1.6137, + "step": 679 + }, + { + "epoch": 0.34, + "learning_rate": 0.00031494831354256605, + "loss": 1.5847, + "step": 680 + }, + { + "epoch": 0.34, + "learning_rate": 0.00031467768061844753, + "loss": 1.3082, + "step": 681 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003144067344996944, + "loss": 1.6252, + "step": 682 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003141354759262839, + "loss": 1.5987, + "step": 683 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003138639056390465, + "loss": 1.4824, + "step": 684 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003135920243796641, + "loss": 1.3903, + "step": 685 + }, + { + "epoch": 0.34, + "learning_rate": 0.000313319832890668, + "loss": 1.5826, + "step": 686 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003130473319154365, + "loss": 1.5808, + "step": 687 + }, + { + "epoch": 0.34, + "learning_rate": 0.00031277452219819325, + "loss": 1.6244, + "step": 688 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003125014044840051, + "loss": 1.5069, + "step": 689 + }, + { + "epoch": 0.34, + "learning_rate": 0.00031222797951878026, + "loss": 1.6237, + "step": 690 + }, + { + "epoch": 0.35, + "learning_rate": 0.00031195424804926567, + "loss": 1.4301, + "step": 691 + }, + { + "epoch": 0.35, + "learning_rate": 0.00031168021082304565, + "loss": 1.5819, + "step": 692 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003114058685885396, + "loss": 1.8096, + "step": 693 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003111312220949996, + "loss": 1.4657, + "step": 694 + }, + { + "epoch": 0.35, + "learning_rate": 0.00031085627209250915, + "loss": 1.6379, + "step": 695 + }, + { + "epoch": 0.35, + "learning_rate": 0.00031058101933198023, + "loss": 1.4865, + "step": 696 + }, + { + "epoch": 0.35, + "learning_rate": 0.00031030546456515195, + "loss": 1.3837, + "step": 697 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003100296085445881, + "loss": 1.6494, + "step": 698 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003097534520236754, + "loss": 1.7428, + "step": 699 + }, + { + "epoch": 0.35, + "learning_rate": 0.00030947699575662087, + "loss": 1.5992, + "step": 700 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003092002404984506, + "loss": 1.6187, + "step": 701 + }, + { + "epoch": 0.35, + "learning_rate": 0.00030892318700500703, + "loss": 1.7533, + "step": 702 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003086458360329471, + "loss": 1.6341, + "step": 703 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003083681883397403, + "loss": 1.6211, + "step": 704 + }, + { + "epoch": 0.35, + "learning_rate": 0.00030809024468366635, + "loss": 1.5643, + "step": 705 + }, + { + "epoch": 0.35, + "learning_rate": 0.00030781200582381336, + "loss": 1.5116, + "step": 706 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003075334725200757, + "loss": 1.627, + "step": 707 + }, + { + "epoch": 0.35, + "learning_rate": 0.00030725464553315186, + "loss": 1.8024, + "step": 708 + }, + { + "epoch": 0.35, + "learning_rate": 0.00030697552562454223, + "loss": 1.4348, + "step": 709 + }, + { + "epoch": 0.35, + "learning_rate": 0.00030669611355654743, + "loss": 1.4995, + "step": 710 + }, + { + "epoch": 0.36, + "learning_rate": 0.000306416410092266, + "loss": 1.6125, + "step": 711 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003061364159955921, + "loss": 1.6559, + "step": 712 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003058561320312139, + "loss": 1.469, + "step": 713 + }, + { + "epoch": 0.36, + "learning_rate": 0.00030557555896461086, + "loss": 1.404, + "step": 714 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003052946975620524, + "loss": 1.6919, + "step": 715 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003050135485905951, + "loss": 1.5993, + "step": 716 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003047321128180812, + "loss": 1.5386, + "step": 717 + }, + { + "epoch": 0.36, + "learning_rate": 0.00030445039101313593, + "loss": 1.472, + "step": 718 + }, + { + "epoch": 0.36, + "learning_rate": 0.00030416838394516587, + "loss": 1.413, + "step": 719 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003038860923843567, + "loss": 1.6552, + "step": 720 + }, + { + "epoch": 0.36, + "learning_rate": 0.00030360351710167094, + "loss": 1.4909, + "step": 721 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003033206588688461, + "loss": 1.4127, + "step": 722 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003030375184583923, + "loss": 1.4671, + "step": 723 + }, + { + "epoch": 0.36, + "learning_rate": 0.00030275409664359056, + "loss": 1.6795, + "step": 724 + }, + { + "epoch": 0.36, + "learning_rate": 0.00030247039419849025, + "loss": 1.6729, + "step": 725 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003021864118979071, + "loss": 1.4749, + "step": 726 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003019021505174215, + "loss": 1.6014, + "step": 727 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003016176108333756, + "loss": 1.6341, + "step": 728 + }, + { + "epoch": 0.36, + "learning_rate": 0.00030133279362287187, + "loss": 1.823, + "step": 729 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003010476996637706, + "loss": 1.5576, + "step": 730 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003007623297346881, + "loss": 1.729, + "step": 731 + }, + { + "epoch": 0.37, + "learning_rate": 0.00030047668461499413, + "loss": 1.4666, + "step": 732 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003001907650848103, + "loss": 1.5468, + "step": 733 + }, + { + "epoch": 0.37, + "learning_rate": 0.0002999045719250074, + "loss": 1.6001, + "step": 734 + }, + { + "epoch": 0.37, + "learning_rate": 0.00029961810591720364, + "loss": 1.6967, + "step": 735 + }, + { + "epoch": 0.37, + "learning_rate": 0.00029933136784376235, + "loss": 1.5668, + "step": 736 + }, + { + "epoch": 0.37, + "learning_rate": 0.00029904435848779, + "loss": 1.5286, + "step": 737 + }, + { + "epoch": 0.37, + "learning_rate": 0.0002987570786331339, + "loss": 1.6096, + "step": 738 + }, + { + "epoch": 0.37, + "learning_rate": 0.00029846952906438, + "loss": 1.5456, + "step": 739 + }, + { + "epoch": 0.37, + "learning_rate": 0.00029818171056685103, + "loss": 1.6902, + "step": 740 + }, + { + "epoch": 0.37, + "learning_rate": 0.0002978936239266042, + "loss": 1.7138, + "step": 741 + }, + { + "epoch": 0.37, + "learning_rate": 0.00029760526993042886, + "loss": 1.5856, + "step": 742 + }, + { + "epoch": 0.37, + "learning_rate": 0.00029731664936584473, + "loss": 1.464, + "step": 743 + }, + { + "epoch": 0.37, + "learning_rate": 0.00029702776302109943, + "loss": 1.4777, + "step": 744 + }, + { + "epoch": 0.37, + "learning_rate": 0.00029673861168516634, + "loss": 1.6905, + "step": 745 + }, + { + "epoch": 0.37, + "learning_rate": 0.0002964491961477429, + "loss": 1.54, + "step": 746 + }, + { + "epoch": 0.37, + "learning_rate": 0.00029615951719924783, + "loss": 1.6324, + "step": 747 + }, + { + "epoch": 0.37, + "learning_rate": 0.00029586957563081925, + "loss": 1.5705, + "step": 748 + }, + { + "epoch": 0.37, + "learning_rate": 0.0002955793722343127, + "loss": 1.7081, + "step": 749 + }, + { + "epoch": 0.37, + "learning_rate": 0.0002952889078022985, + "loss": 1.5256, + "step": 750 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002949981831280602, + "loss": 1.4953, + "step": 751 + }, + { + "epoch": 0.38, + "learning_rate": 0.000294707199005592, + "loss": 1.51, + "step": 752 + }, + { + "epoch": 0.38, + "learning_rate": 0.00029441595622959656, + "loss": 1.5811, + "step": 753 + }, + { + "epoch": 0.38, + "learning_rate": 0.000294124455595483, + "loss": 1.5375, + "step": 754 + }, + { + "epoch": 0.38, + "learning_rate": 0.00029383269789936466, + "loss": 1.7053, + "step": 755 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002935406839380571, + "loss": 1.4679, + "step": 756 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002932484145090755, + "loss": 1.6812, + "step": 757 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002929558904106329, + "loss": 1.5965, + "step": 758 + }, + { + "epoch": 0.38, + "learning_rate": 0.00029266311244163784, + "loss": 1.7395, + "step": 759 + }, + { + "epoch": 0.38, + "learning_rate": 0.00029237008140169227, + "loss": 1.5782, + "step": 760 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002920767980910891, + "loss": 1.6442, + "step": 761 + }, + { + "epoch": 0.38, + "learning_rate": 0.00029178326331081043, + "loss": 1.5455, + "step": 762 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002914894778625251, + "loss": 1.3559, + "step": 763 + }, + { + "epoch": 0.38, + "learning_rate": 0.00029119544254858643, + "loss": 1.4975, + "step": 764 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002909011581720302, + "loss": 1.6059, + "step": 765 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002906066255365724, + "loss": 1.6428, + "step": 766 + }, + { + "epoch": 0.38, + "learning_rate": 0.00029031184544660717, + "loss": 1.6501, + "step": 767 + }, + { + "epoch": 0.38, + "learning_rate": 0.00029001681870720434, + "loss": 1.4806, + "step": 768 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002897215461241072, + "loss": 1.4095, + "step": 769 + }, + { + "epoch": 0.38, + "learning_rate": 0.00028942602850373086, + "loss": 1.6663, + "step": 770 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002891302666531592, + "loss": 1.8152, + "step": 771 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002888342613801436, + "loss": 1.7046, + "step": 772 + }, + { + "epoch": 0.39, + "learning_rate": 0.00028853801349309983, + "loss": 1.6848, + "step": 773 + }, + { + "epoch": 0.39, + "learning_rate": 0.00028824152380110645, + "loss": 1.6858, + "step": 774 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002879447931139023, + "loss": 1.5137, + "step": 775 + }, + { + "epoch": 0.39, + "learning_rate": 0.00028764782224188453, + "loss": 1.6079, + "step": 776 + }, + { + "epoch": 0.39, + "learning_rate": 0.00028735061199610626, + "loss": 1.7438, + "step": 777 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002870531631882742, + "loss": 1.6403, + "step": 778 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002867554766307468, + "loss": 1.6697, + "step": 779 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002864575531365316, + "loss": 1.513, + "step": 780 + }, + { + "epoch": 0.39, + "learning_rate": 0.00028615939351928337, + "loss": 1.5807, + "step": 781 + }, + { + "epoch": 0.39, + "learning_rate": 0.00028586099859330183, + "loss": 1.5632, + "step": 782 + }, + { + "epoch": 0.39, + "learning_rate": 0.00028556236917352926, + "loss": 1.7058, + "step": 783 + }, + { + "epoch": 0.39, + "learning_rate": 0.00028526350607554823, + "loss": 1.5961, + "step": 784 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002849644101155797, + "loss": 1.6368, + "step": 785 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002846650821104805, + "loss": 1.5342, + "step": 786 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002843655228777413, + "loss": 1.6549, + "step": 787 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002840657332354841, + "loss": 1.5317, + "step": 788 + }, + { + "epoch": 0.39, + "learning_rate": 0.00028376571400246035, + "loss": 1.687, + "step": 789 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002834654659980484, + "loss": 1.421, + "step": 790 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002831649900422514, + "loss": 1.639, + "step": 791 + }, + { + "epoch": 0.4, + "learning_rate": 0.00028286428695569523, + "loss": 1.62, + "step": 792 + }, + { + "epoch": 0.4, + "learning_rate": 0.00028256335755962584, + "loss": 1.5643, + "step": 793 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002822622026759074, + "loss": 1.6884, + "step": 794 + }, + { + "epoch": 0.4, + "learning_rate": 0.00028196082312701977, + "loss": 1.7462, + "step": 795 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002816592197360566, + "loss": 1.6372, + "step": 796 + }, + { + "epoch": 0.4, + "learning_rate": 0.00028135739332672274, + "loss": 1.5019, + "step": 797 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002810553447233321, + "loss": 1.5365, + "step": 798 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002807530747508056, + "loss": 1.6709, + "step": 799 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002804505842346684, + "loss": 1.5888, + "step": 800 + }, + { + "epoch": 0.4, + "learning_rate": 0.00028014787400104825, + "loss": 1.6876, + "step": 801 + }, + { + "epoch": 0.4, + "learning_rate": 0.00027984494487667296, + "loss": 1.5555, + "step": 802 + }, + { + "epoch": 0.4, + "learning_rate": 0.000279541797688868, + "loss": 1.762, + "step": 803 + }, + { + "epoch": 0.4, + "learning_rate": 0.00027923843326555463, + "loss": 1.5098, + "step": 804 + }, + { + "epoch": 0.4, + "learning_rate": 0.00027893485243524706, + "loss": 1.7013, + "step": 805 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002786310560270509, + "loss": 1.5521, + "step": 806 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002783270448706601, + "loss": 1.6102, + "step": 807 + }, + { + "epoch": 0.4, + "learning_rate": 0.00027802281979635564, + "loss": 1.5667, + "step": 808 + }, + { + "epoch": 0.4, + "learning_rate": 0.00027771838163500223, + "loss": 1.3938, + "step": 809 + }, + { + "epoch": 0.4, + "learning_rate": 0.00027741373121804684, + "loss": 1.5571, + "step": 810 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002771088693775159, + "loss": 1.4687, + "step": 811 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002768037969460135, + "loss": 1.6164, + "step": 812 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002764985147567187, + "loss": 1.5342, + "step": 813 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002761930236433836, + "loss": 1.675, + "step": 814 + }, + { + "epoch": 0.41, + "learning_rate": 0.00027588732444033066, + "loss": 1.5154, + "step": 815 + }, + { + "epoch": 0.41, + "learning_rate": 0.00027558141798245074, + "loss": 1.6015, + "step": 816 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002752753051052007, + "loss": 1.6213, + "step": 817 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002749689866446015, + "loss": 1.7362, + "step": 818 + }, + { + "epoch": 0.41, + "learning_rate": 0.000274662463437235, + "loss": 1.5406, + "step": 819 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002743557363202427, + "loss": 1.5625, + "step": 820 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002740488061313225, + "loss": 1.7225, + "step": 821 + }, + { + "epoch": 0.41, + "learning_rate": 0.00027374167370872746, + "loss": 1.5959, + "step": 822 + }, + { + "epoch": 0.41, + "learning_rate": 0.00027343433989126273, + "loss": 1.5027, + "step": 823 + }, + { + "epoch": 0.41, + "learning_rate": 0.00027312680551828337, + "loss": 1.5802, + "step": 824 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002728190714296923, + "loss": 1.6822, + "step": 825 + }, + { + "epoch": 0.41, + "learning_rate": 0.00027251113846593785, + "loss": 1.5289, + "step": 826 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002722030074680114, + "loss": 1.6296, + "step": 827 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002718946792774455, + "loss": 1.6557, + "step": 828 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002715861547363109, + "loss": 1.5834, + "step": 829 + }, + { + "epoch": 0.41, + "learning_rate": 0.00027127743468721466, + "loss": 1.4457, + "step": 830 + }, + { + "epoch": 0.42, + "learning_rate": 0.00027096851997329794, + "loss": 1.5919, + "step": 831 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002706594114382335, + "loss": 1.7794, + "step": 832 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002703501099262233, + "loss": 1.5044, + "step": 833 + }, + { + "epoch": 0.42, + "learning_rate": 0.00027004061628199645, + "loss": 1.4793, + "step": 834 + }, + { + "epoch": 0.42, + "learning_rate": 0.00026973093135080684, + "loss": 1.4537, + "step": 835 + }, + { + "epoch": 0.42, + "learning_rate": 0.00026942105597843076, + "loss": 1.5793, + "step": 836 + }, + { + "epoch": 0.42, + "learning_rate": 0.00026911099101116444, + "loss": 1.5592, + "step": 837 + }, + { + "epoch": 0.42, + "learning_rate": 0.00026880073729582213, + "loss": 1.4737, + "step": 838 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002684902956797335, + "loss": 1.5163, + "step": 839 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002681796670107413, + "loss": 1.3649, + "step": 840 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002678688521371993, + "loss": 1.6616, + "step": 841 + }, + { + "epoch": 0.42, + "learning_rate": 0.00026755785190796965, + "loss": 1.5122, + "step": 842 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002672466671724208, + "loss": 1.6713, + "step": 843 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002669352987804251, + "loss": 1.594, + "step": 844 + }, + { + "epoch": 0.42, + "learning_rate": 0.00026662374758235655, + "loss": 1.5373, + "step": 845 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002663120144290883, + "loss": 1.3893, + "step": 846 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002660001001719904, + "loss": 1.6124, + "step": 847 + }, + { + "epoch": 0.42, + "learning_rate": 0.00026568800566292763, + "loss": 1.5687, + "step": 848 + }, + { + "epoch": 0.42, + "learning_rate": 0.000265375731754257, + "loss": 1.2931, + "step": 849 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002650632792988255, + "loss": 1.5906, + "step": 850 + }, + { + "epoch": 0.43, + "learning_rate": 0.00026475064914996773, + "loss": 1.5324, + "step": 851 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002644378421615036, + "loss": 1.6167, + "step": 852 + }, + { + "epoch": 0.43, + "learning_rate": 0.00026412485918773595, + "loss": 1.6344, + "step": 853 + }, + { + "epoch": 0.43, + "learning_rate": 0.00026381170108344827, + "loss": 1.5958, + "step": 854 + }, + { + "epoch": 0.43, + "learning_rate": 0.00026349836870390235, + "loss": 1.4789, + "step": 855 + }, + { + "epoch": 0.43, + "learning_rate": 0.00026318486290483593, + "loss": 1.7105, + "step": 856 + }, + { + "epoch": 0.43, + "learning_rate": 0.00026287118454246033, + "loss": 1.4852, + "step": 857 + }, + { + "epoch": 0.43, + "learning_rate": 0.00026255733447345833, + "loss": 1.6808, + "step": 858 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002622433135549814, + "loss": 1.5825, + "step": 859 + }, + { + "epoch": 0.43, + "learning_rate": 0.00026192912264464785, + "loss": 1.6492, + "step": 860 + }, + { + "epoch": 0.43, + "learning_rate": 0.00026161476260054014, + "loss": 1.5621, + "step": 861 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002613002342812026, + "loss": 1.6808, + "step": 862 + }, + { + "epoch": 0.43, + "learning_rate": 0.00026098553854563916, + "loss": 1.5637, + "step": 863 + }, + { + "epoch": 0.43, + "learning_rate": 0.00026067067625331117, + "loss": 1.6629, + "step": 864 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002603556482641345, + "loss": 1.3583, + "step": 865 + }, + { + "epoch": 0.43, + "learning_rate": 0.00026004045543847796, + "loss": 1.6123, + "step": 866 + }, + { + "epoch": 0.43, + "learning_rate": 0.00025972509863716016, + "loss": 1.4788, + "step": 867 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002594095787214478, + "loss": 1.5373, + "step": 868 + }, + { + "epoch": 0.43, + "learning_rate": 0.00025909389655305305, + "loss": 1.5852, + "step": 869 + }, + { + "epoch": 0.43, + "learning_rate": 0.000258778052994131, + "loss": 1.5569, + "step": 870 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002584620489072777, + "loss": 1.54, + "step": 871 + }, + { + "epoch": 0.44, + "learning_rate": 0.00025814588515552753, + "loss": 1.4899, + "step": 872 + }, + { + "epoch": 0.44, + "learning_rate": 0.000257829562602351, + "loss": 1.4598, + "step": 873 + }, + { + "epoch": 0.44, + "learning_rate": 0.00025751308211165223, + "loss": 1.5688, + "step": 874 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002571964445477668, + "loss": 1.4195, + "step": 875 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002568796507754592, + "loss": 1.6566, + "step": 876 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002565627016599205, + "loss": 1.406, + "step": 877 + }, + { + "epoch": 0.44, + "learning_rate": 0.00025624559806676603, + "loss": 1.827, + "step": 878 + }, + { + "epoch": 0.44, + "learning_rate": 0.00025592834086203315, + "loss": 1.4952, + "step": 879 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002556109309121786, + "loss": 1.4737, + "step": 880 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002552933690840762, + "loss": 1.4676, + "step": 881 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002549756562450149, + "loss": 1.5686, + "step": 882 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002546577932626957, + "loss": 1.639, + "step": 883 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002543397810052299, + "loss": 1.6325, + "step": 884 + }, + { + "epoch": 0.44, + "learning_rate": 0.00025402162034113637, + "loss": 1.4458, + "step": 885 + }, + { + "epoch": 0.44, + "learning_rate": 0.00025370331213933926, + "loss": 1.4646, + "step": 886 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002533848572691658, + "loss": 1.7105, + "step": 887 + }, + { + "epoch": 0.44, + "learning_rate": 0.00025306625660034365, + "loss": 1.6741, + "step": 888 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002527475110029988, + "loss": 1.6431, + "step": 889 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002524286213476529, + "loss": 1.7294, + "step": 890 + }, + { + "epoch": 0.45, + "learning_rate": 0.00025210958850522104, + "loss": 1.5165, + "step": 891 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002517904133470095, + "loss": 1.5219, + "step": 892 + }, + { + "epoch": 0.45, + "learning_rate": 0.00025147109674471317, + "loss": 1.3967, + "step": 893 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002511516395704132, + "loss": 1.6214, + "step": 894 + }, + { + "epoch": 0.45, + "learning_rate": 0.00025083204269657467, + "loss": 1.6539, + "step": 895 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002505123069960442, + "loss": 1.5878, + "step": 896 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002501924333420475, + "loss": 1.6149, + "step": 897 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002498724226081872, + "loss": 1.6648, + "step": 898 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002495522756684402, + "loss": 1.4552, + "step": 899 + }, + { + "epoch": 0.45, + "learning_rate": 0.00024923199339715543, + "loss": 1.531, + "step": 900 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002489115766690513, + "loss": 1.6431, + "step": 901 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002485910263592135, + "loss": 1.5006, + "step": 902 + }, + { + "epoch": 0.45, + "learning_rate": 0.00024827034334309265, + "loss": 1.4951, + "step": 903 + }, + { + "epoch": 0.45, + "learning_rate": 0.00024794952849650174, + "loss": 1.3887, + "step": 904 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002476285826956138, + "loss": 1.593, + "step": 905 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002473075068169593, + "loss": 1.4254, + "step": 906 + }, + { + "epoch": 0.45, + "learning_rate": 0.00024698630173742436, + "loss": 1.5903, + "step": 907 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002466649683342477, + "loss": 1.5224, + "step": 908 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002466649683342477, + "loss": 1.6739, + "step": 909 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002463435074850184, + "loss": 1.4916, + "step": 910 + }, + { + "epoch": 0.46, + "learning_rate": 0.000246021920067674, + "loss": 1.3713, + "step": 911 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002457002069604973, + "loss": 1.3699, + "step": 912 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002453783690421146, + "loss": 1.3908, + "step": 913 + }, + { + "epoch": 0.46, + "learning_rate": 0.000245056407191493, + "loss": 1.4784, + "step": 914 + }, + { + "epoch": 0.46, + "learning_rate": 0.00024473432228793807, + "loss": 1.5448, + "step": 915 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002444121152110915, + "loss": 1.4938, + "step": 916 + }, + { + "epoch": 0.46, + "learning_rate": 0.00024408978684092847, + "loss": 1.5958, + "step": 917 + }, + { + "epoch": 0.46, + "learning_rate": 0.00024376733805775574, + "loss": 1.5453, + "step": 918 + }, + { + "epoch": 0.46, + "learning_rate": 0.00024344476974220855, + "loss": 1.4917, + "step": 919 + }, + { + "epoch": 0.46, + "learning_rate": 0.00024312208277524892, + "loss": 1.4726, + "step": 920 + }, + { + "epoch": 0.46, + "learning_rate": 0.00024279927803816276, + "loss": 1.7581, + "step": 921 + }, + { + "epoch": 0.46, + "learning_rate": 0.00024247635641255766, + "loss": 1.517, + "step": 922 + }, + { + "epoch": 0.46, + "learning_rate": 0.00024215331878036037, + "loss": 1.5761, + "step": 923 + }, + { + "epoch": 0.46, + "learning_rate": 0.00024183016602381447, + "loss": 1.3919, + "step": 924 + }, + { + "epoch": 0.46, + "learning_rate": 0.00024150689902547811, + "loss": 1.6481, + "step": 925 + }, + { + "epoch": 0.46, + "learning_rate": 0.00024118351866822137, + "loss": 1.5396, + "step": 926 + }, + { + "epoch": 0.46, + "learning_rate": 0.00024086002583522382, + "loss": 1.6246, + "step": 927 + }, + { + "epoch": 0.46, + "learning_rate": 0.00024053642140997225, + "loss": 1.4913, + "step": 928 + }, + { + "epoch": 0.46, + "learning_rate": 0.00024021270627625825, + "loss": 1.6974, + "step": 929 + }, + { + "epoch": 0.46, + "learning_rate": 0.00023988888131817583, + "loss": 1.727, + "step": 930 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002395649474201189, + "loss": 1.4335, + "step": 931 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002392409054667788, + "loss": 1.7088, + "step": 932 + }, + { + "epoch": 0.47, + "learning_rate": 0.00023891675634314202, + "loss": 1.6817, + "step": 933 + }, + { + "epoch": 0.47, + "learning_rate": 0.00023859250093448783, + "loss": 1.7091, + "step": 934 + }, + { + "epoch": 0.47, + "learning_rate": 0.00023826814012638568, + "loss": 1.5044, + "step": 935 + }, + { + "epoch": 0.47, + "learning_rate": 0.00023794367480469295, + "loss": 1.6067, + "step": 936 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002376191058555524, + "loss": 1.6023, + "step": 937 + }, + { + "epoch": 0.47, + "learning_rate": 0.00023729443416538982, + "loss": 1.5024, + "step": 938 + }, + { + "epoch": 0.47, + "learning_rate": 0.00023696966062091148, + "loss": 1.6559, + "step": 939 + }, + { + "epoch": 0.47, + "learning_rate": 0.00023664478610910207, + "loss": 1.6554, + "step": 940 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002363198115172219, + "loss": 1.5379, + "step": 941 + }, + { + "epoch": 0.47, + "learning_rate": 0.00023599473773280454, + "loss": 1.4307, + "step": 942 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002356695656436546, + "loss": 1.6144, + "step": 943 + }, + { + "epoch": 0.47, + "learning_rate": 0.00023534429613784497, + "loss": 1.6197, + "step": 944 + }, + { + "epoch": 0.47, + "learning_rate": 0.00023501893010371476, + "loss": 1.6309, + "step": 945 + }, + { + "epoch": 0.47, + "learning_rate": 0.00023469346842986677, + "loss": 1.5075, + "step": 946 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002343679120051648, + "loss": 1.6614, + "step": 947 + }, + { + "epoch": 0.47, + "learning_rate": 0.00023404226171873157, + "loss": 1.4659, + "step": 948 + }, + { + "epoch": 0.47, + "learning_rate": 0.00023371651845994603, + "loss": 1.3864, + "step": 949 + }, + { + "epoch": 0.47, + "learning_rate": 0.00023339068311844114, + "loss": 1.3556, + "step": 950 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002330647565841013, + "loss": 1.5859, + "step": 951 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002327387397470601, + "loss": 1.6599, + "step": 952 + }, + { + "epoch": 0.48, + "learning_rate": 0.00023241263349769748, + "loss": 1.5984, + "step": 953 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002320864387266378, + "loss": 1.5987, + "step": 954 + }, + { + "epoch": 0.48, + "learning_rate": 0.00023176015632474703, + "loss": 1.727, + "step": 955 + }, + { + "epoch": 0.48, + "learning_rate": 0.00023143378718313066, + "loss": 1.4925, + "step": 956 + }, + { + "epoch": 0.48, + "learning_rate": 0.00023110733219313087, + "loss": 1.5061, + "step": 957 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002307807922463245, + "loss": 1.6809, + "step": 958 + }, + { + "epoch": 0.48, + "learning_rate": 0.00023045416823452023, + "loss": 1.5185, + "step": 959 + }, + { + "epoch": 0.48, + "learning_rate": 0.00023012746104975632, + "loss": 1.5064, + "step": 960 + }, + { + "epoch": 0.48, + "learning_rate": 0.00022980067158429832, + "loss": 1.4743, + "step": 961 + }, + { + "epoch": 0.48, + "learning_rate": 0.00022947380073063656, + "loss": 1.5773, + "step": 962 + }, + { + "epoch": 0.48, + "learning_rate": 0.00022914684938148342, + "loss": 1.3633, + "step": 963 + }, + { + "epoch": 0.48, + "learning_rate": 0.00022881981842977117, + "loss": 1.6052, + "step": 964 + }, + { + "epoch": 0.48, + "learning_rate": 0.00022849270876864965, + "loss": 1.5255, + "step": 965 + }, + { + "epoch": 0.48, + "learning_rate": 0.00022816552129148354, + "loss": 1.7143, + "step": 966 + }, + { + "epoch": 0.48, + "learning_rate": 0.00022783825689184998, + "loss": 1.5166, + "step": 967 + }, + { + "epoch": 0.48, + "learning_rate": 0.00022751091646353632, + "loss": 1.5318, + "step": 968 + }, + { + "epoch": 0.48, + "learning_rate": 0.00022718350090053752, + "loss": 1.512, + "step": 969 + }, + { + "epoch": 0.48, + "learning_rate": 0.00022685601109705364, + "loss": 1.5758, + "step": 970 + }, + { + "epoch": 0.49, + "learning_rate": 0.00022652844794748765, + "loss": 1.4, + "step": 971 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002262008123464427, + "loss": 1.5296, + "step": 972 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002258731051887199, + "loss": 1.7131, + "step": 973 + }, + { + "epoch": 0.49, + "learning_rate": 0.00022554532736931577, + "loss": 1.5665, + "step": 974 + }, + { + "epoch": 0.49, + "learning_rate": 0.00022521747978341972, + "loss": 1.4738, + "step": 975 + }, + { + "epoch": 0.49, + "learning_rate": 0.00022488956332641192, + "loss": 1.6389, + "step": 976 + }, + { + "epoch": 0.49, + "learning_rate": 0.00022456157889386033, + "loss": 1.6043, + "step": 977 + }, + { + "epoch": 0.49, + "learning_rate": 0.00022423352738151886, + "loss": 1.5159, + "step": 978 + }, + { + "epoch": 0.49, + "learning_rate": 0.00022390540968532442, + "loss": 1.5425, + "step": 979 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002235772267013947, + "loss": 1.5404, + "step": 980 + }, + { + "epoch": 0.49, + "learning_rate": 0.00022324897932602574, + "loss": 1.6195, + "step": 981 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002229206684556895, + "loss": 1.5286, + "step": 982 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002225922949870311, + "loss": 1.4833, + "step": 983 + }, + { + "epoch": 0.49, + "learning_rate": 0.00022226385981686706, + "loss": 1.5749, + "step": 984 + }, + { + "epoch": 0.49, + "learning_rate": 0.00022193536384218195, + "loss": 1.718, + "step": 985 + }, + { + "epoch": 0.49, + "learning_rate": 0.00022160680796012665, + "loss": 1.4774, + "step": 986 + }, + { + "epoch": 0.49, + "learning_rate": 0.00022127819306801567, + "loss": 1.4944, + "step": 987 + }, + { + "epoch": 0.49, + "learning_rate": 0.00022094952006332453, + "loss": 1.6471, + "step": 988 + }, + { + "epoch": 0.49, + "learning_rate": 0.00022062078984368756, + "loss": 1.6706, + "step": 989 + }, + { + "epoch": 0.49, + "learning_rate": 0.00022029200330689545, + "loss": 1.5164, + "step": 990 + }, + { + "epoch": 0.5, + "learning_rate": 0.00021996316135089239, + "loss": 1.6634, + "step": 991 + }, + { + "epoch": 0.5, + "learning_rate": 0.00021963426487377433, + "loss": 1.7244, + "step": 992 + }, + { + "epoch": 0.5, + "learning_rate": 0.00021930531477378572, + "loss": 1.4838, + "step": 993 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002189763119493178, + "loss": 1.6398, + "step": 994 + }, + { + "epoch": 0.5, + "learning_rate": 0.00021864725729890555, + "loss": 1.3957, + "step": 995 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002183181517212256, + "loss": 1.6115, + "step": 996 + }, + { + "epoch": 0.5, + "learning_rate": 0.00021798899611509377, + "loss": 1.7738, + "step": 997 + }, + { + "epoch": 0.5, + "learning_rate": 0.00021765979137946233, + "loss": 1.6396, + "step": 998 + }, + { + "epoch": 0.5, + "learning_rate": 0.00021733053841341775, + "loss": 1.5969, + "step": 999 + }, + { + "epoch": 0.5, + "learning_rate": 0.00021700123811617834, + "loss": 1.4756, + "step": 1000 + } + ], + "logging_steps": 1, + "max_steps": 2001, + "num_train_epochs": 1, + "save_steps": 1000, + "total_flos": 1.9365667152278323e+17, + "trial_name": null, + "trial_params": null +}