diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,5953 +1,1498 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 4.0, - "global_step": 4944, + "epoch": 0.9992919995954284, + "global_step": 1235, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, - "learning_rate": 2.0161290322580645e-07, - "loss": 1.7344, + "learning_rate": 5.0607287449392715e-08, + "loss": 1.0392, "step": 5 }, { "epoch": 0.01, - "learning_rate": 4.032258064516129e-07, - "loss": 1.6586, + "learning_rate": 1.0121457489878543e-07, + "loss": 1.0115, "step": 10 }, { "epoch": 0.01, - "learning_rate": 6.048387096774194e-07, - "loss": 1.6465, + "learning_rate": 1.5182186234817813e-07, + "loss": 0.99, "step": 15 }, { "epoch": 0.02, - "learning_rate": 8.064516129032258e-07, - "loss": 1.5832, + "learning_rate": 2.0242914979757086e-07, + "loss": 1.0143, "step": 20 }, { "epoch": 0.02, - "learning_rate": 1.0080645161290323e-06, - "loss": 1.5574, + "learning_rate": 2.5303643724696356e-07, + "loss": 1.0301, "step": 25 }, { "epoch": 0.02, - "learning_rate": 1.2096774193548388e-06, - "loss": 1.4381, + "learning_rate": 3.0364372469635626e-07, + "loss": 0.9651, "step": 30 }, { "epoch": 0.03, - "learning_rate": 1.4112903225806455e-06, - "loss": 1.5215, + "learning_rate": 3.54251012145749e-07, + "loss": 0.9984, "step": 35 }, { "epoch": 0.03, - "learning_rate": 1.6129032258064516e-06, - "loss": 1.5566, + "learning_rate": 4.048582995951417e-07, + "loss": 0.9842, "step": 40 }, { "epoch": 0.04, - "learning_rate": 1.8145161290322583e-06, - "loss": 1.5641, + "learning_rate": 4.5546558704453447e-07, + "loss": 1.0033, "step": 45 }, { "epoch": 0.04, - "learning_rate": 2.0161290322580646e-06, - "loss": 1.4707, + "learning_rate": 5.060728744939271e-07, + "loss": 0.9764, "step": 50 }, { "epoch": 0.04, - "learning_rate": 2.217741935483871e-06, - "loss": 1.4484, + "learning_rate": 5.566801619433199e-07, + "loss": 1.018, "step": 55 }, { "epoch": 0.05, - "learning_rate": 2.4193548387096776e-06, - "loss": 1.4656, + "learning_rate": 6.072874493927125e-07, + "loss": 0.9808, "step": 60 }, { "epoch": 0.05, - "learning_rate": 2.620967741935484e-06, - "loss": 1.3938, + "learning_rate": 6.578947368421053e-07, + "loss": 0.9508, "step": 65 }, { "epoch": 0.06, - "learning_rate": 2.822580645161291e-06, - "loss": 1.4152, + "learning_rate": 7.08502024291498e-07, + "loss": 0.9654, "step": 70 }, { "epoch": 0.06, - "learning_rate": 3.024193548387097e-06, - "loss": 1.4182, + "learning_rate": 7.591093117408907e-07, + "loss": 0.9801, "step": 75 }, { "epoch": 0.06, - "learning_rate": 3.225806451612903e-06, - "loss": 1.5051, + "learning_rate": 8.097165991902834e-07, + "loss": 0.9825, "step": 80 }, { "epoch": 0.07, - "learning_rate": 3.4274193548387097e-06, - "loss": 1.4918, + "learning_rate": 8.603238866396761e-07, + "loss": 1.0046, "step": 85 }, { "epoch": 0.07, - "learning_rate": 3.6290322580645166e-06, - "loss": 1.4738, + "learning_rate": 9.109311740890689e-07, + "loss": 0.9797, "step": 90 }, { "epoch": 0.08, - "learning_rate": 3.830645161290323e-06, - "loss": 1.4035, + "learning_rate": 9.615384615384617e-07, + "loss": 0.9904, "step": 95 }, { "epoch": 0.08, - "learning_rate": 4.032258064516129e-06, - "loss": 1.4367, + "learning_rate": 1.0121457489878542e-06, + "loss": 0.987, "step": 100 }, { "epoch": 0.08, - "learning_rate": 4.233870967741936e-06, - "loss": 1.4076, + "learning_rate": 1.062753036437247e-06, + "loss": 0.9782, "step": 105 }, { "epoch": 0.09, - "learning_rate": 4.435483870967742e-06, - "loss": 1.4902, + "learning_rate": 1.1133603238866398e-06, + "loss": 1.0131, "step": 110 }, { "epoch": 0.09, - "learning_rate": 4.637096774193548e-06, - "loss": 1.3578, + "learning_rate": 1.1639676113360325e-06, + "loss": 0.9269, "step": 115 }, { "epoch": 0.1, - "learning_rate": 4.838709677419355e-06, - "loss": 1.4467, + "learning_rate": 1.214574898785425e-06, + "loss": 0.9312, "step": 120 }, { "epoch": 0.1, - "learning_rate": 5.040322580645161e-06, - "loss": 1.4766, + "learning_rate": 1.265182186234818e-06, + "loss": 1.0038, "step": 125 }, { "epoch": 0.11, - "learning_rate": 5.241935483870968e-06, - "loss": 1.4828, + "learning_rate": 1.3157894736842106e-06, + "loss": 0.9896, "step": 130 }, { "epoch": 0.11, - "learning_rate": 5.443548387096774e-06, - "loss": 1.4258, + "learning_rate": 1.3663967611336035e-06, + "loss": 0.9544, "step": 135 }, { "epoch": 0.11, - "learning_rate": 5.645161290322582e-06, - "loss": 1.4602, + "learning_rate": 1.417004048582996e-06, + "loss": 0.9811, "step": 140 }, { "epoch": 0.12, - "learning_rate": 5.846774193548388e-06, - "loss": 1.4902, + "learning_rate": 1.4676113360323888e-06, + "loss": 1.0079, "step": 145 }, { "epoch": 0.12, - "learning_rate": 6.048387096774194e-06, - "loss": 1.3729, + "learning_rate": 1.5182186234817814e-06, + "loss": 0.9216, "step": 150 }, { "epoch": 0.13, - "learning_rate": 6.25e-06, - "loss": 1.4902, + "learning_rate": 1.5688259109311743e-06, + "loss": 0.9537, "step": 155 }, { "epoch": 0.13, - "learning_rate": 6.451612903225806e-06, - "loss": 1.435, + "learning_rate": 1.6194331983805669e-06, + "loss": 0.9364, "step": 160 }, { "epoch": 0.13, - "learning_rate": 6.653225806451613e-06, - "loss": 1.4096, + "learning_rate": 1.6700404858299596e-06, + "loss": 0.9203, "step": 165 }, { "epoch": 0.14, - "learning_rate": 6.854838709677419e-06, - "loss": 1.4508, + "learning_rate": 1.7206477732793522e-06, + "loss": 0.9639, "step": 170 }, { "epoch": 0.14, - "learning_rate": 7.056451612903227e-06, - "loss": 1.4266, + "learning_rate": 1.7712550607287451e-06, + "loss": 0.9563, "step": 175 }, { "epoch": 0.15, - "learning_rate": 7.258064516129033e-06, - "loss": 1.4648, + "learning_rate": 1.8218623481781379e-06, + "loss": 0.9413, "step": 180 }, { "epoch": 0.15, - "learning_rate": 7.459677419354839e-06, - "loss": 1.3996, + "learning_rate": 1.8724696356275304e-06, + "loss": 0.9173, "step": 185 }, { "epoch": 0.15, - "learning_rate": 7.661290322580646e-06, - "loss": 1.4152, + "learning_rate": 1.9230769230769234e-06, + "loss": 0.9208, "step": 190 }, { "epoch": 0.16, - "learning_rate": 7.862903225806451e-06, - "loss": 1.4648, + "learning_rate": 1.973684210526316e-06, + "loss": 0.9382, "step": 195 }, { "epoch": 0.16, - "learning_rate": 8.064516129032258e-06, - "loss": 1.3128, + "learning_rate": 2.0242914979757085e-06, + "loss": 0.8964, "step": 200 }, { "epoch": 0.17, - "learning_rate": 8.266129032258065e-06, - "loss": 1.4242, + "learning_rate": 2.0748987854251012e-06, + "loss": 0.9241, "step": 205 }, { "epoch": 0.17, - "learning_rate": 8.467741935483872e-06, - "loss": 1.4648, + "learning_rate": 2.125506072874494e-06, + "loss": 0.9458, "step": 210 }, { "epoch": 0.17, - "learning_rate": 8.669354838709677e-06, - "loss": 1.4527, + "learning_rate": 2.1761133603238867e-06, + "loss": 0.93, "step": 215 }, { "epoch": 0.18, - "learning_rate": 8.870967741935484e-06, - "loss": 1.4, + "learning_rate": 2.2267206477732795e-06, + "loss": 0.9031, "step": 220 }, { "epoch": 0.18, - "learning_rate": 9.072580645161291e-06, - "loss": 1.4984, + "learning_rate": 2.2773279352226723e-06, + "loss": 0.9207, "step": 225 }, { "epoch": 0.19, - "learning_rate": 9.274193548387097e-06, - "loss": 1.4297, + "learning_rate": 2.327935222672065e-06, + "loss": 0.9123, "step": 230 }, { "epoch": 0.19, - "learning_rate": 9.475806451612905e-06, - "loss": 1.4645, + "learning_rate": 2.3785425101214578e-06, + "loss": 0.9057, "step": 235 }, { "epoch": 0.19, - "learning_rate": 9.67741935483871e-06, - "loss": 1.4107, + "learning_rate": 2.42914979757085e-06, + "loss": 0.8909, "step": 240 }, { "epoch": 0.2, - "learning_rate": 9.879032258064517e-06, - "loss": 1.4328, + "learning_rate": 2.4797570850202433e-06, + "loss": 0.9171, "step": 245 }, { "epoch": 0.2, - "learning_rate": 9.999995524479982e-06, - "loss": 1.4279, + "learning_rate": 2.530364372469636e-06, + "loss": 0.8959, "step": 250 }, { "epoch": 0.21, - "learning_rate": 9.999945174971776e-06, - "loss": 1.4738, + "learning_rate": 2.5809716599190288e-06, + "loss": 0.946, "step": 255 }, { "epoch": 0.21, - "learning_rate": 9.999838882120566e-06, - "loss": 1.3898, + "learning_rate": 2.631578947368421e-06, + "loss": 0.9071, "step": 260 }, { "epoch": 0.21, - "learning_rate": 9.999676647115646e-06, - "loss": 1.3346, + "learning_rate": 2.682186234817814e-06, + "loss": 0.8789, "step": 265 }, { "epoch": 0.22, - "learning_rate": 9.999458471772225e-06, - "loss": 1.5164, + "learning_rate": 2.732793522267207e-06, + "loss": 0.9864, "step": 270 }, { "epoch": 0.22, - "learning_rate": 9.999184358531422e-06, - "loss": 1.4148, + "learning_rate": 2.7834008097165994e-06, + "loss": 0.8932, "step": 275 }, { "epoch": 0.23, - "learning_rate": 9.998854310460233e-06, - "loss": 1.4273, + "learning_rate": 2.834008097165992e-06, + "loss": 0.9064, "step": 280 }, { "epoch": 0.23, - "learning_rate": 9.998468331251499e-06, - "loss": 1.4592, + "learning_rate": 2.8846153846153845e-06, + "loss": 0.9127, "step": 285 }, { "epoch": 0.23, - "learning_rate": 9.998026425223858e-06, - "loss": 1.4504, + "learning_rate": 2.9352226720647776e-06, + "loss": 0.914, "step": 290 }, { "epoch": 0.24, - "learning_rate": 9.997528597321704e-06, - "loss": 1.448, + "learning_rate": 2.9858299595141704e-06, + "loss": 0.9254, "step": 295 }, { "epoch": 0.24, - "learning_rate": 9.996974853115132e-06, - "loss": 1.4523, + "learning_rate": 3.0364372469635627e-06, + "loss": 0.9396, "step": 300 }, { "epoch": 0.25, - "learning_rate": 9.996365198799868e-06, - "loss": 1.5063, + "learning_rate": 3.087044534412956e-06, + "loss": 0.9208, "step": 305 }, { "epoch": 0.25, - "learning_rate": 9.99569964119721e-06, - "loss": 1.4051, + "learning_rate": 3.1376518218623487e-06, + "loss": 0.9242, "step": 310 }, { "epoch": 0.25, - "learning_rate": 9.99497818775394e-06, - "loss": 1.4312, + "learning_rate": 3.188259109311741e-06, + "loss": 0.9057, "step": 315 }, { "epoch": 0.26, - "learning_rate": 9.994200846542251e-06, - "loss": 1.4758, + "learning_rate": 3.2388663967611337e-06, + "loss": 0.9632, "step": 320 }, { "epoch": 0.26, - "learning_rate": 9.993367626259652e-06, - "loss": 1.468, + "learning_rate": 3.289473684210527e-06, + "loss": 0.9344, "step": 325 }, { "epoch": 0.27, - "learning_rate": 9.99247853622887e-06, - "loss": 1.527, + "learning_rate": 3.3400809716599193e-06, + "loss": 0.9578, "step": 330 }, { "epoch": 0.27, - "learning_rate": 9.991533586397751e-06, - "loss": 1.4969, + "learning_rate": 3.390688259109312e-06, + "loss": 0.947, "step": 335 }, { "epoch": 0.28, - "learning_rate": 9.990532787339137e-06, - "loss": 1.4996, + "learning_rate": 3.4412955465587043e-06, + "loss": 0.9344, "step": 340 }, { "epoch": 0.28, - "learning_rate": 9.98947615025076e-06, - "loss": 1.5262, + "learning_rate": 3.4919028340080975e-06, + "loss": 0.9472, "step": 345 }, { "epoch": 0.28, - "learning_rate": 9.988363686955116e-06, - "loss": 1.5992, + "learning_rate": 3.5425101214574903e-06, + "loss": 1.0034, "step": 350 }, { "epoch": 0.29, - "learning_rate": 9.987195409899322e-06, - "loss": 1.4711, + "learning_rate": 3.5931174089068826e-06, + "loss": 0.9558, "step": 355 }, { "epoch": 0.29, - "learning_rate": 9.985971332154985e-06, - "loss": 1.45, + "learning_rate": 3.6437246963562758e-06, + "loss": 0.9274, "step": 360 }, { "epoch": 0.3, - "learning_rate": 9.984691467418057e-06, - "loss": 1.4863, + "learning_rate": 3.6943319838056685e-06, + "loss": 0.968, "step": 365 }, { "epoch": 0.3, - "learning_rate": 9.983355830008678e-06, - "loss": 1.5219, + "learning_rate": 3.744939271255061e-06, + "loss": 0.9543, "step": 370 }, { "epoch": 0.3, - "learning_rate": 9.981964434871015e-06, - "loss": 1.5977, + "learning_rate": 3.7955465587044536e-06, + "loss": 1.0063, "step": 375 }, { "epoch": 0.31, - "learning_rate": 9.980517297573097e-06, - "loss": 1.4539, + "learning_rate": 3.846153846153847e-06, + "loss": 0.9491, "step": 380 }, { "epoch": 0.31, - "learning_rate": 9.979014434306642e-06, - "loss": 1.3713, + "learning_rate": 3.896761133603239e-06, + "loss": 0.8891, "step": 385 }, { "epoch": 0.32, - "learning_rate": 9.977455861886874e-06, - "loss": 1.4434, + "learning_rate": 3.947368421052632e-06, + "loss": 0.9015, "step": 390 }, { "epoch": 0.32, - "learning_rate": 9.975841597752334e-06, - "loss": 1.5469, + "learning_rate": 3.997975708502025e-06, + "loss": 0.9852, "step": 395 }, { "epoch": 0.32, - "learning_rate": 9.974171659964688e-06, - "loss": 1.4531, + "learning_rate": 4.048582995951417e-06, + "loss": 0.9636, "step": 400 }, { "epoch": 0.33, - "learning_rate": 9.972446067208519e-06, - "loss": 1.4828, + "learning_rate": 4.09919028340081e-06, + "loss": 0.9683, "step": 405 }, { "epoch": 0.33, - "learning_rate": 9.970664838791126e-06, - "loss": 1.4512, + "learning_rate": 4.1497975708502025e-06, + "loss": 0.9127, "step": 410 }, { "epoch": 0.34, - "learning_rate": 9.9688279946423e-06, - "loss": 1.4076, + "learning_rate": 4.200404858299596e-06, + "loss": 0.9229, "step": 415 }, { "epoch": 0.34, - "learning_rate": 9.966935555314107e-06, - "loss": 1.4969, + "learning_rate": 4.251012145748988e-06, + "loss": 0.9329, "step": 420 }, { "epoch": 0.34, - "learning_rate": 9.96498754198066e-06, - "loss": 1.498, + "learning_rate": 4.30161943319838e-06, + "loss": 0.9864, "step": 425 }, { "epoch": 0.35, - "learning_rate": 9.962983976437868e-06, - "loss": 1.4393, + "learning_rate": 4.3522267206477735e-06, + "loss": 0.9208, "step": 430 }, { "epoch": 0.35, - "learning_rate": 9.96092488110321e-06, - "loss": 1.5219, + "learning_rate": 4.402834008097167e-06, + "loss": 0.959, "step": 435 }, { "epoch": 0.36, - "learning_rate": 9.958810279015474e-06, - "loss": 1.4484, + "learning_rate": 4.453441295546559e-06, + "loss": 0.9411, "step": 440 }, { "epoch": 0.36, - "learning_rate": 9.956640193834501e-06, - "loss": 1.4805, + "learning_rate": 4.504048582995952e-06, + "loss": 0.9695, "step": 445 }, { "epoch": 0.36, - "learning_rate": 9.954414649840922e-06, - "loss": 1.4832, + "learning_rate": 4.5546558704453445e-06, + "loss": 0.9808, "step": 450 }, { "epoch": 0.37, - "learning_rate": 9.952133671935885e-06, - "loss": 1.3789, + "learning_rate": 4.605263157894737e-06, + "loss": 0.8951, "step": 455 }, { "epoch": 0.37, - "learning_rate": 9.949797285640771e-06, - "loss": 1.4773, + "learning_rate": 4.65587044534413e-06, + "loss": 0.9815, "step": 460 }, { "epoch": 0.38, - "learning_rate": 9.947405517096927e-06, - "loss": 1.5246, + "learning_rate": 4.706477732793522e-06, + "loss": 0.9565, "step": 465 }, { "epoch": 0.38, - "learning_rate": 9.944958393065343e-06, - "loss": 1.5094, + "learning_rate": 4.7570850202429155e-06, + "loss": 0.9335, "step": 470 }, { "epoch": 0.38, - "learning_rate": 9.942455940926384e-06, - "loss": 1.4253, + "learning_rate": 4.807692307692308e-06, + "loss": 0.9409, "step": 475 }, { "epoch": 0.39, - "learning_rate": 9.939898188679465e-06, - "loss": 1.4396, + "learning_rate": 4.8582995951417e-06, + "loss": 0.9057, "step": 480 }, { "epoch": 0.39, - "learning_rate": 9.93728516494274e-06, - "loss": 1.3863, + "learning_rate": 4.908906882591093e-06, + "loss": 0.9125, "step": 485 }, { "epoch": 0.4, - "learning_rate": 9.934616898952787e-06, - "loss": 1.4965, + "learning_rate": 4.9595141700404865e-06, + "loss": 0.9485, "step": 490 }, { "epoch": 0.4, - "learning_rate": 9.931893420564277e-06, - "loss": 1.5633, + "learning_rate": 4.9999993758760865e-06, + "loss": 0.9827, "step": 495 }, { "epoch": 0.4, - "learning_rate": 9.929114760249642e-06, - "loss": 1.3689, + "learning_rate": 4.999977531571805e-06, + "loss": 0.9135, "step": 500 }, { "epoch": 0.41, - "learning_rate": 9.926280949098732e-06, - "loss": 1.4434, + "learning_rate": 4.999924481383433e-06, + "loss": 0.9547, "step": 505 }, { "epoch": 0.41, - "learning_rate": 9.923392018818467e-06, - "loss": 1.4758, + "learning_rate": 4.9998402259731634e-06, + "loss": 0.9506, "step": 510 }, { "epoch": 0.42, - "learning_rate": 9.92044800173249e-06, - "loss": 1.4133, + "learning_rate": 4.999724766392715e-06, + "loss": 0.9281, "step": 515 }, { "epoch": 0.42, - "learning_rate": 9.917448930780786e-06, - "loss": 1.451, + "learning_rate": 4.999578104083307e-06, + "loss": 0.925, "step": 520 }, { "epoch": 0.42, - "learning_rate": 9.91439483951934e-06, - "loss": 1.5117, + "learning_rate": 4.999400240875647e-06, + "loss": 0.9808, "step": 525 }, { "epoch": 0.43, - "learning_rate": 9.91128576211974e-06, - "loss": 1.4949, + "learning_rate": 4.999191178989905e-06, + "loss": 0.963, "step": 530 }, { "epoch": 0.43, - "learning_rate": 9.908121733368803e-06, - "loss": 1.3984, + "learning_rate": 4.998950921035691e-06, + "loss": 0.9125, "step": 535 }, { "epoch": 0.44, - "learning_rate": 9.904902788668187e-06, - "loss": 1.6094, + "learning_rate": 4.998679470012015e-06, + "loss": 0.9833, "step": 540 }, { "epoch": 0.44, - "learning_rate": 9.901628964033993e-06, - "loss": 1.452, + "learning_rate": 4.998376829307255e-06, + "loss": 0.949, "step": 545 }, { - "epoch": 0.44, - "learning_rate": 9.89830029609636e-06, - "loss": 1.4293, + "epoch": 0.45, + "learning_rate": 4.998043002699114e-06, + "loss": 0.9539, "step": 550 }, { "epoch": 0.45, - "learning_rate": 9.894916822099062e-06, - "loss": 1.4957, + "learning_rate": 4.997677994354573e-06, + "loss": 0.9612, "step": 555 }, { "epoch": 0.45, - "learning_rate": 9.89147857989908e-06, - "loss": 1.507, + "learning_rate": 4.997281808829833e-06, + "loss": 0.962, "step": 560 }, { "epoch": 0.46, - "learning_rate": 9.887985607966188e-06, - "loss": 1.4379, + "learning_rate": 4.996854451070267e-06, + "loss": 0.9467, "step": 565 }, { "epoch": 0.46, - "learning_rate": 9.884437945382523e-06, - "loss": 1.4812, + "learning_rate": 4.996395926410354e-06, + "loss": 0.9273, "step": 570 }, { "epoch": 0.47, - "learning_rate": 9.880835631842141e-06, - "loss": 1.4477, + "learning_rate": 4.995906240573615e-06, + "loss": 0.9213, "step": 575 }, { "epoch": 0.47, - "learning_rate": 9.877178707650573e-06, - "loss": 1.4484, + "learning_rate": 4.995385399672532e-06, + "loss": 0.9405, "step": 580 }, { "epoch": 0.47, - "learning_rate": 9.873467213724384e-06, - "loss": 1.4897, + "learning_rate": 4.994833410208487e-06, + "loss": 0.9448, "step": 585 }, { "epoch": 0.48, - "learning_rate": 9.869701191590703e-06, - "loss": 1.4641, + "learning_rate": 4.994250279071669e-06, + "loss": 0.9146, "step": 590 }, { "epoch": 0.48, - "learning_rate": 9.865880683386766e-06, - "loss": 1.4809, + "learning_rate": 4.9936360135409915e-06, + "loss": 0.9891, "step": 595 }, { "epoch": 0.49, - "learning_rate": 9.862005731859443e-06, - "loss": 1.4742, + "learning_rate": 4.992990621284004e-06, + "loss": 0.9444, "step": 600 }, { "epoch": 0.49, - "learning_rate": 9.858076380364757e-06, - "loss": 1.4902, + "learning_rate": 4.992314110356793e-06, + "loss": 0.9599, "step": 605 }, { "epoch": 0.49, - "learning_rate": 9.854092672867399e-06, - "loss": 1.5898, + "learning_rate": 4.991606489203883e-06, + "loss": 1.0091, "step": 610 }, { "epoch": 0.5, - "learning_rate": 9.850054653940247e-06, - "loss": 1.4984, + "learning_rate": 4.99086776665813e-06, + "loss": 0.9725, "step": 615 }, { "epoch": 0.5, - "learning_rate": 9.845962368763847e-06, - "loss": 1.4898, + "learning_rate": 4.9900979519406154e-06, + "loss": 0.9283, "step": 620 }, { "epoch": 0.51, - "learning_rate": 9.841815863125923e-06, - "loss": 1.5082, + "learning_rate": 4.9892970546605226e-06, + "loss": 0.9856, "step": 625 }, { "epoch": 0.51, - "learning_rate": 9.837615183420866e-06, - "loss": 1.4992, + "learning_rate": 4.988465084815026e-06, + "loss": 0.9866, "step": 630 }, { "epoch": 0.51, - "learning_rate": 9.8333603766492e-06, - "loss": 1.3744, + "learning_rate": 4.987602052789159e-06, + "loss": 0.8948, "step": 635 }, { "epoch": 0.52, - "learning_rate": 9.829051490417074e-06, - "loss": 1.527, + "learning_rate": 4.986707969355692e-06, + "loss": 0.9727, "step": 640 }, { "epoch": 0.52, - "learning_rate": 9.824688572935713e-06, - "loss": 1.5008, + "learning_rate": 4.985782845674988e-06, + "loss": 0.9579, "step": 645 }, { "epoch": 0.53, - "learning_rate": 9.820271673020891e-06, - "loss": 1.4721, + "learning_rate": 4.9848266932948745e-06, + "loss": 0.9343, "step": 650 }, { "epoch": 0.53, - "learning_rate": 9.81580084009238e-06, - "loss": 1.5555, + "learning_rate": 4.983839524150489e-06, + "loss": 0.9872, "step": 655 }, { "epoch": 0.53, - "learning_rate": 9.811276124173395e-06, - "loss": 1.5285, + "learning_rate": 4.982821350564136e-06, + "loss": 0.9586, "step": 660 }, { "epoch": 0.54, - "learning_rate": 9.806697575890043e-06, - "loss": 1.4777, + "learning_rate": 4.981772185245135e-06, + "loss": 0.9687, "step": 665 }, { "epoch": 0.54, - "learning_rate": 9.802065246470738e-06, - "loss": 1.4322, + "learning_rate": 4.9806920412896555e-06, + "loss": 0.9365, "step": 670 }, { "epoch": 0.55, - "learning_rate": 9.797379187745652e-06, - "loss": 1.5197, + "learning_rate": 4.979580932180556e-06, + "loss": 0.9754, "step": 675 }, { "epoch": 0.55, - "learning_rate": 9.792639452146116e-06, - "loss": 1.4703, + "learning_rate": 4.978438871787219e-06, + "loss": 0.9456, "step": 680 }, { "epoch": 0.55, - "learning_rate": 9.787846092704043e-06, - "loss": 1.4539, + "learning_rate": 4.977265874365374e-06, + "loss": 0.9345, "step": 685 }, { "epoch": 0.56, - "learning_rate": 9.782999163051332e-06, - "loss": 1.4326, + "learning_rate": 4.976061954556921e-06, + "loss": 0.9384, "step": 690 }, { "epoch": 0.56, - "learning_rate": 9.778098717419266e-06, - "loss": 1.3992, + "learning_rate": 4.9748271273897495e-06, + "loss": 0.9121, "step": 695 }, { "epoch": 0.57, - "learning_rate": 9.773144810637908e-06, - "loss": 1.457, + "learning_rate": 4.9735614082775455e-06, + "loss": 0.9196, "step": 700 }, { "epoch": 0.57, - "learning_rate": 9.768137498135489e-06, - "loss": 1.477, + "learning_rate": 4.972264813019605e-06, + "loss": 0.9427, "step": 705 }, { "epoch": 0.57, - "learning_rate": 9.763076835937782e-06, - "loss": 1.4455, + "learning_rate": 4.970937357800635e-06, + "loss": 0.9248, "step": 710 }, { "epoch": 0.58, - "learning_rate": 9.75796288066748e-06, - "loss": 1.442, + "learning_rate": 4.969579059190549e-06, + "loss": 0.924, "step": 715 }, { "epoch": 0.58, - "learning_rate": 9.752795689543563e-06, - "loss": 1.5156, + "learning_rate": 4.968189934144263e-06, + "loss": 0.9705, "step": 720 }, { "epoch": 0.59, - "learning_rate": 9.747575320380652e-06, - "loss": 1.5129, + "learning_rate": 4.966770000001483e-06, + "loss": 0.992, "step": 725 }, { "epoch": 0.59, - "learning_rate": 9.742301831588368e-06, - "loss": 1.3873, + "learning_rate": 4.965319274486488e-06, + "loss": 0.9164, "step": 730 }, { "epoch": 0.59, - "learning_rate": 9.736975282170678e-06, - "loss": 1.448, + "learning_rate": 4.963837775707911e-06, + "loss": 0.9343, "step": 735 }, { "epoch": 0.6, - "learning_rate": 9.731595731725232e-06, - "loss": 1.4695, + "learning_rate": 4.962325522158509e-06, + "loss": 0.9485, "step": 740 }, { "epoch": 0.6, - "learning_rate": 9.726163240442695e-06, - "loss": 1.4898, + "learning_rate": 4.960782532714934e-06, + "loss": 0.9483, "step": 745 }, { "epoch": 0.61, - "learning_rate": 9.720677869106077e-06, - "loss": 1.4619, + "learning_rate": 4.959208826637502e-06, + "loss": 0.959, "step": 750 }, { "epoch": 0.61, - "learning_rate": 9.715139679090057e-06, - "loss": 1.5293, + "learning_rate": 4.957604423569942e-06, + "loss": 0.9819, "step": 755 }, { "epoch": 0.61, - "learning_rate": 9.709548732360286e-06, - "loss": 1.4535, + "learning_rate": 4.955969343539162e-06, + "loss": 0.9399, "step": 760 }, { "epoch": 0.62, - "learning_rate": 9.703905091472698e-06, - "loss": 1.3992, + "learning_rate": 4.954303606954993e-06, + "loss": 0.8887, "step": 765 }, { "epoch": 0.62, - "learning_rate": 9.698208819572815e-06, - "loss": 1.5254, + "learning_rate": 4.952607234609935e-06, + "loss": 0.989, "step": 770 }, { "epoch": 0.63, - "learning_rate": 9.692459980395034e-06, - "loss": 1.351, + "learning_rate": 4.950880247678897e-06, + "loss": 0.9565, "step": 775 }, { "epoch": 0.63, - "learning_rate": 9.686658638261916e-06, - "loss": 1.3639, + "learning_rate": 4.949122667718935e-06, + "loss": 0.9373, "step": 780 }, { "epoch": 0.64, - "learning_rate": 9.680804858083468e-06, - "loss": 1.3813, + "learning_rate": 4.947334516668981e-06, + "loss": 0.8964, "step": 785 }, { "epoch": 0.64, - "learning_rate": 9.674898705356413e-06, - "loss": 1.4016, + "learning_rate": 4.945515816849567e-06, + "loss": 0.9158, "step": 790 }, { "epoch": 0.64, - "learning_rate": 9.668940246163464e-06, - "loss": 1.4611, + "learning_rate": 4.9436665909625555e-06, + "loss": 0.9352, "step": 795 }, { "epoch": 0.65, - "learning_rate": 9.662929547172575e-06, - "loss": 1.4902, + "learning_rate": 4.941786862090842e-06, + "loss": 0.9417, "step": 800 }, { "epoch": 0.65, - "learning_rate": 9.656866675636204e-06, - "loss": 1.5445, + "learning_rate": 4.9398766536980795e-06, + "loss": 1.0072, "step": 805 }, { "epoch": 0.66, - "learning_rate": 9.650751699390554e-06, - "loss": 1.5363, + "learning_rate": 4.937935989628377e-06, + "loss": 0.9596, "step": 810 }, { "epoch": 0.66, - "learning_rate": 9.64458468685482e-06, - "loss": 1.4508, + "learning_rate": 4.935964894106011e-06, + "loss": 0.9452, "step": 815 }, { "epoch": 0.66, - "learning_rate": 9.638365707030415e-06, - "loss": 1.4367, + "learning_rate": 4.933963391735111e-06, + "loss": 0.9791, "step": 820 }, { "epoch": 0.67, - "learning_rate": 9.632094829500206e-06, - "loss": 1.4594, + "learning_rate": 4.9319315074993626e-06, + "loss": 0.9491, "step": 825 }, { "epoch": 0.67, - "learning_rate": 9.62577212442774e-06, - "loss": 1.498, + "learning_rate": 4.929869266761692e-06, + "loss": 0.9722, "step": 830 }, { "epoch": 0.68, - "learning_rate": 9.619397662556434e-06, - "loss": 1.4816, + "learning_rate": 4.9277766952639485e-06, + "loss": 0.9667, "step": 835 }, { "epoch": 0.68, - "learning_rate": 9.61297151520882e-06, - "loss": 1.4754, + "learning_rate": 4.9256538191265836e-06, + "loss": 0.9705, "step": 840 }, { "epoch": 0.68, - "learning_rate": 9.606493754285712e-06, - "loss": 1.4805, + "learning_rate": 4.923500664848327e-06, + "loss": 0.9772, "step": 845 }, { "epoch": 0.69, - "learning_rate": 9.599964452265434e-06, - "loss": 1.5539, + "learning_rate": 4.921317259305852e-06, + "loss": 1.0082, "step": 850 }, { "epoch": 0.69, - "learning_rate": 9.593383682202974e-06, - "loss": 1.4996, + "learning_rate": 4.9191036297534455e-06, + "loss": 0.957, "step": 855 }, { "epoch": 0.7, - "learning_rate": 9.586751517729203e-06, - "loss": 1.484, + "learning_rate": 4.916859803822662e-06, + "loss": 0.9832, "step": 860 }, { "epoch": 0.7, - "learning_rate": 9.580068033050019e-06, - "loss": 1.4781, + "learning_rate": 4.914585809521982e-06, + "loss": 0.9521, "step": 865 }, { "epoch": 0.7, - "learning_rate": 9.57333330294554e-06, - "loss": 1.5156, + "learning_rate": 4.912281675236461e-06, + "loss": 0.9631, "step": 870 }, { "epoch": 0.71, - "learning_rate": 9.566547402769255e-06, - "loss": 1.4734, + "learning_rate": 4.909947429727378e-06, + "loss": 0.9403, "step": 875 }, { "epoch": 0.71, - "learning_rate": 9.559710408447185e-06, - "loss": 1.5398, + "learning_rate": 4.907583102131871e-06, + "loss": 1.0201, "step": 880 }, { "epoch": 0.72, - "learning_rate": 9.55282239647703e-06, - "loss": 1.5621, + "learning_rate": 4.905188721962579e-06, + "loss": 0.9873, "step": 885 }, { "epoch": 0.72, - "learning_rate": 9.545883443927325e-06, - "loss": 1.4793, + "learning_rate": 4.902764319107271e-06, + "loss": 0.9681, "step": 890 }, { "epoch": 0.72, - "learning_rate": 9.538893628436554e-06, - "loss": 1.3898, + "learning_rate": 4.900309923828474e-06, + "loss": 0.9125, "step": 895 }, { "epoch": 0.73, - "learning_rate": 9.531853028212308e-06, - "loss": 1.4719, + "learning_rate": 4.89782556676309e-06, + "loss": 0.9349, "step": 900 }, { "epoch": 0.73, - "learning_rate": 9.52476172203039e-06, - "loss": 1.5215, + "learning_rate": 4.895311278922023e-06, + "loss": 0.9779, "step": 905 }, { "epoch": 0.74, - "learning_rate": 9.51761978923395e-06, - "loss": 1.4264, + "learning_rate": 4.892767091689786e-06, + "loss": 0.9427, "step": 910 }, { "epoch": 0.74, - "learning_rate": 9.51042730973258e-06, - "loss": 1.502, + "learning_rate": 4.890193036824107e-06, + "loss": 0.9845, "step": 915 }, { "epoch": 0.74, - "learning_rate": 9.503184364001432e-06, - "loss": 1.4287, + "learning_rate": 4.88758914645554e-06, + "loss": 0.9601, "step": 920 }, { "epoch": 0.75, - "learning_rate": 9.495891033080315e-06, - "loss": 1.4693, + "learning_rate": 4.884955453087056e-06, + "loss": 0.9894, "step": 925 }, { "epoch": 0.75, - "learning_rate": 9.488547398572787e-06, - "loss": 1.5344, + "learning_rate": 4.882291989593644e-06, + "loss": 1.0139, "step": 930 }, { "epoch": 0.76, - "learning_rate": 9.48115354264524e-06, - "loss": 1.3703, + "learning_rate": 4.879598789221893e-06, + "loss": 0.9599, "step": 935 }, { "epoch": 0.76, - "learning_rate": 9.473709548025987e-06, - "loss": 1.6094, + "learning_rate": 4.876875885589589e-06, + "loss": 0.9855, "step": 940 }, { "epoch": 0.76, - "learning_rate": 9.466215498004328e-06, - "loss": 1.4773, + "learning_rate": 4.87412331268528e-06, + "loss": 0.9763, "step": 945 }, { "epoch": 0.77, - "learning_rate": 9.458671476429624e-06, - "loss": 1.4602, + "learning_rate": 4.8713411048678635e-06, + "loss": 0.946, "step": 950 }, { "epoch": 0.77, - "learning_rate": 9.45107756771036e-06, - "loss": 1.4798, + "learning_rate": 4.868529296866156e-06, + "loss": 0.9446, "step": 955 }, { "epoch": 0.78, - "learning_rate": 9.443433856813197e-06, - "loss": 1.516, + "learning_rate": 4.865687923778452e-06, + "loss": 0.9792, "step": 960 }, { "epoch": 0.78, - "learning_rate": 9.435740429262016e-06, - "loss": 1.4941, + "learning_rate": 4.862817021072096e-06, + "loss": 0.9644, "step": 965 }, { "epoch": 0.78, - "learning_rate": 9.427997371136976e-06, - "loss": 1.402, + "learning_rate": 4.8599166245830306e-06, + "loss": 0.9269, "step": 970 }, { "epoch": 0.79, - "learning_rate": 9.420204769073538e-06, - "loss": 1.5305, + "learning_rate": 4.856986770515358e-06, + "loss": 0.9846, "step": 975 }, { "epoch": 0.79, - "learning_rate": 9.4123627102615e-06, - "loss": 1.4604, + "learning_rate": 4.854027495440881e-06, + "loss": 0.9325, "step": 980 }, { "epoch": 0.8, - "learning_rate": 9.404471282444019e-06, - "loss": 1.5129, + "learning_rate": 4.851038836298649e-06, + "loss": 0.968, "step": 985 }, { "epoch": 0.8, - "learning_rate": 9.396530573916636e-06, - "loss": 1.5453, + "learning_rate": 4.848020830394498e-06, + "loss": 1.0115, "step": 990 }, { "epoch": 0.81, - "learning_rate": 9.38854067352628e-06, - "loss": 1.4834, + "learning_rate": 4.844973515400584e-06, + "loss": 1.0004, "step": 995 }, { "epoch": 0.81, - "learning_rate": 9.38050167067028e-06, - "loss": 1.4387, + "learning_rate": 4.8418969293549106e-06, + "loss": 0.9422, "step": 1000 }, { "epoch": 0.81, - "learning_rate": 9.372413655295362e-06, - "loss": 1.4383, + "learning_rate": 4.83879111066086e-06, + "loss": 0.9617, "step": 1005 }, { "epoch": 0.82, - "learning_rate": 9.364276717896639e-06, - "loss": 1.602, + "learning_rate": 4.8356560980867064e-06, + "loss": 1.0238, "step": 1010 }, { "epoch": 0.82, - "learning_rate": 9.356090949516608e-06, - "loss": 1.4193, + "learning_rate": 4.832491930765137e-06, + "loss": 0.9467, "step": 1015 }, { "epoch": 0.83, - "learning_rate": 9.347856441744122e-06, - "loss": 1.5031, + "learning_rate": 4.829298648192763e-06, + "loss": 0.9567, "step": 1020 }, { "epoch": 0.83, - "learning_rate": 9.339573286713369e-06, - "loss": 1.4828, + "learning_rate": 4.826076290229625e-06, + "loss": 0.9723, "step": 1025 }, { "epoch": 0.83, - "learning_rate": 9.331241577102841e-06, - "loss": 1.5191, + "learning_rate": 4.822824897098697e-06, + "loss": 0.9852, "step": 1030 }, { "epoch": 0.84, - "learning_rate": 9.322861406134302e-06, - "loss": 1.4305, + "learning_rate": 4.819544509385381e-06, + "loss": 0.9636, "step": 1035 }, { "epoch": 0.84, - "learning_rate": 9.314432867571732e-06, - "loss": 1.4625, + "learning_rate": 4.8162351680370046e-06, + "loss": 0.9497, "step": 1040 }, { "epoch": 0.85, - "learning_rate": 9.30595605572029e-06, - "loss": 1.5246, + "learning_rate": 4.81289691436231e-06, + "loss": 1.0008, "step": 1045 }, { "epoch": 0.85, - "learning_rate": 9.297431065425257e-06, - "loss": 1.5227, + "learning_rate": 4.809529790030931e-06, + "loss": 1.0033, "step": 1050 }, { "epoch": 0.85, - "learning_rate": 9.28885799207097e-06, - "loss": 1.5367, + "learning_rate": 4.806133837072886e-06, + "loss": 1.0142, "step": 1055 }, { "epoch": 0.86, - "learning_rate": 9.280236931579754e-06, - "loss": 1.4879, + "learning_rate": 4.802709097878039e-06, + "loss": 0.9727, "step": 1060 }, { "epoch": 0.86, - "learning_rate": 9.271567980410859e-06, - "loss": 1.5137, + "learning_rate": 4.799255615195582e-06, + "loss": 0.9863, "step": 1065 }, { "epoch": 0.87, - "learning_rate": 9.26285123555937e-06, - "loss": 1.4449, + "learning_rate": 4.795773432133492e-06, + "loss": 0.984, "step": 1070 }, { "epoch": 0.87, - "learning_rate": 9.254086794555121e-06, - "loss": 1.4602, + "learning_rate": 4.792262592158002e-06, + "loss": 0.9521, "step": 1075 }, { "epoch": 0.87, - "learning_rate": 9.245274755461621e-06, - "loss": 1.4187, + "learning_rate": 4.788723139093051e-06, + "loss": 0.9497, "step": 1080 }, { "epoch": 0.88, - "learning_rate": 9.23641521687493e-06, - "loss": 1.5391, + "learning_rate": 4.785155117119742e-06, + "loss": 1.0392, "step": 1085 }, { "epoch": 0.88, - "learning_rate": 9.227508277922579e-06, - "loss": 1.3988, + "learning_rate": 4.781558570775787e-06, + "loss": 0.9197, "step": 1090 }, { "epoch": 0.89, - "learning_rate": 9.218554038262448e-06, - "loss": 1.5984, + "learning_rate": 4.777933544954951e-06, + "loss": 1.0343, "step": 1095 }, { "epoch": 0.89, - "learning_rate": 9.209552598081657e-06, - "loss": 1.5109, + "learning_rate": 4.774280084906498e-06, + "loss": 1.0113, "step": 1100 }, { "epoch": 0.89, - "learning_rate": 9.200504058095439e-06, - "loss": 1.5418, + "learning_rate": 4.770598236234617e-06, + "loss": 0.9948, "step": 1105 }, { "epoch": 0.9, - "learning_rate": 9.191408519546022e-06, - "loss": 1.4275, + "learning_rate": 4.766888044897856e-06, + "loss": 0.9312, "step": 1110 }, { "epoch": 0.9, - "learning_rate": 9.182266084201486e-06, - "loss": 1.4074, + "learning_rate": 4.763149557208554e-06, + "loss": 0.9207, "step": 1115 }, { "epoch": 0.91, - "learning_rate": 9.173076854354634e-06, - "loss": 1.5016, + "learning_rate": 4.759382819832256e-06, + "loss": 0.9621, "step": 1120 }, { "epoch": 0.91, - "learning_rate": 9.16384093282184e-06, - "loss": 1.5188, + "learning_rate": 4.755587879787131e-06, + "loss": 1.0067, "step": 1125 }, { "epoch": 0.91, - "learning_rate": 9.154558422941901e-06, - "loss": 1.4738, + "learning_rate": 4.75176478444339e-06, + "loss": 0.9758, "step": 1130 }, { "epoch": 0.92, - "learning_rate": 9.145229428574886e-06, - "loss": 1.4049, + "learning_rate": 4.747913581522689e-06, + "loss": 0.9287, "step": 1135 }, { "epoch": 0.92, - "learning_rate": 9.135854054100961e-06, - "loss": 1.4363, + "learning_rate": 4.744034319097536e-06, + "loss": 0.9803, "step": 1140 }, { "epoch": 0.93, - "learning_rate": 9.126432404419239e-06, - "loss": 1.5211, + "learning_rate": 4.740127045590692e-06, + "loss": 1.0133, "step": 1145 }, { "epoch": 0.93, - "learning_rate": 9.11696458494659e-06, - "loss": 1.457, + "learning_rate": 4.736191809774567e-06, + "loss": 0.9588, "step": 1150 }, { "epoch": 0.93, - "learning_rate": 9.107450701616469e-06, - "loss": 1.4543, + "learning_rate": 4.7322286607706056e-06, + "loss": 0.9763, "step": 1155 }, { "epoch": 0.94, - "learning_rate": 9.097890860877732e-06, - "loss": 1.534, + "learning_rate": 4.72823764804868e-06, + "loss": 0.9944, "step": 1160 }, { "epoch": 0.94, - "learning_rate": 9.088285169693442e-06, - "loss": 1.5254, + "learning_rate": 4.724218821426472e-06, + "loss": 0.9897, "step": 1165 }, { "epoch": 0.95, - "learning_rate": 9.078633735539673e-06, - "loss": 1.5371, + "learning_rate": 4.720172231068845e-06, + "loss": 0.9902, "step": 1170 }, { "epoch": 0.95, - "learning_rate": 9.068936666404307e-06, - "loss": 1.475, + "learning_rate": 4.716097927487225e-06, + "loss": 0.969, "step": 1175 }, { "epoch": 0.95, - "learning_rate": 9.059194070785823e-06, - "loss": 1.457, + "learning_rate": 4.711995961538969e-06, + "loss": 0.9458, "step": 1180 }, { "epoch": 0.96, - "learning_rate": 9.049406057692097e-06, - "loss": 1.4891, + "learning_rate": 4.7078663844267245e-06, + "loss": 0.9782, "step": 1185 }, { "epoch": 0.96, - "learning_rate": 9.03957273663916e-06, - "loss": 1.5648, + "learning_rate": 4.7037092476978e-06, + "loss": 0.9987, "step": 1190 }, { "epoch": 0.97, - "learning_rate": 9.02969421764999e-06, - "loss": 1.5855, + "learning_rate": 4.699524603243509e-06, + "loss": 1.0171, "step": 1195 }, { "epoch": 0.97, - "learning_rate": 9.019770611253272e-06, - "loss": 1.4734, + "learning_rate": 4.695312503298535e-06, + "loss": 0.9727, "step": 1200 }, { - "epoch": 0.97, - "learning_rate": 9.009802028482169e-06, - "loss": 1.4867, + "epoch": 0.98, + "learning_rate": 4.69107300044027e-06, + "loss": 0.969, "step": 1205 }, { "epoch": 0.98, - "learning_rate": 8.999788580873074e-06, - "loss": 1.5094, + "learning_rate": 4.686806147588166e-06, + "loss": 0.977, "step": 1210 }, { "epoch": 0.98, - "learning_rate": 8.989730380464362e-06, - "loss": 1.4965, + "learning_rate": 4.6825119980030664e-06, + "loss": 0.9552, "step": 1215 }, { "epoch": 0.99, - "learning_rate": 8.979627539795136e-06, - "loss": 1.4887, + "learning_rate": 4.678190605286546e-06, + "loss": 0.9912, "step": 1220 }, { "epoch": 0.99, - "learning_rate": 8.969480171903973e-06, - "loss": 1.4398, + "learning_rate": 4.673842023380243e-06, + "loss": 0.9702, "step": 1225 }, { "epoch": 1.0, - "learning_rate": 8.959288390327656e-06, - "loss": 1.4301, + "learning_rate": 4.669466306565181e-06, + "loss": 0.9792, "step": 1230 }, { "epoch": 1.0, - "learning_rate": 8.949052309099897e-06, - "loss": 1.5309, + "learning_rate": 4.665063509461098e-06, + "loss": 1.0204, "step": 1235 - }, - { - "epoch": 1.0, - "learning_rate": 8.938772042750078e-06, - "loss": 1.3054, - "step": 1240 - }, - { - "epoch": 1.01, - "learning_rate": 8.928447706301951e-06, - "loss": 1.2152, - "step": 1245 - }, - { - "epoch": 1.01, - "learning_rate": 8.91807941527236e-06, - "loss": 1.2238, - "step": 1250 - }, - { - "epoch": 1.02, - "learning_rate": 8.907667285669955e-06, - "loss": 1.1881, - "step": 1255 - }, - { - "epoch": 1.02, - "learning_rate": 8.897211433993873e-06, - "loss": 1.234, - "step": 1260 - }, - { - "epoch": 1.02, - "learning_rate": 8.886711977232463e-06, - "loss": 1.1724, - "step": 1265 - }, - { - "epoch": 1.03, - "learning_rate": 8.87616903286195e-06, - "loss": 1.3313, - "step": 1270 - }, - { - "epoch": 1.03, - "learning_rate": 8.865582718845142e-06, - "loss": 1.2277, - "step": 1275 - }, - { - "epoch": 1.04, - "learning_rate": 8.854953153630097e-06, - "loss": 1.2145, - "step": 1280 - }, - { - "epoch": 1.04, - "learning_rate": 8.844280456148799e-06, - "loss": 1.243, - "step": 1285 - }, - { - "epoch": 1.04, - "learning_rate": 8.833564745815835e-06, - "loss": 1.1816, - "step": 1290 - }, - { - "epoch": 1.05, - "learning_rate": 8.82280614252705e-06, - "loss": 1.1965, - "step": 1295 - }, - { - "epoch": 1.05, - "learning_rate": 8.81200476665821e-06, - "loss": 1.2035, - "step": 1300 - }, - { - "epoch": 1.06, - "learning_rate": 8.801160739063657e-06, - "loss": 1.2477, - "step": 1305 - }, - { - "epoch": 1.06, - "learning_rate": 8.790274181074951e-06, - "loss": 1.1686, - "step": 1310 - }, - { - "epoch": 1.06, - "learning_rate": 8.779345214499517e-06, - "loss": 1.1877, - "step": 1315 - }, - { - "epoch": 1.07, - "learning_rate": 8.768373961619283e-06, - "loss": 1.2209, - "step": 1320 - }, - { - "epoch": 1.07, - "learning_rate": 8.757360545189308e-06, - "loss": 1.2066, - "step": 1325 - }, - { - "epoch": 1.08, - "learning_rate": 8.746305088436406e-06, - "loss": 1.2484, - "step": 1330 - }, - { - "epoch": 1.08, - "learning_rate": 8.735207715057779e-06, - "loss": 1.2068, - "step": 1335 - }, - { - "epoch": 1.08, - "learning_rate": 8.724068549219618e-06, - "loss": 1.1803, - "step": 1340 - }, - { - "epoch": 1.09, - "learning_rate": 8.712887715555728e-06, - "loss": 1.234, - "step": 1345 - }, - { - "epoch": 1.09, - "learning_rate": 8.701665339166122e-06, - "loss": 1.2441, - "step": 1350 - }, - { - "epoch": 1.1, - "learning_rate": 8.690401545615626e-06, - "loss": 1.2082, - "step": 1355 - }, - { - "epoch": 1.1, - "learning_rate": 8.679096460932477e-06, - "loss": 1.2176, - "step": 1360 - }, - { - "epoch": 1.1, - "learning_rate": 8.667750211606906e-06, - "loss": 1.2516, - "step": 1365 - }, - { - "epoch": 1.11, - "learning_rate": 8.65636292458973e-06, - "loss": 1.1766, - "step": 1370 - }, - { - "epoch": 1.11, - "learning_rate": 8.644934727290927e-06, - "loss": 1.2277, - "step": 1375 - }, - { - "epoch": 1.12, - "learning_rate": 8.63346574757821e-06, - "loss": 1.2773, - "step": 1380 - }, - { - "epoch": 1.12, - "learning_rate": 8.621956113775601e-06, - "loss": 1.2162, - "step": 1385 - }, - { - "epoch": 1.12, - "learning_rate": 8.610405954661988e-06, - "loss": 1.2551, - "step": 1390 - }, - { - "epoch": 1.13, - "learning_rate": 8.598815399469694e-06, - "loss": 1.2625, - "step": 1395 - }, - { - "epoch": 1.13, - "learning_rate": 8.587184577883018e-06, - "loss": 1.2465, - "step": 1400 - }, - { - "epoch": 1.14, - "learning_rate": 8.5755136200368e-06, - "loss": 1.2008, - "step": 1405 - }, - { - "epoch": 1.14, - "learning_rate": 8.563802656514946e-06, - "loss": 1.1623, - "step": 1410 - }, - { - "epoch": 1.14, - "learning_rate": 8.552051818348986e-06, - "loss": 1.1625, - "step": 1415 - }, - { - "epoch": 1.15, - "learning_rate": 8.540261237016597e-06, - "loss": 1.1723, - "step": 1420 - }, - { - "epoch": 1.15, - "learning_rate": 8.528431044440127e-06, - "loss": 1.268, - "step": 1425 - }, - { - "epoch": 1.16, - "learning_rate": 8.516561372985137e-06, - "loss": 1.2488, - "step": 1430 - }, - { - "epoch": 1.16, - "learning_rate": 8.504652355458901e-06, - "loss": 1.298, - "step": 1435 - }, - { - "epoch": 1.17, - "learning_rate": 8.492704125108933e-06, - "loss": 1.2168, - "step": 1440 - }, - { - "epoch": 1.17, - "learning_rate": 8.480716815621486e-06, - "loss": 1.2166, - "step": 1445 - }, - { - "epoch": 1.17, - "learning_rate": 8.468690561120064e-06, - "loss": 1.201, - "step": 1450 - }, - { - "epoch": 1.18, - "learning_rate": 8.456625496163921e-06, - "loss": 1.2266, - "step": 1455 - }, - { - "epoch": 1.18, - "learning_rate": 8.444521755746547e-06, - "loss": 1.1812, - "step": 1460 - }, - { - "epoch": 1.19, - "learning_rate": 8.43237947529417e-06, - "loss": 1.1762, - "step": 1465 - }, - { - "epoch": 1.19, - "learning_rate": 8.420198790664232e-06, - "loss": 1.2473, - "step": 1470 - }, - { - "epoch": 1.19, - "learning_rate": 8.407979838143869e-06, - "loss": 1.1887, - "step": 1475 - }, - { - "epoch": 1.2, - "learning_rate": 8.395722754448392e-06, - "loss": 1.2277, - "step": 1480 - }, - { - "epoch": 1.2, - "learning_rate": 8.38342767671975e-06, - "loss": 1.2418, - "step": 1485 - }, - { - "epoch": 1.21, - "learning_rate": 8.371094742525006e-06, - "loss": 1.2081, - "step": 1490 - }, - { - "epoch": 1.21, - "learning_rate": 8.358724089854784e-06, - "loss": 1.3461, - "step": 1495 - }, - { - "epoch": 1.21, - "learning_rate": 8.346315857121732e-06, - "loss": 1.1977, - "step": 1500 - }, - { - "epoch": 1.22, - "learning_rate": 8.33387018315898e-06, - "loss": 1.2336, - "step": 1505 - }, - { - "epoch": 1.22, - "learning_rate": 8.321387207218578e-06, - "loss": 1.249, - "step": 1510 - }, - { - "epoch": 1.23, - "learning_rate": 8.308867068969933e-06, - "loss": 1.2188, - "step": 1515 - }, - { - "epoch": 1.23, - "learning_rate": 8.296309908498264e-06, - "loss": 1.1823, - "step": 1520 - }, - { - "epoch": 1.23, - "learning_rate": 8.283715866303016e-06, - "loss": 1.2462, - "step": 1525 - }, - { - "epoch": 1.24, - "learning_rate": 8.271085083296295e-06, - "loss": 1.218, - "step": 1530 - }, - { - "epoch": 1.24, - "learning_rate": 8.258417700801301e-06, - "loss": 1.249, - "step": 1535 - }, - { - "epoch": 1.25, - "learning_rate": 8.245713860550734e-06, - "loss": 1.2629, - "step": 1540 - }, - { - "epoch": 1.25, - "learning_rate": 8.232973704685208e-06, - "loss": 1.2605, - "step": 1545 - }, - { - "epoch": 1.25, - "learning_rate": 8.220197375751667e-06, - "loss": 1.2232, - "step": 1550 - }, - { - "epoch": 1.26, - "learning_rate": 8.207385016701792e-06, - "loss": 1.2242, - "step": 1555 - }, - { - "epoch": 1.26, - "learning_rate": 8.194536770890392e-06, - "loss": 1.1824, - "step": 1560 - }, - { - "epoch": 1.27, - "learning_rate": 8.181652782073808e-06, - "loss": 1.275, - "step": 1565 - }, - { - "epoch": 1.27, - "learning_rate": 8.168733194408302e-06, - "loss": 1.2164, - "step": 1570 - }, - { - "epoch": 1.27, - "learning_rate": 8.155778152448443e-06, - "loss": 1.2207, - "step": 1575 - }, - { - "epoch": 1.28, - "learning_rate": 8.142787801145495e-06, - "loss": 1.2266, - "step": 1580 - }, - { - "epoch": 1.28, - "learning_rate": 8.129762285845784e-06, - "loss": 1.1971, - "step": 1585 - }, - { - "epoch": 1.29, - "learning_rate": 8.116701752289084e-06, - "loss": 1.2107, - "step": 1590 - }, - { - "epoch": 1.29, - "learning_rate": 8.103606346606978e-06, - "loss": 1.1904, - "step": 1595 - }, - { - "epoch": 1.29, - "learning_rate": 8.090476215321226e-06, - "loss": 1.2895, - "step": 1600 - }, - { - "epoch": 1.3, - "learning_rate": 8.07731150534213e-06, - "loss": 1.235, - "step": 1605 - }, - { - "epoch": 1.3, - "learning_rate": 8.064112363966877e-06, - "loss": 1.2238, - "step": 1610 - }, - { - "epoch": 1.31, - "learning_rate": 8.050878938877908e-06, - "loss": 1.2535, - "step": 1615 - }, - { - "epoch": 1.31, - "learning_rate": 8.037611378141257e-06, - "loss": 1.2336, - "step": 1620 - }, - { - "epoch": 1.31, - "learning_rate": 8.024309830204888e-06, - "loss": 1.2285, - "step": 1625 - }, - { - "epoch": 1.32, - "learning_rate": 8.010974443897046e-06, - "loss": 1.25, - "step": 1630 - }, - { - "epoch": 1.32, - "learning_rate": 7.997605368424585e-06, - "loss": 1.2492, - "step": 1635 - }, - { - "epoch": 1.33, - "learning_rate": 7.9842027533713e-06, - "loss": 1.1783, - "step": 1640 - }, - { - "epoch": 1.33, - "learning_rate": 7.970766748696254e-06, - "loss": 1.171, - "step": 1645 - }, - { - "epoch": 1.33, - "learning_rate": 7.9572975047321e-06, - "loss": 1.2336, - "step": 1650 - }, - { - "epoch": 1.34, - "learning_rate": 7.943795172183394e-06, - "loss": 1.204, - "step": 1655 - }, - { - "epoch": 1.34, - "learning_rate": 7.93025990212492e-06, - "loss": 1.2342, - "step": 1660 - }, - { - "epoch": 1.35, - "learning_rate": 7.916691845999986e-06, - "loss": 1.1936, - "step": 1665 - }, - { - "epoch": 1.35, - "learning_rate": 7.903091155618747e-06, - "loss": 1.1992, - "step": 1670 - }, - { - "epoch": 1.36, - "learning_rate": 7.889457983156484e-06, - "loss": 1.1707, - "step": 1675 - }, - { - "epoch": 1.36, - "learning_rate": 7.875792481151916e-06, - "loss": 1.1835, - "step": 1680 - }, - { - "epoch": 1.36, - "learning_rate": 7.862094802505498e-06, - "loss": 1.2359, - "step": 1685 - }, - { - "epoch": 1.37, - "learning_rate": 7.848365100477695e-06, - "loss": 1.2754, - "step": 1690 - }, - { - "epoch": 1.37, - "learning_rate": 7.834603528687277e-06, - "loss": 1.2664, - "step": 1695 - }, - { - "epoch": 1.38, - "learning_rate": 7.8208102411096e-06, - "loss": 1.2535, - "step": 1700 - }, - { - "epoch": 1.38, - "learning_rate": 7.806985392074877e-06, - "loss": 1.2158, - "step": 1705 - }, - { - "epoch": 1.38, - "learning_rate": 7.793129136266464e-06, - "loss": 1.2504, - "step": 1710 - }, - { - "epoch": 1.39, - "learning_rate": 7.779241628719108e-06, - "loss": 1.1505, - "step": 1715 - }, - { - "epoch": 1.39, - "learning_rate": 7.765323024817237e-06, - "loss": 1.2695, - "step": 1720 - }, - { - "epoch": 1.4, - "learning_rate": 7.751373480293205e-06, - "loss": 1.2059, - "step": 1725 - }, - { - "epoch": 1.4, - "learning_rate": 7.737393151225555e-06, - "loss": 1.2547, - "step": 1730 - }, - { - "epoch": 1.4, - "learning_rate": 7.723382194037266e-06, - "loss": 1.2127, - "step": 1735 - }, - { - "epoch": 1.41, - "learning_rate": 7.709340765494017e-06, - "loss": 1.1734, - "step": 1740 - }, - { - "epoch": 1.41, - "learning_rate": 7.695269022702425e-06, - "loss": 1.2037, - "step": 1745 - }, - { - "epoch": 1.42, - "learning_rate": 7.681167123108277e-06, - "loss": 1.2891, - "step": 1750 - }, - { - "epoch": 1.42, - "learning_rate": 7.667035224494787e-06, - "loss": 1.2285, - "step": 1755 - }, - { - "epoch": 1.42, - "learning_rate": 7.65287348498082e-06, - "loss": 1.217, - "step": 1760 - }, - { - "epoch": 1.43, - "learning_rate": 7.63868206301912e-06, - "loss": 1.1856, - "step": 1765 - }, - { - "epoch": 1.43, - "learning_rate": 7.62446111739455e-06, - "loss": 1.2613, - "step": 1770 - }, - { - "epoch": 1.44, - "learning_rate": 7.6102108072223e-06, - "loss": 1.1617, - "step": 1775 - }, - { - "epoch": 1.44, - "learning_rate": 7.595931291946116e-06, - "loss": 1.2006, - "step": 1780 - }, - { - "epoch": 1.44, - "learning_rate": 7.581622731336515e-06, - "loss": 1.2543, - "step": 1785 - }, - { - "epoch": 1.45, - "learning_rate": 7.567285285488994e-06, - "loss": 1.2498, - "step": 1790 - }, - { - "epoch": 1.45, - "learning_rate": 7.552919114822246e-06, - "loss": 1.2484, - "step": 1795 - }, - { - "epoch": 1.46, - "learning_rate": 7.5385243800763505e-06, - "loss": 1.2543, - "step": 1800 - }, - { - "epoch": 1.46, - "learning_rate": 7.524101242310993e-06, - "loss": 1.2621, - "step": 1805 - }, - { - "epoch": 1.46, - "learning_rate": 7.509649862903652e-06, - "loss": 1.2176, - "step": 1810 - }, - { - "epoch": 1.47, - "learning_rate": 7.495170403547797e-06, - "loss": 1.2189, - "step": 1815 - }, - { - "epoch": 1.47, - "learning_rate": 7.480663026251073e-06, - "loss": 1.2503, - "step": 1820 - }, - { - "epoch": 1.48, - "learning_rate": 7.466127893333498e-06, - "loss": 1.2186, - "step": 1825 - }, - { - "epoch": 1.48, - "learning_rate": 7.451565167425642e-06, - "loss": 1.2805, - "step": 1830 - }, - { - "epoch": 1.48, - "learning_rate": 7.436975011466805e-06, - "loss": 1.2347, - "step": 1835 - }, - { - "epoch": 1.49, - "learning_rate": 7.422357588703195e-06, - "loss": 1.266, - "step": 1840 - }, - { - "epoch": 1.49, - "learning_rate": 7.407713062686107e-06, - "loss": 1.2496, - "step": 1845 - }, - { - "epoch": 1.5, - "learning_rate": 7.393041597270085e-06, - "loss": 1.2902, - "step": 1850 - }, - { - "epoch": 1.5, - "learning_rate": 7.378343356611093e-06, - "loss": 1.2367, - "step": 1855 - }, - { - "epoch": 1.5, - "learning_rate": 7.363618505164678e-06, - "loss": 1.274, - "step": 1860 - }, - { - "epoch": 1.51, - "learning_rate": 7.348867207684132e-06, - "loss": 1.2242, - "step": 1865 - }, - { - "epoch": 1.51, - "learning_rate": 7.334089629218639e-06, - "loss": 1.2844, - "step": 1870 - }, - { - "epoch": 1.52, - "learning_rate": 7.319285935111444e-06, - "loss": 1.2672, - "step": 1875 - }, - { - "epoch": 1.52, - "learning_rate": 7.304456290997991e-06, - "loss": 1.1542, - "step": 1880 - }, - { - "epoch": 1.53, - "learning_rate": 7.289600862804069e-06, - "loss": 1.15, - "step": 1885 - }, - { - "epoch": 1.53, - "learning_rate": 7.274719816743967e-06, - "loss": 1.2385, - "step": 1890 - }, - { - "epoch": 1.53, - "learning_rate": 7.259813319318601e-06, - "loss": 1.2348, - "step": 1895 - }, - { - "epoch": 1.54, - "learning_rate": 7.244881537313664e-06, - "loss": 1.2578, - "step": 1900 - }, - { - "epoch": 1.54, - "learning_rate": 7.229924637797742e-06, - "loss": 1.2191, - "step": 1905 - }, - { - "epoch": 1.55, - "learning_rate": 7.214942788120466e-06, - "loss": 1.251, - "step": 1910 - }, - { - "epoch": 1.55, - "learning_rate": 7.1999361559106225e-06, - "loss": 1.2031, - "step": 1915 - }, - { - "epoch": 1.55, - "learning_rate": 7.184904909074293e-06, - "loss": 1.2766, - "step": 1920 - }, - { - "epoch": 1.56, - "learning_rate": 7.169849215792955e-06, - "loss": 1.2299, - "step": 1925 - }, - { - "epoch": 1.56, - "learning_rate": 7.15476924452162e-06, - "loss": 1.2355, - "step": 1930 - }, - { - "epoch": 1.57, - "learning_rate": 7.139665163986938e-06, - "loss": 1.2336, - "step": 1935 - }, - { - "epoch": 1.57, - "learning_rate": 7.124537143185317e-06, - "loss": 1.3566, - "step": 1940 - }, - { - "epoch": 1.57, - "learning_rate": 7.109385351381022e-06, - "loss": 1.1423, - "step": 1945 - }, - { - "epoch": 1.58, - "learning_rate": 7.09420995810429e-06, - "loss": 1.2576, - "step": 1950 - }, - { - "epoch": 1.58, - "learning_rate": 7.079011133149427e-06, - "loss": 1.2563, - "step": 1955 - }, - { - "epoch": 1.59, - "learning_rate": 7.0637890465729165e-06, - "loss": 1.2695, - "step": 1960 - }, - { - "epoch": 1.59, - "learning_rate": 7.048543868691506e-06, - "loss": 1.1986, - "step": 1965 - }, - { - "epoch": 1.59, - "learning_rate": 7.033275770080309e-06, - "loss": 1.25, - "step": 1970 - }, - { - "epoch": 1.6, - "learning_rate": 7.017984921570895e-06, - "loss": 1.2025, - "step": 1975 - }, - { - "epoch": 1.6, - "learning_rate": 7.002671494249376e-06, - "loss": 1.2465, - "step": 1980 - }, - { - "epoch": 1.61, - "learning_rate": 6.987335659454493e-06, - "loss": 1.2336, - "step": 1985 - }, - { - "epoch": 1.61, - "learning_rate": 6.971977588775703e-06, - "loss": 1.2436, - "step": 1990 - }, - { - "epoch": 1.61, - "learning_rate": 6.956597454051253e-06, - "loss": 1.2429, - "step": 1995 - }, - { - "epoch": 1.62, - "learning_rate": 6.941195427366259e-06, - "loss": 1.2574, - "step": 2000 - }, - { - "epoch": 1.62, - "learning_rate": 6.925771681050784e-06, - "loss": 1.2465, - "step": 2005 - }, - { - "epoch": 1.63, - "learning_rate": 6.910326387677906e-06, - "loss": 1.2805, - "step": 2010 - }, - { - "epoch": 1.63, - "learning_rate": 6.89485972006179e-06, - "loss": 1.2664, - "step": 2015 - }, - { - "epoch": 1.63, - "learning_rate": 6.879371851255747e-06, - "loss": 1.1826, - "step": 2020 - }, - { - "epoch": 1.64, - "learning_rate": 6.863862954550315e-06, - "loss": 1.2441, - "step": 2025 - }, - { - "epoch": 1.64, - "learning_rate": 6.8483332034713006e-06, - "loss": 1.191, - "step": 2030 - }, - { - "epoch": 1.65, - "learning_rate": 6.832782771777846e-06, - "loss": 1.2574, - "step": 2035 - }, - { - "epoch": 1.65, - "learning_rate": 6.817211833460484e-06, - "loss": 1.2865, - "step": 2040 - }, - { - "epoch": 1.65, - "learning_rate": 6.801620562739197e-06, - "loss": 1.2504, - "step": 2045 - }, - { - "epoch": 1.66, - "learning_rate": 6.7860091340614575e-06, - "loss": 1.2084, - "step": 2050 - }, - { - "epoch": 1.66, - "learning_rate": 6.770377722100284e-06, - "loss": 1.2609, - "step": 2055 - }, - { - "epoch": 1.67, - "learning_rate": 6.75472650175228e-06, - "loss": 1.2723, - "step": 2060 - }, - { - "epoch": 1.67, - "learning_rate": 6.739055648135685e-06, - "loss": 1.1243, - "step": 2065 - }, - { - "epoch": 1.67, - "learning_rate": 6.723365336588409e-06, - "loss": 1.2529, - "step": 2070 - }, - { - "epoch": 1.68, - "learning_rate": 6.707655742666074e-06, - "loss": 1.3047, - "step": 2075 - }, - { - "epoch": 1.68, - "learning_rate": 6.691927042140044e-06, - "loss": 1.257, - "step": 2080 - }, - { - "epoch": 1.69, - "learning_rate": 6.6761794109954714e-06, - "loss": 1.2086, - "step": 2085 - }, - { - "epoch": 1.69, - "learning_rate": 6.660413025429312e-06, - "loss": 1.2711, - "step": 2090 - }, - { - "epoch": 1.69, - "learning_rate": 6.644628061848363e-06, - "loss": 1.1157, - "step": 2095 - }, - { - "epoch": 1.7, - "learning_rate": 6.628824696867286e-06, - "loss": 1.2309, - "step": 2100 - }, - { - "epoch": 1.7, - "learning_rate": 6.613003107306637e-06, - "loss": 1.2363, - "step": 2105 - }, - { - "epoch": 1.71, - "learning_rate": 6.597163470190877e-06, - "loss": 1.207, - "step": 2110 - }, - { - "epoch": 1.71, - "learning_rate": 6.5813059627464e-06, - "loss": 1.2641, - "step": 2115 - }, - { - "epoch": 1.72, - "learning_rate": 6.565430762399546e-06, - "loss": 1.252, - "step": 2120 - }, - { - "epoch": 1.72, - "learning_rate": 6.549538046774621e-06, - "loss": 1.2586, - "step": 2125 - }, - { - "epoch": 1.72, - "learning_rate": 6.533627993691901e-06, - "loss": 1.3012, - "step": 2130 - }, - { - "epoch": 1.73, - "learning_rate": 6.517700781165649e-06, - "loss": 1.1842, - "step": 2135 - }, - { - "epoch": 1.73, - "learning_rate": 6.501756587402124e-06, - "loss": 1.2016, - "step": 2140 - }, - { - "epoch": 1.74, - "learning_rate": 6.485795590797579e-06, - "loss": 1.2988, - "step": 2145 - }, - { - "epoch": 1.74, - "learning_rate": 6.469817969936277e-06, - "loss": 1.2547, - "step": 2150 - }, - { - "epoch": 1.74, - "learning_rate": 6.453823903588481e-06, - "loss": 1.2309, - "step": 2155 - }, - { - "epoch": 1.75, - "learning_rate": 6.437813570708463e-06, - "loss": 1.2855, - "step": 2160 - }, - { - "epoch": 1.75, - "learning_rate": 6.421787150432493e-06, - "loss": 1.1488, - "step": 2165 - }, - { - "epoch": 1.76, - "learning_rate": 6.405744822076845e-06, - "loss": 1.2115, - "step": 2170 - }, - { - "epoch": 1.76, - "learning_rate": 6.389686765135782e-06, - "loss": 1.2336, - "step": 2175 - }, - { - "epoch": 1.76, - "learning_rate": 6.3736131592795525e-06, - "loss": 1.2746, - "step": 2180 - }, - { - "epoch": 1.77, - "learning_rate": 6.357524184352375e-06, - "loss": 1.201, - "step": 2185 - }, - { - "epoch": 1.77, - "learning_rate": 6.341420020370435e-06, - "loss": 1.2703, - "step": 2190 - }, - { - "epoch": 1.78, - "learning_rate": 6.325300847519859e-06, - "loss": 1.2441, - "step": 2195 - }, - { - "epoch": 1.78, - "learning_rate": 6.309166846154713e-06, - "loss": 1.2684, - "step": 2200 - }, - { - "epoch": 1.78, - "learning_rate": 6.293018196794964e-06, - "loss": 1.2449, - "step": 2205 - }, - { - "epoch": 1.79, - "learning_rate": 6.276855080124483e-06, - "loss": 1.268, - "step": 2210 - }, - { - "epoch": 1.79, - "learning_rate": 6.260677676989008e-06, - "loss": 1.2906, - "step": 2215 - }, - { - "epoch": 1.8, - "learning_rate": 6.24448616839413e-06, - "loss": 1.307, - "step": 2220 - }, - { - "epoch": 1.8, - "learning_rate": 6.228280735503254e-06, - "loss": 1.2699, - "step": 2225 - }, - { - "epoch": 1.8, - "learning_rate": 6.212061559635588e-06, - "loss": 1.24, - "step": 2230 - }, - { - "epoch": 1.81, - "learning_rate": 6.195828822264107e-06, - "loss": 1.2605, - "step": 2235 - }, - { - "epoch": 1.81, - "learning_rate": 6.179582705013519e-06, - "loss": 1.3457, - "step": 2240 - }, - { - "epoch": 1.82, - "learning_rate": 6.163323389658242e-06, - "loss": 1.2301, - "step": 2245 - }, - { - "epoch": 1.82, - "learning_rate": 6.147051058120359e-06, - "loss": 1.257, - "step": 2250 - }, - { - "epoch": 1.82, - "learning_rate": 6.130765892467595e-06, - "loss": 1.1584, - "step": 2255 - }, - { - "epoch": 1.83, - "learning_rate": 6.114468074911265e-06, - "loss": 1.2537, - "step": 2260 - }, - { - "epoch": 1.83, - "learning_rate": 6.098157787804252e-06, - "loss": 1.2559, - "step": 2265 - }, - { - "epoch": 1.84, - "learning_rate": 6.081835213638951e-06, - "loss": 1.2307, - "step": 2270 - }, - { - "epoch": 1.84, - "learning_rate": 6.0655005350452414e-06, - "loss": 1.1664, - "step": 2275 - }, - { - "epoch": 1.84, - "learning_rate": 6.049153934788429e-06, - "loss": 1.2146, - "step": 2280 - }, - { - "epoch": 1.85, - "learning_rate": 6.032795595767214e-06, - "loss": 1.2498, - "step": 2285 - }, - { - "epoch": 1.85, - "learning_rate": 6.016425701011637e-06, - "loss": 1.2379, - "step": 2290 - }, - { - "epoch": 1.86, - "learning_rate": 6.000044433681034e-06, - "loss": 1.2584, - "step": 2295 - }, - { - "epoch": 1.86, - "learning_rate": 5.9836519770619865e-06, - "loss": 1.2805, - "step": 2300 - }, - { - "epoch": 1.86, - "learning_rate": 5.967248514566271e-06, - "loss": 1.2348, - "step": 2305 - }, - { - "epoch": 1.87, - "learning_rate": 5.9508342297288035e-06, - "loss": 1.2572, - "step": 2310 - }, - { - "epoch": 1.87, - "learning_rate": 5.934409306205593e-06, - "loss": 1.2018, - "step": 2315 - }, - { - "epoch": 1.88, - "learning_rate": 5.917973927771678e-06, - "loss": 1.2641, - "step": 2320 - }, - { - "epoch": 1.88, - "learning_rate": 5.901528278319083e-06, - "loss": 1.2293, - "step": 2325 - }, - { - "epoch": 1.89, - "learning_rate": 5.885072541854742e-06, - "loss": 1.3113, - "step": 2330 - }, - { - "epoch": 1.89, - "learning_rate": 5.868606902498457e-06, - "loss": 1.26, - "step": 2335 - }, - { - "epoch": 1.89, - "learning_rate": 5.852131544480831e-06, - "loss": 1.2092, - "step": 2340 - }, - { - "epoch": 1.9, - "learning_rate": 5.835646652141208e-06, - "loss": 1.2535, - "step": 2345 - }, - { - "epoch": 1.9, - "learning_rate": 5.8191524099256035e-06, - "loss": 1.2535, - "step": 2350 - }, - { - "epoch": 1.91, - "learning_rate": 5.802649002384655e-06, - "loss": 1.2629, - "step": 2355 - }, - { - "epoch": 1.91, - "learning_rate": 5.786136614171542e-06, - "loss": 1.233, - "step": 2360 - }, - { - "epoch": 1.91, - "learning_rate": 5.769615430039931e-06, - "loss": 1.2375, - "step": 2365 - }, - { - "epoch": 1.92, - "learning_rate": 5.753085634841903e-06, - "loss": 1.2312, - "step": 2370 - }, - { - "epoch": 1.92, - "learning_rate": 5.736547413525888e-06, - "loss": 1.1715, - "step": 2375 - }, - { - "epoch": 1.93, - "learning_rate": 5.72000095113459e-06, - "loss": 1.2695, - "step": 2380 - }, - { - "epoch": 1.93, - "learning_rate": 5.703446432802924e-06, - "loss": 1.1672, - "step": 2385 - }, - { - "epoch": 1.93, - "learning_rate": 5.686884043755942e-06, - "loss": 1.2637, - "step": 2390 - }, - { - "epoch": 1.94, - "learning_rate": 5.6703139693067554e-06, - "loss": 1.1591, - "step": 2395 - }, - { - "epoch": 1.94, - "learning_rate": 5.653736394854471e-06, - "loss": 1.2343, - "step": 2400 - }, - { - "epoch": 1.95, - "learning_rate": 5.637151505882109e-06, - "loss": 1.2172, - "step": 2405 - }, - { - "epoch": 1.95, - "learning_rate": 5.620559487954531e-06, - "loss": 1.2121, - "step": 2410 - }, - { - "epoch": 1.95, - "learning_rate": 5.603960526716361e-06, - "loss": 1.178, - "step": 2415 - }, - { - "epoch": 1.96, - "learning_rate": 5.587354807889913e-06, - "loss": 1.2256, - "step": 2420 - }, - { - "epoch": 1.96, - "learning_rate": 5.570742517273109e-06, - "loss": 1.241, - "step": 2425 - }, - { - "epoch": 1.97, - "learning_rate": 5.554123840737402e-06, - "loss": 1.2773, - "step": 2430 - }, - { - "epoch": 1.97, - "learning_rate": 5.537498964225694e-06, - "loss": 1.2383, - "step": 2435 - }, - { - "epoch": 1.97, - "learning_rate": 5.520868073750261e-06, - "loss": 1.177, - "step": 2440 - }, - { - "epoch": 1.98, - "learning_rate": 5.50423135539066e-06, - "loss": 1.1607, - "step": 2445 - }, - { - "epoch": 1.98, - "learning_rate": 5.487588995291666e-06, - "loss": 1.2957, - "step": 2450 - }, - { - "epoch": 1.99, - "learning_rate": 5.47094117966117e-06, - "loss": 1.164, - "step": 2455 - }, - { - "epoch": 1.99, - "learning_rate": 5.454288094768108e-06, - "loss": 1.225, - "step": 2460 - }, - { - "epoch": 1.99, - "learning_rate": 5.437629926940367e-06, - "loss": 1.2602, - "step": 2465 - }, - { - "epoch": 2.0, - "learning_rate": 5.420966862562718e-06, - "loss": 1.2434, - "step": 2470 - }, - { - "epoch": 2.0, - "learning_rate": 5.404299088074702e-06, - "loss": 1.0836, - "step": 2475 - }, - { - "epoch": 2.01, - "learning_rate": 5.387626789968574e-06, - "loss": 1.0635, - "step": 2480 - }, - { - "epoch": 2.01, - "learning_rate": 5.370950154787195e-06, - "loss": 1.032, - "step": 2485 - }, - { - "epoch": 2.01, - "learning_rate": 5.354269369121958e-06, - "loss": 1.0236, - "step": 2490 - }, - { - "epoch": 2.02, - "learning_rate": 5.337584619610691e-06, - "loss": 1.0402, - "step": 2495 - }, - { - "epoch": 2.02, - "learning_rate": 5.320896092935575e-06, - "loss": 1.0713, - "step": 2500 - }, - { - "epoch": 2.03, - "learning_rate": 5.304203975821048e-06, - "loss": 1.0443, - "step": 2505 - }, - { - "epoch": 2.03, - "learning_rate": 5.287508455031729e-06, - "loss": 1.0523, - "step": 2510 - }, - { - "epoch": 2.03, - "learning_rate": 5.270809717370314e-06, - "loss": 1.0072, - "step": 2515 - }, - { - "epoch": 2.04, - "learning_rate": 5.254107949675493e-06, - "loss": 1.0473, - "step": 2520 - }, - { - "epoch": 2.04, - "learning_rate": 5.237403338819859e-06, - "loss": 1.0189, - "step": 2525 - }, - { - "epoch": 2.05, - "learning_rate": 5.220696071707816e-06, - "loss": 1.027, - "step": 2530 - }, - { - "epoch": 2.05, - "learning_rate": 5.20398633527349e-06, - "loss": 0.9773, - "step": 2535 - }, - { - "epoch": 2.06, - "learning_rate": 5.187274316478632e-06, - "loss": 0.9916, - "step": 2540 - }, - { - "epoch": 2.06, - "learning_rate": 5.170560202310536e-06, - "loss": 1.0252, - "step": 2545 - }, - { - "epoch": 2.06, - "learning_rate": 5.153844179779932e-06, - "loss": 1.0508, - "step": 2550 - }, - { - "epoch": 2.07, - "learning_rate": 5.137126435918912e-06, - "loss": 1.0217, - "step": 2555 - }, - { - "epoch": 2.07, - "learning_rate": 5.12040715777882e-06, - "loss": 1.0367, - "step": 2560 - }, - { - "epoch": 2.08, - "learning_rate": 5.1036865324281716e-06, - "loss": 1.0121, - "step": 2565 - }, - { - "epoch": 2.08, - "learning_rate": 5.08696474695055e-06, - "loss": 0.9992, - "step": 2570 - }, - { - "epoch": 2.08, - "learning_rate": 5.070241988442528e-06, - "loss": 1.0778, - "step": 2575 - }, - { - "epoch": 2.09, - "learning_rate": 5.053518444011557e-06, - "loss": 1.0703, - "step": 2580 - }, - { - "epoch": 2.09, - "learning_rate": 5.036794300773887e-06, - "loss": 1.017, - "step": 2585 - }, - { - "epoch": 2.1, - "learning_rate": 5.020069745852463e-06, - "loss": 0.9813, - "step": 2590 - }, - { - "epoch": 2.1, - "learning_rate": 5.003344966374843e-06, - "loss": 1.0287, - "step": 2595 - }, - { - "epoch": 2.1, - "learning_rate": 4.9866201494710934e-06, - "loss": 1.0617, - "step": 2600 - }, - { - "epoch": 2.11, - "learning_rate": 4.969895482271695e-06, - "loss": 1.1227, - "step": 2605 - }, - { - "epoch": 2.11, - "learning_rate": 4.953171151905466e-06, - "loss": 1.0496, - "step": 2610 - }, - { - "epoch": 2.12, - "learning_rate": 4.936447345497443e-06, - "loss": 1.0287, - "step": 2615 - }, - { - "epoch": 2.12, - "learning_rate": 4.919724250166808e-06, - "loss": 1.0656, - "step": 2620 - }, - { - "epoch": 2.12, - "learning_rate": 4.903002053024782e-06, - "loss": 1.0287, - "step": 2625 - }, - { - "epoch": 2.13, - "learning_rate": 4.886280941172539e-06, - "loss": 1.0293, - "step": 2630 - }, - { - "epoch": 2.13, - "learning_rate": 4.869561101699113e-06, - "loss": 1.0805, - "step": 2635 - }, - { - "epoch": 2.14, - "learning_rate": 4.852842721679293e-06, - "loss": 1.0068, - "step": 2640 - }, - { - "epoch": 2.14, - "learning_rate": 4.836125988171547e-06, - "loss": 1.0056, - "step": 2645 - }, - { - "epoch": 2.14, - "learning_rate": 4.8194110882159175e-06, - "loss": 1.0256, - "step": 2650 - }, - { - "epoch": 2.15, - "learning_rate": 4.802698208831929e-06, - "loss": 1.0551, - "step": 2655 - }, - { - "epoch": 2.15, - "learning_rate": 4.785987537016504e-06, - "loss": 1.002, - "step": 2660 - }, - { - "epoch": 2.16, - "learning_rate": 4.769279259741858e-06, - "loss": 1.0378, - "step": 2665 - }, - { - "epoch": 2.16, - "learning_rate": 4.752573563953422e-06, - "loss": 1.0088, - "step": 2670 - }, - { - "epoch": 2.16, - "learning_rate": 4.735870636567736e-06, - "loss": 0.9963, - "step": 2675 - }, - { - "epoch": 2.17, - "learning_rate": 4.719170664470371e-06, - "loss": 0.9977, - "step": 2680 - }, - { - "epoch": 2.17, - "learning_rate": 4.702473834513826e-06, - "loss": 1.0533, - "step": 2685 - }, - { - "epoch": 2.18, - "learning_rate": 4.685780333515449e-06, - "loss": 1.0148, - "step": 2690 - }, - { - "epoch": 2.18, - "learning_rate": 4.669090348255338e-06, - "loss": 1.0023, - "step": 2695 - }, - { - "epoch": 2.18, - "learning_rate": 4.652404065474257e-06, - "loss": 1.0227, - "step": 2700 - }, - { - "epoch": 2.19, - "learning_rate": 4.6357216718715375e-06, - "loss": 1.0236, - "step": 2705 - }, - { - "epoch": 2.19, - "learning_rate": 4.619043354103002e-06, - "loss": 1.01, - "step": 2710 - }, - { - "epoch": 2.2, - "learning_rate": 4.602369298778866e-06, - "loss": 1.0625, - "step": 2715 - }, - { - "epoch": 2.2, - "learning_rate": 4.585699692461655e-06, - "loss": 1.0154, - "step": 2720 - }, - { - "epoch": 2.2, - "learning_rate": 4.569034721664114e-06, - "loss": 1.0547, - "step": 2725 - }, - { - "epoch": 2.21, - "learning_rate": 4.552374572847122e-06, - "loss": 0.981, - "step": 2730 - }, - { - "epoch": 2.21, - "learning_rate": 4.535719432417612e-06, - "loss": 1.0691, - "step": 2735 - }, - { - "epoch": 2.22, - "learning_rate": 4.519069486726468e-06, - "loss": 1.0451, - "step": 2740 - }, - { - "epoch": 2.22, - "learning_rate": 4.502424922066462e-06, - "loss": 0.9773, - "step": 2745 - }, - { - "epoch": 2.22, - "learning_rate": 4.485785924670151e-06, - "loss": 0.9898, - "step": 2750 - }, - { - "epoch": 2.23, - "learning_rate": 4.469152680707804e-06, - "loss": 1.0496, - "step": 2755 - }, - { - "epoch": 2.23, - "learning_rate": 4.452525376285319e-06, - "loss": 1.0211, - "step": 2760 - }, - { - "epoch": 2.24, - "learning_rate": 4.435904197442131e-06, - "loss": 1.0961, - "step": 2765 - }, - { - "epoch": 2.24, - "learning_rate": 4.419289330149145e-06, - "loss": 1.0279, - "step": 2770 - }, - { - "epoch": 2.25, - "learning_rate": 4.4026809603066375e-06, - "loss": 1.0081, - "step": 2775 - }, - { - "epoch": 2.25, - "learning_rate": 4.386079273742199e-06, - "loss": 1.0764, - "step": 2780 - }, - { - "epoch": 2.25, - "learning_rate": 4.3694844562086325e-06, - "loss": 1.0342, - "step": 2785 - }, - { - "epoch": 2.26, - "learning_rate": 4.3528966933818865e-06, - "loss": 1.0707, - "step": 2790 - }, - { - "epoch": 2.26, - "learning_rate": 4.33631617085898e-06, - "loss": 1.0127, - "step": 2795 - }, - { - "epoch": 2.27, - "learning_rate": 4.319743074155916e-06, - "loss": 1.0658, - "step": 2800 - }, - { - "epoch": 2.27, - "learning_rate": 4.3031775887056176e-06, - "loss": 1.0881, - "step": 2805 - }, - { - "epoch": 2.27, - "learning_rate": 4.2866198998558404e-06, - "loss": 1.043, - "step": 2810 - }, - { - "epoch": 2.28, - "learning_rate": 4.2700701928671105e-06, - "loss": 1.008, - "step": 2815 - }, - { - "epoch": 2.28, - "learning_rate": 4.253528652910647e-06, - "loss": 1.0571, - "step": 2820 - }, - { - "epoch": 2.29, - "learning_rate": 4.236995465066287e-06, - "loss": 1.0859, - "step": 2825 - }, - { - "epoch": 2.29, - "learning_rate": 4.220470814320417e-06, - "loss": 1.0085, - "step": 2830 - }, - { - "epoch": 2.29, - "learning_rate": 4.203954885563909e-06, - "loss": 1.0146, - "step": 2835 - }, - { - "epoch": 2.3, - "learning_rate": 4.187447863590039e-06, - "loss": 1.0562, - "step": 2840 - }, - { - "epoch": 2.3, - "learning_rate": 4.170949933092432e-06, - "loss": 1.1096, - "step": 2845 - }, - { - "epoch": 2.31, - "learning_rate": 4.154461278662989e-06, - "loss": 1.0555, - "step": 2850 - }, - { - "epoch": 2.31, - "learning_rate": 4.137982084789823e-06, - "loss": 0.9902, - "step": 2855 - }, - { - "epoch": 2.31, - "learning_rate": 4.121512535855193e-06, - "loss": 1.06, - "step": 2860 - }, - { - "epoch": 2.32, - "learning_rate": 4.105052816133448e-06, - "loss": 1.0412, - "step": 2865 - }, - { - "epoch": 2.32, - "learning_rate": 4.0886031097889556e-06, - "loss": 1.0354, - "step": 2870 - }, - { - "epoch": 2.33, - "learning_rate": 4.072163600874045e-06, - "loss": 1.0928, - "step": 2875 - }, - { - "epoch": 2.33, - "learning_rate": 4.0557344733269505e-06, - "loss": 1.0645, - "step": 2880 - }, - { - "epoch": 2.33, - "learning_rate": 4.039315910969754e-06, - "loss": 0.9994, - "step": 2885 - }, - { - "epoch": 2.34, - "learning_rate": 4.02290809750632e-06, - "loss": 1.003, - "step": 2890 - }, - { - "epoch": 2.34, - "learning_rate": 4.006511216520251e-06, - "loss": 1.0512, - "step": 2895 - }, - { - "epoch": 2.35, - "learning_rate": 3.9901254514728225e-06, - "loss": 1.06, - "step": 2900 - }, - { - "epoch": 2.35, - "learning_rate": 3.973750985700943e-06, - "loss": 1.0541, - "step": 2905 - }, - { - "epoch": 2.35, - "learning_rate": 3.957388002415093e-06, - "loss": 1.0078, - "step": 2910 - }, - { - "epoch": 2.36, - "learning_rate": 3.941036684697274e-06, - "loss": 1.0104, - "step": 2915 - }, - { - "epoch": 2.36, - "learning_rate": 3.924697215498971e-06, - "loss": 1.0465, - "step": 2920 - }, - { - "epoch": 2.37, - "learning_rate": 3.908369777639091e-06, - "loss": 0.9527, - "step": 2925 - }, - { - "epoch": 2.37, - "learning_rate": 3.892054553801931e-06, - "loss": 1.0559, - "step": 2930 - }, - { - "epoch": 2.37, - "learning_rate": 3.875751726535124e-06, - "loss": 1.041, - "step": 2935 - }, - { - "epoch": 2.38, - "learning_rate": 3.8594614782476024e-06, - "loss": 1.0352, - "step": 2940 - }, - { - "epoch": 2.38, - "learning_rate": 3.843183991207551e-06, - "loss": 1.0175, - "step": 2945 - }, - { - "epoch": 2.39, - "learning_rate": 3.82691944754038e-06, - "loss": 0.9959, - "step": 2950 - }, - { - "epoch": 2.39, - "learning_rate": 3.8106680292266717e-06, - "loss": 1.0094, - "step": 2955 - }, - { - "epoch": 2.39, - "learning_rate": 3.7944299181001544e-06, - "loss": 1.0367, - "step": 2960 - }, - { - "epoch": 2.4, - "learning_rate": 3.778205295845663e-06, - "loss": 1.0443, - "step": 2965 - }, - { - "epoch": 2.4, - "learning_rate": 3.7619943439971107e-06, - "loss": 1.0074, - "step": 2970 - }, - { - "epoch": 2.41, - "learning_rate": 3.7457972439354526e-06, - "loss": 1.0396, - "step": 2975 - }, - { - "epoch": 2.41, - "learning_rate": 3.7296141768866635e-06, - "loss": 1.0506, - "step": 2980 - }, - { - "epoch": 2.42, - "learning_rate": 3.7134453239196987e-06, - "loss": 1.0268, - "step": 2985 - }, - { - "epoch": 2.42, - "learning_rate": 3.6972908659444828e-06, - "loss": 1.0101, - "step": 2990 - }, - { - "epoch": 2.42, - "learning_rate": 3.6811509837098756e-06, - "loss": 1.0076, - "step": 2995 - }, - { - "epoch": 2.43, - "learning_rate": 3.6650258578016474e-06, - "loss": 1.0602, - "step": 3000 - }, - { - "epoch": 2.43, - "learning_rate": 3.6489156686404683e-06, - "loss": 1.0418, - "step": 3005 - }, - { - "epoch": 2.44, - "learning_rate": 3.6328205964798822e-06, - "loss": 1.0498, - "step": 3010 - }, - { - "epoch": 2.44, - "learning_rate": 3.616740821404292e-06, - "loss": 1.0277, - "step": 3015 - }, - { - "epoch": 2.44, - "learning_rate": 3.600676523326946e-06, - "loss": 1.0979, - "step": 3020 - }, - { - "epoch": 2.45, - "learning_rate": 3.5846278819879197e-06, - "loss": 1.0467, - "step": 3025 - }, - { - "epoch": 2.45, - "learning_rate": 3.568595076952113e-06, - "loss": 1.0344, - "step": 3030 - }, - { - "epoch": 2.46, - "learning_rate": 3.552578287607237e-06, - "loss": 0.9874, - "step": 3035 - }, - { - "epoch": 2.46, - "learning_rate": 3.536577693161801e-06, - "loss": 1.0688, - "step": 3040 - }, - { - "epoch": 2.46, - "learning_rate": 3.520593472643122e-06, - "loss": 1.0023, - "step": 3045 - }, - { - "epoch": 2.47, - "learning_rate": 3.504625804895302e-06, - "loss": 1.0315, - "step": 3050 - }, - { - "epoch": 2.47, - "learning_rate": 3.488674868577246e-06, - "loss": 1.0318, - "step": 3055 - }, - { - "epoch": 2.48, - "learning_rate": 3.472740842160649e-06, - "loss": 1.057, - "step": 3060 - }, - { - "epoch": 2.48, - "learning_rate": 3.4568239039280094e-06, - "loss": 1.041, - "step": 3065 - }, - { - "epoch": 2.48, - "learning_rate": 3.4409242319706225e-06, - "loss": 1.126, - "step": 3070 - }, - { - "epoch": 2.49, - "learning_rate": 3.4250420041866057e-06, - "loss": 1.0151, - "step": 3075 - }, - { - "epoch": 2.49, - "learning_rate": 3.4091773982788867e-06, - "loss": 1.0395, - "step": 3080 - }, - { - "epoch": 2.5, - "learning_rate": 3.393330591753231e-06, - "loss": 1.0207, - "step": 3085 - }, - { - "epoch": 2.5, - "learning_rate": 3.377501761916249e-06, - "loss": 0.9663, - "step": 3090 - }, - { - "epoch": 2.5, - "learning_rate": 3.3616910858734143e-06, - "loss": 1.055, - "step": 3095 - }, - { - "epoch": 2.51, - "learning_rate": 3.3458987405270803e-06, - "loss": 1.017, - "step": 3100 - }, - { - "epoch": 2.51, - "learning_rate": 3.330124902574505e-06, - "loss": 1.0034, - "step": 3105 - }, - { - "epoch": 2.52, - "learning_rate": 3.3143697485058666e-06, - "loss": 1.0262, - "step": 3110 - }, - { - "epoch": 2.52, - "learning_rate": 3.2986334546022964e-06, - "loss": 1.0723, - "step": 3115 - }, - { - "epoch": 2.52, - "learning_rate": 3.282916196933904e-06, - "loss": 1.0314, - "step": 3120 - }, - { - "epoch": 2.53, - "learning_rate": 3.2672181513578038e-06, - "loss": 1.0613, - "step": 3125 - }, - { - "epoch": 2.53, - "learning_rate": 3.251539493516152e-06, - "loss": 1.0641, - "step": 3130 - }, - { - "epoch": 2.54, - "learning_rate": 3.2358803988341776e-06, - "loss": 1.0283, - "step": 3135 - }, - { - "epoch": 2.54, - "learning_rate": 3.220241042518223e-06, - "loss": 1.0502, - "step": 3140 - }, - { - "epoch": 2.54, - "learning_rate": 3.2046215995537837e-06, - "loss": 1.0416, - "step": 3145 - }, - { - "epoch": 2.55, - "learning_rate": 3.1890222447035444e-06, - "loss": 1.0549, - "step": 3150 - }, - { - "epoch": 2.55, - "learning_rate": 3.173443152505431e-06, - "loss": 1.034, - "step": 3155 - }, - { - "epoch": 2.56, - "learning_rate": 3.157884497270658e-06, - "loss": 1.0594, - "step": 3160 - }, - { - "epoch": 2.56, - "learning_rate": 3.1423464530817673e-06, - "loss": 1.0637, - "step": 3165 - }, - { - "epoch": 2.56, - "learning_rate": 3.1268291937906957e-06, - "loss": 1.0402, - "step": 3170 - }, - { - "epoch": 2.57, - "learning_rate": 3.1113328930168153e-06, - "loss": 1.0236, - "step": 3175 - }, - { - "epoch": 2.57, - "learning_rate": 3.095857724145004e-06, - "loss": 1.0414, - "step": 3180 - }, - { - "epoch": 2.58, - "learning_rate": 3.0804038603236943e-06, - "loss": 1.0465, - "step": 3185 - }, - { - "epoch": 2.58, - "learning_rate": 3.0649714744629454e-06, - "loss": 1.0561, - "step": 3190 - }, - { - "epoch": 2.58, - "learning_rate": 3.0495607392324987e-06, - "loss": 1.0414, - "step": 3195 - }, - { - "epoch": 2.59, - "learning_rate": 3.0341718270598557e-06, - "loss": 1.0492, - "step": 3200 - }, - { - "epoch": 2.59, - "learning_rate": 3.0188049101283433e-06, - "loss": 1.0053, - "step": 3205 - }, - { - "epoch": 2.6, - "learning_rate": 3.003460160375189e-06, - "loss": 1.0193, - "step": 3210 - }, - { - "epoch": 2.6, - "learning_rate": 2.9881377494895925e-06, - "loss": 1.093, - "step": 3215 - }, - { - "epoch": 2.61, - "learning_rate": 2.9728378489108135e-06, - "loss": 1.0285, - "step": 3220 - }, - { - "epoch": 2.61, - "learning_rate": 2.957560629826244e-06, - "loss": 1.0982, - "step": 3225 - }, - { - "epoch": 2.61, - "learning_rate": 2.942306263169502e-06, - "loss": 1.0438, - "step": 3230 - }, - { - "epoch": 2.62, - "learning_rate": 2.9270749196185095e-06, - "loss": 1.0695, - "step": 3235 - }, - { - "epoch": 2.62, - "learning_rate": 2.911866769593592e-06, - "loss": 1.0139, - "step": 3240 - }, - { - "epoch": 2.63, - "learning_rate": 2.896681983255565e-06, - "loss": 1.1477, - "step": 3245 - }, - { - "epoch": 2.63, - "learning_rate": 2.881520730503837e-06, - "loss": 1.0437, - "step": 3250 - }, - { - "epoch": 2.63, - "learning_rate": 2.866383180974498e-06, - "loss": 1.0455, - "step": 3255 - }, - { - "epoch": 2.64, - "learning_rate": 2.8512695040384287e-06, - "loss": 1.0014, - "step": 3260 - }, - { - "epoch": 2.64, - "learning_rate": 2.8361798687994097e-06, - "loss": 1.0016, - "step": 3265 - }, - { - "epoch": 2.65, - "learning_rate": 2.8211144440922176e-06, - "loss": 0.9983, - "step": 3270 - }, - { - "epoch": 2.65, - "learning_rate": 2.8060733984807466e-06, - "loss": 1.0927, - "step": 3275 - }, - { - "epoch": 2.65, - "learning_rate": 2.7910569002561137e-06, - "loss": 1.0424, - "step": 3280 - }, - { - "epoch": 2.66, - "learning_rate": 2.7760651174347854e-06, - "loss": 1.0555, - "step": 3285 - }, - { - "epoch": 2.66, - "learning_rate": 2.7610982177566926e-06, - "loss": 0.983, - "step": 3290 - }, - { - "epoch": 2.67, - "learning_rate": 2.7461563686833504e-06, - "loss": 0.9712, - "step": 3295 - }, - { - "epoch": 2.67, - "learning_rate": 2.7312397373959894e-06, - "loss": 1.04, - "step": 3300 - }, - { - "epoch": 2.67, - "learning_rate": 2.716348490793681e-06, - "loss": 1.092, - "step": 3305 - }, - { - "epoch": 2.68, - "learning_rate": 2.7014827954914814e-06, - "loss": 0.9855, - "step": 3310 - }, - { - "epoch": 2.68, - "learning_rate": 2.686642817818548e-06, - "loss": 1.0319, - "step": 3315 - }, - { - "epoch": 2.69, - "learning_rate": 2.6718287238162963e-06, - "loss": 0.9938, - "step": 3320 - }, - { - "epoch": 2.69, - "learning_rate": 2.6570406792365268e-06, - "loss": 1.0662, - "step": 3325 - }, - { - "epoch": 2.69, - "learning_rate": 2.6422788495395912e-06, - "loss": 1.0263, - "step": 3330 - }, - { - "epoch": 2.7, - "learning_rate": 2.6275433998925176e-06, - "loss": 1.0584, - "step": 3335 - }, - { - "epoch": 2.7, - "learning_rate": 2.612834495167177e-06, - "loss": 1.0334, - "step": 3340 - }, - { - "epoch": 2.71, - "learning_rate": 2.5981522999384323e-06, - "loss": 1.0426, - "step": 3345 - }, - { - "epoch": 2.71, - "learning_rate": 2.583496978482305e-06, - "loss": 1.0199, - "step": 3350 - }, - { - "epoch": 2.71, - "learning_rate": 2.568868694774127e-06, - "loss": 1.0363, - "step": 3355 - }, - { - "epoch": 2.72, - "learning_rate": 2.5542676124867103e-06, - "loss": 0.9959, - "step": 3360 - }, - { - "epoch": 2.72, - "learning_rate": 2.5396938949885163e-06, - "loss": 1.0357, - "step": 3365 - }, - { - "epoch": 2.73, - "learning_rate": 2.52514770534183e-06, - "loss": 1.0444, - "step": 3370 - }, - { - "epoch": 2.73, - "learning_rate": 2.510629206300933e-06, - "loss": 1.0627, - "step": 3375 - }, - { - "epoch": 2.73, - "learning_rate": 2.4961385603102794e-06, - "loss": 1.0535, - "step": 3380 - }, - { - "epoch": 2.74, - "learning_rate": 2.481675929502682e-06, - "loss": 1.0276, - "step": 3385 - }, - { - "epoch": 2.74, - "learning_rate": 2.467241475697498e-06, - "loss": 1.0057, - "step": 3390 - }, - { - "epoch": 2.75, - "learning_rate": 2.45283536039882e-06, - "loss": 1.0055, - "step": 3395 - }, - { - "epoch": 2.75, - "learning_rate": 2.438457744793665e-06, - "loss": 1.0001, - "step": 3400 - }, - { - "epoch": 2.75, - "learning_rate": 2.4241087897501703e-06, - "loss": 1.1129, - "step": 3405 - }, - { - "epoch": 2.76, - "learning_rate": 2.409788655815802e-06, - "loss": 0.9816, - "step": 3410 - }, - { - "epoch": 2.76, - "learning_rate": 2.395497503215551e-06, - "loss": 1.008, - "step": 3415 - }, - { - "epoch": 2.77, - "learning_rate": 2.3812354918501397e-06, - "loss": 1.0068, - "step": 3420 - }, - { - "epoch": 2.77, - "learning_rate": 2.3670027812942353e-06, - "loss": 1.0779, - "step": 3425 - }, - { - "epoch": 2.78, - "learning_rate": 2.3527995307946655e-06, - "loss": 1.0264, - "step": 3430 - }, - { - "epoch": 2.78, - "learning_rate": 2.338625899268638e-06, - "loss": 1.0395, - "step": 3435 - }, - { - "epoch": 2.78, - "learning_rate": 2.3244820453019566e-06, - "loss": 1.0604, - "step": 3440 - }, - { - "epoch": 2.79, - "learning_rate": 2.3103681271472516e-06, - "loss": 1.0236, - "step": 3445 - }, - { - "epoch": 2.79, - "learning_rate": 2.296284302722205e-06, - "loss": 1.0918, - "step": 3450 - }, - { - "epoch": 2.8, - "learning_rate": 2.28223072960779e-06, - "loss": 1.0504, - "step": 3455 - }, - { - "epoch": 2.8, - "learning_rate": 2.2682075650465063e-06, - "loss": 1.0361, - "step": 3460 - }, - { - "epoch": 2.8, - "learning_rate": 2.2542149659406126e-06, - "loss": 1.0268, - "step": 3465 - }, - { - "epoch": 2.81, - "learning_rate": 2.2402530888503783e-06, - "loss": 1.0434, - "step": 3470 - }, - { - "epoch": 2.81, - "learning_rate": 2.226322089992336e-06, - "loss": 1.0348, - "step": 3475 - }, - { - "epoch": 2.82, - "learning_rate": 2.2124221252375215e-06, - "loss": 1.0135, - "step": 3480 - }, - { - "epoch": 2.82, - "learning_rate": 2.1985533501097407e-06, - "loss": 1.0488, - "step": 3485 - }, - { - "epoch": 2.82, - "learning_rate": 2.1847159197838213e-06, - "loss": 0.9809, - "step": 3490 - }, - { - "epoch": 2.83, - "learning_rate": 2.1709099890838846e-06, - "loss": 1.0627, - "step": 3495 - }, - { - "epoch": 2.83, - "learning_rate": 2.1571357124816107e-06, - "loss": 1.0373, - "step": 3500 - }, - { - "epoch": 2.84, - "learning_rate": 2.1433932440945028e-06, - "loss": 1.0068, - "step": 3505 - }, - { - "epoch": 2.84, - "learning_rate": 2.129682737684171e-06, - "loss": 1.0604, - "step": 3510 - }, - { - "epoch": 2.84, - "learning_rate": 2.11600434665461e-06, - "loss": 1.0337, - "step": 3515 - }, - { - "epoch": 2.85, - "learning_rate": 2.1023582240504836e-06, - "loss": 1.0668, - "step": 3520 - }, - { - "epoch": 2.85, - "learning_rate": 2.088744522555409e-06, - "loss": 1.0088, - "step": 3525 - }, - { - "epoch": 2.86, - "learning_rate": 2.0751633944902487e-06, - "loss": 1.0436, - "step": 3530 - }, - { - "epoch": 2.86, - "learning_rate": 2.061614991811414e-06, - "loss": 1.0138, - "step": 3535 - }, - { - "epoch": 2.86, - "learning_rate": 2.0480994661091507e-06, - "loss": 1.1406, - "step": 3540 - }, - { - "epoch": 2.87, - "learning_rate": 2.0346169686058586e-06, - "loss": 1.0391, - "step": 3545 - }, - { - "epoch": 2.87, - "learning_rate": 2.0211676501543866e-06, - "loss": 1.0592, - "step": 3550 - }, - { - "epoch": 2.88, - "learning_rate": 2.00775166123635e-06, - "loss": 0.9783, - "step": 3555 - }, - { - "epoch": 2.88, - "learning_rate": 1.9943691519604523e-06, - "loss": 1.0473, - "step": 3560 - }, - { - "epoch": 2.88, - "learning_rate": 1.9810202720607945e-06, - "loss": 1.0555, - "step": 3565 - }, - { - "epoch": 2.89, - "learning_rate": 1.967705170895208e-06, - "loss": 1.0691, - "step": 3570 - }, - { - "epoch": 2.89, - "learning_rate": 1.9544239974435797e-06, - "loss": 1.026, - "step": 3575 - }, - { - "epoch": 2.9, - "learning_rate": 1.9411769003061874e-06, - "loss": 1.0588, - "step": 3580 - }, - { - "epoch": 2.9, - "learning_rate": 1.9279640277020396e-06, - "loss": 1.0635, - "step": 3585 - }, - { - "epoch": 2.9, - "learning_rate": 1.9147855274672073e-06, - "loss": 0.9919, - "step": 3590 - }, - { - "epoch": 2.91, - "learning_rate": 1.9016415470531773e-06, - "loss": 1.0053, - "step": 3595 - }, - { - "epoch": 2.91, - "learning_rate": 1.8885322335252076e-06, - "loss": 1.0461, - "step": 3600 - }, - { - "epoch": 2.92, - "learning_rate": 1.8754577335606689e-06, - "loss": 1.0051, - "step": 3605 - }, - { - "epoch": 2.92, - "learning_rate": 1.8624181934474117e-06, - "loss": 1.0521, - "step": 3610 - }, - { - "epoch": 2.92, - "learning_rate": 1.8494137590821282e-06, - "loss": 0.9926, - "step": 3615 - }, - { - "epoch": 2.93, - "learning_rate": 1.8364445759687233e-06, - "loss": 1.0264, - "step": 3620 - }, - { - "epoch": 2.93, - "learning_rate": 1.823510789216676e-06, - "loss": 1.0475, - "step": 3625 - }, - { - "epoch": 2.94, - "learning_rate": 1.8106125435394312e-06, - "loss": 1.012, - "step": 3630 - }, - { - "epoch": 2.94, - "learning_rate": 1.7977499832527655e-06, - "loss": 1.0269, - "step": 3635 - }, - { - "epoch": 2.94, - "learning_rate": 1.7849232522731797e-06, - "loss": 1.0463, - "step": 3640 - }, - { - "epoch": 2.95, - "learning_rate": 1.7721324941162933e-06, - "loss": 1.025, - "step": 3645 - }, - { - "epoch": 2.95, - "learning_rate": 1.7593778518952275e-06, - "loss": 1.0326, - "step": 3650 - }, - { - "epoch": 2.96, - "learning_rate": 1.7466594683190107e-06, - "loss": 1.0389, - "step": 3655 - }, - { - "epoch": 2.96, - "learning_rate": 1.7339774856909851e-06, - "loss": 1.0609, - "step": 3660 - }, - { - "epoch": 2.97, - "learning_rate": 1.7213320459072047e-06, - "loss": 0.9949, - "step": 3665 - }, - { - "epoch": 2.97, - "learning_rate": 1.7087232904548595e-06, - "loss": 1.0083, - "step": 3670 - }, - { - "epoch": 2.97, - "learning_rate": 1.69615136041068e-06, - "loss": 1.0377, - "step": 3675 - }, - { - "epoch": 2.98, - "learning_rate": 1.6836163964393664e-06, - "loss": 1.0514, - "step": 3680 - }, - { - "epoch": 2.98, - "learning_rate": 1.6711185387920176e-06, - "loss": 0.99, - "step": 3685 - }, - { - "epoch": 2.99, - "learning_rate": 1.6586579273045529e-06, - "loss": 1.0146, - "step": 3690 - }, - { - "epoch": 2.99, - "learning_rate": 1.6462347013961526e-06, - "loss": 1.0445, - "step": 3695 - }, - { - "epoch": 2.99, - "learning_rate": 1.6338490000676987e-06, - "loss": 1.0674, - "step": 3700 - }, - { - "epoch": 3.0, - "learning_rate": 1.6215009619002197e-06, - "loss": 1.0215, - "step": 3705 - }, - { - "epoch": 3.0, - "learning_rate": 1.609190725053335e-06, - "loss": 0.9832, - "step": 3710 - }, - { - "epoch": 3.01, - "learning_rate": 1.5969184272637184e-06, - "loss": 0.9313, - "step": 3715 - }, - { - "epoch": 3.01, - "learning_rate": 1.5846842058435457e-06, - "loss": 1.0244, - "step": 3720 - }, - { - "epoch": 3.01, - "learning_rate": 1.5724881976789696e-06, - "loss": 0.9002, - "step": 3725 - }, - { - "epoch": 3.02, - "learning_rate": 1.5603305392285785e-06, - "loss": 0.957, - "step": 3730 - }, - { - "epoch": 3.02, - "learning_rate": 1.548211366521875e-06, - "loss": 0.9404, - "step": 3735 - }, - { - "epoch": 3.03, - "learning_rate": 1.5361308151577526e-06, - "loss": 0.9199, - "step": 3740 - }, - { - "epoch": 3.03, - "learning_rate": 1.5240890203029813e-06, - "loss": 0.9224, - "step": 3745 - }, - { - "epoch": 3.03, - "learning_rate": 1.5120861166906869e-06, - "loss": 0.9822, - "step": 3750 - }, - { - "epoch": 3.04, - "learning_rate": 1.5001222386188573e-06, - "loss": 0.9063, - "step": 3755 - }, - { - "epoch": 3.04, - "learning_rate": 1.4881975199488247e-06, - "loss": 0.9455, - "step": 3760 - }, - { - "epoch": 3.05, - "learning_rate": 1.4763120941037757e-06, - "loss": 0.8986, - "step": 3765 - }, - { - "epoch": 3.05, - "learning_rate": 1.4644660940672628e-06, - "loss": 0.9297, - "step": 3770 - }, - { - "epoch": 3.05, - "learning_rate": 1.4526596523817066e-06, - "loss": 0.9889, - "step": 3775 - }, - { - "epoch": 3.06, - "learning_rate": 1.4408929011469175e-06, - "loss": 0.9387, - "step": 3780 - }, - { - "epoch": 3.06, - "learning_rate": 1.4291659720186218e-06, - "loss": 0.8889, - "step": 3785 - }, - { - "epoch": 3.07, - "learning_rate": 1.4174789962069808e-06, - "loss": 0.9965, - "step": 3790 - }, - { - "epoch": 3.07, - "learning_rate": 1.4058321044751255e-06, - "loss": 0.9279, - "step": 3795 - }, - { - "epoch": 3.07, - "learning_rate": 1.3942254271377004e-06, - "loss": 0.9621, - "step": 3800 - }, - { - "epoch": 3.08, - "learning_rate": 1.3826590940593926e-06, - "loss": 0.9081, - "step": 3805 - }, - { - "epoch": 3.08, - "learning_rate": 1.3711332346534916e-06, - "loss": 0.9201, - "step": 3810 - }, - { - "epoch": 3.09, - "learning_rate": 1.3596479778804312e-06, - "loss": 0.9013, - "step": 3815 - }, - { - "epoch": 3.09, - "learning_rate": 1.3482034522463522e-06, - "loss": 0.9255, - "step": 3820 - }, - { - "epoch": 3.09, - "learning_rate": 1.3367997858016619e-06, - "loss": 0.9678, - "step": 3825 - }, - { - "epoch": 3.1, - "learning_rate": 1.325437106139607e-06, - "loss": 0.9334, - "step": 3830 - }, - { - "epoch": 3.1, - "learning_rate": 1.3141155403948358e-06, - "loss": 0.9455, - "step": 3835 - }, - { - "epoch": 3.11, - "learning_rate": 1.3028352152419876e-06, - "loss": 0.9025, - "step": 3840 - }, - { - "epoch": 3.11, - "learning_rate": 1.291596256894263e-06, - "loss": 0.8933, - "step": 3845 - }, - { - "epoch": 3.11, - "learning_rate": 1.2803987911020239e-06, - "loss": 0.999, - "step": 3850 - }, - { - "epoch": 3.12, - "learning_rate": 1.269242943151377e-06, - "loss": 0.8996, - "step": 3855 - }, - { - "epoch": 3.12, - "learning_rate": 1.2581288378627759e-06, - "loss": 0.9594, - "step": 3860 - }, - { - "epoch": 3.13, - "learning_rate": 1.2470565995896244e-06, - "loss": 0.9385, - "step": 3865 - }, - { - "epoch": 3.13, - "learning_rate": 1.236026352216888e-06, - "loss": 0.9508, - "step": 3870 - }, - { - "epoch": 3.14, - "learning_rate": 1.2250382191597015e-06, - "loss": 0.9479, - "step": 3875 - }, - { - "epoch": 3.14, - "learning_rate": 1.21409232336199e-06, - "loss": 0.8861, - "step": 3880 - }, - { - "epoch": 3.14, - "learning_rate": 1.2031887872951004e-06, - "loss": 0.9539, - "step": 3885 - }, - { - "epoch": 3.15, - "learning_rate": 1.1923277329564192e-06, - "loss": 0.8969, - "step": 3890 - }, - { - "epoch": 3.15, - "learning_rate": 1.181509281868019e-06, - "loss": 0.9248, - "step": 3895 - }, - { - "epoch": 3.16, - "learning_rate": 1.1707335550752901e-06, - "loss": 0.8923, - "step": 3900 - }, - { - "epoch": 3.16, - "learning_rate": 1.1600006731455888e-06, - "loss": 0.8534, - "step": 3905 - }, - { - "epoch": 3.16, - "learning_rate": 1.1493107561668943e-06, - "loss": 0.9193, - "step": 3910 - }, - { - "epoch": 3.17, - "learning_rate": 1.1386639237464542e-06, - "loss": 0.9688, - "step": 3915 - }, - { - "epoch": 3.17, - "learning_rate": 1.1280602950094532e-06, - "loss": 0.8982, - "step": 3920 - }, - { - "epoch": 3.18, - "learning_rate": 1.1174999885976834e-06, - "loss": 0.9001, - "step": 3925 - }, - { - "epoch": 3.18, - "learning_rate": 1.106983122668206e-06, - "loss": 0.9189, - "step": 3930 - }, - { - "epoch": 3.18, - "learning_rate": 1.0965098148920422e-06, - "loss": 0.9842, - "step": 3935 - }, - { - "epoch": 3.19, - "learning_rate": 1.0860801824528443e-06, - "loss": 0.9438, - "step": 3940 - }, - { - "epoch": 3.19, - "learning_rate": 1.0756943420455934e-06, - "loss": 0.9412, - "step": 3945 - }, - { - "epoch": 3.2, - "learning_rate": 1.0653524098752894e-06, - "loss": 0.9695, - "step": 3950 - }, - { - "epoch": 3.2, - "learning_rate": 1.055054501655654e-06, - "loss": 0.9145, - "step": 3955 - }, - { - "epoch": 3.2, - "learning_rate": 1.0448007326078336e-06, - "loss": 0.9602, - "step": 3960 - }, - { - "epoch": 3.21, - "learning_rate": 1.0345912174591071e-06, - "loss": 0.9009, - "step": 3965 - }, - { - "epoch": 3.21, - "learning_rate": 1.0244260704416104e-06, - "loss": 0.9375, - "step": 3970 - }, - { - "epoch": 3.22, - "learning_rate": 1.0143054052910534e-06, - "loss": 0.9402, - "step": 3975 - }, - { - "epoch": 3.22, - "learning_rate": 1.0042293352454446e-06, - "loss": 0.9182, - "step": 3980 - }, - { - "epoch": 3.22, - "learning_rate": 9.94197973043829e-07, - "loss": 0.909, - "step": 3985 - }, - { - "epoch": 3.23, - "learning_rate": 9.842114309250222e-07, - "loss": 0.9285, - "step": 3990 - }, - { - "epoch": 3.23, - "learning_rate": 9.74269820626364e-07, - "loss": 0.9264, - "step": 3995 - }, - { - "epoch": 3.24, - "learning_rate": 9.643732533824545e-07, - "loss": 0.9205, - "step": 4000 - }, - { - "epoch": 3.24, - "learning_rate": 9.545218399239186e-07, - "loss": 0.96, - "step": 4005 - }, - { - "epoch": 3.24, - "learning_rate": 9.447156904761668e-07, - "loss": 0.9473, - "step": 4010 - }, - { - "epoch": 3.25, - "learning_rate": 9.349549147581571e-07, - "loss": 0.9281, - "step": 4015 - }, - { - "epoch": 3.25, - "learning_rate": 9.252396219811737e-07, - "loss": 0.9311, - "step": 4020 - }, - { - "epoch": 3.26, - "learning_rate": 9.155699208475988e-07, - "loss": 0.9789, - "step": 4025 - }, - { - "epoch": 3.26, - "learning_rate": 9.059459195496989e-07, - "loss": 0.8984, - "step": 4030 - }, - { - "epoch": 3.26, - "learning_rate": 8.963677257684184e-07, - "loss": 0.9564, - "step": 4035 - }, - { - "epoch": 3.27, - "learning_rate": 8.868354466721668e-07, - "loss": 0.9293, - "step": 4040 - }, - { - "epoch": 3.27, - "learning_rate": 8.773491889156254e-07, - "loss": 0.9678, - "step": 4045 - }, - { - "epoch": 3.28, - "learning_rate": 8.679090586385519e-07, - "loss": 0.9275, - "step": 4050 - }, - { - "epoch": 3.28, - "learning_rate": 8.585151614645942e-07, - "loss": 0.966, - "step": 4055 - }, - { - "epoch": 3.28, - "learning_rate": 8.491676025001083e-07, - "loss": 0.9049, - "step": 4060 - }, - { - "epoch": 3.29, - "learning_rate": 8.398664863329792e-07, - "loss": 0.9385, - "step": 4065 - }, - { - "epoch": 3.29, - "learning_rate": 8.306119170314553e-07, - "loss": 0.9529, - "step": 4070 - }, - { - "epoch": 3.3, - "learning_rate": 8.214039981429789e-07, - "loss": 0.9412, - "step": 4075 - }, - { - "epoch": 3.3, - "learning_rate": 8.122428326930348e-07, - "loss": 0.9852, - "step": 4080 - }, - { - "epoch": 3.31, - "learning_rate": 8.031285231839908e-07, - "loss": 0.9223, - "step": 4085 - }, - { - "epoch": 3.31, - "learning_rate": 7.940611715939522e-07, - "loss": 0.9592, - "step": 4090 - }, - { - "epoch": 3.31, - "learning_rate": 7.850408793756242e-07, - "loss": 0.9758, - "step": 4095 - }, - { - "epoch": 3.32, - "learning_rate": 7.760677474551759e-07, - "loss": 0.842, - "step": 4100 - }, - { - "epoch": 3.32, - "learning_rate": 7.67141876231105e-07, - "loss": 0.9406, - "step": 4105 - }, - { - "epoch": 3.33, - "learning_rate": 7.582633655731231e-07, - "loss": 0.9397, - "step": 4110 - }, - { - "epoch": 3.33, - "learning_rate": 7.494323148210303e-07, - "loss": 0.9193, - "step": 4115 - }, - { - "epoch": 3.33, - "learning_rate": 7.406488227836139e-07, - "loss": 0.9529, - "step": 4120 - }, - { - "epoch": 3.34, - "learning_rate": 7.319129877375314e-07, - "loss": 0.973, - "step": 4125 - }, - { - "epoch": 3.34, - "learning_rate": 7.232249074262176e-07, - "loss": 0.9596, - "step": 4130 - }, - { - "epoch": 3.35, - "learning_rate": 7.145846790587891e-07, - "loss": 0.9477, - "step": 4135 - }, - { - "epoch": 3.35, - "learning_rate": 7.059923993089585e-07, - "loss": 0.9809, - "step": 4140 - }, - { - "epoch": 3.35, - "learning_rate": 6.974481643139514e-07, - "loss": 0.9863, - "step": 4145 - }, - { - "epoch": 3.36, - "learning_rate": 6.889520696734297e-07, - "loss": 0.9666, - "step": 4150 - }, - { - "epoch": 3.36, - "learning_rate": 6.805042104484216e-07, - "loss": 0.9328, - "step": 4155 - }, - { - "epoch": 3.37, - "learning_rate": 6.721046811602622e-07, - "loss": 0.8867, - "step": 4160 - }, - { - "epoch": 3.37, - "learning_rate": 6.63753575789532e-07, - "loss": 0.9635, - "step": 4165 - }, - { - "epoch": 3.37, - "learning_rate": 6.554509877750042e-07, - "loss": 0.9605, - "step": 4170 - }, - { - "epoch": 3.38, - "learning_rate": 6.471970100126035e-07, - "loss": 0.989, - "step": 4175 - }, - { - "epoch": 3.38, - "learning_rate": 6.389917348543651e-07, - "loss": 0.9393, - "step": 4180 - }, - { - "epoch": 3.39, - "learning_rate": 6.308352541074014e-07, - "loss": 0.9385, - "step": 4185 - }, - { - "epoch": 3.39, - "learning_rate": 6.227276590328713e-07, - "loss": 0.9325, - "step": 4190 - }, - { - "epoch": 3.39, - "learning_rate": 6.146690403449646e-07, - "loss": 0.9801, - "step": 4195 - }, - { - "epoch": 3.4, - "learning_rate": 6.066594882098831e-07, - "loss": 0.976, - "step": 4200 - }, - { - "epoch": 3.4, - "learning_rate": 5.98699092244835e-07, - "loss": 0.9523, - "step": 4205 - }, - { - "epoch": 3.41, - "learning_rate": 5.907879415170287e-07, - "loss": 0.8773, - "step": 4210 - }, - { - "epoch": 3.41, - "learning_rate": 5.829261245426793e-07, - "loss": 0.8939, - "step": 4215 - }, - { - "epoch": 3.41, - "learning_rate": 5.751137292860126e-07, - "loss": 0.9383, - "step": 4220 - }, - { - "epoch": 3.42, - "learning_rate": 5.673508431582936e-07, - "loss": 0.9797, - "step": 4225 - }, - { - "epoch": 3.42, - "learning_rate": 5.596375530168329e-07, - "loss": 0.932, - "step": 4230 - }, - { - "epoch": 3.43, - "learning_rate": 5.519739451640238e-07, - "loss": 0.9015, - "step": 4235 - }, - { - "epoch": 3.43, - "learning_rate": 5.443601053463743e-07, - "loss": 0.966, - "step": 4240 - }, - { - "epoch": 3.43, - "learning_rate": 5.367961187535504e-07, - "loss": 0.9252, - "step": 4245 - }, - { - "epoch": 3.44, - "learning_rate": 5.292820700174189e-07, - "loss": 0.925, - "step": 4250 - }, - { - "epoch": 3.44, - "learning_rate": 5.218180432111026e-07, - "loss": 0.9445, - "step": 4255 - }, - { - "epoch": 3.45, - "learning_rate": 5.144041218480389e-07, - "loss": 0.9461, - "step": 4260 - }, - { - "epoch": 3.45, - "learning_rate": 5.070403888810471e-07, - "loss": 0.926, - "step": 4265 - }, - { - "epoch": 3.45, - "learning_rate": 4.997269267013993e-07, - "loss": 0.9242, - "step": 4270 - }, - { - "epoch": 3.46, - "learning_rate": 4.924638171378976e-07, - "loss": 0.9514, - "step": 4275 - }, - { - "epoch": 3.46, - "learning_rate": 4.852511414559575e-07, - "loss": 0.9877, - "step": 4280 - }, - { - "epoch": 3.47, - "learning_rate": 4.780889803567018e-07, - "loss": 0.9541, - "step": 4285 - }, - { - "epoch": 3.47, - "learning_rate": 4.7097741397605754e-07, - "loss": 0.9449, - "step": 4290 - }, - { - "epoch": 3.47, - "learning_rate": 4.639165218838559e-07, - "loss": 0.9361, - "step": 4295 - }, - { - "epoch": 3.48, - "learning_rate": 4.569063830829445e-07, - "loss": 0.9908, - "step": 4300 - }, - { - "epoch": 3.48, - "learning_rate": 4.49947076008303e-07, - "loss": 0.9355, - "step": 4305 - }, - { - "epoch": 3.49, - "learning_rate": 4.4303867852616755e-07, - "loss": 0.9096, - "step": 4310 - }, - { - "epoch": 3.49, - "learning_rate": 4.361812679331551e-07, - "loss": 0.9555, - "step": 4315 - }, - { - "epoch": 3.5, - "learning_rate": 4.2937492095540043e-07, - "loss": 0.9221, - "step": 4320 - }, - { - "epoch": 3.5, - "learning_rate": 4.2261971374769893e-07, - "loss": 0.9594, - "step": 4325 - }, - { - "epoch": 3.5, - "learning_rate": 4.159157218926557e-07, - "loss": 0.914, - "step": 4330 - }, - { - "epoch": 3.51, - "learning_rate": 4.09263020399836e-07, - "loss": 0.9935, - "step": 4335 - }, - { - "epoch": 3.51, - "learning_rate": 4.02661683704928e-07, - "loss": 0.9467, - "step": 4340 - }, - { - "epoch": 3.52, - "learning_rate": 3.9611178566890894e-07, - "loss": 0.943, - "step": 4345 - }, - { - "epoch": 3.52, - "learning_rate": 3.896133995772233e-07, - "loss": 0.9232, - "step": 4350 - }, - { - "epoch": 3.52, - "learning_rate": 3.8316659813895597e-07, - "loss": 0.9545, - "step": 4355 - }, - { - "epoch": 3.53, - "learning_rate": 3.767714534860223e-07, - "loss": 0.9242, - "step": 4360 - }, - { - "epoch": 3.53, - "learning_rate": 3.704280371723601e-07, - "loss": 0.9379, - "step": 4365 - }, - { - "epoch": 3.54, - "learning_rate": 3.6413642017313233e-07, - "loss": 0.9506, - "step": 4370 - }, - { - "epoch": 3.54, - "learning_rate": 3.5789667288392784e-07, - "loss": 0.9465, - "step": 4375 - }, - { - "epoch": 3.54, - "learning_rate": 3.517088651199768e-07, - "loss": 0.9365, - "step": 4380 - }, - { - "epoch": 3.55, - "learning_rate": 3.455730661153672e-07, - "loss": 0.9195, - "step": 4385 - }, - { - "epoch": 3.55, - "learning_rate": 3.394893445222752e-07, - "loss": 0.9746, - "step": 4390 - }, - { - "epoch": 3.56, - "learning_rate": 3.334577684101925e-07, - "loss": 0.9289, - "step": 4395 - }, - { - "epoch": 3.56, - "learning_rate": 3.2747840526516414e-07, - "loss": 0.9038, - "step": 4400 - }, - { - "epoch": 3.56, - "learning_rate": 3.215513219890365e-07, - "loss": 0.9098, - "step": 4405 - }, - { - "epoch": 3.57, - "learning_rate": 3.15676584898707e-07, - "loss": 0.9435, - "step": 4410 - }, - { - "epoch": 3.57, - "learning_rate": 3.0985425972538343e-07, - "loss": 0.9098, - "step": 4415 - }, - { - "epoch": 3.58, - "learning_rate": 3.040844116138475e-07, - "loss": 0.9318, - "step": 4420 - }, - { - "epoch": 3.58, - "learning_rate": 2.9836710512172353e-07, - "loss": 0.9592, - "step": 4425 - }, - { - "epoch": 3.58, - "learning_rate": 2.9270240421876204e-07, - "loss": 0.9756, - "step": 4430 - }, - { - "epoch": 3.59, - "learning_rate": 2.8709037228611903e-07, - "loss": 0.9189, - "step": 4435 - }, - { - "epoch": 3.59, - "learning_rate": 2.815310721156489e-07, - "loss": 0.9139, - "step": 4440 - }, - { - "epoch": 3.6, - "learning_rate": 2.7602456590920034e-07, - "loss": 0.9127, - "step": 4445 - }, - { - "epoch": 3.6, - "learning_rate": 2.7057091527792125e-07, - "loss": 0.9602, - "step": 4450 - }, - { - "epoch": 3.6, - "learning_rate": 2.6517018124157137e-07, - "loss": 0.9787, - "step": 4455 - }, - { - "epoch": 3.61, - "learning_rate": 2.598224242278369e-07, - "loss": 0.916, - "step": 4460 - }, - { - "epoch": 3.61, - "learning_rate": 2.545277040716537e-07, - "loss": 0.9846, - "step": 4465 - }, - { - "epoch": 3.62, - "learning_rate": 2.492860800145408e-07, - "loss": 0.9484, - "step": 4470 - }, - { - "epoch": 3.62, - "learning_rate": 2.4409761070393614e-07, - "loss": 0.9191, - "step": 4475 - }, - { - "epoch": 3.62, - "learning_rate": 2.389623541925407e-07, - "loss": 0.9266, - "step": 4480 - }, - { - "epoch": 3.63, - "learning_rate": 2.3388036793766723e-07, - "loss": 0.9034, - "step": 4485 - }, - { - "epoch": 3.63, - "learning_rate": 2.2885170880059758e-07, - "loss": 0.896, - "step": 4490 - }, - { - "epoch": 3.64, - "learning_rate": 2.2387643304595196e-07, - "loss": 0.9574, - "step": 4495 - }, - { - "epoch": 3.64, - "learning_rate": 2.189545963410511e-07, - "loss": 0.9387, - "step": 4500 - }, - { - "epoch": 3.64, - "learning_rate": 2.1408625375529845e-07, - "loss": 0.9322, - "step": 4505 - }, - { - "epoch": 3.65, - "learning_rate": 2.0927145975956297e-07, - "loss": 0.9088, - "step": 4510 - }, - { - "epoch": 3.65, - "learning_rate": 2.0451026822556952e-07, - "loss": 0.9168, - "step": 4515 - }, - { - "epoch": 3.66, - "learning_rate": 1.9980273242529825e-07, - "loss": 0.951, - "step": 4520 - }, - { - "epoch": 3.66, - "learning_rate": 1.951489050303834e-07, - "loss": 0.916, - "step": 4525 - }, - { - "epoch": 3.67, - "learning_rate": 1.9054883811152837e-07, - "loss": 0.8936, - "step": 4530 - }, - { - "epoch": 3.67, - "learning_rate": 1.8600258313792142e-07, - "loss": 0.9279, - "step": 4535 - }, - { - "epoch": 3.67, - "learning_rate": 1.8151019097666146e-07, - "loss": 0.9666, - "step": 4540 - }, - { - "epoch": 3.68, - "learning_rate": 1.7707171189218663e-07, - "loss": 0.9555, - "step": 4545 - }, - { - "epoch": 3.68, - "learning_rate": 1.7268719554571157e-07, - "loss": 0.945, - "step": 4550 - }, - { - "epoch": 3.69, - "learning_rate": 1.683566909946771e-07, - "loss": 0.9357, - "step": 4555 - }, - { - "epoch": 3.69, - "learning_rate": 1.640802466921926e-07, - "loss": 0.9528, - "step": 4560 - }, - { - "epoch": 3.69, - "learning_rate": 1.5985791048650223e-07, - "loss": 0.8418, - "step": 4565 - }, - { - "epoch": 3.7, - "learning_rate": 1.5568972962044405e-07, - "loss": 0.9797, - "step": 4570 - }, - { - "epoch": 3.7, - "learning_rate": 1.515757507309229e-07, - "loss": 0.9197, - "step": 4575 - }, - { - "epoch": 3.71, - "learning_rate": 1.4751601984839159e-07, - "loss": 1.0133, - "step": 4580 - }, - { - "epoch": 3.71, - "learning_rate": 1.4351058239633065e-07, - "loss": 0.9518, - "step": 4585 - }, - { - "epoch": 3.71, - "learning_rate": 1.3955948319074374e-07, - "loss": 0.881, - "step": 4590 - }, - { - "epoch": 3.72, - "learning_rate": 1.3566276643965538e-07, - "loss": 0.9238, - "step": 4595 - }, - { - "epoch": 3.72, - "learning_rate": 1.3182047574261557e-07, - "loss": 0.9002, - "step": 4600 - }, - { - "epoch": 3.73, - "learning_rate": 1.2803265409021436e-07, - "loss": 0.948, - "step": 4605 - }, - { - "epoch": 3.73, - "learning_rate": 1.2429934386359643e-07, - "loss": 0.9025, - "step": 4610 - }, - { - "epoch": 3.73, - "learning_rate": 1.2062058683399048e-07, - "loss": 0.9354, - "step": 4615 - }, - { - "epoch": 3.74, - "learning_rate": 1.1699642416224233e-07, - "loss": 0.9582, - "step": 4620 - }, - { - "epoch": 3.74, - "learning_rate": 1.1342689639835036e-07, - "loss": 0.9734, - "step": 4625 - }, - { - "epoch": 3.75, - "learning_rate": 1.0991204348101692e-07, - "loss": 0.9267, - "step": 4630 - }, - { - "epoch": 3.75, - "learning_rate": 1.0645190473719647e-07, - "loss": 0.9705, - "step": 4635 - }, - { - "epoch": 3.75, - "learning_rate": 1.0304651888166039e-07, - "loss": 0.9285, - "step": 4640 - }, - { - "epoch": 3.76, - "learning_rate": 9.969592401655903e-08, - "loss": 0.9494, - "step": 4645 - }, - { - "epoch": 3.76, - "learning_rate": 9.640015763100031e-08, - "loss": 0.8965, - "step": 4650 - }, - { - "epoch": 3.77, - "learning_rate": 9.315925660062619e-08, - "loss": 0.9922, - "step": 4655 - }, - { - "epoch": 3.77, - "learning_rate": 8.997325718720085e-08, - "loss": 0.9295, - "step": 4660 - }, - { - "epoch": 3.77, - "learning_rate": 8.684219503820756e-08, - "loss": 0.9564, - "step": 4665 - }, - { - "epoch": 3.78, - "learning_rate": 8.376610518644746e-08, - "loss": 0.9201, - "step": 4670 - }, - { - "epoch": 3.78, - "learning_rate": 8.074502204964696e-08, - "loss": 0.9303, - "step": 4675 - }, - { - "epoch": 3.79, - "learning_rate": 7.777897943007595e-08, - "loss": 0.9636, - "step": 4680 - }, - { - "epoch": 3.79, - "learning_rate": 7.486801051416525e-08, - "loss": 0.9542, - "step": 4685 - }, - { - "epoch": 3.79, - "learning_rate": 7.201214787213862e-08, - "loss": 0.9684, - "step": 4690 - }, - { - "epoch": 3.8, - "learning_rate": 6.921142345764798e-08, - "loss": 0.924, - "step": 4695 - }, - { - "epoch": 3.8, - "learning_rate": 6.646586860741322e-08, - "loss": 0.9271, - "step": 4700 - }, - { - "epoch": 3.81, - "learning_rate": 6.377551404087467e-08, - "loss": 0.9333, - "step": 4705 - }, - { - "epoch": 3.81, - "learning_rate": 6.114038985984894e-08, - "loss": 0.9413, - "step": 4710 - }, - { - "epoch": 3.81, - "learning_rate": 5.856052554818969e-08, - "loss": 0.9223, - "step": 4715 - }, - { - "epoch": 3.82, - "learning_rate": 5.603594997145967e-08, - "loss": 0.9301, - "step": 4720 - }, - { - "epoch": 3.82, - "learning_rate": 5.3566691376609744e-08, - "loss": 0.9072, - "step": 4725 - }, - { - "epoch": 3.83, - "learning_rate": 5.115277739165703e-08, - "loss": 0.9152, - "step": 4730 - }, - { - "epoch": 3.83, - "learning_rate": 4.8794235025383386e-08, - "loss": 0.9234, - "step": 4735 - }, - { - "epoch": 3.83, - "learning_rate": 4.6491090667025176e-08, - "loss": 0.943, - "step": 4740 - }, - { - "epoch": 3.84, - "learning_rate": 4.4243370085985114e-08, - "loss": 0.8847, - "step": 4745 - }, - { - "epoch": 3.84, - "learning_rate": 4.2051098431539764e-08, - "loss": 1.0156, - "step": 4750 - }, - { - "epoch": 3.85, - "learning_rate": 3.991430023255804e-08, - "loss": 0.866, - "step": 4755 - }, - { - "epoch": 3.85, - "learning_rate": 3.783299939722984e-08, - "loss": 0.9083, - "step": 4760 - }, - { - "epoch": 3.86, - "learning_rate": 3.580721921279562e-08, - "loss": 0.9077, - "step": 4765 - }, - { - "epoch": 3.86, - "learning_rate": 3.383698234528665e-08, - "loss": 0.9351, - "step": 4770 - }, - { - "epoch": 3.86, - "learning_rate": 3.1922310839272444e-08, - "loss": 0.9322, - "step": 4775 - }, - { - "epoch": 3.87, - "learning_rate": 3.006322611761314e-08, - "loss": 0.9379, - "step": 4780 - }, - { - "epoch": 3.87, - "learning_rate": 2.8259748981219194e-08, - "loss": 0.9136, - "step": 4785 - }, - { - "epoch": 3.88, - "learning_rate": 2.651189960882039e-08, - "loss": 0.9764, - "step": 4790 - }, - { - "epoch": 3.88, - "learning_rate": 2.4819697556737742e-08, - "loss": 0.9348, - "step": 4795 - }, - { - "epoch": 3.88, - "learning_rate": 2.318316175866697e-08, - "loss": 0.9345, - "step": 4800 - }, - { - "epoch": 3.89, - "learning_rate": 2.1602310525466464e-08, - "loss": 0.8879, - "step": 4805 - }, - { - "epoch": 3.89, - "learning_rate": 2.007716154494965e-08, - "loss": 0.9619, - "step": 4810 - }, - { - "epoch": 3.9, - "learning_rate": 1.8607731881690737e-08, - "loss": 0.9516, - "step": 4815 - }, - { - "epoch": 3.9, - "learning_rate": 1.7194037976831502e-08, - "loss": 0.9471, - "step": 4820 - }, - { - "epoch": 3.9, - "learning_rate": 1.583609564789812e-08, - "loss": 0.9197, - "step": 4825 - }, - { - "epoch": 3.91, - "learning_rate": 1.4533920088623533e-08, - "loss": 0.8611, - "step": 4830 - }, - { - "epoch": 3.91, - "learning_rate": 1.3287525868778128e-08, - "loss": 0.9449, - "step": 4835 - }, - { - "epoch": 3.92, - "learning_rate": 1.2096926934007103e-08, - "loss": 0.9418, - "step": 4840 - }, - { - "epoch": 3.92, - "learning_rate": 1.0962136605673357e-08, - "loss": 0.9337, - "step": 4845 - }, - { - "epoch": 3.92, - "learning_rate": 9.883167580709285e-09, - "loss": 0.9118, - "step": 4850 - }, - { - "epoch": 3.93, - "learning_rate": 8.860031931473555e-09, - "loss": 0.9563, - "step": 4855 - }, - { - "epoch": 3.93, - "learning_rate": 7.892741105617329e-09, - "loss": 0.9342, - "step": 4860 - }, - { - "epoch": 3.94, - "learning_rate": 6.981305925956583e-09, - "loss": 0.9553, - "step": 4865 - }, - { - "epoch": 3.94, - "learning_rate": 6.1257365903488745e-09, - "loss": 0.9455, - "step": 4870 - }, - { - "epoch": 3.94, - "learning_rate": 5.326042671580655e-09, - "loss": 0.8813, - "step": 4875 - }, - { - "epoch": 3.95, - "learning_rate": 4.582233117260693e-09, - "loss": 0.8929, - "step": 4880 - }, - { - "epoch": 3.95, - "learning_rate": 3.894316249717922e-09, - "loss": 0.9463, - "step": 4885 - }, - { - "epoch": 3.96, - "learning_rate": 3.2622997659120802e-09, - "loss": 0.9428, - "step": 4890 - }, - { - "epoch": 3.96, - "learning_rate": 2.6861907373432193e-09, - "loss": 0.866, - "step": 4895 - }, - { - "epoch": 3.96, - "learning_rate": 2.165995609973992e-09, - "loss": 0.94, - "step": 4900 - }, - { - "epoch": 3.97, - "learning_rate": 1.7017202041602621e-09, - "loss": 0.9525, - "step": 4905 - }, - { - "epoch": 3.97, - "learning_rate": 1.293369714582271e-09, - "loss": 0.9548, - "step": 4910 - }, - { - "epoch": 3.98, - "learning_rate": 9.409487101880167e-10, - "loss": 0.9668, - "step": 4915 - }, - { - "epoch": 3.98, - "learning_rate": 6.444611341432927e-10, - "loss": 0.9349, - "step": 4920 - }, - { - "epoch": 3.98, - "learning_rate": 4.0391030378561513e-10, - "loss": 0.974, - "step": 4925 - }, - { - "epoch": 3.99, - "learning_rate": 2.1929891058758424e-10, - "loss": 0.9563, - "step": 4930 - }, - { - "epoch": 3.99, - "learning_rate": 9.0629020127464e-11, - "loss": 0.9373, - "step": 4935 - }, - { - "epoch": 4.0, - "learning_rate": 1.790207206586736e-11, - "loss": 0.9326, - "step": 4940 - }, - { - "epoch": 4.0, - "step": 4944, - "total_flos": 2.446826463366742e+18, - "train_loss": 1.1695684537918436, - "train_runtime": 57751.42, - "train_samples_per_second": 5.478, - "train_steps_per_second": 0.086 } ], - "max_steps": 4944, + "max_steps": 4940, "num_train_epochs": 4, - "total_flos": 2.446826463366742e+18, + "total_flos": 6.117066158416855e+17, "trial_name": null, "trial_params": null }