diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,7219 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.030336661096518617, + "eval_steps": 500, + "global_step": 6000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 5e-06, + "loss": 1.7996, + "step": 5 + }, + { + "epoch": 0.0, + "learning_rate": 1e-05, + "loss": 1.8749, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 1.5e-05, + "loss": 1.7917, + "step": 15 + }, + { + "epoch": 0.0, + "learning_rate": 2e-05, + "loss": 1.866, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 2.5e-05, + "loss": 1.8179, + "step": 25 + }, + { + "epoch": 0.0, + "learning_rate": 3e-05, + "loss": 1.7424, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 3.5e-05, + "loss": 1.776, + "step": 35 + }, + { + "epoch": 0.0, + "learning_rate": 4e-05, + "loss": 1.7429, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 4.5e-05, + "loss": 1.7392, + "step": 45 + }, + { + "epoch": 0.0, + "learning_rate": 5e-05, + "loss": 1.7274, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 5.500000000000001e-05, + "loss": 1.7122, + "step": 55 + }, + { + "epoch": 0.0, + "learning_rate": 6e-05, + "loss": 1.7711, + "step": 60 + }, + { + "epoch": 0.0, + "learning_rate": 6.500000000000001e-05, + "loss": 1.6894, + "step": 65 + }, + { + "epoch": 0.0, + "learning_rate": 7e-05, + "loss": 1.7153, + "step": 70 + }, + { + "epoch": 0.0, + "learning_rate": 7.500000000000001e-05, + "loss": 1.72, + "step": 75 + }, + { + "epoch": 0.0, + "learning_rate": 8e-05, + "loss": 1.6923, + "step": 80 + }, + { + "epoch": 0.0, + "learning_rate": 8.5e-05, + "loss": 1.6293, + "step": 85 + }, + { + "epoch": 0.0, + "learning_rate": 9e-05, + "loss": 1.6991, + "step": 90 + }, + { + "epoch": 0.0, + "learning_rate": 9.5e-05, + "loss": 1.6649, + "step": 95 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001, + "loss": 1.6809, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 9.999999984214647e-05, + "loss": 1.6536, + "step": 105 + }, + { + "epoch": 0.0, + "learning_rate": 9.999999936858588e-05, + "loss": 1.6798, + "step": 110 + }, + { + "epoch": 0.0, + "learning_rate": 9.999999857931825e-05, + "loss": 1.6354, + "step": 115 + }, + { + "epoch": 0.0, + "learning_rate": 9.999999747434355e-05, + "loss": 1.6531, + "step": 120 + }, + { + "epoch": 0.0, + "learning_rate": 9.999999605366182e-05, + "loss": 1.6346, + "step": 125 + }, + { + "epoch": 0.0, + "learning_rate": 9.999999431727304e-05, + "loss": 1.5923, + "step": 130 + }, + { + "epoch": 0.0, + "learning_rate": 9.999999226517724e-05, + "loss": 1.6328, + "step": 135 + }, + { + "epoch": 0.0, + "learning_rate": 9.999998989737443e-05, + "loss": 1.643, + "step": 140 + }, + { + "epoch": 0.0, + "learning_rate": 9.999998721386463e-05, + "loss": 1.6708, + "step": 145 + }, + { + "epoch": 0.0, + "learning_rate": 9.999998421464784e-05, + "loss": 1.6129, + "step": 150 + }, + { + "epoch": 0.0, + "learning_rate": 9.99999808997241e-05, + "loss": 1.6173, + "step": 155 + }, + { + "epoch": 0.0, + "learning_rate": 9.999997726909342e-05, + "loss": 1.62, + "step": 160 + }, + { + "epoch": 0.0, + "learning_rate": 9.999997332275582e-05, + "loss": 1.6656, + "step": 165 + }, + { + "epoch": 0.0, + "learning_rate": 9.999996906071134e-05, + "loss": 1.5967, + "step": 170 + }, + { + "epoch": 0.0, + "learning_rate": 9.999996448295999e-05, + "loss": 1.6081, + "step": 175 + }, + { + "epoch": 0.0, + "learning_rate": 9.999995958950179e-05, + "loss": 1.6811, + "step": 180 + }, + { + "epoch": 0.0, + "learning_rate": 9.99999543803368e-05, + "loss": 1.6281, + "step": 185 + }, + { + "epoch": 0.0, + "learning_rate": 9.999994885546504e-05, + "loss": 1.6343, + "step": 190 + }, + { + "epoch": 0.0, + "learning_rate": 9.999994301488653e-05, + "loss": 1.6014, + "step": 195 + }, + { + "epoch": 0.0, + "learning_rate": 9.999993685860133e-05, + "loss": 1.6031, + "step": 200 + }, + { + "epoch": 0.0, + "learning_rate": 9.999993038660947e-05, + "loss": 1.6057, + "step": 205 + }, + { + "epoch": 0.0, + "learning_rate": 9.999992359891099e-05, + "loss": 1.6461, + "step": 210 + }, + { + "epoch": 0.0, + "learning_rate": 9.999991649550593e-05, + "loss": 1.5893, + "step": 215 + }, + { + "epoch": 0.0, + "learning_rate": 9.999990907639434e-05, + "loss": 1.585, + "step": 220 + }, + { + "epoch": 0.0, + "learning_rate": 9.999990134157626e-05, + "loss": 1.6501, + "step": 225 + }, + { + "epoch": 0.0, + "learning_rate": 9.999989329105175e-05, + "loss": 1.585, + "step": 230 + }, + { + "epoch": 0.0, + "learning_rate": 9.999988492482087e-05, + "loss": 1.5825, + "step": 235 + }, + { + "epoch": 0.0, + "learning_rate": 9.999987624288363e-05, + "loss": 1.6001, + "step": 240 + }, + { + "epoch": 0.0, + "learning_rate": 9.999986724524012e-05, + "loss": 1.5628, + "step": 245 + }, + { + "epoch": 0.0, + "learning_rate": 9.999985793189038e-05, + "loss": 1.5727, + "step": 250 + }, + { + "epoch": 0.0, + "learning_rate": 9.999984830283449e-05, + "loss": 1.6333, + "step": 255 + }, + { + "epoch": 0.0, + "learning_rate": 9.99998383580725e-05, + "loss": 1.6112, + "step": 260 + }, + { + "epoch": 0.0, + "learning_rate": 9.999982809760446e-05, + "loss": 1.5798, + "step": 265 + }, + { + "epoch": 0.0, + "learning_rate": 9.999981752143045e-05, + "loss": 1.5776, + "step": 270 + }, + { + "epoch": 0.0, + "learning_rate": 9.999980662955052e-05, + "loss": 1.5801, + "step": 275 + }, + { + "epoch": 0.0, + "learning_rate": 9.999979542196479e-05, + "loss": 1.6455, + "step": 280 + }, + { + "epoch": 0.0, + "learning_rate": 9.999978389867326e-05, + "loss": 1.5998, + "step": 285 + }, + { + "epoch": 0.0, + "learning_rate": 9.999977205967603e-05, + "loss": 1.6335, + "step": 290 + }, + { + "epoch": 0.0, + "learning_rate": 9.999975990497319e-05, + "loss": 1.5837, + "step": 295 + }, + { + "epoch": 0.0, + "learning_rate": 9.999974743456482e-05, + "loss": 1.6105, + "step": 300 + }, + { + "epoch": 0.0, + "learning_rate": 9.999973464845096e-05, + "loss": 1.5818, + "step": 305 + }, + { + "epoch": 0.0, + "learning_rate": 9.999972154663173e-05, + "loss": 1.602, + "step": 310 + }, + { + "epoch": 0.0, + "learning_rate": 9.99997081291072e-05, + "loss": 1.5588, + "step": 315 + }, + { + "epoch": 0.0, + "learning_rate": 9.999969439587746e-05, + "loss": 1.596, + "step": 320 + }, + { + "epoch": 0.0, + "learning_rate": 9.999968034694258e-05, + "loss": 1.5618, + "step": 325 + }, + { + "epoch": 0.0, + "learning_rate": 9.999966598230266e-05, + "loss": 1.6504, + "step": 330 + }, + { + "epoch": 0.0, + "learning_rate": 9.999965130195779e-05, + "loss": 1.5724, + "step": 335 + }, + { + "epoch": 0.0, + "learning_rate": 9.999963630590805e-05, + "loss": 1.5604, + "step": 340 + }, + { + "epoch": 0.0, + "learning_rate": 9.999962099415356e-05, + "loss": 1.5777, + "step": 345 + }, + { + "epoch": 0.0, + "learning_rate": 9.99996053666944e-05, + "loss": 1.6312, + "step": 350 + }, + { + "epoch": 0.0, + "learning_rate": 9.999958942353067e-05, + "loss": 1.5696, + "step": 355 + }, + { + "epoch": 0.0, + "learning_rate": 9.999957316466249e-05, + "loss": 1.5721, + "step": 360 + }, + { + "epoch": 0.0, + "learning_rate": 9.999955659008992e-05, + "loss": 1.6266, + "step": 365 + }, + { + "epoch": 0.0, + "learning_rate": 9.999953969981311e-05, + "loss": 1.5786, + "step": 370 + }, + { + "epoch": 0.0, + "learning_rate": 9.999952249383214e-05, + "loss": 1.5544, + "step": 375 + }, + { + "epoch": 0.0, + "learning_rate": 9.999950497214712e-05, + "loss": 1.6046, + "step": 380 + }, + { + "epoch": 0.0, + "learning_rate": 9.999948713475817e-05, + "loss": 1.6058, + "step": 385 + }, + { + "epoch": 0.0, + "learning_rate": 9.99994689816654e-05, + "loss": 1.5973, + "step": 390 + }, + { + "epoch": 0.0, + "learning_rate": 9.999945051286892e-05, + "loss": 1.6181, + "step": 395 + }, + { + "epoch": 0.0, + "learning_rate": 9.999943172836885e-05, + "loss": 1.5921, + "step": 400 + }, + { + "epoch": 0.0, + "learning_rate": 9.99994126281653e-05, + "loss": 1.625, + "step": 405 + }, + { + "epoch": 0.0, + "learning_rate": 9.999939321225842e-05, + "loss": 1.5971, + "step": 410 + }, + { + "epoch": 0.0, + "learning_rate": 9.999937348064829e-05, + "loss": 1.5907, + "step": 415 + }, + { + "epoch": 0.0, + "learning_rate": 9.999935343333508e-05, + "loss": 1.5694, + "step": 420 + }, + { + "epoch": 0.0, + "learning_rate": 9.999933307031887e-05, + "loss": 1.6243, + "step": 425 + }, + { + "epoch": 0.0, + "learning_rate": 9.999931239159983e-05, + "loss": 1.5286, + "step": 430 + }, + { + "epoch": 0.0, + "learning_rate": 9.999929139717806e-05, + "loss": 1.5546, + "step": 435 + }, + { + "epoch": 0.0, + "learning_rate": 9.999927008705372e-05, + "loss": 1.6108, + "step": 440 + }, + { + "epoch": 0.0, + "learning_rate": 9.99992484612269e-05, + "loss": 1.4987, + "step": 445 + }, + { + "epoch": 0.0, + "learning_rate": 9.999922651969779e-05, + "loss": 1.5822, + "step": 450 + }, + { + "epoch": 0.0, + "learning_rate": 9.99992042624665e-05, + "loss": 1.4994, + "step": 455 + }, + { + "epoch": 0.0, + "learning_rate": 9.999918168953317e-05, + "loss": 1.5675, + "step": 460 + }, + { + "epoch": 0.0, + "learning_rate": 9.999915880089796e-05, + "loss": 1.5846, + "step": 465 + }, + { + "epoch": 0.0, + "learning_rate": 9.999913559656097e-05, + "loss": 1.5625, + "step": 470 + }, + { + "epoch": 0.0, + "learning_rate": 9.999911207652242e-05, + "loss": 1.6224, + "step": 475 + }, + { + "epoch": 0.0, + "learning_rate": 9.999908824078239e-05, + "loss": 1.5623, + "step": 480 + }, + { + "epoch": 0.0, + "learning_rate": 9.999906408934107e-05, + "loss": 1.6009, + "step": 485 + }, + { + "epoch": 0.0, + "learning_rate": 9.999903962219859e-05, + "loss": 1.5425, + "step": 490 + }, + { + "epoch": 0.0, + "learning_rate": 9.999901483935512e-05, + "loss": 1.6444, + "step": 495 + }, + { + "epoch": 0.0, + "learning_rate": 9.99989897408108e-05, + "loss": 1.5167, + "step": 500 + }, + { + "epoch": 0.0, + "learning_rate": 9.999896432656581e-05, + "loss": 1.556, + "step": 505 + }, + { + "epoch": 0.0, + "learning_rate": 9.999893859662031e-05, + "loss": 1.5834, + "step": 510 + }, + { + "epoch": 0.0, + "learning_rate": 9.999891255097444e-05, + "loss": 1.5508, + "step": 515 + }, + { + "epoch": 0.0, + "learning_rate": 9.999888618962838e-05, + "loss": 1.5247, + "step": 520 + }, + { + "epoch": 0.0, + "learning_rate": 9.999885951258228e-05, + "loss": 1.5769, + "step": 525 + }, + { + "epoch": 0.0, + "learning_rate": 9.999883251983634e-05, + "loss": 1.5263, + "step": 530 + }, + { + "epoch": 0.0, + "learning_rate": 9.99988052113907e-05, + "loss": 1.608, + "step": 535 + }, + { + "epoch": 0.0, + "learning_rate": 9.999877758724556e-05, + "loss": 1.5593, + "step": 540 + }, + { + "epoch": 0.0, + "learning_rate": 9.999874964740105e-05, + "loss": 1.5406, + "step": 545 + }, + { + "epoch": 0.0, + "learning_rate": 9.99987213918574e-05, + "loss": 1.5401, + "step": 550 + }, + { + "epoch": 0.0, + "learning_rate": 9.999869282061476e-05, + "loss": 1.5681, + "step": 555 + }, + { + "epoch": 0.0, + "learning_rate": 9.999866393367331e-05, + "loss": 1.5728, + "step": 560 + }, + { + "epoch": 0.0, + "learning_rate": 9.999863473103324e-05, + "loss": 1.5974, + "step": 565 + }, + { + "epoch": 0.0, + "learning_rate": 9.999860521269473e-05, + "loss": 1.5599, + "step": 570 + }, + { + "epoch": 0.0, + "learning_rate": 9.999857537865795e-05, + "loss": 1.5956, + "step": 575 + }, + { + "epoch": 0.0, + "learning_rate": 9.999854522892314e-05, + "loss": 1.5796, + "step": 580 + }, + { + "epoch": 0.0, + "learning_rate": 9.999851476349042e-05, + "loss": 1.6238, + "step": 585 + }, + { + "epoch": 0.0, + "learning_rate": 9.999848398236005e-05, + "loss": 1.618, + "step": 590 + }, + { + "epoch": 0.0, + "learning_rate": 9.999845288553216e-05, + "loss": 1.4921, + "step": 595 + }, + { + "epoch": 0.0, + "learning_rate": 9.9998421473007e-05, + "loss": 1.594, + "step": 600 + }, + { + "epoch": 0.0, + "learning_rate": 9.999838974478475e-05, + "loss": 1.561, + "step": 605 + }, + { + "epoch": 0.0, + "learning_rate": 9.99983577008656e-05, + "loss": 1.609, + "step": 610 + }, + { + "epoch": 0.0, + "learning_rate": 9.999832534124976e-05, + "loss": 1.5937, + "step": 615 + }, + { + "epoch": 0.0, + "learning_rate": 9.999829266593744e-05, + "loss": 1.5282, + "step": 620 + }, + { + "epoch": 0.0, + "learning_rate": 9.999825967492884e-05, + "loss": 1.5998, + "step": 625 + }, + { + "epoch": 0.0, + "learning_rate": 9.999822636822416e-05, + "loss": 1.5592, + "step": 630 + }, + { + "epoch": 0.0, + "learning_rate": 9.999819274582363e-05, + "loss": 1.5475, + "step": 635 + }, + { + "epoch": 0.0, + "learning_rate": 9.999815880772745e-05, + "loss": 1.6313, + "step": 640 + }, + { + "epoch": 0.0, + "learning_rate": 9.999812455393582e-05, + "loss": 1.6535, + "step": 645 + }, + { + "epoch": 0.0, + "learning_rate": 9.9998089984449e-05, + "loss": 1.5264, + "step": 650 + }, + { + "epoch": 0.0, + "learning_rate": 9.999805509926716e-05, + "loss": 1.5979, + "step": 655 + }, + { + "epoch": 0.0, + "learning_rate": 9.999801989839055e-05, + "loss": 1.5618, + "step": 660 + }, + { + "epoch": 0.0, + "learning_rate": 9.999798438181938e-05, + "loss": 1.6201, + "step": 665 + }, + { + "epoch": 0.0, + "learning_rate": 9.999794854955388e-05, + "loss": 1.5691, + "step": 670 + }, + { + "epoch": 0.0, + "learning_rate": 9.999791240159426e-05, + "loss": 1.6399, + "step": 675 + }, + { + "epoch": 0.0, + "learning_rate": 9.999787593794079e-05, + "loss": 1.5727, + "step": 680 + }, + { + "epoch": 0.0, + "learning_rate": 9.999783915859364e-05, + "loss": 1.5899, + "step": 685 + }, + { + "epoch": 0.0, + "learning_rate": 9.999780206355309e-05, + "loss": 1.5925, + "step": 690 + }, + { + "epoch": 0.0, + "learning_rate": 9.999776465281936e-05, + "loss": 1.5569, + "step": 695 + }, + { + "epoch": 0.0, + "learning_rate": 9.999772692639268e-05, + "loss": 1.5749, + "step": 700 + }, + { + "epoch": 0.0, + "learning_rate": 9.99976888842733e-05, + "loss": 1.5409, + "step": 705 + }, + { + "epoch": 0.0, + "learning_rate": 9.999765052646145e-05, + "loss": 1.5865, + "step": 710 + }, + { + "epoch": 0.0, + "learning_rate": 9.999761185295738e-05, + "loss": 1.5857, + "step": 715 + }, + { + "epoch": 0.0, + "learning_rate": 9.999757286376131e-05, + "loss": 1.6028, + "step": 720 + }, + { + "epoch": 0.0, + "learning_rate": 9.999753355887351e-05, + "loss": 1.5525, + "step": 725 + }, + { + "epoch": 0.0, + "learning_rate": 9.999749393829425e-05, + "loss": 1.5298, + "step": 730 + }, + { + "epoch": 0.0, + "learning_rate": 9.999745400202373e-05, + "loss": 1.5837, + "step": 735 + }, + { + "epoch": 0.0, + "learning_rate": 9.999741375006223e-05, + "loss": 1.609, + "step": 740 + }, + { + "epoch": 0.0, + "learning_rate": 9.999737318241001e-05, + "loss": 1.5891, + "step": 745 + }, + { + "epoch": 0.0, + "learning_rate": 9.99973322990673e-05, + "loss": 1.5623, + "step": 750 + }, + { + "epoch": 0.0, + "learning_rate": 9.99972911000344e-05, + "loss": 1.5996, + "step": 755 + }, + { + "epoch": 0.0, + "learning_rate": 9.999724958531151e-05, + "loss": 1.5489, + "step": 760 + }, + { + "epoch": 0.0, + "learning_rate": 9.999720775489896e-05, + "loss": 1.6058, + "step": 765 + }, + { + "epoch": 0.0, + "learning_rate": 9.999716560879696e-05, + "loss": 1.5234, + "step": 770 + }, + { + "epoch": 0.0, + "learning_rate": 9.99971231470058e-05, + "loss": 1.5619, + "step": 775 + }, + { + "epoch": 0.0, + "learning_rate": 9.999708036952576e-05, + "loss": 1.5611, + "step": 780 + }, + { + "epoch": 0.0, + "learning_rate": 9.999703727635708e-05, + "loss": 1.5747, + "step": 785 + }, + { + "epoch": 0.0, + "learning_rate": 9.999699386750005e-05, + "loss": 1.5776, + "step": 790 + }, + { + "epoch": 0.0, + "learning_rate": 9.999695014295494e-05, + "loss": 1.5548, + "step": 795 + }, + { + "epoch": 0.0, + "learning_rate": 9.999690610272204e-05, + "loss": 1.5526, + "step": 800 + }, + { + "epoch": 0.0, + "learning_rate": 9.999686174680161e-05, + "loss": 1.5164, + "step": 805 + }, + { + "epoch": 0.0, + "learning_rate": 9.999681707519393e-05, + "loss": 1.4783, + "step": 810 + }, + { + "epoch": 0.0, + "learning_rate": 9.999677208789928e-05, + "loss": 1.5452, + "step": 815 + }, + { + "epoch": 0.0, + "learning_rate": 9.999672678491796e-05, + "loss": 1.5135, + "step": 820 + }, + { + "epoch": 0.0, + "learning_rate": 9.999668116625025e-05, + "loss": 1.5256, + "step": 825 + }, + { + "epoch": 0.0, + "learning_rate": 9.999663523189644e-05, + "loss": 1.5538, + "step": 830 + }, + { + "epoch": 0.0, + "learning_rate": 9.999658898185681e-05, + "loss": 1.6071, + "step": 835 + }, + { + "epoch": 0.0, + "learning_rate": 9.999654241613166e-05, + "loss": 1.5695, + "step": 840 + }, + { + "epoch": 0.0, + "learning_rate": 9.999649553472128e-05, + "loss": 1.5224, + "step": 845 + }, + { + "epoch": 0.0, + "learning_rate": 9.999644833762599e-05, + "loss": 1.5752, + "step": 850 + }, + { + "epoch": 0.0, + "learning_rate": 9.999640082484602e-05, + "loss": 1.5129, + "step": 855 + }, + { + "epoch": 0.0, + "learning_rate": 9.999635299638174e-05, + "loss": 1.569, + "step": 860 + }, + { + "epoch": 0.0, + "learning_rate": 9.999630485223343e-05, + "loss": 1.5718, + "step": 865 + }, + { + "epoch": 0.0, + "learning_rate": 9.99962563924014e-05, + "loss": 1.5711, + "step": 870 + }, + { + "epoch": 0.0, + "learning_rate": 9.999620761688595e-05, + "loss": 1.5173, + "step": 875 + }, + { + "epoch": 0.0, + "learning_rate": 9.999615852568738e-05, + "loss": 1.6075, + "step": 880 + }, + { + "epoch": 0.0, + "learning_rate": 9.999610911880599e-05, + "loss": 1.549, + "step": 885 + }, + { + "epoch": 0.0, + "learning_rate": 9.999605939624213e-05, + "loss": 1.5395, + "step": 890 + }, + { + "epoch": 0.0, + "learning_rate": 9.999600935799608e-05, + "loss": 1.5699, + "step": 895 + }, + { + "epoch": 0.0, + "learning_rate": 9.999595900406817e-05, + "loss": 1.6032, + "step": 900 + }, + { + "epoch": 0.0, + "learning_rate": 9.999590833445871e-05, + "loss": 1.5008, + "step": 905 + }, + { + "epoch": 0.0, + "learning_rate": 9.999585734916803e-05, + "loss": 1.5711, + "step": 910 + }, + { + "epoch": 0.0, + "learning_rate": 9.999580604819644e-05, + "loss": 1.5846, + "step": 915 + }, + { + "epoch": 0.0, + "learning_rate": 9.99957544315443e-05, + "loss": 1.5641, + "step": 920 + }, + { + "epoch": 0.0, + "learning_rate": 9.999570249921189e-05, + "loss": 1.5393, + "step": 925 + }, + { + "epoch": 0.0, + "learning_rate": 9.999565025119955e-05, + "loss": 1.5406, + "step": 930 + }, + { + "epoch": 0.0, + "learning_rate": 9.999559768750761e-05, + "loss": 1.6012, + "step": 935 + }, + { + "epoch": 0.0, + "learning_rate": 9.999554480813642e-05, + "loss": 1.5826, + "step": 940 + }, + { + "epoch": 0.0, + "learning_rate": 9.99954916130863e-05, + "loss": 1.5666, + "step": 945 + }, + { + "epoch": 0.0, + "learning_rate": 9.999543810235758e-05, + "loss": 1.5688, + "step": 950 + }, + { + "epoch": 0.0, + "learning_rate": 9.999538427595061e-05, + "loss": 1.5858, + "step": 955 + }, + { + "epoch": 0.0, + "learning_rate": 9.999533013386573e-05, + "loss": 1.539, + "step": 960 + }, + { + "epoch": 0.0, + "learning_rate": 9.999527567610328e-05, + "loss": 1.5403, + "step": 965 + }, + { + "epoch": 0.0, + "learning_rate": 9.999522090266357e-05, + "loss": 1.5762, + "step": 970 + }, + { + "epoch": 0.0, + "learning_rate": 9.999516581354701e-05, + "loss": 1.5501, + "step": 975 + }, + { + "epoch": 0.0, + "learning_rate": 9.99951104087539e-05, + "loss": 1.5513, + "step": 980 + }, + { + "epoch": 0.0, + "learning_rate": 9.99950546882846e-05, + "loss": 1.5601, + "step": 985 + }, + { + "epoch": 0.01, + "learning_rate": 9.999499865213948e-05, + "loss": 1.5821, + "step": 990 + }, + { + "epoch": 0.01, + "learning_rate": 9.999494230031887e-05, + "loss": 1.55, + "step": 995 + }, + { + "epoch": 0.01, + "learning_rate": 9.999488563282313e-05, + "loss": 1.5428, + "step": 1000 + }, + { + "epoch": 0.01, + "learning_rate": 9.999482864965264e-05, + "loss": 1.6102, + "step": 1005 + }, + { + "epoch": 0.01, + "learning_rate": 9.999477135080772e-05, + "loss": 1.5822, + "step": 1010 + }, + { + "epoch": 0.01, + "learning_rate": 9.999471373628877e-05, + "loss": 1.5751, + "step": 1015 + }, + { + "epoch": 0.01, + "learning_rate": 9.999465580609615e-05, + "loss": 1.5276, + "step": 1020 + }, + { + "epoch": 0.01, + "learning_rate": 9.99945975602302e-05, + "loss": 1.5953, + "step": 1025 + }, + { + "epoch": 0.01, + "learning_rate": 9.999453899869129e-05, + "loss": 1.5923, + "step": 1030 + }, + { + "epoch": 0.01, + "learning_rate": 9.999448012147982e-05, + "loss": 1.4987, + "step": 1035 + }, + { + "epoch": 0.01, + "learning_rate": 9.999442092859614e-05, + "loss": 1.6472, + "step": 1040 + }, + { + "epoch": 0.01, + "learning_rate": 9.999436142004062e-05, + "loss": 1.5915, + "step": 1045 + }, + { + "epoch": 0.01, + "learning_rate": 9.999430159581365e-05, + "loss": 1.566, + "step": 1050 + }, + { + "epoch": 0.01, + "learning_rate": 9.999424145591561e-05, + "loss": 1.5354, + "step": 1055 + }, + { + "epoch": 0.01, + "learning_rate": 9.999418100034685e-05, + "loss": 1.5651, + "step": 1060 + }, + { + "epoch": 0.01, + "learning_rate": 9.999412022910779e-05, + "loss": 1.5543, + "step": 1065 + }, + { + "epoch": 0.01, + "learning_rate": 9.999405914219878e-05, + "loss": 1.5432, + "step": 1070 + }, + { + "epoch": 0.01, + "learning_rate": 9.999399773962024e-05, + "loss": 1.5405, + "step": 1075 + }, + { + "epoch": 0.01, + "learning_rate": 9.999393602137253e-05, + "loss": 1.4827, + "step": 1080 + }, + { + "epoch": 0.01, + "learning_rate": 9.999387398745605e-05, + "loss": 1.567, + "step": 1085 + }, + { + "epoch": 0.01, + "learning_rate": 9.999381163787119e-05, + "loss": 1.5851, + "step": 1090 + }, + { + "epoch": 0.01, + "learning_rate": 9.999374897261834e-05, + "loss": 1.5537, + "step": 1095 + }, + { + "epoch": 0.01, + "learning_rate": 9.999368599169791e-05, + "loss": 1.5684, + "step": 1100 + }, + { + "epoch": 0.01, + "learning_rate": 9.999362269511028e-05, + "loss": 1.6324, + "step": 1105 + }, + { + "epoch": 0.01, + "learning_rate": 9.999355908285586e-05, + "loss": 1.5908, + "step": 1110 + }, + { + "epoch": 0.01, + "learning_rate": 9.999349515493504e-05, + "loss": 1.5818, + "step": 1115 + }, + { + "epoch": 0.01, + "learning_rate": 9.999343091134825e-05, + "loss": 1.5015, + "step": 1120 + }, + { + "epoch": 0.01, + "learning_rate": 9.999336635209587e-05, + "loss": 1.5314, + "step": 1125 + }, + { + "epoch": 0.01, + "learning_rate": 9.999330147717831e-05, + "loss": 1.6139, + "step": 1130 + }, + { + "epoch": 0.01, + "learning_rate": 9.9993236286596e-05, + "loss": 1.5631, + "step": 1135 + }, + { + "epoch": 0.01, + "learning_rate": 9.999317078034934e-05, + "loss": 1.5393, + "step": 1140 + }, + { + "epoch": 0.01, + "learning_rate": 9.999310495843873e-05, + "loss": 1.5802, + "step": 1145 + }, + { + "epoch": 0.01, + "learning_rate": 9.999303882086459e-05, + "loss": 1.5265, + "step": 1150 + }, + { + "epoch": 0.01, + "learning_rate": 9.999297236762736e-05, + "loss": 1.5805, + "step": 1155 + }, + { + "epoch": 0.01, + "learning_rate": 9.999290559872742e-05, + "loss": 1.4904, + "step": 1160 + }, + { + "epoch": 0.01, + "learning_rate": 9.999283851416525e-05, + "loss": 1.522, + "step": 1165 + }, + { + "epoch": 0.01, + "learning_rate": 9.99927711139412e-05, + "loss": 1.5492, + "step": 1170 + }, + { + "epoch": 0.01, + "learning_rate": 9.999270339805577e-05, + "loss": 1.5396, + "step": 1175 + }, + { + "epoch": 0.01, + "learning_rate": 9.999263536650934e-05, + "loss": 1.5411, + "step": 1180 + }, + { + "epoch": 0.01, + "learning_rate": 9.999256701930235e-05, + "loss": 1.581, + "step": 1185 + }, + { + "epoch": 0.01, + "learning_rate": 9.999249835643522e-05, + "loss": 1.6091, + "step": 1190 + }, + { + "epoch": 0.01, + "learning_rate": 9.999242937790842e-05, + "loss": 1.5867, + "step": 1195 + }, + { + "epoch": 0.01, + "learning_rate": 9.999236008372235e-05, + "loss": 1.5334, + "step": 1200 + }, + { + "epoch": 0.01, + "learning_rate": 9.999229047387746e-05, + "loss": 1.5239, + "step": 1205 + }, + { + "epoch": 0.01, + "learning_rate": 9.999222054837419e-05, + "loss": 1.5217, + "step": 1210 + }, + { + "epoch": 0.01, + "learning_rate": 9.999215030721298e-05, + "loss": 1.5833, + "step": 1215 + }, + { + "epoch": 0.01, + "learning_rate": 9.999207975039429e-05, + "loss": 1.5455, + "step": 1220 + }, + { + "epoch": 0.01, + "learning_rate": 9.999200887791853e-05, + "loss": 1.482, + "step": 1225 + }, + { + "epoch": 0.01, + "learning_rate": 9.999193768978617e-05, + "loss": 1.5395, + "step": 1230 + }, + { + "epoch": 0.01, + "learning_rate": 9.999186618599767e-05, + "loss": 1.5382, + "step": 1235 + }, + { + "epoch": 0.01, + "learning_rate": 9.999179436655346e-05, + "loss": 1.5346, + "step": 1240 + }, + { + "epoch": 0.01, + "learning_rate": 9.999172223145399e-05, + "loss": 1.4847, + "step": 1245 + }, + { + "epoch": 0.01, + "learning_rate": 9.999164978069974e-05, + "loss": 1.5358, + "step": 1250 + }, + { + "epoch": 0.01, + "learning_rate": 9.999157701429116e-05, + "loss": 1.5671, + "step": 1255 + }, + { + "epoch": 0.01, + "learning_rate": 9.99915039322287e-05, + "loss": 1.5468, + "step": 1260 + }, + { + "epoch": 0.01, + "learning_rate": 9.999143053451282e-05, + "loss": 1.6142, + "step": 1265 + }, + { + "epoch": 0.01, + "learning_rate": 9.9991356821144e-05, + "loss": 1.544, + "step": 1270 + }, + { + "epoch": 0.01, + "learning_rate": 9.999128279212268e-05, + "loss": 1.5805, + "step": 1275 + }, + { + "epoch": 0.01, + "learning_rate": 9.999120844744935e-05, + "loss": 1.6026, + "step": 1280 + }, + { + "epoch": 0.01, + "learning_rate": 9.999113378712447e-05, + "loss": 1.5879, + "step": 1285 + }, + { + "epoch": 0.01, + "learning_rate": 9.999105881114852e-05, + "loss": 1.5617, + "step": 1290 + }, + { + "epoch": 0.01, + "learning_rate": 9.999098351952195e-05, + "loss": 1.5493, + "step": 1295 + }, + { + "epoch": 0.01, + "learning_rate": 9.999090791224527e-05, + "loss": 1.4812, + "step": 1300 + }, + { + "epoch": 0.01, + "learning_rate": 9.999083198931893e-05, + "loss": 1.5836, + "step": 1305 + }, + { + "epoch": 0.01, + "learning_rate": 9.999075575074341e-05, + "loss": 1.5515, + "step": 1310 + }, + { + "epoch": 0.01, + "learning_rate": 9.999067919651921e-05, + "loss": 1.5362, + "step": 1315 + }, + { + "epoch": 0.01, + "learning_rate": 9.999060232664681e-05, + "loss": 1.6015, + "step": 1320 + }, + { + "epoch": 0.01, + "learning_rate": 9.999052514112668e-05, + "loss": 1.6118, + "step": 1325 + }, + { + "epoch": 0.01, + "learning_rate": 9.999044763995932e-05, + "loss": 1.5226, + "step": 1330 + }, + { + "epoch": 0.01, + "learning_rate": 9.999036982314521e-05, + "loss": 1.5084, + "step": 1335 + }, + { + "epoch": 0.01, + "learning_rate": 9.999029169068485e-05, + "loss": 1.5113, + "step": 1340 + }, + { + "epoch": 0.01, + "learning_rate": 9.999021324257873e-05, + "loss": 1.5519, + "step": 1345 + }, + { + "epoch": 0.01, + "learning_rate": 9.999013447882735e-05, + "loss": 1.5396, + "step": 1350 + }, + { + "epoch": 0.01, + "learning_rate": 9.999005539943119e-05, + "loss": 1.5679, + "step": 1355 + }, + { + "epoch": 0.01, + "learning_rate": 9.998997600439077e-05, + "loss": 1.5439, + "step": 1360 + }, + { + "epoch": 0.01, + "learning_rate": 9.998989629370659e-05, + "loss": 1.52, + "step": 1365 + }, + { + "epoch": 0.01, + "learning_rate": 9.998981626737914e-05, + "loss": 1.558, + "step": 1370 + }, + { + "epoch": 0.01, + "learning_rate": 9.998973592540892e-05, + "loss": 1.4872, + "step": 1375 + }, + { + "epoch": 0.01, + "learning_rate": 9.998965526779647e-05, + "loss": 1.5766, + "step": 1380 + }, + { + "epoch": 0.01, + "learning_rate": 9.998957429454227e-05, + "loss": 1.5765, + "step": 1385 + }, + { + "epoch": 0.01, + "learning_rate": 9.998949300564684e-05, + "loss": 1.5404, + "step": 1390 + }, + { + "epoch": 0.01, + "learning_rate": 9.998941140111068e-05, + "loss": 1.55, + "step": 1395 + }, + { + "epoch": 0.01, + "learning_rate": 9.998932948093434e-05, + "loss": 1.5632, + "step": 1400 + }, + { + "epoch": 0.01, + "learning_rate": 9.99892472451183e-05, + "loss": 1.5557, + "step": 1405 + }, + { + "epoch": 0.01, + "learning_rate": 9.998916469366311e-05, + "loss": 1.5449, + "step": 1410 + }, + { + "epoch": 0.01, + "learning_rate": 9.998908182656925e-05, + "loss": 1.5139, + "step": 1415 + }, + { + "epoch": 0.01, + "learning_rate": 9.998899864383728e-05, + "loss": 1.5281, + "step": 1420 + }, + { + "epoch": 0.01, + "learning_rate": 9.998891514546773e-05, + "loss": 1.5736, + "step": 1425 + }, + { + "epoch": 0.01, + "learning_rate": 9.998883133146111e-05, + "loss": 1.5173, + "step": 1430 + }, + { + "epoch": 0.01, + "learning_rate": 9.998874720181795e-05, + "loss": 1.5877, + "step": 1435 + }, + { + "epoch": 0.01, + "learning_rate": 9.998866275653877e-05, + "loss": 1.5667, + "step": 1440 + }, + { + "epoch": 0.01, + "learning_rate": 9.99885779956241e-05, + "loss": 1.5054, + "step": 1445 + }, + { + "epoch": 0.01, + "learning_rate": 9.998849291907453e-05, + "loss": 1.556, + "step": 1450 + }, + { + "epoch": 0.01, + "learning_rate": 9.998840752689053e-05, + "loss": 1.5338, + "step": 1455 + }, + { + "epoch": 0.01, + "learning_rate": 9.998832181907267e-05, + "loss": 1.5424, + "step": 1460 + }, + { + "epoch": 0.01, + "learning_rate": 9.99882357956215e-05, + "loss": 1.5117, + "step": 1465 + }, + { + "epoch": 0.01, + "learning_rate": 9.998814945653754e-05, + "loss": 1.531, + "step": 1470 + }, + { + "epoch": 0.01, + "learning_rate": 9.998806280182135e-05, + "loss": 1.5302, + "step": 1475 + }, + { + "epoch": 0.01, + "learning_rate": 9.998797583147348e-05, + "loss": 1.5085, + "step": 1480 + }, + { + "epoch": 0.01, + "learning_rate": 9.998788854549447e-05, + "loss": 1.5074, + "step": 1485 + }, + { + "epoch": 0.01, + "learning_rate": 9.998780094388487e-05, + "loss": 1.5279, + "step": 1490 + }, + { + "epoch": 0.01, + "learning_rate": 9.998771302664524e-05, + "loss": 1.5998, + "step": 1495 + }, + { + "epoch": 0.01, + "learning_rate": 9.998762479377613e-05, + "loss": 1.5471, + "step": 1500 + }, + { + "epoch": 0.01, + "learning_rate": 9.99875362452781e-05, + "loss": 1.5421, + "step": 1505 + }, + { + "epoch": 0.01, + "learning_rate": 9.998744738115171e-05, + "loss": 1.5692, + "step": 1510 + }, + { + "epoch": 0.01, + "learning_rate": 9.99873582013975e-05, + "loss": 1.6098, + "step": 1515 + }, + { + "epoch": 0.01, + "learning_rate": 9.998726870601609e-05, + "loss": 1.5027, + "step": 1520 + }, + { + "epoch": 0.01, + "learning_rate": 9.998717889500798e-05, + "loss": 1.5163, + "step": 1525 + }, + { + "epoch": 0.01, + "learning_rate": 9.998708876837377e-05, + "loss": 1.5064, + "step": 1530 + }, + { + "epoch": 0.01, + "learning_rate": 9.998699832611403e-05, + "loss": 1.5468, + "step": 1535 + }, + { + "epoch": 0.01, + "learning_rate": 9.998690756822931e-05, + "loss": 1.5748, + "step": 1540 + }, + { + "epoch": 0.01, + "learning_rate": 9.998681649472021e-05, + "loss": 1.5112, + "step": 1545 + }, + { + "epoch": 0.01, + "learning_rate": 9.99867251055873e-05, + "loss": 1.5103, + "step": 1550 + }, + { + "epoch": 0.01, + "learning_rate": 9.998663340083115e-05, + "loss": 1.5412, + "step": 1555 + }, + { + "epoch": 0.01, + "learning_rate": 9.998654138045231e-05, + "loss": 1.5342, + "step": 1560 + }, + { + "epoch": 0.01, + "learning_rate": 9.998644904445143e-05, + "loss": 1.5451, + "step": 1565 + }, + { + "epoch": 0.01, + "learning_rate": 9.998635639282903e-05, + "loss": 1.5328, + "step": 1570 + }, + { + "epoch": 0.01, + "learning_rate": 9.998626342558571e-05, + "loss": 1.5288, + "step": 1575 + }, + { + "epoch": 0.01, + "learning_rate": 9.998617014272208e-05, + "loss": 1.5191, + "step": 1580 + }, + { + "epoch": 0.01, + "learning_rate": 9.998607654423871e-05, + "loss": 1.518, + "step": 1585 + }, + { + "epoch": 0.01, + "learning_rate": 9.99859826301362e-05, + "loss": 1.5382, + "step": 1590 + }, + { + "epoch": 0.01, + "learning_rate": 9.998588840041512e-05, + "loss": 1.5361, + "step": 1595 + }, + { + "epoch": 0.01, + "learning_rate": 9.99857938550761e-05, + "loss": 1.4716, + "step": 1600 + }, + { + "epoch": 0.01, + "learning_rate": 9.998569899411972e-05, + "loss": 1.5346, + "step": 1605 + }, + { + "epoch": 0.01, + "learning_rate": 9.998560381754658e-05, + "loss": 1.5335, + "step": 1610 + }, + { + "epoch": 0.01, + "learning_rate": 9.998550832535727e-05, + "loss": 1.571, + "step": 1615 + }, + { + "epoch": 0.01, + "learning_rate": 9.99854125175524e-05, + "loss": 1.5688, + "step": 1620 + }, + { + "epoch": 0.01, + "learning_rate": 9.99853163941326e-05, + "loss": 1.5505, + "step": 1625 + }, + { + "epoch": 0.01, + "learning_rate": 9.998521995509845e-05, + "loss": 1.5353, + "step": 1630 + }, + { + "epoch": 0.01, + "learning_rate": 9.998512320045055e-05, + "loss": 1.5067, + "step": 1635 + }, + { + "epoch": 0.01, + "learning_rate": 9.998502613018952e-05, + "loss": 1.5349, + "step": 1640 + }, + { + "epoch": 0.01, + "learning_rate": 9.998492874431599e-05, + "loss": 1.567, + "step": 1645 + }, + { + "epoch": 0.01, + "learning_rate": 9.998483104283056e-05, + "loss": 1.511, + "step": 1650 + }, + { + "epoch": 0.01, + "learning_rate": 9.998473302573385e-05, + "loss": 1.5579, + "step": 1655 + }, + { + "epoch": 0.01, + "learning_rate": 9.998463469302647e-05, + "loss": 1.5335, + "step": 1660 + }, + { + "epoch": 0.01, + "learning_rate": 9.998453604470905e-05, + "loss": 1.5173, + "step": 1665 + }, + { + "epoch": 0.01, + "learning_rate": 9.998443708078222e-05, + "loss": 1.5424, + "step": 1670 + }, + { + "epoch": 0.01, + "learning_rate": 9.99843378012466e-05, + "loss": 1.5325, + "step": 1675 + }, + { + "epoch": 0.01, + "learning_rate": 9.998423820610282e-05, + "loss": 1.5752, + "step": 1680 + }, + { + "epoch": 0.01, + "learning_rate": 9.998413829535147e-05, + "loss": 1.5561, + "step": 1685 + }, + { + "epoch": 0.01, + "learning_rate": 9.998403806899324e-05, + "loss": 1.5617, + "step": 1690 + }, + { + "epoch": 0.01, + "learning_rate": 9.998393752702873e-05, + "loss": 1.5346, + "step": 1695 + }, + { + "epoch": 0.01, + "learning_rate": 9.998383666945859e-05, + "loss": 1.5467, + "step": 1700 + }, + { + "epoch": 0.01, + "learning_rate": 9.998373549628343e-05, + "loss": 1.4466, + "step": 1705 + }, + { + "epoch": 0.01, + "learning_rate": 9.998363400750392e-05, + "loss": 1.5568, + "step": 1710 + }, + { + "epoch": 0.01, + "learning_rate": 9.998353220312069e-05, + "loss": 1.5046, + "step": 1715 + }, + { + "epoch": 0.01, + "learning_rate": 9.998343008313437e-05, + "loss": 1.5542, + "step": 1720 + }, + { + "epoch": 0.01, + "learning_rate": 9.99833276475456e-05, + "loss": 1.5236, + "step": 1725 + }, + { + "epoch": 0.01, + "learning_rate": 9.998322489635507e-05, + "loss": 1.4776, + "step": 1730 + }, + { + "epoch": 0.01, + "learning_rate": 9.99831218295634e-05, + "loss": 1.614, + "step": 1735 + }, + { + "epoch": 0.01, + "learning_rate": 9.998301844717123e-05, + "loss": 1.5401, + "step": 1740 + }, + { + "epoch": 0.01, + "learning_rate": 9.998291474917923e-05, + "loss": 1.515, + "step": 1745 + }, + { + "epoch": 0.01, + "learning_rate": 9.998281073558804e-05, + "loss": 1.5409, + "step": 1750 + }, + { + "epoch": 0.01, + "learning_rate": 9.998270640639833e-05, + "loss": 1.567, + "step": 1755 + }, + { + "epoch": 0.01, + "learning_rate": 9.998260176161076e-05, + "loss": 1.5215, + "step": 1760 + }, + { + "epoch": 0.01, + "learning_rate": 9.998249680122599e-05, + "loss": 1.5309, + "step": 1765 + }, + { + "epoch": 0.01, + "learning_rate": 9.998239152524467e-05, + "loss": 1.4966, + "step": 1770 + }, + { + "epoch": 0.01, + "learning_rate": 9.998228593366747e-05, + "loss": 1.5245, + "step": 1775 + }, + { + "epoch": 0.01, + "learning_rate": 9.998218002649506e-05, + "loss": 1.4797, + "step": 1780 + }, + { + "epoch": 0.01, + "learning_rate": 9.998207380372812e-05, + "loss": 1.5137, + "step": 1785 + }, + { + "epoch": 0.01, + "learning_rate": 9.99819672653673e-05, + "loss": 1.5058, + "step": 1790 + }, + { + "epoch": 0.01, + "learning_rate": 9.998186041141329e-05, + "loss": 1.6036, + "step": 1795 + }, + { + "epoch": 0.01, + "learning_rate": 9.998175324186674e-05, + "loss": 1.5146, + "step": 1800 + }, + { + "epoch": 0.01, + "learning_rate": 9.998164575672835e-05, + "loss": 1.5022, + "step": 1805 + }, + { + "epoch": 0.01, + "learning_rate": 9.998153795599879e-05, + "loss": 1.5059, + "step": 1810 + }, + { + "epoch": 0.01, + "learning_rate": 9.998142983967875e-05, + "loss": 1.5061, + "step": 1815 + }, + { + "epoch": 0.01, + "learning_rate": 9.99813214077689e-05, + "loss": 1.5005, + "step": 1820 + }, + { + "epoch": 0.01, + "learning_rate": 9.998121266026993e-05, + "loss": 1.5239, + "step": 1825 + }, + { + "epoch": 0.01, + "learning_rate": 9.998110359718253e-05, + "loss": 1.5135, + "step": 1830 + }, + { + "epoch": 0.01, + "learning_rate": 9.998099421850737e-05, + "loss": 1.547, + "step": 1835 + }, + { + "epoch": 0.01, + "learning_rate": 9.998088452424516e-05, + "loss": 1.5184, + "step": 1840 + }, + { + "epoch": 0.01, + "learning_rate": 9.99807745143966e-05, + "loss": 1.5437, + "step": 1845 + }, + { + "epoch": 0.01, + "learning_rate": 9.998066418896238e-05, + "loss": 1.5234, + "step": 1850 + }, + { + "epoch": 0.01, + "learning_rate": 9.998055354794316e-05, + "loss": 1.5954, + "step": 1855 + }, + { + "epoch": 0.01, + "learning_rate": 9.998044259133969e-05, + "loss": 1.5129, + "step": 1860 + }, + { + "epoch": 0.01, + "learning_rate": 9.998033131915266e-05, + "loss": 1.5716, + "step": 1865 + }, + { + "epoch": 0.01, + "learning_rate": 9.998021973138274e-05, + "loss": 1.4783, + "step": 1870 + }, + { + "epoch": 0.01, + "learning_rate": 9.998010782803066e-05, + "loss": 1.5515, + "step": 1875 + }, + { + "epoch": 0.01, + "learning_rate": 9.997999560909712e-05, + "loss": 1.543, + "step": 1880 + }, + { + "epoch": 0.01, + "learning_rate": 9.997988307458283e-05, + "loss": 1.4829, + "step": 1885 + }, + { + "epoch": 0.01, + "learning_rate": 9.99797702244885e-05, + "loss": 1.4991, + "step": 1890 + }, + { + "epoch": 0.01, + "learning_rate": 9.997965705881485e-05, + "loss": 1.5513, + "step": 1895 + }, + { + "epoch": 0.01, + "learning_rate": 9.99795435775626e-05, + "loss": 1.4615, + "step": 1900 + }, + { + "epoch": 0.01, + "learning_rate": 9.997942978073243e-05, + "loss": 1.5439, + "step": 1905 + }, + { + "epoch": 0.01, + "learning_rate": 9.99793156683251e-05, + "loss": 1.5779, + "step": 1910 + }, + { + "epoch": 0.01, + "learning_rate": 9.997920124034133e-05, + "loss": 1.5227, + "step": 1915 + }, + { + "epoch": 0.01, + "learning_rate": 9.99790864967818e-05, + "loss": 1.5125, + "step": 1920 + }, + { + "epoch": 0.01, + "learning_rate": 9.997897143764727e-05, + "loss": 1.4916, + "step": 1925 + }, + { + "epoch": 0.01, + "learning_rate": 9.997885606293844e-05, + "loss": 1.544, + "step": 1930 + }, + { + "epoch": 0.01, + "learning_rate": 9.997874037265608e-05, + "loss": 1.5055, + "step": 1935 + }, + { + "epoch": 0.01, + "learning_rate": 9.99786243668009e-05, + "loss": 1.4588, + "step": 1940 + }, + { + "epoch": 0.01, + "learning_rate": 9.99785080453736e-05, + "loss": 1.5419, + "step": 1945 + }, + { + "epoch": 0.01, + "learning_rate": 9.997839140837497e-05, + "loss": 1.5481, + "step": 1950 + }, + { + "epoch": 0.01, + "learning_rate": 9.997827445580572e-05, + "loss": 1.6031, + "step": 1955 + }, + { + "epoch": 0.01, + "learning_rate": 9.997815718766658e-05, + "loss": 1.5483, + "step": 1960 + }, + { + "epoch": 0.01, + "learning_rate": 9.99780396039583e-05, + "loss": 1.5523, + "step": 1965 + }, + { + "epoch": 0.01, + "learning_rate": 9.997792170468162e-05, + "loss": 1.541, + "step": 1970 + }, + { + "epoch": 0.01, + "learning_rate": 9.997780348983728e-05, + "loss": 1.5834, + "step": 1975 + }, + { + "epoch": 0.01, + "learning_rate": 9.997768495942605e-05, + "loss": 1.5626, + "step": 1980 + }, + { + "epoch": 0.01, + "learning_rate": 9.997756611344864e-05, + "loss": 1.5052, + "step": 1985 + }, + { + "epoch": 0.01, + "learning_rate": 9.997744695190583e-05, + "loss": 1.5214, + "step": 1990 + }, + { + "epoch": 0.01, + "learning_rate": 9.997732747479837e-05, + "loss": 1.496, + "step": 1995 + }, + { + "epoch": 0.01, + "learning_rate": 9.997720768212701e-05, + "loss": 1.5063, + "step": 2000 + }, + { + "epoch": 0.01, + "learning_rate": 9.99770875738925e-05, + "loss": 1.5716, + "step": 2005 + }, + { + "epoch": 0.01, + "learning_rate": 9.997696715009558e-05, + "loss": 1.491, + "step": 2010 + }, + { + "epoch": 0.01, + "learning_rate": 9.997684641073705e-05, + "loss": 1.5733, + "step": 2015 + }, + { + "epoch": 0.01, + "learning_rate": 9.997672535581768e-05, + "loss": 1.5072, + "step": 2020 + }, + { + "epoch": 0.01, + "learning_rate": 9.997660398533818e-05, + "loss": 1.5445, + "step": 2025 + }, + { + "epoch": 0.01, + "learning_rate": 9.997648229929935e-05, + "loss": 1.5033, + "step": 2030 + }, + { + "epoch": 0.01, + "learning_rate": 9.997636029770197e-05, + "loss": 1.5877, + "step": 2035 + }, + { + "epoch": 0.01, + "learning_rate": 9.997623798054679e-05, + "loss": 1.5401, + "step": 2040 + }, + { + "epoch": 0.01, + "learning_rate": 9.997611534783456e-05, + "loss": 1.518, + "step": 2045 + }, + { + "epoch": 0.01, + "learning_rate": 9.99759923995661e-05, + "loss": 1.5661, + "step": 2050 + }, + { + "epoch": 0.01, + "learning_rate": 9.997586913574217e-05, + "loss": 1.6024, + "step": 2055 + }, + { + "epoch": 0.01, + "learning_rate": 9.997574555636356e-05, + "loss": 1.5285, + "step": 2060 + }, + { + "epoch": 0.01, + "learning_rate": 9.997562166143102e-05, + "loss": 1.5251, + "step": 2065 + }, + { + "epoch": 0.01, + "learning_rate": 9.997549745094535e-05, + "loss": 1.513, + "step": 2070 + }, + { + "epoch": 0.01, + "learning_rate": 9.997537292490734e-05, + "loss": 1.5422, + "step": 2075 + }, + { + "epoch": 0.01, + "learning_rate": 9.997524808331775e-05, + "loss": 1.5595, + "step": 2080 + }, + { + "epoch": 0.01, + "learning_rate": 9.99751229261774e-05, + "loss": 1.6162, + "step": 2085 + }, + { + "epoch": 0.01, + "learning_rate": 9.997499745348708e-05, + "loss": 1.4848, + "step": 2090 + }, + { + "epoch": 0.01, + "learning_rate": 9.997487166524755e-05, + "loss": 1.5274, + "step": 2095 + }, + { + "epoch": 0.01, + "learning_rate": 9.997474556145963e-05, + "loss": 1.5433, + "step": 2100 + }, + { + "epoch": 0.01, + "learning_rate": 9.997461914212411e-05, + "loss": 1.5322, + "step": 2105 + }, + { + "epoch": 0.01, + "learning_rate": 9.997449240724179e-05, + "loss": 1.5405, + "step": 2110 + }, + { + "epoch": 0.01, + "learning_rate": 9.997436535681348e-05, + "loss": 1.5629, + "step": 2115 + }, + { + "epoch": 0.01, + "learning_rate": 9.997423799083995e-05, + "loss": 1.5265, + "step": 2120 + }, + { + "epoch": 0.01, + "learning_rate": 9.997411030932205e-05, + "loss": 1.5918, + "step": 2125 + }, + { + "epoch": 0.01, + "learning_rate": 9.997398231226055e-05, + "loss": 1.513, + "step": 2130 + }, + { + "epoch": 0.01, + "learning_rate": 9.997385399965627e-05, + "loss": 1.5206, + "step": 2135 + }, + { + "epoch": 0.01, + "learning_rate": 9.997372537151002e-05, + "loss": 1.5626, + "step": 2140 + }, + { + "epoch": 0.01, + "learning_rate": 9.99735964278226e-05, + "loss": 1.5216, + "step": 2145 + }, + { + "epoch": 0.01, + "learning_rate": 9.997346716859486e-05, + "loss": 1.4917, + "step": 2150 + }, + { + "epoch": 0.01, + "learning_rate": 9.997333759382757e-05, + "loss": 1.5318, + "step": 2155 + }, + { + "epoch": 0.01, + "learning_rate": 9.997320770352159e-05, + "loss": 1.5348, + "step": 2160 + }, + { + "epoch": 0.01, + "learning_rate": 9.997307749767771e-05, + "loss": 1.5397, + "step": 2165 + }, + { + "epoch": 0.01, + "learning_rate": 9.997294697629676e-05, + "loss": 1.5519, + "step": 2170 + }, + { + "epoch": 0.01, + "learning_rate": 9.997281613937956e-05, + "loss": 1.5524, + "step": 2175 + }, + { + "epoch": 0.01, + "learning_rate": 9.997268498692696e-05, + "loss": 1.5372, + "step": 2180 + }, + { + "epoch": 0.01, + "learning_rate": 9.997255351893976e-05, + "loss": 1.6459, + "step": 2185 + }, + { + "epoch": 0.01, + "learning_rate": 9.997242173541882e-05, + "loss": 1.5363, + "step": 2190 + }, + { + "epoch": 0.01, + "learning_rate": 9.997228963636494e-05, + "loss": 1.5172, + "step": 2195 + }, + { + "epoch": 0.01, + "learning_rate": 9.997215722177896e-05, + "loss": 1.5398, + "step": 2200 + }, + { + "epoch": 0.01, + "learning_rate": 9.997202449166172e-05, + "loss": 1.5388, + "step": 2205 + }, + { + "epoch": 0.01, + "learning_rate": 9.997189144601407e-05, + "loss": 1.5532, + "step": 2210 + }, + { + "epoch": 0.01, + "learning_rate": 9.997175808483686e-05, + "loss": 1.5264, + "step": 2215 + }, + { + "epoch": 0.01, + "learning_rate": 9.997162440813088e-05, + "loss": 1.5251, + "step": 2220 + }, + { + "epoch": 0.01, + "learning_rate": 9.997149041589704e-05, + "loss": 1.5257, + "step": 2225 + }, + { + "epoch": 0.01, + "learning_rate": 9.997135610813613e-05, + "loss": 1.4777, + "step": 2230 + }, + { + "epoch": 0.01, + "learning_rate": 9.997122148484902e-05, + "loss": 1.5145, + "step": 2235 + }, + { + "epoch": 0.01, + "learning_rate": 9.99710865460366e-05, + "loss": 1.5228, + "step": 2240 + }, + { + "epoch": 0.01, + "learning_rate": 9.997095129169965e-05, + "loss": 1.4485, + "step": 2245 + }, + { + "epoch": 0.01, + "learning_rate": 9.997081572183907e-05, + "loss": 1.5493, + "step": 2250 + }, + { + "epoch": 0.01, + "learning_rate": 9.997067983645569e-05, + "loss": 1.5696, + "step": 2255 + }, + { + "epoch": 0.01, + "learning_rate": 9.99705436355504e-05, + "loss": 1.5178, + "step": 2260 + }, + { + "epoch": 0.01, + "learning_rate": 9.997040711912402e-05, + "loss": 1.502, + "step": 2265 + }, + { + "epoch": 0.01, + "learning_rate": 9.997027028717745e-05, + "loss": 1.5349, + "step": 2270 + }, + { + "epoch": 0.01, + "learning_rate": 9.997013313971154e-05, + "loss": 1.5049, + "step": 2275 + }, + { + "epoch": 0.01, + "learning_rate": 9.996999567672716e-05, + "loss": 1.5232, + "step": 2280 + }, + { + "epoch": 0.01, + "learning_rate": 9.996985789822515e-05, + "loss": 1.5508, + "step": 2285 + }, + { + "epoch": 0.01, + "learning_rate": 9.996971980420642e-05, + "loss": 1.5511, + "step": 2290 + }, + { + "epoch": 0.01, + "learning_rate": 9.99695813946718e-05, + "loss": 1.5355, + "step": 2295 + }, + { + "epoch": 0.01, + "learning_rate": 9.996944266962222e-05, + "loss": 1.5373, + "step": 2300 + }, + { + "epoch": 0.01, + "learning_rate": 9.99693036290585e-05, + "loss": 1.563, + "step": 2305 + }, + { + "epoch": 0.01, + "learning_rate": 9.996916427298155e-05, + "loss": 1.5324, + "step": 2310 + }, + { + "epoch": 0.01, + "learning_rate": 9.996902460139226e-05, + "loss": 1.5672, + "step": 2315 + }, + { + "epoch": 0.01, + "learning_rate": 9.996888461429148e-05, + "loss": 1.5321, + "step": 2320 + }, + { + "epoch": 0.01, + "learning_rate": 9.996874431168008e-05, + "loss": 1.5211, + "step": 2325 + }, + { + "epoch": 0.01, + "learning_rate": 9.9968603693559e-05, + "loss": 1.4997, + "step": 2330 + }, + { + "epoch": 0.01, + "learning_rate": 9.99684627599291e-05, + "loss": 1.571, + "step": 2335 + }, + { + "epoch": 0.01, + "learning_rate": 9.996832151079127e-05, + "loss": 1.5507, + "step": 2340 + }, + { + "epoch": 0.01, + "learning_rate": 9.99681799461464e-05, + "loss": 1.5583, + "step": 2345 + }, + { + "epoch": 0.01, + "learning_rate": 9.99680380659954e-05, + "loss": 1.517, + "step": 2350 + }, + { + "epoch": 0.01, + "learning_rate": 9.996789587033912e-05, + "loss": 1.5097, + "step": 2355 + }, + { + "epoch": 0.01, + "learning_rate": 9.996775335917852e-05, + "loss": 1.5261, + "step": 2360 + }, + { + "epoch": 0.01, + "learning_rate": 9.996761053251446e-05, + "loss": 1.5335, + "step": 2365 + }, + { + "epoch": 0.01, + "learning_rate": 9.996746739034786e-05, + "loss": 1.5407, + "step": 2370 + }, + { + "epoch": 0.01, + "learning_rate": 9.99673239326796e-05, + "loss": 1.5252, + "step": 2375 + }, + { + "epoch": 0.01, + "learning_rate": 9.996718015951061e-05, + "loss": 1.4937, + "step": 2380 + }, + { + "epoch": 0.01, + "learning_rate": 9.996703607084179e-05, + "loss": 1.5121, + "step": 2385 + }, + { + "epoch": 0.01, + "learning_rate": 9.996689166667406e-05, + "loss": 1.4867, + "step": 2390 + }, + { + "epoch": 0.01, + "learning_rate": 9.99667469470083e-05, + "loss": 1.5516, + "step": 2395 + }, + { + "epoch": 0.01, + "learning_rate": 9.996660191184546e-05, + "loss": 1.5321, + "step": 2400 + }, + { + "epoch": 0.01, + "learning_rate": 9.996645656118644e-05, + "loss": 1.5535, + "step": 2405 + }, + { + "epoch": 0.01, + "learning_rate": 9.996631089503214e-05, + "loss": 1.5573, + "step": 2410 + }, + { + "epoch": 0.01, + "learning_rate": 9.996616491338352e-05, + "loss": 1.5682, + "step": 2415 + }, + { + "epoch": 0.01, + "learning_rate": 9.996601861624147e-05, + "loss": 1.5171, + "step": 2420 + }, + { + "epoch": 0.01, + "learning_rate": 9.996587200360692e-05, + "loss": 1.5273, + "step": 2425 + }, + { + "epoch": 0.01, + "learning_rate": 9.996572507548081e-05, + "loss": 1.4945, + "step": 2430 + }, + { + "epoch": 0.01, + "learning_rate": 9.996557783186406e-05, + "loss": 1.5208, + "step": 2435 + }, + { + "epoch": 0.01, + "learning_rate": 9.996543027275758e-05, + "loss": 1.529, + "step": 2440 + }, + { + "epoch": 0.01, + "learning_rate": 9.996528239816233e-05, + "loss": 1.4699, + "step": 2445 + }, + { + "epoch": 0.01, + "learning_rate": 9.996513420807923e-05, + "loss": 1.5693, + "step": 2450 + }, + { + "epoch": 0.01, + "learning_rate": 9.996498570250922e-05, + "loss": 1.477, + "step": 2455 + }, + { + "epoch": 0.01, + "learning_rate": 9.996483688145324e-05, + "loss": 1.454, + "step": 2460 + }, + { + "epoch": 0.01, + "learning_rate": 9.996468774491222e-05, + "loss": 1.507, + "step": 2465 + }, + { + "epoch": 0.01, + "learning_rate": 9.99645382928871e-05, + "loss": 1.5086, + "step": 2470 + }, + { + "epoch": 0.01, + "learning_rate": 9.996438852537884e-05, + "loss": 1.4753, + "step": 2475 + }, + { + "epoch": 0.01, + "learning_rate": 9.996423844238836e-05, + "loss": 1.5156, + "step": 2480 + }, + { + "epoch": 0.01, + "learning_rate": 9.996408804391663e-05, + "loss": 1.4741, + "step": 2485 + }, + { + "epoch": 0.01, + "learning_rate": 9.996393732996461e-05, + "loss": 1.5344, + "step": 2490 + }, + { + "epoch": 0.01, + "learning_rate": 9.996378630053322e-05, + "loss": 1.5078, + "step": 2495 + }, + { + "epoch": 0.01, + "learning_rate": 9.996363495562344e-05, + "loss": 1.5143, + "step": 2500 + }, + { + "epoch": 0.01, + "learning_rate": 9.996348329523622e-05, + "loss": 1.5224, + "step": 2505 + }, + { + "epoch": 0.01, + "learning_rate": 9.996333131937248e-05, + "loss": 1.5095, + "step": 2510 + }, + { + "epoch": 0.01, + "learning_rate": 9.996317902803323e-05, + "loss": 1.451, + "step": 2515 + }, + { + "epoch": 0.01, + "learning_rate": 9.996302642121942e-05, + "loss": 1.5299, + "step": 2520 + }, + { + "epoch": 0.01, + "learning_rate": 9.9962873498932e-05, + "loss": 1.5151, + "step": 2525 + }, + { + "epoch": 0.01, + "learning_rate": 9.996272026117196e-05, + "loss": 1.5189, + "step": 2530 + }, + { + "epoch": 0.01, + "learning_rate": 9.996256670794022e-05, + "loss": 1.5397, + "step": 2535 + }, + { + "epoch": 0.01, + "learning_rate": 9.99624128392378e-05, + "loss": 1.5134, + "step": 2540 + }, + { + "epoch": 0.01, + "learning_rate": 9.996225865506564e-05, + "loss": 1.5024, + "step": 2545 + }, + { + "epoch": 0.01, + "learning_rate": 9.996210415542473e-05, + "loss": 1.5519, + "step": 2550 + }, + { + "epoch": 0.01, + "learning_rate": 9.996194934031604e-05, + "loss": 1.5404, + "step": 2555 + }, + { + "epoch": 0.01, + "learning_rate": 9.996179420974055e-05, + "loss": 1.4881, + "step": 2560 + }, + { + "epoch": 0.01, + "learning_rate": 9.996163876369924e-05, + "loss": 1.5769, + "step": 2565 + }, + { + "epoch": 0.01, + "learning_rate": 9.996148300219308e-05, + "loss": 1.5814, + "step": 2570 + }, + { + "epoch": 0.01, + "learning_rate": 9.996132692522306e-05, + "loss": 1.4895, + "step": 2575 + }, + { + "epoch": 0.01, + "learning_rate": 9.996117053279017e-05, + "loss": 1.5461, + "step": 2580 + }, + { + "epoch": 0.01, + "learning_rate": 9.99610138248954e-05, + "loss": 1.5383, + "step": 2585 + }, + { + "epoch": 0.01, + "learning_rate": 9.996085680153974e-05, + "loss": 1.5121, + "step": 2590 + }, + { + "epoch": 0.01, + "learning_rate": 9.996069946272416e-05, + "loss": 1.5801, + "step": 2595 + }, + { + "epoch": 0.01, + "learning_rate": 9.996054180844968e-05, + "loss": 1.5253, + "step": 2600 + }, + { + "epoch": 0.01, + "learning_rate": 9.996038383871729e-05, + "loss": 1.5258, + "step": 2605 + }, + { + "epoch": 0.01, + "learning_rate": 9.996022555352797e-05, + "loss": 1.5068, + "step": 2610 + }, + { + "epoch": 0.01, + "learning_rate": 9.996006695288273e-05, + "loss": 1.5297, + "step": 2615 + }, + { + "epoch": 0.01, + "learning_rate": 9.995990803678259e-05, + "loss": 1.5454, + "step": 2620 + }, + { + "epoch": 0.01, + "learning_rate": 9.995974880522853e-05, + "loss": 1.5164, + "step": 2625 + }, + { + "epoch": 0.01, + "learning_rate": 9.995958925822156e-05, + "loss": 1.5505, + "step": 2630 + }, + { + "epoch": 0.01, + "learning_rate": 9.995942939576268e-05, + "loss": 1.5168, + "step": 2635 + }, + { + "epoch": 0.01, + "learning_rate": 9.995926921785292e-05, + "loss": 1.5132, + "step": 2640 + }, + { + "epoch": 0.01, + "learning_rate": 9.995910872449327e-05, + "loss": 1.4964, + "step": 2645 + }, + { + "epoch": 0.01, + "learning_rate": 9.995894791568477e-05, + "loss": 1.5126, + "step": 2650 + }, + { + "epoch": 0.01, + "learning_rate": 9.99587867914284e-05, + "loss": 1.4778, + "step": 2655 + }, + { + "epoch": 0.01, + "learning_rate": 9.995862535172522e-05, + "loss": 1.5032, + "step": 2660 + }, + { + "epoch": 0.01, + "learning_rate": 9.995846359657622e-05, + "loss": 1.494, + "step": 2665 + }, + { + "epoch": 0.01, + "learning_rate": 9.995830152598241e-05, + "loss": 1.5445, + "step": 2670 + }, + { + "epoch": 0.01, + "learning_rate": 9.995813913994484e-05, + "loss": 1.5408, + "step": 2675 + }, + { + "epoch": 0.01, + "learning_rate": 9.995797643846453e-05, + "loss": 1.4772, + "step": 2680 + }, + { + "epoch": 0.01, + "learning_rate": 9.995781342154249e-05, + "loss": 1.5315, + "step": 2685 + }, + { + "epoch": 0.01, + "learning_rate": 9.995765008917977e-05, + "loss": 1.5888, + "step": 2690 + }, + { + "epoch": 0.01, + "learning_rate": 9.99574864413774e-05, + "loss": 1.4974, + "step": 2695 + }, + { + "epoch": 0.01, + "learning_rate": 9.99573224781364e-05, + "loss": 1.509, + "step": 2700 + }, + { + "epoch": 0.01, + "learning_rate": 9.995715819945783e-05, + "loss": 1.5193, + "step": 2705 + }, + { + "epoch": 0.01, + "learning_rate": 9.995699360534269e-05, + "loss": 1.528, + "step": 2710 + }, + { + "epoch": 0.01, + "learning_rate": 9.995682869579203e-05, + "loss": 1.5071, + "step": 2715 + }, + { + "epoch": 0.01, + "learning_rate": 9.995666347080692e-05, + "loss": 1.494, + "step": 2720 + }, + { + "epoch": 0.01, + "learning_rate": 9.995649793038837e-05, + "loss": 1.512, + "step": 2725 + }, + { + "epoch": 0.01, + "learning_rate": 9.995633207453745e-05, + "loss": 1.5239, + "step": 2730 + }, + { + "epoch": 0.01, + "learning_rate": 9.99561659032552e-05, + "loss": 1.5525, + "step": 2735 + }, + { + "epoch": 0.01, + "learning_rate": 9.995599941654266e-05, + "loss": 1.5474, + "step": 2740 + }, + { + "epoch": 0.01, + "learning_rate": 9.995583261440087e-05, + "loss": 1.557, + "step": 2745 + }, + { + "epoch": 0.01, + "learning_rate": 9.99556654968309e-05, + "loss": 1.5016, + "step": 2750 + }, + { + "epoch": 0.01, + "learning_rate": 9.995549806383383e-05, + "loss": 1.5156, + "step": 2755 + }, + { + "epoch": 0.01, + "learning_rate": 9.995533031541067e-05, + "loss": 1.4697, + "step": 2760 + }, + { + "epoch": 0.01, + "learning_rate": 9.995516225156251e-05, + "loss": 1.492, + "step": 2765 + }, + { + "epoch": 0.01, + "learning_rate": 9.995499387229041e-05, + "loss": 1.542, + "step": 2770 + }, + { + "epoch": 0.01, + "learning_rate": 9.995482517759543e-05, + "loss": 1.5223, + "step": 2775 + }, + { + "epoch": 0.01, + "learning_rate": 9.995465616747861e-05, + "loss": 1.5568, + "step": 2780 + }, + { + "epoch": 0.01, + "learning_rate": 9.995448684194104e-05, + "loss": 1.5111, + "step": 2785 + }, + { + "epoch": 0.01, + "learning_rate": 9.99543172009838e-05, + "loss": 1.5416, + "step": 2790 + }, + { + "epoch": 0.01, + "learning_rate": 9.995414724460793e-05, + "loss": 1.4793, + "step": 2795 + }, + { + "epoch": 0.01, + "learning_rate": 9.995397697281453e-05, + "loss": 1.4842, + "step": 2800 + }, + { + "epoch": 0.01, + "learning_rate": 9.995380638560468e-05, + "loss": 1.4727, + "step": 2805 + }, + { + "epoch": 0.01, + "learning_rate": 9.995363548297943e-05, + "loss": 1.5372, + "step": 2810 + }, + { + "epoch": 0.01, + "learning_rate": 9.995346426493987e-05, + "loss": 1.486, + "step": 2815 + }, + { + "epoch": 0.01, + "learning_rate": 9.995329273148708e-05, + "loss": 1.5398, + "step": 2820 + }, + { + "epoch": 0.01, + "learning_rate": 9.995312088262216e-05, + "loss": 1.506, + "step": 2825 + }, + { + "epoch": 0.01, + "learning_rate": 9.995294871834617e-05, + "loss": 1.5192, + "step": 2830 + }, + { + "epoch": 0.01, + "learning_rate": 9.995277623866022e-05, + "loss": 1.5395, + "step": 2835 + }, + { + "epoch": 0.01, + "learning_rate": 9.995260344356539e-05, + "loss": 1.5055, + "step": 2840 + }, + { + "epoch": 0.01, + "learning_rate": 9.995243033306276e-05, + "loss": 1.5103, + "step": 2845 + }, + { + "epoch": 0.01, + "learning_rate": 9.995225690715344e-05, + "loss": 1.4474, + "step": 2850 + }, + { + "epoch": 0.01, + "learning_rate": 9.995208316583851e-05, + "loss": 1.5004, + "step": 2855 + }, + { + "epoch": 0.01, + "learning_rate": 9.995190910911907e-05, + "loss": 1.4629, + "step": 2860 + }, + { + "epoch": 0.01, + "learning_rate": 9.995173473699621e-05, + "loss": 1.5233, + "step": 2865 + }, + { + "epoch": 0.01, + "learning_rate": 9.995156004947107e-05, + "loss": 1.4981, + "step": 2870 + }, + { + "epoch": 0.01, + "learning_rate": 9.995138504654472e-05, + "loss": 1.4929, + "step": 2875 + }, + { + "epoch": 0.01, + "learning_rate": 9.995120972821828e-05, + "loss": 1.505, + "step": 2880 + }, + { + "epoch": 0.01, + "learning_rate": 9.995103409449282e-05, + "loss": 1.5656, + "step": 2885 + }, + { + "epoch": 0.01, + "learning_rate": 9.99508581453695e-05, + "loss": 1.5047, + "step": 2890 + }, + { + "epoch": 0.01, + "learning_rate": 9.995068188084942e-05, + "loss": 1.4868, + "step": 2895 + }, + { + "epoch": 0.01, + "learning_rate": 9.995050530093367e-05, + "loss": 1.5294, + "step": 2900 + }, + { + "epoch": 0.01, + "learning_rate": 9.995032840562338e-05, + "loss": 1.5434, + "step": 2905 + }, + { + "epoch": 0.01, + "learning_rate": 9.995015119491965e-05, + "loss": 1.533, + "step": 2910 + }, + { + "epoch": 0.01, + "learning_rate": 9.994997366882361e-05, + "loss": 1.5523, + "step": 2915 + }, + { + "epoch": 0.01, + "learning_rate": 9.994979582733642e-05, + "loss": 1.5377, + "step": 2920 + }, + { + "epoch": 0.01, + "learning_rate": 9.994961767045913e-05, + "loss": 1.4969, + "step": 2925 + }, + { + "epoch": 0.01, + "learning_rate": 9.994943919819291e-05, + "loss": 1.5979, + "step": 2930 + }, + { + "epoch": 0.01, + "learning_rate": 9.994926041053889e-05, + "loss": 1.5196, + "step": 2935 + }, + { + "epoch": 0.01, + "learning_rate": 9.994908130749818e-05, + "loss": 1.5421, + "step": 2940 + }, + { + "epoch": 0.01, + "learning_rate": 9.994890188907191e-05, + "loss": 1.5627, + "step": 2945 + }, + { + "epoch": 0.01, + "learning_rate": 9.994872215526124e-05, + "loss": 1.5439, + "step": 2950 + }, + { + "epoch": 0.01, + "learning_rate": 9.994854210606728e-05, + "loss": 1.5637, + "step": 2955 + }, + { + "epoch": 0.01, + "learning_rate": 9.994836174149116e-05, + "loss": 1.465, + "step": 2960 + }, + { + "epoch": 0.01, + "learning_rate": 9.994818106153402e-05, + "loss": 1.503, + "step": 2965 + }, + { + "epoch": 0.02, + "learning_rate": 9.994800006619705e-05, + "loss": 1.5463, + "step": 2970 + }, + { + "epoch": 0.02, + "learning_rate": 9.994781875548134e-05, + "loss": 1.5887, + "step": 2975 + }, + { + "epoch": 0.02, + "learning_rate": 9.994763712938804e-05, + "loss": 1.538, + "step": 2980 + }, + { + "epoch": 0.02, + "learning_rate": 9.994745518791832e-05, + "loss": 1.4573, + "step": 2985 + }, + { + "epoch": 0.02, + "learning_rate": 9.994727293107331e-05, + "loss": 1.5372, + "step": 2990 + }, + { + "epoch": 0.02, + "learning_rate": 9.994709035885417e-05, + "loss": 1.5234, + "step": 2995 + }, + { + "epoch": 0.02, + "learning_rate": 9.994690747126204e-05, + "loss": 1.4949, + "step": 3000 + }, + { + "epoch": 0.02, + "learning_rate": 9.99467242682981e-05, + "loss": 1.5092, + "step": 3005 + }, + { + "epoch": 0.02, + "learning_rate": 9.994654074996348e-05, + "loss": 1.5308, + "step": 3010 + }, + { + "epoch": 0.02, + "learning_rate": 9.994635691625935e-05, + "loss": 1.5072, + "step": 3015 + }, + { + "epoch": 0.02, + "learning_rate": 9.994617276718686e-05, + "loss": 1.4754, + "step": 3020 + }, + { + "epoch": 0.02, + "learning_rate": 9.99459883027472e-05, + "loss": 1.4705, + "step": 3025 + }, + { + "epoch": 0.02, + "learning_rate": 9.994580352294152e-05, + "loss": 1.5202, + "step": 3030 + }, + { + "epoch": 0.02, + "learning_rate": 9.994561842777097e-05, + "loss": 1.5326, + "step": 3035 + }, + { + "epoch": 0.02, + "learning_rate": 9.994543301723674e-05, + "loss": 1.4853, + "step": 3040 + }, + { + "epoch": 0.02, + "learning_rate": 9.994524729134e-05, + "loss": 1.5118, + "step": 3045 + }, + { + "epoch": 0.02, + "learning_rate": 9.994506125008189e-05, + "loss": 1.4941, + "step": 3050 + }, + { + "epoch": 0.02, + "learning_rate": 9.994487489346364e-05, + "loss": 1.5047, + "step": 3055 + }, + { + "epoch": 0.02, + "learning_rate": 9.99446882214864e-05, + "loss": 1.5304, + "step": 3060 + }, + { + "epoch": 0.02, + "learning_rate": 9.994450123415133e-05, + "loss": 1.494, + "step": 3065 + }, + { + "epoch": 0.02, + "learning_rate": 9.994431393145965e-05, + "loss": 1.4773, + "step": 3070 + }, + { + "epoch": 0.02, + "learning_rate": 9.994412631341249e-05, + "loss": 1.5026, + "step": 3075 + }, + { + "epoch": 0.02, + "learning_rate": 9.994393838001108e-05, + "loss": 1.5637, + "step": 3080 + }, + { + "epoch": 0.02, + "learning_rate": 9.994375013125659e-05, + "loss": 1.5263, + "step": 3085 + }, + { + "epoch": 0.02, + "learning_rate": 9.994356156715022e-05, + "loss": 1.5137, + "step": 3090 + }, + { + "epoch": 0.02, + "learning_rate": 9.994337268769314e-05, + "loss": 1.5198, + "step": 3095 + }, + { + "epoch": 0.02, + "learning_rate": 9.994318349288655e-05, + "loss": 1.518, + "step": 3100 + }, + { + "epoch": 0.02, + "learning_rate": 9.994299398273165e-05, + "loss": 1.5231, + "step": 3105 + }, + { + "epoch": 0.02, + "learning_rate": 9.994280415722963e-05, + "loss": 1.5613, + "step": 3110 + }, + { + "epoch": 0.02, + "learning_rate": 9.994261401638171e-05, + "loss": 1.4401, + "step": 3115 + }, + { + "epoch": 0.02, + "learning_rate": 9.994242356018905e-05, + "loss": 1.4348, + "step": 3120 + }, + { + "epoch": 0.02, + "learning_rate": 9.99422327886529e-05, + "loss": 1.5746, + "step": 3125 + }, + { + "epoch": 0.02, + "learning_rate": 9.994204170177444e-05, + "loss": 1.5122, + "step": 3130 + }, + { + "epoch": 0.02, + "learning_rate": 9.994185029955486e-05, + "loss": 1.518, + "step": 3135 + }, + { + "epoch": 0.02, + "learning_rate": 9.994165858199539e-05, + "loss": 1.4871, + "step": 3140 + }, + { + "epoch": 0.02, + "learning_rate": 9.994146654909725e-05, + "loss": 1.4953, + "step": 3145 + }, + { + "epoch": 0.02, + "learning_rate": 9.994127420086161e-05, + "loss": 1.5215, + "step": 3150 + }, + { + "epoch": 0.02, + "learning_rate": 9.994108153728973e-05, + "loss": 1.4941, + "step": 3155 + }, + { + "epoch": 0.02, + "learning_rate": 9.994088855838282e-05, + "loss": 1.5802, + "step": 3160 + }, + { + "epoch": 0.02, + "learning_rate": 9.994069526414208e-05, + "loss": 1.5252, + "step": 3165 + }, + { + "epoch": 0.02, + "learning_rate": 9.994050165456874e-05, + "loss": 1.559, + "step": 3170 + }, + { + "epoch": 0.02, + "learning_rate": 9.994030772966403e-05, + "loss": 1.5166, + "step": 3175 + }, + { + "epoch": 0.02, + "learning_rate": 9.994011348942915e-05, + "loss": 1.4929, + "step": 3180 + }, + { + "epoch": 0.02, + "learning_rate": 9.993991893386534e-05, + "loss": 1.5256, + "step": 3185 + }, + { + "epoch": 0.02, + "learning_rate": 9.993972406297385e-05, + "loss": 1.49, + "step": 3190 + }, + { + "epoch": 0.02, + "learning_rate": 9.993952887675587e-05, + "loss": 1.555, + "step": 3195 + }, + { + "epoch": 0.02, + "learning_rate": 9.993933337521267e-05, + "loss": 1.4504, + "step": 3200 + }, + { + "epoch": 0.02, + "learning_rate": 9.993913755834546e-05, + "loss": 1.5377, + "step": 3205 + }, + { + "epoch": 0.02, + "learning_rate": 9.993894142615549e-05, + "loss": 1.5529, + "step": 3210 + }, + { + "epoch": 0.02, + "learning_rate": 9.993874497864399e-05, + "loss": 1.4826, + "step": 3215 + }, + { + "epoch": 0.02, + "learning_rate": 9.99385482158122e-05, + "loss": 1.5276, + "step": 3220 + }, + { + "epoch": 0.02, + "learning_rate": 9.993835113766136e-05, + "loss": 1.4329, + "step": 3225 + }, + { + "epoch": 0.02, + "learning_rate": 9.993815374419273e-05, + "loss": 1.5641, + "step": 3230 + }, + { + "epoch": 0.02, + "learning_rate": 9.993795603540754e-05, + "loss": 1.5104, + "step": 3235 + }, + { + "epoch": 0.02, + "learning_rate": 9.993775801130706e-05, + "loss": 1.5004, + "step": 3240 + }, + { + "epoch": 0.02, + "learning_rate": 9.99375596718925e-05, + "loss": 1.4776, + "step": 3245 + }, + { + "epoch": 0.02, + "learning_rate": 9.993736101716517e-05, + "loss": 1.4893, + "step": 3250 + }, + { + "epoch": 0.02, + "learning_rate": 9.993716204712626e-05, + "loss": 1.5128, + "step": 3255 + }, + { + "epoch": 0.02, + "learning_rate": 9.993696276177708e-05, + "loss": 1.5456, + "step": 3260 + }, + { + "epoch": 0.02, + "learning_rate": 9.993676316111887e-05, + "loss": 1.4825, + "step": 3265 + }, + { + "epoch": 0.02, + "learning_rate": 9.993656324515286e-05, + "loss": 1.5172, + "step": 3270 + }, + { + "epoch": 0.02, + "learning_rate": 9.993636301388036e-05, + "loss": 1.5193, + "step": 3275 + }, + { + "epoch": 0.02, + "learning_rate": 9.993616246730259e-05, + "loss": 1.5517, + "step": 3280 + }, + { + "epoch": 0.02, + "learning_rate": 9.993596160542087e-05, + "loss": 1.5099, + "step": 3285 + }, + { + "epoch": 0.02, + "learning_rate": 9.993576042823642e-05, + "loss": 1.4761, + "step": 3290 + }, + { + "epoch": 0.02, + "learning_rate": 9.993555893575055e-05, + "loss": 1.5321, + "step": 3295 + }, + { + "epoch": 0.02, + "learning_rate": 9.993535712796447e-05, + "loss": 1.5451, + "step": 3300 + }, + { + "epoch": 0.02, + "learning_rate": 9.993515500487954e-05, + "loss": 1.5722, + "step": 3305 + }, + { + "epoch": 0.02, + "learning_rate": 9.993495256649695e-05, + "loss": 1.4602, + "step": 3310 + }, + { + "epoch": 0.02, + "learning_rate": 9.993474981281805e-05, + "loss": 1.5073, + "step": 3315 + }, + { + "epoch": 0.02, + "learning_rate": 9.993454674384407e-05, + "loss": 1.5392, + "step": 3320 + }, + { + "epoch": 0.02, + "learning_rate": 9.993434335957632e-05, + "loss": 1.5624, + "step": 3325 + }, + { + "epoch": 0.02, + "learning_rate": 9.993413966001608e-05, + "loss": 1.5157, + "step": 3330 + }, + { + "epoch": 0.02, + "learning_rate": 9.993393564516462e-05, + "loss": 1.5243, + "step": 3335 + }, + { + "epoch": 0.02, + "learning_rate": 9.993373131502324e-05, + "loss": 1.5236, + "step": 3340 + }, + { + "epoch": 0.02, + "learning_rate": 9.993352666959324e-05, + "loss": 1.5148, + "step": 3345 + }, + { + "epoch": 0.02, + "learning_rate": 9.993332170887591e-05, + "loss": 1.5247, + "step": 3350 + }, + { + "epoch": 0.02, + "learning_rate": 9.99331164328725e-05, + "loss": 1.4501, + "step": 3355 + }, + { + "epoch": 0.02, + "learning_rate": 9.993291084158438e-05, + "loss": 1.5403, + "step": 3360 + }, + { + "epoch": 0.02, + "learning_rate": 9.99327049350128e-05, + "loss": 1.4707, + "step": 3365 + }, + { + "epoch": 0.02, + "learning_rate": 9.993249871315906e-05, + "loss": 1.4824, + "step": 3370 + }, + { + "epoch": 0.02, + "learning_rate": 9.99322921760245e-05, + "loss": 1.4943, + "step": 3375 + }, + { + "epoch": 0.02, + "learning_rate": 9.993208532361036e-05, + "loss": 1.5315, + "step": 3380 + }, + { + "epoch": 0.02, + "learning_rate": 9.993187815591801e-05, + "loss": 1.5344, + "step": 3385 + }, + { + "epoch": 0.02, + "learning_rate": 9.993167067294873e-05, + "loss": 1.5218, + "step": 3390 + }, + { + "epoch": 0.02, + "learning_rate": 9.993146287470383e-05, + "loss": 1.5238, + "step": 3395 + }, + { + "epoch": 0.02, + "learning_rate": 9.993125476118462e-05, + "loss": 1.5105, + "step": 3400 + }, + { + "epoch": 0.02, + "learning_rate": 9.99310463323924e-05, + "loss": 1.5031, + "step": 3405 + }, + { + "epoch": 0.02, + "learning_rate": 9.993083758832853e-05, + "loss": 1.5065, + "step": 3410 + }, + { + "epoch": 0.02, + "learning_rate": 9.993062852899429e-05, + "loss": 1.4841, + "step": 3415 + }, + { + "epoch": 0.02, + "learning_rate": 9.9930419154391e-05, + "loss": 1.5589, + "step": 3420 + }, + { + "epoch": 0.02, + "learning_rate": 9.993020946452002e-05, + "loss": 1.4913, + "step": 3425 + }, + { + "epoch": 0.02, + "learning_rate": 9.992999945938264e-05, + "loss": 1.516, + "step": 3430 + }, + { + "epoch": 0.02, + "learning_rate": 9.99297891389802e-05, + "loss": 1.5157, + "step": 3435 + }, + { + "epoch": 0.02, + "learning_rate": 9.992957850331399e-05, + "loss": 1.5128, + "step": 3440 + }, + { + "epoch": 0.02, + "learning_rate": 9.99293675523854e-05, + "loss": 1.5151, + "step": 3445 + }, + { + "epoch": 0.02, + "learning_rate": 9.992915628619572e-05, + "loss": 1.4946, + "step": 3450 + }, + { + "epoch": 0.02, + "learning_rate": 9.99289447047463e-05, + "loss": 1.4794, + "step": 3455 + }, + { + "epoch": 0.02, + "learning_rate": 9.992873280803849e-05, + "loss": 1.4567, + "step": 3460 + }, + { + "epoch": 0.02, + "learning_rate": 9.992852059607358e-05, + "loss": 1.5259, + "step": 3465 + }, + { + "epoch": 0.02, + "learning_rate": 9.992830806885296e-05, + "loss": 1.5298, + "step": 3470 + }, + { + "epoch": 0.02, + "learning_rate": 9.992809522637795e-05, + "loss": 1.5511, + "step": 3475 + }, + { + "epoch": 0.02, + "learning_rate": 9.992788206864991e-05, + "loss": 1.5268, + "step": 3480 + }, + { + "epoch": 0.02, + "learning_rate": 9.992766859567017e-05, + "loss": 1.4739, + "step": 3485 + }, + { + "epoch": 0.02, + "learning_rate": 9.992745480744008e-05, + "loss": 1.4906, + "step": 3490 + }, + { + "epoch": 0.02, + "learning_rate": 9.992724070396098e-05, + "loss": 1.5345, + "step": 3495 + }, + { + "epoch": 0.02, + "learning_rate": 9.992702628523422e-05, + "loss": 1.4536, + "step": 3500 + }, + { + "epoch": 0.02, + "learning_rate": 9.99268115512612e-05, + "loss": 1.4975, + "step": 3505 + }, + { + "epoch": 0.02, + "learning_rate": 9.992659650204323e-05, + "loss": 1.5348, + "step": 3510 + }, + { + "epoch": 0.02, + "learning_rate": 9.992638113758168e-05, + "loss": 1.5162, + "step": 3515 + }, + { + "epoch": 0.02, + "learning_rate": 9.99261654578779e-05, + "loss": 1.5277, + "step": 3520 + }, + { + "epoch": 0.02, + "learning_rate": 9.992594946293327e-05, + "loss": 1.5186, + "step": 3525 + }, + { + "epoch": 0.02, + "learning_rate": 9.992573315274914e-05, + "loss": 1.4776, + "step": 3530 + }, + { + "epoch": 0.02, + "learning_rate": 9.99255165273269e-05, + "loss": 1.502, + "step": 3535 + }, + { + "epoch": 0.02, + "learning_rate": 9.992529958666788e-05, + "loss": 1.5049, + "step": 3540 + }, + { + "epoch": 0.02, + "learning_rate": 9.992508233077348e-05, + "loss": 1.512, + "step": 3545 + }, + { + "epoch": 0.02, + "learning_rate": 9.992486475964506e-05, + "loss": 1.5675, + "step": 3550 + }, + { + "epoch": 0.02, + "learning_rate": 9.9924646873284e-05, + "loss": 1.5572, + "step": 3555 + }, + { + "epoch": 0.02, + "learning_rate": 9.992442867169165e-05, + "loss": 1.5045, + "step": 3560 + }, + { + "epoch": 0.02, + "learning_rate": 9.992421015486943e-05, + "loss": 1.5, + "step": 3565 + }, + { + "epoch": 0.02, + "learning_rate": 9.992399132281869e-05, + "loss": 1.5228, + "step": 3570 + }, + { + "epoch": 0.02, + "learning_rate": 9.992377217554082e-05, + "loss": 1.516, + "step": 3575 + }, + { + "epoch": 0.02, + "learning_rate": 9.992355271303719e-05, + "loss": 1.4356, + "step": 3580 + }, + { + "epoch": 0.02, + "learning_rate": 9.992333293530922e-05, + "loss": 1.532, + "step": 3585 + }, + { + "epoch": 0.02, + "learning_rate": 9.992311284235827e-05, + "loss": 1.5011, + "step": 3590 + }, + { + "epoch": 0.02, + "learning_rate": 9.992289243418574e-05, + "loss": 1.4729, + "step": 3595 + }, + { + "epoch": 0.02, + "learning_rate": 9.992267171079301e-05, + "loss": 1.4935, + "step": 3600 + }, + { + "epoch": 0.02, + "learning_rate": 9.99224506721815e-05, + "loss": 1.5192, + "step": 3605 + }, + { + "epoch": 0.02, + "learning_rate": 9.992222931835258e-05, + "loss": 1.5306, + "step": 3610 + }, + { + "epoch": 0.02, + "learning_rate": 9.992200764930763e-05, + "loss": 1.5288, + "step": 3615 + }, + { + "epoch": 0.02, + "learning_rate": 9.992178566504811e-05, + "loss": 1.5358, + "step": 3620 + }, + { + "epoch": 0.02, + "learning_rate": 9.992156336557536e-05, + "loss": 1.5034, + "step": 3625 + }, + { + "epoch": 0.02, + "learning_rate": 9.992134075089084e-05, + "loss": 1.4965, + "step": 3630 + }, + { + "epoch": 0.02, + "learning_rate": 9.99211178209959e-05, + "loss": 1.5002, + "step": 3635 + }, + { + "epoch": 0.02, + "learning_rate": 9.992089457589198e-05, + "loss": 1.4796, + "step": 3640 + }, + { + "epoch": 0.02, + "learning_rate": 9.99206710155805e-05, + "loss": 1.5193, + "step": 3645 + }, + { + "epoch": 0.02, + "learning_rate": 9.992044714006283e-05, + "loss": 1.5129, + "step": 3650 + }, + { + "epoch": 0.02, + "learning_rate": 9.992022294934042e-05, + "loss": 1.4832, + "step": 3655 + }, + { + "epoch": 0.02, + "learning_rate": 9.991999844341467e-05, + "loss": 1.4397, + "step": 3660 + }, + { + "epoch": 0.02, + "learning_rate": 9.9919773622287e-05, + "loss": 1.4875, + "step": 3665 + }, + { + "epoch": 0.02, + "learning_rate": 9.991954848595883e-05, + "loss": 1.5253, + "step": 3670 + }, + { + "epoch": 0.02, + "learning_rate": 9.99193230344316e-05, + "loss": 1.4675, + "step": 3675 + }, + { + "epoch": 0.02, + "learning_rate": 9.991909726770671e-05, + "loss": 1.5533, + "step": 3680 + }, + { + "epoch": 0.02, + "learning_rate": 9.991887118578558e-05, + "loss": 1.5269, + "step": 3685 + }, + { + "epoch": 0.02, + "learning_rate": 9.991864478866966e-05, + "loss": 1.4913, + "step": 3690 + }, + { + "epoch": 0.02, + "learning_rate": 9.991841807636036e-05, + "loss": 1.4657, + "step": 3695 + }, + { + "epoch": 0.02, + "learning_rate": 9.991819104885912e-05, + "loss": 1.5898, + "step": 3700 + }, + { + "epoch": 0.02, + "learning_rate": 9.991796370616738e-05, + "loss": 1.471, + "step": 3705 + }, + { + "epoch": 0.02, + "learning_rate": 9.991773604828657e-05, + "loss": 1.5591, + "step": 3710 + }, + { + "epoch": 0.02, + "learning_rate": 9.991750807521811e-05, + "loss": 1.5143, + "step": 3715 + }, + { + "epoch": 0.02, + "learning_rate": 9.991727978696348e-05, + "loss": 1.5713, + "step": 3720 + }, + { + "epoch": 0.02, + "learning_rate": 9.991705118352408e-05, + "loss": 1.4789, + "step": 3725 + }, + { + "epoch": 0.02, + "learning_rate": 9.991682226490137e-05, + "loss": 1.4906, + "step": 3730 + }, + { + "epoch": 0.02, + "learning_rate": 9.991659303109681e-05, + "loss": 1.5641, + "step": 3735 + }, + { + "epoch": 0.02, + "learning_rate": 9.991636348211184e-05, + "loss": 1.5029, + "step": 3740 + }, + { + "epoch": 0.02, + "learning_rate": 9.99161336179479e-05, + "loss": 1.4743, + "step": 3745 + }, + { + "epoch": 0.02, + "learning_rate": 9.991590343860642e-05, + "loss": 1.5302, + "step": 3750 + }, + { + "epoch": 0.02, + "learning_rate": 9.99156729440889e-05, + "loss": 1.5319, + "step": 3755 + }, + { + "epoch": 0.02, + "learning_rate": 9.991544213439677e-05, + "loss": 1.5088, + "step": 3760 + }, + { + "epoch": 0.02, + "learning_rate": 9.991521100953148e-05, + "loss": 1.4649, + "step": 3765 + }, + { + "epoch": 0.02, + "learning_rate": 9.991497956949452e-05, + "loss": 1.5334, + "step": 3770 + }, + { + "epoch": 0.02, + "learning_rate": 9.99147478142873e-05, + "loss": 1.5279, + "step": 3775 + }, + { + "epoch": 0.02, + "learning_rate": 9.991451574391134e-05, + "loss": 1.516, + "step": 3780 + }, + { + "epoch": 0.02, + "learning_rate": 9.991428335836808e-05, + "loss": 1.4928, + "step": 3785 + }, + { + "epoch": 0.02, + "learning_rate": 9.991405065765898e-05, + "loss": 1.4405, + "step": 3790 + }, + { + "epoch": 0.02, + "learning_rate": 9.991381764178551e-05, + "loss": 1.5142, + "step": 3795 + }, + { + "epoch": 0.02, + "learning_rate": 9.991358431074915e-05, + "loss": 1.4963, + "step": 3800 + }, + { + "epoch": 0.02, + "learning_rate": 9.991335066455138e-05, + "loss": 1.5076, + "step": 3805 + }, + { + "epoch": 0.02, + "learning_rate": 9.991311670319368e-05, + "loss": 1.5718, + "step": 3810 + }, + { + "epoch": 0.02, + "learning_rate": 9.991288242667749e-05, + "loss": 1.4916, + "step": 3815 + }, + { + "epoch": 0.02, + "learning_rate": 9.991264783500431e-05, + "loss": 1.4561, + "step": 3820 + }, + { + "epoch": 0.02, + "learning_rate": 9.991241292817564e-05, + "loss": 1.5167, + "step": 3825 + }, + { + "epoch": 0.02, + "learning_rate": 9.991217770619294e-05, + "loss": 1.5802, + "step": 3830 + }, + { + "epoch": 0.02, + "learning_rate": 9.991194216905771e-05, + "loss": 1.542, + "step": 3835 + }, + { + "epoch": 0.02, + "learning_rate": 9.991170631677143e-05, + "loss": 1.4965, + "step": 3840 + }, + { + "epoch": 0.02, + "learning_rate": 9.991147014933557e-05, + "loss": 1.5709, + "step": 3845 + }, + { + "epoch": 0.02, + "learning_rate": 9.991123366675166e-05, + "loss": 1.5383, + "step": 3850 + }, + { + "epoch": 0.02, + "learning_rate": 9.991099686902117e-05, + "loss": 1.4743, + "step": 3855 + }, + { + "epoch": 0.02, + "learning_rate": 9.99107597561456e-05, + "loss": 1.5117, + "step": 3860 + }, + { + "epoch": 0.02, + "learning_rate": 9.991052232812644e-05, + "loss": 1.4882, + "step": 3865 + }, + { + "epoch": 0.02, + "learning_rate": 9.99102845849652e-05, + "loss": 1.5421, + "step": 3870 + }, + { + "epoch": 0.02, + "learning_rate": 9.991004652666338e-05, + "loss": 1.499, + "step": 3875 + }, + { + "epoch": 0.02, + "learning_rate": 9.990980815322247e-05, + "loss": 1.5019, + "step": 3880 + }, + { + "epoch": 0.02, + "learning_rate": 9.990956946464399e-05, + "loss": 1.5189, + "step": 3885 + }, + { + "epoch": 0.02, + "learning_rate": 9.990933046092944e-05, + "loss": 1.5, + "step": 3890 + }, + { + "epoch": 0.02, + "learning_rate": 9.990909114208033e-05, + "loss": 1.504, + "step": 3895 + }, + { + "epoch": 0.02, + "learning_rate": 9.990885150809817e-05, + "loss": 1.493, + "step": 3900 + }, + { + "epoch": 0.02, + "learning_rate": 9.990861155898448e-05, + "loss": 1.5292, + "step": 3905 + }, + { + "epoch": 0.02, + "learning_rate": 9.990837129474075e-05, + "loss": 1.4811, + "step": 3910 + }, + { + "epoch": 0.02, + "learning_rate": 9.990813071536855e-05, + "loss": 1.5442, + "step": 3915 + }, + { + "epoch": 0.02, + "learning_rate": 9.990788982086934e-05, + "loss": 1.5521, + "step": 3920 + }, + { + "epoch": 0.02, + "learning_rate": 9.990764861124467e-05, + "loss": 1.5455, + "step": 3925 + }, + { + "epoch": 0.02, + "learning_rate": 9.990740708649607e-05, + "loss": 1.5175, + "step": 3930 + }, + { + "epoch": 0.02, + "learning_rate": 9.990716524662506e-05, + "loss": 1.5023, + "step": 3935 + }, + { + "epoch": 0.02, + "learning_rate": 9.990692309163314e-05, + "loss": 1.5238, + "step": 3940 + }, + { + "epoch": 0.02, + "learning_rate": 9.990668062152189e-05, + "loss": 1.4671, + "step": 3945 + }, + { + "epoch": 0.02, + "learning_rate": 9.990643783629279e-05, + "loss": 1.4789, + "step": 3950 + }, + { + "epoch": 0.02, + "learning_rate": 9.99061947359474e-05, + "loss": 1.5161, + "step": 3955 + }, + { + "epoch": 0.02, + "learning_rate": 9.990595132048726e-05, + "loss": 1.4652, + "step": 3960 + }, + { + "epoch": 0.02, + "learning_rate": 9.99057075899139e-05, + "loss": 1.4732, + "step": 3965 + }, + { + "epoch": 0.02, + "learning_rate": 9.990546354422883e-05, + "loss": 1.5079, + "step": 3970 + }, + { + "epoch": 0.02, + "learning_rate": 9.990521918343362e-05, + "loss": 1.5139, + "step": 3975 + }, + { + "epoch": 0.02, + "learning_rate": 9.990497450752983e-05, + "loss": 1.4872, + "step": 3980 + }, + { + "epoch": 0.02, + "learning_rate": 9.990472951651898e-05, + "loss": 1.494, + "step": 3985 + }, + { + "epoch": 0.02, + "learning_rate": 9.990448421040262e-05, + "loss": 1.5164, + "step": 3990 + }, + { + "epoch": 0.02, + "learning_rate": 9.99042385891823e-05, + "loss": 1.4692, + "step": 3995 + }, + { + "epoch": 0.02, + "learning_rate": 9.990399265285956e-05, + "loss": 1.4927, + "step": 4000 + }, + { + "epoch": 0.02, + "learning_rate": 9.990374640143599e-05, + "loss": 1.5416, + "step": 4005 + }, + { + "epoch": 0.02, + "learning_rate": 9.990349983491309e-05, + "loss": 1.5582, + "step": 4010 + }, + { + "epoch": 0.02, + "learning_rate": 9.990325295329246e-05, + "loss": 1.4653, + "step": 4015 + }, + { + "epoch": 0.02, + "learning_rate": 9.990300575657565e-05, + "loss": 1.5256, + "step": 4020 + }, + { + "epoch": 0.02, + "learning_rate": 9.990275824476421e-05, + "loss": 1.5464, + "step": 4025 + }, + { + "epoch": 0.02, + "learning_rate": 9.99025104178597e-05, + "loss": 1.5305, + "step": 4030 + }, + { + "epoch": 0.02, + "learning_rate": 9.990226227586371e-05, + "loss": 1.5468, + "step": 4035 + }, + { + "epoch": 0.02, + "learning_rate": 9.990201381877778e-05, + "loss": 1.486, + "step": 4040 + }, + { + "epoch": 0.02, + "learning_rate": 9.990176504660349e-05, + "loss": 1.5599, + "step": 4045 + }, + { + "epoch": 0.02, + "learning_rate": 9.990151595934242e-05, + "loss": 1.4714, + "step": 4050 + }, + { + "epoch": 0.02, + "learning_rate": 9.990126655699613e-05, + "loss": 1.4766, + "step": 4055 + }, + { + "epoch": 0.02, + "learning_rate": 9.990101683956619e-05, + "loss": 1.544, + "step": 4060 + }, + { + "epoch": 0.02, + "learning_rate": 9.990076680705418e-05, + "loss": 1.5168, + "step": 4065 + }, + { + "epoch": 0.02, + "learning_rate": 9.990051645946168e-05, + "loss": 1.5307, + "step": 4070 + }, + { + "epoch": 0.02, + "learning_rate": 9.990026579679029e-05, + "loss": 1.5073, + "step": 4075 + }, + { + "epoch": 0.02, + "learning_rate": 9.990001481904157e-05, + "loss": 1.4967, + "step": 4080 + }, + { + "epoch": 0.02, + "learning_rate": 9.98997635262171e-05, + "loss": 1.5607, + "step": 4085 + }, + { + "epoch": 0.02, + "learning_rate": 9.989951191831849e-05, + "loss": 1.4891, + "step": 4090 + }, + { + "epoch": 0.02, + "learning_rate": 9.98992599953473e-05, + "loss": 1.4783, + "step": 4095 + }, + { + "epoch": 0.02, + "learning_rate": 9.989900775730516e-05, + "loss": 1.4935, + "step": 4100 + }, + { + "epoch": 0.02, + "learning_rate": 9.989875520419363e-05, + "loss": 1.4895, + "step": 4105 + }, + { + "epoch": 0.02, + "learning_rate": 9.989850233601432e-05, + "loss": 1.4632, + "step": 4110 + }, + { + "epoch": 0.02, + "learning_rate": 9.989824915276881e-05, + "loss": 1.4752, + "step": 4115 + }, + { + "epoch": 0.02, + "learning_rate": 9.989799565445872e-05, + "loss": 1.5499, + "step": 4120 + }, + { + "epoch": 0.02, + "learning_rate": 9.989774184108563e-05, + "loss": 1.428, + "step": 4125 + }, + { + "epoch": 0.02, + "learning_rate": 9.989748771265114e-05, + "loss": 1.537, + "step": 4130 + }, + { + "epoch": 0.02, + "learning_rate": 9.98972332691569e-05, + "loss": 1.5157, + "step": 4135 + }, + { + "epoch": 0.02, + "learning_rate": 9.989697851060446e-05, + "loss": 1.4997, + "step": 4140 + }, + { + "epoch": 0.02, + "learning_rate": 9.989672343699547e-05, + "loss": 1.4946, + "step": 4145 + }, + { + "epoch": 0.02, + "learning_rate": 9.989646804833152e-05, + "loss": 1.5161, + "step": 4150 + }, + { + "epoch": 0.02, + "learning_rate": 9.989621234461422e-05, + "loss": 1.5054, + "step": 4155 + }, + { + "epoch": 0.02, + "learning_rate": 9.989595632584519e-05, + "loss": 1.5207, + "step": 4160 + }, + { + "epoch": 0.02, + "learning_rate": 9.989569999202603e-05, + "loss": 1.4765, + "step": 4165 + }, + { + "epoch": 0.02, + "learning_rate": 9.989544334315841e-05, + "loss": 1.5545, + "step": 4170 + }, + { + "epoch": 0.02, + "learning_rate": 9.989518637924388e-05, + "loss": 1.5858, + "step": 4175 + }, + { + "epoch": 0.02, + "learning_rate": 9.989492910028412e-05, + "loss": 1.4902, + "step": 4180 + }, + { + "epoch": 0.02, + "learning_rate": 9.989467150628073e-05, + "loss": 1.4587, + "step": 4185 + }, + { + "epoch": 0.02, + "learning_rate": 9.989441359723535e-05, + "loss": 1.4603, + "step": 4190 + }, + { + "epoch": 0.02, + "learning_rate": 9.989415537314958e-05, + "loss": 1.507, + "step": 4195 + }, + { + "epoch": 0.02, + "learning_rate": 9.989389683402508e-05, + "loss": 1.4852, + "step": 4200 + }, + { + "epoch": 0.02, + "learning_rate": 9.989363797986346e-05, + "loss": 1.5239, + "step": 4205 + }, + { + "epoch": 0.02, + "learning_rate": 9.989337881066639e-05, + "loss": 1.4857, + "step": 4210 + }, + { + "epoch": 0.02, + "learning_rate": 9.989311932643545e-05, + "loss": 1.5302, + "step": 4215 + }, + { + "epoch": 0.02, + "learning_rate": 9.989285952717234e-05, + "loss": 1.5101, + "step": 4220 + }, + { + "epoch": 0.02, + "learning_rate": 9.989259941287864e-05, + "loss": 1.5009, + "step": 4225 + }, + { + "epoch": 0.02, + "learning_rate": 9.989233898355603e-05, + "loss": 1.4918, + "step": 4230 + }, + { + "epoch": 0.02, + "learning_rate": 9.989207823920617e-05, + "loss": 1.5294, + "step": 4235 + }, + { + "epoch": 0.02, + "learning_rate": 9.989181717983066e-05, + "loss": 1.525, + "step": 4240 + }, + { + "epoch": 0.02, + "learning_rate": 9.989155580543118e-05, + "loss": 1.4571, + "step": 4245 + }, + { + "epoch": 0.02, + "learning_rate": 9.989129411600938e-05, + "loss": 1.4515, + "step": 4250 + }, + { + "epoch": 0.02, + "learning_rate": 9.989103211156689e-05, + "loss": 1.4865, + "step": 4255 + }, + { + "epoch": 0.02, + "learning_rate": 9.989076979210539e-05, + "loss": 1.5006, + "step": 4260 + }, + { + "epoch": 0.02, + "learning_rate": 9.989050715762652e-05, + "loss": 1.529, + "step": 4265 + }, + { + "epoch": 0.02, + "learning_rate": 9.989024420813193e-05, + "loss": 1.5761, + "step": 4270 + }, + { + "epoch": 0.02, + "learning_rate": 9.988998094362332e-05, + "loss": 1.5111, + "step": 4275 + }, + { + "epoch": 0.02, + "learning_rate": 9.98897173641023e-05, + "loss": 1.5537, + "step": 4280 + }, + { + "epoch": 0.02, + "learning_rate": 9.988945346957059e-05, + "loss": 1.5933, + "step": 4285 + }, + { + "epoch": 0.02, + "learning_rate": 9.988918926002981e-05, + "loss": 1.581, + "step": 4290 + }, + { + "epoch": 0.02, + "learning_rate": 9.988892473548163e-05, + "loss": 1.4913, + "step": 4295 + }, + { + "epoch": 0.02, + "learning_rate": 9.988865989592777e-05, + "loss": 1.5118, + "step": 4300 + }, + { + "epoch": 0.02, + "learning_rate": 9.988839474136984e-05, + "loss": 1.4788, + "step": 4305 + }, + { + "epoch": 0.02, + "learning_rate": 9.988812927180956e-05, + "loss": 1.4853, + "step": 4310 + }, + { + "epoch": 0.02, + "learning_rate": 9.988786348724858e-05, + "loss": 1.456, + "step": 4315 + }, + { + "epoch": 0.02, + "learning_rate": 9.988759738768858e-05, + "loss": 1.5449, + "step": 4320 + }, + { + "epoch": 0.02, + "learning_rate": 9.988733097313125e-05, + "loss": 1.4819, + "step": 4325 + }, + { + "epoch": 0.02, + "learning_rate": 9.988706424357827e-05, + "loss": 1.5171, + "step": 4330 + }, + { + "epoch": 0.02, + "learning_rate": 9.988679719903133e-05, + "loss": 1.5015, + "step": 4335 + }, + { + "epoch": 0.02, + "learning_rate": 9.988652983949209e-05, + "loss": 1.5576, + "step": 4340 + }, + { + "epoch": 0.02, + "learning_rate": 9.988626216496229e-05, + "loss": 1.5293, + "step": 4345 + }, + { + "epoch": 0.02, + "learning_rate": 9.988599417544356e-05, + "loss": 1.4875, + "step": 4350 + }, + { + "epoch": 0.02, + "learning_rate": 9.988572587093761e-05, + "loss": 1.497, + "step": 4355 + }, + { + "epoch": 0.02, + "learning_rate": 9.988545725144616e-05, + "loss": 1.4931, + "step": 4360 + }, + { + "epoch": 0.02, + "learning_rate": 9.988518831697089e-05, + "loss": 1.4963, + "step": 4365 + }, + { + "epoch": 0.02, + "learning_rate": 9.988491906751351e-05, + "loss": 1.5466, + "step": 4370 + }, + { + "epoch": 0.02, + "learning_rate": 9.988464950307568e-05, + "loss": 1.5052, + "step": 4375 + }, + { + "epoch": 0.02, + "learning_rate": 9.988437962365915e-05, + "loss": 1.505, + "step": 4380 + }, + { + "epoch": 0.02, + "learning_rate": 9.988410942926561e-05, + "loss": 1.5476, + "step": 4385 + }, + { + "epoch": 0.02, + "learning_rate": 9.988383891989673e-05, + "loss": 1.5194, + "step": 4390 + }, + { + "epoch": 0.02, + "learning_rate": 9.988356809555427e-05, + "loss": 1.479, + "step": 4395 + }, + { + "epoch": 0.02, + "learning_rate": 9.98832969562399e-05, + "loss": 1.5269, + "step": 4400 + }, + { + "epoch": 0.02, + "learning_rate": 9.988302550195537e-05, + "loss": 1.5161, + "step": 4405 + }, + { + "epoch": 0.02, + "learning_rate": 9.988275373270237e-05, + "loss": 1.5332, + "step": 4410 + }, + { + "epoch": 0.02, + "learning_rate": 9.988248164848262e-05, + "loss": 1.4943, + "step": 4415 + }, + { + "epoch": 0.02, + "learning_rate": 9.988220924929784e-05, + "loss": 1.5402, + "step": 4420 + }, + { + "epoch": 0.02, + "learning_rate": 9.988193653514973e-05, + "loss": 1.5296, + "step": 4425 + }, + { + "epoch": 0.02, + "learning_rate": 9.988166350604004e-05, + "loss": 1.5358, + "step": 4430 + }, + { + "epoch": 0.02, + "learning_rate": 9.98813901619705e-05, + "loss": 1.4855, + "step": 4435 + }, + { + "epoch": 0.02, + "learning_rate": 9.988111650294278e-05, + "loss": 1.4435, + "step": 4440 + }, + { + "epoch": 0.02, + "learning_rate": 9.988084252895868e-05, + "loss": 1.4716, + "step": 4445 + }, + { + "epoch": 0.02, + "learning_rate": 9.98805682400199e-05, + "loss": 1.4794, + "step": 4450 + }, + { + "epoch": 0.02, + "learning_rate": 9.988029363612815e-05, + "loss": 1.5067, + "step": 4455 + }, + { + "epoch": 0.02, + "learning_rate": 9.98800187172852e-05, + "loss": 1.5073, + "step": 4460 + }, + { + "epoch": 0.02, + "learning_rate": 9.987974348349276e-05, + "loss": 1.5499, + "step": 4465 + }, + { + "epoch": 0.02, + "learning_rate": 9.987946793475257e-05, + "loss": 1.4601, + "step": 4470 + }, + { + "epoch": 0.02, + "learning_rate": 9.987919207106639e-05, + "loss": 1.5021, + "step": 4475 + }, + { + "epoch": 0.02, + "learning_rate": 9.987891589243594e-05, + "loss": 1.5213, + "step": 4480 + }, + { + "epoch": 0.02, + "learning_rate": 9.987863939886298e-05, + "loss": 1.5026, + "step": 4485 + }, + { + "epoch": 0.02, + "learning_rate": 9.987836259034925e-05, + "loss": 1.5198, + "step": 4490 + }, + { + "epoch": 0.02, + "learning_rate": 9.98780854668965e-05, + "loss": 1.477, + "step": 4495 + }, + { + "epoch": 0.02, + "learning_rate": 9.987780802850646e-05, + "loss": 1.4909, + "step": 4500 + }, + { + "epoch": 0.02, + "learning_rate": 9.98775302751809e-05, + "loss": 1.5267, + "step": 4505 + }, + { + "epoch": 0.02, + "learning_rate": 9.987725220692157e-05, + "loss": 1.4857, + "step": 4510 + }, + { + "epoch": 0.02, + "learning_rate": 9.987697382373025e-05, + "loss": 1.5284, + "step": 4515 + }, + { + "epoch": 0.02, + "learning_rate": 9.987669512560865e-05, + "loss": 1.4978, + "step": 4520 + }, + { + "epoch": 0.02, + "learning_rate": 9.987641611255857e-05, + "loss": 1.5073, + "step": 4525 + }, + { + "epoch": 0.02, + "learning_rate": 9.987613678458174e-05, + "loss": 1.4503, + "step": 4530 + }, + { + "epoch": 0.02, + "learning_rate": 9.987585714167995e-05, + "loss": 1.4891, + "step": 4535 + }, + { + "epoch": 0.02, + "learning_rate": 9.987557718385497e-05, + "loss": 1.4578, + "step": 4540 + }, + { + "epoch": 0.02, + "learning_rate": 9.987529691110853e-05, + "loss": 1.4722, + "step": 4545 + }, + { + "epoch": 0.02, + "learning_rate": 9.987501632344244e-05, + "loss": 1.4837, + "step": 4550 + }, + { + "epoch": 0.02, + "learning_rate": 9.987473542085845e-05, + "loss": 1.5406, + "step": 4555 + }, + { + "epoch": 0.02, + "learning_rate": 9.987445420335833e-05, + "loss": 1.5159, + "step": 4560 + }, + { + "epoch": 0.02, + "learning_rate": 9.987417267094388e-05, + "loss": 1.5105, + "step": 4565 + }, + { + "epoch": 0.02, + "learning_rate": 9.987389082361684e-05, + "loss": 1.5373, + "step": 4570 + }, + { + "epoch": 0.02, + "learning_rate": 9.987360866137903e-05, + "loss": 1.5489, + "step": 4575 + }, + { + "epoch": 0.02, + "learning_rate": 9.987332618423221e-05, + "loss": 1.5048, + "step": 4580 + }, + { + "epoch": 0.02, + "learning_rate": 9.987304339217815e-05, + "loss": 1.4216, + "step": 4585 + }, + { + "epoch": 0.02, + "learning_rate": 9.987276028521867e-05, + "loss": 1.4706, + "step": 4590 + }, + { + "epoch": 0.02, + "learning_rate": 9.987247686335555e-05, + "loss": 1.4979, + "step": 4595 + }, + { + "epoch": 0.02, + "learning_rate": 9.987219312659055e-05, + "loss": 1.507, + "step": 4600 + }, + { + "epoch": 0.02, + "learning_rate": 9.987190907492549e-05, + "loss": 1.5173, + "step": 4605 + }, + { + "epoch": 0.02, + "learning_rate": 9.987162470836215e-05, + "loss": 1.4988, + "step": 4610 + }, + { + "epoch": 0.02, + "learning_rate": 9.987134002690233e-05, + "loss": 1.4874, + "step": 4615 + }, + { + "epoch": 0.02, + "learning_rate": 9.987105503054783e-05, + "loss": 1.4652, + "step": 4620 + }, + { + "epoch": 0.02, + "learning_rate": 9.987076971930045e-05, + "loss": 1.4972, + "step": 4625 + }, + { + "epoch": 0.02, + "learning_rate": 9.987048409316199e-05, + "loss": 1.52, + "step": 4630 + }, + { + "epoch": 0.02, + "learning_rate": 9.987019815213425e-05, + "loss": 1.4863, + "step": 4635 + }, + { + "epoch": 0.02, + "learning_rate": 9.986991189621902e-05, + "loss": 1.4899, + "step": 4640 + }, + { + "epoch": 0.02, + "learning_rate": 9.986962532541814e-05, + "loss": 1.457, + "step": 4645 + }, + { + "epoch": 0.02, + "learning_rate": 9.986933843973341e-05, + "loss": 1.5102, + "step": 4650 + }, + { + "epoch": 0.02, + "learning_rate": 9.986905123916664e-05, + "loss": 1.4892, + "step": 4655 + }, + { + "epoch": 0.02, + "learning_rate": 9.986876372371963e-05, + "loss": 1.5298, + "step": 4660 + }, + { + "epoch": 0.02, + "learning_rate": 9.98684758933942e-05, + "loss": 1.5681, + "step": 4665 + }, + { + "epoch": 0.02, + "learning_rate": 9.986818774819218e-05, + "loss": 1.4885, + "step": 4670 + }, + { + "epoch": 0.02, + "learning_rate": 9.986789928811538e-05, + "loss": 1.4887, + "step": 4675 + }, + { + "epoch": 0.02, + "learning_rate": 9.986761051316563e-05, + "loss": 1.5291, + "step": 4680 + }, + { + "epoch": 0.02, + "learning_rate": 9.986732142334471e-05, + "loss": 1.5125, + "step": 4685 + }, + { + "epoch": 0.02, + "learning_rate": 9.986703201865453e-05, + "loss": 1.5467, + "step": 4690 + }, + { + "epoch": 0.02, + "learning_rate": 9.986674229909683e-05, + "loss": 1.5131, + "step": 4695 + }, + { + "epoch": 0.02, + "learning_rate": 9.98664522646735e-05, + "loss": 1.474, + "step": 4700 + }, + { + "epoch": 0.02, + "learning_rate": 9.986616191538635e-05, + "loss": 1.5111, + "step": 4705 + }, + { + "epoch": 0.02, + "learning_rate": 9.98658712512372e-05, + "loss": 1.5524, + "step": 4710 + }, + { + "epoch": 0.02, + "learning_rate": 9.986558027222793e-05, + "loss": 1.5388, + "step": 4715 + }, + { + "epoch": 0.02, + "learning_rate": 9.986528897836032e-05, + "loss": 1.528, + "step": 4720 + }, + { + "epoch": 0.02, + "learning_rate": 9.986499736963624e-05, + "loss": 1.4662, + "step": 4725 + }, + { + "epoch": 0.02, + "learning_rate": 9.98647054460575e-05, + "loss": 1.5473, + "step": 4730 + }, + { + "epoch": 0.02, + "learning_rate": 9.986441320762601e-05, + "loss": 1.5541, + "step": 4735 + }, + { + "epoch": 0.02, + "learning_rate": 9.986412065434355e-05, + "loss": 1.5309, + "step": 4740 + }, + { + "epoch": 0.02, + "learning_rate": 9.9863827786212e-05, + "loss": 1.4741, + "step": 4745 + }, + { + "epoch": 0.02, + "learning_rate": 9.986353460323321e-05, + "loss": 1.442, + "step": 4750 + }, + { + "epoch": 0.02, + "learning_rate": 9.986324110540901e-05, + "loss": 1.4524, + "step": 4755 + }, + { + "epoch": 0.02, + "learning_rate": 9.986294729274127e-05, + "loss": 1.4878, + "step": 4760 + }, + { + "epoch": 0.02, + "learning_rate": 9.986265316523184e-05, + "loss": 1.5478, + "step": 4765 + }, + { + "epoch": 0.02, + "learning_rate": 9.986235872288256e-05, + "loss": 1.4916, + "step": 4770 + }, + { + "epoch": 0.02, + "learning_rate": 9.986206396569533e-05, + "loss": 1.4551, + "step": 4775 + }, + { + "epoch": 0.02, + "learning_rate": 9.986176889367198e-05, + "loss": 1.5169, + "step": 4780 + }, + { + "epoch": 0.02, + "learning_rate": 9.986147350681439e-05, + "loss": 1.5001, + "step": 4785 + }, + { + "epoch": 0.02, + "learning_rate": 9.986117780512441e-05, + "loss": 1.4939, + "step": 4790 + }, + { + "epoch": 0.02, + "learning_rate": 9.986088178860391e-05, + "loss": 1.4576, + "step": 4795 + }, + { + "epoch": 0.02, + "learning_rate": 9.986058545725476e-05, + "loss": 1.5747, + "step": 4800 + }, + { + "epoch": 0.02, + "learning_rate": 9.986028881107882e-05, + "loss": 1.4927, + "step": 4805 + }, + { + "epoch": 0.02, + "learning_rate": 9.985999185007802e-05, + "loss": 1.5061, + "step": 4810 + }, + { + "epoch": 0.02, + "learning_rate": 9.985969457425414e-05, + "loss": 1.4841, + "step": 4815 + }, + { + "epoch": 0.02, + "learning_rate": 9.985939698360916e-05, + "loss": 1.5298, + "step": 4820 + }, + { + "epoch": 0.02, + "learning_rate": 9.985909907814487e-05, + "loss": 1.4187, + "step": 4825 + }, + { + "epoch": 0.02, + "learning_rate": 9.98588008578632e-05, + "loss": 1.4835, + "step": 4830 + }, + { + "epoch": 0.02, + "learning_rate": 9.985850232276603e-05, + "loss": 1.5325, + "step": 4835 + }, + { + "epoch": 0.02, + "learning_rate": 9.985820347285521e-05, + "loss": 1.5165, + "step": 4840 + }, + { + "epoch": 0.02, + "learning_rate": 9.985790430813269e-05, + "loss": 1.5617, + "step": 4845 + }, + { + "epoch": 0.02, + "learning_rate": 9.98576048286003e-05, + "loss": 1.4625, + "step": 4850 + }, + { + "epoch": 0.02, + "learning_rate": 9.985730503425997e-05, + "loss": 1.4876, + "step": 4855 + }, + { + "epoch": 0.02, + "learning_rate": 9.985700492511356e-05, + "loss": 1.515, + "step": 4860 + }, + { + "epoch": 0.02, + "learning_rate": 9.985670450116297e-05, + "loss": 1.5216, + "step": 4865 + }, + { + "epoch": 0.02, + "learning_rate": 9.985640376241014e-05, + "loss": 1.4706, + "step": 4870 + }, + { + "epoch": 0.02, + "learning_rate": 9.985610270885692e-05, + "loss": 1.4992, + "step": 4875 + }, + { + "epoch": 0.02, + "learning_rate": 9.985580134050522e-05, + "loss": 1.4712, + "step": 4880 + }, + { + "epoch": 0.02, + "learning_rate": 9.985549965735698e-05, + "loss": 1.5588, + "step": 4885 + }, + { + "epoch": 0.02, + "learning_rate": 9.985519765941405e-05, + "loss": 1.4993, + "step": 4890 + }, + { + "epoch": 0.02, + "learning_rate": 9.985489534667837e-05, + "loss": 1.5217, + "step": 4895 + }, + { + "epoch": 0.02, + "learning_rate": 9.985459271915185e-05, + "loss": 1.5097, + "step": 4900 + }, + { + "epoch": 0.02, + "learning_rate": 9.985428977683638e-05, + "loss": 1.478, + "step": 4905 + }, + { + "epoch": 0.02, + "learning_rate": 9.985398651973389e-05, + "loss": 1.4965, + "step": 4910 + }, + { + "epoch": 0.02, + "learning_rate": 9.98536829478463e-05, + "loss": 1.527, + "step": 4915 + }, + { + "epoch": 0.02, + "learning_rate": 9.98533790611755e-05, + "loss": 1.5296, + "step": 4920 + }, + { + "epoch": 0.02, + "learning_rate": 9.985307485972344e-05, + "loss": 1.5321, + "step": 4925 + }, + { + "epoch": 0.02, + "learning_rate": 9.9852770343492e-05, + "loss": 1.4849, + "step": 4930 + }, + { + "epoch": 0.02, + "learning_rate": 9.985246551248317e-05, + "loss": 1.4683, + "step": 4935 + }, + { + "epoch": 0.02, + "learning_rate": 9.98521603666988e-05, + "loss": 1.5257, + "step": 4940 + }, + { + "epoch": 0.03, + "learning_rate": 9.985185490614086e-05, + "loss": 1.5138, + "step": 4945 + }, + { + "epoch": 0.03, + "learning_rate": 9.985154913081127e-05, + "loss": 1.5458, + "step": 4950 + }, + { + "epoch": 0.03, + "learning_rate": 9.985124304071198e-05, + "loss": 1.5363, + "step": 4955 + }, + { + "epoch": 0.03, + "learning_rate": 9.985093663584488e-05, + "loss": 1.5001, + "step": 4960 + }, + { + "epoch": 0.03, + "learning_rate": 9.985062991621193e-05, + "loss": 1.4756, + "step": 4965 + }, + { + "epoch": 0.03, + "learning_rate": 9.985032288181506e-05, + "loss": 1.5139, + "step": 4970 + }, + { + "epoch": 0.03, + "learning_rate": 9.985001553265623e-05, + "loss": 1.5024, + "step": 4975 + }, + { + "epoch": 0.03, + "learning_rate": 9.984970786873735e-05, + "loss": 1.5113, + "step": 4980 + }, + { + "epoch": 0.03, + "learning_rate": 9.984939989006038e-05, + "loss": 1.4691, + "step": 4985 + }, + { + "epoch": 0.03, + "learning_rate": 9.984909159662727e-05, + "loss": 1.5188, + "step": 4990 + }, + { + "epoch": 0.03, + "learning_rate": 9.984878298843994e-05, + "loss": 1.4678, + "step": 4995 + }, + { + "epoch": 0.03, + "learning_rate": 9.984847406550037e-05, + "loss": 1.4888, + "step": 5000 + }, + { + "epoch": 0.03, + "learning_rate": 9.984816482781048e-05, + "loss": 1.4616, + "step": 5005 + }, + { + "epoch": 0.03, + "learning_rate": 9.984785527537226e-05, + "loss": 1.5665, + "step": 5010 + }, + { + "epoch": 0.03, + "learning_rate": 9.984754540818763e-05, + "loss": 1.4752, + "step": 5015 + }, + { + "epoch": 0.03, + "learning_rate": 9.984723522625856e-05, + "loss": 1.4979, + "step": 5020 + }, + { + "epoch": 0.03, + "learning_rate": 9.9846924729587e-05, + "loss": 1.5286, + "step": 5025 + }, + { + "epoch": 0.03, + "learning_rate": 9.984661391817494e-05, + "loss": 1.4881, + "step": 5030 + }, + { + "epoch": 0.03, + "learning_rate": 9.984630279202432e-05, + "loss": 1.4904, + "step": 5035 + }, + { + "epoch": 0.03, + "learning_rate": 9.98459913511371e-05, + "loss": 1.4129, + "step": 5040 + }, + { + "epoch": 0.03, + "learning_rate": 9.984567959551526e-05, + "loss": 1.555, + "step": 5045 + }, + { + "epoch": 0.03, + "learning_rate": 9.984536752516074e-05, + "loss": 1.495, + "step": 5050 + }, + { + "epoch": 0.03, + "learning_rate": 9.984505514007555e-05, + "loss": 1.4924, + "step": 5055 + }, + { + "epoch": 0.03, + "learning_rate": 9.984474244026163e-05, + "loss": 1.5308, + "step": 5060 + }, + { + "epoch": 0.03, + "learning_rate": 9.984442942572099e-05, + "loss": 1.4795, + "step": 5065 + }, + { + "epoch": 0.03, + "learning_rate": 9.984411609645557e-05, + "loss": 1.5113, + "step": 5070 + }, + { + "epoch": 0.03, + "learning_rate": 9.984380245246736e-05, + "loss": 1.4893, + "step": 5075 + }, + { + "epoch": 0.03, + "learning_rate": 9.984348849375836e-05, + "loss": 1.5274, + "step": 5080 + }, + { + "epoch": 0.03, + "learning_rate": 9.984317422033052e-05, + "loss": 1.5588, + "step": 5085 + }, + { + "epoch": 0.03, + "learning_rate": 9.984285963218585e-05, + "loss": 1.5325, + "step": 5090 + }, + { + "epoch": 0.03, + "learning_rate": 9.984254472932633e-05, + "loss": 1.5095, + "step": 5095 + }, + { + "epoch": 0.03, + "learning_rate": 9.984222951175393e-05, + "loss": 1.5129, + "step": 5100 + }, + { + "epoch": 0.03, + "learning_rate": 9.984191397947067e-05, + "loss": 1.4927, + "step": 5105 + }, + { + "epoch": 0.03, + "learning_rate": 9.98415981324785e-05, + "loss": 1.5628, + "step": 5110 + }, + { + "epoch": 0.03, + "learning_rate": 9.984128197077947e-05, + "loss": 1.4961, + "step": 5115 + }, + { + "epoch": 0.03, + "learning_rate": 9.984096549437555e-05, + "loss": 1.481, + "step": 5120 + }, + { + "epoch": 0.03, + "learning_rate": 9.984064870326872e-05, + "loss": 1.5379, + "step": 5125 + }, + { + "epoch": 0.03, + "learning_rate": 9.984033159746102e-05, + "loss": 1.5377, + "step": 5130 + }, + { + "epoch": 0.03, + "learning_rate": 9.984001417695442e-05, + "loss": 1.4382, + "step": 5135 + }, + { + "epoch": 0.03, + "learning_rate": 9.983969644175091e-05, + "loss": 1.5358, + "step": 5140 + }, + { + "epoch": 0.03, + "learning_rate": 9.983937839185255e-05, + "loss": 1.5165, + "step": 5145 + }, + { + "epoch": 0.03, + "learning_rate": 9.983906002726131e-05, + "loss": 1.5192, + "step": 5150 + }, + { + "epoch": 0.03, + "learning_rate": 9.98387413479792e-05, + "loss": 1.5313, + "step": 5155 + }, + { + "epoch": 0.03, + "learning_rate": 9.983842235400824e-05, + "loss": 1.4434, + "step": 5160 + }, + { + "epoch": 0.03, + "learning_rate": 9.983810304535047e-05, + "loss": 1.5003, + "step": 5165 + }, + { + "epoch": 0.03, + "learning_rate": 9.983778342200785e-05, + "loss": 1.5131, + "step": 5170 + }, + { + "epoch": 0.03, + "learning_rate": 9.983746348398244e-05, + "loss": 1.487, + "step": 5175 + }, + { + "epoch": 0.03, + "learning_rate": 9.983714323127625e-05, + "loss": 1.4931, + "step": 5180 + }, + { + "epoch": 0.03, + "learning_rate": 9.98368226638913e-05, + "loss": 1.4297, + "step": 5185 + }, + { + "epoch": 0.03, + "learning_rate": 9.98365017818296e-05, + "loss": 1.5028, + "step": 5190 + }, + { + "epoch": 0.03, + "learning_rate": 9.983618058509321e-05, + "loss": 1.473, + "step": 5195 + }, + { + "epoch": 0.03, + "learning_rate": 9.983585907368413e-05, + "loss": 1.5346, + "step": 5200 + }, + { + "epoch": 0.03, + "learning_rate": 9.98355372476044e-05, + "loss": 1.5364, + "step": 5205 + }, + { + "epoch": 0.03, + "learning_rate": 9.983521510685606e-05, + "loss": 1.5042, + "step": 5210 + }, + { + "epoch": 0.03, + "learning_rate": 9.983489265144112e-05, + "loss": 1.5007, + "step": 5215 + }, + { + "epoch": 0.03, + "learning_rate": 9.983456988136164e-05, + "loss": 1.5043, + "step": 5220 + }, + { + "epoch": 0.03, + "learning_rate": 9.983424679661966e-05, + "loss": 1.4609, + "step": 5225 + }, + { + "epoch": 0.03, + "learning_rate": 9.98339233972172e-05, + "loss": 1.5366, + "step": 5230 + }, + { + "epoch": 0.03, + "learning_rate": 9.983359968315631e-05, + "loss": 1.5686, + "step": 5235 + }, + { + "epoch": 0.03, + "learning_rate": 9.983327565443906e-05, + "loss": 1.4633, + "step": 5240 + }, + { + "epoch": 0.03, + "learning_rate": 9.983295131106744e-05, + "loss": 1.4903, + "step": 5245 + }, + { + "epoch": 0.03, + "learning_rate": 9.983262665304353e-05, + "loss": 1.4585, + "step": 5250 + }, + { + "epoch": 0.03, + "learning_rate": 9.98323016803694e-05, + "loss": 1.5265, + "step": 5255 + }, + { + "epoch": 0.03, + "learning_rate": 9.983197639304706e-05, + "loss": 1.482, + "step": 5260 + }, + { + "epoch": 0.03, + "learning_rate": 9.983165079107859e-05, + "loss": 1.5468, + "step": 5265 + }, + { + "epoch": 0.03, + "learning_rate": 9.983132487446605e-05, + "loss": 1.4755, + "step": 5270 + }, + { + "epoch": 0.03, + "learning_rate": 9.983099864321149e-05, + "loss": 1.51, + "step": 5275 + }, + { + "epoch": 0.03, + "learning_rate": 9.983067209731695e-05, + "loss": 1.5016, + "step": 5280 + }, + { + "epoch": 0.03, + "learning_rate": 9.983034523678454e-05, + "loss": 1.4736, + "step": 5285 + }, + { + "epoch": 0.03, + "learning_rate": 9.983001806161627e-05, + "loss": 1.4539, + "step": 5290 + }, + { + "epoch": 0.03, + "learning_rate": 9.982969057181423e-05, + "loss": 1.5463, + "step": 5295 + }, + { + "epoch": 0.03, + "learning_rate": 9.98293627673805e-05, + "loss": 1.4902, + "step": 5300 + }, + { + "epoch": 0.03, + "learning_rate": 9.982903464831714e-05, + "loss": 1.5275, + "step": 5305 + }, + { + "epoch": 0.03, + "learning_rate": 9.982870621462621e-05, + "loss": 1.5566, + "step": 5310 + }, + { + "epoch": 0.03, + "learning_rate": 9.98283774663098e-05, + "loss": 1.4929, + "step": 5315 + }, + { + "epoch": 0.03, + "learning_rate": 9.982804840336998e-05, + "loss": 1.4763, + "step": 5320 + }, + { + "epoch": 0.03, + "learning_rate": 9.982771902580883e-05, + "loss": 1.5196, + "step": 5325 + }, + { + "epoch": 0.03, + "learning_rate": 9.982738933362842e-05, + "loss": 1.5326, + "step": 5330 + }, + { + "epoch": 0.03, + "learning_rate": 9.982705932683085e-05, + "loss": 1.5162, + "step": 5335 + }, + { + "epoch": 0.03, + "learning_rate": 9.982672900541817e-05, + "loss": 1.5219, + "step": 5340 + }, + { + "epoch": 0.03, + "learning_rate": 9.98263983693925e-05, + "loss": 1.5499, + "step": 5345 + }, + { + "epoch": 0.03, + "learning_rate": 9.982606741875592e-05, + "loss": 1.5681, + "step": 5350 + }, + { + "epoch": 0.03, + "learning_rate": 9.98257361535105e-05, + "loss": 1.5491, + "step": 5355 + }, + { + "epoch": 0.03, + "learning_rate": 9.982540457365836e-05, + "loss": 1.5337, + "step": 5360 + }, + { + "epoch": 0.03, + "learning_rate": 9.982507267920158e-05, + "loss": 1.5557, + "step": 5365 + }, + { + "epoch": 0.03, + "learning_rate": 9.982474047014226e-05, + "loss": 1.5216, + "step": 5370 + }, + { + "epoch": 0.03, + "learning_rate": 9.982440794648249e-05, + "loss": 1.506, + "step": 5375 + }, + { + "epoch": 0.03, + "learning_rate": 9.982407510822437e-05, + "loss": 1.4518, + "step": 5380 + }, + { + "epoch": 0.03, + "learning_rate": 9.982374195537001e-05, + "loss": 1.4318, + "step": 5385 + }, + { + "epoch": 0.03, + "learning_rate": 9.98234084879215e-05, + "loss": 1.4863, + "step": 5390 + }, + { + "epoch": 0.03, + "learning_rate": 9.982307470588098e-05, + "loss": 1.5002, + "step": 5395 + }, + { + "epoch": 0.03, + "learning_rate": 9.98227406092505e-05, + "loss": 1.4875, + "step": 5400 + }, + { + "epoch": 0.03, + "learning_rate": 9.982240619803221e-05, + "loss": 1.5092, + "step": 5405 + }, + { + "epoch": 0.03, + "learning_rate": 9.982207147222822e-05, + "loss": 1.5387, + "step": 5410 + }, + { + "epoch": 0.03, + "learning_rate": 9.982173643184063e-05, + "loss": 1.5063, + "step": 5415 + }, + { + "epoch": 0.03, + "learning_rate": 9.982140107687156e-05, + "loss": 1.5373, + "step": 5420 + }, + { + "epoch": 0.03, + "learning_rate": 9.982106540732312e-05, + "loss": 1.4991, + "step": 5425 + }, + { + "epoch": 0.03, + "learning_rate": 9.982072942319745e-05, + "loss": 1.5448, + "step": 5430 + }, + { + "epoch": 0.03, + "learning_rate": 9.982039312449666e-05, + "loss": 1.5134, + "step": 5435 + }, + { + "epoch": 0.03, + "learning_rate": 9.982005651122288e-05, + "loss": 1.5076, + "step": 5440 + }, + { + "epoch": 0.03, + "learning_rate": 9.981971958337824e-05, + "loss": 1.5199, + "step": 5445 + }, + { + "epoch": 0.03, + "learning_rate": 9.981938234096482e-05, + "loss": 1.4118, + "step": 5450 + }, + { + "epoch": 0.03, + "learning_rate": 9.981904478398482e-05, + "loss": 1.5121, + "step": 5455 + }, + { + "epoch": 0.03, + "learning_rate": 9.981870691244031e-05, + "loss": 1.4717, + "step": 5460 + }, + { + "epoch": 0.03, + "learning_rate": 9.981836872633348e-05, + "loss": 1.4902, + "step": 5465 + }, + { + "epoch": 0.03, + "learning_rate": 9.981803022566641e-05, + "loss": 1.5429, + "step": 5470 + }, + { + "epoch": 0.03, + "learning_rate": 9.981769141044127e-05, + "loss": 1.5351, + "step": 5475 + }, + { + "epoch": 0.03, + "learning_rate": 9.98173522806602e-05, + "loss": 1.4532, + "step": 5480 + }, + { + "epoch": 0.03, + "learning_rate": 9.981701283632532e-05, + "loss": 1.4673, + "step": 5485 + }, + { + "epoch": 0.03, + "learning_rate": 9.98166730774388e-05, + "loss": 1.4805, + "step": 5490 + }, + { + "epoch": 0.03, + "learning_rate": 9.981633300400277e-05, + "loss": 1.5073, + "step": 5495 + }, + { + "epoch": 0.03, + "learning_rate": 9.981599261601939e-05, + "loss": 1.4875, + "step": 5500 + }, + { + "epoch": 0.03, + "learning_rate": 9.981565191349078e-05, + "loss": 1.4679, + "step": 5505 + }, + { + "epoch": 0.03, + "learning_rate": 9.981531089641912e-05, + "loss": 1.5476, + "step": 5510 + }, + { + "epoch": 0.03, + "learning_rate": 9.981496956480655e-05, + "loss": 1.4786, + "step": 5515 + }, + { + "epoch": 0.03, + "learning_rate": 9.981462791865524e-05, + "loss": 1.5164, + "step": 5520 + }, + { + "epoch": 0.03, + "learning_rate": 9.981428595796731e-05, + "loss": 1.5029, + "step": 5525 + }, + { + "epoch": 0.03, + "learning_rate": 9.981394368274497e-05, + "loss": 1.4879, + "step": 5530 + }, + { + "epoch": 0.03, + "learning_rate": 9.981360109299034e-05, + "loss": 1.4773, + "step": 5535 + }, + { + "epoch": 0.03, + "learning_rate": 9.98132581887056e-05, + "loss": 1.4871, + "step": 5540 + }, + { + "epoch": 0.03, + "learning_rate": 9.981291496989294e-05, + "loss": 1.5306, + "step": 5545 + }, + { + "epoch": 0.03, + "learning_rate": 9.981257143655447e-05, + "loss": 1.4711, + "step": 5550 + }, + { + "epoch": 0.03, + "learning_rate": 9.98122275886924e-05, + "loss": 1.5054, + "step": 5555 + }, + { + "epoch": 0.03, + "learning_rate": 9.98118834263089e-05, + "loss": 1.4881, + "step": 5560 + }, + { + "epoch": 0.03, + "learning_rate": 9.981153894940614e-05, + "loss": 1.5215, + "step": 5565 + }, + { + "epoch": 0.03, + "learning_rate": 9.981119415798628e-05, + "loss": 1.5511, + "step": 5570 + }, + { + "epoch": 0.03, + "learning_rate": 9.981084905205149e-05, + "loss": 1.5274, + "step": 5575 + }, + { + "epoch": 0.03, + "learning_rate": 9.981050363160399e-05, + "loss": 1.5059, + "step": 5580 + }, + { + "epoch": 0.03, + "learning_rate": 9.981015789664593e-05, + "loss": 1.5123, + "step": 5585 + }, + { + "epoch": 0.03, + "learning_rate": 9.98098118471795e-05, + "loss": 1.4691, + "step": 5590 + }, + { + "epoch": 0.03, + "learning_rate": 9.980946548320689e-05, + "loss": 1.4681, + "step": 5595 + }, + { + "epoch": 0.03, + "learning_rate": 9.980911880473027e-05, + "loss": 1.401, + "step": 5600 + }, + { + "epoch": 0.03, + "learning_rate": 9.980877181175186e-05, + "loss": 1.5356, + "step": 5605 + }, + { + "epoch": 0.03, + "learning_rate": 9.980842450427382e-05, + "loss": 1.5111, + "step": 5610 + }, + { + "epoch": 0.03, + "learning_rate": 9.980807688229836e-05, + "loss": 1.51, + "step": 5615 + }, + { + "epoch": 0.03, + "learning_rate": 9.980772894582766e-05, + "loss": 1.4631, + "step": 5620 + }, + { + "epoch": 0.03, + "learning_rate": 9.980738069486394e-05, + "loss": 1.4893, + "step": 5625 + }, + { + "epoch": 0.03, + "learning_rate": 9.980703212940938e-05, + "loss": 1.4753, + "step": 5630 + }, + { + "epoch": 0.03, + "learning_rate": 9.980668324946619e-05, + "loss": 1.4929, + "step": 5635 + }, + { + "epoch": 0.03, + "learning_rate": 9.980633405503656e-05, + "loss": 1.4866, + "step": 5640 + }, + { + "epoch": 0.03, + "learning_rate": 9.980598454612271e-05, + "loss": 1.5493, + "step": 5645 + }, + { + "epoch": 0.03, + "learning_rate": 9.980563472272684e-05, + "loss": 1.4831, + "step": 5650 + }, + { + "epoch": 0.03, + "learning_rate": 9.980528458485117e-05, + "loss": 1.4877, + "step": 5655 + }, + { + "epoch": 0.03, + "learning_rate": 9.98049341324979e-05, + "loss": 1.4614, + "step": 5660 + }, + { + "epoch": 0.03, + "learning_rate": 9.980458336566923e-05, + "loss": 1.4293, + "step": 5665 + }, + { + "epoch": 0.03, + "learning_rate": 9.98042322843674e-05, + "loss": 1.5266, + "step": 5670 + }, + { + "epoch": 0.03, + "learning_rate": 9.98038808885946e-05, + "loss": 1.4845, + "step": 5675 + }, + { + "epoch": 0.03, + "learning_rate": 9.980352917835308e-05, + "loss": 1.4924, + "step": 5680 + }, + { + "epoch": 0.03, + "learning_rate": 9.980317715364505e-05, + "loss": 1.4654, + "step": 5685 + }, + { + "epoch": 0.03, + "learning_rate": 9.980282481447272e-05, + "loss": 1.3969, + "step": 5690 + }, + { + "epoch": 0.03, + "learning_rate": 9.980247216083832e-05, + "loss": 1.5392, + "step": 5695 + }, + { + "epoch": 0.03, + "learning_rate": 9.980211919274407e-05, + "loss": 1.4587, + "step": 5700 + }, + { + "epoch": 0.03, + "learning_rate": 9.980176591019222e-05, + "loss": 1.5106, + "step": 5705 + }, + { + "epoch": 0.03, + "learning_rate": 9.980141231318498e-05, + "loss": 1.5285, + "step": 5710 + }, + { + "epoch": 0.03, + "learning_rate": 9.980105840172461e-05, + "loss": 1.5293, + "step": 5715 + }, + { + "epoch": 0.03, + "learning_rate": 9.980070417581331e-05, + "loss": 1.4841, + "step": 5720 + }, + { + "epoch": 0.03, + "learning_rate": 9.980034963545333e-05, + "loss": 1.4763, + "step": 5725 + }, + { + "epoch": 0.03, + "learning_rate": 9.97999947806469e-05, + "loss": 1.4851, + "step": 5730 + }, + { + "epoch": 0.03, + "learning_rate": 9.97996396113963e-05, + "loss": 1.5057, + "step": 5735 + }, + { + "epoch": 0.03, + "learning_rate": 9.979928412770373e-05, + "loss": 1.5023, + "step": 5740 + }, + { + "epoch": 0.03, + "learning_rate": 9.979892832957145e-05, + "loss": 1.4776, + "step": 5745 + }, + { + "epoch": 0.03, + "learning_rate": 9.979857221700171e-05, + "loss": 1.5119, + "step": 5750 + }, + { + "epoch": 0.03, + "learning_rate": 9.979821578999675e-05, + "loss": 1.5539, + "step": 5755 + }, + { + "epoch": 0.03, + "learning_rate": 9.979785904855882e-05, + "loss": 1.4454, + "step": 5760 + }, + { + "epoch": 0.03, + "learning_rate": 9.979750199269018e-05, + "loss": 1.4928, + "step": 5765 + }, + { + "epoch": 0.03, + "learning_rate": 9.979714462239309e-05, + "loss": 1.4733, + "step": 5770 + }, + { + "epoch": 0.03, + "learning_rate": 9.979678693766979e-05, + "loss": 1.469, + "step": 5775 + }, + { + "epoch": 0.03, + "learning_rate": 9.979642893852255e-05, + "loss": 1.5397, + "step": 5780 + }, + { + "epoch": 0.03, + "learning_rate": 9.979607062495362e-05, + "loss": 1.5132, + "step": 5785 + }, + { + "epoch": 0.03, + "learning_rate": 9.979571199696527e-05, + "loss": 1.5642, + "step": 5790 + }, + { + "epoch": 0.03, + "learning_rate": 9.979535305455977e-05, + "loss": 1.5103, + "step": 5795 + }, + { + "epoch": 0.03, + "learning_rate": 9.979499379773936e-05, + "loss": 1.5058, + "step": 5800 + }, + { + "epoch": 0.03, + "learning_rate": 9.979463422650634e-05, + "loss": 1.5033, + "step": 5805 + }, + { + "epoch": 0.03, + "learning_rate": 9.979427434086299e-05, + "loss": 1.5078, + "step": 5810 + }, + { + "epoch": 0.03, + "learning_rate": 9.979391414081153e-05, + "loss": 1.4872, + "step": 5815 + }, + { + "epoch": 0.03, + "learning_rate": 9.97935536263543e-05, + "loss": 1.5118, + "step": 5820 + }, + { + "epoch": 0.03, + "learning_rate": 9.97931927974935e-05, + "loss": 1.4501, + "step": 5825 + }, + { + "epoch": 0.03, + "learning_rate": 9.979283165423148e-05, + "loss": 1.4908, + "step": 5830 + }, + { + "epoch": 0.03, + "learning_rate": 9.979247019657049e-05, + "loss": 1.4134, + "step": 5835 + }, + { + "epoch": 0.03, + "learning_rate": 9.979210842451281e-05, + "loss": 1.4692, + "step": 5840 + }, + { + "epoch": 0.03, + "learning_rate": 9.979174633806072e-05, + "loss": 1.4497, + "step": 5845 + }, + { + "epoch": 0.03, + "learning_rate": 9.979138393721653e-05, + "loss": 1.4799, + "step": 5850 + }, + { + "epoch": 0.03, + "learning_rate": 9.979102122198248e-05, + "loss": 1.4355, + "step": 5855 + }, + { + "epoch": 0.03, + "learning_rate": 9.97906581923609e-05, + "loss": 1.4799, + "step": 5860 + }, + { + "epoch": 0.03, + "learning_rate": 9.97902948483541e-05, + "loss": 1.4916, + "step": 5865 + }, + { + "epoch": 0.03, + "learning_rate": 9.978993118996432e-05, + "loss": 1.5522, + "step": 5870 + }, + { + "epoch": 0.03, + "learning_rate": 9.97895672171939e-05, + "loss": 1.4886, + "step": 5875 + }, + { + "epoch": 0.03, + "learning_rate": 9.978920293004513e-05, + "loss": 1.5207, + "step": 5880 + }, + { + "epoch": 0.03, + "learning_rate": 9.978883832852028e-05, + "loss": 1.5016, + "step": 5885 + }, + { + "epoch": 0.03, + "learning_rate": 9.97884734126217e-05, + "loss": 1.5223, + "step": 5890 + }, + { + "epoch": 0.03, + "learning_rate": 9.978810818235164e-05, + "loss": 1.5014, + "step": 5895 + }, + { + "epoch": 0.03, + "learning_rate": 9.978774263771247e-05, + "loss": 1.437, + "step": 5900 + }, + { + "epoch": 0.03, + "learning_rate": 9.978737677870645e-05, + "loss": 1.4876, + "step": 5905 + }, + { + "epoch": 0.03, + "learning_rate": 9.978701060533589e-05, + "loss": 1.5068, + "step": 5910 + }, + { + "epoch": 0.03, + "learning_rate": 9.978664411760312e-05, + "loss": 1.5635, + "step": 5915 + }, + { + "epoch": 0.03, + "learning_rate": 9.978627731551046e-05, + "loss": 1.5147, + "step": 5920 + }, + { + "epoch": 0.03, + "learning_rate": 9.97859101990602e-05, + "loss": 1.5375, + "step": 5925 + }, + { + "epoch": 0.03, + "learning_rate": 9.978554276825469e-05, + "loss": 1.5361, + "step": 5930 + }, + { + "epoch": 0.03, + "learning_rate": 9.978517502309622e-05, + "loss": 1.4982, + "step": 5935 + }, + { + "epoch": 0.03, + "learning_rate": 9.978480696358714e-05, + "loss": 1.4991, + "step": 5940 + }, + { + "epoch": 0.03, + "learning_rate": 9.978443858972974e-05, + "loss": 1.5021, + "step": 5945 + }, + { + "epoch": 0.03, + "learning_rate": 9.978406990152637e-05, + "loss": 1.4616, + "step": 5950 + }, + { + "epoch": 0.03, + "learning_rate": 9.978370089897938e-05, + "loss": 1.5133, + "step": 5955 + }, + { + "epoch": 0.03, + "learning_rate": 9.978333158209105e-05, + "loss": 1.4992, + "step": 5960 + }, + { + "epoch": 0.03, + "learning_rate": 9.978296195086375e-05, + "loss": 1.4619, + "step": 5965 + }, + { + "epoch": 0.03, + "learning_rate": 9.978259200529978e-05, + "loss": 1.4875, + "step": 5970 + }, + { + "epoch": 0.03, + "learning_rate": 9.97822217454015e-05, + "loss": 1.4738, + "step": 5975 + }, + { + "epoch": 0.03, + "learning_rate": 9.978185117117125e-05, + "loss": 1.5233, + "step": 5980 + }, + { + "epoch": 0.03, + "learning_rate": 9.978148028261136e-05, + "loss": 1.4409, + "step": 5985 + }, + { + "epoch": 0.03, + "learning_rate": 9.978110907972417e-05, + "loss": 1.5485, + "step": 5990 + }, + { + "epoch": 0.03, + "learning_rate": 9.978073756251204e-05, + "loss": 1.5185, + "step": 5995 + }, + { + "epoch": 0.03, + "learning_rate": 9.978036573097729e-05, + "loss": 1.515, + "step": 6000 + } + ], + "logging_steps": 5, + "max_steps": 197780, + "num_train_epochs": 1, + "save_steps": 1000, + "total_flos": 1.6825629945102336e+19, + "trial_name": null, + "trial_params": null +}