diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,25527 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.6415689021187183, + "eval_steps": 100, + "global_step": 37500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 1.2000000000000002e-07, + "loss": 3.306, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 1.2000000000000002e-06, + "loss": 3.5052, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 2.4000000000000003e-06, + "loss": 3.3971, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 3.6e-06, + "loss": 3.3713, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 4.800000000000001e-06, + "loss": 3.4039, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 6e-06, + "loss": 3.3735, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 7.2e-06, + "loss": 3.3117, + "step": 60 + }, + { + "epoch": 0.0, + "learning_rate": 8.400000000000001e-06, + "loss": 3.2573, + "step": 70 + }, + { + "epoch": 0.0, + "learning_rate": 9.600000000000001e-06, + "loss": 3.1882, + "step": 80 + }, + { + "epoch": 0.0, + "learning_rate": 1.08e-05, + "loss": 3.0751, + "step": 90 + }, + { + "epoch": 0.0, + "learning_rate": 1.2e-05, + "loss": 2.9993, + "step": 100 + }, + { + "epoch": 0.0, + "eval_loss": 2.979447841644287, + "eval_runtime": 11.7537, + "eval_samples_per_second": 348.487, + "eval_steps_per_second": 21.78, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 1.32e-05, + "loss": 2.9849, + "step": 110 + }, + { + "epoch": 0.01, + "learning_rate": 1.44e-05, + "loss": 2.9377, + "step": 120 + }, + { + "epoch": 0.01, + "learning_rate": 1.56e-05, + "loss": 2.916, + "step": 130 + }, + { + "epoch": 0.01, + "learning_rate": 1.6800000000000002e-05, + "loss": 2.8504, + "step": 140 + }, + { + "epoch": 0.01, + "learning_rate": 1.8e-05, + "loss": 2.8581, + "step": 150 + }, + { + "epoch": 0.01, + "learning_rate": 1.9200000000000003e-05, + "loss": 2.8143, + "step": 160 + }, + { + "epoch": 0.01, + "learning_rate": 2.04e-05, + "loss": 2.8103, + "step": 170 + }, + { + "epoch": 0.01, + "learning_rate": 2.16e-05, + "loss": 2.824, + "step": 180 + }, + { + "epoch": 0.01, + "learning_rate": 2.2800000000000002e-05, + "loss": 2.7853, + "step": 190 + }, + { + "epoch": 0.01, + "learning_rate": 2.4e-05, + "loss": 2.7613, + "step": 200 + }, + { + "epoch": 0.01, + "eval_loss": 2.7339868545532227, + "eval_runtime": 11.8471, + "eval_samples_per_second": 345.74, + "eval_steps_per_second": 21.609, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 2.52e-05, + "loss": 2.7264, + "step": 210 + }, + { + "epoch": 0.01, + "learning_rate": 2.64e-05, + "loss": 2.7563, + "step": 220 + }, + { + "epoch": 0.01, + "learning_rate": 2.7600000000000003e-05, + "loss": 2.7032, + "step": 230 + }, + { + "epoch": 0.01, + "learning_rate": 2.88e-05, + "loss": 2.6709, + "step": 240 + }, + { + "epoch": 0.01, + "learning_rate": 3e-05, + "loss": 2.7113, + "step": 250 + }, + { + "epoch": 0.01, + "learning_rate": 3.12e-05, + "loss": 2.652, + "step": 260 + }, + { + "epoch": 0.01, + "learning_rate": 3.24e-05, + "loss": 2.6599, + "step": 270 + }, + { + "epoch": 0.01, + "learning_rate": 3.3600000000000004e-05, + "loss": 2.6164, + "step": 280 + }, + { + "epoch": 0.01, + "learning_rate": 3.48e-05, + "loss": 2.6489, + "step": 290 + }, + { + "epoch": 0.01, + "learning_rate": 3.6e-05, + "loss": 2.6529, + "step": 300 + }, + { + "epoch": 0.01, + "eval_loss": 2.6033902168273926, + "eval_runtime": 11.9371, + "eval_samples_per_second": 343.131, + "eval_steps_per_second": 21.446, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 3.72e-05, + "loss": 2.6186, + "step": 310 + }, + { + "epoch": 0.01, + "learning_rate": 3.8400000000000005e-05, + "loss": 2.6158, + "step": 320 + }, + { + "epoch": 0.01, + "learning_rate": 3.96e-05, + "loss": 2.5911, + "step": 330 + }, + { + "epoch": 0.01, + "learning_rate": 4.08e-05, + "loss": 2.5856, + "step": 340 + }, + { + "epoch": 0.02, + "learning_rate": 4.2e-05, + "loss": 2.5734, + "step": 350 + }, + { + "epoch": 0.02, + "learning_rate": 4.32e-05, + "loss": 2.6221, + "step": 360 + }, + { + "epoch": 0.02, + "learning_rate": 4.44e-05, + "loss": 2.5921, + "step": 370 + }, + { + "epoch": 0.02, + "learning_rate": 4.5600000000000004e-05, + "loss": 2.5796, + "step": 380 + }, + { + "epoch": 0.02, + "learning_rate": 4.6800000000000006e-05, + "loss": 2.5515, + "step": 390 + }, + { + "epoch": 0.02, + "learning_rate": 4.8e-05, + "loss": 2.5531, + "step": 400 + }, + { + "epoch": 0.02, + "eval_loss": 2.5212419033050537, + "eval_runtime": 11.6859, + "eval_samples_per_second": 350.507, + "eval_steps_per_second": 21.907, + "step": 400 + }, + { + "epoch": 0.02, + "learning_rate": 4.9199999999999997e-05, + "loss": 2.5611, + "step": 410 + }, + { + "epoch": 0.02, + "learning_rate": 5.04e-05, + "loss": 2.5204, + "step": 420 + }, + { + "epoch": 0.02, + "learning_rate": 5.16e-05, + "loss": 2.5302, + "step": 430 + }, + { + "epoch": 0.02, + "learning_rate": 5.28e-05, + "loss": 2.453, + "step": 440 + }, + { + "epoch": 0.02, + "learning_rate": 5.4000000000000005e-05, + "loss": 2.5275, + "step": 450 + }, + { + "epoch": 0.02, + "learning_rate": 5.520000000000001e-05, + "loss": 2.4754, + "step": 460 + }, + { + "epoch": 0.02, + "learning_rate": 5.6399999999999995e-05, + "loss": 2.5214, + "step": 470 + }, + { + "epoch": 0.02, + "learning_rate": 5.76e-05, + "loss": 2.475, + "step": 480 + }, + { + "epoch": 0.02, + "learning_rate": 5.88e-05, + "loss": 2.4533, + "step": 490 + }, + { + "epoch": 0.02, + "learning_rate": 6e-05, + "loss": 2.4883, + "step": 500 + }, + { + "epoch": 0.02, + "eval_loss": 2.4650485515594482, + "eval_runtime": 11.6784, + "eval_samples_per_second": 350.734, + "eval_steps_per_second": 21.921, + "step": 500 + }, + { + "epoch": 0.02, + "learning_rate": 5.999999274988748e-05, + "loss": 2.4537, + "step": 510 + }, + { + "epoch": 0.02, + "learning_rate": 5.9999970999553426e-05, + "loss": 2.488, + "step": 520 + }, + { + "epoch": 0.02, + "learning_rate": 5.9999934749008346e-05, + "loss": 2.5225, + "step": 530 + }, + { + "epoch": 0.02, + "learning_rate": 5.999988399826977e-05, + "loss": 2.4376, + "step": 540 + }, + { + "epoch": 0.02, + "learning_rate": 5.999981874736221e-05, + "loss": 2.4825, + "step": 550 + }, + { + "epoch": 0.02, + "learning_rate": 5.9999738996317224e-05, + "loss": 2.4779, + "step": 560 + }, + { + "epoch": 0.02, + "learning_rate": 5.999964474517335e-05, + "loss": 2.4146, + "step": 570 + }, + { + "epoch": 0.03, + "learning_rate": 5.9999535993976145e-05, + "loss": 2.4468, + "step": 580 + }, + { + "epoch": 0.03, + "learning_rate": 5.999941274277817e-05, + "loss": 2.4516, + "step": 590 + }, + { + "epoch": 0.03, + "learning_rate": 5.9999274991639004e-05, + "loss": 2.4213, + "step": 600 + }, + { + "epoch": 0.03, + "eval_loss": 2.411180019378662, + "eval_runtime": 11.7741, + "eval_samples_per_second": 347.881, + "eval_steps_per_second": 21.743, + "step": 600 + }, + { + "epoch": 0.03, + "learning_rate": 5.999912274062522e-05, + "loss": 2.4222, + "step": 610 + }, + { + "epoch": 0.03, + "learning_rate": 5.999895598981041e-05, + "loss": 2.3641, + "step": 620 + }, + { + "epoch": 0.03, + "learning_rate": 5.999877473927517e-05, + "loss": 2.4214, + "step": 630 + }, + { + "epoch": 0.03, + "learning_rate": 5.999857898910712e-05, + "loss": 2.3662, + "step": 640 + }, + { + "epoch": 0.03, + "learning_rate": 5.999836873940085e-05, + "loss": 2.4183, + "step": 650 + }, + { + "epoch": 0.03, + "learning_rate": 5.9998143990258e-05, + "loss": 2.4012, + "step": 660 + }, + { + "epoch": 0.03, + "learning_rate": 5.9997904741787194e-05, + "loss": 2.4289, + "step": 670 + }, + { + "epoch": 0.03, + "learning_rate": 5.999765099410407e-05, + "loss": 2.3593, + "step": 680 + }, + { + "epoch": 0.03, + "learning_rate": 5.999738274733128e-05, + "loss": 2.4153, + "step": 690 + }, + { + "epoch": 0.03, + "learning_rate": 5.999710000159846e-05, + "loss": 2.4046, + "step": 700 + }, + { + "epoch": 0.03, + "eval_loss": 2.3694727420806885, + "eval_runtime": 12.1658, + "eval_samples_per_second": 336.681, + "eval_steps_per_second": 21.043, + "step": 700 + }, + { + "epoch": 0.03, + "learning_rate": 5.999680275704231e-05, + "loss": 2.3952, + "step": 710 + }, + { + "epoch": 0.03, + "learning_rate": 5.999649101380646e-05, + "loss": 2.381, + "step": 720 + }, + { + "epoch": 0.03, + "learning_rate": 5.999616477204161e-05, + "loss": 2.3835, + "step": 730 + }, + { + "epoch": 0.03, + "learning_rate": 5.9995824031905446e-05, + "loss": 2.3664, + "step": 740 + }, + { + "epoch": 0.03, + "learning_rate": 5.9995468793562655e-05, + "loss": 2.3001, + "step": 750 + }, + { + "epoch": 0.03, + "learning_rate": 5.9995099057184944e-05, + "loss": 2.3884, + "step": 760 + }, + { + "epoch": 0.03, + "learning_rate": 5.999471482295101e-05, + "loss": 2.3915, + "step": 770 + }, + { + "epoch": 0.03, + "learning_rate": 5.999431609104658e-05, + "loss": 2.3674, + "step": 780 + }, + { + "epoch": 0.03, + "learning_rate": 5.999390286166438e-05, + "loss": 2.3697, + "step": 790 + }, + { + "epoch": 0.04, + "learning_rate": 5.9993475135004135e-05, + "loss": 2.3509, + "step": 800 + }, + { + "epoch": 0.04, + "eval_loss": 2.3388166427612305, + "eval_runtime": 11.6279, + "eval_samples_per_second": 352.257, + "eval_steps_per_second": 22.016, + "step": 800 + }, + { + "epoch": 0.04, + "learning_rate": 5.9993032911272584e-05, + "loss": 2.3633, + "step": 810 + }, + { + "epoch": 0.04, + "learning_rate": 5.999257619068346e-05, + "loss": 2.3405, + "step": 820 + }, + { + "epoch": 0.04, + "learning_rate": 5.9992104973457536e-05, + "loss": 2.3684, + "step": 830 + }, + { + "epoch": 0.04, + "learning_rate": 5.9991619259822554e-05, + "loss": 2.3348, + "step": 840 + }, + { + "epoch": 0.04, + "learning_rate": 5.999111905001329e-05, + "loss": 2.396, + "step": 850 + }, + { + "epoch": 0.04, + "learning_rate": 5.999060434427151e-05, + "loss": 2.3006, + "step": 860 + }, + { + "epoch": 0.04, + "learning_rate": 5.999007514284599e-05, + "loss": 2.3543, + "step": 870 + }, + { + "epoch": 0.04, + "learning_rate": 5.998953144599253e-05, + "loss": 2.3564, + "step": 880 + }, + { + "epoch": 0.04, + "learning_rate": 5.9988973253973895e-05, + "loss": 2.3265, + "step": 890 + }, + { + "epoch": 0.04, + "learning_rate": 5.9988400567059905e-05, + "loss": 2.3153, + "step": 900 + }, + { + "epoch": 0.04, + "eval_loss": 2.309986114501953, + "eval_runtime": 11.6011, + "eval_samples_per_second": 353.071, + "eval_steps_per_second": 22.067, + "step": 900 + }, + { + "epoch": 0.04, + "learning_rate": 5.9987813385527355e-05, + "loss": 2.2815, + "step": 910 + }, + { + "epoch": 0.04, + "learning_rate": 5.998721170966004e-05, + "loss": 2.3601, + "step": 920 + }, + { + "epoch": 0.04, + "learning_rate": 5.99865955397488e-05, + "loss": 2.3063, + "step": 930 + }, + { + "epoch": 0.04, + "learning_rate": 5.998596487609144e-05, + "loss": 2.3125, + "step": 940 + }, + { + "epoch": 0.04, + "learning_rate": 5.998531971899279e-05, + "loss": 2.2972, + "step": 950 + }, + { + "epoch": 0.04, + "learning_rate": 5.9984660068764675e-05, + "loss": 2.2984, + "step": 960 + }, + { + "epoch": 0.04, + "learning_rate": 5.998398592572594e-05, + "loss": 2.2664, + "step": 970 + }, + { + "epoch": 0.04, + "learning_rate": 5.998329729020241e-05, + "loss": 2.3188, + "step": 980 + }, + { + "epoch": 0.04, + "learning_rate": 5.998259416252695e-05, + "loss": 2.2963, + "step": 990 + }, + { + "epoch": 0.04, + "learning_rate": 5.99818765430394e-05, + "loss": 2.3158, + "step": 1000 + }, + { + "epoch": 0.04, + "eval_loss": 2.2877612113952637, + "eval_runtime": 12.462, + "eval_samples_per_second": 328.678, + "eval_steps_per_second": 20.542, + "step": 1000 + }, + { + "epoch": 0.04, + "learning_rate": 5.998114443208661e-05, + "loss": 2.313, + "step": 1010 + }, + { + "epoch": 0.04, + "learning_rate": 5.998039783002245e-05, + "loss": 2.3052, + "step": 1020 + }, + { + "epoch": 0.05, + "learning_rate": 5.997963673720778e-05, + "loss": 2.3348, + "step": 1030 + }, + { + "epoch": 0.05, + "learning_rate": 5.997886115401047e-05, + "loss": 2.3304, + "step": 1040 + }, + { + "epoch": 0.05, + "learning_rate": 5.997807108080538e-05, + "loss": 2.2719, + "step": 1050 + }, + { + "epoch": 0.05, + "learning_rate": 5.9977266517974396e-05, + "loss": 2.2937, + "step": 1060 + }, + { + "epoch": 0.05, + "learning_rate": 5.9976447465906386e-05, + "loss": 2.2919, + "step": 1070 + }, + { + "epoch": 0.05, + "learning_rate": 5.997561392499724e-05, + "loss": 2.2525, + "step": 1080 + }, + { + "epoch": 0.05, + "learning_rate": 5.997476589564984e-05, + "loss": 2.2822, + "step": 1090 + }, + { + "epoch": 0.05, + "learning_rate": 5.997390337827408e-05, + "loss": 2.2833, + "step": 1100 + }, + { + "epoch": 0.05, + "eval_loss": 2.267622947692871, + "eval_runtime": 11.9236, + "eval_samples_per_second": 343.521, + "eval_steps_per_second": 21.47, + "step": 1100 + }, + { + "epoch": 0.05, + "learning_rate": 5.997302637328683e-05, + "loss": 2.2687, + "step": 1110 + }, + { + "epoch": 0.05, + "learning_rate": 5.9972134881112e-05, + "loss": 2.2636, + "step": 1120 + }, + { + "epoch": 0.05, + "learning_rate": 5.997122890218047e-05, + "loss": 2.3226, + "step": 1130 + }, + { + "epoch": 0.05, + "learning_rate": 5.997030843693016e-05, + "loss": 2.2599, + "step": 1140 + }, + { + "epoch": 0.05, + "learning_rate": 5.996937348580595e-05, + "loss": 2.2566, + "step": 1150 + }, + { + "epoch": 0.05, + "learning_rate": 5.9968424049259735e-05, + "loss": 2.2997, + "step": 1160 + }, + { + "epoch": 0.05, + "learning_rate": 5.9967460127750446e-05, + "loss": 2.3179, + "step": 1170 + }, + { + "epoch": 0.05, + "learning_rate": 5.996648172174395e-05, + "loss": 2.2863, + "step": 1180 + }, + { + "epoch": 0.05, + "learning_rate": 5.996548883171317e-05, + "loss": 2.267, + "step": 1190 + }, + { + "epoch": 0.05, + "learning_rate": 5.9964481458138e-05, + "loss": 2.2207, + "step": 1200 + }, + { + "epoch": 0.05, + "eval_loss": 2.2525129318237305, + "eval_runtime": 12.0941, + "eval_samples_per_second": 338.679, + "eval_steps_per_second": 21.167, + "step": 1200 + }, + { + "epoch": 0.05, + "learning_rate": 5.996345960150536e-05, + "loss": 2.2595, + "step": 1210 + }, + { + "epoch": 0.05, + "learning_rate": 5.9962423262309146e-05, + "loss": 2.316, + "step": 1220 + }, + { + "epoch": 0.05, + "learning_rate": 5.996137244105027e-05, + "loss": 2.258, + "step": 1230 + }, + { + "epoch": 0.05, + "learning_rate": 5.996030713823661e-05, + "loss": 2.2726, + "step": 1240 + }, + { + "epoch": 0.05, + "learning_rate": 5.995922735438311e-05, + "loss": 2.249, + "step": 1250 + }, + { + "epoch": 0.06, + "learning_rate": 5.995813309001164e-05, + "loss": 2.2319, + "step": 1260 + }, + { + "epoch": 0.06, + "learning_rate": 5.995702434565112e-05, + "loss": 2.2444, + "step": 1270 + }, + { + "epoch": 0.06, + "learning_rate": 5.995590112183745e-05, + "loss": 2.258, + "step": 1280 + }, + { + "epoch": 0.06, + "learning_rate": 5.995476341911353e-05, + "loss": 2.296, + "step": 1290 + }, + { + "epoch": 0.06, + "learning_rate": 5.995361123802926e-05, + "loss": 2.2656, + "step": 1300 + }, + { + "epoch": 0.06, + "eval_loss": 2.233938694000244, + "eval_runtime": 11.9568, + "eval_samples_per_second": 342.566, + "eval_steps_per_second": 21.41, + "step": 1300 + }, + { + "epoch": 0.06, + "learning_rate": 5.995244457914152e-05, + "loss": 2.2022, + "step": 1310 + }, + { + "epoch": 0.06, + "learning_rate": 5.9951263443014225e-05, + "loss": 2.2465, + "step": 1320 + }, + { + "epoch": 0.06, + "learning_rate": 5.9950067830218256e-05, + "loss": 2.2018, + "step": 1330 + }, + { + "epoch": 0.06, + "learning_rate": 5.99488577413315e-05, + "loss": 2.2699, + "step": 1340 + }, + { + "epoch": 0.06, + "learning_rate": 5.994763317693885e-05, + "loss": 2.2657, + "step": 1350 + }, + { + "epoch": 0.06, + "learning_rate": 5.994639413763218e-05, + "loss": 2.2484, + "step": 1360 + }, + { + "epoch": 0.06, + "learning_rate": 5.994514062401038e-05, + "loss": 2.2652, + "step": 1370 + }, + { + "epoch": 0.06, + "learning_rate": 5.9943872636679305e-05, + "loss": 2.2168, + "step": 1380 + }, + { + "epoch": 0.06, + "learning_rate": 5.9942590176251846e-05, + "loss": 2.2863, + "step": 1390 + }, + { + "epoch": 0.06, + "learning_rate": 5.9941293243347845e-05, + "loss": 2.1976, + "step": 1400 + }, + { + "epoch": 0.06, + "eval_loss": 2.2203006744384766, + "eval_runtime": 16.8316, + "eval_samples_per_second": 243.352, + "eval_steps_per_second": 15.209, + "step": 1400 + }, + { + "epoch": 0.06, + "learning_rate": 5.993998183859419e-05, + "loss": 2.2344, + "step": 1410 + }, + { + "epoch": 0.06, + "learning_rate": 5.993865596262472e-05, + "loss": 2.2125, + "step": 1420 + }, + { + "epoch": 0.06, + "learning_rate": 5.993731561608029e-05, + "loss": 2.2113, + "step": 1430 + }, + { + "epoch": 0.06, + "learning_rate": 5.993596079960873e-05, + "loss": 2.2446, + "step": 1440 + }, + { + "epoch": 0.06, + "learning_rate": 5.99345915138649e-05, + "loss": 2.2338, + "step": 1450 + }, + { + "epoch": 0.06, + "learning_rate": 5.993320775951061e-05, + "loss": 2.247, + "step": 1460 + }, + { + "epoch": 0.06, + "learning_rate": 5.9931809537214715e-05, + "loss": 2.1983, + "step": 1470 + }, + { + "epoch": 0.06, + "learning_rate": 5.9930396847653e-05, + "loss": 2.2772, + "step": 1480 + }, + { + "epoch": 0.07, + "learning_rate": 5.9928969691508296e-05, + "loss": 2.232, + "step": 1490 + }, + { + "epoch": 0.07, + "learning_rate": 5.99275280694704e-05, + "loss": 2.2015, + "step": 1500 + }, + { + "epoch": 0.07, + "eval_loss": 2.207272529602051, + "eval_runtime": 11.8836, + "eval_samples_per_second": 344.677, + "eval_steps_per_second": 21.542, + "step": 1500 + }, + { + "epoch": 0.07, + "learning_rate": 5.9926071982236105e-05, + "loss": 2.2393, + "step": 1510 + }, + { + "epoch": 0.07, + "learning_rate": 5.9924601430509207e-05, + "loss": 2.2068, + "step": 1520 + }, + { + "epoch": 0.07, + "learning_rate": 5.992311641500047e-05, + "loss": 2.2571, + "step": 1530 + }, + { + "epoch": 0.07, + "learning_rate": 5.992161693642767e-05, + "loss": 2.1754, + "step": 1540 + }, + { + "epoch": 0.07, + "learning_rate": 5.9920102995515554e-05, + "loss": 2.2719, + "step": 1550 + }, + { + "epoch": 0.07, + "learning_rate": 5.99185745929959e-05, + "loss": 2.1849, + "step": 1560 + }, + { + "epoch": 0.07, + "learning_rate": 5.991703172960742e-05, + "loss": 2.2507, + "step": 1570 + }, + { + "epoch": 0.07, + "learning_rate": 5.991547440609585e-05, + "loss": 2.238, + "step": 1580 + }, + { + "epoch": 0.07, + "learning_rate": 5.991390262321392e-05, + "loss": 2.204, + "step": 1590 + }, + { + "epoch": 0.07, + "learning_rate": 5.9912316381721316e-05, + "loss": 2.2177, + "step": 1600 + }, + { + "epoch": 0.07, + "eval_loss": 2.1963398456573486, + "eval_runtime": 13.5095, + "eval_samples_per_second": 303.193, + "eval_steps_per_second": 18.95, + "step": 1600 + }, + { + "epoch": 0.07, + "learning_rate": 5.9910715682384755e-05, + "loss": 2.1927, + "step": 1610 + }, + { + "epoch": 0.07, + "learning_rate": 5.990910052597791e-05, + "loss": 2.2564, + "step": 1620 + }, + { + "epoch": 0.07, + "learning_rate": 5.990747091328144e-05, + "loss": 2.2149, + "step": 1630 + }, + { + "epoch": 0.07, + "learning_rate": 5.990582684508302e-05, + "loss": 2.2022, + "step": 1640 + }, + { + "epoch": 0.07, + "learning_rate": 5.99041683221773e-05, + "loss": 2.2009, + "step": 1650 + }, + { + "epoch": 0.07, + "learning_rate": 5.99024953453659e-05, + "loss": 2.2003, + "step": 1660 + }, + { + "epoch": 0.07, + "learning_rate": 5.9900807915457434e-05, + "loss": 2.2312, + "step": 1670 + }, + { + "epoch": 0.07, + "learning_rate": 5.989910603326752e-05, + "loss": 2.1916, + "step": 1680 + }, + { + "epoch": 0.07, + "learning_rate": 5.9897389699618725e-05, + "loss": 2.2396, + "step": 1690 + }, + { + "epoch": 0.07, + "learning_rate": 5.9895658915340654e-05, + "loss": 2.175, + "step": 1700 + }, + { + "epoch": 0.07, + "eval_loss": 2.1844124794006348, + "eval_runtime": 11.4247, + "eval_samples_per_second": 358.521, + "eval_steps_per_second": 22.408, + "step": 1700 + }, + { + "epoch": 0.07, + "learning_rate": 5.989391368126984e-05, + "loss": 2.1792, + "step": 1710 + }, + { + "epoch": 0.08, + "learning_rate": 5.989215399824984e-05, + "loss": 2.2737, + "step": 1720 + }, + { + "epoch": 0.08, + "learning_rate": 5.9890379867131177e-05, + "loss": 2.1745, + "step": 1730 + }, + { + "epoch": 0.08, + "learning_rate": 5.9888591288771357e-05, + "loss": 2.1888, + "step": 1740 + }, + { + "epoch": 0.08, + "learning_rate": 5.988678826403488e-05, + "loss": 2.1556, + "step": 1750 + }, + { + "epoch": 0.08, + "learning_rate": 5.988497079379321e-05, + "loss": 2.2134, + "step": 1760 + }, + { + "epoch": 0.08, + "learning_rate": 5.988313887892482e-05, + "loss": 2.1801, + "step": 1770 + }, + { + "epoch": 0.08, + "learning_rate": 5.988129252031514e-05, + "loss": 2.1928, + "step": 1780 + }, + { + "epoch": 0.08, + "learning_rate": 5.9879431718856594e-05, + "loss": 2.1882, + "step": 1790 + }, + { + "epoch": 0.08, + "learning_rate": 5.987755647544857e-05, + "loss": 2.2182, + "step": 1800 + }, + { + "epoch": 0.08, + "eval_loss": 2.174042224884033, + "eval_runtime": 11.6019, + "eval_samples_per_second": 353.045, + "eval_steps_per_second": 22.065, + "step": 1800 + }, + { + "epoch": 0.08, + "learning_rate": 5.987566679099746e-05, + "loss": 2.178, + "step": 1810 + }, + { + "epoch": 0.08, + "learning_rate": 5.987376266641664e-05, + "loss": 2.2011, + "step": 1820 + }, + { + "epoch": 0.08, + "learning_rate": 5.987184410262643e-05, + "loss": 2.2196, + "step": 1830 + }, + { + "epoch": 0.08, + "learning_rate": 5.9869911100554165e-05, + "loss": 2.2287, + "step": 1840 + }, + { + "epoch": 0.08, + "learning_rate": 5.986796366113413e-05, + "loss": 2.1855, + "step": 1850 + }, + { + "epoch": 0.08, + "learning_rate": 5.986600178530761e-05, + "loss": 2.2516, + "step": 1860 + }, + { + "epoch": 0.08, + "learning_rate": 5.9864025474022856e-05, + "loss": 2.1804, + "step": 1870 + }, + { + "epoch": 0.08, + "learning_rate": 5.98620347282351e-05, + "loss": 2.2544, + "step": 1880 + }, + { + "epoch": 0.08, + "learning_rate": 5.986002954890657e-05, + "loss": 2.1874, + "step": 1890 + }, + { + "epoch": 0.08, + "learning_rate": 5.985800993700642e-05, + "loss": 2.1908, + "step": 1900 + }, + { + "epoch": 0.08, + "eval_loss": 2.1624016761779785, + "eval_runtime": 11.7335, + "eval_samples_per_second": 349.086, + "eval_steps_per_second": 21.818, + "step": 1900 + }, + { + "epoch": 0.08, + "learning_rate": 5.985597589351083e-05, + "loss": 2.1858, + "step": 1910 + }, + { + "epoch": 0.08, + "learning_rate": 5.985392741940292e-05, + "loss": 2.1944, + "step": 1920 + }, + { + "epoch": 0.08, + "learning_rate": 5.985186451567282e-05, + "loss": 2.2135, + "step": 1930 + }, + { + "epoch": 0.08, + "learning_rate": 5.984978718331762e-05, + "loss": 2.1289, + "step": 1940 + }, + { + "epoch": 0.09, + "learning_rate": 5.9847695423341354e-05, + "loss": 2.1891, + "step": 1950 + }, + { + "epoch": 0.09, + "learning_rate": 5.9845589236755065e-05, + "loss": 2.1651, + "step": 1960 + }, + { + "epoch": 0.09, + "learning_rate": 5.984346862457677e-05, + "loss": 2.2016, + "step": 1970 + }, + { + "epoch": 0.09, + "learning_rate": 5.984133358783144e-05, + "loss": 2.2092, + "step": 1980 + }, + { + "epoch": 0.09, + "learning_rate": 5.983918412755103e-05, + "loss": 2.1864, + "step": 1990 + }, + { + "epoch": 0.09, + "learning_rate": 5.983702024477445e-05, + "loss": 2.1916, + "step": 2000 + }, + { + "epoch": 0.09, + "eval_loss": 2.1545424461364746, + "eval_runtime": 12.2776, + "eval_samples_per_second": 333.616, + "eval_steps_per_second": 20.851, + "step": 2000 + }, + { + "epoch": 0.09, + "learning_rate": 5.9834841940547604e-05, + "loss": 2.1598, + "step": 2010 + }, + { + "epoch": 0.09, + "learning_rate": 5.983264921592335e-05, + "loss": 2.1968, + "step": 2020 + }, + { + "epoch": 0.09, + "learning_rate": 5.9830442071961524e-05, + "loss": 2.1737, + "step": 2030 + }, + { + "epoch": 0.09, + "learning_rate": 5.982822050972892e-05, + "loss": 2.1664, + "step": 2040 + }, + { + "epoch": 0.09, + "learning_rate": 5.982598453029933e-05, + "loss": 2.2088, + "step": 2050 + }, + { + "epoch": 0.09, + "learning_rate": 5.982373413475348e-05, + "loss": 2.1314, + "step": 2060 + }, + { + "epoch": 0.09, + "learning_rate": 5.9821469324179084e-05, + "loss": 2.1583, + "step": 2070 + }, + { + "epoch": 0.09, + "learning_rate": 5.98191900996708e-05, + "loss": 2.2043, + "step": 2080 + }, + { + "epoch": 0.09, + "learning_rate": 5.981689646233029e-05, + "loss": 2.1916, + "step": 2090 + }, + { + "epoch": 0.09, + "learning_rate": 5.981458841326616e-05, + "loss": 2.0975, + "step": 2100 + }, + { + "epoch": 0.09, + "eval_loss": 2.145850896835327, + "eval_runtime": 11.7678, + "eval_samples_per_second": 348.07, + "eval_steps_per_second": 21.754, + "step": 2100 + }, + { + "epoch": 0.09, + "learning_rate": 5.981226595359397e-05, + "loss": 2.1805, + "step": 2110 + }, + { + "epoch": 0.09, + "learning_rate": 5.980992908443628e-05, + "loss": 2.1319, + "step": 2120 + }, + { + "epoch": 0.09, + "learning_rate": 5.9807577806922587e-05, + "loss": 2.1684, + "step": 2130 + }, + { + "epoch": 0.09, + "learning_rate": 5.980521212218935e-05, + "loss": 2.1669, + "step": 2140 + }, + { + "epoch": 0.09, + "learning_rate": 5.9802832031380006e-05, + "loss": 2.1536, + "step": 2150 + }, + { + "epoch": 0.09, + "learning_rate": 5.980043753564496e-05, + "loss": 2.1812, + "step": 2160 + }, + { + "epoch": 0.09, + "learning_rate": 5.979802863614155e-05, + "loss": 2.1474, + "step": 2170 + }, + { + "epoch": 0.1, + "learning_rate": 5.979560533403412e-05, + "loss": 2.1461, + "step": 2180 + }, + { + "epoch": 0.1, + "learning_rate": 5.979316763049393e-05, + "loss": 2.1817, + "step": 2190 + }, + { + "epoch": 0.1, + "learning_rate": 5.979071552669923e-05, + "loss": 2.2076, + "step": 2200 + }, + { + "epoch": 0.1, + "eval_loss": 2.136880397796631, + "eval_runtime": 11.5412, + "eval_samples_per_second": 354.904, + "eval_steps_per_second": 22.181, + "step": 2200 + }, + { + "epoch": 0.1, + "learning_rate": 5.978824902383523e-05, + "loss": 2.1546, + "step": 2210 + }, + { + "epoch": 0.1, + "learning_rate": 5.978576812309407e-05, + "loss": 2.1107, + "step": 2220 + }, + { + "epoch": 0.1, + "learning_rate": 5.97832728256749e-05, + "loss": 2.1283, + "step": 2230 + }, + { + "epoch": 0.1, + "learning_rate": 5.9780763132783765e-05, + "loss": 2.1638, + "step": 2240 + }, + { + "epoch": 0.1, + "learning_rate": 5.977823904563373e-05, + "loss": 2.2048, + "step": 2250 + }, + { + "epoch": 0.1, + "learning_rate": 5.9775700565444777e-05, + "loss": 2.1297, + "step": 2260 + }, + { + "epoch": 0.1, + "learning_rate": 5.977314769344385e-05, + "loss": 2.1407, + "step": 2270 + }, + { + "epoch": 0.1, + "learning_rate": 5.9770580430864876e-05, + "loss": 2.175, + "step": 2280 + }, + { + "epoch": 0.1, + "learning_rate": 5.976799877894871e-05, + "loss": 2.1167, + "step": 2290 + }, + { + "epoch": 0.1, + "learning_rate": 5.976540273894317e-05, + "loss": 2.1116, + "step": 2300 + }, + { + "epoch": 0.1, + "eval_loss": 2.1296005249023438, + "eval_runtime": 11.6525, + "eval_samples_per_second": 351.512, + "eval_steps_per_second": 21.97, + "step": 2300 + }, + { + "epoch": 0.1, + "learning_rate": 5.976279231210302e-05, + "loss": 2.1281, + "step": 2310 + }, + { + "epoch": 0.1, + "learning_rate": 5.9760167499689985e-05, + "loss": 2.1152, + "step": 2320 + }, + { + "epoch": 0.1, + "learning_rate": 5.9757528302972764e-05, + "loss": 2.1657, + "step": 2330 + }, + { + "epoch": 0.1, + "learning_rate": 5.975487472322696e-05, + "loss": 2.1417, + "step": 2340 + }, + { + "epoch": 0.1, + "learning_rate": 5.975220676173518e-05, + "loss": 2.1868, + "step": 2350 + }, + { + "epoch": 0.1, + "learning_rate": 5.9749524419786954e-05, + "loss": 2.1341, + "step": 2360 + }, + { + "epoch": 0.1, + "learning_rate": 5.974682769867876e-05, + "loss": 2.1344, + "step": 2370 + }, + { + "epoch": 0.1, + "learning_rate": 5.974411659971403e-05, + "loss": 2.111, + "step": 2380 + }, + { + "epoch": 0.1, + "learning_rate": 5.974139112420316e-05, + "loss": 2.1725, + "step": 2390 + }, + { + "epoch": 0.11, + "learning_rate": 5.973865127346348e-05, + "loss": 2.1404, + "step": 2400 + }, + { + "epoch": 0.11, + "eval_loss": 2.1210813522338867, + "eval_runtime": 12.1931, + "eval_samples_per_second": 335.927, + "eval_steps_per_second": 20.995, + "step": 2400 + }, + { + "epoch": 0.11, + "learning_rate": 5.973589704881927e-05, + "loss": 2.1281, + "step": 2410 + }, + { + "epoch": 0.11, + "learning_rate": 5.9733128451601764e-05, + "loss": 2.1381, + "step": 2420 + }, + { + "epoch": 0.11, + "learning_rate": 5.9730345483149134e-05, + "loss": 2.1297, + "step": 2430 + }, + { + "epoch": 0.11, + "learning_rate": 5.9727548144806494e-05, + "loss": 2.1399, + "step": 2440 + }, + { + "epoch": 0.11, + "learning_rate": 5.9724736437925926e-05, + "loss": 2.1495, + "step": 2450 + }, + { + "epoch": 0.11, + "learning_rate": 5.9721910363866444e-05, + "loss": 2.094, + "step": 2460 + }, + { + "epoch": 0.11, + "learning_rate": 5.971906992399399e-05, + "loss": 2.1428, + "step": 2470 + }, + { + "epoch": 0.11, + "learning_rate": 5.9716215119681477e-05, + "loss": 2.1544, + "step": 2480 + }, + { + "epoch": 0.11, + "learning_rate": 5.971334595230875e-05, + "loss": 2.1708, + "step": 2490 + }, + { + "epoch": 0.11, + "learning_rate": 5.971046242326258e-05, + "loss": 2.1338, + "step": 2500 + }, + { + "epoch": 0.11, + "eval_loss": 2.1137373447418213, + "eval_runtime": 11.9925, + "eval_samples_per_second": 341.548, + "eval_steps_per_second": 21.347, + "step": 2500 + }, + { + "epoch": 0.11, + "learning_rate": 5.9707564533936704e-05, + "loss": 2.0854, + "step": 2510 + }, + { + "epoch": 0.11, + "learning_rate": 5.970465228573179e-05, + "loss": 2.1676, + "step": 2520 + }, + { + "epoch": 0.11, + "learning_rate": 5.9701725680055445e-05, + "loss": 2.1919, + "step": 2530 + }, + { + "epoch": 0.11, + "learning_rate": 5.9698784718322226e-05, + "loss": 2.116, + "step": 2540 + }, + { + "epoch": 0.11, + "learning_rate": 5.9695829401953615e-05, + "loss": 2.1112, + "step": 2550 + }, + { + "epoch": 0.11, + "learning_rate": 5.969285973237802e-05, + "loss": 2.1726, + "step": 2560 + }, + { + "epoch": 0.11, + "learning_rate": 5.968987571103083e-05, + "loss": 2.1494, + "step": 2570 + }, + { + "epoch": 0.11, + "learning_rate": 5.968687733935432e-05, + "loss": 2.0896, + "step": 2580 + }, + { + "epoch": 0.11, + "learning_rate": 5.9683864618797746e-05, + "loss": 2.1028, + "step": 2590 + }, + { + "epoch": 0.11, + "learning_rate": 5.968083755081727e-05, + "loss": 2.0907, + "step": 2600 + }, + { + "epoch": 0.11, + "eval_loss": 2.1094017028808594, + "eval_runtime": 11.9254, + "eval_samples_per_second": 343.467, + "eval_steps_per_second": 21.467, + "step": 2600 + }, + { + "epoch": 0.11, + "learning_rate": 5.967779613687599e-05, + "loss": 2.1917, + "step": 2610 + }, + { + "epoch": 0.11, + "learning_rate": 5.967474037844396e-05, + "loss": 2.0942, + "step": 2620 + }, + { + "epoch": 0.12, + "learning_rate": 5.967167027699815e-05, + "loss": 2.1257, + "step": 2630 + }, + { + "epoch": 0.12, + "learning_rate": 5.9668585834022466e-05, + "loss": 2.1354, + "step": 2640 + }, + { + "epoch": 0.12, + "learning_rate": 5.966548705100773e-05, + "loss": 2.1495, + "step": 2650 + }, + { + "epoch": 0.12, + "learning_rate": 5.966237392945172e-05, + "loss": 2.1276, + "step": 2660 + }, + { + "epoch": 0.12, + "learning_rate": 5.965924647085914e-05, + "loss": 2.09, + "step": 2670 + }, + { + "epoch": 0.12, + "learning_rate": 5.965610467674162e-05, + "loss": 2.1088, + "step": 2680 + }, + { + "epoch": 0.12, + "learning_rate": 5.9652948548617715e-05, + "loss": 2.1951, + "step": 2690 + }, + { + "epoch": 0.12, + "learning_rate": 5.9649778088012896e-05, + "loss": 2.1079, + "step": 2700 + }, + { + "epoch": 0.12, + "eval_loss": 2.104461669921875, + "eval_runtime": 12.0537, + "eval_samples_per_second": 339.813, + "eval_steps_per_second": 21.238, + "step": 2700 + }, + { + "epoch": 0.12, + "learning_rate": 5.9646593296459594e-05, + "loss": 2.128, + "step": 2710 + }, + { + "epoch": 0.12, + "learning_rate": 5.9643394175497145e-05, + "loss": 2.092, + "step": 2720 + }, + { + "epoch": 0.12, + "learning_rate": 5.9640180726671806e-05, + "loss": 2.1446, + "step": 2730 + }, + { + "epoch": 0.12, + "learning_rate": 5.9636952951536776e-05, + "loss": 2.1721, + "step": 2740 + }, + { + "epoch": 0.12, + "learning_rate": 5.963371085165217e-05, + "loss": 2.1262, + "step": 2750 + }, + { + "epoch": 0.12, + "learning_rate": 5.963045442858503e-05, + "loss": 2.1791, + "step": 2760 + }, + { + "epoch": 0.12, + "learning_rate": 5.962718368390931e-05, + "loss": 2.0893, + "step": 2770 + }, + { + "epoch": 0.12, + "learning_rate": 5.96238986192059e-05, + "loss": 2.1119, + "step": 2780 + }, + { + "epoch": 0.12, + "learning_rate": 5.96205992360626e-05, + "loss": 2.1215, + "step": 2790 + }, + { + "epoch": 0.12, + "learning_rate": 5.961728553607415e-05, + "loss": 2.1327, + "step": 2800 + }, + { + "epoch": 0.12, + "eval_loss": 2.0953927040100098, + "eval_runtime": 11.7403, + "eval_samples_per_second": 348.885, + "eval_steps_per_second": 21.805, + "step": 2800 + }, + { + "epoch": 0.12, + "learning_rate": 5.9613957520842177e-05, + "loss": 2.0981, + "step": 2810 + }, + { + "epoch": 0.12, + "learning_rate": 5.961061519197527e-05, + "loss": 2.1383, + "step": 2820 + }, + { + "epoch": 0.12, + "learning_rate": 5.96072585510889e-05, + "loss": 2.1195, + "step": 2830 + }, + { + "epoch": 0.12, + "learning_rate": 5.9603887599805455e-05, + "loss": 2.1172, + "step": 2840 + }, + { + "epoch": 0.12, + "learning_rate": 5.960050233975428e-05, + "loss": 2.0543, + "step": 2850 + }, + { + "epoch": 0.13, + "learning_rate": 5.959710277257159e-05, + "loss": 2.1454, + "step": 2860 + }, + { + "epoch": 0.13, + "learning_rate": 5.959368889990055e-05, + "loss": 2.095, + "step": 2870 + }, + { + "epoch": 0.13, + "learning_rate": 5.959026072339121e-05, + "loss": 2.1166, + "step": 2880 + }, + { + "epoch": 0.13, + "learning_rate": 5.9586818244700554e-05, + "loss": 2.1447, + "step": 2890 + }, + { + "epoch": 0.13, + "learning_rate": 5.9583361465492475e-05, + "loss": 2.1123, + "step": 2900 + }, + { + "epoch": 0.13, + "eval_loss": 2.090653419494629, + "eval_runtime": 11.7523, + "eval_samples_per_second": 348.527, + "eval_steps_per_second": 21.783, + "step": 2900 + }, + { + "epoch": 0.13, + "learning_rate": 5.957989038743777e-05, + "loss": 2.0771, + "step": 2910 + }, + { + "epoch": 0.13, + "learning_rate": 5.9576405012214155e-05, + "loss": 2.1296, + "step": 2920 + }, + { + "epoch": 0.13, + "learning_rate": 5.957290534150625e-05, + "loss": 2.1512, + "step": 2930 + }, + { + "epoch": 0.13, + "learning_rate": 5.9569391377005604e-05, + "loss": 2.0663, + "step": 2940 + }, + { + "epoch": 0.13, + "learning_rate": 5.9565863120410637e-05, + "loss": 2.1126, + "step": 2950 + }, + { + "epoch": 0.13, + "learning_rate": 5.956232057342672e-05, + "loss": 2.1432, + "step": 2960 + }, + { + "epoch": 0.13, + "learning_rate": 5.95587637377661e-05, + "loss": 2.1382, + "step": 2970 + }, + { + "epoch": 0.13, + "learning_rate": 5.955519261514794e-05, + "loss": 2.0925, + "step": 2980 + }, + { + "epoch": 0.13, + "learning_rate": 5.955160720729831e-05, + "loss": 2.1153, + "step": 2990 + }, + { + "epoch": 0.13, + "learning_rate": 5.9548007515950196e-05, + "loss": 2.0952, + "step": 3000 + }, + { + "epoch": 0.13, + "eval_loss": 2.085477828979492, + "eval_runtime": 12.1839, + "eval_samples_per_second": 336.181, + "eval_steps_per_second": 21.011, + "step": 3000 + }, + { + "epoch": 0.13, + "learning_rate": 5.954439354284346e-05, + "loss": 2.1191, + "step": 3010 + }, + { + "epoch": 0.13, + "learning_rate": 5.954076528972489e-05, + "loss": 2.1233, + "step": 3020 + }, + { + "epoch": 0.13, + "learning_rate": 5.953712275834817e-05, + "loss": 2.1523, + "step": 3030 + }, + { + "epoch": 0.13, + "learning_rate": 5.953346595047388e-05, + "loss": 2.092, + "step": 3040 + }, + { + "epoch": 0.13, + "learning_rate": 5.95297948678695e-05, + "loss": 2.0483, + "step": 3050 + }, + { + "epoch": 0.13, + "learning_rate": 5.9526109512309423e-05, + "loss": 2.1245, + "step": 3060 + }, + { + "epoch": 0.13, + "learning_rate": 5.9522409885574934e-05, + "loss": 2.1205, + "step": 3070 + }, + { + "epoch": 0.13, + "learning_rate": 5.95186959894542e-05, + "loss": 2.1143, + "step": 3080 + }, + { + "epoch": 0.14, + "learning_rate": 5.9514967825742325e-05, + "loss": 2.0988, + "step": 3090 + }, + { + "epoch": 0.14, + "learning_rate": 5.9511225396241256e-05, + "loss": 2.1173, + "step": 3100 + }, + { + "epoch": 0.14, + "eval_loss": 2.0793402194976807, + "eval_runtime": 11.7138, + "eval_samples_per_second": 349.672, + "eval_steps_per_second": 21.854, + "step": 3100 + }, + { + "epoch": 0.14, + "learning_rate": 5.950746870275986e-05, + "loss": 2.1051, + "step": 3110 + }, + { + "epoch": 0.14, + "learning_rate": 5.950369774711392e-05, + "loss": 2.071, + "step": 3120 + }, + { + "epoch": 0.14, + "learning_rate": 5.9499912531126096e-05, + "loss": 2.136, + "step": 3130 + }, + { + "epoch": 0.14, + "learning_rate": 5.949611305662592e-05, + "loss": 2.1015, + "step": 3140 + }, + { + "epoch": 0.14, + "learning_rate": 5.949229932544983e-05, + "loss": 2.1133, + "step": 3150 + }, + { + "epoch": 0.14, + "learning_rate": 5.9488471339441175e-05, + "loss": 2.0993, + "step": 3160 + }, + { + "epoch": 0.14, + "learning_rate": 5.948462910045017e-05, + "loss": 2.0734, + "step": 3170 + }, + { + "epoch": 0.14, + "learning_rate": 5.948077261033392e-05, + "loss": 2.0588, + "step": 3180 + }, + { + "epoch": 0.14, + "learning_rate": 5.9476901870956426e-05, + "loss": 2.0703, + "step": 3190 + }, + { + "epoch": 0.14, + "learning_rate": 5.947301688418859e-05, + "loss": 2.0886, + "step": 3200 + }, + { + "epoch": 0.14, + "eval_loss": 2.0747480392456055, + "eval_runtime": 16.3253, + "eval_samples_per_second": 250.899, + "eval_steps_per_second": 15.681, + "step": 3200 + }, + { + "epoch": 0.14, + "learning_rate": 5.9469117651908163e-05, + "loss": 2.0638, + "step": 3210 + }, + { + "epoch": 0.14, + "learning_rate": 5.946520417599982e-05, + "loss": 2.0492, + "step": 3220 + }, + { + "epoch": 0.14, + "learning_rate": 5.946127645835509e-05, + "loss": 2.0683, + "step": 3230 + }, + { + "epoch": 0.14, + "learning_rate": 5.945733450087242e-05, + "loss": 2.0929, + "step": 3240 + }, + { + "epoch": 0.14, + "learning_rate": 5.9453378305457085e-05, + "loss": 2.139, + "step": 3250 + }, + { + "epoch": 0.14, + "learning_rate": 5.944940787402131e-05, + "loss": 2.0932, + "step": 3260 + }, + { + "epoch": 0.14, + "learning_rate": 5.944542320848414e-05, + "loss": 2.1399, + "step": 3270 + }, + { + "epoch": 0.14, + "learning_rate": 5.9441424310771545e-05, + "loss": 2.1349, + "step": 3280 + }, + { + "epoch": 0.14, + "learning_rate": 5.943741118281635e-05, + "loss": 2.0773, + "step": 3290 + }, + { + "epoch": 0.14, + "learning_rate": 5.943338382655826e-05, + "loss": 2.0418, + "step": 3300 + }, + { + "epoch": 0.14, + "eval_loss": 2.069995403289795, + "eval_runtime": 13.501, + "eval_samples_per_second": 303.384, + "eval_steps_per_second": 18.962, + "step": 3300 + }, + { + "epoch": 0.14, + "learning_rate": 5.942934224394387e-05, + "loss": 2.1208, + "step": 3310 + }, + { + "epoch": 0.15, + "learning_rate": 5.9425286436926635e-05, + "loss": 2.0428, + "step": 3320 + }, + { + "epoch": 0.15, + "learning_rate": 5.942121640746688e-05, + "loss": 2.0581, + "step": 3330 + }, + { + "epoch": 0.15, + "learning_rate": 5.941713215753184e-05, + "loss": 2.1182, + "step": 3340 + }, + { + "epoch": 0.15, + "learning_rate": 5.9413033689095596e-05, + "loss": 2.098, + "step": 3350 + }, + { + "epoch": 0.15, + "learning_rate": 5.940892100413909e-05, + "loss": 2.0887, + "step": 3360 + }, + { + "epoch": 0.15, + "learning_rate": 5.9404794104650156e-05, + "loss": 2.0979, + "step": 3370 + }, + { + "epoch": 0.15, + "learning_rate": 5.9400652992623495e-05, + "loss": 2.083, + "step": 3380 + }, + { + "epoch": 0.15, + "learning_rate": 5.9396497670060685e-05, + "loss": 2.0856, + "step": 3390 + }, + { + "epoch": 0.15, + "learning_rate": 5.939232813897014e-05, + "loss": 2.095, + "step": 3400 + }, + { + "epoch": 0.15, + "eval_loss": 2.0656051635742188, + "eval_runtime": 13.8953, + "eval_samples_per_second": 294.777, + "eval_steps_per_second": 18.424, + "step": 3400 + }, + { + "epoch": 0.15, + "learning_rate": 5.9388144401367194e-05, + "loss": 2.1035, + "step": 3410 + }, + { + "epoch": 0.15, + "learning_rate": 5.9383946459273995e-05, + "loss": 2.096, + "step": 3420 + }, + { + "epoch": 0.15, + "learning_rate": 5.937973431471959e-05, + "loss": 2.1298, + "step": 3430 + }, + { + "epoch": 0.15, + "learning_rate": 5.937550796973988e-05, + "loss": 2.0599, + "step": 3440 + }, + { + "epoch": 0.15, + "learning_rate": 5.9371267426377624e-05, + "loss": 2.0537, + "step": 3450 + }, + { + "epoch": 0.15, + "learning_rate": 5.9367012686682464e-05, + "loss": 2.0865, + "step": 3460 + }, + { + "epoch": 0.15, + "learning_rate": 5.936274375271087e-05, + "loss": 2.1082, + "step": 3470 + }, + { + "epoch": 0.15, + "learning_rate": 5.935846062652621e-05, + "loss": 2.1416, + "step": 3480 + }, + { + "epoch": 0.15, + "learning_rate": 5.935416331019868e-05, + "loss": 2.0887, + "step": 3490 + }, + { + "epoch": 0.15, + "learning_rate": 5.934985180580536e-05, + "loss": 2.0988, + "step": 3500 + }, + { + "epoch": 0.15, + "eval_loss": 2.061530113220215, + "eval_runtime": 14.5323, + "eval_samples_per_second": 281.854, + "eval_steps_per_second": 17.616, + "step": 3500 + }, + { + "epoch": 0.15, + "learning_rate": 5.934552611543016e-05, + "loss": 2.034, + "step": 3510 + }, + { + "epoch": 0.15, + "learning_rate": 5.9341186241163875e-05, + "loss": 2.1009, + "step": 3520 + }, + { + "epoch": 0.15, + "learning_rate": 5.9336832185104155e-05, + "loss": 2.1099, + "step": 3530 + }, + { + "epoch": 0.15, + "learning_rate": 5.933246394935546e-05, + "loss": 2.0687, + "step": 3540 + }, + { + "epoch": 0.16, + "learning_rate": 5.9328081536029175e-05, + "loss": 2.0504, + "step": 3550 + }, + { + "epoch": 0.16, + "learning_rate": 5.9323684947243476e-05, + "loss": 2.0703, + "step": 3560 + }, + { + "epoch": 0.16, + "learning_rate": 5.931927418512341e-05, + "loss": 2.0398, + "step": 3570 + }, + { + "epoch": 0.16, + "learning_rate": 5.931484925180089e-05, + "loss": 2.1021, + "step": 3580 + }, + { + "epoch": 0.16, + "learning_rate": 5.931041014941467e-05, + "loss": 2.0889, + "step": 3590 + }, + { + "epoch": 0.16, + "learning_rate": 5.930595688011034e-05, + "loss": 2.092, + "step": 3600 + }, + { + "epoch": 0.16, + "eval_loss": 2.0562427043914795, + "eval_runtime": 11.8426, + "eval_samples_per_second": 345.869, + "eval_steps_per_second": 21.617, + "step": 3600 + }, + { + "epoch": 0.16, + "learning_rate": 5.930148944604035e-05, + "loss": 2.0708, + "step": 3610 + }, + { + "epoch": 0.16, + "learning_rate": 5.929700784936399e-05, + "loss": 2.0789, + "step": 3620 + }, + { + "epoch": 0.16, + "learning_rate": 5.9292512092247405e-05, + "loss": 2.0735, + "step": 3630 + }, + { + "epoch": 0.16, + "learning_rate": 5.928800217686358e-05, + "loss": 2.1018, + "step": 3640 + }, + { + "epoch": 0.16, + "learning_rate": 5.928347810539233e-05, + "loss": 2.0637, + "step": 3650 + }, + { + "epoch": 0.16, + "learning_rate": 5.927893988002033e-05, + "loss": 2.0389, + "step": 3660 + }, + { + "epoch": 0.16, + "learning_rate": 5.9274387502941094e-05, + "loss": 2.062, + "step": 3670 + }, + { + "epoch": 0.16, + "learning_rate": 5.926982097635497e-05, + "loss": 2.0766, + "step": 3680 + }, + { + "epoch": 0.16, + "learning_rate": 5.926524030246914e-05, + "loss": 2.0654, + "step": 3690 + }, + { + "epoch": 0.16, + "learning_rate": 5.926064548349764e-05, + "loss": 2.1046, + "step": 3700 + }, + { + "epoch": 0.16, + "eval_loss": 2.051886558532715, + "eval_runtime": 13.6021, + "eval_samples_per_second": 301.131, + "eval_steps_per_second": 18.821, + "step": 3700 + }, + { + "epoch": 0.16, + "learning_rate": 5.9256036521661316e-05, + "loss": 2.068, + "step": 3710 + }, + { + "epoch": 0.16, + "learning_rate": 5.9251413419187893e-05, + "loss": 2.0482, + "step": 3720 + }, + { + "epoch": 0.16, + "learning_rate": 5.9246776178311885e-05, + "loss": 2.0652, + "step": 3730 + }, + { + "epoch": 0.16, + "learning_rate": 5.9242124801274674e-05, + "loss": 2.1223, + "step": 3740 + }, + { + "epoch": 0.16, + "learning_rate": 5.9237459290324444e-05, + "loss": 2.0723, + "step": 3750 + }, + { + "epoch": 0.16, + "learning_rate": 5.9232779647716244e-05, + "loss": 2.054, + "step": 3760 + }, + { + "epoch": 0.17, + "learning_rate": 5.9228085875711926e-05, + "loss": 2.1379, + "step": 3770 + }, + { + "epoch": 0.17, + "learning_rate": 5.922337797658018e-05, + "loss": 2.0366, + "step": 3780 + }, + { + "epoch": 0.17, + "learning_rate": 5.921865595259654e-05, + "loss": 2.0602, + "step": 3790 + }, + { + "epoch": 0.17, + "learning_rate": 5.921391980604335e-05, + "loss": 2.0698, + "step": 3800 + }, + { + "epoch": 0.17, + "eval_loss": 2.047264814376831, + "eval_runtime": 15.2622, + "eval_samples_per_second": 268.375, + "eval_steps_per_second": 16.773, + "step": 3800 + }, + { + "epoch": 0.17, + "learning_rate": 5.920916953920976e-05, + "loss": 2.0555, + "step": 3810 + }, + { + "epoch": 0.17, + "learning_rate": 5.920440515439179e-05, + "loss": 2.121, + "step": 3820 + }, + { + "epoch": 0.17, + "learning_rate": 5.9199626653892264e-05, + "loss": 2.0358, + "step": 3830 + }, + { + "epoch": 0.17, + "learning_rate": 5.919483404002081e-05, + "loss": 2.0702, + "step": 3840 + }, + { + "epoch": 0.17, + "learning_rate": 5.91900273150939e-05, + "loss": 2.0706, + "step": 3850 + }, + { + "epoch": 0.17, + "learning_rate": 5.918520648143482e-05, + "loss": 2.0742, + "step": 3860 + }, + { + "epoch": 0.17, + "learning_rate": 5.918037154137369e-05, + "loss": 2.0806, + "step": 3870 + }, + { + "epoch": 0.17, + "learning_rate": 5.917552249724742e-05, + "loss": 2.0539, + "step": 3880 + }, + { + "epoch": 0.17, + "learning_rate": 5.917065935139975e-05, + "loss": 2.0925, + "step": 3890 + }, + { + "epoch": 0.17, + "learning_rate": 5.9165782106181244e-05, + "loss": 2.0628, + "step": 3900 + }, + { + "epoch": 0.17, + "eval_loss": 2.041888952255249, + "eval_runtime": 12.8465, + "eval_samples_per_second": 318.843, + "eval_steps_per_second": 19.928, + "step": 3900 + }, + { + "epoch": 0.17, + "learning_rate": 5.916089076394927e-05, + "loss": 2.0655, + "step": 3910 + }, + { + "epoch": 0.17, + "learning_rate": 5.915598532706801e-05, + "loss": 2.0998, + "step": 3920 + }, + { + "epoch": 0.17, + "learning_rate": 5.915106579790848e-05, + "loss": 2.0661, + "step": 3930 + }, + { + "epoch": 0.17, + "learning_rate": 5.914613217884846e-05, + "loss": 2.0268, + "step": 3940 + }, + { + "epoch": 0.17, + "learning_rate": 5.9141184472272596e-05, + "loss": 2.1178, + "step": 3950 + }, + { + "epoch": 0.17, + "learning_rate": 5.913622268057229e-05, + "loss": 2.0453, + "step": 3960 + }, + { + "epoch": 0.17, + "learning_rate": 5.913124680614581e-05, + "loss": 2.0846, + "step": 3970 + }, + { + "epoch": 0.17, + "learning_rate": 5.912625685139818e-05, + "loss": 2.1052, + "step": 3980 + }, + { + "epoch": 0.17, + "learning_rate": 5.912125281874125e-05, + "loss": 2.0422, + "step": 3990 + }, + { + "epoch": 0.18, + "learning_rate": 5.9116234710593674e-05, + "loss": 2.0528, + "step": 4000 + }, + { + "epoch": 0.18, + "eval_loss": 2.0398635864257812, + "eval_runtime": 12.2343, + "eval_samples_per_second": 334.796, + "eval_steps_per_second": 20.925, + "step": 4000 + }, + { + "epoch": 0.18, + "learning_rate": 5.9111202529380904e-05, + "loss": 2.0678, + "step": 4010 + }, + { + "epoch": 0.18, + "learning_rate": 5.910615627753521e-05, + "loss": 2.0423, + "step": 4020 + }, + { + "epoch": 0.18, + "learning_rate": 5.9101095957495654e-05, + "loss": 2.0402, + "step": 4030 + }, + { + "epoch": 0.18, + "learning_rate": 5.909602157170808e-05, + "loss": 2.0706, + "step": 4040 + }, + { + "epoch": 0.18, + "learning_rate": 5.909093312262516e-05, + "loss": 2.0197, + "step": 4050 + }, + { + "epoch": 0.18, + "learning_rate": 5.9085830612706336e-05, + "loss": 2.0763, + "step": 4060 + }, + { + "epoch": 0.18, + "learning_rate": 5.908071404441787e-05, + "loss": 2.1287, + "step": 4070 + }, + { + "epoch": 0.18, + "learning_rate": 5.9075583420232806e-05, + "loss": 2.0297, + "step": 4080 + }, + { + "epoch": 0.18, + "learning_rate": 5.907043874263098e-05, + "loss": 2.0862, + "step": 4090 + }, + { + "epoch": 0.18, + "learning_rate": 5.906528001409902e-05, + "loss": 2.0211, + "step": 4100 + }, + { + "epoch": 0.18, + "eval_loss": 2.0350661277770996, + "eval_runtime": 12.0443, + "eval_samples_per_second": 340.079, + "eval_steps_per_second": 21.255, + "step": 4100 + }, + { + "epoch": 0.18, + "learning_rate": 5.906010723713038e-05, + "loss": 2.0536, + "step": 4110 + }, + { + "epoch": 0.18, + "learning_rate": 5.905492041422523e-05, + "loss": 2.0348, + "step": 4120 + }, + { + "epoch": 0.18, + "learning_rate": 5.9049719547890594e-05, + "loss": 1.9906, + "step": 4130 + }, + { + "epoch": 0.18, + "learning_rate": 5.9044504640640276e-05, + "loss": 2.0626, + "step": 4140 + }, + { + "epoch": 0.18, + "learning_rate": 5.9039275694994844e-05, + "loss": 2.0877, + "step": 4150 + }, + { + "epoch": 0.18, + "learning_rate": 5.9034032713481654e-05, + "loss": 2.0266, + "step": 4160 + }, + { + "epoch": 0.18, + "learning_rate": 5.902877569863485e-05, + "loss": 2.13, + "step": 4170 + }, + { + "epoch": 0.18, + "learning_rate": 5.9023504652995384e-05, + "loss": 2.0176, + "step": 4180 + }, + { + "epoch": 0.18, + "learning_rate": 5.901821957911095e-05, + "loss": 2.0459, + "step": 4190 + }, + { + "epoch": 0.18, + "learning_rate": 5.9012920479536034e-05, + "loss": 2.0805, + "step": 4200 + }, + { + "epoch": 0.18, + "eval_loss": 2.0328736305236816, + "eval_runtime": 18.7677, + "eval_samples_per_second": 218.248, + "eval_steps_per_second": 13.64, + "step": 4200 + }, + { + "epoch": 0.18, + "learning_rate": 5.9007607356831934e-05, + "loss": 2.0437, + "step": 4210 + }, + { + "epoch": 0.18, + "learning_rate": 5.900228021356666e-05, + "loss": 2.0958, + "step": 4220 + }, + { + "epoch": 0.19, + "learning_rate": 5.899693905231507e-05, + "loss": 2.0722, + "step": 4230 + }, + { + "epoch": 0.19, + "learning_rate": 5.899158387565877e-05, + "loss": 2.0361, + "step": 4240 + }, + { + "epoch": 0.19, + "learning_rate": 5.89862146861861e-05, + "loss": 2.0838, + "step": 4250 + }, + { + "epoch": 0.19, + "learning_rate": 5.898083148649224e-05, + "loss": 2.0631, + "step": 4260 + }, + { + "epoch": 0.19, + "learning_rate": 5.8975434279179096e-05, + "loss": 2.0552, + "step": 4270 + }, + { + "epoch": 0.19, + "learning_rate": 5.897002306685536e-05, + "loss": 2.0305, + "step": 4280 + }, + { + "epoch": 0.19, + "learning_rate": 5.89645978521365e-05, + "loss": 2.1161, + "step": 4290 + }, + { + "epoch": 0.19, + "learning_rate": 5.895915863764473e-05, + "loss": 2.0235, + "step": 4300 + }, + { + "epoch": 0.19, + "eval_loss": 2.0307788848876953, + "eval_runtime": 11.5851, + "eval_samples_per_second": 353.557, + "eval_steps_per_second": 22.097, + "step": 4300 + }, + { + "epoch": 0.19, + "learning_rate": 5.895370542600906e-05, + "loss": 2.0678, + "step": 4310 + }, + { + "epoch": 0.19, + "learning_rate": 5.894823821986524e-05, + "loss": 2.0671, + "step": 4320 + }, + { + "epoch": 0.19, + "learning_rate": 5.894275702185579e-05, + "loss": 2.0524, + "step": 4330 + }, + { + "epoch": 0.19, + "learning_rate": 5.893726183463001e-05, + "loss": 2.0369, + "step": 4340 + }, + { + "epoch": 0.19, + "learning_rate": 5.893175266084394e-05, + "loss": 2.0658, + "step": 4350 + }, + { + "epoch": 0.19, + "learning_rate": 5.892622950316039e-05, + "loss": 2.0488, + "step": 4360 + }, + { + "epoch": 0.19, + "learning_rate": 5.8920692364248926e-05, + "loss": 2.0644, + "step": 4370 + }, + { + "epoch": 0.19, + "learning_rate": 5.8915141246785875e-05, + "loss": 2.0313, + "step": 4380 + }, + { + "epoch": 0.19, + "learning_rate": 5.890957615345433e-05, + "loss": 2.0117, + "step": 4390 + }, + { + "epoch": 0.19, + "learning_rate": 5.89039970869441e-05, + "loss": 2.0123, + "step": 4400 + }, + { + "epoch": 0.19, + "eval_loss": 2.027006149291992, + "eval_runtime": 20.8382, + "eval_samples_per_second": 196.562, + "eval_steps_per_second": 12.285, + "step": 4400 + }, + { + "epoch": 0.19, + "learning_rate": 5.88984040499518e-05, + "loss": 2.0617, + "step": 4410 + }, + { + "epoch": 0.19, + "learning_rate": 5.889279704518077e-05, + "loss": 2.0394, + "step": 4420 + }, + { + "epoch": 0.19, + "learning_rate": 5.888717607534109e-05, + "loss": 2.058, + "step": 4430 + }, + { + "epoch": 0.19, + "learning_rate": 5.888154114314961e-05, + "loss": 2.1012, + "step": 4440 + }, + { + "epoch": 0.19, + "learning_rate": 5.887589225132994e-05, + "loss": 2.0424, + "step": 4450 + }, + { + "epoch": 0.2, + "learning_rate": 5.887022940261241e-05, + "loss": 2.0011, + "step": 4460 + }, + { + "epoch": 0.2, + "learning_rate": 5.886455259973408e-05, + "loss": 2.1042, + "step": 4470 + }, + { + "epoch": 0.2, + "learning_rate": 5.8858861845438824e-05, + "loss": 2.0368, + "step": 4480 + }, + { + "epoch": 0.2, + "learning_rate": 5.885315714247719e-05, + "loss": 2.0797, + "step": 4490 + }, + { + "epoch": 0.2, + "learning_rate": 5.8847438493606504e-05, + "loss": 2.0964, + "step": 4500 + }, + { + "epoch": 0.2, + "eval_loss": 2.023592710494995, + "eval_runtime": 13.8773, + "eval_samples_per_second": 295.157, + "eval_steps_per_second": 18.447, + "step": 4500 + }, + { + "epoch": 0.2, + "learning_rate": 5.884170590159081e-05, + "loss": 2.079, + "step": 4510 + }, + { + "epoch": 0.2, + "learning_rate": 5.8835959369200925e-05, + "loss": 2.0535, + "step": 4520 + }, + { + "epoch": 0.2, + "learning_rate": 5.883019889921436e-05, + "loss": 2.0473, + "step": 4530 + }, + { + "epoch": 0.2, + "learning_rate": 5.882442449441539e-05, + "loss": 2.0421, + "step": 4540 + }, + { + "epoch": 0.2, + "learning_rate": 5.881863615759503e-05, + "loss": 2.044, + "step": 4550 + }, + { + "epoch": 0.2, + "learning_rate": 5.8812833891551024e-05, + "loss": 2.076, + "step": 4560 + }, + { + "epoch": 0.2, + "learning_rate": 5.880701769908782e-05, + "loss": 2.0583, + "step": 4570 + }, + { + "epoch": 0.2, + "learning_rate": 5.880118758301665e-05, + "loss": 2.0261, + "step": 4580 + }, + { + "epoch": 0.2, + "learning_rate": 5.879534354615543e-05, + "loss": 2.0434, + "step": 4590 + }, + { + "epoch": 0.2, + "learning_rate": 5.878948559132882e-05, + "loss": 2.0839, + "step": 4600 + }, + { + "epoch": 0.2, + "eval_loss": 2.0206034183502197, + "eval_runtime": 13.1899, + "eval_samples_per_second": 310.54, + "eval_steps_per_second": 19.409, + "step": 4600 + }, + { + "epoch": 0.2, + "learning_rate": 5.8783613721368216e-05, + "loss": 2.0191, + "step": 4610 + }, + { + "epoch": 0.2, + "learning_rate": 5.877772793911173e-05, + "loss": 2.037, + "step": 4620 + }, + { + "epoch": 0.2, + "learning_rate": 5.8771828247404204e-05, + "loss": 2.0319, + "step": 4630 + }, + { + "epoch": 0.2, + "learning_rate": 5.87659146490972e-05, + "loss": 2.0892, + "step": 4640 + }, + { + "epoch": 0.2, + "learning_rate": 5.8759987147048997e-05, + "loss": 2.0116, + "step": 4650 + }, + { + "epoch": 0.2, + "learning_rate": 5.87540457441246e-05, + "loss": 2.0725, + "step": 4660 + }, + { + "epoch": 0.2, + "learning_rate": 5.874809044319573e-05, + "loss": 2.0583, + "step": 4670 + }, + { + "epoch": 0.2, + "learning_rate": 5.8742121247140835e-05, + "loss": 2.0341, + "step": 4680 + }, + { + "epoch": 0.21, + "learning_rate": 5.873613815884506e-05, + "loss": 2.0719, + "step": 4690 + }, + { + "epoch": 0.21, + "learning_rate": 5.8730141181200284e-05, + "loss": 2.0736, + "step": 4700 + }, + { + "epoch": 0.21, + "eval_loss": 2.0166029930114746, + "eval_runtime": 13.4827, + "eval_samples_per_second": 303.798, + "eval_steps_per_second": 18.987, + "step": 4700 + }, + { + "epoch": 0.21, + "learning_rate": 5.872413031710509e-05, + "loss": 2.0566, + "step": 4710 + }, + { + "epoch": 0.21, + "learning_rate": 5.871810556946478e-05, + "loss": 2.0676, + "step": 4720 + }, + { + "epoch": 0.21, + "learning_rate": 5.871206694119134e-05, + "loss": 2.0385, + "step": 4730 + }, + { + "epoch": 0.21, + "learning_rate": 5.870601443520351e-05, + "loss": 2.0604, + "step": 4740 + }, + { + "epoch": 0.21, + "learning_rate": 5.869994805442669e-05, + "loss": 2.0338, + "step": 4750 + }, + { + "epoch": 0.21, + "learning_rate": 5.869386780179303e-05, + "loss": 1.9712, + "step": 4760 + }, + { + "epoch": 0.21, + "learning_rate": 5.868777368024136e-05, + "loss": 2.0208, + "step": 4770 + }, + { + "epoch": 0.21, + "learning_rate": 5.868166569271721e-05, + "loss": 2.0479, + "step": 4780 + }, + { + "epoch": 0.21, + "learning_rate": 5.867554384217282e-05, + "loss": 2.0104, + "step": 4790 + }, + { + "epoch": 0.21, + "learning_rate": 5.866940813156714e-05, + "loss": 2.0591, + "step": 4800 + }, + { + "epoch": 0.21, + "eval_loss": 2.0134992599487305, + "eval_runtime": 11.7962, + "eval_samples_per_second": 347.232, + "eval_steps_per_second": 21.702, + "step": 4800 + }, + { + "epoch": 0.21, + "learning_rate": 5.8663258563865804e-05, + "loss": 2.041, + "step": 4810 + }, + { + "epoch": 0.21, + "learning_rate": 5.865709514204115e-05, + "loss": 1.9956, + "step": 4820 + }, + { + "epoch": 0.21, + "learning_rate": 5.865091786907221e-05, + "loss": 2.0447, + "step": 4830 + }, + { + "epoch": 0.21, + "learning_rate": 5.864472674794471e-05, + "loss": 2.022, + "step": 4840 + }, + { + "epoch": 0.21, + "learning_rate": 5.863852178165108e-05, + "loss": 1.9585, + "step": 4850 + }, + { + "epoch": 0.21, + "learning_rate": 5.8632302973190424e-05, + "loss": 2.058, + "step": 4860 + }, + { + "epoch": 0.21, + "learning_rate": 5.862607032556854e-05, + "loss": 2.0582, + "step": 4870 + }, + { + "epoch": 0.21, + "learning_rate": 5.861982384179794e-05, + "loss": 2.0257, + "step": 4880 + }, + { + "epoch": 0.21, + "learning_rate": 5.86135635248978e-05, + "loss": 1.9781, + "step": 4890 + }, + { + "epoch": 0.21, + "learning_rate": 5.860728937789398e-05, + "loss": 2.0381, + "step": 4900 + }, + { + "epoch": 0.21, + "eval_loss": 2.0108816623687744, + "eval_runtime": 11.6358, + "eval_samples_per_second": 352.018, + "eval_steps_per_second": 22.001, + "step": 4900 + }, + { + "epoch": 0.21, + "learning_rate": 5.860100140381903e-05, + "loss": 2.0079, + "step": 4910 + }, + { + "epoch": 0.22, + "learning_rate": 5.8594699605712184e-05, + "loss": 2.0501, + "step": 4920 + }, + { + "epoch": 0.22, + "learning_rate": 5.858838398661938e-05, + "loss": 2.0088, + "step": 4930 + }, + { + "epoch": 0.22, + "learning_rate": 5.8582054549593184e-05, + "loss": 1.9654, + "step": 4940 + }, + { + "epoch": 0.22, + "learning_rate": 5.8575711297692886e-05, + "loss": 2.0455, + "step": 4950 + }, + { + "epoch": 0.22, + "learning_rate": 5.8569354233984445e-05, + "loss": 2.0596, + "step": 4960 + }, + { + "epoch": 0.22, + "learning_rate": 5.856298336154048e-05, + "loss": 1.9902, + "step": 4970 + }, + { + "epoch": 0.22, + "learning_rate": 5.855659868344029e-05, + "loss": 1.9897, + "step": 4980 + }, + { + "epoch": 0.22, + "learning_rate": 5.8550200202769856e-05, + "loss": 2.0454, + "step": 4990 + }, + { + "epoch": 0.22, + "learning_rate": 5.854378792262183e-05, + "loss": 2.0479, + "step": 5000 + }, + { + "epoch": 0.22, + "eval_loss": 2.0080699920654297, + "eval_runtime": 11.7197, + "eval_samples_per_second": 349.497, + "eval_steps_per_second": 21.844, + "step": 5000 + }, + { + "epoch": 0.22, + "learning_rate": 5.853736184609553e-05, + "loss": 2.037, + "step": 5010 + }, + { + "epoch": 0.22, + "learning_rate": 5.853092197629693e-05, + "loss": 1.9935, + "step": 5020 + }, + { + "epoch": 0.22, + "learning_rate": 5.852446831633869e-05, + "loss": 2.0225, + "step": 5030 + }, + { + "epoch": 0.22, + "learning_rate": 5.851800086934013e-05, + "loss": 2.0627, + "step": 5040 + }, + { + "epoch": 0.22, + "learning_rate": 5.851151963842721e-05, + "loss": 2.0333, + "step": 5050 + }, + { + "epoch": 0.22, + "learning_rate": 5.85050246267326e-05, + "loss": 2.0602, + "step": 5060 + }, + { + "epoch": 0.22, + "learning_rate": 5.849851583739559e-05, + "loss": 2.0367, + "step": 5070 + }, + { + "epoch": 0.22, + "learning_rate": 5.849199327356215e-05, + "loss": 2.0453, + "step": 5080 + }, + { + "epoch": 0.22, + "learning_rate": 5.84854569383849e-05, + "loss": 2.0275, + "step": 5090 + }, + { + "epoch": 0.22, + "learning_rate": 5.847890683502312e-05, + "loss": 2.0145, + "step": 5100 + }, + { + "epoch": 0.22, + "eval_loss": 2.0048532485961914, + "eval_runtime": 11.7026, + "eval_samples_per_second": 350.008, + "eval_steps_per_second": 21.875, + "step": 5100 + }, + { + "epoch": 0.22, + "learning_rate": 5.8472342966642735e-05, + "loss": 2.0088, + "step": 5110 + }, + { + "epoch": 0.22, + "learning_rate": 5.8465765336416336e-05, + "loss": 2.0097, + "step": 5120 + }, + { + "epoch": 0.22, + "learning_rate": 5.8459173947523165e-05, + "loss": 2.002, + "step": 5130 + }, + { + "epoch": 0.23, + "learning_rate": 5.84525688031491e-05, + "loss": 2.007, + "step": 5140 + }, + { + "epoch": 0.23, + "learning_rate": 5.8445949906486674e-05, + "loss": 2.0457, + "step": 5150 + }, + { + "epoch": 0.23, + "learning_rate": 5.843931726073509e-05, + "loss": 2.0467, + "step": 5160 + }, + { + "epoch": 0.23, + "learning_rate": 5.843267086910015e-05, + "loss": 1.9947, + "step": 5170 + }, + { + "epoch": 0.23, + "learning_rate": 5.8426010734794346e-05, + "loss": 2.0507, + "step": 5180 + }, + { + "epoch": 0.23, + "learning_rate": 5.84193368610368e-05, + "loss": 2.0486, + "step": 5190 + }, + { + "epoch": 0.23, + "learning_rate": 5.841264925105323e-05, + "loss": 2.0231, + "step": 5200 + }, + { + "epoch": 0.23, + "eval_loss": 2.0028250217437744, + "eval_runtime": 11.7989, + "eval_samples_per_second": 347.151, + "eval_steps_per_second": 21.697, + "step": 5200 + }, + { + "epoch": 0.23, + "learning_rate": 5.840594790807607e-05, + "loss": 1.9965, + "step": 5210 + }, + { + "epoch": 0.23, + "learning_rate": 5.8399232835344335e-05, + "loss": 1.9954, + "step": 5220 + }, + { + "epoch": 0.23, + "learning_rate": 5.83925040361037e-05, + "loss": 2.0133, + "step": 5230 + }, + { + "epoch": 0.23, + "learning_rate": 5.838576151360646e-05, + "loss": 1.9867, + "step": 5240 + }, + { + "epoch": 0.23, + "learning_rate": 5.837900527111156e-05, + "loss": 2.0275, + "step": 5250 + }, + { + "epoch": 0.23, + "learning_rate": 5.837223531188456e-05, + "loss": 2.0276, + "step": 5260 + }, + { + "epoch": 0.23, + "learning_rate": 5.836545163919767e-05, + "loss": 2.0017, + "step": 5270 + }, + { + "epoch": 0.23, + "learning_rate": 5.83586542563297e-05, + "loss": 2.0644, + "step": 5280 + }, + { + "epoch": 0.23, + "learning_rate": 5.835184316656612e-05, + "loss": 1.9864, + "step": 5290 + }, + { + "epoch": 0.23, + "learning_rate": 5.834501837319899e-05, + "loss": 2.0212, + "step": 5300 + }, + { + "epoch": 0.23, + "eval_loss": 2.001617670059204, + "eval_runtime": 11.8117, + "eval_samples_per_second": 346.776, + "eval_steps_per_second": 21.673, + "step": 5300 + }, + { + "epoch": 0.23, + "learning_rate": 5.8338179879527034e-05, + "loss": 2.0293, + "step": 5310 + }, + { + "epoch": 0.23, + "learning_rate": 5.833132768885555e-05, + "loss": 2.0081, + "step": 5320 + }, + { + "epoch": 0.23, + "learning_rate": 5.83244618044965e-05, + "loss": 2.0506, + "step": 5330 + }, + { + "epoch": 0.23, + "learning_rate": 5.831758222976843e-05, + "loss": 2.0314, + "step": 5340 + }, + { + "epoch": 0.23, + "learning_rate": 5.8310688967996534e-05, + "loss": 2.0002, + "step": 5350 + }, + { + "epoch": 0.23, + "learning_rate": 5.830378202251261e-05, + "loss": 2.0547, + "step": 5360 + }, + { + "epoch": 0.24, + "learning_rate": 5.829686139665505e-05, + "loss": 2.006, + "step": 5370 + }, + { + "epoch": 0.24, + "learning_rate": 5.8289927093768885e-05, + "loss": 2.0277, + "step": 5380 + }, + { + "epoch": 0.24, + "learning_rate": 5.8282979117205745e-05, + "loss": 2.0159, + "step": 5390 + }, + { + "epoch": 0.24, + "learning_rate": 5.827601747032387e-05, + "loss": 2.0505, + "step": 5400 + }, + { + "epoch": 0.24, + "eval_loss": 1.9988398551940918, + "eval_runtime": 11.6914, + "eval_samples_per_second": 350.342, + "eval_steps_per_second": 21.896, + "step": 5400 + }, + { + "epoch": 0.24, + "learning_rate": 5.8269042156488106e-05, + "loss": 2.0138, + "step": 5410 + }, + { + "epoch": 0.24, + "learning_rate": 5.826205317906991e-05, + "loss": 2.0205, + "step": 5420 + }, + { + "epoch": 0.24, + "learning_rate": 5.825505054144735e-05, + "loss": 2.0279, + "step": 5430 + }, + { + "epoch": 0.24, + "learning_rate": 5.8248034247005075e-05, + "loss": 2.0387, + "step": 5440 + }, + { + "epoch": 0.24, + "learning_rate": 5.8241004299134345e-05, + "loss": 2.0103, + "step": 5450 + }, + { + "epoch": 0.24, + "learning_rate": 5.8233960701233026e-05, + "loss": 1.9852, + "step": 5460 + }, + { + "epoch": 0.24, + "learning_rate": 5.822690345670558e-05, + "loss": 2.0353, + "step": 5470 + }, + { + "epoch": 0.24, + "learning_rate": 5.821983256896305e-05, + "loss": 2.0069, + "step": 5480 + }, + { + "epoch": 0.24, + "learning_rate": 5.821274804142309e-05, + "loss": 2.0343, + "step": 5490 + }, + { + "epoch": 0.24, + "learning_rate": 5.820564987750994e-05, + "loss": 1.9844, + "step": 5500 + }, + { + "epoch": 0.24, + "eval_loss": 1.9970853328704834, + "eval_runtime": 11.77, + "eval_samples_per_second": 348.004, + "eval_steps_per_second": 21.75, + "step": 5500 + }, + { + "epoch": 0.24, + "learning_rate": 5.8198538080654456e-05, + "loss": 2.0359, + "step": 5510 + }, + { + "epoch": 0.24, + "learning_rate": 5.819141265429402e-05, + "loss": 2.05, + "step": 5520 + }, + { + "epoch": 0.24, + "learning_rate": 5.818427360187267e-05, + "loss": 1.985, + "step": 5530 + }, + { + "epoch": 0.24, + "learning_rate": 5.817712092684099e-05, + "loss": 2.005, + "step": 5540 + }, + { + "epoch": 0.24, + "learning_rate": 5.816995463265615e-05, + "loss": 2.0303, + "step": 5550 + }, + { + "epoch": 0.24, + "learning_rate": 5.816277472278194e-05, + "loss": 2.0114, + "step": 5560 + }, + { + "epoch": 0.24, + "learning_rate": 5.815558120068868e-05, + "loss": 2.0188, + "step": 5570 + }, + { + "epoch": 0.24, + "learning_rate": 5.814837406985331e-05, + "loss": 2.0357, + "step": 5580 + }, + { + "epoch": 0.24, + "learning_rate": 5.8141153333759316e-05, + "loss": 2.0272, + "step": 5590 + }, + { + "epoch": 0.25, + "learning_rate": 5.8133918995896784e-05, + "loss": 2.0311, + "step": 5600 + }, + { + "epoch": 0.25, + "eval_loss": 1.993647575378418, + "eval_runtime": 13.7808, + "eval_samples_per_second": 297.225, + "eval_steps_per_second": 18.577, + "step": 5600 + }, + { + "epoch": 0.25, + "learning_rate": 5.8126671059762356e-05, + "loss": 2.0287, + "step": 5610 + }, + { + "epoch": 0.25, + "learning_rate": 5.811940952885927e-05, + "loss": 2.0618, + "step": 5620 + }, + { + "epoch": 0.25, + "learning_rate": 5.81121344066973e-05, + "loss": 2.025, + "step": 5630 + }, + { + "epoch": 0.25, + "learning_rate": 5.810484569679283e-05, + "loss": 1.9983, + "step": 5640 + }, + { + "epoch": 0.25, + "learning_rate": 5.8097543402668785e-05, + "loss": 1.9824, + "step": 5650 + }, + { + "epoch": 0.25, + "learning_rate": 5.809022752785465e-05, + "loss": 1.9815, + "step": 5660 + }, + { + "epoch": 0.25, + "learning_rate": 5.8082898075886495e-05, + "loss": 2.0088, + "step": 5670 + }, + { + "epoch": 0.25, + "learning_rate": 5.807555505030695e-05, + "loss": 2.0073, + "step": 5680 + }, + { + "epoch": 0.25, + "learning_rate": 5.80681984546652e-05, + "loss": 1.9424, + "step": 5690 + }, + { + "epoch": 0.25, + "learning_rate": 5.806082829251696e-05, + "loss": 2.0114, + "step": 5700 + }, + { + "epoch": 0.25, + "eval_loss": 1.9904594421386719, + "eval_runtime": 12.2556, + "eval_samples_per_second": 334.213, + "eval_steps_per_second": 20.888, + "step": 5700 + }, + { + "epoch": 0.25, + "learning_rate": 5.805344456742457e-05, + "loss": 2.015, + "step": 5710 + }, + { + "epoch": 0.25, + "learning_rate": 5.804604728295686e-05, + "loss": 2.0413, + "step": 5720 + }, + { + "epoch": 0.25, + "learning_rate": 5.803863644268925e-05, + "loss": 2.0054, + "step": 5730 + }, + { + "epoch": 0.25, + "learning_rate": 5.803121205020369e-05, + "loss": 1.9972, + "step": 5740 + }, + { + "epoch": 0.25, + "learning_rate": 5.802377410908871e-05, + "loss": 2.007, + "step": 5750 + }, + { + "epoch": 0.25, + "learning_rate": 5.801632262293935e-05, + "loss": 1.9748, + "step": 5760 + }, + { + "epoch": 0.25, + "learning_rate": 5.8008857595357234e-05, + "loss": 2.0488, + "step": 5770 + }, + { + "epoch": 0.25, + "learning_rate": 5.8001379029950515e-05, + "loss": 2.018, + "step": 5780 + }, + { + "epoch": 0.25, + "learning_rate": 5.799388693033387e-05, + "loss": 1.9982, + "step": 5790 + }, + { + "epoch": 0.25, + "learning_rate": 5.798638130012856e-05, + "loss": 2.0603, + "step": 5800 + }, + { + "epoch": 0.25, + "eval_loss": 1.9879425764083862, + "eval_runtime": 19.1497, + "eval_samples_per_second": 213.894, + "eval_steps_per_second": 13.368, + "step": 5800 + }, + { + "epoch": 0.25, + "learning_rate": 5.7978862142962354e-05, + "loss": 2.023, + "step": 5810 + }, + { + "epoch": 0.25, + "learning_rate": 5.7971329462469565e-05, + "loss": 2.0262, + "step": 5820 + }, + { + "epoch": 0.26, + "learning_rate": 5.7963783262291046e-05, + "loss": 1.9969, + "step": 5830 + }, + { + "epoch": 0.26, + "learning_rate": 5.795622354607419e-05, + "loss": 2.0213, + "step": 5840 + }, + { + "epoch": 0.26, + "learning_rate": 5.79486503174729e-05, + "loss": 1.9825, + "step": 5850 + }, + { + "epoch": 0.26, + "learning_rate": 5.7941063580147637e-05, + "loss": 2.0005, + "step": 5860 + }, + { + "epoch": 0.26, + "learning_rate": 5.793346333776539e-05, + "loss": 2.0043, + "step": 5870 + }, + { + "epoch": 0.26, + "learning_rate": 5.7925849593999656e-05, + "loss": 1.9907, + "step": 5880 + }, + { + "epoch": 0.26, + "learning_rate": 5.791822235253046e-05, + "loss": 1.9813, + "step": 5890 + }, + { + "epoch": 0.26, + "learning_rate": 5.7910581617044375e-05, + "loss": 1.987, + "step": 5900 + }, + { + "epoch": 0.26, + "eval_loss": 1.9848577976226807, + "eval_runtime": 13.9355, + "eval_samples_per_second": 293.926, + "eval_steps_per_second": 18.37, + "step": 5900 + }, + { + "epoch": 0.26, + "learning_rate": 5.790292739123448e-05, + "loss": 2.0303, + "step": 5910 + }, + { + "epoch": 0.26, + "learning_rate": 5.789525967880037e-05, + "loss": 1.9622, + "step": 5920 + }, + { + "epoch": 0.26, + "learning_rate": 5.788757848344816e-05, + "loss": 1.9876, + "step": 5930 + }, + { + "epoch": 0.26, + "learning_rate": 5.787988380889048e-05, + "loss": 1.9844, + "step": 5940 + }, + { + "epoch": 0.26, + "learning_rate": 5.7872175658846495e-05, + "loss": 1.9737, + "step": 5950 + }, + { + "epoch": 0.26, + "learning_rate": 5.7864454037041864e-05, + "loss": 1.992, + "step": 5960 + }, + { + "epoch": 0.26, + "learning_rate": 5.785671894720876e-05, + "loss": 2.0209, + "step": 5970 + }, + { + "epoch": 0.26, + "learning_rate": 5.784897039308587e-05, + "loss": 1.9725, + "step": 5980 + }, + { + "epoch": 0.26, + "learning_rate": 5.7841208378418386e-05, + "loss": 1.9777, + "step": 5990 + }, + { + "epoch": 0.26, + "learning_rate": 5.783343290695801e-05, + "loss": 2.0128, + "step": 6000 + }, + { + "epoch": 0.26, + "eval_loss": 1.9819371700286865, + "eval_runtime": 12.081, + "eval_samples_per_second": 339.046, + "eval_steps_per_second": 21.19, + "step": 6000 + }, + { + "epoch": 0.26, + "learning_rate": 5.7825643982462926e-05, + "loss": 1.9872, + "step": 6010 + }, + { + "epoch": 0.26, + "learning_rate": 5.781784160869786e-05, + "loss": 2.0036, + "step": 6020 + }, + { + "epoch": 0.26, + "learning_rate": 5.7810025789434014e-05, + "loss": 1.984, + "step": 6030 + }, + { + "epoch": 0.26, + "learning_rate": 5.780219652844909e-05, + "loss": 1.9859, + "step": 6040 + }, + { + "epoch": 0.26, + "learning_rate": 5.77943538295273e-05, + "loss": 2.0032, + "step": 6050 + }, + { + "epoch": 0.27, + "learning_rate": 5.778649769645932e-05, + "loss": 2.0241, + "step": 6060 + }, + { + "epoch": 0.27, + "learning_rate": 5.7778628133042346e-05, + "loss": 2.0075, + "step": 6070 + }, + { + "epoch": 0.27, + "learning_rate": 5.7770745143080076e-05, + "loss": 1.9937, + "step": 6080 + }, + { + "epoch": 0.27, + "learning_rate": 5.776284873038266e-05, + "loss": 2.0228, + "step": 6090 + }, + { + "epoch": 0.27, + "learning_rate": 5.775493889876677e-05, + "loss": 1.9932, + "step": 6100 + }, + { + "epoch": 0.27, + "eval_loss": 1.9792561531066895, + "eval_runtime": 15.4668, + "eval_samples_per_second": 264.825, + "eval_steps_per_second": 16.552, + "step": 6100 + }, + { + "epoch": 0.27, + "learning_rate": 5.774701565205553e-05, + "loss": 1.9778, + "step": 6110 + }, + { + "epoch": 0.27, + "learning_rate": 5.77390789940786e-05, + "loss": 2.0101, + "step": 6120 + }, + { + "epoch": 0.27, + "learning_rate": 5.773112892867207e-05, + "loss": 1.9633, + "step": 6130 + }, + { + "epoch": 0.27, + "learning_rate": 5.7723165459678554e-05, + "loss": 1.9755, + "step": 6140 + }, + { + "epoch": 0.27, + "learning_rate": 5.771518859094709e-05, + "loss": 2.0302, + "step": 6150 + }, + { + "epoch": 0.27, + "learning_rate": 5.770719832633324e-05, + "loss": 1.961, + "step": 6160 + }, + { + "epoch": 0.27, + "learning_rate": 5.7699194669699026e-05, + "loss": 1.9772, + "step": 6170 + }, + { + "epoch": 0.27, + "learning_rate": 5.7691177624912934e-05, + "loss": 1.9707, + "step": 6180 + }, + { + "epoch": 0.27, + "learning_rate": 5.768314719584995e-05, + "loss": 1.9767, + "step": 6190 + }, + { + "epoch": 0.27, + "learning_rate": 5.767510338639148e-05, + "loss": 1.9874, + "step": 6200 + }, + { + "epoch": 0.27, + "eval_loss": 1.9786502122879028, + "eval_runtime": 13.4256, + "eval_samples_per_second": 305.088, + "eval_steps_per_second": 19.068, + "step": 6200 + }, + { + "epoch": 0.27, + "learning_rate": 5.7667046200425445e-05, + "loss": 1.9606, + "step": 6210 + }, + { + "epoch": 0.27, + "learning_rate": 5.76589756418462e-05, + "loss": 1.9976, + "step": 6220 + }, + { + "epoch": 0.27, + "learning_rate": 5.765089171455459e-05, + "loss": 2.0065, + "step": 6230 + }, + { + "epoch": 0.27, + "learning_rate": 5.764279442245789e-05, + "loss": 2.0063, + "step": 6240 + }, + { + "epoch": 0.27, + "learning_rate": 5.763468376946987e-05, + "loss": 1.9764, + "step": 6250 + }, + { + "epoch": 0.27, + "learning_rate": 5.762655975951073e-05, + "loss": 2.0229, + "step": 6260 + }, + { + "epoch": 0.27, + "learning_rate": 5.761842239650713e-05, + "loss": 1.9482, + "step": 6270 + }, + { + "epoch": 0.27, + "learning_rate": 5.761027168439219e-05, + "loss": 2.0053, + "step": 6280 + }, + { + "epoch": 0.28, + "learning_rate": 5.7602107627105506e-05, + "loss": 2.0104, + "step": 6290 + }, + { + "epoch": 0.28, + "learning_rate": 5.759393022859308e-05, + "loss": 1.9935, + "step": 6300 + }, + { + "epoch": 0.28, + "eval_loss": 1.9752275943756104, + "eval_runtime": 13.0612, + "eval_samples_per_second": 313.6, + "eval_steps_per_second": 19.6, + "step": 6300 + }, + { + "epoch": 0.28, + "learning_rate": 5.758573949280737e-05, + "loss": 1.9944, + "step": 6310 + }, + { + "epoch": 0.28, + "learning_rate": 5.7577535423707325e-05, + "loss": 1.977, + "step": 6320 + }, + { + "epoch": 0.28, + "learning_rate": 5.7569318025258286e-05, + "loss": 1.9996, + "step": 6330 + }, + { + "epoch": 0.28, + "learning_rate": 5.7561087301432054e-05, + "loss": 1.9515, + "step": 6340 + }, + { + "epoch": 0.28, + "learning_rate": 5.755284325620688e-05, + "loss": 1.9979, + "step": 6350 + }, + { + "epoch": 0.28, + "learning_rate": 5.754458589356746e-05, + "loss": 1.9294, + "step": 6360 + }, + { + "epoch": 0.28, + "learning_rate": 5.753631521750489e-05, + "loss": 1.9437, + "step": 6370 + }, + { + "epoch": 0.28, + "learning_rate": 5.7528031232016744e-05, + "loss": 1.944, + "step": 6380 + }, + { + "epoch": 0.28, + "learning_rate": 5.751973394110701e-05, + "loss": 1.9716, + "step": 6390 + }, + { + "epoch": 0.28, + "learning_rate": 5.751142334878609e-05, + "loss": 1.9515, + "step": 6400 + }, + { + "epoch": 0.28, + "eval_loss": 1.972642183303833, + "eval_runtime": 13.9092, + "eval_samples_per_second": 294.481, + "eval_steps_per_second": 18.405, + "step": 6400 + }, + { + "epoch": 0.28, + "learning_rate": 5.7503099459070864e-05, + "loss": 1.9862, + "step": 6410 + }, + { + "epoch": 0.28, + "learning_rate": 5.749476227598457e-05, + "loss": 1.9563, + "step": 6420 + }, + { + "epoch": 0.28, + "learning_rate": 5.748641180355694e-05, + "loss": 2.0096, + "step": 6430 + }, + { + "epoch": 0.28, + "learning_rate": 5.747804804582409e-05, + "loss": 2.0074, + "step": 6440 + }, + { + "epoch": 0.28, + "learning_rate": 5.7469671006828546e-05, + "loss": 1.9559, + "step": 6450 + }, + { + "epoch": 0.28, + "learning_rate": 5.7461280690619305e-05, + "loss": 1.9869, + "step": 6460 + }, + { + "epoch": 0.28, + "learning_rate": 5.745287710125173e-05, + "loss": 1.9906, + "step": 6470 + }, + { + "epoch": 0.28, + "learning_rate": 5.7444460242787614e-05, + "loss": 2.0531, + "step": 6480 + }, + { + "epoch": 0.28, + "learning_rate": 5.7436030119295184e-05, + "loss": 1.9787, + "step": 6490 + }, + { + "epoch": 0.28, + "learning_rate": 5.742758673484905e-05, + "loss": 2.0037, + "step": 6500 + }, + { + "epoch": 0.28, + "eval_loss": 1.9702032804489136, + "eval_runtime": 12.617, + "eval_samples_per_second": 324.641, + "eval_steps_per_second": 20.29, + "step": 6500 + }, + { + "epoch": 0.28, + "learning_rate": 5.7419130093530255e-05, + "loss": 2.0547, + "step": 6510 + }, + { + "epoch": 0.29, + "learning_rate": 5.7410660199426236e-05, + "loss": 2.052, + "step": 6520 + }, + { + "epoch": 0.29, + "learning_rate": 5.740217705663083e-05, + "loss": 1.9391, + "step": 6530 + }, + { + "epoch": 0.29, + "learning_rate": 5.739368066924431e-05, + "loss": 1.9846, + "step": 6540 + }, + { + "epoch": 0.29, + "learning_rate": 5.7385171041373285e-05, + "loss": 2.0112, + "step": 6550 + }, + { + "epoch": 0.29, + "learning_rate": 5.7376648177130846e-05, + "loss": 1.9748, + "step": 6560 + }, + { + "epoch": 0.29, + "learning_rate": 5.736811208063642e-05, + "loss": 1.9662, + "step": 6570 + }, + { + "epoch": 0.29, + "learning_rate": 5.7359562756015864e-05, + "loss": 1.9933, + "step": 6580 + }, + { + "epoch": 0.29, + "learning_rate": 5.735100020740141e-05, + "loss": 2.0313, + "step": 6590 + }, + { + "epoch": 0.29, + "learning_rate": 5.734242443893168e-05, + "loss": 1.9313, + "step": 6600 + }, + { + "epoch": 0.29, + "eval_loss": 1.9688479900360107, + "eval_runtime": 12.9347, + "eval_samples_per_second": 316.667, + "eval_steps_per_second": 19.792, + "step": 6600 + }, + { + "epoch": 0.29, + "learning_rate": 5.73338354547517e-05, + "loss": 1.9515, + "step": 6610 + }, + { + "epoch": 0.29, + "learning_rate": 5.732523325901288e-05, + "loss": 1.954, + "step": 6620 + }, + { + "epoch": 0.29, + "learning_rate": 5.731661785587301e-05, + "loss": 2.0307, + "step": 6630 + }, + { + "epoch": 0.29, + "learning_rate": 5.730798924949626e-05, + "loss": 2.001, + "step": 6640 + }, + { + "epoch": 0.29, + "learning_rate": 5.729934744405319e-05, + "loss": 2.0087, + "step": 6650 + }, + { + "epoch": 0.29, + "learning_rate": 5.729069244372075e-05, + "loss": 2.0286, + "step": 6660 + }, + { + "epoch": 0.29, + "learning_rate": 5.728202425268224e-05, + "loss": 2.0152, + "step": 6670 + }, + { + "epoch": 0.29, + "learning_rate": 5.727334287512735e-05, + "loss": 1.9232, + "step": 6680 + }, + { + "epoch": 0.29, + "learning_rate": 5.7264648315252156e-05, + "loss": 1.976, + "step": 6690 + }, + { + "epoch": 0.29, + "learning_rate": 5.725594057725909e-05, + "loss": 1.968, + "step": 6700 + }, + { + "epoch": 0.29, + "eval_loss": 1.9677083492279053, + "eval_runtime": 12.2837, + "eval_samples_per_second": 333.451, + "eval_steps_per_second": 20.841, + "step": 6700 + }, + { + "epoch": 0.29, + "learning_rate": 5.724721966535695e-05, + "loss": 1.9615, + "step": 6710 + }, + { + "epoch": 0.29, + "learning_rate": 5.723848558376092e-05, + "loss": 1.9948, + "step": 6720 + }, + { + "epoch": 0.29, + "learning_rate": 5.722973833669252e-05, + "loss": 1.9942, + "step": 6730 + }, + { + "epoch": 0.3, + "learning_rate": 5.7220977928379684e-05, + "loss": 2.0166, + "step": 6740 + }, + { + "epoch": 0.3, + "learning_rate": 5.721220436305664e-05, + "loss": 1.9345, + "step": 6750 + }, + { + "epoch": 0.3, + "learning_rate": 5.720341764496404e-05, + "loss": 2.0107, + "step": 6760 + }, + { + "epoch": 0.3, + "learning_rate": 5.719461777834883e-05, + "loss": 1.9749, + "step": 6770 + }, + { + "epoch": 0.3, + "learning_rate": 5.7185804767464375e-05, + "loss": 2.0009, + "step": 6780 + }, + { + "epoch": 0.3, + "learning_rate": 5.717697861657035e-05, + "loss": 2.0079, + "step": 6790 + }, + { + "epoch": 0.3, + "learning_rate": 5.7168139329932796e-05, + "loss": 1.9954, + "step": 6800 + }, + { + "epoch": 0.3, + "eval_loss": 1.9654452800750732, + "eval_runtime": 12.8097, + "eval_samples_per_second": 319.759, + "eval_steps_per_second": 19.985, + "step": 6800 + }, + { + "epoch": 0.3, + "learning_rate": 5.71592869118241e-05, + "loss": 2.0057, + "step": 6810 + }, + { + "epoch": 0.3, + "learning_rate": 5.7150421366523e-05, + "loss": 1.9505, + "step": 6820 + }, + { + "epoch": 0.3, + "learning_rate": 5.7141542698314585e-05, + "loss": 2.0055, + "step": 6830 + }, + { + "epoch": 0.3, + "learning_rate": 5.713265091149025e-05, + "loss": 1.9874, + "step": 6840 + }, + { + "epoch": 0.3, + "learning_rate": 5.71237460103478e-05, + "loss": 1.9679, + "step": 6850 + }, + { + "epoch": 0.3, + "learning_rate": 5.711482799919129e-05, + "loss": 1.9643, + "step": 6860 + }, + { + "epoch": 0.3, + "learning_rate": 5.710589688233119e-05, + "loss": 2.0399, + "step": 6870 + }, + { + "epoch": 0.3, + "learning_rate": 5.709695266408426e-05, + "loss": 1.9797, + "step": 6880 + }, + { + "epoch": 0.3, + "learning_rate": 5.708799534877363e-05, + "loss": 1.9537, + "step": 6890 + }, + { + "epoch": 0.3, + "learning_rate": 5.707902494072871e-05, + "loss": 1.9182, + "step": 6900 + }, + { + "epoch": 0.3, + "eval_loss": 1.9642119407653809, + "eval_runtime": 13.2593, + "eval_samples_per_second": 308.916, + "eval_steps_per_second": 19.307, + "step": 6900 + }, + { + "epoch": 0.3, + "learning_rate": 5.707004144428526e-05, + "loss": 2.007, + "step": 6910 + }, + { + "epoch": 0.3, + "learning_rate": 5.706104486378539e-05, + "loss": 1.9933, + "step": 6920 + }, + { + "epoch": 0.3, + "learning_rate": 5.7052035203577516e-05, + "loss": 2.013, + "step": 6930 + }, + { + "epoch": 0.3, + "learning_rate": 5.704301246801636e-05, + "loss": 1.9395, + "step": 6940 + }, + { + "epoch": 0.3, + "learning_rate": 5.703397666146299e-05, + "loss": 1.9651, + "step": 6950 + }, + { + "epoch": 0.3, + "learning_rate": 5.7024927788284765e-05, + "loss": 1.9879, + "step": 6960 + }, + { + "epoch": 0.31, + "learning_rate": 5.7015865852855406e-05, + "loss": 1.9733, + "step": 6970 + }, + { + "epoch": 0.31, + "learning_rate": 5.700679085955488e-05, + "loss": 2.0089, + "step": 6980 + }, + { + "epoch": 0.31, + "learning_rate": 5.699770281276952e-05, + "loss": 2.0274, + "step": 6990 + }, + { + "epoch": 0.31, + "learning_rate": 5.6988601716891954e-05, + "loss": 1.9599, + "step": 7000 + }, + { + "epoch": 0.31, + "eval_loss": 1.9618194103240967, + "eval_runtime": 13.1445, + "eval_samples_per_second": 311.613, + "eval_steps_per_second": 19.476, + "step": 7000 + }, + { + "epoch": 0.31, + "learning_rate": 5.69794875763211e-05, + "loss": 1.9609, + "step": 7010 + }, + { + "epoch": 0.31, + "learning_rate": 5.6970360395462204e-05, + "loss": 1.9311, + "step": 7020 + }, + { + "epoch": 0.31, + "learning_rate": 5.69612201787268e-05, + "loss": 2.0024, + "step": 7030 + }, + { + "epoch": 0.31, + "learning_rate": 5.695206693053273e-05, + "loss": 1.9428, + "step": 7040 + }, + { + "epoch": 0.31, + "learning_rate": 5.694290065530414e-05, + "loss": 1.9586, + "step": 7050 + }, + { + "epoch": 0.31, + "learning_rate": 5.6933721357471455e-05, + "loss": 1.967, + "step": 7060 + }, + { + "epoch": 0.31, + "learning_rate": 5.692452904147141e-05, + "loss": 2.0423, + "step": 7070 + }, + { + "epoch": 0.31, + "learning_rate": 5.6915323711747023e-05, + "loss": 1.9693, + "step": 7080 + }, + { + "epoch": 0.31, + "learning_rate": 5.69061053727476e-05, + "loss": 2.001, + "step": 7090 + }, + { + "epoch": 0.31, + "learning_rate": 5.689687402892876e-05, + "loss": 1.9601, + "step": 7100 + }, + { + "epoch": 0.31, + "eval_loss": 1.959427833557129, + "eval_runtime": 12.6915, + "eval_samples_per_second": 322.737, + "eval_steps_per_second": 20.171, + "step": 7100 + }, + { + "epoch": 0.31, + "learning_rate": 5.688762968475237e-05, + "loss": 1.9979, + "step": 7110 + }, + { + "epoch": 0.31, + "learning_rate": 5.687837234468661e-05, + "loss": 1.9676, + "step": 7120 + }, + { + "epoch": 0.31, + "learning_rate": 5.686910201320592e-05, + "loss": 2.0003, + "step": 7130 + }, + { + "epoch": 0.31, + "learning_rate": 5.685981869479104e-05, + "loss": 1.9988, + "step": 7140 + }, + { + "epoch": 0.31, + "learning_rate": 5.685052239392897e-05, + "loss": 2.0026, + "step": 7150 + }, + { + "epoch": 0.31, + "learning_rate": 5.6841213115113e-05, + "loss": 1.9073, + "step": 7160 + }, + { + "epoch": 0.31, + "learning_rate": 5.683189086284268e-05, + "loss": 1.9597, + "step": 7170 + }, + { + "epoch": 0.31, + "learning_rate": 5.682255564162382e-05, + "loss": 1.9903, + "step": 7180 + }, + { + "epoch": 0.31, + "learning_rate": 5.6813207455968534e-05, + "loss": 2.0011, + "step": 7190 + }, + { + "epoch": 0.32, + "learning_rate": 5.680384631039519e-05, + "loss": 1.9978, + "step": 7200 + }, + { + "epoch": 0.32, + "eval_loss": 1.9591736793518066, + "eval_runtime": 14.3637, + "eval_samples_per_second": 285.163, + "eval_steps_per_second": 17.823, + "step": 7200 + }, + { + "epoch": 0.32, + "learning_rate": 5.679447220942838e-05, + "loss": 1.976, + "step": 7210 + }, + { + "epoch": 0.32, + "learning_rate": 5.6785085157599016e-05, + "loss": 1.9745, + "step": 7220 + }, + { + "epoch": 0.32, + "learning_rate": 5.677568515944424e-05, + "loss": 1.968, + "step": 7230 + }, + { + "epoch": 0.32, + "learning_rate": 5.676627221950743e-05, + "loss": 1.993, + "step": 7240 + }, + { + "epoch": 0.32, + "learning_rate": 5.675684634233828e-05, + "loss": 1.979, + "step": 7250 + }, + { + "epoch": 0.32, + "learning_rate": 5.674740753249268e-05, + "loss": 2.0276, + "step": 7260 + }, + { + "epoch": 0.32, + "learning_rate": 5.673795579453281e-05, + "loss": 1.9836, + "step": 7270 + }, + { + "epoch": 0.32, + "learning_rate": 5.672849113302705e-05, + "loss": 1.9735, + "step": 7280 + }, + { + "epoch": 0.32, + "learning_rate": 5.6719013552550084e-05, + "loss": 1.9811, + "step": 7290 + }, + { + "epoch": 0.32, + "learning_rate": 5.67095230576828e-05, + "loss": 1.9934, + "step": 7300 + }, + { + "epoch": 0.32, + "eval_loss": 1.9573659896850586, + "eval_runtime": 13.2709, + "eval_samples_per_second": 308.645, + "eval_steps_per_second": 19.29, + "step": 7300 + }, + { + "epoch": 0.32, + "learning_rate": 5.6700019653012354e-05, + "loss": 2.0308, + "step": 7310 + }, + { + "epoch": 0.32, + "learning_rate": 5.669050334313213e-05, + "loss": 1.9438, + "step": 7320 + }, + { + "epoch": 0.32, + "learning_rate": 5.668097413264173e-05, + "loss": 1.9725, + "step": 7330 + }, + { + "epoch": 0.32, + "learning_rate": 5.6671432026147035e-05, + "loss": 1.9757, + "step": 7340 + }, + { + "epoch": 0.32, + "learning_rate": 5.6661877028260107e-05, + "loss": 1.9539, + "step": 7350 + }, + { + "epoch": 0.32, + "learning_rate": 5.665230914359929e-05, + "loss": 1.998, + "step": 7360 + }, + { + "epoch": 0.32, + "learning_rate": 5.6642728376789124e-05, + "loss": 1.9701, + "step": 7370 + }, + { + "epoch": 0.32, + "learning_rate": 5.663313473246038e-05, + "loss": 1.9892, + "step": 7380 + }, + { + "epoch": 0.32, + "learning_rate": 5.662352821525007e-05, + "loss": 2.0117, + "step": 7390 + }, + { + "epoch": 0.32, + "learning_rate": 5.661390882980141e-05, + "loss": 2.0, + "step": 7400 + }, + { + "epoch": 0.32, + "eval_loss": 1.9556201696395874, + "eval_runtime": 13.8205, + "eval_samples_per_second": 296.372, + "eval_steps_per_second": 18.523, + "step": 7400 + }, + { + "epoch": 0.32, + "learning_rate": 5.660427658076384e-05, + "loss": 2.0104, + "step": 7410 + }, + { + "epoch": 0.32, + "learning_rate": 5.6594631472793035e-05, + "loss": 1.9395, + "step": 7420 + }, + { + "epoch": 0.33, + "learning_rate": 5.6584973510550844e-05, + "loss": 1.9631, + "step": 7430 + }, + { + "epoch": 0.33, + "learning_rate": 5.657530269870536e-05, + "loss": 1.9215, + "step": 7440 + }, + { + "epoch": 0.33, + "learning_rate": 5.6565619041930894e-05, + "loss": 1.9726, + "step": 7450 + }, + { + "epoch": 0.33, + "learning_rate": 5.655592254490795e-05, + "loss": 1.9995, + "step": 7460 + }, + { + "epoch": 0.33, + "learning_rate": 5.6546213212323235e-05, + "loss": 2.0179, + "step": 7470 + }, + { + "epoch": 0.33, + "learning_rate": 5.6536491048869666e-05, + "loss": 1.9594, + "step": 7480 + }, + { + "epoch": 0.33, + "learning_rate": 5.6526756059246365e-05, + "loss": 1.9821, + "step": 7490 + }, + { + "epoch": 0.33, + "learning_rate": 5.651700824815865e-05, + "loss": 1.9719, + "step": 7500 + }, + { + "epoch": 0.33, + "eval_loss": 1.9540364742279053, + "eval_runtime": 14.3302, + "eval_samples_per_second": 285.83, + "eval_steps_per_second": 17.864, + "step": 7500 + }, + { + "epoch": 0.33, + "learning_rate": 5.650724762031803e-05, + "loss": 1.9848, + "step": 7510 + }, + { + "epoch": 0.33, + "learning_rate": 5.6497474180442224e-05, + "loss": 1.9471, + "step": 7520 + }, + { + "epoch": 0.33, + "learning_rate": 5.648768793325513e-05, + "loss": 1.9697, + "step": 7530 + }, + { + "epoch": 0.33, + "learning_rate": 5.6477888883486844e-05, + "loss": 1.9522, + "step": 7540 + }, + { + "epoch": 0.33, + "learning_rate": 5.646807703587365e-05, + "loss": 1.9738, + "step": 7550 + }, + { + "epoch": 0.33, + "learning_rate": 5.645825239515799e-05, + "loss": 1.9944, + "step": 7560 + }, + { + "epoch": 0.33, + "learning_rate": 5.644841496608855e-05, + "loss": 2.0082, + "step": 7570 + }, + { + "epoch": 0.33, + "learning_rate": 5.643856475342014e-05, + "loss": 2.0052, + "step": 7580 + }, + { + "epoch": 0.33, + "learning_rate": 5.642870176191377e-05, + "loss": 1.9821, + "step": 7590 + }, + { + "epoch": 0.33, + "learning_rate": 5.641882599633663e-05, + "loss": 1.991, + "step": 7600 + }, + { + "epoch": 0.33, + "eval_loss": 1.951909065246582, + "eval_runtime": 12.5953, + "eval_samples_per_second": 325.201, + "eval_steps_per_second": 20.325, + "step": 7600 + }, + { + "epoch": 0.33, + "learning_rate": 5.6408937461462095e-05, + "loss": 2.0207, + "step": 7610 + }, + { + "epoch": 0.33, + "learning_rate": 5.639903616206967e-05, + "loss": 1.9723, + "step": 7620 + }, + { + "epoch": 0.33, + "learning_rate": 5.638912210294508e-05, + "loss": 1.9881, + "step": 7630 + }, + { + "epoch": 0.33, + "learning_rate": 5.6379195288880184e-05, + "loss": 1.955, + "step": 7640 + }, + { + "epoch": 0.33, + "learning_rate": 5.636925572467301e-05, + "loss": 1.9749, + "step": 7650 + }, + { + "epoch": 0.34, + "learning_rate": 5.635930341512778e-05, + "loss": 1.9815, + "step": 7660 + }, + { + "epoch": 0.34, + "learning_rate": 5.6349338365054825e-05, + "loss": 1.9807, + "step": 7670 + }, + { + "epoch": 0.34, + "learning_rate": 5.633936057927067e-05, + "loss": 1.9934, + "step": 7680 + }, + { + "epoch": 0.34, + "learning_rate": 5.632937006259799e-05, + "loss": 1.9536, + "step": 7690 + }, + { + "epoch": 0.34, + "learning_rate": 5.631936681986561e-05, + "loss": 1.9894, + "step": 7700 + }, + { + "epoch": 0.34, + "eval_loss": 1.9506793022155762, + "eval_runtime": 13.5546, + "eval_samples_per_second": 302.184, + "eval_steps_per_second": 18.887, + "step": 7700 + }, + { + "epoch": 0.34, + "learning_rate": 5.63093508559085e-05, + "loss": 1.9473, + "step": 7710 + }, + { + "epoch": 0.34, + "learning_rate": 5.6299322175567783e-05, + "loss": 1.98, + "step": 7720 + }, + { + "epoch": 0.34, + "learning_rate": 5.628928078369074e-05, + "loss": 1.9744, + "step": 7730 + }, + { + "epoch": 0.34, + "learning_rate": 5.6279226685130784e-05, + "loss": 1.9851, + "step": 7740 + }, + { + "epoch": 0.34, + "learning_rate": 5.626915988474746e-05, + "loss": 1.9756, + "step": 7750 + }, + { + "epoch": 0.34, + "learning_rate": 5.625908038740647e-05, + "loss": 2.035, + "step": 7760 + }, + { + "epoch": 0.34, + "learning_rate": 5.6248988197979664e-05, + "loss": 1.9488, + "step": 7770 + }, + { + "epoch": 0.34, + "learning_rate": 5.623888332134497e-05, + "loss": 1.9579, + "step": 7780 + }, + { + "epoch": 0.34, + "learning_rate": 5.622876576238652e-05, + "loss": 1.9861, + "step": 7790 + }, + { + "epoch": 0.34, + "learning_rate": 5.6218635525994545e-05, + "loss": 1.9895, + "step": 7800 + }, + { + "epoch": 0.34, + "eval_loss": 1.9489729404449463, + "eval_runtime": 12.8919, + "eval_samples_per_second": 317.719, + "eval_steps_per_second": 19.857, + "step": 7800 + }, + { + "epoch": 0.34, + "learning_rate": 5.620849261706537e-05, + "loss": 2.0395, + "step": 7810 + }, + { + "epoch": 0.34, + "learning_rate": 5.6198337040501505e-05, + "loss": 1.958, + "step": 7820 + }, + { + "epoch": 0.34, + "learning_rate": 5.618816880121155e-05, + "loss": 1.9912, + "step": 7830 + }, + { + "epoch": 0.34, + "learning_rate": 5.617798790411023e-05, + "loss": 1.9098, + "step": 7840 + }, + { + "epoch": 0.34, + "learning_rate": 5.616779435411838e-05, + "loss": 1.9905, + "step": 7850 + }, + { + "epoch": 0.34, + "learning_rate": 5.615758815616297e-05, + "loss": 1.9593, + "step": 7860 + }, + { + "epoch": 0.34, + "learning_rate": 5.614736931517707e-05, + "loss": 1.9738, + "step": 7870 + }, + { + "epoch": 0.34, + "learning_rate": 5.613713783609985e-05, + "loss": 1.9318, + "step": 7880 + }, + { + "epoch": 0.35, + "learning_rate": 5.612689372387662e-05, + "loss": 1.9924, + "step": 7890 + }, + { + "epoch": 0.35, + "learning_rate": 5.6116636983458766e-05, + "loss": 1.97, + "step": 7900 + }, + { + "epoch": 0.35, + "eval_loss": 1.947704553604126, + "eval_runtime": 12.113, + "eval_samples_per_second": 338.148, + "eval_steps_per_second": 21.134, + "step": 7900 + }, + { + "epoch": 0.35, + "learning_rate": 5.61063676198038e-05, + "loss": 1.9341, + "step": 7910 + }, + { + "epoch": 0.35, + "learning_rate": 5.609608563787532e-05, + "loss": 2.0017, + "step": 7920 + }, + { + "epoch": 0.35, + "learning_rate": 5.608579104264301e-05, + "loss": 1.9553, + "step": 7930 + }, + { + "epoch": 0.35, + "learning_rate": 5.607548383908269e-05, + "loss": 1.9659, + "step": 7940 + }, + { + "epoch": 0.35, + "learning_rate": 5.6065164032176255e-05, + "loss": 2.0098, + "step": 7950 + }, + { + "epoch": 0.35, + "learning_rate": 5.605483162691168e-05, + "loss": 1.9583, + "step": 7960 + }, + { + "epoch": 0.35, + "learning_rate": 5.6044486628283026e-05, + "loss": 1.8988, + "step": 7970 + }, + { + "epoch": 0.35, + "learning_rate": 5.6034129041290476e-05, + "loss": 1.9599, + "step": 7980 + }, + { + "epoch": 0.35, + "learning_rate": 5.6023758870940265e-05, + "loss": 1.9926, + "step": 7990 + }, + { + "epoch": 0.35, + "learning_rate": 5.601337612224472e-05, + "loss": 1.9334, + "step": 8000 + }, + { + "epoch": 0.35, + "eval_loss": 1.9458627700805664, + "eval_runtime": 13.6514, + "eval_samples_per_second": 300.043, + "eval_steps_per_second": 18.753, + "step": 8000 + }, + { + "epoch": 0.35, + "learning_rate": 5.600298080022224e-05, + "loss": 1.9365, + "step": 8010 + }, + { + "epoch": 0.35, + "learning_rate": 5.599257290989733e-05, + "loss": 2.0154, + "step": 8020 + }, + { + "epoch": 0.35, + "learning_rate": 5.5982152456300514e-05, + "loss": 1.9472, + "step": 8030 + }, + { + "epoch": 0.35, + "learning_rate": 5.5971719444468454e-05, + "loss": 1.9347, + "step": 8040 + }, + { + "epoch": 0.35, + "learning_rate": 5.5961273879443845e-05, + "loss": 1.9706, + "step": 8050 + }, + { + "epoch": 0.35, + "learning_rate": 5.595081576627544e-05, + "loss": 1.8983, + "step": 8060 + }, + { + "epoch": 0.35, + "learning_rate": 5.594034511001809e-05, + "loss": 1.9517, + "step": 8070 + }, + { + "epoch": 0.35, + "learning_rate": 5.5929861915732674e-05, + "loss": 2.0011, + "step": 8080 + }, + { + "epoch": 0.35, + "learning_rate": 5.591936618848617e-05, + "loss": 1.9648, + "step": 8090 + }, + { + "epoch": 0.35, + "learning_rate": 5.590885793335156e-05, + "loss": 1.9856, + "step": 8100 + }, + { + "epoch": 0.35, + "eval_loss": 1.9449169635772705, + "eval_runtime": 12.4323, + "eval_samples_per_second": 329.464, + "eval_steps_per_second": 20.591, + "step": 8100 + }, + { + "epoch": 0.36, + "learning_rate": 5.589833715540794e-05, + "loss": 1.9837, + "step": 8110 + }, + { + "epoch": 0.36, + "learning_rate": 5.588780385974042e-05, + "loss": 1.996, + "step": 8120 + }, + { + "epoch": 0.36, + "learning_rate": 5.5877258051440175e-05, + "loss": 1.9771, + "step": 8130 + }, + { + "epoch": 0.36, + "learning_rate": 5.5866699735604424e-05, + "loss": 1.9989, + "step": 8140 + }, + { + "epoch": 0.36, + "learning_rate": 5.5856128917336437e-05, + "loss": 1.99, + "step": 8150 + }, + { + "epoch": 0.36, + "learning_rate": 5.584554560174552e-05, + "loss": 2.0055, + "step": 8160 + }, + { + "epoch": 0.36, + "learning_rate": 5.5834949793947004e-05, + "loss": 1.9763, + "step": 8170 + }, + { + "epoch": 0.36, + "learning_rate": 5.5824341499062304e-05, + "loss": 1.9356, + "step": 8180 + }, + { + "epoch": 0.36, + "learning_rate": 5.581372072221882e-05, + "loss": 1.9056, + "step": 8190 + }, + { + "epoch": 0.36, + "learning_rate": 5.580308746855002e-05, + "loss": 1.9748, + "step": 8200 + }, + { + "epoch": 0.36, + "eval_loss": 1.9407728910446167, + "eval_runtime": 14.8862, + "eval_samples_per_second": 275.154, + "eval_steps_per_second": 17.197, + "step": 8200 + }, + { + "epoch": 0.36, + "learning_rate": 5.5792441743195385e-05, + "loss": 1.9251, + "step": 8210 + }, + { + "epoch": 0.36, + "learning_rate": 5.5781783551300435e-05, + "loss": 1.9767, + "step": 8220 + }, + { + "epoch": 0.36, + "learning_rate": 5.57711128980167e-05, + "loss": 1.9116, + "step": 8230 + }, + { + "epoch": 0.36, + "learning_rate": 5.576042978850174e-05, + "loss": 1.9413, + "step": 8240 + }, + { + "epoch": 0.36, + "learning_rate": 5.574973422791916e-05, + "loss": 1.9468, + "step": 8250 + }, + { + "epoch": 0.36, + "learning_rate": 5.573902622143852e-05, + "loss": 1.9562, + "step": 8260 + }, + { + "epoch": 0.36, + "learning_rate": 5.572830577423549e-05, + "loss": 1.9744, + "step": 8270 + }, + { + "epoch": 0.36, + "learning_rate": 5.571757289149165e-05, + "loss": 1.9422, + "step": 8280 + }, + { + "epoch": 0.36, + "learning_rate": 5.570682757839467e-05, + "loss": 1.9565, + "step": 8290 + }, + { + "epoch": 0.36, + "learning_rate": 5.569606984013819e-05, + "loss": 2.0056, + "step": 8300 + }, + { + "epoch": 0.36, + "eval_loss": 1.9395599365234375, + "eval_runtime": 11.71, + "eval_samples_per_second": 349.786, + "eval_steps_per_second": 21.862, + "step": 8300 + }, + { + "epoch": 0.36, + "learning_rate": 5.568529968192187e-05, + "loss": 1.9783, + "step": 8310 + }, + { + "epoch": 0.36, + "learning_rate": 5.5674517108951365e-05, + "loss": 1.9335, + "step": 8320 + }, + { + "epoch": 0.36, + "learning_rate": 5.5663722126438316e-05, + "loss": 1.9209, + "step": 8330 + }, + { + "epoch": 0.37, + "learning_rate": 5.565291473960041e-05, + "loss": 1.9587, + "step": 8340 + }, + { + "epoch": 0.37, + "learning_rate": 5.5642094953661274e-05, + "loss": 2.0141, + "step": 8350 + }, + { + "epoch": 0.37, + "learning_rate": 5.563126277385056e-05, + "loss": 1.9702, + "step": 8360 + }, + { + "epoch": 0.37, + "learning_rate": 5.562041820540391e-05, + "loss": 2.0086, + "step": 8370 + }, + { + "epoch": 0.37, + "learning_rate": 5.560956125356293e-05, + "loss": 1.9322, + "step": 8380 + }, + { + "epoch": 0.37, + "learning_rate": 5.5598691923575244e-05, + "loss": 1.9788, + "step": 8390 + }, + { + "epoch": 0.37, + "learning_rate": 5.558781022069443e-05, + "loss": 1.982, + "step": 8400 + }, + { + "epoch": 0.37, + "eval_loss": 1.9378072023391724, + "eval_runtime": 13.6976, + "eval_samples_per_second": 299.029, + "eval_steps_per_second": 18.689, + "step": 8400 + }, + { + "epoch": 0.37, + "learning_rate": 5.557691615018008e-05, + "loss": 2.0186, + "step": 8410 + }, + { + "epoch": 0.37, + "learning_rate": 5.556600971729771e-05, + "loss": 1.9675, + "step": 8420 + }, + { + "epoch": 0.37, + "learning_rate": 5.5555090927318876e-05, + "loss": 1.9675, + "step": 8430 + }, + { + "epoch": 0.37, + "learning_rate": 5.554415978552106e-05, + "loss": 2.0175, + "step": 8440 + }, + { + "epoch": 0.37, + "learning_rate": 5.5533216297187726e-05, + "loss": 1.9642, + "step": 8450 + }, + { + "epoch": 0.37, + "learning_rate": 5.5522260467608326e-05, + "loss": 1.9741, + "step": 8460 + }, + { + "epoch": 0.37, + "learning_rate": 5.5511292302078244e-05, + "loss": 1.9557, + "step": 8470 + }, + { + "epoch": 0.37, + "learning_rate": 5.550031180589884e-05, + "loss": 1.9811, + "step": 8480 + }, + { + "epoch": 0.37, + "learning_rate": 5.5489318984377456e-05, + "loss": 1.9578, + "step": 8490 + }, + { + "epoch": 0.37, + "learning_rate": 5.547831384282734e-05, + "loss": 1.9934, + "step": 8500 + }, + { + "epoch": 0.37, + "eval_loss": 1.9361932277679443, + "eval_runtime": 12.095, + "eval_samples_per_second": 338.652, + "eval_steps_per_second": 21.166, + "step": 8500 + }, + { + "epoch": 0.37, + "learning_rate": 5.546729638656777e-05, + "loss": 1.9608, + "step": 8510 + }, + { + "epoch": 0.37, + "learning_rate": 5.545626662092389e-05, + "loss": 1.966, + "step": 8520 + }, + { + "epoch": 0.37, + "learning_rate": 5.544522455122686e-05, + "loss": 1.9437, + "step": 8530 + }, + { + "epoch": 0.37, + "learning_rate": 5.5434170182813755e-05, + "loss": 1.9392, + "step": 8540 + }, + { + "epoch": 0.37, + "learning_rate": 5.54231035210276e-05, + "loss": 1.9426, + "step": 8550 + }, + { + "epoch": 0.37, + "learning_rate": 5.541202457121737e-05, + "loss": 1.9498, + "step": 8560 + }, + { + "epoch": 0.38, + "learning_rate": 5.540093333873798e-05, + "loss": 1.9778, + "step": 8570 + }, + { + "epoch": 0.38, + "learning_rate": 5.5389829828950266e-05, + "loss": 1.9281, + "step": 8580 + }, + { + "epoch": 0.38, + "learning_rate": 5.5378714047221007e-05, + "loss": 1.99, + "step": 8590 + }, + { + "epoch": 0.38, + "learning_rate": 5.5367585998922916e-05, + "loss": 1.9328, + "step": 8600 + }, + { + "epoch": 0.38, + "eval_loss": 1.9360263347625732, + "eval_runtime": 11.7734, + "eval_samples_per_second": 347.904, + "eval_steps_per_second": 21.744, + "step": 8600 + }, + { + "epoch": 0.38, + "learning_rate": 5.535644568943464e-05, + "loss": 1.9399, + "step": 8610 + }, + { + "epoch": 0.38, + "learning_rate": 5.5345293124140736e-05, + "loss": 1.9359, + "step": 8620 + }, + { + "epoch": 0.38, + "learning_rate": 5.533412830843169e-05, + "loss": 1.9176, + "step": 8630 + }, + { + "epoch": 0.38, + "learning_rate": 5.5322951247703935e-05, + "loss": 1.9591, + "step": 8640 + }, + { + "epoch": 0.38, + "learning_rate": 5.5311761947359786e-05, + "loss": 1.964, + "step": 8650 + }, + { + "epoch": 0.38, + "learning_rate": 5.530056041280748e-05, + "loss": 1.9774, + "step": 8660 + }, + { + "epoch": 0.38, + "learning_rate": 5.5289346649461196e-05, + "loss": 1.9532, + "step": 8670 + }, + { + "epoch": 0.38, + "learning_rate": 5.527812066274099e-05, + "loss": 1.9559, + "step": 8680 + }, + { + "epoch": 0.38, + "learning_rate": 5.5266882458072845e-05, + "loss": 1.9101, + "step": 8690 + }, + { + "epoch": 0.38, + "learning_rate": 5.525563204088864e-05, + "loss": 2.0133, + "step": 8700 + }, + { + "epoch": 0.38, + "eval_loss": 1.9347482919692993, + "eval_runtime": 11.5929, + "eval_samples_per_second": 353.32, + "eval_steps_per_second": 22.082, + "step": 8700 + }, + { + "epoch": 0.38, + "learning_rate": 5.524436941662616e-05, + "loss": 1.9481, + "step": 8710 + }, + { + "epoch": 0.38, + "learning_rate": 5.52330945907291e-05, + "loss": 2.0243, + "step": 8720 + }, + { + "epoch": 0.38, + "learning_rate": 5.522180756864705e-05, + "loss": 2.0271, + "step": 8730 + }, + { + "epoch": 0.38, + "learning_rate": 5.521050835583546e-05, + "loss": 1.9358, + "step": 8740 + }, + { + "epoch": 0.38, + "learning_rate": 5.519919695775572e-05, + "loss": 2.0078, + "step": 8750 + }, + { + "epoch": 0.38, + "learning_rate": 5.51878733798751e-05, + "loss": 1.9632, + "step": 8760 + }, + { + "epoch": 0.38, + "learning_rate": 5.517653762766673e-05, + "loss": 1.9393, + "step": 8770 + }, + { + "epoch": 0.38, + "learning_rate": 5.516518970660965e-05, + "loss": 1.9215, + "step": 8780 + }, + { + "epoch": 0.38, + "learning_rate": 5.515382962218877e-05, + "loss": 1.9502, + "step": 8790 + }, + { + "epoch": 0.39, + "learning_rate": 5.514245737989489e-05, + "loss": 1.9523, + "step": 8800 + }, + { + "epoch": 0.39, + "eval_loss": 1.9324275255203247, + "eval_runtime": 11.8045, + "eval_samples_per_second": 346.986, + "eval_steps_per_second": 21.687, + "step": 8800 + }, + { + "epoch": 0.39, + "learning_rate": 5.513107298522467e-05, + "loss": 1.9323, + "step": 8810 + }, + { + "epoch": 0.39, + "learning_rate": 5.5119676443680657e-05, + "loss": 1.9541, + "step": 8820 + }, + { + "epoch": 0.39, + "learning_rate": 5.5108267760771266e-05, + "loss": 1.9716, + "step": 8830 + }, + { + "epoch": 0.39, + "learning_rate": 5.5096846942010775e-05, + "loss": 1.9488, + "step": 8840 + }, + { + "epoch": 0.39, + "learning_rate": 5.5085413992919344e-05, + "loss": 1.9172, + "step": 8850 + }, + { + "epoch": 0.39, + "learning_rate": 5.507396891902296e-05, + "loss": 1.9546, + "step": 8860 + }, + { + "epoch": 0.39, + "learning_rate": 5.506251172585352e-05, + "loss": 1.9807, + "step": 8870 + }, + { + "epoch": 0.39, + "learning_rate": 5.505104241894875e-05, + "loss": 1.9575, + "step": 8880 + }, + { + "epoch": 0.39, + "learning_rate": 5.503956100385221e-05, + "loss": 1.963, + "step": 8890 + }, + { + "epoch": 0.39, + "learning_rate": 5.5028067486113366e-05, + "loss": 1.954, + "step": 8900 + }, + { + "epoch": 0.39, + "eval_loss": 1.9312846660614014, + "eval_runtime": 11.7288, + "eval_samples_per_second": 349.225, + "eval_steps_per_second": 21.827, + "step": 8900 + }, + { + "epoch": 0.39, + "learning_rate": 5.5016561871287496e-05, + "loss": 1.9755, + "step": 8910 + }, + { + "epoch": 0.39, + "learning_rate": 5.500504416493572e-05, + "loss": 1.9325, + "step": 8920 + }, + { + "epoch": 0.39, + "learning_rate": 5.499351437262503e-05, + "loss": 2.0681, + "step": 8930 + }, + { + "epoch": 0.39, + "learning_rate": 5.4981972499928244e-05, + "loss": 1.9759, + "step": 8940 + }, + { + "epoch": 0.39, + "learning_rate": 5.497041855242401e-05, + "loss": 1.9578, + "step": 8950 + }, + { + "epoch": 0.39, + "learning_rate": 5.495885253569684e-05, + "loss": 1.9771, + "step": 8960 + }, + { + "epoch": 0.39, + "learning_rate": 5.4947274455337046e-05, + "loss": 1.9007, + "step": 8970 + }, + { + "epoch": 0.39, + "learning_rate": 5.49356843169408e-05, + "loss": 1.9742, + "step": 8980 + }, + { + "epoch": 0.39, + "learning_rate": 5.492408212611009e-05, + "loss": 1.9316, + "step": 8990 + }, + { + "epoch": 0.39, + "learning_rate": 5.4912467888452705e-05, + "loss": 1.9556, + "step": 9000 + }, + { + "epoch": 0.39, + "eval_loss": 1.9298361539840698, + "eval_runtime": 11.9118, + "eval_samples_per_second": 343.861, + "eval_steps_per_second": 21.491, + "step": 9000 + }, + { + "epoch": 0.39, + "learning_rate": 5.4900841609582304e-05, + "loss": 1.9705, + "step": 9010 + }, + { + "epoch": 0.39, + "learning_rate": 5.488920329511833e-05, + "loss": 1.9575, + "step": 9020 + }, + { + "epoch": 0.4, + "learning_rate": 5.487755295068606e-05, + "loss": 1.9734, + "step": 9030 + }, + { + "epoch": 0.4, + "learning_rate": 5.486589058191659e-05, + "loss": 2.0, + "step": 9040 + }, + { + "epoch": 0.4, + "learning_rate": 5.4854216194446806e-05, + "loss": 1.9733, + "step": 9050 + }, + { + "epoch": 0.4, + "learning_rate": 5.484252979391942e-05, + "loss": 1.9879, + "step": 9060 + }, + { + "epoch": 0.4, + "learning_rate": 5.4830831385982955e-05, + "loss": 1.9072, + "step": 9070 + }, + { + "epoch": 0.4, + "learning_rate": 5.4819120976291726e-05, + "loss": 1.9627, + "step": 9080 + }, + { + "epoch": 0.4, + "learning_rate": 5.4807398570505836e-05, + "loss": 1.9815, + "step": 9090 + }, + { + "epoch": 0.4, + "learning_rate": 5.4795664174291214e-05, + "loss": 1.975, + "step": 9100 + }, + { + "epoch": 0.4, + "eval_loss": 1.9286532402038574, + "eval_runtime": 11.9769, + "eval_samples_per_second": 341.991, + "eval_steps_per_second": 21.374, + "step": 9100 + }, + { + "epoch": 0.4, + "learning_rate": 5.478391779331958e-05, + "loss": 1.935, + "step": 9110 + }, + { + "epoch": 0.4, + "learning_rate": 5.477215943326843e-05, + "loss": 1.9991, + "step": 9120 + }, + { + "epoch": 0.4, + "learning_rate": 5.476038909982106e-05, + "loss": 1.958, + "step": 9130 + }, + { + "epoch": 0.4, + "learning_rate": 5.474860679866656e-05, + "loss": 1.9126, + "step": 9140 + }, + { + "epoch": 0.4, + "learning_rate": 5.47368125354998e-05, + "loss": 1.9441, + "step": 9150 + }, + { + "epoch": 0.4, + "learning_rate": 5.4725006316021404e-05, + "loss": 1.9427, + "step": 9160 + }, + { + "epoch": 0.4, + "learning_rate": 5.471318814593783e-05, + "loss": 1.9856, + "step": 9170 + }, + { + "epoch": 0.4, + "learning_rate": 5.4701358030961266e-05, + "loss": 1.9876, + "step": 9180 + }, + { + "epoch": 0.4, + "learning_rate": 5.468951597680969e-05, + "loss": 1.9421, + "step": 9190 + }, + { + "epoch": 0.4, + "learning_rate": 5.467766198920686e-05, + "loss": 1.9562, + "step": 9200 + }, + { + "epoch": 0.4, + "eval_loss": 1.9275963306427002, + "eval_runtime": 11.7257, + "eval_samples_per_second": 349.317, + "eval_steps_per_second": 21.832, + "step": 9200 + }, + { + "epoch": 0.4, + "learning_rate": 5.466579607388229e-05, + "loss": 2.0232, + "step": 9210 + }, + { + "epoch": 0.4, + "learning_rate": 5.4653918236571245e-05, + "loss": 1.8672, + "step": 9220 + }, + { + "epoch": 0.4, + "learning_rate": 5.464202848301479e-05, + "loss": 1.9394, + "step": 9230 + }, + { + "epoch": 0.4, + "learning_rate": 5.463012681895972e-05, + "loss": 1.9189, + "step": 9240 + }, + { + "epoch": 0.4, + "learning_rate": 5.461821325015859e-05, + "loss": 1.9301, + "step": 9250 + }, + { + "epoch": 0.41, + "learning_rate": 5.4606287782369724e-05, + "loss": 1.9574, + "step": 9260 + }, + { + "epoch": 0.41, + "learning_rate": 5.459435042135718e-05, + "loss": 1.9584, + "step": 9270 + }, + { + "epoch": 0.41, + "learning_rate": 5.458240117289077e-05, + "loss": 1.9699, + "step": 9280 + }, + { + "epoch": 0.41, + "learning_rate": 5.457044004274607e-05, + "loss": 1.976, + "step": 9290 + }, + { + "epoch": 0.41, + "learning_rate": 5.455846703670436e-05, + "loss": 1.9704, + "step": 9300 + }, + { + "epoch": 0.41, + "eval_loss": 1.9267959594726562, + "eval_runtime": 11.5085, + "eval_samples_per_second": 355.912, + "eval_steps_per_second": 22.244, + "step": 9300 + }, + { + "epoch": 0.41, + "learning_rate": 5.45464821605527e-05, + "loss": 1.9217, + "step": 9310 + }, + { + "epoch": 0.41, + "learning_rate": 5.4534485420083866e-05, + "loss": 1.9389, + "step": 9320 + }, + { + "epoch": 0.41, + "learning_rate": 5.4522476821096364e-05, + "loss": 1.9367, + "step": 9330 + }, + { + "epoch": 0.41, + "learning_rate": 5.4510456369394455e-05, + "loss": 1.9851, + "step": 9340 + }, + { + "epoch": 0.41, + "learning_rate": 5.44984240707881e-05, + "loss": 1.9447, + "step": 9350 + }, + { + "epoch": 0.41, + "learning_rate": 5.448637993109301e-05, + "loss": 1.9024, + "step": 9360 + }, + { + "epoch": 0.41, + "learning_rate": 5.44743239561306e-05, + "loss": 1.9409, + "step": 9370 + }, + { + "epoch": 0.41, + "learning_rate": 5.4462256151728024e-05, + "loss": 1.9617, + "step": 9380 + }, + { + "epoch": 0.41, + "learning_rate": 5.4450176523718144e-05, + "loss": 1.9452, + "step": 9390 + }, + { + "epoch": 0.41, + "learning_rate": 5.443808507793953e-05, + "loss": 1.9769, + "step": 9400 + }, + { + "epoch": 0.41, + "eval_loss": 1.9254794120788574, + "eval_runtime": 11.6603, + "eval_samples_per_second": 351.277, + "eval_steps_per_second": 21.955, + "step": 9400 + }, + { + "epoch": 0.41, + "learning_rate": 5.442598182023648e-05, + "loss": 1.9595, + "step": 9410 + }, + { + "epoch": 0.41, + "learning_rate": 5.441386675645899e-05, + "loss": 1.9119, + "step": 9420 + }, + { + "epoch": 0.41, + "learning_rate": 5.440173989246276e-05, + "loss": 1.9025, + "step": 9430 + }, + { + "epoch": 0.41, + "learning_rate": 5.438960123410921e-05, + "loss": 1.9572, + "step": 9440 + }, + { + "epoch": 0.41, + "learning_rate": 5.437745078726543e-05, + "loss": 1.9962, + "step": 9450 + }, + { + "epoch": 0.41, + "learning_rate": 5.4365288557804243e-05, + "loss": 1.9908, + "step": 9460 + }, + { + "epoch": 0.41, + "learning_rate": 5.435311455160415e-05, + "loss": 1.9532, + "step": 9470 + }, + { + "epoch": 0.41, + "learning_rate": 5.434092877454934e-05, + "loss": 1.9798, + "step": 9480 + }, + { + "epoch": 0.42, + "learning_rate": 5.43287312325297e-05, + "loss": 1.894, + "step": 9490 + }, + { + "epoch": 0.42, + "learning_rate": 5.43165219314408e-05, + "loss": 1.9114, + "step": 9500 + }, + { + "epoch": 0.42, + "eval_loss": 1.923863410949707, + "eval_runtime": 12.1484, + "eval_samples_per_second": 337.164, + "eval_steps_per_second": 21.073, + "step": 9500 + }, + { + "epoch": 0.42, + "learning_rate": 5.43043008771839e-05, + "loss": 2.0034, + "step": 9510 + }, + { + "epoch": 0.42, + "learning_rate": 5.429206807566592e-05, + "loss": 1.9252, + "step": 9520 + }, + { + "epoch": 0.42, + "learning_rate": 5.4279823532799476e-05, + "loss": 1.9489, + "step": 9530 + }, + { + "epoch": 0.42, + "learning_rate": 5.4267567254502865e-05, + "loss": 1.9173, + "step": 9540 + }, + { + "epoch": 0.42, + "learning_rate": 5.4255299246700046e-05, + "loss": 1.9617, + "step": 9550 + }, + { + "epoch": 0.42, + "learning_rate": 5.424301951532064e-05, + "loss": 1.9543, + "step": 9560 + }, + { + "epoch": 0.42, + "learning_rate": 5.423072806629994e-05, + "loss": 1.9604, + "step": 9570 + }, + { + "epoch": 0.42, + "learning_rate": 5.421842490557892e-05, + "loss": 1.952, + "step": 9580 + }, + { + "epoch": 0.42, + "learning_rate": 5.420611003910419e-05, + "loss": 1.9601, + "step": 9590 + }, + { + "epoch": 0.42, + "learning_rate": 5.4193783472828024e-05, + "loss": 1.9285, + "step": 9600 + }, + { + "epoch": 0.42, + "eval_loss": 1.922660231590271, + "eval_runtime": 11.9313, + "eval_samples_per_second": 343.299, + "eval_steps_per_second": 21.456, + "step": 9600 + }, + { + "epoch": 0.42, + "learning_rate": 5.418144521270836e-05, + "loss": 1.9213, + "step": 9610 + }, + { + "epoch": 0.42, + "learning_rate": 5.416909526470878e-05, + "loss": 1.9605, + "step": 9620 + }, + { + "epoch": 0.42, + "learning_rate": 5.4156733634798535e-05, + "loss": 1.9544, + "step": 9630 + }, + { + "epoch": 0.42, + "learning_rate": 5.414436032895248e-05, + "loss": 1.941, + "step": 9640 + }, + { + "epoch": 0.42, + "learning_rate": 5.413197535315116e-05, + "loss": 1.9717, + "step": 9650 + }, + { + "epoch": 0.42, + "learning_rate": 5.4119578713380726e-05, + "loss": 1.9784, + "step": 9660 + }, + { + "epoch": 0.42, + "learning_rate": 5.410717041563298e-05, + "loss": 1.9701, + "step": 9670 + }, + { + "epoch": 0.42, + "learning_rate": 5.409475046590537e-05, + "loss": 1.9148, + "step": 9680 + }, + { + "epoch": 0.42, + "learning_rate": 5.408231887020095e-05, + "loss": 1.9512, + "step": 9690 + }, + { + "epoch": 0.42, + "learning_rate": 5.406987563452844e-05, + "loss": 1.9492, + "step": 9700 + }, + { + "epoch": 0.42, + "eval_loss": 1.9206269979476929, + "eval_runtime": 11.8263, + "eval_samples_per_second": 346.347, + "eval_steps_per_second": 21.647, + "step": 9700 + }, + { + "epoch": 0.43, + "learning_rate": 5.405742076490214e-05, + "loss": 1.9556, + "step": 9710 + }, + { + "epoch": 0.43, + "learning_rate": 5.4044954267342005e-05, + "loss": 1.9188, + "step": 9720 + }, + { + "epoch": 0.43, + "learning_rate": 5.403247614787361e-05, + "loss": 1.9446, + "step": 9730 + }, + { + "epoch": 0.43, + "learning_rate": 5.4019986412528135e-05, + "loss": 1.913, + "step": 9740 + }, + { + "epoch": 0.43, + "learning_rate": 5.400748506734237e-05, + "loss": 1.9685, + "step": 9750 + }, + { + "epoch": 0.43, + "learning_rate": 5.3994972118358743e-05, + "loss": 1.8976, + "step": 9760 + }, + { + "epoch": 0.43, + "learning_rate": 5.398244757162527e-05, + "loss": 1.9133, + "step": 9770 + }, + { + "epoch": 0.43, + "learning_rate": 5.396991143319555e-05, + "loss": 1.9174, + "step": 9780 + }, + { + "epoch": 0.43, + "learning_rate": 5.395736370912884e-05, + "loss": 1.9537, + "step": 9790 + }, + { + "epoch": 0.43, + "learning_rate": 5.394480440548997e-05, + "loss": 1.9391, + "step": 9800 + }, + { + "epoch": 0.43, + "eval_loss": 1.9210110902786255, + "eval_runtime": 11.7383, + "eval_samples_per_second": 348.944, + "eval_steps_per_second": 21.809, + "step": 9800 + }, + { + "epoch": 0.43, + "learning_rate": 5.393223352834933e-05, + "loss": 1.9293, + "step": 9810 + }, + { + "epoch": 0.43, + "learning_rate": 5.391965108378298e-05, + "loss": 1.9753, + "step": 9820 + }, + { + "epoch": 0.43, + "learning_rate": 5.3907057077872496e-05, + "loss": 1.921, + "step": 9830 + }, + { + "epoch": 0.43, + "learning_rate": 5.38944515167051e-05, + "loss": 1.9402, + "step": 9840 + }, + { + "epoch": 0.43, + "learning_rate": 5.388183440637356e-05, + "loss": 1.9688, + "step": 9850 + }, + { + "epoch": 0.43, + "learning_rate": 5.3869205752976237e-05, + "loss": 1.9478, + "step": 9860 + }, + { + "epoch": 0.43, + "learning_rate": 5.385656556261709e-05, + "loss": 1.9596, + "step": 9870 + }, + { + "epoch": 0.43, + "learning_rate": 5.3843913841405637e-05, + "loss": 1.9397, + "step": 9880 + }, + { + "epoch": 0.43, + "learning_rate": 5.3831250595456956e-05, + "loss": 1.9378, + "step": 9890 + }, + { + "epoch": 0.43, + "learning_rate": 5.3818575830891716e-05, + "loss": 1.9586, + "step": 9900 + }, + { + "epoch": 0.43, + "eval_loss": 1.919926643371582, + "eval_runtime": 11.7669, + "eval_samples_per_second": 348.096, + "eval_steps_per_second": 21.756, + "step": 9900 + }, + { + "epoch": 0.43, + "learning_rate": 5.380588955383616e-05, + "loss": 1.9818, + "step": 9910 + }, + { + "epoch": 0.43, + "learning_rate": 5.379319177042208e-05, + "loss": 1.9407, + "step": 9920 + }, + { + "epoch": 0.43, + "learning_rate": 5.378048248678682e-05, + "loss": 1.9797, + "step": 9930 + }, + { + "epoch": 0.44, + "learning_rate": 5.3767761709073314e-05, + "loss": 1.9684, + "step": 9940 + }, + { + "epoch": 0.44, + "learning_rate": 5.3755029443430024e-05, + "loss": 1.9438, + "step": 9950 + }, + { + "epoch": 0.44, + "learning_rate": 5.374228569601098e-05, + "loss": 1.9506, + "step": 9960 + }, + { + "epoch": 0.44, + "learning_rate": 5.372953047297574e-05, + "loss": 1.9582, + "step": 9970 + }, + { + "epoch": 0.44, + "learning_rate": 5.371676378048944e-05, + "loss": 1.8836, + "step": 9980 + }, + { + "epoch": 0.44, + "learning_rate": 5.370398562472273e-05, + "loss": 1.879, + "step": 9990 + }, + { + "epoch": 0.44, + "learning_rate": 5.3691196011851835e-05, + "loss": 1.8977, + "step": 10000 + }, + { + "epoch": 0.44, + "eval_loss": 1.9170405864715576, + "eval_runtime": 12.0985, + "eval_samples_per_second": 338.555, + "eval_steps_per_second": 21.16, + "step": 10000 + }, + { + "epoch": 0.44, + "learning_rate": 5.367839494805847e-05, + "loss": 1.9891, + "step": 10010 + }, + { + "epoch": 0.44, + "learning_rate": 5.3665582439529944e-05, + "loss": 1.9025, + "step": 10020 + }, + { + "epoch": 0.44, + "learning_rate": 5.365275849245904e-05, + "loss": 1.9416, + "step": 10030 + }, + { + "epoch": 0.44, + "learning_rate": 5.3639923113044107e-05, + "loss": 1.954, + "step": 10040 + }, + { + "epoch": 0.44, + "learning_rate": 5.362707630748901e-05, + "loss": 1.9102, + "step": 10050 + }, + { + "epoch": 0.44, + "learning_rate": 5.361421808200312e-05, + "loss": 1.9629, + "step": 10060 + }, + { + "epoch": 0.44, + "learning_rate": 5.360134844280135e-05, + "loss": 1.9691, + "step": 10070 + }, + { + "epoch": 0.44, + "learning_rate": 5.358846739610413e-05, + "loss": 1.9133, + "step": 10080 + }, + { + "epoch": 0.44, + "learning_rate": 5.357557494813738e-05, + "loss": 1.9778, + "step": 10090 + }, + { + "epoch": 0.44, + "learning_rate": 5.3562671105132566e-05, + "loss": 1.9306, + "step": 10100 + }, + { + "epoch": 0.44, + "eval_loss": 1.917136549949646, + "eval_runtime": 11.7385, + "eval_samples_per_second": 348.936, + "eval_steps_per_second": 21.808, + "step": 10100 + }, + { + "epoch": 0.44, + "learning_rate": 5.3549755873326636e-05, + "loss": 1.9673, + "step": 10110 + }, + { + "epoch": 0.44, + "learning_rate": 5.353682925896204e-05, + "loss": 1.9411, + "step": 10120 + }, + { + "epoch": 0.44, + "learning_rate": 5.352389126828674e-05, + "loss": 1.9431, + "step": 10130 + }, + { + "epoch": 0.44, + "learning_rate": 5.3510941907554205e-05, + "loss": 1.9393, + "step": 10140 + }, + { + "epoch": 0.44, + "learning_rate": 5.349798118302338e-05, + "loss": 1.9358, + "step": 10150 + }, + { + "epoch": 0.44, + "learning_rate": 5.348500910095873e-05, + "loss": 1.9342, + "step": 10160 + }, + { + "epoch": 0.45, + "learning_rate": 5.3472025667630164e-05, + "loss": 1.9211, + "step": 10170 + }, + { + "epoch": 0.45, + "learning_rate": 5.345903088931312e-05, + "loss": 1.9403, + "step": 10180 + }, + { + "epoch": 0.45, + "learning_rate": 5.344602477228851e-05, + "loss": 1.9169, + "step": 10190 + }, + { + "epoch": 0.45, + "learning_rate": 5.343300732284272e-05, + "loss": 1.9776, + "step": 10200 + }, + { + "epoch": 0.45, + "eval_loss": 1.9152053594589233, + "eval_runtime": 11.8351, + "eval_samples_per_second": 346.088, + "eval_steps_per_second": 21.631, + "step": 10200 + }, + { + "epoch": 0.45, + "learning_rate": 5.34199785472676e-05, + "loss": 1.9555, + "step": 10210 + }, + { + "epoch": 0.45, + "learning_rate": 5.340693845186051e-05, + "loss": 1.9321, + "step": 10220 + }, + { + "epoch": 0.45, + "learning_rate": 5.339388704292425e-05, + "loss": 1.9974, + "step": 10230 + }, + { + "epoch": 0.45, + "learning_rate": 5.33808243267671e-05, + "loss": 1.9198, + "step": 10240 + }, + { + "epoch": 0.45, + "learning_rate": 5.33677503097028e-05, + "loss": 1.9572, + "step": 10250 + }, + { + "epoch": 0.45, + "learning_rate": 5.335466499805057e-05, + "loss": 1.9275, + "step": 10260 + }, + { + "epoch": 0.45, + "learning_rate": 5.334156839813506e-05, + "loss": 1.9221, + "step": 10270 + }, + { + "epoch": 0.45, + "learning_rate": 5.33284605162864e-05, + "loss": 1.9517, + "step": 10280 + }, + { + "epoch": 0.45, + "learning_rate": 5.3315341358840155e-05, + "loss": 1.9344, + "step": 10290 + }, + { + "epoch": 0.45, + "learning_rate": 5.330221093213736e-05, + "loss": 1.9339, + "step": 10300 + }, + { + "epoch": 0.45, + "eval_loss": 1.9138355255126953, + "eval_runtime": 11.7912, + "eval_samples_per_second": 347.377, + "eval_steps_per_second": 21.711, + "step": 10300 + }, + { + "epoch": 0.45, + "learning_rate": 5.3289069242524486e-05, + "loss": 1.942, + "step": 10310 + }, + { + "epoch": 0.45, + "learning_rate": 5.327591629635345e-05, + "loss": 1.9285, + "step": 10320 + }, + { + "epoch": 0.45, + "learning_rate": 5.32627520999816e-05, + "loss": 1.9394, + "step": 10330 + }, + { + "epoch": 0.45, + "learning_rate": 5.3249576659771723e-05, + "loss": 1.9306, + "step": 10340 + }, + { + "epoch": 0.45, + "learning_rate": 5.323638998209207e-05, + "loss": 1.9389, + "step": 10350 + }, + { + "epoch": 0.45, + "learning_rate": 5.322319207331628e-05, + "loss": 1.9811, + "step": 10360 + }, + { + "epoch": 0.45, + "learning_rate": 5.320998293982345e-05, + "loss": 1.9051, + "step": 10370 + }, + { + "epoch": 0.45, + "learning_rate": 5.31967625879981e-05, + "loss": 1.9013, + "step": 10380 + }, + { + "epoch": 0.45, + "learning_rate": 5.3183531024230145e-05, + "loss": 1.9472, + "step": 10390 + }, + { + "epoch": 0.46, + "learning_rate": 5.3170288254914974e-05, + "loss": 1.9531, + "step": 10400 + }, + { + "epoch": 0.46, + "eval_loss": 1.9130414724349976, + "eval_runtime": 11.7623, + "eval_samples_per_second": 348.231, + "eval_steps_per_second": 21.764, + "step": 10400 + }, + { + "epoch": 0.46, + "learning_rate": 5.315703428645332e-05, + "loss": 1.9145, + "step": 10410 + }, + { + "epoch": 0.46, + "learning_rate": 5.31437691252514e-05, + "loss": 1.9428, + "step": 10420 + }, + { + "epoch": 0.46, + "learning_rate": 5.3130492777720784e-05, + "loss": 1.9159, + "step": 10430 + }, + { + "epoch": 0.46, + "learning_rate": 5.311720525027847e-05, + "loss": 1.9567, + "step": 10440 + }, + { + "epoch": 0.46, + "learning_rate": 5.310390654934689e-05, + "loss": 1.9582, + "step": 10450 + }, + { + "epoch": 0.46, + "learning_rate": 5.309059668135384e-05, + "loss": 1.8719, + "step": 10460 + }, + { + "epoch": 0.46, + "learning_rate": 5.3077275652732506e-05, + "loss": 1.9593, + "step": 10470 + }, + { + "epoch": 0.46, + "learning_rate": 5.3063943469921495e-05, + "loss": 1.9132, + "step": 10480 + }, + { + "epoch": 0.46, + "learning_rate": 5.305060013936481e-05, + "loss": 1.9277, + "step": 10490 + }, + { + "epoch": 0.46, + "learning_rate": 5.303724566751179e-05, + "loss": 1.9285, + "step": 10500 + }, + { + "epoch": 0.46, + "eval_loss": 1.912137746810913, + "eval_runtime": 11.8324, + "eval_samples_per_second": 346.169, + "eval_steps_per_second": 21.636, + "step": 10500 + }, + { + "epoch": 0.46, + "learning_rate": 5.302388006081724e-05, + "loss": 1.9365, + "step": 10510 + }, + { + "epoch": 0.46, + "learning_rate": 5.301050332574128e-05, + "loss": 1.9349, + "step": 10520 + }, + { + "epoch": 0.46, + "learning_rate": 5.299711546874944e-05, + "loss": 1.9636, + "step": 10530 + }, + { + "epoch": 0.46, + "learning_rate": 5.2983716496312595e-05, + "loss": 1.9193, + "step": 10540 + }, + { + "epoch": 0.46, + "learning_rate": 5.297030641490705e-05, + "loss": 1.925, + "step": 10550 + }, + { + "epoch": 0.46, + "learning_rate": 5.2956885231014426e-05, + "loss": 1.9379, + "step": 10560 + }, + { + "epoch": 0.46, + "learning_rate": 5.294345295112173e-05, + "loss": 1.9314, + "step": 10570 + }, + { + "epoch": 0.46, + "learning_rate": 5.2930009581721345e-05, + "loss": 1.9457, + "step": 10580 + }, + { + "epoch": 0.46, + "learning_rate": 5.291655512931098e-05, + "loss": 1.9083, + "step": 10590 + }, + { + "epoch": 0.46, + "learning_rate": 5.290308960039373e-05, + "loss": 1.9104, + "step": 10600 + }, + { + "epoch": 0.46, + "eval_loss": 1.9107956886291504, + "eval_runtime": 12.0695, + "eval_samples_per_second": 339.368, + "eval_steps_per_second": 21.21, + "step": 10600 + }, + { + "epoch": 0.46, + "learning_rate": 5.288961300147804e-05, + "loss": 1.9385, + "step": 10610 + }, + { + "epoch": 0.46, + "learning_rate": 5.287612533907769e-05, + "loss": 1.9446, + "step": 10620 + }, + { + "epoch": 0.47, + "learning_rate": 5.286262661971183e-05, + "loss": 1.9284, + "step": 10630 + }, + { + "epoch": 0.47, + "learning_rate": 5.284911684990494e-05, + "loss": 1.9348, + "step": 10640 + }, + { + "epoch": 0.47, + "learning_rate": 5.283559603618683e-05, + "loss": 1.9606, + "step": 10650 + }, + { + "epoch": 0.47, + "learning_rate": 5.2822064185092676e-05, + "loss": 1.883, + "step": 10660 + }, + { + "epoch": 0.47, + "learning_rate": 5.280852130316297e-05, + "loss": 1.9772, + "step": 10670 + }, + { + "epoch": 0.47, + "learning_rate": 5.279496739694355e-05, + "loss": 1.9849, + "step": 10680 + }, + { + "epoch": 0.47, + "learning_rate": 5.2781402472985544e-05, + "loss": 1.916, + "step": 10690 + }, + { + "epoch": 0.47, + "learning_rate": 5.276782653784546e-05, + "loss": 1.9241, + "step": 10700 + }, + { + "epoch": 0.47, + "eval_loss": 1.9091017246246338, + "eval_runtime": 11.9167, + "eval_samples_per_second": 343.72, + "eval_steps_per_second": 21.483, + "step": 10700 + }, + { + "epoch": 0.47, + "learning_rate": 5.275423959808509e-05, + "loss": 1.9683, + "step": 10710 + }, + { + "epoch": 0.47, + "learning_rate": 5.274064166027156e-05, + "loss": 1.9504, + "step": 10720 + }, + { + "epoch": 0.47, + "learning_rate": 5.272703273097731e-05, + "loss": 1.9201, + "step": 10730 + }, + { + "epoch": 0.47, + "learning_rate": 5.271341281678009e-05, + "loss": 1.9344, + "step": 10740 + }, + { + "epoch": 0.47, + "learning_rate": 5.2699781924262966e-05, + "loss": 1.9063, + "step": 10750 + }, + { + "epoch": 0.47, + "learning_rate": 5.2686140060014297e-05, + "loss": 1.8997, + "step": 10760 + }, + { + "epoch": 0.47, + "learning_rate": 5.267248723062775e-05, + "loss": 1.8877, + "step": 10770 + }, + { + "epoch": 0.47, + "learning_rate": 5.26588234427023e-05, + "loss": 1.9442, + "step": 10780 + }, + { + "epoch": 0.47, + "learning_rate": 5.2645148702842224e-05, + "loss": 1.8964, + "step": 10790 + }, + { + "epoch": 0.47, + "learning_rate": 5.2631463017657064e-05, + "loss": 1.8983, + "step": 10800 + }, + { + "epoch": 0.47, + "eval_loss": 1.9094958305358887, + "eval_runtime": 11.7143, + "eval_samples_per_second": 349.659, + "eval_steps_per_second": 21.854, + "step": 10800 + }, + { + "epoch": 0.47, + "learning_rate": 5.261776639376169e-05, + "loss": 1.9515, + "step": 10810 + }, + { + "epoch": 0.47, + "learning_rate": 5.260405883777622e-05, + "loss": 1.9609, + "step": 10820 + }, + { + "epoch": 0.47, + "learning_rate": 5.259034035632607e-05, + "loss": 1.9301, + "step": 10830 + }, + { + "epoch": 0.47, + "learning_rate": 5.2576610956041976e-05, + "loss": 1.946, + "step": 10840 + }, + { + "epoch": 0.47, + "learning_rate": 5.2562870643559895e-05, + "loss": 1.9419, + "step": 10850 + }, + { + "epoch": 0.48, + "learning_rate": 5.254911942552108e-05, + "loss": 1.9415, + "step": 10860 + }, + { + "epoch": 0.48, + "learning_rate": 5.2535357308572064e-05, + "loss": 1.9428, + "step": 10870 + }, + { + "epoch": 0.48, + "learning_rate": 5.252158429936464e-05, + "loss": 1.9382, + "step": 10880 + }, + { + "epoch": 0.48, + "learning_rate": 5.250780040455586e-05, + "loss": 1.937, + "step": 10890 + }, + { + "epoch": 0.48, + "learning_rate": 5.249400563080804e-05, + "loss": 1.9589, + "step": 10900 + }, + { + "epoch": 0.48, + "eval_loss": 1.9076216220855713, + "eval_runtime": 11.7896, + "eval_samples_per_second": 347.424, + "eval_steps_per_second": 21.714, + "step": 10900 + }, + { + "epoch": 0.48, + "learning_rate": 5.2480199984788765e-05, + "loss": 1.919, + "step": 10910 + }, + { + "epoch": 0.48, + "learning_rate": 5.246638347317086e-05, + "loss": 1.947, + "step": 10920 + }, + { + "epoch": 0.48, + "learning_rate": 5.245255610263243e-05, + "loss": 1.965, + "step": 10930 + }, + { + "epoch": 0.48, + "learning_rate": 5.243871787985678e-05, + "loss": 1.9391, + "step": 10940 + }, + { + "epoch": 0.48, + "learning_rate": 5.242486881153251e-05, + "loss": 1.9027, + "step": 10950 + }, + { + "epoch": 0.48, + "learning_rate": 5.241100890435342e-05, + "loss": 1.9201, + "step": 10960 + }, + { + "epoch": 0.48, + "learning_rate": 5.239713816501859e-05, + "loss": 1.9278, + "step": 10970 + }, + { + "epoch": 0.48, + "learning_rate": 5.23832566002323e-05, + "loss": 1.9501, + "step": 10980 + }, + { + "epoch": 0.48, + "learning_rate": 5.2369364216704084e-05, + "loss": 1.9276, + "step": 10990 + }, + { + "epoch": 0.48, + "learning_rate": 5.2355461021148695e-05, + "loss": 1.9232, + "step": 11000 + }, + { + "epoch": 0.48, + "eval_loss": 1.9063364267349243, + "eval_runtime": 11.7844, + "eval_samples_per_second": 347.577, + "eval_steps_per_second": 21.724, + "step": 11000 + }, + { + "epoch": 0.48, + "learning_rate": 5.2341547020286116e-05, + "loss": 1.946, + "step": 11010 + }, + { + "epoch": 0.48, + "learning_rate": 5.232762222084155e-05, + "loss": 1.931, + "step": 11020 + }, + { + "epoch": 0.48, + "learning_rate": 5.231368662954542e-05, + "loss": 1.944, + "step": 11030 + }, + { + "epoch": 0.48, + "learning_rate": 5.2299740253133383e-05, + "loss": 1.9547, + "step": 11040 + }, + { + "epoch": 0.48, + "learning_rate": 5.2285783098346265e-05, + "loss": 1.8769, + "step": 11050 + }, + { + "epoch": 0.48, + "learning_rate": 5.227181517193015e-05, + "loss": 1.8999, + "step": 11060 + }, + { + "epoch": 0.48, + "learning_rate": 5.22578364806363e-05, + "loss": 1.9489, + "step": 11070 + }, + { + "epoch": 0.49, + "learning_rate": 5.2243847031221185e-05, + "loss": 1.9533, + "step": 11080 + }, + { + "epoch": 0.49, + "learning_rate": 5.222984683044648e-05, + "loss": 1.9008, + "step": 11090 + }, + { + "epoch": 0.49, + "learning_rate": 5.221583588507905e-05, + "loss": 1.9322, + "step": 11100 + }, + { + "epoch": 0.49, + "eval_loss": 1.905591607093811, + "eval_runtime": 11.9402, + "eval_samples_per_second": 343.044, + "eval_steps_per_second": 21.44, + "step": 11100 + }, + { + "epoch": 0.49, + "learning_rate": 5.2201814201890964e-05, + "loss": 1.885, + "step": 11110 + }, + { + "epoch": 0.49, + "learning_rate": 5.218778178765947e-05, + "loss": 1.9275, + "step": 11120 + }, + { + "epoch": 0.49, + "learning_rate": 5.217373864916701e-05, + "loss": 1.9124, + "step": 11130 + }, + { + "epoch": 0.49, + "learning_rate": 5.21596847932012e-05, + "loss": 1.9431, + "step": 11140 + }, + { + "epoch": 0.49, + "learning_rate": 5.2145620226554844e-05, + "loss": 1.9124, + "step": 11150 + }, + { + "epoch": 0.49, + "learning_rate": 5.213154495602593e-05, + "loss": 1.9301, + "step": 11160 + }, + { + "epoch": 0.49, + "learning_rate": 5.211745898841759e-05, + "loss": 1.888, + "step": 11170 + }, + { + "epoch": 0.49, + "learning_rate": 5.210336233053817e-05, + "loss": 1.9275, + "step": 11180 + }, + { + "epoch": 0.49, + "learning_rate": 5.2089254989201156e-05, + "loss": 1.9612, + "step": 11190 + }, + { + "epoch": 0.49, + "learning_rate": 5.207513697122519e-05, + "loss": 1.9247, + "step": 11200 + }, + { + "epoch": 0.49, + "eval_loss": 1.9045929908752441, + "eval_runtime": 11.6726, + "eval_samples_per_second": 350.906, + "eval_steps_per_second": 21.932, + "step": 11200 + }, + { + "epoch": 0.49, + "learning_rate": 5.20610082834341e-05, + "loss": 1.9102, + "step": 11210 + }, + { + "epoch": 0.49, + "learning_rate": 5.2046868932656855e-05, + "loss": 1.923, + "step": 11220 + }, + { + "epoch": 0.49, + "learning_rate": 5.203271892572757e-05, + "loss": 1.8978, + "step": 11230 + }, + { + "epoch": 0.49, + "learning_rate": 5.201855826948553e-05, + "loss": 1.8708, + "step": 11240 + }, + { + "epoch": 0.49, + "learning_rate": 5.200438697077516e-05, + "loss": 1.9239, + "step": 11250 + }, + { + "epoch": 0.49, + "learning_rate": 5.199020503644603e-05, + "loss": 1.9347, + "step": 11260 + }, + { + "epoch": 0.49, + "learning_rate": 5.1976012473352834e-05, + "loss": 1.8907, + "step": 11270 + }, + { + "epoch": 0.49, + "learning_rate": 5.196180928835543e-05, + "loss": 1.9238, + "step": 11280 + }, + { + "epoch": 0.49, + "learning_rate": 5.1947595488318794e-05, + "loss": 1.925, + "step": 11290 + }, + { + "epoch": 0.49, + "learning_rate": 5.1933371080113034e-05, + "loss": 1.8946, + "step": 11300 + }, + { + "epoch": 0.49, + "eval_loss": 1.9035365581512451, + "eval_runtime": 11.7334, + "eval_samples_per_second": 349.089, + "eval_steps_per_second": 21.818, + "step": 11300 + }, + { + "epoch": 0.5, + "learning_rate": 5.191913607061339e-05, + "loss": 1.885, + "step": 11310 + }, + { + "epoch": 0.5, + "learning_rate": 5.190489046670022e-05, + "loss": 1.9246, + "step": 11320 + }, + { + "epoch": 0.5, + "learning_rate": 5.189063427525901e-05, + "loss": 1.9431, + "step": 11330 + }, + { + "epoch": 0.5, + "learning_rate": 5.1876367503180356e-05, + "loss": 1.9154, + "step": 11340 + }, + { + "epoch": 0.5, + "learning_rate": 5.186209015735998e-05, + "loss": 1.9025, + "step": 11350 + }, + { + "epoch": 0.5, + "learning_rate": 5.184780224469869e-05, + "loss": 1.96, + "step": 11360 + }, + { + "epoch": 0.5, + "learning_rate": 5.183350377210243e-05, + "loss": 1.9536, + "step": 11370 + }, + { + "epoch": 0.5, + "learning_rate": 5.181919474648224e-05, + "loss": 1.9724, + "step": 11380 + }, + { + "epoch": 0.5, + "learning_rate": 5.180487517475424e-05, + "loss": 1.8894, + "step": 11390 + }, + { + "epoch": 0.5, + "learning_rate": 5.1790545063839675e-05, + "loss": 1.894, + "step": 11400 + }, + { + "epoch": 0.5, + "eval_loss": 1.9031916856765747, + "eval_runtime": 11.9751, + "eval_samples_per_second": 342.043, + "eval_steps_per_second": 21.378, + "step": 11400 + }, + { + "epoch": 0.5, + "learning_rate": 5.177620442066487e-05, + "loss": 1.8651, + "step": 11410 + }, + { + "epoch": 0.5, + "learning_rate": 5.176185325216124e-05, + "loss": 1.9263, + "step": 11420 + }, + { + "epoch": 0.5, + "learning_rate": 5.1747491565265304e-05, + "loss": 1.9026, + "step": 11430 + }, + { + "epoch": 0.5, + "learning_rate": 5.173311936691864e-05, + "loss": 1.9311, + "step": 11440 + }, + { + "epoch": 0.5, + "learning_rate": 5.171873666406792e-05, + "loss": 1.9022, + "step": 11450 + }, + { + "epoch": 0.5, + "learning_rate": 5.170434346366489e-05, + "loss": 1.9235, + "step": 11460 + }, + { + "epoch": 0.5, + "learning_rate": 5.1689939772666376e-05, + "loss": 1.9091, + "step": 11470 + }, + { + "epoch": 0.5, + "learning_rate": 5.1675525598034275e-05, + "loss": 1.9388, + "step": 11480 + }, + { + "epoch": 0.5, + "learning_rate": 5.166110094673553e-05, + "loss": 1.9413, + "step": 11490 + }, + { + "epoch": 0.5, + "learning_rate": 5.164666582574217e-05, + "loss": 1.9681, + "step": 11500 + }, + { + "epoch": 0.5, + "eval_loss": 1.9012072086334229, + "eval_runtime": 11.9665, + "eval_samples_per_second": 342.288, + "eval_steps_per_second": 21.393, + "step": 11500 + }, + { + "epoch": 0.5, + "learning_rate": 5.163222024203129e-05, + "loss": 1.9637, + "step": 11510 + }, + { + "epoch": 0.5, + "learning_rate": 5.161776420258502e-05, + "loss": 1.9221, + "step": 11520 + }, + { + "epoch": 0.5, + "learning_rate": 5.1603297714390555e-05, + "loss": 1.9217, + "step": 11530 + }, + { + "epoch": 0.51, + "learning_rate": 5.1588820784440144e-05, + "loss": 1.9271, + "step": 11540 + }, + { + "epoch": 0.51, + "learning_rate": 5.157433341973107e-05, + "loss": 1.9115, + "step": 11550 + }, + { + "epoch": 0.51, + "learning_rate": 5.155983562726568e-05, + "loss": 1.9195, + "step": 11560 + }, + { + "epoch": 0.51, + "learning_rate": 5.154532741405133e-05, + "loss": 1.9535, + "step": 11570 + }, + { + "epoch": 0.51, + "learning_rate": 5.153080878710046e-05, + "loss": 1.8912, + "step": 11580 + }, + { + "epoch": 0.51, + "learning_rate": 5.151627975343049e-05, + "loss": 1.8922, + "step": 11590 + }, + { + "epoch": 0.51, + "learning_rate": 5.1501740320063906e-05, + "loss": 1.8981, + "step": 11600 + }, + { + "epoch": 0.51, + "eval_loss": 1.9007313251495361, + "eval_runtime": 11.7722, + "eval_samples_per_second": 347.939, + "eval_steps_per_second": 21.746, + "step": 11600 + }, + { + "epoch": 0.51, + "learning_rate": 5.148719049402821e-05, + "loss": 1.919, + "step": 11610 + }, + { + "epoch": 0.51, + "learning_rate": 5.147263028235593e-05, + "loss": 1.8871, + "step": 11620 + }, + { + "epoch": 0.51, + "learning_rate": 5.145805969208459e-05, + "loss": 1.9283, + "step": 11630 + }, + { + "epoch": 0.51, + "learning_rate": 5.144347873025679e-05, + "loss": 1.9203, + "step": 11640 + }, + { + "epoch": 0.51, + "learning_rate": 5.1428887403920064e-05, + "loss": 1.9441, + "step": 11650 + }, + { + "epoch": 0.51, + "learning_rate": 5.141428572012703e-05, + "loss": 1.9235, + "step": 11660 + }, + { + "epoch": 0.51, + "learning_rate": 5.1399673685935244e-05, + "loss": 1.8727, + "step": 11670 + }, + { + "epoch": 0.51, + "learning_rate": 5.138505130840733e-05, + "loss": 1.9103, + "step": 11680 + }, + { + "epoch": 0.51, + "learning_rate": 5.137041859461086e-05, + "loss": 1.9122, + "step": 11690 + }, + { + "epoch": 0.51, + "learning_rate": 5.1355775551618435e-05, + "loss": 1.9197, + "step": 11700 + }, + { + "epoch": 0.51, + "eval_loss": 1.8997355699539185, + "eval_runtime": 11.7556, + "eval_samples_per_second": 348.43, + "eval_steps_per_second": 21.777, + "step": 11700 + }, + { + "epoch": 0.51, + "learning_rate": 5.134112218650762e-05, + "loss": 1.9484, + "step": 11710 + }, + { + "epoch": 0.51, + "learning_rate": 5.1326458506360994e-05, + "loss": 1.9234, + "step": 11720 + }, + { + "epoch": 0.51, + "learning_rate": 5.131178451826612e-05, + "loss": 1.932, + "step": 11730 + }, + { + "epoch": 0.51, + "learning_rate": 5.1297100229315536e-05, + "loss": 1.9191, + "step": 11740 + }, + { + "epoch": 0.51, + "learning_rate": 5.128240564660673e-05, + "loss": 1.9287, + "step": 11750 + }, + { + "epoch": 0.51, + "learning_rate": 5.1267700777242234e-05, + "loss": 1.8905, + "step": 11760 + }, + { + "epoch": 0.52, + "learning_rate": 5.1252985628329485e-05, + "loss": 1.9224, + "step": 11770 + }, + { + "epoch": 0.52, + "learning_rate": 5.123826020698092e-05, + "loss": 1.9458, + "step": 11780 + }, + { + "epoch": 0.52, + "learning_rate": 5.122352452031394e-05, + "loss": 1.9364, + "step": 11790 + }, + { + "epoch": 0.52, + "learning_rate": 5.1208778575450904e-05, + "loss": 1.8697, + "step": 11800 + }, + { + "epoch": 0.52, + "eval_loss": 1.8991934061050415, + "eval_runtime": 11.702, + "eval_samples_per_second": 350.026, + "eval_steps_per_second": 21.877, + "step": 11800 + }, + { + "epoch": 0.52, + "learning_rate": 5.1194022379519134e-05, + "loss": 1.9021, + "step": 11810 + }, + { + "epoch": 0.52, + "learning_rate": 5.1179255939650894e-05, + "loss": 1.9163, + "step": 11820 + }, + { + "epoch": 0.52, + "learning_rate": 5.1164479262983405e-05, + "loss": 1.9285, + "step": 11830 + }, + { + "epoch": 0.52, + "learning_rate": 5.1149692356658856e-05, + "loss": 1.9226, + "step": 11840 + }, + { + "epoch": 0.52, + "learning_rate": 5.113489522782434e-05, + "loss": 1.8812, + "step": 11850 + }, + { + "epoch": 0.52, + "learning_rate": 5.112008788363192e-05, + "loss": 1.9432, + "step": 11860 + }, + { + "epoch": 0.52, + "learning_rate": 5.110527033123861e-05, + "loss": 1.8859, + "step": 11870 + }, + { + "epoch": 0.52, + "learning_rate": 5.1090442577806306e-05, + "loss": 1.9347, + "step": 11880 + }, + { + "epoch": 0.52, + "learning_rate": 5.107560463050188e-05, + "loss": 1.9167, + "step": 11890 + }, + { + "epoch": 0.52, + "learning_rate": 5.106075649649714e-05, + "loss": 1.9169, + "step": 11900 + }, + { + "epoch": 0.52, + "eval_loss": 1.8978095054626465, + "eval_runtime": 11.7169, + "eval_samples_per_second": 349.58, + "eval_steps_per_second": 21.849, + "step": 11900 + }, + { + "epoch": 0.52, + "learning_rate": 5.104589818296875e-05, + "loss": 1.9154, + "step": 11910 + }, + { + "epoch": 0.52, + "learning_rate": 5.103102969709839e-05, + "loss": 1.9427, + "step": 11920 + }, + { + "epoch": 0.52, + "learning_rate": 5.1016151046072576e-05, + "loss": 1.9245, + "step": 11930 + }, + { + "epoch": 0.52, + "learning_rate": 5.100126223708276e-05, + "loss": 1.929, + "step": 11940 + }, + { + "epoch": 0.52, + "learning_rate": 5.098636327732534e-05, + "loss": 1.912, + "step": 11950 + }, + { + "epoch": 0.52, + "learning_rate": 5.097145417400157e-05, + "loss": 1.9093, + "step": 11960 + }, + { + "epoch": 0.52, + "learning_rate": 5.0956534934317624e-05, + "loss": 1.9014, + "step": 11970 + }, + { + "epoch": 0.52, + "learning_rate": 5.094160556548461e-05, + "loss": 1.9396, + "step": 11980 + }, + { + "epoch": 0.52, + "learning_rate": 5.092666607471847e-05, + "loss": 1.9046, + "step": 11990 + }, + { + "epoch": 0.53, + "learning_rate": 5.091171646924009e-05, + "loss": 1.9012, + "step": 12000 + }, + { + "epoch": 0.53, + "eval_loss": 1.897355556488037, + "eval_runtime": 12.4634, + "eval_samples_per_second": 328.642, + "eval_steps_per_second": 20.54, + "step": 12000 + }, + { + "epoch": 0.53, + "learning_rate": 5.089675675627522e-05, + "loss": 1.92, + "step": 12010 + }, + { + "epoch": 0.53, + "learning_rate": 5.0881786943054486e-05, + "loss": 1.9191, + "step": 12020 + }, + { + "epoch": 0.53, + "learning_rate": 5.086680703681343e-05, + "loss": 1.9002, + "step": 12030 + }, + { + "epoch": 0.53, + "learning_rate": 5.085181704479244e-05, + "loss": 1.9084, + "step": 12040 + }, + { + "epoch": 0.53, + "learning_rate": 5.0836816974236796e-05, + "loss": 1.9434, + "step": 12050 + }, + { + "epoch": 0.53, + "learning_rate": 5.082180683239664e-05, + "loss": 1.9575, + "step": 12060 + }, + { + "epoch": 0.53, + "learning_rate": 5.0806786626527e-05, + "loss": 1.9332, + "step": 12070 + }, + { + "epoch": 0.53, + "learning_rate": 5.079175636388773e-05, + "loss": 1.8858, + "step": 12080 + }, + { + "epoch": 0.53, + "learning_rate": 5.077671605174359e-05, + "loss": 1.8997, + "step": 12090 + }, + { + "epoch": 0.53, + "learning_rate": 5.076166569736418e-05, + "loss": 1.9475, + "step": 12100 + }, + { + "epoch": 0.53, + "eval_loss": 1.8951764106750488, + "eval_runtime": 11.8716, + "eval_samples_per_second": 345.025, + "eval_steps_per_second": 21.564, + "step": 12100 + }, + { + "epoch": 0.53, + "learning_rate": 5.074660530802393e-05, + "loss": 1.9078, + "step": 12110 + }, + { + "epoch": 0.53, + "learning_rate": 5.073153489100216e-05, + "loss": 1.9249, + "step": 12120 + }, + { + "epoch": 0.53, + "learning_rate": 5.0716454453583015e-05, + "loss": 1.8992, + "step": 12130 + }, + { + "epoch": 0.53, + "learning_rate": 5.0701364003055475e-05, + "loss": 1.8958, + "step": 12140 + }, + { + "epoch": 0.53, + "learning_rate": 5.0686263546713386e-05, + "loss": 1.9222, + "step": 12150 + }, + { + "epoch": 0.53, + "learning_rate": 5.0671153091855406e-05, + "loss": 1.9098, + "step": 12160 + }, + { + "epoch": 0.53, + "learning_rate": 5.0656032645785026e-05, + "loss": 1.8734, + "step": 12170 + }, + { + "epoch": 0.53, + "learning_rate": 5.06409022158106e-05, + "loss": 1.9222, + "step": 12180 + }, + { + "epoch": 0.53, + "learning_rate": 5.062576180924526e-05, + "loss": 1.9241, + "step": 12190 + }, + { + "epoch": 0.53, + "learning_rate": 5.0610611433406996e-05, + "loss": 1.9312, + "step": 12200 + }, + { + "epoch": 0.53, + "eval_loss": 1.8951544761657715, + "eval_runtime": 11.9349, + "eval_samples_per_second": 343.194, + "eval_steps_per_second": 21.45, + "step": 12200 + }, + { + "epoch": 0.53, + "learning_rate": 5.0595451095618595e-05, + "loss": 1.8868, + "step": 12210 + }, + { + "epoch": 0.53, + "learning_rate": 5.058028080320767e-05, + "loss": 1.9428, + "step": 12220 + }, + { + "epoch": 0.54, + "learning_rate": 5.056510056350665e-05, + "loss": 1.8836, + "step": 12230 + }, + { + "epoch": 0.54, + "learning_rate": 5.0549910383852756e-05, + "loss": 1.9613, + "step": 12240 + }, + { + "epoch": 0.54, + "learning_rate": 5.0534710271588026e-05, + "loss": 1.9218, + "step": 12250 + }, + { + "epoch": 0.54, + "learning_rate": 5.051950023405928e-05, + "loss": 1.8936, + "step": 12260 + }, + { + "epoch": 0.54, + "learning_rate": 5.0504280278618175e-05, + "loss": 1.9501, + "step": 12270 + }, + { + "epoch": 0.54, + "learning_rate": 5.048905041262113e-05, + "loss": 1.9222, + "step": 12280 + }, + { + "epoch": 0.54, + "learning_rate": 5.0473810643429346e-05, + "loss": 1.9357, + "step": 12290 + }, + { + "epoch": 0.54, + "learning_rate": 5.0458560978408844e-05, + "loss": 1.9024, + "step": 12300 + }, + { + "epoch": 0.54, + "eval_loss": 1.894567608833313, + "eval_runtime": 11.8015, + "eval_samples_per_second": 347.074, + "eval_steps_per_second": 21.692, + "step": 12300 + }, + { + "epoch": 0.54, + "learning_rate": 5.04433014249304e-05, + "loss": 1.901, + "step": 12310 + }, + { + "epoch": 0.54, + "learning_rate": 5.042803199036958e-05, + "loss": 1.8953, + "step": 12320 + }, + { + "epoch": 0.54, + "learning_rate": 5.041275268210672e-05, + "loss": 1.9358, + "step": 12330 + }, + { + "epoch": 0.54, + "learning_rate": 5.039746350752696e-05, + "loss": 1.9632, + "step": 12340 + }, + { + "epoch": 0.54, + "learning_rate": 5.0382164474020144e-05, + "loss": 1.8925, + "step": 12350 + }, + { + "epoch": 0.54, + "learning_rate": 5.036685558898095e-05, + "loss": 1.8659, + "step": 12360 + }, + { + "epoch": 0.54, + "learning_rate": 5.035153685980877e-05, + "loss": 1.9407, + "step": 12370 + }, + { + "epoch": 0.54, + "learning_rate": 5.033620829390778e-05, + "loss": 1.886, + "step": 12380 + }, + { + "epoch": 0.54, + "learning_rate": 5.0320869898686897e-05, + "loss": 1.8922, + "step": 12390 + }, + { + "epoch": 0.54, + "learning_rate": 5.0305521681559795e-05, + "loss": 1.9246, + "step": 12400 + }, + { + "epoch": 0.54, + "eval_loss": 1.8931217193603516, + "eval_runtime": 11.8736, + "eval_samples_per_second": 344.967, + "eval_steps_per_second": 21.56, + "step": 12400 + }, + { + "epoch": 0.54, + "learning_rate": 5.0290163649944895e-05, + "loss": 1.9126, + "step": 12410 + }, + { + "epoch": 0.54, + "learning_rate": 5.0274795811265356e-05, + "loss": 1.8948, + "step": 12420 + }, + { + "epoch": 0.54, + "learning_rate": 5.0259418172949096e-05, + "loss": 1.942, + "step": 12430 + }, + { + "epoch": 0.54, + "learning_rate": 5.0244030742428746e-05, + "loss": 1.8984, + "step": 12440 + }, + { + "epoch": 0.55, + "learning_rate": 5.022863352714168e-05, + "loss": 1.8838, + "step": 12450 + }, + { + "epoch": 0.55, + "learning_rate": 5.0213226534529994e-05, + "loss": 1.8943, + "step": 12460 + }, + { + "epoch": 0.55, + "learning_rate": 5.0197809772040526e-05, + "loss": 1.9087, + "step": 12470 + }, + { + "epoch": 0.55, + "learning_rate": 5.0182383247124826e-05, + "loss": 1.9317, + "step": 12480 + }, + { + "epoch": 0.55, + "learning_rate": 5.0166946967239155e-05, + "loss": 1.9581, + "step": 12490 + }, + { + "epoch": 0.55, + "learning_rate": 5.0151500939844505e-05, + "loss": 1.9311, + "step": 12500 + }, + { + "epoch": 0.55, + "eval_loss": 1.8927887678146362, + "eval_runtime": 12.0202, + "eval_samples_per_second": 340.759, + "eval_steps_per_second": 21.297, + "step": 12500 + }, + { + "epoch": 0.55, + "learning_rate": 5.013604517240657e-05, + "loss": 1.9014, + "step": 12510 + }, + { + "epoch": 0.55, + "learning_rate": 5.012057967239576e-05, + "loss": 1.8831, + "step": 12520 + }, + { + "epoch": 0.55, + "learning_rate": 5.010510444728717e-05, + "loss": 1.9165, + "step": 12530 + }, + { + "epoch": 0.55, + "learning_rate": 5.008961950456062e-05, + "loss": 1.8805, + "step": 12540 + }, + { + "epoch": 0.55, + "learning_rate": 5.0074124851700615e-05, + "loss": 1.8601, + "step": 12550 + }, + { + "epoch": 0.55, + "learning_rate": 5.005862049619634e-05, + "loss": 1.9155, + "step": 12560 + }, + { + "epoch": 0.55, + "learning_rate": 5.00431064455417e-05, + "loss": 1.923, + "step": 12570 + }, + { + "epoch": 0.55, + "learning_rate": 5.002758270723525e-05, + "loss": 1.9147, + "step": 12580 + }, + { + "epoch": 0.55, + "learning_rate": 5.0012049288780266e-05, + "loss": 1.9428, + "step": 12590 + }, + { + "epoch": 0.55, + "learning_rate": 4.9996506197684674e-05, + "loss": 1.9652, + "step": 12600 + }, + { + "epoch": 0.55, + "eval_loss": 1.8919925689697266, + "eval_runtime": 11.9961, + "eval_samples_per_second": 341.443, + "eval_steps_per_second": 21.34, + "step": 12600 + }, + { + "epoch": 0.55, + "learning_rate": 4.9980953441461076e-05, + "loss": 1.9505, + "step": 12610 + }, + { + "epoch": 0.55, + "learning_rate": 4.9965391027626776e-05, + "loss": 1.8803, + "step": 12620 + }, + { + "epoch": 0.55, + "learning_rate": 4.99498189637037e-05, + "loss": 1.9102, + "step": 12630 + }, + { + "epoch": 0.55, + "learning_rate": 4.993423725721849e-05, + "loss": 1.9154, + "step": 12640 + }, + { + "epoch": 0.55, + "learning_rate": 4.99186459157024e-05, + "loss": 1.9074, + "step": 12650 + }, + { + "epoch": 0.55, + "learning_rate": 4.9903044946691354e-05, + "loss": 1.9372, + "step": 12660 + }, + { + "epoch": 0.55, + "learning_rate": 4.988743435772596e-05, + "loss": 1.9326, + "step": 12670 + }, + { + "epoch": 0.56, + "learning_rate": 4.9871814156351444e-05, + "loss": 1.8712, + "step": 12680 + }, + { + "epoch": 0.56, + "learning_rate": 4.9856184350117696e-05, + "loss": 1.9461, + "step": 12690 + }, + { + "epoch": 0.56, + "learning_rate": 4.9840544946579226e-05, + "loss": 1.9164, + "step": 12700 + }, + { + "epoch": 0.56, + "eval_loss": 1.8904156684875488, + "eval_runtime": 11.9521, + "eval_samples_per_second": 342.701, + "eval_steps_per_second": 21.419, + "step": 12700 + }, + { + "epoch": 0.56, + "learning_rate": 4.98248959532952e-05, + "loss": 1.8621, + "step": 12710 + }, + { + "epoch": 0.56, + "learning_rate": 4.980923737782941e-05, + "loss": 1.8796, + "step": 12720 + }, + { + "epoch": 0.56, + "learning_rate": 4.979356922775029e-05, + "loss": 1.9101, + "step": 12730 + }, + { + "epoch": 0.56, + "learning_rate": 4.9777891510630904e-05, + "loss": 1.9218, + "step": 12740 + }, + { + "epoch": 0.56, + "learning_rate": 4.976220423404894e-05, + "loss": 1.904, + "step": 12750 + }, + { + "epoch": 0.56, + "learning_rate": 4.9746507405586664e-05, + "loss": 1.947, + "step": 12760 + }, + { + "epoch": 0.56, + "learning_rate": 4.973080103283103e-05, + "loss": 1.9095, + "step": 12770 + }, + { + "epoch": 0.56, + "learning_rate": 4.971508512337355e-05, + "loss": 1.8795, + "step": 12780 + }, + { + "epoch": 0.56, + "learning_rate": 4.969935968481037e-05, + "loss": 1.9002, + "step": 12790 + }, + { + "epoch": 0.56, + "learning_rate": 4.9683624724742246e-05, + "loss": 1.9281, + "step": 12800 + }, + { + "epoch": 0.56, + "eval_loss": 1.8897275924682617, + "eval_runtime": 11.5813, + "eval_samples_per_second": 353.674, + "eval_steps_per_second": 22.105, + "step": 12800 + }, + { + "epoch": 0.56, + "learning_rate": 4.96678802507745e-05, + "loss": 1.875, + "step": 12810 + }, + { + "epoch": 0.56, + "learning_rate": 4.965212627051712e-05, + "loss": 1.8754, + "step": 12820 + }, + { + "epoch": 0.56, + "learning_rate": 4.9636362791584606e-05, + "loss": 1.8657, + "step": 12830 + }, + { + "epoch": 0.56, + "learning_rate": 4.9620589821596115e-05, + "loss": 1.922, + "step": 12840 + }, + { + "epoch": 0.56, + "learning_rate": 4.960480736817537e-05, + "loss": 1.9201, + "step": 12850 + }, + { + "epoch": 0.56, + "learning_rate": 4.958901543895066e-05, + "loss": 1.8708, + "step": 12860 + }, + { + "epoch": 0.56, + "learning_rate": 4.957321404155488e-05, + "loss": 1.9147, + "step": 12870 + }, + { + "epoch": 0.56, + "learning_rate": 4.955740318362548e-05, + "loss": 1.9418, + "step": 12880 + }, + { + "epoch": 0.56, + "learning_rate": 4.954158287280452e-05, + "loss": 1.9568, + "step": 12890 + }, + { + "epoch": 0.56, + "learning_rate": 4.9525753116738566e-05, + "loss": 1.9146, + "step": 12900 + }, + { + "epoch": 0.56, + "eval_loss": 1.89072847366333, + "eval_runtime": 11.6591, + "eval_samples_per_second": 351.315, + "eval_steps_per_second": 21.957, + "step": 12900 + }, + { + "epoch": 0.57, + "learning_rate": 4.950991392307881e-05, + "loss": 1.9184, + "step": 12910 + }, + { + "epoch": 0.57, + "learning_rate": 4.949406529948097e-05, + "loss": 1.9761, + "step": 12920 + }, + { + "epoch": 0.57, + "learning_rate": 4.947820725360534e-05, + "loss": 1.9419, + "step": 12930 + }, + { + "epoch": 0.57, + "learning_rate": 4.946233979311676e-05, + "loss": 1.893, + "step": 12940 + }, + { + "epoch": 0.57, + "learning_rate": 4.9446462925684616e-05, + "loss": 1.9044, + "step": 12950 + }, + { + "epoch": 0.57, + "learning_rate": 4.943057665898285e-05, + "loss": 1.9326, + "step": 12960 + }, + { + "epoch": 0.57, + "learning_rate": 4.941468100068994e-05, + "loss": 1.9218, + "step": 12970 + }, + { + "epoch": 0.57, + "learning_rate": 4.9398775958488914e-05, + "loss": 1.9269, + "step": 12980 + }, + { + "epoch": 0.57, + "learning_rate": 4.9382861540067324e-05, + "loss": 1.9444, + "step": 12990 + }, + { + "epoch": 0.57, + "learning_rate": 4.9366937753117266e-05, + "loss": 1.8917, + "step": 13000 + }, + { + "epoch": 0.57, + "eval_loss": 1.888537883758545, + "eval_runtime": 11.8673, + "eval_samples_per_second": 345.151, + "eval_steps_per_second": 21.572, + "step": 13000 + }, + { + "epoch": 0.57, + "learning_rate": 4.9351004605335335e-05, + "loss": 1.8794, + "step": 13010 + }, + { + "epoch": 0.57, + "learning_rate": 4.9335062104422693e-05, + "loss": 1.977, + "step": 13020 + }, + { + "epoch": 0.57, + "learning_rate": 4.9319110258085e-05, + "loss": 1.918, + "step": 13030 + }, + { + "epoch": 0.57, + "learning_rate": 4.930314907403243e-05, + "loss": 1.9226, + "step": 13040 + }, + { + "epoch": 0.57, + "learning_rate": 4.928717855997966e-05, + "loss": 1.8847, + "step": 13050 + }, + { + "epoch": 0.57, + "learning_rate": 4.927119872364591e-05, + "loss": 1.9315, + "step": 13060 + }, + { + "epoch": 0.57, + "learning_rate": 4.925520957275489e-05, + "loss": 1.9221, + "step": 13070 + }, + { + "epoch": 0.57, + "learning_rate": 4.9239211115034804e-05, + "loss": 1.9149, + "step": 13080 + }, + { + "epoch": 0.57, + "learning_rate": 4.922320335821835e-05, + "loss": 1.8801, + "step": 13090 + }, + { + "epoch": 0.57, + "learning_rate": 4.920718631004275e-05, + "loss": 1.8859, + "step": 13100 + }, + { + "epoch": 0.57, + "eval_loss": 1.8876938819885254, + "eval_runtime": 11.8093, + "eval_samples_per_second": 346.846, + "eval_steps_per_second": 21.678, + "step": 13100 + }, + { + "epoch": 0.57, + "learning_rate": 4.9191159978249686e-05, + "loss": 1.9131, + "step": 13110 + }, + { + "epoch": 0.57, + "learning_rate": 4.917512437058534e-05, + "loss": 1.9271, + "step": 13120 + }, + { + "epoch": 0.57, + "learning_rate": 4.915907949480037e-05, + "loss": 1.9257, + "step": 13130 + }, + { + "epoch": 0.58, + "learning_rate": 4.914302535864993e-05, + "loss": 1.9172, + "step": 13140 + }, + { + "epoch": 0.58, + "learning_rate": 4.9126961969893625e-05, + "loss": 1.9085, + "step": 13150 + }, + { + "epoch": 0.58, + "learning_rate": 4.911088933629557e-05, + "loss": 1.916, + "step": 13160 + }, + { + "epoch": 0.58, + "learning_rate": 4.90948074656243e-05, + "loss": 1.9169, + "step": 13170 + }, + { + "epoch": 0.58, + "learning_rate": 4.907871636565285e-05, + "loss": 1.9196, + "step": 13180 + }, + { + "epoch": 0.58, + "learning_rate": 4.906261604415872e-05, + "loss": 1.919, + "step": 13190 + }, + { + "epoch": 0.58, + "learning_rate": 4.904650650892384e-05, + "loss": 1.9009, + "step": 13200 + }, + { + "epoch": 0.58, + "eval_loss": 1.8867980241775513, + "eval_runtime": 11.7952, + "eval_samples_per_second": 347.26, + "eval_steps_per_second": 21.704, + "step": 13200 + }, + { + "epoch": 0.58, + "learning_rate": 4.90303877677346e-05, + "loss": 1.8787, + "step": 13210 + }, + { + "epoch": 0.58, + "learning_rate": 4.901425982838185e-05, + "loss": 1.9349, + "step": 13220 + }, + { + "epoch": 0.58, + "learning_rate": 4.8998122698660884e-05, + "loss": 1.955, + "step": 13230 + }, + { + "epoch": 0.58, + "learning_rate": 4.8981976386371444e-05, + "loss": 1.9376, + "step": 13240 + }, + { + "epoch": 0.58, + "learning_rate": 4.896582089931768e-05, + "loss": 1.9519, + "step": 13250 + }, + { + "epoch": 0.58, + "learning_rate": 4.894965624530821e-05, + "loss": 1.9243, + "step": 13260 + }, + { + "epoch": 0.58, + "learning_rate": 4.8933482432156094e-05, + "loss": 1.9578, + "step": 13270 + }, + { + "epoch": 0.58, + "learning_rate": 4.891729946767876e-05, + "loss": 1.9111, + "step": 13280 + }, + { + "epoch": 0.58, + "learning_rate": 4.8901107359698115e-05, + "loss": 1.9282, + "step": 13290 + }, + { + "epoch": 0.58, + "learning_rate": 4.8884906116040464e-05, + "loss": 1.8958, + "step": 13300 + }, + { + "epoch": 0.58, + "eval_loss": 1.8861165046691895, + "eval_runtime": 11.9112, + "eval_samples_per_second": 343.879, + "eval_steps_per_second": 21.492, + "step": 13300 + }, + { + "epoch": 0.58, + "learning_rate": 4.886869574453653e-05, + "loss": 1.8374, + "step": 13310 + }, + { + "epoch": 0.58, + "learning_rate": 4.8852476253021435e-05, + "loss": 1.9007, + "step": 13320 + }, + { + "epoch": 0.58, + "learning_rate": 4.883624764933474e-05, + "loss": 1.9312, + "step": 13330 + }, + { + "epoch": 0.58, + "learning_rate": 4.882000994132039e-05, + "loss": 1.9262, + "step": 13340 + }, + { + "epoch": 0.58, + "learning_rate": 4.8803763136826715e-05, + "loss": 1.888, + "step": 13350 + }, + { + "epoch": 0.58, + "learning_rate": 4.878750724370647e-05, + "loss": 1.9391, + "step": 13360 + }, + { + "epoch": 0.59, + "learning_rate": 4.87712422698168e-05, + "loss": 1.8841, + "step": 13370 + }, + { + "epoch": 0.59, + "learning_rate": 4.875496822301922e-05, + "loss": 1.8599, + "step": 13380 + }, + { + "epoch": 0.59, + "learning_rate": 4.873868511117964e-05, + "loss": 1.8711, + "step": 13390 + }, + { + "epoch": 0.59, + "learning_rate": 4.8722392942168365e-05, + "loss": 1.9013, + "step": 13400 + }, + { + "epoch": 0.59, + "eval_loss": 1.8849880695343018, + "eval_runtime": 11.6646, + "eval_samples_per_second": 351.149, + "eval_steps_per_second": 21.947, + "step": 13400 + }, + { + "epoch": 0.59, + "learning_rate": 4.870609172386006e-05, + "loss": 1.8912, + "step": 13410 + }, + { + "epoch": 0.59, + "learning_rate": 4.868978146413376e-05, + "loss": 1.8703, + "step": 13420 + }, + { + "epoch": 0.59, + "learning_rate": 4.867346217087289e-05, + "loss": 1.92, + "step": 13430 + }, + { + "epoch": 0.59, + "learning_rate": 4.865713385196522e-05, + "loss": 1.8996, + "step": 13440 + }, + { + "epoch": 0.59, + "learning_rate": 4.8640796515302915e-05, + "loss": 1.9156, + "step": 13450 + }, + { + "epoch": 0.59, + "learning_rate": 4.862445016878245e-05, + "loss": 1.927, + "step": 13460 + }, + { + "epoch": 0.59, + "learning_rate": 4.8608094820304704e-05, + "loss": 1.9362, + "step": 13470 + }, + { + "epoch": 0.59, + "learning_rate": 4.859173047777488e-05, + "loss": 1.8748, + "step": 13480 + }, + { + "epoch": 0.59, + "learning_rate": 4.857535714910252e-05, + "loss": 1.9397, + "step": 13490 + }, + { + "epoch": 0.59, + "learning_rate": 4.855897484220153e-05, + "loss": 1.8975, + "step": 13500 + }, + { + "epoch": 0.59, + "eval_loss": 1.8842600584030151, + "eval_runtime": 11.9977, + "eval_samples_per_second": 341.399, + "eval_steps_per_second": 21.337, + "step": 13500 + }, + { + "epoch": 0.59, + "learning_rate": 4.854258356499016e-05, + "loss": 1.8826, + "step": 13510 + }, + { + "epoch": 0.59, + "learning_rate": 4.8526183325390956e-05, + "loss": 1.8971, + "step": 13520 + }, + { + "epoch": 0.59, + "learning_rate": 4.850977413133085e-05, + "loss": 1.9155, + "step": 13530 + }, + { + "epoch": 0.59, + "learning_rate": 4.849335599074106e-05, + "loss": 1.9324, + "step": 13540 + }, + { + "epoch": 0.59, + "learning_rate": 4.847692891155716e-05, + "loss": 1.8948, + "step": 13550 + }, + { + "epoch": 0.59, + "learning_rate": 4.8460492901719006e-05, + "loss": 1.9081, + "step": 13560 + }, + { + "epoch": 0.59, + "learning_rate": 4.84440479691708e-05, + "loss": 1.9164, + "step": 13570 + }, + { + "epoch": 0.59, + "learning_rate": 4.842759412186107e-05, + "loss": 1.9046, + "step": 13580 + }, + { + "epoch": 0.59, + "learning_rate": 4.8411131367742606e-05, + "loss": 1.8918, + "step": 13590 + }, + { + "epoch": 0.6, + "learning_rate": 4.839465971477255e-05, + "loss": 1.9471, + "step": 13600 + }, + { + "epoch": 0.6, + "eval_loss": 1.8836712837219238, + "eval_runtime": 11.9691, + "eval_samples_per_second": 342.213, + "eval_steps_per_second": 21.388, + "step": 13600 + }, + { + "epoch": 0.6, + "learning_rate": 4.8378179170912295e-05, + "loss": 1.9032, + "step": 13610 + }, + { + "epoch": 0.6, + "learning_rate": 4.83616897441276e-05, + "loss": 1.9068, + "step": 13620 + }, + { + "epoch": 0.6, + "learning_rate": 4.8345191442388444e-05, + "loss": 1.9466, + "step": 13630 + }, + { + "epoch": 0.6, + "learning_rate": 4.832868427366915e-05, + "loss": 1.9245, + "step": 13640 + }, + { + "epoch": 0.6, + "learning_rate": 4.83121682459483e-05, + "loss": 1.9372, + "step": 13650 + }, + { + "epoch": 0.6, + "learning_rate": 4.829564336720877e-05, + "loss": 1.9026, + "step": 13660 + }, + { + "epoch": 0.6, + "learning_rate": 4.827910964543769e-05, + "loss": 1.8805, + "step": 13670 + }, + { + "epoch": 0.6, + "learning_rate": 4.82625670886265e-05, + "loss": 1.9085, + "step": 13680 + }, + { + "epoch": 0.6, + "learning_rate": 4.82460157047709e-05, + "loss": 1.8671, + "step": 13690 + }, + { + "epoch": 0.6, + "learning_rate": 4.822945550187083e-05, + "loss": 1.9411, + "step": 13700 + }, + { + "epoch": 0.6, + "eval_loss": 1.8830509185791016, + "eval_runtime": 11.7716, + "eval_samples_per_second": 347.957, + "eval_steps_per_second": 21.747, + "step": 13700 + }, + { + "epoch": 0.6, + "learning_rate": 4.8212886487930526e-05, + "loss": 1.9245, + "step": 13710 + }, + { + "epoch": 0.6, + "learning_rate": 4.819630867095845e-05, + "loss": 1.9302, + "step": 13720 + }, + { + "epoch": 0.6, + "learning_rate": 4.817972205896738e-05, + "loss": 1.8732, + "step": 13730 + }, + { + "epoch": 0.6, + "learning_rate": 4.816312665997426e-05, + "loss": 1.888, + "step": 13740 + }, + { + "epoch": 0.6, + "learning_rate": 4.814652248200035e-05, + "loss": 1.8778, + "step": 13750 + }, + { + "epoch": 0.6, + "learning_rate": 4.8129909533071105e-05, + "loss": 1.8716, + "step": 13760 + }, + { + "epoch": 0.6, + "learning_rate": 4.811328782121626e-05, + "loss": 1.8734, + "step": 13770 + }, + { + "epoch": 0.6, + "learning_rate": 4.809665735446975e-05, + "loss": 1.9905, + "step": 13780 + }, + { + "epoch": 0.6, + "learning_rate": 4.8080018140869775e-05, + "loss": 1.9574, + "step": 13790 + }, + { + "epoch": 0.6, + "learning_rate": 4.806337018845875e-05, + "loss": 1.8946, + "step": 13800 + }, + { + "epoch": 0.6, + "eval_loss": 1.8828606605529785, + "eval_runtime": 11.6574, + "eval_samples_per_second": 351.364, + "eval_steps_per_second": 21.96, + "step": 13800 + }, + { + "epoch": 0.6, + "learning_rate": 4.804671350528329e-05, + "loss": 1.911, + "step": 13810 + }, + { + "epoch": 0.6, + "learning_rate": 4.8030048099394265e-05, + "loss": 1.9134, + "step": 13820 + }, + { + "epoch": 0.61, + "learning_rate": 4.801337397884675e-05, + "loss": 1.9293, + "step": 13830 + }, + { + "epoch": 0.61, + "learning_rate": 4.799669115170001e-05, + "loss": 1.9327, + "step": 13840 + }, + { + "epoch": 0.61, + "learning_rate": 4.797999962601755e-05, + "loss": 1.8865, + "step": 13850 + }, + { + "epoch": 0.61, + "learning_rate": 4.796329940986706e-05, + "loss": 1.9236, + "step": 13860 + }, + { + "epoch": 0.61, + "learning_rate": 4.794659051132044e-05, + "loss": 1.8992, + "step": 13870 + }, + { + "epoch": 0.61, + "learning_rate": 4.7929872938453796e-05, + "loss": 1.913, + "step": 13880 + }, + { + "epoch": 0.61, + "learning_rate": 4.791314669934739e-05, + "loss": 1.8726, + "step": 13890 + }, + { + "epoch": 0.61, + "learning_rate": 4.78964118020857e-05, + "loss": 1.8791, + "step": 13900 + }, + { + "epoch": 0.61, + "eval_loss": 1.8819687366485596, + "eval_runtime": 11.7317, + "eval_samples_per_second": 349.139, + "eval_steps_per_second": 21.821, + "step": 13900 + }, + { + "epoch": 0.61, + "learning_rate": 4.7879668254757404e-05, + "loss": 1.8488, + "step": 13910 + }, + { + "epoch": 0.61, + "learning_rate": 4.786291606545533e-05, + "loss": 1.9093, + "step": 13920 + }, + { + "epoch": 0.61, + "learning_rate": 4.784615524227648e-05, + "loss": 1.9286, + "step": 13930 + }, + { + "epoch": 0.61, + "learning_rate": 4.782938579332207e-05, + "loss": 1.9215, + "step": 13940 + }, + { + "epoch": 0.61, + "learning_rate": 4.7812607726697446e-05, + "loss": 1.9186, + "step": 13950 + }, + { + "epoch": 0.61, + "learning_rate": 4.779582105051214e-05, + "loss": 1.8571, + "step": 13960 + }, + { + "epoch": 0.61, + "learning_rate": 4.777902577287983e-05, + "loss": 1.9116, + "step": 13970 + }, + { + "epoch": 0.61, + "learning_rate": 4.7762221901918364e-05, + "loss": 1.8792, + "step": 13980 + }, + { + "epoch": 0.61, + "learning_rate": 4.7745409445749737e-05, + "loss": 1.8778, + "step": 13990 + }, + { + "epoch": 0.61, + "learning_rate": 4.7728588412500095e-05, + "loss": 1.9064, + "step": 14000 + }, + { + "epoch": 0.61, + "eval_loss": 1.88134765625, + "eval_runtime": 12.1415, + "eval_samples_per_second": 337.357, + "eval_steps_per_second": 21.085, + "step": 14000 + }, + { + "epoch": 0.61, + "learning_rate": 4.771175881029973e-05, + "loss": 1.9157, + "step": 14010 + }, + { + "epoch": 0.61, + "learning_rate": 4.769492064728309e-05, + "loss": 1.8532, + "step": 14020 + }, + { + "epoch": 0.61, + "learning_rate": 4.7678073931588716e-05, + "loss": 1.909, + "step": 14030 + }, + { + "epoch": 0.61, + "learning_rate": 4.766121867135935e-05, + "loss": 1.9097, + "step": 14040 + }, + { + "epoch": 0.62, + "learning_rate": 4.7644354874741795e-05, + "loss": 1.9228, + "step": 14050 + }, + { + "epoch": 0.62, + "learning_rate": 4.762748254988704e-05, + "loss": 1.8766, + "step": 14060 + }, + { + "epoch": 0.62, + "learning_rate": 4.7610601704950154e-05, + "loss": 1.8731, + "step": 14070 + }, + { + "epoch": 0.62, + "learning_rate": 4.7593712348090335e-05, + "loss": 1.8987, + "step": 14080 + }, + { + "epoch": 0.62, + "learning_rate": 4.75768144874709e-05, + "loss": 1.8735, + "step": 14090 + }, + { + "epoch": 0.62, + "learning_rate": 4.755990813125929e-05, + "loss": 1.9096, + "step": 14100 + }, + { + "epoch": 0.62, + "eval_loss": 1.8808305263519287, + "eval_runtime": 11.6253, + "eval_samples_per_second": 352.334, + "eval_steps_per_second": 22.021, + "step": 14100 + }, + { + "epoch": 0.62, + "learning_rate": 4.754299328762703e-05, + "loss": 1.8673, + "step": 14110 + }, + { + "epoch": 0.62, + "learning_rate": 4.7526069964749745e-05, + "loss": 1.9113, + "step": 14120 + }, + { + "epoch": 0.62, + "learning_rate": 4.750913817080718e-05, + "loss": 1.906, + "step": 14130 + }, + { + "epoch": 0.62, + "learning_rate": 4.749219791398315e-05, + "loss": 1.8916, + "step": 14140 + }, + { + "epoch": 0.62, + "learning_rate": 4.747524920246558e-05, + "loss": 1.9327, + "step": 14150 + }, + { + "epoch": 0.62, + "learning_rate": 4.745829204444648e-05, + "loss": 1.9021, + "step": 14160 + }, + { + "epoch": 0.62, + "learning_rate": 4.744132644812192e-05, + "loss": 1.859, + "step": 14170 + }, + { + "epoch": 0.62, + "learning_rate": 4.742435242169208e-05, + "loss": 1.8952, + "step": 14180 + }, + { + "epoch": 0.62, + "learning_rate": 4.74073699733612e-05, + "loss": 1.9064, + "step": 14190 + }, + { + "epoch": 0.62, + "learning_rate": 4.73903791113376e-05, + "loss": 1.8784, + "step": 14200 + }, + { + "epoch": 0.62, + "eval_loss": 1.878967523574829, + "eval_runtime": 11.7525, + "eval_samples_per_second": 348.52, + "eval_steps_per_second": 21.783, + "step": 14200 + }, + { + "epoch": 0.62, + "learning_rate": 4.737337984383363e-05, + "loss": 1.8813, + "step": 14210 + }, + { + "epoch": 0.62, + "learning_rate": 4.735637217906574e-05, + "loss": 1.8586, + "step": 14220 + }, + { + "epoch": 0.62, + "learning_rate": 4.733935612525444e-05, + "loss": 1.9097, + "step": 14230 + }, + { + "epoch": 0.62, + "learning_rate": 4.732233169062428e-05, + "loss": 1.9324, + "step": 14240 + }, + { + "epoch": 0.62, + "learning_rate": 4.730529888340386e-05, + "loss": 1.9022, + "step": 14250 + }, + { + "epoch": 0.62, + "learning_rate": 4.7288257711825836e-05, + "loss": 1.8895, + "step": 14260 + }, + { + "epoch": 0.62, + "learning_rate": 4.7271208184126895e-05, + "loss": 1.9084, + "step": 14270 + }, + { + "epoch": 0.63, + "learning_rate": 4.725415030854777e-05, + "loss": 1.8941, + "step": 14280 + }, + { + "epoch": 0.63, + "learning_rate": 4.7237084093333244e-05, + "loss": 1.8961, + "step": 14290 + }, + { + "epoch": 0.63, + "learning_rate": 4.72200095467321e-05, + "loss": 1.881, + "step": 14300 + }, + { + "epoch": 0.63, + "eval_loss": 1.8786323070526123, + "eval_runtime": 11.735, + "eval_samples_per_second": 349.042, + "eval_steps_per_second": 21.815, + "step": 14300 + }, + { + "epoch": 0.63, + "learning_rate": 4.720292667699717e-05, + "loss": 1.9209, + "step": 14310 + }, + { + "epoch": 0.63, + "learning_rate": 4.7185835492385294e-05, + "loss": 1.8944, + "step": 14320 + }, + { + "epoch": 0.63, + "learning_rate": 4.716873600115736e-05, + "loss": 1.9029, + "step": 14330 + }, + { + "epoch": 0.63, + "learning_rate": 4.7151628211578226e-05, + "loss": 1.9425, + "step": 14340 + }, + { + "epoch": 0.63, + "learning_rate": 4.71345121319168e-05, + "loss": 1.9491, + "step": 14350 + }, + { + "epoch": 0.63, + "learning_rate": 4.711738777044598e-05, + "loss": 1.9051, + "step": 14360 + }, + { + "epoch": 0.63, + "learning_rate": 4.710025513544266e-05, + "loss": 1.9132, + "step": 14370 + }, + { + "epoch": 0.63, + "learning_rate": 4.708311423518776e-05, + "loss": 1.9294, + "step": 14380 + }, + { + "epoch": 0.63, + "learning_rate": 4.706596507796616e-05, + "loss": 1.8892, + "step": 14390 + }, + { + "epoch": 0.63, + "learning_rate": 4.7048807672066754e-05, + "loss": 1.9103, + "step": 14400 + }, + { + "epoch": 0.63, + "eval_loss": 1.8775782585144043, + "eval_runtime": 11.7984, + "eval_samples_per_second": 347.167, + "eval_steps_per_second": 21.698, + "step": 14400 + }, + { + "epoch": 0.63, + "learning_rate": 4.7031642025782416e-05, + "loss": 1.8831, + "step": 14410 + }, + { + "epoch": 0.63, + "learning_rate": 4.701446814741001e-05, + "loss": 1.922, + "step": 14420 + }, + { + "epoch": 0.63, + "learning_rate": 4.699728604525037e-05, + "loss": 1.9376, + "step": 14430 + }, + { + "epoch": 0.63, + "learning_rate": 4.69800957276083e-05, + "loss": 1.9187, + "step": 14440 + }, + { + "epoch": 0.63, + "learning_rate": 4.696289720279259e-05, + "loss": 1.8812, + "step": 14450 + }, + { + "epoch": 0.63, + "learning_rate": 4.694569047911599e-05, + "loss": 1.9076, + "step": 14460 + }, + { + "epoch": 0.63, + "learning_rate": 4.69284755648952e-05, + "loss": 1.9382, + "step": 14470 + }, + { + "epoch": 0.63, + "learning_rate": 4.69112524684509e-05, + "loss": 1.8788, + "step": 14480 + }, + { + "epoch": 0.63, + "learning_rate": 4.689402119810773e-05, + "loss": 1.9089, + "step": 14490 + }, + { + "epoch": 0.63, + "learning_rate": 4.687678176219424e-05, + "loss": 1.913, + "step": 14500 + }, + { + "epoch": 0.63, + "eval_loss": 1.8763041496276855, + "eval_runtime": 12.2143, + "eval_samples_per_second": 335.344, + "eval_steps_per_second": 20.959, + "step": 14500 + }, + { + "epoch": 0.64, + "learning_rate": 4.6859534169042976e-05, + "loss": 1.878, + "step": 14510 + }, + { + "epoch": 0.64, + "learning_rate": 4.6842278426990397e-05, + "loss": 1.9019, + "step": 14520 + }, + { + "epoch": 0.64, + "learning_rate": 4.68250145443769e-05, + "loss": 1.8764, + "step": 14530 + }, + { + "epoch": 0.64, + "learning_rate": 4.6807742529546815e-05, + "loss": 1.9188, + "step": 14540 + }, + { + "epoch": 0.64, + "learning_rate": 4.679046239084845e-05, + "loss": 1.9146, + "step": 14550 + }, + { + "epoch": 0.64, + "learning_rate": 4.677317413663397e-05, + "loss": 1.8906, + "step": 14560 + }, + { + "epoch": 0.64, + "learning_rate": 4.675587777525949e-05, + "loss": 1.875, + "step": 14570 + }, + { + "epoch": 0.64, + "learning_rate": 4.6738573315085075e-05, + "loss": 1.9221, + "step": 14580 + }, + { + "epoch": 0.64, + "learning_rate": 4.672126076447466e-05, + "loss": 1.928, + "step": 14590 + }, + { + "epoch": 0.64, + "learning_rate": 4.670394013179611e-05, + "loss": 1.9176, + "step": 14600 + }, + { + "epoch": 0.64, + "eval_loss": 1.876225471496582, + "eval_runtime": 11.7678, + "eval_samples_per_second": 348.067, + "eval_steps_per_second": 21.754, + "step": 14600 + }, + { + "epoch": 0.64, + "learning_rate": 4.66866114254212e-05, + "loss": 1.8598, + "step": 14610 + }, + { + "epoch": 0.64, + "learning_rate": 4.666927465372559e-05, + "loss": 1.9218, + "step": 14620 + }, + { + "epoch": 0.64, + "learning_rate": 4.665192982508884e-05, + "loss": 1.8575, + "step": 14630 + }, + { + "epoch": 0.64, + "learning_rate": 4.6634576947894456e-05, + "loss": 1.9287, + "step": 14640 + }, + { + "epoch": 0.64, + "learning_rate": 4.6617216030529746e-05, + "loss": 1.9112, + "step": 14650 + }, + { + "epoch": 0.64, + "learning_rate": 4.659984708138597e-05, + "loss": 1.8923, + "step": 14660 + }, + { + "epoch": 0.64, + "learning_rate": 4.658247010885826e-05, + "loss": 1.8565, + "step": 14670 + }, + { + "epoch": 0.64, + "learning_rate": 4.6565085121345606e-05, + "loss": 1.8667, + "step": 14680 + }, + { + "epoch": 0.64, + "learning_rate": 4.654769212725088e-05, + "loss": 1.8877, + "step": 14690 + }, + { + "epoch": 0.64, + "learning_rate": 4.6530291134980825e-05, + "loss": 1.9241, + "step": 14700 + }, + { + "epoch": 0.64, + "eval_loss": 1.8761088848114014, + "eval_runtime": 12.0138, + "eval_samples_per_second": 340.942, + "eval_steps_per_second": 21.309, + "step": 14700 + }, + { + "epoch": 0.64, + "learning_rate": 4.651288215294606e-05, + "loss": 1.8847, + "step": 14710 + }, + { + "epoch": 0.64, + "learning_rate": 4.649546518956105e-05, + "loss": 1.8731, + "step": 14720 + }, + { + "epoch": 0.64, + "learning_rate": 4.647804025324413e-05, + "loss": 1.8855, + "step": 14730 + }, + { + "epoch": 0.65, + "learning_rate": 4.6460607352417476e-05, + "loss": 1.89, + "step": 14740 + }, + { + "epoch": 0.65, + "learning_rate": 4.644316649550712e-05, + "loss": 1.8096, + "step": 14750 + }, + { + "epoch": 0.65, + "learning_rate": 4.642571769094296e-05, + "loss": 1.9138, + "step": 14760 + }, + { + "epoch": 0.65, + "learning_rate": 4.6408260947158684e-05, + "loss": 1.9277, + "step": 14770 + }, + { + "epoch": 0.65, + "learning_rate": 4.6390796272591884e-05, + "loss": 1.8945, + "step": 14780 + }, + { + "epoch": 0.65, + "learning_rate": 4.637332367568392e-05, + "loss": 1.8884, + "step": 14790 + }, + { + "epoch": 0.65, + "learning_rate": 4.635584316488003e-05, + "loss": 1.9179, + "step": 14800 + }, + { + "epoch": 0.65, + "eval_loss": 1.8754827976226807, + "eval_runtime": 12.2675, + "eval_samples_per_second": 333.89, + "eval_steps_per_second": 20.868, + "step": 14800 + }, + { + "epoch": 0.65, + "learning_rate": 4.6338354748629244e-05, + "loss": 1.9361, + "step": 14810 + }, + { + "epoch": 0.65, + "learning_rate": 4.6320858435384446e-05, + "loss": 1.9061, + "step": 14820 + }, + { + "epoch": 0.65, + "learning_rate": 4.630335423360232e-05, + "loss": 1.9, + "step": 14830 + }, + { + "epoch": 0.65, + "learning_rate": 4.628584215174333e-05, + "loss": 1.8648, + "step": 14840 + }, + { + "epoch": 0.65, + "learning_rate": 4.6268322198271804e-05, + "loss": 1.8879, + "step": 14850 + }, + { + "epoch": 0.65, + "learning_rate": 4.625079438165585e-05, + "loss": 1.8491, + "step": 14860 + }, + { + "epoch": 0.65, + "learning_rate": 4.6233258710367375e-05, + "loss": 1.867, + "step": 14870 + }, + { + "epoch": 0.65, + "learning_rate": 4.621571519288209e-05, + "loss": 1.8921, + "step": 14880 + }, + { + "epoch": 0.65, + "learning_rate": 4.619816383767949e-05, + "loss": 1.9075, + "step": 14890 + }, + { + "epoch": 0.65, + "learning_rate": 4.6180604653242855e-05, + "loss": 1.9128, + "step": 14900 + }, + { + "epoch": 0.65, + "eval_loss": 1.874982476234436, + "eval_runtime": 11.9034, + "eval_samples_per_second": 344.103, + "eval_steps_per_second": 21.506, + "step": 14900 + }, + { + "epoch": 0.65, + "learning_rate": 4.6163037648059256e-05, + "loss": 1.9357, + "step": 14910 + }, + { + "epoch": 0.65, + "learning_rate": 4.614546283061955e-05, + "loss": 1.8716, + "step": 14920 + }, + { + "epoch": 0.65, + "learning_rate": 4.612788020941837e-05, + "loss": 1.886, + "step": 14930 + }, + { + "epoch": 0.65, + "learning_rate": 4.611028979295411e-05, + "loss": 1.9695, + "step": 14940 + }, + { + "epoch": 0.65, + "learning_rate": 4.6092691589728924e-05, + "loss": 1.8555, + "step": 14950 + }, + { + "epoch": 0.65, + "learning_rate": 4.607508560824876e-05, + "loss": 1.9099, + "step": 14960 + }, + { + "epoch": 0.66, + "learning_rate": 4.6057471857023306e-05, + "loss": 1.8963, + "step": 14970 + }, + { + "epoch": 0.66, + "learning_rate": 4.603985034456599e-05, + "loss": 1.8679, + "step": 14980 + }, + { + "epoch": 0.66, + "learning_rate": 4.602222107939403e-05, + "loss": 1.9101, + "step": 14990 + }, + { + "epoch": 0.66, + "learning_rate": 4.6004584070028354e-05, + "loss": 1.9419, + "step": 15000 + }, + { + "epoch": 0.66, + "eval_loss": 1.874194622039795, + "eval_runtime": 11.8292, + "eval_samples_per_second": 346.263, + "eval_steps_per_second": 21.641, + "step": 15000 + }, + { + "epoch": 0.66, + "learning_rate": 4.598693932499366e-05, + "loss": 1.9005, + "step": 15010 + }, + { + "epoch": 0.66, + "learning_rate": 4.596928685281836e-05, + "loss": 1.8898, + "step": 15020 + }, + { + "epoch": 0.66, + "learning_rate": 4.5951626662034636e-05, + "loss": 1.8748, + "step": 15030 + }, + { + "epoch": 0.66, + "learning_rate": 4.5933958761178355e-05, + "loss": 1.9403, + "step": 15040 + }, + { + "epoch": 0.66, + "learning_rate": 4.5916283158789146e-05, + "loss": 1.8801, + "step": 15050 + }, + { + "epoch": 0.66, + "learning_rate": 4.5898599863410355e-05, + "loss": 1.8711, + "step": 15060 + }, + { + "epoch": 0.66, + "learning_rate": 4.5880908883589044e-05, + "loss": 1.8567, + "step": 15070 + }, + { + "epoch": 0.66, + "learning_rate": 4.5863210227875965e-05, + "loss": 1.9125, + "step": 15080 + }, + { + "epoch": 0.66, + "learning_rate": 4.584550390482562e-05, + "loss": 1.9104, + "step": 15090 + }, + { + "epoch": 0.66, + "learning_rate": 4.582778992299618e-05, + "loss": 1.8765, + "step": 15100 + }, + { + "epoch": 0.66, + "eval_loss": 1.8737382888793945, + "eval_runtime": 11.8232, + "eval_samples_per_second": 346.437, + "eval_steps_per_second": 21.652, + "step": 15100 + }, + { + "epoch": 0.66, + "learning_rate": 4.5810068290949566e-05, + "loss": 1.9292, + "step": 15110 + }, + { + "epoch": 0.66, + "learning_rate": 4.5792339017251336e-05, + "loss": 1.9138, + "step": 15120 + }, + { + "epoch": 0.66, + "learning_rate": 4.577460211047078e-05, + "loss": 1.8812, + "step": 15130 + }, + { + "epoch": 0.66, + "learning_rate": 4.5756857579180887e-05, + "loss": 1.875, + "step": 15140 + }, + { + "epoch": 0.66, + "learning_rate": 4.573910543195829e-05, + "loss": 1.8958, + "step": 15150 + }, + { + "epoch": 0.66, + "learning_rate": 4.572134567738334e-05, + "loss": 1.9032, + "step": 15160 + }, + { + "epoch": 0.66, + "learning_rate": 4.5703578324040044e-05, + "loss": 1.9176, + "step": 15170 + }, + { + "epoch": 0.66, + "learning_rate": 4.56858033805161e-05, + "loss": 1.8699, + "step": 15180 + }, + { + "epoch": 0.66, + "learning_rate": 4.5668020855402844e-05, + "loss": 1.8832, + "step": 15190 + }, + { + "epoch": 0.67, + "learning_rate": 4.565023075729532e-05, + "loss": 1.9259, + "step": 15200 + }, + { + "epoch": 0.67, + "eval_loss": 1.872511625289917, + "eval_runtime": 11.6329, + "eval_samples_per_second": 352.105, + "eval_steps_per_second": 22.007, + "step": 15200 + }, + { + "epoch": 0.67, + "learning_rate": 4.56324330947922e-05, + "loss": 1.8409, + "step": 15210 + }, + { + "epoch": 0.67, + "learning_rate": 4.5614627876495825e-05, + "loss": 1.9122, + "step": 15220 + }, + { + "epoch": 0.67, + "learning_rate": 4.559681511101217e-05, + "loss": 1.903, + "step": 15230 + }, + { + "epoch": 0.67, + "learning_rate": 4.5578994806950876e-05, + "loss": 1.8904, + "step": 15240 + }, + { + "epoch": 0.67, + "learning_rate": 4.556116697292524e-05, + "loss": 1.9553, + "step": 15250 + }, + { + "epoch": 0.67, + "learning_rate": 4.554333161755216e-05, + "loss": 1.8666, + "step": 15260 + }, + { + "epoch": 0.67, + "learning_rate": 4.552548874945221e-05, + "loss": 1.8376, + "step": 15270 + }, + { + "epoch": 0.67, + "learning_rate": 4.550763837724957e-05, + "loss": 1.8787, + "step": 15280 + }, + { + "epoch": 0.67, + "learning_rate": 4.5489780509572044e-05, + "loss": 1.9157, + "step": 15290 + }, + { + "epoch": 0.67, + "learning_rate": 4.5471915155051084e-05, + "loss": 1.9041, + "step": 15300 + }, + { + "epoch": 0.67, + "eval_loss": 1.8725992441177368, + "eval_runtime": 11.7485, + "eval_samples_per_second": 348.639, + "eval_steps_per_second": 21.79, + "step": 15300 + }, + { + "epoch": 0.67, + "learning_rate": 4.545404232232174e-05, + "loss": 1.9036, + "step": 15310 + }, + { + "epoch": 0.67, + "learning_rate": 4.5436162020022686e-05, + "loss": 1.8459, + "step": 15320 + }, + { + "epoch": 0.67, + "learning_rate": 4.541827425679618e-05, + "loss": 1.8782, + "step": 15330 + }, + { + "epoch": 0.67, + "learning_rate": 4.540037904128814e-05, + "loss": 1.921, + "step": 15340 + }, + { + "epoch": 0.67, + "learning_rate": 4.538247638214804e-05, + "loss": 1.913, + "step": 15350 + }, + { + "epoch": 0.67, + "learning_rate": 4.536456628802895e-05, + "loss": 1.8882, + "step": 15360 + }, + { + "epoch": 0.67, + "learning_rate": 4.5346648767587574e-05, + "loss": 1.9248, + "step": 15370 + }, + { + "epoch": 0.67, + "learning_rate": 4.532872382948418e-05, + "loss": 1.9775, + "step": 15380 + }, + { + "epoch": 0.67, + "learning_rate": 4.5310791482382616e-05, + "loss": 1.9272, + "step": 15390 + }, + { + "epoch": 0.67, + "learning_rate": 4.52928517349503e-05, + "loss": 1.8919, + "step": 15400 + }, + { + "epoch": 0.67, + "eval_loss": 1.8721492290496826, + "eval_runtime": 11.7624, + "eval_samples_per_second": 348.228, + "eval_steps_per_second": 21.764, + "step": 15400 + }, + { + "epoch": 0.67, + "learning_rate": 4.527490459585828e-05, + "loss": 1.902, + "step": 15410 + }, + { + "epoch": 0.68, + "learning_rate": 4.525695007378112e-05, + "loss": 1.8867, + "step": 15420 + }, + { + "epoch": 0.68, + "learning_rate": 4.523898817739697e-05, + "loss": 1.8574, + "step": 15430 + }, + { + "epoch": 0.68, + "learning_rate": 4.522101891538755e-05, + "loss": 1.9315, + "step": 15440 + }, + { + "epoch": 0.68, + "learning_rate": 4.5203042296438156e-05, + "loss": 1.9293, + "step": 15450 + }, + { + "epoch": 0.68, + "learning_rate": 4.518505832923761e-05, + "loss": 1.9151, + "step": 15460 + }, + { + "epoch": 0.68, + "learning_rate": 4.516706702247828e-05, + "loss": 1.9109, + "step": 15470 + }, + { + "epoch": 0.68, + "learning_rate": 4.514906838485613e-05, + "loss": 1.8913, + "step": 15480 + }, + { + "epoch": 0.68, + "learning_rate": 4.513106242507061e-05, + "loss": 1.9178, + "step": 15490 + }, + { + "epoch": 0.68, + "learning_rate": 4.511304915182477e-05, + "loss": 1.9077, + "step": 15500 + }, + { + "epoch": 0.68, + "eval_loss": 1.870882272720337, + "eval_runtime": 11.8959, + "eval_samples_per_second": 344.32, + "eval_steps_per_second": 21.52, + "step": 15500 + }, + { + "epoch": 0.68, + "learning_rate": 4.509502857382512e-05, + "loss": 1.8716, + "step": 15510 + }, + { + "epoch": 0.68, + "learning_rate": 4.507700069978176e-05, + "loss": 1.9321, + "step": 15520 + }, + { + "epoch": 0.68, + "learning_rate": 4.50589655384083e-05, + "loss": 1.8632, + "step": 15530 + }, + { + "epoch": 0.68, + "learning_rate": 4.504092309842187e-05, + "loss": 1.8933, + "step": 15540 + }, + { + "epoch": 0.68, + "learning_rate": 4.502287338854311e-05, + "loss": 1.8846, + "step": 15550 + }, + { + "epoch": 0.68, + "learning_rate": 4.5004816417496194e-05, + "loss": 1.9258, + "step": 15560 + }, + { + "epoch": 0.68, + "learning_rate": 4.4986752194008786e-05, + "loss": 1.9276, + "step": 15570 + }, + { + "epoch": 0.68, + "learning_rate": 4.496868072681206e-05, + "loss": 1.9237, + "step": 15580 + }, + { + "epoch": 0.68, + "learning_rate": 4.495060202464069e-05, + "loss": 1.9118, + "step": 15590 + }, + { + "epoch": 0.68, + "learning_rate": 4.4932516096232864e-05, + "loss": 1.8694, + "step": 15600 + }, + { + "epoch": 0.68, + "eval_loss": 1.8696751594543457, + "eval_runtime": 11.8083, + "eval_samples_per_second": 346.876, + "eval_steps_per_second": 21.68, + "step": 15600 + }, + { + "epoch": 0.68, + "learning_rate": 4.4914422950330247e-05, + "loss": 1.8977, + "step": 15610 + }, + { + "epoch": 0.68, + "learning_rate": 4.489632259567799e-05, + "loss": 1.8569, + "step": 15620 + }, + { + "epoch": 0.68, + "learning_rate": 4.487821504102474e-05, + "loss": 1.8957, + "step": 15630 + }, + { + "epoch": 0.68, + "learning_rate": 4.486010029512261e-05, + "loss": 1.9011, + "step": 15640 + }, + { + "epoch": 0.69, + "learning_rate": 4.48419783667272e-05, + "loss": 1.8822, + "step": 15650 + }, + { + "epoch": 0.69, + "learning_rate": 4.482384926459758e-05, + "loss": 1.8912, + "step": 15660 + }, + { + "epoch": 0.69, + "learning_rate": 4.480571299749628e-05, + "loss": 1.9014, + "step": 15670 + }, + { + "epoch": 0.69, + "learning_rate": 4.47875695741893e-05, + "loss": 1.9297, + "step": 15680 + }, + { + "epoch": 0.69, + "learning_rate": 4.476941900344611e-05, + "loss": 1.9134, + "step": 15690 + }, + { + "epoch": 0.69, + "learning_rate": 4.47512612940396e-05, + "loss": 1.9072, + "step": 15700 + }, + { + "epoch": 0.69, + "eval_loss": 1.869808554649353, + "eval_runtime": 11.9339, + "eval_samples_per_second": 343.223, + "eval_steps_per_second": 21.451, + "step": 15700 + }, + { + "epoch": 0.69, + "learning_rate": 4.473309645474614e-05, + "loss": 1.8311, + "step": 15710 + }, + { + "epoch": 0.69, + "learning_rate": 4.471492449434555e-05, + "loss": 1.9179, + "step": 15720 + }, + { + "epoch": 0.69, + "learning_rate": 4.4696745421621076e-05, + "loss": 1.8282, + "step": 15730 + }, + { + "epoch": 0.69, + "learning_rate": 4.46785592453594e-05, + "loss": 1.8748, + "step": 15740 + }, + { + "epoch": 0.69, + "learning_rate": 4.466036597435064e-05, + "loss": 1.8851, + "step": 15750 + }, + { + "epoch": 0.69, + "learning_rate": 4.464216561738836e-05, + "loss": 1.9093, + "step": 15760 + }, + { + "epoch": 0.69, + "learning_rate": 4.462395818326953e-05, + "loss": 1.874, + "step": 15770 + }, + { + "epoch": 0.69, + "learning_rate": 4.460574368079454e-05, + "loss": 1.9358, + "step": 15780 + }, + { + "epoch": 0.69, + "learning_rate": 4.458752211876721e-05, + "loss": 1.8965, + "step": 15790 + }, + { + "epoch": 0.69, + "learning_rate": 4.456929350599476e-05, + "loss": 1.9137, + "step": 15800 + }, + { + "epoch": 0.69, + "eval_loss": 1.8691773414611816, + "eval_runtime": 11.9836, + "eval_samples_per_second": 341.799, + "eval_steps_per_second": 21.362, + "step": 15800 + }, + { + "epoch": 0.69, + "learning_rate": 4.4551057851287834e-05, + "loss": 1.8851, + "step": 15810 + }, + { + "epoch": 0.69, + "learning_rate": 4.4532815163460455e-05, + "loss": 1.8815, + "step": 15820 + }, + { + "epoch": 0.69, + "learning_rate": 4.451456545133007e-05, + "loss": 1.8997, + "step": 15830 + }, + { + "epoch": 0.69, + "learning_rate": 4.44963087237175e-05, + "loss": 1.8841, + "step": 15840 + }, + { + "epoch": 0.69, + "learning_rate": 4.4478044989446965e-05, + "loss": 1.9042, + "step": 15850 + }, + { + "epoch": 0.69, + "learning_rate": 4.445977425734609e-05, + "loss": 1.8973, + "step": 15860 + }, + { + "epoch": 0.69, + "learning_rate": 4.444149653624585e-05, + "loss": 1.8587, + "step": 15870 + }, + { + "epoch": 0.7, + "learning_rate": 4.4423211834980626e-05, + "loss": 1.8835, + "step": 15880 + }, + { + "epoch": 0.7, + "learning_rate": 4.440492016238815e-05, + "loss": 1.9072, + "step": 15890 + }, + { + "epoch": 0.7, + "learning_rate": 4.438662152730954e-05, + "loss": 1.9272, + "step": 15900 + }, + { + "epoch": 0.7, + "eval_loss": 1.8685188293457031, + "eval_runtime": 11.8174, + "eval_samples_per_second": 346.607, + "eval_steps_per_second": 21.663, + "step": 15900 + }, + { + "epoch": 0.7, + "learning_rate": 4.436831593858928e-05, + "loss": 1.9128, + "step": 15910 + }, + { + "epoch": 0.7, + "learning_rate": 4.435000340507519e-05, + "loss": 1.8712, + "step": 15920 + }, + { + "epoch": 0.7, + "learning_rate": 4.433168393561849e-05, + "loss": 1.915, + "step": 15930 + }, + { + "epoch": 0.7, + "learning_rate": 4.43133575390737e-05, + "loss": 1.9015, + "step": 15940 + }, + { + "epoch": 0.7, + "learning_rate": 4.429502422429874e-05, + "loss": 1.8909, + "step": 15950 + }, + { + "epoch": 0.7, + "learning_rate": 4.427668400015483e-05, + "loss": 1.8679, + "step": 15960 + }, + { + "epoch": 0.7, + "learning_rate": 4.4258336875506564e-05, + "loss": 1.858, + "step": 15970 + }, + { + "epoch": 0.7, + "learning_rate": 4.423998285922185e-05, + "loss": 1.899, + "step": 15980 + }, + { + "epoch": 0.7, + "learning_rate": 4.422162196017194e-05, + "loss": 1.8772, + "step": 15990 + }, + { + "epoch": 0.7, + "learning_rate": 4.42032541872314e-05, + "loss": 1.9077, + "step": 16000 + }, + { + "epoch": 0.7, + "eval_loss": 1.8670729398727417, + "eval_runtime": 11.9299, + "eval_samples_per_second": 343.34, + "eval_steps_per_second": 21.459, + "step": 16000 + }, + { + "epoch": 0.7, + "learning_rate": 4.418487954927812e-05, + "loss": 1.9019, + "step": 16010 + }, + { + "epoch": 0.7, + "learning_rate": 4.416649805519333e-05, + "loss": 1.8496, + "step": 16020 + }, + { + "epoch": 0.7, + "learning_rate": 4.4148109713861536e-05, + "loss": 1.8713, + "step": 16030 + }, + { + "epoch": 0.7, + "learning_rate": 4.412971453417059e-05, + "loss": 1.9179, + "step": 16040 + }, + { + "epoch": 0.7, + "learning_rate": 4.411131252501161e-05, + "loss": 1.9228, + "step": 16050 + }, + { + "epoch": 0.7, + "learning_rate": 4.4092903695279074e-05, + "loss": 1.9031, + "step": 16060 + }, + { + "epoch": 0.7, + "learning_rate": 4.40744880538707e-05, + "loss": 1.8563, + "step": 16070 + }, + { + "epoch": 0.7, + "learning_rate": 4.405606560968752e-05, + "loss": 1.8679, + "step": 16080 + }, + { + "epoch": 0.7, + "learning_rate": 4.403763637163385e-05, + "loss": 1.8741, + "step": 16090 + }, + { + "epoch": 0.7, + "learning_rate": 4.401920034861731e-05, + "loss": 1.8663, + "step": 16100 + }, + { + "epoch": 0.7, + "eval_loss": 1.8673057556152344, + "eval_runtime": 12.4069, + "eval_samples_per_second": 330.14, + "eval_steps_per_second": 20.634, + "step": 16100 + }, + { + "epoch": 0.71, + "learning_rate": 4.400075754954877e-05, + "loss": 1.9126, + "step": 16110 + }, + { + "epoch": 0.71, + "learning_rate": 4.398230798334238e-05, + "loss": 1.8892, + "step": 16120 + }, + { + "epoch": 0.71, + "learning_rate": 4.396385165891559e-05, + "loss": 1.9086, + "step": 16130 + }, + { + "epoch": 0.71, + "learning_rate": 4.394538858518907e-05, + "loss": 1.8721, + "step": 16140 + }, + { + "epoch": 0.71, + "learning_rate": 4.392691877108681e-05, + "loss": 1.9023, + "step": 16150 + }, + { + "epoch": 0.71, + "learning_rate": 4.390844222553599e-05, + "loss": 1.905, + "step": 16160 + }, + { + "epoch": 0.71, + "learning_rate": 4.3889958957467104e-05, + "loss": 1.8876, + "step": 16170 + }, + { + "epoch": 0.71, + "learning_rate": 4.387146897581386e-05, + "loss": 1.8468, + "step": 16180 + }, + { + "epoch": 0.71, + "learning_rate": 4.3852972289513224e-05, + "loss": 1.89, + "step": 16190 + }, + { + "epoch": 0.71, + "learning_rate": 4.3834468907505395e-05, + "loss": 1.8819, + "step": 16200 + }, + { + "epoch": 0.71, + "eval_loss": 1.8665714263916016, + "eval_runtime": 11.567, + "eval_samples_per_second": 354.112, + "eval_steps_per_second": 22.132, + "step": 16200 + }, + { + "epoch": 0.71, + "learning_rate": 4.3815958838733814e-05, + "loss": 1.8478, + "step": 16210 + }, + { + "epoch": 0.71, + "learning_rate": 4.379744209214517e-05, + "loss": 1.8759, + "step": 16220 + }, + { + "epoch": 0.71, + "learning_rate": 4.3778918676689334e-05, + "loss": 1.8713, + "step": 16230 + }, + { + "epoch": 0.71, + "learning_rate": 4.376038860131945e-05, + "loss": 1.9362, + "step": 16240 + }, + { + "epoch": 0.71, + "learning_rate": 4.374185187499186e-05, + "loss": 1.904, + "step": 16250 + }, + { + "epoch": 0.71, + "learning_rate": 4.372330850666611e-05, + "loss": 1.8876, + "step": 16260 + }, + { + "epoch": 0.71, + "learning_rate": 4.3704758505304966e-05, + "loss": 1.9085, + "step": 16270 + }, + { + "epoch": 0.71, + "learning_rate": 4.368620187987442e-05, + "loss": 1.907, + "step": 16280 + }, + { + "epoch": 0.71, + "learning_rate": 4.3667638639343625e-05, + "loss": 1.8927, + "step": 16290 + }, + { + "epoch": 0.71, + "learning_rate": 4.364906879268495e-05, + "loss": 1.8798, + "step": 16300 + }, + { + "epoch": 0.71, + "eval_loss": 1.8657153844833374, + "eval_runtime": 11.7422, + "eval_samples_per_second": 348.828, + "eval_steps_per_second": 21.802, + "step": 16300 + }, + { + "epoch": 0.71, + "learning_rate": 4.363049234887399e-05, + "loss": 1.9308, + "step": 16310 + }, + { + "epoch": 0.71, + "learning_rate": 4.361190931688947e-05, + "loss": 1.8721, + "step": 16320 + }, + { + "epoch": 0.71, + "learning_rate": 4.359331970571335e-05, + "loss": 1.9018, + "step": 16330 + }, + { + "epoch": 0.72, + "learning_rate": 4.3574723524330726e-05, + "loss": 1.8691, + "step": 16340 + }, + { + "epoch": 0.72, + "learning_rate": 4.355612078172991e-05, + "loss": 1.9025, + "step": 16350 + }, + { + "epoch": 0.72, + "learning_rate": 4.353751148690236e-05, + "loss": 1.8992, + "step": 16360 + }, + { + "epoch": 0.72, + "learning_rate": 4.35188956488427e-05, + "loss": 1.8235, + "step": 16370 + }, + { + "epoch": 0.72, + "learning_rate": 4.350027327654874e-05, + "loss": 1.92, + "step": 16380 + }, + { + "epoch": 0.72, + "learning_rate": 4.348164437902143e-05, + "loss": 1.8775, + "step": 16390 + }, + { + "epoch": 0.72, + "learning_rate": 4.3463008965264864e-05, + "loss": 1.9177, + "step": 16400 + }, + { + "epoch": 0.72, + "eval_loss": 1.8651164770126343, + "eval_runtime": 11.6918, + "eval_samples_per_second": 350.33, + "eval_steps_per_second": 21.896, + "step": 16400 + }, + { + "epoch": 0.72, + "learning_rate": 4.3444367044286315e-05, + "loss": 1.9275, + "step": 16410 + }, + { + "epoch": 0.72, + "learning_rate": 4.3425718625096176e-05, + "loss": 1.8558, + "step": 16420 + }, + { + "epoch": 0.72, + "learning_rate": 4.340706371670799e-05, + "loss": 1.8686, + "step": 16430 + }, + { + "epoch": 0.72, + "learning_rate": 4.3388402328138434e-05, + "loss": 1.8789, + "step": 16440 + }, + { + "epoch": 0.72, + "learning_rate": 4.336973446840733e-05, + "loss": 1.8479, + "step": 16450 + }, + { + "epoch": 0.72, + "learning_rate": 4.335106014653759e-05, + "loss": 1.904, + "step": 16460 + }, + { + "epoch": 0.72, + "learning_rate": 4.333237937155531e-05, + "loss": 1.9069, + "step": 16470 + }, + { + "epoch": 0.72, + "learning_rate": 4.331369215248965e-05, + "loss": 1.9015, + "step": 16480 + }, + { + "epoch": 0.72, + "learning_rate": 4.329499849837293e-05, + "loss": 1.8867, + "step": 16490 + }, + { + "epoch": 0.72, + "learning_rate": 4.3276298418240514e-05, + "loss": 1.8883, + "step": 16500 + }, + { + "epoch": 0.72, + "eval_loss": 1.864149808883667, + "eval_runtime": 11.9658, + "eval_samples_per_second": 342.309, + "eval_steps_per_second": 21.394, + "step": 16500 + }, + { + "epoch": 0.72, + "learning_rate": 4.325759192113095e-05, + "loss": 1.8479, + "step": 16510 + }, + { + "epoch": 0.72, + "learning_rate": 4.323887901608584e-05, + "loss": 1.8701, + "step": 16520 + }, + { + "epoch": 0.72, + "learning_rate": 4.3220159712149894e-05, + "loss": 1.8689, + "step": 16530 + }, + { + "epoch": 0.72, + "learning_rate": 4.320143401837092e-05, + "loss": 1.8919, + "step": 16540 + }, + { + "epoch": 0.72, + "learning_rate": 4.3182701943799806e-05, + "loss": 1.8699, + "step": 16550 + }, + { + "epoch": 0.72, + "learning_rate": 4.316396349749054e-05, + "loss": 1.8623, + "step": 16560 + }, + { + "epoch": 0.73, + "learning_rate": 4.314521868850016e-05, + "loss": 1.9231, + "step": 16570 + }, + { + "epoch": 0.73, + "learning_rate": 4.312646752588881e-05, + "loss": 1.8877, + "step": 16580 + }, + { + "epoch": 0.73, + "learning_rate": 4.310771001871969e-05, + "loss": 1.861, + "step": 16590 + }, + { + "epoch": 0.73, + "learning_rate": 4.308894617605907e-05, + "loss": 1.8961, + "step": 16600 + }, + { + "epoch": 0.73, + "eval_loss": 1.8638092279434204, + "eval_runtime": 11.7356, + "eval_samples_per_second": 349.022, + "eval_steps_per_second": 21.814, + "step": 16600 + }, + { + "epoch": 0.73, + "learning_rate": 4.307017600697627e-05, + "loss": 1.9087, + "step": 16610 + }, + { + "epoch": 0.73, + "learning_rate": 4.30513995205437e-05, + "loss": 1.9017, + "step": 16620 + }, + { + "epoch": 0.73, + "learning_rate": 4.30326167258368e-05, + "loss": 1.8747, + "step": 16630 + }, + { + "epoch": 0.73, + "learning_rate": 4.301382763193404e-05, + "loss": 1.8697, + "step": 16640 + }, + { + "epoch": 0.73, + "learning_rate": 4.2995032247916974e-05, + "loss": 1.8932, + "step": 16650 + }, + { + "epoch": 0.73, + "learning_rate": 4.297623058287017e-05, + "loss": 1.8577, + "step": 16660 + }, + { + "epoch": 0.73, + "learning_rate": 4.295742264588125e-05, + "loss": 1.8503, + "step": 16670 + }, + { + "epoch": 0.73, + "learning_rate": 4.2938608446040846e-05, + "loss": 1.8975, + "step": 16680 + }, + { + "epoch": 0.73, + "learning_rate": 4.2919787992442646e-05, + "loss": 1.9002, + "step": 16690 + }, + { + "epoch": 0.73, + "learning_rate": 4.2900961294183326e-05, + "loss": 1.9155, + "step": 16700 + }, + { + "epoch": 0.73, + "eval_loss": 1.8630321025848389, + "eval_runtime": 11.6984, + "eval_samples_per_second": 350.132, + "eval_steps_per_second": 21.883, + "step": 16700 + }, + { + "epoch": 0.73, + "learning_rate": 4.2882128360362616e-05, + "loss": 1.9088, + "step": 16710 + }, + { + "epoch": 0.73, + "learning_rate": 4.2863289200083226e-05, + "loss": 1.9019, + "step": 16720 + }, + { + "epoch": 0.73, + "learning_rate": 4.2844443822450896e-05, + "loss": 1.9395, + "step": 16730 + }, + { + "epoch": 0.73, + "learning_rate": 4.282559223657437e-05, + "loss": 1.8582, + "step": 16740 + }, + { + "epoch": 0.73, + "learning_rate": 4.2806734451565385e-05, + "loss": 1.9054, + "step": 16750 + }, + { + "epoch": 0.73, + "learning_rate": 4.2787870476538685e-05, + "loss": 1.9036, + "step": 16760 + }, + { + "epoch": 0.73, + "learning_rate": 4.276900032061198e-05, + "loss": 1.8571, + "step": 16770 + }, + { + "epoch": 0.73, + "learning_rate": 4.275012399290602e-05, + "loss": 1.898, + "step": 16780 + }, + { + "epoch": 0.73, + "learning_rate": 4.273124150254447e-05, + "loss": 1.905, + "step": 16790 + }, + { + "epoch": 0.74, + "learning_rate": 4.271235285865404e-05, + "loss": 1.8843, + "step": 16800 + }, + { + "epoch": 0.74, + "eval_loss": 1.8624677658081055, + "eval_runtime": 11.5664, + "eval_samples_per_second": 354.13, + "eval_steps_per_second": 22.133, + "step": 16800 + }, + { + "epoch": 0.74, + "learning_rate": 4.269345807036436e-05, + "loss": 1.8558, + "step": 16810 + }, + { + "epoch": 0.74, + "learning_rate": 4.267455714680807e-05, + "loss": 1.9309, + "step": 16820 + }, + { + "epoch": 0.74, + "learning_rate": 4.2655650097120746e-05, + "loss": 1.885, + "step": 16830 + }, + { + "epoch": 0.74, + "learning_rate": 4.2636736930440935e-05, + "loss": 1.8901, + "step": 16840 + }, + { + "epoch": 0.74, + "learning_rate": 4.261781765591016e-05, + "loss": 1.9033, + "step": 16850 + }, + { + "epoch": 0.74, + "learning_rate": 4.259889228267285e-05, + "loss": 1.8814, + "step": 16860 + }, + { + "epoch": 0.74, + "learning_rate": 4.257996081987644e-05, + "loss": 1.8316, + "step": 16870 + }, + { + "epoch": 0.74, + "learning_rate": 4.256102327667127e-05, + "loss": 1.8846, + "step": 16880 + }, + { + "epoch": 0.74, + "learning_rate": 4.254207966221062e-05, + "loss": 1.8539, + "step": 16890 + }, + { + "epoch": 0.74, + "learning_rate": 4.2523129985650715e-05, + "loss": 1.871, + "step": 16900 + }, + { + "epoch": 0.74, + "eval_loss": 1.8623075485229492, + "eval_runtime": 11.6133, + "eval_samples_per_second": 352.699, + "eval_steps_per_second": 22.044, + "step": 16900 + }, + { + "epoch": 0.74, + "learning_rate": 4.250417425615071e-05, + "loss": 1.8487, + "step": 16910 + }, + { + "epoch": 0.74, + "learning_rate": 4.248521248287269e-05, + "loss": 1.9164, + "step": 16920 + }, + { + "epoch": 0.74, + "learning_rate": 4.2466244674981633e-05, + "loss": 1.8555, + "step": 16930 + }, + { + "epoch": 0.74, + "learning_rate": 4.2447270841645486e-05, + "loss": 1.8263, + "step": 16940 + }, + { + "epoch": 0.74, + "learning_rate": 4.2428290992035055e-05, + "loss": 1.8682, + "step": 16950 + }, + { + "epoch": 0.74, + "learning_rate": 4.2409305135324085e-05, + "loss": 1.9171, + "step": 16960 + }, + { + "epoch": 0.74, + "learning_rate": 4.2390313280689204e-05, + "loss": 1.9025, + "step": 16970 + }, + { + "epoch": 0.74, + "learning_rate": 4.237131543730997e-05, + "loss": 1.885, + "step": 16980 + }, + { + "epoch": 0.74, + "learning_rate": 4.23523116143688e-05, + "loss": 1.9243, + "step": 16990 + }, + { + "epoch": 0.74, + "learning_rate": 4.2333301821051024e-05, + "loss": 1.8804, + "step": 17000 + }, + { + "epoch": 0.74, + "eval_loss": 1.8615334033966064, + "eval_runtime": 11.6378, + "eval_samples_per_second": 351.957, + "eval_steps_per_second": 21.997, + "step": 17000 + }, + { + "epoch": 0.74, + "learning_rate": 4.231428606654486e-05, + "loss": 1.8375, + "step": 17010 + }, + { + "epoch": 0.75, + "learning_rate": 4.229526436004138e-05, + "loss": 1.863, + "step": 17020 + }, + { + "epoch": 0.75, + "learning_rate": 4.2276236710734564e-05, + "loss": 1.8763, + "step": 17030 + }, + { + "epoch": 0.75, + "learning_rate": 4.2257203127821243e-05, + "loss": 1.9255, + "step": 17040 + }, + { + "epoch": 0.75, + "learning_rate": 4.2238163620501145e-05, + "loss": 1.8753, + "step": 17050 + }, + { + "epoch": 0.75, + "learning_rate": 4.2219118197976814e-05, + "loss": 1.8752, + "step": 17060 + }, + { + "epoch": 0.75, + "learning_rate": 4.22000668694537e-05, + "loss": 1.8979, + "step": 17070 + }, + { + "epoch": 0.75, + "learning_rate": 4.218100964414009e-05, + "loss": 1.8533, + "step": 17080 + }, + { + "epoch": 0.75, + "learning_rate": 4.2161946531247104e-05, + "loss": 1.8929, + "step": 17090 + }, + { + "epoch": 0.75, + "learning_rate": 4.214287753998873e-05, + "loss": 1.8709, + "step": 17100 + }, + { + "epoch": 0.75, + "eval_loss": 1.8611003160476685, + "eval_runtime": 11.6511, + "eval_samples_per_second": 351.556, + "eval_steps_per_second": 21.972, + "step": 17100 + }, + { + "epoch": 0.75, + "learning_rate": 4.212380267958179e-05, + "loss": 1.876, + "step": 17110 + }, + { + "epoch": 0.75, + "learning_rate": 4.210472195924595e-05, + "loss": 1.8903, + "step": 17120 + }, + { + "epoch": 0.75, + "learning_rate": 4.208563538820368e-05, + "loss": 1.8769, + "step": 17130 + }, + { + "epoch": 0.75, + "learning_rate": 4.206654297568033e-05, + "loss": 1.8677, + "step": 17140 + }, + { + "epoch": 0.75, + "learning_rate": 4.204744473090401e-05, + "loss": 1.8979, + "step": 17150 + }, + { + "epoch": 0.75, + "learning_rate": 4.2028340663105714e-05, + "loss": 1.904, + "step": 17160 + }, + { + "epoch": 0.75, + "learning_rate": 4.200923078151919e-05, + "loss": 1.9036, + "step": 17170 + }, + { + "epoch": 0.75, + "learning_rate": 4.199011509538104e-05, + "loss": 1.8592, + "step": 17180 + }, + { + "epoch": 0.75, + "learning_rate": 4.1970993613930655e-05, + "loss": 1.8772, + "step": 17190 + }, + { + "epoch": 0.75, + "learning_rate": 4.1951866346410225e-05, + "loss": 1.8779, + "step": 17200 + }, + { + "epoch": 0.75, + "eval_loss": 1.8605446815490723, + "eval_runtime": 11.5509, + "eval_samples_per_second": 354.603, + "eval_steps_per_second": 22.163, + "step": 17200 + }, + { + "epoch": 0.75, + "learning_rate": 4.1932733302064745e-05, + "loss": 1.8491, + "step": 17210 + }, + { + "epoch": 0.75, + "learning_rate": 4.191359449014197e-05, + "loss": 1.8851, + "step": 17220 + }, + { + "epoch": 0.75, + "learning_rate": 4.189444991989251e-05, + "loss": 1.8727, + "step": 17230 + }, + { + "epoch": 0.75, + "learning_rate": 4.187529960056969e-05, + "loss": 1.8844, + "step": 17240 + }, + { + "epoch": 0.76, + "learning_rate": 4.185614354142965e-05, + "loss": 1.9146, + "step": 17250 + }, + { + "epoch": 0.76, + "learning_rate": 4.1836981751731286e-05, + "loss": 1.8736, + "step": 17260 + }, + { + "epoch": 0.76, + "learning_rate": 4.1817814240736294e-05, + "loss": 1.852, + "step": 17270 + }, + { + "epoch": 0.76, + "learning_rate": 4.179864101770911e-05, + "loss": 1.9035, + "step": 17280 + }, + { + "epoch": 0.76, + "learning_rate": 4.177946209191691e-05, + "loss": 1.8563, + "step": 17290 + }, + { + "epoch": 0.76, + "learning_rate": 4.176027747262968e-05, + "loss": 1.9006, + "step": 17300 + }, + { + "epoch": 0.76, + "eval_loss": 1.8597712516784668, + "eval_runtime": 11.6159, + "eval_samples_per_second": 352.621, + "eval_steps_per_second": 22.039, + "step": 17300 + }, + { + "epoch": 0.76, + "learning_rate": 4.1741087169120106e-05, + "loss": 1.9001, + "step": 17310 + }, + { + "epoch": 0.76, + "learning_rate": 4.1721891190663674e-05, + "loss": 1.8468, + "step": 17320 + }, + { + "epoch": 0.76, + "learning_rate": 4.170268954653856e-05, + "loss": 1.8753, + "step": 17330 + }, + { + "epoch": 0.76, + "learning_rate": 4.1683482246025726e-05, + "loss": 1.8843, + "step": 17340 + }, + { + "epoch": 0.76, + "learning_rate": 4.166426929840883e-05, + "loss": 1.8496, + "step": 17350 + }, + { + "epoch": 0.76, + "learning_rate": 4.1645050712974264e-05, + "loss": 1.859, + "step": 17360 + }, + { + "epoch": 0.76, + "learning_rate": 4.162582649901118e-05, + "loss": 1.8695, + "step": 17370 + }, + { + "epoch": 0.76, + "learning_rate": 4.16065966658114e-05, + "loss": 1.9075, + "step": 17380 + }, + { + "epoch": 0.76, + "learning_rate": 4.1587361222669506e-05, + "loss": 1.8326, + "step": 17390 + }, + { + "epoch": 0.76, + "learning_rate": 4.156812017888276e-05, + "loss": 1.8529, + "step": 17400 + }, + { + "epoch": 0.76, + "eval_loss": 1.8598828315734863, + "eval_runtime": 11.5771, + "eval_samples_per_second": 353.803, + "eval_steps_per_second": 22.113, + "step": 17400 + }, + { + "epoch": 0.76, + "learning_rate": 4.154887354375116e-05, + "loss": 1.8753, + "step": 17410 + }, + { + "epoch": 0.76, + "learning_rate": 4.1529621326577375e-05, + "loss": 1.8727, + "step": 17420 + }, + { + "epoch": 0.76, + "learning_rate": 4.1510363536666794e-05, + "loss": 1.8874, + "step": 17430 + }, + { + "epoch": 0.76, + "learning_rate": 4.14911001833275e-05, + "loss": 1.8964, + "step": 17440 + }, + { + "epoch": 0.76, + "learning_rate": 4.147183127587026e-05, + "loss": 1.9168, + "step": 17450 + }, + { + "epoch": 0.76, + "learning_rate": 4.14525568236085e-05, + "loss": 1.8852, + "step": 17460 + }, + { + "epoch": 0.76, + "learning_rate": 4.143327683585837e-05, + "loss": 1.8632, + "step": 17470 + }, + { + "epoch": 0.77, + "learning_rate": 4.141399132193867e-05, + "loss": 1.8671, + "step": 17480 + }, + { + "epoch": 0.77, + "learning_rate": 4.1394700291170874e-05, + "loss": 1.8999, + "step": 17490 + }, + { + "epoch": 0.77, + "learning_rate": 4.1375403752879135e-05, + "loss": 1.9191, + "step": 17500 + }, + { + "epoch": 0.77, + "eval_loss": 1.85947585105896, + "eval_runtime": 11.8448, + "eval_samples_per_second": 345.805, + "eval_steps_per_second": 21.613, + "step": 17500 + }, + { + "epoch": 0.77, + "learning_rate": 4.135610171639025e-05, + "loss": 1.8976, + "step": 17510 + }, + { + "epoch": 0.77, + "learning_rate": 4.133679419103368e-05, + "loss": 1.8638, + "step": 17520 + }, + { + "epoch": 0.77, + "learning_rate": 4.1317481186141555e-05, + "loss": 1.8592, + "step": 17530 + }, + { + "epoch": 0.77, + "learning_rate": 4.129816271104861e-05, + "loss": 1.9085, + "step": 17540 + }, + { + "epoch": 0.77, + "learning_rate": 4.1278838775092277e-05, + "loss": 1.9276, + "step": 17550 + }, + { + "epoch": 0.77, + "learning_rate": 4.125950938761259e-05, + "loss": 1.914, + "step": 17560 + }, + { + "epoch": 0.77, + "learning_rate": 4.1240174557952245e-05, + "loss": 1.8823, + "step": 17570 + }, + { + "epoch": 0.77, + "learning_rate": 4.122083429545655e-05, + "loss": 1.8713, + "step": 17580 + }, + { + "epoch": 0.77, + "learning_rate": 4.120148860947343e-05, + "loss": 1.9081, + "step": 17590 + }, + { + "epoch": 0.77, + "learning_rate": 4.118213750935346e-05, + "loss": 1.8539, + "step": 17600 + }, + { + "epoch": 0.77, + "eval_loss": 1.8588768243789673, + "eval_runtime": 11.5896, + "eval_samples_per_second": 353.419, + "eval_steps_per_second": 22.089, + "step": 17600 + }, + { + "epoch": 0.77, + "learning_rate": 4.1162781004449816e-05, + "loss": 1.875, + "step": 17610 + }, + { + "epoch": 0.77, + "learning_rate": 4.114341910411829e-05, + "loss": 1.8625, + "step": 17620 + }, + { + "epoch": 0.77, + "learning_rate": 4.112405181771726e-05, + "loss": 1.9175, + "step": 17630 + }, + { + "epoch": 0.77, + "learning_rate": 4.110467915460775e-05, + "loss": 1.9035, + "step": 17640 + }, + { + "epoch": 0.77, + "learning_rate": 4.108530112415334e-05, + "loss": 1.8611, + "step": 17650 + }, + { + "epoch": 0.77, + "learning_rate": 4.106591773572023e-05, + "loss": 1.8985, + "step": 17660 + }, + { + "epoch": 0.77, + "learning_rate": 4.104652899867721e-05, + "loss": 1.9136, + "step": 17670 + }, + { + "epoch": 0.77, + "learning_rate": 4.1027134922395656e-05, + "loss": 1.9122, + "step": 17680 + }, + { + "epoch": 0.77, + "learning_rate": 4.1007735516249484e-05, + "loss": 1.8883, + "step": 17690 + }, + { + "epoch": 0.77, + "learning_rate": 4.098833078961526e-05, + "loss": 1.8788, + "step": 17700 + }, + { + "epoch": 0.77, + "eval_loss": 1.8577911853790283, + "eval_runtime": 11.7168, + "eval_samples_per_second": 349.584, + "eval_steps_per_second": 21.849, + "step": 17700 + }, + { + "epoch": 0.78, + "learning_rate": 4.0968920751872036e-05, + "loss": 1.8542, + "step": 17710 + }, + { + "epoch": 0.78, + "learning_rate": 4.0949505412401516e-05, + "loss": 1.9247, + "step": 17720 + }, + { + "epoch": 0.78, + "learning_rate": 4.0930084780587914e-05, + "loss": 1.8587, + "step": 17730 + }, + { + "epoch": 0.78, + "learning_rate": 4.0910658865817996e-05, + "loss": 1.8966, + "step": 17740 + }, + { + "epoch": 0.78, + "learning_rate": 4.089122767748113e-05, + "loss": 1.8524, + "step": 17750 + }, + { + "epoch": 0.78, + "learning_rate": 4.087179122496918e-05, + "loss": 1.836, + "step": 17760 + }, + { + "epoch": 0.78, + "learning_rate": 4.085234951767658e-05, + "loss": 1.8473, + "step": 17770 + }, + { + "epoch": 0.78, + "learning_rate": 4.083290256500031e-05, + "loss": 1.8628, + "step": 17780 + }, + { + "epoch": 0.78, + "learning_rate": 4.081345037633988e-05, + "loss": 1.861, + "step": 17790 + }, + { + "epoch": 0.78, + "learning_rate": 4.079399296109731e-05, + "loss": 1.9114, + "step": 17800 + }, + { + "epoch": 0.78, + "eval_loss": 1.8577276468276978, + "eval_runtime": 11.5497, + "eval_samples_per_second": 354.642, + "eval_steps_per_second": 22.165, + "step": 17800 + }, + { + "epoch": 0.78, + "learning_rate": 4.077453032867717e-05, + "loss": 1.8936, + "step": 17810 + }, + { + "epoch": 0.78, + "learning_rate": 4.075506248848656e-05, + "loss": 1.8647, + "step": 17820 + }, + { + "epoch": 0.78, + "learning_rate": 4.073558944993506e-05, + "loss": 1.9102, + "step": 17830 + }, + { + "epoch": 0.78, + "learning_rate": 4.07161112224348e-05, + "loss": 1.9068, + "step": 17840 + }, + { + "epoch": 0.78, + "learning_rate": 4.0696627815400386e-05, + "loss": 1.8272, + "step": 17850 + }, + { + "epoch": 0.78, + "learning_rate": 4.0677139238248966e-05, + "loss": 1.8715, + "step": 17860 + }, + { + "epoch": 0.78, + "learning_rate": 4.0657645500400155e-05, + "loss": 1.9281, + "step": 17870 + }, + { + "epoch": 0.78, + "learning_rate": 4.063814661127607e-05, + "loss": 1.9073, + "step": 17880 + }, + { + "epoch": 0.78, + "learning_rate": 4.061864258030132e-05, + "loss": 1.8984, + "step": 17890 + }, + { + "epoch": 0.78, + "learning_rate": 4.0599133416903e-05, + "loss": 1.8766, + "step": 17900 + }, + { + "epoch": 0.78, + "eval_loss": 1.8569570779800415, + "eval_runtime": 11.5288, + "eval_samples_per_second": 355.285, + "eval_steps_per_second": 22.205, + "step": 17900 + }, + { + "epoch": 0.78, + "learning_rate": 4.05796191305107e-05, + "loss": 1.8893, + "step": 17910 + }, + { + "epoch": 0.78, + "learning_rate": 4.056009973055645e-05, + "loss": 1.8882, + "step": 17920 + }, + { + "epoch": 0.78, + "learning_rate": 4.0540575226474785e-05, + "loss": 1.8622, + "step": 17930 + }, + { + "epoch": 0.79, + "learning_rate": 4.052104562770269e-05, + "loss": 1.9042, + "step": 17940 + }, + { + "epoch": 0.79, + "learning_rate": 4.0501510943679616e-05, + "loss": 1.8958, + "step": 17950 + }, + { + "epoch": 0.79, + "learning_rate": 4.0481971183847475e-05, + "loss": 1.9045, + "step": 17960 + }, + { + "epoch": 0.79, + "learning_rate": 4.046242635765064e-05, + "loss": 1.906, + "step": 17970 + }, + { + "epoch": 0.79, + "learning_rate": 4.044287647453592e-05, + "loss": 1.8578, + "step": 17980 + }, + { + "epoch": 0.79, + "learning_rate": 4.042332154395256e-05, + "loss": 1.8961, + "step": 17990 + }, + { + "epoch": 0.79, + "learning_rate": 4.040376157535226e-05, + "loss": 1.8905, + "step": 18000 + }, + { + "epoch": 0.79, + "eval_loss": 1.8565237522125244, + "eval_runtime": 11.8383, + "eval_samples_per_second": 345.996, + "eval_steps_per_second": 21.625, + "step": 18000 + }, + { + "epoch": 0.79, + "learning_rate": 4.038419657818916e-05, + "loss": 1.8752, + "step": 18010 + }, + { + "epoch": 0.79, + "learning_rate": 4.036462656191983e-05, + "loss": 1.847, + "step": 18020 + }, + { + "epoch": 0.79, + "learning_rate": 4.0345051536003235e-05, + "loss": 1.8536, + "step": 18030 + }, + { + "epoch": 0.79, + "learning_rate": 4.03254715099008e-05, + "loss": 1.9066, + "step": 18040 + }, + { + "epoch": 0.79, + "learning_rate": 4.0305886493076335e-05, + "loss": 1.913, + "step": 18050 + }, + { + "epoch": 0.79, + "learning_rate": 4.028629649499611e-05, + "loss": 1.8413, + "step": 18060 + }, + { + "epoch": 0.79, + "learning_rate": 4.026670152512874e-05, + "loss": 1.9036, + "step": 18070 + }, + { + "epoch": 0.79, + "learning_rate": 4.024710159294529e-05, + "loss": 1.8875, + "step": 18080 + }, + { + "epoch": 0.79, + "learning_rate": 4.02274967079192e-05, + "loss": 1.866, + "step": 18090 + }, + { + "epoch": 0.79, + "learning_rate": 4.020788687952632e-05, + "loss": 1.9219, + "step": 18100 + }, + { + "epoch": 0.79, + "eval_loss": 1.8559688329696655, + "eval_runtime": 11.6536, + "eval_samples_per_second": 351.479, + "eval_steps_per_second": 21.967, + "step": 18100 + }, + { + "epoch": 0.79, + "learning_rate": 4.018827211724487e-05, + "loss": 1.8589, + "step": 18110 + }, + { + "epoch": 0.79, + "learning_rate": 4.016865243055546e-05, + "loss": 1.9037, + "step": 18120 + }, + { + "epoch": 0.79, + "learning_rate": 4.0149027828941115e-05, + "loss": 1.8967, + "step": 18130 + }, + { + "epoch": 0.79, + "learning_rate": 4.012939832188718e-05, + "loss": 1.924, + "step": 18140 + }, + { + "epoch": 0.79, + "learning_rate": 4.0109763918881405e-05, + "loss": 1.8552, + "step": 18150 + }, + { + "epoch": 0.79, + "learning_rate": 4.00901246294139e-05, + "loss": 1.8851, + "step": 18160 + }, + { + "epoch": 0.8, + "learning_rate": 4.007048046297714e-05, + "loss": 1.8228, + "step": 18170 + }, + { + "epoch": 0.8, + "learning_rate": 4.005083142906594e-05, + "loss": 1.8643, + "step": 18180 + }, + { + "epoch": 0.8, + "learning_rate": 4.003117753717749e-05, + "loss": 1.8684, + "step": 18190 + }, + { + "epoch": 0.8, + "learning_rate": 4.001151879681132e-05, + "loss": 1.9042, + "step": 18200 + }, + { + "epoch": 0.8, + "eval_loss": 1.8557652235031128, + "eval_runtime": 11.5663, + "eval_samples_per_second": 354.133, + "eval_steps_per_second": 22.133, + "step": 18200 + }, + { + "epoch": 0.8, + "learning_rate": 3.999185521746929e-05, + "loss": 1.8764, + "step": 18210 + }, + { + "epoch": 0.8, + "learning_rate": 3.9972186808655624e-05, + "loss": 1.8506, + "step": 18220 + }, + { + "epoch": 0.8, + "learning_rate": 3.9952513579876855e-05, + "loss": 1.8802, + "step": 18230 + }, + { + "epoch": 0.8, + "learning_rate": 3.993283554064187e-05, + "loss": 1.8562, + "step": 18240 + }, + { + "epoch": 0.8, + "learning_rate": 3.9913152700461876e-05, + "loss": 1.9276, + "step": 18250 + }, + { + "epoch": 0.8, + "learning_rate": 3.9893465068850366e-05, + "loss": 1.8899, + "step": 18260 + }, + { + "epoch": 0.8, + "learning_rate": 3.98737726553232e-05, + "loss": 1.8703, + "step": 18270 + }, + { + "epoch": 0.8, + "learning_rate": 3.9854075469398514e-05, + "loss": 1.9109, + "step": 18280 + }, + { + "epoch": 0.8, + "learning_rate": 3.983437352059677e-05, + "loss": 1.8824, + "step": 18290 + }, + { + "epoch": 0.8, + "learning_rate": 3.981466681844071e-05, + "loss": 1.8573, + "step": 18300 + }, + { + "epoch": 0.8, + "eval_loss": 1.854828119277954, + "eval_runtime": 11.5928, + "eval_samples_per_second": 353.322, + "eval_steps_per_second": 22.083, + "step": 18300 + }, + { + "epoch": 0.8, + "learning_rate": 3.97949553724554e-05, + "loss": 1.8958, + "step": 18310 + }, + { + "epoch": 0.8, + "learning_rate": 3.977523919216819e-05, + "loss": 1.9016, + "step": 18320 + }, + { + "epoch": 0.8, + "learning_rate": 3.975551828710871e-05, + "loss": 1.8862, + "step": 18330 + }, + { + "epoch": 0.8, + "learning_rate": 3.973579266680888e-05, + "loss": 1.8684, + "step": 18340 + }, + { + "epoch": 0.8, + "learning_rate": 3.971606234080289e-05, + "loss": 1.8834, + "step": 18350 + }, + { + "epoch": 0.8, + "learning_rate": 3.969632731862722e-05, + "loss": 1.8579, + "step": 18360 + }, + { + "epoch": 0.8, + "learning_rate": 3.967658760982061e-05, + "loss": 1.905, + "step": 18370 + }, + { + "epoch": 0.8, + "learning_rate": 3.965684322392407e-05, + "loss": 1.8533, + "step": 18380 + }, + { + "epoch": 0.81, + "learning_rate": 3.963709417048087e-05, + "loss": 1.8636, + "step": 18390 + }, + { + "epoch": 0.81, + "learning_rate": 3.961734045903652e-05, + "loss": 1.8764, + "step": 18400 + }, + { + "epoch": 0.81, + "eval_loss": 1.8546396493911743, + "eval_runtime": 11.6105, + "eval_samples_per_second": 352.785, + "eval_steps_per_second": 22.049, + "step": 18400 + }, + { + "epoch": 0.81, + "learning_rate": 3.959758209913881e-05, + "loss": 1.9181, + "step": 18410 + }, + { + "epoch": 0.81, + "learning_rate": 3.957781910033776e-05, + "loss": 1.8662, + "step": 18420 + }, + { + "epoch": 0.81, + "learning_rate": 3.955805147218563e-05, + "loss": 1.8574, + "step": 18430 + }, + { + "epoch": 0.81, + "learning_rate": 3.953827922423692e-05, + "loss": 1.8516, + "step": 18440 + }, + { + "epoch": 0.81, + "learning_rate": 3.9518502366048375e-05, + "loss": 1.8576, + "step": 18450 + }, + { + "epoch": 0.81, + "learning_rate": 3.949872090717894e-05, + "loss": 1.8712, + "step": 18460 + }, + { + "epoch": 0.81, + "learning_rate": 3.947893485718982e-05, + "loss": 1.8648, + "step": 18470 + }, + { + "epoch": 0.81, + "learning_rate": 3.945914422564441e-05, + "loss": 1.9053, + "step": 18480 + }, + { + "epoch": 0.81, + "learning_rate": 3.943934902210834e-05, + "loss": 1.8801, + "step": 18490 + }, + { + "epoch": 0.81, + "learning_rate": 3.941954925614943e-05, + "loss": 1.8713, + "step": 18500 + }, + { + "epoch": 0.81, + "eval_loss": 1.854550838470459, + "eval_runtime": 11.781, + "eval_samples_per_second": 347.68, + "eval_steps_per_second": 21.73, + "step": 18500 + }, + { + "epoch": 0.81, + "learning_rate": 3.939974493733773e-05, + "loss": 1.8499, + "step": 18510 + }, + { + "epoch": 0.81, + "learning_rate": 3.9379936075245464e-05, + "loss": 1.8369, + "step": 18520 + }, + { + "epoch": 0.81, + "learning_rate": 3.9360122679447055e-05, + "loss": 1.8367, + "step": 18530 + }, + { + "epoch": 0.81, + "learning_rate": 3.934030475951915e-05, + "loss": 1.8849, + "step": 18540 + }, + { + "epoch": 0.81, + "learning_rate": 3.932048232504053e-05, + "loss": 1.8544, + "step": 18550 + }, + { + "epoch": 0.81, + "learning_rate": 3.930065538559222e-05, + "loss": 1.9585, + "step": 18560 + }, + { + "epoch": 0.81, + "learning_rate": 3.928082395075736e-05, + "loss": 1.8535, + "step": 18570 + }, + { + "epoch": 0.81, + "learning_rate": 3.926098803012132e-05, + "loss": 1.8866, + "step": 18580 + }, + { + "epoch": 0.81, + "learning_rate": 3.924114763327159e-05, + "loss": 1.8637, + "step": 18590 + }, + { + "epoch": 0.81, + "learning_rate": 3.9221302769797836e-05, + "loss": 1.8889, + "step": 18600 + }, + { + "epoch": 0.81, + "eval_loss": 1.8540050983428955, + "eval_runtime": 11.6301, + "eval_samples_per_second": 352.189, + "eval_steps_per_second": 22.012, + "step": 18600 + }, + { + "epoch": 0.81, + "learning_rate": 3.920145344929192e-05, + "loss": 1.8645, + "step": 18610 + }, + { + "epoch": 0.82, + "learning_rate": 3.9181599681347816e-05, + "loss": 1.8928, + "step": 18620 + }, + { + "epoch": 0.82, + "learning_rate": 3.9161741475561654e-05, + "loss": 1.8368, + "step": 18630 + }, + { + "epoch": 0.82, + "learning_rate": 3.914187884153171e-05, + "loss": 1.8581, + "step": 18640 + }, + { + "epoch": 0.82, + "learning_rate": 3.912201178885843e-05, + "loss": 1.8422, + "step": 18650 + }, + { + "epoch": 0.82, + "learning_rate": 3.910214032714434e-05, + "loss": 1.8837, + "step": 18660 + }, + { + "epoch": 0.82, + "learning_rate": 3.9082264465994165e-05, + "loss": 1.8372, + "step": 18670 + }, + { + "epoch": 0.82, + "learning_rate": 3.9062384215014696e-05, + "loss": 1.8619, + "step": 18680 + }, + { + "epoch": 0.82, + "learning_rate": 3.904249958381487e-05, + "loss": 1.841, + "step": 18690 + }, + { + "epoch": 0.82, + "learning_rate": 3.902261058200576e-05, + "loss": 1.8825, + "step": 18700 + }, + { + "epoch": 0.82, + "eval_loss": 1.8539540767669678, + "eval_runtime": 11.6061, + "eval_samples_per_second": 352.918, + "eval_steps_per_second": 22.057, + "step": 18700 + }, + { + "epoch": 0.82, + "learning_rate": 3.900271721920051e-05, + "loss": 1.8167, + "step": 18710 + }, + { + "epoch": 0.82, + "learning_rate": 3.8982819505014414e-05, + "loss": 1.8565, + "step": 18720 + }, + { + "epoch": 0.82, + "learning_rate": 3.896291744906482e-05, + "loss": 1.8591, + "step": 18730 + }, + { + "epoch": 0.82, + "learning_rate": 3.8943011060971254e-05, + "loss": 1.8222, + "step": 18740 + }, + { + "epoch": 0.82, + "learning_rate": 3.8923100350355236e-05, + "loss": 1.8635, + "step": 18750 + }, + { + "epoch": 0.82, + "learning_rate": 3.890318532684046e-05, + "loss": 1.8953, + "step": 18760 + }, + { + "epoch": 0.82, + "learning_rate": 3.888326600005264e-05, + "loss": 1.8781, + "step": 18770 + }, + { + "epoch": 0.82, + "learning_rate": 3.8863342379619634e-05, + "loss": 1.8203, + "step": 18780 + }, + { + "epoch": 0.82, + "learning_rate": 3.884341447517132e-05, + "loss": 1.8809, + "step": 18790 + }, + { + "epoch": 0.82, + "learning_rate": 3.882348229633967e-05, + "loss": 1.8686, + "step": 18800 + }, + { + "epoch": 0.82, + "eval_loss": 1.8534257411956787, + "eval_runtime": 11.5432, + "eval_samples_per_second": 354.841, + "eval_steps_per_second": 22.178, + "step": 18800 + }, + { + "epoch": 0.82, + "learning_rate": 3.8803545852758726e-05, + "loss": 1.8958, + "step": 18810 + }, + { + "epoch": 0.82, + "learning_rate": 3.8783605154064566e-05, + "loss": 1.8794, + "step": 18820 + }, + { + "epoch": 0.82, + "learning_rate": 3.8763660209895374e-05, + "loss": 1.9037, + "step": 18830 + }, + { + "epoch": 0.82, + "learning_rate": 3.8743711029891335e-05, + "loss": 1.9141, + "step": 18840 + }, + { + "epoch": 0.83, + "learning_rate": 3.872375762369471e-05, + "loss": 1.8257, + "step": 18850 + }, + { + "epoch": 0.83, + "learning_rate": 3.87038000009498e-05, + "loss": 1.8494, + "step": 18860 + }, + { + "epoch": 0.83, + "learning_rate": 3.8683838171302935e-05, + "loss": 1.8758, + "step": 18870 + }, + { + "epoch": 0.83, + "learning_rate": 3.8663872144402466e-05, + "loss": 1.8392, + "step": 18880 + }, + { + "epoch": 0.83, + "learning_rate": 3.864390192989881e-05, + "loss": 1.8782, + "step": 18890 + }, + { + "epoch": 0.83, + "learning_rate": 3.862392753744438e-05, + "loss": 1.8514, + "step": 18900 + }, + { + "epoch": 0.83, + "eval_loss": 1.8528721332550049, + "eval_runtime": 11.544, + "eval_samples_per_second": 354.817, + "eval_steps_per_second": 22.176, + "step": 18900 + }, + { + "epoch": 0.83, + "learning_rate": 3.860394897669361e-05, + "loss": 1.8893, + "step": 18910 + }, + { + "epoch": 0.83, + "learning_rate": 3.858396625730297e-05, + "loss": 1.9059, + "step": 18920 + }, + { + "epoch": 0.83, + "learning_rate": 3.8563979388930914e-05, + "loss": 1.8665, + "step": 18930 + }, + { + "epoch": 0.83, + "learning_rate": 3.8543988381237904e-05, + "loss": 1.8418, + "step": 18940 + }, + { + "epoch": 0.83, + "learning_rate": 3.852399324388642e-05, + "loss": 1.8945, + "step": 18950 + }, + { + "epoch": 0.83, + "learning_rate": 3.850399398654093e-05, + "loss": 1.8753, + "step": 18960 + }, + { + "epoch": 0.83, + "learning_rate": 3.848399061886789e-05, + "loss": 1.8824, + "step": 18970 + }, + { + "epoch": 0.83, + "learning_rate": 3.8463983150535735e-05, + "loss": 1.8406, + "step": 18980 + }, + { + "epoch": 0.83, + "learning_rate": 3.844397159121491e-05, + "loss": 1.8191, + "step": 18990 + }, + { + "epoch": 0.83, + "learning_rate": 3.8423955950577806e-05, + "loss": 1.8585, + "step": 19000 + }, + { + "epoch": 0.83, + "eval_loss": 1.8524885177612305, + "eval_runtime": 11.8631, + "eval_samples_per_second": 345.273, + "eval_steps_per_second": 21.58, + "step": 19000 + }, + { + "epoch": 0.83, + "learning_rate": 3.840393623829879e-05, + "loss": 1.903, + "step": 19010 + }, + { + "epoch": 0.83, + "learning_rate": 3.838391246405423e-05, + "loss": 1.8799, + "step": 19020 + }, + { + "epoch": 0.83, + "learning_rate": 3.8363884637522414e-05, + "loss": 1.9105, + "step": 19030 + }, + { + "epoch": 0.83, + "learning_rate": 3.834385276838362e-05, + "loss": 1.8475, + "step": 19040 + }, + { + "epoch": 0.83, + "learning_rate": 3.832381686632006e-05, + "loss": 1.9121, + "step": 19050 + }, + { + "epoch": 0.83, + "learning_rate": 3.830377694101592e-05, + "loss": 1.9089, + "step": 19060 + }, + { + "epoch": 0.83, + "learning_rate": 3.8283733002157296e-05, + "loss": 1.8896, + "step": 19070 + }, + { + "epoch": 0.84, + "learning_rate": 3.8263685059432246e-05, + "loss": 1.8935, + "step": 19080 + }, + { + "epoch": 0.84, + "learning_rate": 3.8243633122530754e-05, + "loss": 1.8786, + "step": 19090 + }, + { + "epoch": 0.84, + "learning_rate": 3.8223577201144766e-05, + "loss": 1.8583, + "step": 19100 + }, + { + "epoch": 0.84, + "eval_loss": 1.8515942096710205, + "eval_runtime": 11.739, + "eval_samples_per_second": 348.922, + "eval_steps_per_second": 21.808, + "step": 19100 + }, + { + "epoch": 0.84, + "learning_rate": 3.82035173049681e-05, + "loss": 1.9034, + "step": 19110 + }, + { + "epoch": 0.84, + "learning_rate": 3.8183453443696535e-05, + "loss": 1.9279, + "step": 19120 + }, + { + "epoch": 0.84, + "learning_rate": 3.816338562702775e-05, + "loss": 1.937, + "step": 19130 + }, + { + "epoch": 0.84, + "learning_rate": 3.8143313864661336e-05, + "loss": 1.8961, + "step": 19140 + }, + { + "epoch": 0.84, + "learning_rate": 3.812323816629882e-05, + "loss": 1.8915, + "step": 19150 + }, + { + "epoch": 0.84, + "learning_rate": 3.810315854164357e-05, + "loss": 1.8638, + "step": 19160 + }, + { + "epoch": 0.84, + "learning_rate": 3.808307500040091e-05, + "loss": 1.8666, + "step": 19170 + }, + { + "epoch": 0.84, + "learning_rate": 3.8062987552278034e-05, + "loss": 1.8889, + "step": 19180 + }, + { + "epoch": 0.84, + "learning_rate": 3.8042896206984024e-05, + "loss": 1.8346, + "step": 19190 + }, + { + "epoch": 0.84, + "learning_rate": 3.802280097422984e-05, + "loss": 1.8317, + "step": 19200 + }, + { + "epoch": 0.84, + "eval_loss": 1.8512599468231201, + "eval_runtime": 11.4851, + "eval_samples_per_second": 356.637, + "eval_steps_per_second": 22.29, + "step": 19200 + }, + { + "epoch": 0.84, + "learning_rate": 3.800270186372836e-05, + "loss": 1.8443, + "step": 19210 + }, + { + "epoch": 0.84, + "learning_rate": 3.798259888519426e-05, + "loss": 1.8674, + "step": 19220 + }, + { + "epoch": 0.84, + "learning_rate": 3.796249204834416e-05, + "loss": 1.8872, + "step": 19230 + }, + { + "epoch": 0.84, + "learning_rate": 3.794238136289651e-05, + "loss": 1.8847, + "step": 19240 + }, + { + "epoch": 0.84, + "learning_rate": 3.792226683857162e-05, + "loss": 1.9199, + "step": 19250 + }, + { + "epoch": 0.84, + "learning_rate": 3.790214848509166e-05, + "loss": 1.8791, + "step": 19260 + }, + { + "epoch": 0.84, + "learning_rate": 3.788202631218066e-05, + "loss": 1.869, + "step": 19270 + }, + { + "epoch": 0.84, + "learning_rate": 3.7861900329564485e-05, + "loss": 1.8384, + "step": 19280 + }, + { + "epoch": 0.84, + "learning_rate": 3.784177054697083e-05, + "loss": 1.8943, + "step": 19290 + }, + { + "epoch": 0.84, + "learning_rate": 3.782163697412927e-05, + "loss": 1.8706, + "step": 19300 + }, + { + "epoch": 0.84, + "eval_loss": 1.850408911705017, + "eval_runtime": 11.5994, + "eval_samples_per_second": 353.121, + "eval_steps_per_second": 22.07, + "step": 19300 + }, + { + "epoch": 0.85, + "learning_rate": 3.780149962077115e-05, + "loss": 1.87, + "step": 19310 + }, + { + "epoch": 0.85, + "learning_rate": 3.7781358496629704e-05, + "loss": 1.8783, + "step": 19320 + }, + { + "epoch": 0.85, + "learning_rate": 3.776121361143995e-05, + "loss": 1.9046, + "step": 19330 + }, + { + "epoch": 0.85, + "learning_rate": 3.774106497493872e-05, + "loss": 1.9047, + "step": 19340 + }, + { + "epoch": 0.85, + "learning_rate": 3.772091259686469e-05, + "loss": 1.8444, + "step": 19350 + }, + { + "epoch": 0.85, + "learning_rate": 3.770075648695832e-05, + "loss": 1.8606, + "step": 19360 + }, + { + "epoch": 0.85, + "learning_rate": 3.7680596654961886e-05, + "loss": 1.8874, + "step": 19370 + }, + { + "epoch": 0.85, + "learning_rate": 3.7660433110619447e-05, + "loss": 1.863, + "step": 19380 + }, + { + "epoch": 0.85, + "learning_rate": 3.764026586367687e-05, + "loss": 1.8678, + "step": 19390 + }, + { + "epoch": 0.85, + "learning_rate": 3.762009492388182e-05, + "loss": 1.8803, + "step": 19400 + }, + { + "epoch": 0.85, + "eval_loss": 1.8501853942871094, + "eval_runtime": 11.6025, + "eval_samples_per_second": 353.026, + "eval_steps_per_second": 22.064, + "step": 19400 + }, + { + "epoch": 0.85, + "learning_rate": 3.759992030098373e-05, + "loss": 1.8829, + "step": 19410 + }, + { + "epoch": 0.85, + "learning_rate": 3.757974200473382e-05, + "loss": 1.8918, + "step": 19420 + }, + { + "epoch": 0.85, + "learning_rate": 3.755956004488508e-05, + "loss": 1.8583, + "step": 19430 + }, + { + "epoch": 0.85, + "learning_rate": 3.753937443119228e-05, + "loss": 1.898, + "step": 19440 + }, + { + "epoch": 0.85, + "learning_rate": 3.751918517341194e-05, + "loss": 1.8663, + "step": 19450 + }, + { + "epoch": 0.85, + "learning_rate": 3.749899228130237e-05, + "loss": 1.8703, + "step": 19460 + }, + { + "epoch": 0.85, + "learning_rate": 3.747879576462361e-05, + "loss": 1.8459, + "step": 19470 + }, + { + "epoch": 0.85, + "learning_rate": 3.7458595633137464e-05, + "loss": 1.8908, + "step": 19480 + }, + { + "epoch": 0.85, + "learning_rate": 3.743839189660748e-05, + "loss": 1.859, + "step": 19490 + }, + { + "epoch": 0.85, + "learning_rate": 3.741818456479895e-05, + "loss": 1.9049, + "step": 19500 + }, + { + "epoch": 0.85, + "eval_loss": 1.849900245666504, + "eval_runtime": 11.8738, + "eval_samples_per_second": 344.962, + "eval_steps_per_second": 21.56, + "step": 19500 + }, + { + "epoch": 0.85, + "learning_rate": 3.739797364747889e-05, + "loss": 1.8687, + "step": 19510 + }, + { + "epoch": 0.85, + "learning_rate": 3.737775915441608e-05, + "loss": 1.8764, + "step": 19520 + }, + { + "epoch": 0.85, + "learning_rate": 3.735754109538101e-05, + "loss": 1.8692, + "step": 19530 + }, + { + "epoch": 0.86, + "learning_rate": 3.733731948014587e-05, + "loss": 1.8887, + "step": 19540 + }, + { + "epoch": 0.86, + "learning_rate": 3.731709431848462e-05, + "loss": 1.8812, + "step": 19550 + }, + { + "epoch": 0.86, + "learning_rate": 3.729686562017288e-05, + "loss": 1.8412, + "step": 19560 + }, + { + "epoch": 0.86, + "learning_rate": 3.727663339498804e-05, + "loss": 1.8897, + "step": 19570 + }, + { + "epoch": 0.86, + "learning_rate": 3.725639765270913e-05, + "loss": 1.8781, + "step": 19580 + }, + { + "epoch": 0.86, + "learning_rate": 3.7236158403116925e-05, + "loss": 1.8901, + "step": 19590 + }, + { + "epoch": 0.86, + "learning_rate": 3.721591565599388e-05, + "loss": 1.8884, + "step": 19600 + }, + { + "epoch": 0.86, + "eval_loss": 1.8497211933135986, + "eval_runtime": 11.3914, + "eval_samples_per_second": 359.57, + "eval_steps_per_second": 22.473, + "step": 19600 + }, + { + "epoch": 0.86, + "learning_rate": 3.7195669421124136e-05, + "loss": 1.8471, + "step": 19610 + }, + { + "epoch": 0.86, + "learning_rate": 3.7175419708293526e-05, + "loss": 1.8422, + "step": 19620 + }, + { + "epoch": 0.86, + "learning_rate": 3.715516652728956e-05, + "loss": 1.8919, + "step": 19630 + }, + { + "epoch": 0.86, + "learning_rate": 3.713490988790144e-05, + "loss": 1.8633, + "step": 19640 + }, + { + "epoch": 0.86, + "learning_rate": 3.711464979992001e-05, + "loss": 1.8695, + "step": 19650 + }, + { + "epoch": 0.86, + "learning_rate": 3.709438627313782e-05, + "loss": 1.808, + "step": 19660 + }, + { + "epoch": 0.86, + "learning_rate": 3.707411931734904e-05, + "loss": 1.8954, + "step": 19670 + }, + { + "epoch": 0.86, + "learning_rate": 3.705384894234953e-05, + "loss": 1.882, + "step": 19680 + }, + { + "epoch": 0.86, + "learning_rate": 3.703357515793678e-05, + "loss": 1.8588, + "step": 19690 + }, + { + "epoch": 0.86, + "learning_rate": 3.701329797390994e-05, + "loss": 1.8431, + "step": 19700 + }, + { + "epoch": 0.86, + "eval_loss": 1.8491344451904297, + "eval_runtime": 11.3311, + "eval_samples_per_second": 361.483, + "eval_steps_per_second": 22.593, + "step": 19700 + }, + { + "epoch": 0.86, + "learning_rate": 3.699301740006982e-05, + "loss": 1.896, + "step": 19710 + }, + { + "epoch": 0.86, + "learning_rate": 3.69727334462188e-05, + "loss": 1.8863, + "step": 19720 + }, + { + "epoch": 0.86, + "learning_rate": 3.695244612216101e-05, + "loss": 1.8949, + "step": 19730 + }, + { + "epoch": 0.86, + "learning_rate": 3.693215543770209e-05, + "loss": 1.8252, + "step": 19740 + }, + { + "epoch": 0.86, + "learning_rate": 3.691186140264938e-05, + "loss": 1.8675, + "step": 19750 + }, + { + "epoch": 0.86, + "learning_rate": 3.689156402681181e-05, + "loss": 1.8894, + "step": 19760 + }, + { + "epoch": 0.87, + "learning_rate": 3.687126331999992e-05, + "loss": 1.8632, + "step": 19770 + }, + { + "epoch": 0.87, + "learning_rate": 3.68509592920259e-05, + "loss": 1.8709, + "step": 19780 + }, + { + "epoch": 0.87, + "learning_rate": 3.6830651952703484e-05, + "loss": 1.8076, + "step": 19790 + }, + { + "epoch": 0.87, + "learning_rate": 3.681034131184806e-05, + "loss": 1.8623, + "step": 19800 + }, + { + "epoch": 0.87, + "eval_loss": 1.8488744497299194, + "eval_runtime": 11.5142, + "eval_samples_per_second": 355.735, + "eval_steps_per_second": 22.233, + "step": 19800 + }, + { + "epoch": 0.87, + "learning_rate": 3.679002737927658e-05, + "loss": 1.8551, + "step": 19810 + }, + { + "epoch": 0.87, + "learning_rate": 3.67697101648076e-05, + "loss": 1.9459, + "step": 19820 + }, + { + "epoch": 0.87, + "learning_rate": 3.6749389678261254e-05, + "loss": 1.8397, + "step": 19830 + }, + { + "epoch": 0.87, + "learning_rate": 3.672906592945927e-05, + "loss": 1.8937, + "step": 19840 + }, + { + "epoch": 0.87, + "learning_rate": 3.670873892822494e-05, + "loss": 1.8702, + "step": 19850 + }, + { + "epoch": 0.87, + "learning_rate": 3.668840868438314e-05, + "loss": 1.8441, + "step": 19860 + }, + { + "epoch": 0.87, + "learning_rate": 3.66680752077603e-05, + "loss": 1.834, + "step": 19870 + }, + { + "epoch": 0.87, + "learning_rate": 3.6647738508184425e-05, + "loss": 1.8955, + "step": 19880 + }, + { + "epoch": 0.87, + "learning_rate": 3.6627398595485076e-05, + "loss": 1.8395, + "step": 19890 + }, + { + "epoch": 0.87, + "learning_rate": 3.660705547949335e-05, + "loss": 1.8728, + "step": 19900 + }, + { + "epoch": 0.87, + "eval_loss": 1.8482991456985474, + "eval_runtime": 11.5679, + "eval_samples_per_second": 354.082, + "eval_steps_per_second": 22.13, + "step": 19900 + }, + { + "epoch": 0.87, + "learning_rate": 3.658670917004192e-05, + "loss": 1.823, + "step": 19910 + }, + { + "epoch": 0.87, + "learning_rate": 3.656635967696498e-05, + "loss": 1.9088, + "step": 19920 + }, + { + "epoch": 0.87, + "learning_rate": 3.654600701009828e-05, + "loss": 1.8349, + "step": 19930 + }, + { + "epoch": 0.87, + "learning_rate": 3.652565117927907e-05, + "loss": 1.8668, + "step": 19940 + }, + { + "epoch": 0.87, + "learning_rate": 3.650529219434618e-05, + "loss": 1.881, + "step": 19950 + }, + { + "epoch": 0.87, + "learning_rate": 3.6484930065139946e-05, + "loss": 1.8751, + "step": 19960 + }, + { + "epoch": 0.87, + "learning_rate": 3.646456480150217e-05, + "loss": 1.8724, + "step": 19970 + }, + { + "epoch": 0.87, + "learning_rate": 3.644419641327628e-05, + "loss": 1.8408, + "step": 19980 + }, + { + "epoch": 0.88, + "learning_rate": 3.6423824910307095e-05, + "loss": 1.8853, + "step": 19990 + }, + { + "epoch": 0.88, + "learning_rate": 3.640345030244103e-05, + "loss": 1.9109, + "step": 20000 + }, + { + "epoch": 0.88, + "eval_loss": 1.848042368888855, + "eval_runtime": 11.6122, + "eval_samples_per_second": 352.732, + "eval_steps_per_second": 22.046, + "step": 20000 + }, + { + "epoch": 0.88, + "learning_rate": 3.638307259952593e-05, + "loss": 1.9015, + "step": 20010 + }, + { + "epoch": 0.88, + "learning_rate": 3.636269181141122e-05, + "loss": 1.8893, + "step": 20020 + }, + { + "epoch": 0.88, + "learning_rate": 3.6342307947947726e-05, + "loss": 1.8432, + "step": 20030 + }, + { + "epoch": 0.88, + "learning_rate": 3.6321921018987815e-05, + "loss": 1.8465, + "step": 20040 + }, + { + "epoch": 0.88, + "learning_rate": 3.6301531034385326e-05, + "loss": 1.8545, + "step": 20050 + }, + { + "epoch": 0.88, + "learning_rate": 3.628113800399556e-05, + "loss": 1.8507, + "step": 20060 + }, + { + "epoch": 0.88, + "learning_rate": 3.626074193767531e-05, + "loss": 1.8718, + "step": 20070 + }, + { + "epoch": 0.88, + "learning_rate": 3.624034284528282e-05, + "loss": 1.8886, + "step": 20080 + }, + { + "epoch": 0.88, + "learning_rate": 3.621994073667783e-05, + "loss": 1.898, + "step": 20090 + }, + { + "epoch": 0.88, + "learning_rate": 3.6199535621721465e-05, + "loss": 1.8848, + "step": 20100 + }, + { + "epoch": 0.88, + "eval_loss": 1.8475103378295898, + "eval_runtime": 11.4108, + "eval_samples_per_second": 358.959, + "eval_steps_per_second": 22.435, + "step": 20100 + }, + { + "epoch": 0.88, + "learning_rate": 3.61791275102764e-05, + "loss": 1.8565, + "step": 20110 + }, + { + "epoch": 0.88, + "learning_rate": 3.615871641220667e-05, + "loss": 1.9083, + "step": 20120 + }, + { + "epoch": 0.88, + "learning_rate": 3.613830233737782e-05, + "loss": 1.8759, + "step": 20130 + }, + { + "epoch": 0.88, + "learning_rate": 3.61178852956568e-05, + "loss": 1.8727, + "step": 20140 + }, + { + "epoch": 0.88, + "learning_rate": 3.609746529691198e-05, + "loss": 1.8918, + "step": 20150 + }, + { + "epoch": 0.88, + "learning_rate": 3.6077042351013206e-05, + "loss": 1.8517, + "step": 20160 + }, + { + "epoch": 0.88, + "learning_rate": 3.60566164678317e-05, + "loss": 1.9069, + "step": 20170 + }, + { + "epoch": 0.88, + "learning_rate": 3.603618765724014e-05, + "loss": 1.8429, + "step": 20180 + }, + { + "epoch": 0.88, + "learning_rate": 3.60157559291126e-05, + "loss": 1.832, + "step": 20190 + }, + { + "epoch": 0.88, + "learning_rate": 3.599532129332457e-05, + "loss": 1.9338, + "step": 20200 + }, + { + "epoch": 0.88, + "eval_loss": 1.847186803817749, + "eval_runtime": 11.3797, + "eval_samples_per_second": 359.94, + "eval_steps_per_second": 22.496, + "step": 20200 + }, + { + "epoch": 0.88, + "learning_rate": 3.597488375975294e-05, + "loss": 1.8703, + "step": 20210 + }, + { + "epoch": 0.89, + "learning_rate": 3.5954443338276004e-05, + "loss": 1.8843, + "step": 20220 + }, + { + "epoch": 0.89, + "learning_rate": 3.593400003877346e-05, + "loss": 1.8451, + "step": 20230 + }, + { + "epoch": 0.89, + "learning_rate": 3.591355387112637e-05, + "loss": 1.8947, + "step": 20240 + }, + { + "epoch": 0.89, + "learning_rate": 3.5893104845217235e-05, + "loss": 1.8208, + "step": 20250 + }, + { + "epoch": 0.89, + "learning_rate": 3.5872652970929874e-05, + "loss": 1.9205, + "step": 20260 + }, + { + "epoch": 0.89, + "learning_rate": 3.585219825814953e-05, + "loss": 1.8699, + "step": 20270 + }, + { + "epoch": 0.89, + "learning_rate": 3.583174071676279e-05, + "loss": 1.8673, + "step": 20280 + }, + { + "epoch": 0.89, + "learning_rate": 3.581128035665763e-05, + "loss": 1.8823, + "step": 20290 + }, + { + "epoch": 0.89, + "learning_rate": 3.579081718772337e-05, + "loss": 1.8937, + "step": 20300 + }, + { + "epoch": 0.89, + "eval_loss": 1.8470911979675293, + "eval_runtime": 11.3691, + "eval_samples_per_second": 360.276, + "eval_steps_per_second": 22.517, + "step": 20300 + }, + { + "epoch": 0.89, + "learning_rate": 3.577035121985069e-05, + "loss": 1.8222, + "step": 20310 + }, + { + "epoch": 0.89, + "learning_rate": 3.5749882462931646e-05, + "loss": 1.8978, + "step": 20320 + }, + { + "epoch": 0.89, + "learning_rate": 3.572941092685961e-05, + "loss": 1.8855, + "step": 20330 + }, + { + "epoch": 0.89, + "learning_rate": 3.570893662152931e-05, + "loss": 1.8851, + "step": 20340 + }, + { + "epoch": 0.89, + "learning_rate": 3.568845955683682e-05, + "loss": 1.8858, + "step": 20350 + }, + { + "epoch": 0.89, + "learning_rate": 3.566797974267954e-05, + "loss": 1.8572, + "step": 20360 + }, + { + "epoch": 0.89, + "learning_rate": 3.56474971889562e-05, + "loss": 1.8883, + "step": 20370 + }, + { + "epoch": 0.89, + "learning_rate": 3.5627011905566854e-05, + "loss": 1.8731, + "step": 20380 + }, + { + "epoch": 0.89, + "learning_rate": 3.560652390241288e-05, + "loss": 1.8558, + "step": 20390 + }, + { + "epoch": 0.89, + "learning_rate": 3.558603318939696e-05, + "loss": 1.8459, + "step": 20400 + }, + { + "epoch": 0.89, + "eval_loss": 1.8470215797424316, + "eval_runtime": 11.6007, + "eval_samples_per_second": 353.084, + "eval_steps_per_second": 22.068, + "step": 20400 + }, + { + "epoch": 0.89, + "learning_rate": 3.556553977642309e-05, + "loss": 1.8773, + "step": 20410 + }, + { + "epoch": 0.89, + "learning_rate": 3.554504367339659e-05, + "loss": 1.8588, + "step": 20420 + }, + { + "epoch": 0.89, + "learning_rate": 3.552454489022405e-05, + "loss": 1.8891, + "step": 20430 + }, + { + "epoch": 0.89, + "learning_rate": 3.550404343681336e-05, + "loss": 1.8349, + "step": 20440 + }, + { + "epoch": 0.9, + "learning_rate": 3.5483539323073735e-05, + "loss": 1.8599, + "step": 20450 + }, + { + "epoch": 0.9, + "learning_rate": 3.546303255891563e-05, + "loss": 1.8299, + "step": 20460 + }, + { + "epoch": 0.9, + "learning_rate": 3.544252315425081e-05, + "loss": 1.8787, + "step": 20470 + }, + { + "epoch": 0.9, + "learning_rate": 3.542201111899231e-05, + "loss": 1.8508, + "step": 20480 + }, + { + "epoch": 0.9, + "learning_rate": 3.5401496463054425e-05, + "loss": 1.8561, + "step": 20490 + }, + { + "epoch": 0.9, + "learning_rate": 3.5380979196352735e-05, + "loss": 1.8587, + "step": 20500 + }, + { + "epoch": 0.9, + "eval_loss": 1.8461756706237793, + "eval_runtime": 11.7063, + "eval_samples_per_second": 349.898, + "eval_steps_per_second": 21.869, + "step": 20500 + }, + { + "epoch": 0.9, + "learning_rate": 3.5360459328804064e-05, + "loss": 1.9049, + "step": 20510 + }, + { + "epoch": 0.9, + "learning_rate": 3.533993687032652e-05, + "loss": 1.8235, + "step": 20520 + }, + { + "epoch": 0.9, + "learning_rate": 3.5319411830839415e-05, + "loss": 1.8665, + "step": 20530 + }, + { + "epoch": 0.9, + "learning_rate": 3.529888422026336e-05, + "loss": 1.9031, + "step": 20540 + }, + { + "epoch": 0.9, + "learning_rate": 3.527835404852018e-05, + "loss": 1.8694, + "step": 20550 + }, + { + "epoch": 0.9, + "learning_rate": 3.525782132553296e-05, + "loss": 1.8432, + "step": 20560 + }, + { + "epoch": 0.9, + "learning_rate": 3.523728606122598e-05, + "loss": 1.9015, + "step": 20570 + }, + { + "epoch": 0.9, + "learning_rate": 3.521674826552479e-05, + "loss": 1.8434, + "step": 20580 + }, + { + "epoch": 0.9, + "learning_rate": 3.519620794835614e-05, + "loss": 1.8726, + "step": 20590 + }, + { + "epoch": 0.9, + "learning_rate": 3.5175665119648e-05, + "loss": 1.9017, + "step": 20600 + }, + { + "epoch": 0.9, + "eval_loss": 1.845084309577942, + "eval_runtime": 11.6236, + "eval_samples_per_second": 352.386, + "eval_steps_per_second": 22.024, + "step": 20600 + }, + { + "epoch": 0.9, + "learning_rate": 3.515511978932956e-05, + "loss": 1.8861, + "step": 20610 + }, + { + "epoch": 0.9, + "learning_rate": 3.513457196733121e-05, + "loss": 1.8689, + "step": 20620 + }, + { + "epoch": 0.9, + "learning_rate": 3.511402166358458e-05, + "loss": 1.8851, + "step": 20630 + }, + { + "epoch": 0.9, + "learning_rate": 3.5093468888022435e-05, + "loss": 1.8506, + "step": 20640 + }, + { + "epoch": 0.9, + "learning_rate": 3.5072913650578797e-05, + "loss": 1.9112, + "step": 20650 + }, + { + "epoch": 0.9, + "learning_rate": 3.505235596118884e-05, + "loss": 1.8424, + "step": 20660 + }, + { + "epoch": 0.9, + "learning_rate": 3.503179582978892e-05, + "loss": 1.8257, + "step": 20670 + }, + { + "epoch": 0.91, + "learning_rate": 3.5011233266316616e-05, + "loss": 1.866, + "step": 20680 + }, + { + "epoch": 0.91, + "learning_rate": 3.499066828071063e-05, + "loss": 1.8652, + "step": 20690 + }, + { + "epoch": 0.91, + "learning_rate": 3.497010088291089e-05, + "loss": 1.8827, + "step": 20700 + }, + { + "epoch": 0.91, + "eval_loss": 1.8452777862548828, + "eval_runtime": 11.6079, + "eval_samples_per_second": 352.863, + "eval_steps_per_second": 22.054, + "step": 20700 + }, + { + "epoch": 0.91, + "learning_rate": 3.494953108285843e-05, + "loss": 1.8998, + "step": 20710 + }, + { + "epoch": 0.91, + "learning_rate": 3.492895889049549e-05, + "loss": 1.9052, + "step": 20720 + }, + { + "epoch": 0.91, + "learning_rate": 3.490838431576544e-05, + "loss": 1.838, + "step": 20730 + }, + { + "epoch": 0.91, + "learning_rate": 3.488780736861283e-05, + "loss": 1.9133, + "step": 20740 + }, + { + "epoch": 0.91, + "learning_rate": 3.4867228058983316e-05, + "loss": 1.8919, + "step": 20750 + }, + { + "epoch": 0.91, + "learning_rate": 3.484664639682373e-05, + "loss": 1.885, + "step": 20760 + }, + { + "epoch": 0.91, + "learning_rate": 3.4826062392082035e-05, + "loss": 1.8721, + "step": 20770 + }, + { + "epoch": 0.91, + "learning_rate": 3.4805476054707315e-05, + "loss": 1.8589, + "step": 20780 + }, + { + "epoch": 0.91, + "learning_rate": 3.4784887394649785e-05, + "loss": 1.8758, + "step": 20790 + }, + { + "epoch": 0.91, + "learning_rate": 3.476429642186078e-05, + "loss": 1.8723, + "step": 20800 + }, + { + "epoch": 0.91, + "eval_loss": 1.844846487045288, + "eval_runtime": 11.5952, + "eval_samples_per_second": 353.25, + "eval_steps_per_second": 22.078, + "step": 20800 + }, + { + "epoch": 0.91, + "learning_rate": 3.4743703146292774e-05, + "loss": 1.8686, + "step": 20810 + }, + { + "epoch": 0.91, + "learning_rate": 3.472310757789932e-05, + "loss": 1.8509, + "step": 20820 + }, + { + "epoch": 0.91, + "learning_rate": 3.470250972663512e-05, + "loss": 1.8502, + "step": 20830 + }, + { + "epoch": 0.91, + "learning_rate": 3.468190960245593e-05, + "loss": 1.8396, + "step": 20840 + }, + { + "epoch": 0.91, + "learning_rate": 3.466130721531867e-05, + "loss": 1.8543, + "step": 20850 + }, + { + "epoch": 0.91, + "learning_rate": 3.464070257518127e-05, + "loss": 1.8785, + "step": 20860 + }, + { + "epoch": 0.91, + "learning_rate": 3.4620095692002815e-05, + "loss": 1.8533, + "step": 20870 + }, + { + "epoch": 0.91, + "learning_rate": 3.4599486575743454e-05, + "loss": 1.8293, + "step": 20880 + }, + { + "epoch": 0.91, + "learning_rate": 3.457887523636441e-05, + "loss": 1.8223, + "step": 20890 + }, + { + "epoch": 0.91, + "learning_rate": 3.455826168382799e-05, + "loss": 1.8676, + "step": 20900 + }, + { + "epoch": 0.91, + "eval_loss": 1.844473123550415, + "eval_runtime": 11.7054, + "eval_samples_per_second": 349.924, + "eval_steps_per_second": 21.87, + "step": 20900 + }, + { + "epoch": 0.92, + "learning_rate": 3.4537645928097554e-05, + "loss": 1.8801, + "step": 20910 + }, + { + "epoch": 0.92, + "learning_rate": 3.451702797913756e-05, + "loss": 1.8434, + "step": 20920 + }, + { + "epoch": 0.92, + "learning_rate": 3.449640784691349e-05, + "loss": 1.8943, + "step": 20930 + }, + { + "epoch": 0.92, + "learning_rate": 3.4475785541391894e-05, + "loss": 1.8622, + "step": 20940 + }, + { + "epoch": 0.92, + "learning_rate": 3.445516107254039e-05, + "loss": 1.8625, + "step": 20950 + }, + { + "epoch": 0.92, + "learning_rate": 3.44345344503276e-05, + "loss": 1.86, + "step": 20960 + }, + { + "epoch": 0.92, + "learning_rate": 3.441390568472323e-05, + "loss": 1.8783, + "step": 20970 + }, + { + "epoch": 0.92, + "learning_rate": 3.439327478569801e-05, + "loss": 1.8514, + "step": 20980 + }, + { + "epoch": 0.92, + "learning_rate": 3.437264176322369e-05, + "loss": 1.8293, + "step": 20990 + }, + { + "epoch": 0.92, + "learning_rate": 3.4352006627273036e-05, + "loss": 1.8812, + "step": 21000 + }, + { + "epoch": 0.92, + "eval_loss": 1.844010829925537, + "eval_runtime": 11.8385, + "eval_samples_per_second": 345.989, + "eval_steps_per_second": 21.624, + "step": 21000 + }, + { + "epoch": 0.92, + "learning_rate": 3.433136938781988e-05, + "loss": 1.8436, + "step": 21010 + }, + { + "epoch": 0.92, + "learning_rate": 3.431073005483902e-05, + "loss": 1.8674, + "step": 21020 + }, + { + "epoch": 0.92, + "learning_rate": 3.42900886383063e-05, + "loss": 1.884, + "step": 21030 + }, + { + "epoch": 0.92, + "learning_rate": 3.426944514819856e-05, + "loss": 1.8758, + "step": 21040 + }, + { + "epoch": 0.92, + "learning_rate": 3.424879959449363e-05, + "loss": 1.8379, + "step": 21050 + }, + { + "epoch": 0.92, + "learning_rate": 3.422815198717037e-05, + "loss": 1.8983, + "step": 21060 + }, + { + "epoch": 0.92, + "learning_rate": 3.4207502336208586e-05, + "loss": 1.9012, + "step": 21070 + }, + { + "epoch": 0.92, + "learning_rate": 3.418685065158912e-05, + "loss": 1.8474, + "step": 21080 + }, + { + "epoch": 0.92, + "learning_rate": 3.416619694329376e-05, + "loss": 1.8605, + "step": 21090 + }, + { + "epoch": 0.92, + "learning_rate": 3.414554122130529e-05, + "loss": 1.8589, + "step": 21100 + }, + { + "epoch": 0.92, + "eval_loss": 1.8435540199279785, + "eval_runtime": 11.7192, + "eval_samples_per_second": 349.512, + "eval_steps_per_second": 21.844, + "step": 21100 + }, + { + "epoch": 0.92, + "learning_rate": 3.412488349560748e-05, + "loss": 1.9195, + "step": 21110 + }, + { + "epoch": 0.92, + "learning_rate": 3.4104223776185025e-05, + "loss": 1.8987, + "step": 21120 + }, + { + "epoch": 0.92, + "learning_rate": 3.408356207302363e-05, + "loss": 1.8662, + "step": 21130 + }, + { + "epoch": 0.93, + "learning_rate": 3.4062898396109926e-05, + "loss": 1.8617, + "step": 21140 + }, + { + "epoch": 0.93, + "learning_rate": 3.404223275543153e-05, + "loss": 1.8549, + "step": 21150 + }, + { + "epoch": 0.93, + "learning_rate": 3.402156516097697e-05, + "loss": 1.8974, + "step": 21160 + }, + { + "epoch": 0.93, + "learning_rate": 3.400089562273576e-05, + "loss": 1.8277, + "step": 21170 + }, + { + "epoch": 0.93, + "learning_rate": 3.3980224150698304e-05, + "loss": 1.8674, + "step": 21180 + }, + { + "epoch": 0.93, + "learning_rate": 3.3959550754856e-05, + "loss": 1.8959, + "step": 21190 + }, + { + "epoch": 0.93, + "learning_rate": 3.3938875445201126e-05, + "loss": 1.8234, + "step": 21200 + }, + { + "epoch": 0.93, + "eval_loss": 1.8433294296264648, + "eval_runtime": 12.3336, + "eval_samples_per_second": 332.1, + "eval_steps_per_second": 20.756, + "step": 21200 + }, + { + "epoch": 0.93, + "learning_rate": 3.39181982317269e-05, + "loss": 1.8358, + "step": 21210 + }, + { + "epoch": 0.93, + "learning_rate": 3.3897519124427484e-05, + "loss": 1.9033, + "step": 21220 + }, + { + "epoch": 0.93, + "learning_rate": 3.387683813329791e-05, + "loss": 1.8959, + "step": 21230 + }, + { + "epoch": 0.93, + "learning_rate": 3.385615526833416e-05, + "loss": 1.8205, + "step": 21240 + }, + { + "epoch": 0.93, + "learning_rate": 3.38354705395331e-05, + "loss": 1.8477, + "step": 21250 + }, + { + "epoch": 0.93, + "learning_rate": 3.381478395689252e-05, + "loss": 1.8897, + "step": 21260 + }, + { + "epoch": 0.93, + "learning_rate": 3.3794095530411064e-05, + "loss": 1.8679, + "step": 21270 + }, + { + "epoch": 0.93, + "learning_rate": 3.377340527008831e-05, + "loss": 1.8254, + "step": 21280 + }, + { + "epoch": 0.93, + "learning_rate": 3.37527131859247e-05, + "loss": 1.8897, + "step": 21290 + }, + { + "epoch": 0.93, + "learning_rate": 3.373201928792158e-05, + "loss": 1.878, + "step": 21300 + }, + { + "epoch": 0.93, + "eval_loss": 1.8435399532318115, + "eval_runtime": 11.6036, + "eval_samples_per_second": 352.993, + "eval_steps_per_second": 22.062, + "step": 21300 + }, + { + "epoch": 0.93, + "learning_rate": 3.371132358608114e-05, + "loss": 1.8616, + "step": 21310 + }, + { + "epoch": 0.93, + "learning_rate": 3.3690626090406446e-05, + "loss": 1.8702, + "step": 21320 + }, + { + "epoch": 0.93, + "learning_rate": 3.366992681090147e-05, + "loss": 1.8344, + "step": 21330 + }, + { + "epoch": 0.93, + "learning_rate": 3.3649225757571e-05, + "loss": 1.898, + "step": 21340 + }, + { + "epoch": 0.93, + "learning_rate": 3.362852294042071e-05, + "loss": 1.8452, + "step": 21350 + }, + { + "epoch": 0.94, + "learning_rate": 3.3607818369457106e-05, + "loss": 1.872, + "step": 21360 + }, + { + "epoch": 0.94, + "learning_rate": 3.3587112054687566e-05, + "loss": 1.8164, + "step": 21370 + }, + { + "epoch": 0.94, + "learning_rate": 3.3566404006120296e-05, + "loss": 1.8258, + "step": 21380 + }, + { + "epoch": 0.94, + "learning_rate": 3.3545694233764326e-05, + "loss": 1.8799, + "step": 21390 + }, + { + "epoch": 0.94, + "learning_rate": 3.352498274762955e-05, + "loss": 1.8901, + "step": 21400 + }, + { + "epoch": 0.94, + "eval_loss": 1.8432936668395996, + "eval_runtime": 11.5934, + "eval_samples_per_second": 353.304, + "eval_steps_per_second": 22.082, + "step": 21400 + }, + { + "epoch": 0.94, + "learning_rate": 3.3504269557726675e-05, + "loss": 1.8615, + "step": 21410 + }, + { + "epoch": 0.94, + "learning_rate": 3.348355467406723e-05, + "loss": 1.8215, + "step": 21420 + }, + { + "epoch": 0.94, + "learning_rate": 3.3462838106663544e-05, + "loss": 1.8374, + "step": 21430 + }, + { + "epoch": 0.94, + "learning_rate": 3.344211986552881e-05, + "loss": 1.9267, + "step": 21440 + }, + { + "epoch": 0.94, + "learning_rate": 3.342139996067699e-05, + "loss": 1.8219, + "step": 21450 + }, + { + "epoch": 0.94, + "learning_rate": 3.340067840212285e-05, + "loss": 1.8697, + "step": 21460 + }, + { + "epoch": 0.94, + "learning_rate": 3.3379955199881973e-05, + "loss": 1.883, + "step": 21470 + }, + { + "epoch": 0.94, + "learning_rate": 3.3359230363970734e-05, + "loss": 1.8599, + "step": 21480 + }, + { + "epoch": 0.94, + "learning_rate": 3.333850390440629e-05, + "loss": 1.8511, + "step": 21490 + }, + { + "epoch": 0.94, + "learning_rate": 3.331777583120657e-05, + "loss": 1.8464, + "step": 21500 + }, + { + "epoch": 0.94, + "eval_loss": 1.843240737915039, + "eval_runtime": 11.8662, + "eval_samples_per_second": 345.183, + "eval_steps_per_second": 21.574, + "step": 21500 + }, + { + "epoch": 0.94, + "learning_rate": 3.329704615439032e-05, + "loss": 1.8908, + "step": 21510 + }, + { + "epoch": 0.94, + "learning_rate": 3.327631488397702e-05, + "loss": 1.9016, + "step": 21520 + }, + { + "epoch": 0.94, + "learning_rate": 3.3255582029986956e-05, + "loss": 1.8827, + "step": 21530 + }, + { + "epoch": 0.94, + "learning_rate": 3.323484760244115e-05, + "loss": 1.905, + "step": 21540 + }, + { + "epoch": 0.94, + "learning_rate": 3.321411161136141e-05, + "loss": 1.872, + "step": 21550 + }, + { + "epoch": 0.94, + "learning_rate": 3.319337406677029e-05, + "loss": 1.8769, + "step": 21560 + }, + { + "epoch": 0.94, + "learning_rate": 3.317263497869107e-05, + "loss": 1.9206, + "step": 21570 + }, + { + "epoch": 0.94, + "learning_rate": 3.315189435714781e-05, + "loss": 1.8788, + "step": 21580 + }, + { + "epoch": 0.95, + "learning_rate": 3.31311522121653e-05, + "loss": 1.8762, + "step": 21590 + }, + { + "epoch": 0.95, + "learning_rate": 3.311040855376908e-05, + "loss": 1.884, + "step": 21600 + }, + { + "epoch": 0.95, + "eval_loss": 1.8426785469055176, + "eval_runtime": 11.6483, + "eval_samples_per_second": 351.639, + "eval_steps_per_second": 21.977, + "step": 21600 + }, + { + "epoch": 0.95, + "learning_rate": 3.3089663391985375e-05, + "loss": 1.8241, + "step": 21610 + }, + { + "epoch": 0.95, + "learning_rate": 3.30689167368412e-05, + "loss": 1.8922, + "step": 21620 + }, + { + "epoch": 0.95, + "learning_rate": 3.304816859836424e-05, + "loss": 1.8536, + "step": 21630 + }, + { + "epoch": 0.95, + "learning_rate": 3.302741898658294e-05, + "loss": 1.8683, + "step": 21640 + }, + { + "epoch": 0.95, + "learning_rate": 3.300666791152641e-05, + "loss": 1.8762, + "step": 21650 + }, + { + "epoch": 0.95, + "learning_rate": 3.29859153832245e-05, + "loss": 1.8442, + "step": 21660 + }, + { + "epoch": 0.95, + "learning_rate": 3.296516141170776e-05, + "loss": 1.8689, + "step": 21670 + }, + { + "epoch": 0.95, + "learning_rate": 3.294440600700742e-05, + "loss": 1.8598, + "step": 21680 + }, + { + "epoch": 0.95, + "learning_rate": 3.2923649179155436e-05, + "loss": 1.8641, + "step": 21690 + }, + { + "epoch": 0.95, + "learning_rate": 3.290289093818442e-05, + "loss": 1.8715, + "step": 21700 + }, + { + "epoch": 0.95, + "eval_loss": 1.8424584865570068, + "eval_runtime": 11.6455, + "eval_samples_per_second": 351.723, + "eval_steps_per_second": 21.983, + "step": 21700 + }, + { + "epoch": 0.95, + "learning_rate": 3.288213129412766e-05, + "loss": 1.84, + "step": 21710 + }, + { + "epoch": 0.95, + "learning_rate": 3.286137025701916e-05, + "loss": 1.8564, + "step": 21720 + }, + { + "epoch": 0.95, + "learning_rate": 3.2840607836893574e-05, + "loss": 1.8725, + "step": 21730 + }, + { + "epoch": 0.95, + "learning_rate": 3.281984404378622e-05, + "loss": 1.8576, + "step": 21740 + }, + { + "epoch": 0.95, + "learning_rate": 3.27990788877331e-05, + "loss": 1.882, + "step": 21750 + }, + { + "epoch": 0.95, + "learning_rate": 3.277831237877085e-05, + "loss": 1.8279, + "step": 21760 + }, + { + "epoch": 0.95, + "learning_rate": 3.275754452693677e-05, + "loss": 1.9028, + "step": 21770 + }, + { + "epoch": 0.95, + "learning_rate": 3.273677534226883e-05, + "loss": 1.8716, + "step": 21780 + }, + { + "epoch": 0.95, + "learning_rate": 3.27160048348056e-05, + "loss": 1.8502, + "step": 21790 + }, + { + "epoch": 0.95, + "learning_rate": 3.269523301458633e-05, + "loss": 1.8384, + "step": 21800 + }, + { + "epoch": 0.95, + "eval_loss": 1.8420464992523193, + "eval_runtime": 11.6127, + "eval_samples_per_second": 352.716, + "eval_steps_per_second": 22.045, + "step": 21800 + }, + { + "epoch": 0.95, + "learning_rate": 3.2674459891650884e-05, + "loss": 1.8778, + "step": 21810 + }, + { + "epoch": 0.96, + "learning_rate": 3.265368547603977e-05, + "loss": 1.8726, + "step": 21820 + }, + { + "epoch": 0.96, + "learning_rate": 3.263290977779409e-05, + "loss": 1.905, + "step": 21830 + }, + { + "epoch": 0.96, + "learning_rate": 3.2612132806955604e-05, + "loss": 1.868, + "step": 21840 + }, + { + "epoch": 0.96, + "learning_rate": 3.259135457356667e-05, + "loss": 1.8234, + "step": 21850 + }, + { + "epoch": 0.96, + "learning_rate": 3.257057508767025e-05, + "loss": 1.8995, + "step": 21860 + }, + { + "epoch": 0.96, + "learning_rate": 3.254979435930993e-05, + "loss": 1.8289, + "step": 21870 + }, + { + "epoch": 0.96, + "learning_rate": 3.252901239852986e-05, + "loss": 1.864, + "step": 21880 + }, + { + "epoch": 0.96, + "learning_rate": 3.2508229215374845e-05, + "loss": 1.8272, + "step": 21890 + }, + { + "epoch": 0.96, + "learning_rate": 3.248744481989022e-05, + "loss": 1.8424, + "step": 21900 + }, + { + "epoch": 0.96, + "eval_loss": 1.8414499759674072, + "eval_runtime": 11.656, + "eval_samples_per_second": 351.406, + "eval_steps_per_second": 21.963, + "step": 21900 + }, + { + "epoch": 0.96, + "learning_rate": 3.2466659222121936e-05, + "loss": 1.8475, + "step": 21910 + }, + { + "epoch": 0.96, + "learning_rate": 3.2445872432116525e-05, + "loss": 1.895, + "step": 21920 + }, + { + "epoch": 0.96, + "learning_rate": 3.242508445992109e-05, + "loss": 1.8655, + "step": 21930 + }, + { + "epoch": 0.96, + "learning_rate": 3.2404295315583324e-05, + "loss": 1.8606, + "step": 21940 + }, + { + "epoch": 0.96, + "learning_rate": 3.238350500915144e-05, + "loss": 1.8496, + "step": 21950 + }, + { + "epoch": 0.96, + "learning_rate": 3.2362713550674256e-05, + "loss": 1.8838, + "step": 21960 + }, + { + "epoch": 0.96, + "learning_rate": 3.234192095020113e-05, + "loss": 1.8587, + "step": 21970 + }, + { + "epoch": 0.96, + "learning_rate": 3.232112721778198e-05, + "loss": 1.8999, + "step": 21980 + }, + { + "epoch": 0.96, + "learning_rate": 3.230033236346725e-05, + "loss": 1.8584, + "step": 21990 + }, + { + "epoch": 0.96, + "learning_rate": 3.227953639730798e-05, + "loss": 1.8248, + "step": 22000 + }, + { + "epoch": 0.96, + "eval_loss": 1.8413933515548706, + "eval_runtime": 11.8856, + "eval_samples_per_second": 344.619, + "eval_steps_per_second": 21.539, + "step": 22000 + }, + { + "epoch": 0.96, + "learning_rate": 3.225873932935567e-05, + "loss": 1.8195, + "step": 22010 + }, + { + "epoch": 0.96, + "learning_rate": 3.223794116966241e-05, + "loss": 1.9048, + "step": 22020 + }, + { + "epoch": 0.96, + "learning_rate": 3.22171419282808e-05, + "loss": 1.8697, + "step": 22030 + }, + { + "epoch": 0.96, + "learning_rate": 3.219634161526396e-05, + "loss": 1.8435, + "step": 22040 + }, + { + "epoch": 0.97, + "learning_rate": 3.2175540240665526e-05, + "loss": 1.9132, + "step": 22050 + }, + { + "epoch": 0.97, + "learning_rate": 3.215473781453965e-05, + "loss": 1.8596, + "step": 22060 + }, + { + "epoch": 0.97, + "learning_rate": 3.213393434694101e-05, + "loss": 1.8665, + "step": 22070 + }, + { + "epoch": 0.97, + "learning_rate": 3.211312984792476e-05, + "loss": 1.8975, + "step": 22080 + }, + { + "epoch": 0.97, + "learning_rate": 3.209232432754656e-05, + "loss": 1.8316, + "step": 22090 + }, + { + "epoch": 0.97, + "learning_rate": 3.207151779586259e-05, + "loss": 1.8948, + "step": 22100 + }, + { + "epoch": 0.97, + "eval_loss": 1.8409616947174072, + "eval_runtime": 11.7108, + "eval_samples_per_second": 349.763, + "eval_steps_per_second": 21.86, + "step": 22100 + }, + { + "epoch": 0.97, + "learning_rate": 3.205071026292946e-05, + "loss": 1.8931, + "step": 22110 + }, + { + "epoch": 0.97, + "learning_rate": 3.202990173880433e-05, + "loss": 1.9053, + "step": 22120 + }, + { + "epoch": 0.97, + "learning_rate": 3.2009092233544795e-05, + "loss": 1.9124, + "step": 22130 + }, + { + "epoch": 0.97, + "learning_rate": 3.198828175720895e-05, + "loss": 1.8689, + "step": 22140 + }, + { + "epoch": 0.97, + "learning_rate": 3.196747031985533e-05, + "loss": 1.8693, + "step": 22150 + }, + { + "epoch": 0.97, + "learning_rate": 3.194665793154297e-05, + "loss": 1.8486, + "step": 22160 + }, + { + "epoch": 0.97, + "learning_rate": 3.192584460233134e-05, + "loss": 1.8557, + "step": 22170 + }, + { + "epoch": 0.97, + "learning_rate": 3.190503034228037e-05, + "loss": 1.8637, + "step": 22180 + }, + { + "epoch": 0.97, + "learning_rate": 3.188421516145045e-05, + "loss": 1.8603, + "step": 22190 + }, + { + "epoch": 0.97, + "learning_rate": 3.18633990699024e-05, + "loss": 1.8666, + "step": 22200 + }, + { + "epoch": 0.97, + "eval_loss": 1.8402700424194336, + "eval_runtime": 11.7872, + "eval_samples_per_second": 347.497, + "eval_steps_per_second": 21.719, + "step": 22200 + }, + { + "epoch": 0.97, + "learning_rate": 3.184258207769749e-05, + "loss": 1.888, + "step": 22210 + }, + { + "epoch": 0.97, + "learning_rate": 3.182176419489741e-05, + "loss": 1.8296, + "step": 22220 + }, + { + "epoch": 0.97, + "learning_rate": 3.180094543156431e-05, + "loss": 1.8857, + "step": 22230 + }, + { + "epoch": 0.97, + "learning_rate": 3.1780125797760735e-05, + "loss": 1.9119, + "step": 22240 + }, + { + "epoch": 0.97, + "learning_rate": 3.175930530354968e-05, + "loss": 1.8893, + "step": 22250 + }, + { + "epoch": 0.97, + "learning_rate": 3.173848395899453e-05, + "loss": 1.8697, + "step": 22260 + }, + { + "epoch": 0.97, + "learning_rate": 3.171766177415909e-05, + "loss": 1.901, + "step": 22270 + }, + { + "epoch": 0.98, + "learning_rate": 3.1696838759107575e-05, + "loss": 1.8397, + "step": 22280 + }, + { + "epoch": 0.98, + "learning_rate": 3.1676014923904594e-05, + "loss": 1.884, + "step": 22290 + }, + { + "epoch": 0.98, + "learning_rate": 3.165519027861517e-05, + "loss": 1.9316, + "step": 22300 + }, + { + "epoch": 0.98, + "eval_loss": 1.8402478694915771, + "eval_runtime": 11.775, + "eval_samples_per_second": 347.857, + "eval_steps_per_second": 21.741, + "step": 22300 + }, + { + "epoch": 0.98, + "learning_rate": 3.163436483330469e-05, + "loss": 1.8686, + "step": 22310 + }, + { + "epoch": 0.98, + "learning_rate": 3.161353859803895e-05, + "loss": 1.8972, + "step": 22320 + }, + { + "epoch": 0.98, + "learning_rate": 3.1592711582884107e-05, + "loss": 1.8481, + "step": 22330 + }, + { + "epoch": 0.98, + "learning_rate": 3.1571883797906726e-05, + "loss": 1.8679, + "step": 22340 + }, + { + "epoch": 0.98, + "learning_rate": 3.155105525317372e-05, + "loss": 1.8732, + "step": 22350 + }, + { + "epoch": 0.98, + "learning_rate": 3.1530225958752365e-05, + "loss": 1.8054, + "step": 22360 + }, + { + "epoch": 0.98, + "learning_rate": 3.1509395924710334e-05, + "loss": 1.9027, + "step": 22370 + }, + { + "epoch": 0.98, + "learning_rate": 3.1488565161115593e-05, + "loss": 1.8344, + "step": 22380 + }, + { + "epoch": 0.98, + "learning_rate": 3.1467733678036546e-05, + "loss": 1.8781, + "step": 22390 + }, + { + "epoch": 0.98, + "learning_rate": 3.1446901485541865e-05, + "loss": 1.8247, + "step": 22400 + }, + { + "epoch": 0.98, + "eval_loss": 1.8399176597595215, + "eval_runtime": 11.5771, + "eval_samples_per_second": 353.801, + "eval_steps_per_second": 22.113, + "step": 22400 + }, + { + "epoch": 0.98, + "learning_rate": 3.142606859370063e-05, + "loss": 1.8256, + "step": 22410 + }, + { + "epoch": 0.98, + "learning_rate": 3.140523501258219e-05, + "loss": 1.8657, + "step": 22420 + }, + { + "epoch": 0.98, + "learning_rate": 3.1384400752256293e-05, + "loss": 1.8599, + "step": 22430 + }, + { + "epoch": 0.98, + "learning_rate": 3.136356582279298e-05, + "loss": 1.8749, + "step": 22440 + }, + { + "epoch": 0.98, + "learning_rate": 3.134273023426263e-05, + "loss": 1.8761, + "step": 22450 + }, + { + "epoch": 0.98, + "learning_rate": 3.132189399673593e-05, + "loss": 1.8616, + "step": 22460 + }, + { + "epoch": 0.98, + "learning_rate": 3.1301057120283876e-05, + "loss": 1.865, + "step": 22470 + }, + { + "epoch": 0.98, + "learning_rate": 3.1280219614977786e-05, + "loss": 1.8759, + "step": 22480 + }, + { + "epoch": 0.98, + "learning_rate": 3.125938149088927e-05, + "loss": 1.8637, + "step": 22490 + }, + { + "epoch": 0.98, + "learning_rate": 3.123854275809027e-05, + "loss": 1.8637, + "step": 22500 + }, + { + "epoch": 0.98, + "eval_loss": 1.839624285697937, + "eval_runtime": 12.1106, + "eval_samples_per_second": 338.216, + "eval_steps_per_second": 21.139, + "step": 22500 + }, + { + "epoch": 0.99, + "learning_rate": 3.1217703426652975e-05, + "loss": 1.8921, + "step": 22510 + }, + { + "epoch": 0.99, + "learning_rate": 3.119686350664989e-05, + "loss": 1.8818, + "step": 22520 + }, + { + "epoch": 0.99, + "learning_rate": 3.117602300815379e-05, + "loss": 1.8604, + "step": 22530 + }, + { + "epoch": 0.99, + "learning_rate": 3.1155181941237754e-05, + "loss": 1.8978, + "step": 22540 + }, + { + "epoch": 0.99, + "learning_rate": 3.113434031597512e-05, + "loss": 1.8593, + "step": 22550 + }, + { + "epoch": 0.99, + "learning_rate": 3.1113498142439473e-05, + "loss": 1.8391, + "step": 22560 + }, + { + "epoch": 0.99, + "learning_rate": 3.1092655430704725e-05, + "loss": 1.8613, + "step": 22570 + }, + { + "epoch": 0.99, + "learning_rate": 3.1071812190844975e-05, + "loss": 1.8845, + "step": 22580 + }, + { + "epoch": 0.99, + "learning_rate": 3.105096843293463e-05, + "loss": 1.8934, + "step": 22590 + }, + { + "epoch": 0.99, + "learning_rate": 3.1030124167048326e-05, + "loss": 1.8463, + "step": 22600 + }, + { + "epoch": 0.99, + "eval_loss": 1.839435338973999, + "eval_runtime": 11.6439, + "eval_samples_per_second": 351.773, + "eval_steps_per_second": 21.986, + "step": 22600 + }, + { + "epoch": 0.99, + "learning_rate": 3.100927940326095e-05, + "loss": 1.8468, + "step": 22610 + }, + { + "epoch": 0.99, + "learning_rate": 3.0988434151647625e-05, + "loss": 1.8613, + "step": 22620 + }, + { + "epoch": 0.99, + "learning_rate": 3.096758842228371e-05, + "loss": 1.8547, + "step": 22630 + }, + { + "epoch": 0.99, + "learning_rate": 3.09467422252448e-05, + "loss": 1.8687, + "step": 22640 + }, + { + "epoch": 0.99, + "learning_rate": 3.0925895570606714e-05, + "loss": 1.8653, + "step": 22650 + }, + { + "epoch": 0.99, + "learning_rate": 3.0905048468445494e-05, + "loss": 1.8523, + "step": 22660 + }, + { + "epoch": 0.99, + "learning_rate": 3.088420092883738e-05, + "loss": 1.8353, + "step": 22670 + }, + { + "epoch": 0.99, + "learning_rate": 3.086335296185885e-05, + "loss": 1.8948, + "step": 22680 + }, + { + "epoch": 0.99, + "learning_rate": 3.084250457758659e-05, + "loss": 1.8711, + "step": 22690 + }, + { + "epoch": 0.99, + "learning_rate": 3.0821655786097465e-05, + "loss": 1.8616, + "step": 22700 + }, + { + "epoch": 0.99, + "eval_loss": 1.8392356634140015, + "eval_runtime": 11.5942, + "eval_samples_per_second": 353.279, + "eval_steps_per_second": 22.08, + "step": 22700 + }, + { + "epoch": 0.99, + "learning_rate": 3.080080659746853e-05, + "loss": 1.8775, + "step": 22710 + }, + { + "epoch": 0.99, + "learning_rate": 3.0779957021777084e-05, + "loss": 1.8427, + "step": 22720 + }, + { + "epoch": 1.0, + "learning_rate": 3.0759107069100556e-05, + "loss": 1.8399, + "step": 22730 + }, + { + "epoch": 1.0, + "learning_rate": 3.0738256749516575e-05, + "loss": 1.8356, + "step": 22740 + }, + { + "epoch": 1.0, + "learning_rate": 3.071740607310296e-05, + "loss": 1.8397, + "step": 22750 + }, + { + "epoch": 1.0, + "learning_rate": 3.0696555049937705e-05, + "loss": 1.8664, + "step": 22760 + }, + { + "epoch": 1.0, + "learning_rate": 3.067570369009895e-05, + "loss": 1.889, + "step": 22770 + }, + { + "epoch": 1.0, + "learning_rate": 3.0654852003665e-05, + "loss": 1.8426, + "step": 22780 + }, + { + "epoch": 1.0, + "learning_rate": 3.0634000000714345e-05, + "loss": 1.8929, + "step": 22790 + }, + { + "epoch": 1.0, + "learning_rate": 3.061314769132559e-05, + "loss": 1.9215, + "step": 22800 + }, + { + "epoch": 1.0, + "eval_loss": 1.839365005493164, + "eval_runtime": 11.7046, + "eval_samples_per_second": 349.947, + "eval_steps_per_second": 21.872, + "step": 22800 + }, + { + "epoch": 1.0, + "learning_rate": 3.0592295085577536e-05, + "loss": 1.8533, + "step": 22810 + }, + { + "epoch": 1.0, + "learning_rate": 3.0571442193549066e-05, + "loss": 1.8485, + "step": 22820 + }, + { + "epoch": 1.0, + "learning_rate": 3.055058902531925e-05, + "loss": 1.8331, + "step": 22830 + }, + { + "epoch": 1.0, + "learning_rate": 3.052973559096729e-05, + "loss": 1.8565, + "step": 22840 + }, + { + "epoch": 1.0, + "learning_rate": 3.0508881900572467e-05, + "loss": 1.8282, + "step": 22850 + }, + { + "epoch": 1.0, + "learning_rate": 3.0488027964214257e-05, + "loss": 1.7852, + "step": 22860 + }, + { + "epoch": 1.0, + "learning_rate": 3.046717379197219e-05, + "loss": 1.8608, + "step": 22870 + }, + { + "epoch": 1.0, + "learning_rate": 3.0446319393925966e-05, + "loss": 1.784, + "step": 22880 + }, + { + "epoch": 1.0, + "learning_rate": 3.042546478015535e-05, + "loss": 1.8352, + "step": 22890 + }, + { + "epoch": 1.0, + "learning_rate": 3.0404609960740227e-05, + "loss": 1.843, + "step": 22900 + }, + { + "epoch": 1.0, + "eval_loss": 1.8390878438949585, + "eval_runtime": 11.6738, + "eval_samples_per_second": 350.871, + "eval_steps_per_second": 21.929, + "step": 22900 + }, + { + "epoch": 1.0, + "learning_rate": 3.0383754945760583e-05, + "loss": 1.8813, + "step": 22910 + }, + { + "epoch": 1.0, + "learning_rate": 3.0362899745296515e-05, + "loss": 1.8617, + "step": 22920 + }, + { + "epoch": 1.0, + "learning_rate": 3.034204436942818e-05, + "loss": 1.8365, + "step": 22930 + }, + { + "epoch": 1.0, + "learning_rate": 3.0321188828235827e-05, + "loss": 1.8799, + "step": 22940 + }, + { + "epoch": 1.0, + "learning_rate": 3.0300333131799806e-05, + "loss": 1.8485, + "step": 22950 + }, + { + "epoch": 1.01, + "learning_rate": 3.0279477290200505e-05, + "loss": 1.8404, + "step": 22960 + }, + { + "epoch": 1.01, + "learning_rate": 3.0258621313518433e-05, + "loss": 1.8718, + "step": 22970 + }, + { + "epoch": 1.01, + "learning_rate": 3.0237765211834105e-05, + "loss": 1.8443, + "step": 22980 + }, + { + "epoch": 1.01, + "learning_rate": 3.0216908995228152e-05, + "loss": 1.8151, + "step": 22990 + }, + { + "epoch": 1.01, + "learning_rate": 3.0196052673781224e-05, + "loss": 1.778, + "step": 23000 + }, + { + "epoch": 1.01, + "eval_loss": 1.8393231630325317, + "eval_runtime": 12.9491, + "eval_samples_per_second": 316.316, + "eval_steps_per_second": 19.77, + "step": 23000 + }, + { + "epoch": 1.01, + "learning_rate": 3.0175196257574026e-05, + "loss": 1.8567, + "step": 23010 + }, + { + "epoch": 1.01, + "learning_rate": 3.015433975668733e-05, + "loss": 1.8481, + "step": 23020 + }, + { + "epoch": 1.01, + "learning_rate": 3.0133483181201915e-05, + "loss": 1.8392, + "step": 23030 + }, + { + "epoch": 1.01, + "learning_rate": 3.011262654119864e-05, + "loss": 1.8451, + "step": 23040 + }, + { + "epoch": 1.01, + "learning_rate": 3.0091769846758353e-05, + "loss": 1.8074, + "step": 23050 + }, + { + "epoch": 1.01, + "learning_rate": 3.0070913107961955e-05, + "loss": 1.8228, + "step": 23060 + }, + { + "epoch": 1.01, + "learning_rate": 3.0050056334890354e-05, + "loss": 1.8126, + "step": 23070 + }, + { + "epoch": 1.01, + "learning_rate": 3.002919953762448e-05, + "loss": 1.816, + "step": 23080 + }, + { + "epoch": 1.01, + "learning_rate": 3.0008342726245283e-05, + "loss": 1.8528, + "step": 23090 + }, + { + "epoch": 1.01, + "learning_rate": 2.9987485910833704e-05, + "loss": 1.8528, + "step": 23100 + }, + { + "epoch": 1.01, + "eval_loss": 1.8387082815170288, + "eval_runtime": 11.526, + "eval_samples_per_second": 355.37, + "eval_steps_per_second": 22.211, + "step": 23100 + }, + { + "epoch": 1.01, + "learning_rate": 2.9966629101470693e-05, + "loss": 1.8318, + "step": 23110 + }, + { + "epoch": 1.01, + "learning_rate": 2.99457723082372e-05, + "loss": 1.7999, + "step": 23120 + }, + { + "epoch": 1.01, + "learning_rate": 2.9924915541214165e-05, + "loss": 1.8028, + "step": 23130 + }, + { + "epoch": 1.01, + "learning_rate": 2.9904058810482516e-05, + "loss": 1.82, + "step": 23140 + }, + { + "epoch": 1.01, + "learning_rate": 2.9883202126123156e-05, + "loss": 1.827, + "step": 23150 + }, + { + "epoch": 1.01, + "learning_rate": 2.9862345498216987e-05, + "loss": 1.8541, + "step": 23160 + }, + { + "epoch": 1.01, + "learning_rate": 2.9841488936844853e-05, + "loss": 1.8234, + "step": 23170 + }, + { + "epoch": 1.01, + "learning_rate": 2.9820632452087598e-05, + "loss": 1.8179, + "step": 23180 + }, + { + "epoch": 1.02, + "learning_rate": 2.9799776054025987e-05, + "loss": 1.7522, + "step": 23190 + }, + { + "epoch": 1.02, + "learning_rate": 2.9778919752740805e-05, + "loss": 1.8131, + "step": 23200 + }, + { + "epoch": 1.02, + "eval_loss": 1.8382573127746582, + "eval_runtime": 11.5966, + "eval_samples_per_second": 353.208, + "eval_steps_per_second": 22.075, + "step": 23200 + }, + { + "epoch": 1.02, + "learning_rate": 2.9758063558312715e-05, + "loss": 1.8818, + "step": 23210 + }, + { + "epoch": 1.02, + "learning_rate": 2.9737207480822403e-05, + "loss": 1.8894, + "step": 23220 + }, + { + "epoch": 1.02, + "learning_rate": 2.9716351530350437e-05, + "loss": 1.8496, + "step": 23230 + }, + { + "epoch": 1.02, + "learning_rate": 2.9695495716977364e-05, + "loss": 1.8393, + "step": 23240 + }, + { + "epoch": 1.02, + "learning_rate": 2.9674640050783645e-05, + "loss": 1.8029, + "step": 23250 + }, + { + "epoch": 1.02, + "learning_rate": 2.9653784541849667e-05, + "loss": 1.836, + "step": 23260 + }, + { + "epoch": 1.02, + "learning_rate": 2.9632929200255768e-05, + "loss": 1.8207, + "step": 23270 + }, + { + "epoch": 1.02, + "learning_rate": 2.961207403608217e-05, + "loss": 1.8362, + "step": 23280 + }, + { + "epoch": 1.02, + "learning_rate": 2.9591219059409042e-05, + "loss": 1.845, + "step": 23290 + }, + { + "epoch": 1.02, + "learning_rate": 2.957036428031641e-05, + "loss": 1.841, + "step": 23300 + }, + { + "epoch": 1.02, + "eval_loss": 1.8381986618041992, + "eval_runtime": 11.5438, + "eval_samples_per_second": 354.822, + "eval_steps_per_second": 22.176, + "step": 23300 + }, + { + "epoch": 1.02, + "learning_rate": 2.9549509708884293e-05, + "loss": 1.7825, + "step": 23310 + }, + { + "epoch": 1.02, + "learning_rate": 2.9528655355192507e-05, + "loss": 1.8415, + "step": 23320 + }, + { + "epoch": 1.02, + "learning_rate": 2.9507801229320857e-05, + "loss": 1.8185, + "step": 23330 + }, + { + "epoch": 1.02, + "learning_rate": 2.9486947341348954e-05, + "loss": 1.7948, + "step": 23340 + }, + { + "epoch": 1.02, + "learning_rate": 2.9466093701356362e-05, + "loss": 1.8576, + "step": 23350 + }, + { + "epoch": 1.02, + "learning_rate": 2.9445240319422474e-05, + "loss": 1.8236, + "step": 23360 + }, + { + "epoch": 1.02, + "learning_rate": 2.9424387205626606e-05, + "loss": 1.821, + "step": 23370 + }, + { + "epoch": 1.02, + "learning_rate": 2.9403534370047895e-05, + "loss": 1.8555, + "step": 23380 + }, + { + "epoch": 1.02, + "learning_rate": 2.938268182276539e-05, + "loss": 1.8439, + "step": 23390 + }, + { + "epoch": 1.02, + "learning_rate": 2.9361829573857957e-05, + "loss": 1.887, + "step": 23400 + }, + { + "epoch": 1.02, + "eval_loss": 1.8380987644195557, + "eval_runtime": 11.4898, + "eval_samples_per_second": 356.489, + "eval_steps_per_second": 22.281, + "step": 23400 + }, + { + "epoch": 1.02, + "learning_rate": 2.934097763340436e-05, + "loss": 1.8797, + "step": 23410 + }, + { + "epoch": 1.03, + "learning_rate": 2.9320126011483177e-05, + "loss": 1.9105, + "step": 23420 + }, + { + "epoch": 1.03, + "learning_rate": 2.929927471817286e-05, + "loss": 1.8606, + "step": 23430 + }, + { + "epoch": 1.03, + "learning_rate": 2.9278423763551684e-05, + "loss": 1.8446, + "step": 23440 + }, + { + "epoch": 1.03, + "learning_rate": 2.9257573157697777e-05, + "loss": 1.8375, + "step": 23450 + }, + { + "epoch": 1.03, + "learning_rate": 2.923672291068906e-05, + "loss": 1.8811, + "step": 23460 + }, + { + "epoch": 1.03, + "learning_rate": 2.9215873032603347e-05, + "loss": 1.812, + "step": 23470 + }, + { + "epoch": 1.03, + "learning_rate": 2.9195023533518214e-05, + "loss": 1.8409, + "step": 23480 + }, + { + "epoch": 1.03, + "learning_rate": 2.917417442351107e-05, + "loss": 1.8653, + "step": 23490 + }, + { + "epoch": 1.03, + "learning_rate": 2.9153325712659155e-05, + "loss": 1.8448, + "step": 23500 + }, + { + "epoch": 1.03, + "eval_loss": 1.8378263711929321, + "eval_runtime": 11.9067, + "eval_samples_per_second": 344.008, + "eval_steps_per_second": 21.501, + "step": 23500 + }, + { + "epoch": 1.03, + "learning_rate": 2.9132477411039492e-05, + "loss": 1.753, + "step": 23510 + }, + { + "epoch": 1.03, + "learning_rate": 2.9111629528728927e-05, + "loss": 1.816, + "step": 23520 + }, + { + "epoch": 1.03, + "learning_rate": 2.9090782075804082e-05, + "loss": 1.8389, + "step": 23530 + }, + { + "epoch": 1.03, + "learning_rate": 2.906993506234139e-05, + "loss": 1.7968, + "step": 23540 + }, + { + "epoch": 1.03, + "learning_rate": 2.904908849841706e-05, + "loss": 1.8927, + "step": 23550 + }, + { + "epoch": 1.03, + "learning_rate": 2.9028242394107096e-05, + "loss": 1.8586, + "step": 23560 + }, + { + "epoch": 1.03, + "learning_rate": 2.900739675948725e-05, + "loss": 1.8503, + "step": 23570 + }, + { + "epoch": 1.03, + "learning_rate": 2.8986551604633098e-05, + "loss": 1.8244, + "step": 23580 + }, + { + "epoch": 1.03, + "learning_rate": 2.8965706939619926e-05, + "loss": 1.8273, + "step": 23590 + }, + { + "epoch": 1.03, + "learning_rate": 2.8944862774522838e-05, + "loss": 1.8311, + "step": 23600 + }, + { + "epoch": 1.03, + "eval_loss": 1.837775707244873, + "eval_runtime": 11.4396, + "eval_samples_per_second": 358.055, + "eval_steps_per_second": 22.378, + "step": 23600 + }, + { + "epoch": 1.03, + "learning_rate": 2.8924019119416648e-05, + "loss": 1.7796, + "step": 23610 + }, + { + "epoch": 1.03, + "learning_rate": 2.890317598437596e-05, + "loss": 1.8699, + "step": 23620 + }, + { + "epoch": 1.03, + "learning_rate": 2.88823333794751e-05, + "loss": 1.8105, + "step": 23630 + }, + { + "epoch": 1.03, + "learning_rate": 2.8861491314788167e-05, + "loss": 1.8384, + "step": 23640 + }, + { + "epoch": 1.04, + "learning_rate": 2.884064980038896e-05, + "loss": 1.8276, + "step": 23650 + }, + { + "epoch": 1.04, + "learning_rate": 2.881980884635106e-05, + "loss": 1.8022, + "step": 23660 + }, + { + "epoch": 1.04, + "learning_rate": 2.8798968462747725e-05, + "loss": 1.7894, + "step": 23670 + }, + { + "epoch": 1.04, + "learning_rate": 2.8778128659651985e-05, + "loss": 1.842, + "step": 23680 + }, + { + "epoch": 1.04, + "learning_rate": 2.875728944713655e-05, + "loss": 1.8128, + "step": 23690 + }, + { + "epoch": 1.04, + "learning_rate": 2.873645083527388e-05, + "loss": 1.8564, + "step": 23700 + }, + { + "epoch": 1.04, + "eval_loss": 1.8370027542114258, + "eval_runtime": 13.2784, + "eval_samples_per_second": 308.47, + "eval_steps_per_second": 19.279, + "step": 23700 + }, + { + "epoch": 1.04, + "learning_rate": 2.8715612834136125e-05, + "loss": 1.8202, + "step": 23710 + }, + { + "epoch": 1.04, + "learning_rate": 2.8694775453795117e-05, + "loss": 1.7821, + "step": 23720 + }, + { + "epoch": 1.04, + "learning_rate": 2.867393870432244e-05, + "loss": 1.8326, + "step": 23730 + }, + { + "epoch": 1.04, + "learning_rate": 2.865310259578933e-05, + "loss": 1.8237, + "step": 23740 + }, + { + "epoch": 1.04, + "learning_rate": 2.863226713826674e-05, + "loss": 1.8495, + "step": 23750 + }, + { + "epoch": 1.04, + "learning_rate": 2.8611432341825292e-05, + "loss": 1.8296, + "step": 23760 + }, + { + "epoch": 1.04, + "learning_rate": 2.8590598216535298e-05, + "loss": 1.8261, + "step": 23770 + }, + { + "epoch": 1.04, + "learning_rate": 2.8569764772466733e-05, + "loss": 1.834, + "step": 23780 + }, + { + "epoch": 1.04, + "learning_rate": 2.854893201968926e-05, + "loss": 1.8588, + "step": 23790 + }, + { + "epoch": 1.04, + "learning_rate": 2.8528099968272194e-05, + "loss": 1.8609, + "step": 23800 + }, + { + "epoch": 1.04, + "eval_loss": 1.837214708328247, + "eval_runtime": 11.7272, + "eval_samples_per_second": 349.274, + "eval_steps_per_second": 21.83, + "step": 23800 + }, + { + "epoch": 1.04, + "learning_rate": 2.850726862828452e-05, + "loss": 1.8529, + "step": 23810 + }, + { + "epoch": 1.04, + "learning_rate": 2.8486438009794868e-05, + "loss": 1.7861, + "step": 23820 + }, + { + "epoch": 1.04, + "learning_rate": 2.8465608122871547e-05, + "loss": 1.8063, + "step": 23830 + }, + { + "epoch": 1.04, + "learning_rate": 2.8444778977582455e-05, + "loss": 1.8232, + "step": 23840 + }, + { + "epoch": 1.04, + "learning_rate": 2.842395058399521e-05, + "loss": 1.8102, + "step": 23850 + }, + { + "epoch": 1.04, + "learning_rate": 2.840312295217699e-05, + "loss": 1.8094, + "step": 23860 + }, + { + "epoch": 1.04, + "learning_rate": 2.8382296092194664e-05, + "loss": 1.831, + "step": 23870 + }, + { + "epoch": 1.05, + "learning_rate": 2.836147001411469e-05, + "loss": 1.8275, + "step": 23880 + }, + { + "epoch": 1.05, + "learning_rate": 2.8340644728003173e-05, + "loss": 1.8325, + "step": 23890 + }, + { + "epoch": 1.05, + "learning_rate": 2.8319820243925808e-05, + "loss": 1.8287, + "step": 23900 + }, + { + "epoch": 1.05, + "eval_loss": 1.8369483947753906, + "eval_runtime": 11.5483, + "eval_samples_per_second": 354.685, + "eval_steps_per_second": 22.168, + "step": 23900 + }, + { + "epoch": 1.05, + "learning_rate": 2.829899657194794e-05, + "loss": 1.8468, + "step": 23910 + }, + { + "epoch": 1.05, + "learning_rate": 2.827817372213448e-05, + "loss": 1.8562, + "step": 23920 + }, + { + "epoch": 1.05, + "learning_rate": 2.8257351704549975e-05, + "loss": 1.8662, + "step": 23930 + }, + { + "epoch": 1.05, + "learning_rate": 2.8236530529258547e-05, + "loss": 1.8251, + "step": 23940 + }, + { + "epoch": 1.05, + "learning_rate": 2.821571020632393e-05, + "loss": 1.805, + "step": 23950 + }, + { + "epoch": 1.05, + "learning_rate": 2.8194890745809443e-05, + "loss": 1.802, + "step": 23960 + }, + { + "epoch": 1.05, + "learning_rate": 2.817407215777795e-05, + "loss": 1.7749, + "step": 23970 + }, + { + "epoch": 1.05, + "learning_rate": 2.8153254452291956e-05, + "loss": 1.7909, + "step": 23980 + }, + { + "epoch": 1.05, + "learning_rate": 2.8132437639413486e-05, + "loss": 1.7694, + "step": 23990 + }, + { + "epoch": 1.05, + "learning_rate": 2.8111621729204177e-05, + "loss": 1.8344, + "step": 24000 + }, + { + "epoch": 1.05, + "eval_loss": 1.8369200229644775, + "eval_runtime": 11.8804, + "eval_samples_per_second": 344.77, + "eval_steps_per_second": 21.548, + "step": 24000 + }, + { + "epoch": 1.05, + "learning_rate": 2.809080673172519e-05, + "loss": 1.7981, + "step": 24010 + }, + { + "epoch": 1.05, + "learning_rate": 2.8069992657037273e-05, + "loss": 1.8076, + "step": 24020 + }, + { + "epoch": 1.05, + "learning_rate": 2.804917951520071e-05, + "loss": 1.794, + "step": 24030 + }, + { + "epoch": 1.05, + "learning_rate": 2.8028367316275357e-05, + "loss": 1.8598, + "step": 24040 + }, + { + "epoch": 1.05, + "learning_rate": 2.8007556070320583e-05, + "loss": 1.8616, + "step": 24050 + }, + { + "epoch": 1.05, + "learning_rate": 2.7986745787395333e-05, + "loss": 1.8413, + "step": 24060 + }, + { + "epoch": 1.05, + "learning_rate": 2.796593647755804e-05, + "loss": 1.8168, + "step": 24070 + }, + { + "epoch": 1.05, + "learning_rate": 2.794512815086673e-05, + "loss": 1.7945, + "step": 24080 + }, + { + "epoch": 1.05, + "learning_rate": 2.792432081737887e-05, + "loss": 1.8828, + "step": 24090 + }, + { + "epoch": 1.05, + "learning_rate": 2.7903514487151538e-05, + "loss": 1.8317, + "step": 24100 + }, + { + "epoch": 1.05, + "eval_loss": 1.8366378545761108, + "eval_runtime": 12.7159, + "eval_samples_per_second": 322.117, + "eval_steps_per_second": 20.132, + "step": 24100 + }, + { + "epoch": 1.06, + "learning_rate": 2.7882709170241247e-05, + "loss": 1.8135, + "step": 24110 + }, + { + "epoch": 1.06, + "learning_rate": 2.7861904876704087e-05, + "loss": 1.8198, + "step": 24120 + }, + { + "epoch": 1.06, + "learning_rate": 2.7841101616595597e-05, + "loss": 1.8092, + "step": 24130 + }, + { + "epoch": 1.06, + "learning_rate": 2.7820299399970864e-05, + "loss": 1.8312, + "step": 24140 + }, + { + "epoch": 1.06, + "learning_rate": 2.779949823688443e-05, + "loss": 1.8219, + "step": 24150 + }, + { + "epoch": 1.06, + "learning_rate": 2.777869813739036e-05, + "loss": 1.8105, + "step": 24160 + }, + { + "epoch": 1.06, + "learning_rate": 2.775789911154218e-05, + "loss": 1.864, + "step": 24170 + }, + { + "epoch": 1.06, + "learning_rate": 2.7737101169392924e-05, + "loss": 1.7589, + "step": 24180 + }, + { + "epoch": 1.06, + "learning_rate": 2.7716304320995084e-05, + "loss": 1.8094, + "step": 24190 + }, + { + "epoch": 1.06, + "learning_rate": 2.7695508576400596e-05, + "loss": 1.8204, + "step": 24200 + }, + { + "epoch": 1.06, + "eval_loss": 1.836849570274353, + "eval_runtime": 11.6165, + "eval_samples_per_second": 352.601, + "eval_steps_per_second": 22.038, + "step": 24200 + }, + { + "epoch": 1.06, + "learning_rate": 2.767471394566094e-05, + "loss": 1.8652, + "step": 24210 + }, + { + "epoch": 1.06, + "learning_rate": 2.7653920438826964e-05, + "loss": 1.8454, + "step": 24220 + }, + { + "epoch": 1.06, + "learning_rate": 2.763312806594906e-05, + "loss": 1.7737, + "step": 24230 + }, + { + "epoch": 1.06, + "learning_rate": 2.7612336837077e-05, + "loss": 1.7854, + "step": 24240 + }, + { + "epoch": 1.06, + "learning_rate": 2.7591546762260058e-05, + "loss": 1.8266, + "step": 24250 + }, + { + "epoch": 1.06, + "learning_rate": 2.7570757851546904e-05, + "loss": 1.7916, + "step": 24260 + }, + { + "epoch": 1.06, + "learning_rate": 2.754997011498568e-05, + "loss": 1.8398, + "step": 24270 + }, + { + "epoch": 1.06, + "learning_rate": 2.7529183562623943e-05, + "loss": 1.8321, + "step": 24280 + }, + { + "epoch": 1.06, + "learning_rate": 2.7508398204508692e-05, + "loss": 1.8409, + "step": 24290 + }, + { + "epoch": 1.06, + "learning_rate": 2.7487614050686325e-05, + "loss": 1.8149, + "step": 24300 + }, + { + "epoch": 1.06, + "eval_loss": 1.836502194404602, + "eval_runtime": 11.6567, + "eval_samples_per_second": 351.386, + "eval_steps_per_second": 21.962, + "step": 24300 + }, + { + "epoch": 1.06, + "learning_rate": 2.746683111120269e-05, + "loss": 1.8265, + "step": 24310 + }, + { + "epoch": 1.06, + "learning_rate": 2.744604939610301e-05, + "loss": 1.8582, + "step": 24320 + }, + { + "epoch": 1.07, + "learning_rate": 2.7425268915431954e-05, + "loss": 1.8372, + "step": 24330 + }, + { + "epoch": 1.07, + "learning_rate": 2.740448967923356e-05, + "loss": 1.8785, + "step": 24340 + }, + { + "epoch": 1.07, + "learning_rate": 2.7383711697551307e-05, + "loss": 1.8081, + "step": 24350 + }, + { + "epoch": 1.07, + "learning_rate": 2.7362934980428004e-05, + "loss": 1.8398, + "step": 24360 + }, + { + "epoch": 1.07, + "learning_rate": 2.7342159537905934e-05, + "loss": 1.8358, + "step": 24370 + }, + { + "epoch": 1.07, + "learning_rate": 2.7321385380026666e-05, + "loss": 1.8095, + "step": 24380 + }, + { + "epoch": 1.07, + "learning_rate": 2.7300612516831248e-05, + "loss": 1.8128, + "step": 24390 + }, + { + "epoch": 1.07, + "learning_rate": 2.727984095836001e-05, + "loss": 1.8152, + "step": 24400 + }, + { + "epoch": 1.07, + "eval_loss": 1.8360520601272583, + "eval_runtime": 13.1567, + "eval_samples_per_second": 311.323, + "eval_steps_per_second": 19.458, + "step": 24400 + }, + { + "epoch": 1.07, + "learning_rate": 2.7259070714652725e-05, + "loss": 1.797, + "step": 24410 + }, + { + "epoch": 1.07, + "learning_rate": 2.723830179574848e-05, + "loss": 1.8195, + "step": 24420 + }, + { + "epoch": 1.07, + "learning_rate": 2.721753421168576e-05, + "loss": 1.8132, + "step": 24430 + }, + { + "epoch": 1.07, + "learning_rate": 2.7196767972502376e-05, + "loss": 1.8053, + "step": 24440 + }, + { + "epoch": 1.07, + "learning_rate": 2.717600308823549e-05, + "loss": 1.8321, + "step": 24450 + }, + { + "epoch": 1.07, + "learning_rate": 2.7155239568921643e-05, + "loss": 1.8653, + "step": 24460 + }, + { + "epoch": 1.07, + "learning_rate": 2.7134477424596657e-05, + "loss": 1.8642, + "step": 24470 + }, + { + "epoch": 1.07, + "learning_rate": 2.711371666529577e-05, + "loss": 1.8297, + "step": 24480 + }, + { + "epoch": 1.07, + "learning_rate": 2.7092957301053448e-05, + "loss": 1.8048, + "step": 24490 + }, + { + "epoch": 1.07, + "learning_rate": 2.70721993419036e-05, + "loss": 1.8603, + "step": 24500 + }, + { + "epoch": 1.07, + "eval_loss": 1.8359553813934326, + "eval_runtime": 12.136, + "eval_samples_per_second": 337.508, + "eval_steps_per_second": 21.094, + "step": 24500 + }, + { + "epoch": 1.07, + "learning_rate": 2.705144279787935e-05, + "loss": 1.8392, + "step": 24510 + }, + { + "epoch": 1.07, + "learning_rate": 2.70306876790132e-05, + "loss": 1.8608, + "step": 24520 + }, + { + "epoch": 1.07, + "learning_rate": 2.7009933995336948e-05, + "loss": 1.835, + "step": 24530 + }, + { + "epoch": 1.07, + "learning_rate": 2.6989181756881696e-05, + "loss": 1.8186, + "step": 24540 + }, + { + "epoch": 1.07, + "learning_rate": 2.6968430973677844e-05, + "loss": 1.8125, + "step": 24550 + }, + { + "epoch": 1.08, + "learning_rate": 2.6947681655755097e-05, + "loss": 1.8156, + "step": 24560 + }, + { + "epoch": 1.08, + "learning_rate": 2.6926933813142446e-05, + "loss": 1.824, + "step": 24570 + }, + { + "epoch": 1.08, + "learning_rate": 2.690618745586817e-05, + "loss": 1.8335, + "step": 24580 + }, + { + "epoch": 1.08, + "learning_rate": 2.6885442593959828e-05, + "loss": 1.7904, + "step": 24590 + }, + { + "epoch": 1.08, + "learning_rate": 2.686469923744427e-05, + "loss": 1.8317, + "step": 24600 + }, + { + "epoch": 1.08, + "eval_loss": 1.8358409404754639, + "eval_runtime": 11.6144, + "eval_samples_per_second": 352.667, + "eval_steps_per_second": 22.042, + "step": 24600 + }, + { + "epoch": 1.08, + "learning_rate": 2.6843957396347592e-05, + "loss": 1.7796, + "step": 24610 + }, + { + "epoch": 1.08, + "learning_rate": 2.6823217080695197e-05, + "loss": 1.8364, + "step": 24620 + }, + { + "epoch": 1.08, + "learning_rate": 2.6802478300511687e-05, + "loss": 1.8251, + "step": 24630 + }, + { + "epoch": 1.08, + "learning_rate": 2.6781741065821003e-05, + "loss": 1.8535, + "step": 24640 + }, + { + "epoch": 1.08, + "learning_rate": 2.676100538664626e-05, + "loss": 1.8141, + "step": 24650 + }, + { + "epoch": 1.08, + "learning_rate": 2.6740271273009896e-05, + "loss": 1.8181, + "step": 24660 + }, + { + "epoch": 1.08, + "learning_rate": 2.671953873493353e-05, + "loss": 1.8209, + "step": 24670 + }, + { + "epoch": 1.08, + "learning_rate": 2.669880778243804e-05, + "loss": 1.7613, + "step": 24680 + }, + { + "epoch": 1.08, + "learning_rate": 2.6678078425543563e-05, + "loss": 1.83, + "step": 24690 + }, + { + "epoch": 1.08, + "learning_rate": 2.6657350674269424e-05, + "loss": 1.8316, + "step": 24700 + }, + { + "epoch": 1.08, + "eval_loss": 1.835756778717041, + "eval_runtime": 11.6376, + "eval_samples_per_second": 351.961, + "eval_steps_per_second": 21.998, + "step": 24700 + }, + { + "epoch": 1.08, + "learning_rate": 2.6636624538634208e-05, + "loss": 1.8301, + "step": 24710 + }, + { + "epoch": 1.08, + "learning_rate": 2.6615900028655687e-05, + "loss": 1.8184, + "step": 24720 + }, + { + "epoch": 1.08, + "learning_rate": 2.6595177154350883e-05, + "loss": 1.8037, + "step": 24730 + }, + { + "epoch": 1.08, + "learning_rate": 2.6574455925735977e-05, + "loss": 1.8436, + "step": 24740 + }, + { + "epoch": 1.08, + "learning_rate": 2.6553736352826426e-05, + "loss": 1.8369, + "step": 24750 + }, + { + "epoch": 1.08, + "learning_rate": 2.6533018445636797e-05, + "loss": 1.8328, + "step": 24760 + }, + { + "epoch": 1.08, + "learning_rate": 2.651230221418095e-05, + "loss": 1.8164, + "step": 24770 + }, + { + "epoch": 1.08, + "learning_rate": 2.6491587668471846e-05, + "loss": 1.8137, + "step": 24780 + }, + { + "epoch": 1.09, + "learning_rate": 2.6470874818521696e-05, + "loss": 1.8131, + "step": 24790 + }, + { + "epoch": 1.09, + "learning_rate": 2.6450163674341847e-05, + "loss": 1.8169, + "step": 24800 + }, + { + "epoch": 1.09, + "eval_loss": 1.8353500366210938, + "eval_runtime": 11.5666, + "eval_samples_per_second": 354.122, + "eval_steps_per_second": 22.133, + "step": 24800 + }, + { + "epoch": 1.09, + "learning_rate": 2.6429454245942863e-05, + "loss": 1.8232, + "step": 24810 + }, + { + "epoch": 1.09, + "learning_rate": 2.640874654333443e-05, + "loss": 1.7629, + "step": 24820 + }, + { + "epoch": 1.09, + "learning_rate": 2.6388040576525452e-05, + "loss": 1.8425, + "step": 24830 + }, + { + "epoch": 1.09, + "learning_rate": 2.636733635552395e-05, + "loss": 1.8757, + "step": 24840 + }, + { + "epoch": 1.09, + "learning_rate": 2.634663389033713e-05, + "loss": 1.8382, + "step": 24850 + }, + { + "epoch": 1.09, + "learning_rate": 2.632593319097133e-05, + "loss": 1.8147, + "step": 24860 + }, + { + "epoch": 1.09, + "learning_rate": 2.6305234267432055e-05, + "loss": 1.8169, + "step": 24870 + }, + { + "epoch": 1.09, + "learning_rate": 2.6284537129723926e-05, + "loss": 1.8293, + "step": 24880 + }, + { + "epoch": 1.09, + "learning_rate": 2.6263841787850735e-05, + "loss": 1.8217, + "step": 24890 + }, + { + "epoch": 1.09, + "learning_rate": 2.6243148251815364e-05, + "loss": 1.8505, + "step": 24900 + }, + { + "epoch": 1.09, + "eval_loss": 1.8351911306381226, + "eval_runtime": 13.3476, + "eval_samples_per_second": 306.872, + "eval_steps_per_second": 19.179, + "step": 24900 + }, + { + "epoch": 1.09, + "learning_rate": 2.6222456531619848e-05, + "loss": 1.8278, + "step": 24910 + }, + { + "epoch": 1.09, + "learning_rate": 2.6201766637265354e-05, + "loss": 1.8546, + "step": 24920 + }, + { + "epoch": 1.09, + "learning_rate": 2.6181078578752133e-05, + "loss": 1.8576, + "step": 24930 + }, + { + "epoch": 1.09, + "learning_rate": 2.6160392366079592e-05, + "loss": 1.8243, + "step": 24940 + }, + { + "epoch": 1.09, + "learning_rate": 2.6139708009246196e-05, + "loss": 1.7829, + "step": 24950 + }, + { + "epoch": 1.09, + "learning_rate": 2.611902551824956e-05, + "loss": 1.845, + "step": 24960 + }, + { + "epoch": 1.09, + "learning_rate": 2.609834490308637e-05, + "loss": 1.8217, + "step": 24970 + }, + { + "epoch": 1.09, + "learning_rate": 2.607766617375241e-05, + "loss": 1.8471, + "step": 24980 + }, + { + "epoch": 1.09, + "learning_rate": 2.6056989340242555e-05, + "loss": 1.8651, + "step": 24990 + }, + { + "epoch": 1.09, + "learning_rate": 2.6036314412550773e-05, + "loss": 1.795, + "step": 25000 + }, + { + "epoch": 1.09, + "eval_loss": 1.8348760604858398, + "eval_runtime": 11.8701, + "eval_samples_per_second": 345.068, + "eval_steps_per_second": 21.567, + "step": 25000 + }, + { + "epoch": 1.09, + "learning_rate": 2.601564140067007e-05, + "loss": 1.7818, + "step": 25010 + }, + { + "epoch": 1.1, + "learning_rate": 2.5994970314592603e-05, + "loss": 1.785, + "step": 25020 + }, + { + "epoch": 1.1, + "learning_rate": 2.5974301164309514e-05, + "loss": 1.8312, + "step": 25030 + }, + { + "epoch": 1.1, + "learning_rate": 2.595363395981106e-05, + "loss": 1.8278, + "step": 25040 + }, + { + "epoch": 1.1, + "learning_rate": 2.593296871108654e-05, + "loss": 1.8119, + "step": 25050 + }, + { + "epoch": 1.1, + "learning_rate": 2.591230542812433e-05, + "loss": 1.8369, + "step": 25060 + }, + { + "epoch": 1.1, + "learning_rate": 2.5891644120911817e-05, + "loss": 1.8243, + "step": 25070 + }, + { + "epoch": 1.1, + "learning_rate": 2.5870984799435472e-05, + "loss": 1.8056, + "step": 25080 + }, + { + "epoch": 1.1, + "learning_rate": 2.585032747368077e-05, + "loss": 1.8389, + "step": 25090 + }, + { + "epoch": 1.1, + "learning_rate": 2.582967215363226e-05, + "loss": 1.8413, + "step": 25100 + }, + { + "epoch": 1.1, + "eval_loss": 1.8349188566207886, + "eval_runtime": 12.9593, + "eval_samples_per_second": 316.066, + "eval_steps_per_second": 19.754, + "step": 25100 + }, + { + "epoch": 1.1, + "learning_rate": 2.5809018849273484e-05, + "loss": 1.8708, + "step": 25110 + }, + { + "epoch": 1.1, + "learning_rate": 2.5788367570587047e-05, + "loss": 1.8269, + "step": 25120 + }, + { + "epoch": 1.1, + "learning_rate": 2.5767718327554532e-05, + "loss": 1.7818, + "step": 25130 + }, + { + "epoch": 1.1, + "learning_rate": 2.5747071130156587e-05, + "loss": 1.8257, + "step": 25140 + }, + { + "epoch": 1.1, + "learning_rate": 2.5726425988372825e-05, + "loss": 1.8555, + "step": 25150 + }, + { + "epoch": 1.1, + "learning_rate": 2.5705782912181888e-05, + "loss": 1.8056, + "step": 25160 + }, + { + "epoch": 1.1, + "learning_rate": 2.568514191156143e-05, + "loss": 1.8413, + "step": 25170 + }, + { + "epoch": 1.1, + "learning_rate": 2.5664502996488072e-05, + "loss": 1.8206, + "step": 25180 + }, + { + "epoch": 1.1, + "learning_rate": 2.5643866176937462e-05, + "loss": 1.868, + "step": 25190 + }, + { + "epoch": 1.1, + "learning_rate": 2.5623231462884204e-05, + "loss": 1.8046, + "step": 25200 + }, + { + "epoch": 1.1, + "eval_loss": 1.8350028991699219, + "eval_runtime": 12.8788, + "eval_samples_per_second": 318.041, + "eval_steps_per_second": 19.878, + "step": 25200 + }, + { + "epoch": 1.1, + "learning_rate": 2.5602598864301912e-05, + "loss": 1.8307, + "step": 25210 + }, + { + "epoch": 1.1, + "learning_rate": 2.5581968391163145e-05, + "loss": 1.7947, + "step": 25220 + }, + { + "epoch": 1.1, + "learning_rate": 2.5561340053439473e-05, + "loss": 1.8254, + "step": 25230 + }, + { + "epoch": 1.1, + "learning_rate": 2.5540713861101395e-05, + "loss": 1.8283, + "step": 25240 + }, + { + "epoch": 1.11, + "learning_rate": 2.5520089824118422e-05, + "loss": 1.8242, + "step": 25250 + }, + { + "epoch": 1.11, + "learning_rate": 2.5499467952458946e-05, + "loss": 1.8352, + "step": 25260 + }, + { + "epoch": 1.11, + "learning_rate": 2.547884825609041e-05, + "loss": 1.8657, + "step": 25270 + }, + { + "epoch": 1.11, + "learning_rate": 2.545823074497911e-05, + "loss": 1.8535, + "step": 25280 + }, + { + "epoch": 1.11, + "learning_rate": 2.5437615429090373e-05, + "loss": 1.8455, + "step": 25290 + }, + { + "epoch": 1.11, + "learning_rate": 2.541700231838839e-05, + "loss": 1.8007, + "step": 25300 + }, + { + "epoch": 1.11, + "eval_loss": 1.8347127437591553, + "eval_runtime": 12.1617, + "eval_samples_per_second": 336.796, + "eval_steps_per_second": 21.05, + "step": 25300 + }, + { + "epoch": 1.11, + "learning_rate": 2.5396391422836335e-05, + "loss": 1.8574, + "step": 25310 + }, + { + "epoch": 1.11, + "learning_rate": 2.5375782752396286e-05, + "loss": 1.8717, + "step": 25320 + }, + { + "epoch": 1.11, + "learning_rate": 2.535517631702927e-05, + "loss": 1.8575, + "step": 25330 + }, + { + "epoch": 1.11, + "learning_rate": 2.5334572126695204e-05, + "loss": 1.8246, + "step": 25340 + }, + { + "epoch": 1.11, + "learning_rate": 2.5313970191352945e-05, + "loss": 1.775, + "step": 25350 + }, + { + "epoch": 1.11, + "learning_rate": 2.529337052096024e-05, + "loss": 1.834, + "step": 25360 + }, + { + "epoch": 1.11, + "learning_rate": 2.527277312547376e-05, + "loss": 1.8294, + "step": 25370 + }, + { + "epoch": 1.11, + "learning_rate": 2.5252178014849072e-05, + "loss": 1.785, + "step": 25380 + }, + { + "epoch": 1.11, + "learning_rate": 2.523158519904061e-05, + "loss": 1.7699, + "step": 25390 + }, + { + "epoch": 1.11, + "learning_rate": 2.521099468800175e-05, + "loss": 1.8178, + "step": 25400 + }, + { + "epoch": 1.11, + "eval_loss": 1.8346803188323975, + "eval_runtime": 11.7822, + "eval_samples_per_second": 347.642, + "eval_steps_per_second": 21.728, + "step": 25400 + }, + { + "epoch": 1.11, + "learning_rate": 2.5190406491684708e-05, + "loss": 1.8216, + "step": 25410 + }, + { + "epoch": 1.11, + "learning_rate": 2.5169820620040608e-05, + "loss": 1.8559, + "step": 25420 + }, + { + "epoch": 1.11, + "learning_rate": 2.5149237083019438e-05, + "loss": 1.8424, + "step": 25430 + }, + { + "epoch": 1.11, + "learning_rate": 2.5128655890570067e-05, + "loss": 1.8335, + "step": 25440 + }, + { + "epoch": 1.11, + "learning_rate": 2.5108077052640223e-05, + "loss": 1.8141, + "step": 25450 + }, + { + "epoch": 1.11, + "learning_rate": 2.5087500579176504e-05, + "loss": 1.851, + "step": 25460 + }, + { + "epoch": 1.11, + "learning_rate": 2.506692648012435e-05, + "loss": 1.9212, + "step": 25470 + }, + { + "epoch": 1.12, + "learning_rate": 2.5046354765428074e-05, + "loss": 1.845, + "step": 25480 + }, + { + "epoch": 1.12, + "learning_rate": 2.502578544503081e-05, + "loss": 1.8246, + "step": 25490 + }, + { + "epoch": 1.12, + "learning_rate": 2.5005218528874574e-05, + "loss": 1.8055, + "step": 25500 + }, + { + "epoch": 1.12, + "eval_loss": 1.834632158279419, + "eval_runtime": 11.8751, + "eval_samples_per_second": 344.925, + "eval_steps_per_second": 21.558, + "step": 25500 + }, + { + "epoch": 1.12, + "learning_rate": 2.4984654026900173e-05, + "loss": 1.8475, + "step": 25510 + }, + { + "epoch": 1.12, + "learning_rate": 2.4964091949047294e-05, + "loss": 1.8136, + "step": 25520 + }, + { + "epoch": 1.12, + "learning_rate": 2.49435323052544e-05, + "loss": 1.7852, + "step": 25530 + }, + { + "epoch": 1.12, + "learning_rate": 2.4922975105458838e-05, + "loss": 1.7962, + "step": 25540 + }, + { + "epoch": 1.12, + "learning_rate": 2.4902420359596706e-05, + "loss": 1.8233, + "step": 25550 + }, + { + "epoch": 1.12, + "learning_rate": 2.4881868077602996e-05, + "loss": 1.8193, + "step": 25560 + }, + { + "epoch": 1.12, + "learning_rate": 2.4861318269411427e-05, + "loss": 1.8325, + "step": 25570 + }, + { + "epoch": 1.12, + "learning_rate": 2.484077094495458e-05, + "loss": 1.8399, + "step": 25580 + }, + { + "epoch": 1.12, + "learning_rate": 2.4820226114163803e-05, + "loss": 1.7936, + "step": 25590 + }, + { + "epoch": 1.12, + "learning_rate": 2.4799683786969268e-05, + "loss": 1.7853, + "step": 25600 + }, + { + "epoch": 1.12, + "eval_loss": 1.834519863128662, + "eval_runtime": 11.6394, + "eval_samples_per_second": 351.907, + "eval_steps_per_second": 21.994, + "step": 25600 + }, + { + "epoch": 1.12, + "learning_rate": 2.477914397329991e-05, + "loss": 1.8067, + "step": 25610 + }, + { + "epoch": 1.12, + "learning_rate": 2.4758606683083456e-05, + "loss": 1.8196, + "step": 25620 + }, + { + "epoch": 1.12, + "learning_rate": 2.4738071926246435e-05, + "loss": 1.7976, + "step": 25630 + }, + { + "epoch": 1.12, + "learning_rate": 2.4717539712714103e-05, + "loss": 1.8813, + "step": 25640 + }, + { + "epoch": 1.12, + "learning_rate": 2.4697010052410555e-05, + "loss": 1.7963, + "step": 25650 + }, + { + "epoch": 1.12, + "learning_rate": 2.4676482955258578e-05, + "loss": 1.8388, + "step": 25660 + }, + { + "epoch": 1.12, + "learning_rate": 2.465595843117979e-05, + "loss": 1.8194, + "step": 25670 + }, + { + "epoch": 1.12, + "learning_rate": 2.46354364900945e-05, + "loss": 1.83, + "step": 25680 + }, + { + "epoch": 1.12, + "learning_rate": 2.461491714192182e-05, + "loss": 1.8395, + "step": 25690 + }, + { + "epoch": 1.13, + "learning_rate": 2.4594400396579582e-05, + "loss": 1.8314, + "step": 25700 + }, + { + "epoch": 1.13, + "eval_loss": 1.8341410160064697, + "eval_runtime": 13.3901, + "eval_samples_per_second": 305.899, + "eval_steps_per_second": 19.119, + "step": 25700 + }, + { + "epoch": 1.13, + "learning_rate": 2.457388626398437e-05, + "loss": 1.8478, + "step": 25710 + }, + { + "epoch": 1.13, + "learning_rate": 2.4553374754051494e-05, + "loss": 1.8013, + "step": 25720 + }, + { + "epoch": 1.13, + "learning_rate": 2.4532865876695022e-05, + "loss": 1.8231, + "step": 25730 + }, + { + "epoch": 1.13, + "learning_rate": 2.4512359641827707e-05, + "loss": 1.8409, + "step": 25740 + }, + { + "epoch": 1.13, + "learning_rate": 2.4491856059361073e-05, + "loss": 1.8713, + "step": 25750 + }, + { + "epoch": 1.13, + "learning_rate": 2.4471355139205323e-05, + "loss": 1.8156, + "step": 25760 + }, + { + "epoch": 1.13, + "learning_rate": 2.44508568912694e-05, + "loss": 1.8367, + "step": 25770 + }, + { + "epoch": 1.13, + "learning_rate": 2.4430361325460937e-05, + "loss": 1.8107, + "step": 25780 + }, + { + "epoch": 1.13, + "learning_rate": 2.440986845168629e-05, + "loss": 1.7991, + "step": 25790 + }, + { + "epoch": 1.13, + "learning_rate": 2.438937827985047e-05, + "loss": 1.8464, + "step": 25800 + }, + { + "epoch": 1.13, + "eval_loss": 1.834221601486206, + "eval_runtime": 11.7029, + "eval_samples_per_second": 349.999, + "eval_steps_per_second": 21.875, + "step": 25800 + }, + { + "epoch": 1.13, + "learning_rate": 2.4368890819857256e-05, + "loss": 1.8099, + "step": 25810 + }, + { + "epoch": 1.13, + "learning_rate": 2.4348406081609035e-05, + "loss": 1.8311, + "step": 25820 + }, + { + "epoch": 1.13, + "learning_rate": 2.432792407500695e-05, + "loss": 1.8057, + "step": 25830 + }, + { + "epoch": 1.13, + "learning_rate": 2.4307444809950767e-05, + "loss": 1.8371, + "step": 25840 + }, + { + "epoch": 1.13, + "learning_rate": 2.428696829633897e-05, + "loss": 1.8405, + "step": 25850 + }, + { + "epoch": 1.13, + "learning_rate": 2.4266494544068682e-05, + "loss": 1.8629, + "step": 25860 + }, + { + "epoch": 1.13, + "learning_rate": 2.4246023563035703e-05, + "loss": 1.8636, + "step": 25870 + }, + { + "epoch": 1.13, + "learning_rate": 2.4225555363134504e-05, + "loss": 1.819, + "step": 25880 + }, + { + "epoch": 1.13, + "learning_rate": 2.4205089954258187e-05, + "loss": 1.8154, + "step": 25890 + }, + { + "epoch": 1.13, + "learning_rate": 2.4184627346298544e-05, + "loss": 1.8101, + "step": 25900 + }, + { + "epoch": 1.13, + "eval_loss": 1.8341854810714722, + "eval_runtime": 11.5933, + "eval_samples_per_second": 353.308, + "eval_steps_per_second": 22.082, + "step": 25900 + }, + { + "epoch": 1.13, + "learning_rate": 2.4164167549145952e-05, + "loss": 1.8222, + "step": 25910 + }, + { + "epoch": 1.13, + "learning_rate": 2.4143710572689507e-05, + "loss": 1.8402, + "step": 25920 + }, + { + "epoch": 1.14, + "learning_rate": 2.4123256426816873e-05, + "loss": 1.8229, + "step": 25930 + }, + { + "epoch": 1.14, + "learning_rate": 2.4102805121414386e-05, + "loss": 1.7893, + "step": 25940 + }, + { + "epoch": 1.14, + "learning_rate": 2.4082356666366987e-05, + "loss": 1.8193, + "step": 25950 + }, + { + "epoch": 1.14, + "learning_rate": 2.4061911071558266e-05, + "loss": 1.8274, + "step": 25960 + }, + { + "epoch": 1.14, + "learning_rate": 2.4041468346870393e-05, + "loss": 1.8479, + "step": 25970 + }, + { + "epoch": 1.14, + "learning_rate": 2.4021028502184194e-05, + "loss": 1.8438, + "step": 25980 + }, + { + "epoch": 1.14, + "learning_rate": 2.4000591547379058e-05, + "loss": 1.8017, + "step": 25990 + }, + { + "epoch": 1.14, + "learning_rate": 2.398015749233302e-05, + "loss": 1.8014, + "step": 26000 + }, + { + "epoch": 1.14, + "eval_loss": 1.8340920209884644, + "eval_runtime": 11.695, + "eval_samples_per_second": 350.235, + "eval_steps_per_second": 21.89, + "step": 26000 + }, + { + "epoch": 1.14, + "learning_rate": 2.3959726346922674e-05, + "loss": 1.7826, + "step": 26010 + }, + { + "epoch": 1.14, + "learning_rate": 2.393929812102325e-05, + "loss": 1.8292, + "step": 26020 + }, + { + "epoch": 1.14, + "learning_rate": 2.391887282450852e-05, + "loss": 1.8536, + "step": 26030 + }, + { + "epoch": 1.14, + "learning_rate": 2.389845046725089e-05, + "loss": 1.8431, + "step": 26040 + }, + { + "epoch": 1.14, + "learning_rate": 2.3878031059121282e-05, + "loss": 1.8513, + "step": 26050 + }, + { + "epoch": 1.14, + "learning_rate": 2.3857614609989278e-05, + "loss": 1.8321, + "step": 26060 + }, + { + "epoch": 1.14, + "learning_rate": 2.383720112972293e-05, + "loss": 1.8147, + "step": 26070 + }, + { + "epoch": 1.14, + "learning_rate": 2.3816790628188953e-05, + "loss": 1.7952, + "step": 26080 + }, + { + "epoch": 1.14, + "learning_rate": 2.3796383115252546e-05, + "loss": 1.8118, + "step": 26090 + }, + { + "epoch": 1.14, + "learning_rate": 2.37759786007775e-05, + "loss": 1.8081, + "step": 26100 + }, + { + "epoch": 1.14, + "eval_loss": 1.833876371383667, + "eval_runtime": 11.5373, + "eval_samples_per_second": 355.021, + "eval_steps_per_second": 22.189, + "step": 26100 + }, + { + "epoch": 1.14, + "learning_rate": 2.3755577094626154e-05, + "loss": 1.8162, + "step": 26110 + }, + { + "epoch": 1.14, + "learning_rate": 2.3735178606659382e-05, + "loss": 1.8302, + "step": 26120 + }, + { + "epoch": 1.14, + "learning_rate": 2.3714783146736622e-05, + "loss": 1.8093, + "step": 26130 + }, + { + "epoch": 1.14, + "learning_rate": 2.369439072471581e-05, + "loss": 1.8492, + "step": 26140 + }, + { + "epoch": 1.14, + "learning_rate": 2.3674001350453468e-05, + "loss": 1.8483, + "step": 26150 + }, + { + "epoch": 1.15, + "learning_rate": 2.3653615033804564e-05, + "loss": 1.8476, + "step": 26160 + }, + { + "epoch": 1.15, + "learning_rate": 2.3633231784622693e-05, + "loss": 1.8372, + "step": 26170 + }, + { + "epoch": 1.15, + "learning_rate": 2.3612851612759865e-05, + "loss": 1.8233, + "step": 26180 + }, + { + "epoch": 1.15, + "learning_rate": 2.3592474528066693e-05, + "loss": 1.7858, + "step": 26190 + }, + { + "epoch": 1.15, + "learning_rate": 2.3572100540392215e-05, + "loss": 1.8376, + "step": 26200 + }, + { + "epoch": 1.15, + "eval_loss": 1.833698034286499, + "eval_runtime": 11.6849, + "eval_samples_per_second": 350.538, + "eval_steps_per_second": 21.909, + "step": 26200 + }, + { + "epoch": 1.15, + "learning_rate": 2.355172965958403e-05, + "loss": 1.8245, + "step": 26210 + }, + { + "epoch": 1.15, + "learning_rate": 2.353136189548821e-05, + "loss": 1.842, + "step": 26220 + }, + { + "epoch": 1.15, + "learning_rate": 2.3510997257949336e-05, + "loss": 1.8888, + "step": 26230 + }, + { + "epoch": 1.15, + "learning_rate": 2.3490635756810458e-05, + "loss": 1.8482, + "step": 26240 + }, + { + "epoch": 1.15, + "learning_rate": 2.3470277401913134e-05, + "loss": 1.8517, + "step": 26250 + }, + { + "epoch": 1.15, + "learning_rate": 2.3449922203097373e-05, + "loss": 1.8199, + "step": 26260 + }, + { + "epoch": 1.15, + "learning_rate": 2.342957017020169e-05, + "loss": 1.824, + "step": 26270 + }, + { + "epoch": 1.15, + "learning_rate": 2.3409221313063035e-05, + "loss": 1.8576, + "step": 26280 + }, + { + "epoch": 1.15, + "learning_rate": 2.3388875641516863e-05, + "loss": 1.8216, + "step": 26290 + }, + { + "epoch": 1.15, + "learning_rate": 2.3368533165397046e-05, + "loss": 1.8346, + "step": 26300 + }, + { + "epoch": 1.15, + "eval_loss": 1.8334472179412842, + "eval_runtime": 11.5788, + "eval_samples_per_second": 353.751, + "eval_steps_per_second": 22.109, + "step": 26300 + }, + { + "epoch": 1.15, + "learning_rate": 2.3348193894535956e-05, + "loss": 1.7762, + "step": 26310 + }, + { + "epoch": 1.15, + "learning_rate": 2.332785783876438e-05, + "loss": 1.8077, + "step": 26320 + }, + { + "epoch": 1.15, + "learning_rate": 2.3307525007911557e-05, + "loss": 1.8083, + "step": 26330 + }, + { + "epoch": 1.15, + "learning_rate": 2.328719541180519e-05, + "loss": 1.8251, + "step": 26340 + }, + { + "epoch": 1.15, + "learning_rate": 2.3266869060271383e-05, + "loss": 1.8575, + "step": 26350 + }, + { + "epoch": 1.15, + "learning_rate": 2.3246545963134718e-05, + "loss": 1.7965, + "step": 26360 + }, + { + "epoch": 1.15, + "learning_rate": 2.3226226130218155e-05, + "loss": 1.8171, + "step": 26370 + }, + { + "epoch": 1.15, + "learning_rate": 2.3205909571343112e-05, + "loss": 1.7914, + "step": 26380 + }, + { + "epoch": 1.16, + "learning_rate": 2.3185596296329404e-05, + "loss": 1.8391, + "step": 26390 + }, + { + "epoch": 1.16, + "learning_rate": 2.3165286314995278e-05, + "loss": 1.7914, + "step": 26400 + }, + { + "epoch": 1.16, + "eval_loss": 1.8334349393844604, + "eval_runtime": 11.5307, + "eval_samples_per_second": 355.226, + "eval_steps_per_second": 22.202, + "step": 26400 + }, + { + "epoch": 1.16, + "learning_rate": 2.3144979637157365e-05, + "loss": 1.8035, + "step": 26410 + }, + { + "epoch": 1.16, + "learning_rate": 2.3124676272630724e-05, + "loss": 1.8065, + "step": 26420 + }, + { + "epoch": 1.16, + "learning_rate": 2.3104376231228773e-05, + "loss": 1.7798, + "step": 26430 + }, + { + "epoch": 1.16, + "learning_rate": 2.308407952276339e-05, + "loss": 1.7659, + "step": 26440 + }, + { + "epoch": 1.16, + "learning_rate": 2.3063786157044766e-05, + "loss": 1.8257, + "step": 26450 + }, + { + "epoch": 1.16, + "learning_rate": 2.3043496143881546e-05, + "loss": 1.8107, + "step": 26460 + }, + { + "epoch": 1.16, + "learning_rate": 2.3023209493080696e-05, + "loss": 1.8759, + "step": 26470 + }, + { + "epoch": 1.16, + "learning_rate": 2.3002926214447598e-05, + "loss": 1.8089, + "step": 26480 + }, + { + "epoch": 1.16, + "learning_rate": 2.2982646317785977e-05, + "loss": 1.7965, + "step": 26490 + }, + { + "epoch": 1.16, + "learning_rate": 2.296236981289795e-05, + "loss": 1.8116, + "step": 26500 + }, + { + "epoch": 1.16, + "eval_loss": 1.8330750465393066, + "eval_runtime": 12.1921, + "eval_samples_per_second": 335.955, + "eval_steps_per_second": 20.997, + "step": 26500 + }, + { + "epoch": 1.16, + "learning_rate": 2.2942096709583966e-05, + "loss": 1.8146, + "step": 26510 + }, + { + "epoch": 1.16, + "learning_rate": 2.2921827017642855e-05, + "loss": 1.8026, + "step": 26520 + }, + { + "epoch": 1.16, + "learning_rate": 2.2901560746871777e-05, + "loss": 1.8253, + "step": 26530 + }, + { + "epoch": 1.16, + "learning_rate": 2.2881297907066254e-05, + "loss": 1.786, + "step": 26540 + }, + { + "epoch": 1.16, + "learning_rate": 2.286103850802014e-05, + "loss": 1.8141, + "step": 26550 + }, + { + "epoch": 1.16, + "learning_rate": 2.2840782559525635e-05, + "loss": 1.8312, + "step": 26560 + }, + { + "epoch": 1.16, + "learning_rate": 2.2820530071373278e-05, + "loss": 1.7801, + "step": 26570 + }, + { + "epoch": 1.16, + "learning_rate": 2.280028105335189e-05, + "loss": 1.8628, + "step": 26580 + }, + { + "epoch": 1.16, + "learning_rate": 2.2780035515248673e-05, + "loss": 1.8254, + "step": 26590 + }, + { + "epoch": 1.16, + "learning_rate": 2.275979346684911e-05, + "loss": 1.8014, + "step": 26600 + }, + { + "epoch": 1.16, + "eval_loss": 1.832726001739502, + "eval_runtime": 11.4701, + "eval_samples_per_second": 357.102, + "eval_steps_per_second": 22.319, + "step": 26600 + }, + { + "epoch": 1.16, + "learning_rate": 2.273955491793702e-05, + "loss": 1.8464, + "step": 26610 + }, + { + "epoch": 1.17, + "learning_rate": 2.2719319878294514e-05, + "loss": 1.854, + "step": 26620 + }, + { + "epoch": 1.17, + "learning_rate": 2.2699088357702025e-05, + "loss": 1.8389, + "step": 26630 + }, + { + "epoch": 1.17, + "learning_rate": 2.267886036593825e-05, + "loss": 1.7847, + "step": 26640 + }, + { + "epoch": 1.17, + "learning_rate": 2.2658635912780227e-05, + "loss": 1.8347, + "step": 26650 + }, + { + "epoch": 1.17, + "learning_rate": 2.2638415008003243e-05, + "loss": 1.7616, + "step": 26660 + }, + { + "epoch": 1.17, + "learning_rate": 2.2618197661380898e-05, + "loss": 1.7771, + "step": 26670 + }, + { + "epoch": 1.17, + "learning_rate": 2.259798388268505e-05, + "loss": 1.8119, + "step": 26680 + }, + { + "epoch": 1.17, + "learning_rate": 2.2577773681685867e-05, + "loss": 1.8511, + "step": 26690 + }, + { + "epoch": 1.17, + "learning_rate": 2.2557567068151724e-05, + "loss": 1.7993, + "step": 26700 + }, + { + "epoch": 1.17, + "eval_loss": 1.8325154781341553, + "eval_runtime": 11.4069, + "eval_samples_per_second": 359.08, + "eval_steps_per_second": 22.443, + "step": 26700 + }, + { + "epoch": 1.17, + "learning_rate": 2.253736405184935e-05, + "loss": 1.7621, + "step": 26710 + }, + { + "epoch": 1.17, + "learning_rate": 2.2517164642543642e-05, + "loss": 1.8126, + "step": 26720 + }, + { + "epoch": 1.17, + "learning_rate": 2.249696884999784e-05, + "loss": 1.826, + "step": 26730 + }, + { + "epoch": 1.17, + "learning_rate": 2.2476776683973364e-05, + "loss": 1.7861, + "step": 26740 + }, + { + "epoch": 1.17, + "learning_rate": 2.2456588154229932e-05, + "loss": 1.8022, + "step": 26750 + }, + { + "epoch": 1.17, + "learning_rate": 2.2436403270525473e-05, + "loss": 1.7953, + "step": 26760 + }, + { + "epoch": 1.17, + "learning_rate": 2.2416222042616177e-05, + "loss": 1.8091, + "step": 26770 + }, + { + "epoch": 1.17, + "learning_rate": 2.2396044480256443e-05, + "loss": 1.839, + "step": 26780 + }, + { + "epoch": 1.17, + "learning_rate": 2.237587059319892e-05, + "loss": 1.835, + "step": 26790 + }, + { + "epoch": 1.17, + "learning_rate": 2.235570039119448e-05, + "loss": 1.8417, + "step": 26800 + }, + { + "epoch": 1.17, + "eval_loss": 1.8325867652893066, + "eval_runtime": 11.4762, + "eval_samples_per_second": 356.911, + "eval_steps_per_second": 22.307, + "step": 26800 + }, + { + "epoch": 1.17, + "learning_rate": 2.2335533883992166e-05, + "loss": 1.8167, + "step": 26810 + }, + { + "epoch": 1.17, + "learning_rate": 2.2315371081339328e-05, + "loss": 1.8286, + "step": 26820 + }, + { + "epoch": 1.17, + "learning_rate": 2.2295211992981426e-05, + "loss": 1.862, + "step": 26830 + }, + { + "epoch": 1.17, + "learning_rate": 2.2275056628662205e-05, + "loss": 1.7991, + "step": 26840 + }, + { + "epoch": 1.18, + "learning_rate": 2.225490499812355e-05, + "loss": 1.8163, + "step": 26850 + }, + { + "epoch": 1.18, + "learning_rate": 2.2234757111105584e-05, + "loss": 1.8737, + "step": 26860 + }, + { + "epoch": 1.18, + "learning_rate": 2.2214612977346593e-05, + "loss": 1.8203, + "step": 26870 + }, + { + "epoch": 1.18, + "learning_rate": 2.2194472606583074e-05, + "loss": 1.8228, + "step": 26880 + }, + { + "epoch": 1.18, + "learning_rate": 2.2174336008549667e-05, + "loss": 1.8561, + "step": 26890 + }, + { + "epoch": 1.18, + "learning_rate": 2.2154203192979235e-05, + "loss": 1.8229, + "step": 26900 + }, + { + "epoch": 1.18, + "eval_loss": 1.8322032690048218, + "eval_runtime": 11.5781, + "eval_samples_per_second": 353.771, + "eval_steps_per_second": 22.111, + "step": 26900 + }, + { + "epoch": 1.18, + "learning_rate": 2.2134074169602773e-05, + "loss": 1.8383, + "step": 26910 + }, + { + "epoch": 1.18, + "learning_rate": 2.2113948948149477e-05, + "loss": 1.792, + "step": 26920 + }, + { + "epoch": 1.18, + "learning_rate": 2.209382753834667e-05, + "loss": 1.7557, + "step": 26930 + }, + { + "epoch": 1.18, + "learning_rate": 2.2073709949919867e-05, + "loss": 1.8174, + "step": 26940 + }, + { + "epoch": 1.18, + "learning_rate": 2.2053596192592704e-05, + "loss": 1.8101, + "step": 26950 + }, + { + "epoch": 1.18, + "learning_rate": 2.2033486276087e-05, + "loss": 1.8121, + "step": 26960 + }, + { + "epoch": 1.18, + "learning_rate": 2.201338021012268e-05, + "loss": 1.792, + "step": 26970 + }, + { + "epoch": 1.18, + "learning_rate": 2.199327800441785e-05, + "loss": 1.8288, + "step": 26980 + }, + { + "epoch": 1.18, + "learning_rate": 2.1973179668688697e-05, + "loss": 1.8468, + "step": 26990 + }, + { + "epoch": 1.18, + "learning_rate": 2.195308521264959e-05, + "loss": 1.8541, + "step": 27000 + }, + { + "epoch": 1.18, + "eval_loss": 1.8320529460906982, + "eval_runtime": 11.853, + "eval_samples_per_second": 345.568, + "eval_steps_per_second": 21.598, + "step": 27000 + }, + { + "epoch": 1.18, + "learning_rate": 2.193299464601299e-05, + "loss": 1.8016, + "step": 27010 + }, + { + "epoch": 1.18, + "learning_rate": 2.1912907978489493e-05, + "loss": 1.8023, + "step": 27020 + }, + { + "epoch": 1.18, + "learning_rate": 2.1892825219787804e-05, + "loss": 1.8119, + "step": 27030 + }, + { + "epoch": 1.18, + "learning_rate": 2.1872746379614736e-05, + "loss": 1.808, + "step": 27040 + }, + { + "epoch": 1.18, + "learning_rate": 2.185267146767522e-05, + "loss": 1.8235, + "step": 27050 + }, + { + "epoch": 1.18, + "learning_rate": 2.1832600493672274e-05, + "loss": 1.8342, + "step": 27060 + }, + { + "epoch": 1.18, + "learning_rate": 2.1812533467307035e-05, + "loss": 1.8602, + "step": 27070 + }, + { + "epoch": 1.19, + "learning_rate": 2.1792470398278683e-05, + "loss": 1.8775, + "step": 27080 + }, + { + "epoch": 1.19, + "learning_rate": 2.177241129628456e-05, + "loss": 1.8305, + "step": 27090 + }, + { + "epoch": 1.19, + "learning_rate": 2.1752356171020016e-05, + "loss": 1.8182, + "step": 27100 + }, + { + "epoch": 1.19, + "eval_loss": 1.832180142402649, + "eval_runtime": 11.4296, + "eval_samples_per_second": 358.368, + "eval_steps_per_second": 22.398, + "step": 27100 + }, + { + "epoch": 1.19, + "learning_rate": 2.1732305032178533e-05, + "loss": 1.8361, + "step": 27110 + }, + { + "epoch": 1.19, + "learning_rate": 2.1712257889451627e-05, + "loss": 1.849, + "step": 27120 + }, + { + "epoch": 1.19, + "learning_rate": 2.1692214752528916e-05, + "loss": 1.8339, + "step": 27130 + }, + { + "epoch": 1.19, + "learning_rate": 2.1672175631098056e-05, + "loss": 1.8372, + "step": 27140 + }, + { + "epoch": 1.19, + "learning_rate": 2.1652140534844787e-05, + "loss": 1.7972, + "step": 27150 + }, + { + "epoch": 1.19, + "learning_rate": 2.1632109473452864e-05, + "loss": 1.8092, + "step": 27160 + }, + { + "epoch": 1.19, + "learning_rate": 2.161208245660415e-05, + "loss": 1.8685, + "step": 27170 + }, + { + "epoch": 1.19, + "learning_rate": 2.1592059493978492e-05, + "loss": 1.8396, + "step": 27180 + }, + { + "epoch": 1.19, + "learning_rate": 2.1572040595253822e-05, + "loss": 1.7891, + "step": 27190 + }, + { + "epoch": 1.19, + "learning_rate": 2.1552025770106077e-05, + "loss": 1.7929, + "step": 27200 + }, + { + "epoch": 1.19, + "eval_loss": 1.8319555521011353, + "eval_runtime": 11.6871, + "eval_samples_per_second": 350.47, + "eval_steps_per_second": 21.904, + "step": 27200 + }, + { + "epoch": 1.19, + "learning_rate": 2.1532015028209264e-05, + "loss": 1.8318, + "step": 27210 + }, + { + "epoch": 1.19, + "learning_rate": 2.1512008379235355e-05, + "loss": 1.8183, + "step": 27220 + }, + { + "epoch": 1.19, + "learning_rate": 2.149200583285442e-05, + "loss": 1.8565, + "step": 27230 + }, + { + "epoch": 1.19, + "learning_rate": 2.1472007398734464e-05, + "loss": 1.8268, + "step": 27240 + }, + { + "epoch": 1.19, + "learning_rate": 2.1452013086541593e-05, + "loss": 1.7994, + "step": 27250 + }, + { + "epoch": 1.19, + "learning_rate": 2.143202290593984e-05, + "loss": 1.8272, + "step": 27260 + }, + { + "epoch": 1.19, + "learning_rate": 2.1412036866591293e-05, + "loss": 1.8074, + "step": 27270 + }, + { + "epoch": 1.19, + "learning_rate": 2.1392054978156015e-05, + "loss": 1.8095, + "step": 27280 + }, + { + "epoch": 1.19, + "learning_rate": 2.137207725029206e-05, + "loss": 1.8097, + "step": 27290 + }, + { + "epoch": 1.2, + "learning_rate": 2.1352103692655497e-05, + "loss": 1.7954, + "step": 27300 + }, + { + "epoch": 1.2, + "eval_loss": 1.8318532705307007, + "eval_runtime": 11.5752, + "eval_samples_per_second": 353.86, + "eval_steps_per_second": 22.116, + "step": 27300 + }, + { + "epoch": 1.2, + "learning_rate": 2.133213431490035e-05, + "loss": 1.809, + "step": 27310 + }, + { + "epoch": 1.2, + "learning_rate": 2.1312169126678647e-05, + "loss": 1.8336, + "step": 27320 + }, + { + "epoch": 1.2, + "learning_rate": 2.129220813764035e-05, + "loss": 1.8235, + "step": 27330 + }, + { + "epoch": 1.2, + "learning_rate": 2.127225135743346e-05, + "loss": 1.7739, + "step": 27340 + }, + { + "epoch": 1.2, + "learning_rate": 2.1252298795703863e-05, + "loss": 1.857, + "step": 27350 + }, + { + "epoch": 1.2, + "learning_rate": 2.123235046209549e-05, + "loss": 1.7995, + "step": 27360 + }, + { + "epoch": 1.2, + "learning_rate": 2.121240636625015e-05, + "loss": 1.8557, + "step": 27370 + }, + { + "epoch": 1.2, + "learning_rate": 2.1192466517807657e-05, + "loss": 1.8035, + "step": 27380 + }, + { + "epoch": 1.2, + "learning_rate": 2.1172530926405745e-05, + "loss": 1.8037, + "step": 27390 + }, + { + "epoch": 1.2, + "learning_rate": 2.1152599601680105e-05, + "loss": 1.8958, + "step": 27400 + }, + { + "epoch": 1.2, + "eval_loss": 1.8316702842712402, + "eval_runtime": 11.651, + "eval_samples_per_second": 351.557, + "eval_steps_per_second": 21.972, + "step": 27400 + }, + { + "epoch": 1.2, + "learning_rate": 2.1132672553264356e-05, + "loss": 1.8168, + "step": 27410 + }, + { + "epoch": 1.2, + "learning_rate": 2.111274979079006e-05, + "loss": 1.8574, + "step": 27420 + }, + { + "epoch": 1.2, + "learning_rate": 2.1092831323886694e-05, + "loss": 1.7885, + "step": 27430 + }, + { + "epoch": 1.2, + "learning_rate": 2.1072917162181672e-05, + "loss": 1.8138, + "step": 27440 + }, + { + "epoch": 1.2, + "learning_rate": 2.105300731530032e-05, + "loss": 1.8419, + "step": 27450 + }, + { + "epoch": 1.2, + "learning_rate": 2.1033101792865885e-05, + "loss": 1.7851, + "step": 27460 + }, + { + "epoch": 1.2, + "learning_rate": 2.1013200604499507e-05, + "loss": 1.8412, + "step": 27470 + }, + { + "epoch": 1.2, + "learning_rate": 2.099330375982026e-05, + "loss": 1.8451, + "step": 27480 + }, + { + "epoch": 1.2, + "learning_rate": 2.0973411268445075e-05, + "loss": 1.8418, + "step": 27490 + }, + { + "epoch": 1.2, + "learning_rate": 2.095352313998884e-05, + "loss": 1.8237, + "step": 27500 + }, + { + "epoch": 1.2, + "eval_loss": 1.831519603729248, + "eval_runtime": 11.976, + "eval_samples_per_second": 342.019, + "eval_steps_per_second": 21.376, + "step": 27500 + }, + { + "epoch": 1.2, + "learning_rate": 2.0933639384064277e-05, + "loss": 1.8165, + "step": 27510 + }, + { + "epoch": 1.2, + "learning_rate": 2.0913760010282015e-05, + "loss": 1.8361, + "step": 27520 + }, + { + "epoch": 1.21, + "learning_rate": 2.0893885028250577e-05, + "loss": 1.8167, + "step": 27530 + }, + { + "epoch": 1.21, + "learning_rate": 2.0874014447576348e-05, + "loss": 1.7941, + "step": 27540 + }, + { + "epoch": 1.21, + "learning_rate": 2.0854148277863597e-05, + "loss": 1.799, + "step": 27550 + }, + { + "epoch": 1.21, + "learning_rate": 2.0834286528714445e-05, + "loss": 1.8005, + "step": 27560 + }, + { + "epoch": 1.21, + "learning_rate": 2.08144292097289e-05, + "loss": 1.8535, + "step": 27570 + }, + { + "epoch": 1.21, + "learning_rate": 2.0794576330504802e-05, + "loss": 1.8461, + "step": 27580 + }, + { + "epoch": 1.21, + "learning_rate": 2.0774727900637875e-05, + "loss": 1.8137, + "step": 27590 + }, + { + "epoch": 1.21, + "learning_rate": 2.0754883929721646e-05, + "loss": 1.8526, + "step": 27600 + }, + { + "epoch": 1.21, + "eval_loss": 1.8314945697784424, + "eval_runtime": 11.6983, + "eval_samples_per_second": 350.137, + "eval_steps_per_second": 21.884, + "step": 27600 + }, + { + "epoch": 1.21, + "learning_rate": 2.0735044427347557e-05, + "loss": 1.7918, + "step": 27610 + }, + { + "epoch": 1.21, + "learning_rate": 2.0715209403104805e-05, + "loss": 1.8192, + "step": 27620 + }, + { + "epoch": 1.21, + "learning_rate": 2.0695378866580508e-05, + "loss": 1.8595, + "step": 27630 + }, + { + "epoch": 1.21, + "learning_rate": 2.0675552827359544e-05, + "loss": 1.8632, + "step": 27640 + }, + { + "epoch": 1.21, + "learning_rate": 2.065573129502467e-05, + "loss": 1.8333, + "step": 27650 + }, + { + "epoch": 1.21, + "learning_rate": 2.0635914279156423e-05, + "loss": 1.7787, + "step": 27660 + }, + { + "epoch": 1.21, + "learning_rate": 2.0616101789333192e-05, + "loss": 1.7813, + "step": 27670 + }, + { + "epoch": 1.21, + "learning_rate": 2.0596293835131144e-05, + "loss": 1.7958, + "step": 27680 + }, + { + "epoch": 1.21, + "learning_rate": 2.057649042612429e-05, + "loss": 1.8267, + "step": 27690 + }, + { + "epoch": 1.21, + "learning_rate": 2.0556691571884413e-05, + "loss": 1.808, + "step": 27700 + }, + { + "epoch": 1.21, + "eval_loss": 1.8313714265823364, + "eval_runtime": 11.5536, + "eval_samples_per_second": 354.521, + "eval_steps_per_second": 22.158, + "step": 27700 + }, + { + "epoch": 1.21, + "learning_rate": 2.0536897281981125e-05, + "loss": 1.8536, + "step": 27710 + }, + { + "epoch": 1.21, + "learning_rate": 2.0517107565981794e-05, + "loss": 1.8201, + "step": 27720 + }, + { + "epoch": 1.21, + "learning_rate": 2.0497322433451612e-05, + "loss": 1.8408, + "step": 27730 + }, + { + "epoch": 1.21, + "learning_rate": 2.0477541893953545e-05, + "loss": 1.8357, + "step": 27740 + }, + { + "epoch": 1.21, + "learning_rate": 2.0457765957048314e-05, + "loss": 1.7831, + "step": 27750 + }, + { + "epoch": 1.22, + "learning_rate": 2.0437994632294456e-05, + "loss": 1.788, + "step": 27760 + }, + { + "epoch": 1.22, + "learning_rate": 2.0418227929248246e-05, + "loss": 1.833, + "step": 27770 + }, + { + "epoch": 1.22, + "learning_rate": 2.0398465857463757e-05, + "loss": 1.8343, + "step": 27780 + }, + { + "epoch": 1.22, + "learning_rate": 2.0378708426492782e-05, + "loss": 1.8193, + "step": 27790 + }, + { + "epoch": 1.22, + "learning_rate": 2.035895564588492e-05, + "loss": 1.8208, + "step": 27800 + }, + { + "epoch": 1.22, + "eval_loss": 1.8313522338867188, + "eval_runtime": 11.4654, + "eval_samples_per_second": 357.248, + "eval_steps_per_second": 22.328, + "step": 27800 + }, + { + "epoch": 1.22, + "learning_rate": 2.0339207525187474e-05, + "loss": 1.8266, + "step": 27810 + }, + { + "epoch": 1.22, + "learning_rate": 2.031946407394553e-05, + "loss": 1.7958, + "step": 27820 + }, + { + "epoch": 1.22, + "learning_rate": 2.02997253017019e-05, + "loss": 1.8288, + "step": 27830 + }, + { + "epoch": 1.22, + "learning_rate": 2.027999121799714e-05, + "loss": 1.8183, + "step": 27840 + }, + { + "epoch": 1.22, + "learning_rate": 2.0260261832369536e-05, + "loss": 1.8376, + "step": 27850 + }, + { + "epoch": 1.22, + "learning_rate": 2.0240537154355117e-05, + "loss": 1.8534, + "step": 27860 + }, + { + "epoch": 1.22, + "learning_rate": 2.0220817193487602e-05, + "loss": 1.827, + "step": 27870 + }, + { + "epoch": 1.22, + "learning_rate": 2.0201101959298482e-05, + "loss": 1.8304, + "step": 27880 + }, + { + "epoch": 1.22, + "learning_rate": 2.018139146131691e-05, + "loss": 1.8339, + "step": 27890 + }, + { + "epoch": 1.22, + "learning_rate": 2.0161685709069786e-05, + "loss": 1.7982, + "step": 27900 + }, + { + "epoch": 1.22, + "eval_loss": 1.8311944007873535, + "eval_runtime": 11.5326, + "eval_samples_per_second": 355.168, + "eval_steps_per_second": 22.198, + "step": 27900 + }, + { + "epoch": 1.22, + "learning_rate": 2.0141984712081704e-05, + "loss": 1.8171, + "step": 27910 + }, + { + "epoch": 1.22, + "learning_rate": 2.012228847987496e-05, + "loss": 1.8211, + "step": 27920 + }, + { + "epoch": 1.22, + "learning_rate": 2.010259702196954e-05, + "loss": 1.8328, + "step": 27930 + }, + { + "epoch": 1.22, + "learning_rate": 2.0082910347883144e-05, + "loss": 1.7907, + "step": 27940 + }, + { + "epoch": 1.22, + "learning_rate": 2.0063228467131136e-05, + "loss": 1.8231, + "step": 27950 + }, + { + "epoch": 1.22, + "learning_rate": 2.0043551389226576e-05, + "loss": 1.8691, + "step": 27960 + }, + { + "epoch": 1.22, + "learning_rate": 2.0023879123680192e-05, + "loss": 1.8043, + "step": 27970 + }, + { + "epoch": 1.22, + "learning_rate": 2.000421168000041e-05, + "loss": 1.8543, + "step": 27980 + }, + { + "epoch": 1.23, + "learning_rate": 1.9984549067693303e-05, + "loss": 1.8065, + "step": 27990 + }, + { + "epoch": 1.23, + "learning_rate": 1.9964891296262595e-05, + "loss": 1.8391, + "step": 28000 + }, + { + "epoch": 1.23, + "eval_loss": 1.8308237791061401, + "eval_runtime": 11.5213, + "eval_samples_per_second": 355.515, + "eval_steps_per_second": 22.22, + "step": 28000 + }, + { + "epoch": 1.23, + "learning_rate": 1.9945238375209708e-05, + "loss": 1.8262, + "step": 28010 + }, + { + "epoch": 1.23, + "learning_rate": 1.992559031403369e-05, + "loss": 1.8074, + "step": 28020 + }, + { + "epoch": 1.23, + "learning_rate": 1.9905947122231273e-05, + "loss": 1.792, + "step": 28030 + }, + { + "epoch": 1.23, + "learning_rate": 1.988630880929679e-05, + "loss": 1.7933, + "step": 28040 + }, + { + "epoch": 1.23, + "learning_rate": 1.9866675384722252e-05, + "loss": 1.8644, + "step": 28050 + }, + { + "epoch": 1.23, + "learning_rate": 1.9847046857997285e-05, + "loss": 1.8546, + "step": 28060 + }, + { + "epoch": 1.23, + "learning_rate": 1.9827423238609173e-05, + "loss": 1.8298, + "step": 28070 + }, + { + "epoch": 1.23, + "learning_rate": 1.9807804536042796e-05, + "loss": 1.8519, + "step": 28080 + }, + { + "epoch": 1.23, + "learning_rate": 1.978819075978069e-05, + "loss": 1.8168, + "step": 28090 + }, + { + "epoch": 1.23, + "learning_rate": 1.9768581919302978e-05, + "loss": 1.8294, + "step": 28100 + }, + { + "epoch": 1.23, + "eval_loss": 1.830810785293579, + "eval_runtime": 11.61, + "eval_samples_per_second": 352.798, + "eval_steps_per_second": 22.05, + "step": 28100 + }, + { + "epoch": 1.23, + "learning_rate": 1.9748978024087435e-05, + "loss": 1.8529, + "step": 28110 + }, + { + "epoch": 1.23, + "learning_rate": 1.972937908360939e-05, + "loss": 1.8372, + "step": 28120 + }, + { + "epoch": 1.23, + "learning_rate": 1.970978510734185e-05, + "loss": 1.8326, + "step": 28130 + }, + { + "epoch": 1.23, + "learning_rate": 1.969019610475535e-05, + "loss": 1.8487, + "step": 28140 + }, + { + "epoch": 1.23, + "learning_rate": 1.9670612085318082e-05, + "loss": 1.8219, + "step": 28150 + }, + { + "epoch": 1.23, + "learning_rate": 1.9651033058495783e-05, + "loss": 1.837, + "step": 28160 + }, + { + "epoch": 1.23, + "learning_rate": 1.963145903375181e-05, + "loss": 1.8009, + "step": 28170 + }, + { + "epoch": 1.23, + "learning_rate": 1.9611890020547075e-05, + "loss": 1.8739, + "step": 28180 + }, + { + "epoch": 1.23, + "learning_rate": 1.9592326028340093e-05, + "loss": 1.8071, + "step": 28190 + }, + { + "epoch": 1.23, + "learning_rate": 1.9572767066586933e-05, + "loss": 1.7504, + "step": 28200 + }, + { + "epoch": 1.23, + "eval_loss": 1.830684781074524, + "eval_runtime": 11.5059, + "eval_samples_per_second": 355.991, + "eval_steps_per_second": 22.249, + "step": 28200 + }, + { + "epoch": 1.23, + "learning_rate": 1.955321314474124e-05, + "loss": 1.8246, + "step": 28210 + }, + { + "epoch": 1.24, + "learning_rate": 1.9533664272254245e-05, + "loss": 1.8368, + "step": 28220 + }, + { + "epoch": 1.24, + "learning_rate": 1.9514120458574667e-05, + "loss": 1.8114, + "step": 28230 + }, + { + "epoch": 1.24, + "learning_rate": 1.9494581713148883e-05, + "loss": 1.7932, + "step": 28240 + }, + { + "epoch": 1.24, + "learning_rate": 1.9475048045420723e-05, + "loss": 1.8443, + "step": 28250 + }, + { + "epoch": 1.24, + "learning_rate": 1.9455519464831643e-05, + "loss": 1.8332, + "step": 28260 + }, + { + "epoch": 1.24, + "learning_rate": 1.9435995980820576e-05, + "loss": 1.8698, + "step": 28270 + }, + { + "epoch": 1.24, + "learning_rate": 1.9416477602824035e-05, + "loss": 1.8356, + "step": 28280 + }, + { + "epoch": 1.24, + "learning_rate": 1.9396964340276034e-05, + "loss": 1.8133, + "step": 28290 + }, + { + "epoch": 1.24, + "learning_rate": 1.9377456202608148e-05, + "loss": 1.8127, + "step": 28300 + }, + { + "epoch": 1.24, + "eval_loss": 1.830805778503418, + "eval_runtime": 11.4938, + "eval_samples_per_second": 356.366, + "eval_steps_per_second": 22.273, + "step": 28300 + }, + { + "epoch": 1.24, + "learning_rate": 1.935795319924944e-05, + "loss": 1.7979, + "step": 28310 + }, + { + "epoch": 1.24, + "learning_rate": 1.933845533962652e-05, + "loss": 1.8191, + "step": 28320 + }, + { + "epoch": 1.24, + "learning_rate": 1.9318962633163493e-05, + "loss": 1.8228, + "step": 28330 + }, + { + "epoch": 1.24, + "learning_rate": 1.9299475089281988e-05, + "loss": 1.8554, + "step": 28340 + }, + { + "epoch": 1.24, + "learning_rate": 1.9279992717401114e-05, + "loss": 1.8253, + "step": 28350 + }, + { + "epoch": 1.24, + "learning_rate": 1.9260515526937517e-05, + "loss": 1.8192, + "step": 28360 + }, + { + "epoch": 1.24, + "learning_rate": 1.9241043527305306e-05, + "loss": 1.8501, + "step": 28370 + }, + { + "epoch": 1.24, + "learning_rate": 1.9221576727916107e-05, + "loss": 1.8767, + "step": 28380 + }, + { + "epoch": 1.24, + "learning_rate": 1.920211513817899e-05, + "loss": 1.8047, + "step": 28390 + }, + { + "epoch": 1.24, + "learning_rate": 1.918265876750057e-05, + "loss": 1.8454, + "step": 28400 + }, + { + "epoch": 1.24, + "eval_loss": 1.8304309844970703, + "eval_runtime": 11.674, + "eval_samples_per_second": 350.864, + "eval_steps_per_second": 21.929, + "step": 28400 + }, + { + "epoch": 1.24, + "learning_rate": 1.9163207625284877e-05, + "loss": 1.8197, + "step": 28410 + }, + { + "epoch": 1.24, + "learning_rate": 1.9143761720933473e-05, + "loss": 1.849, + "step": 28420 + }, + { + "epoch": 1.24, + "learning_rate": 1.9124321063845325e-05, + "loss": 1.8015, + "step": 28430 + }, + { + "epoch": 1.24, + "learning_rate": 1.910488566341692e-05, + "loss": 1.8752, + "step": 28440 + }, + { + "epoch": 1.25, + "learning_rate": 1.9085455529042175e-05, + "loss": 1.834, + "step": 28450 + }, + { + "epoch": 1.25, + "learning_rate": 1.9066030670112456e-05, + "loss": 1.8128, + "step": 28460 + }, + { + "epoch": 1.25, + "learning_rate": 1.9046611096016604e-05, + "loss": 1.8009, + "step": 28470 + }, + { + "epoch": 1.25, + "learning_rate": 1.9027196816140885e-05, + "loss": 1.7636, + "step": 28480 + }, + { + "epoch": 1.25, + "learning_rate": 1.9007787839869026e-05, + "loss": 1.8176, + "step": 28490 + }, + { + "epoch": 1.25, + "learning_rate": 1.8988384176582147e-05, + "loss": 1.8369, + "step": 28500 + }, + { + "epoch": 1.25, + "eval_loss": 1.8305020332336426, + "eval_runtime": 11.6462, + "eval_samples_per_second": 351.703, + "eval_steps_per_second": 21.981, + "step": 28500 + }, + { + "epoch": 1.25, + "learning_rate": 1.8968985835658877e-05, + "loss": 1.7947, + "step": 28510 + }, + { + "epoch": 1.25, + "learning_rate": 1.8949592826475173e-05, + "loss": 1.7933, + "step": 28520 + }, + { + "epoch": 1.25, + "learning_rate": 1.893020515840452e-05, + "loss": 1.8181, + "step": 28530 + }, + { + "epoch": 1.25, + "learning_rate": 1.8910822840817733e-05, + "loss": 1.8049, + "step": 28540 + }, + { + "epoch": 1.25, + "learning_rate": 1.8891445883083093e-05, + "loss": 1.8141, + "step": 28550 + }, + { + "epoch": 1.25, + "learning_rate": 1.8872074294566265e-05, + "loss": 1.8061, + "step": 28560 + }, + { + "epoch": 1.25, + "learning_rate": 1.8852708084630347e-05, + "loss": 1.8378, + "step": 28570 + }, + { + "epoch": 1.25, + "learning_rate": 1.88333472626358e-05, + "loss": 1.8434, + "step": 28580 + }, + { + "epoch": 1.25, + "learning_rate": 1.881399183794051e-05, + "loss": 1.7783, + "step": 28590 + }, + { + "epoch": 1.25, + "learning_rate": 1.879464181989974e-05, + "loss": 1.8562, + "step": 28600 + }, + { + "epoch": 1.25, + "eval_loss": 1.8305237293243408, + "eval_runtime": 11.6453, + "eval_samples_per_second": 351.728, + "eval_steps_per_second": 21.983, + "step": 28600 + }, + { + "epoch": 1.25, + "learning_rate": 1.877529721786615e-05, + "loss": 1.8491, + "step": 28610 + }, + { + "epoch": 1.25, + "learning_rate": 1.8755958041189765e-05, + "loss": 1.835, + "step": 28620 + }, + { + "epoch": 1.25, + "learning_rate": 1.8736624299218016e-05, + "loss": 1.7899, + "step": 28630 + }, + { + "epoch": 1.25, + "learning_rate": 1.8717296001295673e-05, + "loss": 1.7965, + "step": 28640 + }, + { + "epoch": 1.25, + "learning_rate": 1.8697973156764906e-05, + "loss": 1.7844, + "step": 28650 + }, + { + "epoch": 1.25, + "learning_rate": 1.867865577496521e-05, + "loss": 1.8755, + "step": 28660 + }, + { + "epoch": 1.26, + "learning_rate": 1.8659343865233497e-05, + "loss": 1.7975, + "step": 28670 + }, + { + "epoch": 1.26, + "learning_rate": 1.8640037436903977e-05, + "loss": 1.8462, + "step": 28680 + }, + { + "epoch": 1.26, + "learning_rate": 1.8620736499308255e-05, + "loss": 1.833, + "step": 28690 + }, + { + "epoch": 1.26, + "learning_rate": 1.8601441061775248e-05, + "loss": 1.8273, + "step": 28700 + }, + { + "epoch": 1.26, + "eval_loss": 1.830354928970337, + "eval_runtime": 11.5397, + "eval_samples_per_second": 354.948, + "eval_steps_per_second": 22.184, + "step": 28700 + }, + { + "epoch": 1.26, + "learning_rate": 1.8582151133631224e-05, + "loss": 1.7919, + "step": 28710 + }, + { + "epoch": 1.26, + "learning_rate": 1.856286672419981e-05, + "loss": 1.8614, + "step": 28720 + }, + { + "epoch": 1.26, + "learning_rate": 1.8543587842801934e-05, + "loss": 1.8111, + "step": 28730 + }, + { + "epoch": 1.26, + "learning_rate": 1.8524314498755872e-05, + "loss": 1.8388, + "step": 28740 + }, + { + "epoch": 1.26, + "learning_rate": 1.8505046701377218e-05, + "loss": 1.7883, + "step": 28750 + }, + { + "epoch": 1.26, + "learning_rate": 1.84857844599789e-05, + "loss": 1.8307, + "step": 28760 + }, + { + "epoch": 1.26, + "learning_rate": 1.846652778387111e-05, + "loss": 1.8396, + "step": 28770 + }, + { + "epoch": 1.26, + "learning_rate": 1.8447276682361428e-05, + "loss": 1.8378, + "step": 28780 + }, + { + "epoch": 1.26, + "learning_rate": 1.842803116475466e-05, + "loss": 1.8442, + "step": 28790 + }, + { + "epoch": 1.26, + "learning_rate": 1.8408791240352983e-05, + "loss": 1.7593, + "step": 28800 + }, + { + "epoch": 1.26, + "eval_loss": 1.8302619457244873, + "eval_runtime": 11.3952, + "eval_samples_per_second": 359.45, + "eval_steps_per_second": 22.466, + "step": 28800 + }, + { + "epoch": 1.26, + "learning_rate": 1.8389556918455813e-05, + "loss": 1.8163, + "step": 28810 + }, + { + "epoch": 1.26, + "learning_rate": 1.83703282083599e-05, + "loss": 1.7921, + "step": 28820 + }, + { + "epoch": 1.26, + "learning_rate": 1.835110511935925e-05, + "loss": 1.8257, + "step": 28830 + }, + { + "epoch": 1.26, + "learning_rate": 1.8331887660745182e-05, + "loss": 1.7806, + "step": 28840 + }, + { + "epoch": 1.26, + "learning_rate": 1.8312675841806263e-05, + "loss": 1.8569, + "step": 28850 + }, + { + "epoch": 1.26, + "learning_rate": 1.8293469671828365e-05, + "loss": 1.8384, + "step": 28860 + }, + { + "epoch": 1.26, + "learning_rate": 1.82742691600946e-05, + "loss": 1.8023, + "step": 28870 + }, + { + "epoch": 1.26, + "learning_rate": 1.8255074315885372e-05, + "loss": 1.8437, + "step": 28880 + }, + { + "epoch": 1.26, + "learning_rate": 1.8235885148478325e-05, + "loss": 1.7939, + "step": 28890 + }, + { + "epoch": 1.27, + "learning_rate": 1.8216701667148378e-05, + "loss": 1.8193, + "step": 28900 + }, + { + "epoch": 1.27, + "eval_loss": 1.830406904220581, + "eval_runtime": 11.5645, + "eval_samples_per_second": 354.187, + "eval_steps_per_second": 22.137, + "step": 28900 + }, + { + "epoch": 1.27, + "learning_rate": 1.819752388116767e-05, + "loss": 1.8371, + "step": 28910 + }, + { + "epoch": 1.27, + "learning_rate": 1.8178351799805637e-05, + "loss": 1.8238, + "step": 28920 + }, + { + "epoch": 1.27, + "learning_rate": 1.8159185432328906e-05, + "loss": 1.8504, + "step": 28930 + }, + { + "epoch": 1.27, + "learning_rate": 1.814002478800136e-05, + "loss": 1.7753, + "step": 28940 + }, + { + "epoch": 1.27, + "learning_rate": 1.812086987608414e-05, + "loss": 1.8689, + "step": 28950 + }, + { + "epoch": 1.27, + "learning_rate": 1.810172070583559e-05, + "loss": 1.8385, + "step": 28960 + }, + { + "epoch": 1.27, + "learning_rate": 1.8082577286511285e-05, + "loss": 1.8529, + "step": 28970 + }, + { + "epoch": 1.27, + "learning_rate": 1.8063439627364016e-05, + "loss": 1.8337, + "step": 28980 + }, + { + "epoch": 1.27, + "learning_rate": 1.804430773764381e-05, + "loss": 1.7948, + "step": 28990 + }, + { + "epoch": 1.27, + "learning_rate": 1.802518162659787e-05, + "loss": 1.838, + "step": 29000 + }, + { + "epoch": 1.27, + "eval_loss": 1.830385684967041, + "eval_runtime": 11.7627, + "eval_samples_per_second": 348.218, + "eval_steps_per_second": 21.764, + "step": 29000 + }, + { + "epoch": 1.27, + "learning_rate": 1.8006061303470648e-05, + "loss": 1.7526, + "step": 29010 + }, + { + "epoch": 1.27, + "learning_rate": 1.798694677750376e-05, + "loss": 1.8202, + "step": 29020 + }, + { + "epoch": 1.27, + "learning_rate": 1.796783805793606e-05, + "loss": 1.8431, + "step": 29030 + }, + { + "epoch": 1.27, + "learning_rate": 1.794873515400353e-05, + "loss": 1.813, + "step": 29040 + }, + { + "epoch": 1.27, + "learning_rate": 1.7929638074939433e-05, + "loss": 1.8351, + "step": 29050 + }, + { + "epoch": 1.27, + "learning_rate": 1.7910546829974127e-05, + "loss": 1.8675, + "step": 29060 + }, + { + "epoch": 1.27, + "learning_rate": 1.789146142833522e-05, + "loss": 1.8152, + "step": 29070 + }, + { + "epoch": 1.27, + "learning_rate": 1.787238187924744e-05, + "loss": 1.8264, + "step": 29080 + }, + { + "epoch": 1.27, + "learning_rate": 1.785330819193274e-05, + "loss": 1.7769, + "step": 29090 + }, + { + "epoch": 1.27, + "learning_rate": 1.7834240375610183e-05, + "loss": 1.8131, + "step": 29100 + }, + { + "epoch": 1.27, + "eval_loss": 1.830368995666504, + "eval_runtime": 11.647, + "eval_samples_per_second": 351.678, + "eval_steps_per_second": 21.98, + "step": 29100 + }, + { + "epoch": 1.27, + "learning_rate": 1.7815178439496047e-05, + "loss": 1.8124, + "step": 29110 + }, + { + "epoch": 1.27, + "learning_rate": 1.7796122392803727e-05, + "loss": 1.8489, + "step": 29120 + }, + { + "epoch": 1.28, + "learning_rate": 1.77770722447438e-05, + "loss": 1.8363, + "step": 29130 + }, + { + "epoch": 1.28, + "learning_rate": 1.7758028004523978e-05, + "loss": 1.8315, + "step": 29140 + }, + { + "epoch": 1.28, + "learning_rate": 1.773898968134912e-05, + "loss": 1.8668, + "step": 29150 + }, + { + "epoch": 1.28, + "learning_rate": 1.7719957284421228e-05, + "loss": 1.8214, + "step": 29160 + }, + { + "epoch": 1.28, + "learning_rate": 1.7700930822939424e-05, + "loss": 1.78, + "step": 29170 + }, + { + "epoch": 1.28, + "learning_rate": 1.7681910306099985e-05, + "loss": 1.8128, + "step": 29180 + }, + { + "epoch": 1.28, + "learning_rate": 1.766289574309629e-05, + "loss": 1.8504, + "step": 29190 + }, + { + "epoch": 1.28, + "learning_rate": 1.7643887143118875e-05, + "loss": 1.7937, + "step": 29200 + }, + { + "epoch": 1.28, + "eval_loss": 1.8301746845245361, + "eval_runtime": 11.7318, + "eval_samples_per_second": 349.135, + "eval_steps_per_second": 21.821, + "step": 29200 + }, + { + "epoch": 1.28, + "learning_rate": 1.7624884515355358e-05, + "loss": 1.8176, + "step": 29210 + }, + { + "epoch": 1.28, + "learning_rate": 1.760588786899049e-05, + "loss": 1.8329, + "step": 29220 + }, + { + "epoch": 1.28, + "learning_rate": 1.758689721320612e-05, + "loss": 1.863, + "step": 29230 + }, + { + "epoch": 1.28, + "learning_rate": 1.7567912557181218e-05, + "loss": 1.8621, + "step": 29240 + }, + { + "epoch": 1.28, + "learning_rate": 1.7548933910091834e-05, + "loss": 1.8192, + "step": 29250 + }, + { + "epoch": 1.28, + "learning_rate": 1.752996128111113e-05, + "loss": 1.8057, + "step": 29260 + }, + { + "epoch": 1.28, + "learning_rate": 1.751099467940934e-05, + "loss": 1.8206, + "step": 29270 + }, + { + "epoch": 1.28, + "learning_rate": 1.7492034114153825e-05, + "loss": 1.8019, + "step": 29280 + }, + { + "epoch": 1.28, + "learning_rate": 1.7473079594508966e-05, + "loss": 1.8015, + "step": 29290 + }, + { + "epoch": 1.28, + "learning_rate": 1.7454131129636273e-05, + "loss": 1.8239, + "step": 29300 + }, + { + "epoch": 1.28, + "eval_loss": 1.8301327228546143, + "eval_runtime": 11.5499, + "eval_samples_per_second": 354.636, + "eval_steps_per_second": 22.165, + "step": 29300 + }, + { + "epoch": 1.28, + "learning_rate": 1.7435188728694312e-05, + "loss": 1.8463, + "step": 29310 + }, + { + "epoch": 1.28, + "learning_rate": 1.741625240083873e-05, + "loss": 1.8382, + "step": 29320 + }, + { + "epoch": 1.28, + "learning_rate": 1.7397322155222203e-05, + "loss": 1.8513, + "step": 29330 + }, + { + "epoch": 1.28, + "learning_rate": 1.73783980009945e-05, + "loss": 1.7766, + "step": 29340 + }, + { + "epoch": 1.28, + "learning_rate": 1.735947994730245e-05, + "loss": 1.8219, + "step": 29350 + }, + { + "epoch": 1.29, + "learning_rate": 1.7340568003289917e-05, + "loss": 1.851, + "step": 29360 + }, + { + "epoch": 1.29, + "learning_rate": 1.7321662178097805e-05, + "loss": 1.8086, + "step": 29370 + }, + { + "epoch": 1.29, + "learning_rate": 1.7302762480864093e-05, + "loss": 1.7966, + "step": 29380 + }, + { + "epoch": 1.29, + "learning_rate": 1.7283868920723734e-05, + "loss": 1.8585, + "step": 29390 + }, + { + "epoch": 1.29, + "learning_rate": 1.726498150680881e-05, + "loss": 1.8134, + "step": 29400 + }, + { + "epoch": 1.29, + "eval_loss": 1.8302505016326904, + "eval_runtime": 11.6196, + "eval_samples_per_second": 352.509, + "eval_steps_per_second": 22.032, + "step": 29400 + }, + { + "epoch": 1.29, + "learning_rate": 1.7246100248248356e-05, + "loss": 1.7909, + "step": 29410 + }, + { + "epoch": 1.29, + "learning_rate": 1.7227225154168438e-05, + "loss": 1.8333, + "step": 29420 + }, + { + "epoch": 1.29, + "learning_rate": 1.7208356233692174e-05, + "loss": 1.7974, + "step": 29430 + }, + { + "epoch": 1.29, + "learning_rate": 1.718949349593969e-05, + "loss": 1.833, + "step": 29440 + }, + { + "epoch": 1.29, + "learning_rate": 1.717063695002812e-05, + "loss": 1.8043, + "step": 29450 + }, + { + "epoch": 1.29, + "learning_rate": 1.7151786605071588e-05, + "loss": 1.8103, + "step": 29460 + }, + { + "epoch": 1.29, + "learning_rate": 1.713294247018125e-05, + "loss": 1.8118, + "step": 29470 + }, + { + "epoch": 1.29, + "learning_rate": 1.7114104554465216e-05, + "loss": 1.8458, + "step": 29480 + }, + { + "epoch": 1.29, + "learning_rate": 1.709527286702867e-05, + "loss": 1.8072, + "step": 29490 + }, + { + "epoch": 1.29, + "learning_rate": 1.707644741697369e-05, + "loss": 1.8057, + "step": 29500 + }, + { + "epoch": 1.29, + "eval_loss": 1.8301050662994385, + "eval_runtime": 11.921, + "eval_samples_per_second": 343.596, + "eval_steps_per_second": 21.475, + "step": 29500 + }, + { + "epoch": 1.29, + "learning_rate": 1.7057628213399415e-05, + "loss": 1.8512, + "step": 29510 + }, + { + "epoch": 1.29, + "learning_rate": 1.703881526540191e-05, + "loss": 1.7953, + "step": 29520 + }, + { + "epoch": 1.29, + "learning_rate": 1.7020008582074257e-05, + "loss": 1.8278, + "step": 29530 + }, + { + "epoch": 1.29, + "learning_rate": 1.7001208172506487e-05, + "loss": 1.7863, + "step": 29540 + }, + { + "epoch": 1.29, + "learning_rate": 1.6982414045785624e-05, + "loss": 1.8038, + "step": 29550 + }, + { + "epoch": 1.29, + "learning_rate": 1.6963626210995608e-05, + "loss": 1.861, + "step": 29560 + }, + { + "epoch": 1.29, + "learning_rate": 1.6944844677217378e-05, + "loss": 1.8666, + "step": 29570 + }, + { + "epoch": 1.29, + "learning_rate": 1.6926069453528822e-05, + "loss": 1.8175, + "step": 29580 + }, + { + "epoch": 1.3, + "learning_rate": 1.690730054900478e-05, + "loss": 1.8434, + "step": 29590 + }, + { + "epoch": 1.3, + "learning_rate": 1.6888537972717008e-05, + "loss": 1.8518, + "step": 29600 + }, + { + "epoch": 1.3, + "eval_loss": 1.829789400100708, + "eval_runtime": 11.5886, + "eval_samples_per_second": 353.45, + "eval_steps_per_second": 22.091, + "step": 29600 + }, + { + "epoch": 1.3, + "learning_rate": 1.6869781733734234e-05, + "loss": 1.8064, + "step": 29610 + }, + { + "epoch": 1.3, + "learning_rate": 1.6851031841122114e-05, + "loss": 1.8443, + "step": 29620 + }, + { + "epoch": 1.3, + "learning_rate": 1.6832288303943254e-05, + "loss": 1.8501, + "step": 29630 + }, + { + "epoch": 1.3, + "learning_rate": 1.6813551131257154e-05, + "loss": 1.8504, + "step": 29640 + }, + { + "epoch": 1.3, + "learning_rate": 1.679482033212025e-05, + "loss": 1.8269, + "step": 29650 + }, + { + "epoch": 1.3, + "learning_rate": 1.6776095915585905e-05, + "loss": 1.7929, + "step": 29660 + }, + { + "epoch": 1.3, + "learning_rate": 1.6757377890704398e-05, + "loss": 1.8159, + "step": 29670 + }, + { + "epoch": 1.3, + "learning_rate": 1.6738666266522928e-05, + "loss": 1.8405, + "step": 29680 + }, + { + "epoch": 1.3, + "learning_rate": 1.671996105208556e-05, + "loss": 1.7826, + "step": 29690 + }, + { + "epoch": 1.3, + "learning_rate": 1.6701262256433304e-05, + "loss": 1.8001, + "step": 29700 + }, + { + "epoch": 1.3, + "eval_loss": 1.8297191858291626, + "eval_runtime": 11.5768, + "eval_samples_per_second": 353.81, + "eval_steps_per_second": 22.113, + "step": 29700 + }, + { + "epoch": 1.3, + "learning_rate": 1.6682569888604047e-05, + "loss": 1.7877, + "step": 29710 + }, + { + "epoch": 1.3, + "learning_rate": 1.666388395763259e-05, + "loss": 1.8405, + "step": 29720 + }, + { + "epoch": 1.3, + "learning_rate": 1.6645204472550576e-05, + "loss": 1.8083, + "step": 29730 + }, + { + "epoch": 1.3, + "learning_rate": 1.6626531442386598e-05, + "loss": 1.7797, + "step": 29740 + }, + { + "epoch": 1.3, + "learning_rate": 1.6607864876166048e-05, + "loss": 1.8313, + "step": 29750 + }, + { + "epoch": 1.3, + "learning_rate": 1.6589204782911287e-05, + "loss": 1.798, + "step": 29760 + }, + { + "epoch": 1.3, + "learning_rate": 1.6570551171641475e-05, + "loss": 1.8381, + "step": 29770 + }, + { + "epoch": 1.3, + "learning_rate": 1.6551904051372674e-05, + "loss": 1.8251, + "step": 29780 + }, + { + "epoch": 1.3, + "learning_rate": 1.6533263431117786e-05, + "loss": 1.7904, + "step": 29790 + }, + { + "epoch": 1.3, + "learning_rate": 1.6514629319886592e-05, + "loss": 1.8533, + "step": 29800 + }, + { + "epoch": 1.3, + "eval_loss": 1.8297039270401, + "eval_runtime": 11.6268, + "eval_samples_per_second": 352.288, + "eval_steps_per_second": 22.018, + "step": 29800 + }, + { + "epoch": 1.3, + "learning_rate": 1.6496001726685715e-05, + "loss": 1.8153, + "step": 29810 + }, + { + "epoch": 1.31, + "learning_rate": 1.647738066051865e-05, + "loss": 1.866, + "step": 29820 + }, + { + "epoch": 1.31, + "learning_rate": 1.6458766130385692e-05, + "loss": 1.8188, + "step": 29830 + }, + { + "epoch": 1.31, + "learning_rate": 1.6440158145284017e-05, + "loss": 1.8115, + "step": 29840 + }, + { + "epoch": 1.31, + "learning_rate": 1.642155671420762e-05, + "loss": 1.8042, + "step": 29850 + }, + { + "epoch": 1.31, + "learning_rate": 1.6402961846147346e-05, + "loss": 1.8007, + "step": 29860 + }, + { + "epoch": 1.31, + "learning_rate": 1.638437355009084e-05, + "loss": 1.7936, + "step": 29870 + }, + { + "epoch": 1.31, + "learning_rate": 1.6365791835022566e-05, + "loss": 1.7836, + "step": 29880 + }, + { + "epoch": 1.31, + "learning_rate": 1.6347216709923872e-05, + "loss": 1.7985, + "step": 29890 + }, + { + "epoch": 1.31, + "learning_rate": 1.6328648183772837e-05, + "loss": 1.7926, + "step": 29900 + }, + { + "epoch": 1.31, + "eval_loss": 1.82939612865448, + "eval_runtime": 11.6247, + "eval_samples_per_second": 352.353, + "eval_steps_per_second": 22.022, + "step": 29900 + }, + { + "epoch": 1.31, + "learning_rate": 1.6310086265544412e-05, + "loss": 1.8354, + "step": 29910 + }, + { + "epoch": 1.31, + "learning_rate": 1.6291530964210303e-05, + "loss": 1.8529, + "step": 29920 + }, + { + "epoch": 1.31, + "learning_rate": 1.6272982288739063e-05, + "loss": 1.7827, + "step": 29930 + }, + { + "epoch": 1.31, + "learning_rate": 1.6254440248096022e-05, + "loss": 1.7849, + "step": 29940 + }, + { + "epoch": 1.31, + "learning_rate": 1.623590485124331e-05, + "loss": 1.836, + "step": 29950 + }, + { + "epoch": 1.31, + "learning_rate": 1.621737610713983e-05, + "loss": 1.7903, + "step": 29960 + }, + { + "epoch": 1.31, + "learning_rate": 1.6198854024741286e-05, + "loss": 1.7863, + "step": 29970 + }, + { + "epoch": 1.31, + "learning_rate": 1.6180338613000155e-05, + "loss": 1.7768, + "step": 29980 + }, + { + "epoch": 1.31, + "learning_rate": 1.6161829880865707e-05, + "loss": 1.804, + "step": 29990 + }, + { + "epoch": 1.31, + "learning_rate": 1.6143327837283946e-05, + "loss": 1.7666, + "step": 30000 + }, + { + "epoch": 1.31, + "eval_loss": 1.8295360803604126, + "eval_runtime": 11.8624, + "eval_samples_per_second": 345.294, + "eval_steps_per_second": 21.581, + "step": 30000 + }, + { + "epoch": 1.31, + "learning_rate": 1.6124832491197682e-05, + "loss": 1.8302, + "step": 30010 + }, + { + "epoch": 1.31, + "learning_rate": 1.610634385154644e-05, + "loss": 1.7949, + "step": 30020 + }, + { + "epoch": 1.31, + "learning_rate": 1.608786192726658e-05, + "loss": 1.8113, + "step": 30030 + }, + { + "epoch": 1.32, + "learning_rate": 1.606938672729114e-05, + "loss": 1.8027, + "step": 30040 + }, + { + "epoch": 1.32, + "learning_rate": 1.6050918260549955e-05, + "loss": 1.7857, + "step": 30050 + }, + { + "epoch": 1.32, + "learning_rate": 1.6032456535969576e-05, + "loss": 1.8274, + "step": 30060 + }, + { + "epoch": 1.32, + "learning_rate": 1.6014001562473305e-05, + "loss": 1.8056, + "step": 30070 + }, + { + "epoch": 1.32, + "learning_rate": 1.5995553348981197e-05, + "loss": 1.7804, + "step": 30080 + }, + { + "epoch": 1.32, + "learning_rate": 1.5977111904410034e-05, + "loss": 1.8346, + "step": 30090 + }, + { + "epoch": 1.32, + "learning_rate": 1.5958677237673295e-05, + "loss": 1.8124, + "step": 30100 + }, + { + "epoch": 1.32, + "eval_loss": 1.8036388158798218, + "eval_runtime": 12.2283, + "eval_samples_per_second": 334.96, + "eval_steps_per_second": 20.935, + "step": 30100 + }, + { + "epoch": 1.32, + "learning_rate": 1.594024935768122e-05, + "loss": 1.7914, + "step": 30110 + }, + { + "epoch": 1.32, + "learning_rate": 1.5921828273340768e-05, + "loss": 1.817, + "step": 30120 + }, + { + "epoch": 1.32, + "learning_rate": 1.590341399355558e-05, + "loss": 1.8477, + "step": 30130 + }, + { + "epoch": 1.32, + "learning_rate": 1.588500652722605e-05, + "loss": 1.8133, + "step": 30140 + }, + { + "epoch": 1.32, + "learning_rate": 1.586660588324923e-05, + "loss": 1.8079, + "step": 30150 + }, + { + "epoch": 1.32, + "learning_rate": 1.5848212070518923e-05, + "loss": 1.8397, + "step": 30160 + }, + { + "epoch": 1.32, + "learning_rate": 1.5829825097925605e-05, + "loss": 1.8179, + "step": 30170 + }, + { + "epoch": 1.32, + "learning_rate": 1.5811444974356466e-05, + "loss": 1.8638, + "step": 30180 + }, + { + "epoch": 1.32, + "learning_rate": 1.579307170869534e-05, + "loss": 1.8601, + "step": 30190 + }, + { + "epoch": 1.32, + "learning_rate": 1.5774705309822796e-05, + "loss": 1.8226, + "step": 30200 + }, + { + "epoch": 1.32, + "eval_loss": 1.8035895824432373, + "eval_runtime": 13.4266, + "eval_samples_per_second": 305.066, + "eval_steps_per_second": 19.067, + "step": 30200 + }, + { + "epoch": 1.32, + "learning_rate": 1.575634578661606e-05, + "loss": 1.8057, + "step": 30210 + }, + { + "epoch": 1.32, + "learning_rate": 1.573799314794905e-05, + "loss": 1.8171, + "step": 30220 + }, + { + "epoch": 1.32, + "learning_rate": 1.571964740269233e-05, + "loss": 1.8724, + "step": 30230 + }, + { + "epoch": 1.32, + "learning_rate": 1.570130855971315e-05, + "loss": 1.8347, + "step": 30240 + }, + { + "epoch": 1.32, + "learning_rate": 1.5682976627875423e-05, + "loss": 1.8281, + "step": 30250 + }, + { + "epoch": 1.32, + "learning_rate": 1.566465161603974e-05, + "loss": 1.8488, + "step": 30260 + }, + { + "epoch": 1.33, + "learning_rate": 1.56463335330633e-05, + "loss": 1.8638, + "step": 30270 + }, + { + "epoch": 1.33, + "learning_rate": 1.5628022387799995e-05, + "loss": 1.8497, + "step": 30280 + }, + { + "epoch": 1.33, + "learning_rate": 1.5609718189100322e-05, + "loss": 1.796, + "step": 30290 + }, + { + "epoch": 1.33, + "learning_rate": 1.5591420945811503e-05, + "loss": 1.8429, + "step": 30300 + }, + { + "epoch": 1.33, + "eval_loss": 1.8034627437591553, + "eval_runtime": 11.7516, + "eval_samples_per_second": 348.548, + "eval_steps_per_second": 21.784, + "step": 30300 + }, + { + "epoch": 1.33, + "learning_rate": 1.5573130666777293e-05, + "loss": 1.8275, + "step": 30310 + }, + { + "epoch": 1.33, + "learning_rate": 1.5554847360838164e-05, + "loss": 1.8428, + "step": 30320 + }, + { + "epoch": 1.33, + "learning_rate": 1.5536571036831148e-05, + "loss": 1.7699, + "step": 30330 + }, + { + "epoch": 1.33, + "learning_rate": 1.5518301703589967e-05, + "loss": 1.7879, + "step": 30340 + }, + { + "epoch": 1.33, + "learning_rate": 1.550003936994494e-05, + "loss": 1.8352, + "step": 30350 + }, + { + "epoch": 1.33, + "learning_rate": 1.5481784044722975e-05, + "loss": 1.8067, + "step": 30360 + }, + { + "epoch": 1.33, + "learning_rate": 1.5463535736747636e-05, + "loss": 1.7945, + "step": 30370 + }, + { + "epoch": 1.33, + "learning_rate": 1.5445294454839047e-05, + "loss": 1.8567, + "step": 30380 + }, + { + "epoch": 1.33, + "learning_rate": 1.5427060207814008e-05, + "loss": 1.8136, + "step": 30390 + }, + { + "epoch": 1.33, + "learning_rate": 1.540883300448584e-05, + "loss": 1.8147, + "step": 30400 + }, + { + "epoch": 1.33, + "eval_loss": 1.803378939628601, + "eval_runtime": 11.9608, + "eval_samples_per_second": 342.451, + "eval_steps_per_second": 21.403, + "step": 30400 + }, + { + "epoch": 1.33, + "learning_rate": 1.5390612853664515e-05, + "loss": 1.7916, + "step": 30410 + }, + { + "epoch": 1.33, + "learning_rate": 1.537239976415656e-05, + "loss": 1.7759, + "step": 30420 + }, + { + "epoch": 1.33, + "learning_rate": 1.5354193744765113e-05, + "loss": 1.8284, + "step": 30430 + }, + { + "epoch": 1.33, + "learning_rate": 1.533599480428988e-05, + "loss": 1.7931, + "step": 30440 + }, + { + "epoch": 1.33, + "learning_rate": 1.5317802951527177e-05, + "loss": 1.8196, + "step": 30450 + }, + { + "epoch": 1.33, + "learning_rate": 1.5299618195269837e-05, + "loss": 1.8148, + "step": 30460 + }, + { + "epoch": 1.33, + "learning_rate": 1.5281440544307304e-05, + "loss": 1.835, + "step": 30470 + }, + { + "epoch": 1.33, + "learning_rate": 1.526327000742559e-05, + "loss": 1.7835, + "step": 30480 + }, + { + "epoch": 1.33, + "learning_rate": 1.5245106593407258e-05, + "loss": 1.7868, + "step": 30490 + }, + { + "epoch": 1.34, + "learning_rate": 1.522695031103141e-05, + "loss": 1.8516, + "step": 30500 + }, + { + "epoch": 1.34, + "eval_loss": 1.8033866882324219, + "eval_runtime": 11.923, + "eval_samples_per_second": 343.536, + "eval_steps_per_second": 21.471, + "step": 30500 + }, + { + "epoch": 1.34, + "learning_rate": 1.5208801169073735e-05, + "loss": 1.8361, + "step": 30510 + }, + { + "epoch": 1.34, + "learning_rate": 1.5190659176306442e-05, + "loss": 1.8378, + "step": 30520 + }, + { + "epoch": 1.34, + "learning_rate": 1.5172524341498316e-05, + "loss": 1.8046, + "step": 30530 + }, + { + "epoch": 1.34, + "learning_rate": 1.5154396673414641e-05, + "loss": 1.8332, + "step": 30540 + }, + { + "epoch": 1.34, + "learning_rate": 1.513627618081728e-05, + "loss": 1.8318, + "step": 30550 + }, + { + "epoch": 1.34, + "learning_rate": 1.5118162872464576e-05, + "loss": 1.7733, + "step": 30560 + }, + { + "epoch": 1.34, + "learning_rate": 1.5100056757111473e-05, + "loss": 1.8487, + "step": 30570 + }, + { + "epoch": 1.34, + "learning_rate": 1.5081957843509373e-05, + "loss": 1.8064, + "step": 30580 + }, + { + "epoch": 1.34, + "learning_rate": 1.5063866140406211e-05, + "loss": 1.8432, + "step": 30590 + }, + { + "epoch": 1.34, + "learning_rate": 1.5045781656546451e-05, + "loss": 1.823, + "step": 30600 + }, + { + "epoch": 1.34, + "eval_loss": 1.8032710552215576, + "eval_runtime": 12.014, + "eval_samples_per_second": 340.936, + "eval_steps_per_second": 21.309, + "step": 30600 + }, + { + "epoch": 1.34, + "learning_rate": 1.502770440067107e-05, + "loss": 1.7994, + "step": 30610 + }, + { + "epoch": 1.34, + "learning_rate": 1.5009634381517554e-05, + "loss": 1.871, + "step": 30620 + }, + { + "epoch": 1.34, + "learning_rate": 1.4991571607819855e-05, + "loss": 1.7873, + "step": 30630 + }, + { + "epoch": 1.34, + "learning_rate": 1.4973516088308472e-05, + "loss": 1.8297, + "step": 30640 + }, + { + "epoch": 1.34, + "learning_rate": 1.4955467831710347e-05, + "loss": 1.803, + "step": 30650 + }, + { + "epoch": 1.34, + "learning_rate": 1.4937426846748978e-05, + "loss": 1.7824, + "step": 30660 + }, + { + "epoch": 1.34, + "learning_rate": 1.4919393142144276e-05, + "loss": 1.8144, + "step": 30670 + }, + { + "epoch": 1.34, + "learning_rate": 1.4901366726612695e-05, + "loss": 1.8307, + "step": 30680 + }, + { + "epoch": 1.34, + "learning_rate": 1.488334760886711e-05, + "loss": 1.8081, + "step": 30690 + }, + { + "epoch": 1.34, + "learning_rate": 1.4865335797616908e-05, + "loss": 1.8277, + "step": 30700 + }, + { + "epoch": 1.34, + "eval_loss": 1.8033111095428467, + "eval_runtime": 11.7975, + "eval_samples_per_second": 347.192, + "eval_steps_per_second": 21.699, + "step": 30700 + }, + { + "epoch": 1.34, + "learning_rate": 1.4847331301567937e-05, + "loss": 1.8293, + "step": 30710 + }, + { + "epoch": 1.34, + "learning_rate": 1.4829334129422513e-05, + "loss": 1.8478, + "step": 30720 + }, + { + "epoch": 1.35, + "learning_rate": 1.4811344289879381e-05, + "loss": 1.8669, + "step": 30730 + }, + { + "epoch": 1.35, + "learning_rate": 1.4793361791633779e-05, + "loss": 1.8202, + "step": 30740 + }, + { + "epoch": 1.35, + "learning_rate": 1.477538664337738e-05, + "loss": 1.8127, + "step": 30750 + }, + { + "epoch": 1.35, + "learning_rate": 1.475741885379832e-05, + "loss": 1.8181, + "step": 30760 + }, + { + "epoch": 1.35, + "learning_rate": 1.4739458431581146e-05, + "loss": 1.8136, + "step": 30770 + }, + { + "epoch": 1.35, + "learning_rate": 1.4721505385406865e-05, + "loss": 1.8594, + "step": 30780 + }, + { + "epoch": 1.35, + "learning_rate": 1.470355972395293e-05, + "loss": 1.7994, + "step": 30790 + }, + { + "epoch": 1.35, + "learning_rate": 1.4685621455893215e-05, + "loss": 1.7717, + "step": 30800 + }, + { + "epoch": 1.35, + "eval_loss": 1.8033769130706787, + "eval_runtime": 12.8899, + "eval_samples_per_second": 317.768, + "eval_steps_per_second": 19.861, + "step": 30800 + }, + { + "epoch": 1.35, + "learning_rate": 1.4667690589897995e-05, + "loss": 1.8188, + "step": 30810 + }, + { + "epoch": 1.35, + "learning_rate": 1.4649767134634016e-05, + "loss": 1.8189, + "step": 30820 + }, + { + "epoch": 1.35, + "learning_rate": 1.463185109876439e-05, + "loss": 1.8468, + "step": 30830 + }, + { + "epoch": 1.35, + "learning_rate": 1.4613942490948683e-05, + "loss": 1.8815, + "step": 30840 + }, + { + "epoch": 1.35, + "learning_rate": 1.4596041319842866e-05, + "loss": 1.8377, + "step": 30850 + }, + { + "epoch": 1.35, + "learning_rate": 1.4578147594099282e-05, + "loss": 1.8201, + "step": 30860 + }, + { + "epoch": 1.35, + "learning_rate": 1.4560261322366711e-05, + "loss": 1.8229, + "step": 30870 + }, + { + "epoch": 1.35, + "learning_rate": 1.4542382513290323e-05, + "loss": 1.8585, + "step": 30880 + }, + { + "epoch": 1.35, + "learning_rate": 1.4524511175511686e-05, + "loss": 1.7989, + "step": 30890 + }, + { + "epoch": 1.35, + "learning_rate": 1.4506647317668719e-05, + "loss": 1.8391, + "step": 30900 + }, + { + "epoch": 1.35, + "eval_loss": 1.8032344579696655, + "eval_runtime": 12.1463, + "eval_samples_per_second": 337.222, + "eval_steps_per_second": 21.076, + "step": 30900 + }, + { + "epoch": 1.35, + "learning_rate": 1.4488790948395783e-05, + "loss": 1.8361, + "step": 30910 + }, + { + "epoch": 1.35, + "learning_rate": 1.4470942076323553e-05, + "loss": 1.8794, + "step": 30920 + }, + { + "epoch": 1.35, + "learning_rate": 1.4453100710079167e-05, + "loss": 1.8314, + "step": 30930 + }, + { + "epoch": 1.35, + "learning_rate": 1.4435266858286048e-05, + "loss": 1.7951, + "step": 30940 + }, + { + "epoch": 1.35, + "learning_rate": 1.441744052956405e-05, + "loss": 1.8046, + "step": 30950 + }, + { + "epoch": 1.36, + "learning_rate": 1.4399621732529337e-05, + "loss": 1.7914, + "step": 30960 + }, + { + "epoch": 1.36, + "learning_rate": 1.4381810475794482e-05, + "loss": 1.7874, + "step": 30970 + }, + { + "epoch": 1.36, + "learning_rate": 1.4364006767968386e-05, + "loss": 1.7981, + "step": 30980 + }, + { + "epoch": 1.36, + "learning_rate": 1.434621061765632e-05, + "loss": 1.8093, + "step": 30990 + }, + { + "epoch": 1.36, + "learning_rate": 1.432842203345987e-05, + "loss": 1.8229, + "step": 31000 + }, + { + "epoch": 1.36, + "eval_loss": 1.8032114505767822, + "eval_runtime": 12.0468, + "eval_samples_per_second": 340.007, + "eval_steps_per_second": 21.25, + "step": 31000 + }, + { + "epoch": 1.36, + "learning_rate": 1.4310641023976996e-05, + "loss": 1.8338, + "step": 31010 + }, + { + "epoch": 1.36, + "learning_rate": 1.4292867597801983e-05, + "loss": 1.8277, + "step": 31020 + }, + { + "epoch": 1.36, + "learning_rate": 1.427510176352547e-05, + "loss": 1.8275, + "step": 31030 + }, + { + "epoch": 1.36, + "learning_rate": 1.425734352973438e-05, + "loss": 1.7842, + "step": 31040 + }, + { + "epoch": 1.36, + "learning_rate": 1.4239592905012024e-05, + "loss": 1.8522, + "step": 31050 + }, + { + "epoch": 1.36, + "learning_rate": 1.4221849897937976e-05, + "loss": 1.8172, + "step": 31060 + }, + { + "epoch": 1.36, + "learning_rate": 1.4204114517088168e-05, + "loss": 1.7892, + "step": 31070 + }, + { + "epoch": 1.36, + "learning_rate": 1.4186386771034842e-05, + "loss": 1.8359, + "step": 31080 + }, + { + "epoch": 1.36, + "learning_rate": 1.4168666668346526e-05, + "loss": 1.8521, + "step": 31090 + }, + { + "epoch": 1.36, + "learning_rate": 1.4150954217588076e-05, + "loss": 1.7878, + "step": 31100 + }, + { + "epoch": 1.36, + "eval_loss": 1.8032019138336182, + "eval_runtime": 18.0994, + "eval_samples_per_second": 226.306, + "eval_steps_per_second": 14.144, + "step": 31100 + }, + { + "epoch": 1.36, + "learning_rate": 1.4133249427320644e-05, + "loss": 1.8111, + "step": 31110 + }, + { + "epoch": 1.36, + "learning_rate": 1.4115552306101688e-05, + "loss": 1.8599, + "step": 31120 + }, + { + "epoch": 1.36, + "learning_rate": 1.4097862862484926e-05, + "loss": 1.7708, + "step": 31130 + }, + { + "epoch": 1.36, + "learning_rate": 1.4080181105020406e-05, + "loss": 1.79, + "step": 31140 + }, + { + "epoch": 1.36, + "learning_rate": 1.4062507042254434e-05, + "loss": 1.8523, + "step": 31150 + }, + { + "epoch": 1.36, + "learning_rate": 1.4044840682729622e-05, + "loss": 1.7994, + "step": 31160 + }, + { + "epoch": 1.36, + "learning_rate": 1.4027182034984823e-05, + "loss": 1.8007, + "step": 31170 + }, + { + "epoch": 1.36, + "learning_rate": 1.4009531107555202e-05, + "loss": 1.7724, + "step": 31180 + }, + { + "epoch": 1.37, + "learning_rate": 1.3991887908972142e-05, + "loss": 1.7872, + "step": 31190 + }, + { + "epoch": 1.37, + "learning_rate": 1.397425244776336e-05, + "loss": 1.8078, + "step": 31200 + }, + { + "epoch": 1.37, + "eval_loss": 1.80314040184021, + "eval_runtime": 12.2739, + "eval_samples_per_second": 333.717, + "eval_steps_per_second": 20.857, + "step": 31200 + }, + { + "epoch": 1.37, + "learning_rate": 1.3956624732452768e-05, + "loss": 1.815, + "step": 31210 + }, + { + "epoch": 1.37, + "learning_rate": 1.3939004771560581e-05, + "loss": 1.8561, + "step": 31220 + }, + { + "epoch": 1.37, + "learning_rate": 1.392139257360322e-05, + "loss": 1.7922, + "step": 31230 + }, + { + "epoch": 1.37, + "learning_rate": 1.3903788147093393e-05, + "loss": 1.7885, + "step": 31240 + }, + { + "epoch": 1.37, + "learning_rate": 1.3886191500540042e-05, + "loss": 1.7975, + "step": 31250 + }, + { + "epoch": 1.37, + "learning_rate": 1.386860264244835e-05, + "loss": 1.8109, + "step": 31260 + }, + { + "epoch": 1.37, + "learning_rate": 1.3851021581319709e-05, + "loss": 1.7984, + "step": 31270 + }, + { + "epoch": 1.37, + "learning_rate": 1.3833448325651776e-05, + "loss": 1.8225, + "step": 31280 + }, + { + "epoch": 1.37, + "learning_rate": 1.3815882883938435e-05, + "loss": 1.7602, + "step": 31290 + }, + { + "epoch": 1.37, + "learning_rate": 1.3798325264669757e-05, + "loss": 1.786, + "step": 31300 + }, + { + "epoch": 1.37, + "eval_loss": 1.802952766418457, + "eval_runtime": 15.4326, + "eval_samples_per_second": 265.413, + "eval_steps_per_second": 16.588, + "step": 31300 + }, + { + "epoch": 1.37, + "learning_rate": 1.3780775476332083e-05, + "loss": 1.8364, + "step": 31310 + }, + { + "epoch": 1.37, + "learning_rate": 1.3763233527407915e-05, + "loss": 1.8026, + "step": 31320 + }, + { + "epoch": 1.37, + "learning_rate": 1.3745699426376008e-05, + "loss": 1.803, + "step": 31330 + }, + { + "epoch": 1.37, + "learning_rate": 1.3728173181711306e-05, + "loss": 1.808, + "step": 31340 + }, + { + "epoch": 1.37, + "learning_rate": 1.3710654801884973e-05, + "loss": 1.7936, + "step": 31350 + }, + { + "epoch": 1.37, + "learning_rate": 1.3693144295364332e-05, + "loss": 1.8277, + "step": 31360 + }, + { + "epoch": 1.37, + "learning_rate": 1.3675641670612937e-05, + "loss": 1.802, + "step": 31370 + }, + { + "epoch": 1.37, + "learning_rate": 1.3658146936090526e-05, + "loss": 1.8276, + "step": 31380 + }, + { + "epoch": 1.37, + "learning_rate": 1.3640660100253026e-05, + "loss": 1.7541, + "step": 31390 + }, + { + "epoch": 1.37, + "learning_rate": 1.3623181171552512e-05, + "loss": 1.8347, + "step": 31400 + }, + { + "epoch": 1.37, + "eval_loss": 1.8029532432556152, + "eval_runtime": 22.8254, + "eval_samples_per_second": 179.449, + "eval_steps_per_second": 11.216, + "step": 31400 + }, + { + "epoch": 1.37, + "learning_rate": 1.3605710158437284e-05, + "loss": 1.8161, + "step": 31410 + }, + { + "epoch": 1.38, + "learning_rate": 1.3588247069351793e-05, + "loss": 1.8135, + "step": 31420 + }, + { + "epoch": 1.38, + "learning_rate": 1.357079191273667e-05, + "loss": 1.8231, + "step": 31430 + }, + { + "epoch": 1.38, + "learning_rate": 1.3553344697028687e-05, + "loss": 1.8008, + "step": 31440 + }, + { + "epoch": 1.38, + "learning_rate": 1.353590543066082e-05, + "loss": 1.8755, + "step": 31450 + }, + { + "epoch": 1.38, + "learning_rate": 1.3518474122062139e-05, + "loss": 1.7891, + "step": 31460 + }, + { + "epoch": 1.38, + "learning_rate": 1.3501050779657955e-05, + "loss": 1.7602, + "step": 31470 + }, + { + "epoch": 1.38, + "learning_rate": 1.3483635411869648e-05, + "loss": 1.7987, + "step": 31480 + }, + { + "epoch": 1.38, + "learning_rate": 1.34662280271148e-05, + "loss": 1.8161, + "step": 31490 + }, + { + "epoch": 1.38, + "learning_rate": 1.3448828633807086e-05, + "loss": 1.7987, + "step": 31500 + }, + { + "epoch": 1.38, + "eval_loss": 1.8029508590698242, + "eval_runtime": 13.1282, + "eval_samples_per_second": 311.999, + "eval_steps_per_second": 19.5, + "step": 31500 + }, + { + "epoch": 1.38, + "learning_rate": 1.343143724035635e-05, + "loss": 1.8132, + "step": 31510 + }, + { + "epoch": 1.38, + "learning_rate": 1.3414053855168574e-05, + "loss": 1.7628, + "step": 31520 + }, + { + "epoch": 1.38, + "learning_rate": 1.3396678486645861e-05, + "loss": 1.8087, + "step": 31530 + }, + { + "epoch": 1.38, + "learning_rate": 1.3379311143186428e-05, + "loss": 1.8384, + "step": 31540 + }, + { + "epoch": 1.38, + "learning_rate": 1.3361951833184595e-05, + "loss": 1.8143, + "step": 31550 + }, + { + "epoch": 1.38, + "learning_rate": 1.3344600565030876e-05, + "loss": 1.784, + "step": 31560 + }, + { + "epoch": 1.38, + "learning_rate": 1.3327257347111809e-05, + "loss": 1.8613, + "step": 31570 + }, + { + "epoch": 1.38, + "learning_rate": 1.3309922187810103e-05, + "loss": 1.817, + "step": 31580 + }, + { + "epoch": 1.38, + "learning_rate": 1.3292595095504527e-05, + "loss": 1.8067, + "step": 31590 + }, + { + "epoch": 1.38, + "learning_rate": 1.3275276078569982e-05, + "loss": 1.8025, + "step": 31600 + }, + { + "epoch": 1.38, + "eval_loss": 1.8028916120529175, + "eval_runtime": 12.1557, + "eval_samples_per_second": 336.961, + "eval_steps_per_second": 21.06, + "step": 31600 + }, + { + "epoch": 1.38, + "learning_rate": 1.3257965145377457e-05, + "loss": 1.8405, + "step": 31610 + }, + { + "epoch": 1.38, + "learning_rate": 1.3240662304294046e-05, + "loss": 1.8196, + "step": 31620 + }, + { + "epoch": 1.38, + "learning_rate": 1.3223367563682894e-05, + "loss": 1.8064, + "step": 31630 + }, + { + "epoch": 1.39, + "learning_rate": 1.3206080931903264e-05, + "loss": 1.8144, + "step": 31640 + }, + { + "epoch": 1.39, + "learning_rate": 1.3188802417310497e-05, + "loss": 1.7973, + "step": 31650 + }, + { + "epoch": 1.39, + "learning_rate": 1.3171532028256013e-05, + "loss": 1.8425, + "step": 31660 + }, + { + "epoch": 1.39, + "learning_rate": 1.3154269773087273e-05, + "loss": 1.7956, + "step": 31670 + }, + { + "epoch": 1.39, + "learning_rate": 1.313701566014784e-05, + "loss": 1.8636, + "step": 31680 + }, + { + "epoch": 1.39, + "learning_rate": 1.3119769697777336e-05, + "loss": 1.8104, + "step": 31690 + }, + { + "epoch": 1.39, + "learning_rate": 1.3102531894311445e-05, + "loss": 1.8135, + "step": 31700 + }, + { + "epoch": 1.39, + "eval_loss": 1.8029296398162842, + "eval_runtime": 13.8361, + "eval_samples_per_second": 296.037, + "eval_steps_per_second": 18.502, + "step": 31700 + }, + { + "epoch": 1.39, + "learning_rate": 1.3085302258081881e-05, + "loss": 1.8348, + "step": 31710 + }, + { + "epoch": 1.39, + "learning_rate": 1.3068080797416454e-05, + "loss": 1.828, + "step": 31720 + }, + { + "epoch": 1.39, + "learning_rate": 1.3050867520638964e-05, + "loss": 1.8008, + "step": 31730 + }, + { + "epoch": 1.39, + "learning_rate": 1.3033662436069339e-05, + "loss": 1.8172, + "step": 31740 + }, + { + "epoch": 1.39, + "learning_rate": 1.3016465552023458e-05, + "loss": 1.839, + "step": 31750 + }, + { + "epoch": 1.39, + "learning_rate": 1.2999276876813308e-05, + "loss": 1.8226, + "step": 31760 + }, + { + "epoch": 1.39, + "learning_rate": 1.298209641874685e-05, + "loss": 1.8308, + "step": 31770 + }, + { + "epoch": 1.39, + "learning_rate": 1.2964924186128115e-05, + "loss": 1.8015, + "step": 31780 + }, + { + "epoch": 1.39, + "learning_rate": 1.2947760187257149e-05, + "loss": 1.792, + "step": 31790 + }, + { + "epoch": 1.39, + "learning_rate": 1.2930604430430003e-05, + "loss": 1.8778, + "step": 31800 + }, + { + "epoch": 1.39, + "eval_loss": 1.8028862476348877, + "eval_runtime": 12.2991, + "eval_samples_per_second": 333.031, + "eval_steps_per_second": 20.814, + "step": 31800 + }, + { + "epoch": 1.39, + "learning_rate": 1.2913456923938766e-05, + "loss": 1.8518, + "step": 31810 + }, + { + "epoch": 1.39, + "learning_rate": 1.2896317676071497e-05, + "loss": 1.8393, + "step": 31820 + }, + { + "epoch": 1.39, + "learning_rate": 1.2879186695112344e-05, + "loss": 1.806, + "step": 31830 + }, + { + "epoch": 1.39, + "learning_rate": 1.2862063989341374e-05, + "loss": 1.8155, + "step": 31840 + }, + { + "epoch": 1.39, + "learning_rate": 1.2844949567034711e-05, + "loss": 1.8201, + "step": 31850 + }, + { + "epoch": 1.39, + "learning_rate": 1.282784343646443e-05, + "loss": 1.8248, + "step": 31860 + }, + { + "epoch": 1.4, + "learning_rate": 1.2810745605898638e-05, + "loss": 1.7913, + "step": 31870 + }, + { + "epoch": 1.4, + "learning_rate": 1.279365608360141e-05, + "loss": 1.789, + "step": 31880 + }, + { + "epoch": 1.4, + "learning_rate": 1.2776574877832827e-05, + "loss": 1.7382, + "step": 31890 + }, + { + "epoch": 1.4, + "learning_rate": 1.2759501996848903e-05, + "loss": 1.815, + "step": 31900 + }, + { + "epoch": 1.4, + "eval_loss": 1.8028854131698608, + "eval_runtime": 11.9394, + "eval_samples_per_second": 343.066, + "eval_steps_per_second": 21.442, + "step": 31900 + }, + { + "epoch": 1.4, + "learning_rate": 1.2742437448901678e-05, + "loss": 1.7857, + "step": 31910 + }, + { + "epoch": 1.4, + "learning_rate": 1.2725381242239139e-05, + "loss": 1.791, + "step": 31920 + }, + { + "epoch": 1.4, + "learning_rate": 1.2708333385105263e-05, + "loss": 1.8129, + "step": 31930 + }, + { + "epoch": 1.4, + "learning_rate": 1.2691293885739948e-05, + "loss": 1.8506, + "step": 31940 + }, + { + "epoch": 1.4, + "learning_rate": 1.2674262752379095e-05, + "loss": 1.8381, + "step": 31950 + }, + { + "epoch": 1.4, + "learning_rate": 1.2657239993254544e-05, + "loss": 1.7665, + "step": 31960 + }, + { + "epoch": 1.4, + "learning_rate": 1.2640225616594102e-05, + "loss": 1.8019, + "step": 31970 + }, + { + "epoch": 1.4, + "learning_rate": 1.262321963062149e-05, + "loss": 1.823, + "step": 31980 + }, + { + "epoch": 1.4, + "learning_rate": 1.2606222043556413e-05, + "loss": 1.8168, + "step": 31990 + }, + { + "epoch": 1.4, + "learning_rate": 1.2589232863614483e-05, + "loss": 1.8158, + "step": 32000 + }, + { + "epoch": 1.4, + "eval_loss": 1.802783727645874, + "eval_runtime": 11.8236, + "eval_samples_per_second": 346.426, + "eval_steps_per_second": 21.652, + "step": 32000 + }, + { + "epoch": 1.4, + "learning_rate": 1.2572252099007274e-05, + "loss": 1.8244, + "step": 32010 + }, + { + "epoch": 1.4, + "learning_rate": 1.2555279757942292e-05, + "loss": 1.8271, + "step": 32020 + }, + { + "epoch": 1.4, + "learning_rate": 1.2538315848622944e-05, + "loss": 1.8189, + "step": 32030 + }, + { + "epoch": 1.4, + "learning_rate": 1.2521360379248586e-05, + "loss": 1.805, + "step": 32040 + }, + { + "epoch": 1.4, + "learning_rate": 1.2504413358014493e-05, + "loss": 1.798, + "step": 32050 + }, + { + "epoch": 1.4, + "learning_rate": 1.248747479311186e-05, + "loss": 1.8368, + "step": 32060 + }, + { + "epoch": 1.4, + "learning_rate": 1.2470544692727772e-05, + "loss": 1.8545, + "step": 32070 + }, + { + "epoch": 1.4, + "learning_rate": 1.2453623065045249e-05, + "loss": 1.8446, + "step": 32080 + }, + { + "epoch": 1.4, + "learning_rate": 1.2436709918243177e-05, + "loss": 1.8307, + "step": 32090 + }, + { + "epoch": 1.41, + "learning_rate": 1.2419805260496419e-05, + "loss": 1.8157, + "step": 32100 + }, + { + "epoch": 1.41, + "eval_loss": 1.8028050661087036, + "eval_runtime": 12.0893, + "eval_samples_per_second": 338.811, + "eval_steps_per_second": 21.176, + "step": 32100 + }, + { + "epoch": 1.41, + "learning_rate": 1.2402909099975648e-05, + "loss": 1.8266, + "step": 32110 + }, + { + "epoch": 1.41, + "learning_rate": 1.2386021444847493e-05, + "loss": 1.821, + "step": 32120 + }, + { + "epoch": 1.41, + "learning_rate": 1.236914230327442e-05, + "loss": 1.8437, + "step": 32130 + }, + { + "epoch": 1.41, + "learning_rate": 1.2352271683414824e-05, + "loss": 1.7839, + "step": 32140 + }, + { + "epoch": 1.41, + "learning_rate": 1.2335409593422967e-05, + "loss": 1.7678, + "step": 32150 + }, + { + "epoch": 1.41, + "learning_rate": 1.2318556041448987e-05, + "loss": 1.8096, + "step": 32160 + }, + { + "epoch": 1.41, + "learning_rate": 1.2301711035638879e-05, + "loss": 1.8305, + "step": 32170 + }, + { + "epoch": 1.41, + "learning_rate": 1.228487458413453e-05, + "loss": 1.8427, + "step": 32180 + }, + { + "epoch": 1.41, + "learning_rate": 1.2268046695073689e-05, + "loss": 1.8266, + "step": 32190 + }, + { + "epoch": 1.41, + "learning_rate": 1.225122737658997e-05, + "loss": 1.8164, + "step": 32200 + }, + { + "epoch": 1.41, + "eval_loss": 1.802794337272644, + "eval_runtime": 12.9249, + "eval_samples_per_second": 316.908, + "eval_steps_per_second": 19.807, + "step": 32200 + }, + { + "epoch": 1.41, + "learning_rate": 1.2234416636812812e-05, + "loss": 1.7873, + "step": 32210 + }, + { + "epoch": 1.41, + "learning_rate": 1.2217614483867556e-05, + "loss": 1.863, + "step": 32220 + }, + { + "epoch": 1.41, + "learning_rate": 1.220082092587534e-05, + "loss": 1.8111, + "step": 32230 + }, + { + "epoch": 1.41, + "learning_rate": 1.2184035970953213e-05, + "loss": 1.8467, + "step": 32240 + }, + { + "epoch": 1.41, + "learning_rate": 1.216725962721401e-05, + "loss": 1.8348, + "step": 32250 + }, + { + "epoch": 1.41, + "learning_rate": 1.2150491902766414e-05, + "loss": 1.8502, + "step": 32260 + }, + { + "epoch": 1.41, + "learning_rate": 1.2133732805714963e-05, + "loss": 1.7893, + "step": 32270 + }, + { + "epoch": 1.41, + "learning_rate": 1.2116982344160005e-05, + "loss": 1.8066, + "step": 32280 + }, + { + "epoch": 1.41, + "learning_rate": 1.2100240526197742e-05, + "loss": 1.835, + "step": 32290 + }, + { + "epoch": 1.41, + "learning_rate": 1.2083507359920152e-05, + "loss": 1.8376, + "step": 32300 + }, + { + "epoch": 1.41, + "eval_loss": 1.8027287721633911, + "eval_runtime": 11.923, + "eval_samples_per_second": 343.538, + "eval_steps_per_second": 21.471, + "step": 32300 + }, + { + "epoch": 1.41, + "learning_rate": 1.2066782853415066e-05, + "loss": 1.775, + "step": 32310 + }, + { + "epoch": 1.41, + "learning_rate": 1.2050067014766129e-05, + "loss": 1.7925, + "step": 32320 + }, + { + "epoch": 1.42, + "learning_rate": 1.2033359852052793e-05, + "loss": 1.8244, + "step": 32330 + }, + { + "epoch": 1.42, + "learning_rate": 1.2016661373350291e-05, + "loss": 1.7604, + "step": 32340 + }, + { + "epoch": 1.42, + "learning_rate": 1.1999971586729705e-05, + "loss": 1.8296, + "step": 32350 + }, + { + "epoch": 1.42, + "learning_rate": 1.1983290500257852e-05, + "loss": 1.7692, + "step": 32360 + }, + { + "epoch": 1.42, + "learning_rate": 1.1966618121997428e-05, + "loss": 1.8503, + "step": 32370 + }, + { + "epoch": 1.42, + "learning_rate": 1.1949954460006843e-05, + "loss": 1.8422, + "step": 32380 + }, + { + "epoch": 1.42, + "learning_rate": 1.1933299522340345e-05, + "loss": 1.8154, + "step": 32390 + }, + { + "epoch": 1.42, + "learning_rate": 1.1916653317047927e-05, + "loss": 1.8168, + "step": 32400 + }, + { + "epoch": 1.42, + "eval_loss": 1.8027868270874023, + "eval_runtime": 12.029, + "eval_samples_per_second": 340.51, + "eval_steps_per_second": 21.282, + "step": 32400 + }, + { + "epoch": 1.42, + "learning_rate": 1.1900015852175388e-05, + "loss": 1.7937, + "step": 32410 + }, + { + "epoch": 1.42, + "learning_rate": 1.1883387135764296e-05, + "loss": 1.822, + "step": 32420 + }, + { + "epoch": 1.42, + "learning_rate": 1.1866767175851998e-05, + "loss": 1.8388, + "step": 32430 + }, + { + "epoch": 1.42, + "learning_rate": 1.1850155980471576e-05, + "loss": 1.7809, + "step": 32440 + }, + { + "epoch": 1.42, + "learning_rate": 1.1833553557651913e-05, + "loss": 1.797, + "step": 32450 + }, + { + "epoch": 1.42, + "learning_rate": 1.1816959915417634e-05, + "loss": 1.841, + "step": 32460 + }, + { + "epoch": 1.42, + "learning_rate": 1.1800375061789142e-05, + "loss": 1.7972, + "step": 32470 + }, + { + "epoch": 1.42, + "learning_rate": 1.1783799004782554e-05, + "loss": 1.8139, + "step": 32480 + }, + { + "epoch": 1.42, + "learning_rate": 1.1767231752409753e-05, + "loss": 1.8422, + "step": 32490 + }, + { + "epoch": 1.42, + "learning_rate": 1.1750673312678374e-05, + "loss": 1.8647, + "step": 32500 + }, + { + "epoch": 1.42, + "eval_loss": 1.802736759185791, + "eval_runtime": 11.754, + "eval_samples_per_second": 348.478, + "eval_steps_per_second": 21.78, + "step": 32500 + }, + { + "epoch": 1.42, + "learning_rate": 1.173412369359179e-05, + "loss": 1.8132, + "step": 32510 + }, + { + "epoch": 1.42, + "learning_rate": 1.1717582903149114e-05, + "loss": 1.7861, + "step": 32520 + }, + { + "epoch": 1.42, + "learning_rate": 1.1701050949345166e-05, + "loss": 1.8471, + "step": 32530 + }, + { + "epoch": 1.42, + "learning_rate": 1.1684527840170532e-05, + "loss": 1.816, + "step": 32540 + }, + { + "epoch": 1.42, + "learning_rate": 1.1668013583611494e-05, + "loss": 1.8111, + "step": 32550 + }, + { + "epoch": 1.43, + "learning_rate": 1.1651508187650084e-05, + "loss": 1.7778, + "step": 32560 + }, + { + "epoch": 1.43, + "learning_rate": 1.1635011660264008e-05, + "loss": 1.8028, + "step": 32570 + }, + { + "epoch": 1.43, + "learning_rate": 1.161852400942673e-05, + "loss": 1.8269, + "step": 32580 + }, + { + "epoch": 1.43, + "learning_rate": 1.1602045243107399e-05, + "loss": 1.8296, + "step": 32590 + }, + { + "epoch": 1.43, + "learning_rate": 1.1585575369270884e-05, + "loss": 1.8497, + "step": 32600 + }, + { + "epoch": 1.43, + "eval_loss": 1.797649621963501, + "eval_runtime": 12.0427, + "eval_samples_per_second": 340.123, + "eval_steps_per_second": 21.258, + "step": 32600 + }, + { + "epoch": 1.43, + "learning_rate": 1.1569114395877735e-05, + "loss": 1.7884, + "step": 32610 + }, + { + "epoch": 1.43, + "learning_rate": 1.1552662330884225e-05, + "loss": 1.8655, + "step": 32620 + }, + { + "epoch": 1.43, + "learning_rate": 1.1536219182242281e-05, + "loss": 1.8085, + "step": 32630 + }, + { + "epoch": 1.43, + "learning_rate": 1.1519784957899591e-05, + "loss": 1.8425, + "step": 32640 + }, + { + "epoch": 1.43, + "learning_rate": 1.1503359665799452e-05, + "loss": 1.7779, + "step": 32650 + }, + { + "epoch": 1.43, + "learning_rate": 1.1486943313880902e-05, + "loss": 1.8367, + "step": 32660 + }, + { + "epoch": 1.43, + "learning_rate": 1.1470535910078613e-05, + "loss": 1.8409, + "step": 32670 + }, + { + "epoch": 1.43, + "learning_rate": 1.1454137462322965e-05, + "loss": 1.7905, + "step": 32680 + }, + { + "epoch": 1.43, + "learning_rate": 1.1437747978539995e-05, + "loss": 1.8579, + "step": 32690 + }, + { + "epoch": 1.43, + "learning_rate": 1.1421367466651418e-05, + "loss": 1.8186, + "step": 32700 + }, + { + "epoch": 1.43, + "eval_loss": 1.7975997924804688, + "eval_runtime": 12.4174, + "eval_samples_per_second": 329.86, + "eval_steps_per_second": 20.616, + "step": 32700 + }, + { + "epoch": 1.43, + "learning_rate": 1.14049959345746e-05, + "loss": 1.818, + "step": 32710 + }, + { + "epoch": 1.43, + "learning_rate": 1.138863339022255e-05, + "loss": 1.8632, + "step": 32720 + }, + { + "epoch": 1.43, + "learning_rate": 1.1372279841503986e-05, + "loss": 1.7715, + "step": 32730 + }, + { + "epoch": 1.43, + "learning_rate": 1.1355935296323226e-05, + "loss": 1.8207, + "step": 32740 + }, + { + "epoch": 1.43, + "learning_rate": 1.133959976258027e-05, + "loss": 1.825, + "step": 32750 + }, + { + "epoch": 1.43, + "learning_rate": 1.1323273248170733e-05, + "loss": 1.8418, + "step": 32760 + }, + { + "epoch": 1.43, + "learning_rate": 1.1306955760985888e-05, + "loss": 1.8071, + "step": 32770 + }, + { + "epoch": 1.43, + "learning_rate": 1.1290647308912653e-05, + "loss": 1.8258, + "step": 32780 + }, + { + "epoch": 1.44, + "learning_rate": 1.1274347899833573e-05, + "loss": 1.8413, + "step": 32790 + }, + { + "epoch": 1.44, + "learning_rate": 1.12580575416268e-05, + "loss": 1.8404, + "step": 32800 + }, + { + "epoch": 1.44, + "eval_loss": 1.797699213027954, + "eval_runtime": 13.4742, + "eval_samples_per_second": 303.988, + "eval_steps_per_second": 18.999, + "step": 32800 + }, + { + "epoch": 1.44, + "learning_rate": 1.1241776242166136e-05, + "loss": 1.8321, + "step": 32810 + }, + { + "epoch": 1.44, + "learning_rate": 1.1225504009321002e-05, + "loss": 1.8252, + "step": 32820 + }, + { + "epoch": 1.44, + "learning_rate": 1.120924085095644e-05, + "loss": 1.7925, + "step": 32830 + }, + { + "epoch": 1.44, + "learning_rate": 1.1192986774933079e-05, + "loss": 1.7826, + "step": 32840 + }, + { + "epoch": 1.44, + "learning_rate": 1.1176741789107188e-05, + "loss": 1.8035, + "step": 32850 + }, + { + "epoch": 1.44, + "learning_rate": 1.1160505901330634e-05, + "loss": 1.8092, + "step": 32860 + }, + { + "epoch": 1.44, + "learning_rate": 1.114427911945089e-05, + "loss": 1.8353, + "step": 32870 + }, + { + "epoch": 1.44, + "learning_rate": 1.1128061451311007e-05, + "loss": 1.7647, + "step": 32880 + }, + { + "epoch": 1.44, + "learning_rate": 1.1111852904749665e-05, + "loss": 1.8204, + "step": 32890 + }, + { + "epoch": 1.44, + "learning_rate": 1.1095653487601097e-05, + "loss": 1.7997, + "step": 32900 + }, + { + "epoch": 1.44, + "eval_loss": 1.7977631092071533, + "eval_runtime": 11.7853, + "eval_samples_per_second": 347.552, + "eval_steps_per_second": 21.722, + "step": 32900 + }, + { + "epoch": 1.44, + "learning_rate": 1.1079463207695147e-05, + "loss": 1.7835, + "step": 32910 + }, + { + "epoch": 1.44, + "learning_rate": 1.1063282072857246e-05, + "loss": 1.8071, + "step": 32920 + }, + { + "epoch": 1.44, + "learning_rate": 1.1047110090908403e-05, + "loss": 1.8531, + "step": 32930 + }, + { + "epoch": 1.44, + "learning_rate": 1.1030947269665177e-05, + "loss": 1.8684, + "step": 32940 + }, + { + "epoch": 1.44, + "learning_rate": 1.1014793616939731e-05, + "loss": 1.8099, + "step": 32950 + }, + { + "epoch": 1.44, + "learning_rate": 1.0998649140539795e-05, + "loss": 1.8266, + "step": 32960 + }, + { + "epoch": 1.44, + "learning_rate": 1.0982513848268635e-05, + "loss": 1.8116, + "step": 32970 + }, + { + "epoch": 1.44, + "learning_rate": 1.0966387747925117e-05, + "loss": 1.7971, + "step": 32980 + }, + { + "epoch": 1.44, + "learning_rate": 1.0950270847303609e-05, + "loss": 1.7828, + "step": 32990 + }, + { + "epoch": 1.44, + "learning_rate": 1.093416315419411e-05, + "loss": 1.8123, + "step": 33000 + }, + { + "epoch": 1.44, + "eval_loss": 1.7976471185684204, + "eval_runtime": 11.5471, + "eval_samples_per_second": 354.72, + "eval_steps_per_second": 22.17, + "step": 33000 + }, + { + "epoch": 1.45, + "learning_rate": 1.0918064676382096e-05, + "loss": 1.8024, + "step": 33010 + }, + { + "epoch": 1.45, + "learning_rate": 1.090197542164864e-05, + "loss": 1.7934, + "step": 33020 + }, + { + "epoch": 1.45, + "learning_rate": 1.0885895397770312e-05, + "loss": 1.8409, + "step": 33030 + }, + { + "epoch": 1.45, + "learning_rate": 1.086982461251926e-05, + "loss": 1.8419, + "step": 33040 + }, + { + "epoch": 1.45, + "learning_rate": 1.0853763073663145e-05, + "loss": 1.871, + "step": 33050 + }, + { + "epoch": 1.45, + "learning_rate": 1.0837710788965178e-05, + "loss": 1.8298, + "step": 33060 + }, + { + "epoch": 1.45, + "learning_rate": 1.0821667766184062e-05, + "loss": 1.8285, + "step": 33070 + }, + { + "epoch": 1.45, + "learning_rate": 1.0805634013074057e-05, + "loss": 1.7954, + "step": 33080 + }, + { + "epoch": 1.45, + "learning_rate": 1.0789609537384931e-05, + "loss": 1.8158, + "step": 33090 + }, + { + "epoch": 1.45, + "learning_rate": 1.0773594346861972e-05, + "loss": 1.8429, + "step": 33100 + }, + { + "epoch": 1.45, + "eval_loss": 1.797677755355835, + "eval_runtime": 11.8947, + "eval_samples_per_second": 344.354, + "eval_steps_per_second": 21.522, + "step": 33100 + }, + { + "epoch": 1.45, + "learning_rate": 1.0757588449245962e-05, + "loss": 1.804, + "step": 33110 + }, + { + "epoch": 1.45, + "learning_rate": 1.074159185227321e-05, + "loss": 1.8201, + "step": 33120 + }, + { + "epoch": 1.45, + "learning_rate": 1.0725604563675529e-05, + "loss": 1.8097, + "step": 33130 + }, + { + "epoch": 1.45, + "learning_rate": 1.0709626591180235e-05, + "loss": 1.7624, + "step": 33140 + }, + { + "epoch": 1.45, + "learning_rate": 1.0693657942510116e-05, + "loss": 1.8371, + "step": 33150 + }, + { + "epoch": 1.45, + "learning_rate": 1.067769862538349e-05, + "loss": 1.8046, + "step": 33160 + }, + { + "epoch": 1.45, + "learning_rate": 1.066174864751413e-05, + "loss": 1.8491, + "step": 33170 + }, + { + "epoch": 1.45, + "learning_rate": 1.064580801661132e-05, + "loss": 1.8462, + "step": 33180 + }, + { + "epoch": 1.45, + "learning_rate": 1.0629876740379831e-05, + "loss": 1.7958, + "step": 33190 + }, + { + "epoch": 1.45, + "learning_rate": 1.0613954826519874e-05, + "loss": 1.8365, + "step": 33200 + }, + { + "epoch": 1.45, + "eval_loss": 1.7976925373077393, + "eval_runtime": 11.8876, + "eval_samples_per_second": 344.56, + "eval_steps_per_second": 21.535, + "step": 33200 + }, + { + "epoch": 1.45, + "learning_rate": 1.059804228272717e-05, + "loss": 1.8233, + "step": 33210 + }, + { + "epoch": 1.45, + "learning_rate": 1.0582139116692907e-05, + "loss": 1.8124, + "step": 33220 + }, + { + "epoch": 1.45, + "learning_rate": 1.0566245336103738e-05, + "loss": 1.8068, + "step": 33230 + }, + { + "epoch": 1.46, + "learning_rate": 1.055036094864176e-05, + "loss": 1.8066, + "step": 33240 + }, + { + "epoch": 1.46, + "learning_rate": 1.0534485961984566e-05, + "loss": 1.7826, + "step": 33250 + }, + { + "epoch": 1.46, + "learning_rate": 1.0518620383805156e-05, + "loss": 1.8468, + "step": 33260 + }, + { + "epoch": 1.46, + "learning_rate": 1.0502764221772047e-05, + "loss": 1.8527, + "step": 33270 + }, + { + "epoch": 1.46, + "learning_rate": 1.0486917483549145e-05, + "loss": 1.8265, + "step": 33280 + }, + { + "epoch": 1.46, + "learning_rate": 1.0471080176795845e-05, + "loss": 1.7898, + "step": 33290 + }, + { + "epoch": 1.46, + "learning_rate": 1.0455252309166941e-05, + "loss": 1.8109, + "step": 33300 + }, + { + "epoch": 1.46, + "eval_loss": 1.7976725101470947, + "eval_runtime": 11.8849, + "eval_samples_per_second": 344.639, + "eval_steps_per_second": 21.54, + "step": 33300 + }, + { + "epoch": 1.46, + "learning_rate": 1.0439433888312702e-05, + "loss": 1.8551, + "step": 33310 + }, + { + "epoch": 1.46, + "learning_rate": 1.0423624921878813e-05, + "loss": 1.8199, + "step": 33320 + }, + { + "epoch": 1.46, + "learning_rate": 1.0407825417506404e-05, + "loss": 1.8212, + "step": 33330 + }, + { + "epoch": 1.46, + "learning_rate": 1.0392035382832e-05, + "loss": 1.8219, + "step": 33340 + }, + { + "epoch": 1.46, + "learning_rate": 1.0376254825487582e-05, + "loss": 1.75, + "step": 33350 + }, + { + "epoch": 1.46, + "learning_rate": 1.0360483753100536e-05, + "loss": 1.8406, + "step": 33360 + }, + { + "epoch": 1.46, + "learning_rate": 1.0344722173293676e-05, + "loss": 1.8288, + "step": 33370 + }, + { + "epoch": 1.46, + "learning_rate": 1.0328970093685193e-05, + "loss": 1.8457, + "step": 33380 + }, + { + "epoch": 1.46, + "learning_rate": 1.0313227521888739e-05, + "loss": 1.858, + "step": 33390 + }, + { + "epoch": 1.46, + "learning_rate": 1.0297494465513307e-05, + "loss": 1.8263, + "step": 33400 + }, + { + "epoch": 1.46, + "eval_loss": 1.7975435256958008, + "eval_runtime": 11.9333, + "eval_samples_per_second": 343.24, + "eval_steps_per_second": 21.453, + "step": 33400 + }, + { + "epoch": 1.46, + "learning_rate": 1.0281770932163362e-05, + "loss": 1.759, + "step": 33410 + }, + { + "epoch": 1.46, + "learning_rate": 1.0266056929438715e-05, + "loss": 1.8427, + "step": 33420 + }, + { + "epoch": 1.46, + "learning_rate": 1.0250352464934572e-05, + "loss": 1.8069, + "step": 33430 + }, + { + "epoch": 1.46, + "learning_rate": 1.0234657546241556e-05, + "loss": 1.7832, + "step": 33440 + }, + { + "epoch": 1.46, + "learning_rate": 1.0218972180945653e-05, + "loss": 1.8104, + "step": 33450 + }, + { + "epoch": 1.46, + "learning_rate": 1.0203296376628262e-05, + "loss": 1.8281, + "step": 33460 + }, + { + "epoch": 1.47, + "learning_rate": 1.0187630140866115e-05, + "loss": 1.7494, + "step": 33470 + }, + { + "epoch": 1.47, + "learning_rate": 1.0171973481231352e-05, + "loss": 1.8211, + "step": 33480 + }, + { + "epoch": 1.47, + "learning_rate": 1.0156326405291476e-05, + "loss": 1.8315, + "step": 33490 + }, + { + "epoch": 1.47, + "learning_rate": 1.014068892060937e-05, + "loss": 1.7995, + "step": 33500 + }, + { + "epoch": 1.47, + "eval_loss": 1.7975175380706787, + "eval_runtime": 11.6479, + "eval_samples_per_second": 351.65, + "eval_steps_per_second": 21.978, + "step": 33500 + }, + { + "epoch": 1.47, + "learning_rate": 1.0125061034743244e-05, + "loss": 1.7929, + "step": 33510 + }, + { + "epoch": 1.47, + "learning_rate": 1.0109442755246719e-05, + "loss": 1.8212, + "step": 33520 + }, + { + "epoch": 1.47, + "learning_rate": 1.0093834089668715e-05, + "loss": 1.8553, + "step": 33530 + }, + { + "epoch": 1.47, + "learning_rate": 1.0078235045553576e-05, + "loss": 1.7782, + "step": 33540 + }, + { + "epoch": 1.47, + "learning_rate": 1.0062645630440931e-05, + "loss": 1.8372, + "step": 33550 + }, + { + "epoch": 1.47, + "learning_rate": 1.0047065851865797e-05, + "loss": 1.8232, + "step": 33560 + }, + { + "epoch": 1.47, + "learning_rate": 1.0031495717358497e-05, + "loss": 1.8573, + "step": 33570 + }, + { + "epoch": 1.47, + "learning_rate": 1.0015935234444728e-05, + "loss": 1.8615, + "step": 33580 + }, + { + "epoch": 1.47, + "learning_rate": 1.00003844106455e-05, + "loss": 1.7935, + "step": 33590 + }, + { + "epoch": 1.47, + "learning_rate": 9.984843253477175e-06, + "loss": 1.7813, + "step": 33600 + }, + { + "epoch": 1.47, + "eval_loss": 1.7975555658340454, + "eval_runtime": 11.8662, + "eval_samples_per_second": 345.182, + "eval_steps_per_second": 21.574, + "step": 33600 + }, + { + "epoch": 1.47, + "learning_rate": 9.96931177045141e-06, + "loss": 1.8015, + "step": 33610 + }, + { + "epoch": 1.47, + "learning_rate": 9.95378996907521e-06, + "loss": 1.8304, + "step": 33620 + }, + { + "epoch": 1.47, + "learning_rate": 9.938277856850903e-06, + "loss": 1.8519, + "step": 33630 + }, + { + "epoch": 1.47, + "learning_rate": 9.922775441276128e-06, + "loss": 1.7916, + "step": 33640 + }, + { + "epoch": 1.47, + "learning_rate": 9.907282729843818e-06, + "loss": 1.8084, + "step": 33650 + }, + { + "epoch": 1.47, + "learning_rate": 9.891799730042259e-06, + "loss": 1.8415, + "step": 33660 + }, + { + "epoch": 1.47, + "learning_rate": 9.87632644935499e-06, + "loss": 1.8212, + "step": 33670 + }, + { + "epoch": 1.47, + "learning_rate": 9.860862895260891e-06, + "loss": 1.8296, + "step": 33680 + }, + { + "epoch": 1.47, + "learning_rate": 9.845409075234143e-06, + "loss": 1.8163, + "step": 33690 + }, + { + "epoch": 1.48, + "learning_rate": 9.829964996744183e-06, + "loss": 1.7989, + "step": 33700 + }, + { + "epoch": 1.48, + "eval_loss": 1.7975475788116455, + "eval_runtime": 11.8379, + "eval_samples_per_second": 346.008, + "eval_steps_per_second": 21.626, + "step": 33700 + }, + { + "epoch": 1.48, + "learning_rate": 9.814530667255777e-06, + "loss": 1.8075, + "step": 33710 + }, + { + "epoch": 1.48, + "learning_rate": 9.799106094228968e-06, + "loss": 1.8176, + "step": 33720 + }, + { + "epoch": 1.48, + "learning_rate": 9.783691285119089e-06, + "loss": 1.8055, + "step": 33730 + }, + { + "epoch": 1.48, + "learning_rate": 9.768286247376732e-06, + "loss": 1.7935, + "step": 33740 + }, + { + "epoch": 1.48, + "learning_rate": 9.752890988447783e-06, + "loss": 1.7922, + "step": 33750 + }, + { + "epoch": 1.48, + "learning_rate": 9.737505515773408e-06, + "loss": 1.8302, + "step": 33760 + }, + { + "epoch": 1.48, + "learning_rate": 9.722129836790034e-06, + "loss": 1.8262, + "step": 33770 + }, + { + "epoch": 1.48, + "learning_rate": 9.706763958929344e-06, + "loss": 1.8267, + "step": 33780 + }, + { + "epoch": 1.48, + "learning_rate": 9.691407889618314e-06, + "loss": 1.8011, + "step": 33790 + }, + { + "epoch": 1.48, + "learning_rate": 9.67606163627912e-06, + "loss": 1.7894, + "step": 33800 + }, + { + "epoch": 1.48, + "eval_loss": 1.7975398302078247, + "eval_runtime": 11.7714, + "eval_samples_per_second": 347.963, + "eval_steps_per_second": 21.748, + "step": 33800 + }, + { + "epoch": 1.48, + "learning_rate": 9.660725206329281e-06, + "loss": 1.8095, + "step": 33810 + }, + { + "epoch": 1.48, + "learning_rate": 9.645398607181486e-06, + "loss": 1.8024, + "step": 33820 + }, + { + "epoch": 1.48, + "learning_rate": 9.630081846243732e-06, + "loss": 1.857, + "step": 33830 + }, + { + "epoch": 1.48, + "learning_rate": 9.614774930919208e-06, + "loss": 1.8129, + "step": 33840 + }, + { + "epoch": 1.48, + "learning_rate": 9.599477868606384e-06, + "loss": 1.8335, + "step": 33850 + }, + { + "epoch": 1.48, + "learning_rate": 9.584190666698953e-06, + "loss": 1.7774, + "step": 33860 + }, + { + "epoch": 1.48, + "learning_rate": 9.56891333258586e-06, + "loss": 1.7853, + "step": 33870 + }, + { + "epoch": 1.48, + "learning_rate": 9.553645873651238e-06, + "loss": 1.7977, + "step": 33880 + }, + { + "epoch": 1.48, + "learning_rate": 9.538388297274485e-06, + "loss": 1.856, + "step": 33890 + }, + { + "epoch": 1.48, + "learning_rate": 9.523140610830224e-06, + "loss": 1.733, + "step": 33900 + }, + { + "epoch": 1.48, + "eval_loss": 1.7975311279296875, + "eval_runtime": 11.7099, + "eval_samples_per_second": 349.789, + "eval_steps_per_second": 21.862, + "step": 33900 + }, + { + "epoch": 1.48, + "learning_rate": 9.507902821688261e-06, + "loss": 1.8348, + "step": 33910 + }, + { + "epoch": 1.48, + "learning_rate": 9.492674937213659e-06, + "loss": 1.819, + "step": 33920 + }, + { + "epoch": 1.49, + "learning_rate": 9.477456964766666e-06, + "loss": 1.8414, + "step": 33930 + }, + { + "epoch": 1.49, + "learning_rate": 9.46224891170275e-06, + "loss": 1.8223, + "step": 33940 + }, + { + "epoch": 1.49, + "learning_rate": 9.447050785372585e-06, + "loss": 1.8267, + "step": 33950 + }, + { + "epoch": 1.49, + "learning_rate": 9.431862593122065e-06, + "loss": 1.8111, + "step": 33960 + }, + { + "epoch": 1.49, + "learning_rate": 9.416684342292233e-06, + "loss": 1.7722, + "step": 33970 + }, + { + "epoch": 1.49, + "learning_rate": 9.40151604021937e-06, + "loss": 1.8534, + "step": 33980 + }, + { + "epoch": 1.49, + "learning_rate": 9.38635769423494e-06, + "loss": 1.8012, + "step": 33990 + }, + { + "epoch": 1.49, + "learning_rate": 9.371209311665597e-06, + "loss": 1.788, + "step": 34000 + }, + { + "epoch": 1.49, + "eval_loss": 1.7975867986679077, + "eval_runtime": 11.6503, + "eval_samples_per_second": 351.579, + "eval_steps_per_second": 21.974, + "step": 34000 + }, + { + "epoch": 1.49, + "learning_rate": 9.356070899833157e-06, + "loss": 1.7657, + "step": 34010 + }, + { + "epoch": 1.49, + "learning_rate": 9.340942466054636e-06, + "loss": 1.8549, + "step": 34020 + }, + { + "epoch": 1.49, + "learning_rate": 9.32582401764223e-06, + "loss": 1.8214, + "step": 34030 + }, + { + "epoch": 1.49, + "learning_rate": 9.310715561903305e-06, + "loss": 1.8474, + "step": 34040 + }, + { + "epoch": 1.49, + "learning_rate": 9.295617106140383e-06, + "loss": 1.7682, + "step": 34050 + }, + { + "epoch": 1.49, + "learning_rate": 9.28052865765118e-06, + "loss": 1.895, + "step": 34060 + }, + { + "epoch": 1.49, + "learning_rate": 9.265450223728538e-06, + "loss": 1.8382, + "step": 34070 + }, + { + "epoch": 1.49, + "learning_rate": 9.250381811660492e-06, + "loss": 1.8462, + "step": 34080 + }, + { + "epoch": 1.49, + "learning_rate": 9.235323428730223e-06, + "loss": 1.847, + "step": 34090 + }, + { + "epoch": 1.49, + "learning_rate": 9.220275082216066e-06, + "loss": 1.8147, + "step": 34100 + }, + { + "epoch": 1.49, + "eval_loss": 1.7975001335144043, + "eval_runtime": 11.9247, + "eval_samples_per_second": 343.489, + "eval_steps_per_second": 21.468, + "step": 34100 + }, + { + "epoch": 1.49, + "learning_rate": 9.20523677939149e-06, + "loss": 1.8361, + "step": 34110 + }, + { + "epoch": 1.49, + "learning_rate": 9.190208527525124e-06, + "loss": 1.7882, + "step": 34120 + }, + { + "epoch": 1.49, + "learning_rate": 9.17519033388074e-06, + "loss": 1.7995, + "step": 34130 + }, + { + "epoch": 1.49, + "learning_rate": 9.160182205717249e-06, + "loss": 1.8362, + "step": 34140 + }, + { + "epoch": 1.49, + "learning_rate": 9.145184150288683e-06, + "loss": 1.8442, + "step": 34150 + }, + { + "epoch": 1.5, + "learning_rate": 9.130196174844202e-06, + "loss": 1.7843, + "step": 34160 + }, + { + "epoch": 1.5, + "learning_rate": 9.11521828662814e-06, + "loss": 1.8043, + "step": 34170 + }, + { + "epoch": 1.5, + "learning_rate": 9.100250492879893e-06, + "loss": 1.7229, + "step": 34180 + }, + { + "epoch": 1.5, + "learning_rate": 9.085292800834028e-06, + "loss": 1.8089, + "step": 34190 + }, + { + "epoch": 1.5, + "learning_rate": 9.070345217720187e-06, + "loss": 1.8034, + "step": 34200 + }, + { + "epoch": 1.5, + "eval_loss": 1.7975050210952759, + "eval_runtime": 11.8339, + "eval_samples_per_second": 346.124, + "eval_steps_per_second": 21.633, + "step": 34200 + }, + { + "epoch": 1.5, + "learning_rate": 9.055407750763159e-06, + "loss": 1.8414, + "step": 34210 + }, + { + "epoch": 1.5, + "learning_rate": 9.040480407182829e-06, + "loss": 1.833, + "step": 34220 + }, + { + "epoch": 1.5, + "learning_rate": 9.025563194194207e-06, + "loss": 1.8378, + "step": 34230 + }, + { + "epoch": 1.5, + "learning_rate": 9.010656119007366e-06, + "loss": 1.8454, + "step": 34240 + }, + { + "epoch": 1.5, + "learning_rate": 8.995759188827512e-06, + "loss": 1.8588, + "step": 34250 + }, + { + "epoch": 1.5, + "learning_rate": 8.980872410854948e-06, + "loss": 1.8293, + "step": 34260 + }, + { + "epoch": 1.5, + "learning_rate": 8.965995792285069e-06, + "loss": 1.7856, + "step": 34270 + }, + { + "epoch": 1.5, + "learning_rate": 8.951129340308327e-06, + "loss": 1.8215, + "step": 34280 + }, + { + "epoch": 1.5, + "learning_rate": 8.936273062110308e-06, + "loss": 1.7872, + "step": 34290 + }, + { + "epoch": 1.5, + "learning_rate": 8.921426964871632e-06, + "loss": 1.8376, + "step": 34300 + }, + { + "epoch": 1.5, + "eval_loss": 1.7974214553833008, + "eval_runtime": 11.8688, + "eval_samples_per_second": 345.108, + "eval_steps_per_second": 21.569, + "step": 34300 + }, + { + "epoch": 1.5, + "learning_rate": 8.906591055768057e-06, + "loss": 1.7878, + "step": 34310 + }, + { + "epoch": 1.5, + "learning_rate": 8.891765341970363e-06, + "loss": 1.8004, + "step": 34320 + }, + { + "epoch": 1.5, + "learning_rate": 8.876949830644432e-06, + "loss": 1.8028, + "step": 34330 + }, + { + "epoch": 1.5, + "learning_rate": 8.862144528951194e-06, + "loss": 1.7938, + "step": 34340 + }, + { + "epoch": 1.5, + "learning_rate": 8.84734944404666e-06, + "loss": 1.8146, + "step": 34350 + }, + { + "epoch": 1.5, + "learning_rate": 8.832564583081906e-06, + "loss": 1.7881, + "step": 34360 + }, + { + "epoch": 1.5, + "learning_rate": 8.81778995320306e-06, + "loss": 1.7841, + "step": 34370 + }, + { + "epoch": 1.5, + "learning_rate": 8.803025561551289e-06, + "loss": 1.8205, + "step": 34380 + }, + { + "epoch": 1.51, + "learning_rate": 8.78827141526284e-06, + "loss": 1.7638, + "step": 34390 + }, + { + "epoch": 1.51, + "learning_rate": 8.773527521468994e-06, + "loss": 1.7943, + "step": 34400 + }, + { + "epoch": 1.51, + "eval_loss": 1.7974566221237183, + "eval_runtime": 11.745, + "eval_samples_per_second": 348.745, + "eval_steps_per_second": 21.797, + "step": 34400 + }, + { + "epoch": 1.51, + "learning_rate": 8.758793887296068e-06, + "loss": 1.7452, + "step": 34410 + }, + { + "epoch": 1.51, + "learning_rate": 8.74407051986544e-06, + "loss": 1.8075, + "step": 34420 + }, + { + "epoch": 1.51, + "learning_rate": 8.729357426293494e-06, + "loss": 1.8142, + "step": 34430 + }, + { + "epoch": 1.51, + "learning_rate": 8.7146546136917e-06, + "loss": 1.8146, + "step": 34440 + }, + { + "epoch": 1.51, + "learning_rate": 8.699962089166505e-06, + "loss": 1.8135, + "step": 34450 + }, + { + "epoch": 1.51, + "learning_rate": 8.685279859819418e-06, + "loss": 1.7831, + "step": 34460 + }, + { + "epoch": 1.51, + "learning_rate": 8.670607932746948e-06, + "loss": 1.8292, + "step": 34470 + }, + { + "epoch": 1.51, + "learning_rate": 8.655946315040642e-06, + "loss": 1.7835, + "step": 34480 + }, + { + "epoch": 1.51, + "learning_rate": 8.64129501378706e-06, + "loss": 1.8169, + "step": 34490 + }, + { + "epoch": 1.51, + "learning_rate": 8.626654036067783e-06, + "loss": 1.8659, + "step": 34500 + }, + { + "epoch": 1.51, + "eval_loss": 1.797457218170166, + "eval_runtime": 11.691, + "eval_samples_per_second": 350.355, + "eval_steps_per_second": 21.897, + "step": 34500 + }, + { + "epoch": 1.51, + "learning_rate": 8.612023388959377e-06, + "loss": 1.8225, + "step": 34510 + }, + { + "epoch": 1.51, + "learning_rate": 8.597403079533434e-06, + "loss": 1.8664, + "step": 34520 + }, + { + "epoch": 1.51, + "learning_rate": 8.582793114856554e-06, + "loss": 1.7656, + "step": 34530 + }, + { + "epoch": 1.51, + "learning_rate": 8.568193501990337e-06, + "loss": 1.8224, + "step": 34540 + }, + { + "epoch": 1.51, + "learning_rate": 8.55360424799135e-06, + "loss": 1.8056, + "step": 34550 + }, + { + "epoch": 1.51, + "learning_rate": 8.539025359911197e-06, + "loss": 1.8298, + "step": 34560 + }, + { + "epoch": 1.51, + "learning_rate": 8.52445684479642e-06, + "loss": 1.79, + "step": 34570 + }, + { + "epoch": 1.51, + "learning_rate": 8.509898709688614e-06, + "loss": 1.7757, + "step": 34580 + }, + { + "epoch": 1.51, + "learning_rate": 8.495350961624296e-06, + "loss": 1.8096, + "step": 34590 + }, + { + "epoch": 1.51, + "learning_rate": 8.480813607635003e-06, + "loss": 1.7909, + "step": 34600 + }, + { + "epoch": 1.51, + "eval_loss": 1.797386884689331, + "eval_runtime": 12.0435, + "eval_samples_per_second": 340.101, + "eval_steps_per_second": 21.256, + "step": 34600 + }, + { + "epoch": 1.52, + "learning_rate": 8.466286654747212e-06, + "loss": 1.8285, + "step": 34610 + }, + { + "epoch": 1.52, + "learning_rate": 8.451770109982406e-06, + "loss": 1.8151, + "step": 34620 + }, + { + "epoch": 1.52, + "learning_rate": 8.437263980357028e-06, + "loss": 1.8071, + "step": 34630 + }, + { + "epoch": 1.52, + "learning_rate": 8.422768272882468e-06, + "loss": 1.7474, + "step": 34640 + }, + { + "epoch": 1.52, + "learning_rate": 8.408282994565098e-06, + "loss": 1.7808, + "step": 34650 + }, + { + "epoch": 1.52, + "learning_rate": 8.39380815240625e-06, + "loss": 1.8051, + "step": 34660 + }, + { + "epoch": 1.52, + "learning_rate": 8.379343753402214e-06, + "loss": 1.8293, + "step": 34670 + }, + { + "epoch": 1.52, + "learning_rate": 8.364889804544204e-06, + "loss": 1.7951, + "step": 34680 + }, + { + "epoch": 1.52, + "learning_rate": 8.350446312818424e-06, + "loss": 1.8187, + "step": 34690 + }, + { + "epoch": 1.52, + "learning_rate": 8.336013285205977e-06, + "loss": 1.7762, + "step": 34700 + }, + { + "epoch": 1.52, + "eval_loss": 1.7973322868347168, + "eval_runtime": 11.7769, + "eval_samples_per_second": 347.801, + "eval_steps_per_second": 21.738, + "step": 34700 + }, + { + "epoch": 1.52, + "learning_rate": 8.321590728682966e-06, + "loss": 1.7932, + "step": 34710 + }, + { + "epoch": 1.52, + "learning_rate": 8.307178650220382e-06, + "loss": 1.7925, + "step": 34720 + }, + { + "epoch": 1.52, + "learning_rate": 8.29277705678418e-06, + "loss": 1.7959, + "step": 34730 + }, + { + "epoch": 1.52, + "learning_rate": 8.27838595533523e-06, + "loss": 1.802, + "step": 34740 + }, + { + "epoch": 1.52, + "learning_rate": 8.26400535282934e-06, + "loss": 1.8553, + "step": 34750 + }, + { + "epoch": 1.52, + "learning_rate": 8.249635256217243e-06, + "loss": 1.7845, + "step": 34760 + }, + { + "epoch": 1.52, + "learning_rate": 8.23527567244461e-06, + "loss": 1.783, + "step": 34770 + }, + { + "epoch": 1.52, + "learning_rate": 8.220926608451988e-06, + "loss": 1.8221, + "step": 34780 + }, + { + "epoch": 1.52, + "learning_rate": 8.206588071174878e-06, + "loss": 1.7731, + "step": 34790 + }, + { + "epoch": 1.52, + "learning_rate": 8.192260067543685e-06, + "loss": 1.8195, + "step": 34800 + }, + { + "epoch": 1.52, + "eval_loss": 1.797337293624878, + "eval_runtime": 11.9416, + "eval_samples_per_second": 343.003, + "eval_steps_per_second": 21.438, + "step": 34800 + }, + { + "epoch": 1.52, + "learning_rate": 8.177942604483716e-06, + "loss": 1.7583, + "step": 34810 + }, + { + "epoch": 1.52, + "learning_rate": 8.163635688915178e-06, + "loss": 1.8383, + "step": 34820 + }, + { + "epoch": 1.52, + "learning_rate": 8.149339327753199e-06, + "loss": 1.783, + "step": 34830 + }, + { + "epoch": 1.53, + "learning_rate": 8.13505352790777e-06, + "loss": 1.848, + "step": 34840 + }, + { + "epoch": 1.53, + "learning_rate": 8.12077829628384e-06, + "loss": 1.8442, + "step": 34850 + }, + { + "epoch": 1.53, + "learning_rate": 8.106513639781188e-06, + "loss": 1.8794, + "step": 34860 + }, + { + "epoch": 1.53, + "learning_rate": 8.0922595652945e-06, + "loss": 1.7914, + "step": 34870 + }, + { + "epoch": 1.53, + "learning_rate": 8.078016079713358e-06, + "loss": 1.7973, + "step": 34880 + }, + { + "epoch": 1.53, + "learning_rate": 8.063783189922223e-06, + "loss": 1.7731, + "step": 34890 + }, + { + "epoch": 1.53, + "learning_rate": 8.049560902800438e-06, + "loss": 1.8355, + "step": 34900 + }, + { + "epoch": 1.53, + "eval_loss": 1.7973167896270752, + "eval_runtime": 11.7033, + "eval_samples_per_second": 349.986, + "eval_steps_per_second": 21.874, + "step": 34900 + }, + { + "epoch": 1.53, + "learning_rate": 8.035349225222201e-06, + "loss": 1.8303, + "step": 34910 + }, + { + "epoch": 1.53, + "learning_rate": 8.021148164056604e-06, + "loss": 1.7822, + "step": 34920 + }, + { + "epoch": 1.53, + "learning_rate": 8.006957726167596e-06, + "loss": 1.8136, + "step": 34930 + }, + { + "epoch": 1.53, + "learning_rate": 7.992777918414007e-06, + "loss": 1.8305, + "step": 34940 + }, + { + "epoch": 1.53, + "learning_rate": 7.978608747649503e-06, + "loss": 1.7985, + "step": 34950 + }, + { + "epoch": 1.53, + "learning_rate": 7.964450220722632e-06, + "loss": 1.8094, + "step": 34960 + }, + { + "epoch": 1.53, + "learning_rate": 7.95030234447677e-06, + "loss": 1.845, + "step": 34970 + }, + { + "epoch": 1.53, + "learning_rate": 7.936165125750192e-06, + "loss": 1.8332, + "step": 34980 + }, + { + "epoch": 1.53, + "learning_rate": 7.92203857137597e-06, + "loss": 1.8231, + "step": 34990 + }, + { + "epoch": 1.53, + "learning_rate": 7.907922688182065e-06, + "loss": 1.7771, + "step": 35000 + }, + { + "epoch": 1.53, + "eval_loss": 1.7973071336746216, + "eval_runtime": 11.7574, + "eval_samples_per_second": 348.375, + "eval_steps_per_second": 21.773, + "step": 35000 + }, + { + "epoch": 1.53, + "learning_rate": 7.893817482991233e-06, + "loss": 1.8482, + "step": 35010 + }, + { + "epoch": 1.53, + "learning_rate": 7.879722962621117e-06, + "loss": 1.8413, + "step": 35020 + }, + { + "epoch": 1.53, + "learning_rate": 7.865639133884164e-06, + "loss": 1.7733, + "step": 35030 + }, + { + "epoch": 1.53, + "learning_rate": 7.851566003587677e-06, + "loss": 1.786, + "step": 35040 + }, + { + "epoch": 1.53, + "learning_rate": 7.837503578533756e-06, + "loss": 1.7989, + "step": 35050 + }, + { + "epoch": 1.53, + "learning_rate": 7.823451865519353e-06, + "loss": 1.8485, + "step": 35060 + }, + { + "epoch": 1.54, + "learning_rate": 7.809410871336236e-06, + "loss": 1.8469, + "step": 35070 + }, + { + "epoch": 1.54, + "learning_rate": 7.795380602771e-06, + "loss": 1.7898, + "step": 35080 + }, + { + "epoch": 1.54, + "learning_rate": 7.781361066605038e-06, + "loss": 1.774, + "step": 35090 + }, + { + "epoch": 1.54, + "learning_rate": 7.767352269614553e-06, + "loss": 1.8445, + "step": 35100 + }, + { + "epoch": 1.54, + "eval_loss": 1.7972954511642456, + "eval_runtime": 11.9038, + "eval_samples_per_second": 344.093, + "eval_steps_per_second": 21.506, + "step": 35100 + }, + { + "epoch": 1.54, + "learning_rate": 7.75335421857058e-06, + "loss": 1.7657, + "step": 35110 + }, + { + "epoch": 1.54, + "learning_rate": 7.73936692023895e-06, + "loss": 1.8643, + "step": 35120 + }, + { + "epoch": 1.54, + "learning_rate": 7.725390381380298e-06, + "loss": 1.8439, + "step": 35130 + }, + { + "epoch": 1.54, + "learning_rate": 7.711424608750048e-06, + "loss": 1.8153, + "step": 35140 + }, + { + "epoch": 1.54, + "learning_rate": 7.697469609098427e-06, + "loss": 1.8191, + "step": 35150 + }, + { + "epoch": 1.54, + "learning_rate": 7.683525389170458e-06, + "loss": 1.7889, + "step": 35160 + }, + { + "epoch": 1.54, + "learning_rate": 7.66959195570597e-06, + "loss": 1.7851, + "step": 35170 + }, + { + "epoch": 1.54, + "learning_rate": 7.655669315439536e-06, + "loss": 1.8457, + "step": 35180 + }, + { + "epoch": 1.54, + "learning_rate": 7.641757475100542e-06, + "loss": 1.8443, + "step": 35190 + }, + { + "epoch": 1.54, + "learning_rate": 7.6278564414131515e-06, + "loss": 1.7745, + "step": 35200 + }, + { + "epoch": 1.54, + "eval_loss": 1.7972785234451294, + "eval_runtime": 11.7681, + "eval_samples_per_second": 348.059, + "eval_steps_per_second": 21.754, + "step": 35200 + }, + { + "epoch": 1.54, + "learning_rate": 7.613966221096313e-06, + "loss": 1.7634, + "step": 35210 + }, + { + "epoch": 1.54, + "learning_rate": 7.600086820863717e-06, + "loss": 1.8021, + "step": 35220 + }, + { + "epoch": 1.54, + "learning_rate": 7.586218247423864e-06, + "loss": 1.7733, + "step": 35230 + }, + { + "epoch": 1.54, + "learning_rate": 7.572360507479981e-06, + "loss": 1.841, + "step": 35240 + }, + { + "epoch": 1.54, + "learning_rate": 7.5585136077300895e-06, + "loss": 1.8506, + "step": 35250 + }, + { + "epoch": 1.54, + "learning_rate": 7.544677554866964e-06, + "loss": 1.7568, + "step": 35260 + }, + { + "epoch": 1.54, + "learning_rate": 7.530852355578142e-06, + "loss": 1.8283, + "step": 35270 + }, + { + "epoch": 1.54, + "learning_rate": 7.51703801654589e-06, + "loss": 1.8275, + "step": 35280 + }, + { + "epoch": 1.54, + "learning_rate": 7.503234544447244e-06, + "loss": 1.8043, + "step": 35290 + }, + { + "epoch": 1.55, + "learning_rate": 7.489441945953994e-06, + "loss": 1.8329, + "step": 35300 + }, + { + "epoch": 1.55, + "eval_loss": 1.7972854375839233, + "eval_runtime": 11.6885, + "eval_samples_per_second": 350.431, + "eval_steps_per_second": 21.902, + "step": 35300 + }, + { + "epoch": 1.55, + "learning_rate": 7.475660227732672e-06, + "loss": 1.8416, + "step": 35310 + }, + { + "epoch": 1.55, + "learning_rate": 7.461889396444537e-06, + "loss": 1.8516, + "step": 35320 + }, + { + "epoch": 1.55, + "learning_rate": 7.448129458745573e-06, + "loss": 1.8768, + "step": 35330 + }, + { + "epoch": 1.55, + "learning_rate": 7.434380421286559e-06, + "loss": 1.8093, + "step": 35340 + }, + { + "epoch": 1.55, + "learning_rate": 7.420642290712938e-06, + "loss": 1.8084, + "step": 35350 + }, + { + "epoch": 1.55, + "learning_rate": 7.4069150736649295e-06, + "loss": 1.8161, + "step": 35360 + }, + { + "epoch": 1.55, + "learning_rate": 7.393198776777437e-06, + "loss": 1.8265, + "step": 35370 + }, + { + "epoch": 1.55, + "learning_rate": 7.379493406680117e-06, + "loss": 1.7728, + "step": 35380 + }, + { + "epoch": 1.55, + "learning_rate": 7.365798969997334e-06, + "loss": 1.8431, + "step": 35390 + }, + { + "epoch": 1.55, + "learning_rate": 7.35211547334818e-06, + "loss": 1.8254, + "step": 35400 + }, + { + "epoch": 1.55, + "eval_loss": 1.7973430156707764, + "eval_runtime": 11.8006, + "eval_samples_per_second": 347.1, + "eval_steps_per_second": 21.694, + "step": 35400 + }, + { + "epoch": 1.55, + "learning_rate": 7.338442923346428e-06, + "loss": 1.8339, + "step": 35410 + }, + { + "epoch": 1.55, + "learning_rate": 7.324781326600588e-06, + "loss": 1.8182, + "step": 35420 + }, + { + "epoch": 1.55, + "learning_rate": 7.311130689713868e-06, + "loss": 1.8214, + "step": 35430 + }, + { + "epoch": 1.55, + "learning_rate": 7.297491019284188e-06, + "loss": 1.8257, + "step": 35440 + }, + { + "epoch": 1.55, + "learning_rate": 7.28386232190414e-06, + "loss": 1.8534, + "step": 35450 + }, + { + "epoch": 1.55, + "learning_rate": 7.2702446041610446e-06, + "loss": 1.8448, + "step": 35460 + }, + { + "epoch": 1.55, + "learning_rate": 7.2566378726368776e-06, + "loss": 1.783, + "step": 35470 + }, + { + "epoch": 1.55, + "learning_rate": 7.243042133908364e-06, + "loss": 1.8584, + "step": 35480 + }, + { + "epoch": 1.55, + "learning_rate": 7.2294573945468515e-06, + "loss": 1.826, + "step": 35490 + }, + { + "epoch": 1.55, + "learning_rate": 7.215883661118413e-06, + "loss": 1.8125, + "step": 35500 + }, + { + "epoch": 1.55, + "eval_loss": 1.7973475456237793, + "eval_runtime": 11.7172, + "eval_samples_per_second": 349.572, + "eval_steps_per_second": 21.848, + "step": 35500 + }, + { + "epoch": 1.55, + "learning_rate": 7.202320940183779e-06, + "loss": 1.8274, + "step": 35510 + }, + { + "epoch": 1.55, + "learning_rate": 7.1887692382983675e-06, + "loss": 1.7883, + "step": 35520 + }, + { + "epoch": 1.56, + "learning_rate": 7.175228562012272e-06, + "loss": 1.8196, + "step": 35530 + }, + { + "epoch": 1.56, + "learning_rate": 7.161698917870266e-06, + "loss": 1.802, + "step": 35540 + }, + { + "epoch": 1.56, + "learning_rate": 7.148180312411761e-06, + "loss": 1.786, + "step": 35550 + }, + { + "epoch": 1.56, + "learning_rate": 7.134672752170856e-06, + "loss": 1.787, + "step": 35560 + }, + { + "epoch": 1.56, + "learning_rate": 7.121176243676315e-06, + "loss": 1.802, + "step": 35570 + }, + { + "epoch": 1.56, + "learning_rate": 7.107690793451538e-06, + "loss": 1.8255, + "step": 35580 + }, + { + "epoch": 1.56, + "learning_rate": 7.094216408014606e-06, + "loss": 1.8201, + "step": 35590 + }, + { + "epoch": 1.56, + "learning_rate": 7.0807530938782216e-06, + "loss": 1.79, + "step": 35600 + }, + { + "epoch": 1.56, + "eval_loss": 1.7972863912582397, + "eval_runtime": 11.965, + "eval_samples_per_second": 342.331, + "eval_steps_per_second": 21.396, + "step": 35600 + }, + { + "epoch": 1.56, + "learning_rate": 7.067300857549785e-06, + "loss": 1.8406, + "step": 35610 + }, + { + "epoch": 1.56, + "learning_rate": 7.05385970553128e-06, + "loss": 1.8449, + "step": 35620 + }, + { + "epoch": 1.56, + "learning_rate": 7.04042964431939e-06, + "loss": 1.7923, + "step": 35630 + }, + { + "epoch": 1.56, + "learning_rate": 7.027010680405389e-06, + "loss": 1.7873, + "step": 35640 + }, + { + "epoch": 1.56, + "learning_rate": 7.0136028202752244e-06, + "loss": 1.8115, + "step": 35650 + }, + { + "epoch": 1.56, + "learning_rate": 7.000206070409455e-06, + "loss": 1.8063, + "step": 35660 + }, + { + "epoch": 1.56, + "learning_rate": 6.986820437283291e-06, + "loss": 1.8808, + "step": 35670 + }, + { + "epoch": 1.56, + "learning_rate": 6.973445927366538e-06, + "loss": 1.8388, + "step": 35680 + }, + { + "epoch": 1.56, + "learning_rate": 6.96008254712365e-06, + "loss": 1.8139, + "step": 35690 + }, + { + "epoch": 1.56, + "learning_rate": 6.946730303013694e-06, + "loss": 1.8185, + "step": 35700 + }, + { + "epoch": 1.56, + "eval_loss": 1.7972298860549927, + "eval_runtime": 11.8804, + "eval_samples_per_second": 344.768, + "eval_steps_per_second": 21.548, + "step": 35700 + }, + { + "epoch": 1.56, + "learning_rate": 6.933389201490366e-06, + "loss": 1.8188, + "step": 35710 + }, + { + "epoch": 1.56, + "learning_rate": 6.920059249001942e-06, + "loss": 1.8225, + "step": 35720 + }, + { + "epoch": 1.56, + "learning_rate": 6.906740451991357e-06, + "loss": 1.8263, + "step": 35730 + }, + { + "epoch": 1.56, + "learning_rate": 6.8934328168961e-06, + "loss": 1.8524, + "step": 35740 + }, + { + "epoch": 1.56, + "learning_rate": 6.880136350148327e-06, + "loss": 1.7724, + "step": 35750 + }, + { + "epoch": 1.57, + "learning_rate": 6.866851058174743e-06, + "loss": 1.8175, + "step": 35760 + }, + { + "epoch": 1.57, + "learning_rate": 6.853576947396683e-06, + "loss": 1.8516, + "step": 35770 + }, + { + "epoch": 1.57, + "learning_rate": 6.8403140242300554e-06, + "loss": 1.7983, + "step": 35780 + }, + { + "epoch": 1.57, + "learning_rate": 6.827062295085378e-06, + "loss": 1.8137, + "step": 35790 + }, + { + "epoch": 1.57, + "learning_rate": 6.813821766367759e-06, + "loss": 1.7658, + "step": 35800 + }, + { + "epoch": 1.57, + "eval_loss": 1.7972460985183716, + "eval_runtime": 11.7601, + "eval_samples_per_second": 348.295, + "eval_steps_per_second": 21.768, + "step": 35800 + }, + { + "epoch": 1.57, + "learning_rate": 6.800592444476874e-06, + "loss": 1.8095, + "step": 35810 + }, + { + "epoch": 1.57, + "learning_rate": 6.787374335807001e-06, + "loss": 1.7734, + "step": 35820 + }, + { + "epoch": 1.57, + "learning_rate": 6.774167446746992e-06, + "loss": 1.8284, + "step": 35830 + }, + { + "epoch": 1.57, + "learning_rate": 6.760971783680281e-06, + "loss": 1.8354, + "step": 35840 + }, + { + "epoch": 1.57, + "learning_rate": 6.74778735298486e-06, + "loss": 1.803, + "step": 35850 + }, + { + "epoch": 1.57, + "learning_rate": 6.7346141610333145e-06, + "loss": 1.8212, + "step": 35860 + }, + { + "epoch": 1.57, + "learning_rate": 6.721452214192765e-06, + "loss": 1.7941, + "step": 35870 + }, + { + "epoch": 1.57, + "learning_rate": 6.708301518824949e-06, + "loss": 1.767, + "step": 35880 + }, + { + "epoch": 1.57, + "learning_rate": 6.695162081286108e-06, + "loss": 1.8077, + "step": 35890 + }, + { + "epoch": 1.57, + "learning_rate": 6.682033907927087e-06, + "loss": 1.808, + "step": 35900 + }, + { + "epoch": 1.57, + "eval_loss": 1.7970843315124512, + "eval_runtime": 11.7297, + "eval_samples_per_second": 349.198, + "eval_steps_per_second": 21.825, + "step": 35900 + }, + { + "epoch": 1.57, + "learning_rate": 6.6689170050932545e-06, + "loss": 1.8053, + "step": 35910 + }, + { + "epoch": 1.57, + "learning_rate": 6.6558113791245475e-06, + "loss": 1.8368, + "step": 35920 + }, + { + "epoch": 1.57, + "learning_rate": 6.642717036355456e-06, + "loss": 1.817, + "step": 35930 + }, + { + "epoch": 1.57, + "learning_rate": 6.629633983115015e-06, + "loss": 1.7996, + "step": 35940 + }, + { + "epoch": 1.57, + "learning_rate": 6.616562225726782e-06, + "loss": 1.8287, + "step": 35950 + }, + { + "epoch": 1.57, + "learning_rate": 6.603501770508885e-06, + "loss": 1.8236, + "step": 35960 + }, + { + "epoch": 1.57, + "learning_rate": 6.590452623773968e-06, + "loss": 1.7861, + "step": 35970 + }, + { + "epoch": 1.58, + "learning_rate": 6.577414791829226e-06, + "loss": 1.7758, + "step": 35980 + }, + { + "epoch": 1.58, + "learning_rate": 6.564388280976365e-06, + "loss": 1.7861, + "step": 35990 + }, + { + "epoch": 1.58, + "learning_rate": 6.551373097511637e-06, + "loss": 1.812, + "step": 36000 + }, + { + "epoch": 1.58, + "eval_loss": 1.797182559967041, + "eval_runtime": 11.8246, + "eval_samples_per_second": 346.396, + "eval_steps_per_second": 21.65, + "step": 36000 + }, + { + "epoch": 1.58, + "learning_rate": 6.538369247725795e-06, + "loss": 1.8064, + "step": 36010 + }, + { + "epoch": 1.58, + "learning_rate": 6.52537673790416e-06, + "loss": 1.8005, + "step": 36020 + }, + { + "epoch": 1.58, + "learning_rate": 6.512395574326524e-06, + "loss": 1.8345, + "step": 36030 + }, + { + "epoch": 1.58, + "learning_rate": 6.49942576326721e-06, + "loss": 1.8291, + "step": 36040 + }, + { + "epoch": 1.58, + "learning_rate": 6.486467310995062e-06, + "loss": 1.8399, + "step": 36050 + }, + { + "epoch": 1.58, + "learning_rate": 6.473520223773433e-06, + "loss": 1.8127, + "step": 36060 + }, + { + "epoch": 1.58, + "learning_rate": 6.460584507860181e-06, + "loss": 1.8742, + "step": 36070 + }, + { + "epoch": 1.58, + "learning_rate": 6.447660169507653e-06, + "loss": 1.8144, + "step": 36080 + }, + { + "epoch": 1.58, + "learning_rate": 6.434747214962718e-06, + "loss": 1.8517, + "step": 36090 + }, + { + "epoch": 1.58, + "learning_rate": 6.421845650466734e-06, + "loss": 1.8358, + "step": 36100 + }, + { + "epoch": 1.58, + "eval_loss": 1.79728364944458, + "eval_runtime": 11.953, + "eval_samples_per_second": 342.677, + "eval_steps_per_second": 21.417, + "step": 36100 + }, + { + "epoch": 1.58, + "learning_rate": 6.408955482255563e-06, + "loss": 1.7886, + "step": 36110 + }, + { + "epoch": 1.58, + "learning_rate": 6.39607671655953e-06, + "loss": 1.8088, + "step": 36120 + }, + { + "epoch": 1.58, + "learning_rate": 6.3832093596034905e-06, + "loss": 1.7968, + "step": 36130 + }, + { + "epoch": 1.58, + "learning_rate": 6.3703534176067415e-06, + "loss": 1.8042, + "step": 36140 + }, + { + "epoch": 1.58, + "learning_rate": 6.357508896783093e-06, + "loss": 1.8517, + "step": 36150 + }, + { + "epoch": 1.58, + "learning_rate": 6.344675803340829e-06, + "loss": 1.8469, + "step": 36160 + }, + { + "epoch": 1.58, + "learning_rate": 6.331854143482715e-06, + "loss": 1.7896, + "step": 36170 + }, + { + "epoch": 1.58, + "learning_rate": 6.3190439234059695e-06, + "loss": 1.804, + "step": 36180 + }, + { + "epoch": 1.58, + "learning_rate": 6.306245149302297e-06, + "loss": 1.8063, + "step": 36190 + }, + { + "epoch": 1.58, + "learning_rate": 6.293457827357871e-06, + "loss": 1.8314, + "step": 36200 + }, + { + "epoch": 1.58, + "eval_loss": 1.7972148656845093, + "eval_runtime": 11.4888, + "eval_samples_per_second": 356.521, + "eval_steps_per_second": 22.283, + "step": 36200 + }, + { + "epoch": 1.59, + "learning_rate": 6.2806819637533365e-06, + "loss": 1.8709, + "step": 36210 + }, + { + "epoch": 1.59, + "learning_rate": 6.267917564663768e-06, + "loss": 1.7827, + "step": 36220 + }, + { + "epoch": 1.59, + "learning_rate": 6.255164636258737e-06, + "loss": 1.7999, + "step": 36230 + }, + { + "epoch": 1.59, + "learning_rate": 6.242423184702246e-06, + "loss": 1.8188, + "step": 36240 + }, + { + "epoch": 1.59, + "learning_rate": 6.229693216152774e-06, + "loss": 1.831, + "step": 36250 + }, + { + "epoch": 1.59, + "learning_rate": 6.216974736763212e-06, + "loss": 1.8664, + "step": 36260 + }, + { + "epoch": 1.59, + "learning_rate": 6.204267752680943e-06, + "loss": 1.7857, + "step": 36270 + }, + { + "epoch": 1.59, + "learning_rate": 6.191572270047754e-06, + "loss": 1.8188, + "step": 36280 + }, + { + "epoch": 1.59, + "learning_rate": 6.178888294999892e-06, + "loss": 1.8396, + "step": 36290 + }, + { + "epoch": 1.59, + "learning_rate": 6.166215833668055e-06, + "loss": 1.8235, + "step": 36300 + }, + { + "epoch": 1.59, + "eval_loss": 1.7972338199615479, + "eval_runtime": 11.7825, + "eval_samples_per_second": 347.634, + "eval_steps_per_second": 21.727, + "step": 36300 + }, + { + "epoch": 1.59, + "learning_rate": 6.1535548921773425e-06, + "loss": 1.8227, + "step": 36310 + }, + { + "epoch": 1.59, + "learning_rate": 6.140905476647312e-06, + "loss": 1.8282, + "step": 36320 + }, + { + "epoch": 1.59, + "learning_rate": 6.128267593191946e-06, + "loss": 1.8089, + "step": 36330 + }, + { + "epoch": 1.59, + "learning_rate": 6.115641247919653e-06, + "loss": 1.8254, + "step": 36340 + }, + { + "epoch": 1.59, + "learning_rate": 6.103026446933244e-06, + "loss": 1.8223, + "step": 36350 + }, + { + "epoch": 1.59, + "learning_rate": 6.090423196329991e-06, + "loss": 1.8172, + "step": 36360 + }, + { + "epoch": 1.59, + "learning_rate": 6.077831502201529e-06, + "loss": 1.7998, + "step": 36370 + }, + { + "epoch": 1.59, + "learning_rate": 6.0652513706339726e-06, + "loss": 1.848, + "step": 36380 + }, + { + "epoch": 1.59, + "learning_rate": 6.052682807707791e-06, + "loss": 1.8263, + "step": 36390 + }, + { + "epoch": 1.59, + "learning_rate": 6.040125819497895e-06, + "loss": 1.8287, + "step": 36400 + }, + { + "epoch": 1.59, + "eval_loss": 1.7972549200057983, + "eval_runtime": 11.8176, + "eval_samples_per_second": 346.601, + "eval_steps_per_second": 21.663, + "step": 36400 + }, + { + "epoch": 1.59, + "learning_rate": 6.027580412073575e-06, + "loss": 1.8571, + "step": 36410 + }, + { + "epoch": 1.59, + "learning_rate": 6.015046591498549e-06, + "loss": 1.8373, + "step": 36420 + }, + { + "epoch": 1.59, + "learning_rate": 6.002524363830925e-06, + "loss": 1.831, + "step": 36430 + }, + { + "epoch": 1.6, + "learning_rate": 5.990013735123212e-06, + "loss": 1.7875, + "step": 36440 + }, + { + "epoch": 1.6, + "learning_rate": 5.977514711422295e-06, + "loss": 1.8114, + "step": 36450 + }, + { + "epoch": 1.6, + "learning_rate": 5.965027298769468e-06, + "loss": 1.8116, + "step": 36460 + }, + { + "epoch": 1.6, + "learning_rate": 5.952551503200405e-06, + "loss": 1.8001, + "step": 36470 + }, + { + "epoch": 1.6, + "learning_rate": 5.940087330745181e-06, + "loss": 1.8408, + "step": 36480 + }, + { + "epoch": 1.6, + "learning_rate": 5.927634787428219e-06, + "loss": 1.82, + "step": 36490 + }, + { + "epoch": 1.6, + "learning_rate": 5.9151938792683545e-06, + "loss": 1.8077, + "step": 36500 + }, + { + "epoch": 1.6, + "eval_loss": 1.7971975803375244, + "eval_runtime": 12.6073, + "eval_samples_per_second": 324.89, + "eval_steps_per_second": 20.306, + "step": 36500 + }, + { + "epoch": 1.6, + "learning_rate": 5.902764612278788e-06, + "loss": 1.8095, + "step": 36510 + }, + { + "epoch": 1.6, + "learning_rate": 5.890346992467084e-06, + "loss": 1.8333, + "step": 36520 + }, + { + "epoch": 1.6, + "learning_rate": 5.877941025835194e-06, + "loss": 1.8566, + "step": 36530 + }, + { + "epoch": 1.6, + "learning_rate": 5.865546718379412e-06, + "loss": 1.8239, + "step": 36540 + }, + { + "epoch": 1.6, + "learning_rate": 5.853164076090429e-06, + "loss": 1.8163, + "step": 36550 + }, + { + "epoch": 1.6, + "learning_rate": 5.840793104953275e-06, + "loss": 1.7902, + "step": 36560 + }, + { + "epoch": 1.6, + "learning_rate": 5.8284338109473526e-06, + "loss": 1.8278, + "step": 36570 + }, + { + "epoch": 1.6, + "learning_rate": 5.8160862000464e-06, + "loss": 1.8097, + "step": 36580 + }, + { + "epoch": 1.6, + "learning_rate": 5.80375027821853e-06, + "loss": 1.8486, + "step": 36590 + }, + { + "epoch": 1.6, + "learning_rate": 5.791426051426197e-06, + "loss": 1.8051, + "step": 36600 + }, + { + "epoch": 1.6, + "eval_loss": 1.7971279621124268, + "eval_runtime": 11.9768, + "eval_samples_per_second": 341.994, + "eval_steps_per_second": 21.375, + "step": 36600 + }, + { + "epoch": 1.6, + "learning_rate": 5.779113525626213e-06, + "loss": 1.8213, + "step": 36610 + }, + { + "epoch": 1.6, + "learning_rate": 5.766812706769703e-06, + "loss": 1.7921, + "step": 36620 + }, + { + "epoch": 1.6, + "learning_rate": 5.754523600802175e-06, + "loss": 1.8111, + "step": 36630 + }, + { + "epoch": 1.6, + "learning_rate": 5.742246213663435e-06, + "loss": 1.8006, + "step": 36640 + }, + { + "epoch": 1.6, + "learning_rate": 5.729980551287668e-06, + "loss": 1.8423, + "step": 36650 + }, + { + "epoch": 1.6, + "learning_rate": 5.717726619603353e-06, + "loss": 1.7913, + "step": 36660 + }, + { + "epoch": 1.61, + "learning_rate": 5.705484424533332e-06, + "loss": 1.7874, + "step": 36670 + }, + { + "epoch": 1.61, + "learning_rate": 5.693253971994737e-06, + "loss": 1.7997, + "step": 36680 + }, + { + "epoch": 1.61, + "learning_rate": 5.681035267899055e-06, + "loss": 1.8268, + "step": 36690 + }, + { + "epoch": 1.61, + "learning_rate": 5.668828318152088e-06, + "loss": 1.8094, + "step": 36700 + }, + { + "epoch": 1.61, + "eval_loss": 1.797187328338623, + "eval_runtime": 11.9607, + "eval_samples_per_second": 342.454, + "eval_steps_per_second": 21.403, + "step": 36700 + }, + { + "epoch": 1.61, + "learning_rate": 5.656633128653958e-06, + "loss": 1.8127, + "step": 36710 + }, + { + "epoch": 1.61, + "learning_rate": 5.6444497052990805e-06, + "loss": 1.8294, + "step": 36720 + }, + { + "epoch": 1.61, + "learning_rate": 5.632278053976213e-06, + "loss": 1.8017, + "step": 36730 + }, + { + "epoch": 1.61, + "learning_rate": 5.6201181805684185e-06, + "loss": 1.8082, + "step": 36740 + }, + { + "epoch": 1.61, + "learning_rate": 5.6079700909530426e-06, + "loss": 1.8064, + "step": 36750 + }, + { + "epoch": 1.61, + "learning_rate": 5.595833791001772e-06, + "loss": 1.7524, + "step": 36760 + }, + { + "epoch": 1.61, + "learning_rate": 5.583709286580551e-06, + "loss": 1.7921, + "step": 36770 + }, + { + "epoch": 1.61, + "learning_rate": 5.571596583549677e-06, + "loss": 1.8524, + "step": 36780 + }, + { + "epoch": 1.61, + "learning_rate": 5.559495687763696e-06, + "loss": 1.7973, + "step": 36790 + }, + { + "epoch": 1.61, + "learning_rate": 5.547406605071474e-06, + "loss": 1.8229, + "step": 36800 + }, + { + "epoch": 1.61, + "eval_loss": 1.797210931777954, + "eval_runtime": 11.794, + "eval_samples_per_second": 347.297, + "eval_steps_per_second": 21.706, + "step": 36800 + }, + { + "epoch": 1.61, + "learning_rate": 5.535329341316149e-06, + "loss": 1.8275, + "step": 36810 + }, + { + "epoch": 1.61, + "learning_rate": 5.523263902335156e-06, + "loss": 1.8013, + "step": 36820 + }, + { + "epoch": 1.61, + "learning_rate": 5.511210293960222e-06, + "loss": 1.8436, + "step": 36830 + }, + { + "epoch": 1.61, + "learning_rate": 5.499168522017351e-06, + "loss": 1.7926, + "step": 36840 + }, + { + "epoch": 1.61, + "learning_rate": 5.4871385923268105e-06, + "loss": 1.8132, + "step": 36850 + }, + { + "epoch": 1.61, + "learning_rate": 5.475120510703163e-06, + "loss": 1.8226, + "step": 36860 + }, + { + "epoch": 1.61, + "learning_rate": 5.463114282955241e-06, + "loss": 1.7857, + "step": 36870 + }, + { + "epoch": 1.61, + "learning_rate": 5.451119914886143e-06, + "loss": 1.8093, + "step": 36880 + }, + { + "epoch": 1.61, + "learning_rate": 5.439137412293235e-06, + "loss": 1.8043, + "step": 36890 + }, + { + "epoch": 1.62, + "learning_rate": 5.427166780968155e-06, + "loss": 1.7664, + "step": 36900 + }, + { + "epoch": 1.62, + "eval_loss": 1.7971539497375488, + "eval_runtime": 11.76, + "eval_samples_per_second": 348.299, + "eval_steps_per_second": 21.769, + "step": 36900 + }, + { + "epoch": 1.62, + "learning_rate": 5.415208026696778e-06, + "loss": 1.8128, + "step": 36910 + }, + { + "epoch": 1.62, + "learning_rate": 5.403261155259289e-06, + "loss": 1.793, + "step": 36920 + }, + { + "epoch": 1.62, + "learning_rate": 5.391326172430078e-06, + "loss": 1.7863, + "step": 36930 + }, + { + "epoch": 1.62, + "learning_rate": 5.379403083977823e-06, + "loss": 1.8186, + "step": 36940 + }, + { + "epoch": 1.62, + "learning_rate": 5.367491895665422e-06, + "loss": 1.8128, + "step": 36950 + }, + { + "epoch": 1.62, + "learning_rate": 5.355592613250052e-06, + "loss": 1.84, + "step": 36960 + }, + { + "epoch": 1.62, + "learning_rate": 5.343705242483113e-06, + "loss": 1.8215, + "step": 36970 + }, + { + "epoch": 1.62, + "learning_rate": 5.331829789110276e-06, + "loss": 1.8136, + "step": 36980 + }, + { + "epoch": 1.62, + "learning_rate": 5.319966258871409e-06, + "loss": 1.8243, + "step": 36990 + }, + { + "epoch": 1.62, + "learning_rate": 5.308114657500649e-06, + "loss": 1.8066, + "step": 37000 + }, + { + "epoch": 1.62, + "eval_loss": 1.7971651554107666, + "eval_runtime": 11.8992, + "eval_samples_per_second": 344.224, + "eval_steps_per_second": 21.514, + "step": 37000 + }, + { + "epoch": 1.62, + "learning_rate": 5.2962749907263694e-06, + "loss": 1.8203, + "step": 37010 + }, + { + "epoch": 1.62, + "learning_rate": 5.284447264271147e-06, + "loss": 1.8284, + "step": 37020 + }, + { + "epoch": 1.62, + "learning_rate": 5.272631483851822e-06, + "loss": 1.8251, + "step": 37030 + }, + { + "epoch": 1.62, + "learning_rate": 5.260827655179419e-06, + "loss": 1.8302, + "step": 37040 + }, + { + "epoch": 1.62, + "learning_rate": 5.249035783959241e-06, + "loss": 1.808, + "step": 37050 + }, + { + "epoch": 1.62, + "learning_rate": 5.237255875890762e-06, + "loss": 1.8063, + "step": 37060 + }, + { + "epoch": 1.62, + "learning_rate": 5.225487936667701e-06, + "loss": 1.8649, + "step": 37070 + }, + { + "epoch": 1.62, + "learning_rate": 5.2137319719779755e-06, + "loss": 1.8114, + "step": 37080 + }, + { + "epoch": 1.62, + "learning_rate": 5.2019879875037255e-06, + "loss": 1.839, + "step": 37090 + }, + { + "epoch": 1.62, + "learning_rate": 5.190255988921298e-06, + "loss": 1.8004, + "step": 37100 + }, + { + "epoch": 1.62, + "eval_loss": 1.7972087860107422, + "eval_runtime": 11.9296, + "eval_samples_per_second": 343.347, + "eval_steps_per_second": 21.459, + "step": 37100 + }, + { + "epoch": 1.62, + "learning_rate": 5.178535981901261e-06, + "loss": 1.7963, + "step": 37110 + }, + { + "epoch": 1.62, + "learning_rate": 5.166827972108349e-06, + "loss": 1.7894, + "step": 37120 + }, + { + "epoch": 1.63, + "learning_rate": 5.155131965201535e-06, + "loss": 1.8562, + "step": 37130 + }, + { + "epoch": 1.63, + "learning_rate": 5.143447966833974e-06, + "loss": 1.8311, + "step": 37140 + }, + { + "epoch": 1.63, + "learning_rate": 5.1317759826530286e-06, + "loss": 1.8282, + "step": 37150 + }, + { + "epoch": 1.63, + "learning_rate": 5.12011601830023e-06, + "loss": 1.8286, + "step": 37160 + }, + { + "epoch": 1.63, + "learning_rate": 5.1084680794113245e-06, + "loss": 1.7696, + "step": 37170 + }, + { + "epoch": 1.63, + "learning_rate": 5.096832171616225e-06, + "loss": 1.8051, + "step": 37180 + }, + { + "epoch": 1.63, + "learning_rate": 5.08520830053906e-06, + "loss": 1.8004, + "step": 37190 + }, + { + "epoch": 1.63, + "learning_rate": 5.073596471798108e-06, + "loss": 1.8315, + "step": 37200 + }, + { + "epoch": 1.63, + "eval_loss": 1.7971980571746826, + "eval_runtime": 11.9874, + "eval_samples_per_second": 341.693, + "eval_steps_per_second": 21.356, + "step": 37200 + }, + { + "epoch": 1.63, + "learning_rate": 5.061996691005847e-06, + "loss": 1.8219, + "step": 37210 + }, + { + "epoch": 1.63, + "learning_rate": 5.0504089637689166e-06, + "loss": 1.7975, + "step": 37220 + }, + { + "epoch": 1.63, + "learning_rate": 5.03883329568814e-06, + "loss": 1.8664, + "step": 37230 + }, + { + "epoch": 1.63, + "learning_rate": 5.027269692358522e-06, + "loss": 1.8035, + "step": 37240 + }, + { + "epoch": 1.63, + "learning_rate": 5.015718159369207e-06, + "loss": 1.8155, + "step": 37250 + }, + { + "epoch": 1.63, + "learning_rate": 5.00417870230353e-06, + "loss": 1.8054, + "step": 37260 + }, + { + "epoch": 1.63, + "learning_rate": 4.992651326738983e-06, + "loss": 1.8541, + "step": 37270 + }, + { + "epoch": 1.63, + "learning_rate": 4.9811360382472245e-06, + "loss": 1.8034, + "step": 37280 + }, + { + "epoch": 1.63, + "learning_rate": 4.969632842394051e-06, + "loss": 1.7881, + "step": 37290 + }, + { + "epoch": 1.63, + "learning_rate": 4.958141744739436e-06, + "loss": 1.8364, + "step": 37300 + }, + { + "epoch": 1.63, + "eval_loss": 1.797140121459961, + "eval_runtime": 12.2451, + "eval_samples_per_second": 334.501, + "eval_steps_per_second": 20.906, + "step": 37300 + }, + { + "epoch": 1.63, + "learning_rate": 4.946662750837485e-06, + "loss": 1.8236, + "step": 37310 + }, + { + "epoch": 1.63, + "learning_rate": 4.935195866236473e-06, + "loss": 1.7661, + "step": 37320 + }, + { + "epoch": 1.63, + "learning_rate": 4.9237410964788124e-06, + "loss": 1.8246, + "step": 37330 + }, + { + "epoch": 1.63, + "learning_rate": 4.9122984471010675e-06, + "loss": 1.8118, + "step": 37340 + }, + { + "epoch": 1.64, + "learning_rate": 4.900867923633923e-06, + "loss": 1.8188, + "step": 37350 + }, + { + "epoch": 1.64, + "learning_rate": 4.889449531602227e-06, + "loss": 1.8013, + "step": 37360 + }, + { + "epoch": 1.64, + "learning_rate": 4.878043276524956e-06, + "loss": 1.78, + "step": 37370 + }, + { + "epoch": 1.64, + "learning_rate": 4.86664916391522e-06, + "loss": 1.834, + "step": 37380 + }, + { + "epoch": 1.64, + "learning_rate": 4.855267199280254e-06, + "loss": 1.8049, + "step": 37390 + }, + { + "epoch": 1.64, + "learning_rate": 4.843897388121422e-06, + "loss": 1.7917, + "step": 37400 + }, + { + "epoch": 1.64, + "eval_loss": 1.7971751689910889, + "eval_runtime": 12.9807, + "eval_samples_per_second": 315.545, + "eval_steps_per_second": 19.722, + "step": 37400 + }, + { + "epoch": 1.64, + "learning_rate": 4.832539735934227e-06, + "loss": 1.795, + "step": 37410 + }, + { + "epoch": 1.64, + "learning_rate": 4.821194248208287e-06, + "loss": 1.8131, + "step": 37420 + }, + { + "epoch": 1.64, + "learning_rate": 4.809860930427332e-06, + "loss": 1.7924, + "step": 37430 + }, + { + "epoch": 1.64, + "learning_rate": 4.798539788069227e-06, + "loss": 1.8176, + "step": 37440 + }, + { + "epoch": 1.64, + "learning_rate": 4.787230826605929e-06, + "loss": 1.8091, + "step": 37450 + }, + { + "epoch": 1.64, + "learning_rate": 4.775934051503523e-06, + "loss": 1.8302, + "step": 37460 + }, + { + "epoch": 1.64, + "learning_rate": 4.764649468222221e-06, + "loss": 1.7777, + "step": 37470 + }, + { + "epoch": 1.64, + "learning_rate": 4.753377082216298e-06, + "loss": 1.8047, + "step": 37480 + }, + { + "epoch": 1.64, + "learning_rate": 4.742116898934166e-06, + "loss": 1.8524, + "step": 37490 + }, + { + "epoch": 1.64, + "learning_rate": 4.73086892381834e-06, + "loss": 1.8025, + "step": 37500 + }, + { + "epoch": 1.64, + "eval_loss": 1.7971476316452026, + "eval_runtime": 12.0152, + "eval_samples_per_second": 340.901, + "eval_steps_per_second": 21.306, + "step": 37500 + } + ], + "logging_steps": 10, + "max_steps": 45688, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 500, + "total_flos": 9.930378174862983e+18, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +}