{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.6415689021187183, "eval_steps": 100, "global_step": 37500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.2000000000000002e-07, "loss": 3.306, "step": 1 }, { "epoch": 0.0, "learning_rate": 1.2000000000000002e-06, "loss": 3.5052, "step": 10 }, { "epoch": 0.0, "learning_rate": 2.4000000000000003e-06, "loss": 3.3971, "step": 20 }, { "epoch": 0.0, "learning_rate": 3.6e-06, "loss": 3.3713, "step": 30 }, { "epoch": 0.0, "learning_rate": 4.800000000000001e-06, "loss": 3.4039, "step": 40 }, { "epoch": 0.0, "learning_rate": 6e-06, "loss": 3.3735, "step": 50 }, { "epoch": 0.0, "learning_rate": 7.2e-06, "loss": 3.3117, "step": 60 }, { "epoch": 0.0, "learning_rate": 8.400000000000001e-06, "loss": 3.2573, "step": 70 }, { "epoch": 0.0, "learning_rate": 9.600000000000001e-06, "loss": 3.1882, "step": 80 }, { "epoch": 0.0, "learning_rate": 1.08e-05, "loss": 3.0751, "step": 90 }, { "epoch": 0.0, "learning_rate": 1.2e-05, "loss": 2.9993, "step": 100 }, { "epoch": 0.0, "eval_loss": 2.979447841644287, "eval_runtime": 11.7537, "eval_samples_per_second": 348.487, "eval_steps_per_second": 21.78, "step": 100 }, { "epoch": 0.0, "learning_rate": 1.32e-05, "loss": 2.9849, "step": 110 }, { "epoch": 0.01, "learning_rate": 1.44e-05, "loss": 2.9377, "step": 120 }, { "epoch": 0.01, "learning_rate": 1.56e-05, "loss": 2.916, "step": 130 }, { "epoch": 0.01, "learning_rate": 1.6800000000000002e-05, "loss": 2.8504, "step": 140 }, { "epoch": 0.01, "learning_rate": 1.8e-05, "loss": 2.8581, "step": 150 }, { "epoch": 0.01, "learning_rate": 1.9200000000000003e-05, "loss": 2.8143, "step": 160 }, { "epoch": 0.01, "learning_rate": 2.04e-05, "loss": 2.8103, "step": 170 }, { "epoch": 0.01, "learning_rate": 2.16e-05, "loss": 2.824, "step": 180 }, { "epoch": 0.01, "learning_rate": 2.2800000000000002e-05, "loss": 2.7853, "step": 190 }, { "epoch": 0.01, "learning_rate": 2.4e-05, "loss": 2.7613, "step": 200 }, { "epoch": 0.01, "eval_loss": 2.7339868545532227, "eval_runtime": 11.8471, "eval_samples_per_second": 345.74, "eval_steps_per_second": 21.609, "step": 200 }, { "epoch": 0.01, "learning_rate": 2.52e-05, "loss": 2.7264, "step": 210 }, { "epoch": 0.01, "learning_rate": 2.64e-05, "loss": 2.7563, "step": 220 }, { "epoch": 0.01, "learning_rate": 2.7600000000000003e-05, "loss": 2.7032, "step": 230 }, { "epoch": 0.01, "learning_rate": 2.88e-05, "loss": 2.6709, "step": 240 }, { "epoch": 0.01, "learning_rate": 3e-05, "loss": 2.7113, "step": 250 }, { "epoch": 0.01, "learning_rate": 3.12e-05, "loss": 2.652, "step": 260 }, { "epoch": 0.01, "learning_rate": 3.24e-05, "loss": 2.6599, "step": 270 }, { "epoch": 0.01, "learning_rate": 3.3600000000000004e-05, "loss": 2.6164, "step": 280 }, { "epoch": 0.01, "learning_rate": 3.48e-05, "loss": 2.6489, "step": 290 }, { "epoch": 0.01, "learning_rate": 3.6e-05, "loss": 2.6529, "step": 300 }, { "epoch": 0.01, "eval_loss": 2.6033902168273926, "eval_runtime": 11.9371, "eval_samples_per_second": 343.131, "eval_steps_per_second": 21.446, "step": 300 }, { "epoch": 0.01, "learning_rate": 3.72e-05, "loss": 2.6186, "step": 310 }, { "epoch": 0.01, "learning_rate": 3.8400000000000005e-05, "loss": 2.6158, "step": 320 }, { "epoch": 0.01, "learning_rate": 3.96e-05, "loss": 2.5911, "step": 330 }, { "epoch": 0.01, "learning_rate": 4.08e-05, "loss": 2.5856, "step": 340 }, { "epoch": 0.02, "learning_rate": 4.2e-05, "loss": 2.5734, "step": 350 }, { "epoch": 0.02, "learning_rate": 4.32e-05, "loss": 2.6221, "step": 360 }, { "epoch": 0.02, "learning_rate": 4.44e-05, "loss": 2.5921, "step": 370 }, { "epoch": 0.02, "learning_rate": 4.5600000000000004e-05, "loss": 2.5796, "step": 380 }, { "epoch": 0.02, "learning_rate": 4.6800000000000006e-05, "loss": 2.5515, "step": 390 }, { "epoch": 0.02, "learning_rate": 4.8e-05, "loss": 2.5531, "step": 400 }, { "epoch": 0.02, "eval_loss": 2.5212419033050537, "eval_runtime": 11.6859, "eval_samples_per_second": 350.507, "eval_steps_per_second": 21.907, "step": 400 }, { "epoch": 0.02, "learning_rate": 4.9199999999999997e-05, "loss": 2.5611, "step": 410 }, { "epoch": 0.02, "learning_rate": 5.04e-05, "loss": 2.5204, "step": 420 }, { "epoch": 0.02, "learning_rate": 5.16e-05, "loss": 2.5302, "step": 430 }, { "epoch": 0.02, "learning_rate": 5.28e-05, "loss": 2.453, "step": 440 }, { "epoch": 0.02, "learning_rate": 5.4000000000000005e-05, "loss": 2.5275, "step": 450 }, { "epoch": 0.02, "learning_rate": 5.520000000000001e-05, "loss": 2.4754, "step": 460 }, { "epoch": 0.02, "learning_rate": 5.6399999999999995e-05, "loss": 2.5214, "step": 470 }, { "epoch": 0.02, "learning_rate": 5.76e-05, "loss": 2.475, "step": 480 }, { "epoch": 0.02, "learning_rate": 5.88e-05, "loss": 2.4533, "step": 490 }, { "epoch": 0.02, "learning_rate": 6e-05, "loss": 2.4883, "step": 500 }, { "epoch": 0.02, "eval_loss": 2.4650485515594482, "eval_runtime": 11.6784, "eval_samples_per_second": 350.734, "eval_steps_per_second": 21.921, "step": 500 }, { "epoch": 0.02, "learning_rate": 5.999999274988748e-05, "loss": 2.4537, "step": 510 }, { "epoch": 0.02, "learning_rate": 5.9999970999553426e-05, "loss": 2.488, "step": 520 }, { "epoch": 0.02, "learning_rate": 5.9999934749008346e-05, "loss": 2.5225, "step": 530 }, { "epoch": 0.02, "learning_rate": 5.999988399826977e-05, "loss": 2.4376, "step": 540 }, { "epoch": 0.02, "learning_rate": 5.999981874736221e-05, "loss": 2.4825, "step": 550 }, { "epoch": 0.02, "learning_rate": 5.9999738996317224e-05, "loss": 2.4779, "step": 560 }, { "epoch": 0.02, "learning_rate": 5.999964474517335e-05, "loss": 2.4146, "step": 570 }, { "epoch": 0.03, "learning_rate": 5.9999535993976145e-05, "loss": 2.4468, "step": 580 }, { "epoch": 0.03, "learning_rate": 5.999941274277817e-05, "loss": 2.4516, "step": 590 }, { "epoch": 0.03, "learning_rate": 5.9999274991639004e-05, "loss": 2.4213, "step": 600 }, { "epoch": 0.03, "eval_loss": 2.411180019378662, "eval_runtime": 11.7741, "eval_samples_per_second": 347.881, "eval_steps_per_second": 21.743, "step": 600 }, { "epoch": 0.03, "learning_rate": 5.999912274062522e-05, "loss": 2.4222, "step": 610 }, { "epoch": 0.03, "learning_rate": 5.999895598981041e-05, "loss": 2.3641, "step": 620 }, { "epoch": 0.03, "learning_rate": 5.999877473927517e-05, "loss": 2.4214, "step": 630 }, { "epoch": 0.03, "learning_rate": 5.999857898910712e-05, "loss": 2.3662, "step": 640 }, { "epoch": 0.03, "learning_rate": 5.999836873940085e-05, "loss": 2.4183, "step": 650 }, { "epoch": 0.03, "learning_rate": 5.9998143990258e-05, "loss": 2.4012, "step": 660 }, { "epoch": 0.03, "learning_rate": 5.9997904741787194e-05, "loss": 2.4289, "step": 670 }, { "epoch": 0.03, "learning_rate": 5.999765099410407e-05, "loss": 2.3593, "step": 680 }, { "epoch": 0.03, "learning_rate": 5.999738274733128e-05, "loss": 2.4153, "step": 690 }, { "epoch": 0.03, "learning_rate": 5.999710000159846e-05, "loss": 2.4046, "step": 700 }, { "epoch": 0.03, "eval_loss": 2.3694727420806885, "eval_runtime": 12.1658, "eval_samples_per_second": 336.681, "eval_steps_per_second": 21.043, "step": 700 }, { "epoch": 0.03, "learning_rate": 5.999680275704231e-05, "loss": 2.3952, "step": 710 }, { "epoch": 0.03, "learning_rate": 5.999649101380646e-05, "loss": 2.381, "step": 720 }, { "epoch": 0.03, "learning_rate": 5.999616477204161e-05, "loss": 2.3835, "step": 730 }, { "epoch": 0.03, "learning_rate": 5.9995824031905446e-05, "loss": 2.3664, "step": 740 }, { "epoch": 0.03, "learning_rate": 5.9995468793562655e-05, "loss": 2.3001, "step": 750 }, { "epoch": 0.03, "learning_rate": 5.9995099057184944e-05, "loss": 2.3884, "step": 760 }, { "epoch": 0.03, "learning_rate": 5.999471482295101e-05, "loss": 2.3915, "step": 770 }, { "epoch": 0.03, "learning_rate": 5.999431609104658e-05, "loss": 2.3674, "step": 780 }, { "epoch": 0.03, "learning_rate": 5.999390286166438e-05, "loss": 2.3697, "step": 790 }, { "epoch": 0.04, "learning_rate": 5.9993475135004135e-05, "loss": 2.3509, "step": 800 }, { "epoch": 0.04, "eval_loss": 2.3388166427612305, "eval_runtime": 11.6279, "eval_samples_per_second": 352.257, "eval_steps_per_second": 22.016, "step": 800 }, { "epoch": 0.04, "learning_rate": 5.9993032911272584e-05, "loss": 2.3633, "step": 810 }, { "epoch": 0.04, "learning_rate": 5.999257619068346e-05, "loss": 2.3405, "step": 820 }, { "epoch": 0.04, "learning_rate": 5.9992104973457536e-05, "loss": 2.3684, "step": 830 }, { "epoch": 0.04, "learning_rate": 5.9991619259822554e-05, "loss": 2.3348, "step": 840 }, { "epoch": 0.04, "learning_rate": 5.999111905001329e-05, "loss": 2.396, "step": 850 }, { "epoch": 0.04, "learning_rate": 5.999060434427151e-05, "loss": 2.3006, "step": 860 }, { "epoch": 0.04, "learning_rate": 5.999007514284599e-05, "loss": 2.3543, "step": 870 }, { "epoch": 0.04, "learning_rate": 5.998953144599253e-05, "loss": 2.3564, "step": 880 }, { "epoch": 0.04, "learning_rate": 5.9988973253973895e-05, "loss": 2.3265, "step": 890 }, { "epoch": 0.04, "learning_rate": 5.9988400567059905e-05, "loss": 2.3153, "step": 900 }, { "epoch": 0.04, "eval_loss": 2.309986114501953, "eval_runtime": 11.6011, "eval_samples_per_second": 353.071, "eval_steps_per_second": 22.067, "step": 900 }, { "epoch": 0.04, "learning_rate": 5.9987813385527355e-05, "loss": 2.2815, "step": 910 }, { "epoch": 0.04, "learning_rate": 5.998721170966004e-05, "loss": 2.3601, "step": 920 }, { "epoch": 0.04, "learning_rate": 5.99865955397488e-05, "loss": 2.3063, "step": 930 }, { "epoch": 0.04, "learning_rate": 5.998596487609144e-05, "loss": 2.3125, "step": 940 }, { "epoch": 0.04, "learning_rate": 5.998531971899279e-05, "loss": 2.2972, "step": 950 }, { "epoch": 0.04, "learning_rate": 5.9984660068764675e-05, "loss": 2.2984, "step": 960 }, { "epoch": 0.04, "learning_rate": 5.998398592572594e-05, "loss": 2.2664, "step": 970 }, { "epoch": 0.04, "learning_rate": 5.998329729020241e-05, "loss": 2.3188, "step": 980 }, { "epoch": 0.04, "learning_rate": 5.998259416252695e-05, "loss": 2.2963, "step": 990 }, { "epoch": 0.04, "learning_rate": 5.99818765430394e-05, "loss": 2.3158, "step": 1000 }, { "epoch": 0.04, "eval_loss": 2.2877612113952637, "eval_runtime": 12.462, "eval_samples_per_second": 328.678, "eval_steps_per_second": 20.542, "step": 1000 }, { "epoch": 0.04, "learning_rate": 5.998114443208661e-05, "loss": 2.313, "step": 1010 }, { "epoch": 0.04, "learning_rate": 5.998039783002245e-05, "loss": 2.3052, "step": 1020 }, { "epoch": 0.05, "learning_rate": 5.997963673720778e-05, "loss": 2.3348, "step": 1030 }, { "epoch": 0.05, "learning_rate": 5.997886115401047e-05, "loss": 2.3304, "step": 1040 }, { "epoch": 0.05, "learning_rate": 5.997807108080538e-05, "loss": 2.2719, "step": 1050 }, { "epoch": 0.05, "learning_rate": 5.9977266517974396e-05, "loss": 2.2937, "step": 1060 }, { "epoch": 0.05, "learning_rate": 5.9976447465906386e-05, "loss": 2.2919, "step": 1070 }, { "epoch": 0.05, "learning_rate": 5.997561392499724e-05, "loss": 2.2525, "step": 1080 }, { "epoch": 0.05, "learning_rate": 5.997476589564984e-05, "loss": 2.2822, "step": 1090 }, { "epoch": 0.05, "learning_rate": 5.997390337827408e-05, "loss": 2.2833, "step": 1100 }, { "epoch": 0.05, "eval_loss": 2.267622947692871, "eval_runtime": 11.9236, "eval_samples_per_second": 343.521, "eval_steps_per_second": 21.47, "step": 1100 }, { "epoch": 0.05, "learning_rate": 5.997302637328683e-05, "loss": 2.2687, "step": 1110 }, { "epoch": 0.05, "learning_rate": 5.9972134881112e-05, "loss": 2.2636, "step": 1120 }, { "epoch": 0.05, "learning_rate": 5.997122890218047e-05, "loss": 2.3226, "step": 1130 }, { "epoch": 0.05, "learning_rate": 5.997030843693016e-05, "loss": 2.2599, "step": 1140 }, { "epoch": 0.05, "learning_rate": 5.996937348580595e-05, "loss": 2.2566, "step": 1150 }, { "epoch": 0.05, "learning_rate": 5.9968424049259735e-05, "loss": 2.2997, "step": 1160 }, { "epoch": 0.05, "learning_rate": 5.9967460127750446e-05, "loss": 2.3179, "step": 1170 }, { "epoch": 0.05, "learning_rate": 5.996648172174395e-05, "loss": 2.2863, "step": 1180 }, { "epoch": 0.05, "learning_rate": 5.996548883171317e-05, "loss": 2.267, "step": 1190 }, { "epoch": 0.05, "learning_rate": 5.9964481458138e-05, "loss": 2.2207, "step": 1200 }, { "epoch": 0.05, "eval_loss": 2.2525129318237305, "eval_runtime": 12.0941, "eval_samples_per_second": 338.679, "eval_steps_per_second": 21.167, "step": 1200 }, { "epoch": 0.05, "learning_rate": 5.996345960150536e-05, "loss": 2.2595, "step": 1210 }, { "epoch": 0.05, "learning_rate": 5.9962423262309146e-05, "loss": 2.316, "step": 1220 }, { "epoch": 0.05, "learning_rate": 5.996137244105027e-05, "loss": 2.258, "step": 1230 }, { "epoch": 0.05, "learning_rate": 5.996030713823661e-05, "loss": 2.2726, "step": 1240 }, { "epoch": 0.05, "learning_rate": 5.995922735438311e-05, "loss": 2.249, "step": 1250 }, { "epoch": 0.06, "learning_rate": 5.995813309001164e-05, "loss": 2.2319, "step": 1260 }, { "epoch": 0.06, "learning_rate": 5.995702434565112e-05, "loss": 2.2444, "step": 1270 }, { "epoch": 0.06, "learning_rate": 5.995590112183745e-05, "loss": 2.258, "step": 1280 }, { "epoch": 0.06, "learning_rate": 5.995476341911353e-05, "loss": 2.296, "step": 1290 }, { "epoch": 0.06, "learning_rate": 5.995361123802926e-05, "loss": 2.2656, "step": 1300 }, { "epoch": 0.06, "eval_loss": 2.233938694000244, "eval_runtime": 11.9568, "eval_samples_per_second": 342.566, "eval_steps_per_second": 21.41, "step": 1300 }, { "epoch": 0.06, "learning_rate": 5.995244457914152e-05, "loss": 2.2022, "step": 1310 }, { "epoch": 0.06, "learning_rate": 5.9951263443014225e-05, "loss": 2.2465, "step": 1320 }, { "epoch": 0.06, "learning_rate": 5.9950067830218256e-05, "loss": 2.2018, "step": 1330 }, { "epoch": 0.06, "learning_rate": 5.99488577413315e-05, "loss": 2.2699, "step": 1340 }, { "epoch": 0.06, "learning_rate": 5.994763317693885e-05, "loss": 2.2657, "step": 1350 }, { "epoch": 0.06, "learning_rate": 5.994639413763218e-05, "loss": 2.2484, "step": 1360 }, { "epoch": 0.06, "learning_rate": 5.994514062401038e-05, "loss": 2.2652, "step": 1370 }, { "epoch": 0.06, "learning_rate": 5.9943872636679305e-05, "loss": 2.2168, "step": 1380 }, { "epoch": 0.06, "learning_rate": 5.9942590176251846e-05, "loss": 2.2863, "step": 1390 }, { "epoch": 0.06, "learning_rate": 5.9941293243347845e-05, "loss": 2.1976, "step": 1400 }, { "epoch": 0.06, "eval_loss": 2.2203006744384766, "eval_runtime": 16.8316, "eval_samples_per_second": 243.352, "eval_steps_per_second": 15.209, "step": 1400 }, { "epoch": 0.06, "learning_rate": 5.993998183859419e-05, "loss": 2.2344, "step": 1410 }, { "epoch": 0.06, "learning_rate": 5.993865596262472e-05, "loss": 2.2125, "step": 1420 }, { "epoch": 0.06, "learning_rate": 5.993731561608029e-05, "loss": 2.2113, "step": 1430 }, { "epoch": 0.06, "learning_rate": 5.993596079960873e-05, "loss": 2.2446, "step": 1440 }, { "epoch": 0.06, "learning_rate": 5.99345915138649e-05, "loss": 2.2338, "step": 1450 }, { "epoch": 0.06, "learning_rate": 5.993320775951061e-05, "loss": 2.247, "step": 1460 }, { "epoch": 0.06, "learning_rate": 5.9931809537214715e-05, "loss": 2.1983, "step": 1470 }, { "epoch": 0.06, "learning_rate": 5.9930396847653e-05, "loss": 2.2772, "step": 1480 }, { "epoch": 0.07, "learning_rate": 5.9928969691508296e-05, "loss": 2.232, "step": 1490 }, { "epoch": 0.07, "learning_rate": 5.99275280694704e-05, "loss": 2.2015, "step": 1500 }, { "epoch": 0.07, "eval_loss": 2.207272529602051, "eval_runtime": 11.8836, "eval_samples_per_second": 344.677, "eval_steps_per_second": 21.542, "step": 1500 }, { "epoch": 0.07, "learning_rate": 5.9926071982236105e-05, "loss": 2.2393, "step": 1510 }, { "epoch": 0.07, "learning_rate": 5.9924601430509207e-05, "loss": 2.2068, "step": 1520 }, { "epoch": 0.07, "learning_rate": 5.992311641500047e-05, "loss": 2.2571, "step": 1530 }, { "epoch": 0.07, "learning_rate": 5.992161693642767e-05, "loss": 2.1754, "step": 1540 }, { "epoch": 0.07, "learning_rate": 5.9920102995515554e-05, "loss": 2.2719, "step": 1550 }, { "epoch": 0.07, "learning_rate": 5.99185745929959e-05, "loss": 2.1849, "step": 1560 }, { "epoch": 0.07, "learning_rate": 5.991703172960742e-05, "loss": 2.2507, "step": 1570 }, { "epoch": 0.07, "learning_rate": 5.991547440609585e-05, "loss": 2.238, "step": 1580 }, { "epoch": 0.07, "learning_rate": 5.991390262321392e-05, "loss": 2.204, "step": 1590 }, { "epoch": 0.07, "learning_rate": 5.9912316381721316e-05, "loss": 2.2177, "step": 1600 }, { "epoch": 0.07, "eval_loss": 2.1963398456573486, "eval_runtime": 13.5095, "eval_samples_per_second": 303.193, "eval_steps_per_second": 18.95, "step": 1600 }, { "epoch": 0.07, "learning_rate": 5.9910715682384755e-05, "loss": 2.1927, "step": 1610 }, { "epoch": 0.07, "learning_rate": 5.990910052597791e-05, "loss": 2.2564, "step": 1620 }, { "epoch": 0.07, "learning_rate": 5.990747091328144e-05, "loss": 2.2149, "step": 1630 }, { "epoch": 0.07, "learning_rate": 5.990582684508302e-05, "loss": 2.2022, "step": 1640 }, { "epoch": 0.07, "learning_rate": 5.99041683221773e-05, "loss": 2.2009, "step": 1650 }, { "epoch": 0.07, "learning_rate": 5.99024953453659e-05, "loss": 2.2003, "step": 1660 }, { "epoch": 0.07, "learning_rate": 5.9900807915457434e-05, "loss": 2.2312, "step": 1670 }, { "epoch": 0.07, "learning_rate": 5.989910603326752e-05, "loss": 2.1916, "step": 1680 }, { "epoch": 0.07, "learning_rate": 5.9897389699618725e-05, "loss": 2.2396, "step": 1690 }, { "epoch": 0.07, "learning_rate": 5.9895658915340654e-05, "loss": 2.175, "step": 1700 }, { "epoch": 0.07, "eval_loss": 2.1844124794006348, "eval_runtime": 11.4247, "eval_samples_per_second": 358.521, "eval_steps_per_second": 22.408, "step": 1700 }, { "epoch": 0.07, "learning_rate": 5.989391368126984e-05, "loss": 2.1792, "step": 1710 }, { "epoch": 0.08, "learning_rate": 5.989215399824984e-05, "loss": 2.2737, "step": 1720 }, { "epoch": 0.08, "learning_rate": 5.9890379867131177e-05, "loss": 2.1745, "step": 1730 }, { "epoch": 0.08, "learning_rate": 5.9888591288771357e-05, "loss": 2.1888, "step": 1740 }, { "epoch": 0.08, "learning_rate": 5.988678826403488e-05, "loss": 2.1556, "step": 1750 }, { "epoch": 0.08, "learning_rate": 5.988497079379321e-05, "loss": 2.2134, "step": 1760 }, { "epoch": 0.08, "learning_rate": 5.988313887892482e-05, "loss": 2.1801, "step": 1770 }, { "epoch": 0.08, "learning_rate": 5.988129252031514e-05, "loss": 2.1928, "step": 1780 }, { "epoch": 0.08, "learning_rate": 5.9879431718856594e-05, "loss": 2.1882, "step": 1790 }, { "epoch": 0.08, "learning_rate": 5.987755647544857e-05, "loss": 2.2182, "step": 1800 }, { "epoch": 0.08, "eval_loss": 2.174042224884033, "eval_runtime": 11.6019, "eval_samples_per_second": 353.045, "eval_steps_per_second": 22.065, "step": 1800 }, { "epoch": 0.08, "learning_rate": 5.987566679099746e-05, "loss": 2.178, "step": 1810 }, { "epoch": 0.08, "learning_rate": 5.987376266641664e-05, "loss": 2.2011, "step": 1820 }, { "epoch": 0.08, "learning_rate": 5.987184410262643e-05, "loss": 2.2196, "step": 1830 }, { "epoch": 0.08, "learning_rate": 5.9869911100554165e-05, "loss": 2.2287, "step": 1840 }, { "epoch": 0.08, "learning_rate": 5.986796366113413e-05, "loss": 2.1855, "step": 1850 }, { "epoch": 0.08, "learning_rate": 5.986600178530761e-05, "loss": 2.2516, "step": 1860 }, { "epoch": 0.08, "learning_rate": 5.9864025474022856e-05, "loss": 2.1804, "step": 1870 }, { "epoch": 0.08, "learning_rate": 5.98620347282351e-05, "loss": 2.2544, "step": 1880 }, { "epoch": 0.08, "learning_rate": 5.986002954890657e-05, "loss": 2.1874, "step": 1890 }, { "epoch": 0.08, "learning_rate": 5.985800993700642e-05, "loss": 2.1908, "step": 1900 }, { "epoch": 0.08, "eval_loss": 2.1624016761779785, "eval_runtime": 11.7335, "eval_samples_per_second": 349.086, "eval_steps_per_second": 21.818, "step": 1900 }, { "epoch": 0.08, "learning_rate": 5.985597589351083e-05, "loss": 2.1858, "step": 1910 }, { "epoch": 0.08, "learning_rate": 5.985392741940292e-05, "loss": 2.1944, "step": 1920 }, { "epoch": 0.08, "learning_rate": 5.985186451567282e-05, "loss": 2.2135, "step": 1930 }, { "epoch": 0.08, "learning_rate": 5.984978718331762e-05, "loss": 2.1289, "step": 1940 }, { "epoch": 0.09, "learning_rate": 5.9847695423341354e-05, "loss": 2.1891, "step": 1950 }, { "epoch": 0.09, "learning_rate": 5.9845589236755065e-05, "loss": 2.1651, "step": 1960 }, { "epoch": 0.09, "learning_rate": 5.984346862457677e-05, "loss": 2.2016, "step": 1970 }, { "epoch": 0.09, "learning_rate": 5.984133358783144e-05, "loss": 2.2092, "step": 1980 }, { "epoch": 0.09, "learning_rate": 5.983918412755103e-05, "loss": 2.1864, "step": 1990 }, { "epoch": 0.09, "learning_rate": 5.983702024477445e-05, "loss": 2.1916, "step": 2000 }, { "epoch": 0.09, "eval_loss": 2.1545424461364746, "eval_runtime": 12.2776, "eval_samples_per_second": 333.616, "eval_steps_per_second": 20.851, "step": 2000 }, { "epoch": 0.09, "learning_rate": 5.9834841940547604e-05, "loss": 2.1598, "step": 2010 }, { "epoch": 0.09, "learning_rate": 5.983264921592335e-05, "loss": 2.1968, "step": 2020 }, { "epoch": 0.09, "learning_rate": 5.9830442071961524e-05, "loss": 2.1737, "step": 2030 }, { "epoch": 0.09, "learning_rate": 5.982822050972892e-05, "loss": 2.1664, "step": 2040 }, { "epoch": 0.09, "learning_rate": 5.982598453029933e-05, "loss": 2.2088, "step": 2050 }, { "epoch": 0.09, "learning_rate": 5.982373413475348e-05, "loss": 2.1314, "step": 2060 }, { "epoch": 0.09, "learning_rate": 5.9821469324179084e-05, "loss": 2.1583, "step": 2070 }, { "epoch": 0.09, "learning_rate": 5.98191900996708e-05, "loss": 2.2043, "step": 2080 }, { "epoch": 0.09, "learning_rate": 5.981689646233029e-05, "loss": 2.1916, "step": 2090 }, { "epoch": 0.09, "learning_rate": 5.981458841326616e-05, "loss": 2.0975, "step": 2100 }, { "epoch": 0.09, "eval_loss": 2.145850896835327, "eval_runtime": 11.7678, "eval_samples_per_second": 348.07, "eval_steps_per_second": 21.754, "step": 2100 }, { "epoch": 0.09, "learning_rate": 5.981226595359397e-05, "loss": 2.1805, "step": 2110 }, { "epoch": 0.09, "learning_rate": 5.980992908443628e-05, "loss": 2.1319, "step": 2120 }, { "epoch": 0.09, "learning_rate": 5.9807577806922587e-05, "loss": 2.1684, "step": 2130 }, { "epoch": 0.09, "learning_rate": 5.980521212218935e-05, "loss": 2.1669, "step": 2140 }, { "epoch": 0.09, "learning_rate": 5.9802832031380006e-05, "loss": 2.1536, "step": 2150 }, { "epoch": 0.09, "learning_rate": 5.980043753564496e-05, "loss": 2.1812, "step": 2160 }, { "epoch": 0.09, "learning_rate": 5.979802863614155e-05, "loss": 2.1474, "step": 2170 }, { "epoch": 0.1, "learning_rate": 5.979560533403412e-05, "loss": 2.1461, "step": 2180 }, { "epoch": 0.1, "learning_rate": 5.979316763049393e-05, "loss": 2.1817, "step": 2190 }, { "epoch": 0.1, "learning_rate": 5.979071552669923e-05, "loss": 2.2076, "step": 2200 }, { "epoch": 0.1, "eval_loss": 2.136880397796631, "eval_runtime": 11.5412, "eval_samples_per_second": 354.904, "eval_steps_per_second": 22.181, "step": 2200 }, { "epoch": 0.1, "learning_rate": 5.978824902383523e-05, "loss": 2.1546, "step": 2210 }, { "epoch": 0.1, "learning_rate": 5.978576812309407e-05, "loss": 2.1107, "step": 2220 }, { "epoch": 0.1, "learning_rate": 5.97832728256749e-05, "loss": 2.1283, "step": 2230 }, { "epoch": 0.1, "learning_rate": 5.9780763132783765e-05, "loss": 2.1638, "step": 2240 }, { "epoch": 0.1, "learning_rate": 5.977823904563373e-05, "loss": 2.2048, "step": 2250 }, { "epoch": 0.1, "learning_rate": 5.9775700565444777e-05, "loss": 2.1297, "step": 2260 }, { "epoch": 0.1, "learning_rate": 5.977314769344385e-05, "loss": 2.1407, "step": 2270 }, { "epoch": 0.1, "learning_rate": 5.9770580430864876e-05, "loss": 2.175, "step": 2280 }, { "epoch": 0.1, "learning_rate": 5.976799877894871e-05, "loss": 2.1167, "step": 2290 }, { "epoch": 0.1, "learning_rate": 5.976540273894317e-05, "loss": 2.1116, "step": 2300 }, { "epoch": 0.1, "eval_loss": 2.1296005249023438, "eval_runtime": 11.6525, "eval_samples_per_second": 351.512, "eval_steps_per_second": 21.97, "step": 2300 }, { "epoch": 0.1, "learning_rate": 5.976279231210302e-05, "loss": 2.1281, "step": 2310 }, { "epoch": 0.1, "learning_rate": 5.9760167499689985e-05, "loss": 2.1152, "step": 2320 }, { "epoch": 0.1, "learning_rate": 5.9757528302972764e-05, "loss": 2.1657, "step": 2330 }, { "epoch": 0.1, "learning_rate": 5.975487472322696e-05, "loss": 2.1417, "step": 2340 }, { "epoch": 0.1, "learning_rate": 5.975220676173518e-05, "loss": 2.1868, "step": 2350 }, { "epoch": 0.1, "learning_rate": 5.9749524419786954e-05, "loss": 2.1341, "step": 2360 }, { "epoch": 0.1, "learning_rate": 5.974682769867876e-05, "loss": 2.1344, "step": 2370 }, { "epoch": 0.1, "learning_rate": 5.974411659971403e-05, "loss": 2.111, "step": 2380 }, { "epoch": 0.1, "learning_rate": 5.974139112420316e-05, "loss": 2.1725, "step": 2390 }, { "epoch": 0.11, "learning_rate": 5.973865127346348e-05, "loss": 2.1404, "step": 2400 }, { "epoch": 0.11, "eval_loss": 2.1210813522338867, "eval_runtime": 12.1931, "eval_samples_per_second": 335.927, "eval_steps_per_second": 20.995, "step": 2400 }, { "epoch": 0.11, "learning_rate": 5.973589704881927e-05, "loss": 2.1281, "step": 2410 }, { "epoch": 0.11, "learning_rate": 5.9733128451601764e-05, "loss": 2.1381, "step": 2420 }, { "epoch": 0.11, "learning_rate": 5.9730345483149134e-05, "loss": 2.1297, "step": 2430 }, { "epoch": 0.11, "learning_rate": 5.9727548144806494e-05, "loss": 2.1399, "step": 2440 }, { "epoch": 0.11, "learning_rate": 5.9724736437925926e-05, "loss": 2.1495, "step": 2450 }, { "epoch": 0.11, "learning_rate": 5.9721910363866444e-05, "loss": 2.094, "step": 2460 }, { "epoch": 0.11, "learning_rate": 5.971906992399399e-05, "loss": 2.1428, "step": 2470 }, { "epoch": 0.11, "learning_rate": 5.9716215119681477e-05, "loss": 2.1544, "step": 2480 }, { "epoch": 0.11, "learning_rate": 5.971334595230875e-05, "loss": 2.1708, "step": 2490 }, { "epoch": 0.11, "learning_rate": 5.971046242326258e-05, "loss": 2.1338, "step": 2500 }, { "epoch": 0.11, "eval_loss": 2.1137373447418213, "eval_runtime": 11.9925, "eval_samples_per_second": 341.548, "eval_steps_per_second": 21.347, "step": 2500 }, { "epoch": 0.11, "learning_rate": 5.9707564533936704e-05, "loss": 2.0854, "step": 2510 }, { "epoch": 0.11, "learning_rate": 5.970465228573179e-05, "loss": 2.1676, "step": 2520 }, { "epoch": 0.11, "learning_rate": 5.9701725680055445e-05, "loss": 2.1919, "step": 2530 }, { "epoch": 0.11, "learning_rate": 5.9698784718322226e-05, "loss": 2.116, "step": 2540 }, { "epoch": 0.11, "learning_rate": 5.9695829401953615e-05, "loss": 2.1112, "step": 2550 }, { "epoch": 0.11, "learning_rate": 5.969285973237802e-05, "loss": 2.1726, "step": 2560 }, { "epoch": 0.11, "learning_rate": 5.968987571103083e-05, "loss": 2.1494, "step": 2570 }, { "epoch": 0.11, "learning_rate": 5.968687733935432e-05, "loss": 2.0896, "step": 2580 }, { "epoch": 0.11, "learning_rate": 5.9683864618797746e-05, "loss": 2.1028, "step": 2590 }, { "epoch": 0.11, "learning_rate": 5.968083755081727e-05, "loss": 2.0907, "step": 2600 }, { "epoch": 0.11, "eval_loss": 2.1094017028808594, "eval_runtime": 11.9254, "eval_samples_per_second": 343.467, "eval_steps_per_second": 21.467, "step": 2600 }, { "epoch": 0.11, "learning_rate": 5.967779613687599e-05, "loss": 2.1917, "step": 2610 }, { "epoch": 0.11, "learning_rate": 5.967474037844396e-05, "loss": 2.0942, "step": 2620 }, { "epoch": 0.12, "learning_rate": 5.967167027699815e-05, "loss": 2.1257, "step": 2630 }, { "epoch": 0.12, "learning_rate": 5.9668585834022466e-05, "loss": 2.1354, "step": 2640 }, { "epoch": 0.12, "learning_rate": 5.966548705100773e-05, "loss": 2.1495, "step": 2650 }, { "epoch": 0.12, "learning_rate": 5.966237392945172e-05, "loss": 2.1276, "step": 2660 }, { "epoch": 0.12, "learning_rate": 5.965924647085914e-05, "loss": 2.09, "step": 2670 }, { "epoch": 0.12, "learning_rate": 5.965610467674162e-05, "loss": 2.1088, "step": 2680 }, { "epoch": 0.12, "learning_rate": 5.9652948548617715e-05, "loss": 2.1951, "step": 2690 }, { "epoch": 0.12, "learning_rate": 5.9649778088012896e-05, "loss": 2.1079, "step": 2700 }, { "epoch": 0.12, "eval_loss": 2.104461669921875, "eval_runtime": 12.0537, "eval_samples_per_second": 339.813, "eval_steps_per_second": 21.238, "step": 2700 }, { "epoch": 0.12, "learning_rate": 5.9646593296459594e-05, "loss": 2.128, "step": 2710 }, { "epoch": 0.12, "learning_rate": 5.9643394175497145e-05, "loss": 2.092, "step": 2720 }, { "epoch": 0.12, "learning_rate": 5.9640180726671806e-05, "loss": 2.1446, "step": 2730 }, { "epoch": 0.12, "learning_rate": 5.9636952951536776e-05, "loss": 2.1721, "step": 2740 }, { "epoch": 0.12, "learning_rate": 5.963371085165217e-05, "loss": 2.1262, "step": 2750 }, { "epoch": 0.12, "learning_rate": 5.963045442858503e-05, "loss": 2.1791, "step": 2760 }, { "epoch": 0.12, "learning_rate": 5.962718368390931e-05, "loss": 2.0893, "step": 2770 }, { "epoch": 0.12, "learning_rate": 5.96238986192059e-05, "loss": 2.1119, "step": 2780 }, { "epoch": 0.12, "learning_rate": 5.96205992360626e-05, "loss": 2.1215, "step": 2790 }, { "epoch": 0.12, "learning_rate": 5.961728553607415e-05, "loss": 2.1327, "step": 2800 }, { "epoch": 0.12, "eval_loss": 2.0953927040100098, "eval_runtime": 11.7403, "eval_samples_per_second": 348.885, "eval_steps_per_second": 21.805, "step": 2800 }, { "epoch": 0.12, "learning_rate": 5.9613957520842177e-05, "loss": 2.0981, "step": 2810 }, { "epoch": 0.12, "learning_rate": 5.961061519197527e-05, "loss": 2.1383, "step": 2820 }, { "epoch": 0.12, "learning_rate": 5.96072585510889e-05, "loss": 2.1195, "step": 2830 }, { "epoch": 0.12, "learning_rate": 5.9603887599805455e-05, "loss": 2.1172, "step": 2840 }, { "epoch": 0.12, "learning_rate": 5.960050233975428e-05, "loss": 2.0543, "step": 2850 }, { "epoch": 0.13, "learning_rate": 5.959710277257159e-05, "loss": 2.1454, "step": 2860 }, { "epoch": 0.13, "learning_rate": 5.959368889990055e-05, "loss": 2.095, "step": 2870 }, { "epoch": 0.13, "learning_rate": 5.959026072339121e-05, "loss": 2.1166, "step": 2880 }, { "epoch": 0.13, "learning_rate": 5.9586818244700554e-05, "loss": 2.1447, "step": 2890 }, { "epoch": 0.13, "learning_rate": 5.9583361465492475e-05, "loss": 2.1123, "step": 2900 }, { "epoch": 0.13, "eval_loss": 2.090653419494629, "eval_runtime": 11.7523, "eval_samples_per_second": 348.527, "eval_steps_per_second": 21.783, "step": 2900 }, { "epoch": 0.13, "learning_rate": 5.957989038743777e-05, "loss": 2.0771, "step": 2910 }, { "epoch": 0.13, "learning_rate": 5.9576405012214155e-05, "loss": 2.1296, "step": 2920 }, { "epoch": 0.13, "learning_rate": 5.957290534150625e-05, "loss": 2.1512, "step": 2930 }, { "epoch": 0.13, "learning_rate": 5.9569391377005604e-05, "loss": 2.0663, "step": 2940 }, { "epoch": 0.13, "learning_rate": 5.9565863120410637e-05, "loss": 2.1126, "step": 2950 }, { "epoch": 0.13, "learning_rate": 5.956232057342672e-05, "loss": 2.1432, "step": 2960 }, { "epoch": 0.13, "learning_rate": 5.95587637377661e-05, "loss": 2.1382, "step": 2970 }, { "epoch": 0.13, "learning_rate": 5.955519261514794e-05, "loss": 2.0925, "step": 2980 }, { "epoch": 0.13, "learning_rate": 5.955160720729831e-05, "loss": 2.1153, "step": 2990 }, { "epoch": 0.13, "learning_rate": 5.9548007515950196e-05, "loss": 2.0952, "step": 3000 }, { "epoch": 0.13, "eval_loss": 2.085477828979492, "eval_runtime": 12.1839, "eval_samples_per_second": 336.181, "eval_steps_per_second": 21.011, "step": 3000 }, { "epoch": 0.13, "learning_rate": 5.954439354284346e-05, "loss": 2.1191, "step": 3010 }, { "epoch": 0.13, "learning_rate": 5.954076528972489e-05, "loss": 2.1233, "step": 3020 }, { "epoch": 0.13, "learning_rate": 5.953712275834817e-05, "loss": 2.1523, "step": 3030 }, { "epoch": 0.13, "learning_rate": 5.953346595047388e-05, "loss": 2.092, "step": 3040 }, { "epoch": 0.13, "learning_rate": 5.95297948678695e-05, "loss": 2.0483, "step": 3050 }, { "epoch": 0.13, "learning_rate": 5.9526109512309423e-05, "loss": 2.1245, "step": 3060 }, { "epoch": 0.13, "learning_rate": 5.9522409885574934e-05, "loss": 2.1205, "step": 3070 }, { "epoch": 0.13, "learning_rate": 5.95186959894542e-05, "loss": 2.1143, "step": 3080 }, { "epoch": 0.14, "learning_rate": 5.9514967825742325e-05, "loss": 2.0988, "step": 3090 }, { "epoch": 0.14, "learning_rate": 5.9511225396241256e-05, "loss": 2.1173, "step": 3100 }, { "epoch": 0.14, "eval_loss": 2.0793402194976807, "eval_runtime": 11.7138, "eval_samples_per_second": 349.672, "eval_steps_per_second": 21.854, "step": 3100 }, { "epoch": 0.14, "learning_rate": 5.950746870275986e-05, "loss": 2.1051, "step": 3110 }, { "epoch": 0.14, "learning_rate": 5.950369774711392e-05, "loss": 2.071, "step": 3120 }, { "epoch": 0.14, "learning_rate": 5.9499912531126096e-05, "loss": 2.136, "step": 3130 }, { "epoch": 0.14, "learning_rate": 5.949611305662592e-05, "loss": 2.1015, "step": 3140 }, { "epoch": 0.14, "learning_rate": 5.949229932544983e-05, "loss": 2.1133, "step": 3150 }, { "epoch": 0.14, "learning_rate": 5.9488471339441175e-05, "loss": 2.0993, "step": 3160 }, { "epoch": 0.14, "learning_rate": 5.948462910045017e-05, "loss": 2.0734, "step": 3170 }, { "epoch": 0.14, "learning_rate": 5.948077261033392e-05, "loss": 2.0588, "step": 3180 }, { "epoch": 0.14, "learning_rate": 5.9476901870956426e-05, "loss": 2.0703, "step": 3190 }, { "epoch": 0.14, "learning_rate": 5.947301688418859e-05, "loss": 2.0886, "step": 3200 }, { "epoch": 0.14, "eval_loss": 2.0747480392456055, "eval_runtime": 16.3253, "eval_samples_per_second": 250.899, "eval_steps_per_second": 15.681, "step": 3200 }, { "epoch": 0.14, "learning_rate": 5.9469117651908163e-05, "loss": 2.0638, "step": 3210 }, { "epoch": 0.14, "learning_rate": 5.946520417599982e-05, "loss": 2.0492, "step": 3220 }, { "epoch": 0.14, "learning_rate": 5.946127645835509e-05, "loss": 2.0683, "step": 3230 }, { "epoch": 0.14, "learning_rate": 5.945733450087242e-05, "loss": 2.0929, "step": 3240 }, { "epoch": 0.14, "learning_rate": 5.9453378305457085e-05, "loss": 2.139, "step": 3250 }, { "epoch": 0.14, "learning_rate": 5.944940787402131e-05, "loss": 2.0932, "step": 3260 }, { "epoch": 0.14, "learning_rate": 5.944542320848414e-05, "loss": 2.1399, "step": 3270 }, { "epoch": 0.14, "learning_rate": 5.9441424310771545e-05, "loss": 2.1349, "step": 3280 }, { "epoch": 0.14, "learning_rate": 5.943741118281635e-05, "loss": 2.0773, "step": 3290 }, { "epoch": 0.14, "learning_rate": 5.943338382655826e-05, "loss": 2.0418, "step": 3300 }, { "epoch": 0.14, "eval_loss": 2.069995403289795, "eval_runtime": 13.501, "eval_samples_per_second": 303.384, "eval_steps_per_second": 18.962, "step": 3300 }, { "epoch": 0.14, "learning_rate": 5.942934224394387e-05, "loss": 2.1208, "step": 3310 }, { "epoch": 0.15, "learning_rate": 5.9425286436926635e-05, "loss": 2.0428, "step": 3320 }, { "epoch": 0.15, "learning_rate": 5.942121640746688e-05, "loss": 2.0581, "step": 3330 }, { "epoch": 0.15, "learning_rate": 5.941713215753184e-05, "loss": 2.1182, "step": 3340 }, { "epoch": 0.15, "learning_rate": 5.9413033689095596e-05, "loss": 2.098, "step": 3350 }, { "epoch": 0.15, "learning_rate": 5.940892100413909e-05, "loss": 2.0887, "step": 3360 }, { "epoch": 0.15, "learning_rate": 5.9404794104650156e-05, "loss": 2.0979, "step": 3370 }, { "epoch": 0.15, "learning_rate": 5.9400652992623495e-05, "loss": 2.083, "step": 3380 }, { "epoch": 0.15, "learning_rate": 5.9396497670060685e-05, "loss": 2.0856, "step": 3390 }, { "epoch": 0.15, "learning_rate": 5.939232813897014e-05, "loss": 2.095, "step": 3400 }, { "epoch": 0.15, "eval_loss": 2.0656051635742188, "eval_runtime": 13.8953, "eval_samples_per_second": 294.777, "eval_steps_per_second": 18.424, "step": 3400 }, { "epoch": 0.15, "learning_rate": 5.9388144401367194e-05, "loss": 2.1035, "step": 3410 }, { "epoch": 0.15, "learning_rate": 5.9383946459273995e-05, "loss": 2.096, "step": 3420 }, { "epoch": 0.15, "learning_rate": 5.937973431471959e-05, "loss": 2.1298, "step": 3430 }, { "epoch": 0.15, "learning_rate": 5.937550796973988e-05, "loss": 2.0599, "step": 3440 }, { "epoch": 0.15, "learning_rate": 5.9371267426377624e-05, "loss": 2.0537, "step": 3450 }, { "epoch": 0.15, "learning_rate": 5.9367012686682464e-05, "loss": 2.0865, "step": 3460 }, { "epoch": 0.15, "learning_rate": 5.936274375271087e-05, "loss": 2.1082, "step": 3470 }, { "epoch": 0.15, "learning_rate": 5.935846062652621e-05, "loss": 2.1416, "step": 3480 }, { "epoch": 0.15, "learning_rate": 5.935416331019868e-05, "loss": 2.0887, "step": 3490 }, { "epoch": 0.15, "learning_rate": 5.934985180580536e-05, "loss": 2.0988, "step": 3500 }, { "epoch": 0.15, "eval_loss": 2.061530113220215, "eval_runtime": 14.5323, "eval_samples_per_second": 281.854, "eval_steps_per_second": 17.616, "step": 3500 }, { "epoch": 0.15, "learning_rate": 5.934552611543016e-05, "loss": 2.034, "step": 3510 }, { "epoch": 0.15, "learning_rate": 5.9341186241163875e-05, "loss": 2.1009, "step": 3520 }, { "epoch": 0.15, "learning_rate": 5.9336832185104155e-05, "loss": 2.1099, "step": 3530 }, { "epoch": 0.15, "learning_rate": 5.933246394935546e-05, "loss": 2.0687, "step": 3540 }, { "epoch": 0.16, "learning_rate": 5.9328081536029175e-05, "loss": 2.0504, "step": 3550 }, { "epoch": 0.16, "learning_rate": 5.9323684947243476e-05, "loss": 2.0703, "step": 3560 }, { "epoch": 0.16, "learning_rate": 5.931927418512341e-05, "loss": 2.0398, "step": 3570 }, { "epoch": 0.16, "learning_rate": 5.931484925180089e-05, "loss": 2.1021, "step": 3580 }, { "epoch": 0.16, "learning_rate": 5.931041014941467e-05, "loss": 2.0889, "step": 3590 }, { "epoch": 0.16, "learning_rate": 5.930595688011034e-05, "loss": 2.092, "step": 3600 }, { "epoch": 0.16, "eval_loss": 2.0562427043914795, "eval_runtime": 11.8426, "eval_samples_per_second": 345.869, "eval_steps_per_second": 21.617, "step": 3600 }, { "epoch": 0.16, "learning_rate": 5.930148944604035e-05, "loss": 2.0708, "step": 3610 }, { "epoch": 0.16, "learning_rate": 5.929700784936399e-05, "loss": 2.0789, "step": 3620 }, { "epoch": 0.16, "learning_rate": 5.9292512092247405e-05, "loss": 2.0735, "step": 3630 }, { "epoch": 0.16, "learning_rate": 5.928800217686358e-05, "loss": 2.1018, "step": 3640 }, { "epoch": 0.16, "learning_rate": 5.928347810539233e-05, "loss": 2.0637, "step": 3650 }, { "epoch": 0.16, "learning_rate": 5.927893988002033e-05, "loss": 2.0389, "step": 3660 }, { "epoch": 0.16, "learning_rate": 5.9274387502941094e-05, "loss": 2.062, "step": 3670 }, { "epoch": 0.16, "learning_rate": 5.926982097635497e-05, "loss": 2.0766, "step": 3680 }, { "epoch": 0.16, "learning_rate": 5.926524030246914e-05, "loss": 2.0654, "step": 3690 }, { "epoch": 0.16, "learning_rate": 5.926064548349764e-05, "loss": 2.1046, "step": 3700 }, { "epoch": 0.16, "eval_loss": 2.051886558532715, "eval_runtime": 13.6021, "eval_samples_per_second": 301.131, "eval_steps_per_second": 18.821, "step": 3700 }, { "epoch": 0.16, "learning_rate": 5.9256036521661316e-05, "loss": 2.068, "step": 3710 }, { "epoch": 0.16, "learning_rate": 5.9251413419187893e-05, "loss": 2.0482, "step": 3720 }, { "epoch": 0.16, "learning_rate": 5.9246776178311885e-05, "loss": 2.0652, "step": 3730 }, { "epoch": 0.16, "learning_rate": 5.9242124801274674e-05, "loss": 2.1223, "step": 3740 }, { "epoch": 0.16, "learning_rate": 5.9237459290324444e-05, "loss": 2.0723, "step": 3750 }, { "epoch": 0.16, "learning_rate": 5.9232779647716244e-05, "loss": 2.054, "step": 3760 }, { "epoch": 0.17, "learning_rate": 5.9228085875711926e-05, "loss": 2.1379, "step": 3770 }, { "epoch": 0.17, "learning_rate": 5.922337797658018e-05, "loss": 2.0366, "step": 3780 }, { "epoch": 0.17, "learning_rate": 5.921865595259654e-05, "loss": 2.0602, "step": 3790 }, { "epoch": 0.17, "learning_rate": 5.921391980604335e-05, "loss": 2.0698, "step": 3800 }, { "epoch": 0.17, "eval_loss": 2.047264814376831, "eval_runtime": 15.2622, "eval_samples_per_second": 268.375, "eval_steps_per_second": 16.773, "step": 3800 }, { "epoch": 0.17, "learning_rate": 5.920916953920976e-05, "loss": 2.0555, "step": 3810 }, { "epoch": 0.17, "learning_rate": 5.920440515439179e-05, "loss": 2.121, "step": 3820 }, { "epoch": 0.17, "learning_rate": 5.9199626653892264e-05, "loss": 2.0358, "step": 3830 }, { "epoch": 0.17, "learning_rate": 5.919483404002081e-05, "loss": 2.0702, "step": 3840 }, { "epoch": 0.17, "learning_rate": 5.91900273150939e-05, "loss": 2.0706, "step": 3850 }, { "epoch": 0.17, "learning_rate": 5.918520648143482e-05, "loss": 2.0742, "step": 3860 }, { "epoch": 0.17, "learning_rate": 5.918037154137369e-05, "loss": 2.0806, "step": 3870 }, { "epoch": 0.17, "learning_rate": 5.917552249724742e-05, "loss": 2.0539, "step": 3880 }, { "epoch": 0.17, "learning_rate": 5.917065935139975e-05, "loss": 2.0925, "step": 3890 }, { "epoch": 0.17, "learning_rate": 5.9165782106181244e-05, "loss": 2.0628, "step": 3900 }, { "epoch": 0.17, "eval_loss": 2.041888952255249, "eval_runtime": 12.8465, "eval_samples_per_second": 318.843, "eval_steps_per_second": 19.928, "step": 3900 }, { "epoch": 0.17, "learning_rate": 5.916089076394927e-05, "loss": 2.0655, "step": 3910 }, { "epoch": 0.17, "learning_rate": 5.915598532706801e-05, "loss": 2.0998, "step": 3920 }, { "epoch": 0.17, "learning_rate": 5.915106579790848e-05, "loss": 2.0661, "step": 3930 }, { "epoch": 0.17, "learning_rate": 5.914613217884846e-05, "loss": 2.0268, "step": 3940 }, { "epoch": 0.17, "learning_rate": 5.9141184472272596e-05, "loss": 2.1178, "step": 3950 }, { "epoch": 0.17, "learning_rate": 5.913622268057229e-05, "loss": 2.0453, "step": 3960 }, { "epoch": 0.17, "learning_rate": 5.913124680614581e-05, "loss": 2.0846, "step": 3970 }, { "epoch": 0.17, "learning_rate": 5.912625685139818e-05, "loss": 2.1052, "step": 3980 }, { "epoch": 0.17, "learning_rate": 5.912125281874125e-05, "loss": 2.0422, "step": 3990 }, { "epoch": 0.18, "learning_rate": 5.9116234710593674e-05, "loss": 2.0528, "step": 4000 }, { "epoch": 0.18, "eval_loss": 2.0398635864257812, "eval_runtime": 12.2343, "eval_samples_per_second": 334.796, "eval_steps_per_second": 20.925, "step": 4000 }, { "epoch": 0.18, "learning_rate": 5.9111202529380904e-05, "loss": 2.0678, "step": 4010 }, { "epoch": 0.18, "learning_rate": 5.910615627753521e-05, "loss": 2.0423, "step": 4020 }, { "epoch": 0.18, "learning_rate": 5.9101095957495654e-05, "loss": 2.0402, "step": 4030 }, { "epoch": 0.18, "learning_rate": 5.909602157170808e-05, "loss": 2.0706, "step": 4040 }, { "epoch": 0.18, "learning_rate": 5.909093312262516e-05, "loss": 2.0197, "step": 4050 }, { "epoch": 0.18, "learning_rate": 5.9085830612706336e-05, "loss": 2.0763, "step": 4060 }, { "epoch": 0.18, "learning_rate": 5.908071404441787e-05, "loss": 2.1287, "step": 4070 }, { "epoch": 0.18, "learning_rate": 5.9075583420232806e-05, "loss": 2.0297, "step": 4080 }, { "epoch": 0.18, "learning_rate": 5.907043874263098e-05, "loss": 2.0862, "step": 4090 }, { "epoch": 0.18, "learning_rate": 5.906528001409902e-05, "loss": 2.0211, "step": 4100 }, { "epoch": 0.18, "eval_loss": 2.0350661277770996, "eval_runtime": 12.0443, "eval_samples_per_second": 340.079, "eval_steps_per_second": 21.255, "step": 4100 }, { "epoch": 0.18, "learning_rate": 5.906010723713038e-05, "loss": 2.0536, "step": 4110 }, { "epoch": 0.18, "learning_rate": 5.905492041422523e-05, "loss": 2.0348, "step": 4120 }, { "epoch": 0.18, "learning_rate": 5.9049719547890594e-05, "loss": 1.9906, "step": 4130 }, { "epoch": 0.18, "learning_rate": 5.9044504640640276e-05, "loss": 2.0626, "step": 4140 }, { "epoch": 0.18, "learning_rate": 5.9039275694994844e-05, "loss": 2.0877, "step": 4150 }, { "epoch": 0.18, "learning_rate": 5.9034032713481654e-05, "loss": 2.0266, "step": 4160 }, { "epoch": 0.18, "learning_rate": 5.902877569863485e-05, "loss": 2.13, "step": 4170 }, { "epoch": 0.18, "learning_rate": 5.9023504652995384e-05, "loss": 2.0176, "step": 4180 }, { "epoch": 0.18, "learning_rate": 5.901821957911095e-05, "loss": 2.0459, "step": 4190 }, { "epoch": 0.18, "learning_rate": 5.9012920479536034e-05, "loss": 2.0805, "step": 4200 }, { "epoch": 0.18, "eval_loss": 2.0328736305236816, "eval_runtime": 18.7677, "eval_samples_per_second": 218.248, "eval_steps_per_second": 13.64, "step": 4200 }, { "epoch": 0.18, "learning_rate": 5.9007607356831934e-05, "loss": 2.0437, "step": 4210 }, { "epoch": 0.18, "learning_rate": 5.900228021356666e-05, "loss": 2.0958, "step": 4220 }, { "epoch": 0.19, "learning_rate": 5.899693905231507e-05, "loss": 2.0722, "step": 4230 }, { "epoch": 0.19, "learning_rate": 5.899158387565877e-05, "loss": 2.0361, "step": 4240 }, { "epoch": 0.19, "learning_rate": 5.89862146861861e-05, "loss": 2.0838, "step": 4250 }, { "epoch": 0.19, "learning_rate": 5.898083148649224e-05, "loss": 2.0631, "step": 4260 }, { "epoch": 0.19, "learning_rate": 5.8975434279179096e-05, "loss": 2.0552, "step": 4270 }, { "epoch": 0.19, "learning_rate": 5.897002306685536e-05, "loss": 2.0305, "step": 4280 }, { "epoch": 0.19, "learning_rate": 5.89645978521365e-05, "loss": 2.1161, "step": 4290 }, { "epoch": 0.19, "learning_rate": 5.895915863764473e-05, "loss": 2.0235, "step": 4300 }, { "epoch": 0.19, "eval_loss": 2.0307788848876953, "eval_runtime": 11.5851, "eval_samples_per_second": 353.557, "eval_steps_per_second": 22.097, "step": 4300 }, { "epoch": 0.19, "learning_rate": 5.895370542600906e-05, "loss": 2.0678, "step": 4310 }, { "epoch": 0.19, "learning_rate": 5.894823821986524e-05, "loss": 2.0671, "step": 4320 }, { "epoch": 0.19, "learning_rate": 5.894275702185579e-05, "loss": 2.0524, "step": 4330 }, { "epoch": 0.19, "learning_rate": 5.893726183463001e-05, "loss": 2.0369, "step": 4340 }, { "epoch": 0.19, "learning_rate": 5.893175266084394e-05, "loss": 2.0658, "step": 4350 }, { "epoch": 0.19, "learning_rate": 5.892622950316039e-05, "loss": 2.0488, "step": 4360 }, { "epoch": 0.19, "learning_rate": 5.8920692364248926e-05, "loss": 2.0644, "step": 4370 }, { "epoch": 0.19, "learning_rate": 5.8915141246785875e-05, "loss": 2.0313, "step": 4380 }, { "epoch": 0.19, "learning_rate": 5.890957615345433e-05, "loss": 2.0117, "step": 4390 }, { "epoch": 0.19, "learning_rate": 5.89039970869441e-05, "loss": 2.0123, "step": 4400 }, { "epoch": 0.19, "eval_loss": 2.027006149291992, "eval_runtime": 20.8382, "eval_samples_per_second": 196.562, "eval_steps_per_second": 12.285, "step": 4400 }, { "epoch": 0.19, "learning_rate": 5.88984040499518e-05, "loss": 2.0617, "step": 4410 }, { "epoch": 0.19, "learning_rate": 5.889279704518077e-05, "loss": 2.0394, "step": 4420 }, { "epoch": 0.19, "learning_rate": 5.888717607534109e-05, "loss": 2.058, "step": 4430 }, { "epoch": 0.19, "learning_rate": 5.888154114314961e-05, "loss": 2.1012, "step": 4440 }, { "epoch": 0.19, "learning_rate": 5.887589225132994e-05, "loss": 2.0424, "step": 4450 }, { "epoch": 0.2, "learning_rate": 5.887022940261241e-05, "loss": 2.0011, "step": 4460 }, { "epoch": 0.2, "learning_rate": 5.886455259973408e-05, "loss": 2.1042, "step": 4470 }, { "epoch": 0.2, "learning_rate": 5.8858861845438824e-05, "loss": 2.0368, "step": 4480 }, { "epoch": 0.2, "learning_rate": 5.885315714247719e-05, "loss": 2.0797, "step": 4490 }, { "epoch": 0.2, "learning_rate": 5.8847438493606504e-05, "loss": 2.0964, "step": 4500 }, { "epoch": 0.2, "eval_loss": 2.023592710494995, "eval_runtime": 13.8773, "eval_samples_per_second": 295.157, "eval_steps_per_second": 18.447, "step": 4500 }, { "epoch": 0.2, "learning_rate": 5.884170590159081e-05, "loss": 2.079, "step": 4510 }, { "epoch": 0.2, "learning_rate": 5.8835959369200925e-05, "loss": 2.0535, "step": 4520 }, { "epoch": 0.2, "learning_rate": 5.883019889921436e-05, "loss": 2.0473, "step": 4530 }, { "epoch": 0.2, "learning_rate": 5.882442449441539e-05, "loss": 2.0421, "step": 4540 }, { "epoch": 0.2, "learning_rate": 5.881863615759503e-05, "loss": 2.044, "step": 4550 }, { "epoch": 0.2, "learning_rate": 5.8812833891551024e-05, "loss": 2.076, "step": 4560 }, { "epoch": 0.2, "learning_rate": 5.880701769908782e-05, "loss": 2.0583, "step": 4570 }, { "epoch": 0.2, "learning_rate": 5.880118758301665e-05, "loss": 2.0261, "step": 4580 }, { "epoch": 0.2, "learning_rate": 5.879534354615543e-05, "loss": 2.0434, "step": 4590 }, { "epoch": 0.2, "learning_rate": 5.878948559132882e-05, "loss": 2.0839, "step": 4600 }, { "epoch": 0.2, "eval_loss": 2.0206034183502197, "eval_runtime": 13.1899, "eval_samples_per_second": 310.54, "eval_steps_per_second": 19.409, "step": 4600 }, { "epoch": 0.2, "learning_rate": 5.8783613721368216e-05, "loss": 2.0191, "step": 4610 }, { "epoch": 0.2, "learning_rate": 5.877772793911173e-05, "loss": 2.037, "step": 4620 }, { "epoch": 0.2, "learning_rate": 5.8771828247404204e-05, "loss": 2.0319, "step": 4630 }, { "epoch": 0.2, "learning_rate": 5.87659146490972e-05, "loss": 2.0892, "step": 4640 }, { "epoch": 0.2, "learning_rate": 5.8759987147048997e-05, "loss": 2.0116, "step": 4650 }, { "epoch": 0.2, "learning_rate": 5.87540457441246e-05, "loss": 2.0725, "step": 4660 }, { "epoch": 0.2, "learning_rate": 5.874809044319573e-05, "loss": 2.0583, "step": 4670 }, { "epoch": 0.2, "learning_rate": 5.8742121247140835e-05, "loss": 2.0341, "step": 4680 }, { "epoch": 0.21, "learning_rate": 5.873613815884506e-05, "loss": 2.0719, "step": 4690 }, { "epoch": 0.21, "learning_rate": 5.8730141181200284e-05, "loss": 2.0736, "step": 4700 }, { "epoch": 0.21, "eval_loss": 2.0166029930114746, "eval_runtime": 13.4827, "eval_samples_per_second": 303.798, "eval_steps_per_second": 18.987, "step": 4700 }, { "epoch": 0.21, "learning_rate": 5.872413031710509e-05, "loss": 2.0566, "step": 4710 }, { "epoch": 0.21, "learning_rate": 5.871810556946478e-05, "loss": 2.0676, "step": 4720 }, { "epoch": 0.21, "learning_rate": 5.871206694119134e-05, "loss": 2.0385, "step": 4730 }, { "epoch": 0.21, "learning_rate": 5.870601443520351e-05, "loss": 2.0604, "step": 4740 }, { "epoch": 0.21, "learning_rate": 5.869994805442669e-05, "loss": 2.0338, "step": 4750 }, { "epoch": 0.21, "learning_rate": 5.869386780179303e-05, "loss": 1.9712, "step": 4760 }, { "epoch": 0.21, "learning_rate": 5.868777368024136e-05, "loss": 2.0208, "step": 4770 }, { "epoch": 0.21, "learning_rate": 5.868166569271721e-05, "loss": 2.0479, "step": 4780 }, { "epoch": 0.21, "learning_rate": 5.867554384217282e-05, "loss": 2.0104, "step": 4790 }, { "epoch": 0.21, "learning_rate": 5.866940813156714e-05, "loss": 2.0591, "step": 4800 }, { "epoch": 0.21, "eval_loss": 2.0134992599487305, "eval_runtime": 11.7962, "eval_samples_per_second": 347.232, "eval_steps_per_second": 21.702, "step": 4800 }, { "epoch": 0.21, "learning_rate": 5.8663258563865804e-05, "loss": 2.041, "step": 4810 }, { "epoch": 0.21, "learning_rate": 5.865709514204115e-05, "loss": 1.9956, "step": 4820 }, { "epoch": 0.21, "learning_rate": 5.865091786907221e-05, "loss": 2.0447, "step": 4830 }, { "epoch": 0.21, "learning_rate": 5.864472674794471e-05, "loss": 2.022, "step": 4840 }, { "epoch": 0.21, "learning_rate": 5.863852178165108e-05, "loss": 1.9585, "step": 4850 }, { "epoch": 0.21, "learning_rate": 5.8632302973190424e-05, "loss": 2.058, "step": 4860 }, { "epoch": 0.21, "learning_rate": 5.862607032556854e-05, "loss": 2.0582, "step": 4870 }, { "epoch": 0.21, "learning_rate": 5.861982384179794e-05, "loss": 2.0257, "step": 4880 }, { "epoch": 0.21, "learning_rate": 5.86135635248978e-05, "loss": 1.9781, "step": 4890 }, { "epoch": 0.21, "learning_rate": 5.860728937789398e-05, "loss": 2.0381, "step": 4900 }, { "epoch": 0.21, "eval_loss": 2.0108816623687744, "eval_runtime": 11.6358, "eval_samples_per_second": 352.018, "eval_steps_per_second": 22.001, "step": 4900 }, { "epoch": 0.21, "learning_rate": 5.860100140381903e-05, "loss": 2.0079, "step": 4910 }, { "epoch": 0.22, "learning_rate": 5.8594699605712184e-05, "loss": 2.0501, "step": 4920 }, { "epoch": 0.22, "learning_rate": 5.858838398661938e-05, "loss": 2.0088, "step": 4930 }, { "epoch": 0.22, "learning_rate": 5.8582054549593184e-05, "loss": 1.9654, "step": 4940 }, { "epoch": 0.22, "learning_rate": 5.8575711297692886e-05, "loss": 2.0455, "step": 4950 }, { "epoch": 0.22, "learning_rate": 5.8569354233984445e-05, "loss": 2.0596, "step": 4960 }, { "epoch": 0.22, "learning_rate": 5.856298336154048e-05, "loss": 1.9902, "step": 4970 }, { "epoch": 0.22, "learning_rate": 5.855659868344029e-05, "loss": 1.9897, "step": 4980 }, { "epoch": 0.22, "learning_rate": 5.8550200202769856e-05, "loss": 2.0454, "step": 4990 }, { "epoch": 0.22, "learning_rate": 5.854378792262183e-05, "loss": 2.0479, "step": 5000 }, { "epoch": 0.22, "eval_loss": 2.0080699920654297, "eval_runtime": 11.7197, "eval_samples_per_second": 349.497, "eval_steps_per_second": 21.844, "step": 5000 }, { "epoch": 0.22, "learning_rate": 5.853736184609553e-05, "loss": 2.037, "step": 5010 }, { "epoch": 0.22, "learning_rate": 5.853092197629693e-05, "loss": 1.9935, "step": 5020 }, { "epoch": 0.22, "learning_rate": 5.852446831633869e-05, "loss": 2.0225, "step": 5030 }, { "epoch": 0.22, "learning_rate": 5.851800086934013e-05, "loss": 2.0627, "step": 5040 }, { "epoch": 0.22, "learning_rate": 5.851151963842721e-05, "loss": 2.0333, "step": 5050 }, { "epoch": 0.22, "learning_rate": 5.85050246267326e-05, "loss": 2.0602, "step": 5060 }, { "epoch": 0.22, "learning_rate": 5.849851583739559e-05, "loss": 2.0367, "step": 5070 }, { "epoch": 0.22, "learning_rate": 5.849199327356215e-05, "loss": 2.0453, "step": 5080 }, { "epoch": 0.22, "learning_rate": 5.84854569383849e-05, "loss": 2.0275, "step": 5090 }, { "epoch": 0.22, "learning_rate": 5.847890683502312e-05, "loss": 2.0145, "step": 5100 }, { "epoch": 0.22, "eval_loss": 2.0048532485961914, "eval_runtime": 11.7026, "eval_samples_per_second": 350.008, "eval_steps_per_second": 21.875, "step": 5100 }, { "epoch": 0.22, "learning_rate": 5.8472342966642735e-05, "loss": 2.0088, "step": 5110 }, { "epoch": 0.22, "learning_rate": 5.8465765336416336e-05, "loss": 2.0097, "step": 5120 }, { "epoch": 0.22, "learning_rate": 5.8459173947523165e-05, "loss": 2.002, "step": 5130 }, { "epoch": 0.23, "learning_rate": 5.84525688031491e-05, "loss": 2.007, "step": 5140 }, { "epoch": 0.23, "learning_rate": 5.8445949906486674e-05, "loss": 2.0457, "step": 5150 }, { "epoch": 0.23, "learning_rate": 5.843931726073509e-05, "loss": 2.0467, "step": 5160 }, { "epoch": 0.23, "learning_rate": 5.843267086910015e-05, "loss": 1.9947, "step": 5170 }, { "epoch": 0.23, "learning_rate": 5.8426010734794346e-05, "loss": 2.0507, "step": 5180 }, { "epoch": 0.23, "learning_rate": 5.84193368610368e-05, "loss": 2.0486, "step": 5190 }, { "epoch": 0.23, "learning_rate": 5.841264925105323e-05, "loss": 2.0231, "step": 5200 }, { "epoch": 0.23, "eval_loss": 2.0028250217437744, "eval_runtime": 11.7989, "eval_samples_per_second": 347.151, "eval_steps_per_second": 21.697, "step": 5200 }, { "epoch": 0.23, "learning_rate": 5.840594790807607e-05, "loss": 1.9965, "step": 5210 }, { "epoch": 0.23, "learning_rate": 5.8399232835344335e-05, "loss": 1.9954, "step": 5220 }, { "epoch": 0.23, "learning_rate": 5.83925040361037e-05, "loss": 2.0133, "step": 5230 }, { "epoch": 0.23, "learning_rate": 5.838576151360646e-05, "loss": 1.9867, "step": 5240 }, { "epoch": 0.23, "learning_rate": 5.837900527111156e-05, "loss": 2.0275, "step": 5250 }, { "epoch": 0.23, "learning_rate": 5.837223531188456e-05, "loss": 2.0276, "step": 5260 }, { "epoch": 0.23, "learning_rate": 5.836545163919767e-05, "loss": 2.0017, "step": 5270 }, { "epoch": 0.23, "learning_rate": 5.83586542563297e-05, "loss": 2.0644, "step": 5280 }, { "epoch": 0.23, "learning_rate": 5.835184316656612e-05, "loss": 1.9864, "step": 5290 }, { "epoch": 0.23, "learning_rate": 5.834501837319899e-05, "loss": 2.0212, "step": 5300 }, { "epoch": 0.23, "eval_loss": 2.001617670059204, "eval_runtime": 11.8117, "eval_samples_per_second": 346.776, "eval_steps_per_second": 21.673, "step": 5300 }, { "epoch": 0.23, "learning_rate": 5.8338179879527034e-05, "loss": 2.0293, "step": 5310 }, { "epoch": 0.23, "learning_rate": 5.833132768885555e-05, "loss": 2.0081, "step": 5320 }, { "epoch": 0.23, "learning_rate": 5.83244618044965e-05, "loss": 2.0506, "step": 5330 }, { "epoch": 0.23, "learning_rate": 5.831758222976843e-05, "loss": 2.0314, "step": 5340 }, { "epoch": 0.23, "learning_rate": 5.8310688967996534e-05, "loss": 2.0002, "step": 5350 }, { "epoch": 0.23, "learning_rate": 5.830378202251261e-05, "loss": 2.0547, "step": 5360 }, { "epoch": 0.24, "learning_rate": 5.829686139665505e-05, "loss": 2.006, "step": 5370 }, { "epoch": 0.24, "learning_rate": 5.8289927093768885e-05, "loss": 2.0277, "step": 5380 }, { "epoch": 0.24, "learning_rate": 5.8282979117205745e-05, "loss": 2.0159, "step": 5390 }, { "epoch": 0.24, "learning_rate": 5.827601747032387e-05, "loss": 2.0505, "step": 5400 }, { "epoch": 0.24, "eval_loss": 1.9988398551940918, "eval_runtime": 11.6914, "eval_samples_per_second": 350.342, "eval_steps_per_second": 21.896, "step": 5400 }, { "epoch": 0.24, "learning_rate": 5.8269042156488106e-05, "loss": 2.0138, "step": 5410 }, { "epoch": 0.24, "learning_rate": 5.826205317906991e-05, "loss": 2.0205, "step": 5420 }, { "epoch": 0.24, "learning_rate": 5.825505054144735e-05, "loss": 2.0279, "step": 5430 }, { "epoch": 0.24, "learning_rate": 5.8248034247005075e-05, "loss": 2.0387, "step": 5440 }, { "epoch": 0.24, "learning_rate": 5.8241004299134345e-05, "loss": 2.0103, "step": 5450 }, { "epoch": 0.24, "learning_rate": 5.8233960701233026e-05, "loss": 1.9852, "step": 5460 }, { "epoch": 0.24, "learning_rate": 5.822690345670558e-05, "loss": 2.0353, "step": 5470 }, { "epoch": 0.24, "learning_rate": 5.821983256896305e-05, "loss": 2.0069, "step": 5480 }, { "epoch": 0.24, "learning_rate": 5.821274804142309e-05, "loss": 2.0343, "step": 5490 }, { "epoch": 0.24, "learning_rate": 5.820564987750994e-05, "loss": 1.9844, "step": 5500 }, { "epoch": 0.24, "eval_loss": 1.9970853328704834, "eval_runtime": 11.77, "eval_samples_per_second": 348.004, "eval_steps_per_second": 21.75, "step": 5500 }, { "epoch": 0.24, "learning_rate": 5.8198538080654456e-05, "loss": 2.0359, "step": 5510 }, { "epoch": 0.24, "learning_rate": 5.819141265429402e-05, "loss": 2.05, "step": 5520 }, { "epoch": 0.24, "learning_rate": 5.818427360187267e-05, "loss": 1.985, "step": 5530 }, { "epoch": 0.24, "learning_rate": 5.817712092684099e-05, "loss": 2.005, "step": 5540 }, { "epoch": 0.24, "learning_rate": 5.816995463265615e-05, "loss": 2.0303, "step": 5550 }, { "epoch": 0.24, "learning_rate": 5.816277472278194e-05, "loss": 2.0114, "step": 5560 }, { "epoch": 0.24, "learning_rate": 5.815558120068868e-05, "loss": 2.0188, "step": 5570 }, { "epoch": 0.24, "learning_rate": 5.814837406985331e-05, "loss": 2.0357, "step": 5580 }, { "epoch": 0.24, "learning_rate": 5.8141153333759316e-05, "loss": 2.0272, "step": 5590 }, { "epoch": 0.25, "learning_rate": 5.8133918995896784e-05, "loss": 2.0311, "step": 5600 }, { "epoch": 0.25, "eval_loss": 1.993647575378418, "eval_runtime": 13.7808, "eval_samples_per_second": 297.225, "eval_steps_per_second": 18.577, "step": 5600 }, { "epoch": 0.25, "learning_rate": 5.8126671059762356e-05, "loss": 2.0287, "step": 5610 }, { "epoch": 0.25, "learning_rate": 5.811940952885927e-05, "loss": 2.0618, "step": 5620 }, { "epoch": 0.25, "learning_rate": 5.81121344066973e-05, "loss": 2.025, "step": 5630 }, { "epoch": 0.25, "learning_rate": 5.810484569679283e-05, "loss": 1.9983, "step": 5640 }, { "epoch": 0.25, "learning_rate": 5.8097543402668785e-05, "loss": 1.9824, "step": 5650 }, { "epoch": 0.25, "learning_rate": 5.809022752785465e-05, "loss": 1.9815, "step": 5660 }, { "epoch": 0.25, "learning_rate": 5.8082898075886495e-05, "loss": 2.0088, "step": 5670 }, { "epoch": 0.25, "learning_rate": 5.807555505030695e-05, "loss": 2.0073, "step": 5680 }, { "epoch": 0.25, "learning_rate": 5.80681984546652e-05, "loss": 1.9424, "step": 5690 }, { "epoch": 0.25, "learning_rate": 5.806082829251696e-05, "loss": 2.0114, "step": 5700 }, { "epoch": 0.25, "eval_loss": 1.9904594421386719, "eval_runtime": 12.2556, "eval_samples_per_second": 334.213, "eval_steps_per_second": 20.888, "step": 5700 }, { "epoch": 0.25, "learning_rate": 5.805344456742457e-05, "loss": 2.015, "step": 5710 }, { "epoch": 0.25, "learning_rate": 5.804604728295686e-05, "loss": 2.0413, "step": 5720 }, { "epoch": 0.25, "learning_rate": 5.803863644268925e-05, "loss": 2.0054, "step": 5730 }, { "epoch": 0.25, "learning_rate": 5.803121205020369e-05, "loss": 1.9972, "step": 5740 }, { "epoch": 0.25, "learning_rate": 5.802377410908871e-05, "loss": 2.007, "step": 5750 }, { "epoch": 0.25, "learning_rate": 5.801632262293935e-05, "loss": 1.9748, "step": 5760 }, { "epoch": 0.25, "learning_rate": 5.8008857595357234e-05, "loss": 2.0488, "step": 5770 }, { "epoch": 0.25, "learning_rate": 5.8001379029950515e-05, "loss": 2.018, "step": 5780 }, { "epoch": 0.25, "learning_rate": 5.799388693033387e-05, "loss": 1.9982, "step": 5790 }, { "epoch": 0.25, "learning_rate": 5.798638130012856e-05, "loss": 2.0603, "step": 5800 }, { "epoch": 0.25, "eval_loss": 1.9879425764083862, "eval_runtime": 19.1497, "eval_samples_per_second": 213.894, "eval_steps_per_second": 13.368, "step": 5800 }, { "epoch": 0.25, "learning_rate": 5.7978862142962354e-05, "loss": 2.023, "step": 5810 }, { "epoch": 0.25, "learning_rate": 5.7971329462469565e-05, "loss": 2.0262, "step": 5820 }, { "epoch": 0.26, "learning_rate": 5.7963783262291046e-05, "loss": 1.9969, "step": 5830 }, { "epoch": 0.26, "learning_rate": 5.795622354607419e-05, "loss": 2.0213, "step": 5840 }, { "epoch": 0.26, "learning_rate": 5.79486503174729e-05, "loss": 1.9825, "step": 5850 }, { "epoch": 0.26, "learning_rate": 5.7941063580147637e-05, "loss": 2.0005, "step": 5860 }, { "epoch": 0.26, "learning_rate": 5.793346333776539e-05, "loss": 2.0043, "step": 5870 }, { "epoch": 0.26, "learning_rate": 5.7925849593999656e-05, "loss": 1.9907, "step": 5880 }, { "epoch": 0.26, "learning_rate": 5.791822235253046e-05, "loss": 1.9813, "step": 5890 }, { "epoch": 0.26, "learning_rate": 5.7910581617044375e-05, "loss": 1.987, "step": 5900 }, { "epoch": 0.26, "eval_loss": 1.9848577976226807, "eval_runtime": 13.9355, "eval_samples_per_second": 293.926, "eval_steps_per_second": 18.37, "step": 5900 }, { "epoch": 0.26, "learning_rate": 5.790292739123448e-05, "loss": 2.0303, "step": 5910 }, { "epoch": 0.26, "learning_rate": 5.789525967880037e-05, "loss": 1.9622, "step": 5920 }, { "epoch": 0.26, "learning_rate": 5.788757848344816e-05, "loss": 1.9876, "step": 5930 }, { "epoch": 0.26, "learning_rate": 5.787988380889048e-05, "loss": 1.9844, "step": 5940 }, { "epoch": 0.26, "learning_rate": 5.7872175658846495e-05, "loss": 1.9737, "step": 5950 }, { "epoch": 0.26, "learning_rate": 5.7864454037041864e-05, "loss": 1.992, "step": 5960 }, { "epoch": 0.26, "learning_rate": 5.785671894720876e-05, "loss": 2.0209, "step": 5970 }, { "epoch": 0.26, "learning_rate": 5.784897039308587e-05, "loss": 1.9725, "step": 5980 }, { "epoch": 0.26, "learning_rate": 5.7841208378418386e-05, "loss": 1.9777, "step": 5990 }, { "epoch": 0.26, "learning_rate": 5.783343290695801e-05, "loss": 2.0128, "step": 6000 }, { "epoch": 0.26, "eval_loss": 1.9819371700286865, "eval_runtime": 12.081, "eval_samples_per_second": 339.046, "eval_steps_per_second": 21.19, "step": 6000 }, { "epoch": 0.26, "learning_rate": 5.7825643982462926e-05, "loss": 1.9872, "step": 6010 }, { "epoch": 0.26, "learning_rate": 5.781784160869786e-05, "loss": 2.0036, "step": 6020 }, { "epoch": 0.26, "learning_rate": 5.7810025789434014e-05, "loss": 1.984, "step": 6030 }, { "epoch": 0.26, "learning_rate": 5.780219652844909e-05, "loss": 1.9859, "step": 6040 }, { "epoch": 0.26, "learning_rate": 5.77943538295273e-05, "loss": 2.0032, "step": 6050 }, { "epoch": 0.27, "learning_rate": 5.778649769645932e-05, "loss": 2.0241, "step": 6060 }, { "epoch": 0.27, "learning_rate": 5.7778628133042346e-05, "loss": 2.0075, "step": 6070 }, { "epoch": 0.27, "learning_rate": 5.7770745143080076e-05, "loss": 1.9937, "step": 6080 }, { "epoch": 0.27, "learning_rate": 5.776284873038266e-05, "loss": 2.0228, "step": 6090 }, { "epoch": 0.27, "learning_rate": 5.775493889876677e-05, "loss": 1.9932, "step": 6100 }, { "epoch": 0.27, "eval_loss": 1.9792561531066895, "eval_runtime": 15.4668, "eval_samples_per_second": 264.825, "eval_steps_per_second": 16.552, "step": 6100 }, { "epoch": 0.27, "learning_rate": 5.774701565205553e-05, "loss": 1.9778, "step": 6110 }, { "epoch": 0.27, "learning_rate": 5.77390789940786e-05, "loss": 2.0101, "step": 6120 }, { "epoch": 0.27, "learning_rate": 5.773112892867207e-05, "loss": 1.9633, "step": 6130 }, { "epoch": 0.27, "learning_rate": 5.7723165459678554e-05, "loss": 1.9755, "step": 6140 }, { "epoch": 0.27, "learning_rate": 5.771518859094709e-05, "loss": 2.0302, "step": 6150 }, { "epoch": 0.27, "learning_rate": 5.770719832633324e-05, "loss": 1.961, "step": 6160 }, { "epoch": 0.27, "learning_rate": 5.7699194669699026e-05, "loss": 1.9772, "step": 6170 }, { "epoch": 0.27, "learning_rate": 5.7691177624912934e-05, "loss": 1.9707, "step": 6180 }, { "epoch": 0.27, "learning_rate": 5.768314719584995e-05, "loss": 1.9767, "step": 6190 }, { "epoch": 0.27, "learning_rate": 5.767510338639148e-05, "loss": 1.9874, "step": 6200 }, { "epoch": 0.27, "eval_loss": 1.9786502122879028, "eval_runtime": 13.4256, "eval_samples_per_second": 305.088, "eval_steps_per_second": 19.068, "step": 6200 }, { "epoch": 0.27, "learning_rate": 5.7667046200425445e-05, "loss": 1.9606, "step": 6210 }, { "epoch": 0.27, "learning_rate": 5.76589756418462e-05, "loss": 1.9976, "step": 6220 }, { "epoch": 0.27, "learning_rate": 5.765089171455459e-05, "loss": 2.0065, "step": 6230 }, { "epoch": 0.27, "learning_rate": 5.764279442245789e-05, "loss": 2.0063, "step": 6240 }, { "epoch": 0.27, "learning_rate": 5.763468376946987e-05, "loss": 1.9764, "step": 6250 }, { "epoch": 0.27, "learning_rate": 5.762655975951073e-05, "loss": 2.0229, "step": 6260 }, { "epoch": 0.27, "learning_rate": 5.761842239650713e-05, "loss": 1.9482, "step": 6270 }, { "epoch": 0.27, "learning_rate": 5.761027168439219e-05, "loss": 2.0053, "step": 6280 }, { "epoch": 0.28, "learning_rate": 5.7602107627105506e-05, "loss": 2.0104, "step": 6290 }, { "epoch": 0.28, "learning_rate": 5.759393022859308e-05, "loss": 1.9935, "step": 6300 }, { "epoch": 0.28, "eval_loss": 1.9752275943756104, "eval_runtime": 13.0612, "eval_samples_per_second": 313.6, "eval_steps_per_second": 19.6, "step": 6300 }, { "epoch": 0.28, "learning_rate": 5.758573949280737e-05, "loss": 1.9944, "step": 6310 }, { "epoch": 0.28, "learning_rate": 5.7577535423707325e-05, "loss": 1.977, "step": 6320 }, { "epoch": 0.28, "learning_rate": 5.7569318025258286e-05, "loss": 1.9996, "step": 6330 }, { "epoch": 0.28, "learning_rate": 5.7561087301432054e-05, "loss": 1.9515, "step": 6340 }, { "epoch": 0.28, "learning_rate": 5.755284325620688e-05, "loss": 1.9979, "step": 6350 }, { "epoch": 0.28, "learning_rate": 5.754458589356746e-05, "loss": 1.9294, "step": 6360 }, { "epoch": 0.28, "learning_rate": 5.753631521750489e-05, "loss": 1.9437, "step": 6370 }, { "epoch": 0.28, "learning_rate": 5.7528031232016744e-05, "loss": 1.944, "step": 6380 }, { "epoch": 0.28, "learning_rate": 5.751973394110701e-05, "loss": 1.9716, "step": 6390 }, { "epoch": 0.28, "learning_rate": 5.751142334878609e-05, "loss": 1.9515, "step": 6400 }, { "epoch": 0.28, "eval_loss": 1.972642183303833, "eval_runtime": 13.9092, "eval_samples_per_second": 294.481, "eval_steps_per_second": 18.405, "step": 6400 }, { "epoch": 0.28, "learning_rate": 5.7503099459070864e-05, "loss": 1.9862, "step": 6410 }, { "epoch": 0.28, "learning_rate": 5.749476227598457e-05, "loss": 1.9563, "step": 6420 }, { "epoch": 0.28, "learning_rate": 5.748641180355694e-05, "loss": 2.0096, "step": 6430 }, { "epoch": 0.28, "learning_rate": 5.747804804582409e-05, "loss": 2.0074, "step": 6440 }, { "epoch": 0.28, "learning_rate": 5.7469671006828546e-05, "loss": 1.9559, "step": 6450 }, { "epoch": 0.28, "learning_rate": 5.7461280690619305e-05, "loss": 1.9869, "step": 6460 }, { "epoch": 0.28, "learning_rate": 5.745287710125173e-05, "loss": 1.9906, "step": 6470 }, { "epoch": 0.28, "learning_rate": 5.7444460242787614e-05, "loss": 2.0531, "step": 6480 }, { "epoch": 0.28, "learning_rate": 5.7436030119295184e-05, "loss": 1.9787, "step": 6490 }, { "epoch": 0.28, "learning_rate": 5.742758673484905e-05, "loss": 2.0037, "step": 6500 }, { "epoch": 0.28, "eval_loss": 1.9702032804489136, "eval_runtime": 12.617, "eval_samples_per_second": 324.641, "eval_steps_per_second": 20.29, "step": 6500 }, { "epoch": 0.28, "learning_rate": 5.7419130093530255e-05, "loss": 2.0547, "step": 6510 }, { "epoch": 0.29, "learning_rate": 5.7410660199426236e-05, "loss": 2.052, "step": 6520 }, { "epoch": 0.29, "learning_rate": 5.740217705663083e-05, "loss": 1.9391, "step": 6530 }, { "epoch": 0.29, "learning_rate": 5.739368066924431e-05, "loss": 1.9846, "step": 6540 }, { "epoch": 0.29, "learning_rate": 5.7385171041373285e-05, "loss": 2.0112, "step": 6550 }, { "epoch": 0.29, "learning_rate": 5.7376648177130846e-05, "loss": 1.9748, "step": 6560 }, { "epoch": 0.29, "learning_rate": 5.736811208063642e-05, "loss": 1.9662, "step": 6570 }, { "epoch": 0.29, "learning_rate": 5.7359562756015864e-05, "loss": 1.9933, "step": 6580 }, { "epoch": 0.29, "learning_rate": 5.735100020740141e-05, "loss": 2.0313, "step": 6590 }, { "epoch": 0.29, "learning_rate": 5.734242443893168e-05, "loss": 1.9313, "step": 6600 }, { "epoch": 0.29, "eval_loss": 1.9688479900360107, "eval_runtime": 12.9347, "eval_samples_per_second": 316.667, "eval_steps_per_second": 19.792, "step": 6600 }, { "epoch": 0.29, "learning_rate": 5.73338354547517e-05, "loss": 1.9515, "step": 6610 }, { "epoch": 0.29, "learning_rate": 5.732523325901288e-05, "loss": 1.954, "step": 6620 }, { "epoch": 0.29, "learning_rate": 5.731661785587301e-05, "loss": 2.0307, "step": 6630 }, { "epoch": 0.29, "learning_rate": 5.730798924949626e-05, "loss": 2.001, "step": 6640 }, { "epoch": 0.29, "learning_rate": 5.729934744405319e-05, "loss": 2.0087, "step": 6650 }, { "epoch": 0.29, "learning_rate": 5.729069244372075e-05, "loss": 2.0286, "step": 6660 }, { "epoch": 0.29, "learning_rate": 5.728202425268224e-05, "loss": 2.0152, "step": 6670 }, { "epoch": 0.29, "learning_rate": 5.727334287512735e-05, "loss": 1.9232, "step": 6680 }, { "epoch": 0.29, "learning_rate": 5.7264648315252156e-05, "loss": 1.976, "step": 6690 }, { "epoch": 0.29, "learning_rate": 5.725594057725909e-05, "loss": 1.968, "step": 6700 }, { "epoch": 0.29, "eval_loss": 1.9677083492279053, "eval_runtime": 12.2837, "eval_samples_per_second": 333.451, "eval_steps_per_second": 20.841, "step": 6700 }, { "epoch": 0.29, "learning_rate": 5.724721966535695e-05, "loss": 1.9615, "step": 6710 }, { "epoch": 0.29, "learning_rate": 5.723848558376092e-05, "loss": 1.9948, "step": 6720 }, { "epoch": 0.29, "learning_rate": 5.722973833669252e-05, "loss": 1.9942, "step": 6730 }, { "epoch": 0.3, "learning_rate": 5.7220977928379684e-05, "loss": 2.0166, "step": 6740 }, { "epoch": 0.3, "learning_rate": 5.721220436305664e-05, "loss": 1.9345, "step": 6750 }, { "epoch": 0.3, "learning_rate": 5.720341764496404e-05, "loss": 2.0107, "step": 6760 }, { "epoch": 0.3, "learning_rate": 5.719461777834883e-05, "loss": 1.9749, "step": 6770 }, { "epoch": 0.3, "learning_rate": 5.7185804767464375e-05, "loss": 2.0009, "step": 6780 }, { "epoch": 0.3, "learning_rate": 5.717697861657035e-05, "loss": 2.0079, "step": 6790 }, { "epoch": 0.3, "learning_rate": 5.7168139329932796e-05, "loss": 1.9954, "step": 6800 }, { "epoch": 0.3, "eval_loss": 1.9654452800750732, "eval_runtime": 12.8097, "eval_samples_per_second": 319.759, "eval_steps_per_second": 19.985, "step": 6800 }, { "epoch": 0.3, "learning_rate": 5.71592869118241e-05, "loss": 2.0057, "step": 6810 }, { "epoch": 0.3, "learning_rate": 5.7150421366523e-05, "loss": 1.9505, "step": 6820 }, { "epoch": 0.3, "learning_rate": 5.7141542698314585e-05, "loss": 2.0055, "step": 6830 }, { "epoch": 0.3, "learning_rate": 5.713265091149025e-05, "loss": 1.9874, "step": 6840 }, { "epoch": 0.3, "learning_rate": 5.71237460103478e-05, "loss": 1.9679, "step": 6850 }, { "epoch": 0.3, "learning_rate": 5.711482799919129e-05, "loss": 1.9643, "step": 6860 }, { "epoch": 0.3, "learning_rate": 5.710589688233119e-05, "loss": 2.0399, "step": 6870 }, { "epoch": 0.3, "learning_rate": 5.709695266408426e-05, "loss": 1.9797, "step": 6880 }, { "epoch": 0.3, "learning_rate": 5.708799534877363e-05, "loss": 1.9537, "step": 6890 }, { "epoch": 0.3, "learning_rate": 5.707902494072871e-05, "loss": 1.9182, "step": 6900 }, { "epoch": 0.3, "eval_loss": 1.9642119407653809, "eval_runtime": 13.2593, "eval_samples_per_second": 308.916, "eval_steps_per_second": 19.307, "step": 6900 }, { "epoch": 0.3, "learning_rate": 5.707004144428526e-05, "loss": 2.007, "step": 6910 }, { "epoch": 0.3, "learning_rate": 5.706104486378539e-05, "loss": 1.9933, "step": 6920 }, { "epoch": 0.3, "learning_rate": 5.7052035203577516e-05, "loss": 2.013, "step": 6930 }, { "epoch": 0.3, "learning_rate": 5.704301246801636e-05, "loss": 1.9395, "step": 6940 }, { "epoch": 0.3, "learning_rate": 5.703397666146299e-05, "loss": 1.9651, "step": 6950 }, { "epoch": 0.3, "learning_rate": 5.7024927788284765e-05, "loss": 1.9879, "step": 6960 }, { "epoch": 0.31, "learning_rate": 5.7015865852855406e-05, "loss": 1.9733, "step": 6970 }, { "epoch": 0.31, "learning_rate": 5.700679085955488e-05, "loss": 2.0089, "step": 6980 }, { "epoch": 0.31, "learning_rate": 5.699770281276952e-05, "loss": 2.0274, "step": 6990 }, { "epoch": 0.31, "learning_rate": 5.6988601716891954e-05, "loss": 1.9599, "step": 7000 }, { "epoch": 0.31, "eval_loss": 1.9618194103240967, "eval_runtime": 13.1445, "eval_samples_per_second": 311.613, "eval_steps_per_second": 19.476, "step": 7000 }, { "epoch": 0.31, "learning_rate": 5.69794875763211e-05, "loss": 1.9609, "step": 7010 }, { "epoch": 0.31, "learning_rate": 5.6970360395462204e-05, "loss": 1.9311, "step": 7020 }, { "epoch": 0.31, "learning_rate": 5.69612201787268e-05, "loss": 2.0024, "step": 7030 }, { "epoch": 0.31, "learning_rate": 5.695206693053273e-05, "loss": 1.9428, "step": 7040 }, { "epoch": 0.31, "learning_rate": 5.694290065530414e-05, "loss": 1.9586, "step": 7050 }, { "epoch": 0.31, "learning_rate": 5.6933721357471455e-05, "loss": 1.967, "step": 7060 }, { "epoch": 0.31, "learning_rate": 5.692452904147141e-05, "loss": 2.0423, "step": 7070 }, { "epoch": 0.31, "learning_rate": 5.6915323711747023e-05, "loss": 1.9693, "step": 7080 }, { "epoch": 0.31, "learning_rate": 5.69061053727476e-05, "loss": 2.001, "step": 7090 }, { "epoch": 0.31, "learning_rate": 5.689687402892876e-05, "loss": 1.9601, "step": 7100 }, { "epoch": 0.31, "eval_loss": 1.959427833557129, "eval_runtime": 12.6915, "eval_samples_per_second": 322.737, "eval_steps_per_second": 20.171, "step": 7100 }, { "epoch": 0.31, "learning_rate": 5.688762968475237e-05, "loss": 1.9979, "step": 7110 }, { "epoch": 0.31, "learning_rate": 5.687837234468661e-05, "loss": 1.9676, "step": 7120 }, { "epoch": 0.31, "learning_rate": 5.686910201320592e-05, "loss": 2.0003, "step": 7130 }, { "epoch": 0.31, "learning_rate": 5.685981869479104e-05, "loss": 1.9988, "step": 7140 }, { "epoch": 0.31, "learning_rate": 5.685052239392897e-05, "loss": 2.0026, "step": 7150 }, { "epoch": 0.31, "learning_rate": 5.6841213115113e-05, "loss": 1.9073, "step": 7160 }, { "epoch": 0.31, "learning_rate": 5.683189086284268e-05, "loss": 1.9597, "step": 7170 }, { "epoch": 0.31, "learning_rate": 5.682255564162382e-05, "loss": 1.9903, "step": 7180 }, { "epoch": 0.31, "learning_rate": 5.6813207455968534e-05, "loss": 2.0011, "step": 7190 }, { "epoch": 0.32, "learning_rate": 5.680384631039519e-05, "loss": 1.9978, "step": 7200 }, { "epoch": 0.32, "eval_loss": 1.9591736793518066, "eval_runtime": 14.3637, "eval_samples_per_second": 285.163, "eval_steps_per_second": 17.823, "step": 7200 }, { "epoch": 0.32, "learning_rate": 5.679447220942838e-05, "loss": 1.976, "step": 7210 }, { "epoch": 0.32, "learning_rate": 5.6785085157599016e-05, "loss": 1.9745, "step": 7220 }, { "epoch": 0.32, "learning_rate": 5.677568515944424e-05, "loss": 1.968, "step": 7230 }, { "epoch": 0.32, "learning_rate": 5.676627221950743e-05, "loss": 1.993, "step": 7240 }, { "epoch": 0.32, "learning_rate": 5.675684634233828e-05, "loss": 1.979, "step": 7250 }, { "epoch": 0.32, "learning_rate": 5.674740753249268e-05, "loss": 2.0276, "step": 7260 }, { "epoch": 0.32, "learning_rate": 5.673795579453281e-05, "loss": 1.9836, "step": 7270 }, { "epoch": 0.32, "learning_rate": 5.672849113302705e-05, "loss": 1.9735, "step": 7280 }, { "epoch": 0.32, "learning_rate": 5.6719013552550084e-05, "loss": 1.9811, "step": 7290 }, { "epoch": 0.32, "learning_rate": 5.67095230576828e-05, "loss": 1.9934, "step": 7300 }, { "epoch": 0.32, "eval_loss": 1.9573659896850586, "eval_runtime": 13.2709, "eval_samples_per_second": 308.645, "eval_steps_per_second": 19.29, "step": 7300 }, { "epoch": 0.32, "learning_rate": 5.6700019653012354e-05, "loss": 2.0308, "step": 7310 }, { "epoch": 0.32, "learning_rate": 5.669050334313213e-05, "loss": 1.9438, "step": 7320 }, { "epoch": 0.32, "learning_rate": 5.668097413264173e-05, "loss": 1.9725, "step": 7330 }, { "epoch": 0.32, "learning_rate": 5.6671432026147035e-05, "loss": 1.9757, "step": 7340 }, { "epoch": 0.32, "learning_rate": 5.6661877028260107e-05, "loss": 1.9539, "step": 7350 }, { "epoch": 0.32, "learning_rate": 5.665230914359929e-05, "loss": 1.998, "step": 7360 }, { "epoch": 0.32, "learning_rate": 5.6642728376789124e-05, "loss": 1.9701, "step": 7370 }, { "epoch": 0.32, "learning_rate": 5.663313473246038e-05, "loss": 1.9892, "step": 7380 }, { "epoch": 0.32, "learning_rate": 5.662352821525007e-05, "loss": 2.0117, "step": 7390 }, { "epoch": 0.32, "learning_rate": 5.661390882980141e-05, "loss": 2.0, "step": 7400 }, { "epoch": 0.32, "eval_loss": 1.9556201696395874, "eval_runtime": 13.8205, "eval_samples_per_second": 296.372, "eval_steps_per_second": 18.523, "step": 7400 }, { "epoch": 0.32, "learning_rate": 5.660427658076384e-05, "loss": 2.0104, "step": 7410 }, { "epoch": 0.32, "learning_rate": 5.6594631472793035e-05, "loss": 1.9395, "step": 7420 }, { "epoch": 0.33, "learning_rate": 5.6584973510550844e-05, "loss": 1.9631, "step": 7430 }, { "epoch": 0.33, "learning_rate": 5.657530269870536e-05, "loss": 1.9215, "step": 7440 }, { "epoch": 0.33, "learning_rate": 5.6565619041930894e-05, "loss": 1.9726, "step": 7450 }, { "epoch": 0.33, "learning_rate": 5.655592254490795e-05, "loss": 1.9995, "step": 7460 }, { "epoch": 0.33, "learning_rate": 5.6546213212323235e-05, "loss": 2.0179, "step": 7470 }, { "epoch": 0.33, "learning_rate": 5.6536491048869666e-05, "loss": 1.9594, "step": 7480 }, { "epoch": 0.33, "learning_rate": 5.6526756059246365e-05, "loss": 1.9821, "step": 7490 }, { "epoch": 0.33, "learning_rate": 5.651700824815865e-05, "loss": 1.9719, "step": 7500 }, { "epoch": 0.33, "eval_loss": 1.9540364742279053, "eval_runtime": 14.3302, "eval_samples_per_second": 285.83, "eval_steps_per_second": 17.864, "step": 7500 }, { "epoch": 0.33, "learning_rate": 5.650724762031803e-05, "loss": 1.9848, "step": 7510 }, { "epoch": 0.33, "learning_rate": 5.6497474180442224e-05, "loss": 1.9471, "step": 7520 }, { "epoch": 0.33, "learning_rate": 5.648768793325513e-05, "loss": 1.9697, "step": 7530 }, { "epoch": 0.33, "learning_rate": 5.6477888883486844e-05, "loss": 1.9522, "step": 7540 }, { "epoch": 0.33, "learning_rate": 5.646807703587365e-05, "loss": 1.9738, "step": 7550 }, { "epoch": 0.33, "learning_rate": 5.645825239515799e-05, "loss": 1.9944, "step": 7560 }, { "epoch": 0.33, "learning_rate": 5.644841496608855e-05, "loss": 2.0082, "step": 7570 }, { "epoch": 0.33, "learning_rate": 5.643856475342014e-05, "loss": 2.0052, "step": 7580 }, { "epoch": 0.33, "learning_rate": 5.642870176191377e-05, "loss": 1.9821, "step": 7590 }, { "epoch": 0.33, "learning_rate": 5.641882599633663e-05, "loss": 1.991, "step": 7600 }, { "epoch": 0.33, "eval_loss": 1.951909065246582, "eval_runtime": 12.5953, "eval_samples_per_second": 325.201, "eval_steps_per_second": 20.325, "step": 7600 }, { "epoch": 0.33, "learning_rate": 5.6408937461462095e-05, "loss": 2.0207, "step": 7610 }, { "epoch": 0.33, "learning_rate": 5.639903616206967e-05, "loss": 1.9723, "step": 7620 }, { "epoch": 0.33, "learning_rate": 5.638912210294508e-05, "loss": 1.9881, "step": 7630 }, { "epoch": 0.33, "learning_rate": 5.6379195288880184e-05, "loss": 1.955, "step": 7640 }, { "epoch": 0.33, "learning_rate": 5.636925572467301e-05, "loss": 1.9749, "step": 7650 }, { "epoch": 0.34, "learning_rate": 5.635930341512778e-05, "loss": 1.9815, "step": 7660 }, { "epoch": 0.34, "learning_rate": 5.6349338365054825e-05, "loss": 1.9807, "step": 7670 }, { "epoch": 0.34, "learning_rate": 5.633936057927067e-05, "loss": 1.9934, "step": 7680 }, { "epoch": 0.34, "learning_rate": 5.632937006259799e-05, "loss": 1.9536, "step": 7690 }, { "epoch": 0.34, "learning_rate": 5.631936681986561e-05, "loss": 1.9894, "step": 7700 }, { "epoch": 0.34, "eval_loss": 1.9506793022155762, "eval_runtime": 13.5546, "eval_samples_per_second": 302.184, "eval_steps_per_second": 18.887, "step": 7700 }, { "epoch": 0.34, "learning_rate": 5.63093508559085e-05, "loss": 1.9473, "step": 7710 }, { "epoch": 0.34, "learning_rate": 5.6299322175567783e-05, "loss": 1.98, "step": 7720 }, { "epoch": 0.34, "learning_rate": 5.628928078369074e-05, "loss": 1.9744, "step": 7730 }, { "epoch": 0.34, "learning_rate": 5.6279226685130784e-05, "loss": 1.9851, "step": 7740 }, { "epoch": 0.34, "learning_rate": 5.626915988474746e-05, "loss": 1.9756, "step": 7750 }, { "epoch": 0.34, "learning_rate": 5.625908038740647e-05, "loss": 2.035, "step": 7760 }, { "epoch": 0.34, "learning_rate": 5.6248988197979664e-05, "loss": 1.9488, "step": 7770 }, { "epoch": 0.34, "learning_rate": 5.623888332134497e-05, "loss": 1.9579, "step": 7780 }, { "epoch": 0.34, "learning_rate": 5.622876576238652e-05, "loss": 1.9861, "step": 7790 }, { "epoch": 0.34, "learning_rate": 5.6218635525994545e-05, "loss": 1.9895, "step": 7800 }, { "epoch": 0.34, "eval_loss": 1.9489729404449463, "eval_runtime": 12.8919, "eval_samples_per_second": 317.719, "eval_steps_per_second": 19.857, "step": 7800 }, { "epoch": 0.34, "learning_rate": 5.620849261706537e-05, "loss": 2.0395, "step": 7810 }, { "epoch": 0.34, "learning_rate": 5.6198337040501505e-05, "loss": 1.958, "step": 7820 }, { "epoch": 0.34, "learning_rate": 5.618816880121155e-05, "loss": 1.9912, "step": 7830 }, { "epoch": 0.34, "learning_rate": 5.617798790411023e-05, "loss": 1.9098, "step": 7840 }, { "epoch": 0.34, "learning_rate": 5.616779435411838e-05, "loss": 1.9905, "step": 7850 }, { "epoch": 0.34, "learning_rate": 5.615758815616297e-05, "loss": 1.9593, "step": 7860 }, { "epoch": 0.34, "learning_rate": 5.614736931517707e-05, "loss": 1.9738, "step": 7870 }, { "epoch": 0.34, "learning_rate": 5.613713783609985e-05, "loss": 1.9318, "step": 7880 }, { "epoch": 0.35, "learning_rate": 5.612689372387662e-05, "loss": 1.9924, "step": 7890 }, { "epoch": 0.35, "learning_rate": 5.6116636983458766e-05, "loss": 1.97, "step": 7900 }, { "epoch": 0.35, "eval_loss": 1.947704553604126, "eval_runtime": 12.113, "eval_samples_per_second": 338.148, "eval_steps_per_second": 21.134, "step": 7900 }, { "epoch": 0.35, "learning_rate": 5.61063676198038e-05, "loss": 1.9341, "step": 7910 }, { "epoch": 0.35, "learning_rate": 5.609608563787532e-05, "loss": 2.0017, "step": 7920 }, { "epoch": 0.35, "learning_rate": 5.608579104264301e-05, "loss": 1.9553, "step": 7930 }, { "epoch": 0.35, "learning_rate": 5.607548383908269e-05, "loss": 1.9659, "step": 7940 }, { "epoch": 0.35, "learning_rate": 5.6065164032176255e-05, "loss": 2.0098, "step": 7950 }, { "epoch": 0.35, "learning_rate": 5.605483162691168e-05, "loss": 1.9583, "step": 7960 }, { "epoch": 0.35, "learning_rate": 5.6044486628283026e-05, "loss": 1.8988, "step": 7970 }, { "epoch": 0.35, "learning_rate": 5.6034129041290476e-05, "loss": 1.9599, "step": 7980 }, { "epoch": 0.35, "learning_rate": 5.6023758870940265e-05, "loss": 1.9926, "step": 7990 }, { "epoch": 0.35, "learning_rate": 5.601337612224472e-05, "loss": 1.9334, "step": 8000 }, { "epoch": 0.35, "eval_loss": 1.9458627700805664, "eval_runtime": 13.6514, "eval_samples_per_second": 300.043, "eval_steps_per_second": 18.753, "step": 8000 }, { "epoch": 0.35, "learning_rate": 5.600298080022224e-05, "loss": 1.9365, "step": 8010 }, { "epoch": 0.35, "learning_rate": 5.599257290989733e-05, "loss": 2.0154, "step": 8020 }, { "epoch": 0.35, "learning_rate": 5.5982152456300514e-05, "loss": 1.9472, "step": 8030 }, { "epoch": 0.35, "learning_rate": 5.5971719444468454e-05, "loss": 1.9347, "step": 8040 }, { "epoch": 0.35, "learning_rate": 5.5961273879443845e-05, "loss": 1.9706, "step": 8050 }, { "epoch": 0.35, "learning_rate": 5.595081576627544e-05, "loss": 1.8983, "step": 8060 }, { "epoch": 0.35, "learning_rate": 5.594034511001809e-05, "loss": 1.9517, "step": 8070 }, { "epoch": 0.35, "learning_rate": 5.5929861915732674e-05, "loss": 2.0011, "step": 8080 }, { "epoch": 0.35, "learning_rate": 5.591936618848617e-05, "loss": 1.9648, "step": 8090 }, { "epoch": 0.35, "learning_rate": 5.590885793335156e-05, "loss": 1.9856, "step": 8100 }, { "epoch": 0.35, "eval_loss": 1.9449169635772705, "eval_runtime": 12.4323, "eval_samples_per_second": 329.464, "eval_steps_per_second": 20.591, "step": 8100 }, { "epoch": 0.36, "learning_rate": 5.589833715540794e-05, "loss": 1.9837, "step": 8110 }, { "epoch": 0.36, "learning_rate": 5.588780385974042e-05, "loss": 1.996, "step": 8120 }, { "epoch": 0.36, "learning_rate": 5.5877258051440175e-05, "loss": 1.9771, "step": 8130 }, { "epoch": 0.36, "learning_rate": 5.5866699735604424e-05, "loss": 1.9989, "step": 8140 }, { "epoch": 0.36, "learning_rate": 5.5856128917336437e-05, "loss": 1.99, "step": 8150 }, { "epoch": 0.36, "learning_rate": 5.584554560174552e-05, "loss": 2.0055, "step": 8160 }, { "epoch": 0.36, "learning_rate": 5.5834949793947004e-05, "loss": 1.9763, "step": 8170 }, { "epoch": 0.36, "learning_rate": 5.5824341499062304e-05, "loss": 1.9356, "step": 8180 }, { "epoch": 0.36, "learning_rate": 5.581372072221882e-05, "loss": 1.9056, "step": 8190 }, { "epoch": 0.36, "learning_rate": 5.580308746855002e-05, "loss": 1.9748, "step": 8200 }, { "epoch": 0.36, "eval_loss": 1.9407728910446167, "eval_runtime": 14.8862, "eval_samples_per_second": 275.154, "eval_steps_per_second": 17.197, "step": 8200 }, { "epoch": 0.36, "learning_rate": 5.5792441743195385e-05, "loss": 1.9251, "step": 8210 }, { "epoch": 0.36, "learning_rate": 5.5781783551300435e-05, "loss": 1.9767, "step": 8220 }, { "epoch": 0.36, "learning_rate": 5.57711128980167e-05, "loss": 1.9116, "step": 8230 }, { "epoch": 0.36, "learning_rate": 5.576042978850174e-05, "loss": 1.9413, "step": 8240 }, { "epoch": 0.36, "learning_rate": 5.574973422791916e-05, "loss": 1.9468, "step": 8250 }, { "epoch": 0.36, "learning_rate": 5.573902622143852e-05, "loss": 1.9562, "step": 8260 }, { "epoch": 0.36, "learning_rate": 5.572830577423549e-05, "loss": 1.9744, "step": 8270 }, { "epoch": 0.36, "learning_rate": 5.571757289149165e-05, "loss": 1.9422, "step": 8280 }, { "epoch": 0.36, "learning_rate": 5.570682757839467e-05, "loss": 1.9565, "step": 8290 }, { "epoch": 0.36, "learning_rate": 5.569606984013819e-05, "loss": 2.0056, "step": 8300 }, { "epoch": 0.36, "eval_loss": 1.9395599365234375, "eval_runtime": 11.71, "eval_samples_per_second": 349.786, "eval_steps_per_second": 21.862, "step": 8300 }, { "epoch": 0.36, "learning_rate": 5.568529968192187e-05, "loss": 1.9783, "step": 8310 }, { "epoch": 0.36, "learning_rate": 5.5674517108951365e-05, "loss": 1.9335, "step": 8320 }, { "epoch": 0.36, "learning_rate": 5.5663722126438316e-05, "loss": 1.9209, "step": 8330 }, { "epoch": 0.37, "learning_rate": 5.565291473960041e-05, "loss": 1.9587, "step": 8340 }, { "epoch": 0.37, "learning_rate": 5.5642094953661274e-05, "loss": 2.0141, "step": 8350 }, { "epoch": 0.37, "learning_rate": 5.563126277385056e-05, "loss": 1.9702, "step": 8360 }, { "epoch": 0.37, "learning_rate": 5.562041820540391e-05, "loss": 2.0086, "step": 8370 }, { "epoch": 0.37, "learning_rate": 5.560956125356293e-05, "loss": 1.9322, "step": 8380 }, { "epoch": 0.37, "learning_rate": 5.5598691923575244e-05, "loss": 1.9788, "step": 8390 }, { "epoch": 0.37, "learning_rate": 5.558781022069443e-05, "loss": 1.982, "step": 8400 }, { "epoch": 0.37, "eval_loss": 1.9378072023391724, "eval_runtime": 13.6976, "eval_samples_per_second": 299.029, "eval_steps_per_second": 18.689, "step": 8400 }, { "epoch": 0.37, "learning_rate": 5.557691615018008e-05, "loss": 2.0186, "step": 8410 }, { "epoch": 0.37, "learning_rate": 5.556600971729771e-05, "loss": 1.9675, "step": 8420 }, { "epoch": 0.37, "learning_rate": 5.5555090927318876e-05, "loss": 1.9675, "step": 8430 }, { "epoch": 0.37, "learning_rate": 5.554415978552106e-05, "loss": 2.0175, "step": 8440 }, { "epoch": 0.37, "learning_rate": 5.5533216297187726e-05, "loss": 1.9642, "step": 8450 }, { "epoch": 0.37, "learning_rate": 5.5522260467608326e-05, "loss": 1.9741, "step": 8460 }, { "epoch": 0.37, "learning_rate": 5.5511292302078244e-05, "loss": 1.9557, "step": 8470 }, { "epoch": 0.37, "learning_rate": 5.550031180589884e-05, "loss": 1.9811, "step": 8480 }, { "epoch": 0.37, "learning_rate": 5.5489318984377456e-05, "loss": 1.9578, "step": 8490 }, { "epoch": 0.37, "learning_rate": 5.547831384282734e-05, "loss": 1.9934, "step": 8500 }, { "epoch": 0.37, "eval_loss": 1.9361932277679443, "eval_runtime": 12.095, "eval_samples_per_second": 338.652, "eval_steps_per_second": 21.166, "step": 8500 }, { "epoch": 0.37, "learning_rate": 5.546729638656777e-05, "loss": 1.9608, "step": 8510 }, { "epoch": 0.37, "learning_rate": 5.545626662092389e-05, "loss": 1.966, "step": 8520 }, { "epoch": 0.37, "learning_rate": 5.544522455122686e-05, "loss": 1.9437, "step": 8530 }, { "epoch": 0.37, "learning_rate": 5.5434170182813755e-05, "loss": 1.9392, "step": 8540 }, { "epoch": 0.37, "learning_rate": 5.54231035210276e-05, "loss": 1.9426, "step": 8550 }, { "epoch": 0.37, "learning_rate": 5.541202457121737e-05, "loss": 1.9498, "step": 8560 }, { "epoch": 0.38, "learning_rate": 5.540093333873798e-05, "loss": 1.9778, "step": 8570 }, { "epoch": 0.38, "learning_rate": 5.5389829828950266e-05, "loss": 1.9281, "step": 8580 }, { "epoch": 0.38, "learning_rate": 5.5378714047221007e-05, "loss": 1.99, "step": 8590 }, { "epoch": 0.38, "learning_rate": 5.5367585998922916e-05, "loss": 1.9328, "step": 8600 }, { "epoch": 0.38, "eval_loss": 1.9360263347625732, "eval_runtime": 11.7734, "eval_samples_per_second": 347.904, "eval_steps_per_second": 21.744, "step": 8600 }, { "epoch": 0.38, "learning_rate": 5.535644568943464e-05, "loss": 1.9399, "step": 8610 }, { "epoch": 0.38, "learning_rate": 5.5345293124140736e-05, "loss": 1.9359, "step": 8620 }, { "epoch": 0.38, "learning_rate": 5.533412830843169e-05, "loss": 1.9176, "step": 8630 }, { "epoch": 0.38, "learning_rate": 5.5322951247703935e-05, "loss": 1.9591, "step": 8640 }, { "epoch": 0.38, "learning_rate": 5.5311761947359786e-05, "loss": 1.964, "step": 8650 }, { "epoch": 0.38, "learning_rate": 5.530056041280748e-05, "loss": 1.9774, "step": 8660 }, { "epoch": 0.38, "learning_rate": 5.5289346649461196e-05, "loss": 1.9532, "step": 8670 }, { "epoch": 0.38, "learning_rate": 5.527812066274099e-05, "loss": 1.9559, "step": 8680 }, { "epoch": 0.38, "learning_rate": 5.5266882458072845e-05, "loss": 1.9101, "step": 8690 }, { "epoch": 0.38, "learning_rate": 5.525563204088864e-05, "loss": 2.0133, "step": 8700 }, { "epoch": 0.38, "eval_loss": 1.9347482919692993, "eval_runtime": 11.5929, "eval_samples_per_second": 353.32, "eval_steps_per_second": 22.082, "step": 8700 }, { "epoch": 0.38, "learning_rate": 5.524436941662616e-05, "loss": 1.9481, "step": 8710 }, { "epoch": 0.38, "learning_rate": 5.52330945907291e-05, "loss": 2.0243, "step": 8720 }, { "epoch": 0.38, "learning_rate": 5.522180756864705e-05, "loss": 2.0271, "step": 8730 }, { "epoch": 0.38, "learning_rate": 5.521050835583546e-05, "loss": 1.9358, "step": 8740 }, { "epoch": 0.38, "learning_rate": 5.519919695775572e-05, "loss": 2.0078, "step": 8750 }, { "epoch": 0.38, "learning_rate": 5.51878733798751e-05, "loss": 1.9632, "step": 8760 }, { "epoch": 0.38, "learning_rate": 5.517653762766673e-05, "loss": 1.9393, "step": 8770 }, { "epoch": 0.38, "learning_rate": 5.516518970660965e-05, "loss": 1.9215, "step": 8780 }, { "epoch": 0.38, "learning_rate": 5.515382962218877e-05, "loss": 1.9502, "step": 8790 }, { "epoch": 0.39, "learning_rate": 5.514245737989489e-05, "loss": 1.9523, "step": 8800 }, { "epoch": 0.39, "eval_loss": 1.9324275255203247, "eval_runtime": 11.8045, "eval_samples_per_second": 346.986, "eval_steps_per_second": 21.687, "step": 8800 }, { "epoch": 0.39, "learning_rate": 5.513107298522467e-05, "loss": 1.9323, "step": 8810 }, { "epoch": 0.39, "learning_rate": 5.5119676443680657e-05, "loss": 1.9541, "step": 8820 }, { "epoch": 0.39, "learning_rate": 5.5108267760771266e-05, "loss": 1.9716, "step": 8830 }, { "epoch": 0.39, "learning_rate": 5.5096846942010775e-05, "loss": 1.9488, "step": 8840 }, { "epoch": 0.39, "learning_rate": 5.5085413992919344e-05, "loss": 1.9172, "step": 8850 }, { "epoch": 0.39, "learning_rate": 5.507396891902296e-05, "loss": 1.9546, "step": 8860 }, { "epoch": 0.39, "learning_rate": 5.506251172585352e-05, "loss": 1.9807, "step": 8870 }, { "epoch": 0.39, "learning_rate": 5.505104241894875e-05, "loss": 1.9575, "step": 8880 }, { "epoch": 0.39, "learning_rate": 5.503956100385221e-05, "loss": 1.963, "step": 8890 }, { "epoch": 0.39, "learning_rate": 5.5028067486113366e-05, "loss": 1.954, "step": 8900 }, { "epoch": 0.39, "eval_loss": 1.9312846660614014, "eval_runtime": 11.7288, "eval_samples_per_second": 349.225, "eval_steps_per_second": 21.827, "step": 8900 }, { "epoch": 0.39, "learning_rate": 5.5016561871287496e-05, "loss": 1.9755, "step": 8910 }, { "epoch": 0.39, "learning_rate": 5.500504416493572e-05, "loss": 1.9325, "step": 8920 }, { "epoch": 0.39, "learning_rate": 5.499351437262503e-05, "loss": 2.0681, "step": 8930 }, { "epoch": 0.39, "learning_rate": 5.4981972499928244e-05, "loss": 1.9759, "step": 8940 }, { "epoch": 0.39, "learning_rate": 5.497041855242401e-05, "loss": 1.9578, "step": 8950 }, { "epoch": 0.39, "learning_rate": 5.495885253569684e-05, "loss": 1.9771, "step": 8960 }, { "epoch": 0.39, "learning_rate": 5.4947274455337046e-05, "loss": 1.9007, "step": 8970 }, { "epoch": 0.39, "learning_rate": 5.49356843169408e-05, "loss": 1.9742, "step": 8980 }, { "epoch": 0.39, "learning_rate": 5.492408212611009e-05, "loss": 1.9316, "step": 8990 }, { "epoch": 0.39, "learning_rate": 5.4912467888452705e-05, "loss": 1.9556, "step": 9000 }, { "epoch": 0.39, "eval_loss": 1.9298361539840698, "eval_runtime": 11.9118, "eval_samples_per_second": 343.861, "eval_steps_per_second": 21.491, "step": 9000 }, { "epoch": 0.39, "learning_rate": 5.4900841609582304e-05, "loss": 1.9705, "step": 9010 }, { "epoch": 0.39, "learning_rate": 5.488920329511833e-05, "loss": 1.9575, "step": 9020 }, { "epoch": 0.4, "learning_rate": 5.487755295068606e-05, "loss": 1.9734, "step": 9030 }, { "epoch": 0.4, "learning_rate": 5.486589058191659e-05, "loss": 2.0, "step": 9040 }, { "epoch": 0.4, "learning_rate": 5.4854216194446806e-05, "loss": 1.9733, "step": 9050 }, { "epoch": 0.4, "learning_rate": 5.484252979391942e-05, "loss": 1.9879, "step": 9060 }, { "epoch": 0.4, "learning_rate": 5.4830831385982955e-05, "loss": 1.9072, "step": 9070 }, { "epoch": 0.4, "learning_rate": 5.4819120976291726e-05, "loss": 1.9627, "step": 9080 }, { "epoch": 0.4, "learning_rate": 5.4807398570505836e-05, "loss": 1.9815, "step": 9090 }, { "epoch": 0.4, "learning_rate": 5.4795664174291214e-05, "loss": 1.975, "step": 9100 }, { "epoch": 0.4, "eval_loss": 1.9286532402038574, "eval_runtime": 11.9769, "eval_samples_per_second": 341.991, "eval_steps_per_second": 21.374, "step": 9100 }, { "epoch": 0.4, "learning_rate": 5.478391779331958e-05, "loss": 1.935, "step": 9110 }, { "epoch": 0.4, "learning_rate": 5.477215943326843e-05, "loss": 1.9991, "step": 9120 }, { "epoch": 0.4, "learning_rate": 5.476038909982106e-05, "loss": 1.958, "step": 9130 }, { "epoch": 0.4, "learning_rate": 5.474860679866656e-05, "loss": 1.9126, "step": 9140 }, { "epoch": 0.4, "learning_rate": 5.47368125354998e-05, "loss": 1.9441, "step": 9150 }, { "epoch": 0.4, "learning_rate": 5.4725006316021404e-05, "loss": 1.9427, "step": 9160 }, { "epoch": 0.4, "learning_rate": 5.471318814593783e-05, "loss": 1.9856, "step": 9170 }, { "epoch": 0.4, "learning_rate": 5.4701358030961266e-05, "loss": 1.9876, "step": 9180 }, { "epoch": 0.4, "learning_rate": 5.468951597680969e-05, "loss": 1.9421, "step": 9190 }, { "epoch": 0.4, "learning_rate": 5.467766198920686e-05, "loss": 1.9562, "step": 9200 }, { "epoch": 0.4, "eval_loss": 1.9275963306427002, "eval_runtime": 11.7257, "eval_samples_per_second": 349.317, "eval_steps_per_second": 21.832, "step": 9200 }, { "epoch": 0.4, "learning_rate": 5.466579607388229e-05, "loss": 2.0232, "step": 9210 }, { "epoch": 0.4, "learning_rate": 5.4653918236571245e-05, "loss": 1.8672, "step": 9220 }, { "epoch": 0.4, "learning_rate": 5.464202848301479e-05, "loss": 1.9394, "step": 9230 }, { "epoch": 0.4, "learning_rate": 5.463012681895972e-05, "loss": 1.9189, "step": 9240 }, { "epoch": 0.4, "learning_rate": 5.461821325015859e-05, "loss": 1.9301, "step": 9250 }, { "epoch": 0.41, "learning_rate": 5.4606287782369724e-05, "loss": 1.9574, "step": 9260 }, { "epoch": 0.41, "learning_rate": 5.459435042135718e-05, "loss": 1.9584, "step": 9270 }, { "epoch": 0.41, "learning_rate": 5.458240117289077e-05, "loss": 1.9699, "step": 9280 }, { "epoch": 0.41, "learning_rate": 5.457044004274607e-05, "loss": 1.976, "step": 9290 }, { "epoch": 0.41, "learning_rate": 5.455846703670436e-05, "loss": 1.9704, "step": 9300 }, { "epoch": 0.41, "eval_loss": 1.9267959594726562, "eval_runtime": 11.5085, "eval_samples_per_second": 355.912, "eval_steps_per_second": 22.244, "step": 9300 }, { "epoch": 0.41, "learning_rate": 5.45464821605527e-05, "loss": 1.9217, "step": 9310 }, { "epoch": 0.41, "learning_rate": 5.4534485420083866e-05, "loss": 1.9389, "step": 9320 }, { "epoch": 0.41, "learning_rate": 5.4522476821096364e-05, "loss": 1.9367, "step": 9330 }, { "epoch": 0.41, "learning_rate": 5.4510456369394455e-05, "loss": 1.9851, "step": 9340 }, { "epoch": 0.41, "learning_rate": 5.44984240707881e-05, "loss": 1.9447, "step": 9350 }, { "epoch": 0.41, "learning_rate": 5.448637993109301e-05, "loss": 1.9024, "step": 9360 }, { "epoch": 0.41, "learning_rate": 5.44743239561306e-05, "loss": 1.9409, "step": 9370 }, { "epoch": 0.41, "learning_rate": 5.4462256151728024e-05, "loss": 1.9617, "step": 9380 }, { "epoch": 0.41, "learning_rate": 5.4450176523718144e-05, "loss": 1.9452, "step": 9390 }, { "epoch": 0.41, "learning_rate": 5.443808507793953e-05, "loss": 1.9769, "step": 9400 }, { "epoch": 0.41, "eval_loss": 1.9254794120788574, "eval_runtime": 11.6603, "eval_samples_per_second": 351.277, "eval_steps_per_second": 21.955, "step": 9400 }, { "epoch": 0.41, "learning_rate": 5.442598182023648e-05, "loss": 1.9595, "step": 9410 }, { "epoch": 0.41, "learning_rate": 5.441386675645899e-05, "loss": 1.9119, "step": 9420 }, { "epoch": 0.41, "learning_rate": 5.440173989246276e-05, "loss": 1.9025, "step": 9430 }, { "epoch": 0.41, "learning_rate": 5.438960123410921e-05, "loss": 1.9572, "step": 9440 }, { "epoch": 0.41, "learning_rate": 5.437745078726543e-05, "loss": 1.9962, "step": 9450 }, { "epoch": 0.41, "learning_rate": 5.4365288557804243e-05, "loss": 1.9908, "step": 9460 }, { "epoch": 0.41, "learning_rate": 5.435311455160415e-05, "loss": 1.9532, "step": 9470 }, { "epoch": 0.41, "learning_rate": 5.434092877454934e-05, "loss": 1.9798, "step": 9480 }, { "epoch": 0.42, "learning_rate": 5.43287312325297e-05, "loss": 1.894, "step": 9490 }, { "epoch": 0.42, "learning_rate": 5.43165219314408e-05, "loss": 1.9114, "step": 9500 }, { "epoch": 0.42, "eval_loss": 1.923863410949707, "eval_runtime": 12.1484, "eval_samples_per_second": 337.164, "eval_steps_per_second": 21.073, "step": 9500 }, { "epoch": 0.42, "learning_rate": 5.43043008771839e-05, "loss": 2.0034, "step": 9510 }, { "epoch": 0.42, "learning_rate": 5.429206807566592e-05, "loss": 1.9252, "step": 9520 }, { "epoch": 0.42, "learning_rate": 5.4279823532799476e-05, "loss": 1.9489, "step": 9530 }, { "epoch": 0.42, "learning_rate": 5.4267567254502865e-05, "loss": 1.9173, "step": 9540 }, { "epoch": 0.42, "learning_rate": 5.4255299246700046e-05, "loss": 1.9617, "step": 9550 }, { "epoch": 0.42, "learning_rate": 5.424301951532064e-05, "loss": 1.9543, "step": 9560 }, { "epoch": 0.42, "learning_rate": 5.423072806629994e-05, "loss": 1.9604, "step": 9570 }, { "epoch": 0.42, "learning_rate": 5.421842490557892e-05, "loss": 1.952, "step": 9580 }, { "epoch": 0.42, "learning_rate": 5.420611003910419e-05, "loss": 1.9601, "step": 9590 }, { "epoch": 0.42, "learning_rate": 5.4193783472828024e-05, "loss": 1.9285, "step": 9600 }, { "epoch": 0.42, "eval_loss": 1.922660231590271, "eval_runtime": 11.9313, "eval_samples_per_second": 343.299, "eval_steps_per_second": 21.456, "step": 9600 }, { "epoch": 0.42, "learning_rate": 5.418144521270836e-05, "loss": 1.9213, "step": 9610 }, { "epoch": 0.42, "learning_rate": 5.416909526470878e-05, "loss": 1.9605, "step": 9620 }, { "epoch": 0.42, "learning_rate": 5.4156733634798535e-05, "loss": 1.9544, "step": 9630 }, { "epoch": 0.42, "learning_rate": 5.414436032895248e-05, "loss": 1.941, "step": 9640 }, { "epoch": 0.42, "learning_rate": 5.413197535315116e-05, "loss": 1.9717, "step": 9650 }, { "epoch": 0.42, "learning_rate": 5.4119578713380726e-05, "loss": 1.9784, "step": 9660 }, { "epoch": 0.42, "learning_rate": 5.410717041563298e-05, "loss": 1.9701, "step": 9670 }, { "epoch": 0.42, "learning_rate": 5.409475046590537e-05, "loss": 1.9148, "step": 9680 }, { "epoch": 0.42, "learning_rate": 5.408231887020095e-05, "loss": 1.9512, "step": 9690 }, { "epoch": 0.42, "learning_rate": 5.406987563452844e-05, "loss": 1.9492, "step": 9700 }, { "epoch": 0.42, "eval_loss": 1.9206269979476929, "eval_runtime": 11.8263, "eval_samples_per_second": 346.347, "eval_steps_per_second": 21.647, "step": 9700 }, { "epoch": 0.43, "learning_rate": 5.405742076490214e-05, "loss": 1.9556, "step": 9710 }, { "epoch": 0.43, "learning_rate": 5.4044954267342005e-05, "loss": 1.9188, "step": 9720 }, { "epoch": 0.43, "learning_rate": 5.403247614787361e-05, "loss": 1.9446, "step": 9730 }, { "epoch": 0.43, "learning_rate": 5.4019986412528135e-05, "loss": 1.913, "step": 9740 }, { "epoch": 0.43, "learning_rate": 5.400748506734237e-05, "loss": 1.9685, "step": 9750 }, { "epoch": 0.43, "learning_rate": 5.3994972118358743e-05, "loss": 1.8976, "step": 9760 }, { "epoch": 0.43, "learning_rate": 5.398244757162527e-05, "loss": 1.9133, "step": 9770 }, { "epoch": 0.43, "learning_rate": 5.396991143319555e-05, "loss": 1.9174, "step": 9780 }, { "epoch": 0.43, "learning_rate": 5.395736370912884e-05, "loss": 1.9537, "step": 9790 }, { "epoch": 0.43, "learning_rate": 5.394480440548997e-05, "loss": 1.9391, "step": 9800 }, { "epoch": 0.43, "eval_loss": 1.9210110902786255, "eval_runtime": 11.7383, "eval_samples_per_second": 348.944, "eval_steps_per_second": 21.809, "step": 9800 }, { "epoch": 0.43, "learning_rate": 5.393223352834933e-05, "loss": 1.9293, "step": 9810 }, { "epoch": 0.43, "learning_rate": 5.391965108378298e-05, "loss": 1.9753, "step": 9820 }, { "epoch": 0.43, "learning_rate": 5.3907057077872496e-05, "loss": 1.921, "step": 9830 }, { "epoch": 0.43, "learning_rate": 5.38944515167051e-05, "loss": 1.9402, "step": 9840 }, { "epoch": 0.43, "learning_rate": 5.388183440637356e-05, "loss": 1.9688, "step": 9850 }, { "epoch": 0.43, "learning_rate": 5.3869205752976237e-05, "loss": 1.9478, "step": 9860 }, { "epoch": 0.43, "learning_rate": 5.385656556261709e-05, "loss": 1.9596, "step": 9870 }, { "epoch": 0.43, "learning_rate": 5.3843913841405637e-05, "loss": 1.9397, "step": 9880 }, { "epoch": 0.43, "learning_rate": 5.3831250595456956e-05, "loss": 1.9378, "step": 9890 }, { "epoch": 0.43, "learning_rate": 5.3818575830891716e-05, "loss": 1.9586, "step": 9900 }, { "epoch": 0.43, "eval_loss": 1.919926643371582, "eval_runtime": 11.7669, "eval_samples_per_second": 348.096, "eval_steps_per_second": 21.756, "step": 9900 }, { "epoch": 0.43, "learning_rate": 5.380588955383616e-05, "loss": 1.9818, "step": 9910 }, { "epoch": 0.43, "learning_rate": 5.379319177042208e-05, "loss": 1.9407, "step": 9920 }, { "epoch": 0.43, "learning_rate": 5.378048248678682e-05, "loss": 1.9797, "step": 9930 }, { "epoch": 0.44, "learning_rate": 5.3767761709073314e-05, "loss": 1.9684, "step": 9940 }, { "epoch": 0.44, "learning_rate": 5.3755029443430024e-05, "loss": 1.9438, "step": 9950 }, { "epoch": 0.44, "learning_rate": 5.374228569601098e-05, "loss": 1.9506, "step": 9960 }, { "epoch": 0.44, "learning_rate": 5.372953047297574e-05, "loss": 1.9582, "step": 9970 }, { "epoch": 0.44, "learning_rate": 5.371676378048944e-05, "loss": 1.8836, "step": 9980 }, { "epoch": 0.44, "learning_rate": 5.370398562472273e-05, "loss": 1.879, "step": 9990 }, { "epoch": 0.44, "learning_rate": 5.3691196011851835e-05, "loss": 1.8977, "step": 10000 }, { "epoch": 0.44, "eval_loss": 1.9170405864715576, "eval_runtime": 12.0985, "eval_samples_per_second": 338.555, "eval_steps_per_second": 21.16, "step": 10000 }, { "epoch": 0.44, "learning_rate": 5.367839494805847e-05, "loss": 1.9891, "step": 10010 }, { "epoch": 0.44, "learning_rate": 5.3665582439529944e-05, "loss": 1.9025, "step": 10020 }, { "epoch": 0.44, "learning_rate": 5.365275849245904e-05, "loss": 1.9416, "step": 10030 }, { "epoch": 0.44, "learning_rate": 5.3639923113044107e-05, "loss": 1.954, "step": 10040 }, { "epoch": 0.44, "learning_rate": 5.362707630748901e-05, "loss": 1.9102, "step": 10050 }, { "epoch": 0.44, "learning_rate": 5.361421808200312e-05, "loss": 1.9629, "step": 10060 }, { "epoch": 0.44, "learning_rate": 5.360134844280135e-05, "loss": 1.9691, "step": 10070 }, { "epoch": 0.44, "learning_rate": 5.358846739610413e-05, "loss": 1.9133, "step": 10080 }, { "epoch": 0.44, "learning_rate": 5.357557494813738e-05, "loss": 1.9778, "step": 10090 }, { "epoch": 0.44, "learning_rate": 5.3562671105132566e-05, "loss": 1.9306, "step": 10100 }, { "epoch": 0.44, "eval_loss": 1.917136549949646, "eval_runtime": 11.7385, "eval_samples_per_second": 348.936, "eval_steps_per_second": 21.808, "step": 10100 }, { "epoch": 0.44, "learning_rate": 5.3549755873326636e-05, "loss": 1.9673, "step": 10110 }, { "epoch": 0.44, "learning_rate": 5.353682925896204e-05, "loss": 1.9411, "step": 10120 }, { "epoch": 0.44, "learning_rate": 5.352389126828674e-05, "loss": 1.9431, "step": 10130 }, { "epoch": 0.44, "learning_rate": 5.3510941907554205e-05, "loss": 1.9393, "step": 10140 }, { "epoch": 0.44, "learning_rate": 5.349798118302338e-05, "loss": 1.9358, "step": 10150 }, { "epoch": 0.44, "learning_rate": 5.348500910095873e-05, "loss": 1.9342, "step": 10160 }, { "epoch": 0.45, "learning_rate": 5.3472025667630164e-05, "loss": 1.9211, "step": 10170 }, { "epoch": 0.45, "learning_rate": 5.345903088931312e-05, "loss": 1.9403, "step": 10180 }, { "epoch": 0.45, "learning_rate": 5.344602477228851e-05, "loss": 1.9169, "step": 10190 }, { "epoch": 0.45, "learning_rate": 5.343300732284272e-05, "loss": 1.9776, "step": 10200 }, { "epoch": 0.45, "eval_loss": 1.9152053594589233, "eval_runtime": 11.8351, "eval_samples_per_second": 346.088, "eval_steps_per_second": 21.631, "step": 10200 }, { "epoch": 0.45, "learning_rate": 5.34199785472676e-05, "loss": 1.9555, "step": 10210 }, { "epoch": 0.45, "learning_rate": 5.340693845186051e-05, "loss": 1.9321, "step": 10220 }, { "epoch": 0.45, "learning_rate": 5.339388704292425e-05, "loss": 1.9974, "step": 10230 }, { "epoch": 0.45, "learning_rate": 5.33808243267671e-05, "loss": 1.9198, "step": 10240 }, { "epoch": 0.45, "learning_rate": 5.33677503097028e-05, "loss": 1.9572, "step": 10250 }, { "epoch": 0.45, "learning_rate": 5.335466499805057e-05, "loss": 1.9275, "step": 10260 }, { "epoch": 0.45, "learning_rate": 5.334156839813506e-05, "loss": 1.9221, "step": 10270 }, { "epoch": 0.45, "learning_rate": 5.33284605162864e-05, "loss": 1.9517, "step": 10280 }, { "epoch": 0.45, "learning_rate": 5.3315341358840155e-05, "loss": 1.9344, "step": 10290 }, { "epoch": 0.45, "learning_rate": 5.330221093213736e-05, "loss": 1.9339, "step": 10300 }, { "epoch": 0.45, "eval_loss": 1.9138355255126953, "eval_runtime": 11.7912, "eval_samples_per_second": 347.377, "eval_steps_per_second": 21.711, "step": 10300 }, { "epoch": 0.45, "learning_rate": 5.3289069242524486e-05, "loss": 1.942, "step": 10310 }, { "epoch": 0.45, "learning_rate": 5.327591629635345e-05, "loss": 1.9285, "step": 10320 }, { "epoch": 0.45, "learning_rate": 5.32627520999816e-05, "loss": 1.9394, "step": 10330 }, { "epoch": 0.45, "learning_rate": 5.3249576659771723e-05, "loss": 1.9306, "step": 10340 }, { "epoch": 0.45, "learning_rate": 5.323638998209207e-05, "loss": 1.9389, "step": 10350 }, { "epoch": 0.45, "learning_rate": 5.322319207331628e-05, "loss": 1.9811, "step": 10360 }, { "epoch": 0.45, "learning_rate": 5.320998293982345e-05, "loss": 1.9051, "step": 10370 }, { "epoch": 0.45, "learning_rate": 5.31967625879981e-05, "loss": 1.9013, "step": 10380 }, { "epoch": 0.45, "learning_rate": 5.3183531024230145e-05, "loss": 1.9472, "step": 10390 }, { "epoch": 0.46, "learning_rate": 5.3170288254914974e-05, "loss": 1.9531, "step": 10400 }, { "epoch": 0.46, "eval_loss": 1.9130414724349976, "eval_runtime": 11.7623, "eval_samples_per_second": 348.231, "eval_steps_per_second": 21.764, "step": 10400 }, { "epoch": 0.46, "learning_rate": 5.315703428645332e-05, "loss": 1.9145, "step": 10410 }, { "epoch": 0.46, "learning_rate": 5.31437691252514e-05, "loss": 1.9428, "step": 10420 }, { "epoch": 0.46, "learning_rate": 5.3130492777720784e-05, "loss": 1.9159, "step": 10430 }, { "epoch": 0.46, "learning_rate": 5.311720525027847e-05, "loss": 1.9567, "step": 10440 }, { "epoch": 0.46, "learning_rate": 5.310390654934689e-05, "loss": 1.9582, "step": 10450 }, { "epoch": 0.46, "learning_rate": 5.309059668135384e-05, "loss": 1.8719, "step": 10460 }, { "epoch": 0.46, "learning_rate": 5.3077275652732506e-05, "loss": 1.9593, "step": 10470 }, { "epoch": 0.46, "learning_rate": 5.3063943469921495e-05, "loss": 1.9132, "step": 10480 }, { "epoch": 0.46, "learning_rate": 5.305060013936481e-05, "loss": 1.9277, "step": 10490 }, { "epoch": 0.46, "learning_rate": 5.303724566751179e-05, "loss": 1.9285, "step": 10500 }, { "epoch": 0.46, "eval_loss": 1.912137746810913, "eval_runtime": 11.8324, "eval_samples_per_second": 346.169, "eval_steps_per_second": 21.636, "step": 10500 }, { "epoch": 0.46, "learning_rate": 5.302388006081724e-05, "loss": 1.9365, "step": 10510 }, { "epoch": 0.46, "learning_rate": 5.301050332574128e-05, "loss": 1.9349, "step": 10520 }, { "epoch": 0.46, "learning_rate": 5.299711546874944e-05, "loss": 1.9636, "step": 10530 }, { "epoch": 0.46, "learning_rate": 5.2983716496312595e-05, "loss": 1.9193, "step": 10540 }, { "epoch": 0.46, "learning_rate": 5.297030641490705e-05, "loss": 1.925, "step": 10550 }, { "epoch": 0.46, "learning_rate": 5.2956885231014426e-05, "loss": 1.9379, "step": 10560 }, { "epoch": 0.46, "learning_rate": 5.294345295112173e-05, "loss": 1.9314, "step": 10570 }, { "epoch": 0.46, "learning_rate": 5.2930009581721345e-05, "loss": 1.9457, "step": 10580 }, { "epoch": 0.46, "learning_rate": 5.291655512931098e-05, "loss": 1.9083, "step": 10590 }, { "epoch": 0.46, "learning_rate": 5.290308960039373e-05, "loss": 1.9104, "step": 10600 }, { "epoch": 0.46, "eval_loss": 1.9107956886291504, "eval_runtime": 12.0695, "eval_samples_per_second": 339.368, "eval_steps_per_second": 21.21, "step": 10600 }, { "epoch": 0.46, "learning_rate": 5.288961300147804e-05, "loss": 1.9385, "step": 10610 }, { "epoch": 0.46, "learning_rate": 5.287612533907769e-05, "loss": 1.9446, "step": 10620 }, { "epoch": 0.47, "learning_rate": 5.286262661971183e-05, "loss": 1.9284, "step": 10630 }, { "epoch": 0.47, "learning_rate": 5.284911684990494e-05, "loss": 1.9348, "step": 10640 }, { "epoch": 0.47, "learning_rate": 5.283559603618683e-05, "loss": 1.9606, "step": 10650 }, { "epoch": 0.47, "learning_rate": 5.2822064185092676e-05, "loss": 1.883, "step": 10660 }, { "epoch": 0.47, "learning_rate": 5.280852130316297e-05, "loss": 1.9772, "step": 10670 }, { "epoch": 0.47, "learning_rate": 5.279496739694355e-05, "loss": 1.9849, "step": 10680 }, { "epoch": 0.47, "learning_rate": 5.2781402472985544e-05, "loss": 1.916, "step": 10690 }, { "epoch": 0.47, "learning_rate": 5.276782653784546e-05, "loss": 1.9241, "step": 10700 }, { "epoch": 0.47, "eval_loss": 1.9091017246246338, "eval_runtime": 11.9167, "eval_samples_per_second": 343.72, "eval_steps_per_second": 21.483, "step": 10700 }, { "epoch": 0.47, "learning_rate": 5.275423959808509e-05, "loss": 1.9683, "step": 10710 }, { "epoch": 0.47, "learning_rate": 5.274064166027156e-05, "loss": 1.9504, "step": 10720 }, { "epoch": 0.47, "learning_rate": 5.272703273097731e-05, "loss": 1.9201, "step": 10730 }, { "epoch": 0.47, "learning_rate": 5.271341281678009e-05, "loss": 1.9344, "step": 10740 }, { "epoch": 0.47, "learning_rate": 5.2699781924262966e-05, "loss": 1.9063, "step": 10750 }, { "epoch": 0.47, "learning_rate": 5.2686140060014297e-05, "loss": 1.8997, "step": 10760 }, { "epoch": 0.47, "learning_rate": 5.267248723062775e-05, "loss": 1.8877, "step": 10770 }, { "epoch": 0.47, "learning_rate": 5.26588234427023e-05, "loss": 1.9442, "step": 10780 }, { "epoch": 0.47, "learning_rate": 5.2645148702842224e-05, "loss": 1.8964, "step": 10790 }, { "epoch": 0.47, "learning_rate": 5.2631463017657064e-05, "loss": 1.8983, "step": 10800 }, { "epoch": 0.47, "eval_loss": 1.9094958305358887, "eval_runtime": 11.7143, "eval_samples_per_second": 349.659, "eval_steps_per_second": 21.854, "step": 10800 }, { "epoch": 0.47, "learning_rate": 5.261776639376169e-05, "loss": 1.9515, "step": 10810 }, { "epoch": 0.47, "learning_rate": 5.260405883777622e-05, "loss": 1.9609, "step": 10820 }, { "epoch": 0.47, "learning_rate": 5.259034035632607e-05, "loss": 1.9301, "step": 10830 }, { "epoch": 0.47, "learning_rate": 5.2576610956041976e-05, "loss": 1.946, "step": 10840 }, { "epoch": 0.47, "learning_rate": 5.2562870643559895e-05, "loss": 1.9419, "step": 10850 }, { "epoch": 0.48, "learning_rate": 5.254911942552108e-05, "loss": 1.9415, "step": 10860 }, { "epoch": 0.48, "learning_rate": 5.2535357308572064e-05, "loss": 1.9428, "step": 10870 }, { "epoch": 0.48, "learning_rate": 5.252158429936464e-05, "loss": 1.9382, "step": 10880 }, { "epoch": 0.48, "learning_rate": 5.250780040455586e-05, "loss": 1.937, "step": 10890 }, { "epoch": 0.48, "learning_rate": 5.249400563080804e-05, "loss": 1.9589, "step": 10900 }, { "epoch": 0.48, "eval_loss": 1.9076216220855713, "eval_runtime": 11.7896, "eval_samples_per_second": 347.424, "eval_steps_per_second": 21.714, "step": 10900 }, { "epoch": 0.48, "learning_rate": 5.2480199984788765e-05, "loss": 1.919, "step": 10910 }, { "epoch": 0.48, "learning_rate": 5.246638347317086e-05, "loss": 1.947, "step": 10920 }, { "epoch": 0.48, "learning_rate": 5.245255610263243e-05, "loss": 1.965, "step": 10930 }, { "epoch": 0.48, "learning_rate": 5.243871787985678e-05, "loss": 1.9391, "step": 10940 }, { "epoch": 0.48, "learning_rate": 5.242486881153251e-05, "loss": 1.9027, "step": 10950 }, { "epoch": 0.48, "learning_rate": 5.241100890435342e-05, "loss": 1.9201, "step": 10960 }, { "epoch": 0.48, "learning_rate": 5.239713816501859e-05, "loss": 1.9278, "step": 10970 }, { "epoch": 0.48, "learning_rate": 5.23832566002323e-05, "loss": 1.9501, "step": 10980 }, { "epoch": 0.48, "learning_rate": 5.2369364216704084e-05, "loss": 1.9276, "step": 10990 }, { "epoch": 0.48, "learning_rate": 5.2355461021148695e-05, "loss": 1.9232, "step": 11000 }, { "epoch": 0.48, "eval_loss": 1.9063364267349243, "eval_runtime": 11.7844, "eval_samples_per_second": 347.577, "eval_steps_per_second": 21.724, "step": 11000 }, { "epoch": 0.48, "learning_rate": 5.2341547020286116e-05, "loss": 1.946, "step": 11010 }, { "epoch": 0.48, "learning_rate": 5.232762222084155e-05, "loss": 1.931, "step": 11020 }, { "epoch": 0.48, "learning_rate": 5.231368662954542e-05, "loss": 1.944, "step": 11030 }, { "epoch": 0.48, "learning_rate": 5.2299740253133383e-05, "loss": 1.9547, "step": 11040 }, { "epoch": 0.48, "learning_rate": 5.2285783098346265e-05, "loss": 1.8769, "step": 11050 }, { "epoch": 0.48, "learning_rate": 5.227181517193015e-05, "loss": 1.8999, "step": 11060 }, { "epoch": 0.48, "learning_rate": 5.22578364806363e-05, "loss": 1.9489, "step": 11070 }, { "epoch": 0.49, "learning_rate": 5.2243847031221185e-05, "loss": 1.9533, "step": 11080 }, { "epoch": 0.49, "learning_rate": 5.222984683044648e-05, "loss": 1.9008, "step": 11090 }, { "epoch": 0.49, "learning_rate": 5.221583588507905e-05, "loss": 1.9322, "step": 11100 }, { "epoch": 0.49, "eval_loss": 1.905591607093811, "eval_runtime": 11.9402, "eval_samples_per_second": 343.044, "eval_steps_per_second": 21.44, "step": 11100 }, { "epoch": 0.49, "learning_rate": 5.2201814201890964e-05, "loss": 1.885, "step": 11110 }, { "epoch": 0.49, "learning_rate": 5.218778178765947e-05, "loss": 1.9275, "step": 11120 }, { "epoch": 0.49, "learning_rate": 5.217373864916701e-05, "loss": 1.9124, "step": 11130 }, { "epoch": 0.49, "learning_rate": 5.21596847932012e-05, "loss": 1.9431, "step": 11140 }, { "epoch": 0.49, "learning_rate": 5.2145620226554844e-05, "loss": 1.9124, "step": 11150 }, { "epoch": 0.49, "learning_rate": 5.213154495602593e-05, "loss": 1.9301, "step": 11160 }, { "epoch": 0.49, "learning_rate": 5.211745898841759e-05, "loss": 1.888, "step": 11170 }, { "epoch": 0.49, "learning_rate": 5.210336233053817e-05, "loss": 1.9275, "step": 11180 }, { "epoch": 0.49, "learning_rate": 5.2089254989201156e-05, "loss": 1.9612, "step": 11190 }, { "epoch": 0.49, "learning_rate": 5.207513697122519e-05, "loss": 1.9247, "step": 11200 }, { "epoch": 0.49, "eval_loss": 1.9045929908752441, "eval_runtime": 11.6726, "eval_samples_per_second": 350.906, "eval_steps_per_second": 21.932, "step": 11200 }, { "epoch": 0.49, "learning_rate": 5.20610082834341e-05, "loss": 1.9102, "step": 11210 }, { "epoch": 0.49, "learning_rate": 5.2046868932656855e-05, "loss": 1.923, "step": 11220 }, { "epoch": 0.49, "learning_rate": 5.203271892572757e-05, "loss": 1.8978, "step": 11230 }, { "epoch": 0.49, "learning_rate": 5.201855826948553e-05, "loss": 1.8708, "step": 11240 }, { "epoch": 0.49, "learning_rate": 5.200438697077516e-05, "loss": 1.9239, "step": 11250 }, { "epoch": 0.49, "learning_rate": 5.199020503644603e-05, "loss": 1.9347, "step": 11260 }, { "epoch": 0.49, "learning_rate": 5.1976012473352834e-05, "loss": 1.8907, "step": 11270 }, { "epoch": 0.49, "learning_rate": 5.196180928835543e-05, "loss": 1.9238, "step": 11280 }, { "epoch": 0.49, "learning_rate": 5.1947595488318794e-05, "loss": 1.925, "step": 11290 }, { "epoch": 0.49, "learning_rate": 5.1933371080113034e-05, "loss": 1.8946, "step": 11300 }, { "epoch": 0.49, "eval_loss": 1.9035365581512451, "eval_runtime": 11.7334, "eval_samples_per_second": 349.089, "eval_steps_per_second": 21.818, "step": 11300 }, { "epoch": 0.5, "learning_rate": 5.191913607061339e-05, "loss": 1.885, "step": 11310 }, { "epoch": 0.5, "learning_rate": 5.190489046670022e-05, "loss": 1.9246, "step": 11320 }, { "epoch": 0.5, "learning_rate": 5.189063427525901e-05, "loss": 1.9431, "step": 11330 }, { "epoch": 0.5, "learning_rate": 5.1876367503180356e-05, "loss": 1.9154, "step": 11340 }, { "epoch": 0.5, "learning_rate": 5.186209015735998e-05, "loss": 1.9025, "step": 11350 }, { "epoch": 0.5, "learning_rate": 5.184780224469869e-05, "loss": 1.96, "step": 11360 }, { "epoch": 0.5, "learning_rate": 5.183350377210243e-05, "loss": 1.9536, "step": 11370 }, { "epoch": 0.5, "learning_rate": 5.181919474648224e-05, "loss": 1.9724, "step": 11380 }, { "epoch": 0.5, "learning_rate": 5.180487517475424e-05, "loss": 1.8894, "step": 11390 }, { "epoch": 0.5, "learning_rate": 5.1790545063839675e-05, "loss": 1.894, "step": 11400 }, { "epoch": 0.5, "eval_loss": 1.9031916856765747, "eval_runtime": 11.9751, "eval_samples_per_second": 342.043, "eval_steps_per_second": 21.378, "step": 11400 }, { "epoch": 0.5, "learning_rate": 5.177620442066487e-05, "loss": 1.8651, "step": 11410 }, { "epoch": 0.5, "learning_rate": 5.176185325216124e-05, "loss": 1.9263, "step": 11420 }, { "epoch": 0.5, "learning_rate": 5.1747491565265304e-05, "loss": 1.9026, "step": 11430 }, { "epoch": 0.5, "learning_rate": 5.173311936691864e-05, "loss": 1.9311, "step": 11440 }, { "epoch": 0.5, "learning_rate": 5.171873666406792e-05, "loss": 1.9022, "step": 11450 }, { "epoch": 0.5, "learning_rate": 5.170434346366489e-05, "loss": 1.9235, "step": 11460 }, { "epoch": 0.5, "learning_rate": 5.1689939772666376e-05, "loss": 1.9091, "step": 11470 }, { "epoch": 0.5, "learning_rate": 5.1675525598034275e-05, "loss": 1.9388, "step": 11480 }, { "epoch": 0.5, "learning_rate": 5.166110094673553e-05, "loss": 1.9413, "step": 11490 }, { "epoch": 0.5, "learning_rate": 5.164666582574217e-05, "loss": 1.9681, "step": 11500 }, { "epoch": 0.5, "eval_loss": 1.9012072086334229, "eval_runtime": 11.9665, "eval_samples_per_second": 342.288, "eval_steps_per_second": 21.393, "step": 11500 }, { "epoch": 0.5, "learning_rate": 5.163222024203129e-05, "loss": 1.9637, "step": 11510 }, { "epoch": 0.5, "learning_rate": 5.161776420258502e-05, "loss": 1.9221, "step": 11520 }, { "epoch": 0.5, "learning_rate": 5.1603297714390555e-05, "loss": 1.9217, "step": 11530 }, { "epoch": 0.51, "learning_rate": 5.1588820784440144e-05, "loss": 1.9271, "step": 11540 }, { "epoch": 0.51, "learning_rate": 5.157433341973107e-05, "loss": 1.9115, "step": 11550 }, { "epoch": 0.51, "learning_rate": 5.155983562726568e-05, "loss": 1.9195, "step": 11560 }, { "epoch": 0.51, "learning_rate": 5.154532741405133e-05, "loss": 1.9535, "step": 11570 }, { "epoch": 0.51, "learning_rate": 5.153080878710046e-05, "loss": 1.8912, "step": 11580 }, { "epoch": 0.51, "learning_rate": 5.151627975343049e-05, "loss": 1.8922, "step": 11590 }, { "epoch": 0.51, "learning_rate": 5.1501740320063906e-05, "loss": 1.8981, "step": 11600 }, { "epoch": 0.51, "eval_loss": 1.9007313251495361, "eval_runtime": 11.7722, "eval_samples_per_second": 347.939, "eval_steps_per_second": 21.746, "step": 11600 }, { "epoch": 0.51, "learning_rate": 5.148719049402821e-05, "loss": 1.919, "step": 11610 }, { "epoch": 0.51, "learning_rate": 5.147263028235593e-05, "loss": 1.8871, "step": 11620 }, { "epoch": 0.51, "learning_rate": 5.145805969208459e-05, "loss": 1.9283, "step": 11630 }, { "epoch": 0.51, "learning_rate": 5.144347873025679e-05, "loss": 1.9203, "step": 11640 }, { "epoch": 0.51, "learning_rate": 5.1428887403920064e-05, "loss": 1.9441, "step": 11650 }, { "epoch": 0.51, "learning_rate": 5.141428572012703e-05, "loss": 1.9235, "step": 11660 }, { "epoch": 0.51, "learning_rate": 5.1399673685935244e-05, "loss": 1.8727, "step": 11670 }, { "epoch": 0.51, "learning_rate": 5.138505130840733e-05, "loss": 1.9103, "step": 11680 }, { "epoch": 0.51, "learning_rate": 5.137041859461086e-05, "loss": 1.9122, "step": 11690 }, { "epoch": 0.51, "learning_rate": 5.1355775551618435e-05, "loss": 1.9197, "step": 11700 }, { "epoch": 0.51, "eval_loss": 1.8997355699539185, "eval_runtime": 11.7556, "eval_samples_per_second": 348.43, "eval_steps_per_second": 21.777, "step": 11700 }, { "epoch": 0.51, "learning_rate": 5.134112218650762e-05, "loss": 1.9484, "step": 11710 }, { "epoch": 0.51, "learning_rate": 5.1326458506360994e-05, "loss": 1.9234, "step": 11720 }, { "epoch": 0.51, "learning_rate": 5.131178451826612e-05, "loss": 1.932, "step": 11730 }, { "epoch": 0.51, "learning_rate": 5.1297100229315536e-05, "loss": 1.9191, "step": 11740 }, { "epoch": 0.51, "learning_rate": 5.128240564660673e-05, "loss": 1.9287, "step": 11750 }, { "epoch": 0.51, "learning_rate": 5.1267700777242234e-05, "loss": 1.8905, "step": 11760 }, { "epoch": 0.52, "learning_rate": 5.1252985628329485e-05, "loss": 1.9224, "step": 11770 }, { "epoch": 0.52, "learning_rate": 5.123826020698092e-05, "loss": 1.9458, "step": 11780 }, { "epoch": 0.52, "learning_rate": 5.122352452031394e-05, "loss": 1.9364, "step": 11790 }, { "epoch": 0.52, "learning_rate": 5.1208778575450904e-05, "loss": 1.8697, "step": 11800 }, { "epoch": 0.52, "eval_loss": 1.8991934061050415, "eval_runtime": 11.702, "eval_samples_per_second": 350.026, "eval_steps_per_second": 21.877, "step": 11800 }, { "epoch": 0.52, "learning_rate": 5.1194022379519134e-05, "loss": 1.9021, "step": 11810 }, { "epoch": 0.52, "learning_rate": 5.1179255939650894e-05, "loss": 1.9163, "step": 11820 }, { "epoch": 0.52, "learning_rate": 5.1164479262983405e-05, "loss": 1.9285, "step": 11830 }, { "epoch": 0.52, "learning_rate": 5.1149692356658856e-05, "loss": 1.9226, "step": 11840 }, { "epoch": 0.52, "learning_rate": 5.113489522782434e-05, "loss": 1.8812, "step": 11850 }, { "epoch": 0.52, "learning_rate": 5.112008788363192e-05, "loss": 1.9432, "step": 11860 }, { "epoch": 0.52, "learning_rate": 5.110527033123861e-05, "loss": 1.8859, "step": 11870 }, { "epoch": 0.52, "learning_rate": 5.1090442577806306e-05, "loss": 1.9347, "step": 11880 }, { "epoch": 0.52, "learning_rate": 5.107560463050188e-05, "loss": 1.9167, "step": 11890 }, { "epoch": 0.52, "learning_rate": 5.106075649649714e-05, "loss": 1.9169, "step": 11900 }, { "epoch": 0.52, "eval_loss": 1.8978095054626465, "eval_runtime": 11.7169, "eval_samples_per_second": 349.58, "eval_steps_per_second": 21.849, "step": 11900 }, { "epoch": 0.52, "learning_rate": 5.104589818296875e-05, "loss": 1.9154, "step": 11910 }, { "epoch": 0.52, "learning_rate": 5.103102969709839e-05, "loss": 1.9427, "step": 11920 }, { "epoch": 0.52, "learning_rate": 5.1016151046072576e-05, "loss": 1.9245, "step": 11930 }, { "epoch": 0.52, "learning_rate": 5.100126223708276e-05, "loss": 1.929, "step": 11940 }, { "epoch": 0.52, "learning_rate": 5.098636327732534e-05, "loss": 1.912, "step": 11950 }, { "epoch": 0.52, "learning_rate": 5.097145417400157e-05, "loss": 1.9093, "step": 11960 }, { "epoch": 0.52, "learning_rate": 5.0956534934317624e-05, "loss": 1.9014, "step": 11970 }, { "epoch": 0.52, "learning_rate": 5.094160556548461e-05, "loss": 1.9396, "step": 11980 }, { "epoch": 0.52, "learning_rate": 5.092666607471847e-05, "loss": 1.9046, "step": 11990 }, { "epoch": 0.53, "learning_rate": 5.091171646924009e-05, "loss": 1.9012, "step": 12000 }, { "epoch": 0.53, "eval_loss": 1.897355556488037, "eval_runtime": 12.4634, "eval_samples_per_second": 328.642, "eval_steps_per_second": 20.54, "step": 12000 }, { "epoch": 0.53, "learning_rate": 5.089675675627522e-05, "loss": 1.92, "step": 12010 }, { "epoch": 0.53, "learning_rate": 5.0881786943054486e-05, "loss": 1.9191, "step": 12020 }, { "epoch": 0.53, "learning_rate": 5.086680703681343e-05, "loss": 1.9002, "step": 12030 }, { "epoch": 0.53, "learning_rate": 5.085181704479244e-05, "loss": 1.9084, "step": 12040 }, { "epoch": 0.53, "learning_rate": 5.0836816974236796e-05, "loss": 1.9434, "step": 12050 }, { "epoch": 0.53, "learning_rate": 5.082180683239664e-05, "loss": 1.9575, "step": 12060 }, { "epoch": 0.53, "learning_rate": 5.0806786626527e-05, "loss": 1.9332, "step": 12070 }, { "epoch": 0.53, "learning_rate": 5.079175636388773e-05, "loss": 1.8858, "step": 12080 }, { "epoch": 0.53, "learning_rate": 5.077671605174359e-05, "loss": 1.8997, "step": 12090 }, { "epoch": 0.53, "learning_rate": 5.076166569736418e-05, "loss": 1.9475, "step": 12100 }, { "epoch": 0.53, "eval_loss": 1.8951764106750488, "eval_runtime": 11.8716, "eval_samples_per_second": 345.025, "eval_steps_per_second": 21.564, "step": 12100 }, { "epoch": 0.53, "learning_rate": 5.074660530802393e-05, "loss": 1.9078, "step": 12110 }, { "epoch": 0.53, "learning_rate": 5.073153489100216e-05, "loss": 1.9249, "step": 12120 }, { "epoch": 0.53, "learning_rate": 5.0716454453583015e-05, "loss": 1.8992, "step": 12130 }, { "epoch": 0.53, "learning_rate": 5.0701364003055475e-05, "loss": 1.8958, "step": 12140 }, { "epoch": 0.53, "learning_rate": 5.0686263546713386e-05, "loss": 1.9222, "step": 12150 }, { "epoch": 0.53, "learning_rate": 5.0671153091855406e-05, "loss": 1.9098, "step": 12160 }, { "epoch": 0.53, "learning_rate": 5.0656032645785026e-05, "loss": 1.8734, "step": 12170 }, { "epoch": 0.53, "learning_rate": 5.06409022158106e-05, "loss": 1.9222, "step": 12180 }, { "epoch": 0.53, "learning_rate": 5.062576180924526e-05, "loss": 1.9241, "step": 12190 }, { "epoch": 0.53, "learning_rate": 5.0610611433406996e-05, "loss": 1.9312, "step": 12200 }, { "epoch": 0.53, "eval_loss": 1.8951544761657715, "eval_runtime": 11.9349, "eval_samples_per_second": 343.194, "eval_steps_per_second": 21.45, "step": 12200 }, { "epoch": 0.53, "learning_rate": 5.0595451095618595e-05, "loss": 1.8868, "step": 12210 }, { "epoch": 0.53, "learning_rate": 5.058028080320767e-05, "loss": 1.9428, "step": 12220 }, { "epoch": 0.54, "learning_rate": 5.056510056350665e-05, "loss": 1.8836, "step": 12230 }, { "epoch": 0.54, "learning_rate": 5.0549910383852756e-05, "loss": 1.9613, "step": 12240 }, { "epoch": 0.54, "learning_rate": 5.0534710271588026e-05, "loss": 1.9218, "step": 12250 }, { "epoch": 0.54, "learning_rate": 5.051950023405928e-05, "loss": 1.8936, "step": 12260 }, { "epoch": 0.54, "learning_rate": 5.0504280278618175e-05, "loss": 1.9501, "step": 12270 }, { "epoch": 0.54, "learning_rate": 5.048905041262113e-05, "loss": 1.9222, "step": 12280 }, { "epoch": 0.54, "learning_rate": 5.0473810643429346e-05, "loss": 1.9357, "step": 12290 }, { "epoch": 0.54, "learning_rate": 5.0458560978408844e-05, "loss": 1.9024, "step": 12300 }, { "epoch": 0.54, "eval_loss": 1.894567608833313, "eval_runtime": 11.8015, "eval_samples_per_second": 347.074, "eval_steps_per_second": 21.692, "step": 12300 }, { "epoch": 0.54, "learning_rate": 5.04433014249304e-05, "loss": 1.901, "step": 12310 }, { "epoch": 0.54, "learning_rate": 5.042803199036958e-05, "loss": 1.8953, "step": 12320 }, { "epoch": 0.54, "learning_rate": 5.041275268210672e-05, "loss": 1.9358, "step": 12330 }, { "epoch": 0.54, "learning_rate": 5.039746350752696e-05, "loss": 1.9632, "step": 12340 }, { "epoch": 0.54, "learning_rate": 5.0382164474020144e-05, "loss": 1.8925, "step": 12350 }, { "epoch": 0.54, "learning_rate": 5.036685558898095e-05, "loss": 1.8659, "step": 12360 }, { "epoch": 0.54, "learning_rate": 5.035153685980877e-05, "loss": 1.9407, "step": 12370 }, { "epoch": 0.54, "learning_rate": 5.033620829390778e-05, "loss": 1.886, "step": 12380 }, { "epoch": 0.54, "learning_rate": 5.0320869898686897e-05, "loss": 1.8922, "step": 12390 }, { "epoch": 0.54, "learning_rate": 5.0305521681559795e-05, "loss": 1.9246, "step": 12400 }, { "epoch": 0.54, "eval_loss": 1.8931217193603516, "eval_runtime": 11.8736, "eval_samples_per_second": 344.967, "eval_steps_per_second": 21.56, "step": 12400 }, { "epoch": 0.54, "learning_rate": 5.0290163649944895e-05, "loss": 1.9126, "step": 12410 }, { "epoch": 0.54, "learning_rate": 5.0274795811265356e-05, "loss": 1.8948, "step": 12420 }, { "epoch": 0.54, "learning_rate": 5.0259418172949096e-05, "loss": 1.942, "step": 12430 }, { "epoch": 0.54, "learning_rate": 5.0244030742428746e-05, "loss": 1.8984, "step": 12440 }, { "epoch": 0.55, "learning_rate": 5.022863352714168e-05, "loss": 1.8838, "step": 12450 }, { "epoch": 0.55, "learning_rate": 5.0213226534529994e-05, "loss": 1.8943, "step": 12460 }, { "epoch": 0.55, "learning_rate": 5.0197809772040526e-05, "loss": 1.9087, "step": 12470 }, { "epoch": 0.55, "learning_rate": 5.0182383247124826e-05, "loss": 1.9317, "step": 12480 }, { "epoch": 0.55, "learning_rate": 5.0166946967239155e-05, "loss": 1.9581, "step": 12490 }, { "epoch": 0.55, "learning_rate": 5.0151500939844505e-05, "loss": 1.9311, "step": 12500 }, { "epoch": 0.55, "eval_loss": 1.8927887678146362, "eval_runtime": 12.0202, "eval_samples_per_second": 340.759, "eval_steps_per_second": 21.297, "step": 12500 }, { "epoch": 0.55, "learning_rate": 5.013604517240657e-05, "loss": 1.9014, "step": 12510 }, { "epoch": 0.55, "learning_rate": 5.012057967239576e-05, "loss": 1.8831, "step": 12520 }, { "epoch": 0.55, "learning_rate": 5.010510444728717e-05, "loss": 1.9165, "step": 12530 }, { "epoch": 0.55, "learning_rate": 5.008961950456062e-05, "loss": 1.8805, "step": 12540 }, { "epoch": 0.55, "learning_rate": 5.0074124851700615e-05, "loss": 1.8601, "step": 12550 }, { "epoch": 0.55, "learning_rate": 5.005862049619634e-05, "loss": 1.9155, "step": 12560 }, { "epoch": 0.55, "learning_rate": 5.00431064455417e-05, "loss": 1.923, "step": 12570 }, { "epoch": 0.55, "learning_rate": 5.002758270723525e-05, "loss": 1.9147, "step": 12580 }, { "epoch": 0.55, "learning_rate": 5.0012049288780266e-05, "loss": 1.9428, "step": 12590 }, { "epoch": 0.55, "learning_rate": 4.9996506197684674e-05, "loss": 1.9652, "step": 12600 }, { "epoch": 0.55, "eval_loss": 1.8919925689697266, "eval_runtime": 11.9961, "eval_samples_per_second": 341.443, "eval_steps_per_second": 21.34, "step": 12600 }, { "epoch": 0.55, "learning_rate": 4.9980953441461076e-05, "loss": 1.9505, "step": 12610 }, { "epoch": 0.55, "learning_rate": 4.9965391027626776e-05, "loss": 1.8803, "step": 12620 }, { "epoch": 0.55, "learning_rate": 4.99498189637037e-05, "loss": 1.9102, "step": 12630 }, { "epoch": 0.55, "learning_rate": 4.993423725721849e-05, "loss": 1.9154, "step": 12640 }, { "epoch": 0.55, "learning_rate": 4.99186459157024e-05, "loss": 1.9074, "step": 12650 }, { "epoch": 0.55, "learning_rate": 4.9903044946691354e-05, "loss": 1.9372, "step": 12660 }, { "epoch": 0.55, "learning_rate": 4.988743435772596e-05, "loss": 1.9326, "step": 12670 }, { "epoch": 0.56, "learning_rate": 4.9871814156351444e-05, "loss": 1.8712, "step": 12680 }, { "epoch": 0.56, "learning_rate": 4.9856184350117696e-05, "loss": 1.9461, "step": 12690 }, { "epoch": 0.56, "learning_rate": 4.9840544946579226e-05, "loss": 1.9164, "step": 12700 }, { "epoch": 0.56, "eval_loss": 1.8904156684875488, "eval_runtime": 11.9521, "eval_samples_per_second": 342.701, "eval_steps_per_second": 21.419, "step": 12700 }, { "epoch": 0.56, "learning_rate": 4.98248959532952e-05, "loss": 1.8621, "step": 12710 }, { "epoch": 0.56, "learning_rate": 4.980923737782941e-05, "loss": 1.8796, "step": 12720 }, { "epoch": 0.56, "learning_rate": 4.979356922775029e-05, "loss": 1.9101, "step": 12730 }, { "epoch": 0.56, "learning_rate": 4.9777891510630904e-05, "loss": 1.9218, "step": 12740 }, { "epoch": 0.56, "learning_rate": 4.976220423404894e-05, "loss": 1.904, "step": 12750 }, { "epoch": 0.56, "learning_rate": 4.9746507405586664e-05, "loss": 1.947, "step": 12760 }, { "epoch": 0.56, "learning_rate": 4.973080103283103e-05, "loss": 1.9095, "step": 12770 }, { "epoch": 0.56, "learning_rate": 4.971508512337355e-05, "loss": 1.8795, "step": 12780 }, { "epoch": 0.56, "learning_rate": 4.969935968481037e-05, "loss": 1.9002, "step": 12790 }, { "epoch": 0.56, "learning_rate": 4.9683624724742246e-05, "loss": 1.9281, "step": 12800 }, { "epoch": 0.56, "eval_loss": 1.8897275924682617, "eval_runtime": 11.5813, "eval_samples_per_second": 353.674, "eval_steps_per_second": 22.105, "step": 12800 }, { "epoch": 0.56, "learning_rate": 4.96678802507745e-05, "loss": 1.875, "step": 12810 }, { "epoch": 0.56, "learning_rate": 4.965212627051712e-05, "loss": 1.8754, "step": 12820 }, { "epoch": 0.56, "learning_rate": 4.9636362791584606e-05, "loss": 1.8657, "step": 12830 }, { "epoch": 0.56, "learning_rate": 4.9620589821596115e-05, "loss": 1.922, "step": 12840 }, { "epoch": 0.56, "learning_rate": 4.960480736817537e-05, "loss": 1.9201, "step": 12850 }, { "epoch": 0.56, "learning_rate": 4.958901543895066e-05, "loss": 1.8708, "step": 12860 }, { "epoch": 0.56, "learning_rate": 4.957321404155488e-05, "loss": 1.9147, "step": 12870 }, { "epoch": 0.56, "learning_rate": 4.955740318362548e-05, "loss": 1.9418, "step": 12880 }, { "epoch": 0.56, "learning_rate": 4.954158287280452e-05, "loss": 1.9568, "step": 12890 }, { "epoch": 0.56, "learning_rate": 4.9525753116738566e-05, "loss": 1.9146, "step": 12900 }, { "epoch": 0.56, "eval_loss": 1.89072847366333, "eval_runtime": 11.6591, "eval_samples_per_second": 351.315, "eval_steps_per_second": 21.957, "step": 12900 }, { "epoch": 0.57, "learning_rate": 4.950991392307881e-05, "loss": 1.9184, "step": 12910 }, { "epoch": 0.57, "learning_rate": 4.949406529948097e-05, "loss": 1.9761, "step": 12920 }, { "epoch": 0.57, "learning_rate": 4.947820725360534e-05, "loss": 1.9419, "step": 12930 }, { "epoch": 0.57, "learning_rate": 4.946233979311676e-05, "loss": 1.893, "step": 12940 }, { "epoch": 0.57, "learning_rate": 4.9446462925684616e-05, "loss": 1.9044, "step": 12950 }, { "epoch": 0.57, "learning_rate": 4.943057665898285e-05, "loss": 1.9326, "step": 12960 }, { "epoch": 0.57, "learning_rate": 4.941468100068994e-05, "loss": 1.9218, "step": 12970 }, { "epoch": 0.57, "learning_rate": 4.9398775958488914e-05, "loss": 1.9269, "step": 12980 }, { "epoch": 0.57, "learning_rate": 4.9382861540067324e-05, "loss": 1.9444, "step": 12990 }, { "epoch": 0.57, "learning_rate": 4.9366937753117266e-05, "loss": 1.8917, "step": 13000 }, { "epoch": 0.57, "eval_loss": 1.888537883758545, "eval_runtime": 11.8673, "eval_samples_per_second": 345.151, "eval_steps_per_second": 21.572, "step": 13000 }, { "epoch": 0.57, "learning_rate": 4.9351004605335335e-05, "loss": 1.8794, "step": 13010 }, { "epoch": 0.57, "learning_rate": 4.9335062104422693e-05, "loss": 1.977, "step": 13020 }, { "epoch": 0.57, "learning_rate": 4.9319110258085e-05, "loss": 1.918, "step": 13030 }, { "epoch": 0.57, "learning_rate": 4.930314907403243e-05, "loss": 1.9226, "step": 13040 }, { "epoch": 0.57, "learning_rate": 4.928717855997966e-05, "loss": 1.8847, "step": 13050 }, { "epoch": 0.57, "learning_rate": 4.927119872364591e-05, "loss": 1.9315, "step": 13060 }, { "epoch": 0.57, "learning_rate": 4.925520957275489e-05, "loss": 1.9221, "step": 13070 }, { "epoch": 0.57, "learning_rate": 4.9239211115034804e-05, "loss": 1.9149, "step": 13080 }, { "epoch": 0.57, "learning_rate": 4.922320335821835e-05, "loss": 1.8801, "step": 13090 }, { "epoch": 0.57, "learning_rate": 4.920718631004275e-05, "loss": 1.8859, "step": 13100 }, { "epoch": 0.57, "eval_loss": 1.8876938819885254, "eval_runtime": 11.8093, "eval_samples_per_second": 346.846, "eval_steps_per_second": 21.678, "step": 13100 }, { "epoch": 0.57, "learning_rate": 4.9191159978249686e-05, "loss": 1.9131, "step": 13110 }, { "epoch": 0.57, "learning_rate": 4.917512437058534e-05, "loss": 1.9271, "step": 13120 }, { "epoch": 0.57, "learning_rate": 4.915907949480037e-05, "loss": 1.9257, "step": 13130 }, { "epoch": 0.58, "learning_rate": 4.914302535864993e-05, "loss": 1.9172, "step": 13140 }, { "epoch": 0.58, "learning_rate": 4.9126961969893625e-05, "loss": 1.9085, "step": 13150 }, { "epoch": 0.58, "learning_rate": 4.911088933629557e-05, "loss": 1.916, "step": 13160 }, { "epoch": 0.58, "learning_rate": 4.90948074656243e-05, "loss": 1.9169, "step": 13170 }, { "epoch": 0.58, "learning_rate": 4.907871636565285e-05, "loss": 1.9196, "step": 13180 }, { "epoch": 0.58, "learning_rate": 4.906261604415872e-05, "loss": 1.919, "step": 13190 }, { "epoch": 0.58, "learning_rate": 4.904650650892384e-05, "loss": 1.9009, "step": 13200 }, { "epoch": 0.58, "eval_loss": 1.8867980241775513, "eval_runtime": 11.7952, "eval_samples_per_second": 347.26, "eval_steps_per_second": 21.704, "step": 13200 }, { "epoch": 0.58, "learning_rate": 4.90303877677346e-05, "loss": 1.8787, "step": 13210 }, { "epoch": 0.58, "learning_rate": 4.901425982838185e-05, "loss": 1.9349, "step": 13220 }, { "epoch": 0.58, "learning_rate": 4.8998122698660884e-05, "loss": 1.955, "step": 13230 }, { "epoch": 0.58, "learning_rate": 4.8981976386371444e-05, "loss": 1.9376, "step": 13240 }, { "epoch": 0.58, "learning_rate": 4.896582089931768e-05, "loss": 1.9519, "step": 13250 }, { "epoch": 0.58, "learning_rate": 4.894965624530821e-05, "loss": 1.9243, "step": 13260 }, { "epoch": 0.58, "learning_rate": 4.8933482432156094e-05, "loss": 1.9578, "step": 13270 }, { "epoch": 0.58, "learning_rate": 4.891729946767876e-05, "loss": 1.9111, "step": 13280 }, { "epoch": 0.58, "learning_rate": 4.8901107359698115e-05, "loss": 1.9282, "step": 13290 }, { "epoch": 0.58, "learning_rate": 4.8884906116040464e-05, "loss": 1.8958, "step": 13300 }, { "epoch": 0.58, "eval_loss": 1.8861165046691895, "eval_runtime": 11.9112, "eval_samples_per_second": 343.879, "eval_steps_per_second": 21.492, "step": 13300 }, { "epoch": 0.58, "learning_rate": 4.886869574453653e-05, "loss": 1.8374, "step": 13310 }, { "epoch": 0.58, "learning_rate": 4.8852476253021435e-05, "loss": 1.9007, "step": 13320 }, { "epoch": 0.58, "learning_rate": 4.883624764933474e-05, "loss": 1.9312, "step": 13330 }, { "epoch": 0.58, "learning_rate": 4.882000994132039e-05, "loss": 1.9262, "step": 13340 }, { "epoch": 0.58, "learning_rate": 4.8803763136826715e-05, "loss": 1.888, "step": 13350 }, { "epoch": 0.58, "learning_rate": 4.878750724370647e-05, "loss": 1.9391, "step": 13360 }, { "epoch": 0.59, "learning_rate": 4.87712422698168e-05, "loss": 1.8841, "step": 13370 }, { "epoch": 0.59, "learning_rate": 4.875496822301922e-05, "loss": 1.8599, "step": 13380 }, { "epoch": 0.59, "learning_rate": 4.873868511117964e-05, "loss": 1.8711, "step": 13390 }, { "epoch": 0.59, "learning_rate": 4.8722392942168365e-05, "loss": 1.9013, "step": 13400 }, { "epoch": 0.59, "eval_loss": 1.8849880695343018, "eval_runtime": 11.6646, "eval_samples_per_second": 351.149, "eval_steps_per_second": 21.947, "step": 13400 }, { "epoch": 0.59, "learning_rate": 4.870609172386006e-05, "loss": 1.8912, "step": 13410 }, { "epoch": 0.59, "learning_rate": 4.868978146413376e-05, "loss": 1.8703, "step": 13420 }, { "epoch": 0.59, "learning_rate": 4.867346217087289e-05, "loss": 1.92, "step": 13430 }, { "epoch": 0.59, "learning_rate": 4.865713385196522e-05, "loss": 1.8996, "step": 13440 }, { "epoch": 0.59, "learning_rate": 4.8640796515302915e-05, "loss": 1.9156, "step": 13450 }, { "epoch": 0.59, "learning_rate": 4.862445016878245e-05, "loss": 1.927, "step": 13460 }, { "epoch": 0.59, "learning_rate": 4.8608094820304704e-05, "loss": 1.9362, "step": 13470 }, { "epoch": 0.59, "learning_rate": 4.859173047777488e-05, "loss": 1.8748, "step": 13480 }, { "epoch": 0.59, "learning_rate": 4.857535714910252e-05, "loss": 1.9397, "step": 13490 }, { "epoch": 0.59, "learning_rate": 4.855897484220153e-05, "loss": 1.8975, "step": 13500 }, { "epoch": 0.59, "eval_loss": 1.8842600584030151, "eval_runtime": 11.9977, "eval_samples_per_second": 341.399, "eval_steps_per_second": 21.337, "step": 13500 }, { "epoch": 0.59, "learning_rate": 4.854258356499016e-05, "loss": 1.8826, "step": 13510 }, { "epoch": 0.59, "learning_rate": 4.8526183325390956e-05, "loss": 1.8971, "step": 13520 }, { "epoch": 0.59, "learning_rate": 4.850977413133085e-05, "loss": 1.9155, "step": 13530 }, { "epoch": 0.59, "learning_rate": 4.849335599074106e-05, "loss": 1.9324, "step": 13540 }, { "epoch": 0.59, "learning_rate": 4.847692891155716e-05, "loss": 1.8948, "step": 13550 }, { "epoch": 0.59, "learning_rate": 4.8460492901719006e-05, "loss": 1.9081, "step": 13560 }, { "epoch": 0.59, "learning_rate": 4.84440479691708e-05, "loss": 1.9164, "step": 13570 }, { "epoch": 0.59, "learning_rate": 4.842759412186107e-05, "loss": 1.9046, "step": 13580 }, { "epoch": 0.59, "learning_rate": 4.8411131367742606e-05, "loss": 1.8918, "step": 13590 }, { "epoch": 0.6, "learning_rate": 4.839465971477255e-05, "loss": 1.9471, "step": 13600 }, { "epoch": 0.6, "eval_loss": 1.8836712837219238, "eval_runtime": 11.9691, "eval_samples_per_second": 342.213, "eval_steps_per_second": 21.388, "step": 13600 }, { "epoch": 0.6, "learning_rate": 4.8378179170912295e-05, "loss": 1.9032, "step": 13610 }, { "epoch": 0.6, "learning_rate": 4.83616897441276e-05, "loss": 1.9068, "step": 13620 }, { "epoch": 0.6, "learning_rate": 4.8345191442388444e-05, "loss": 1.9466, "step": 13630 }, { "epoch": 0.6, "learning_rate": 4.832868427366915e-05, "loss": 1.9245, "step": 13640 }, { "epoch": 0.6, "learning_rate": 4.83121682459483e-05, "loss": 1.9372, "step": 13650 }, { "epoch": 0.6, "learning_rate": 4.829564336720877e-05, "loss": 1.9026, "step": 13660 }, { "epoch": 0.6, "learning_rate": 4.827910964543769e-05, "loss": 1.8805, "step": 13670 }, { "epoch": 0.6, "learning_rate": 4.82625670886265e-05, "loss": 1.9085, "step": 13680 }, { "epoch": 0.6, "learning_rate": 4.82460157047709e-05, "loss": 1.8671, "step": 13690 }, { "epoch": 0.6, "learning_rate": 4.822945550187083e-05, "loss": 1.9411, "step": 13700 }, { "epoch": 0.6, "eval_loss": 1.8830509185791016, "eval_runtime": 11.7716, "eval_samples_per_second": 347.957, "eval_steps_per_second": 21.747, "step": 13700 }, { "epoch": 0.6, "learning_rate": 4.8212886487930526e-05, "loss": 1.9245, "step": 13710 }, { "epoch": 0.6, "learning_rate": 4.819630867095845e-05, "loss": 1.9302, "step": 13720 }, { "epoch": 0.6, "learning_rate": 4.817972205896738e-05, "loss": 1.8732, "step": 13730 }, { "epoch": 0.6, "learning_rate": 4.816312665997426e-05, "loss": 1.888, "step": 13740 }, { "epoch": 0.6, "learning_rate": 4.814652248200035e-05, "loss": 1.8778, "step": 13750 }, { "epoch": 0.6, "learning_rate": 4.8129909533071105e-05, "loss": 1.8716, "step": 13760 }, { "epoch": 0.6, "learning_rate": 4.811328782121626e-05, "loss": 1.8734, "step": 13770 }, { "epoch": 0.6, "learning_rate": 4.809665735446975e-05, "loss": 1.9905, "step": 13780 }, { "epoch": 0.6, "learning_rate": 4.8080018140869775e-05, "loss": 1.9574, "step": 13790 }, { "epoch": 0.6, "learning_rate": 4.806337018845875e-05, "loss": 1.8946, "step": 13800 }, { "epoch": 0.6, "eval_loss": 1.8828606605529785, "eval_runtime": 11.6574, "eval_samples_per_second": 351.364, "eval_steps_per_second": 21.96, "step": 13800 }, { "epoch": 0.6, "learning_rate": 4.804671350528329e-05, "loss": 1.911, "step": 13810 }, { "epoch": 0.6, "learning_rate": 4.8030048099394265e-05, "loss": 1.9134, "step": 13820 }, { "epoch": 0.61, "learning_rate": 4.801337397884675e-05, "loss": 1.9293, "step": 13830 }, { "epoch": 0.61, "learning_rate": 4.799669115170001e-05, "loss": 1.9327, "step": 13840 }, { "epoch": 0.61, "learning_rate": 4.797999962601755e-05, "loss": 1.8865, "step": 13850 }, { "epoch": 0.61, "learning_rate": 4.796329940986706e-05, "loss": 1.9236, "step": 13860 }, { "epoch": 0.61, "learning_rate": 4.794659051132044e-05, "loss": 1.8992, "step": 13870 }, { "epoch": 0.61, "learning_rate": 4.7929872938453796e-05, "loss": 1.913, "step": 13880 }, { "epoch": 0.61, "learning_rate": 4.791314669934739e-05, "loss": 1.8726, "step": 13890 }, { "epoch": 0.61, "learning_rate": 4.78964118020857e-05, "loss": 1.8791, "step": 13900 }, { "epoch": 0.61, "eval_loss": 1.8819687366485596, "eval_runtime": 11.7317, "eval_samples_per_second": 349.139, "eval_steps_per_second": 21.821, "step": 13900 }, { "epoch": 0.61, "learning_rate": 4.7879668254757404e-05, "loss": 1.8488, "step": 13910 }, { "epoch": 0.61, "learning_rate": 4.786291606545533e-05, "loss": 1.9093, "step": 13920 }, { "epoch": 0.61, "learning_rate": 4.784615524227648e-05, "loss": 1.9286, "step": 13930 }, { "epoch": 0.61, "learning_rate": 4.782938579332207e-05, "loss": 1.9215, "step": 13940 }, { "epoch": 0.61, "learning_rate": 4.7812607726697446e-05, "loss": 1.9186, "step": 13950 }, { "epoch": 0.61, "learning_rate": 4.779582105051214e-05, "loss": 1.8571, "step": 13960 }, { "epoch": 0.61, "learning_rate": 4.777902577287983e-05, "loss": 1.9116, "step": 13970 }, { "epoch": 0.61, "learning_rate": 4.7762221901918364e-05, "loss": 1.8792, "step": 13980 }, { "epoch": 0.61, "learning_rate": 4.7745409445749737e-05, "loss": 1.8778, "step": 13990 }, { "epoch": 0.61, "learning_rate": 4.7728588412500095e-05, "loss": 1.9064, "step": 14000 }, { "epoch": 0.61, "eval_loss": 1.88134765625, "eval_runtime": 12.1415, "eval_samples_per_second": 337.357, "eval_steps_per_second": 21.085, "step": 14000 }, { "epoch": 0.61, "learning_rate": 4.771175881029973e-05, "loss": 1.9157, "step": 14010 }, { "epoch": 0.61, "learning_rate": 4.769492064728309e-05, "loss": 1.8532, "step": 14020 }, { "epoch": 0.61, "learning_rate": 4.7678073931588716e-05, "loss": 1.909, "step": 14030 }, { "epoch": 0.61, "learning_rate": 4.766121867135935e-05, "loss": 1.9097, "step": 14040 }, { "epoch": 0.62, "learning_rate": 4.7644354874741795e-05, "loss": 1.9228, "step": 14050 }, { "epoch": 0.62, "learning_rate": 4.762748254988704e-05, "loss": 1.8766, "step": 14060 }, { "epoch": 0.62, "learning_rate": 4.7610601704950154e-05, "loss": 1.8731, "step": 14070 }, { "epoch": 0.62, "learning_rate": 4.7593712348090335e-05, "loss": 1.8987, "step": 14080 }, { "epoch": 0.62, "learning_rate": 4.75768144874709e-05, "loss": 1.8735, "step": 14090 }, { "epoch": 0.62, "learning_rate": 4.755990813125929e-05, "loss": 1.9096, "step": 14100 }, { "epoch": 0.62, "eval_loss": 1.8808305263519287, "eval_runtime": 11.6253, "eval_samples_per_second": 352.334, "eval_steps_per_second": 22.021, "step": 14100 }, { "epoch": 0.62, "learning_rate": 4.754299328762703e-05, "loss": 1.8673, "step": 14110 }, { "epoch": 0.62, "learning_rate": 4.7526069964749745e-05, "loss": 1.9113, "step": 14120 }, { "epoch": 0.62, "learning_rate": 4.750913817080718e-05, "loss": 1.906, "step": 14130 }, { "epoch": 0.62, "learning_rate": 4.749219791398315e-05, "loss": 1.8916, "step": 14140 }, { "epoch": 0.62, "learning_rate": 4.747524920246558e-05, "loss": 1.9327, "step": 14150 }, { "epoch": 0.62, "learning_rate": 4.745829204444648e-05, "loss": 1.9021, "step": 14160 }, { "epoch": 0.62, "learning_rate": 4.744132644812192e-05, "loss": 1.859, "step": 14170 }, { "epoch": 0.62, "learning_rate": 4.742435242169208e-05, "loss": 1.8952, "step": 14180 }, { "epoch": 0.62, "learning_rate": 4.74073699733612e-05, "loss": 1.9064, "step": 14190 }, { "epoch": 0.62, "learning_rate": 4.73903791113376e-05, "loss": 1.8784, "step": 14200 }, { "epoch": 0.62, "eval_loss": 1.878967523574829, "eval_runtime": 11.7525, "eval_samples_per_second": 348.52, "eval_steps_per_second": 21.783, "step": 14200 }, { "epoch": 0.62, "learning_rate": 4.737337984383363e-05, "loss": 1.8813, "step": 14210 }, { "epoch": 0.62, "learning_rate": 4.735637217906574e-05, "loss": 1.8586, "step": 14220 }, { "epoch": 0.62, "learning_rate": 4.733935612525444e-05, "loss": 1.9097, "step": 14230 }, { "epoch": 0.62, "learning_rate": 4.732233169062428e-05, "loss": 1.9324, "step": 14240 }, { "epoch": 0.62, "learning_rate": 4.730529888340386e-05, "loss": 1.9022, "step": 14250 }, { "epoch": 0.62, "learning_rate": 4.7288257711825836e-05, "loss": 1.8895, "step": 14260 }, { "epoch": 0.62, "learning_rate": 4.7271208184126895e-05, "loss": 1.9084, "step": 14270 }, { "epoch": 0.63, "learning_rate": 4.725415030854777e-05, "loss": 1.8941, "step": 14280 }, { "epoch": 0.63, "learning_rate": 4.7237084093333244e-05, "loss": 1.8961, "step": 14290 }, { "epoch": 0.63, "learning_rate": 4.72200095467321e-05, "loss": 1.881, "step": 14300 }, { "epoch": 0.63, "eval_loss": 1.8786323070526123, "eval_runtime": 11.735, "eval_samples_per_second": 349.042, "eval_steps_per_second": 21.815, "step": 14300 }, { "epoch": 0.63, "learning_rate": 4.720292667699717e-05, "loss": 1.9209, "step": 14310 }, { "epoch": 0.63, "learning_rate": 4.7185835492385294e-05, "loss": 1.8944, "step": 14320 }, { "epoch": 0.63, "learning_rate": 4.716873600115736e-05, "loss": 1.9029, "step": 14330 }, { "epoch": 0.63, "learning_rate": 4.7151628211578226e-05, "loss": 1.9425, "step": 14340 }, { "epoch": 0.63, "learning_rate": 4.71345121319168e-05, "loss": 1.9491, "step": 14350 }, { "epoch": 0.63, "learning_rate": 4.711738777044598e-05, "loss": 1.9051, "step": 14360 }, { "epoch": 0.63, "learning_rate": 4.710025513544266e-05, "loss": 1.9132, "step": 14370 }, { "epoch": 0.63, "learning_rate": 4.708311423518776e-05, "loss": 1.9294, "step": 14380 }, { "epoch": 0.63, "learning_rate": 4.706596507796616e-05, "loss": 1.8892, "step": 14390 }, { "epoch": 0.63, "learning_rate": 4.7048807672066754e-05, "loss": 1.9103, "step": 14400 }, { "epoch": 0.63, "eval_loss": 1.8775782585144043, "eval_runtime": 11.7984, "eval_samples_per_second": 347.167, "eval_steps_per_second": 21.698, "step": 14400 }, { "epoch": 0.63, "learning_rate": 4.7031642025782416e-05, "loss": 1.8831, "step": 14410 }, { "epoch": 0.63, "learning_rate": 4.701446814741001e-05, "loss": 1.922, "step": 14420 }, { "epoch": 0.63, "learning_rate": 4.699728604525037e-05, "loss": 1.9376, "step": 14430 }, { "epoch": 0.63, "learning_rate": 4.69800957276083e-05, "loss": 1.9187, "step": 14440 }, { "epoch": 0.63, "learning_rate": 4.696289720279259e-05, "loss": 1.8812, "step": 14450 }, { "epoch": 0.63, "learning_rate": 4.694569047911599e-05, "loss": 1.9076, "step": 14460 }, { "epoch": 0.63, "learning_rate": 4.69284755648952e-05, "loss": 1.9382, "step": 14470 }, { "epoch": 0.63, "learning_rate": 4.69112524684509e-05, "loss": 1.8788, "step": 14480 }, { "epoch": 0.63, "learning_rate": 4.689402119810773e-05, "loss": 1.9089, "step": 14490 }, { "epoch": 0.63, "learning_rate": 4.687678176219424e-05, "loss": 1.913, "step": 14500 }, { "epoch": 0.63, "eval_loss": 1.8763041496276855, "eval_runtime": 12.2143, "eval_samples_per_second": 335.344, "eval_steps_per_second": 20.959, "step": 14500 }, { "epoch": 0.64, "learning_rate": 4.6859534169042976e-05, "loss": 1.878, "step": 14510 }, { "epoch": 0.64, "learning_rate": 4.6842278426990397e-05, "loss": 1.9019, "step": 14520 }, { "epoch": 0.64, "learning_rate": 4.68250145443769e-05, "loss": 1.8764, "step": 14530 }, { "epoch": 0.64, "learning_rate": 4.6807742529546815e-05, "loss": 1.9188, "step": 14540 }, { "epoch": 0.64, "learning_rate": 4.679046239084845e-05, "loss": 1.9146, "step": 14550 }, { "epoch": 0.64, "learning_rate": 4.677317413663397e-05, "loss": 1.8906, "step": 14560 }, { "epoch": 0.64, "learning_rate": 4.675587777525949e-05, "loss": 1.875, "step": 14570 }, { "epoch": 0.64, "learning_rate": 4.6738573315085075e-05, "loss": 1.9221, "step": 14580 }, { "epoch": 0.64, "learning_rate": 4.672126076447466e-05, "loss": 1.928, "step": 14590 }, { "epoch": 0.64, "learning_rate": 4.670394013179611e-05, "loss": 1.9176, "step": 14600 }, { "epoch": 0.64, "eval_loss": 1.876225471496582, "eval_runtime": 11.7678, "eval_samples_per_second": 348.067, "eval_steps_per_second": 21.754, "step": 14600 }, { "epoch": 0.64, "learning_rate": 4.66866114254212e-05, "loss": 1.8598, "step": 14610 }, { "epoch": 0.64, "learning_rate": 4.666927465372559e-05, "loss": 1.9218, "step": 14620 }, { "epoch": 0.64, "learning_rate": 4.665192982508884e-05, "loss": 1.8575, "step": 14630 }, { "epoch": 0.64, "learning_rate": 4.6634576947894456e-05, "loss": 1.9287, "step": 14640 }, { "epoch": 0.64, "learning_rate": 4.6617216030529746e-05, "loss": 1.9112, "step": 14650 }, { "epoch": 0.64, "learning_rate": 4.659984708138597e-05, "loss": 1.8923, "step": 14660 }, { "epoch": 0.64, "learning_rate": 4.658247010885826e-05, "loss": 1.8565, "step": 14670 }, { "epoch": 0.64, "learning_rate": 4.6565085121345606e-05, "loss": 1.8667, "step": 14680 }, { "epoch": 0.64, "learning_rate": 4.654769212725088e-05, "loss": 1.8877, "step": 14690 }, { "epoch": 0.64, "learning_rate": 4.6530291134980825e-05, "loss": 1.9241, "step": 14700 }, { "epoch": 0.64, "eval_loss": 1.8761088848114014, "eval_runtime": 12.0138, "eval_samples_per_second": 340.942, "eval_steps_per_second": 21.309, "step": 14700 }, { "epoch": 0.64, "learning_rate": 4.651288215294606e-05, "loss": 1.8847, "step": 14710 }, { "epoch": 0.64, "learning_rate": 4.649546518956105e-05, "loss": 1.8731, "step": 14720 }, { "epoch": 0.64, "learning_rate": 4.647804025324413e-05, "loss": 1.8855, "step": 14730 }, { "epoch": 0.65, "learning_rate": 4.6460607352417476e-05, "loss": 1.89, "step": 14740 }, { "epoch": 0.65, "learning_rate": 4.644316649550712e-05, "loss": 1.8096, "step": 14750 }, { "epoch": 0.65, "learning_rate": 4.642571769094296e-05, "loss": 1.9138, "step": 14760 }, { "epoch": 0.65, "learning_rate": 4.6408260947158684e-05, "loss": 1.9277, "step": 14770 }, { "epoch": 0.65, "learning_rate": 4.6390796272591884e-05, "loss": 1.8945, "step": 14780 }, { "epoch": 0.65, "learning_rate": 4.637332367568392e-05, "loss": 1.8884, "step": 14790 }, { "epoch": 0.65, "learning_rate": 4.635584316488003e-05, "loss": 1.9179, "step": 14800 }, { "epoch": 0.65, "eval_loss": 1.8754827976226807, "eval_runtime": 12.2675, "eval_samples_per_second": 333.89, "eval_steps_per_second": 20.868, "step": 14800 }, { "epoch": 0.65, "learning_rate": 4.6338354748629244e-05, "loss": 1.9361, "step": 14810 }, { "epoch": 0.65, "learning_rate": 4.6320858435384446e-05, "loss": 1.9061, "step": 14820 }, { "epoch": 0.65, "learning_rate": 4.630335423360232e-05, "loss": 1.9, "step": 14830 }, { "epoch": 0.65, "learning_rate": 4.628584215174333e-05, "loss": 1.8648, "step": 14840 }, { "epoch": 0.65, "learning_rate": 4.6268322198271804e-05, "loss": 1.8879, "step": 14850 }, { "epoch": 0.65, "learning_rate": 4.625079438165585e-05, "loss": 1.8491, "step": 14860 }, { "epoch": 0.65, "learning_rate": 4.6233258710367375e-05, "loss": 1.867, "step": 14870 }, { "epoch": 0.65, "learning_rate": 4.621571519288209e-05, "loss": 1.8921, "step": 14880 }, { "epoch": 0.65, "learning_rate": 4.619816383767949e-05, "loss": 1.9075, "step": 14890 }, { "epoch": 0.65, "learning_rate": 4.6180604653242855e-05, "loss": 1.9128, "step": 14900 }, { "epoch": 0.65, "eval_loss": 1.874982476234436, "eval_runtime": 11.9034, "eval_samples_per_second": 344.103, "eval_steps_per_second": 21.506, "step": 14900 }, { "epoch": 0.65, "learning_rate": 4.6163037648059256e-05, "loss": 1.9357, "step": 14910 }, { "epoch": 0.65, "learning_rate": 4.614546283061955e-05, "loss": 1.8716, "step": 14920 }, { "epoch": 0.65, "learning_rate": 4.612788020941837e-05, "loss": 1.886, "step": 14930 }, { "epoch": 0.65, "learning_rate": 4.611028979295411e-05, "loss": 1.9695, "step": 14940 }, { "epoch": 0.65, "learning_rate": 4.6092691589728924e-05, "loss": 1.8555, "step": 14950 }, { "epoch": 0.65, "learning_rate": 4.607508560824876e-05, "loss": 1.9099, "step": 14960 }, { "epoch": 0.66, "learning_rate": 4.6057471857023306e-05, "loss": 1.8963, "step": 14970 }, { "epoch": 0.66, "learning_rate": 4.603985034456599e-05, "loss": 1.8679, "step": 14980 }, { "epoch": 0.66, "learning_rate": 4.602222107939403e-05, "loss": 1.9101, "step": 14990 }, { "epoch": 0.66, "learning_rate": 4.6004584070028354e-05, "loss": 1.9419, "step": 15000 }, { "epoch": 0.66, "eval_loss": 1.874194622039795, "eval_runtime": 11.8292, "eval_samples_per_second": 346.263, "eval_steps_per_second": 21.641, "step": 15000 }, { "epoch": 0.66, "learning_rate": 4.598693932499366e-05, "loss": 1.9005, "step": 15010 }, { "epoch": 0.66, "learning_rate": 4.596928685281836e-05, "loss": 1.8898, "step": 15020 }, { "epoch": 0.66, "learning_rate": 4.5951626662034636e-05, "loss": 1.8748, "step": 15030 }, { "epoch": 0.66, "learning_rate": 4.5933958761178355e-05, "loss": 1.9403, "step": 15040 }, { "epoch": 0.66, "learning_rate": 4.5916283158789146e-05, "loss": 1.8801, "step": 15050 }, { "epoch": 0.66, "learning_rate": 4.5898599863410355e-05, "loss": 1.8711, "step": 15060 }, { "epoch": 0.66, "learning_rate": 4.5880908883589044e-05, "loss": 1.8567, "step": 15070 }, { "epoch": 0.66, "learning_rate": 4.5863210227875965e-05, "loss": 1.9125, "step": 15080 }, { "epoch": 0.66, "learning_rate": 4.584550390482562e-05, "loss": 1.9104, "step": 15090 }, { "epoch": 0.66, "learning_rate": 4.582778992299618e-05, "loss": 1.8765, "step": 15100 }, { "epoch": 0.66, "eval_loss": 1.8737382888793945, "eval_runtime": 11.8232, "eval_samples_per_second": 346.437, "eval_steps_per_second": 21.652, "step": 15100 }, { "epoch": 0.66, "learning_rate": 4.5810068290949566e-05, "loss": 1.9292, "step": 15110 }, { "epoch": 0.66, "learning_rate": 4.5792339017251336e-05, "loss": 1.9138, "step": 15120 }, { "epoch": 0.66, "learning_rate": 4.577460211047078e-05, "loss": 1.8812, "step": 15130 }, { "epoch": 0.66, "learning_rate": 4.5756857579180887e-05, "loss": 1.875, "step": 15140 }, { "epoch": 0.66, "learning_rate": 4.573910543195829e-05, "loss": 1.8958, "step": 15150 }, { "epoch": 0.66, "learning_rate": 4.572134567738334e-05, "loss": 1.9032, "step": 15160 }, { "epoch": 0.66, "learning_rate": 4.5703578324040044e-05, "loss": 1.9176, "step": 15170 }, { "epoch": 0.66, "learning_rate": 4.56858033805161e-05, "loss": 1.8699, "step": 15180 }, { "epoch": 0.66, "learning_rate": 4.5668020855402844e-05, "loss": 1.8832, "step": 15190 }, { "epoch": 0.67, "learning_rate": 4.565023075729532e-05, "loss": 1.9259, "step": 15200 }, { "epoch": 0.67, "eval_loss": 1.872511625289917, "eval_runtime": 11.6329, "eval_samples_per_second": 352.105, "eval_steps_per_second": 22.007, "step": 15200 }, { "epoch": 0.67, "learning_rate": 4.56324330947922e-05, "loss": 1.8409, "step": 15210 }, { "epoch": 0.67, "learning_rate": 4.5614627876495825e-05, "loss": 1.9122, "step": 15220 }, { "epoch": 0.67, "learning_rate": 4.559681511101217e-05, "loss": 1.903, "step": 15230 }, { "epoch": 0.67, "learning_rate": 4.5578994806950876e-05, "loss": 1.8904, "step": 15240 }, { "epoch": 0.67, "learning_rate": 4.556116697292524e-05, "loss": 1.9553, "step": 15250 }, { "epoch": 0.67, "learning_rate": 4.554333161755216e-05, "loss": 1.8666, "step": 15260 }, { "epoch": 0.67, "learning_rate": 4.552548874945221e-05, "loss": 1.8376, "step": 15270 }, { "epoch": 0.67, "learning_rate": 4.550763837724957e-05, "loss": 1.8787, "step": 15280 }, { "epoch": 0.67, "learning_rate": 4.5489780509572044e-05, "loss": 1.9157, "step": 15290 }, { "epoch": 0.67, "learning_rate": 4.5471915155051084e-05, "loss": 1.9041, "step": 15300 }, { "epoch": 0.67, "eval_loss": 1.8725992441177368, "eval_runtime": 11.7485, "eval_samples_per_second": 348.639, "eval_steps_per_second": 21.79, "step": 15300 }, { "epoch": 0.67, "learning_rate": 4.545404232232174e-05, "loss": 1.9036, "step": 15310 }, { "epoch": 0.67, "learning_rate": 4.5436162020022686e-05, "loss": 1.8459, "step": 15320 }, { "epoch": 0.67, "learning_rate": 4.541827425679618e-05, "loss": 1.8782, "step": 15330 }, { "epoch": 0.67, "learning_rate": 4.540037904128814e-05, "loss": 1.921, "step": 15340 }, { "epoch": 0.67, "learning_rate": 4.538247638214804e-05, "loss": 1.913, "step": 15350 }, { "epoch": 0.67, "learning_rate": 4.536456628802895e-05, "loss": 1.8882, "step": 15360 }, { "epoch": 0.67, "learning_rate": 4.5346648767587574e-05, "loss": 1.9248, "step": 15370 }, { "epoch": 0.67, "learning_rate": 4.532872382948418e-05, "loss": 1.9775, "step": 15380 }, { "epoch": 0.67, "learning_rate": 4.5310791482382616e-05, "loss": 1.9272, "step": 15390 }, { "epoch": 0.67, "learning_rate": 4.52928517349503e-05, "loss": 1.8919, "step": 15400 }, { "epoch": 0.67, "eval_loss": 1.8721492290496826, "eval_runtime": 11.7624, "eval_samples_per_second": 348.228, "eval_steps_per_second": 21.764, "step": 15400 }, { "epoch": 0.67, "learning_rate": 4.527490459585828e-05, "loss": 1.902, "step": 15410 }, { "epoch": 0.68, "learning_rate": 4.525695007378112e-05, "loss": 1.8867, "step": 15420 }, { "epoch": 0.68, "learning_rate": 4.523898817739697e-05, "loss": 1.8574, "step": 15430 }, { "epoch": 0.68, "learning_rate": 4.522101891538755e-05, "loss": 1.9315, "step": 15440 }, { "epoch": 0.68, "learning_rate": 4.5203042296438156e-05, "loss": 1.9293, "step": 15450 }, { "epoch": 0.68, "learning_rate": 4.518505832923761e-05, "loss": 1.9151, "step": 15460 }, { "epoch": 0.68, "learning_rate": 4.516706702247828e-05, "loss": 1.9109, "step": 15470 }, { "epoch": 0.68, "learning_rate": 4.514906838485613e-05, "loss": 1.8913, "step": 15480 }, { "epoch": 0.68, "learning_rate": 4.513106242507061e-05, "loss": 1.9178, "step": 15490 }, { "epoch": 0.68, "learning_rate": 4.511304915182477e-05, "loss": 1.9077, "step": 15500 }, { "epoch": 0.68, "eval_loss": 1.870882272720337, "eval_runtime": 11.8959, "eval_samples_per_second": 344.32, "eval_steps_per_second": 21.52, "step": 15500 }, { "epoch": 0.68, "learning_rate": 4.509502857382512e-05, "loss": 1.8716, "step": 15510 }, { "epoch": 0.68, "learning_rate": 4.507700069978176e-05, "loss": 1.9321, "step": 15520 }, { "epoch": 0.68, "learning_rate": 4.50589655384083e-05, "loss": 1.8632, "step": 15530 }, { "epoch": 0.68, "learning_rate": 4.504092309842187e-05, "loss": 1.8933, "step": 15540 }, { "epoch": 0.68, "learning_rate": 4.502287338854311e-05, "loss": 1.8846, "step": 15550 }, { "epoch": 0.68, "learning_rate": 4.5004816417496194e-05, "loss": 1.9258, "step": 15560 }, { "epoch": 0.68, "learning_rate": 4.4986752194008786e-05, "loss": 1.9276, "step": 15570 }, { "epoch": 0.68, "learning_rate": 4.496868072681206e-05, "loss": 1.9237, "step": 15580 }, { "epoch": 0.68, "learning_rate": 4.495060202464069e-05, "loss": 1.9118, "step": 15590 }, { "epoch": 0.68, "learning_rate": 4.4932516096232864e-05, "loss": 1.8694, "step": 15600 }, { "epoch": 0.68, "eval_loss": 1.8696751594543457, "eval_runtime": 11.8083, "eval_samples_per_second": 346.876, "eval_steps_per_second": 21.68, "step": 15600 }, { "epoch": 0.68, "learning_rate": 4.4914422950330247e-05, "loss": 1.8977, "step": 15610 }, { "epoch": 0.68, "learning_rate": 4.489632259567799e-05, "loss": 1.8569, "step": 15620 }, { "epoch": 0.68, "learning_rate": 4.487821504102474e-05, "loss": 1.8957, "step": 15630 }, { "epoch": 0.68, "learning_rate": 4.486010029512261e-05, "loss": 1.9011, "step": 15640 }, { "epoch": 0.69, "learning_rate": 4.48419783667272e-05, "loss": 1.8822, "step": 15650 }, { "epoch": 0.69, "learning_rate": 4.482384926459758e-05, "loss": 1.8912, "step": 15660 }, { "epoch": 0.69, "learning_rate": 4.480571299749628e-05, "loss": 1.9014, "step": 15670 }, { "epoch": 0.69, "learning_rate": 4.47875695741893e-05, "loss": 1.9297, "step": 15680 }, { "epoch": 0.69, "learning_rate": 4.476941900344611e-05, "loss": 1.9134, "step": 15690 }, { "epoch": 0.69, "learning_rate": 4.47512612940396e-05, "loss": 1.9072, "step": 15700 }, { "epoch": 0.69, "eval_loss": 1.869808554649353, "eval_runtime": 11.9339, "eval_samples_per_second": 343.223, "eval_steps_per_second": 21.451, "step": 15700 }, { "epoch": 0.69, "learning_rate": 4.473309645474614e-05, "loss": 1.8311, "step": 15710 }, { "epoch": 0.69, "learning_rate": 4.471492449434555e-05, "loss": 1.9179, "step": 15720 }, { "epoch": 0.69, "learning_rate": 4.4696745421621076e-05, "loss": 1.8282, "step": 15730 }, { "epoch": 0.69, "learning_rate": 4.46785592453594e-05, "loss": 1.8748, "step": 15740 }, { "epoch": 0.69, "learning_rate": 4.466036597435064e-05, "loss": 1.8851, "step": 15750 }, { "epoch": 0.69, "learning_rate": 4.464216561738836e-05, "loss": 1.9093, "step": 15760 }, { "epoch": 0.69, "learning_rate": 4.462395818326953e-05, "loss": 1.874, "step": 15770 }, { "epoch": 0.69, "learning_rate": 4.460574368079454e-05, "loss": 1.9358, "step": 15780 }, { "epoch": 0.69, "learning_rate": 4.458752211876721e-05, "loss": 1.8965, "step": 15790 }, { "epoch": 0.69, "learning_rate": 4.456929350599476e-05, "loss": 1.9137, "step": 15800 }, { "epoch": 0.69, "eval_loss": 1.8691773414611816, "eval_runtime": 11.9836, "eval_samples_per_second": 341.799, "eval_steps_per_second": 21.362, "step": 15800 }, { "epoch": 0.69, "learning_rate": 4.4551057851287834e-05, "loss": 1.8851, "step": 15810 }, { "epoch": 0.69, "learning_rate": 4.4532815163460455e-05, "loss": 1.8815, "step": 15820 }, { "epoch": 0.69, "learning_rate": 4.451456545133007e-05, "loss": 1.8997, "step": 15830 }, { "epoch": 0.69, "learning_rate": 4.44963087237175e-05, "loss": 1.8841, "step": 15840 }, { "epoch": 0.69, "learning_rate": 4.4478044989446965e-05, "loss": 1.9042, "step": 15850 }, { "epoch": 0.69, "learning_rate": 4.445977425734609e-05, "loss": 1.8973, "step": 15860 }, { "epoch": 0.69, "learning_rate": 4.444149653624585e-05, "loss": 1.8587, "step": 15870 }, { "epoch": 0.7, "learning_rate": 4.4423211834980626e-05, "loss": 1.8835, "step": 15880 }, { "epoch": 0.7, "learning_rate": 4.440492016238815e-05, "loss": 1.9072, "step": 15890 }, { "epoch": 0.7, "learning_rate": 4.438662152730954e-05, "loss": 1.9272, "step": 15900 }, { "epoch": 0.7, "eval_loss": 1.8685188293457031, "eval_runtime": 11.8174, "eval_samples_per_second": 346.607, "eval_steps_per_second": 21.663, "step": 15900 }, { "epoch": 0.7, "learning_rate": 4.436831593858928e-05, "loss": 1.9128, "step": 15910 }, { "epoch": 0.7, "learning_rate": 4.435000340507519e-05, "loss": 1.8712, "step": 15920 }, { "epoch": 0.7, "learning_rate": 4.433168393561849e-05, "loss": 1.915, "step": 15930 }, { "epoch": 0.7, "learning_rate": 4.43133575390737e-05, "loss": 1.9015, "step": 15940 }, { "epoch": 0.7, "learning_rate": 4.429502422429874e-05, "loss": 1.8909, "step": 15950 }, { "epoch": 0.7, "learning_rate": 4.427668400015483e-05, "loss": 1.8679, "step": 15960 }, { "epoch": 0.7, "learning_rate": 4.4258336875506564e-05, "loss": 1.858, "step": 15970 }, { "epoch": 0.7, "learning_rate": 4.423998285922185e-05, "loss": 1.899, "step": 15980 }, { "epoch": 0.7, "learning_rate": 4.422162196017194e-05, "loss": 1.8772, "step": 15990 }, { "epoch": 0.7, "learning_rate": 4.42032541872314e-05, "loss": 1.9077, "step": 16000 }, { "epoch": 0.7, "eval_loss": 1.8670729398727417, "eval_runtime": 11.9299, "eval_samples_per_second": 343.34, "eval_steps_per_second": 21.459, "step": 16000 }, { "epoch": 0.7, "learning_rate": 4.418487954927812e-05, "loss": 1.9019, "step": 16010 }, { "epoch": 0.7, "learning_rate": 4.416649805519333e-05, "loss": 1.8496, "step": 16020 }, { "epoch": 0.7, "learning_rate": 4.4148109713861536e-05, "loss": 1.8713, "step": 16030 }, { "epoch": 0.7, "learning_rate": 4.412971453417059e-05, "loss": 1.9179, "step": 16040 }, { "epoch": 0.7, "learning_rate": 4.411131252501161e-05, "loss": 1.9228, "step": 16050 }, { "epoch": 0.7, "learning_rate": 4.4092903695279074e-05, "loss": 1.9031, "step": 16060 }, { "epoch": 0.7, "learning_rate": 4.40744880538707e-05, "loss": 1.8563, "step": 16070 }, { "epoch": 0.7, "learning_rate": 4.405606560968752e-05, "loss": 1.8679, "step": 16080 }, { "epoch": 0.7, "learning_rate": 4.403763637163385e-05, "loss": 1.8741, "step": 16090 }, { "epoch": 0.7, "learning_rate": 4.401920034861731e-05, "loss": 1.8663, "step": 16100 }, { "epoch": 0.7, "eval_loss": 1.8673057556152344, "eval_runtime": 12.4069, "eval_samples_per_second": 330.14, "eval_steps_per_second": 20.634, "step": 16100 }, { "epoch": 0.71, "learning_rate": 4.400075754954877e-05, "loss": 1.9126, "step": 16110 }, { "epoch": 0.71, "learning_rate": 4.398230798334238e-05, "loss": 1.8892, "step": 16120 }, { "epoch": 0.71, "learning_rate": 4.396385165891559e-05, "loss": 1.9086, "step": 16130 }, { "epoch": 0.71, "learning_rate": 4.394538858518907e-05, "loss": 1.8721, "step": 16140 }, { "epoch": 0.71, "learning_rate": 4.392691877108681e-05, "loss": 1.9023, "step": 16150 }, { "epoch": 0.71, "learning_rate": 4.390844222553599e-05, "loss": 1.905, "step": 16160 }, { "epoch": 0.71, "learning_rate": 4.3889958957467104e-05, "loss": 1.8876, "step": 16170 }, { "epoch": 0.71, "learning_rate": 4.387146897581386e-05, "loss": 1.8468, "step": 16180 }, { "epoch": 0.71, "learning_rate": 4.3852972289513224e-05, "loss": 1.89, "step": 16190 }, { "epoch": 0.71, "learning_rate": 4.3834468907505395e-05, "loss": 1.8819, "step": 16200 }, { "epoch": 0.71, "eval_loss": 1.8665714263916016, "eval_runtime": 11.567, "eval_samples_per_second": 354.112, "eval_steps_per_second": 22.132, "step": 16200 }, { "epoch": 0.71, "learning_rate": 4.3815958838733814e-05, "loss": 1.8478, "step": 16210 }, { "epoch": 0.71, "learning_rate": 4.379744209214517e-05, "loss": 1.8759, "step": 16220 }, { "epoch": 0.71, "learning_rate": 4.3778918676689334e-05, "loss": 1.8713, "step": 16230 }, { "epoch": 0.71, "learning_rate": 4.376038860131945e-05, "loss": 1.9362, "step": 16240 }, { "epoch": 0.71, "learning_rate": 4.374185187499186e-05, "loss": 1.904, "step": 16250 }, { "epoch": 0.71, "learning_rate": 4.372330850666611e-05, "loss": 1.8876, "step": 16260 }, { "epoch": 0.71, "learning_rate": 4.3704758505304966e-05, "loss": 1.9085, "step": 16270 }, { "epoch": 0.71, "learning_rate": 4.368620187987442e-05, "loss": 1.907, "step": 16280 }, { "epoch": 0.71, "learning_rate": 4.3667638639343625e-05, "loss": 1.8927, "step": 16290 }, { "epoch": 0.71, "learning_rate": 4.364906879268495e-05, "loss": 1.8798, "step": 16300 }, { "epoch": 0.71, "eval_loss": 1.8657153844833374, "eval_runtime": 11.7422, "eval_samples_per_second": 348.828, "eval_steps_per_second": 21.802, "step": 16300 }, { "epoch": 0.71, "learning_rate": 4.363049234887399e-05, "loss": 1.9308, "step": 16310 }, { "epoch": 0.71, "learning_rate": 4.361190931688947e-05, "loss": 1.8721, "step": 16320 }, { "epoch": 0.71, "learning_rate": 4.359331970571335e-05, "loss": 1.9018, "step": 16330 }, { "epoch": 0.72, "learning_rate": 4.3574723524330726e-05, "loss": 1.8691, "step": 16340 }, { "epoch": 0.72, "learning_rate": 4.355612078172991e-05, "loss": 1.9025, "step": 16350 }, { "epoch": 0.72, "learning_rate": 4.353751148690236e-05, "loss": 1.8992, "step": 16360 }, { "epoch": 0.72, "learning_rate": 4.35188956488427e-05, "loss": 1.8235, "step": 16370 }, { "epoch": 0.72, "learning_rate": 4.350027327654874e-05, "loss": 1.92, "step": 16380 }, { "epoch": 0.72, "learning_rate": 4.348164437902143e-05, "loss": 1.8775, "step": 16390 }, { "epoch": 0.72, "learning_rate": 4.3463008965264864e-05, "loss": 1.9177, "step": 16400 }, { "epoch": 0.72, "eval_loss": 1.8651164770126343, "eval_runtime": 11.6918, "eval_samples_per_second": 350.33, "eval_steps_per_second": 21.896, "step": 16400 }, { "epoch": 0.72, "learning_rate": 4.3444367044286315e-05, "loss": 1.9275, "step": 16410 }, { "epoch": 0.72, "learning_rate": 4.3425718625096176e-05, "loss": 1.8558, "step": 16420 }, { "epoch": 0.72, "learning_rate": 4.340706371670799e-05, "loss": 1.8686, "step": 16430 }, { "epoch": 0.72, "learning_rate": 4.3388402328138434e-05, "loss": 1.8789, "step": 16440 }, { "epoch": 0.72, "learning_rate": 4.336973446840733e-05, "loss": 1.8479, "step": 16450 }, { "epoch": 0.72, "learning_rate": 4.335106014653759e-05, "loss": 1.904, "step": 16460 }, { "epoch": 0.72, "learning_rate": 4.333237937155531e-05, "loss": 1.9069, "step": 16470 }, { "epoch": 0.72, "learning_rate": 4.331369215248965e-05, "loss": 1.9015, "step": 16480 }, { "epoch": 0.72, "learning_rate": 4.329499849837293e-05, "loss": 1.8867, "step": 16490 }, { "epoch": 0.72, "learning_rate": 4.3276298418240514e-05, "loss": 1.8883, "step": 16500 }, { "epoch": 0.72, "eval_loss": 1.864149808883667, "eval_runtime": 11.9658, "eval_samples_per_second": 342.309, "eval_steps_per_second": 21.394, "step": 16500 }, { "epoch": 0.72, "learning_rate": 4.325759192113095e-05, "loss": 1.8479, "step": 16510 }, { "epoch": 0.72, "learning_rate": 4.323887901608584e-05, "loss": 1.8701, "step": 16520 }, { "epoch": 0.72, "learning_rate": 4.3220159712149894e-05, "loss": 1.8689, "step": 16530 }, { "epoch": 0.72, "learning_rate": 4.320143401837092e-05, "loss": 1.8919, "step": 16540 }, { "epoch": 0.72, "learning_rate": 4.3182701943799806e-05, "loss": 1.8699, "step": 16550 }, { "epoch": 0.72, "learning_rate": 4.316396349749054e-05, "loss": 1.8623, "step": 16560 }, { "epoch": 0.73, "learning_rate": 4.314521868850016e-05, "loss": 1.9231, "step": 16570 }, { "epoch": 0.73, "learning_rate": 4.312646752588881e-05, "loss": 1.8877, "step": 16580 }, { "epoch": 0.73, "learning_rate": 4.310771001871969e-05, "loss": 1.861, "step": 16590 }, { "epoch": 0.73, "learning_rate": 4.308894617605907e-05, "loss": 1.8961, "step": 16600 }, { "epoch": 0.73, "eval_loss": 1.8638092279434204, "eval_runtime": 11.7356, "eval_samples_per_second": 349.022, "eval_steps_per_second": 21.814, "step": 16600 }, { "epoch": 0.73, "learning_rate": 4.307017600697627e-05, "loss": 1.9087, "step": 16610 }, { "epoch": 0.73, "learning_rate": 4.30513995205437e-05, "loss": 1.9017, "step": 16620 }, { "epoch": 0.73, "learning_rate": 4.30326167258368e-05, "loss": 1.8747, "step": 16630 }, { "epoch": 0.73, "learning_rate": 4.301382763193404e-05, "loss": 1.8697, "step": 16640 }, { "epoch": 0.73, "learning_rate": 4.2995032247916974e-05, "loss": 1.8932, "step": 16650 }, { "epoch": 0.73, "learning_rate": 4.297623058287017e-05, "loss": 1.8577, "step": 16660 }, { "epoch": 0.73, "learning_rate": 4.295742264588125e-05, "loss": 1.8503, "step": 16670 }, { "epoch": 0.73, "learning_rate": 4.2938608446040846e-05, "loss": 1.8975, "step": 16680 }, { "epoch": 0.73, "learning_rate": 4.2919787992442646e-05, "loss": 1.9002, "step": 16690 }, { "epoch": 0.73, "learning_rate": 4.2900961294183326e-05, "loss": 1.9155, "step": 16700 }, { "epoch": 0.73, "eval_loss": 1.8630321025848389, "eval_runtime": 11.6984, "eval_samples_per_second": 350.132, "eval_steps_per_second": 21.883, "step": 16700 }, { "epoch": 0.73, "learning_rate": 4.2882128360362616e-05, "loss": 1.9088, "step": 16710 }, { "epoch": 0.73, "learning_rate": 4.2863289200083226e-05, "loss": 1.9019, "step": 16720 }, { "epoch": 0.73, "learning_rate": 4.2844443822450896e-05, "loss": 1.9395, "step": 16730 }, { "epoch": 0.73, "learning_rate": 4.282559223657437e-05, "loss": 1.8582, "step": 16740 }, { "epoch": 0.73, "learning_rate": 4.2806734451565385e-05, "loss": 1.9054, "step": 16750 }, { "epoch": 0.73, "learning_rate": 4.2787870476538685e-05, "loss": 1.9036, "step": 16760 }, { "epoch": 0.73, "learning_rate": 4.276900032061198e-05, "loss": 1.8571, "step": 16770 }, { "epoch": 0.73, "learning_rate": 4.275012399290602e-05, "loss": 1.898, "step": 16780 }, { "epoch": 0.73, "learning_rate": 4.273124150254447e-05, "loss": 1.905, "step": 16790 }, { "epoch": 0.74, "learning_rate": 4.271235285865404e-05, "loss": 1.8843, "step": 16800 }, { "epoch": 0.74, "eval_loss": 1.8624677658081055, "eval_runtime": 11.5664, "eval_samples_per_second": 354.13, "eval_steps_per_second": 22.133, "step": 16800 }, { "epoch": 0.74, "learning_rate": 4.269345807036436e-05, "loss": 1.8558, "step": 16810 }, { "epoch": 0.74, "learning_rate": 4.267455714680807e-05, "loss": 1.9309, "step": 16820 }, { "epoch": 0.74, "learning_rate": 4.2655650097120746e-05, "loss": 1.885, "step": 16830 }, { "epoch": 0.74, "learning_rate": 4.2636736930440935e-05, "loss": 1.8901, "step": 16840 }, { "epoch": 0.74, "learning_rate": 4.261781765591016e-05, "loss": 1.9033, "step": 16850 }, { "epoch": 0.74, "learning_rate": 4.259889228267285e-05, "loss": 1.8814, "step": 16860 }, { "epoch": 0.74, "learning_rate": 4.257996081987644e-05, "loss": 1.8316, "step": 16870 }, { "epoch": 0.74, "learning_rate": 4.256102327667127e-05, "loss": 1.8846, "step": 16880 }, { "epoch": 0.74, "learning_rate": 4.254207966221062e-05, "loss": 1.8539, "step": 16890 }, { "epoch": 0.74, "learning_rate": 4.2523129985650715e-05, "loss": 1.871, "step": 16900 }, { "epoch": 0.74, "eval_loss": 1.8623075485229492, "eval_runtime": 11.6133, "eval_samples_per_second": 352.699, "eval_steps_per_second": 22.044, "step": 16900 }, { "epoch": 0.74, "learning_rate": 4.250417425615071e-05, "loss": 1.8487, "step": 16910 }, { "epoch": 0.74, "learning_rate": 4.248521248287269e-05, "loss": 1.9164, "step": 16920 }, { "epoch": 0.74, "learning_rate": 4.2466244674981633e-05, "loss": 1.8555, "step": 16930 }, { "epoch": 0.74, "learning_rate": 4.2447270841645486e-05, "loss": 1.8263, "step": 16940 }, { "epoch": 0.74, "learning_rate": 4.2428290992035055e-05, "loss": 1.8682, "step": 16950 }, { "epoch": 0.74, "learning_rate": 4.2409305135324085e-05, "loss": 1.9171, "step": 16960 }, { "epoch": 0.74, "learning_rate": 4.2390313280689204e-05, "loss": 1.9025, "step": 16970 }, { "epoch": 0.74, "learning_rate": 4.237131543730997e-05, "loss": 1.885, "step": 16980 }, { "epoch": 0.74, "learning_rate": 4.23523116143688e-05, "loss": 1.9243, "step": 16990 }, { "epoch": 0.74, "learning_rate": 4.2333301821051024e-05, "loss": 1.8804, "step": 17000 }, { "epoch": 0.74, "eval_loss": 1.8615334033966064, "eval_runtime": 11.6378, "eval_samples_per_second": 351.957, "eval_steps_per_second": 21.997, "step": 17000 }, { "epoch": 0.74, "learning_rate": 4.231428606654486e-05, "loss": 1.8375, "step": 17010 }, { "epoch": 0.75, "learning_rate": 4.229526436004138e-05, "loss": 1.863, "step": 17020 }, { "epoch": 0.75, "learning_rate": 4.2276236710734564e-05, "loss": 1.8763, "step": 17030 }, { "epoch": 0.75, "learning_rate": 4.2257203127821243e-05, "loss": 1.9255, "step": 17040 }, { "epoch": 0.75, "learning_rate": 4.2238163620501145e-05, "loss": 1.8753, "step": 17050 }, { "epoch": 0.75, "learning_rate": 4.2219118197976814e-05, "loss": 1.8752, "step": 17060 }, { "epoch": 0.75, "learning_rate": 4.22000668694537e-05, "loss": 1.8979, "step": 17070 }, { "epoch": 0.75, "learning_rate": 4.218100964414009e-05, "loss": 1.8533, "step": 17080 }, { "epoch": 0.75, "learning_rate": 4.2161946531247104e-05, "loss": 1.8929, "step": 17090 }, { "epoch": 0.75, "learning_rate": 4.214287753998873e-05, "loss": 1.8709, "step": 17100 }, { "epoch": 0.75, "eval_loss": 1.8611003160476685, "eval_runtime": 11.6511, "eval_samples_per_second": 351.556, "eval_steps_per_second": 21.972, "step": 17100 }, { "epoch": 0.75, "learning_rate": 4.212380267958179e-05, "loss": 1.876, "step": 17110 }, { "epoch": 0.75, "learning_rate": 4.210472195924595e-05, "loss": 1.8903, "step": 17120 }, { "epoch": 0.75, "learning_rate": 4.208563538820368e-05, "loss": 1.8769, "step": 17130 }, { "epoch": 0.75, "learning_rate": 4.206654297568033e-05, "loss": 1.8677, "step": 17140 }, { "epoch": 0.75, "learning_rate": 4.204744473090401e-05, "loss": 1.8979, "step": 17150 }, { "epoch": 0.75, "learning_rate": 4.2028340663105714e-05, "loss": 1.904, "step": 17160 }, { "epoch": 0.75, "learning_rate": 4.200923078151919e-05, "loss": 1.9036, "step": 17170 }, { "epoch": 0.75, "learning_rate": 4.199011509538104e-05, "loss": 1.8592, "step": 17180 }, { "epoch": 0.75, "learning_rate": 4.1970993613930655e-05, "loss": 1.8772, "step": 17190 }, { "epoch": 0.75, "learning_rate": 4.1951866346410225e-05, "loss": 1.8779, "step": 17200 }, { "epoch": 0.75, "eval_loss": 1.8605446815490723, "eval_runtime": 11.5509, "eval_samples_per_second": 354.603, "eval_steps_per_second": 22.163, "step": 17200 }, { "epoch": 0.75, "learning_rate": 4.1932733302064745e-05, "loss": 1.8491, "step": 17210 }, { "epoch": 0.75, "learning_rate": 4.191359449014197e-05, "loss": 1.8851, "step": 17220 }, { "epoch": 0.75, "learning_rate": 4.189444991989251e-05, "loss": 1.8727, "step": 17230 }, { "epoch": 0.75, "learning_rate": 4.187529960056969e-05, "loss": 1.8844, "step": 17240 }, { "epoch": 0.76, "learning_rate": 4.185614354142965e-05, "loss": 1.9146, "step": 17250 }, { "epoch": 0.76, "learning_rate": 4.1836981751731286e-05, "loss": 1.8736, "step": 17260 }, { "epoch": 0.76, "learning_rate": 4.1817814240736294e-05, "loss": 1.852, "step": 17270 }, { "epoch": 0.76, "learning_rate": 4.179864101770911e-05, "loss": 1.9035, "step": 17280 }, { "epoch": 0.76, "learning_rate": 4.177946209191691e-05, "loss": 1.8563, "step": 17290 }, { "epoch": 0.76, "learning_rate": 4.176027747262968e-05, "loss": 1.9006, "step": 17300 }, { "epoch": 0.76, "eval_loss": 1.8597712516784668, "eval_runtime": 11.6159, "eval_samples_per_second": 352.621, "eval_steps_per_second": 22.039, "step": 17300 }, { "epoch": 0.76, "learning_rate": 4.1741087169120106e-05, "loss": 1.9001, "step": 17310 }, { "epoch": 0.76, "learning_rate": 4.1721891190663674e-05, "loss": 1.8468, "step": 17320 }, { "epoch": 0.76, "learning_rate": 4.170268954653856e-05, "loss": 1.8753, "step": 17330 }, { "epoch": 0.76, "learning_rate": 4.1683482246025726e-05, "loss": 1.8843, "step": 17340 }, { "epoch": 0.76, "learning_rate": 4.166426929840883e-05, "loss": 1.8496, "step": 17350 }, { "epoch": 0.76, "learning_rate": 4.1645050712974264e-05, "loss": 1.859, "step": 17360 }, { "epoch": 0.76, "learning_rate": 4.162582649901118e-05, "loss": 1.8695, "step": 17370 }, { "epoch": 0.76, "learning_rate": 4.16065966658114e-05, "loss": 1.9075, "step": 17380 }, { "epoch": 0.76, "learning_rate": 4.1587361222669506e-05, "loss": 1.8326, "step": 17390 }, { "epoch": 0.76, "learning_rate": 4.156812017888276e-05, "loss": 1.8529, "step": 17400 }, { "epoch": 0.76, "eval_loss": 1.8598828315734863, "eval_runtime": 11.5771, "eval_samples_per_second": 353.803, "eval_steps_per_second": 22.113, "step": 17400 }, { "epoch": 0.76, "learning_rate": 4.154887354375116e-05, "loss": 1.8753, "step": 17410 }, { "epoch": 0.76, "learning_rate": 4.1529621326577375e-05, "loss": 1.8727, "step": 17420 }, { "epoch": 0.76, "learning_rate": 4.1510363536666794e-05, "loss": 1.8874, "step": 17430 }, { "epoch": 0.76, "learning_rate": 4.14911001833275e-05, "loss": 1.8964, "step": 17440 }, { "epoch": 0.76, "learning_rate": 4.147183127587026e-05, "loss": 1.9168, "step": 17450 }, { "epoch": 0.76, "learning_rate": 4.14525568236085e-05, "loss": 1.8852, "step": 17460 }, { "epoch": 0.76, "learning_rate": 4.143327683585837e-05, "loss": 1.8632, "step": 17470 }, { "epoch": 0.77, "learning_rate": 4.141399132193867e-05, "loss": 1.8671, "step": 17480 }, { "epoch": 0.77, "learning_rate": 4.1394700291170874e-05, "loss": 1.8999, "step": 17490 }, { "epoch": 0.77, "learning_rate": 4.1375403752879135e-05, "loss": 1.9191, "step": 17500 }, { "epoch": 0.77, "eval_loss": 1.85947585105896, "eval_runtime": 11.8448, "eval_samples_per_second": 345.805, "eval_steps_per_second": 21.613, "step": 17500 }, { "epoch": 0.77, "learning_rate": 4.135610171639025e-05, "loss": 1.8976, "step": 17510 }, { "epoch": 0.77, "learning_rate": 4.133679419103368e-05, "loss": 1.8638, "step": 17520 }, { "epoch": 0.77, "learning_rate": 4.1317481186141555e-05, "loss": 1.8592, "step": 17530 }, { "epoch": 0.77, "learning_rate": 4.129816271104861e-05, "loss": 1.9085, "step": 17540 }, { "epoch": 0.77, "learning_rate": 4.1278838775092277e-05, "loss": 1.9276, "step": 17550 }, { "epoch": 0.77, "learning_rate": 4.125950938761259e-05, "loss": 1.914, "step": 17560 }, { "epoch": 0.77, "learning_rate": 4.1240174557952245e-05, "loss": 1.8823, "step": 17570 }, { "epoch": 0.77, "learning_rate": 4.122083429545655e-05, "loss": 1.8713, "step": 17580 }, { "epoch": 0.77, "learning_rate": 4.120148860947343e-05, "loss": 1.9081, "step": 17590 }, { "epoch": 0.77, "learning_rate": 4.118213750935346e-05, "loss": 1.8539, "step": 17600 }, { "epoch": 0.77, "eval_loss": 1.8588768243789673, "eval_runtime": 11.5896, "eval_samples_per_second": 353.419, "eval_steps_per_second": 22.089, "step": 17600 }, { "epoch": 0.77, "learning_rate": 4.1162781004449816e-05, "loss": 1.875, "step": 17610 }, { "epoch": 0.77, "learning_rate": 4.114341910411829e-05, "loss": 1.8625, "step": 17620 }, { "epoch": 0.77, "learning_rate": 4.112405181771726e-05, "loss": 1.9175, "step": 17630 }, { "epoch": 0.77, "learning_rate": 4.110467915460775e-05, "loss": 1.9035, "step": 17640 }, { "epoch": 0.77, "learning_rate": 4.108530112415334e-05, "loss": 1.8611, "step": 17650 }, { "epoch": 0.77, "learning_rate": 4.106591773572023e-05, "loss": 1.8985, "step": 17660 }, { "epoch": 0.77, "learning_rate": 4.104652899867721e-05, "loss": 1.9136, "step": 17670 }, { "epoch": 0.77, "learning_rate": 4.1027134922395656e-05, "loss": 1.9122, "step": 17680 }, { "epoch": 0.77, "learning_rate": 4.1007735516249484e-05, "loss": 1.8883, "step": 17690 }, { "epoch": 0.77, "learning_rate": 4.098833078961526e-05, "loss": 1.8788, "step": 17700 }, { "epoch": 0.77, "eval_loss": 1.8577911853790283, "eval_runtime": 11.7168, "eval_samples_per_second": 349.584, "eval_steps_per_second": 21.849, "step": 17700 }, { "epoch": 0.78, "learning_rate": 4.0968920751872036e-05, "loss": 1.8542, "step": 17710 }, { "epoch": 0.78, "learning_rate": 4.0949505412401516e-05, "loss": 1.9247, "step": 17720 }, { "epoch": 0.78, "learning_rate": 4.0930084780587914e-05, "loss": 1.8587, "step": 17730 }, { "epoch": 0.78, "learning_rate": 4.0910658865817996e-05, "loss": 1.8966, "step": 17740 }, { "epoch": 0.78, "learning_rate": 4.089122767748113e-05, "loss": 1.8524, "step": 17750 }, { "epoch": 0.78, "learning_rate": 4.087179122496918e-05, "loss": 1.836, "step": 17760 }, { "epoch": 0.78, "learning_rate": 4.085234951767658e-05, "loss": 1.8473, "step": 17770 }, { "epoch": 0.78, "learning_rate": 4.083290256500031e-05, "loss": 1.8628, "step": 17780 }, { "epoch": 0.78, "learning_rate": 4.081345037633988e-05, "loss": 1.861, "step": 17790 }, { "epoch": 0.78, "learning_rate": 4.079399296109731e-05, "loss": 1.9114, "step": 17800 }, { "epoch": 0.78, "eval_loss": 1.8577276468276978, "eval_runtime": 11.5497, "eval_samples_per_second": 354.642, "eval_steps_per_second": 22.165, "step": 17800 }, { "epoch": 0.78, "learning_rate": 4.077453032867717e-05, "loss": 1.8936, "step": 17810 }, { "epoch": 0.78, "learning_rate": 4.075506248848656e-05, "loss": 1.8647, "step": 17820 }, { "epoch": 0.78, "learning_rate": 4.073558944993506e-05, "loss": 1.9102, "step": 17830 }, { "epoch": 0.78, "learning_rate": 4.07161112224348e-05, "loss": 1.9068, "step": 17840 }, { "epoch": 0.78, "learning_rate": 4.0696627815400386e-05, "loss": 1.8272, "step": 17850 }, { "epoch": 0.78, "learning_rate": 4.0677139238248966e-05, "loss": 1.8715, "step": 17860 }, { "epoch": 0.78, "learning_rate": 4.0657645500400155e-05, "loss": 1.9281, "step": 17870 }, { "epoch": 0.78, "learning_rate": 4.063814661127607e-05, "loss": 1.9073, "step": 17880 }, { "epoch": 0.78, "learning_rate": 4.061864258030132e-05, "loss": 1.8984, "step": 17890 }, { "epoch": 0.78, "learning_rate": 4.0599133416903e-05, "loss": 1.8766, "step": 17900 }, { "epoch": 0.78, "eval_loss": 1.8569570779800415, "eval_runtime": 11.5288, "eval_samples_per_second": 355.285, "eval_steps_per_second": 22.205, "step": 17900 }, { "epoch": 0.78, "learning_rate": 4.05796191305107e-05, "loss": 1.8893, "step": 17910 }, { "epoch": 0.78, "learning_rate": 4.056009973055645e-05, "loss": 1.8882, "step": 17920 }, { "epoch": 0.78, "learning_rate": 4.0540575226474785e-05, "loss": 1.8622, "step": 17930 }, { "epoch": 0.79, "learning_rate": 4.052104562770269e-05, "loss": 1.9042, "step": 17940 }, { "epoch": 0.79, "learning_rate": 4.0501510943679616e-05, "loss": 1.8958, "step": 17950 }, { "epoch": 0.79, "learning_rate": 4.0481971183847475e-05, "loss": 1.9045, "step": 17960 }, { "epoch": 0.79, "learning_rate": 4.046242635765064e-05, "loss": 1.906, "step": 17970 }, { "epoch": 0.79, "learning_rate": 4.044287647453592e-05, "loss": 1.8578, "step": 17980 }, { "epoch": 0.79, "learning_rate": 4.042332154395256e-05, "loss": 1.8961, "step": 17990 }, { "epoch": 0.79, "learning_rate": 4.040376157535226e-05, "loss": 1.8905, "step": 18000 }, { "epoch": 0.79, "eval_loss": 1.8565237522125244, "eval_runtime": 11.8383, "eval_samples_per_second": 345.996, "eval_steps_per_second": 21.625, "step": 18000 }, { "epoch": 0.79, "learning_rate": 4.038419657818916e-05, "loss": 1.8752, "step": 18010 }, { "epoch": 0.79, "learning_rate": 4.036462656191983e-05, "loss": 1.847, "step": 18020 }, { "epoch": 0.79, "learning_rate": 4.0345051536003235e-05, "loss": 1.8536, "step": 18030 }, { "epoch": 0.79, "learning_rate": 4.03254715099008e-05, "loss": 1.9066, "step": 18040 }, { "epoch": 0.79, "learning_rate": 4.0305886493076335e-05, "loss": 1.913, "step": 18050 }, { "epoch": 0.79, "learning_rate": 4.028629649499611e-05, "loss": 1.8413, "step": 18060 }, { "epoch": 0.79, "learning_rate": 4.026670152512874e-05, "loss": 1.9036, "step": 18070 }, { "epoch": 0.79, "learning_rate": 4.024710159294529e-05, "loss": 1.8875, "step": 18080 }, { "epoch": 0.79, "learning_rate": 4.02274967079192e-05, "loss": 1.866, "step": 18090 }, { "epoch": 0.79, "learning_rate": 4.020788687952632e-05, "loss": 1.9219, "step": 18100 }, { "epoch": 0.79, "eval_loss": 1.8559688329696655, "eval_runtime": 11.6536, "eval_samples_per_second": 351.479, "eval_steps_per_second": 21.967, "step": 18100 }, { "epoch": 0.79, "learning_rate": 4.018827211724487e-05, "loss": 1.8589, "step": 18110 }, { "epoch": 0.79, "learning_rate": 4.016865243055546e-05, "loss": 1.9037, "step": 18120 }, { "epoch": 0.79, "learning_rate": 4.0149027828941115e-05, "loss": 1.8967, "step": 18130 }, { "epoch": 0.79, "learning_rate": 4.012939832188718e-05, "loss": 1.924, "step": 18140 }, { "epoch": 0.79, "learning_rate": 4.0109763918881405e-05, "loss": 1.8552, "step": 18150 }, { "epoch": 0.79, "learning_rate": 4.00901246294139e-05, "loss": 1.8851, "step": 18160 }, { "epoch": 0.8, "learning_rate": 4.007048046297714e-05, "loss": 1.8228, "step": 18170 }, { "epoch": 0.8, "learning_rate": 4.005083142906594e-05, "loss": 1.8643, "step": 18180 }, { "epoch": 0.8, "learning_rate": 4.003117753717749e-05, "loss": 1.8684, "step": 18190 }, { "epoch": 0.8, "learning_rate": 4.001151879681132e-05, "loss": 1.9042, "step": 18200 }, { "epoch": 0.8, "eval_loss": 1.8557652235031128, "eval_runtime": 11.5663, "eval_samples_per_second": 354.133, "eval_steps_per_second": 22.133, "step": 18200 }, { "epoch": 0.8, "learning_rate": 3.999185521746929e-05, "loss": 1.8764, "step": 18210 }, { "epoch": 0.8, "learning_rate": 3.9972186808655624e-05, "loss": 1.8506, "step": 18220 }, { "epoch": 0.8, "learning_rate": 3.9952513579876855e-05, "loss": 1.8802, "step": 18230 }, { "epoch": 0.8, "learning_rate": 3.993283554064187e-05, "loss": 1.8562, "step": 18240 }, { "epoch": 0.8, "learning_rate": 3.9913152700461876e-05, "loss": 1.9276, "step": 18250 }, { "epoch": 0.8, "learning_rate": 3.9893465068850366e-05, "loss": 1.8899, "step": 18260 }, { "epoch": 0.8, "learning_rate": 3.98737726553232e-05, "loss": 1.8703, "step": 18270 }, { "epoch": 0.8, "learning_rate": 3.9854075469398514e-05, "loss": 1.9109, "step": 18280 }, { "epoch": 0.8, "learning_rate": 3.983437352059677e-05, "loss": 1.8824, "step": 18290 }, { "epoch": 0.8, "learning_rate": 3.981466681844071e-05, "loss": 1.8573, "step": 18300 }, { "epoch": 0.8, "eval_loss": 1.854828119277954, "eval_runtime": 11.5928, "eval_samples_per_second": 353.322, "eval_steps_per_second": 22.083, "step": 18300 }, { "epoch": 0.8, "learning_rate": 3.97949553724554e-05, "loss": 1.8958, "step": 18310 }, { "epoch": 0.8, "learning_rate": 3.977523919216819e-05, "loss": 1.9016, "step": 18320 }, { "epoch": 0.8, "learning_rate": 3.975551828710871e-05, "loss": 1.8862, "step": 18330 }, { "epoch": 0.8, "learning_rate": 3.973579266680888e-05, "loss": 1.8684, "step": 18340 }, { "epoch": 0.8, "learning_rate": 3.971606234080289e-05, "loss": 1.8834, "step": 18350 }, { "epoch": 0.8, "learning_rate": 3.969632731862722e-05, "loss": 1.8579, "step": 18360 }, { "epoch": 0.8, "learning_rate": 3.967658760982061e-05, "loss": 1.905, "step": 18370 }, { "epoch": 0.8, "learning_rate": 3.965684322392407e-05, "loss": 1.8533, "step": 18380 }, { "epoch": 0.81, "learning_rate": 3.963709417048087e-05, "loss": 1.8636, "step": 18390 }, { "epoch": 0.81, "learning_rate": 3.961734045903652e-05, "loss": 1.8764, "step": 18400 }, { "epoch": 0.81, "eval_loss": 1.8546396493911743, "eval_runtime": 11.6105, "eval_samples_per_second": 352.785, "eval_steps_per_second": 22.049, "step": 18400 }, { "epoch": 0.81, "learning_rate": 3.959758209913881e-05, "loss": 1.9181, "step": 18410 }, { "epoch": 0.81, "learning_rate": 3.957781910033776e-05, "loss": 1.8662, "step": 18420 }, { "epoch": 0.81, "learning_rate": 3.955805147218563e-05, "loss": 1.8574, "step": 18430 }, { "epoch": 0.81, "learning_rate": 3.953827922423692e-05, "loss": 1.8516, "step": 18440 }, { "epoch": 0.81, "learning_rate": 3.9518502366048375e-05, "loss": 1.8576, "step": 18450 }, { "epoch": 0.81, "learning_rate": 3.949872090717894e-05, "loss": 1.8712, "step": 18460 }, { "epoch": 0.81, "learning_rate": 3.947893485718982e-05, "loss": 1.8648, "step": 18470 }, { "epoch": 0.81, "learning_rate": 3.945914422564441e-05, "loss": 1.9053, "step": 18480 }, { "epoch": 0.81, "learning_rate": 3.943934902210834e-05, "loss": 1.8801, "step": 18490 }, { "epoch": 0.81, "learning_rate": 3.941954925614943e-05, "loss": 1.8713, "step": 18500 }, { "epoch": 0.81, "eval_loss": 1.854550838470459, "eval_runtime": 11.781, "eval_samples_per_second": 347.68, "eval_steps_per_second": 21.73, "step": 18500 }, { "epoch": 0.81, "learning_rate": 3.939974493733773e-05, "loss": 1.8499, "step": 18510 }, { "epoch": 0.81, "learning_rate": 3.9379936075245464e-05, "loss": 1.8369, "step": 18520 }, { "epoch": 0.81, "learning_rate": 3.9360122679447055e-05, "loss": 1.8367, "step": 18530 }, { "epoch": 0.81, "learning_rate": 3.934030475951915e-05, "loss": 1.8849, "step": 18540 }, { "epoch": 0.81, "learning_rate": 3.932048232504053e-05, "loss": 1.8544, "step": 18550 }, { "epoch": 0.81, "learning_rate": 3.930065538559222e-05, "loss": 1.9585, "step": 18560 }, { "epoch": 0.81, "learning_rate": 3.928082395075736e-05, "loss": 1.8535, "step": 18570 }, { "epoch": 0.81, "learning_rate": 3.926098803012132e-05, "loss": 1.8866, "step": 18580 }, { "epoch": 0.81, "learning_rate": 3.924114763327159e-05, "loss": 1.8637, "step": 18590 }, { "epoch": 0.81, "learning_rate": 3.9221302769797836e-05, "loss": 1.8889, "step": 18600 }, { "epoch": 0.81, "eval_loss": 1.8540050983428955, "eval_runtime": 11.6301, "eval_samples_per_second": 352.189, "eval_steps_per_second": 22.012, "step": 18600 }, { "epoch": 0.81, "learning_rate": 3.920145344929192e-05, "loss": 1.8645, "step": 18610 }, { "epoch": 0.82, "learning_rate": 3.9181599681347816e-05, "loss": 1.8928, "step": 18620 }, { "epoch": 0.82, "learning_rate": 3.9161741475561654e-05, "loss": 1.8368, "step": 18630 }, { "epoch": 0.82, "learning_rate": 3.914187884153171e-05, "loss": 1.8581, "step": 18640 }, { "epoch": 0.82, "learning_rate": 3.912201178885843e-05, "loss": 1.8422, "step": 18650 }, { "epoch": 0.82, "learning_rate": 3.910214032714434e-05, "loss": 1.8837, "step": 18660 }, { "epoch": 0.82, "learning_rate": 3.9082264465994165e-05, "loss": 1.8372, "step": 18670 }, { "epoch": 0.82, "learning_rate": 3.9062384215014696e-05, "loss": 1.8619, "step": 18680 }, { "epoch": 0.82, "learning_rate": 3.904249958381487e-05, "loss": 1.841, "step": 18690 }, { "epoch": 0.82, "learning_rate": 3.902261058200576e-05, "loss": 1.8825, "step": 18700 }, { "epoch": 0.82, "eval_loss": 1.8539540767669678, "eval_runtime": 11.6061, "eval_samples_per_second": 352.918, "eval_steps_per_second": 22.057, "step": 18700 }, { "epoch": 0.82, "learning_rate": 3.900271721920051e-05, "loss": 1.8167, "step": 18710 }, { "epoch": 0.82, "learning_rate": 3.8982819505014414e-05, "loss": 1.8565, "step": 18720 }, { "epoch": 0.82, "learning_rate": 3.896291744906482e-05, "loss": 1.8591, "step": 18730 }, { "epoch": 0.82, "learning_rate": 3.8943011060971254e-05, "loss": 1.8222, "step": 18740 }, { "epoch": 0.82, "learning_rate": 3.8923100350355236e-05, "loss": 1.8635, "step": 18750 }, { "epoch": 0.82, "learning_rate": 3.890318532684046e-05, "loss": 1.8953, "step": 18760 }, { "epoch": 0.82, "learning_rate": 3.888326600005264e-05, "loss": 1.8781, "step": 18770 }, { "epoch": 0.82, "learning_rate": 3.8863342379619634e-05, "loss": 1.8203, "step": 18780 }, { "epoch": 0.82, "learning_rate": 3.884341447517132e-05, "loss": 1.8809, "step": 18790 }, { "epoch": 0.82, "learning_rate": 3.882348229633967e-05, "loss": 1.8686, "step": 18800 }, { "epoch": 0.82, "eval_loss": 1.8534257411956787, "eval_runtime": 11.5432, "eval_samples_per_second": 354.841, "eval_steps_per_second": 22.178, "step": 18800 }, { "epoch": 0.82, "learning_rate": 3.8803545852758726e-05, "loss": 1.8958, "step": 18810 }, { "epoch": 0.82, "learning_rate": 3.8783605154064566e-05, "loss": 1.8794, "step": 18820 }, { "epoch": 0.82, "learning_rate": 3.8763660209895374e-05, "loss": 1.9037, "step": 18830 }, { "epoch": 0.82, "learning_rate": 3.8743711029891335e-05, "loss": 1.9141, "step": 18840 }, { "epoch": 0.83, "learning_rate": 3.872375762369471e-05, "loss": 1.8257, "step": 18850 }, { "epoch": 0.83, "learning_rate": 3.87038000009498e-05, "loss": 1.8494, "step": 18860 }, { "epoch": 0.83, "learning_rate": 3.8683838171302935e-05, "loss": 1.8758, "step": 18870 }, { "epoch": 0.83, "learning_rate": 3.8663872144402466e-05, "loss": 1.8392, "step": 18880 }, { "epoch": 0.83, "learning_rate": 3.864390192989881e-05, "loss": 1.8782, "step": 18890 }, { "epoch": 0.83, "learning_rate": 3.862392753744438e-05, "loss": 1.8514, "step": 18900 }, { "epoch": 0.83, "eval_loss": 1.8528721332550049, "eval_runtime": 11.544, "eval_samples_per_second": 354.817, "eval_steps_per_second": 22.176, "step": 18900 }, { "epoch": 0.83, "learning_rate": 3.860394897669361e-05, "loss": 1.8893, "step": 18910 }, { "epoch": 0.83, "learning_rate": 3.858396625730297e-05, "loss": 1.9059, "step": 18920 }, { "epoch": 0.83, "learning_rate": 3.8563979388930914e-05, "loss": 1.8665, "step": 18930 }, { "epoch": 0.83, "learning_rate": 3.8543988381237904e-05, "loss": 1.8418, "step": 18940 }, { "epoch": 0.83, "learning_rate": 3.852399324388642e-05, "loss": 1.8945, "step": 18950 }, { "epoch": 0.83, "learning_rate": 3.850399398654093e-05, "loss": 1.8753, "step": 18960 }, { "epoch": 0.83, "learning_rate": 3.848399061886789e-05, "loss": 1.8824, "step": 18970 }, { "epoch": 0.83, "learning_rate": 3.8463983150535735e-05, "loss": 1.8406, "step": 18980 }, { "epoch": 0.83, "learning_rate": 3.844397159121491e-05, "loss": 1.8191, "step": 18990 }, { "epoch": 0.83, "learning_rate": 3.8423955950577806e-05, "loss": 1.8585, "step": 19000 }, { "epoch": 0.83, "eval_loss": 1.8524885177612305, "eval_runtime": 11.8631, "eval_samples_per_second": 345.273, "eval_steps_per_second": 21.58, "step": 19000 }, { "epoch": 0.83, "learning_rate": 3.840393623829879e-05, "loss": 1.903, "step": 19010 }, { "epoch": 0.83, "learning_rate": 3.838391246405423e-05, "loss": 1.8799, "step": 19020 }, { "epoch": 0.83, "learning_rate": 3.8363884637522414e-05, "loss": 1.9105, "step": 19030 }, { "epoch": 0.83, "learning_rate": 3.834385276838362e-05, "loss": 1.8475, "step": 19040 }, { "epoch": 0.83, "learning_rate": 3.832381686632006e-05, "loss": 1.9121, "step": 19050 }, { "epoch": 0.83, "learning_rate": 3.830377694101592e-05, "loss": 1.9089, "step": 19060 }, { "epoch": 0.83, "learning_rate": 3.8283733002157296e-05, "loss": 1.8896, "step": 19070 }, { "epoch": 0.84, "learning_rate": 3.8263685059432246e-05, "loss": 1.8935, "step": 19080 }, { "epoch": 0.84, "learning_rate": 3.8243633122530754e-05, "loss": 1.8786, "step": 19090 }, { "epoch": 0.84, "learning_rate": 3.8223577201144766e-05, "loss": 1.8583, "step": 19100 }, { "epoch": 0.84, "eval_loss": 1.8515942096710205, "eval_runtime": 11.739, "eval_samples_per_second": 348.922, "eval_steps_per_second": 21.808, "step": 19100 }, { "epoch": 0.84, "learning_rate": 3.82035173049681e-05, "loss": 1.9034, "step": 19110 }, { "epoch": 0.84, "learning_rate": 3.8183453443696535e-05, "loss": 1.9279, "step": 19120 }, { "epoch": 0.84, "learning_rate": 3.816338562702775e-05, "loss": 1.937, "step": 19130 }, { "epoch": 0.84, "learning_rate": 3.8143313864661336e-05, "loss": 1.8961, "step": 19140 }, { "epoch": 0.84, "learning_rate": 3.812323816629882e-05, "loss": 1.8915, "step": 19150 }, { "epoch": 0.84, "learning_rate": 3.810315854164357e-05, "loss": 1.8638, "step": 19160 }, { "epoch": 0.84, "learning_rate": 3.808307500040091e-05, "loss": 1.8666, "step": 19170 }, { "epoch": 0.84, "learning_rate": 3.8062987552278034e-05, "loss": 1.8889, "step": 19180 }, { "epoch": 0.84, "learning_rate": 3.8042896206984024e-05, "loss": 1.8346, "step": 19190 }, { "epoch": 0.84, "learning_rate": 3.802280097422984e-05, "loss": 1.8317, "step": 19200 }, { "epoch": 0.84, "eval_loss": 1.8512599468231201, "eval_runtime": 11.4851, "eval_samples_per_second": 356.637, "eval_steps_per_second": 22.29, "step": 19200 }, { "epoch": 0.84, "learning_rate": 3.800270186372836e-05, "loss": 1.8443, "step": 19210 }, { "epoch": 0.84, "learning_rate": 3.798259888519426e-05, "loss": 1.8674, "step": 19220 }, { "epoch": 0.84, "learning_rate": 3.796249204834416e-05, "loss": 1.8872, "step": 19230 }, { "epoch": 0.84, "learning_rate": 3.794238136289651e-05, "loss": 1.8847, "step": 19240 }, { "epoch": 0.84, "learning_rate": 3.792226683857162e-05, "loss": 1.9199, "step": 19250 }, { "epoch": 0.84, "learning_rate": 3.790214848509166e-05, "loss": 1.8791, "step": 19260 }, { "epoch": 0.84, "learning_rate": 3.788202631218066e-05, "loss": 1.869, "step": 19270 }, { "epoch": 0.84, "learning_rate": 3.7861900329564485e-05, "loss": 1.8384, "step": 19280 }, { "epoch": 0.84, "learning_rate": 3.784177054697083e-05, "loss": 1.8943, "step": 19290 }, { "epoch": 0.84, "learning_rate": 3.782163697412927e-05, "loss": 1.8706, "step": 19300 }, { "epoch": 0.84, "eval_loss": 1.850408911705017, "eval_runtime": 11.5994, "eval_samples_per_second": 353.121, "eval_steps_per_second": 22.07, "step": 19300 }, { "epoch": 0.85, "learning_rate": 3.780149962077115e-05, "loss": 1.87, "step": 19310 }, { "epoch": 0.85, "learning_rate": 3.7781358496629704e-05, "loss": 1.8783, "step": 19320 }, { "epoch": 0.85, "learning_rate": 3.776121361143995e-05, "loss": 1.9046, "step": 19330 }, { "epoch": 0.85, "learning_rate": 3.774106497493872e-05, "loss": 1.9047, "step": 19340 }, { "epoch": 0.85, "learning_rate": 3.772091259686469e-05, "loss": 1.8444, "step": 19350 }, { "epoch": 0.85, "learning_rate": 3.770075648695832e-05, "loss": 1.8606, "step": 19360 }, { "epoch": 0.85, "learning_rate": 3.7680596654961886e-05, "loss": 1.8874, "step": 19370 }, { "epoch": 0.85, "learning_rate": 3.7660433110619447e-05, "loss": 1.863, "step": 19380 }, { "epoch": 0.85, "learning_rate": 3.764026586367687e-05, "loss": 1.8678, "step": 19390 }, { "epoch": 0.85, "learning_rate": 3.762009492388182e-05, "loss": 1.8803, "step": 19400 }, { "epoch": 0.85, "eval_loss": 1.8501853942871094, "eval_runtime": 11.6025, "eval_samples_per_second": 353.026, "eval_steps_per_second": 22.064, "step": 19400 }, { "epoch": 0.85, "learning_rate": 3.759992030098373e-05, "loss": 1.8829, "step": 19410 }, { "epoch": 0.85, "learning_rate": 3.757974200473382e-05, "loss": 1.8918, "step": 19420 }, { "epoch": 0.85, "learning_rate": 3.755956004488508e-05, "loss": 1.8583, "step": 19430 }, { "epoch": 0.85, "learning_rate": 3.753937443119228e-05, "loss": 1.898, "step": 19440 }, { "epoch": 0.85, "learning_rate": 3.751918517341194e-05, "loss": 1.8663, "step": 19450 }, { "epoch": 0.85, "learning_rate": 3.749899228130237e-05, "loss": 1.8703, "step": 19460 }, { "epoch": 0.85, "learning_rate": 3.747879576462361e-05, "loss": 1.8459, "step": 19470 }, { "epoch": 0.85, "learning_rate": 3.7458595633137464e-05, "loss": 1.8908, "step": 19480 }, { "epoch": 0.85, "learning_rate": 3.743839189660748e-05, "loss": 1.859, "step": 19490 }, { "epoch": 0.85, "learning_rate": 3.741818456479895e-05, "loss": 1.9049, "step": 19500 }, { "epoch": 0.85, "eval_loss": 1.849900245666504, "eval_runtime": 11.8738, "eval_samples_per_second": 344.962, "eval_steps_per_second": 21.56, "step": 19500 }, { "epoch": 0.85, "learning_rate": 3.739797364747889e-05, "loss": 1.8687, "step": 19510 }, { "epoch": 0.85, "learning_rate": 3.737775915441608e-05, "loss": 1.8764, "step": 19520 }, { "epoch": 0.85, "learning_rate": 3.735754109538101e-05, "loss": 1.8692, "step": 19530 }, { "epoch": 0.86, "learning_rate": 3.733731948014587e-05, "loss": 1.8887, "step": 19540 }, { "epoch": 0.86, "learning_rate": 3.731709431848462e-05, "loss": 1.8812, "step": 19550 }, { "epoch": 0.86, "learning_rate": 3.729686562017288e-05, "loss": 1.8412, "step": 19560 }, { "epoch": 0.86, "learning_rate": 3.727663339498804e-05, "loss": 1.8897, "step": 19570 }, { "epoch": 0.86, "learning_rate": 3.725639765270913e-05, "loss": 1.8781, "step": 19580 }, { "epoch": 0.86, "learning_rate": 3.7236158403116925e-05, "loss": 1.8901, "step": 19590 }, { "epoch": 0.86, "learning_rate": 3.721591565599388e-05, "loss": 1.8884, "step": 19600 }, { "epoch": 0.86, "eval_loss": 1.8497211933135986, "eval_runtime": 11.3914, "eval_samples_per_second": 359.57, "eval_steps_per_second": 22.473, "step": 19600 }, { "epoch": 0.86, "learning_rate": 3.7195669421124136e-05, "loss": 1.8471, "step": 19610 }, { "epoch": 0.86, "learning_rate": 3.7175419708293526e-05, "loss": 1.8422, "step": 19620 }, { "epoch": 0.86, "learning_rate": 3.715516652728956e-05, "loss": 1.8919, "step": 19630 }, { "epoch": 0.86, "learning_rate": 3.713490988790144e-05, "loss": 1.8633, "step": 19640 }, { "epoch": 0.86, "learning_rate": 3.711464979992001e-05, "loss": 1.8695, "step": 19650 }, { "epoch": 0.86, "learning_rate": 3.709438627313782e-05, "loss": 1.808, "step": 19660 }, { "epoch": 0.86, "learning_rate": 3.707411931734904e-05, "loss": 1.8954, "step": 19670 }, { "epoch": 0.86, "learning_rate": 3.705384894234953e-05, "loss": 1.882, "step": 19680 }, { "epoch": 0.86, "learning_rate": 3.703357515793678e-05, "loss": 1.8588, "step": 19690 }, { "epoch": 0.86, "learning_rate": 3.701329797390994e-05, "loss": 1.8431, "step": 19700 }, { "epoch": 0.86, "eval_loss": 1.8491344451904297, "eval_runtime": 11.3311, "eval_samples_per_second": 361.483, "eval_steps_per_second": 22.593, "step": 19700 }, { "epoch": 0.86, "learning_rate": 3.699301740006982e-05, "loss": 1.896, "step": 19710 }, { "epoch": 0.86, "learning_rate": 3.69727334462188e-05, "loss": 1.8863, "step": 19720 }, { "epoch": 0.86, "learning_rate": 3.695244612216101e-05, "loss": 1.8949, "step": 19730 }, { "epoch": 0.86, "learning_rate": 3.693215543770209e-05, "loss": 1.8252, "step": 19740 }, { "epoch": 0.86, "learning_rate": 3.691186140264938e-05, "loss": 1.8675, "step": 19750 }, { "epoch": 0.86, "learning_rate": 3.689156402681181e-05, "loss": 1.8894, "step": 19760 }, { "epoch": 0.87, "learning_rate": 3.687126331999992e-05, "loss": 1.8632, "step": 19770 }, { "epoch": 0.87, "learning_rate": 3.68509592920259e-05, "loss": 1.8709, "step": 19780 }, { "epoch": 0.87, "learning_rate": 3.6830651952703484e-05, "loss": 1.8076, "step": 19790 }, { "epoch": 0.87, "learning_rate": 3.681034131184806e-05, "loss": 1.8623, "step": 19800 }, { "epoch": 0.87, "eval_loss": 1.8488744497299194, "eval_runtime": 11.5142, "eval_samples_per_second": 355.735, "eval_steps_per_second": 22.233, "step": 19800 }, { "epoch": 0.87, "learning_rate": 3.679002737927658e-05, "loss": 1.8551, "step": 19810 }, { "epoch": 0.87, "learning_rate": 3.67697101648076e-05, "loss": 1.9459, "step": 19820 }, { "epoch": 0.87, "learning_rate": 3.6749389678261254e-05, "loss": 1.8397, "step": 19830 }, { "epoch": 0.87, "learning_rate": 3.672906592945927e-05, "loss": 1.8937, "step": 19840 }, { "epoch": 0.87, "learning_rate": 3.670873892822494e-05, "loss": 1.8702, "step": 19850 }, { "epoch": 0.87, "learning_rate": 3.668840868438314e-05, "loss": 1.8441, "step": 19860 }, { "epoch": 0.87, "learning_rate": 3.66680752077603e-05, "loss": 1.834, "step": 19870 }, { "epoch": 0.87, "learning_rate": 3.6647738508184425e-05, "loss": 1.8955, "step": 19880 }, { "epoch": 0.87, "learning_rate": 3.6627398595485076e-05, "loss": 1.8395, "step": 19890 }, { "epoch": 0.87, "learning_rate": 3.660705547949335e-05, "loss": 1.8728, "step": 19900 }, { "epoch": 0.87, "eval_loss": 1.8482991456985474, "eval_runtime": 11.5679, "eval_samples_per_second": 354.082, "eval_steps_per_second": 22.13, "step": 19900 }, { "epoch": 0.87, "learning_rate": 3.658670917004192e-05, "loss": 1.823, "step": 19910 }, { "epoch": 0.87, "learning_rate": 3.656635967696498e-05, "loss": 1.9088, "step": 19920 }, { "epoch": 0.87, "learning_rate": 3.654600701009828e-05, "loss": 1.8349, "step": 19930 }, { "epoch": 0.87, "learning_rate": 3.652565117927907e-05, "loss": 1.8668, "step": 19940 }, { "epoch": 0.87, "learning_rate": 3.650529219434618e-05, "loss": 1.881, "step": 19950 }, { "epoch": 0.87, "learning_rate": 3.6484930065139946e-05, "loss": 1.8751, "step": 19960 }, { "epoch": 0.87, "learning_rate": 3.646456480150217e-05, "loss": 1.8724, "step": 19970 }, { "epoch": 0.87, "learning_rate": 3.644419641327628e-05, "loss": 1.8408, "step": 19980 }, { "epoch": 0.88, "learning_rate": 3.6423824910307095e-05, "loss": 1.8853, "step": 19990 }, { "epoch": 0.88, "learning_rate": 3.640345030244103e-05, "loss": 1.9109, "step": 20000 }, { "epoch": 0.88, "eval_loss": 1.848042368888855, "eval_runtime": 11.6122, "eval_samples_per_second": 352.732, "eval_steps_per_second": 22.046, "step": 20000 }, { "epoch": 0.88, "learning_rate": 3.638307259952593e-05, "loss": 1.9015, "step": 20010 }, { "epoch": 0.88, "learning_rate": 3.636269181141122e-05, "loss": 1.8893, "step": 20020 }, { "epoch": 0.88, "learning_rate": 3.6342307947947726e-05, "loss": 1.8432, "step": 20030 }, { "epoch": 0.88, "learning_rate": 3.6321921018987815e-05, "loss": 1.8465, "step": 20040 }, { "epoch": 0.88, "learning_rate": 3.6301531034385326e-05, "loss": 1.8545, "step": 20050 }, { "epoch": 0.88, "learning_rate": 3.628113800399556e-05, "loss": 1.8507, "step": 20060 }, { "epoch": 0.88, "learning_rate": 3.626074193767531e-05, "loss": 1.8718, "step": 20070 }, { "epoch": 0.88, "learning_rate": 3.624034284528282e-05, "loss": 1.8886, "step": 20080 }, { "epoch": 0.88, "learning_rate": 3.621994073667783e-05, "loss": 1.898, "step": 20090 }, { "epoch": 0.88, "learning_rate": 3.6199535621721465e-05, "loss": 1.8848, "step": 20100 }, { "epoch": 0.88, "eval_loss": 1.8475103378295898, "eval_runtime": 11.4108, "eval_samples_per_second": 358.959, "eval_steps_per_second": 22.435, "step": 20100 }, { "epoch": 0.88, "learning_rate": 3.61791275102764e-05, "loss": 1.8565, "step": 20110 }, { "epoch": 0.88, "learning_rate": 3.615871641220667e-05, "loss": 1.9083, "step": 20120 }, { "epoch": 0.88, "learning_rate": 3.613830233737782e-05, "loss": 1.8759, "step": 20130 }, { "epoch": 0.88, "learning_rate": 3.61178852956568e-05, "loss": 1.8727, "step": 20140 }, { "epoch": 0.88, "learning_rate": 3.609746529691198e-05, "loss": 1.8918, "step": 20150 }, { "epoch": 0.88, "learning_rate": 3.6077042351013206e-05, "loss": 1.8517, "step": 20160 }, { "epoch": 0.88, "learning_rate": 3.60566164678317e-05, "loss": 1.9069, "step": 20170 }, { "epoch": 0.88, "learning_rate": 3.603618765724014e-05, "loss": 1.8429, "step": 20180 }, { "epoch": 0.88, "learning_rate": 3.60157559291126e-05, "loss": 1.832, "step": 20190 }, { "epoch": 0.88, "learning_rate": 3.599532129332457e-05, "loss": 1.9338, "step": 20200 }, { "epoch": 0.88, "eval_loss": 1.847186803817749, "eval_runtime": 11.3797, "eval_samples_per_second": 359.94, "eval_steps_per_second": 22.496, "step": 20200 }, { "epoch": 0.88, "learning_rate": 3.597488375975294e-05, "loss": 1.8703, "step": 20210 }, { "epoch": 0.89, "learning_rate": 3.5954443338276004e-05, "loss": 1.8843, "step": 20220 }, { "epoch": 0.89, "learning_rate": 3.593400003877346e-05, "loss": 1.8451, "step": 20230 }, { "epoch": 0.89, "learning_rate": 3.591355387112637e-05, "loss": 1.8947, "step": 20240 }, { "epoch": 0.89, "learning_rate": 3.5893104845217235e-05, "loss": 1.8208, "step": 20250 }, { "epoch": 0.89, "learning_rate": 3.5872652970929874e-05, "loss": 1.9205, "step": 20260 }, { "epoch": 0.89, "learning_rate": 3.585219825814953e-05, "loss": 1.8699, "step": 20270 }, { "epoch": 0.89, "learning_rate": 3.583174071676279e-05, "loss": 1.8673, "step": 20280 }, { "epoch": 0.89, "learning_rate": 3.581128035665763e-05, "loss": 1.8823, "step": 20290 }, { "epoch": 0.89, "learning_rate": 3.579081718772337e-05, "loss": 1.8937, "step": 20300 }, { "epoch": 0.89, "eval_loss": 1.8470911979675293, "eval_runtime": 11.3691, "eval_samples_per_second": 360.276, "eval_steps_per_second": 22.517, "step": 20300 }, { "epoch": 0.89, "learning_rate": 3.577035121985069e-05, "loss": 1.8222, "step": 20310 }, { "epoch": 0.89, "learning_rate": 3.5749882462931646e-05, "loss": 1.8978, "step": 20320 }, { "epoch": 0.89, "learning_rate": 3.572941092685961e-05, "loss": 1.8855, "step": 20330 }, { "epoch": 0.89, "learning_rate": 3.570893662152931e-05, "loss": 1.8851, "step": 20340 }, { "epoch": 0.89, "learning_rate": 3.568845955683682e-05, "loss": 1.8858, "step": 20350 }, { "epoch": 0.89, "learning_rate": 3.566797974267954e-05, "loss": 1.8572, "step": 20360 }, { "epoch": 0.89, "learning_rate": 3.56474971889562e-05, "loss": 1.8883, "step": 20370 }, { "epoch": 0.89, "learning_rate": 3.5627011905566854e-05, "loss": 1.8731, "step": 20380 }, { "epoch": 0.89, "learning_rate": 3.560652390241288e-05, "loss": 1.8558, "step": 20390 }, { "epoch": 0.89, "learning_rate": 3.558603318939696e-05, "loss": 1.8459, "step": 20400 }, { "epoch": 0.89, "eval_loss": 1.8470215797424316, "eval_runtime": 11.6007, "eval_samples_per_second": 353.084, "eval_steps_per_second": 22.068, "step": 20400 }, { "epoch": 0.89, "learning_rate": 3.556553977642309e-05, "loss": 1.8773, "step": 20410 }, { "epoch": 0.89, "learning_rate": 3.554504367339659e-05, "loss": 1.8588, "step": 20420 }, { "epoch": 0.89, "learning_rate": 3.552454489022405e-05, "loss": 1.8891, "step": 20430 }, { "epoch": 0.89, "learning_rate": 3.550404343681336e-05, "loss": 1.8349, "step": 20440 }, { "epoch": 0.9, "learning_rate": 3.5483539323073735e-05, "loss": 1.8599, "step": 20450 }, { "epoch": 0.9, "learning_rate": 3.546303255891563e-05, "loss": 1.8299, "step": 20460 }, { "epoch": 0.9, "learning_rate": 3.544252315425081e-05, "loss": 1.8787, "step": 20470 }, { "epoch": 0.9, "learning_rate": 3.542201111899231e-05, "loss": 1.8508, "step": 20480 }, { "epoch": 0.9, "learning_rate": 3.5401496463054425e-05, "loss": 1.8561, "step": 20490 }, { "epoch": 0.9, "learning_rate": 3.5380979196352735e-05, "loss": 1.8587, "step": 20500 }, { "epoch": 0.9, "eval_loss": 1.8461756706237793, "eval_runtime": 11.7063, "eval_samples_per_second": 349.898, "eval_steps_per_second": 21.869, "step": 20500 }, { "epoch": 0.9, "learning_rate": 3.5360459328804064e-05, "loss": 1.9049, "step": 20510 }, { "epoch": 0.9, "learning_rate": 3.533993687032652e-05, "loss": 1.8235, "step": 20520 }, { "epoch": 0.9, "learning_rate": 3.5319411830839415e-05, "loss": 1.8665, "step": 20530 }, { "epoch": 0.9, "learning_rate": 3.529888422026336e-05, "loss": 1.9031, "step": 20540 }, { "epoch": 0.9, "learning_rate": 3.527835404852018e-05, "loss": 1.8694, "step": 20550 }, { "epoch": 0.9, "learning_rate": 3.525782132553296e-05, "loss": 1.8432, "step": 20560 }, { "epoch": 0.9, "learning_rate": 3.523728606122598e-05, "loss": 1.9015, "step": 20570 }, { "epoch": 0.9, "learning_rate": 3.521674826552479e-05, "loss": 1.8434, "step": 20580 }, { "epoch": 0.9, "learning_rate": 3.519620794835614e-05, "loss": 1.8726, "step": 20590 }, { "epoch": 0.9, "learning_rate": 3.5175665119648e-05, "loss": 1.9017, "step": 20600 }, { "epoch": 0.9, "eval_loss": 1.845084309577942, "eval_runtime": 11.6236, "eval_samples_per_second": 352.386, "eval_steps_per_second": 22.024, "step": 20600 }, { "epoch": 0.9, "learning_rate": 3.515511978932956e-05, "loss": 1.8861, "step": 20610 }, { "epoch": 0.9, "learning_rate": 3.513457196733121e-05, "loss": 1.8689, "step": 20620 }, { "epoch": 0.9, "learning_rate": 3.511402166358458e-05, "loss": 1.8851, "step": 20630 }, { "epoch": 0.9, "learning_rate": 3.5093468888022435e-05, "loss": 1.8506, "step": 20640 }, { "epoch": 0.9, "learning_rate": 3.5072913650578797e-05, "loss": 1.9112, "step": 20650 }, { "epoch": 0.9, "learning_rate": 3.505235596118884e-05, "loss": 1.8424, "step": 20660 }, { "epoch": 0.9, "learning_rate": 3.503179582978892e-05, "loss": 1.8257, "step": 20670 }, { "epoch": 0.91, "learning_rate": 3.5011233266316616e-05, "loss": 1.866, "step": 20680 }, { "epoch": 0.91, "learning_rate": 3.499066828071063e-05, "loss": 1.8652, "step": 20690 }, { "epoch": 0.91, "learning_rate": 3.497010088291089e-05, "loss": 1.8827, "step": 20700 }, { "epoch": 0.91, "eval_loss": 1.8452777862548828, "eval_runtime": 11.6079, "eval_samples_per_second": 352.863, "eval_steps_per_second": 22.054, "step": 20700 }, { "epoch": 0.91, "learning_rate": 3.494953108285843e-05, "loss": 1.8998, "step": 20710 }, { "epoch": 0.91, "learning_rate": 3.492895889049549e-05, "loss": 1.9052, "step": 20720 }, { "epoch": 0.91, "learning_rate": 3.490838431576544e-05, "loss": 1.838, "step": 20730 }, { "epoch": 0.91, "learning_rate": 3.488780736861283e-05, "loss": 1.9133, "step": 20740 }, { "epoch": 0.91, "learning_rate": 3.4867228058983316e-05, "loss": 1.8919, "step": 20750 }, { "epoch": 0.91, "learning_rate": 3.484664639682373e-05, "loss": 1.885, "step": 20760 }, { "epoch": 0.91, "learning_rate": 3.4826062392082035e-05, "loss": 1.8721, "step": 20770 }, { "epoch": 0.91, "learning_rate": 3.4805476054707315e-05, "loss": 1.8589, "step": 20780 }, { "epoch": 0.91, "learning_rate": 3.4784887394649785e-05, "loss": 1.8758, "step": 20790 }, { "epoch": 0.91, "learning_rate": 3.476429642186078e-05, "loss": 1.8723, "step": 20800 }, { "epoch": 0.91, "eval_loss": 1.844846487045288, "eval_runtime": 11.5952, "eval_samples_per_second": 353.25, "eval_steps_per_second": 22.078, "step": 20800 }, { "epoch": 0.91, "learning_rate": 3.4743703146292774e-05, "loss": 1.8686, "step": 20810 }, { "epoch": 0.91, "learning_rate": 3.472310757789932e-05, "loss": 1.8509, "step": 20820 }, { "epoch": 0.91, "learning_rate": 3.470250972663512e-05, "loss": 1.8502, "step": 20830 }, { "epoch": 0.91, "learning_rate": 3.468190960245593e-05, "loss": 1.8396, "step": 20840 }, { "epoch": 0.91, "learning_rate": 3.466130721531867e-05, "loss": 1.8543, "step": 20850 }, { "epoch": 0.91, "learning_rate": 3.464070257518127e-05, "loss": 1.8785, "step": 20860 }, { "epoch": 0.91, "learning_rate": 3.4620095692002815e-05, "loss": 1.8533, "step": 20870 }, { "epoch": 0.91, "learning_rate": 3.4599486575743454e-05, "loss": 1.8293, "step": 20880 }, { "epoch": 0.91, "learning_rate": 3.457887523636441e-05, "loss": 1.8223, "step": 20890 }, { "epoch": 0.91, "learning_rate": 3.455826168382799e-05, "loss": 1.8676, "step": 20900 }, { "epoch": 0.91, "eval_loss": 1.844473123550415, "eval_runtime": 11.7054, "eval_samples_per_second": 349.924, "eval_steps_per_second": 21.87, "step": 20900 }, { "epoch": 0.92, "learning_rate": 3.4537645928097554e-05, "loss": 1.8801, "step": 20910 }, { "epoch": 0.92, "learning_rate": 3.451702797913756e-05, "loss": 1.8434, "step": 20920 }, { "epoch": 0.92, "learning_rate": 3.449640784691349e-05, "loss": 1.8943, "step": 20930 }, { "epoch": 0.92, "learning_rate": 3.4475785541391894e-05, "loss": 1.8622, "step": 20940 }, { "epoch": 0.92, "learning_rate": 3.445516107254039e-05, "loss": 1.8625, "step": 20950 }, { "epoch": 0.92, "learning_rate": 3.44345344503276e-05, "loss": 1.86, "step": 20960 }, { "epoch": 0.92, "learning_rate": 3.441390568472323e-05, "loss": 1.8783, "step": 20970 }, { "epoch": 0.92, "learning_rate": 3.439327478569801e-05, "loss": 1.8514, "step": 20980 }, { "epoch": 0.92, "learning_rate": 3.437264176322369e-05, "loss": 1.8293, "step": 20990 }, { "epoch": 0.92, "learning_rate": 3.4352006627273036e-05, "loss": 1.8812, "step": 21000 }, { "epoch": 0.92, "eval_loss": 1.844010829925537, "eval_runtime": 11.8385, "eval_samples_per_second": 345.989, "eval_steps_per_second": 21.624, "step": 21000 }, { "epoch": 0.92, "learning_rate": 3.433136938781988e-05, "loss": 1.8436, "step": 21010 }, { "epoch": 0.92, "learning_rate": 3.431073005483902e-05, "loss": 1.8674, "step": 21020 }, { "epoch": 0.92, "learning_rate": 3.42900886383063e-05, "loss": 1.884, "step": 21030 }, { "epoch": 0.92, "learning_rate": 3.426944514819856e-05, "loss": 1.8758, "step": 21040 }, { "epoch": 0.92, "learning_rate": 3.424879959449363e-05, "loss": 1.8379, "step": 21050 }, { "epoch": 0.92, "learning_rate": 3.422815198717037e-05, "loss": 1.8983, "step": 21060 }, { "epoch": 0.92, "learning_rate": 3.4207502336208586e-05, "loss": 1.9012, "step": 21070 }, { "epoch": 0.92, "learning_rate": 3.418685065158912e-05, "loss": 1.8474, "step": 21080 }, { "epoch": 0.92, "learning_rate": 3.416619694329376e-05, "loss": 1.8605, "step": 21090 }, { "epoch": 0.92, "learning_rate": 3.414554122130529e-05, "loss": 1.8589, "step": 21100 }, { "epoch": 0.92, "eval_loss": 1.8435540199279785, "eval_runtime": 11.7192, "eval_samples_per_second": 349.512, "eval_steps_per_second": 21.844, "step": 21100 }, { "epoch": 0.92, "learning_rate": 3.412488349560748e-05, "loss": 1.9195, "step": 21110 }, { "epoch": 0.92, "learning_rate": 3.4104223776185025e-05, "loss": 1.8987, "step": 21120 }, { "epoch": 0.92, "learning_rate": 3.408356207302363e-05, "loss": 1.8662, "step": 21130 }, { "epoch": 0.93, "learning_rate": 3.4062898396109926e-05, "loss": 1.8617, "step": 21140 }, { "epoch": 0.93, "learning_rate": 3.404223275543153e-05, "loss": 1.8549, "step": 21150 }, { "epoch": 0.93, "learning_rate": 3.402156516097697e-05, "loss": 1.8974, "step": 21160 }, { "epoch": 0.93, "learning_rate": 3.400089562273576e-05, "loss": 1.8277, "step": 21170 }, { "epoch": 0.93, "learning_rate": 3.3980224150698304e-05, "loss": 1.8674, "step": 21180 }, { "epoch": 0.93, "learning_rate": 3.3959550754856e-05, "loss": 1.8959, "step": 21190 }, { "epoch": 0.93, "learning_rate": 3.3938875445201126e-05, "loss": 1.8234, "step": 21200 }, { "epoch": 0.93, "eval_loss": 1.8433294296264648, "eval_runtime": 12.3336, "eval_samples_per_second": 332.1, "eval_steps_per_second": 20.756, "step": 21200 }, { "epoch": 0.93, "learning_rate": 3.39181982317269e-05, "loss": 1.8358, "step": 21210 }, { "epoch": 0.93, "learning_rate": 3.3897519124427484e-05, "loss": 1.9033, "step": 21220 }, { "epoch": 0.93, "learning_rate": 3.387683813329791e-05, "loss": 1.8959, "step": 21230 }, { "epoch": 0.93, "learning_rate": 3.385615526833416e-05, "loss": 1.8205, "step": 21240 }, { "epoch": 0.93, "learning_rate": 3.38354705395331e-05, "loss": 1.8477, "step": 21250 }, { "epoch": 0.93, "learning_rate": 3.381478395689252e-05, "loss": 1.8897, "step": 21260 }, { "epoch": 0.93, "learning_rate": 3.3794095530411064e-05, "loss": 1.8679, "step": 21270 }, { "epoch": 0.93, "learning_rate": 3.377340527008831e-05, "loss": 1.8254, "step": 21280 }, { "epoch": 0.93, "learning_rate": 3.37527131859247e-05, "loss": 1.8897, "step": 21290 }, { "epoch": 0.93, "learning_rate": 3.373201928792158e-05, "loss": 1.878, "step": 21300 }, { "epoch": 0.93, "eval_loss": 1.8435399532318115, "eval_runtime": 11.6036, "eval_samples_per_second": 352.993, "eval_steps_per_second": 22.062, "step": 21300 }, { "epoch": 0.93, "learning_rate": 3.371132358608114e-05, "loss": 1.8616, "step": 21310 }, { "epoch": 0.93, "learning_rate": 3.3690626090406446e-05, "loss": 1.8702, "step": 21320 }, { "epoch": 0.93, "learning_rate": 3.366992681090147e-05, "loss": 1.8344, "step": 21330 }, { "epoch": 0.93, "learning_rate": 3.3649225757571e-05, "loss": 1.898, "step": 21340 }, { "epoch": 0.93, "learning_rate": 3.362852294042071e-05, "loss": 1.8452, "step": 21350 }, { "epoch": 0.94, "learning_rate": 3.3607818369457106e-05, "loss": 1.872, "step": 21360 }, { "epoch": 0.94, "learning_rate": 3.3587112054687566e-05, "loss": 1.8164, "step": 21370 }, { "epoch": 0.94, "learning_rate": 3.3566404006120296e-05, "loss": 1.8258, "step": 21380 }, { "epoch": 0.94, "learning_rate": 3.3545694233764326e-05, "loss": 1.8799, "step": 21390 }, { "epoch": 0.94, "learning_rate": 3.352498274762955e-05, "loss": 1.8901, "step": 21400 }, { "epoch": 0.94, "eval_loss": 1.8432936668395996, "eval_runtime": 11.5934, "eval_samples_per_second": 353.304, "eval_steps_per_second": 22.082, "step": 21400 }, { "epoch": 0.94, "learning_rate": 3.3504269557726675e-05, "loss": 1.8615, "step": 21410 }, { "epoch": 0.94, "learning_rate": 3.348355467406723e-05, "loss": 1.8215, "step": 21420 }, { "epoch": 0.94, "learning_rate": 3.3462838106663544e-05, "loss": 1.8374, "step": 21430 }, { "epoch": 0.94, "learning_rate": 3.344211986552881e-05, "loss": 1.9267, "step": 21440 }, { "epoch": 0.94, "learning_rate": 3.342139996067699e-05, "loss": 1.8219, "step": 21450 }, { "epoch": 0.94, "learning_rate": 3.340067840212285e-05, "loss": 1.8697, "step": 21460 }, { "epoch": 0.94, "learning_rate": 3.3379955199881973e-05, "loss": 1.883, "step": 21470 }, { "epoch": 0.94, "learning_rate": 3.3359230363970734e-05, "loss": 1.8599, "step": 21480 }, { "epoch": 0.94, "learning_rate": 3.333850390440629e-05, "loss": 1.8511, "step": 21490 }, { "epoch": 0.94, "learning_rate": 3.331777583120657e-05, "loss": 1.8464, "step": 21500 }, { "epoch": 0.94, "eval_loss": 1.843240737915039, "eval_runtime": 11.8662, "eval_samples_per_second": 345.183, "eval_steps_per_second": 21.574, "step": 21500 }, { "epoch": 0.94, "learning_rate": 3.329704615439032e-05, "loss": 1.8908, "step": 21510 }, { "epoch": 0.94, "learning_rate": 3.327631488397702e-05, "loss": 1.9016, "step": 21520 }, { "epoch": 0.94, "learning_rate": 3.3255582029986956e-05, "loss": 1.8827, "step": 21530 }, { "epoch": 0.94, "learning_rate": 3.323484760244115e-05, "loss": 1.905, "step": 21540 }, { "epoch": 0.94, "learning_rate": 3.321411161136141e-05, "loss": 1.872, "step": 21550 }, { "epoch": 0.94, "learning_rate": 3.319337406677029e-05, "loss": 1.8769, "step": 21560 }, { "epoch": 0.94, "learning_rate": 3.317263497869107e-05, "loss": 1.9206, "step": 21570 }, { "epoch": 0.94, "learning_rate": 3.315189435714781e-05, "loss": 1.8788, "step": 21580 }, { "epoch": 0.95, "learning_rate": 3.31311522121653e-05, "loss": 1.8762, "step": 21590 }, { "epoch": 0.95, "learning_rate": 3.311040855376908e-05, "loss": 1.884, "step": 21600 }, { "epoch": 0.95, "eval_loss": 1.8426785469055176, "eval_runtime": 11.6483, "eval_samples_per_second": 351.639, "eval_steps_per_second": 21.977, "step": 21600 }, { "epoch": 0.95, "learning_rate": 3.3089663391985375e-05, "loss": 1.8241, "step": 21610 }, { "epoch": 0.95, "learning_rate": 3.30689167368412e-05, "loss": 1.8922, "step": 21620 }, { "epoch": 0.95, "learning_rate": 3.304816859836424e-05, "loss": 1.8536, "step": 21630 }, { "epoch": 0.95, "learning_rate": 3.302741898658294e-05, "loss": 1.8683, "step": 21640 }, { "epoch": 0.95, "learning_rate": 3.300666791152641e-05, "loss": 1.8762, "step": 21650 }, { "epoch": 0.95, "learning_rate": 3.29859153832245e-05, "loss": 1.8442, "step": 21660 }, { "epoch": 0.95, "learning_rate": 3.296516141170776e-05, "loss": 1.8689, "step": 21670 }, { "epoch": 0.95, "learning_rate": 3.294440600700742e-05, "loss": 1.8598, "step": 21680 }, { "epoch": 0.95, "learning_rate": 3.2923649179155436e-05, "loss": 1.8641, "step": 21690 }, { "epoch": 0.95, "learning_rate": 3.290289093818442e-05, "loss": 1.8715, "step": 21700 }, { "epoch": 0.95, "eval_loss": 1.8424584865570068, "eval_runtime": 11.6455, "eval_samples_per_second": 351.723, "eval_steps_per_second": 21.983, "step": 21700 }, { "epoch": 0.95, "learning_rate": 3.288213129412766e-05, "loss": 1.84, "step": 21710 }, { "epoch": 0.95, "learning_rate": 3.286137025701916e-05, "loss": 1.8564, "step": 21720 }, { "epoch": 0.95, "learning_rate": 3.2840607836893574e-05, "loss": 1.8725, "step": 21730 }, { "epoch": 0.95, "learning_rate": 3.281984404378622e-05, "loss": 1.8576, "step": 21740 }, { "epoch": 0.95, "learning_rate": 3.27990788877331e-05, "loss": 1.882, "step": 21750 }, { "epoch": 0.95, "learning_rate": 3.277831237877085e-05, "loss": 1.8279, "step": 21760 }, { "epoch": 0.95, "learning_rate": 3.275754452693677e-05, "loss": 1.9028, "step": 21770 }, { "epoch": 0.95, "learning_rate": 3.273677534226883e-05, "loss": 1.8716, "step": 21780 }, { "epoch": 0.95, "learning_rate": 3.27160048348056e-05, "loss": 1.8502, "step": 21790 }, { "epoch": 0.95, "learning_rate": 3.269523301458633e-05, "loss": 1.8384, "step": 21800 }, { "epoch": 0.95, "eval_loss": 1.8420464992523193, "eval_runtime": 11.6127, "eval_samples_per_second": 352.716, "eval_steps_per_second": 22.045, "step": 21800 }, { "epoch": 0.95, "learning_rate": 3.2674459891650884e-05, "loss": 1.8778, "step": 21810 }, { "epoch": 0.96, "learning_rate": 3.265368547603977e-05, "loss": 1.8726, "step": 21820 }, { "epoch": 0.96, "learning_rate": 3.263290977779409e-05, "loss": 1.905, "step": 21830 }, { "epoch": 0.96, "learning_rate": 3.2612132806955604e-05, "loss": 1.868, "step": 21840 }, { "epoch": 0.96, "learning_rate": 3.259135457356667e-05, "loss": 1.8234, "step": 21850 }, { "epoch": 0.96, "learning_rate": 3.257057508767025e-05, "loss": 1.8995, "step": 21860 }, { "epoch": 0.96, "learning_rate": 3.254979435930993e-05, "loss": 1.8289, "step": 21870 }, { "epoch": 0.96, "learning_rate": 3.252901239852986e-05, "loss": 1.864, "step": 21880 }, { "epoch": 0.96, "learning_rate": 3.2508229215374845e-05, "loss": 1.8272, "step": 21890 }, { "epoch": 0.96, "learning_rate": 3.248744481989022e-05, "loss": 1.8424, "step": 21900 }, { "epoch": 0.96, "eval_loss": 1.8414499759674072, "eval_runtime": 11.656, "eval_samples_per_second": 351.406, "eval_steps_per_second": 21.963, "step": 21900 }, { "epoch": 0.96, "learning_rate": 3.2466659222121936e-05, "loss": 1.8475, "step": 21910 }, { "epoch": 0.96, "learning_rate": 3.2445872432116525e-05, "loss": 1.895, "step": 21920 }, { "epoch": 0.96, "learning_rate": 3.242508445992109e-05, "loss": 1.8655, "step": 21930 }, { "epoch": 0.96, "learning_rate": 3.2404295315583324e-05, "loss": 1.8606, "step": 21940 }, { "epoch": 0.96, "learning_rate": 3.238350500915144e-05, "loss": 1.8496, "step": 21950 }, { "epoch": 0.96, "learning_rate": 3.2362713550674256e-05, "loss": 1.8838, "step": 21960 }, { "epoch": 0.96, "learning_rate": 3.234192095020113e-05, "loss": 1.8587, "step": 21970 }, { "epoch": 0.96, "learning_rate": 3.232112721778198e-05, "loss": 1.8999, "step": 21980 }, { "epoch": 0.96, "learning_rate": 3.230033236346725e-05, "loss": 1.8584, "step": 21990 }, { "epoch": 0.96, "learning_rate": 3.227953639730798e-05, "loss": 1.8248, "step": 22000 }, { "epoch": 0.96, "eval_loss": 1.8413933515548706, "eval_runtime": 11.8856, "eval_samples_per_second": 344.619, "eval_steps_per_second": 21.539, "step": 22000 }, { "epoch": 0.96, "learning_rate": 3.225873932935567e-05, "loss": 1.8195, "step": 22010 }, { "epoch": 0.96, "learning_rate": 3.223794116966241e-05, "loss": 1.9048, "step": 22020 }, { "epoch": 0.96, "learning_rate": 3.22171419282808e-05, "loss": 1.8697, "step": 22030 }, { "epoch": 0.96, "learning_rate": 3.219634161526396e-05, "loss": 1.8435, "step": 22040 }, { "epoch": 0.97, "learning_rate": 3.2175540240665526e-05, "loss": 1.9132, "step": 22050 }, { "epoch": 0.97, "learning_rate": 3.215473781453965e-05, "loss": 1.8596, "step": 22060 }, { "epoch": 0.97, "learning_rate": 3.213393434694101e-05, "loss": 1.8665, "step": 22070 }, { "epoch": 0.97, "learning_rate": 3.211312984792476e-05, "loss": 1.8975, "step": 22080 }, { "epoch": 0.97, "learning_rate": 3.209232432754656e-05, "loss": 1.8316, "step": 22090 }, { "epoch": 0.97, "learning_rate": 3.207151779586259e-05, "loss": 1.8948, "step": 22100 }, { "epoch": 0.97, "eval_loss": 1.8409616947174072, "eval_runtime": 11.7108, "eval_samples_per_second": 349.763, "eval_steps_per_second": 21.86, "step": 22100 }, { "epoch": 0.97, "learning_rate": 3.205071026292946e-05, "loss": 1.8931, "step": 22110 }, { "epoch": 0.97, "learning_rate": 3.202990173880433e-05, "loss": 1.9053, "step": 22120 }, { "epoch": 0.97, "learning_rate": 3.2009092233544795e-05, "loss": 1.9124, "step": 22130 }, { "epoch": 0.97, "learning_rate": 3.198828175720895e-05, "loss": 1.8689, "step": 22140 }, { "epoch": 0.97, "learning_rate": 3.196747031985533e-05, "loss": 1.8693, "step": 22150 }, { "epoch": 0.97, "learning_rate": 3.194665793154297e-05, "loss": 1.8486, "step": 22160 }, { "epoch": 0.97, "learning_rate": 3.192584460233134e-05, "loss": 1.8557, "step": 22170 }, { "epoch": 0.97, "learning_rate": 3.190503034228037e-05, "loss": 1.8637, "step": 22180 }, { "epoch": 0.97, "learning_rate": 3.188421516145045e-05, "loss": 1.8603, "step": 22190 }, { "epoch": 0.97, "learning_rate": 3.18633990699024e-05, "loss": 1.8666, "step": 22200 }, { "epoch": 0.97, "eval_loss": 1.8402700424194336, "eval_runtime": 11.7872, "eval_samples_per_second": 347.497, "eval_steps_per_second": 21.719, "step": 22200 }, { "epoch": 0.97, "learning_rate": 3.184258207769749e-05, "loss": 1.888, "step": 22210 }, { "epoch": 0.97, "learning_rate": 3.182176419489741e-05, "loss": 1.8296, "step": 22220 }, { "epoch": 0.97, "learning_rate": 3.180094543156431e-05, "loss": 1.8857, "step": 22230 }, { "epoch": 0.97, "learning_rate": 3.1780125797760735e-05, "loss": 1.9119, "step": 22240 }, { "epoch": 0.97, "learning_rate": 3.175930530354968e-05, "loss": 1.8893, "step": 22250 }, { "epoch": 0.97, "learning_rate": 3.173848395899453e-05, "loss": 1.8697, "step": 22260 }, { "epoch": 0.97, "learning_rate": 3.171766177415909e-05, "loss": 1.901, "step": 22270 }, { "epoch": 0.98, "learning_rate": 3.1696838759107575e-05, "loss": 1.8397, "step": 22280 }, { "epoch": 0.98, "learning_rate": 3.1676014923904594e-05, "loss": 1.884, "step": 22290 }, { "epoch": 0.98, "learning_rate": 3.165519027861517e-05, "loss": 1.9316, "step": 22300 }, { "epoch": 0.98, "eval_loss": 1.8402478694915771, "eval_runtime": 11.775, "eval_samples_per_second": 347.857, "eval_steps_per_second": 21.741, "step": 22300 }, { "epoch": 0.98, "learning_rate": 3.163436483330469e-05, "loss": 1.8686, "step": 22310 }, { "epoch": 0.98, "learning_rate": 3.161353859803895e-05, "loss": 1.8972, "step": 22320 }, { "epoch": 0.98, "learning_rate": 3.1592711582884107e-05, "loss": 1.8481, "step": 22330 }, { "epoch": 0.98, "learning_rate": 3.1571883797906726e-05, "loss": 1.8679, "step": 22340 }, { "epoch": 0.98, "learning_rate": 3.155105525317372e-05, "loss": 1.8732, "step": 22350 }, { "epoch": 0.98, "learning_rate": 3.1530225958752365e-05, "loss": 1.8054, "step": 22360 }, { "epoch": 0.98, "learning_rate": 3.1509395924710334e-05, "loss": 1.9027, "step": 22370 }, { "epoch": 0.98, "learning_rate": 3.1488565161115593e-05, "loss": 1.8344, "step": 22380 }, { "epoch": 0.98, "learning_rate": 3.1467733678036546e-05, "loss": 1.8781, "step": 22390 }, { "epoch": 0.98, "learning_rate": 3.1446901485541865e-05, "loss": 1.8247, "step": 22400 }, { "epoch": 0.98, "eval_loss": 1.8399176597595215, "eval_runtime": 11.5771, "eval_samples_per_second": 353.801, "eval_steps_per_second": 22.113, "step": 22400 }, { "epoch": 0.98, "learning_rate": 3.142606859370063e-05, "loss": 1.8256, "step": 22410 }, { "epoch": 0.98, "learning_rate": 3.140523501258219e-05, "loss": 1.8657, "step": 22420 }, { "epoch": 0.98, "learning_rate": 3.1384400752256293e-05, "loss": 1.8599, "step": 22430 }, { "epoch": 0.98, "learning_rate": 3.136356582279298e-05, "loss": 1.8749, "step": 22440 }, { "epoch": 0.98, "learning_rate": 3.134273023426263e-05, "loss": 1.8761, "step": 22450 }, { "epoch": 0.98, "learning_rate": 3.132189399673593e-05, "loss": 1.8616, "step": 22460 }, { "epoch": 0.98, "learning_rate": 3.1301057120283876e-05, "loss": 1.865, "step": 22470 }, { "epoch": 0.98, "learning_rate": 3.1280219614977786e-05, "loss": 1.8759, "step": 22480 }, { "epoch": 0.98, "learning_rate": 3.125938149088927e-05, "loss": 1.8637, "step": 22490 }, { "epoch": 0.98, "learning_rate": 3.123854275809027e-05, "loss": 1.8637, "step": 22500 }, { "epoch": 0.98, "eval_loss": 1.839624285697937, "eval_runtime": 12.1106, "eval_samples_per_second": 338.216, "eval_steps_per_second": 21.139, "step": 22500 }, { "epoch": 0.99, "learning_rate": 3.1217703426652975e-05, "loss": 1.8921, "step": 22510 }, { "epoch": 0.99, "learning_rate": 3.119686350664989e-05, "loss": 1.8818, "step": 22520 }, { "epoch": 0.99, "learning_rate": 3.117602300815379e-05, "loss": 1.8604, "step": 22530 }, { "epoch": 0.99, "learning_rate": 3.1155181941237754e-05, "loss": 1.8978, "step": 22540 }, { "epoch": 0.99, "learning_rate": 3.113434031597512e-05, "loss": 1.8593, "step": 22550 }, { "epoch": 0.99, "learning_rate": 3.1113498142439473e-05, "loss": 1.8391, "step": 22560 }, { "epoch": 0.99, "learning_rate": 3.1092655430704725e-05, "loss": 1.8613, "step": 22570 }, { "epoch": 0.99, "learning_rate": 3.1071812190844975e-05, "loss": 1.8845, "step": 22580 }, { "epoch": 0.99, "learning_rate": 3.105096843293463e-05, "loss": 1.8934, "step": 22590 }, { "epoch": 0.99, "learning_rate": 3.1030124167048326e-05, "loss": 1.8463, "step": 22600 }, { "epoch": 0.99, "eval_loss": 1.839435338973999, "eval_runtime": 11.6439, "eval_samples_per_second": 351.773, "eval_steps_per_second": 21.986, "step": 22600 }, { "epoch": 0.99, "learning_rate": 3.100927940326095e-05, "loss": 1.8468, "step": 22610 }, { "epoch": 0.99, "learning_rate": 3.0988434151647625e-05, "loss": 1.8613, "step": 22620 }, { "epoch": 0.99, "learning_rate": 3.096758842228371e-05, "loss": 1.8547, "step": 22630 }, { "epoch": 0.99, "learning_rate": 3.09467422252448e-05, "loss": 1.8687, "step": 22640 }, { "epoch": 0.99, "learning_rate": 3.0925895570606714e-05, "loss": 1.8653, "step": 22650 }, { "epoch": 0.99, "learning_rate": 3.0905048468445494e-05, "loss": 1.8523, "step": 22660 }, { "epoch": 0.99, "learning_rate": 3.088420092883738e-05, "loss": 1.8353, "step": 22670 }, { "epoch": 0.99, "learning_rate": 3.086335296185885e-05, "loss": 1.8948, "step": 22680 }, { "epoch": 0.99, "learning_rate": 3.084250457758659e-05, "loss": 1.8711, "step": 22690 }, { "epoch": 0.99, "learning_rate": 3.0821655786097465e-05, "loss": 1.8616, "step": 22700 }, { "epoch": 0.99, "eval_loss": 1.8392356634140015, "eval_runtime": 11.5942, "eval_samples_per_second": 353.279, "eval_steps_per_second": 22.08, "step": 22700 }, { "epoch": 0.99, "learning_rate": 3.080080659746853e-05, "loss": 1.8775, "step": 22710 }, { "epoch": 0.99, "learning_rate": 3.0779957021777084e-05, "loss": 1.8427, "step": 22720 }, { "epoch": 1.0, "learning_rate": 3.0759107069100556e-05, "loss": 1.8399, "step": 22730 }, { "epoch": 1.0, "learning_rate": 3.0738256749516575e-05, "loss": 1.8356, "step": 22740 }, { "epoch": 1.0, "learning_rate": 3.071740607310296e-05, "loss": 1.8397, "step": 22750 }, { "epoch": 1.0, "learning_rate": 3.0696555049937705e-05, "loss": 1.8664, "step": 22760 }, { "epoch": 1.0, "learning_rate": 3.067570369009895e-05, "loss": 1.889, "step": 22770 }, { "epoch": 1.0, "learning_rate": 3.0654852003665e-05, "loss": 1.8426, "step": 22780 }, { "epoch": 1.0, "learning_rate": 3.0634000000714345e-05, "loss": 1.8929, "step": 22790 }, { "epoch": 1.0, "learning_rate": 3.061314769132559e-05, "loss": 1.9215, "step": 22800 }, { "epoch": 1.0, "eval_loss": 1.839365005493164, "eval_runtime": 11.7046, "eval_samples_per_second": 349.947, "eval_steps_per_second": 21.872, "step": 22800 }, { "epoch": 1.0, "learning_rate": 3.0592295085577536e-05, "loss": 1.8533, "step": 22810 }, { "epoch": 1.0, "learning_rate": 3.0571442193549066e-05, "loss": 1.8485, "step": 22820 }, { "epoch": 1.0, "learning_rate": 3.055058902531925e-05, "loss": 1.8331, "step": 22830 }, { "epoch": 1.0, "learning_rate": 3.052973559096729e-05, "loss": 1.8565, "step": 22840 }, { "epoch": 1.0, "learning_rate": 3.0508881900572467e-05, "loss": 1.8282, "step": 22850 }, { "epoch": 1.0, "learning_rate": 3.0488027964214257e-05, "loss": 1.7852, "step": 22860 }, { "epoch": 1.0, "learning_rate": 3.046717379197219e-05, "loss": 1.8608, "step": 22870 }, { "epoch": 1.0, "learning_rate": 3.0446319393925966e-05, "loss": 1.784, "step": 22880 }, { "epoch": 1.0, "learning_rate": 3.042546478015535e-05, "loss": 1.8352, "step": 22890 }, { "epoch": 1.0, "learning_rate": 3.0404609960740227e-05, "loss": 1.843, "step": 22900 }, { "epoch": 1.0, "eval_loss": 1.8390878438949585, "eval_runtime": 11.6738, "eval_samples_per_second": 350.871, "eval_steps_per_second": 21.929, "step": 22900 }, { "epoch": 1.0, "learning_rate": 3.0383754945760583e-05, "loss": 1.8813, "step": 22910 }, { "epoch": 1.0, "learning_rate": 3.0362899745296515e-05, "loss": 1.8617, "step": 22920 }, { "epoch": 1.0, "learning_rate": 3.034204436942818e-05, "loss": 1.8365, "step": 22930 }, { "epoch": 1.0, "learning_rate": 3.0321188828235827e-05, "loss": 1.8799, "step": 22940 }, { "epoch": 1.0, "learning_rate": 3.0300333131799806e-05, "loss": 1.8485, "step": 22950 }, { "epoch": 1.01, "learning_rate": 3.0279477290200505e-05, "loss": 1.8404, "step": 22960 }, { "epoch": 1.01, "learning_rate": 3.0258621313518433e-05, "loss": 1.8718, "step": 22970 }, { "epoch": 1.01, "learning_rate": 3.0237765211834105e-05, "loss": 1.8443, "step": 22980 }, { "epoch": 1.01, "learning_rate": 3.0216908995228152e-05, "loss": 1.8151, "step": 22990 }, { "epoch": 1.01, "learning_rate": 3.0196052673781224e-05, "loss": 1.778, "step": 23000 }, { "epoch": 1.01, "eval_loss": 1.8393231630325317, "eval_runtime": 12.9491, "eval_samples_per_second": 316.316, "eval_steps_per_second": 19.77, "step": 23000 }, { "epoch": 1.01, "learning_rate": 3.0175196257574026e-05, "loss": 1.8567, "step": 23010 }, { "epoch": 1.01, "learning_rate": 3.015433975668733e-05, "loss": 1.8481, "step": 23020 }, { "epoch": 1.01, "learning_rate": 3.0133483181201915e-05, "loss": 1.8392, "step": 23030 }, { "epoch": 1.01, "learning_rate": 3.011262654119864e-05, "loss": 1.8451, "step": 23040 }, { "epoch": 1.01, "learning_rate": 3.0091769846758353e-05, "loss": 1.8074, "step": 23050 }, { "epoch": 1.01, "learning_rate": 3.0070913107961955e-05, "loss": 1.8228, "step": 23060 }, { "epoch": 1.01, "learning_rate": 3.0050056334890354e-05, "loss": 1.8126, "step": 23070 }, { "epoch": 1.01, "learning_rate": 3.002919953762448e-05, "loss": 1.816, "step": 23080 }, { "epoch": 1.01, "learning_rate": 3.0008342726245283e-05, "loss": 1.8528, "step": 23090 }, { "epoch": 1.01, "learning_rate": 2.9987485910833704e-05, "loss": 1.8528, "step": 23100 }, { "epoch": 1.01, "eval_loss": 1.8387082815170288, "eval_runtime": 11.526, "eval_samples_per_second": 355.37, "eval_steps_per_second": 22.211, "step": 23100 }, { "epoch": 1.01, "learning_rate": 2.9966629101470693e-05, "loss": 1.8318, "step": 23110 }, { "epoch": 1.01, "learning_rate": 2.99457723082372e-05, "loss": 1.7999, "step": 23120 }, { "epoch": 1.01, "learning_rate": 2.9924915541214165e-05, "loss": 1.8028, "step": 23130 }, { "epoch": 1.01, "learning_rate": 2.9904058810482516e-05, "loss": 1.82, "step": 23140 }, { "epoch": 1.01, "learning_rate": 2.9883202126123156e-05, "loss": 1.827, "step": 23150 }, { "epoch": 1.01, "learning_rate": 2.9862345498216987e-05, "loss": 1.8541, "step": 23160 }, { "epoch": 1.01, "learning_rate": 2.9841488936844853e-05, "loss": 1.8234, "step": 23170 }, { "epoch": 1.01, "learning_rate": 2.9820632452087598e-05, "loss": 1.8179, "step": 23180 }, { "epoch": 1.02, "learning_rate": 2.9799776054025987e-05, "loss": 1.7522, "step": 23190 }, { "epoch": 1.02, "learning_rate": 2.9778919752740805e-05, "loss": 1.8131, "step": 23200 }, { "epoch": 1.02, "eval_loss": 1.8382573127746582, "eval_runtime": 11.5966, "eval_samples_per_second": 353.208, "eval_steps_per_second": 22.075, "step": 23200 }, { "epoch": 1.02, "learning_rate": 2.9758063558312715e-05, "loss": 1.8818, "step": 23210 }, { "epoch": 1.02, "learning_rate": 2.9737207480822403e-05, "loss": 1.8894, "step": 23220 }, { "epoch": 1.02, "learning_rate": 2.9716351530350437e-05, "loss": 1.8496, "step": 23230 }, { "epoch": 1.02, "learning_rate": 2.9695495716977364e-05, "loss": 1.8393, "step": 23240 }, { "epoch": 1.02, "learning_rate": 2.9674640050783645e-05, "loss": 1.8029, "step": 23250 }, { "epoch": 1.02, "learning_rate": 2.9653784541849667e-05, "loss": 1.836, "step": 23260 }, { "epoch": 1.02, "learning_rate": 2.9632929200255768e-05, "loss": 1.8207, "step": 23270 }, { "epoch": 1.02, "learning_rate": 2.961207403608217e-05, "loss": 1.8362, "step": 23280 }, { "epoch": 1.02, "learning_rate": 2.9591219059409042e-05, "loss": 1.845, "step": 23290 }, { "epoch": 1.02, "learning_rate": 2.957036428031641e-05, "loss": 1.841, "step": 23300 }, { "epoch": 1.02, "eval_loss": 1.8381986618041992, "eval_runtime": 11.5438, "eval_samples_per_second": 354.822, "eval_steps_per_second": 22.176, "step": 23300 }, { "epoch": 1.02, "learning_rate": 2.9549509708884293e-05, "loss": 1.7825, "step": 23310 }, { "epoch": 1.02, "learning_rate": 2.9528655355192507e-05, "loss": 1.8415, "step": 23320 }, { "epoch": 1.02, "learning_rate": 2.9507801229320857e-05, "loss": 1.8185, "step": 23330 }, { "epoch": 1.02, "learning_rate": 2.9486947341348954e-05, "loss": 1.7948, "step": 23340 }, { "epoch": 1.02, "learning_rate": 2.9466093701356362e-05, "loss": 1.8576, "step": 23350 }, { "epoch": 1.02, "learning_rate": 2.9445240319422474e-05, "loss": 1.8236, "step": 23360 }, { "epoch": 1.02, "learning_rate": 2.9424387205626606e-05, "loss": 1.821, "step": 23370 }, { "epoch": 1.02, "learning_rate": 2.9403534370047895e-05, "loss": 1.8555, "step": 23380 }, { "epoch": 1.02, "learning_rate": 2.938268182276539e-05, "loss": 1.8439, "step": 23390 }, { "epoch": 1.02, "learning_rate": 2.9361829573857957e-05, "loss": 1.887, "step": 23400 }, { "epoch": 1.02, "eval_loss": 1.8380987644195557, "eval_runtime": 11.4898, "eval_samples_per_second": 356.489, "eval_steps_per_second": 22.281, "step": 23400 }, { "epoch": 1.02, "learning_rate": 2.934097763340436e-05, "loss": 1.8797, "step": 23410 }, { "epoch": 1.03, "learning_rate": 2.9320126011483177e-05, "loss": 1.9105, "step": 23420 }, { "epoch": 1.03, "learning_rate": 2.929927471817286e-05, "loss": 1.8606, "step": 23430 }, { "epoch": 1.03, "learning_rate": 2.9278423763551684e-05, "loss": 1.8446, "step": 23440 }, { "epoch": 1.03, "learning_rate": 2.9257573157697777e-05, "loss": 1.8375, "step": 23450 }, { "epoch": 1.03, "learning_rate": 2.923672291068906e-05, "loss": 1.8811, "step": 23460 }, { "epoch": 1.03, "learning_rate": 2.9215873032603347e-05, "loss": 1.812, "step": 23470 }, { "epoch": 1.03, "learning_rate": 2.9195023533518214e-05, "loss": 1.8409, "step": 23480 }, { "epoch": 1.03, "learning_rate": 2.917417442351107e-05, "loss": 1.8653, "step": 23490 }, { "epoch": 1.03, "learning_rate": 2.9153325712659155e-05, "loss": 1.8448, "step": 23500 }, { "epoch": 1.03, "eval_loss": 1.8378263711929321, "eval_runtime": 11.9067, "eval_samples_per_second": 344.008, "eval_steps_per_second": 21.501, "step": 23500 }, { "epoch": 1.03, "learning_rate": 2.9132477411039492e-05, "loss": 1.753, "step": 23510 }, { "epoch": 1.03, "learning_rate": 2.9111629528728927e-05, "loss": 1.816, "step": 23520 }, { "epoch": 1.03, "learning_rate": 2.9090782075804082e-05, "loss": 1.8389, "step": 23530 }, { "epoch": 1.03, "learning_rate": 2.906993506234139e-05, "loss": 1.7968, "step": 23540 }, { "epoch": 1.03, "learning_rate": 2.904908849841706e-05, "loss": 1.8927, "step": 23550 }, { "epoch": 1.03, "learning_rate": 2.9028242394107096e-05, "loss": 1.8586, "step": 23560 }, { "epoch": 1.03, "learning_rate": 2.900739675948725e-05, "loss": 1.8503, "step": 23570 }, { "epoch": 1.03, "learning_rate": 2.8986551604633098e-05, "loss": 1.8244, "step": 23580 }, { "epoch": 1.03, "learning_rate": 2.8965706939619926e-05, "loss": 1.8273, "step": 23590 }, { "epoch": 1.03, "learning_rate": 2.8944862774522838e-05, "loss": 1.8311, "step": 23600 }, { "epoch": 1.03, "eval_loss": 1.837775707244873, "eval_runtime": 11.4396, "eval_samples_per_second": 358.055, "eval_steps_per_second": 22.378, "step": 23600 }, { "epoch": 1.03, "learning_rate": 2.8924019119416648e-05, "loss": 1.7796, "step": 23610 }, { "epoch": 1.03, "learning_rate": 2.890317598437596e-05, "loss": 1.8699, "step": 23620 }, { "epoch": 1.03, "learning_rate": 2.88823333794751e-05, "loss": 1.8105, "step": 23630 }, { "epoch": 1.03, "learning_rate": 2.8861491314788167e-05, "loss": 1.8384, "step": 23640 }, { "epoch": 1.04, "learning_rate": 2.884064980038896e-05, "loss": 1.8276, "step": 23650 }, { "epoch": 1.04, "learning_rate": 2.881980884635106e-05, "loss": 1.8022, "step": 23660 }, { "epoch": 1.04, "learning_rate": 2.8798968462747725e-05, "loss": 1.7894, "step": 23670 }, { "epoch": 1.04, "learning_rate": 2.8778128659651985e-05, "loss": 1.842, "step": 23680 }, { "epoch": 1.04, "learning_rate": 2.875728944713655e-05, "loss": 1.8128, "step": 23690 }, { "epoch": 1.04, "learning_rate": 2.873645083527388e-05, "loss": 1.8564, "step": 23700 }, { "epoch": 1.04, "eval_loss": 1.8370027542114258, "eval_runtime": 13.2784, "eval_samples_per_second": 308.47, "eval_steps_per_second": 19.279, "step": 23700 }, { "epoch": 1.04, "learning_rate": 2.8715612834136125e-05, "loss": 1.8202, "step": 23710 }, { "epoch": 1.04, "learning_rate": 2.8694775453795117e-05, "loss": 1.7821, "step": 23720 }, { "epoch": 1.04, "learning_rate": 2.867393870432244e-05, "loss": 1.8326, "step": 23730 }, { "epoch": 1.04, "learning_rate": 2.865310259578933e-05, "loss": 1.8237, "step": 23740 }, { "epoch": 1.04, "learning_rate": 2.863226713826674e-05, "loss": 1.8495, "step": 23750 }, { "epoch": 1.04, "learning_rate": 2.8611432341825292e-05, "loss": 1.8296, "step": 23760 }, { "epoch": 1.04, "learning_rate": 2.8590598216535298e-05, "loss": 1.8261, "step": 23770 }, { "epoch": 1.04, "learning_rate": 2.8569764772466733e-05, "loss": 1.834, "step": 23780 }, { "epoch": 1.04, "learning_rate": 2.854893201968926e-05, "loss": 1.8588, "step": 23790 }, { "epoch": 1.04, "learning_rate": 2.8528099968272194e-05, "loss": 1.8609, "step": 23800 }, { "epoch": 1.04, "eval_loss": 1.837214708328247, "eval_runtime": 11.7272, "eval_samples_per_second": 349.274, "eval_steps_per_second": 21.83, "step": 23800 }, { "epoch": 1.04, "learning_rate": 2.850726862828452e-05, "loss": 1.8529, "step": 23810 }, { "epoch": 1.04, "learning_rate": 2.8486438009794868e-05, "loss": 1.7861, "step": 23820 }, { "epoch": 1.04, "learning_rate": 2.8465608122871547e-05, "loss": 1.8063, "step": 23830 }, { "epoch": 1.04, "learning_rate": 2.8444778977582455e-05, "loss": 1.8232, "step": 23840 }, { "epoch": 1.04, "learning_rate": 2.842395058399521e-05, "loss": 1.8102, "step": 23850 }, { "epoch": 1.04, "learning_rate": 2.840312295217699e-05, "loss": 1.8094, "step": 23860 }, { "epoch": 1.04, "learning_rate": 2.8382296092194664e-05, "loss": 1.831, "step": 23870 }, { "epoch": 1.05, "learning_rate": 2.836147001411469e-05, "loss": 1.8275, "step": 23880 }, { "epoch": 1.05, "learning_rate": 2.8340644728003173e-05, "loss": 1.8325, "step": 23890 }, { "epoch": 1.05, "learning_rate": 2.8319820243925808e-05, "loss": 1.8287, "step": 23900 }, { "epoch": 1.05, "eval_loss": 1.8369483947753906, "eval_runtime": 11.5483, "eval_samples_per_second": 354.685, "eval_steps_per_second": 22.168, "step": 23900 }, { "epoch": 1.05, "learning_rate": 2.829899657194794e-05, "loss": 1.8468, "step": 23910 }, { "epoch": 1.05, "learning_rate": 2.827817372213448e-05, "loss": 1.8562, "step": 23920 }, { "epoch": 1.05, "learning_rate": 2.8257351704549975e-05, "loss": 1.8662, "step": 23930 }, { "epoch": 1.05, "learning_rate": 2.8236530529258547e-05, "loss": 1.8251, "step": 23940 }, { "epoch": 1.05, "learning_rate": 2.821571020632393e-05, "loss": 1.805, "step": 23950 }, { "epoch": 1.05, "learning_rate": 2.8194890745809443e-05, "loss": 1.802, "step": 23960 }, { "epoch": 1.05, "learning_rate": 2.817407215777795e-05, "loss": 1.7749, "step": 23970 }, { "epoch": 1.05, "learning_rate": 2.8153254452291956e-05, "loss": 1.7909, "step": 23980 }, { "epoch": 1.05, "learning_rate": 2.8132437639413486e-05, "loss": 1.7694, "step": 23990 }, { "epoch": 1.05, "learning_rate": 2.8111621729204177e-05, "loss": 1.8344, "step": 24000 }, { "epoch": 1.05, "eval_loss": 1.8369200229644775, "eval_runtime": 11.8804, "eval_samples_per_second": 344.77, "eval_steps_per_second": 21.548, "step": 24000 }, { "epoch": 1.05, "learning_rate": 2.809080673172519e-05, "loss": 1.7981, "step": 24010 }, { "epoch": 1.05, "learning_rate": 2.8069992657037273e-05, "loss": 1.8076, "step": 24020 }, { "epoch": 1.05, "learning_rate": 2.804917951520071e-05, "loss": 1.794, "step": 24030 }, { "epoch": 1.05, "learning_rate": 2.8028367316275357e-05, "loss": 1.8598, "step": 24040 }, { "epoch": 1.05, "learning_rate": 2.8007556070320583e-05, "loss": 1.8616, "step": 24050 }, { "epoch": 1.05, "learning_rate": 2.7986745787395333e-05, "loss": 1.8413, "step": 24060 }, { "epoch": 1.05, "learning_rate": 2.796593647755804e-05, "loss": 1.8168, "step": 24070 }, { "epoch": 1.05, "learning_rate": 2.794512815086673e-05, "loss": 1.7945, "step": 24080 }, { "epoch": 1.05, "learning_rate": 2.792432081737887e-05, "loss": 1.8828, "step": 24090 }, { "epoch": 1.05, "learning_rate": 2.7903514487151538e-05, "loss": 1.8317, "step": 24100 }, { "epoch": 1.05, "eval_loss": 1.8366378545761108, "eval_runtime": 12.7159, "eval_samples_per_second": 322.117, "eval_steps_per_second": 20.132, "step": 24100 }, { "epoch": 1.06, "learning_rate": 2.7882709170241247e-05, "loss": 1.8135, "step": 24110 }, { "epoch": 1.06, "learning_rate": 2.7861904876704087e-05, "loss": 1.8198, "step": 24120 }, { "epoch": 1.06, "learning_rate": 2.7841101616595597e-05, "loss": 1.8092, "step": 24130 }, { "epoch": 1.06, "learning_rate": 2.7820299399970864e-05, "loss": 1.8312, "step": 24140 }, { "epoch": 1.06, "learning_rate": 2.779949823688443e-05, "loss": 1.8219, "step": 24150 }, { "epoch": 1.06, "learning_rate": 2.777869813739036e-05, "loss": 1.8105, "step": 24160 }, { "epoch": 1.06, "learning_rate": 2.775789911154218e-05, "loss": 1.864, "step": 24170 }, { "epoch": 1.06, "learning_rate": 2.7737101169392924e-05, "loss": 1.7589, "step": 24180 }, { "epoch": 1.06, "learning_rate": 2.7716304320995084e-05, "loss": 1.8094, "step": 24190 }, { "epoch": 1.06, "learning_rate": 2.7695508576400596e-05, "loss": 1.8204, "step": 24200 }, { "epoch": 1.06, "eval_loss": 1.836849570274353, "eval_runtime": 11.6165, "eval_samples_per_second": 352.601, "eval_steps_per_second": 22.038, "step": 24200 }, { "epoch": 1.06, "learning_rate": 2.767471394566094e-05, "loss": 1.8652, "step": 24210 }, { "epoch": 1.06, "learning_rate": 2.7653920438826964e-05, "loss": 1.8454, "step": 24220 }, { "epoch": 1.06, "learning_rate": 2.763312806594906e-05, "loss": 1.7737, "step": 24230 }, { "epoch": 1.06, "learning_rate": 2.7612336837077e-05, "loss": 1.7854, "step": 24240 }, { "epoch": 1.06, "learning_rate": 2.7591546762260058e-05, "loss": 1.8266, "step": 24250 }, { "epoch": 1.06, "learning_rate": 2.7570757851546904e-05, "loss": 1.7916, "step": 24260 }, { "epoch": 1.06, "learning_rate": 2.754997011498568e-05, "loss": 1.8398, "step": 24270 }, { "epoch": 1.06, "learning_rate": 2.7529183562623943e-05, "loss": 1.8321, "step": 24280 }, { "epoch": 1.06, "learning_rate": 2.7508398204508692e-05, "loss": 1.8409, "step": 24290 }, { "epoch": 1.06, "learning_rate": 2.7487614050686325e-05, "loss": 1.8149, "step": 24300 }, { "epoch": 1.06, "eval_loss": 1.836502194404602, "eval_runtime": 11.6567, "eval_samples_per_second": 351.386, "eval_steps_per_second": 21.962, "step": 24300 }, { "epoch": 1.06, "learning_rate": 2.746683111120269e-05, "loss": 1.8265, "step": 24310 }, { "epoch": 1.06, "learning_rate": 2.744604939610301e-05, "loss": 1.8582, "step": 24320 }, { "epoch": 1.07, "learning_rate": 2.7425268915431954e-05, "loss": 1.8372, "step": 24330 }, { "epoch": 1.07, "learning_rate": 2.740448967923356e-05, "loss": 1.8785, "step": 24340 }, { "epoch": 1.07, "learning_rate": 2.7383711697551307e-05, "loss": 1.8081, "step": 24350 }, { "epoch": 1.07, "learning_rate": 2.7362934980428004e-05, "loss": 1.8398, "step": 24360 }, { "epoch": 1.07, "learning_rate": 2.7342159537905934e-05, "loss": 1.8358, "step": 24370 }, { "epoch": 1.07, "learning_rate": 2.7321385380026666e-05, "loss": 1.8095, "step": 24380 }, { "epoch": 1.07, "learning_rate": 2.7300612516831248e-05, "loss": 1.8128, "step": 24390 }, { "epoch": 1.07, "learning_rate": 2.727984095836001e-05, "loss": 1.8152, "step": 24400 }, { "epoch": 1.07, "eval_loss": 1.8360520601272583, "eval_runtime": 13.1567, "eval_samples_per_second": 311.323, "eval_steps_per_second": 19.458, "step": 24400 }, { "epoch": 1.07, "learning_rate": 2.7259070714652725e-05, "loss": 1.797, "step": 24410 }, { "epoch": 1.07, "learning_rate": 2.723830179574848e-05, "loss": 1.8195, "step": 24420 }, { "epoch": 1.07, "learning_rate": 2.721753421168576e-05, "loss": 1.8132, "step": 24430 }, { "epoch": 1.07, "learning_rate": 2.7196767972502376e-05, "loss": 1.8053, "step": 24440 }, { "epoch": 1.07, "learning_rate": 2.717600308823549e-05, "loss": 1.8321, "step": 24450 }, { "epoch": 1.07, "learning_rate": 2.7155239568921643e-05, "loss": 1.8653, "step": 24460 }, { "epoch": 1.07, "learning_rate": 2.7134477424596657e-05, "loss": 1.8642, "step": 24470 }, { "epoch": 1.07, "learning_rate": 2.711371666529577e-05, "loss": 1.8297, "step": 24480 }, { "epoch": 1.07, "learning_rate": 2.7092957301053448e-05, "loss": 1.8048, "step": 24490 }, { "epoch": 1.07, "learning_rate": 2.70721993419036e-05, "loss": 1.8603, "step": 24500 }, { "epoch": 1.07, "eval_loss": 1.8359553813934326, "eval_runtime": 12.136, "eval_samples_per_second": 337.508, "eval_steps_per_second": 21.094, "step": 24500 }, { "epoch": 1.07, "learning_rate": 2.705144279787935e-05, "loss": 1.8392, "step": 24510 }, { "epoch": 1.07, "learning_rate": 2.70306876790132e-05, "loss": 1.8608, "step": 24520 }, { "epoch": 1.07, "learning_rate": 2.7009933995336948e-05, "loss": 1.835, "step": 24530 }, { "epoch": 1.07, "learning_rate": 2.6989181756881696e-05, "loss": 1.8186, "step": 24540 }, { "epoch": 1.07, "learning_rate": 2.6968430973677844e-05, "loss": 1.8125, "step": 24550 }, { "epoch": 1.08, "learning_rate": 2.6947681655755097e-05, "loss": 1.8156, "step": 24560 }, { "epoch": 1.08, "learning_rate": 2.6926933813142446e-05, "loss": 1.824, "step": 24570 }, { "epoch": 1.08, "learning_rate": 2.690618745586817e-05, "loss": 1.8335, "step": 24580 }, { "epoch": 1.08, "learning_rate": 2.6885442593959828e-05, "loss": 1.7904, "step": 24590 }, { "epoch": 1.08, "learning_rate": 2.686469923744427e-05, "loss": 1.8317, "step": 24600 }, { "epoch": 1.08, "eval_loss": 1.8358409404754639, "eval_runtime": 11.6144, "eval_samples_per_second": 352.667, "eval_steps_per_second": 22.042, "step": 24600 }, { "epoch": 1.08, "learning_rate": 2.6843957396347592e-05, "loss": 1.7796, "step": 24610 }, { "epoch": 1.08, "learning_rate": 2.6823217080695197e-05, "loss": 1.8364, "step": 24620 }, { "epoch": 1.08, "learning_rate": 2.6802478300511687e-05, "loss": 1.8251, "step": 24630 }, { "epoch": 1.08, "learning_rate": 2.6781741065821003e-05, "loss": 1.8535, "step": 24640 }, { "epoch": 1.08, "learning_rate": 2.676100538664626e-05, "loss": 1.8141, "step": 24650 }, { "epoch": 1.08, "learning_rate": 2.6740271273009896e-05, "loss": 1.8181, "step": 24660 }, { "epoch": 1.08, "learning_rate": 2.671953873493353e-05, "loss": 1.8209, "step": 24670 }, { "epoch": 1.08, "learning_rate": 2.669880778243804e-05, "loss": 1.7613, "step": 24680 }, { "epoch": 1.08, "learning_rate": 2.6678078425543563e-05, "loss": 1.83, "step": 24690 }, { "epoch": 1.08, "learning_rate": 2.6657350674269424e-05, "loss": 1.8316, "step": 24700 }, { "epoch": 1.08, "eval_loss": 1.835756778717041, "eval_runtime": 11.6376, "eval_samples_per_second": 351.961, "eval_steps_per_second": 21.998, "step": 24700 }, { "epoch": 1.08, "learning_rate": 2.6636624538634208e-05, "loss": 1.8301, "step": 24710 }, { "epoch": 1.08, "learning_rate": 2.6615900028655687e-05, "loss": 1.8184, "step": 24720 }, { "epoch": 1.08, "learning_rate": 2.6595177154350883e-05, "loss": 1.8037, "step": 24730 }, { "epoch": 1.08, "learning_rate": 2.6574455925735977e-05, "loss": 1.8436, "step": 24740 }, { "epoch": 1.08, "learning_rate": 2.6553736352826426e-05, "loss": 1.8369, "step": 24750 }, { "epoch": 1.08, "learning_rate": 2.6533018445636797e-05, "loss": 1.8328, "step": 24760 }, { "epoch": 1.08, "learning_rate": 2.651230221418095e-05, "loss": 1.8164, "step": 24770 }, { "epoch": 1.08, "learning_rate": 2.6491587668471846e-05, "loss": 1.8137, "step": 24780 }, { "epoch": 1.09, "learning_rate": 2.6470874818521696e-05, "loss": 1.8131, "step": 24790 }, { "epoch": 1.09, "learning_rate": 2.6450163674341847e-05, "loss": 1.8169, "step": 24800 }, { "epoch": 1.09, "eval_loss": 1.8353500366210938, "eval_runtime": 11.5666, "eval_samples_per_second": 354.122, "eval_steps_per_second": 22.133, "step": 24800 }, { "epoch": 1.09, "learning_rate": 2.6429454245942863e-05, "loss": 1.8232, "step": 24810 }, { "epoch": 1.09, "learning_rate": 2.640874654333443e-05, "loss": 1.7629, "step": 24820 }, { "epoch": 1.09, "learning_rate": 2.6388040576525452e-05, "loss": 1.8425, "step": 24830 }, { "epoch": 1.09, "learning_rate": 2.636733635552395e-05, "loss": 1.8757, "step": 24840 }, { "epoch": 1.09, "learning_rate": 2.634663389033713e-05, "loss": 1.8382, "step": 24850 }, { "epoch": 1.09, "learning_rate": 2.632593319097133e-05, "loss": 1.8147, "step": 24860 }, { "epoch": 1.09, "learning_rate": 2.6305234267432055e-05, "loss": 1.8169, "step": 24870 }, { "epoch": 1.09, "learning_rate": 2.6284537129723926e-05, "loss": 1.8293, "step": 24880 }, { "epoch": 1.09, "learning_rate": 2.6263841787850735e-05, "loss": 1.8217, "step": 24890 }, { "epoch": 1.09, "learning_rate": 2.6243148251815364e-05, "loss": 1.8505, "step": 24900 }, { "epoch": 1.09, "eval_loss": 1.8351911306381226, "eval_runtime": 13.3476, "eval_samples_per_second": 306.872, "eval_steps_per_second": 19.179, "step": 24900 }, { "epoch": 1.09, "learning_rate": 2.6222456531619848e-05, "loss": 1.8278, "step": 24910 }, { "epoch": 1.09, "learning_rate": 2.6201766637265354e-05, "loss": 1.8546, "step": 24920 }, { "epoch": 1.09, "learning_rate": 2.6181078578752133e-05, "loss": 1.8576, "step": 24930 }, { "epoch": 1.09, "learning_rate": 2.6160392366079592e-05, "loss": 1.8243, "step": 24940 }, { "epoch": 1.09, "learning_rate": 2.6139708009246196e-05, "loss": 1.7829, "step": 24950 }, { "epoch": 1.09, "learning_rate": 2.611902551824956e-05, "loss": 1.845, "step": 24960 }, { "epoch": 1.09, "learning_rate": 2.609834490308637e-05, "loss": 1.8217, "step": 24970 }, { "epoch": 1.09, "learning_rate": 2.607766617375241e-05, "loss": 1.8471, "step": 24980 }, { "epoch": 1.09, "learning_rate": 2.6056989340242555e-05, "loss": 1.8651, "step": 24990 }, { "epoch": 1.09, "learning_rate": 2.6036314412550773e-05, "loss": 1.795, "step": 25000 }, { "epoch": 1.09, "eval_loss": 1.8348760604858398, "eval_runtime": 11.8701, "eval_samples_per_second": 345.068, "eval_steps_per_second": 21.567, "step": 25000 }, { "epoch": 1.09, "learning_rate": 2.601564140067007e-05, "loss": 1.7818, "step": 25010 }, { "epoch": 1.1, "learning_rate": 2.5994970314592603e-05, "loss": 1.785, "step": 25020 }, { "epoch": 1.1, "learning_rate": 2.5974301164309514e-05, "loss": 1.8312, "step": 25030 }, { "epoch": 1.1, "learning_rate": 2.595363395981106e-05, "loss": 1.8278, "step": 25040 }, { "epoch": 1.1, "learning_rate": 2.593296871108654e-05, "loss": 1.8119, "step": 25050 }, { "epoch": 1.1, "learning_rate": 2.591230542812433e-05, "loss": 1.8369, "step": 25060 }, { "epoch": 1.1, "learning_rate": 2.5891644120911817e-05, "loss": 1.8243, "step": 25070 }, { "epoch": 1.1, "learning_rate": 2.5870984799435472e-05, "loss": 1.8056, "step": 25080 }, { "epoch": 1.1, "learning_rate": 2.585032747368077e-05, "loss": 1.8389, "step": 25090 }, { "epoch": 1.1, "learning_rate": 2.582967215363226e-05, "loss": 1.8413, "step": 25100 }, { "epoch": 1.1, "eval_loss": 1.8349188566207886, "eval_runtime": 12.9593, "eval_samples_per_second": 316.066, "eval_steps_per_second": 19.754, "step": 25100 }, { "epoch": 1.1, "learning_rate": 2.5809018849273484e-05, "loss": 1.8708, "step": 25110 }, { "epoch": 1.1, "learning_rate": 2.5788367570587047e-05, "loss": 1.8269, "step": 25120 }, { "epoch": 1.1, "learning_rate": 2.5767718327554532e-05, "loss": 1.7818, "step": 25130 }, { "epoch": 1.1, "learning_rate": 2.5747071130156587e-05, "loss": 1.8257, "step": 25140 }, { "epoch": 1.1, "learning_rate": 2.5726425988372825e-05, "loss": 1.8555, "step": 25150 }, { "epoch": 1.1, "learning_rate": 2.5705782912181888e-05, "loss": 1.8056, "step": 25160 }, { "epoch": 1.1, "learning_rate": 2.568514191156143e-05, "loss": 1.8413, "step": 25170 }, { "epoch": 1.1, "learning_rate": 2.5664502996488072e-05, "loss": 1.8206, "step": 25180 }, { "epoch": 1.1, "learning_rate": 2.5643866176937462e-05, "loss": 1.868, "step": 25190 }, { "epoch": 1.1, "learning_rate": 2.5623231462884204e-05, "loss": 1.8046, "step": 25200 }, { "epoch": 1.1, "eval_loss": 1.8350028991699219, "eval_runtime": 12.8788, "eval_samples_per_second": 318.041, "eval_steps_per_second": 19.878, "step": 25200 }, { "epoch": 1.1, "learning_rate": 2.5602598864301912e-05, "loss": 1.8307, "step": 25210 }, { "epoch": 1.1, "learning_rate": 2.5581968391163145e-05, "loss": 1.7947, "step": 25220 }, { "epoch": 1.1, "learning_rate": 2.5561340053439473e-05, "loss": 1.8254, "step": 25230 }, { "epoch": 1.1, "learning_rate": 2.5540713861101395e-05, "loss": 1.8283, "step": 25240 }, { "epoch": 1.11, "learning_rate": 2.5520089824118422e-05, "loss": 1.8242, "step": 25250 }, { "epoch": 1.11, "learning_rate": 2.5499467952458946e-05, "loss": 1.8352, "step": 25260 }, { "epoch": 1.11, "learning_rate": 2.547884825609041e-05, "loss": 1.8657, "step": 25270 }, { "epoch": 1.11, "learning_rate": 2.545823074497911e-05, "loss": 1.8535, "step": 25280 }, { "epoch": 1.11, "learning_rate": 2.5437615429090373e-05, "loss": 1.8455, "step": 25290 }, { "epoch": 1.11, "learning_rate": 2.541700231838839e-05, "loss": 1.8007, "step": 25300 }, { "epoch": 1.11, "eval_loss": 1.8347127437591553, "eval_runtime": 12.1617, "eval_samples_per_second": 336.796, "eval_steps_per_second": 21.05, "step": 25300 }, { "epoch": 1.11, "learning_rate": 2.5396391422836335e-05, "loss": 1.8574, "step": 25310 }, { "epoch": 1.11, "learning_rate": 2.5375782752396286e-05, "loss": 1.8717, "step": 25320 }, { "epoch": 1.11, "learning_rate": 2.535517631702927e-05, "loss": 1.8575, "step": 25330 }, { "epoch": 1.11, "learning_rate": 2.5334572126695204e-05, "loss": 1.8246, "step": 25340 }, { "epoch": 1.11, "learning_rate": 2.5313970191352945e-05, "loss": 1.775, "step": 25350 }, { "epoch": 1.11, "learning_rate": 2.529337052096024e-05, "loss": 1.834, "step": 25360 }, { "epoch": 1.11, "learning_rate": 2.527277312547376e-05, "loss": 1.8294, "step": 25370 }, { "epoch": 1.11, "learning_rate": 2.5252178014849072e-05, "loss": 1.785, "step": 25380 }, { "epoch": 1.11, "learning_rate": 2.523158519904061e-05, "loss": 1.7699, "step": 25390 }, { "epoch": 1.11, "learning_rate": 2.521099468800175e-05, "loss": 1.8178, "step": 25400 }, { "epoch": 1.11, "eval_loss": 1.8346803188323975, "eval_runtime": 11.7822, "eval_samples_per_second": 347.642, "eval_steps_per_second": 21.728, "step": 25400 }, { "epoch": 1.11, "learning_rate": 2.5190406491684708e-05, "loss": 1.8216, "step": 25410 }, { "epoch": 1.11, "learning_rate": 2.5169820620040608e-05, "loss": 1.8559, "step": 25420 }, { "epoch": 1.11, "learning_rate": 2.5149237083019438e-05, "loss": 1.8424, "step": 25430 }, { "epoch": 1.11, "learning_rate": 2.5128655890570067e-05, "loss": 1.8335, "step": 25440 }, { "epoch": 1.11, "learning_rate": 2.5108077052640223e-05, "loss": 1.8141, "step": 25450 }, { "epoch": 1.11, "learning_rate": 2.5087500579176504e-05, "loss": 1.851, "step": 25460 }, { "epoch": 1.11, "learning_rate": 2.506692648012435e-05, "loss": 1.9212, "step": 25470 }, { "epoch": 1.12, "learning_rate": 2.5046354765428074e-05, "loss": 1.845, "step": 25480 }, { "epoch": 1.12, "learning_rate": 2.502578544503081e-05, "loss": 1.8246, "step": 25490 }, { "epoch": 1.12, "learning_rate": 2.5005218528874574e-05, "loss": 1.8055, "step": 25500 }, { "epoch": 1.12, "eval_loss": 1.834632158279419, "eval_runtime": 11.8751, "eval_samples_per_second": 344.925, "eval_steps_per_second": 21.558, "step": 25500 }, { "epoch": 1.12, "learning_rate": 2.4984654026900173e-05, "loss": 1.8475, "step": 25510 }, { "epoch": 1.12, "learning_rate": 2.4964091949047294e-05, "loss": 1.8136, "step": 25520 }, { "epoch": 1.12, "learning_rate": 2.49435323052544e-05, "loss": 1.7852, "step": 25530 }, { "epoch": 1.12, "learning_rate": 2.4922975105458838e-05, "loss": 1.7962, "step": 25540 }, { "epoch": 1.12, "learning_rate": 2.4902420359596706e-05, "loss": 1.8233, "step": 25550 }, { "epoch": 1.12, "learning_rate": 2.4881868077602996e-05, "loss": 1.8193, "step": 25560 }, { "epoch": 1.12, "learning_rate": 2.4861318269411427e-05, "loss": 1.8325, "step": 25570 }, { "epoch": 1.12, "learning_rate": 2.484077094495458e-05, "loss": 1.8399, "step": 25580 }, { "epoch": 1.12, "learning_rate": 2.4820226114163803e-05, "loss": 1.7936, "step": 25590 }, { "epoch": 1.12, "learning_rate": 2.4799683786969268e-05, "loss": 1.7853, "step": 25600 }, { "epoch": 1.12, "eval_loss": 1.834519863128662, "eval_runtime": 11.6394, "eval_samples_per_second": 351.907, "eval_steps_per_second": 21.994, "step": 25600 }, { "epoch": 1.12, "learning_rate": 2.477914397329991e-05, "loss": 1.8067, "step": 25610 }, { "epoch": 1.12, "learning_rate": 2.4758606683083456e-05, "loss": 1.8196, "step": 25620 }, { "epoch": 1.12, "learning_rate": 2.4738071926246435e-05, "loss": 1.7976, "step": 25630 }, { "epoch": 1.12, "learning_rate": 2.4717539712714103e-05, "loss": 1.8813, "step": 25640 }, { "epoch": 1.12, "learning_rate": 2.4697010052410555e-05, "loss": 1.7963, "step": 25650 }, { "epoch": 1.12, "learning_rate": 2.4676482955258578e-05, "loss": 1.8388, "step": 25660 }, { "epoch": 1.12, "learning_rate": 2.465595843117979e-05, "loss": 1.8194, "step": 25670 }, { "epoch": 1.12, "learning_rate": 2.46354364900945e-05, "loss": 1.83, "step": 25680 }, { "epoch": 1.12, "learning_rate": 2.461491714192182e-05, "loss": 1.8395, "step": 25690 }, { "epoch": 1.13, "learning_rate": 2.4594400396579582e-05, "loss": 1.8314, "step": 25700 }, { "epoch": 1.13, "eval_loss": 1.8341410160064697, "eval_runtime": 13.3901, "eval_samples_per_second": 305.899, "eval_steps_per_second": 19.119, "step": 25700 }, { "epoch": 1.13, "learning_rate": 2.457388626398437e-05, "loss": 1.8478, "step": 25710 }, { "epoch": 1.13, "learning_rate": 2.4553374754051494e-05, "loss": 1.8013, "step": 25720 }, { "epoch": 1.13, "learning_rate": 2.4532865876695022e-05, "loss": 1.8231, "step": 25730 }, { "epoch": 1.13, "learning_rate": 2.4512359641827707e-05, "loss": 1.8409, "step": 25740 }, { "epoch": 1.13, "learning_rate": 2.4491856059361073e-05, "loss": 1.8713, "step": 25750 }, { "epoch": 1.13, "learning_rate": 2.4471355139205323e-05, "loss": 1.8156, "step": 25760 }, { "epoch": 1.13, "learning_rate": 2.44508568912694e-05, "loss": 1.8367, "step": 25770 }, { "epoch": 1.13, "learning_rate": 2.4430361325460937e-05, "loss": 1.8107, "step": 25780 }, { "epoch": 1.13, "learning_rate": 2.440986845168629e-05, "loss": 1.7991, "step": 25790 }, { "epoch": 1.13, "learning_rate": 2.438937827985047e-05, "loss": 1.8464, "step": 25800 }, { "epoch": 1.13, "eval_loss": 1.834221601486206, "eval_runtime": 11.7029, "eval_samples_per_second": 349.999, "eval_steps_per_second": 21.875, "step": 25800 }, { "epoch": 1.13, "learning_rate": 2.4368890819857256e-05, "loss": 1.8099, "step": 25810 }, { "epoch": 1.13, "learning_rate": 2.4348406081609035e-05, "loss": 1.8311, "step": 25820 }, { "epoch": 1.13, "learning_rate": 2.432792407500695e-05, "loss": 1.8057, "step": 25830 }, { "epoch": 1.13, "learning_rate": 2.4307444809950767e-05, "loss": 1.8371, "step": 25840 }, { "epoch": 1.13, "learning_rate": 2.428696829633897e-05, "loss": 1.8405, "step": 25850 }, { "epoch": 1.13, "learning_rate": 2.4266494544068682e-05, "loss": 1.8629, "step": 25860 }, { "epoch": 1.13, "learning_rate": 2.4246023563035703e-05, "loss": 1.8636, "step": 25870 }, { "epoch": 1.13, "learning_rate": 2.4225555363134504e-05, "loss": 1.819, "step": 25880 }, { "epoch": 1.13, "learning_rate": 2.4205089954258187e-05, "loss": 1.8154, "step": 25890 }, { "epoch": 1.13, "learning_rate": 2.4184627346298544e-05, "loss": 1.8101, "step": 25900 }, { "epoch": 1.13, "eval_loss": 1.8341854810714722, "eval_runtime": 11.5933, "eval_samples_per_second": 353.308, "eval_steps_per_second": 22.082, "step": 25900 }, { "epoch": 1.13, "learning_rate": 2.4164167549145952e-05, "loss": 1.8222, "step": 25910 }, { "epoch": 1.13, "learning_rate": 2.4143710572689507e-05, "loss": 1.8402, "step": 25920 }, { "epoch": 1.14, "learning_rate": 2.4123256426816873e-05, "loss": 1.8229, "step": 25930 }, { "epoch": 1.14, "learning_rate": 2.4102805121414386e-05, "loss": 1.7893, "step": 25940 }, { "epoch": 1.14, "learning_rate": 2.4082356666366987e-05, "loss": 1.8193, "step": 25950 }, { "epoch": 1.14, "learning_rate": 2.4061911071558266e-05, "loss": 1.8274, "step": 25960 }, { "epoch": 1.14, "learning_rate": 2.4041468346870393e-05, "loss": 1.8479, "step": 25970 }, { "epoch": 1.14, "learning_rate": 2.4021028502184194e-05, "loss": 1.8438, "step": 25980 }, { "epoch": 1.14, "learning_rate": 2.4000591547379058e-05, "loss": 1.8017, "step": 25990 }, { "epoch": 1.14, "learning_rate": 2.398015749233302e-05, "loss": 1.8014, "step": 26000 }, { "epoch": 1.14, "eval_loss": 1.8340920209884644, "eval_runtime": 11.695, "eval_samples_per_second": 350.235, "eval_steps_per_second": 21.89, "step": 26000 }, { "epoch": 1.14, "learning_rate": 2.3959726346922674e-05, "loss": 1.7826, "step": 26010 }, { "epoch": 1.14, "learning_rate": 2.393929812102325e-05, "loss": 1.8292, "step": 26020 }, { "epoch": 1.14, "learning_rate": 2.391887282450852e-05, "loss": 1.8536, "step": 26030 }, { "epoch": 1.14, "learning_rate": 2.389845046725089e-05, "loss": 1.8431, "step": 26040 }, { "epoch": 1.14, "learning_rate": 2.3878031059121282e-05, "loss": 1.8513, "step": 26050 }, { "epoch": 1.14, "learning_rate": 2.3857614609989278e-05, "loss": 1.8321, "step": 26060 }, { "epoch": 1.14, "learning_rate": 2.383720112972293e-05, "loss": 1.8147, "step": 26070 }, { "epoch": 1.14, "learning_rate": 2.3816790628188953e-05, "loss": 1.7952, "step": 26080 }, { "epoch": 1.14, "learning_rate": 2.3796383115252546e-05, "loss": 1.8118, "step": 26090 }, { "epoch": 1.14, "learning_rate": 2.37759786007775e-05, "loss": 1.8081, "step": 26100 }, { "epoch": 1.14, "eval_loss": 1.833876371383667, "eval_runtime": 11.5373, "eval_samples_per_second": 355.021, "eval_steps_per_second": 22.189, "step": 26100 }, { "epoch": 1.14, "learning_rate": 2.3755577094626154e-05, "loss": 1.8162, "step": 26110 }, { "epoch": 1.14, "learning_rate": 2.3735178606659382e-05, "loss": 1.8302, "step": 26120 }, { "epoch": 1.14, "learning_rate": 2.3714783146736622e-05, "loss": 1.8093, "step": 26130 }, { "epoch": 1.14, "learning_rate": 2.369439072471581e-05, "loss": 1.8492, "step": 26140 }, { "epoch": 1.14, "learning_rate": 2.3674001350453468e-05, "loss": 1.8483, "step": 26150 }, { "epoch": 1.15, "learning_rate": 2.3653615033804564e-05, "loss": 1.8476, "step": 26160 }, { "epoch": 1.15, "learning_rate": 2.3633231784622693e-05, "loss": 1.8372, "step": 26170 }, { "epoch": 1.15, "learning_rate": 2.3612851612759865e-05, "loss": 1.8233, "step": 26180 }, { "epoch": 1.15, "learning_rate": 2.3592474528066693e-05, "loss": 1.7858, "step": 26190 }, { "epoch": 1.15, "learning_rate": 2.3572100540392215e-05, "loss": 1.8376, "step": 26200 }, { "epoch": 1.15, "eval_loss": 1.833698034286499, "eval_runtime": 11.6849, "eval_samples_per_second": 350.538, "eval_steps_per_second": 21.909, "step": 26200 }, { "epoch": 1.15, "learning_rate": 2.355172965958403e-05, "loss": 1.8245, "step": 26210 }, { "epoch": 1.15, "learning_rate": 2.353136189548821e-05, "loss": 1.842, "step": 26220 }, { "epoch": 1.15, "learning_rate": 2.3510997257949336e-05, "loss": 1.8888, "step": 26230 }, { "epoch": 1.15, "learning_rate": 2.3490635756810458e-05, "loss": 1.8482, "step": 26240 }, { "epoch": 1.15, "learning_rate": 2.3470277401913134e-05, "loss": 1.8517, "step": 26250 }, { "epoch": 1.15, "learning_rate": 2.3449922203097373e-05, "loss": 1.8199, "step": 26260 }, { "epoch": 1.15, "learning_rate": 2.342957017020169e-05, "loss": 1.824, "step": 26270 }, { "epoch": 1.15, "learning_rate": 2.3409221313063035e-05, "loss": 1.8576, "step": 26280 }, { "epoch": 1.15, "learning_rate": 2.3388875641516863e-05, "loss": 1.8216, "step": 26290 }, { "epoch": 1.15, "learning_rate": 2.3368533165397046e-05, "loss": 1.8346, "step": 26300 }, { "epoch": 1.15, "eval_loss": 1.8334472179412842, "eval_runtime": 11.5788, "eval_samples_per_second": 353.751, "eval_steps_per_second": 22.109, "step": 26300 }, { "epoch": 1.15, "learning_rate": 2.3348193894535956e-05, "loss": 1.7762, "step": 26310 }, { "epoch": 1.15, "learning_rate": 2.332785783876438e-05, "loss": 1.8077, "step": 26320 }, { "epoch": 1.15, "learning_rate": 2.3307525007911557e-05, "loss": 1.8083, "step": 26330 }, { "epoch": 1.15, "learning_rate": 2.328719541180519e-05, "loss": 1.8251, "step": 26340 }, { "epoch": 1.15, "learning_rate": 2.3266869060271383e-05, "loss": 1.8575, "step": 26350 }, { "epoch": 1.15, "learning_rate": 2.3246545963134718e-05, "loss": 1.7965, "step": 26360 }, { "epoch": 1.15, "learning_rate": 2.3226226130218155e-05, "loss": 1.8171, "step": 26370 }, { "epoch": 1.15, "learning_rate": 2.3205909571343112e-05, "loss": 1.7914, "step": 26380 }, { "epoch": 1.16, "learning_rate": 2.3185596296329404e-05, "loss": 1.8391, "step": 26390 }, { "epoch": 1.16, "learning_rate": 2.3165286314995278e-05, "loss": 1.7914, "step": 26400 }, { "epoch": 1.16, "eval_loss": 1.8334349393844604, "eval_runtime": 11.5307, "eval_samples_per_second": 355.226, "eval_steps_per_second": 22.202, "step": 26400 }, { "epoch": 1.16, "learning_rate": 2.3144979637157365e-05, "loss": 1.8035, "step": 26410 }, { "epoch": 1.16, "learning_rate": 2.3124676272630724e-05, "loss": 1.8065, "step": 26420 }, { "epoch": 1.16, "learning_rate": 2.3104376231228773e-05, "loss": 1.7798, "step": 26430 }, { "epoch": 1.16, "learning_rate": 2.308407952276339e-05, "loss": 1.7659, "step": 26440 }, { "epoch": 1.16, "learning_rate": 2.3063786157044766e-05, "loss": 1.8257, "step": 26450 }, { "epoch": 1.16, "learning_rate": 2.3043496143881546e-05, "loss": 1.8107, "step": 26460 }, { "epoch": 1.16, "learning_rate": 2.3023209493080696e-05, "loss": 1.8759, "step": 26470 }, { "epoch": 1.16, "learning_rate": 2.3002926214447598e-05, "loss": 1.8089, "step": 26480 }, { "epoch": 1.16, "learning_rate": 2.2982646317785977e-05, "loss": 1.7965, "step": 26490 }, { "epoch": 1.16, "learning_rate": 2.296236981289795e-05, "loss": 1.8116, "step": 26500 }, { "epoch": 1.16, "eval_loss": 1.8330750465393066, "eval_runtime": 12.1921, "eval_samples_per_second": 335.955, "eval_steps_per_second": 20.997, "step": 26500 }, { "epoch": 1.16, "learning_rate": 2.2942096709583966e-05, "loss": 1.8146, "step": 26510 }, { "epoch": 1.16, "learning_rate": 2.2921827017642855e-05, "loss": 1.8026, "step": 26520 }, { "epoch": 1.16, "learning_rate": 2.2901560746871777e-05, "loss": 1.8253, "step": 26530 }, { "epoch": 1.16, "learning_rate": 2.2881297907066254e-05, "loss": 1.786, "step": 26540 }, { "epoch": 1.16, "learning_rate": 2.286103850802014e-05, "loss": 1.8141, "step": 26550 }, { "epoch": 1.16, "learning_rate": 2.2840782559525635e-05, "loss": 1.8312, "step": 26560 }, { "epoch": 1.16, "learning_rate": 2.2820530071373278e-05, "loss": 1.7801, "step": 26570 }, { "epoch": 1.16, "learning_rate": 2.280028105335189e-05, "loss": 1.8628, "step": 26580 }, { "epoch": 1.16, "learning_rate": 2.2780035515248673e-05, "loss": 1.8254, "step": 26590 }, { "epoch": 1.16, "learning_rate": 2.275979346684911e-05, "loss": 1.8014, "step": 26600 }, { "epoch": 1.16, "eval_loss": 1.832726001739502, "eval_runtime": 11.4701, "eval_samples_per_second": 357.102, "eval_steps_per_second": 22.319, "step": 26600 }, { "epoch": 1.16, "learning_rate": 2.273955491793702e-05, "loss": 1.8464, "step": 26610 }, { "epoch": 1.17, "learning_rate": 2.2719319878294514e-05, "loss": 1.854, "step": 26620 }, { "epoch": 1.17, "learning_rate": 2.2699088357702025e-05, "loss": 1.8389, "step": 26630 }, { "epoch": 1.17, "learning_rate": 2.267886036593825e-05, "loss": 1.7847, "step": 26640 }, { "epoch": 1.17, "learning_rate": 2.2658635912780227e-05, "loss": 1.8347, "step": 26650 }, { "epoch": 1.17, "learning_rate": 2.2638415008003243e-05, "loss": 1.7616, "step": 26660 }, { "epoch": 1.17, "learning_rate": 2.2618197661380898e-05, "loss": 1.7771, "step": 26670 }, { "epoch": 1.17, "learning_rate": 2.259798388268505e-05, "loss": 1.8119, "step": 26680 }, { "epoch": 1.17, "learning_rate": 2.2577773681685867e-05, "loss": 1.8511, "step": 26690 }, { "epoch": 1.17, "learning_rate": 2.2557567068151724e-05, "loss": 1.7993, "step": 26700 }, { "epoch": 1.17, "eval_loss": 1.8325154781341553, "eval_runtime": 11.4069, "eval_samples_per_second": 359.08, "eval_steps_per_second": 22.443, "step": 26700 }, { "epoch": 1.17, "learning_rate": 2.253736405184935e-05, "loss": 1.7621, "step": 26710 }, { "epoch": 1.17, "learning_rate": 2.2517164642543642e-05, "loss": 1.8126, "step": 26720 }, { "epoch": 1.17, "learning_rate": 2.249696884999784e-05, "loss": 1.826, "step": 26730 }, { "epoch": 1.17, "learning_rate": 2.2476776683973364e-05, "loss": 1.7861, "step": 26740 }, { "epoch": 1.17, "learning_rate": 2.2456588154229932e-05, "loss": 1.8022, "step": 26750 }, { "epoch": 1.17, "learning_rate": 2.2436403270525473e-05, "loss": 1.7953, "step": 26760 }, { "epoch": 1.17, "learning_rate": 2.2416222042616177e-05, "loss": 1.8091, "step": 26770 }, { "epoch": 1.17, "learning_rate": 2.2396044480256443e-05, "loss": 1.839, "step": 26780 }, { "epoch": 1.17, "learning_rate": 2.237587059319892e-05, "loss": 1.835, "step": 26790 }, { "epoch": 1.17, "learning_rate": 2.235570039119448e-05, "loss": 1.8417, "step": 26800 }, { "epoch": 1.17, "eval_loss": 1.8325867652893066, "eval_runtime": 11.4762, "eval_samples_per_second": 356.911, "eval_steps_per_second": 22.307, "step": 26800 }, { "epoch": 1.17, "learning_rate": 2.2335533883992166e-05, "loss": 1.8167, "step": 26810 }, { "epoch": 1.17, "learning_rate": 2.2315371081339328e-05, "loss": 1.8286, "step": 26820 }, { "epoch": 1.17, "learning_rate": 2.2295211992981426e-05, "loss": 1.862, "step": 26830 }, { "epoch": 1.17, "learning_rate": 2.2275056628662205e-05, "loss": 1.7991, "step": 26840 }, { "epoch": 1.18, "learning_rate": 2.225490499812355e-05, "loss": 1.8163, "step": 26850 }, { "epoch": 1.18, "learning_rate": 2.2234757111105584e-05, "loss": 1.8737, "step": 26860 }, { "epoch": 1.18, "learning_rate": 2.2214612977346593e-05, "loss": 1.8203, "step": 26870 }, { "epoch": 1.18, "learning_rate": 2.2194472606583074e-05, "loss": 1.8228, "step": 26880 }, { "epoch": 1.18, "learning_rate": 2.2174336008549667e-05, "loss": 1.8561, "step": 26890 }, { "epoch": 1.18, "learning_rate": 2.2154203192979235e-05, "loss": 1.8229, "step": 26900 }, { "epoch": 1.18, "eval_loss": 1.8322032690048218, "eval_runtime": 11.5781, "eval_samples_per_second": 353.771, "eval_steps_per_second": 22.111, "step": 26900 }, { "epoch": 1.18, "learning_rate": 2.2134074169602773e-05, "loss": 1.8383, "step": 26910 }, { "epoch": 1.18, "learning_rate": 2.2113948948149477e-05, "loss": 1.792, "step": 26920 }, { "epoch": 1.18, "learning_rate": 2.209382753834667e-05, "loss": 1.7557, "step": 26930 }, { "epoch": 1.18, "learning_rate": 2.2073709949919867e-05, "loss": 1.8174, "step": 26940 }, { "epoch": 1.18, "learning_rate": 2.2053596192592704e-05, "loss": 1.8101, "step": 26950 }, { "epoch": 1.18, "learning_rate": 2.2033486276087e-05, "loss": 1.8121, "step": 26960 }, { "epoch": 1.18, "learning_rate": 2.201338021012268e-05, "loss": 1.792, "step": 26970 }, { "epoch": 1.18, "learning_rate": 2.199327800441785e-05, "loss": 1.8288, "step": 26980 }, { "epoch": 1.18, "learning_rate": 2.1973179668688697e-05, "loss": 1.8468, "step": 26990 }, { "epoch": 1.18, "learning_rate": 2.195308521264959e-05, "loss": 1.8541, "step": 27000 }, { "epoch": 1.18, "eval_loss": 1.8320529460906982, "eval_runtime": 11.853, "eval_samples_per_second": 345.568, "eval_steps_per_second": 21.598, "step": 27000 }, { "epoch": 1.18, "learning_rate": 2.193299464601299e-05, "loss": 1.8016, "step": 27010 }, { "epoch": 1.18, "learning_rate": 2.1912907978489493e-05, "loss": 1.8023, "step": 27020 }, { "epoch": 1.18, "learning_rate": 2.1892825219787804e-05, "loss": 1.8119, "step": 27030 }, { "epoch": 1.18, "learning_rate": 2.1872746379614736e-05, "loss": 1.808, "step": 27040 }, { "epoch": 1.18, "learning_rate": 2.185267146767522e-05, "loss": 1.8235, "step": 27050 }, { "epoch": 1.18, "learning_rate": 2.1832600493672274e-05, "loss": 1.8342, "step": 27060 }, { "epoch": 1.18, "learning_rate": 2.1812533467307035e-05, "loss": 1.8602, "step": 27070 }, { "epoch": 1.19, "learning_rate": 2.1792470398278683e-05, "loss": 1.8775, "step": 27080 }, { "epoch": 1.19, "learning_rate": 2.177241129628456e-05, "loss": 1.8305, "step": 27090 }, { "epoch": 1.19, "learning_rate": 2.1752356171020016e-05, "loss": 1.8182, "step": 27100 }, { "epoch": 1.19, "eval_loss": 1.832180142402649, "eval_runtime": 11.4296, "eval_samples_per_second": 358.368, "eval_steps_per_second": 22.398, "step": 27100 }, { "epoch": 1.19, "learning_rate": 2.1732305032178533e-05, "loss": 1.8361, "step": 27110 }, { "epoch": 1.19, "learning_rate": 2.1712257889451627e-05, "loss": 1.849, "step": 27120 }, { "epoch": 1.19, "learning_rate": 2.1692214752528916e-05, "loss": 1.8339, "step": 27130 }, { "epoch": 1.19, "learning_rate": 2.1672175631098056e-05, "loss": 1.8372, "step": 27140 }, { "epoch": 1.19, "learning_rate": 2.1652140534844787e-05, "loss": 1.7972, "step": 27150 }, { "epoch": 1.19, "learning_rate": 2.1632109473452864e-05, "loss": 1.8092, "step": 27160 }, { "epoch": 1.19, "learning_rate": 2.161208245660415e-05, "loss": 1.8685, "step": 27170 }, { "epoch": 1.19, "learning_rate": 2.1592059493978492e-05, "loss": 1.8396, "step": 27180 }, { "epoch": 1.19, "learning_rate": 2.1572040595253822e-05, "loss": 1.7891, "step": 27190 }, { "epoch": 1.19, "learning_rate": 2.1552025770106077e-05, "loss": 1.7929, "step": 27200 }, { "epoch": 1.19, "eval_loss": 1.8319555521011353, "eval_runtime": 11.6871, "eval_samples_per_second": 350.47, "eval_steps_per_second": 21.904, "step": 27200 }, { "epoch": 1.19, "learning_rate": 2.1532015028209264e-05, "loss": 1.8318, "step": 27210 }, { "epoch": 1.19, "learning_rate": 2.1512008379235355e-05, "loss": 1.8183, "step": 27220 }, { "epoch": 1.19, "learning_rate": 2.149200583285442e-05, "loss": 1.8565, "step": 27230 }, { "epoch": 1.19, "learning_rate": 2.1472007398734464e-05, "loss": 1.8268, "step": 27240 }, { "epoch": 1.19, "learning_rate": 2.1452013086541593e-05, "loss": 1.7994, "step": 27250 }, { "epoch": 1.19, "learning_rate": 2.143202290593984e-05, "loss": 1.8272, "step": 27260 }, { "epoch": 1.19, "learning_rate": 2.1412036866591293e-05, "loss": 1.8074, "step": 27270 }, { "epoch": 1.19, "learning_rate": 2.1392054978156015e-05, "loss": 1.8095, "step": 27280 }, { "epoch": 1.19, "learning_rate": 2.137207725029206e-05, "loss": 1.8097, "step": 27290 }, { "epoch": 1.2, "learning_rate": 2.1352103692655497e-05, "loss": 1.7954, "step": 27300 }, { "epoch": 1.2, "eval_loss": 1.8318532705307007, "eval_runtime": 11.5752, "eval_samples_per_second": 353.86, "eval_steps_per_second": 22.116, "step": 27300 }, { "epoch": 1.2, "learning_rate": 2.133213431490035e-05, "loss": 1.809, "step": 27310 }, { "epoch": 1.2, "learning_rate": 2.1312169126678647e-05, "loss": 1.8336, "step": 27320 }, { "epoch": 1.2, "learning_rate": 2.129220813764035e-05, "loss": 1.8235, "step": 27330 }, { "epoch": 1.2, "learning_rate": 2.127225135743346e-05, "loss": 1.7739, "step": 27340 }, { "epoch": 1.2, "learning_rate": 2.1252298795703863e-05, "loss": 1.857, "step": 27350 }, { "epoch": 1.2, "learning_rate": 2.123235046209549e-05, "loss": 1.7995, "step": 27360 }, { "epoch": 1.2, "learning_rate": 2.121240636625015e-05, "loss": 1.8557, "step": 27370 }, { "epoch": 1.2, "learning_rate": 2.1192466517807657e-05, "loss": 1.8035, "step": 27380 }, { "epoch": 1.2, "learning_rate": 2.1172530926405745e-05, "loss": 1.8037, "step": 27390 }, { "epoch": 1.2, "learning_rate": 2.1152599601680105e-05, "loss": 1.8958, "step": 27400 }, { "epoch": 1.2, "eval_loss": 1.8316702842712402, "eval_runtime": 11.651, "eval_samples_per_second": 351.557, "eval_steps_per_second": 21.972, "step": 27400 }, { "epoch": 1.2, "learning_rate": 2.1132672553264356e-05, "loss": 1.8168, "step": 27410 }, { "epoch": 1.2, "learning_rate": 2.111274979079006e-05, "loss": 1.8574, "step": 27420 }, { "epoch": 1.2, "learning_rate": 2.1092831323886694e-05, "loss": 1.7885, "step": 27430 }, { "epoch": 1.2, "learning_rate": 2.1072917162181672e-05, "loss": 1.8138, "step": 27440 }, { "epoch": 1.2, "learning_rate": 2.105300731530032e-05, "loss": 1.8419, "step": 27450 }, { "epoch": 1.2, "learning_rate": 2.1033101792865885e-05, "loss": 1.7851, "step": 27460 }, { "epoch": 1.2, "learning_rate": 2.1013200604499507e-05, "loss": 1.8412, "step": 27470 }, { "epoch": 1.2, "learning_rate": 2.099330375982026e-05, "loss": 1.8451, "step": 27480 }, { "epoch": 1.2, "learning_rate": 2.0973411268445075e-05, "loss": 1.8418, "step": 27490 }, { "epoch": 1.2, "learning_rate": 2.095352313998884e-05, "loss": 1.8237, "step": 27500 }, { "epoch": 1.2, "eval_loss": 1.831519603729248, "eval_runtime": 11.976, "eval_samples_per_second": 342.019, "eval_steps_per_second": 21.376, "step": 27500 }, { "epoch": 1.2, "learning_rate": 2.0933639384064277e-05, "loss": 1.8165, "step": 27510 }, { "epoch": 1.2, "learning_rate": 2.0913760010282015e-05, "loss": 1.8361, "step": 27520 }, { "epoch": 1.21, "learning_rate": 2.0893885028250577e-05, "loss": 1.8167, "step": 27530 }, { "epoch": 1.21, "learning_rate": 2.0874014447576348e-05, "loss": 1.7941, "step": 27540 }, { "epoch": 1.21, "learning_rate": 2.0854148277863597e-05, "loss": 1.799, "step": 27550 }, { "epoch": 1.21, "learning_rate": 2.0834286528714445e-05, "loss": 1.8005, "step": 27560 }, { "epoch": 1.21, "learning_rate": 2.08144292097289e-05, "loss": 1.8535, "step": 27570 }, { "epoch": 1.21, "learning_rate": 2.0794576330504802e-05, "loss": 1.8461, "step": 27580 }, { "epoch": 1.21, "learning_rate": 2.0774727900637875e-05, "loss": 1.8137, "step": 27590 }, { "epoch": 1.21, "learning_rate": 2.0754883929721646e-05, "loss": 1.8526, "step": 27600 }, { "epoch": 1.21, "eval_loss": 1.8314945697784424, "eval_runtime": 11.6983, "eval_samples_per_second": 350.137, "eval_steps_per_second": 21.884, "step": 27600 }, { "epoch": 1.21, "learning_rate": 2.0735044427347557e-05, "loss": 1.7918, "step": 27610 }, { "epoch": 1.21, "learning_rate": 2.0715209403104805e-05, "loss": 1.8192, "step": 27620 }, { "epoch": 1.21, "learning_rate": 2.0695378866580508e-05, "loss": 1.8595, "step": 27630 }, { "epoch": 1.21, "learning_rate": 2.0675552827359544e-05, "loss": 1.8632, "step": 27640 }, { "epoch": 1.21, "learning_rate": 2.065573129502467e-05, "loss": 1.8333, "step": 27650 }, { "epoch": 1.21, "learning_rate": 2.0635914279156423e-05, "loss": 1.7787, "step": 27660 }, { "epoch": 1.21, "learning_rate": 2.0616101789333192e-05, "loss": 1.7813, "step": 27670 }, { "epoch": 1.21, "learning_rate": 2.0596293835131144e-05, "loss": 1.7958, "step": 27680 }, { "epoch": 1.21, "learning_rate": 2.057649042612429e-05, "loss": 1.8267, "step": 27690 }, { "epoch": 1.21, "learning_rate": 2.0556691571884413e-05, "loss": 1.808, "step": 27700 }, { "epoch": 1.21, "eval_loss": 1.8313714265823364, "eval_runtime": 11.5536, "eval_samples_per_second": 354.521, "eval_steps_per_second": 22.158, "step": 27700 }, { "epoch": 1.21, "learning_rate": 2.0536897281981125e-05, "loss": 1.8536, "step": 27710 }, { "epoch": 1.21, "learning_rate": 2.0517107565981794e-05, "loss": 1.8201, "step": 27720 }, { "epoch": 1.21, "learning_rate": 2.0497322433451612e-05, "loss": 1.8408, "step": 27730 }, { "epoch": 1.21, "learning_rate": 2.0477541893953545e-05, "loss": 1.8357, "step": 27740 }, { "epoch": 1.21, "learning_rate": 2.0457765957048314e-05, "loss": 1.7831, "step": 27750 }, { "epoch": 1.22, "learning_rate": 2.0437994632294456e-05, "loss": 1.788, "step": 27760 }, { "epoch": 1.22, "learning_rate": 2.0418227929248246e-05, "loss": 1.833, "step": 27770 }, { "epoch": 1.22, "learning_rate": 2.0398465857463757e-05, "loss": 1.8343, "step": 27780 }, { "epoch": 1.22, "learning_rate": 2.0378708426492782e-05, "loss": 1.8193, "step": 27790 }, { "epoch": 1.22, "learning_rate": 2.035895564588492e-05, "loss": 1.8208, "step": 27800 }, { "epoch": 1.22, "eval_loss": 1.8313522338867188, "eval_runtime": 11.4654, "eval_samples_per_second": 357.248, "eval_steps_per_second": 22.328, "step": 27800 }, { "epoch": 1.22, "learning_rate": 2.0339207525187474e-05, "loss": 1.8266, "step": 27810 }, { "epoch": 1.22, "learning_rate": 2.031946407394553e-05, "loss": 1.7958, "step": 27820 }, { "epoch": 1.22, "learning_rate": 2.02997253017019e-05, "loss": 1.8288, "step": 27830 }, { "epoch": 1.22, "learning_rate": 2.027999121799714e-05, "loss": 1.8183, "step": 27840 }, { "epoch": 1.22, "learning_rate": 2.0260261832369536e-05, "loss": 1.8376, "step": 27850 }, { "epoch": 1.22, "learning_rate": 2.0240537154355117e-05, "loss": 1.8534, "step": 27860 }, { "epoch": 1.22, "learning_rate": 2.0220817193487602e-05, "loss": 1.827, "step": 27870 }, { "epoch": 1.22, "learning_rate": 2.0201101959298482e-05, "loss": 1.8304, "step": 27880 }, { "epoch": 1.22, "learning_rate": 2.018139146131691e-05, "loss": 1.8339, "step": 27890 }, { "epoch": 1.22, "learning_rate": 2.0161685709069786e-05, "loss": 1.7982, "step": 27900 }, { "epoch": 1.22, "eval_loss": 1.8311944007873535, "eval_runtime": 11.5326, "eval_samples_per_second": 355.168, "eval_steps_per_second": 22.198, "step": 27900 }, { "epoch": 1.22, "learning_rate": 2.0141984712081704e-05, "loss": 1.8171, "step": 27910 }, { "epoch": 1.22, "learning_rate": 2.012228847987496e-05, "loss": 1.8211, "step": 27920 }, { "epoch": 1.22, "learning_rate": 2.010259702196954e-05, "loss": 1.8328, "step": 27930 }, { "epoch": 1.22, "learning_rate": 2.0082910347883144e-05, "loss": 1.7907, "step": 27940 }, { "epoch": 1.22, "learning_rate": 2.0063228467131136e-05, "loss": 1.8231, "step": 27950 }, { "epoch": 1.22, "learning_rate": 2.0043551389226576e-05, "loss": 1.8691, "step": 27960 }, { "epoch": 1.22, "learning_rate": 2.0023879123680192e-05, "loss": 1.8043, "step": 27970 }, { "epoch": 1.22, "learning_rate": 2.000421168000041e-05, "loss": 1.8543, "step": 27980 }, { "epoch": 1.23, "learning_rate": 1.9984549067693303e-05, "loss": 1.8065, "step": 27990 }, { "epoch": 1.23, "learning_rate": 1.9964891296262595e-05, "loss": 1.8391, "step": 28000 }, { "epoch": 1.23, "eval_loss": 1.8308237791061401, "eval_runtime": 11.5213, "eval_samples_per_second": 355.515, "eval_steps_per_second": 22.22, "step": 28000 }, { "epoch": 1.23, "learning_rate": 1.9945238375209708e-05, "loss": 1.8262, "step": 28010 }, { "epoch": 1.23, "learning_rate": 1.992559031403369e-05, "loss": 1.8074, "step": 28020 }, { "epoch": 1.23, "learning_rate": 1.9905947122231273e-05, "loss": 1.792, "step": 28030 }, { "epoch": 1.23, "learning_rate": 1.988630880929679e-05, "loss": 1.7933, "step": 28040 }, { "epoch": 1.23, "learning_rate": 1.9866675384722252e-05, "loss": 1.8644, "step": 28050 }, { "epoch": 1.23, "learning_rate": 1.9847046857997285e-05, "loss": 1.8546, "step": 28060 }, { "epoch": 1.23, "learning_rate": 1.9827423238609173e-05, "loss": 1.8298, "step": 28070 }, { "epoch": 1.23, "learning_rate": 1.9807804536042796e-05, "loss": 1.8519, "step": 28080 }, { "epoch": 1.23, "learning_rate": 1.978819075978069e-05, "loss": 1.8168, "step": 28090 }, { "epoch": 1.23, "learning_rate": 1.9768581919302978e-05, "loss": 1.8294, "step": 28100 }, { "epoch": 1.23, "eval_loss": 1.830810785293579, "eval_runtime": 11.61, "eval_samples_per_second": 352.798, "eval_steps_per_second": 22.05, "step": 28100 }, { "epoch": 1.23, "learning_rate": 1.9748978024087435e-05, "loss": 1.8529, "step": 28110 }, { "epoch": 1.23, "learning_rate": 1.972937908360939e-05, "loss": 1.8372, "step": 28120 }, { "epoch": 1.23, "learning_rate": 1.970978510734185e-05, "loss": 1.8326, "step": 28130 }, { "epoch": 1.23, "learning_rate": 1.969019610475535e-05, "loss": 1.8487, "step": 28140 }, { "epoch": 1.23, "learning_rate": 1.9670612085318082e-05, "loss": 1.8219, "step": 28150 }, { "epoch": 1.23, "learning_rate": 1.9651033058495783e-05, "loss": 1.837, "step": 28160 }, { "epoch": 1.23, "learning_rate": 1.963145903375181e-05, "loss": 1.8009, "step": 28170 }, { "epoch": 1.23, "learning_rate": 1.9611890020547075e-05, "loss": 1.8739, "step": 28180 }, { "epoch": 1.23, "learning_rate": 1.9592326028340093e-05, "loss": 1.8071, "step": 28190 }, { "epoch": 1.23, "learning_rate": 1.9572767066586933e-05, "loss": 1.7504, "step": 28200 }, { "epoch": 1.23, "eval_loss": 1.830684781074524, "eval_runtime": 11.5059, "eval_samples_per_second": 355.991, "eval_steps_per_second": 22.249, "step": 28200 }, { "epoch": 1.23, "learning_rate": 1.955321314474124e-05, "loss": 1.8246, "step": 28210 }, { "epoch": 1.24, "learning_rate": 1.9533664272254245e-05, "loss": 1.8368, "step": 28220 }, { "epoch": 1.24, "learning_rate": 1.9514120458574667e-05, "loss": 1.8114, "step": 28230 }, { "epoch": 1.24, "learning_rate": 1.9494581713148883e-05, "loss": 1.7932, "step": 28240 }, { "epoch": 1.24, "learning_rate": 1.9475048045420723e-05, "loss": 1.8443, "step": 28250 }, { "epoch": 1.24, "learning_rate": 1.9455519464831643e-05, "loss": 1.8332, "step": 28260 }, { "epoch": 1.24, "learning_rate": 1.9435995980820576e-05, "loss": 1.8698, "step": 28270 }, { "epoch": 1.24, "learning_rate": 1.9416477602824035e-05, "loss": 1.8356, "step": 28280 }, { "epoch": 1.24, "learning_rate": 1.9396964340276034e-05, "loss": 1.8133, "step": 28290 }, { "epoch": 1.24, "learning_rate": 1.9377456202608148e-05, "loss": 1.8127, "step": 28300 }, { "epoch": 1.24, "eval_loss": 1.830805778503418, "eval_runtime": 11.4938, "eval_samples_per_second": 356.366, "eval_steps_per_second": 22.273, "step": 28300 }, { "epoch": 1.24, "learning_rate": 1.935795319924944e-05, "loss": 1.7979, "step": 28310 }, { "epoch": 1.24, "learning_rate": 1.933845533962652e-05, "loss": 1.8191, "step": 28320 }, { "epoch": 1.24, "learning_rate": 1.9318962633163493e-05, "loss": 1.8228, "step": 28330 }, { "epoch": 1.24, "learning_rate": 1.9299475089281988e-05, "loss": 1.8554, "step": 28340 }, { "epoch": 1.24, "learning_rate": 1.9279992717401114e-05, "loss": 1.8253, "step": 28350 }, { "epoch": 1.24, "learning_rate": 1.9260515526937517e-05, "loss": 1.8192, "step": 28360 }, { "epoch": 1.24, "learning_rate": 1.9241043527305306e-05, "loss": 1.8501, "step": 28370 }, { "epoch": 1.24, "learning_rate": 1.9221576727916107e-05, "loss": 1.8767, "step": 28380 }, { "epoch": 1.24, "learning_rate": 1.920211513817899e-05, "loss": 1.8047, "step": 28390 }, { "epoch": 1.24, "learning_rate": 1.918265876750057e-05, "loss": 1.8454, "step": 28400 }, { "epoch": 1.24, "eval_loss": 1.8304309844970703, "eval_runtime": 11.674, "eval_samples_per_second": 350.864, "eval_steps_per_second": 21.929, "step": 28400 }, { "epoch": 1.24, "learning_rate": 1.9163207625284877e-05, "loss": 1.8197, "step": 28410 }, { "epoch": 1.24, "learning_rate": 1.9143761720933473e-05, "loss": 1.849, "step": 28420 }, { "epoch": 1.24, "learning_rate": 1.9124321063845325e-05, "loss": 1.8015, "step": 28430 }, { "epoch": 1.24, "learning_rate": 1.910488566341692e-05, "loss": 1.8752, "step": 28440 }, { "epoch": 1.25, "learning_rate": 1.9085455529042175e-05, "loss": 1.834, "step": 28450 }, { "epoch": 1.25, "learning_rate": 1.9066030670112456e-05, "loss": 1.8128, "step": 28460 }, { "epoch": 1.25, "learning_rate": 1.9046611096016604e-05, "loss": 1.8009, "step": 28470 }, { "epoch": 1.25, "learning_rate": 1.9027196816140885e-05, "loss": 1.7636, "step": 28480 }, { "epoch": 1.25, "learning_rate": 1.9007787839869026e-05, "loss": 1.8176, "step": 28490 }, { "epoch": 1.25, "learning_rate": 1.8988384176582147e-05, "loss": 1.8369, "step": 28500 }, { "epoch": 1.25, "eval_loss": 1.8305020332336426, "eval_runtime": 11.6462, "eval_samples_per_second": 351.703, "eval_steps_per_second": 21.981, "step": 28500 }, { "epoch": 1.25, "learning_rate": 1.8968985835658877e-05, "loss": 1.7947, "step": 28510 }, { "epoch": 1.25, "learning_rate": 1.8949592826475173e-05, "loss": 1.7933, "step": 28520 }, { "epoch": 1.25, "learning_rate": 1.893020515840452e-05, "loss": 1.8181, "step": 28530 }, { "epoch": 1.25, "learning_rate": 1.8910822840817733e-05, "loss": 1.8049, "step": 28540 }, { "epoch": 1.25, "learning_rate": 1.8891445883083093e-05, "loss": 1.8141, "step": 28550 }, { "epoch": 1.25, "learning_rate": 1.8872074294566265e-05, "loss": 1.8061, "step": 28560 }, { "epoch": 1.25, "learning_rate": 1.8852708084630347e-05, "loss": 1.8378, "step": 28570 }, { "epoch": 1.25, "learning_rate": 1.88333472626358e-05, "loss": 1.8434, "step": 28580 }, { "epoch": 1.25, "learning_rate": 1.881399183794051e-05, "loss": 1.7783, "step": 28590 }, { "epoch": 1.25, "learning_rate": 1.879464181989974e-05, "loss": 1.8562, "step": 28600 }, { "epoch": 1.25, "eval_loss": 1.8305237293243408, "eval_runtime": 11.6453, "eval_samples_per_second": 351.728, "eval_steps_per_second": 21.983, "step": 28600 }, { "epoch": 1.25, "learning_rate": 1.877529721786615e-05, "loss": 1.8491, "step": 28610 }, { "epoch": 1.25, "learning_rate": 1.8755958041189765e-05, "loss": 1.835, "step": 28620 }, { "epoch": 1.25, "learning_rate": 1.8736624299218016e-05, "loss": 1.7899, "step": 28630 }, { "epoch": 1.25, "learning_rate": 1.8717296001295673e-05, "loss": 1.7965, "step": 28640 }, { "epoch": 1.25, "learning_rate": 1.8697973156764906e-05, "loss": 1.7844, "step": 28650 }, { "epoch": 1.25, "learning_rate": 1.867865577496521e-05, "loss": 1.8755, "step": 28660 }, { "epoch": 1.26, "learning_rate": 1.8659343865233497e-05, "loss": 1.7975, "step": 28670 }, { "epoch": 1.26, "learning_rate": 1.8640037436903977e-05, "loss": 1.8462, "step": 28680 }, { "epoch": 1.26, "learning_rate": 1.8620736499308255e-05, "loss": 1.833, "step": 28690 }, { "epoch": 1.26, "learning_rate": 1.8601441061775248e-05, "loss": 1.8273, "step": 28700 }, { "epoch": 1.26, "eval_loss": 1.830354928970337, "eval_runtime": 11.5397, "eval_samples_per_second": 354.948, "eval_steps_per_second": 22.184, "step": 28700 }, { "epoch": 1.26, "learning_rate": 1.8582151133631224e-05, "loss": 1.7919, "step": 28710 }, { "epoch": 1.26, "learning_rate": 1.856286672419981e-05, "loss": 1.8614, "step": 28720 }, { "epoch": 1.26, "learning_rate": 1.8543587842801934e-05, "loss": 1.8111, "step": 28730 }, { "epoch": 1.26, "learning_rate": 1.8524314498755872e-05, "loss": 1.8388, "step": 28740 }, { "epoch": 1.26, "learning_rate": 1.8505046701377218e-05, "loss": 1.7883, "step": 28750 }, { "epoch": 1.26, "learning_rate": 1.84857844599789e-05, "loss": 1.8307, "step": 28760 }, { "epoch": 1.26, "learning_rate": 1.846652778387111e-05, "loss": 1.8396, "step": 28770 }, { "epoch": 1.26, "learning_rate": 1.8447276682361428e-05, "loss": 1.8378, "step": 28780 }, { "epoch": 1.26, "learning_rate": 1.842803116475466e-05, "loss": 1.8442, "step": 28790 }, { "epoch": 1.26, "learning_rate": 1.8408791240352983e-05, "loss": 1.7593, "step": 28800 }, { "epoch": 1.26, "eval_loss": 1.8302619457244873, "eval_runtime": 11.3952, "eval_samples_per_second": 359.45, "eval_steps_per_second": 22.466, "step": 28800 }, { "epoch": 1.26, "learning_rate": 1.8389556918455813e-05, "loss": 1.8163, "step": 28810 }, { "epoch": 1.26, "learning_rate": 1.83703282083599e-05, "loss": 1.7921, "step": 28820 }, { "epoch": 1.26, "learning_rate": 1.835110511935925e-05, "loss": 1.8257, "step": 28830 }, { "epoch": 1.26, "learning_rate": 1.8331887660745182e-05, "loss": 1.7806, "step": 28840 }, { "epoch": 1.26, "learning_rate": 1.8312675841806263e-05, "loss": 1.8569, "step": 28850 }, { "epoch": 1.26, "learning_rate": 1.8293469671828365e-05, "loss": 1.8384, "step": 28860 }, { "epoch": 1.26, "learning_rate": 1.82742691600946e-05, "loss": 1.8023, "step": 28870 }, { "epoch": 1.26, "learning_rate": 1.8255074315885372e-05, "loss": 1.8437, "step": 28880 }, { "epoch": 1.26, "learning_rate": 1.8235885148478325e-05, "loss": 1.7939, "step": 28890 }, { "epoch": 1.27, "learning_rate": 1.8216701667148378e-05, "loss": 1.8193, "step": 28900 }, { "epoch": 1.27, "eval_loss": 1.830406904220581, "eval_runtime": 11.5645, "eval_samples_per_second": 354.187, "eval_steps_per_second": 22.137, "step": 28900 }, { "epoch": 1.27, "learning_rate": 1.819752388116767e-05, "loss": 1.8371, "step": 28910 }, { "epoch": 1.27, "learning_rate": 1.8178351799805637e-05, "loss": 1.8238, "step": 28920 }, { "epoch": 1.27, "learning_rate": 1.8159185432328906e-05, "loss": 1.8504, "step": 28930 }, { "epoch": 1.27, "learning_rate": 1.814002478800136e-05, "loss": 1.7753, "step": 28940 }, { "epoch": 1.27, "learning_rate": 1.812086987608414e-05, "loss": 1.8689, "step": 28950 }, { "epoch": 1.27, "learning_rate": 1.810172070583559e-05, "loss": 1.8385, "step": 28960 }, { "epoch": 1.27, "learning_rate": 1.8082577286511285e-05, "loss": 1.8529, "step": 28970 }, { "epoch": 1.27, "learning_rate": 1.8063439627364016e-05, "loss": 1.8337, "step": 28980 }, { "epoch": 1.27, "learning_rate": 1.804430773764381e-05, "loss": 1.7948, "step": 28990 }, { "epoch": 1.27, "learning_rate": 1.802518162659787e-05, "loss": 1.838, "step": 29000 }, { "epoch": 1.27, "eval_loss": 1.830385684967041, "eval_runtime": 11.7627, "eval_samples_per_second": 348.218, "eval_steps_per_second": 21.764, "step": 29000 }, { "epoch": 1.27, "learning_rate": 1.8006061303470648e-05, "loss": 1.7526, "step": 29010 }, { "epoch": 1.27, "learning_rate": 1.798694677750376e-05, "loss": 1.8202, "step": 29020 }, { "epoch": 1.27, "learning_rate": 1.796783805793606e-05, "loss": 1.8431, "step": 29030 }, { "epoch": 1.27, "learning_rate": 1.794873515400353e-05, "loss": 1.813, "step": 29040 }, { "epoch": 1.27, "learning_rate": 1.7929638074939433e-05, "loss": 1.8351, "step": 29050 }, { "epoch": 1.27, "learning_rate": 1.7910546829974127e-05, "loss": 1.8675, "step": 29060 }, { "epoch": 1.27, "learning_rate": 1.789146142833522e-05, "loss": 1.8152, "step": 29070 }, { "epoch": 1.27, "learning_rate": 1.787238187924744e-05, "loss": 1.8264, "step": 29080 }, { "epoch": 1.27, "learning_rate": 1.785330819193274e-05, "loss": 1.7769, "step": 29090 }, { "epoch": 1.27, "learning_rate": 1.7834240375610183e-05, "loss": 1.8131, "step": 29100 }, { "epoch": 1.27, "eval_loss": 1.830368995666504, "eval_runtime": 11.647, "eval_samples_per_second": 351.678, "eval_steps_per_second": 21.98, "step": 29100 }, { "epoch": 1.27, "learning_rate": 1.7815178439496047e-05, "loss": 1.8124, "step": 29110 }, { "epoch": 1.27, "learning_rate": 1.7796122392803727e-05, "loss": 1.8489, "step": 29120 }, { "epoch": 1.28, "learning_rate": 1.77770722447438e-05, "loss": 1.8363, "step": 29130 }, { "epoch": 1.28, "learning_rate": 1.7758028004523978e-05, "loss": 1.8315, "step": 29140 }, { "epoch": 1.28, "learning_rate": 1.773898968134912e-05, "loss": 1.8668, "step": 29150 }, { "epoch": 1.28, "learning_rate": 1.7719957284421228e-05, "loss": 1.8214, "step": 29160 }, { "epoch": 1.28, "learning_rate": 1.7700930822939424e-05, "loss": 1.78, "step": 29170 }, { "epoch": 1.28, "learning_rate": 1.7681910306099985e-05, "loss": 1.8128, "step": 29180 }, { "epoch": 1.28, "learning_rate": 1.766289574309629e-05, "loss": 1.8504, "step": 29190 }, { "epoch": 1.28, "learning_rate": 1.7643887143118875e-05, "loss": 1.7937, "step": 29200 }, { "epoch": 1.28, "eval_loss": 1.8301746845245361, "eval_runtime": 11.7318, "eval_samples_per_second": 349.135, "eval_steps_per_second": 21.821, "step": 29200 }, { "epoch": 1.28, "learning_rate": 1.7624884515355358e-05, "loss": 1.8176, "step": 29210 }, { "epoch": 1.28, "learning_rate": 1.760588786899049e-05, "loss": 1.8329, "step": 29220 }, { "epoch": 1.28, "learning_rate": 1.758689721320612e-05, "loss": 1.863, "step": 29230 }, { "epoch": 1.28, "learning_rate": 1.7567912557181218e-05, "loss": 1.8621, "step": 29240 }, { "epoch": 1.28, "learning_rate": 1.7548933910091834e-05, "loss": 1.8192, "step": 29250 }, { "epoch": 1.28, "learning_rate": 1.752996128111113e-05, "loss": 1.8057, "step": 29260 }, { "epoch": 1.28, "learning_rate": 1.751099467940934e-05, "loss": 1.8206, "step": 29270 }, { "epoch": 1.28, "learning_rate": 1.7492034114153825e-05, "loss": 1.8019, "step": 29280 }, { "epoch": 1.28, "learning_rate": 1.7473079594508966e-05, "loss": 1.8015, "step": 29290 }, { "epoch": 1.28, "learning_rate": 1.7454131129636273e-05, "loss": 1.8239, "step": 29300 }, { "epoch": 1.28, "eval_loss": 1.8301327228546143, "eval_runtime": 11.5499, "eval_samples_per_second": 354.636, "eval_steps_per_second": 22.165, "step": 29300 }, { "epoch": 1.28, "learning_rate": 1.7435188728694312e-05, "loss": 1.8463, "step": 29310 }, { "epoch": 1.28, "learning_rate": 1.741625240083873e-05, "loss": 1.8382, "step": 29320 }, { "epoch": 1.28, "learning_rate": 1.7397322155222203e-05, "loss": 1.8513, "step": 29330 }, { "epoch": 1.28, "learning_rate": 1.73783980009945e-05, "loss": 1.7766, "step": 29340 }, { "epoch": 1.28, "learning_rate": 1.735947994730245e-05, "loss": 1.8219, "step": 29350 }, { "epoch": 1.29, "learning_rate": 1.7340568003289917e-05, "loss": 1.851, "step": 29360 }, { "epoch": 1.29, "learning_rate": 1.7321662178097805e-05, "loss": 1.8086, "step": 29370 }, { "epoch": 1.29, "learning_rate": 1.7302762480864093e-05, "loss": 1.7966, "step": 29380 }, { "epoch": 1.29, "learning_rate": 1.7283868920723734e-05, "loss": 1.8585, "step": 29390 }, { "epoch": 1.29, "learning_rate": 1.726498150680881e-05, "loss": 1.8134, "step": 29400 }, { "epoch": 1.29, "eval_loss": 1.8302505016326904, "eval_runtime": 11.6196, "eval_samples_per_second": 352.509, "eval_steps_per_second": 22.032, "step": 29400 }, { "epoch": 1.29, "learning_rate": 1.7246100248248356e-05, "loss": 1.7909, "step": 29410 }, { "epoch": 1.29, "learning_rate": 1.7227225154168438e-05, "loss": 1.8333, "step": 29420 }, { "epoch": 1.29, "learning_rate": 1.7208356233692174e-05, "loss": 1.7974, "step": 29430 }, { "epoch": 1.29, "learning_rate": 1.718949349593969e-05, "loss": 1.833, "step": 29440 }, { "epoch": 1.29, "learning_rate": 1.717063695002812e-05, "loss": 1.8043, "step": 29450 }, { "epoch": 1.29, "learning_rate": 1.7151786605071588e-05, "loss": 1.8103, "step": 29460 }, { "epoch": 1.29, "learning_rate": 1.713294247018125e-05, "loss": 1.8118, "step": 29470 }, { "epoch": 1.29, "learning_rate": 1.7114104554465216e-05, "loss": 1.8458, "step": 29480 }, { "epoch": 1.29, "learning_rate": 1.709527286702867e-05, "loss": 1.8072, "step": 29490 }, { "epoch": 1.29, "learning_rate": 1.707644741697369e-05, "loss": 1.8057, "step": 29500 }, { "epoch": 1.29, "eval_loss": 1.8301050662994385, "eval_runtime": 11.921, "eval_samples_per_second": 343.596, "eval_steps_per_second": 21.475, "step": 29500 }, { "epoch": 1.29, "learning_rate": 1.7057628213399415e-05, "loss": 1.8512, "step": 29510 }, { "epoch": 1.29, "learning_rate": 1.703881526540191e-05, "loss": 1.7953, "step": 29520 }, { "epoch": 1.29, "learning_rate": 1.7020008582074257e-05, "loss": 1.8278, "step": 29530 }, { "epoch": 1.29, "learning_rate": 1.7001208172506487e-05, "loss": 1.7863, "step": 29540 }, { "epoch": 1.29, "learning_rate": 1.6982414045785624e-05, "loss": 1.8038, "step": 29550 }, { "epoch": 1.29, "learning_rate": 1.6963626210995608e-05, "loss": 1.861, "step": 29560 }, { "epoch": 1.29, "learning_rate": 1.6944844677217378e-05, "loss": 1.8666, "step": 29570 }, { "epoch": 1.29, "learning_rate": 1.6926069453528822e-05, "loss": 1.8175, "step": 29580 }, { "epoch": 1.3, "learning_rate": 1.690730054900478e-05, "loss": 1.8434, "step": 29590 }, { "epoch": 1.3, "learning_rate": 1.6888537972717008e-05, "loss": 1.8518, "step": 29600 }, { "epoch": 1.3, "eval_loss": 1.829789400100708, "eval_runtime": 11.5886, "eval_samples_per_second": 353.45, "eval_steps_per_second": 22.091, "step": 29600 }, { "epoch": 1.3, "learning_rate": 1.6869781733734234e-05, "loss": 1.8064, "step": 29610 }, { "epoch": 1.3, "learning_rate": 1.6851031841122114e-05, "loss": 1.8443, "step": 29620 }, { "epoch": 1.3, "learning_rate": 1.6832288303943254e-05, "loss": 1.8501, "step": 29630 }, { "epoch": 1.3, "learning_rate": 1.6813551131257154e-05, "loss": 1.8504, "step": 29640 }, { "epoch": 1.3, "learning_rate": 1.679482033212025e-05, "loss": 1.8269, "step": 29650 }, { "epoch": 1.3, "learning_rate": 1.6776095915585905e-05, "loss": 1.7929, "step": 29660 }, { "epoch": 1.3, "learning_rate": 1.6757377890704398e-05, "loss": 1.8159, "step": 29670 }, { "epoch": 1.3, "learning_rate": 1.6738666266522928e-05, "loss": 1.8405, "step": 29680 }, { "epoch": 1.3, "learning_rate": 1.671996105208556e-05, "loss": 1.7826, "step": 29690 }, { "epoch": 1.3, "learning_rate": 1.6701262256433304e-05, "loss": 1.8001, "step": 29700 }, { "epoch": 1.3, "eval_loss": 1.8297191858291626, "eval_runtime": 11.5768, "eval_samples_per_second": 353.81, "eval_steps_per_second": 22.113, "step": 29700 }, { "epoch": 1.3, "learning_rate": 1.6682569888604047e-05, "loss": 1.7877, "step": 29710 }, { "epoch": 1.3, "learning_rate": 1.666388395763259e-05, "loss": 1.8405, "step": 29720 }, { "epoch": 1.3, "learning_rate": 1.6645204472550576e-05, "loss": 1.8083, "step": 29730 }, { "epoch": 1.3, "learning_rate": 1.6626531442386598e-05, "loss": 1.7797, "step": 29740 }, { "epoch": 1.3, "learning_rate": 1.6607864876166048e-05, "loss": 1.8313, "step": 29750 }, { "epoch": 1.3, "learning_rate": 1.6589204782911287e-05, "loss": 1.798, "step": 29760 }, { "epoch": 1.3, "learning_rate": 1.6570551171641475e-05, "loss": 1.8381, "step": 29770 }, { "epoch": 1.3, "learning_rate": 1.6551904051372674e-05, "loss": 1.8251, "step": 29780 }, { "epoch": 1.3, "learning_rate": 1.6533263431117786e-05, "loss": 1.7904, "step": 29790 }, { "epoch": 1.3, "learning_rate": 1.6514629319886592e-05, "loss": 1.8533, "step": 29800 }, { "epoch": 1.3, "eval_loss": 1.8297039270401, "eval_runtime": 11.6268, "eval_samples_per_second": 352.288, "eval_steps_per_second": 22.018, "step": 29800 }, { "epoch": 1.3, "learning_rate": 1.6496001726685715e-05, "loss": 1.8153, "step": 29810 }, { "epoch": 1.31, "learning_rate": 1.647738066051865e-05, "loss": 1.866, "step": 29820 }, { "epoch": 1.31, "learning_rate": 1.6458766130385692e-05, "loss": 1.8188, "step": 29830 }, { "epoch": 1.31, "learning_rate": 1.6440158145284017e-05, "loss": 1.8115, "step": 29840 }, { "epoch": 1.31, "learning_rate": 1.642155671420762e-05, "loss": 1.8042, "step": 29850 }, { "epoch": 1.31, "learning_rate": 1.6402961846147346e-05, "loss": 1.8007, "step": 29860 }, { "epoch": 1.31, "learning_rate": 1.638437355009084e-05, "loss": 1.7936, "step": 29870 }, { "epoch": 1.31, "learning_rate": 1.6365791835022566e-05, "loss": 1.7836, "step": 29880 }, { "epoch": 1.31, "learning_rate": 1.6347216709923872e-05, "loss": 1.7985, "step": 29890 }, { "epoch": 1.31, "learning_rate": 1.6328648183772837e-05, "loss": 1.7926, "step": 29900 }, { "epoch": 1.31, "eval_loss": 1.82939612865448, "eval_runtime": 11.6247, "eval_samples_per_second": 352.353, "eval_steps_per_second": 22.022, "step": 29900 }, { "epoch": 1.31, "learning_rate": 1.6310086265544412e-05, "loss": 1.8354, "step": 29910 }, { "epoch": 1.31, "learning_rate": 1.6291530964210303e-05, "loss": 1.8529, "step": 29920 }, { "epoch": 1.31, "learning_rate": 1.6272982288739063e-05, "loss": 1.7827, "step": 29930 }, { "epoch": 1.31, "learning_rate": 1.6254440248096022e-05, "loss": 1.7849, "step": 29940 }, { "epoch": 1.31, "learning_rate": 1.623590485124331e-05, "loss": 1.836, "step": 29950 }, { "epoch": 1.31, "learning_rate": 1.621737610713983e-05, "loss": 1.7903, "step": 29960 }, { "epoch": 1.31, "learning_rate": 1.6198854024741286e-05, "loss": 1.7863, "step": 29970 }, { "epoch": 1.31, "learning_rate": 1.6180338613000155e-05, "loss": 1.7768, "step": 29980 }, { "epoch": 1.31, "learning_rate": 1.6161829880865707e-05, "loss": 1.804, "step": 29990 }, { "epoch": 1.31, "learning_rate": 1.6143327837283946e-05, "loss": 1.7666, "step": 30000 }, { "epoch": 1.31, "eval_loss": 1.8295360803604126, "eval_runtime": 11.8624, "eval_samples_per_second": 345.294, "eval_steps_per_second": 21.581, "step": 30000 }, { "epoch": 1.31, "learning_rate": 1.6124832491197682e-05, "loss": 1.8302, "step": 30010 }, { "epoch": 1.31, "learning_rate": 1.610634385154644e-05, "loss": 1.7949, "step": 30020 }, { "epoch": 1.31, "learning_rate": 1.608786192726658e-05, "loss": 1.8113, "step": 30030 }, { "epoch": 1.32, "learning_rate": 1.606938672729114e-05, "loss": 1.8027, "step": 30040 }, { "epoch": 1.32, "learning_rate": 1.6050918260549955e-05, "loss": 1.7857, "step": 30050 }, { "epoch": 1.32, "learning_rate": 1.6032456535969576e-05, "loss": 1.8274, "step": 30060 }, { "epoch": 1.32, "learning_rate": 1.6014001562473305e-05, "loss": 1.8056, "step": 30070 }, { "epoch": 1.32, "learning_rate": 1.5995553348981197e-05, "loss": 1.7804, "step": 30080 }, { "epoch": 1.32, "learning_rate": 1.5977111904410034e-05, "loss": 1.8346, "step": 30090 }, { "epoch": 1.32, "learning_rate": 1.5958677237673295e-05, "loss": 1.8124, "step": 30100 }, { "epoch": 1.32, "eval_loss": 1.8036388158798218, "eval_runtime": 12.2283, "eval_samples_per_second": 334.96, "eval_steps_per_second": 20.935, "step": 30100 }, { "epoch": 1.32, "learning_rate": 1.594024935768122e-05, "loss": 1.7914, "step": 30110 }, { "epoch": 1.32, "learning_rate": 1.5921828273340768e-05, "loss": 1.817, "step": 30120 }, { "epoch": 1.32, "learning_rate": 1.590341399355558e-05, "loss": 1.8477, "step": 30130 }, { "epoch": 1.32, "learning_rate": 1.588500652722605e-05, "loss": 1.8133, "step": 30140 }, { "epoch": 1.32, "learning_rate": 1.586660588324923e-05, "loss": 1.8079, "step": 30150 }, { "epoch": 1.32, "learning_rate": 1.5848212070518923e-05, "loss": 1.8397, "step": 30160 }, { "epoch": 1.32, "learning_rate": 1.5829825097925605e-05, "loss": 1.8179, "step": 30170 }, { "epoch": 1.32, "learning_rate": 1.5811444974356466e-05, "loss": 1.8638, "step": 30180 }, { "epoch": 1.32, "learning_rate": 1.579307170869534e-05, "loss": 1.8601, "step": 30190 }, { "epoch": 1.32, "learning_rate": 1.5774705309822796e-05, "loss": 1.8226, "step": 30200 }, { "epoch": 1.32, "eval_loss": 1.8035895824432373, "eval_runtime": 13.4266, "eval_samples_per_second": 305.066, "eval_steps_per_second": 19.067, "step": 30200 }, { "epoch": 1.32, "learning_rate": 1.575634578661606e-05, "loss": 1.8057, "step": 30210 }, { "epoch": 1.32, "learning_rate": 1.573799314794905e-05, "loss": 1.8171, "step": 30220 }, { "epoch": 1.32, "learning_rate": 1.571964740269233e-05, "loss": 1.8724, "step": 30230 }, { "epoch": 1.32, "learning_rate": 1.570130855971315e-05, "loss": 1.8347, "step": 30240 }, { "epoch": 1.32, "learning_rate": 1.5682976627875423e-05, "loss": 1.8281, "step": 30250 }, { "epoch": 1.32, "learning_rate": 1.566465161603974e-05, "loss": 1.8488, "step": 30260 }, { "epoch": 1.33, "learning_rate": 1.56463335330633e-05, "loss": 1.8638, "step": 30270 }, { "epoch": 1.33, "learning_rate": 1.5628022387799995e-05, "loss": 1.8497, "step": 30280 }, { "epoch": 1.33, "learning_rate": 1.5609718189100322e-05, "loss": 1.796, "step": 30290 }, { "epoch": 1.33, "learning_rate": 1.5591420945811503e-05, "loss": 1.8429, "step": 30300 }, { "epoch": 1.33, "eval_loss": 1.8034627437591553, "eval_runtime": 11.7516, "eval_samples_per_second": 348.548, "eval_steps_per_second": 21.784, "step": 30300 }, { "epoch": 1.33, "learning_rate": 1.5573130666777293e-05, "loss": 1.8275, "step": 30310 }, { "epoch": 1.33, "learning_rate": 1.5554847360838164e-05, "loss": 1.8428, "step": 30320 }, { "epoch": 1.33, "learning_rate": 1.5536571036831148e-05, "loss": 1.7699, "step": 30330 }, { "epoch": 1.33, "learning_rate": 1.5518301703589967e-05, "loss": 1.7879, "step": 30340 }, { "epoch": 1.33, "learning_rate": 1.550003936994494e-05, "loss": 1.8352, "step": 30350 }, { "epoch": 1.33, "learning_rate": 1.5481784044722975e-05, "loss": 1.8067, "step": 30360 }, { "epoch": 1.33, "learning_rate": 1.5463535736747636e-05, "loss": 1.7945, "step": 30370 }, { "epoch": 1.33, "learning_rate": 1.5445294454839047e-05, "loss": 1.8567, "step": 30380 }, { "epoch": 1.33, "learning_rate": 1.5427060207814008e-05, "loss": 1.8136, "step": 30390 }, { "epoch": 1.33, "learning_rate": 1.540883300448584e-05, "loss": 1.8147, "step": 30400 }, { "epoch": 1.33, "eval_loss": 1.803378939628601, "eval_runtime": 11.9608, "eval_samples_per_second": 342.451, "eval_steps_per_second": 21.403, "step": 30400 }, { "epoch": 1.33, "learning_rate": 1.5390612853664515e-05, "loss": 1.7916, "step": 30410 }, { "epoch": 1.33, "learning_rate": 1.537239976415656e-05, "loss": 1.7759, "step": 30420 }, { "epoch": 1.33, "learning_rate": 1.5354193744765113e-05, "loss": 1.8284, "step": 30430 }, { "epoch": 1.33, "learning_rate": 1.533599480428988e-05, "loss": 1.7931, "step": 30440 }, { "epoch": 1.33, "learning_rate": 1.5317802951527177e-05, "loss": 1.8196, "step": 30450 }, { "epoch": 1.33, "learning_rate": 1.5299618195269837e-05, "loss": 1.8148, "step": 30460 }, { "epoch": 1.33, "learning_rate": 1.5281440544307304e-05, "loss": 1.835, "step": 30470 }, { "epoch": 1.33, "learning_rate": 1.526327000742559e-05, "loss": 1.7835, "step": 30480 }, { "epoch": 1.33, "learning_rate": 1.5245106593407258e-05, "loss": 1.7868, "step": 30490 }, { "epoch": 1.34, "learning_rate": 1.522695031103141e-05, "loss": 1.8516, "step": 30500 }, { "epoch": 1.34, "eval_loss": 1.8033866882324219, "eval_runtime": 11.923, "eval_samples_per_second": 343.536, "eval_steps_per_second": 21.471, "step": 30500 }, { "epoch": 1.34, "learning_rate": 1.5208801169073735e-05, "loss": 1.8361, "step": 30510 }, { "epoch": 1.34, "learning_rate": 1.5190659176306442e-05, "loss": 1.8378, "step": 30520 }, { "epoch": 1.34, "learning_rate": 1.5172524341498316e-05, "loss": 1.8046, "step": 30530 }, { "epoch": 1.34, "learning_rate": 1.5154396673414641e-05, "loss": 1.8332, "step": 30540 }, { "epoch": 1.34, "learning_rate": 1.513627618081728e-05, "loss": 1.8318, "step": 30550 }, { "epoch": 1.34, "learning_rate": 1.5118162872464576e-05, "loss": 1.7733, "step": 30560 }, { "epoch": 1.34, "learning_rate": 1.5100056757111473e-05, "loss": 1.8487, "step": 30570 }, { "epoch": 1.34, "learning_rate": 1.5081957843509373e-05, "loss": 1.8064, "step": 30580 }, { "epoch": 1.34, "learning_rate": 1.5063866140406211e-05, "loss": 1.8432, "step": 30590 }, { "epoch": 1.34, "learning_rate": 1.5045781656546451e-05, "loss": 1.823, "step": 30600 }, { "epoch": 1.34, "eval_loss": 1.8032710552215576, "eval_runtime": 12.014, "eval_samples_per_second": 340.936, "eval_steps_per_second": 21.309, "step": 30600 }, { "epoch": 1.34, "learning_rate": 1.502770440067107e-05, "loss": 1.7994, "step": 30610 }, { "epoch": 1.34, "learning_rate": 1.5009634381517554e-05, "loss": 1.871, "step": 30620 }, { "epoch": 1.34, "learning_rate": 1.4991571607819855e-05, "loss": 1.7873, "step": 30630 }, { "epoch": 1.34, "learning_rate": 1.4973516088308472e-05, "loss": 1.8297, "step": 30640 }, { "epoch": 1.34, "learning_rate": 1.4955467831710347e-05, "loss": 1.803, "step": 30650 }, { "epoch": 1.34, "learning_rate": 1.4937426846748978e-05, "loss": 1.7824, "step": 30660 }, { "epoch": 1.34, "learning_rate": 1.4919393142144276e-05, "loss": 1.8144, "step": 30670 }, { "epoch": 1.34, "learning_rate": 1.4901366726612695e-05, "loss": 1.8307, "step": 30680 }, { "epoch": 1.34, "learning_rate": 1.488334760886711e-05, "loss": 1.8081, "step": 30690 }, { "epoch": 1.34, "learning_rate": 1.4865335797616908e-05, "loss": 1.8277, "step": 30700 }, { "epoch": 1.34, "eval_loss": 1.8033111095428467, "eval_runtime": 11.7975, "eval_samples_per_second": 347.192, "eval_steps_per_second": 21.699, "step": 30700 }, { "epoch": 1.34, "learning_rate": 1.4847331301567937e-05, "loss": 1.8293, "step": 30710 }, { "epoch": 1.34, "learning_rate": 1.4829334129422513e-05, "loss": 1.8478, "step": 30720 }, { "epoch": 1.35, "learning_rate": 1.4811344289879381e-05, "loss": 1.8669, "step": 30730 }, { "epoch": 1.35, "learning_rate": 1.4793361791633779e-05, "loss": 1.8202, "step": 30740 }, { "epoch": 1.35, "learning_rate": 1.477538664337738e-05, "loss": 1.8127, "step": 30750 }, { "epoch": 1.35, "learning_rate": 1.475741885379832e-05, "loss": 1.8181, "step": 30760 }, { "epoch": 1.35, "learning_rate": 1.4739458431581146e-05, "loss": 1.8136, "step": 30770 }, { "epoch": 1.35, "learning_rate": 1.4721505385406865e-05, "loss": 1.8594, "step": 30780 }, { "epoch": 1.35, "learning_rate": 1.470355972395293e-05, "loss": 1.7994, "step": 30790 }, { "epoch": 1.35, "learning_rate": 1.4685621455893215e-05, "loss": 1.7717, "step": 30800 }, { "epoch": 1.35, "eval_loss": 1.8033769130706787, "eval_runtime": 12.8899, "eval_samples_per_second": 317.768, "eval_steps_per_second": 19.861, "step": 30800 }, { "epoch": 1.35, "learning_rate": 1.4667690589897995e-05, "loss": 1.8188, "step": 30810 }, { "epoch": 1.35, "learning_rate": 1.4649767134634016e-05, "loss": 1.8189, "step": 30820 }, { "epoch": 1.35, "learning_rate": 1.463185109876439e-05, "loss": 1.8468, "step": 30830 }, { "epoch": 1.35, "learning_rate": 1.4613942490948683e-05, "loss": 1.8815, "step": 30840 }, { "epoch": 1.35, "learning_rate": 1.4596041319842866e-05, "loss": 1.8377, "step": 30850 }, { "epoch": 1.35, "learning_rate": 1.4578147594099282e-05, "loss": 1.8201, "step": 30860 }, { "epoch": 1.35, "learning_rate": 1.4560261322366711e-05, "loss": 1.8229, "step": 30870 }, { "epoch": 1.35, "learning_rate": 1.4542382513290323e-05, "loss": 1.8585, "step": 30880 }, { "epoch": 1.35, "learning_rate": 1.4524511175511686e-05, "loss": 1.7989, "step": 30890 }, { "epoch": 1.35, "learning_rate": 1.4506647317668719e-05, "loss": 1.8391, "step": 30900 }, { "epoch": 1.35, "eval_loss": 1.8032344579696655, "eval_runtime": 12.1463, "eval_samples_per_second": 337.222, "eval_steps_per_second": 21.076, "step": 30900 }, { "epoch": 1.35, "learning_rate": 1.4488790948395783e-05, "loss": 1.8361, "step": 30910 }, { "epoch": 1.35, "learning_rate": 1.4470942076323553e-05, "loss": 1.8794, "step": 30920 }, { "epoch": 1.35, "learning_rate": 1.4453100710079167e-05, "loss": 1.8314, "step": 30930 }, { "epoch": 1.35, "learning_rate": 1.4435266858286048e-05, "loss": 1.7951, "step": 30940 }, { "epoch": 1.35, "learning_rate": 1.441744052956405e-05, "loss": 1.8046, "step": 30950 }, { "epoch": 1.36, "learning_rate": 1.4399621732529337e-05, "loss": 1.7914, "step": 30960 }, { "epoch": 1.36, "learning_rate": 1.4381810475794482e-05, "loss": 1.7874, "step": 30970 }, { "epoch": 1.36, "learning_rate": 1.4364006767968386e-05, "loss": 1.7981, "step": 30980 }, { "epoch": 1.36, "learning_rate": 1.434621061765632e-05, "loss": 1.8093, "step": 30990 }, { "epoch": 1.36, "learning_rate": 1.432842203345987e-05, "loss": 1.8229, "step": 31000 }, { "epoch": 1.36, "eval_loss": 1.8032114505767822, "eval_runtime": 12.0468, "eval_samples_per_second": 340.007, "eval_steps_per_second": 21.25, "step": 31000 }, { "epoch": 1.36, "learning_rate": 1.4310641023976996e-05, "loss": 1.8338, "step": 31010 }, { "epoch": 1.36, "learning_rate": 1.4292867597801983e-05, "loss": 1.8277, "step": 31020 }, { "epoch": 1.36, "learning_rate": 1.427510176352547e-05, "loss": 1.8275, "step": 31030 }, { "epoch": 1.36, "learning_rate": 1.425734352973438e-05, "loss": 1.7842, "step": 31040 }, { "epoch": 1.36, "learning_rate": 1.4239592905012024e-05, "loss": 1.8522, "step": 31050 }, { "epoch": 1.36, "learning_rate": 1.4221849897937976e-05, "loss": 1.8172, "step": 31060 }, { "epoch": 1.36, "learning_rate": 1.4204114517088168e-05, "loss": 1.7892, "step": 31070 }, { "epoch": 1.36, "learning_rate": 1.4186386771034842e-05, "loss": 1.8359, "step": 31080 }, { "epoch": 1.36, "learning_rate": 1.4168666668346526e-05, "loss": 1.8521, "step": 31090 }, { "epoch": 1.36, "learning_rate": 1.4150954217588076e-05, "loss": 1.7878, "step": 31100 }, { "epoch": 1.36, "eval_loss": 1.8032019138336182, "eval_runtime": 18.0994, "eval_samples_per_second": 226.306, "eval_steps_per_second": 14.144, "step": 31100 }, { "epoch": 1.36, "learning_rate": 1.4133249427320644e-05, "loss": 1.8111, "step": 31110 }, { "epoch": 1.36, "learning_rate": 1.4115552306101688e-05, "loss": 1.8599, "step": 31120 }, { "epoch": 1.36, "learning_rate": 1.4097862862484926e-05, "loss": 1.7708, "step": 31130 }, { "epoch": 1.36, "learning_rate": 1.4080181105020406e-05, "loss": 1.79, "step": 31140 }, { "epoch": 1.36, "learning_rate": 1.4062507042254434e-05, "loss": 1.8523, "step": 31150 }, { "epoch": 1.36, "learning_rate": 1.4044840682729622e-05, "loss": 1.7994, "step": 31160 }, { "epoch": 1.36, "learning_rate": 1.4027182034984823e-05, "loss": 1.8007, "step": 31170 }, { "epoch": 1.36, "learning_rate": 1.4009531107555202e-05, "loss": 1.7724, "step": 31180 }, { "epoch": 1.37, "learning_rate": 1.3991887908972142e-05, "loss": 1.7872, "step": 31190 }, { "epoch": 1.37, "learning_rate": 1.397425244776336e-05, "loss": 1.8078, "step": 31200 }, { "epoch": 1.37, "eval_loss": 1.80314040184021, "eval_runtime": 12.2739, "eval_samples_per_second": 333.717, "eval_steps_per_second": 20.857, "step": 31200 }, { "epoch": 1.37, "learning_rate": 1.3956624732452768e-05, "loss": 1.815, "step": 31210 }, { "epoch": 1.37, "learning_rate": 1.3939004771560581e-05, "loss": 1.8561, "step": 31220 }, { "epoch": 1.37, "learning_rate": 1.392139257360322e-05, "loss": 1.7922, "step": 31230 }, { "epoch": 1.37, "learning_rate": 1.3903788147093393e-05, "loss": 1.7885, "step": 31240 }, { "epoch": 1.37, "learning_rate": 1.3886191500540042e-05, "loss": 1.7975, "step": 31250 }, { "epoch": 1.37, "learning_rate": 1.386860264244835e-05, "loss": 1.8109, "step": 31260 }, { "epoch": 1.37, "learning_rate": 1.3851021581319709e-05, "loss": 1.7984, "step": 31270 }, { "epoch": 1.37, "learning_rate": 1.3833448325651776e-05, "loss": 1.8225, "step": 31280 }, { "epoch": 1.37, "learning_rate": 1.3815882883938435e-05, "loss": 1.7602, "step": 31290 }, { "epoch": 1.37, "learning_rate": 1.3798325264669757e-05, "loss": 1.786, "step": 31300 }, { "epoch": 1.37, "eval_loss": 1.802952766418457, "eval_runtime": 15.4326, "eval_samples_per_second": 265.413, "eval_steps_per_second": 16.588, "step": 31300 }, { "epoch": 1.37, "learning_rate": 1.3780775476332083e-05, "loss": 1.8364, "step": 31310 }, { "epoch": 1.37, "learning_rate": 1.3763233527407915e-05, "loss": 1.8026, "step": 31320 }, { "epoch": 1.37, "learning_rate": 1.3745699426376008e-05, "loss": 1.803, "step": 31330 }, { "epoch": 1.37, "learning_rate": 1.3728173181711306e-05, "loss": 1.808, "step": 31340 }, { "epoch": 1.37, "learning_rate": 1.3710654801884973e-05, "loss": 1.7936, "step": 31350 }, { "epoch": 1.37, "learning_rate": 1.3693144295364332e-05, "loss": 1.8277, "step": 31360 }, { "epoch": 1.37, "learning_rate": 1.3675641670612937e-05, "loss": 1.802, "step": 31370 }, { "epoch": 1.37, "learning_rate": 1.3658146936090526e-05, "loss": 1.8276, "step": 31380 }, { "epoch": 1.37, "learning_rate": 1.3640660100253026e-05, "loss": 1.7541, "step": 31390 }, { "epoch": 1.37, "learning_rate": 1.3623181171552512e-05, "loss": 1.8347, "step": 31400 }, { "epoch": 1.37, "eval_loss": 1.8029532432556152, "eval_runtime": 22.8254, "eval_samples_per_second": 179.449, "eval_steps_per_second": 11.216, "step": 31400 }, { "epoch": 1.37, "learning_rate": 1.3605710158437284e-05, "loss": 1.8161, "step": 31410 }, { "epoch": 1.38, "learning_rate": 1.3588247069351793e-05, "loss": 1.8135, "step": 31420 }, { "epoch": 1.38, "learning_rate": 1.357079191273667e-05, "loss": 1.8231, "step": 31430 }, { "epoch": 1.38, "learning_rate": 1.3553344697028687e-05, "loss": 1.8008, "step": 31440 }, { "epoch": 1.38, "learning_rate": 1.353590543066082e-05, "loss": 1.8755, "step": 31450 }, { "epoch": 1.38, "learning_rate": 1.3518474122062139e-05, "loss": 1.7891, "step": 31460 }, { "epoch": 1.38, "learning_rate": 1.3501050779657955e-05, "loss": 1.7602, "step": 31470 }, { "epoch": 1.38, "learning_rate": 1.3483635411869648e-05, "loss": 1.7987, "step": 31480 }, { "epoch": 1.38, "learning_rate": 1.34662280271148e-05, "loss": 1.8161, "step": 31490 }, { "epoch": 1.38, "learning_rate": 1.3448828633807086e-05, "loss": 1.7987, "step": 31500 }, { "epoch": 1.38, "eval_loss": 1.8029508590698242, "eval_runtime": 13.1282, "eval_samples_per_second": 311.999, "eval_steps_per_second": 19.5, "step": 31500 }, { "epoch": 1.38, "learning_rate": 1.343143724035635e-05, "loss": 1.8132, "step": 31510 }, { "epoch": 1.38, "learning_rate": 1.3414053855168574e-05, "loss": 1.7628, "step": 31520 }, { "epoch": 1.38, "learning_rate": 1.3396678486645861e-05, "loss": 1.8087, "step": 31530 }, { "epoch": 1.38, "learning_rate": 1.3379311143186428e-05, "loss": 1.8384, "step": 31540 }, { "epoch": 1.38, "learning_rate": 1.3361951833184595e-05, "loss": 1.8143, "step": 31550 }, { "epoch": 1.38, "learning_rate": 1.3344600565030876e-05, "loss": 1.784, "step": 31560 }, { "epoch": 1.38, "learning_rate": 1.3327257347111809e-05, "loss": 1.8613, "step": 31570 }, { "epoch": 1.38, "learning_rate": 1.3309922187810103e-05, "loss": 1.817, "step": 31580 }, { "epoch": 1.38, "learning_rate": 1.3292595095504527e-05, "loss": 1.8067, "step": 31590 }, { "epoch": 1.38, "learning_rate": 1.3275276078569982e-05, "loss": 1.8025, "step": 31600 }, { "epoch": 1.38, "eval_loss": 1.8028916120529175, "eval_runtime": 12.1557, "eval_samples_per_second": 336.961, "eval_steps_per_second": 21.06, "step": 31600 }, { "epoch": 1.38, "learning_rate": 1.3257965145377457e-05, "loss": 1.8405, "step": 31610 }, { "epoch": 1.38, "learning_rate": 1.3240662304294046e-05, "loss": 1.8196, "step": 31620 }, { "epoch": 1.38, "learning_rate": 1.3223367563682894e-05, "loss": 1.8064, "step": 31630 }, { "epoch": 1.39, "learning_rate": 1.3206080931903264e-05, "loss": 1.8144, "step": 31640 }, { "epoch": 1.39, "learning_rate": 1.3188802417310497e-05, "loss": 1.7973, "step": 31650 }, { "epoch": 1.39, "learning_rate": 1.3171532028256013e-05, "loss": 1.8425, "step": 31660 }, { "epoch": 1.39, "learning_rate": 1.3154269773087273e-05, "loss": 1.7956, "step": 31670 }, { "epoch": 1.39, "learning_rate": 1.313701566014784e-05, "loss": 1.8636, "step": 31680 }, { "epoch": 1.39, "learning_rate": 1.3119769697777336e-05, "loss": 1.8104, "step": 31690 }, { "epoch": 1.39, "learning_rate": 1.3102531894311445e-05, "loss": 1.8135, "step": 31700 }, { "epoch": 1.39, "eval_loss": 1.8029296398162842, "eval_runtime": 13.8361, "eval_samples_per_second": 296.037, "eval_steps_per_second": 18.502, "step": 31700 }, { "epoch": 1.39, "learning_rate": 1.3085302258081881e-05, "loss": 1.8348, "step": 31710 }, { "epoch": 1.39, "learning_rate": 1.3068080797416454e-05, "loss": 1.828, "step": 31720 }, { "epoch": 1.39, "learning_rate": 1.3050867520638964e-05, "loss": 1.8008, "step": 31730 }, { "epoch": 1.39, "learning_rate": 1.3033662436069339e-05, "loss": 1.8172, "step": 31740 }, { "epoch": 1.39, "learning_rate": 1.3016465552023458e-05, "loss": 1.839, "step": 31750 }, { "epoch": 1.39, "learning_rate": 1.2999276876813308e-05, "loss": 1.8226, "step": 31760 }, { "epoch": 1.39, "learning_rate": 1.298209641874685e-05, "loss": 1.8308, "step": 31770 }, { "epoch": 1.39, "learning_rate": 1.2964924186128115e-05, "loss": 1.8015, "step": 31780 }, { "epoch": 1.39, "learning_rate": 1.2947760187257149e-05, "loss": 1.792, "step": 31790 }, { "epoch": 1.39, "learning_rate": 1.2930604430430003e-05, "loss": 1.8778, "step": 31800 }, { "epoch": 1.39, "eval_loss": 1.8028862476348877, "eval_runtime": 12.2991, "eval_samples_per_second": 333.031, "eval_steps_per_second": 20.814, "step": 31800 }, { "epoch": 1.39, "learning_rate": 1.2913456923938766e-05, "loss": 1.8518, "step": 31810 }, { "epoch": 1.39, "learning_rate": 1.2896317676071497e-05, "loss": 1.8393, "step": 31820 }, { "epoch": 1.39, "learning_rate": 1.2879186695112344e-05, "loss": 1.806, "step": 31830 }, { "epoch": 1.39, "learning_rate": 1.2862063989341374e-05, "loss": 1.8155, "step": 31840 }, { "epoch": 1.39, "learning_rate": 1.2844949567034711e-05, "loss": 1.8201, "step": 31850 }, { "epoch": 1.39, "learning_rate": 1.282784343646443e-05, "loss": 1.8248, "step": 31860 }, { "epoch": 1.4, "learning_rate": 1.2810745605898638e-05, "loss": 1.7913, "step": 31870 }, { "epoch": 1.4, "learning_rate": 1.279365608360141e-05, "loss": 1.789, "step": 31880 }, { "epoch": 1.4, "learning_rate": 1.2776574877832827e-05, "loss": 1.7382, "step": 31890 }, { "epoch": 1.4, "learning_rate": 1.2759501996848903e-05, "loss": 1.815, "step": 31900 }, { "epoch": 1.4, "eval_loss": 1.8028854131698608, "eval_runtime": 11.9394, "eval_samples_per_second": 343.066, "eval_steps_per_second": 21.442, "step": 31900 }, { "epoch": 1.4, "learning_rate": 1.2742437448901678e-05, "loss": 1.7857, "step": 31910 }, { "epoch": 1.4, "learning_rate": 1.2725381242239139e-05, "loss": 1.791, "step": 31920 }, { "epoch": 1.4, "learning_rate": 1.2708333385105263e-05, "loss": 1.8129, "step": 31930 }, { "epoch": 1.4, "learning_rate": 1.2691293885739948e-05, "loss": 1.8506, "step": 31940 }, { "epoch": 1.4, "learning_rate": 1.2674262752379095e-05, "loss": 1.8381, "step": 31950 }, { "epoch": 1.4, "learning_rate": 1.2657239993254544e-05, "loss": 1.7665, "step": 31960 }, { "epoch": 1.4, "learning_rate": 1.2640225616594102e-05, "loss": 1.8019, "step": 31970 }, { "epoch": 1.4, "learning_rate": 1.262321963062149e-05, "loss": 1.823, "step": 31980 }, { "epoch": 1.4, "learning_rate": 1.2606222043556413e-05, "loss": 1.8168, "step": 31990 }, { "epoch": 1.4, "learning_rate": 1.2589232863614483e-05, "loss": 1.8158, "step": 32000 }, { "epoch": 1.4, "eval_loss": 1.802783727645874, "eval_runtime": 11.8236, "eval_samples_per_second": 346.426, "eval_steps_per_second": 21.652, "step": 32000 }, { "epoch": 1.4, "learning_rate": 1.2572252099007274e-05, "loss": 1.8244, "step": 32010 }, { "epoch": 1.4, "learning_rate": 1.2555279757942292e-05, "loss": 1.8271, "step": 32020 }, { "epoch": 1.4, "learning_rate": 1.2538315848622944e-05, "loss": 1.8189, "step": 32030 }, { "epoch": 1.4, "learning_rate": 1.2521360379248586e-05, "loss": 1.805, "step": 32040 }, { "epoch": 1.4, "learning_rate": 1.2504413358014493e-05, "loss": 1.798, "step": 32050 }, { "epoch": 1.4, "learning_rate": 1.248747479311186e-05, "loss": 1.8368, "step": 32060 }, { "epoch": 1.4, "learning_rate": 1.2470544692727772e-05, "loss": 1.8545, "step": 32070 }, { "epoch": 1.4, "learning_rate": 1.2453623065045249e-05, "loss": 1.8446, "step": 32080 }, { "epoch": 1.4, "learning_rate": 1.2436709918243177e-05, "loss": 1.8307, "step": 32090 }, { "epoch": 1.41, "learning_rate": 1.2419805260496419e-05, "loss": 1.8157, "step": 32100 }, { "epoch": 1.41, "eval_loss": 1.8028050661087036, "eval_runtime": 12.0893, "eval_samples_per_second": 338.811, "eval_steps_per_second": 21.176, "step": 32100 }, { "epoch": 1.41, "learning_rate": 1.2402909099975648e-05, "loss": 1.8266, "step": 32110 }, { "epoch": 1.41, "learning_rate": 1.2386021444847493e-05, "loss": 1.821, "step": 32120 }, { "epoch": 1.41, "learning_rate": 1.236914230327442e-05, "loss": 1.8437, "step": 32130 }, { "epoch": 1.41, "learning_rate": 1.2352271683414824e-05, "loss": 1.7839, "step": 32140 }, { "epoch": 1.41, "learning_rate": 1.2335409593422967e-05, "loss": 1.7678, "step": 32150 }, { "epoch": 1.41, "learning_rate": 1.2318556041448987e-05, "loss": 1.8096, "step": 32160 }, { "epoch": 1.41, "learning_rate": 1.2301711035638879e-05, "loss": 1.8305, "step": 32170 }, { "epoch": 1.41, "learning_rate": 1.228487458413453e-05, "loss": 1.8427, "step": 32180 }, { "epoch": 1.41, "learning_rate": 1.2268046695073689e-05, "loss": 1.8266, "step": 32190 }, { "epoch": 1.41, "learning_rate": 1.225122737658997e-05, "loss": 1.8164, "step": 32200 }, { "epoch": 1.41, "eval_loss": 1.802794337272644, "eval_runtime": 12.9249, "eval_samples_per_second": 316.908, "eval_steps_per_second": 19.807, "step": 32200 }, { "epoch": 1.41, "learning_rate": 1.2234416636812812e-05, "loss": 1.7873, "step": 32210 }, { "epoch": 1.41, "learning_rate": 1.2217614483867556e-05, "loss": 1.863, "step": 32220 }, { "epoch": 1.41, "learning_rate": 1.220082092587534e-05, "loss": 1.8111, "step": 32230 }, { "epoch": 1.41, "learning_rate": 1.2184035970953213e-05, "loss": 1.8467, "step": 32240 }, { "epoch": 1.41, "learning_rate": 1.216725962721401e-05, "loss": 1.8348, "step": 32250 }, { "epoch": 1.41, "learning_rate": 1.2150491902766414e-05, "loss": 1.8502, "step": 32260 }, { "epoch": 1.41, "learning_rate": 1.2133732805714963e-05, "loss": 1.7893, "step": 32270 }, { "epoch": 1.41, "learning_rate": 1.2116982344160005e-05, "loss": 1.8066, "step": 32280 }, { "epoch": 1.41, "learning_rate": 1.2100240526197742e-05, "loss": 1.835, "step": 32290 }, { "epoch": 1.41, "learning_rate": 1.2083507359920152e-05, "loss": 1.8376, "step": 32300 }, { "epoch": 1.41, "eval_loss": 1.8027287721633911, "eval_runtime": 11.923, "eval_samples_per_second": 343.538, "eval_steps_per_second": 21.471, "step": 32300 }, { "epoch": 1.41, "learning_rate": 1.2066782853415066e-05, "loss": 1.775, "step": 32310 }, { "epoch": 1.41, "learning_rate": 1.2050067014766129e-05, "loss": 1.7925, "step": 32320 }, { "epoch": 1.42, "learning_rate": 1.2033359852052793e-05, "loss": 1.8244, "step": 32330 }, { "epoch": 1.42, "learning_rate": 1.2016661373350291e-05, "loss": 1.7604, "step": 32340 }, { "epoch": 1.42, "learning_rate": 1.1999971586729705e-05, "loss": 1.8296, "step": 32350 }, { "epoch": 1.42, "learning_rate": 1.1983290500257852e-05, "loss": 1.7692, "step": 32360 }, { "epoch": 1.42, "learning_rate": 1.1966618121997428e-05, "loss": 1.8503, "step": 32370 }, { "epoch": 1.42, "learning_rate": 1.1949954460006843e-05, "loss": 1.8422, "step": 32380 }, { "epoch": 1.42, "learning_rate": 1.1933299522340345e-05, "loss": 1.8154, "step": 32390 }, { "epoch": 1.42, "learning_rate": 1.1916653317047927e-05, "loss": 1.8168, "step": 32400 }, { "epoch": 1.42, "eval_loss": 1.8027868270874023, "eval_runtime": 12.029, "eval_samples_per_second": 340.51, "eval_steps_per_second": 21.282, "step": 32400 }, { "epoch": 1.42, "learning_rate": 1.1900015852175388e-05, "loss": 1.7937, "step": 32410 }, { "epoch": 1.42, "learning_rate": 1.1883387135764296e-05, "loss": 1.822, "step": 32420 }, { "epoch": 1.42, "learning_rate": 1.1866767175851998e-05, "loss": 1.8388, "step": 32430 }, { "epoch": 1.42, "learning_rate": 1.1850155980471576e-05, "loss": 1.7809, "step": 32440 }, { "epoch": 1.42, "learning_rate": 1.1833553557651913e-05, "loss": 1.797, "step": 32450 }, { "epoch": 1.42, "learning_rate": 1.1816959915417634e-05, "loss": 1.841, "step": 32460 }, { "epoch": 1.42, "learning_rate": 1.1800375061789142e-05, "loss": 1.7972, "step": 32470 }, { "epoch": 1.42, "learning_rate": 1.1783799004782554e-05, "loss": 1.8139, "step": 32480 }, { "epoch": 1.42, "learning_rate": 1.1767231752409753e-05, "loss": 1.8422, "step": 32490 }, { "epoch": 1.42, "learning_rate": 1.1750673312678374e-05, "loss": 1.8647, "step": 32500 }, { "epoch": 1.42, "eval_loss": 1.802736759185791, "eval_runtime": 11.754, "eval_samples_per_second": 348.478, "eval_steps_per_second": 21.78, "step": 32500 }, { "epoch": 1.42, "learning_rate": 1.173412369359179e-05, "loss": 1.8132, "step": 32510 }, { "epoch": 1.42, "learning_rate": 1.1717582903149114e-05, "loss": 1.7861, "step": 32520 }, { "epoch": 1.42, "learning_rate": 1.1701050949345166e-05, "loss": 1.8471, "step": 32530 }, { "epoch": 1.42, "learning_rate": 1.1684527840170532e-05, "loss": 1.816, "step": 32540 }, { "epoch": 1.42, "learning_rate": 1.1668013583611494e-05, "loss": 1.8111, "step": 32550 }, { "epoch": 1.43, "learning_rate": 1.1651508187650084e-05, "loss": 1.7778, "step": 32560 }, { "epoch": 1.43, "learning_rate": 1.1635011660264008e-05, "loss": 1.8028, "step": 32570 }, { "epoch": 1.43, "learning_rate": 1.161852400942673e-05, "loss": 1.8269, "step": 32580 }, { "epoch": 1.43, "learning_rate": 1.1602045243107399e-05, "loss": 1.8296, "step": 32590 }, { "epoch": 1.43, "learning_rate": 1.1585575369270884e-05, "loss": 1.8497, "step": 32600 }, { "epoch": 1.43, "eval_loss": 1.797649621963501, "eval_runtime": 12.0427, "eval_samples_per_second": 340.123, "eval_steps_per_second": 21.258, "step": 32600 }, { "epoch": 1.43, "learning_rate": 1.1569114395877735e-05, "loss": 1.7884, "step": 32610 }, { "epoch": 1.43, "learning_rate": 1.1552662330884225e-05, "loss": 1.8655, "step": 32620 }, { "epoch": 1.43, "learning_rate": 1.1536219182242281e-05, "loss": 1.8085, "step": 32630 }, { "epoch": 1.43, "learning_rate": 1.1519784957899591e-05, "loss": 1.8425, "step": 32640 }, { "epoch": 1.43, "learning_rate": 1.1503359665799452e-05, "loss": 1.7779, "step": 32650 }, { "epoch": 1.43, "learning_rate": 1.1486943313880902e-05, "loss": 1.8367, "step": 32660 }, { "epoch": 1.43, "learning_rate": 1.1470535910078613e-05, "loss": 1.8409, "step": 32670 }, { "epoch": 1.43, "learning_rate": 1.1454137462322965e-05, "loss": 1.7905, "step": 32680 }, { "epoch": 1.43, "learning_rate": 1.1437747978539995e-05, "loss": 1.8579, "step": 32690 }, { "epoch": 1.43, "learning_rate": 1.1421367466651418e-05, "loss": 1.8186, "step": 32700 }, { "epoch": 1.43, "eval_loss": 1.7975997924804688, "eval_runtime": 12.4174, "eval_samples_per_second": 329.86, "eval_steps_per_second": 20.616, "step": 32700 }, { "epoch": 1.43, "learning_rate": 1.14049959345746e-05, "loss": 1.818, "step": 32710 }, { "epoch": 1.43, "learning_rate": 1.138863339022255e-05, "loss": 1.8632, "step": 32720 }, { "epoch": 1.43, "learning_rate": 1.1372279841503986e-05, "loss": 1.7715, "step": 32730 }, { "epoch": 1.43, "learning_rate": 1.1355935296323226e-05, "loss": 1.8207, "step": 32740 }, { "epoch": 1.43, "learning_rate": 1.133959976258027e-05, "loss": 1.825, "step": 32750 }, { "epoch": 1.43, "learning_rate": 1.1323273248170733e-05, "loss": 1.8418, "step": 32760 }, { "epoch": 1.43, "learning_rate": 1.1306955760985888e-05, "loss": 1.8071, "step": 32770 }, { "epoch": 1.43, "learning_rate": 1.1290647308912653e-05, "loss": 1.8258, "step": 32780 }, { "epoch": 1.44, "learning_rate": 1.1274347899833573e-05, "loss": 1.8413, "step": 32790 }, { "epoch": 1.44, "learning_rate": 1.12580575416268e-05, "loss": 1.8404, "step": 32800 }, { "epoch": 1.44, "eval_loss": 1.797699213027954, "eval_runtime": 13.4742, "eval_samples_per_second": 303.988, "eval_steps_per_second": 18.999, "step": 32800 }, { "epoch": 1.44, "learning_rate": 1.1241776242166136e-05, "loss": 1.8321, "step": 32810 }, { "epoch": 1.44, "learning_rate": 1.1225504009321002e-05, "loss": 1.8252, "step": 32820 }, { "epoch": 1.44, "learning_rate": 1.120924085095644e-05, "loss": 1.7925, "step": 32830 }, { "epoch": 1.44, "learning_rate": 1.1192986774933079e-05, "loss": 1.7826, "step": 32840 }, { "epoch": 1.44, "learning_rate": 1.1176741789107188e-05, "loss": 1.8035, "step": 32850 }, { "epoch": 1.44, "learning_rate": 1.1160505901330634e-05, "loss": 1.8092, "step": 32860 }, { "epoch": 1.44, "learning_rate": 1.114427911945089e-05, "loss": 1.8353, "step": 32870 }, { "epoch": 1.44, "learning_rate": 1.1128061451311007e-05, "loss": 1.7647, "step": 32880 }, { "epoch": 1.44, "learning_rate": 1.1111852904749665e-05, "loss": 1.8204, "step": 32890 }, { "epoch": 1.44, "learning_rate": 1.1095653487601097e-05, "loss": 1.7997, "step": 32900 }, { "epoch": 1.44, "eval_loss": 1.7977631092071533, "eval_runtime": 11.7853, "eval_samples_per_second": 347.552, "eval_steps_per_second": 21.722, "step": 32900 }, { "epoch": 1.44, "learning_rate": 1.1079463207695147e-05, "loss": 1.7835, "step": 32910 }, { "epoch": 1.44, "learning_rate": 1.1063282072857246e-05, "loss": 1.8071, "step": 32920 }, { "epoch": 1.44, "learning_rate": 1.1047110090908403e-05, "loss": 1.8531, "step": 32930 }, { "epoch": 1.44, "learning_rate": 1.1030947269665177e-05, "loss": 1.8684, "step": 32940 }, { "epoch": 1.44, "learning_rate": 1.1014793616939731e-05, "loss": 1.8099, "step": 32950 }, { "epoch": 1.44, "learning_rate": 1.0998649140539795e-05, "loss": 1.8266, "step": 32960 }, { "epoch": 1.44, "learning_rate": 1.0982513848268635e-05, "loss": 1.8116, "step": 32970 }, { "epoch": 1.44, "learning_rate": 1.0966387747925117e-05, "loss": 1.7971, "step": 32980 }, { "epoch": 1.44, "learning_rate": 1.0950270847303609e-05, "loss": 1.7828, "step": 32990 }, { "epoch": 1.44, "learning_rate": 1.093416315419411e-05, "loss": 1.8123, "step": 33000 }, { "epoch": 1.44, "eval_loss": 1.7976471185684204, "eval_runtime": 11.5471, "eval_samples_per_second": 354.72, "eval_steps_per_second": 22.17, "step": 33000 }, { "epoch": 1.45, "learning_rate": 1.0918064676382096e-05, "loss": 1.8024, "step": 33010 }, { "epoch": 1.45, "learning_rate": 1.090197542164864e-05, "loss": 1.7934, "step": 33020 }, { "epoch": 1.45, "learning_rate": 1.0885895397770312e-05, "loss": 1.8409, "step": 33030 }, { "epoch": 1.45, "learning_rate": 1.086982461251926e-05, "loss": 1.8419, "step": 33040 }, { "epoch": 1.45, "learning_rate": 1.0853763073663145e-05, "loss": 1.871, "step": 33050 }, { "epoch": 1.45, "learning_rate": 1.0837710788965178e-05, "loss": 1.8298, "step": 33060 }, { "epoch": 1.45, "learning_rate": 1.0821667766184062e-05, "loss": 1.8285, "step": 33070 }, { "epoch": 1.45, "learning_rate": 1.0805634013074057e-05, "loss": 1.7954, "step": 33080 }, { "epoch": 1.45, "learning_rate": 1.0789609537384931e-05, "loss": 1.8158, "step": 33090 }, { "epoch": 1.45, "learning_rate": 1.0773594346861972e-05, "loss": 1.8429, "step": 33100 }, { "epoch": 1.45, "eval_loss": 1.797677755355835, "eval_runtime": 11.8947, "eval_samples_per_second": 344.354, "eval_steps_per_second": 21.522, "step": 33100 }, { "epoch": 1.45, "learning_rate": 1.0757588449245962e-05, "loss": 1.804, "step": 33110 }, { "epoch": 1.45, "learning_rate": 1.074159185227321e-05, "loss": 1.8201, "step": 33120 }, { "epoch": 1.45, "learning_rate": 1.0725604563675529e-05, "loss": 1.8097, "step": 33130 }, { "epoch": 1.45, "learning_rate": 1.0709626591180235e-05, "loss": 1.7624, "step": 33140 }, { "epoch": 1.45, "learning_rate": 1.0693657942510116e-05, "loss": 1.8371, "step": 33150 }, { "epoch": 1.45, "learning_rate": 1.067769862538349e-05, "loss": 1.8046, "step": 33160 }, { "epoch": 1.45, "learning_rate": 1.066174864751413e-05, "loss": 1.8491, "step": 33170 }, { "epoch": 1.45, "learning_rate": 1.064580801661132e-05, "loss": 1.8462, "step": 33180 }, { "epoch": 1.45, "learning_rate": 1.0629876740379831e-05, "loss": 1.7958, "step": 33190 }, { "epoch": 1.45, "learning_rate": 1.0613954826519874e-05, "loss": 1.8365, "step": 33200 }, { "epoch": 1.45, "eval_loss": 1.7976925373077393, "eval_runtime": 11.8876, "eval_samples_per_second": 344.56, "eval_steps_per_second": 21.535, "step": 33200 }, { "epoch": 1.45, "learning_rate": 1.059804228272717e-05, "loss": 1.8233, "step": 33210 }, { "epoch": 1.45, "learning_rate": 1.0582139116692907e-05, "loss": 1.8124, "step": 33220 }, { "epoch": 1.45, "learning_rate": 1.0566245336103738e-05, "loss": 1.8068, "step": 33230 }, { "epoch": 1.46, "learning_rate": 1.055036094864176e-05, "loss": 1.8066, "step": 33240 }, { "epoch": 1.46, "learning_rate": 1.0534485961984566e-05, "loss": 1.7826, "step": 33250 }, { "epoch": 1.46, "learning_rate": 1.0518620383805156e-05, "loss": 1.8468, "step": 33260 }, { "epoch": 1.46, "learning_rate": 1.0502764221772047e-05, "loss": 1.8527, "step": 33270 }, { "epoch": 1.46, "learning_rate": 1.0486917483549145e-05, "loss": 1.8265, "step": 33280 }, { "epoch": 1.46, "learning_rate": 1.0471080176795845e-05, "loss": 1.7898, "step": 33290 }, { "epoch": 1.46, "learning_rate": 1.0455252309166941e-05, "loss": 1.8109, "step": 33300 }, { "epoch": 1.46, "eval_loss": 1.7976725101470947, "eval_runtime": 11.8849, "eval_samples_per_second": 344.639, "eval_steps_per_second": 21.54, "step": 33300 }, { "epoch": 1.46, "learning_rate": 1.0439433888312702e-05, "loss": 1.8551, "step": 33310 }, { "epoch": 1.46, "learning_rate": 1.0423624921878813e-05, "loss": 1.8199, "step": 33320 }, { "epoch": 1.46, "learning_rate": 1.0407825417506404e-05, "loss": 1.8212, "step": 33330 }, { "epoch": 1.46, "learning_rate": 1.0392035382832e-05, "loss": 1.8219, "step": 33340 }, { "epoch": 1.46, "learning_rate": 1.0376254825487582e-05, "loss": 1.75, "step": 33350 }, { "epoch": 1.46, "learning_rate": 1.0360483753100536e-05, "loss": 1.8406, "step": 33360 }, { "epoch": 1.46, "learning_rate": 1.0344722173293676e-05, "loss": 1.8288, "step": 33370 }, { "epoch": 1.46, "learning_rate": 1.0328970093685193e-05, "loss": 1.8457, "step": 33380 }, { "epoch": 1.46, "learning_rate": 1.0313227521888739e-05, "loss": 1.858, "step": 33390 }, { "epoch": 1.46, "learning_rate": 1.0297494465513307e-05, "loss": 1.8263, "step": 33400 }, { "epoch": 1.46, "eval_loss": 1.7975435256958008, "eval_runtime": 11.9333, "eval_samples_per_second": 343.24, "eval_steps_per_second": 21.453, "step": 33400 }, { "epoch": 1.46, "learning_rate": 1.0281770932163362e-05, "loss": 1.759, "step": 33410 }, { "epoch": 1.46, "learning_rate": 1.0266056929438715e-05, "loss": 1.8427, "step": 33420 }, { "epoch": 1.46, "learning_rate": 1.0250352464934572e-05, "loss": 1.8069, "step": 33430 }, { "epoch": 1.46, "learning_rate": 1.0234657546241556e-05, "loss": 1.7832, "step": 33440 }, { "epoch": 1.46, "learning_rate": 1.0218972180945653e-05, "loss": 1.8104, "step": 33450 }, { "epoch": 1.46, "learning_rate": 1.0203296376628262e-05, "loss": 1.8281, "step": 33460 }, { "epoch": 1.47, "learning_rate": 1.0187630140866115e-05, "loss": 1.7494, "step": 33470 }, { "epoch": 1.47, "learning_rate": 1.0171973481231352e-05, "loss": 1.8211, "step": 33480 }, { "epoch": 1.47, "learning_rate": 1.0156326405291476e-05, "loss": 1.8315, "step": 33490 }, { "epoch": 1.47, "learning_rate": 1.014068892060937e-05, "loss": 1.7995, "step": 33500 }, { "epoch": 1.47, "eval_loss": 1.7975175380706787, "eval_runtime": 11.6479, "eval_samples_per_second": 351.65, "eval_steps_per_second": 21.978, "step": 33500 }, { "epoch": 1.47, "learning_rate": 1.0125061034743244e-05, "loss": 1.7929, "step": 33510 }, { "epoch": 1.47, "learning_rate": 1.0109442755246719e-05, "loss": 1.8212, "step": 33520 }, { "epoch": 1.47, "learning_rate": 1.0093834089668715e-05, "loss": 1.8553, "step": 33530 }, { "epoch": 1.47, "learning_rate": 1.0078235045553576e-05, "loss": 1.7782, "step": 33540 }, { "epoch": 1.47, "learning_rate": 1.0062645630440931e-05, "loss": 1.8372, "step": 33550 }, { "epoch": 1.47, "learning_rate": 1.0047065851865797e-05, "loss": 1.8232, "step": 33560 }, { "epoch": 1.47, "learning_rate": 1.0031495717358497e-05, "loss": 1.8573, "step": 33570 }, { "epoch": 1.47, "learning_rate": 1.0015935234444728e-05, "loss": 1.8615, "step": 33580 }, { "epoch": 1.47, "learning_rate": 1.00003844106455e-05, "loss": 1.7935, "step": 33590 }, { "epoch": 1.47, "learning_rate": 9.984843253477175e-06, "loss": 1.7813, "step": 33600 }, { "epoch": 1.47, "eval_loss": 1.7975555658340454, "eval_runtime": 11.8662, "eval_samples_per_second": 345.182, "eval_steps_per_second": 21.574, "step": 33600 }, { "epoch": 1.47, "learning_rate": 9.96931177045141e-06, "loss": 1.8015, "step": 33610 }, { "epoch": 1.47, "learning_rate": 9.95378996907521e-06, "loss": 1.8304, "step": 33620 }, { "epoch": 1.47, "learning_rate": 9.938277856850903e-06, "loss": 1.8519, "step": 33630 }, { "epoch": 1.47, "learning_rate": 9.922775441276128e-06, "loss": 1.7916, "step": 33640 }, { "epoch": 1.47, "learning_rate": 9.907282729843818e-06, "loss": 1.8084, "step": 33650 }, { "epoch": 1.47, "learning_rate": 9.891799730042259e-06, "loss": 1.8415, "step": 33660 }, { "epoch": 1.47, "learning_rate": 9.87632644935499e-06, "loss": 1.8212, "step": 33670 }, { "epoch": 1.47, "learning_rate": 9.860862895260891e-06, "loss": 1.8296, "step": 33680 }, { "epoch": 1.47, "learning_rate": 9.845409075234143e-06, "loss": 1.8163, "step": 33690 }, { "epoch": 1.48, "learning_rate": 9.829964996744183e-06, "loss": 1.7989, "step": 33700 }, { "epoch": 1.48, "eval_loss": 1.7975475788116455, "eval_runtime": 11.8379, "eval_samples_per_second": 346.008, "eval_steps_per_second": 21.626, "step": 33700 }, { "epoch": 1.48, "learning_rate": 9.814530667255777e-06, "loss": 1.8075, "step": 33710 }, { "epoch": 1.48, "learning_rate": 9.799106094228968e-06, "loss": 1.8176, "step": 33720 }, { "epoch": 1.48, "learning_rate": 9.783691285119089e-06, "loss": 1.8055, "step": 33730 }, { "epoch": 1.48, "learning_rate": 9.768286247376732e-06, "loss": 1.7935, "step": 33740 }, { "epoch": 1.48, "learning_rate": 9.752890988447783e-06, "loss": 1.7922, "step": 33750 }, { "epoch": 1.48, "learning_rate": 9.737505515773408e-06, "loss": 1.8302, "step": 33760 }, { "epoch": 1.48, "learning_rate": 9.722129836790034e-06, "loss": 1.8262, "step": 33770 }, { "epoch": 1.48, "learning_rate": 9.706763958929344e-06, "loss": 1.8267, "step": 33780 }, { "epoch": 1.48, "learning_rate": 9.691407889618314e-06, "loss": 1.8011, "step": 33790 }, { "epoch": 1.48, "learning_rate": 9.67606163627912e-06, "loss": 1.7894, "step": 33800 }, { "epoch": 1.48, "eval_loss": 1.7975398302078247, "eval_runtime": 11.7714, "eval_samples_per_second": 347.963, "eval_steps_per_second": 21.748, "step": 33800 }, { "epoch": 1.48, "learning_rate": 9.660725206329281e-06, "loss": 1.8095, "step": 33810 }, { "epoch": 1.48, "learning_rate": 9.645398607181486e-06, "loss": 1.8024, "step": 33820 }, { "epoch": 1.48, "learning_rate": 9.630081846243732e-06, "loss": 1.857, "step": 33830 }, { "epoch": 1.48, "learning_rate": 9.614774930919208e-06, "loss": 1.8129, "step": 33840 }, { "epoch": 1.48, "learning_rate": 9.599477868606384e-06, "loss": 1.8335, "step": 33850 }, { "epoch": 1.48, "learning_rate": 9.584190666698953e-06, "loss": 1.7774, "step": 33860 }, { "epoch": 1.48, "learning_rate": 9.56891333258586e-06, "loss": 1.7853, "step": 33870 }, { "epoch": 1.48, "learning_rate": 9.553645873651238e-06, "loss": 1.7977, "step": 33880 }, { "epoch": 1.48, "learning_rate": 9.538388297274485e-06, "loss": 1.856, "step": 33890 }, { "epoch": 1.48, "learning_rate": 9.523140610830224e-06, "loss": 1.733, "step": 33900 }, { "epoch": 1.48, "eval_loss": 1.7975311279296875, "eval_runtime": 11.7099, "eval_samples_per_second": 349.789, "eval_steps_per_second": 21.862, "step": 33900 }, { "epoch": 1.48, "learning_rate": 9.507902821688261e-06, "loss": 1.8348, "step": 33910 }, { "epoch": 1.48, "learning_rate": 9.492674937213659e-06, "loss": 1.819, "step": 33920 }, { "epoch": 1.49, "learning_rate": 9.477456964766666e-06, "loss": 1.8414, "step": 33930 }, { "epoch": 1.49, "learning_rate": 9.46224891170275e-06, "loss": 1.8223, "step": 33940 }, { "epoch": 1.49, "learning_rate": 9.447050785372585e-06, "loss": 1.8267, "step": 33950 }, { "epoch": 1.49, "learning_rate": 9.431862593122065e-06, "loss": 1.8111, "step": 33960 }, { "epoch": 1.49, "learning_rate": 9.416684342292233e-06, "loss": 1.7722, "step": 33970 }, { "epoch": 1.49, "learning_rate": 9.40151604021937e-06, "loss": 1.8534, "step": 33980 }, { "epoch": 1.49, "learning_rate": 9.38635769423494e-06, "loss": 1.8012, "step": 33990 }, { "epoch": 1.49, "learning_rate": 9.371209311665597e-06, "loss": 1.788, "step": 34000 }, { "epoch": 1.49, "eval_loss": 1.7975867986679077, "eval_runtime": 11.6503, "eval_samples_per_second": 351.579, "eval_steps_per_second": 21.974, "step": 34000 }, { "epoch": 1.49, "learning_rate": 9.356070899833157e-06, "loss": 1.7657, "step": 34010 }, { "epoch": 1.49, "learning_rate": 9.340942466054636e-06, "loss": 1.8549, "step": 34020 }, { "epoch": 1.49, "learning_rate": 9.32582401764223e-06, "loss": 1.8214, "step": 34030 }, { "epoch": 1.49, "learning_rate": 9.310715561903305e-06, "loss": 1.8474, "step": 34040 }, { "epoch": 1.49, "learning_rate": 9.295617106140383e-06, "loss": 1.7682, "step": 34050 }, { "epoch": 1.49, "learning_rate": 9.28052865765118e-06, "loss": 1.895, "step": 34060 }, { "epoch": 1.49, "learning_rate": 9.265450223728538e-06, "loss": 1.8382, "step": 34070 }, { "epoch": 1.49, "learning_rate": 9.250381811660492e-06, "loss": 1.8462, "step": 34080 }, { "epoch": 1.49, "learning_rate": 9.235323428730223e-06, "loss": 1.847, "step": 34090 }, { "epoch": 1.49, "learning_rate": 9.220275082216066e-06, "loss": 1.8147, "step": 34100 }, { "epoch": 1.49, "eval_loss": 1.7975001335144043, "eval_runtime": 11.9247, "eval_samples_per_second": 343.489, "eval_steps_per_second": 21.468, "step": 34100 }, { "epoch": 1.49, "learning_rate": 9.20523677939149e-06, "loss": 1.8361, "step": 34110 }, { "epoch": 1.49, "learning_rate": 9.190208527525124e-06, "loss": 1.7882, "step": 34120 }, { "epoch": 1.49, "learning_rate": 9.17519033388074e-06, "loss": 1.7995, "step": 34130 }, { "epoch": 1.49, "learning_rate": 9.160182205717249e-06, "loss": 1.8362, "step": 34140 }, { "epoch": 1.49, "learning_rate": 9.145184150288683e-06, "loss": 1.8442, "step": 34150 }, { "epoch": 1.5, "learning_rate": 9.130196174844202e-06, "loss": 1.7843, "step": 34160 }, { "epoch": 1.5, "learning_rate": 9.11521828662814e-06, "loss": 1.8043, "step": 34170 }, { "epoch": 1.5, "learning_rate": 9.100250492879893e-06, "loss": 1.7229, "step": 34180 }, { "epoch": 1.5, "learning_rate": 9.085292800834028e-06, "loss": 1.8089, "step": 34190 }, { "epoch": 1.5, "learning_rate": 9.070345217720187e-06, "loss": 1.8034, "step": 34200 }, { "epoch": 1.5, "eval_loss": 1.7975050210952759, "eval_runtime": 11.8339, "eval_samples_per_second": 346.124, "eval_steps_per_second": 21.633, "step": 34200 }, { "epoch": 1.5, "learning_rate": 9.055407750763159e-06, "loss": 1.8414, "step": 34210 }, { "epoch": 1.5, "learning_rate": 9.040480407182829e-06, "loss": 1.833, "step": 34220 }, { "epoch": 1.5, "learning_rate": 9.025563194194207e-06, "loss": 1.8378, "step": 34230 }, { "epoch": 1.5, "learning_rate": 9.010656119007366e-06, "loss": 1.8454, "step": 34240 }, { "epoch": 1.5, "learning_rate": 8.995759188827512e-06, "loss": 1.8588, "step": 34250 }, { "epoch": 1.5, "learning_rate": 8.980872410854948e-06, "loss": 1.8293, "step": 34260 }, { "epoch": 1.5, "learning_rate": 8.965995792285069e-06, "loss": 1.7856, "step": 34270 }, { "epoch": 1.5, "learning_rate": 8.951129340308327e-06, "loss": 1.8215, "step": 34280 }, { "epoch": 1.5, "learning_rate": 8.936273062110308e-06, "loss": 1.7872, "step": 34290 }, { "epoch": 1.5, "learning_rate": 8.921426964871632e-06, "loss": 1.8376, "step": 34300 }, { "epoch": 1.5, "eval_loss": 1.7974214553833008, "eval_runtime": 11.8688, "eval_samples_per_second": 345.108, "eval_steps_per_second": 21.569, "step": 34300 }, { "epoch": 1.5, "learning_rate": 8.906591055768057e-06, "loss": 1.7878, "step": 34310 }, { "epoch": 1.5, "learning_rate": 8.891765341970363e-06, "loss": 1.8004, "step": 34320 }, { "epoch": 1.5, "learning_rate": 8.876949830644432e-06, "loss": 1.8028, "step": 34330 }, { "epoch": 1.5, "learning_rate": 8.862144528951194e-06, "loss": 1.7938, "step": 34340 }, { "epoch": 1.5, "learning_rate": 8.84734944404666e-06, "loss": 1.8146, "step": 34350 }, { "epoch": 1.5, "learning_rate": 8.832564583081906e-06, "loss": 1.7881, "step": 34360 }, { "epoch": 1.5, "learning_rate": 8.81778995320306e-06, "loss": 1.7841, "step": 34370 }, { "epoch": 1.5, "learning_rate": 8.803025561551289e-06, "loss": 1.8205, "step": 34380 }, { "epoch": 1.51, "learning_rate": 8.78827141526284e-06, "loss": 1.7638, "step": 34390 }, { "epoch": 1.51, "learning_rate": 8.773527521468994e-06, "loss": 1.7943, "step": 34400 }, { "epoch": 1.51, "eval_loss": 1.7974566221237183, "eval_runtime": 11.745, "eval_samples_per_second": 348.745, "eval_steps_per_second": 21.797, "step": 34400 }, { "epoch": 1.51, "learning_rate": 8.758793887296068e-06, "loss": 1.7452, "step": 34410 }, { "epoch": 1.51, "learning_rate": 8.74407051986544e-06, "loss": 1.8075, "step": 34420 }, { "epoch": 1.51, "learning_rate": 8.729357426293494e-06, "loss": 1.8142, "step": 34430 }, { "epoch": 1.51, "learning_rate": 8.7146546136917e-06, "loss": 1.8146, "step": 34440 }, { "epoch": 1.51, "learning_rate": 8.699962089166505e-06, "loss": 1.8135, "step": 34450 }, { "epoch": 1.51, "learning_rate": 8.685279859819418e-06, "loss": 1.7831, "step": 34460 }, { "epoch": 1.51, "learning_rate": 8.670607932746948e-06, "loss": 1.8292, "step": 34470 }, { "epoch": 1.51, "learning_rate": 8.655946315040642e-06, "loss": 1.7835, "step": 34480 }, { "epoch": 1.51, "learning_rate": 8.64129501378706e-06, "loss": 1.8169, "step": 34490 }, { "epoch": 1.51, "learning_rate": 8.626654036067783e-06, "loss": 1.8659, "step": 34500 }, { "epoch": 1.51, "eval_loss": 1.797457218170166, "eval_runtime": 11.691, "eval_samples_per_second": 350.355, "eval_steps_per_second": 21.897, "step": 34500 }, { "epoch": 1.51, "learning_rate": 8.612023388959377e-06, "loss": 1.8225, "step": 34510 }, { "epoch": 1.51, "learning_rate": 8.597403079533434e-06, "loss": 1.8664, "step": 34520 }, { "epoch": 1.51, "learning_rate": 8.582793114856554e-06, "loss": 1.7656, "step": 34530 }, { "epoch": 1.51, "learning_rate": 8.568193501990337e-06, "loss": 1.8224, "step": 34540 }, { "epoch": 1.51, "learning_rate": 8.55360424799135e-06, "loss": 1.8056, "step": 34550 }, { "epoch": 1.51, "learning_rate": 8.539025359911197e-06, "loss": 1.8298, "step": 34560 }, { "epoch": 1.51, "learning_rate": 8.52445684479642e-06, "loss": 1.79, "step": 34570 }, { "epoch": 1.51, "learning_rate": 8.509898709688614e-06, "loss": 1.7757, "step": 34580 }, { "epoch": 1.51, "learning_rate": 8.495350961624296e-06, "loss": 1.8096, "step": 34590 }, { "epoch": 1.51, "learning_rate": 8.480813607635003e-06, "loss": 1.7909, "step": 34600 }, { "epoch": 1.51, "eval_loss": 1.797386884689331, "eval_runtime": 12.0435, "eval_samples_per_second": 340.101, "eval_steps_per_second": 21.256, "step": 34600 }, { "epoch": 1.52, "learning_rate": 8.466286654747212e-06, "loss": 1.8285, "step": 34610 }, { "epoch": 1.52, "learning_rate": 8.451770109982406e-06, "loss": 1.8151, "step": 34620 }, { "epoch": 1.52, "learning_rate": 8.437263980357028e-06, "loss": 1.8071, "step": 34630 }, { "epoch": 1.52, "learning_rate": 8.422768272882468e-06, "loss": 1.7474, "step": 34640 }, { "epoch": 1.52, "learning_rate": 8.408282994565098e-06, "loss": 1.7808, "step": 34650 }, { "epoch": 1.52, "learning_rate": 8.39380815240625e-06, "loss": 1.8051, "step": 34660 }, { "epoch": 1.52, "learning_rate": 8.379343753402214e-06, "loss": 1.8293, "step": 34670 }, { "epoch": 1.52, "learning_rate": 8.364889804544204e-06, "loss": 1.7951, "step": 34680 }, { "epoch": 1.52, "learning_rate": 8.350446312818424e-06, "loss": 1.8187, "step": 34690 }, { "epoch": 1.52, "learning_rate": 8.336013285205977e-06, "loss": 1.7762, "step": 34700 }, { "epoch": 1.52, "eval_loss": 1.7973322868347168, "eval_runtime": 11.7769, "eval_samples_per_second": 347.801, "eval_steps_per_second": 21.738, "step": 34700 }, { "epoch": 1.52, "learning_rate": 8.321590728682966e-06, "loss": 1.7932, "step": 34710 }, { "epoch": 1.52, "learning_rate": 8.307178650220382e-06, "loss": 1.7925, "step": 34720 }, { "epoch": 1.52, "learning_rate": 8.29277705678418e-06, "loss": 1.7959, "step": 34730 }, { "epoch": 1.52, "learning_rate": 8.27838595533523e-06, "loss": 1.802, "step": 34740 }, { "epoch": 1.52, "learning_rate": 8.26400535282934e-06, "loss": 1.8553, "step": 34750 }, { "epoch": 1.52, "learning_rate": 8.249635256217243e-06, "loss": 1.7845, "step": 34760 }, { "epoch": 1.52, "learning_rate": 8.23527567244461e-06, "loss": 1.783, "step": 34770 }, { "epoch": 1.52, "learning_rate": 8.220926608451988e-06, "loss": 1.8221, "step": 34780 }, { "epoch": 1.52, "learning_rate": 8.206588071174878e-06, "loss": 1.7731, "step": 34790 }, { "epoch": 1.52, "learning_rate": 8.192260067543685e-06, "loss": 1.8195, "step": 34800 }, { "epoch": 1.52, "eval_loss": 1.797337293624878, "eval_runtime": 11.9416, "eval_samples_per_second": 343.003, "eval_steps_per_second": 21.438, "step": 34800 }, { "epoch": 1.52, "learning_rate": 8.177942604483716e-06, "loss": 1.7583, "step": 34810 }, { "epoch": 1.52, "learning_rate": 8.163635688915178e-06, "loss": 1.8383, "step": 34820 }, { "epoch": 1.52, "learning_rate": 8.149339327753199e-06, "loss": 1.783, "step": 34830 }, { "epoch": 1.53, "learning_rate": 8.13505352790777e-06, "loss": 1.848, "step": 34840 }, { "epoch": 1.53, "learning_rate": 8.12077829628384e-06, "loss": 1.8442, "step": 34850 }, { "epoch": 1.53, "learning_rate": 8.106513639781188e-06, "loss": 1.8794, "step": 34860 }, { "epoch": 1.53, "learning_rate": 8.0922595652945e-06, "loss": 1.7914, "step": 34870 }, { "epoch": 1.53, "learning_rate": 8.078016079713358e-06, "loss": 1.7973, "step": 34880 }, { "epoch": 1.53, "learning_rate": 8.063783189922223e-06, "loss": 1.7731, "step": 34890 }, { "epoch": 1.53, "learning_rate": 8.049560902800438e-06, "loss": 1.8355, "step": 34900 }, { "epoch": 1.53, "eval_loss": 1.7973167896270752, "eval_runtime": 11.7033, "eval_samples_per_second": 349.986, "eval_steps_per_second": 21.874, "step": 34900 }, { "epoch": 1.53, "learning_rate": 8.035349225222201e-06, "loss": 1.8303, "step": 34910 }, { "epoch": 1.53, "learning_rate": 8.021148164056604e-06, "loss": 1.7822, "step": 34920 }, { "epoch": 1.53, "learning_rate": 8.006957726167596e-06, "loss": 1.8136, "step": 34930 }, { "epoch": 1.53, "learning_rate": 7.992777918414007e-06, "loss": 1.8305, "step": 34940 }, { "epoch": 1.53, "learning_rate": 7.978608747649503e-06, "loss": 1.7985, "step": 34950 }, { "epoch": 1.53, "learning_rate": 7.964450220722632e-06, "loss": 1.8094, "step": 34960 }, { "epoch": 1.53, "learning_rate": 7.95030234447677e-06, "loss": 1.845, "step": 34970 }, { "epoch": 1.53, "learning_rate": 7.936165125750192e-06, "loss": 1.8332, "step": 34980 }, { "epoch": 1.53, "learning_rate": 7.92203857137597e-06, "loss": 1.8231, "step": 34990 }, { "epoch": 1.53, "learning_rate": 7.907922688182065e-06, "loss": 1.7771, "step": 35000 }, { "epoch": 1.53, "eval_loss": 1.7973071336746216, "eval_runtime": 11.7574, "eval_samples_per_second": 348.375, "eval_steps_per_second": 21.773, "step": 35000 }, { "epoch": 1.53, "learning_rate": 7.893817482991233e-06, "loss": 1.8482, "step": 35010 }, { "epoch": 1.53, "learning_rate": 7.879722962621117e-06, "loss": 1.8413, "step": 35020 }, { "epoch": 1.53, "learning_rate": 7.865639133884164e-06, "loss": 1.7733, "step": 35030 }, { "epoch": 1.53, "learning_rate": 7.851566003587677e-06, "loss": 1.786, "step": 35040 }, { "epoch": 1.53, "learning_rate": 7.837503578533756e-06, "loss": 1.7989, "step": 35050 }, { "epoch": 1.53, "learning_rate": 7.823451865519353e-06, "loss": 1.8485, "step": 35060 }, { "epoch": 1.54, "learning_rate": 7.809410871336236e-06, "loss": 1.8469, "step": 35070 }, { "epoch": 1.54, "learning_rate": 7.795380602771e-06, "loss": 1.7898, "step": 35080 }, { "epoch": 1.54, "learning_rate": 7.781361066605038e-06, "loss": 1.774, "step": 35090 }, { "epoch": 1.54, "learning_rate": 7.767352269614553e-06, "loss": 1.8445, "step": 35100 }, { "epoch": 1.54, "eval_loss": 1.7972954511642456, "eval_runtime": 11.9038, "eval_samples_per_second": 344.093, "eval_steps_per_second": 21.506, "step": 35100 }, { "epoch": 1.54, "learning_rate": 7.75335421857058e-06, "loss": 1.7657, "step": 35110 }, { "epoch": 1.54, "learning_rate": 7.73936692023895e-06, "loss": 1.8643, "step": 35120 }, { "epoch": 1.54, "learning_rate": 7.725390381380298e-06, "loss": 1.8439, "step": 35130 }, { "epoch": 1.54, "learning_rate": 7.711424608750048e-06, "loss": 1.8153, "step": 35140 }, { "epoch": 1.54, "learning_rate": 7.697469609098427e-06, "loss": 1.8191, "step": 35150 }, { "epoch": 1.54, "learning_rate": 7.683525389170458e-06, "loss": 1.7889, "step": 35160 }, { "epoch": 1.54, "learning_rate": 7.66959195570597e-06, "loss": 1.7851, "step": 35170 }, { "epoch": 1.54, "learning_rate": 7.655669315439536e-06, "loss": 1.8457, "step": 35180 }, { "epoch": 1.54, "learning_rate": 7.641757475100542e-06, "loss": 1.8443, "step": 35190 }, { "epoch": 1.54, "learning_rate": 7.6278564414131515e-06, "loss": 1.7745, "step": 35200 }, { "epoch": 1.54, "eval_loss": 1.7972785234451294, "eval_runtime": 11.7681, "eval_samples_per_second": 348.059, "eval_steps_per_second": 21.754, "step": 35200 }, { "epoch": 1.54, "learning_rate": 7.613966221096313e-06, "loss": 1.7634, "step": 35210 }, { "epoch": 1.54, "learning_rate": 7.600086820863717e-06, "loss": 1.8021, "step": 35220 }, { "epoch": 1.54, "learning_rate": 7.586218247423864e-06, "loss": 1.7733, "step": 35230 }, { "epoch": 1.54, "learning_rate": 7.572360507479981e-06, "loss": 1.841, "step": 35240 }, { "epoch": 1.54, "learning_rate": 7.5585136077300895e-06, "loss": 1.8506, "step": 35250 }, { "epoch": 1.54, "learning_rate": 7.544677554866964e-06, "loss": 1.7568, "step": 35260 }, { "epoch": 1.54, "learning_rate": 7.530852355578142e-06, "loss": 1.8283, "step": 35270 }, { "epoch": 1.54, "learning_rate": 7.51703801654589e-06, "loss": 1.8275, "step": 35280 }, { "epoch": 1.54, "learning_rate": 7.503234544447244e-06, "loss": 1.8043, "step": 35290 }, { "epoch": 1.55, "learning_rate": 7.489441945953994e-06, "loss": 1.8329, "step": 35300 }, { "epoch": 1.55, "eval_loss": 1.7972854375839233, "eval_runtime": 11.6885, "eval_samples_per_second": 350.431, "eval_steps_per_second": 21.902, "step": 35300 }, { "epoch": 1.55, "learning_rate": 7.475660227732672e-06, "loss": 1.8416, "step": 35310 }, { "epoch": 1.55, "learning_rate": 7.461889396444537e-06, "loss": 1.8516, "step": 35320 }, { "epoch": 1.55, "learning_rate": 7.448129458745573e-06, "loss": 1.8768, "step": 35330 }, { "epoch": 1.55, "learning_rate": 7.434380421286559e-06, "loss": 1.8093, "step": 35340 }, { "epoch": 1.55, "learning_rate": 7.420642290712938e-06, "loss": 1.8084, "step": 35350 }, { "epoch": 1.55, "learning_rate": 7.4069150736649295e-06, "loss": 1.8161, "step": 35360 }, { "epoch": 1.55, "learning_rate": 7.393198776777437e-06, "loss": 1.8265, "step": 35370 }, { "epoch": 1.55, "learning_rate": 7.379493406680117e-06, "loss": 1.7728, "step": 35380 }, { "epoch": 1.55, "learning_rate": 7.365798969997334e-06, "loss": 1.8431, "step": 35390 }, { "epoch": 1.55, "learning_rate": 7.35211547334818e-06, "loss": 1.8254, "step": 35400 }, { "epoch": 1.55, "eval_loss": 1.7973430156707764, "eval_runtime": 11.8006, "eval_samples_per_second": 347.1, "eval_steps_per_second": 21.694, "step": 35400 }, { "epoch": 1.55, "learning_rate": 7.338442923346428e-06, "loss": 1.8339, "step": 35410 }, { "epoch": 1.55, "learning_rate": 7.324781326600588e-06, "loss": 1.8182, "step": 35420 }, { "epoch": 1.55, "learning_rate": 7.311130689713868e-06, "loss": 1.8214, "step": 35430 }, { "epoch": 1.55, "learning_rate": 7.297491019284188e-06, "loss": 1.8257, "step": 35440 }, { "epoch": 1.55, "learning_rate": 7.28386232190414e-06, "loss": 1.8534, "step": 35450 }, { "epoch": 1.55, "learning_rate": 7.2702446041610446e-06, "loss": 1.8448, "step": 35460 }, { "epoch": 1.55, "learning_rate": 7.2566378726368776e-06, "loss": 1.783, "step": 35470 }, { "epoch": 1.55, "learning_rate": 7.243042133908364e-06, "loss": 1.8584, "step": 35480 }, { "epoch": 1.55, "learning_rate": 7.2294573945468515e-06, "loss": 1.826, "step": 35490 }, { "epoch": 1.55, "learning_rate": 7.215883661118413e-06, "loss": 1.8125, "step": 35500 }, { "epoch": 1.55, "eval_loss": 1.7973475456237793, "eval_runtime": 11.7172, "eval_samples_per_second": 349.572, "eval_steps_per_second": 21.848, "step": 35500 }, { "epoch": 1.55, "learning_rate": 7.202320940183779e-06, "loss": 1.8274, "step": 35510 }, { "epoch": 1.55, "learning_rate": 7.1887692382983675e-06, "loss": 1.7883, "step": 35520 }, { "epoch": 1.56, "learning_rate": 7.175228562012272e-06, "loss": 1.8196, "step": 35530 }, { "epoch": 1.56, "learning_rate": 7.161698917870266e-06, "loss": 1.802, "step": 35540 }, { "epoch": 1.56, "learning_rate": 7.148180312411761e-06, "loss": 1.786, "step": 35550 }, { "epoch": 1.56, "learning_rate": 7.134672752170856e-06, "loss": 1.787, "step": 35560 }, { "epoch": 1.56, "learning_rate": 7.121176243676315e-06, "loss": 1.802, "step": 35570 }, { "epoch": 1.56, "learning_rate": 7.107690793451538e-06, "loss": 1.8255, "step": 35580 }, { "epoch": 1.56, "learning_rate": 7.094216408014606e-06, "loss": 1.8201, "step": 35590 }, { "epoch": 1.56, "learning_rate": 7.0807530938782216e-06, "loss": 1.79, "step": 35600 }, { "epoch": 1.56, "eval_loss": 1.7972863912582397, "eval_runtime": 11.965, "eval_samples_per_second": 342.331, "eval_steps_per_second": 21.396, "step": 35600 }, { "epoch": 1.56, "learning_rate": 7.067300857549785e-06, "loss": 1.8406, "step": 35610 }, { "epoch": 1.56, "learning_rate": 7.05385970553128e-06, "loss": 1.8449, "step": 35620 }, { "epoch": 1.56, "learning_rate": 7.04042964431939e-06, "loss": 1.7923, "step": 35630 }, { "epoch": 1.56, "learning_rate": 7.027010680405389e-06, "loss": 1.7873, "step": 35640 }, { "epoch": 1.56, "learning_rate": 7.0136028202752244e-06, "loss": 1.8115, "step": 35650 }, { "epoch": 1.56, "learning_rate": 7.000206070409455e-06, "loss": 1.8063, "step": 35660 }, { "epoch": 1.56, "learning_rate": 6.986820437283291e-06, "loss": 1.8808, "step": 35670 }, { "epoch": 1.56, "learning_rate": 6.973445927366538e-06, "loss": 1.8388, "step": 35680 }, { "epoch": 1.56, "learning_rate": 6.96008254712365e-06, "loss": 1.8139, "step": 35690 }, { "epoch": 1.56, "learning_rate": 6.946730303013694e-06, "loss": 1.8185, "step": 35700 }, { "epoch": 1.56, "eval_loss": 1.7972298860549927, "eval_runtime": 11.8804, "eval_samples_per_second": 344.768, "eval_steps_per_second": 21.548, "step": 35700 }, { "epoch": 1.56, "learning_rate": 6.933389201490366e-06, "loss": 1.8188, "step": 35710 }, { "epoch": 1.56, "learning_rate": 6.920059249001942e-06, "loss": 1.8225, "step": 35720 }, { "epoch": 1.56, "learning_rate": 6.906740451991357e-06, "loss": 1.8263, "step": 35730 }, { "epoch": 1.56, "learning_rate": 6.8934328168961e-06, "loss": 1.8524, "step": 35740 }, { "epoch": 1.56, "learning_rate": 6.880136350148327e-06, "loss": 1.7724, "step": 35750 }, { "epoch": 1.57, "learning_rate": 6.866851058174743e-06, "loss": 1.8175, "step": 35760 }, { "epoch": 1.57, "learning_rate": 6.853576947396683e-06, "loss": 1.8516, "step": 35770 }, { "epoch": 1.57, "learning_rate": 6.8403140242300554e-06, "loss": 1.7983, "step": 35780 }, { "epoch": 1.57, "learning_rate": 6.827062295085378e-06, "loss": 1.8137, "step": 35790 }, { "epoch": 1.57, "learning_rate": 6.813821766367759e-06, "loss": 1.7658, "step": 35800 }, { "epoch": 1.57, "eval_loss": 1.7972460985183716, "eval_runtime": 11.7601, "eval_samples_per_second": 348.295, "eval_steps_per_second": 21.768, "step": 35800 }, { "epoch": 1.57, "learning_rate": 6.800592444476874e-06, "loss": 1.8095, "step": 35810 }, { "epoch": 1.57, "learning_rate": 6.787374335807001e-06, "loss": 1.7734, "step": 35820 }, { "epoch": 1.57, "learning_rate": 6.774167446746992e-06, "loss": 1.8284, "step": 35830 }, { "epoch": 1.57, "learning_rate": 6.760971783680281e-06, "loss": 1.8354, "step": 35840 }, { "epoch": 1.57, "learning_rate": 6.74778735298486e-06, "loss": 1.803, "step": 35850 }, { "epoch": 1.57, "learning_rate": 6.7346141610333145e-06, "loss": 1.8212, "step": 35860 }, { "epoch": 1.57, "learning_rate": 6.721452214192765e-06, "loss": 1.7941, "step": 35870 }, { "epoch": 1.57, "learning_rate": 6.708301518824949e-06, "loss": 1.767, "step": 35880 }, { "epoch": 1.57, "learning_rate": 6.695162081286108e-06, "loss": 1.8077, "step": 35890 }, { "epoch": 1.57, "learning_rate": 6.682033907927087e-06, "loss": 1.808, "step": 35900 }, { "epoch": 1.57, "eval_loss": 1.7970843315124512, "eval_runtime": 11.7297, "eval_samples_per_second": 349.198, "eval_steps_per_second": 21.825, "step": 35900 }, { "epoch": 1.57, "learning_rate": 6.6689170050932545e-06, "loss": 1.8053, "step": 35910 }, { "epoch": 1.57, "learning_rate": 6.6558113791245475e-06, "loss": 1.8368, "step": 35920 }, { "epoch": 1.57, "learning_rate": 6.642717036355456e-06, "loss": 1.817, "step": 35930 }, { "epoch": 1.57, "learning_rate": 6.629633983115015e-06, "loss": 1.7996, "step": 35940 }, { "epoch": 1.57, "learning_rate": 6.616562225726782e-06, "loss": 1.8287, "step": 35950 }, { "epoch": 1.57, "learning_rate": 6.603501770508885e-06, "loss": 1.8236, "step": 35960 }, { "epoch": 1.57, "learning_rate": 6.590452623773968e-06, "loss": 1.7861, "step": 35970 }, { "epoch": 1.58, "learning_rate": 6.577414791829226e-06, "loss": 1.7758, "step": 35980 }, { "epoch": 1.58, "learning_rate": 6.564388280976365e-06, "loss": 1.7861, "step": 35990 }, { "epoch": 1.58, "learning_rate": 6.551373097511637e-06, "loss": 1.812, "step": 36000 }, { "epoch": 1.58, "eval_loss": 1.797182559967041, "eval_runtime": 11.8246, "eval_samples_per_second": 346.396, "eval_steps_per_second": 21.65, "step": 36000 }, { "epoch": 1.58, "learning_rate": 6.538369247725795e-06, "loss": 1.8064, "step": 36010 }, { "epoch": 1.58, "learning_rate": 6.52537673790416e-06, "loss": 1.8005, "step": 36020 }, { "epoch": 1.58, "learning_rate": 6.512395574326524e-06, "loss": 1.8345, "step": 36030 }, { "epoch": 1.58, "learning_rate": 6.49942576326721e-06, "loss": 1.8291, "step": 36040 }, { "epoch": 1.58, "learning_rate": 6.486467310995062e-06, "loss": 1.8399, "step": 36050 }, { "epoch": 1.58, "learning_rate": 6.473520223773433e-06, "loss": 1.8127, "step": 36060 }, { "epoch": 1.58, "learning_rate": 6.460584507860181e-06, "loss": 1.8742, "step": 36070 }, { "epoch": 1.58, "learning_rate": 6.447660169507653e-06, "loss": 1.8144, "step": 36080 }, { "epoch": 1.58, "learning_rate": 6.434747214962718e-06, "loss": 1.8517, "step": 36090 }, { "epoch": 1.58, "learning_rate": 6.421845650466734e-06, "loss": 1.8358, "step": 36100 }, { "epoch": 1.58, "eval_loss": 1.79728364944458, "eval_runtime": 11.953, "eval_samples_per_second": 342.677, "eval_steps_per_second": 21.417, "step": 36100 }, { "epoch": 1.58, "learning_rate": 6.408955482255563e-06, "loss": 1.7886, "step": 36110 }, { "epoch": 1.58, "learning_rate": 6.39607671655953e-06, "loss": 1.8088, "step": 36120 }, { "epoch": 1.58, "learning_rate": 6.3832093596034905e-06, "loss": 1.7968, "step": 36130 }, { "epoch": 1.58, "learning_rate": 6.3703534176067415e-06, "loss": 1.8042, "step": 36140 }, { "epoch": 1.58, "learning_rate": 6.357508896783093e-06, "loss": 1.8517, "step": 36150 }, { "epoch": 1.58, "learning_rate": 6.344675803340829e-06, "loss": 1.8469, "step": 36160 }, { "epoch": 1.58, "learning_rate": 6.331854143482715e-06, "loss": 1.7896, "step": 36170 }, { "epoch": 1.58, "learning_rate": 6.3190439234059695e-06, "loss": 1.804, "step": 36180 }, { "epoch": 1.58, "learning_rate": 6.306245149302297e-06, "loss": 1.8063, "step": 36190 }, { "epoch": 1.58, "learning_rate": 6.293457827357871e-06, "loss": 1.8314, "step": 36200 }, { "epoch": 1.58, "eval_loss": 1.7972148656845093, "eval_runtime": 11.4888, "eval_samples_per_second": 356.521, "eval_steps_per_second": 22.283, "step": 36200 }, { "epoch": 1.59, "learning_rate": 6.2806819637533365e-06, "loss": 1.8709, "step": 36210 }, { "epoch": 1.59, "learning_rate": 6.267917564663768e-06, "loss": 1.7827, "step": 36220 }, { "epoch": 1.59, "learning_rate": 6.255164636258737e-06, "loss": 1.7999, "step": 36230 }, { "epoch": 1.59, "learning_rate": 6.242423184702246e-06, "loss": 1.8188, "step": 36240 }, { "epoch": 1.59, "learning_rate": 6.229693216152774e-06, "loss": 1.831, "step": 36250 }, { "epoch": 1.59, "learning_rate": 6.216974736763212e-06, "loss": 1.8664, "step": 36260 }, { "epoch": 1.59, "learning_rate": 6.204267752680943e-06, "loss": 1.7857, "step": 36270 }, { "epoch": 1.59, "learning_rate": 6.191572270047754e-06, "loss": 1.8188, "step": 36280 }, { "epoch": 1.59, "learning_rate": 6.178888294999892e-06, "loss": 1.8396, "step": 36290 }, { "epoch": 1.59, "learning_rate": 6.166215833668055e-06, "loss": 1.8235, "step": 36300 }, { "epoch": 1.59, "eval_loss": 1.7972338199615479, "eval_runtime": 11.7825, "eval_samples_per_second": 347.634, "eval_steps_per_second": 21.727, "step": 36300 }, { "epoch": 1.59, "learning_rate": 6.1535548921773425e-06, "loss": 1.8227, "step": 36310 }, { "epoch": 1.59, "learning_rate": 6.140905476647312e-06, "loss": 1.8282, "step": 36320 }, { "epoch": 1.59, "learning_rate": 6.128267593191946e-06, "loss": 1.8089, "step": 36330 }, { "epoch": 1.59, "learning_rate": 6.115641247919653e-06, "loss": 1.8254, "step": 36340 }, { "epoch": 1.59, "learning_rate": 6.103026446933244e-06, "loss": 1.8223, "step": 36350 }, { "epoch": 1.59, "learning_rate": 6.090423196329991e-06, "loss": 1.8172, "step": 36360 }, { "epoch": 1.59, "learning_rate": 6.077831502201529e-06, "loss": 1.7998, "step": 36370 }, { "epoch": 1.59, "learning_rate": 6.0652513706339726e-06, "loss": 1.848, "step": 36380 }, { "epoch": 1.59, "learning_rate": 6.052682807707791e-06, "loss": 1.8263, "step": 36390 }, { "epoch": 1.59, "learning_rate": 6.040125819497895e-06, "loss": 1.8287, "step": 36400 }, { "epoch": 1.59, "eval_loss": 1.7972549200057983, "eval_runtime": 11.8176, "eval_samples_per_second": 346.601, "eval_steps_per_second": 21.663, "step": 36400 }, { "epoch": 1.59, "learning_rate": 6.027580412073575e-06, "loss": 1.8571, "step": 36410 }, { "epoch": 1.59, "learning_rate": 6.015046591498549e-06, "loss": 1.8373, "step": 36420 }, { "epoch": 1.59, "learning_rate": 6.002524363830925e-06, "loss": 1.831, "step": 36430 }, { "epoch": 1.6, "learning_rate": 5.990013735123212e-06, "loss": 1.7875, "step": 36440 }, { "epoch": 1.6, "learning_rate": 5.977514711422295e-06, "loss": 1.8114, "step": 36450 }, { "epoch": 1.6, "learning_rate": 5.965027298769468e-06, "loss": 1.8116, "step": 36460 }, { "epoch": 1.6, "learning_rate": 5.952551503200405e-06, "loss": 1.8001, "step": 36470 }, { "epoch": 1.6, "learning_rate": 5.940087330745181e-06, "loss": 1.8408, "step": 36480 }, { "epoch": 1.6, "learning_rate": 5.927634787428219e-06, "loss": 1.82, "step": 36490 }, { "epoch": 1.6, "learning_rate": 5.9151938792683545e-06, "loss": 1.8077, "step": 36500 }, { "epoch": 1.6, "eval_loss": 1.7971975803375244, "eval_runtime": 12.6073, "eval_samples_per_second": 324.89, "eval_steps_per_second": 20.306, "step": 36500 }, { "epoch": 1.6, "learning_rate": 5.902764612278788e-06, "loss": 1.8095, "step": 36510 }, { "epoch": 1.6, "learning_rate": 5.890346992467084e-06, "loss": 1.8333, "step": 36520 }, { "epoch": 1.6, "learning_rate": 5.877941025835194e-06, "loss": 1.8566, "step": 36530 }, { "epoch": 1.6, "learning_rate": 5.865546718379412e-06, "loss": 1.8239, "step": 36540 }, { "epoch": 1.6, "learning_rate": 5.853164076090429e-06, "loss": 1.8163, "step": 36550 }, { "epoch": 1.6, "learning_rate": 5.840793104953275e-06, "loss": 1.7902, "step": 36560 }, { "epoch": 1.6, "learning_rate": 5.8284338109473526e-06, "loss": 1.8278, "step": 36570 }, { "epoch": 1.6, "learning_rate": 5.8160862000464e-06, "loss": 1.8097, "step": 36580 }, { "epoch": 1.6, "learning_rate": 5.80375027821853e-06, "loss": 1.8486, "step": 36590 }, { "epoch": 1.6, "learning_rate": 5.791426051426197e-06, "loss": 1.8051, "step": 36600 }, { "epoch": 1.6, "eval_loss": 1.7971279621124268, "eval_runtime": 11.9768, "eval_samples_per_second": 341.994, "eval_steps_per_second": 21.375, "step": 36600 }, { "epoch": 1.6, "learning_rate": 5.779113525626213e-06, "loss": 1.8213, "step": 36610 }, { "epoch": 1.6, "learning_rate": 5.766812706769703e-06, "loss": 1.7921, "step": 36620 }, { "epoch": 1.6, "learning_rate": 5.754523600802175e-06, "loss": 1.8111, "step": 36630 }, { "epoch": 1.6, "learning_rate": 5.742246213663435e-06, "loss": 1.8006, "step": 36640 }, { "epoch": 1.6, "learning_rate": 5.729980551287668e-06, "loss": 1.8423, "step": 36650 }, { "epoch": 1.6, "learning_rate": 5.717726619603353e-06, "loss": 1.7913, "step": 36660 }, { "epoch": 1.61, "learning_rate": 5.705484424533332e-06, "loss": 1.7874, "step": 36670 }, { "epoch": 1.61, "learning_rate": 5.693253971994737e-06, "loss": 1.7997, "step": 36680 }, { "epoch": 1.61, "learning_rate": 5.681035267899055e-06, "loss": 1.8268, "step": 36690 }, { "epoch": 1.61, "learning_rate": 5.668828318152088e-06, "loss": 1.8094, "step": 36700 }, { "epoch": 1.61, "eval_loss": 1.797187328338623, "eval_runtime": 11.9607, "eval_samples_per_second": 342.454, "eval_steps_per_second": 21.403, "step": 36700 }, { "epoch": 1.61, "learning_rate": 5.656633128653958e-06, "loss": 1.8127, "step": 36710 }, { "epoch": 1.61, "learning_rate": 5.6444497052990805e-06, "loss": 1.8294, "step": 36720 }, { "epoch": 1.61, "learning_rate": 5.632278053976213e-06, "loss": 1.8017, "step": 36730 }, { "epoch": 1.61, "learning_rate": 5.6201181805684185e-06, "loss": 1.8082, "step": 36740 }, { "epoch": 1.61, "learning_rate": 5.6079700909530426e-06, "loss": 1.8064, "step": 36750 }, { "epoch": 1.61, "learning_rate": 5.595833791001772e-06, "loss": 1.7524, "step": 36760 }, { "epoch": 1.61, "learning_rate": 5.583709286580551e-06, "loss": 1.7921, "step": 36770 }, { "epoch": 1.61, "learning_rate": 5.571596583549677e-06, "loss": 1.8524, "step": 36780 }, { "epoch": 1.61, "learning_rate": 5.559495687763696e-06, "loss": 1.7973, "step": 36790 }, { "epoch": 1.61, "learning_rate": 5.547406605071474e-06, "loss": 1.8229, "step": 36800 }, { "epoch": 1.61, "eval_loss": 1.797210931777954, "eval_runtime": 11.794, "eval_samples_per_second": 347.297, "eval_steps_per_second": 21.706, "step": 36800 }, { "epoch": 1.61, "learning_rate": 5.535329341316149e-06, "loss": 1.8275, "step": 36810 }, { "epoch": 1.61, "learning_rate": 5.523263902335156e-06, "loss": 1.8013, "step": 36820 }, { "epoch": 1.61, "learning_rate": 5.511210293960222e-06, "loss": 1.8436, "step": 36830 }, { "epoch": 1.61, "learning_rate": 5.499168522017351e-06, "loss": 1.7926, "step": 36840 }, { "epoch": 1.61, "learning_rate": 5.4871385923268105e-06, "loss": 1.8132, "step": 36850 }, { "epoch": 1.61, "learning_rate": 5.475120510703163e-06, "loss": 1.8226, "step": 36860 }, { "epoch": 1.61, "learning_rate": 5.463114282955241e-06, "loss": 1.7857, "step": 36870 }, { "epoch": 1.61, "learning_rate": 5.451119914886143e-06, "loss": 1.8093, "step": 36880 }, { "epoch": 1.61, "learning_rate": 5.439137412293235e-06, "loss": 1.8043, "step": 36890 }, { "epoch": 1.62, "learning_rate": 5.427166780968155e-06, "loss": 1.7664, "step": 36900 }, { "epoch": 1.62, "eval_loss": 1.7971539497375488, "eval_runtime": 11.76, "eval_samples_per_second": 348.299, "eval_steps_per_second": 21.769, "step": 36900 }, { "epoch": 1.62, "learning_rate": 5.415208026696778e-06, "loss": 1.8128, "step": 36910 }, { "epoch": 1.62, "learning_rate": 5.403261155259289e-06, "loss": 1.793, "step": 36920 }, { "epoch": 1.62, "learning_rate": 5.391326172430078e-06, "loss": 1.7863, "step": 36930 }, { "epoch": 1.62, "learning_rate": 5.379403083977823e-06, "loss": 1.8186, "step": 36940 }, { "epoch": 1.62, "learning_rate": 5.367491895665422e-06, "loss": 1.8128, "step": 36950 }, { "epoch": 1.62, "learning_rate": 5.355592613250052e-06, "loss": 1.84, "step": 36960 }, { "epoch": 1.62, "learning_rate": 5.343705242483113e-06, "loss": 1.8215, "step": 36970 }, { "epoch": 1.62, "learning_rate": 5.331829789110276e-06, "loss": 1.8136, "step": 36980 }, { "epoch": 1.62, "learning_rate": 5.319966258871409e-06, "loss": 1.8243, "step": 36990 }, { "epoch": 1.62, "learning_rate": 5.308114657500649e-06, "loss": 1.8066, "step": 37000 }, { "epoch": 1.62, "eval_loss": 1.7971651554107666, "eval_runtime": 11.8992, "eval_samples_per_second": 344.224, "eval_steps_per_second": 21.514, "step": 37000 }, { "epoch": 1.62, "learning_rate": 5.2962749907263694e-06, "loss": 1.8203, "step": 37010 }, { "epoch": 1.62, "learning_rate": 5.284447264271147e-06, "loss": 1.8284, "step": 37020 }, { "epoch": 1.62, "learning_rate": 5.272631483851822e-06, "loss": 1.8251, "step": 37030 }, { "epoch": 1.62, "learning_rate": 5.260827655179419e-06, "loss": 1.8302, "step": 37040 }, { "epoch": 1.62, "learning_rate": 5.249035783959241e-06, "loss": 1.808, "step": 37050 }, { "epoch": 1.62, "learning_rate": 5.237255875890762e-06, "loss": 1.8063, "step": 37060 }, { "epoch": 1.62, "learning_rate": 5.225487936667701e-06, "loss": 1.8649, "step": 37070 }, { "epoch": 1.62, "learning_rate": 5.2137319719779755e-06, "loss": 1.8114, "step": 37080 }, { "epoch": 1.62, "learning_rate": 5.2019879875037255e-06, "loss": 1.839, "step": 37090 }, { "epoch": 1.62, "learning_rate": 5.190255988921298e-06, "loss": 1.8004, "step": 37100 }, { "epoch": 1.62, "eval_loss": 1.7972087860107422, "eval_runtime": 11.9296, "eval_samples_per_second": 343.347, "eval_steps_per_second": 21.459, "step": 37100 }, { "epoch": 1.62, "learning_rate": 5.178535981901261e-06, "loss": 1.7963, "step": 37110 }, { "epoch": 1.62, "learning_rate": 5.166827972108349e-06, "loss": 1.7894, "step": 37120 }, { "epoch": 1.63, "learning_rate": 5.155131965201535e-06, "loss": 1.8562, "step": 37130 }, { "epoch": 1.63, "learning_rate": 5.143447966833974e-06, "loss": 1.8311, "step": 37140 }, { "epoch": 1.63, "learning_rate": 5.1317759826530286e-06, "loss": 1.8282, "step": 37150 }, { "epoch": 1.63, "learning_rate": 5.12011601830023e-06, "loss": 1.8286, "step": 37160 }, { "epoch": 1.63, "learning_rate": 5.1084680794113245e-06, "loss": 1.7696, "step": 37170 }, { "epoch": 1.63, "learning_rate": 5.096832171616225e-06, "loss": 1.8051, "step": 37180 }, { "epoch": 1.63, "learning_rate": 5.08520830053906e-06, "loss": 1.8004, "step": 37190 }, { "epoch": 1.63, "learning_rate": 5.073596471798108e-06, "loss": 1.8315, "step": 37200 }, { "epoch": 1.63, "eval_loss": 1.7971980571746826, "eval_runtime": 11.9874, "eval_samples_per_second": 341.693, "eval_steps_per_second": 21.356, "step": 37200 }, { "epoch": 1.63, "learning_rate": 5.061996691005847e-06, "loss": 1.8219, "step": 37210 }, { "epoch": 1.63, "learning_rate": 5.0504089637689166e-06, "loss": 1.7975, "step": 37220 }, { "epoch": 1.63, "learning_rate": 5.03883329568814e-06, "loss": 1.8664, "step": 37230 }, { "epoch": 1.63, "learning_rate": 5.027269692358522e-06, "loss": 1.8035, "step": 37240 }, { "epoch": 1.63, "learning_rate": 5.015718159369207e-06, "loss": 1.8155, "step": 37250 }, { "epoch": 1.63, "learning_rate": 5.00417870230353e-06, "loss": 1.8054, "step": 37260 }, { "epoch": 1.63, "learning_rate": 4.992651326738983e-06, "loss": 1.8541, "step": 37270 }, { "epoch": 1.63, "learning_rate": 4.9811360382472245e-06, "loss": 1.8034, "step": 37280 }, { "epoch": 1.63, "learning_rate": 4.969632842394051e-06, "loss": 1.7881, "step": 37290 }, { "epoch": 1.63, "learning_rate": 4.958141744739436e-06, "loss": 1.8364, "step": 37300 }, { "epoch": 1.63, "eval_loss": 1.797140121459961, "eval_runtime": 12.2451, "eval_samples_per_second": 334.501, "eval_steps_per_second": 20.906, "step": 37300 }, { "epoch": 1.63, "learning_rate": 4.946662750837485e-06, "loss": 1.8236, "step": 37310 }, { "epoch": 1.63, "learning_rate": 4.935195866236473e-06, "loss": 1.7661, "step": 37320 }, { "epoch": 1.63, "learning_rate": 4.9237410964788124e-06, "loss": 1.8246, "step": 37330 }, { "epoch": 1.63, "learning_rate": 4.9122984471010675e-06, "loss": 1.8118, "step": 37340 }, { "epoch": 1.64, "learning_rate": 4.900867923633923e-06, "loss": 1.8188, "step": 37350 }, { "epoch": 1.64, "learning_rate": 4.889449531602227e-06, "loss": 1.8013, "step": 37360 }, { "epoch": 1.64, "learning_rate": 4.878043276524956e-06, "loss": 1.78, "step": 37370 }, { "epoch": 1.64, "learning_rate": 4.86664916391522e-06, "loss": 1.834, "step": 37380 }, { "epoch": 1.64, "learning_rate": 4.855267199280254e-06, "loss": 1.8049, "step": 37390 }, { "epoch": 1.64, "learning_rate": 4.843897388121422e-06, "loss": 1.7917, "step": 37400 }, { "epoch": 1.64, "eval_loss": 1.7971751689910889, "eval_runtime": 12.9807, "eval_samples_per_second": 315.545, "eval_steps_per_second": 19.722, "step": 37400 }, { "epoch": 1.64, "learning_rate": 4.832539735934227e-06, "loss": 1.795, "step": 37410 }, { "epoch": 1.64, "learning_rate": 4.821194248208287e-06, "loss": 1.8131, "step": 37420 }, { "epoch": 1.64, "learning_rate": 4.809860930427332e-06, "loss": 1.7924, "step": 37430 }, { "epoch": 1.64, "learning_rate": 4.798539788069227e-06, "loss": 1.8176, "step": 37440 }, { "epoch": 1.64, "learning_rate": 4.787230826605929e-06, "loss": 1.8091, "step": 37450 }, { "epoch": 1.64, "learning_rate": 4.775934051503523e-06, "loss": 1.8302, "step": 37460 }, { "epoch": 1.64, "learning_rate": 4.764649468222221e-06, "loss": 1.7777, "step": 37470 }, { "epoch": 1.64, "learning_rate": 4.753377082216298e-06, "loss": 1.8047, "step": 37480 }, { "epoch": 1.64, "learning_rate": 4.742116898934166e-06, "loss": 1.8524, "step": 37490 }, { "epoch": 1.64, "learning_rate": 4.73086892381834e-06, "loss": 1.8025, "step": 37500 }, { "epoch": 1.64, "eval_loss": 1.7971476316452026, "eval_runtime": 12.0152, "eval_samples_per_second": 340.901, "eval_steps_per_second": 21.306, "step": 37500 } ], "logging_steps": 10, "max_steps": 45688, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "total_flos": 9.930378174862983e+18, "train_batch_size": 32, "trial_name": null, "trial_params": null }