{ "best_metric": 1.391266107559204, "best_model_checkpoint": "/mnt/data1/sheshuaijie/Output/CoT/Trained/chinese-llama-plus-13b_chinese-cot+belle_data1M+alpaca_gpt4+instinwild_ch+HC3_huma+HC3_chatGPT_0.0002/lora/checkpoint-36300", "epoch": 1.9395688066041517, "global_step": 36300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 2.1378941742383755e-06, "loss": 1.4783, "step": 20 }, { "epoch": 0.0, "learning_rate": 4.275788348476751e-06, "loss": 1.4068, "step": 40 }, { "epoch": 0.0, "eval_loss": 1.4569315910339355, "eval_runtime": 49.7589, "eval_samples_per_second": 60.291, "eval_steps_per_second": 1.889, "step": 50 }, { "epoch": 0.0, "learning_rate": 6.413682522715125e-06, "loss": 1.4169, "step": 60 }, { "epoch": 0.0, "learning_rate": 8.551576696953502e-06, "loss": 1.4038, "step": 80 }, { "epoch": 0.01, "learning_rate": 1.0689470871191876e-05, "loss": 1.3771, "step": 100 }, { "epoch": 0.01, "eval_loss": 1.4323030710220337, "eval_runtime": 49.9207, "eval_samples_per_second": 60.095, "eval_steps_per_second": 1.883, "step": 100 }, { "epoch": 0.01, "learning_rate": 1.282736504543025e-05, "loss": 1.3784, "step": 120 }, { "epoch": 0.01, "learning_rate": 1.4965259219668627e-05, "loss": 1.4263, "step": 140 }, { "epoch": 0.01, "eval_loss": 1.427394151687622, "eval_runtime": 49.9421, "eval_samples_per_second": 60.07, "eval_steps_per_second": 1.882, "step": 150 }, { "epoch": 0.01, "learning_rate": 1.7103153393907004e-05, "loss": 1.3669, "step": 160 }, { "epoch": 0.01, "learning_rate": 1.924104756814538e-05, "loss": 1.4239, "step": 180 }, { "epoch": 0.01, "learning_rate": 2.1378941742383753e-05, "loss": 1.3658, "step": 200 }, { "epoch": 0.01, "eval_loss": 1.4236927032470703, "eval_runtime": 49.8789, "eval_samples_per_second": 60.146, "eval_steps_per_second": 1.885, "step": 200 }, { "epoch": 0.01, "learning_rate": 2.351683591662213e-05, "loss": 1.3947, "step": 220 }, { "epoch": 0.01, "learning_rate": 2.56547300908605e-05, "loss": 1.3993, "step": 240 }, { "epoch": 0.01, "eval_loss": 1.4210280179977417, "eval_runtime": 49.958, "eval_samples_per_second": 60.05, "eval_steps_per_second": 1.882, "step": 250 }, { "epoch": 0.01, "learning_rate": 2.7792624265098877e-05, "loss": 1.3769, "step": 260 }, { "epoch": 0.01, "learning_rate": 2.9930518439337253e-05, "loss": 1.3947, "step": 280 }, { "epoch": 0.02, "learning_rate": 3.206841261357563e-05, "loss": 1.4067, "step": 300 }, { "epoch": 0.02, "eval_loss": 1.4191926717758179, "eval_runtime": 49.9003, "eval_samples_per_second": 60.12, "eval_steps_per_second": 1.884, "step": 300 }, { "epoch": 0.02, "learning_rate": 3.420630678781401e-05, "loss": 1.3541, "step": 320 }, { "epoch": 0.02, "learning_rate": 3.634420096205238e-05, "loss": 1.3919, "step": 340 }, { "epoch": 0.02, "eval_loss": 1.4174752235412598, "eval_runtime": 49.9563, "eval_samples_per_second": 60.053, "eval_steps_per_second": 1.882, "step": 350 }, { "epoch": 0.02, "learning_rate": 3.848209513629076e-05, "loss": 1.3776, "step": 360 }, { "epoch": 0.02, "learning_rate": 4.061998931052913e-05, "loss": 1.4259, "step": 380 }, { "epoch": 0.02, "learning_rate": 4.2757883484767506e-05, "loss": 1.3679, "step": 400 }, { "epoch": 0.02, "eval_loss": 1.4158518314361572, "eval_runtime": 49.8513, "eval_samples_per_second": 60.179, "eval_steps_per_second": 1.886, "step": 400 }, { "epoch": 0.02, "learning_rate": 4.4895777659005885e-05, "loss": 1.3791, "step": 420 }, { "epoch": 0.02, "learning_rate": 4.703367183324426e-05, "loss": 1.3695, "step": 440 }, { "epoch": 0.02, "eval_loss": 1.4145797491073608, "eval_runtime": 49.8906, "eval_samples_per_second": 60.132, "eval_steps_per_second": 1.884, "step": 450 }, { "epoch": 0.02, "learning_rate": 4.917156600748263e-05, "loss": 1.3826, "step": 460 }, { "epoch": 0.03, "learning_rate": 5.1309460181721e-05, "loss": 1.3851, "step": 480 }, { "epoch": 0.03, "learning_rate": 5.3447354355959376e-05, "loss": 1.3888, "step": 500 }, { "epoch": 0.03, "eval_loss": 1.4140371084213257, "eval_runtime": 49.9418, "eval_samples_per_second": 60.07, "eval_steps_per_second": 1.882, "step": 500 }, { "epoch": 0.03, "learning_rate": 5.5585248530197755e-05, "loss": 1.3633, "step": 520 }, { "epoch": 0.03, "learning_rate": 5.772314270443613e-05, "loss": 1.4141, "step": 540 }, { "epoch": 0.03, "eval_loss": 1.4142972230911255, "eval_runtime": 49.9711, "eval_samples_per_second": 60.035, "eval_steps_per_second": 1.881, "step": 550 }, { "epoch": 0.03, "learning_rate": 5.986103687867451e-05, "loss": 1.3633, "step": 560 }, { "epoch": 0.03, "learning_rate": 6.199893105291288e-05, "loss": 1.3659, "step": 580 }, { "epoch": 0.03, "learning_rate": 6.413682522715126e-05, "loss": 1.4105, "step": 600 }, { "epoch": 0.03, "eval_loss": 1.4129570722579956, "eval_runtime": 49.9129, "eval_samples_per_second": 60.105, "eval_steps_per_second": 1.883, "step": 600 }, { "epoch": 0.03, "learning_rate": 6.627471940138962e-05, "loss": 1.3381, "step": 620 }, { "epoch": 0.03, "learning_rate": 6.841261357562802e-05, "loss": 1.3776, "step": 640 }, { "epoch": 0.03, "eval_loss": 1.4125068187713623, "eval_runtime": 49.9267, "eval_samples_per_second": 60.088, "eval_steps_per_second": 1.883, "step": 650 }, { "epoch": 0.04, "learning_rate": 7.05505077498664e-05, "loss": 1.3704, "step": 660 }, { "epoch": 0.04, "learning_rate": 7.268840192410476e-05, "loss": 1.3755, "step": 680 }, { "epoch": 0.04, "learning_rate": 7.482629609834314e-05, "loss": 1.3866, "step": 700 }, { "epoch": 0.04, "eval_loss": 1.411845088005066, "eval_runtime": 49.8796, "eval_samples_per_second": 60.145, "eval_steps_per_second": 1.885, "step": 700 }, { "epoch": 0.04, "learning_rate": 7.696419027258152e-05, "loss": 1.3516, "step": 720 }, { "epoch": 0.04, "learning_rate": 7.910208444681989e-05, "loss": 1.3781, "step": 740 }, { "epoch": 0.04, "eval_loss": 1.4121510982513428, "eval_runtime": 49.9262, "eval_samples_per_second": 60.089, "eval_steps_per_second": 1.883, "step": 750 }, { "epoch": 0.04, "learning_rate": 8.123997862105827e-05, "loss": 1.3963, "step": 760 }, { "epoch": 0.04, "learning_rate": 8.337787279529665e-05, "loss": 1.3803, "step": 780 }, { "epoch": 0.04, "learning_rate": 8.551576696953501e-05, "loss": 1.3752, "step": 800 }, { "epoch": 0.04, "eval_loss": 1.4118362665176392, "eval_runtime": 49.9268, "eval_samples_per_second": 60.088, "eval_steps_per_second": 1.883, "step": 800 }, { "epoch": 0.04, "learning_rate": 8.765366114377339e-05, "loss": 1.401, "step": 820 }, { "epoch": 0.04, "learning_rate": 8.979155531801177e-05, "loss": 1.3748, "step": 840 }, { "epoch": 0.05, "eval_loss": 1.4110814332962036, "eval_runtime": 49.9302, "eval_samples_per_second": 60.084, "eval_steps_per_second": 1.883, "step": 850 }, { "epoch": 0.05, "learning_rate": 9.192944949225014e-05, "loss": 1.3708, "step": 860 }, { "epoch": 0.05, "learning_rate": 9.406734366648852e-05, "loss": 1.408, "step": 880 }, { "epoch": 0.05, "learning_rate": 9.62052378407269e-05, "loss": 1.4088, "step": 900 }, { "epoch": 0.05, "eval_loss": 1.4106462001800537, "eval_runtime": 49.927, "eval_samples_per_second": 60.088, "eval_steps_per_second": 1.883, "step": 900 }, { "epoch": 0.05, "learning_rate": 9.834313201496526e-05, "loss": 1.4126, "step": 920 }, { "epoch": 0.05, "learning_rate": 0.00010048102618920363, "loss": 1.3812, "step": 940 }, { "epoch": 0.05, "eval_loss": 1.4106441736221313, "eval_runtime": 49.9373, "eval_samples_per_second": 60.075, "eval_steps_per_second": 1.882, "step": 950 }, { "epoch": 0.05, "learning_rate": 0.000102618920363442, "loss": 1.3703, "step": 960 }, { "epoch": 0.05, "learning_rate": 0.00010475681453768039, "loss": 1.3686, "step": 980 }, { "epoch": 0.05, "learning_rate": 0.00010689470871191875, "loss": 1.3659, "step": 1000 }, { "epoch": 0.05, "eval_loss": 1.4101723432540894, "eval_runtime": 49.9126, "eval_samples_per_second": 60.105, "eval_steps_per_second": 1.883, "step": 1000 }, { "epoch": 0.05, "learning_rate": 0.00010903260288615713, "loss": 1.4065, "step": 1020 }, { "epoch": 0.06, "learning_rate": 0.00011117049706039551, "loss": 1.3392, "step": 1040 }, { "epoch": 0.06, "eval_loss": 1.410170555114746, "eval_runtime": 49.9495, "eval_samples_per_second": 60.061, "eval_steps_per_second": 1.882, "step": 1050 }, { "epoch": 0.06, "learning_rate": 0.00011330839123463388, "loss": 1.3471, "step": 1060 }, { "epoch": 0.06, "learning_rate": 0.00011544628540887226, "loss": 1.3389, "step": 1080 }, { "epoch": 0.06, "learning_rate": 0.00011758417958311063, "loss": 1.4075, "step": 1100 }, { "epoch": 0.06, "eval_loss": 1.4099453687667847, "eval_runtime": 49.7526, "eval_samples_per_second": 60.298, "eval_steps_per_second": 1.889, "step": 1100 }, { "epoch": 0.06, "learning_rate": 0.00011972207375734901, "loss": 1.3654, "step": 1120 }, { "epoch": 0.06, "learning_rate": 0.00012185996793158738, "loss": 1.3927, "step": 1140 }, { "epoch": 0.06, "eval_loss": 1.409054160118103, "eval_runtime": 49.9406, "eval_samples_per_second": 60.071, "eval_steps_per_second": 1.882, "step": 1150 }, { "epoch": 0.06, "learning_rate": 0.00012399786210582576, "loss": 1.3952, "step": 1160 }, { "epoch": 0.06, "learning_rate": 0.00012613575628006414, "loss": 1.361, "step": 1180 }, { "epoch": 0.06, "learning_rate": 0.00012827365045430252, "loss": 1.3464, "step": 1200 }, { "epoch": 0.06, "eval_loss": 1.408968448638916, "eval_runtime": 49.8975, "eval_samples_per_second": 60.123, "eval_steps_per_second": 1.884, "step": 1200 }, { "epoch": 0.07, "learning_rate": 0.00013041154462854087, "loss": 1.4216, "step": 1220 }, { "epoch": 0.07, "learning_rate": 0.00013254943880277925, "loss": 1.3608, "step": 1240 }, { "epoch": 0.07, "eval_loss": 1.4092673063278198, "eval_runtime": 49.923, "eval_samples_per_second": 60.093, "eval_steps_per_second": 1.883, "step": 1250 }, { "epoch": 0.07, "learning_rate": 0.00013468733297701766, "loss": 1.375, "step": 1260 }, { "epoch": 0.07, "learning_rate": 0.00013682522715125604, "loss": 1.359, "step": 1280 }, { "epoch": 0.07, "learning_rate": 0.00013896312132549441, "loss": 1.354, "step": 1300 }, { "epoch": 0.07, "eval_loss": 1.4099416732788086, "eval_runtime": 49.8935, "eval_samples_per_second": 60.128, "eval_steps_per_second": 1.884, "step": 1300 }, { "epoch": 0.07, "learning_rate": 0.0001411010154997328, "loss": 1.3792, "step": 1320 }, { "epoch": 0.07, "learning_rate": 0.00014323890967397115, "loss": 1.3666, "step": 1340 }, { "epoch": 0.07, "eval_loss": 1.4085068702697754, "eval_runtime": 49.9387, "eval_samples_per_second": 60.074, "eval_steps_per_second": 1.882, "step": 1350 }, { "epoch": 0.07, "learning_rate": 0.00014537680384820953, "loss": 1.3648, "step": 1360 }, { "epoch": 0.07, "learning_rate": 0.0001475146980224479, "loss": 1.3749, "step": 1380 }, { "epoch": 0.07, "learning_rate": 0.00014965259219668628, "loss": 1.3646, "step": 1400 }, { "epoch": 0.07, "eval_loss": 1.4088290929794312, "eval_runtime": 49.9166, "eval_samples_per_second": 60.1, "eval_steps_per_second": 1.883, "step": 1400 }, { "epoch": 0.08, "learning_rate": 0.00015179048637092466, "loss": 1.3961, "step": 1420 }, { "epoch": 0.08, "learning_rate": 0.00015392838054516304, "loss": 1.3936, "step": 1440 }, { "epoch": 0.08, "eval_loss": 1.408731460571289, "eval_runtime": 49.9201, "eval_samples_per_second": 60.096, "eval_steps_per_second": 1.883, "step": 1450 }, { "epoch": 0.08, "learning_rate": 0.0001560662747194014, "loss": 1.3706, "step": 1460 }, { "epoch": 0.08, "learning_rate": 0.00015820416889363977, "loss": 1.3484, "step": 1480 }, { "epoch": 0.08, "learning_rate": 0.00016034206306787815, "loss": 1.3926, "step": 1500 }, { "epoch": 0.08, "eval_loss": 1.4091012477874756, "eval_runtime": 49.8678, "eval_samples_per_second": 60.159, "eval_steps_per_second": 1.885, "step": 1500 }, { "epoch": 0.08, "learning_rate": 0.00016247995724211653, "loss": 1.3689, "step": 1520 }, { "epoch": 0.08, "learning_rate": 0.0001646178514163549, "loss": 1.3209, "step": 1540 }, { "epoch": 0.08, "eval_loss": 1.4094237089157104, "eval_runtime": 49.9507, "eval_samples_per_second": 60.059, "eval_steps_per_second": 1.882, "step": 1550 }, { "epoch": 0.08, "learning_rate": 0.0001667557455905933, "loss": 1.36, "step": 1560 }, { "epoch": 0.08, "learning_rate": 0.00016889363976483164, "loss": 1.3839, "step": 1580 }, { "epoch": 0.09, "learning_rate": 0.00017103153393907002, "loss": 1.3898, "step": 1600 }, { "epoch": 0.09, "eval_loss": 1.4086543321609497, "eval_runtime": 49.8979, "eval_samples_per_second": 60.123, "eval_steps_per_second": 1.884, "step": 1600 }, { "epoch": 0.09, "learning_rate": 0.0001731694281133084, "loss": 1.359, "step": 1620 }, { "epoch": 0.09, "learning_rate": 0.00017530732228754678, "loss": 1.3937, "step": 1640 }, { "epoch": 0.09, "eval_loss": 1.4081257581710815, "eval_runtime": 49.9291, "eval_samples_per_second": 60.085, "eval_steps_per_second": 1.883, "step": 1650 }, { "epoch": 0.09, "learning_rate": 0.00017744521646178516, "loss": 1.3744, "step": 1660 }, { "epoch": 0.09, "learning_rate": 0.00017958311063602354, "loss": 1.3738, "step": 1680 }, { "epoch": 0.09, "learning_rate": 0.00018172100481026192, "loss": 1.3589, "step": 1700 }, { "epoch": 0.09, "eval_loss": 1.4081943035125732, "eval_runtime": 49.8275, "eval_samples_per_second": 60.208, "eval_steps_per_second": 1.887, "step": 1700 }, { "epoch": 0.09, "learning_rate": 0.00018385889898450027, "loss": 1.396, "step": 1720 }, { "epoch": 0.09, "learning_rate": 0.00018599679315873865, "loss": 1.3503, "step": 1740 }, { "epoch": 0.09, "eval_loss": 1.4091119766235352, "eval_runtime": 49.9328, "eval_samples_per_second": 60.081, "eval_steps_per_second": 1.883, "step": 1750 }, { "epoch": 0.09, "learning_rate": 0.00018813468733297703, "loss": 1.3775, "step": 1760 }, { "epoch": 0.1, "learning_rate": 0.0001902725815072154, "loss": 1.4194, "step": 1780 }, { "epoch": 0.1, "learning_rate": 0.0001924104756814538, "loss": 1.3852, "step": 1800 }, { "epoch": 0.1, "eval_loss": 1.4091593027114868, "eval_runtime": 49.8852, "eval_samples_per_second": 60.138, "eval_steps_per_second": 1.884, "step": 1800 }, { "epoch": 0.1, "learning_rate": 0.00019454836985569217, "loss": 1.389, "step": 1820 }, { "epoch": 0.1, "learning_rate": 0.00019668626402993052, "loss": 1.3605, "step": 1840 }, { "epoch": 0.1, "eval_loss": 1.409116268157959, "eval_runtime": 49.9249, "eval_samples_per_second": 60.09, "eval_steps_per_second": 1.883, "step": 1850 }, { "epoch": 0.1, "learning_rate": 0.0001988241582041689, "loss": 1.3712, "step": 1860 }, { "epoch": 0.1, "learning_rate": 0.00019999516686401974, "loss": 1.3866, "step": 1880 }, { "epoch": 0.1, "learning_rate": 0.00019998442656184132, "loss": 1.4269, "step": 1900 }, { "epoch": 0.1, "eval_loss": 1.4094537496566772, "eval_runtime": 49.8922, "eval_samples_per_second": 60.13, "eval_steps_per_second": 1.884, "step": 1900 }, { "epoch": 0.1, "learning_rate": 0.00019997368625966294, "loss": 1.3613, "step": 1920 }, { "epoch": 0.1, "learning_rate": 0.00019996294595748452, "loss": 1.3664, "step": 1940 }, { "epoch": 0.1, "eval_loss": 1.409331202507019, "eval_runtime": 49.9367, "eval_samples_per_second": 60.076, "eval_steps_per_second": 1.882, "step": 1950 }, { "epoch": 0.1, "learning_rate": 0.0001999522056553061, "loss": 1.3729, "step": 1960 }, { "epoch": 0.11, "learning_rate": 0.00019994146535312772, "loss": 1.4009, "step": 1980 }, { "epoch": 0.11, "learning_rate": 0.0001999307250509493, "loss": 1.373, "step": 2000 }, { "epoch": 0.11, "eval_loss": 1.4088104963302612, "eval_runtime": 49.91, "eval_samples_per_second": 60.108, "eval_steps_per_second": 1.883, "step": 2000 }, { "epoch": 0.11, "learning_rate": 0.00019991998474877092, "loss": 1.4176, "step": 2020 }, { "epoch": 0.11, "learning_rate": 0.0001999092444465925, "loss": 1.3791, "step": 2040 }, { "epoch": 0.11, "eval_loss": 1.4090261459350586, "eval_runtime": 49.9322, "eval_samples_per_second": 60.082, "eval_steps_per_second": 1.883, "step": 2050 }, { "epoch": 0.11, "learning_rate": 0.00019989850414441412, "loss": 1.3044, "step": 2060 }, { "epoch": 0.11, "learning_rate": 0.0001998877638422357, "loss": 1.3906, "step": 2080 }, { "epoch": 0.11, "learning_rate": 0.0001998770235400573, "loss": 1.3836, "step": 2100 }, { "epoch": 0.11, "eval_loss": 1.4082199335098267, "eval_runtime": 49.9048, "eval_samples_per_second": 60.114, "eval_steps_per_second": 1.884, "step": 2100 }, { "epoch": 0.11, "learning_rate": 0.0001998662832378789, "loss": 1.3652, "step": 2120 }, { "epoch": 0.11, "learning_rate": 0.0001998555429357005, "loss": 1.3387, "step": 2140 }, { "epoch": 0.11, "eval_loss": 1.40771484375, "eval_runtime": 49.9276, "eval_samples_per_second": 60.087, "eval_steps_per_second": 1.883, "step": 2150 }, { "epoch": 0.12, "learning_rate": 0.0001998448026335221, "loss": 1.3131, "step": 2160 }, { "epoch": 0.12, "learning_rate": 0.0001998340623313437, "loss": 1.3854, "step": 2180 }, { "epoch": 0.12, "learning_rate": 0.00019982332202916528, "loss": 1.3528, "step": 2200 }, { "epoch": 0.12, "eval_loss": 1.4079679250717163, "eval_runtime": 49.8878, "eval_samples_per_second": 60.135, "eval_steps_per_second": 1.884, "step": 2200 }, { "epoch": 0.12, "learning_rate": 0.0001998125817269869, "loss": 1.3839, "step": 2220 }, { "epoch": 0.12, "learning_rate": 0.00019980184142480848, "loss": 1.3316, "step": 2240 }, { "epoch": 0.12, "eval_loss": 1.4077861309051514, "eval_runtime": 49.923, "eval_samples_per_second": 60.093, "eval_steps_per_second": 1.883, "step": 2250 }, { "epoch": 0.12, "learning_rate": 0.0001997911011226301, "loss": 1.4085, "step": 2260 }, { "epoch": 0.12, "learning_rate": 0.00019978036082045168, "loss": 1.4099, "step": 2280 }, { "epoch": 0.12, "learning_rate": 0.00019976962051827327, "loss": 1.3541, "step": 2300 }, { "epoch": 0.12, "eval_loss": 1.4079844951629639, "eval_runtime": 49.8734, "eval_samples_per_second": 60.152, "eval_steps_per_second": 1.885, "step": 2300 }, { "epoch": 0.12, "learning_rate": 0.00019975888021609488, "loss": 1.3947, "step": 2320 }, { "epoch": 0.13, "learning_rate": 0.00019974813991391647, "loss": 1.3833, "step": 2340 }, { "epoch": 0.13, "eval_loss": 1.4079506397247314, "eval_runtime": 49.9735, "eval_samples_per_second": 60.032, "eval_steps_per_second": 1.881, "step": 2350 }, { "epoch": 0.13, "learning_rate": 0.00019973739961173808, "loss": 1.3379, "step": 2360 }, { "epoch": 0.13, "learning_rate": 0.0001997266593095597, "loss": 1.3665, "step": 2380 }, { "epoch": 0.13, "learning_rate": 0.00019971591900738128, "loss": 1.3738, "step": 2400 }, { "epoch": 0.13, "eval_loss": 1.4071325063705444, "eval_runtime": 49.8925, "eval_samples_per_second": 60.129, "eval_steps_per_second": 1.884, "step": 2400 }, { "epoch": 0.13, "learning_rate": 0.0001997051787052029, "loss": 1.3443, "step": 2420 }, { "epoch": 0.13, "learning_rate": 0.00019969443840302448, "loss": 1.3573, "step": 2440 }, { "epoch": 0.13, "eval_loss": 1.4071837663650513, "eval_runtime": 49.9695, "eval_samples_per_second": 60.037, "eval_steps_per_second": 1.881, "step": 2450 }, { "epoch": 0.13, "learning_rate": 0.00019968369810084607, "loss": 1.3498, "step": 2460 }, { "epoch": 0.13, "learning_rate": 0.00019967295779866768, "loss": 1.3629, "step": 2480 }, { "epoch": 0.13, "learning_rate": 0.00019966221749648927, "loss": 1.3734, "step": 2500 }, { "epoch": 0.13, "eval_loss": 1.407920241355896, "eval_runtime": 49.8461, "eval_samples_per_second": 60.185, "eval_steps_per_second": 1.886, "step": 2500 }, { "epoch": 0.13, "learning_rate": 0.00019965147719431088, "loss": 1.3626, "step": 2520 }, { "epoch": 0.14, "learning_rate": 0.00019964073689213247, "loss": 1.3672, "step": 2540 }, { "epoch": 0.14, "eval_loss": 1.4076189994812012, "eval_runtime": 49.9079, "eval_samples_per_second": 60.111, "eval_steps_per_second": 1.883, "step": 2550 }, { "epoch": 0.14, "learning_rate": 0.00019962999658995408, "loss": 1.395, "step": 2560 }, { "epoch": 0.14, "learning_rate": 0.00019961925628777567, "loss": 1.3628, "step": 2580 }, { "epoch": 0.14, "learning_rate": 0.00019960851598559726, "loss": 1.3804, "step": 2600 }, { "epoch": 0.14, "eval_loss": 1.4080127477645874, "eval_runtime": 49.891, "eval_samples_per_second": 60.131, "eval_steps_per_second": 1.884, "step": 2600 }, { "epoch": 0.14, "learning_rate": 0.00019959777568341887, "loss": 1.3555, "step": 2620 }, { "epoch": 0.14, "learning_rate": 0.00019958703538124046, "loss": 1.3725, "step": 2640 }, { "epoch": 0.14, "eval_loss": 1.4076472520828247, "eval_runtime": 49.9139, "eval_samples_per_second": 60.104, "eval_steps_per_second": 1.883, "step": 2650 }, { "epoch": 0.14, "learning_rate": 0.00019957629507906207, "loss": 1.3865, "step": 2660 }, { "epoch": 0.14, "learning_rate": 0.00019956555477688366, "loss": 1.3725, "step": 2680 }, { "epoch": 0.14, "learning_rate": 0.00019955481447470524, "loss": 1.3654, "step": 2700 }, { "epoch": 0.14, "eval_loss": 1.4083791971206665, "eval_runtime": 49.871, "eval_samples_per_second": 60.155, "eval_steps_per_second": 1.885, "step": 2700 }, { "epoch": 0.15, "learning_rate": 0.00019954407417252686, "loss": 1.4114, "step": 2720 }, { "epoch": 0.15, "learning_rate": 0.00019953333387034844, "loss": 1.3674, "step": 2740 }, { "epoch": 0.15, "eval_loss": 1.407729148864746, "eval_runtime": 49.8891, "eval_samples_per_second": 60.133, "eval_steps_per_second": 1.884, "step": 2750 }, { "epoch": 0.15, "learning_rate": 0.00019952259356817006, "loss": 1.3409, "step": 2760 }, { "epoch": 0.15, "learning_rate": 0.00019951185326599164, "loss": 1.3983, "step": 2780 }, { "epoch": 0.15, "learning_rate": 0.00019950111296381323, "loss": 1.4033, "step": 2800 }, { "epoch": 0.15, "eval_loss": 1.4079221487045288, "eval_runtime": 49.8679, "eval_samples_per_second": 60.159, "eval_steps_per_second": 1.885, "step": 2800 }, { "epoch": 0.15, "learning_rate": 0.00019949037266163484, "loss": 1.3811, "step": 2820 }, { "epoch": 0.15, "learning_rate": 0.00019947963235945643, "loss": 1.3869, "step": 2840 }, { "epoch": 0.15, "eval_loss": 1.4070159196853638, "eval_runtime": 49.8875, "eval_samples_per_second": 60.135, "eval_steps_per_second": 1.884, "step": 2850 }, { "epoch": 0.15, "learning_rate": 0.00019946889205727804, "loss": 1.3576, "step": 2860 }, { "epoch": 0.15, "learning_rate": 0.00019945815175509963, "loss": 1.3872, "step": 2880 }, { "epoch": 0.15, "learning_rate": 0.00019944741145292122, "loss": 1.3753, "step": 2900 }, { "epoch": 0.15, "eval_loss": 1.4078199863433838, "eval_runtime": 49.9188, "eval_samples_per_second": 60.098, "eval_steps_per_second": 1.883, "step": 2900 }, { "epoch": 0.16, "learning_rate": 0.00019943667115074283, "loss": 1.357, "step": 2920 }, { "epoch": 0.16, "learning_rate": 0.00019942646786367337, "loss": 1.3749, "step": 2940 }, { "epoch": 0.16, "eval_loss": 1.4070653915405273, "eval_runtime": 49.9325, "eval_samples_per_second": 60.081, "eval_steps_per_second": 1.883, "step": 2950 }, { "epoch": 0.16, "learning_rate": 0.00019941572756149495, "loss": 1.3302, "step": 2960 }, { "epoch": 0.16, "learning_rate": 0.00019940498725931657, "loss": 1.3303, "step": 2980 }, { "epoch": 0.16, "learning_rate": 0.00019939424695713815, "loss": 1.3634, "step": 3000 }, { "epoch": 0.16, "eval_loss": 1.4064579010009766, "eval_runtime": 49.8564, "eval_samples_per_second": 60.173, "eval_steps_per_second": 1.885, "step": 3000 }, { "epoch": 0.16, "learning_rate": 0.00019938350665495974, "loss": 1.3598, "step": 3020 }, { "epoch": 0.16, "learning_rate": 0.00019937276635278135, "loss": 1.3385, "step": 3040 }, { "epoch": 0.16, "eval_loss": 1.4077314138412476, "eval_runtime": 49.9, "eval_samples_per_second": 60.12, "eval_steps_per_second": 1.884, "step": 3050 }, { "epoch": 0.16, "learning_rate": 0.00019936202605060294, "loss": 1.3799, "step": 3060 }, { "epoch": 0.16, "learning_rate": 0.00019935128574842455, "loss": 1.3893, "step": 3080 }, { "epoch": 0.17, "learning_rate": 0.00019934054544624614, "loss": 1.3544, "step": 3100 }, { "epoch": 0.17, "eval_loss": 1.4063854217529297, "eval_runtime": 49.8543, "eval_samples_per_second": 60.175, "eval_steps_per_second": 1.885, "step": 3100 }, { "epoch": 0.17, "learning_rate": 0.00019932980514406772, "loss": 1.3689, "step": 3120 }, { "epoch": 0.17, "learning_rate": 0.00019931906484188934, "loss": 1.3925, "step": 3140 }, { "epoch": 0.17, "eval_loss": 1.4067975282669067, "eval_runtime": 49.9106, "eval_samples_per_second": 60.107, "eval_steps_per_second": 1.883, "step": 3150 }, { "epoch": 0.17, "learning_rate": 0.00019930832453971092, "loss": 1.3536, "step": 3160 }, { "epoch": 0.17, "learning_rate": 0.00019929758423753254, "loss": 1.3813, "step": 3180 }, { "epoch": 0.17, "learning_rate": 0.00019928684393535412, "loss": 1.357, "step": 3200 }, { "epoch": 0.17, "eval_loss": 1.4065169095993042, "eval_runtime": 49.8559, "eval_samples_per_second": 60.173, "eval_steps_per_second": 1.885, "step": 3200 }, { "epoch": 0.17, "learning_rate": 0.00019927610363317574, "loss": 1.3642, "step": 3220 }, { "epoch": 0.17, "learning_rate": 0.00019926536333099732, "loss": 1.4115, "step": 3240 }, { "epoch": 0.17, "eval_loss": 1.4065427780151367, "eval_runtime": 49.9229, "eval_samples_per_second": 60.093, "eval_steps_per_second": 1.883, "step": 3250 }, { "epoch": 0.17, "learning_rate": 0.0001992546230288189, "loss": 1.37, "step": 3260 }, { "epoch": 0.18, "learning_rate": 0.00019924388272664052, "loss": 1.3934, "step": 3280 }, { "epoch": 0.18, "learning_rate": 0.0001992331424244621, "loss": 1.358, "step": 3300 }, { "epoch": 0.18, "eval_loss": 1.4061527252197266, "eval_runtime": 49.8613, "eval_samples_per_second": 60.167, "eval_steps_per_second": 1.885, "step": 3300 }, { "epoch": 0.18, "learning_rate": 0.00019922240212228372, "loss": 1.4362, "step": 3320 }, { "epoch": 0.18, "learning_rate": 0.0001992116618201053, "loss": 1.3564, "step": 3340 }, { "epoch": 0.18, "eval_loss": 1.4055869579315186, "eval_runtime": 49.9231, "eval_samples_per_second": 60.092, "eval_steps_per_second": 1.883, "step": 3350 }, { "epoch": 0.18, "learning_rate": 0.0001992009215179269, "loss": 1.3623, "step": 3360 }, { "epoch": 0.18, "learning_rate": 0.0001991901812157485, "loss": 1.3585, "step": 3380 }, { "epoch": 0.18, "learning_rate": 0.0001991794409135701, "loss": 1.3873, "step": 3400 }, { "epoch": 0.18, "eval_loss": 1.4059674739837646, "eval_runtime": 49.8966, "eval_samples_per_second": 60.124, "eval_steps_per_second": 1.884, "step": 3400 }, { "epoch": 0.18, "learning_rate": 0.0001991687006113917, "loss": 1.3368, "step": 3420 }, { "epoch": 0.18, "learning_rate": 0.0001991579603092133, "loss": 1.3468, "step": 3440 }, { "epoch": 0.18, "eval_loss": 1.405554175376892, "eval_runtime": 49.9447, "eval_samples_per_second": 60.066, "eval_steps_per_second": 1.882, "step": 3450 }, { "epoch": 0.18, "learning_rate": 0.0001991472200070349, "loss": 1.3725, "step": 3460 }, { "epoch": 0.19, "learning_rate": 0.00019913647970485652, "loss": 1.3846, "step": 3480 }, { "epoch": 0.19, "learning_rate": 0.0001991257394026781, "loss": 1.4053, "step": 3500 }, { "epoch": 0.19, "eval_loss": 1.405627727508545, "eval_runtime": 49.8796, "eval_samples_per_second": 60.145, "eval_steps_per_second": 1.885, "step": 3500 }, { "epoch": 0.19, "learning_rate": 0.0001991149991004997, "loss": 1.3681, "step": 3520 }, { "epoch": 0.19, "learning_rate": 0.0001991042587983213, "loss": 1.3415, "step": 3540 }, { "epoch": 0.19, "eval_loss": 1.4059925079345703, "eval_runtime": 49.9311, "eval_samples_per_second": 60.083, "eval_steps_per_second": 1.883, "step": 3550 }, { "epoch": 0.19, "learning_rate": 0.0001990935184961429, "loss": 1.3109, "step": 3560 }, { "epoch": 0.19, "learning_rate": 0.0001990827781939645, "loss": 1.38, "step": 3580 }, { "epoch": 0.19, "learning_rate": 0.0001990720378917861, "loss": 1.3641, "step": 3600 }, { "epoch": 0.19, "eval_loss": 1.4058605432510376, "eval_runtime": 49.9312, "eval_samples_per_second": 60.083, "eval_steps_per_second": 1.883, "step": 3600 }, { "epoch": 0.19, "learning_rate": 0.0001990612975896077, "loss": 1.3655, "step": 3620 }, { "epoch": 0.19, "learning_rate": 0.0001990505572874293, "loss": 1.3605, "step": 3640 }, { "epoch": 0.2, "eval_loss": 1.4055465459823608, "eval_runtime": 49.9566, "eval_samples_per_second": 60.052, "eval_steps_per_second": 1.882, "step": 3650 }, { "epoch": 0.2, "learning_rate": 0.00019903981698525088, "loss": 1.386, "step": 3660 }, { "epoch": 0.2, "learning_rate": 0.0001990290766830725, "loss": 1.3529, "step": 3680 }, { "epoch": 0.2, "learning_rate": 0.00019901833638089408, "loss": 1.3937, "step": 3700 }, { "epoch": 0.2, "eval_loss": 1.4052441120147705, "eval_runtime": 49.9264, "eval_samples_per_second": 60.088, "eval_steps_per_second": 1.883, "step": 3700 }, { "epoch": 0.2, "learning_rate": 0.0001990075960787157, "loss": 1.3474, "step": 3720 }, { "epoch": 0.2, "learning_rate": 0.00019899685577653728, "loss": 1.3422, "step": 3740 }, { "epoch": 0.2, "eval_loss": 1.404908537864685, "eval_runtime": 49.9154, "eval_samples_per_second": 60.102, "eval_steps_per_second": 1.883, "step": 3750 }, { "epoch": 0.2, "learning_rate": 0.00019898611547435887, "loss": 1.341, "step": 3760 }, { "epoch": 0.2, "learning_rate": 0.00019897537517218048, "loss": 1.3448, "step": 3780 }, { "epoch": 0.2, "learning_rate": 0.00019896463487000207, "loss": 1.3649, "step": 3800 }, { "epoch": 0.2, "eval_loss": 1.4054372310638428, "eval_runtime": 49.8885, "eval_samples_per_second": 60.134, "eval_steps_per_second": 1.884, "step": 3800 }, { "epoch": 0.2, "learning_rate": 0.00019895389456782368, "loss": 1.3718, "step": 3820 }, { "epoch": 0.21, "learning_rate": 0.00019894315426564527, "loss": 1.3241, "step": 3840 }, { "epoch": 0.21, "eval_loss": 1.4053229093551636, "eval_runtime": 49.9033, "eval_samples_per_second": 60.116, "eval_steps_per_second": 1.884, "step": 3850 }, { "epoch": 0.21, "learning_rate": 0.00019893241396346686, "loss": 1.3723, "step": 3860 }, { "epoch": 0.21, "learning_rate": 0.00019892167366128847, "loss": 1.4031, "step": 3880 }, { "epoch": 0.21, "learning_rate": 0.00019891093335911006, "loss": 1.3731, "step": 3900 }, { "epoch": 0.21, "eval_loss": 1.405073881149292, "eval_runtime": 49.8804, "eval_samples_per_second": 60.144, "eval_steps_per_second": 1.885, "step": 3900 }, { "epoch": 0.21, "learning_rate": 0.00019890019305693167, "loss": 1.3892, "step": 3920 }, { "epoch": 0.21, "learning_rate": 0.00019888945275475326, "loss": 1.3396, "step": 3940 }, { "epoch": 0.21, "eval_loss": 1.4052538871765137, "eval_runtime": 49.9053, "eval_samples_per_second": 60.114, "eval_steps_per_second": 1.884, "step": 3950 }, { "epoch": 0.21, "learning_rate": 0.00019887871245257484, "loss": 1.3367, "step": 3960 }, { "epoch": 0.21, "learning_rate": 0.00019886797215039646, "loss": 1.3634, "step": 3980 }, { "epoch": 0.21, "learning_rate": 0.00019885723184821804, "loss": 1.3573, "step": 4000 }, { "epoch": 0.21, "eval_loss": 1.4054239988327026, "eval_runtime": 49.8607, "eval_samples_per_second": 60.168, "eval_steps_per_second": 1.885, "step": 4000 }, { "epoch": 0.21, "learning_rate": 0.00019884649154603966, "loss": 1.374, "step": 4020 }, { "epoch": 0.22, "learning_rate": 0.00019883575124386124, "loss": 1.3274, "step": 4040 }, { "epoch": 0.22, "eval_loss": 1.4054758548736572, "eval_runtime": 49.9644, "eval_samples_per_second": 60.043, "eval_steps_per_second": 1.881, "step": 4050 }, { "epoch": 0.22, "learning_rate": 0.00019882501094168283, "loss": 1.3344, "step": 4060 }, { "epoch": 0.22, "learning_rate": 0.00019881427063950447, "loss": 1.4042, "step": 4080 }, { "epoch": 0.22, "learning_rate": 0.00019880353033732606, "loss": 1.3657, "step": 4100 }, { "epoch": 0.22, "eval_loss": 1.405276894569397, "eval_runtime": 49.8923, "eval_samples_per_second": 60.129, "eval_steps_per_second": 1.884, "step": 4100 }, { "epoch": 0.22, "learning_rate": 0.00019879279003514767, "loss": 1.4087, "step": 4120 }, { "epoch": 0.22, "learning_rate": 0.00019878204973296926, "loss": 1.3755, "step": 4140 }, { "epoch": 0.22, "eval_loss": 1.405236005783081, "eval_runtime": 49.9488, "eval_samples_per_second": 60.061, "eval_steps_per_second": 1.882, "step": 4150 }, { "epoch": 0.22, "learning_rate": 0.00019877130943079084, "loss": 1.3296, "step": 4160 }, { "epoch": 0.22, "learning_rate": 0.00019876056912861246, "loss": 1.3761, "step": 4180 }, { "epoch": 0.22, "learning_rate": 0.00019874982882643404, "loss": 1.3692, "step": 4200 }, { "epoch": 0.22, "eval_loss": 1.4048938751220703, "eval_runtime": 49.8671, "eval_samples_per_second": 60.16, "eval_steps_per_second": 1.885, "step": 4200 }, { "epoch": 0.23, "learning_rate": 0.00019873908852425566, "loss": 1.4076, "step": 4220 }, { "epoch": 0.23, "learning_rate": 0.00019872834822207724, "loss": 1.3583, "step": 4240 }, { "epoch": 0.23, "eval_loss": 1.405096411705017, "eval_runtime": 49.9436, "eval_samples_per_second": 60.068, "eval_steps_per_second": 1.882, "step": 4250 }, { "epoch": 0.23, "learning_rate": 0.00019871760791989883, "loss": 1.3269, "step": 4260 }, { "epoch": 0.23, "learning_rate": 0.00019870686761772044, "loss": 1.3954, "step": 4280 }, { "epoch": 0.23, "learning_rate": 0.00019869612731554203, "loss": 1.3728, "step": 4300 }, { "epoch": 0.23, "eval_loss": 1.4051318168640137, "eval_runtime": 49.9242, "eval_samples_per_second": 60.091, "eval_steps_per_second": 1.883, "step": 4300 }, { "epoch": 0.23, "learning_rate": 0.00019868538701336364, "loss": 1.3818, "step": 4320 }, { "epoch": 0.23, "learning_rate": 0.00019867464671118523, "loss": 1.3199, "step": 4340 }, { "epoch": 0.23, "eval_loss": 1.40473473072052, "eval_runtime": 49.9828, "eval_samples_per_second": 60.021, "eval_steps_per_second": 1.881, "step": 4350 }, { "epoch": 0.23, "learning_rate": 0.00019866390640900682, "loss": 1.3595, "step": 4360 }, { "epoch": 0.23, "learning_rate": 0.00019865316610682843, "loss": 1.3777, "step": 4380 }, { "epoch": 0.24, "learning_rate": 0.00019864242580465002, "loss": 1.3655, "step": 4400 }, { "epoch": 0.24, "eval_loss": 1.4049779176712036, "eval_runtime": 49.9218, "eval_samples_per_second": 60.094, "eval_steps_per_second": 1.883, "step": 4400 }, { "epoch": 0.24, "learning_rate": 0.00019863168550247163, "loss": 1.4016, "step": 4420 }, { "epoch": 0.24, "learning_rate": 0.00019862094520029322, "loss": 1.3786, "step": 4440 }, { "epoch": 0.24, "eval_loss": 1.4047762155532837, "eval_runtime": 49.9779, "eval_samples_per_second": 60.027, "eval_steps_per_second": 1.881, "step": 4450 }, { "epoch": 0.24, "learning_rate": 0.0001986102048981148, "loss": 1.3883, "step": 4460 }, { "epoch": 0.24, "learning_rate": 0.00019859946459593642, "loss": 1.3878, "step": 4480 }, { "epoch": 0.24, "learning_rate": 0.000198588724293758, "loss": 1.3631, "step": 4500 }, { "epoch": 0.24, "eval_loss": 1.40459406375885, "eval_runtime": 49.974, "eval_samples_per_second": 60.031, "eval_steps_per_second": 1.881, "step": 4500 }, { "epoch": 0.24, "learning_rate": 0.00019857798399157962, "loss": 1.358, "step": 4520 }, { "epoch": 0.24, "learning_rate": 0.0001985672436894012, "loss": 1.3183, "step": 4540 }, { "epoch": 0.24, "eval_loss": 1.4047383069992065, "eval_runtime": 49.97, "eval_samples_per_second": 60.036, "eval_steps_per_second": 1.881, "step": 4550 }, { "epoch": 0.24, "learning_rate": 0.00019855650338722282, "loss": 1.3327, "step": 4560 }, { "epoch": 0.24, "learning_rate": 0.0001985457630850444, "loss": 1.341, "step": 4580 }, { "epoch": 0.25, "learning_rate": 0.000198535022782866, "loss": 1.3578, "step": 4600 }, { "epoch": 0.25, "eval_loss": 1.4043097496032715, "eval_runtime": 49.9216, "eval_samples_per_second": 60.094, "eval_steps_per_second": 1.883, "step": 4600 }, { "epoch": 0.25, "learning_rate": 0.0001985242824806876, "loss": 1.3672, "step": 4620 }, { "epoch": 0.25, "learning_rate": 0.0001985135421785092, "loss": 1.3401, "step": 4640 }, { "epoch": 0.25, "eval_loss": 1.4051434993743896, "eval_runtime": 49.9422, "eval_samples_per_second": 60.069, "eval_steps_per_second": 1.882, "step": 4650 }, { "epoch": 0.25, "learning_rate": 0.0001985028018763308, "loss": 1.3944, "step": 4660 }, { "epoch": 0.25, "learning_rate": 0.0001984920615741524, "loss": 1.3479, "step": 4680 }, { "epoch": 0.25, "learning_rate": 0.00019848132127197398, "loss": 1.3385, "step": 4700 }, { "epoch": 0.25, "eval_loss": 1.404536485671997, "eval_runtime": 49.9602, "eval_samples_per_second": 60.048, "eval_steps_per_second": 1.881, "step": 4700 }, { "epoch": 0.25, "learning_rate": 0.00019847058096979562, "loss": 1.3895, "step": 4720 }, { "epoch": 0.25, "learning_rate": 0.0001984598406676172, "loss": 1.3638, "step": 4740 }, { "epoch": 0.25, "eval_loss": 1.4047491550445557, "eval_runtime": 49.9567, "eval_samples_per_second": 60.052, "eval_steps_per_second": 1.882, "step": 4750 }, { "epoch": 0.25, "learning_rate": 0.0001984491003654388, "loss": 1.3291, "step": 4760 }, { "epoch": 0.26, "learning_rate": 0.0001984383600632604, "loss": 1.3697, "step": 4780 }, { "epoch": 0.26, "learning_rate": 0.000198427619761082, "loss": 1.3794, "step": 4800 }, { "epoch": 0.26, "eval_loss": 1.4042659997940063, "eval_runtime": 49.9395, "eval_samples_per_second": 60.073, "eval_steps_per_second": 1.882, "step": 4800 }, { "epoch": 0.26, "learning_rate": 0.0001984168794589036, "loss": 1.3412, "step": 4820 }, { "epoch": 0.26, "learning_rate": 0.0001984061391567252, "loss": 1.3181, "step": 4840 }, { "epoch": 0.26, "eval_loss": 1.4045872688293457, "eval_runtime": 49.9714, "eval_samples_per_second": 60.034, "eval_steps_per_second": 1.881, "step": 4850 }, { "epoch": 0.26, "learning_rate": 0.00019839539885454678, "loss": 1.3461, "step": 4860 }, { "epoch": 0.26, "learning_rate": 0.0001983846585523684, "loss": 1.3832, "step": 4880 }, { "epoch": 0.26, "learning_rate": 0.00019837391825018998, "loss": 1.3673, "step": 4900 }, { "epoch": 0.26, "eval_loss": 1.4039243459701538, "eval_runtime": 49.9353, "eval_samples_per_second": 60.078, "eval_steps_per_second": 1.882, "step": 4900 }, { "epoch": 0.26, "learning_rate": 0.0001983631779480116, "loss": 1.3735, "step": 4920 }, { "epoch": 0.26, "learning_rate": 0.00019835243764583318, "loss": 1.3978, "step": 4940 }, { "epoch": 0.26, "eval_loss": 1.403692364692688, "eval_runtime": 49.9655, "eval_samples_per_second": 60.041, "eval_steps_per_second": 1.881, "step": 4950 }, { "epoch": 0.27, "learning_rate": 0.00019834169734365476, "loss": 1.3966, "step": 4960 }, { "epoch": 0.27, "learning_rate": 0.00019833095704147638, "loss": 1.3589, "step": 4980 }, { "epoch": 0.27, "learning_rate": 0.00019832021673929796, "loss": 1.3525, "step": 5000 }, { "epoch": 0.27, "eval_loss": 1.4040664434432983, "eval_runtime": 49.9201, "eval_samples_per_second": 60.096, "eval_steps_per_second": 1.883, "step": 5000 }, { "epoch": 0.27, "learning_rate": 0.00019830947643711958, "loss": 1.3461, "step": 5020 }, { "epoch": 0.27, "learning_rate": 0.00019829873613494116, "loss": 1.3542, "step": 5040 }, { "epoch": 0.27, "eval_loss": 1.4037916660308838, "eval_runtime": 49.9879, "eval_samples_per_second": 60.015, "eval_steps_per_second": 1.88, "step": 5050 }, { "epoch": 0.27, "learning_rate": 0.00019828799583276278, "loss": 1.332, "step": 5060 }, { "epoch": 0.27, "learning_rate": 0.00019827725553058436, "loss": 1.3698, "step": 5080 }, { "epoch": 0.27, "learning_rate": 0.00019826651522840595, "loss": 1.3322, "step": 5100 }, { "epoch": 0.27, "eval_loss": 1.403656244277954, "eval_runtime": 49.9297, "eval_samples_per_second": 60.085, "eval_steps_per_second": 1.883, "step": 5100 }, { "epoch": 0.27, "learning_rate": 0.00019825577492622756, "loss": 1.3376, "step": 5120 }, { "epoch": 0.27, "learning_rate": 0.00019824503462404915, "loss": 1.356, "step": 5140 }, { "epoch": 0.28, "eval_loss": 1.4035804271697998, "eval_runtime": 49.9959, "eval_samples_per_second": 60.005, "eval_steps_per_second": 1.88, "step": 5150 }, { "epoch": 0.28, "learning_rate": 0.00019823429432187076, "loss": 1.3228, "step": 5160 }, { "epoch": 0.28, "learning_rate": 0.00019822355401969235, "loss": 1.3784, "step": 5180 }, { "epoch": 0.28, "learning_rate": 0.00019821281371751394, "loss": 1.3658, "step": 5200 }, { "epoch": 0.28, "eval_loss": 1.403926134109497, "eval_runtime": 49.9437, "eval_samples_per_second": 60.068, "eval_steps_per_second": 1.882, "step": 5200 }, { "epoch": 0.28, "learning_rate": 0.00019820207341533555, "loss": 1.362, "step": 5220 }, { "epoch": 0.28, "learning_rate": 0.00019819133311315714, "loss": 1.3549, "step": 5240 }, { "epoch": 0.28, "eval_loss": 1.4029345512390137, "eval_runtime": 49.9979, "eval_samples_per_second": 60.003, "eval_steps_per_second": 1.88, "step": 5250 }, { "epoch": 0.28, "learning_rate": 0.00019818059281097875, "loss": 1.3809, "step": 5260 }, { "epoch": 0.28, "learning_rate": 0.00019816985250880034, "loss": 1.3573, "step": 5280 }, { "epoch": 0.28, "learning_rate": 0.00019815911220662192, "loss": 1.361, "step": 5300 }, { "epoch": 0.28, "eval_loss": 1.4024875164031982, "eval_runtime": 49.9453, "eval_samples_per_second": 60.066, "eval_steps_per_second": 1.882, "step": 5300 }, { "epoch": 0.28, "learning_rate": 0.00019814837190444354, "loss": 1.3267, "step": 5320 }, { "epoch": 0.29, "learning_rate": 0.00019813763160226515, "loss": 1.3765, "step": 5340 }, { "epoch": 0.29, "eval_loss": 1.4023877382278442, "eval_runtime": 49.9712, "eval_samples_per_second": 60.035, "eval_steps_per_second": 1.881, "step": 5350 }, { "epoch": 0.29, "learning_rate": 0.00019812689130008674, "loss": 1.3563, "step": 5360 }, { "epoch": 0.29, "learning_rate": 0.00019811615099790835, "loss": 1.3212, "step": 5380 }, { "epoch": 0.29, "learning_rate": 0.00019810541069572994, "loss": 1.3078, "step": 5400 }, { "epoch": 0.29, "eval_loss": 1.4039820432662964, "eval_runtime": 49.8899, "eval_samples_per_second": 60.132, "eval_steps_per_second": 1.884, "step": 5400 }, { "epoch": 0.29, "learning_rate": 0.00019809467039355155, "loss": 1.3441, "step": 5420 }, { "epoch": 0.29, "learning_rate": 0.00019808393009137314, "loss": 1.3772, "step": 5440 }, { "epoch": 0.29, "eval_loss": 1.402974247932434, "eval_runtime": 49.9826, "eval_samples_per_second": 60.021, "eval_steps_per_second": 1.881, "step": 5450 }, { "epoch": 0.29, "learning_rate": 0.00019807318978919475, "loss": 1.3703, "step": 5460 }, { "epoch": 0.29, "learning_rate": 0.00019806244948701634, "loss": 1.3942, "step": 5480 }, { "epoch": 0.29, "learning_rate": 0.00019805170918483792, "loss": 1.3586, "step": 5500 }, { "epoch": 0.29, "eval_loss": 1.4036117792129517, "eval_runtime": 49.921, "eval_samples_per_second": 60.095, "eval_steps_per_second": 1.883, "step": 5500 }, { "epoch": 0.29, "learning_rate": 0.00019804096888265954, "loss": 1.3272, "step": 5520 }, { "epoch": 0.3, "learning_rate": 0.00019803022858048112, "loss": 1.3751, "step": 5540 }, { "epoch": 0.3, "eval_loss": 1.4024755954742432, "eval_runtime": 49.9843, "eval_samples_per_second": 60.019, "eval_steps_per_second": 1.881, "step": 5550 }, { "epoch": 0.3, "learning_rate": 0.00019801948827830274, "loss": 1.3445, "step": 5560 }, { "epoch": 0.3, "learning_rate": 0.00019800874797612432, "loss": 1.3859, "step": 5580 }, { "epoch": 0.3, "learning_rate": 0.0001979980076739459, "loss": 1.3665, "step": 5600 }, { "epoch": 0.3, "eval_loss": 1.4023313522338867, "eval_runtime": 49.929, "eval_samples_per_second": 60.085, "eval_steps_per_second": 1.883, "step": 5600 }, { "epoch": 0.3, "learning_rate": 0.00019798726737176752, "loss": 1.3446, "step": 5620 }, { "epoch": 0.3, "learning_rate": 0.0001979765270695891, "loss": 1.3938, "step": 5640 }, { "epoch": 0.3, "eval_loss": 1.4025416374206543, "eval_runtime": 49.9691, "eval_samples_per_second": 60.037, "eval_steps_per_second": 1.881, "step": 5650 }, { "epoch": 0.3, "learning_rate": 0.00019796578676741072, "loss": 1.3895, "step": 5660 }, { "epoch": 0.3, "learning_rate": 0.0001979550464652323, "loss": 1.4207, "step": 5680 }, { "epoch": 0.3, "learning_rate": 0.0001979443061630539, "loss": 1.3675, "step": 5700 }, { "epoch": 0.3, "eval_loss": 1.4026676416397095, "eval_runtime": 49.9822, "eval_samples_per_second": 60.021, "eval_steps_per_second": 1.881, "step": 5700 }, { "epoch": 0.31, "learning_rate": 0.0001979335658608755, "loss": 1.3534, "step": 5720 }, { "epoch": 0.31, "learning_rate": 0.0001979228255586971, "loss": 1.3145, "step": 5740 }, { "epoch": 0.31, "eval_loss": 1.4028642177581787, "eval_runtime": 50.0235, "eval_samples_per_second": 59.972, "eval_steps_per_second": 1.879, "step": 5750 }, { "epoch": 0.31, "learning_rate": 0.0001979120852565187, "loss": 1.328, "step": 5760 }, { "epoch": 0.31, "learning_rate": 0.0001979013449543403, "loss": 1.3546, "step": 5780 }, { "epoch": 0.31, "learning_rate": 0.00019789060465216188, "loss": 1.3228, "step": 5800 }, { "epoch": 0.31, "eval_loss": 1.4030640125274658, "eval_runtime": 49.9206, "eval_samples_per_second": 60.095, "eval_steps_per_second": 1.883, "step": 5800 }, { "epoch": 0.31, "learning_rate": 0.0001978798643499835, "loss": 1.334, "step": 5820 }, { "epoch": 0.31, "learning_rate": 0.00019786912404780508, "loss": 1.3502, "step": 5840 }, { "epoch": 0.31, "eval_loss": 1.402596354484558, "eval_runtime": 49.9769, "eval_samples_per_second": 60.028, "eval_steps_per_second": 1.881, "step": 5850 }, { "epoch": 0.31, "learning_rate": 0.0001978583837456267, "loss": 1.4087, "step": 5860 }, { "epoch": 0.31, "learning_rate": 0.00019784764344344828, "loss": 1.3516, "step": 5880 }, { "epoch": 0.32, "learning_rate": 0.00019783690314126987, "loss": 1.3684, "step": 5900 }, { "epoch": 0.32, "eval_loss": 1.4028434753417969, "eval_runtime": 49.9166, "eval_samples_per_second": 60.1, "eval_steps_per_second": 1.883, "step": 5900 }, { "epoch": 0.32, "learning_rate": 0.00019782616283909148, "loss": 1.3747, "step": 5920 }, { "epoch": 0.32, "learning_rate": 0.00019781542253691307, "loss": 1.3603, "step": 5940 }, { "epoch": 0.32, "eval_loss": 1.4027584791183472, "eval_runtime": 50.4225, "eval_samples_per_second": 59.497, "eval_steps_per_second": 1.864, "step": 5950 }, { "epoch": 0.32, "learning_rate": 0.00019780468223473468, "loss": 1.3718, "step": 5960 }, { "epoch": 0.32, "learning_rate": 0.0001977939419325563, "loss": 1.3695, "step": 5980 }, { "epoch": 0.32, "learning_rate": 0.00019778320163037788, "loss": 1.3384, "step": 6000 }, { "epoch": 0.32, "eval_loss": 1.4024461507797241, "eval_runtime": 51.9592, "eval_samples_per_second": 57.738, "eval_steps_per_second": 1.809, "step": 6000 }, { "epoch": 0.32, "learning_rate": 0.0001977724613281995, "loss": 1.3768, "step": 6020 }, { "epoch": 0.32, "learning_rate": 0.00019776172102602108, "loss": 1.3948, "step": 6040 }, { "epoch": 0.32, "eval_loss": 1.4024426937103271, "eval_runtime": 51.9479, "eval_samples_per_second": 57.75, "eval_steps_per_second": 1.81, "step": 6050 }, { "epoch": 0.32, "learning_rate": 0.0001977509807238427, "loss": 1.3148, "step": 6060 }, { "epoch": 0.32, "learning_rate": 0.00019774024042166428, "loss": 1.321, "step": 6080 }, { "epoch": 0.33, "learning_rate": 0.00019772950011948587, "loss": 1.3558, "step": 6100 }, { "epoch": 0.33, "eval_loss": 1.4021098613739014, "eval_runtime": 49.9338, "eval_samples_per_second": 60.08, "eval_steps_per_second": 1.882, "step": 6100 }, { "epoch": 0.33, "learning_rate": 0.00019771875981730748, "loss": 1.356, "step": 6120 }, { "epoch": 0.33, "learning_rate": 0.00019770801951512907, "loss": 1.3726, "step": 6140 }, { "epoch": 0.33, "eval_loss": 1.4019304513931274, "eval_runtime": 49.9492, "eval_samples_per_second": 60.061, "eval_steps_per_second": 1.882, "step": 6150 }, { "epoch": 0.33, "learning_rate": 0.00019769727921295068, "loss": 1.3608, "step": 6160 }, { "epoch": 0.33, "learning_rate": 0.00019768653891077227, "loss": 1.3549, "step": 6180 }, { "epoch": 0.33, "learning_rate": 0.00019767579860859385, "loss": 1.38, "step": 6200 }, { "epoch": 0.33, "eval_loss": 1.402077317237854, "eval_runtime": 49.9509, "eval_samples_per_second": 60.059, "eval_steps_per_second": 1.882, "step": 6200 }, { "epoch": 0.33, "learning_rate": 0.00019766505830641547, "loss": 1.4072, "step": 6220 }, { "epoch": 0.33, "learning_rate": 0.00019765431800423705, "loss": 1.3653, "step": 6240 }, { "epoch": 0.33, "eval_loss": 1.402234673500061, "eval_runtime": 49.9544, "eval_samples_per_second": 60.055, "eval_steps_per_second": 1.882, "step": 6250 }, { "epoch": 0.33, "learning_rate": 0.00019764357770205867, "loss": 1.3435, "step": 6260 }, { "epoch": 0.34, "learning_rate": 0.00019763283739988025, "loss": 1.368, "step": 6280 }, { "epoch": 0.34, "learning_rate": 0.00019762209709770184, "loss": 1.3639, "step": 6300 }, { "epoch": 0.34, "eval_loss": 1.401593804359436, "eval_runtime": 49.9096, "eval_samples_per_second": 60.109, "eval_steps_per_second": 1.883, "step": 6300 }, { "epoch": 0.34, "learning_rate": 0.00019761135679552345, "loss": 1.3387, "step": 6320 }, { "epoch": 0.34, "learning_rate": 0.00019760061649334504, "loss": 1.3709, "step": 6340 }, { "epoch": 0.34, "eval_loss": 1.4018782377243042, "eval_runtime": 49.9438, "eval_samples_per_second": 60.068, "eval_steps_per_second": 1.882, "step": 6350 }, { "epoch": 0.34, "learning_rate": 0.00019758987619116665, "loss": 1.328, "step": 6360 }, { "epoch": 0.34, "learning_rate": 0.00019757913588898824, "loss": 1.3254, "step": 6380 }, { "epoch": 0.34, "learning_rate": 0.00019756839558680985, "loss": 1.3593, "step": 6400 }, { "epoch": 0.34, "eval_loss": 1.4019062519073486, "eval_runtime": 49.9103, "eval_samples_per_second": 60.108, "eval_steps_per_second": 1.883, "step": 6400 }, { "epoch": 0.34, "learning_rate": 0.00019755765528463144, "loss": 1.39, "step": 6420 }, { "epoch": 0.34, "learning_rate": 0.00019754691498245303, "loss": 1.366, "step": 6440 }, { "epoch": 0.34, "eval_loss": 1.4018573760986328, "eval_runtime": 52.8919, "eval_samples_per_second": 56.719, "eval_steps_per_second": 1.777, "step": 6450 }, { "epoch": 0.35, "learning_rate": 0.00019753617468027464, "loss": 1.3986, "step": 6460 }, { "epoch": 0.35, "learning_rate": 0.00019752543437809623, "loss": 1.3728, "step": 6480 }, { "epoch": 0.35, "learning_rate": 0.00019751469407591784, "loss": 1.3584, "step": 6500 }, { "epoch": 0.35, "eval_loss": 1.4021949768066406, "eval_runtime": 51.6993, "eval_samples_per_second": 58.028, "eval_steps_per_second": 1.818, "step": 6500 }, { "epoch": 0.35, "learning_rate": 0.00019750395377373943, "loss": 1.3515, "step": 6520 }, { "epoch": 0.35, "learning_rate": 0.000197493213471561, "loss": 1.3587, "step": 6540 }, { "epoch": 0.35, "eval_loss": 1.4026806354522705, "eval_runtime": 51.9928, "eval_samples_per_second": 57.7, "eval_steps_per_second": 1.808, "step": 6550 }, { "epoch": 0.35, "learning_rate": 0.00019748247316938263, "loss": 1.3223, "step": 6560 }, { "epoch": 0.35, "learning_rate": 0.0001974717328672042, "loss": 1.3452, "step": 6580 }, { "epoch": 0.35, "learning_rate": 0.00019746099256502583, "loss": 1.3525, "step": 6600 }, { "epoch": 0.35, "eval_loss": 1.402047872543335, "eval_runtime": 52.9322, "eval_samples_per_second": 56.676, "eval_steps_per_second": 1.776, "step": 6600 }, { "epoch": 0.35, "learning_rate": 0.00019745025226284744, "loss": 1.3546, "step": 6620 }, { "epoch": 0.35, "learning_rate": 0.00019743951196066903, "loss": 1.3808, "step": 6640 }, { "epoch": 0.36, "eval_loss": 1.4020296335220337, "eval_runtime": 52.9894, "eval_samples_per_second": 56.615, "eval_steps_per_second": 1.774, "step": 6650 }, { "epoch": 0.36, "learning_rate": 0.00019742877165849064, "loss": 1.3466, "step": 6660 }, { "epoch": 0.36, "learning_rate": 0.00019741803135631223, "loss": 1.3595, "step": 6680 }, { "epoch": 0.36, "learning_rate": 0.0001974072910541338, "loss": 1.3788, "step": 6700 }, { "epoch": 0.36, "eval_loss": 1.4023902416229248, "eval_runtime": 53.0092, "eval_samples_per_second": 56.594, "eval_steps_per_second": 1.773, "step": 6700 }, { "epoch": 0.36, "learning_rate": 0.00019739655075195543, "loss": 1.3787, "step": 6720 }, { "epoch": 0.36, "learning_rate": 0.000197385810449777, "loss": 1.3484, "step": 6740 }, { "epoch": 0.36, "eval_loss": 1.401914358139038, "eval_runtime": 52.9334, "eval_samples_per_second": 56.675, "eval_steps_per_second": 1.776, "step": 6750 }, { "epoch": 0.36, "learning_rate": 0.00019737507014759863, "loss": 1.3412, "step": 6760 }, { "epoch": 0.36, "learning_rate": 0.0001973643298454202, "loss": 1.3274, "step": 6780 }, { "epoch": 0.36, "learning_rate": 0.0001973535895432418, "loss": 1.4129, "step": 6800 }, { "epoch": 0.36, "eval_loss": 1.4013642072677612, "eval_runtime": 52.933, "eval_samples_per_second": 56.675, "eval_steps_per_second": 1.776, "step": 6800 }, { "epoch": 0.36, "learning_rate": 0.0001973428492410634, "loss": 1.3889, "step": 6820 }, { "epoch": 0.37, "learning_rate": 0.000197332108938885, "loss": 1.3519, "step": 6840 }, { "epoch": 0.37, "eval_loss": 1.4011621475219727, "eval_runtime": 52.9903, "eval_samples_per_second": 56.614, "eval_steps_per_second": 1.774, "step": 6850 }, { "epoch": 0.37, "learning_rate": 0.0001973213686367066, "loss": 1.3953, "step": 6860 }, { "epoch": 0.37, "learning_rate": 0.0001973106283345282, "loss": 1.3546, "step": 6880 }, { "epoch": 0.37, "learning_rate": 0.0001972998880323498, "loss": 1.3495, "step": 6900 }, { "epoch": 0.37, "eval_loss": 1.4015971422195435, "eval_runtime": 52.8958, "eval_samples_per_second": 56.715, "eval_steps_per_second": 1.777, "step": 6900 }, { "epoch": 0.37, "learning_rate": 0.0001972891477301714, "loss": 1.3357, "step": 6920 }, { "epoch": 0.37, "learning_rate": 0.00019727840742799299, "loss": 1.3114, "step": 6940 }, { "epoch": 0.37, "eval_loss": 1.4012014865875244, "eval_runtime": 52.9438, "eval_samples_per_second": 56.664, "eval_steps_per_second": 1.775, "step": 6950 }, { "epoch": 0.37, "learning_rate": 0.0001972676671258146, "loss": 1.3446, "step": 6960 }, { "epoch": 0.37, "learning_rate": 0.00019725692682363619, "loss": 1.3983, "step": 6980 }, { "epoch": 0.37, "learning_rate": 0.0001972461865214578, "loss": 1.3564, "step": 7000 }, { "epoch": 0.37, "eval_loss": 1.400884747505188, "eval_runtime": 52.8746, "eval_samples_per_second": 56.738, "eval_steps_per_second": 1.778, "step": 7000 }, { "epoch": 0.38, "learning_rate": 0.00019723544621927939, "loss": 1.326, "step": 7020 }, { "epoch": 0.38, "learning_rate": 0.00019722470591710097, "loss": 1.3127, "step": 7040 }, { "epoch": 0.38, "eval_loss": 1.4015973806381226, "eval_runtime": 52.9938, "eval_samples_per_second": 56.61, "eval_steps_per_second": 1.774, "step": 7050 }, { "epoch": 0.38, "learning_rate": 0.00019721396561492259, "loss": 1.3858, "step": 7060 }, { "epoch": 0.38, "learning_rate": 0.00019720322531274417, "loss": 1.3599, "step": 7080 }, { "epoch": 0.38, "learning_rate": 0.00019719248501056579, "loss": 1.3681, "step": 7100 }, { "epoch": 0.38, "eval_loss": 1.4011207818984985, "eval_runtime": 52.9195, "eval_samples_per_second": 56.69, "eval_steps_per_second": 1.776, "step": 7100 }, { "epoch": 0.38, "learning_rate": 0.00019718228172349632, "loss": 1.371, "step": 7120 }, { "epoch": 0.38, "learning_rate": 0.0001971715414213179, "loss": 1.3459, "step": 7140 }, { "epoch": 0.38, "eval_loss": 1.4016060829162598, "eval_runtime": 52.9177, "eval_samples_per_second": 56.692, "eval_steps_per_second": 1.776, "step": 7150 }, { "epoch": 0.38, "learning_rate": 0.0001971608011191395, "loss": 1.3567, "step": 7160 }, { "epoch": 0.38, "learning_rate": 0.0001971500608169611, "loss": 1.3795, "step": 7180 }, { "epoch": 0.38, "learning_rate": 0.0001971393205147827, "loss": 1.3499, "step": 7200 }, { "epoch": 0.38, "eval_loss": 1.4015876054763794, "eval_runtime": 52.8119, "eval_samples_per_second": 56.805, "eval_steps_per_second": 1.78, "step": 7200 }, { "epoch": 0.39, "learning_rate": 0.0001971285802126043, "loss": 1.3308, "step": 7220 }, { "epoch": 0.39, "learning_rate": 0.0001971178399104259, "loss": 1.3811, "step": 7240 }, { "epoch": 0.39, "eval_loss": 1.4017484188079834, "eval_runtime": 52.962, "eval_samples_per_second": 56.644, "eval_steps_per_second": 1.775, "step": 7250 }, { "epoch": 0.39, "learning_rate": 0.00019710709960824748, "loss": 1.3567, "step": 7260 }, { "epoch": 0.39, "learning_rate": 0.0001970963593060691, "loss": 1.3609, "step": 7280 }, { "epoch": 0.39, "learning_rate": 0.00019708561900389068, "loss": 1.3585, "step": 7300 }, { "epoch": 0.39, "eval_loss": 1.4017462730407715, "eval_runtime": 52.879, "eval_samples_per_second": 56.733, "eval_steps_per_second": 1.778, "step": 7300 }, { "epoch": 0.39, "learning_rate": 0.0001970748787017123, "loss": 1.37, "step": 7320 }, { "epoch": 0.39, "learning_rate": 0.00019706413839953388, "loss": 1.3404, "step": 7340 }, { "epoch": 0.39, "eval_loss": 1.4020615816116333, "eval_runtime": 52.9825, "eval_samples_per_second": 56.623, "eval_steps_per_second": 1.774, "step": 7350 }, { "epoch": 0.39, "learning_rate": 0.00019705339809735547, "loss": 1.3307, "step": 7360 }, { "epoch": 0.39, "learning_rate": 0.00019704265779517708, "loss": 1.3871, "step": 7380 }, { "epoch": 0.4, "learning_rate": 0.00019703191749299867, "loss": 1.3283, "step": 7400 }, { "epoch": 0.4, "eval_loss": 1.4005995988845825, "eval_runtime": 52.8907, "eval_samples_per_second": 56.721, "eval_steps_per_second": 1.777, "step": 7400 }, { "epoch": 0.4, "learning_rate": 0.00019702117719082028, "loss": 1.3592, "step": 7420 }, { "epoch": 0.4, "learning_rate": 0.00019701043688864187, "loss": 1.373, "step": 7440 }, { "epoch": 0.4, "eval_loss": 1.4015244245529175, "eval_runtime": 52.8915, "eval_samples_per_second": 56.72, "eval_steps_per_second": 1.777, "step": 7450 }, { "epoch": 0.4, "learning_rate": 0.00019699969658646345, "loss": 1.3815, "step": 7460 }, { "epoch": 0.4, "learning_rate": 0.000196989493299394, "loss": 1.3838, "step": 7480 }, { "epoch": 0.4, "learning_rate": 0.00019697875299721558, "loss": 1.3316, "step": 7500 }, { "epoch": 0.4, "eval_loss": 1.4018954038619995, "eval_runtime": 52.9361, "eval_samples_per_second": 56.672, "eval_steps_per_second": 1.776, "step": 7500 }, { "epoch": 0.4, "learning_rate": 0.00019696801269503716, "loss": 1.3692, "step": 7520 }, { "epoch": 0.4, "learning_rate": 0.00019695727239285878, "loss": 1.3496, "step": 7540 }, { "epoch": 0.4, "eval_loss": 1.401475429534912, "eval_runtime": 52.9712, "eval_samples_per_second": 56.635, "eval_steps_per_second": 1.775, "step": 7550 }, { "epoch": 0.4, "learning_rate": 0.0001969465320906804, "loss": 1.3507, "step": 7560 }, { "epoch": 0.41, "learning_rate": 0.00019693579178850198, "loss": 1.3649, "step": 7580 }, { "epoch": 0.41, "learning_rate": 0.0001969250514863236, "loss": 1.3968, "step": 7600 }, { "epoch": 0.41, "eval_loss": 1.4009819030761719, "eval_runtime": 52.8433, "eval_samples_per_second": 56.772, "eval_steps_per_second": 1.779, "step": 7600 }, { "epoch": 0.41, "learning_rate": 0.00019691431118414518, "loss": 1.4017, "step": 7620 }, { "epoch": 0.41, "learning_rate": 0.0001969035708819668, "loss": 1.3512, "step": 7640 }, { "epoch": 0.41, "eval_loss": 1.400898814201355, "eval_runtime": 52.9951, "eval_samples_per_second": 56.609, "eval_steps_per_second": 1.774, "step": 7650 }, { "epoch": 0.41, "learning_rate": 0.00019689283057978838, "loss": 1.336, "step": 7660 }, { "epoch": 0.41, "learning_rate": 0.00019688209027761, "loss": 1.3587, "step": 7680 }, { "epoch": 0.41, "learning_rate": 0.00019687134997543158, "loss": 1.3624, "step": 7700 }, { "epoch": 0.41, "eval_loss": 1.4006584882736206, "eval_runtime": 52.9022, "eval_samples_per_second": 56.708, "eval_steps_per_second": 1.777, "step": 7700 }, { "epoch": 0.41, "learning_rate": 0.00019686060967325316, "loss": 1.3288, "step": 7720 }, { "epoch": 0.41, "learning_rate": 0.00019684986937107478, "loss": 1.406, "step": 7740 }, { "epoch": 0.41, "eval_loss": 1.4009158611297607, "eval_runtime": 52.9599, "eval_samples_per_second": 56.647, "eval_steps_per_second": 1.775, "step": 7750 }, { "epoch": 0.41, "learning_rate": 0.00019683912906889636, "loss": 1.3257, "step": 7760 }, { "epoch": 0.42, "learning_rate": 0.00019682838876671798, "loss": 1.3483, "step": 7780 }, { "epoch": 0.42, "learning_rate": 0.00019681764846453956, "loss": 1.3366, "step": 7800 }, { "epoch": 0.42, "eval_loss": 1.4006359577178955, "eval_runtime": 52.9157, "eval_samples_per_second": 56.694, "eval_steps_per_second": 1.776, "step": 7800 }, { "epoch": 0.42, "learning_rate": 0.00019680690816236115, "loss": 1.3871, "step": 7820 }, { "epoch": 0.42, "learning_rate": 0.00019679616786018276, "loss": 1.361, "step": 7840 }, { "epoch": 0.42, "eval_loss": 1.3996506929397583, "eval_runtime": 52.9799, "eval_samples_per_second": 56.625, "eval_steps_per_second": 1.774, "step": 7850 }, { "epoch": 0.42, "learning_rate": 0.00019678542755800435, "loss": 1.3471, "step": 7860 }, { "epoch": 0.42, "learning_rate": 0.00019677468725582596, "loss": 1.3318, "step": 7880 }, { "epoch": 0.42, "learning_rate": 0.00019676394695364755, "loss": 1.3516, "step": 7900 }, { "epoch": 0.42, "eval_loss": 1.4007470607757568, "eval_runtime": 52.9141, "eval_samples_per_second": 56.696, "eval_steps_per_second": 1.776, "step": 7900 }, { "epoch": 0.42, "learning_rate": 0.00019675320665146914, "loss": 1.3489, "step": 7920 }, { "epoch": 0.42, "learning_rate": 0.00019674246634929075, "loss": 1.3629, "step": 7940 }, { "epoch": 0.42, "eval_loss": 1.4007574319839478, "eval_runtime": 52.8992, "eval_samples_per_second": 56.712, "eval_steps_per_second": 1.777, "step": 7950 }, { "epoch": 0.43, "learning_rate": 0.00019673172604711234, "loss": 1.3947, "step": 7960 }, { "epoch": 0.43, "learning_rate": 0.00019672098574493395, "loss": 1.3369, "step": 7980 }, { "epoch": 0.43, "learning_rate": 0.00019671024544275554, "loss": 1.3895, "step": 8000 }, { "epoch": 0.43, "eval_loss": 1.4001998901367188, "eval_runtime": 52.9172, "eval_samples_per_second": 56.692, "eval_steps_per_second": 1.776, "step": 8000 }, { "epoch": 0.43, "learning_rate": 0.00019669950514057712, "loss": 1.373, "step": 8020 }, { "epoch": 0.43, "learning_rate": 0.00019668876483839874, "loss": 1.3558, "step": 8040 }, { "epoch": 0.43, "eval_loss": 1.4004127979278564, "eval_runtime": 52.8315, "eval_samples_per_second": 56.784, "eval_steps_per_second": 1.779, "step": 8050 }, { "epoch": 0.43, "learning_rate": 0.00019667802453622032, "loss": 1.4025, "step": 8060 }, { "epoch": 0.43, "learning_rate": 0.00019666728423404194, "loss": 1.3267, "step": 8080 }, { "epoch": 0.43, "learning_rate": 0.00019665654393186352, "loss": 1.335, "step": 8100 }, { "epoch": 0.43, "eval_loss": 1.400840163230896, "eval_runtime": 52.8995, "eval_samples_per_second": 56.711, "eval_steps_per_second": 1.777, "step": 8100 }, { "epoch": 0.43, "learning_rate": 0.0001966458036296851, "loss": 1.3523, "step": 8120 }, { "epoch": 0.43, "learning_rate": 0.00019663506332750672, "loss": 1.3295, "step": 8140 }, { "epoch": 0.44, "eval_loss": 1.3999550342559814, "eval_runtime": 52.9664, "eval_samples_per_second": 56.64, "eval_steps_per_second": 1.775, "step": 8150 }, { "epoch": 0.44, "learning_rate": 0.0001966243230253283, "loss": 1.3866, "step": 8160 }, { "epoch": 0.44, "learning_rate": 0.00019661358272314995, "loss": 1.3632, "step": 8180 }, { "epoch": 0.44, "learning_rate": 0.00019660284242097154, "loss": 1.353, "step": 8200 }, { "epoch": 0.44, "eval_loss": 1.4003568887710571, "eval_runtime": 52.8325, "eval_samples_per_second": 56.783, "eval_steps_per_second": 1.779, "step": 8200 }, { "epoch": 0.44, "learning_rate": 0.00019659210211879312, "loss": 1.3927, "step": 8220 }, { "epoch": 0.44, "learning_rate": 0.00019658136181661474, "loss": 1.384, "step": 8240 }, { "epoch": 0.44, "eval_loss": 1.4005845785140991, "eval_runtime": 52.917, "eval_samples_per_second": 56.693, "eval_steps_per_second": 1.776, "step": 8250 }, { "epoch": 0.44, "learning_rate": 0.00019657062151443632, "loss": 1.4014, "step": 8260 }, { "epoch": 0.44, "learning_rate": 0.00019655988121225794, "loss": 1.3651, "step": 8280 }, { "epoch": 0.44, "learning_rate": 0.00019654914091007952, "loss": 1.3308, "step": 8300 }, { "epoch": 0.44, "eval_loss": 1.4005910158157349, "eval_runtime": 53.0074, "eval_samples_per_second": 56.596, "eval_steps_per_second": 1.773, "step": 8300 }, { "epoch": 0.44, "learning_rate": 0.0001965384006079011, "loss": 1.312, "step": 8320 }, { "epoch": 0.45, "learning_rate": 0.00019652766030572272, "loss": 1.3904, "step": 8340 }, { "epoch": 0.45, "eval_loss": 1.4002220630645752, "eval_runtime": 52.9847, "eval_samples_per_second": 56.62, "eval_steps_per_second": 1.774, "step": 8350 }, { "epoch": 0.45, "learning_rate": 0.0001965169200035443, "loss": 1.3463, "step": 8360 }, { "epoch": 0.45, "learning_rate": 0.00019650617970136592, "loss": 1.3611, "step": 8380 }, { "epoch": 0.45, "learning_rate": 0.0001964954393991875, "loss": 1.3388, "step": 8400 }, { "epoch": 0.45, "eval_loss": 1.400258183479309, "eval_runtime": 52.8963, "eval_samples_per_second": 56.715, "eval_steps_per_second": 1.777, "step": 8400 }, { "epoch": 0.45, "learning_rate": 0.0001964846990970091, "loss": 1.3349, "step": 8420 }, { "epoch": 0.45, "learning_rate": 0.0001964739587948307, "loss": 1.3354, "step": 8440 }, { "epoch": 0.45, "eval_loss": 1.4004709720611572, "eval_runtime": 52.9783, "eval_samples_per_second": 56.627, "eval_steps_per_second": 1.774, "step": 8450 }, { "epoch": 0.45, "learning_rate": 0.0001964632184926523, "loss": 1.3476, "step": 8460 }, { "epoch": 0.45, "learning_rate": 0.0001964524781904739, "loss": 1.3994, "step": 8480 }, { "epoch": 0.45, "learning_rate": 0.0001964417378882955, "loss": 1.3211, "step": 8500 }, { "epoch": 0.45, "eval_loss": 1.3999745845794678, "eval_runtime": 52.9063, "eval_samples_per_second": 56.704, "eval_steps_per_second": 1.777, "step": 8500 }, { "epoch": 0.46, "learning_rate": 0.00019643099758611708, "loss": 1.3556, "step": 8520 }, { "epoch": 0.46, "learning_rate": 0.0001964202572839387, "loss": 1.3796, "step": 8540 }, { "epoch": 0.46, "eval_loss": 1.4002516269683838, "eval_runtime": 52.962, "eval_samples_per_second": 56.644, "eval_steps_per_second": 1.775, "step": 8550 }, { "epoch": 0.46, "learning_rate": 0.00019640951698176028, "loss": 1.322, "step": 8560 }, { "epoch": 0.46, "learning_rate": 0.0001963987766795819, "loss": 1.3595, "step": 8580 }, { "epoch": 0.46, "learning_rate": 0.00019638803637740348, "loss": 1.3518, "step": 8600 }, { "epoch": 0.46, "eval_loss": 1.4005961418151855, "eval_runtime": 52.9027, "eval_samples_per_second": 56.708, "eval_steps_per_second": 1.777, "step": 8600 }, { "epoch": 0.46, "learning_rate": 0.0001963772960752251, "loss": 1.3272, "step": 8620 }, { "epoch": 0.46, "learning_rate": 0.00019636655577304668, "loss": 1.3803, "step": 8640 }, { "epoch": 0.46, "eval_loss": 1.4010200500488281, "eval_runtime": 52.9126, "eval_samples_per_second": 56.697, "eval_steps_per_second": 1.777, "step": 8650 }, { "epoch": 0.46, "learning_rate": 0.00019635581547086827, "loss": 1.3242, "step": 8660 }, { "epoch": 0.46, "learning_rate": 0.00019634507516868988, "loss": 1.3993, "step": 8680 }, { "epoch": 0.46, "learning_rate": 0.00019633433486651147, "loss": 1.3858, "step": 8700 }, { "epoch": 0.46, "eval_loss": 1.4003942012786865, "eval_runtime": 52.8889, "eval_samples_per_second": 56.723, "eval_steps_per_second": 1.777, "step": 8700 }, { "epoch": 0.47, "learning_rate": 0.00019632359456433308, "loss": 1.365, "step": 8720 }, { "epoch": 0.47, "learning_rate": 0.00019631285426215467, "loss": 1.3537, "step": 8740 }, { "epoch": 0.47, "eval_loss": 1.3994464874267578, "eval_runtime": 52.9395, "eval_samples_per_second": 56.668, "eval_steps_per_second": 1.776, "step": 8750 }, { "epoch": 0.47, "learning_rate": 0.00019630211395997625, "loss": 1.3707, "step": 8760 }, { "epoch": 0.47, "learning_rate": 0.00019629137365779787, "loss": 1.3361, "step": 8780 }, { "epoch": 0.47, "learning_rate": 0.00019628063335561945, "loss": 1.3452, "step": 8800 }, { "epoch": 0.47, "eval_loss": 1.3997398614883423, "eval_runtime": 52.9165, "eval_samples_per_second": 56.693, "eval_steps_per_second": 1.776, "step": 8800 }, { "epoch": 0.47, "learning_rate": 0.00019626989305344107, "loss": 1.331, "step": 8820 }, { "epoch": 0.47, "learning_rate": 0.00019625915275126268, "loss": 1.3507, "step": 8840 }, { "epoch": 0.47, "eval_loss": 1.4000722169876099, "eval_runtime": 52.9443, "eval_samples_per_second": 56.663, "eval_steps_per_second": 1.775, "step": 8850 }, { "epoch": 0.47, "learning_rate": 0.00019624841244908427, "loss": 1.335, "step": 8860 }, { "epoch": 0.47, "learning_rate": 0.00019623767214690588, "loss": 1.3474, "step": 8880 }, { "epoch": 0.48, "learning_rate": 0.00019622693184472747, "loss": 1.3362, "step": 8900 }, { "epoch": 0.48, "eval_loss": 1.4004658460617065, "eval_runtime": 52.942, "eval_samples_per_second": 56.666, "eval_steps_per_second": 1.776, "step": 8900 }, { "epoch": 0.48, "learning_rate": 0.00019621619154254905, "loss": 1.3395, "step": 8920 }, { "epoch": 0.48, "learning_rate": 0.00019620545124037067, "loss": 1.3722, "step": 8940 }, { "epoch": 0.48, "eval_loss": 1.4000860452651978, "eval_runtime": 52.927, "eval_samples_per_second": 56.682, "eval_steps_per_second": 1.776, "step": 8950 }, { "epoch": 0.48, "learning_rate": 0.00019619471093819225, "loss": 1.3352, "step": 8960 }, { "epoch": 0.48, "learning_rate": 0.00019618397063601387, "loss": 1.3897, "step": 8980 }, { "epoch": 0.48, "learning_rate": 0.00019617323033383545, "loss": 1.3678, "step": 9000 }, { "epoch": 0.48, "eval_loss": 1.3992573022842407, "eval_runtime": 52.8848, "eval_samples_per_second": 56.727, "eval_steps_per_second": 1.777, "step": 9000 }, { "epoch": 0.48, "learning_rate": 0.00019616249003165704, "loss": 1.326, "step": 9020 }, { "epoch": 0.48, "learning_rate": 0.00019615174972947865, "loss": 1.3879, "step": 9040 }, { "epoch": 0.48, "eval_loss": 1.4001773595809937, "eval_runtime": 52.9471, "eval_samples_per_second": 56.66, "eval_steps_per_second": 1.775, "step": 9050 }, { "epoch": 0.48, "learning_rate": 0.00019614100942730024, "loss": 1.3758, "step": 9060 }, { "epoch": 0.49, "learning_rate": 0.00019613026912512185, "loss": 1.3969, "step": 9080 }, { "epoch": 0.49, "learning_rate": 0.00019611952882294344, "loss": 1.3307, "step": 9100 }, { "epoch": 0.49, "eval_loss": 1.4005745649337769, "eval_runtime": 52.8911, "eval_samples_per_second": 56.72, "eval_steps_per_second": 1.777, "step": 9100 }, { "epoch": 0.49, "learning_rate": 0.00019610878852076505, "loss": 1.3549, "step": 9120 }, { "epoch": 0.49, "learning_rate": 0.00019609804821858664, "loss": 1.3189, "step": 9140 }, { "epoch": 0.49, "eval_loss": 1.4000535011291504, "eval_runtime": 52.9048, "eval_samples_per_second": 56.706, "eval_steps_per_second": 1.777, "step": 9150 }, { "epoch": 0.49, "learning_rate": 0.00019608730791640823, "loss": 1.4094, "step": 9160 }, { "epoch": 0.49, "learning_rate": 0.00019607656761422984, "loss": 1.3461, "step": 9180 }, { "epoch": 0.49, "learning_rate": 0.00019606582731205143, "loss": 1.3535, "step": 9200 }, { "epoch": 0.49, "eval_loss": 1.399420142173767, "eval_runtime": 52.9196, "eval_samples_per_second": 56.69, "eval_steps_per_second": 1.776, "step": 9200 }, { "epoch": 0.49, "learning_rate": 0.00019605508700987304, "loss": 1.3322, "step": 9220 }, { "epoch": 0.49, "learning_rate": 0.00019604434670769463, "loss": 1.3483, "step": 9240 }, { "epoch": 0.49, "eval_loss": 1.3995360136032104, "eval_runtime": 53.024, "eval_samples_per_second": 56.578, "eval_steps_per_second": 1.773, "step": 9250 }, { "epoch": 0.49, "learning_rate": 0.0001960336064055162, "loss": 1.3472, "step": 9260 }, { "epoch": 0.5, "learning_rate": 0.00019602286610333783, "loss": 1.3597, "step": 9280 }, { "epoch": 0.5, "learning_rate": 0.0001960121258011594, "loss": 1.3716, "step": 9300 }, { "epoch": 0.5, "eval_loss": 1.3998219966888428, "eval_runtime": 52.9143, "eval_samples_per_second": 56.695, "eval_steps_per_second": 1.776, "step": 9300 }, { "epoch": 0.5, "learning_rate": 0.00019600138549898103, "loss": 1.3394, "step": 9320 }, { "epoch": 0.5, "learning_rate": 0.0001959906451968026, "loss": 1.3498, "step": 9340 }, { "epoch": 0.5, "eval_loss": 1.3993160724639893, "eval_runtime": 53.0146, "eval_samples_per_second": 56.588, "eval_steps_per_second": 1.773, "step": 9350 }, { "epoch": 0.5, "learning_rate": 0.0001959799048946242, "loss": 1.3344, "step": 9360 }, { "epoch": 0.5, "learning_rate": 0.0001959691645924458, "loss": 1.352, "step": 9380 }, { "epoch": 0.5, "learning_rate": 0.0001959584242902674, "loss": 1.3464, "step": 9400 }, { "epoch": 0.5, "eval_loss": 1.400106430053711, "eval_runtime": 52.8683, "eval_samples_per_second": 56.745, "eval_steps_per_second": 1.778, "step": 9400 }, { "epoch": 0.5, "learning_rate": 0.000195947683988089, "loss": 1.3915, "step": 9420 }, { "epoch": 0.5, "learning_rate": 0.00019593694368591063, "loss": 1.3662, "step": 9440 }, { "epoch": 0.5, "eval_loss": 1.3995168209075928, "eval_runtime": 53.0041, "eval_samples_per_second": 56.599, "eval_steps_per_second": 1.773, "step": 9450 }, { "epoch": 0.51, "learning_rate": 0.0001959262033837322, "loss": 1.3307, "step": 9460 }, { "epoch": 0.51, "learning_rate": 0.00019591546308155383, "loss": 1.379, "step": 9480 }, { "epoch": 0.51, "learning_rate": 0.0001959047227793754, "loss": 1.3326, "step": 9500 }, { "epoch": 0.51, "eval_loss": 1.3995767831802368, "eval_runtime": 52.9586, "eval_samples_per_second": 56.648, "eval_steps_per_second": 1.775, "step": 9500 }, { "epoch": 0.51, "learning_rate": 0.00019589451949230592, "loss": 1.3196, "step": 9520 }, { "epoch": 0.51, "learning_rate": 0.00019588377919012754, "loss": 1.4136, "step": 9540 }, { "epoch": 0.51, "eval_loss": 1.399158000946045, "eval_runtime": 52.9987, "eval_samples_per_second": 56.605, "eval_steps_per_second": 1.774, "step": 9550 }, { "epoch": 0.51, "learning_rate": 0.00019587303888794912, "loss": 1.3556, "step": 9560 }, { "epoch": 0.51, "learning_rate": 0.0001958622985857707, "loss": 1.3996, "step": 9580 }, { "epoch": 0.51, "learning_rate": 0.00019585155828359232, "loss": 1.3511, "step": 9600 }, { "epoch": 0.51, "eval_loss": 1.398504376411438, "eval_runtime": 52.9812, "eval_samples_per_second": 56.624, "eval_steps_per_second": 1.774, "step": 9600 }, { "epoch": 0.51, "learning_rate": 0.0001958408179814139, "loss": 1.3574, "step": 9620 }, { "epoch": 0.52, "learning_rate": 0.00019583007767923552, "loss": 1.3567, "step": 9640 }, { "epoch": 0.52, "eval_loss": 1.3994630575180054, "eval_runtime": 52.8913, "eval_samples_per_second": 56.72, "eval_steps_per_second": 1.777, "step": 9650 }, { "epoch": 0.52, "learning_rate": 0.0001958193373770571, "loss": 1.412, "step": 9660 }, { "epoch": 0.52, "learning_rate": 0.0001958085970748787, "loss": 1.3513, "step": 9680 }, { "epoch": 0.52, "learning_rate": 0.0001957978567727003, "loss": 1.3857, "step": 9700 }, { "epoch": 0.52, "eval_loss": 1.3997857570648193, "eval_runtime": 52.869, "eval_samples_per_second": 56.744, "eval_steps_per_second": 1.778, "step": 9700 }, { "epoch": 0.52, "learning_rate": 0.0001957871164705219, "loss": 1.3393, "step": 9720 }, { "epoch": 0.52, "learning_rate": 0.0001957763761683435, "loss": 1.3286, "step": 9740 }, { "epoch": 0.52, "eval_loss": 1.3988032341003418, "eval_runtime": 52.9633, "eval_samples_per_second": 56.643, "eval_steps_per_second": 1.775, "step": 9750 }, { "epoch": 0.52, "learning_rate": 0.0001957656358661651, "loss": 1.3608, "step": 9760 }, { "epoch": 0.52, "learning_rate": 0.0001957548955639867, "loss": 1.3472, "step": 9780 }, { "epoch": 0.52, "learning_rate": 0.0001957441552618083, "loss": 1.3982, "step": 9800 }, { "epoch": 0.52, "eval_loss": 1.3989051580429077, "eval_runtime": 52.9858, "eval_samples_per_second": 56.619, "eval_steps_per_second": 1.774, "step": 9800 }, { "epoch": 0.52, "learning_rate": 0.00019573341495962988, "loss": 1.3708, "step": 9820 }, { "epoch": 0.53, "learning_rate": 0.0001957226746574515, "loss": 1.3517, "step": 9840 }, { "epoch": 0.53, "eval_loss": 1.4000502824783325, "eval_runtime": 52.9101, "eval_samples_per_second": 56.7, "eval_steps_per_second": 1.777, "step": 9850 }, { "epoch": 0.53, "learning_rate": 0.00019571193435527308, "loss": 1.3621, "step": 9860 }, { "epoch": 0.53, "learning_rate": 0.0001957011940530947, "loss": 1.3233, "step": 9880 }, { "epoch": 0.53, "learning_rate": 0.00019569045375091628, "loss": 1.3952, "step": 9900 }, { "epoch": 0.53, "eval_loss": 1.3988715410232544, "eval_runtime": 52.8297, "eval_samples_per_second": 56.786, "eval_steps_per_second": 1.779, "step": 9900 }, { "epoch": 0.53, "learning_rate": 0.0001956797134487379, "loss": 1.3501, "step": 9920 }, { "epoch": 0.53, "learning_rate": 0.0001956689731465595, "loss": 1.3183, "step": 9940 }, { "epoch": 0.53, "eval_loss": 1.397971510887146, "eval_runtime": 52.9608, "eval_samples_per_second": 56.646, "eval_steps_per_second": 1.775, "step": 9950 }, { "epoch": 0.53, "learning_rate": 0.0001956582328443811, "loss": 1.3749, "step": 9960 }, { "epoch": 0.53, "learning_rate": 0.00019564749254220268, "loss": 1.3466, "step": 9980 }, { "epoch": 0.53, "learning_rate": 0.0001956367522400243, "loss": 1.3783, "step": 10000 }, { "epoch": 0.53, "eval_loss": 1.3991659879684448, "eval_runtime": 52.9105, "eval_samples_per_second": 56.7, "eval_steps_per_second": 1.777, "step": 10000 }, { "epoch": 0.54, "learning_rate": 0.00019562601193784588, "loss": 1.367, "step": 10020 }, { "epoch": 0.54, "learning_rate": 0.0001956152716356675, "loss": 1.2971, "step": 10040 }, { "epoch": 0.54, "eval_loss": 1.3988336324691772, "eval_runtime": 52.9518, "eval_samples_per_second": 56.655, "eval_steps_per_second": 1.775, "step": 10050 }, { "epoch": 0.54, "learning_rate": 0.00019560453133348908, "loss": 1.356, "step": 10060 }, { "epoch": 0.54, "learning_rate": 0.00019559379103131067, "loss": 1.3696, "step": 10080 }, { "epoch": 0.54, "learning_rate": 0.00019558305072913228, "loss": 1.3909, "step": 10100 }, { "epoch": 0.54, "eval_loss": 1.3984498977661133, "eval_runtime": 52.8606, "eval_samples_per_second": 56.753, "eval_steps_per_second": 1.778, "step": 10100 }, { "epoch": 0.54, "learning_rate": 0.00019557231042695387, "loss": 1.3355, "step": 10120 }, { "epoch": 0.54, "learning_rate": 0.00019556157012477548, "loss": 1.3134, "step": 10140 }, { "epoch": 0.54, "eval_loss": 1.3991281986236572, "eval_runtime": 52.9144, "eval_samples_per_second": 56.695, "eval_steps_per_second": 1.776, "step": 10150 }, { "epoch": 0.54, "learning_rate": 0.00019555082982259707, "loss": 1.3037, "step": 10160 }, { "epoch": 0.54, "learning_rate": 0.00019554008952041868, "loss": 1.3041, "step": 10180 }, { "epoch": 0.55, "learning_rate": 0.00019552934921824027, "loss": 1.3363, "step": 10200 }, { "epoch": 0.55, "eval_loss": 1.3987741470336914, "eval_runtime": 52.9639, "eval_samples_per_second": 56.642, "eval_steps_per_second": 1.775, "step": 10200 }, { "epoch": 0.55, "learning_rate": 0.00019551860891606185, "loss": 1.3846, "step": 10220 }, { "epoch": 0.55, "learning_rate": 0.00019550786861388347, "loss": 1.3545, "step": 10240 }, { "epoch": 0.55, "eval_loss": 1.3983027935028076, "eval_runtime": 53.0334, "eval_samples_per_second": 56.568, "eval_steps_per_second": 1.772, "step": 10250 }, { "epoch": 0.55, "learning_rate": 0.00019549712831170505, "loss": 1.3237, "step": 10260 }, { "epoch": 0.55, "learning_rate": 0.00019548638800952667, "loss": 1.3866, "step": 10280 }, { "epoch": 0.55, "learning_rate": 0.00019547564770734825, "loss": 1.3839, "step": 10300 }, { "epoch": 0.55, "eval_loss": 1.3988779783248901, "eval_runtime": 52.9716, "eval_samples_per_second": 56.634, "eval_steps_per_second": 1.775, "step": 10300 }, { "epoch": 0.55, "learning_rate": 0.00019546490740516984, "loss": 1.3103, "step": 10320 }, { "epoch": 0.55, "learning_rate": 0.00019545416710299145, "loss": 1.3675, "step": 10340 }, { "epoch": 0.55, "eval_loss": 1.3987020254135132, "eval_runtime": 52.9442, "eval_samples_per_second": 56.663, "eval_steps_per_second": 1.775, "step": 10350 }, { "epoch": 0.55, "learning_rate": 0.00019544342680081304, "loss": 1.3107, "step": 10360 }, { "epoch": 0.55, "learning_rate": 0.00019543268649863465, "loss": 1.3848, "step": 10380 }, { "epoch": 0.56, "learning_rate": 0.00019542194619645624, "loss": 1.3692, "step": 10400 }, { "epoch": 0.56, "eval_loss": 1.3982036113739014, "eval_runtime": 52.9114, "eval_samples_per_second": 56.699, "eval_steps_per_second": 1.777, "step": 10400 }, { "epoch": 0.56, "learning_rate": 0.00019541120589427783, "loss": 1.3652, "step": 10420 }, { "epoch": 0.56, "learning_rate": 0.00019540046559209944, "loss": 1.3322, "step": 10440 }, { "epoch": 0.56, "eval_loss": 1.398821234703064, "eval_runtime": 52.9263, "eval_samples_per_second": 56.683, "eval_steps_per_second": 1.776, "step": 10450 }, { "epoch": 0.56, "learning_rate": 0.00019538972528992103, "loss": 1.377, "step": 10460 }, { "epoch": 0.56, "learning_rate": 0.00019537898498774264, "loss": 1.3895, "step": 10480 }, { "epoch": 0.56, "learning_rate": 0.00019536824468556423, "loss": 1.3505, "step": 10500 }, { "epoch": 0.56, "eval_loss": 1.398823857307434, "eval_runtime": 52.9032, "eval_samples_per_second": 56.707, "eval_steps_per_second": 1.777, "step": 10500 }, { "epoch": 0.56, "learning_rate": 0.0001953575043833858, "loss": 1.3608, "step": 10520 }, { "epoch": 0.56, "learning_rate": 0.00019534676408120745, "loss": 1.391, "step": 10540 }, { "epoch": 0.56, "eval_loss": 1.3982605934143066, "eval_runtime": 52.9352, "eval_samples_per_second": 56.673, "eval_steps_per_second": 1.776, "step": 10550 }, { "epoch": 0.56, "learning_rate": 0.00019533602377902904, "loss": 1.3712, "step": 10560 }, { "epoch": 0.57, "learning_rate": 0.00019532528347685063, "loss": 1.3494, "step": 10580 }, { "epoch": 0.57, "learning_rate": 0.00019531454317467224, "loss": 1.3404, "step": 10600 }, { "epoch": 0.57, "eval_loss": 1.3984962701797485, "eval_runtime": 53.1915, "eval_samples_per_second": 56.4, "eval_steps_per_second": 1.767, "step": 10600 }, { "epoch": 0.57, "learning_rate": 0.00019530380287249383, "loss": 1.3554, "step": 10620 }, { "epoch": 0.57, "learning_rate": 0.00019529306257031544, "loss": 1.3764, "step": 10640 }, { "epoch": 0.57, "eval_loss": 1.398600697517395, "eval_runtime": 53.4487, "eval_samples_per_second": 56.129, "eval_steps_per_second": 1.759, "step": 10650 }, { "epoch": 0.57, "learning_rate": 0.00019528232226813703, "loss": 1.3314, "step": 10660 }, { "epoch": 0.57, "learning_rate": 0.00019527158196595864, "loss": 1.3736, "step": 10680 }, { "epoch": 0.57, "learning_rate": 0.00019526084166378023, "loss": 1.3859, "step": 10700 }, { "epoch": 0.57, "eval_loss": 1.3986971378326416, "eval_runtime": 53.387, "eval_samples_per_second": 56.193, "eval_steps_per_second": 1.761, "step": 10700 }, { "epoch": 0.57, "learning_rate": 0.0001952501013616018, "loss": 1.3524, "step": 10720 }, { "epoch": 0.57, "learning_rate": 0.00019523936105942343, "loss": 1.3373, "step": 10740 }, { "epoch": 0.57, "eval_loss": 1.3985203504562378, "eval_runtime": 53.4258, "eval_samples_per_second": 56.153, "eval_steps_per_second": 1.759, "step": 10750 }, { "epoch": 0.57, "learning_rate": 0.000195228620757245, "loss": 1.3828, "step": 10760 }, { "epoch": 0.58, "learning_rate": 0.00019521788045506663, "loss": 1.2932, "step": 10780 }, { "epoch": 0.58, "learning_rate": 0.0001952071401528882, "loss": 1.3847, "step": 10800 }, { "epoch": 0.58, "eval_loss": 1.399185061454773, "eval_runtime": 53.4654, "eval_samples_per_second": 56.111, "eval_steps_per_second": 1.758, "step": 10800 }, { "epoch": 0.58, "learning_rate": 0.0001951963998507098, "loss": 1.3739, "step": 10820 }, { "epoch": 0.58, "learning_rate": 0.0001951856595485314, "loss": 1.3553, "step": 10840 }, { "epoch": 0.58, "eval_loss": 1.3980084657669067, "eval_runtime": 53.45, "eval_samples_per_second": 56.127, "eval_steps_per_second": 1.759, "step": 10850 }, { "epoch": 0.58, "learning_rate": 0.000195174919246353, "loss": 1.4051, "step": 10860 }, { "epoch": 0.58, "learning_rate": 0.0001951641789441746, "loss": 1.3456, "step": 10880 }, { "epoch": 0.58, "learning_rate": 0.0001951534386419962, "loss": 1.3756, "step": 10900 }, { "epoch": 0.58, "eval_loss": 1.3982112407684326, "eval_runtime": 49.9421, "eval_samples_per_second": 60.07, "eval_steps_per_second": 1.882, "step": 10900 }, { "epoch": 0.58, "learning_rate": 0.00019514269833981779, "loss": 1.3521, "step": 10920 }, { "epoch": 0.58, "learning_rate": 0.0001951319580376394, "loss": 1.3745, "step": 10940 }, { "epoch": 0.59, "eval_loss": 1.3991111516952515, "eval_runtime": 49.9838, "eval_samples_per_second": 60.019, "eval_steps_per_second": 1.881, "step": 10950 }, { "epoch": 0.59, "learning_rate": 0.00019512121773546099, "loss": 1.3661, "step": 10960 }, { "epoch": 0.59, "learning_rate": 0.0001951104774332826, "loss": 1.3637, "step": 10980 }, { "epoch": 0.59, "learning_rate": 0.00019509973713110419, "loss": 1.3263, "step": 11000 }, { "epoch": 0.59, "eval_loss": 1.398601770401001, "eval_runtime": 49.954, "eval_samples_per_second": 60.055, "eval_steps_per_second": 1.882, "step": 11000 }, { "epoch": 0.59, "learning_rate": 0.00019508899682892577, "loss": 1.3062, "step": 11020 }, { "epoch": 0.59, "learning_rate": 0.00019507825652674739, "loss": 1.3634, "step": 11040 }, { "epoch": 0.59, "eval_loss": 1.3987818956375122, "eval_runtime": 49.977, "eval_samples_per_second": 60.028, "eval_steps_per_second": 1.881, "step": 11050 }, { "epoch": 0.59, "learning_rate": 0.00019506751622456897, "loss": 1.3272, "step": 11060 }, { "epoch": 0.59, "learning_rate": 0.00019505677592239059, "loss": 1.369, "step": 11080 }, { "epoch": 0.59, "learning_rate": 0.00019504603562021217, "loss": 1.3177, "step": 11100 }, { "epoch": 0.59, "eval_loss": 1.3984121084213257, "eval_runtime": 49.9403, "eval_samples_per_second": 60.072, "eval_steps_per_second": 1.882, "step": 11100 }, { "epoch": 0.59, "learning_rate": 0.00019503529531803379, "loss": 1.3869, "step": 11120 }, { "epoch": 0.6, "learning_rate": 0.00019502455501585537, "loss": 1.3766, "step": 11140 }, { "epoch": 0.6, "eval_loss": 1.39817476272583, "eval_runtime": 49.9679, "eval_samples_per_second": 60.038, "eval_steps_per_second": 1.881, "step": 11150 }, { "epoch": 0.6, "learning_rate": 0.00019501381471367696, "loss": 1.3801, "step": 11160 }, { "epoch": 0.6, "learning_rate": 0.0001950030744114986, "loss": 1.3549, "step": 11180 }, { "epoch": 0.6, "learning_rate": 0.00019499233410932019, "loss": 1.37, "step": 11200 }, { "epoch": 0.6, "eval_loss": 1.3982213735580444, "eval_runtime": 49.9229, "eval_samples_per_second": 60.093, "eval_steps_per_second": 1.883, "step": 11200 }, { "epoch": 0.6, "learning_rate": 0.00019498159380714177, "loss": 1.3597, "step": 11220 }, { "epoch": 0.6, "learning_rate": 0.00019497085350496339, "loss": 1.3215, "step": 11240 }, { "epoch": 0.6, "eval_loss": 1.3987040519714355, "eval_runtime": 49.9503, "eval_samples_per_second": 60.06, "eval_steps_per_second": 1.882, "step": 11250 }, { "epoch": 0.6, "learning_rate": 0.00019496011320278497, "loss": 1.3655, "step": 11260 }, { "epoch": 0.6, "learning_rate": 0.00019494937290060659, "loss": 1.3059, "step": 11280 }, { "epoch": 0.6, "learning_rate": 0.00019493863259842817, "loss": 1.3689, "step": 11300 }, { "epoch": 0.6, "eval_loss": 1.397697925567627, "eval_runtime": 49.916, "eval_samples_per_second": 60.101, "eval_steps_per_second": 1.883, "step": 11300 }, { "epoch": 0.6, "learning_rate": 0.00019492789229624976, "loss": 1.3428, "step": 11320 }, { "epoch": 0.61, "learning_rate": 0.00019491715199407137, "loss": 1.3463, "step": 11340 }, { "epoch": 0.61, "eval_loss": 1.3985458612442017, "eval_runtime": 49.9432, "eval_samples_per_second": 60.068, "eval_steps_per_second": 1.882, "step": 11350 }, { "epoch": 0.61, "learning_rate": 0.00019490641169189296, "loss": 1.389, "step": 11360 }, { "epoch": 0.61, "learning_rate": 0.00019489567138971457, "loss": 1.3329, "step": 11380 }, { "epoch": 0.61, "learning_rate": 0.00019488493108753616, "loss": 1.3253, "step": 11400 }, { "epoch": 0.61, "eval_loss": 1.3987817764282227, "eval_runtime": 49.8579, "eval_samples_per_second": 60.171, "eval_steps_per_second": 1.885, "step": 11400 }, { "epoch": 0.61, "learning_rate": 0.00019487419078535775, "loss": 1.347, "step": 11420 }, { "epoch": 0.61, "learning_rate": 0.00019486345048317936, "loss": 1.3579, "step": 11440 }, { "epoch": 0.61, "eval_loss": 1.3982493877410889, "eval_runtime": 49.9401, "eval_samples_per_second": 60.072, "eval_steps_per_second": 1.882, "step": 11450 }, { "epoch": 0.61, "learning_rate": 0.00019485271018100095, "loss": 1.3539, "step": 11460 }, { "epoch": 0.61, "learning_rate": 0.00019484196987882256, "loss": 1.3812, "step": 11480 }, { "epoch": 0.61, "learning_rate": 0.00019483122957664415, "loss": 1.3314, "step": 11500 }, { "epoch": 0.61, "eval_loss": 1.3974528312683105, "eval_runtime": 49.8944, "eval_samples_per_second": 60.127, "eval_steps_per_second": 1.884, "step": 11500 }, { "epoch": 0.62, "learning_rate": 0.00019482048927446573, "loss": 1.3545, "step": 11520 }, { "epoch": 0.62, "learning_rate": 0.00019480974897228735, "loss": 1.3466, "step": 11540 }, { "epoch": 0.62, "eval_loss": 1.3979511260986328, "eval_runtime": 49.9421, "eval_samples_per_second": 60.07, "eval_steps_per_second": 1.882, "step": 11550 }, { "epoch": 0.62, "learning_rate": 0.00019479900867010893, "loss": 1.3226, "step": 11560 }, { "epoch": 0.62, "learning_rate": 0.00019478826836793055, "loss": 1.3634, "step": 11580 }, { "epoch": 0.62, "learning_rate": 0.00019477752806575213, "loss": 1.3406, "step": 11600 }, { "epoch": 0.62, "eval_loss": 1.3981132507324219, "eval_runtime": 49.9056, "eval_samples_per_second": 60.113, "eval_steps_per_second": 1.884, "step": 11600 }, { "epoch": 0.62, "learning_rate": 0.00019476678776357375, "loss": 1.3684, "step": 11620 }, { "epoch": 0.62, "learning_rate": 0.00019475604746139533, "loss": 1.3618, "step": 11640 }, { "epoch": 0.62, "eval_loss": 1.3980711698532104, "eval_runtime": 49.9349, "eval_samples_per_second": 60.078, "eval_steps_per_second": 1.882, "step": 11650 }, { "epoch": 0.62, "learning_rate": 0.00019474530715921692, "loss": 1.365, "step": 11660 }, { "epoch": 0.62, "learning_rate": 0.00019473456685703853, "loss": 1.3526, "step": 11680 }, { "epoch": 0.63, "learning_rate": 0.00019472382655486012, "loss": 1.3697, "step": 11700 }, { "epoch": 0.63, "eval_loss": 1.3980944156646729, "eval_runtime": 49.9354, "eval_samples_per_second": 60.078, "eval_steps_per_second": 1.882, "step": 11700 }, { "epoch": 0.63, "learning_rate": 0.00019471308625268173, "loss": 1.3351, "step": 11720 }, { "epoch": 0.63, "learning_rate": 0.00019470234595050332, "loss": 1.3683, "step": 11740 }, { "epoch": 0.63, "eval_loss": 1.3976329565048218, "eval_runtime": 49.9772, "eval_samples_per_second": 60.027, "eval_steps_per_second": 1.881, "step": 11750 }, { "epoch": 0.63, "learning_rate": 0.0001946916056483249, "loss": 1.3428, "step": 11760 }, { "epoch": 0.63, "learning_rate": 0.00019468086534614652, "loss": 1.3529, "step": 11780 }, { "epoch": 0.63, "learning_rate": 0.00019467066205907705, "loss": 1.339, "step": 11800 }, { "epoch": 0.63, "eval_loss": 1.3983910083770752, "eval_runtime": 49.9014, "eval_samples_per_second": 60.119, "eval_steps_per_second": 1.884, "step": 11800 }, { "epoch": 0.63, "learning_rate": 0.00019465992175689864, "loss": 1.3442, "step": 11820 }, { "epoch": 0.63, "learning_rate": 0.00019464918145472025, "loss": 1.3504, "step": 11840 }, { "epoch": 0.63, "eval_loss": 1.3987594842910767, "eval_runtime": 49.9806, "eval_samples_per_second": 60.023, "eval_steps_per_second": 1.881, "step": 11850 }, { "epoch": 0.63, "learning_rate": 0.00019463844115254184, "loss": 1.34, "step": 11860 }, { "epoch": 0.63, "learning_rate": 0.00019462770085036343, "loss": 1.3778, "step": 11880 }, { "epoch": 0.64, "learning_rate": 0.00019461696054818504, "loss": 1.3813, "step": 11900 }, { "epoch": 0.64, "eval_loss": 1.3981685638427734, "eval_runtime": 49.9063, "eval_samples_per_second": 60.113, "eval_steps_per_second": 1.884, "step": 11900 }, { "epoch": 0.64, "learning_rate": 0.00019460622024600663, "loss": 1.3647, "step": 11920 }, { "epoch": 0.64, "learning_rate": 0.00019459547994382824, "loss": 1.3227, "step": 11940 }, { "epoch": 0.64, "eval_loss": 1.398089051246643, "eval_runtime": 49.9227, "eval_samples_per_second": 60.093, "eval_steps_per_second": 1.883, "step": 11950 }, { "epoch": 0.64, "learning_rate": 0.00019458473964164983, "loss": 1.3295, "step": 11960 }, { "epoch": 0.64, "learning_rate": 0.0001945739993394714, "loss": 1.3703, "step": 11980 }, { "epoch": 0.64, "learning_rate": 0.00019456325903729303, "loss": 1.3861, "step": 12000 }, { "epoch": 0.64, "eval_loss": 1.3978980779647827, "eval_runtime": 49.9428, "eval_samples_per_second": 60.069, "eval_steps_per_second": 1.882, "step": 12000 }, { "epoch": 0.64, "learning_rate": 0.0001945525187351146, "loss": 1.3193, "step": 12020 }, { "epoch": 0.64, "learning_rate": 0.00019454177843293623, "loss": 1.3136, "step": 12040 }, { "epoch": 0.64, "eval_loss": 1.3983912467956543, "eval_runtime": 49.9338, "eval_samples_per_second": 60.079, "eval_steps_per_second": 1.882, "step": 12050 }, { "epoch": 0.64, "learning_rate": 0.0001945310381307578, "loss": 1.3612, "step": 12060 }, { "epoch": 0.65, "learning_rate": 0.0001945202978285794, "loss": 1.3684, "step": 12080 }, { "epoch": 0.65, "learning_rate": 0.000194509557526401, "loss": 1.3534, "step": 12100 }, { "epoch": 0.65, "eval_loss": 1.3977357149124146, "eval_runtime": 49.9318, "eval_samples_per_second": 60.082, "eval_steps_per_second": 1.883, "step": 12100 }, { "epoch": 0.65, "learning_rate": 0.0001944988172242226, "loss": 1.3366, "step": 12120 }, { "epoch": 0.65, "learning_rate": 0.0001944880769220442, "loss": 1.3793, "step": 12140 }, { "epoch": 0.65, "eval_loss": 1.3974858522415161, "eval_runtime": 49.9338, "eval_samples_per_second": 60.08, "eval_steps_per_second": 1.882, "step": 12150 }, { "epoch": 0.65, "learning_rate": 0.0001944773366198658, "loss": 1.3434, "step": 12160 }, { "epoch": 0.65, "learning_rate": 0.00019446659631768739, "loss": 1.3663, "step": 12180 }, { "epoch": 0.65, "learning_rate": 0.000194455856015509, "loss": 1.3569, "step": 12200 }, { "epoch": 0.65, "eval_loss": 1.3975430727005005, "eval_runtime": 49.935, "eval_samples_per_second": 60.078, "eval_steps_per_second": 1.882, "step": 12200 }, { "epoch": 0.65, "learning_rate": 0.00019444511571333059, "loss": 1.3211, "step": 12220 }, { "epoch": 0.65, "learning_rate": 0.0001944343754111522, "loss": 1.3385, "step": 12240 }, { "epoch": 0.65, "eval_loss": 1.397087574005127, "eval_runtime": 49.9229, "eval_samples_per_second": 60.093, "eval_steps_per_second": 1.883, "step": 12250 }, { "epoch": 0.66, "learning_rate": 0.00019442363510897379, "loss": 1.3647, "step": 12260 }, { "epoch": 0.66, "learning_rate": 0.0001944128948067954, "loss": 1.3593, "step": 12280 }, { "epoch": 0.66, "learning_rate": 0.000194402154504617, "loss": 1.315, "step": 12300 }, { "epoch": 0.66, "eval_loss": 1.3981412649154663, "eval_runtime": 49.8967, "eval_samples_per_second": 60.124, "eval_steps_per_second": 1.884, "step": 12300 }, { "epoch": 0.66, "learning_rate": 0.0001943914142024386, "loss": 1.3474, "step": 12320 }, { "epoch": 0.66, "learning_rate": 0.0001943806739002602, "loss": 1.3415, "step": 12340 }, { "epoch": 0.66, "eval_loss": 1.3985828161239624, "eval_runtime": 49.9464, "eval_samples_per_second": 60.064, "eval_steps_per_second": 1.882, "step": 12350 }, { "epoch": 0.66, "learning_rate": 0.0001943699335980818, "loss": 1.3482, "step": 12360 }, { "epoch": 0.66, "learning_rate": 0.00019435919329590339, "loss": 1.3501, "step": 12380 }, { "epoch": 0.66, "learning_rate": 0.000194348452993725, "loss": 1.3659, "step": 12400 }, { "epoch": 0.66, "eval_loss": 1.3977829217910767, "eval_runtime": 49.9024, "eval_samples_per_second": 60.117, "eval_steps_per_second": 1.884, "step": 12400 }, { "epoch": 0.66, "learning_rate": 0.00019433771269154659, "loss": 1.3447, "step": 12420 }, { "epoch": 0.66, "learning_rate": 0.0001943269723893682, "loss": 1.3557, "step": 12440 }, { "epoch": 0.67, "eval_loss": 1.397894263267517, "eval_runtime": 49.9359, "eval_samples_per_second": 60.077, "eval_steps_per_second": 1.882, "step": 12450 }, { "epoch": 0.67, "learning_rate": 0.00019431623208718979, "loss": 1.3797, "step": 12460 }, { "epoch": 0.67, "learning_rate": 0.00019430549178501137, "loss": 1.3659, "step": 12480 }, { "epoch": 0.67, "learning_rate": 0.00019429475148283299, "loss": 1.3196, "step": 12500 }, { "epoch": 0.67, "eval_loss": 1.3980075120925903, "eval_runtime": 49.9211, "eval_samples_per_second": 60.095, "eval_steps_per_second": 1.883, "step": 12500 }, { "epoch": 0.67, "learning_rate": 0.00019428401118065457, "loss": 1.3175, "step": 12520 }, { "epoch": 0.67, "learning_rate": 0.00019427327087847619, "loss": 1.3826, "step": 12540 }, { "epoch": 0.67, "eval_loss": 1.3978780508041382, "eval_runtime": 49.9413, "eval_samples_per_second": 60.07, "eval_steps_per_second": 1.882, "step": 12550 }, { "epoch": 0.67, "learning_rate": 0.00019426253057629777, "loss": 1.3643, "step": 12560 }, { "epoch": 0.67, "learning_rate": 0.00019425179027411936, "loss": 1.35, "step": 12580 }, { "epoch": 0.67, "learning_rate": 0.00019424104997194097, "loss": 1.4053, "step": 12600 }, { "epoch": 0.67, "eval_loss": 1.398128867149353, "eval_runtime": 49.914, "eval_samples_per_second": 60.103, "eval_steps_per_second": 1.883, "step": 12600 }, { "epoch": 0.67, "learning_rate": 0.00019423030966976256, "loss": 1.3571, "step": 12620 }, { "epoch": 0.68, "learning_rate": 0.00019421956936758417, "loss": 1.3333, "step": 12640 }, { "epoch": 0.68, "eval_loss": 1.3984968662261963, "eval_runtime": 49.9223, "eval_samples_per_second": 60.093, "eval_steps_per_second": 1.883, "step": 12650 }, { "epoch": 0.68, "learning_rate": 0.00019420882906540576, "loss": 1.3146, "step": 12660 }, { "epoch": 0.68, "learning_rate": 0.00019419808876322737, "loss": 1.3332, "step": 12680 }, { "epoch": 0.68, "learning_rate": 0.00019418734846104896, "loss": 1.3395, "step": 12700 }, { "epoch": 0.68, "eval_loss": 1.3976988792419434, "eval_runtime": 49.8906, "eval_samples_per_second": 60.132, "eval_steps_per_second": 1.884, "step": 12700 }, { "epoch": 0.68, "learning_rate": 0.00019417714517397947, "loss": 1.3311, "step": 12720 }, { "epoch": 0.68, "learning_rate": 0.00019416640487180105, "loss": 1.3763, "step": 12740 }, { "epoch": 0.68, "eval_loss": 1.3981629610061646, "eval_runtime": 49.9166, "eval_samples_per_second": 60.1, "eval_steps_per_second": 1.883, "step": 12750 }, { "epoch": 0.68, "learning_rate": 0.0001941556645696227, "loss": 1.3319, "step": 12760 }, { "epoch": 0.68, "learning_rate": 0.00019414492426744428, "loss": 1.3113, "step": 12780 }, { "epoch": 0.68, "learning_rate": 0.0001941341839652659, "loss": 1.3508, "step": 12800 }, { "epoch": 0.68, "eval_loss": 1.3979384899139404, "eval_runtime": 49.8894, "eval_samples_per_second": 60.133, "eval_steps_per_second": 1.884, "step": 12800 }, { "epoch": 0.68, "learning_rate": 0.00019412344366308748, "loss": 1.3468, "step": 12820 }, { "epoch": 0.69, "learning_rate": 0.00019411270336090907, "loss": 1.3509, "step": 12840 }, { "epoch": 0.69, "eval_loss": 1.3976484537124634, "eval_runtime": 49.9245, "eval_samples_per_second": 60.091, "eval_steps_per_second": 1.883, "step": 12850 }, { "epoch": 0.69, "learning_rate": 0.00019410196305873068, "loss": 1.3474, "step": 12860 }, { "epoch": 0.69, "learning_rate": 0.00019409122275655227, "loss": 1.3796, "step": 12880 }, { "epoch": 0.69, "learning_rate": 0.00019408048245437388, "loss": 1.362, "step": 12900 }, { "epoch": 0.69, "eval_loss": 1.3983675241470337, "eval_runtime": 49.9089, "eval_samples_per_second": 60.109, "eval_steps_per_second": 1.883, "step": 12900 }, { "epoch": 0.69, "learning_rate": 0.00019406974215219547, "loss": 1.3926, "step": 12920 }, { "epoch": 0.69, "learning_rate": 0.00019405900185001705, "loss": 1.3812, "step": 12940 }, { "epoch": 0.69, "eval_loss": 1.3985722064971924, "eval_runtime": 49.9385, "eval_samples_per_second": 60.074, "eval_steps_per_second": 1.882, "step": 12950 }, { "epoch": 0.69, "learning_rate": 0.00019404826154783867, "loss": 1.3442, "step": 12960 }, { "epoch": 0.69, "learning_rate": 0.00019403752124566025, "loss": 1.3183, "step": 12980 }, { "epoch": 0.69, "learning_rate": 0.00019402678094348187, "loss": 1.3254, "step": 13000 }, { "epoch": 0.69, "eval_loss": 1.3993173837661743, "eval_runtime": 49.8843, "eval_samples_per_second": 60.139, "eval_steps_per_second": 1.884, "step": 13000 }, { "epoch": 0.7, "learning_rate": 0.00019401604064130345, "loss": 1.3607, "step": 13020 }, { "epoch": 0.7, "learning_rate": 0.00019400530033912504, "loss": 1.3324, "step": 13040 }, { "epoch": 0.7, "eval_loss": 1.398432970046997, "eval_runtime": 49.9241, "eval_samples_per_second": 60.091, "eval_steps_per_second": 1.883, "step": 13050 }, { "epoch": 0.7, "learning_rate": 0.00019399456003694665, "loss": 1.3898, "step": 13060 }, { "epoch": 0.7, "learning_rate": 0.00019398381973476824, "loss": 1.337, "step": 13080 }, { "epoch": 0.7, "learning_rate": 0.00019397307943258985, "loss": 1.3527, "step": 13100 }, { "epoch": 0.7, "eval_loss": 1.3988566398620605, "eval_runtime": 49.8968, "eval_samples_per_second": 60.124, "eval_steps_per_second": 1.884, "step": 13100 }, { "epoch": 0.7, "learning_rate": 0.00019396233913041144, "loss": 1.3584, "step": 13120 }, { "epoch": 0.7, "learning_rate": 0.00019395159882823303, "loss": 1.3111, "step": 13140 }, { "epoch": 0.7, "eval_loss": 1.3984296321868896, "eval_runtime": 49.9247, "eval_samples_per_second": 60.091, "eval_steps_per_second": 1.883, "step": 13150 }, { "epoch": 0.7, "learning_rate": 0.00019394085852605464, "loss": 1.3707, "step": 13160 }, { "epoch": 0.7, "learning_rate": 0.00019393011822387623, "loss": 1.3172, "step": 13180 }, { "epoch": 0.71, "learning_rate": 0.00019391937792169784, "loss": 1.3356, "step": 13200 }, { "epoch": 0.71, "eval_loss": 1.398664116859436, "eval_runtime": 49.933, "eval_samples_per_second": 60.081, "eval_steps_per_second": 1.883, "step": 13200 }, { "epoch": 0.71, "learning_rate": 0.00019390863761951943, "loss": 1.3222, "step": 13220 }, { "epoch": 0.71, "learning_rate": 0.000193897897317341, "loss": 1.3346, "step": 13240 }, { "epoch": 0.71, "eval_loss": 1.3981634378433228, "eval_runtime": 49.9997, "eval_samples_per_second": 60.0, "eval_steps_per_second": 1.88, "step": 13250 }, { "epoch": 0.71, "learning_rate": 0.00019388715701516263, "loss": 1.3066, "step": 13260 }, { "epoch": 0.71, "learning_rate": 0.0001938764167129842, "loss": 1.3333, "step": 13280 }, { "epoch": 0.71, "learning_rate": 0.00019386567641080583, "loss": 1.324, "step": 13300 }, { "epoch": 0.71, "eval_loss": 1.3983701467514038, "eval_runtime": 49.8793, "eval_samples_per_second": 60.145, "eval_steps_per_second": 1.885, "step": 13300 }, { "epoch": 0.71, "learning_rate": 0.0001938549361086274, "loss": 1.3482, "step": 13320 }, { "epoch": 0.71, "learning_rate": 0.00019384419580644903, "loss": 1.3449, "step": 13340 }, { "epoch": 0.71, "eval_loss": 1.3981118202209473, "eval_runtime": 49.9209, "eval_samples_per_second": 60.095, "eval_steps_per_second": 1.883, "step": 13350 }, { "epoch": 0.71, "learning_rate": 0.0001938334555042706, "loss": 1.3021, "step": 13360 }, { "epoch": 0.71, "learning_rate": 0.00019382271520209223, "loss": 1.3314, "step": 13380 }, { "epoch": 0.72, "learning_rate": 0.00019381197489991384, "loss": 1.3431, "step": 13400 }, { "epoch": 0.72, "eval_loss": 1.397736668586731, "eval_runtime": 49.9221, "eval_samples_per_second": 60.094, "eval_steps_per_second": 1.883, "step": 13400 }, { "epoch": 0.72, "learning_rate": 0.00019380123459773543, "loss": 1.3641, "step": 13420 }, { "epoch": 0.72, "learning_rate": 0.000193790494295557, "loss": 1.3978, "step": 13440 }, { "epoch": 0.72, "eval_loss": 1.3974454402923584, "eval_runtime": 49.9612, "eval_samples_per_second": 60.047, "eval_steps_per_second": 1.881, "step": 13450 }, { "epoch": 0.72, "learning_rate": 0.00019377975399337863, "loss": 1.3549, "step": 13460 }, { "epoch": 0.72, "learning_rate": 0.0001937690136912002, "loss": 1.3348, "step": 13480 }, { "epoch": 0.72, "learning_rate": 0.00019375827338902183, "loss": 1.3524, "step": 13500 }, { "epoch": 0.72, "eval_loss": 1.3968658447265625, "eval_runtime": 49.8947, "eval_samples_per_second": 60.127, "eval_steps_per_second": 1.884, "step": 13500 }, { "epoch": 0.72, "learning_rate": 0.0001937475330868434, "loss": 1.3652, "step": 13520 }, { "epoch": 0.72, "learning_rate": 0.000193736792784665, "loss": 1.313, "step": 13540 }, { "epoch": 0.72, "eval_loss": 1.3979847431182861, "eval_runtime": 49.9397, "eval_samples_per_second": 60.072, "eval_steps_per_second": 1.882, "step": 13550 }, { "epoch": 0.72, "learning_rate": 0.0001937260524824866, "loss": 1.3404, "step": 13560 }, { "epoch": 0.73, "learning_rate": 0.0001937153121803082, "loss": 1.356, "step": 13580 }, { "epoch": 0.73, "learning_rate": 0.0001937045718781298, "loss": 1.3845, "step": 13600 }, { "epoch": 0.73, "eval_loss": 1.398152470588684, "eval_runtime": 49.908, "eval_samples_per_second": 60.111, "eval_steps_per_second": 1.883, "step": 13600 }, { "epoch": 0.73, "learning_rate": 0.0001936938315759514, "loss": 1.3063, "step": 13620 }, { "epoch": 0.73, "learning_rate": 0.00019368309127377299, "loss": 1.3415, "step": 13640 }, { "epoch": 0.73, "eval_loss": 1.397993803024292, "eval_runtime": 49.8822, "eval_samples_per_second": 60.142, "eval_steps_per_second": 1.884, "step": 13650 }, { "epoch": 0.73, "learning_rate": 0.0001936723509715946, "loss": 1.3344, "step": 13660 }, { "epoch": 0.73, "learning_rate": 0.00019366161066941619, "loss": 1.3579, "step": 13680 }, { "epoch": 0.73, "learning_rate": 0.0001936508703672378, "loss": 1.3523, "step": 13700 }, { "epoch": 0.73, "eval_loss": 1.3971080780029297, "eval_runtime": 49.8677, "eval_samples_per_second": 60.159, "eval_steps_per_second": 1.885, "step": 13700 }, { "epoch": 0.73, "learning_rate": 0.00019364013006505939, "loss": 1.3381, "step": 13720 }, { "epoch": 0.73, "learning_rate": 0.000193629389762881, "loss": 1.3221, "step": 13740 }, { "epoch": 0.73, "eval_loss": 1.3974502086639404, "eval_runtime": 49.9344, "eval_samples_per_second": 60.079, "eval_steps_per_second": 1.882, "step": 13750 }, { "epoch": 0.74, "learning_rate": 0.00019361864946070259, "loss": 1.3752, "step": 13760 }, { "epoch": 0.74, "learning_rate": 0.00019360790915852417, "loss": 1.3802, "step": 13780 }, { "epoch": 0.74, "learning_rate": 0.00019359716885634579, "loss": 1.3549, "step": 13800 }, { "epoch": 0.74, "eval_loss": 1.397215485572815, "eval_runtime": 49.8832, "eval_samples_per_second": 60.141, "eval_steps_per_second": 1.884, "step": 13800 }, { "epoch": 0.74, "learning_rate": 0.00019358642855416737, "loss": 1.3276, "step": 13820 }, { "epoch": 0.74, "learning_rate": 0.00019357568825198899, "loss": 1.3705, "step": 13840 }, { "epoch": 0.74, "eval_loss": 1.3972536325454712, "eval_runtime": 49.9311, "eval_samples_per_second": 60.083, "eval_steps_per_second": 1.883, "step": 13850 }, { "epoch": 0.74, "learning_rate": 0.00019356494794981057, "loss": 1.3858, "step": 13860 }, { "epoch": 0.74, "learning_rate": 0.00019355420764763216, "loss": 1.3432, "step": 13880 }, { "epoch": 0.74, "learning_rate": 0.00019354346734545377, "loss": 1.3265, "step": 13900 }, { "epoch": 0.74, "eval_loss": 1.3972595930099487, "eval_runtime": 49.9209, "eval_samples_per_second": 60.095, "eval_steps_per_second": 1.883, "step": 13900 }, { "epoch": 0.74, "learning_rate": 0.00019353272704327536, "loss": 1.3146, "step": 13920 }, { "epoch": 0.74, "learning_rate": 0.00019352198674109697, "loss": 1.3571, "step": 13940 }, { "epoch": 0.75, "eval_loss": 1.3972257375717163, "eval_runtime": 49.9372, "eval_samples_per_second": 60.075, "eval_steps_per_second": 1.882, "step": 13950 }, { "epoch": 0.75, "learning_rate": 0.00019351124643891856, "loss": 1.3238, "step": 13960 }, { "epoch": 0.75, "learning_rate": 0.00019350050613674015, "loss": 1.3853, "step": 13980 }, { "epoch": 0.75, "learning_rate": 0.00019348976583456176, "loss": 1.3557, "step": 14000 }, { "epoch": 0.75, "eval_loss": 1.3965345621109009, "eval_runtime": 49.9404, "eval_samples_per_second": 60.072, "eval_steps_per_second": 1.882, "step": 14000 }, { "epoch": 0.75, "learning_rate": 0.00019347902553238337, "loss": 1.3851, "step": 14020 }, { "epoch": 0.75, "learning_rate": 0.00019346828523020496, "loss": 1.3795, "step": 14040 }, { "epoch": 0.75, "eval_loss": 1.396411418914795, "eval_runtime": 49.9389, "eval_samples_per_second": 60.073, "eval_steps_per_second": 1.882, "step": 14050 }, { "epoch": 0.75, "learning_rate": 0.00019345754492802657, "loss": 1.3692, "step": 14060 }, { "epoch": 0.75, "learning_rate": 0.00019344680462584816, "loss": 1.3435, "step": 14080 }, { "epoch": 0.75, "learning_rate": 0.00019343606432366977, "loss": 1.3379, "step": 14100 }, { "epoch": 0.75, "eval_loss": 1.396824836730957, "eval_runtime": 49.913, "eval_samples_per_second": 60.105, "eval_steps_per_second": 1.883, "step": 14100 }, { "epoch": 0.75, "learning_rate": 0.00019342532402149136, "loss": 1.3492, "step": 14120 }, { "epoch": 0.76, "learning_rate": 0.00019341458371931295, "loss": 1.3871, "step": 14140 }, { "epoch": 0.76, "eval_loss": 1.397158145904541, "eval_runtime": 49.9329, "eval_samples_per_second": 60.081, "eval_steps_per_second": 1.883, "step": 14150 }, { "epoch": 0.76, "learning_rate": 0.00019340384341713456, "loss": 1.3377, "step": 14160 }, { "epoch": 0.76, "learning_rate": 0.00019339310311495615, "loss": 1.3564, "step": 14180 }, { "epoch": 0.76, "learning_rate": 0.00019338236281277776, "loss": 1.3323, "step": 14200 }, { "epoch": 0.76, "eval_loss": 1.3961149454116821, "eval_runtime": 49.8787, "eval_samples_per_second": 60.146, "eval_steps_per_second": 1.885, "step": 14200 }, { "epoch": 0.76, "learning_rate": 0.00019337162251059935, "loss": 1.3267, "step": 14220 }, { "epoch": 0.76, "learning_rate": 0.00019336088220842096, "loss": 1.3704, "step": 14240 }, { "epoch": 0.76, "eval_loss": 1.3969779014587402, "eval_runtime": 49.9253, "eval_samples_per_second": 60.09, "eval_steps_per_second": 1.883, "step": 14250 }, { "epoch": 0.76, "learning_rate": 0.00019335014190624255, "loss": 1.3075, "step": 14260 }, { "epoch": 0.76, "learning_rate": 0.00019333940160406413, "loss": 1.3306, "step": 14280 }, { "epoch": 0.76, "learning_rate": 0.00019332866130188575, "loss": 1.3715, "step": 14300 }, { "epoch": 0.76, "eval_loss": 1.398051381111145, "eval_runtime": 49.9213, "eval_samples_per_second": 60.095, "eval_steps_per_second": 1.883, "step": 14300 }, { "epoch": 0.77, "learning_rate": 0.00019331792099970733, "loss": 1.3683, "step": 14320 }, { "epoch": 0.77, "learning_rate": 0.00019330718069752895, "loss": 1.3532, "step": 14340 }, { "epoch": 0.77, "eval_loss": 1.3968747854232788, "eval_runtime": 49.8917, "eval_samples_per_second": 60.13, "eval_steps_per_second": 1.884, "step": 14350 }, { "epoch": 0.77, "learning_rate": 0.00019329644039535053, "loss": 1.3221, "step": 14360 }, { "epoch": 0.77, "learning_rate": 0.00019328570009317212, "loss": 1.3417, "step": 14380 }, { "epoch": 0.77, "learning_rate": 0.00019327495979099373, "loss": 1.3569, "step": 14400 }, { "epoch": 0.77, "eval_loss": 1.3969061374664307, "eval_runtime": 49.9229, "eval_samples_per_second": 60.093, "eval_steps_per_second": 1.883, "step": 14400 }, { "epoch": 0.77, "learning_rate": 0.00019326421948881532, "loss": 1.3145, "step": 14420 }, { "epoch": 0.77, "learning_rate": 0.00019325347918663693, "loss": 1.3658, "step": 14440 }, { "epoch": 0.77, "eval_loss": 1.3966419696807861, "eval_runtime": 49.9352, "eval_samples_per_second": 60.078, "eval_steps_per_second": 1.882, "step": 14450 }, { "epoch": 0.77, "learning_rate": 0.00019324273888445852, "loss": 1.3112, "step": 14460 }, { "epoch": 0.77, "learning_rate": 0.0001932319985822801, "loss": 1.3663, "step": 14480 }, { "epoch": 0.77, "learning_rate": 0.00019322125828010172, "loss": 1.344, "step": 14500 }, { "epoch": 0.77, "eval_loss": 1.3972774744033813, "eval_runtime": 49.8941, "eval_samples_per_second": 60.127, "eval_steps_per_second": 1.884, "step": 14500 }, { "epoch": 0.78, "learning_rate": 0.0001932105179779233, "loss": 1.4069, "step": 14520 }, { "epoch": 0.78, "learning_rate": 0.00019319977767574492, "loss": 1.317, "step": 14540 }, { "epoch": 0.78, "eval_loss": 1.3966619968414307, "eval_runtime": 49.9177, "eval_samples_per_second": 60.099, "eval_steps_per_second": 1.883, "step": 14550 }, { "epoch": 0.78, "learning_rate": 0.0001931890373735665, "loss": 1.3299, "step": 14560 }, { "epoch": 0.78, "learning_rate": 0.0001931782970713881, "loss": 1.3449, "step": 14580 }, { "epoch": 0.78, "learning_rate": 0.0001931675567692097, "loss": 1.3442, "step": 14600 }, { "epoch": 0.78, "eval_loss": 1.3963496685028076, "eval_runtime": 49.871, "eval_samples_per_second": 60.155, "eval_steps_per_second": 1.885, "step": 14600 }, { "epoch": 0.78, "learning_rate": 0.0001931568164670313, "loss": 1.323, "step": 14620 }, { "epoch": 0.78, "learning_rate": 0.00019314607616485293, "loss": 1.2905, "step": 14640 }, { "epoch": 0.78, "eval_loss": 1.3969212770462036, "eval_runtime": 49.9598, "eval_samples_per_second": 60.048, "eval_steps_per_second": 1.882, "step": 14650 }, { "epoch": 0.78, "learning_rate": 0.00019313533586267452, "loss": 1.3545, "step": 14660 }, { "epoch": 0.78, "learning_rate": 0.0001931245955604961, "loss": 1.3491, "step": 14680 }, { "epoch": 0.79, "learning_rate": 0.00019311385525831772, "loss": 1.3044, "step": 14700 }, { "epoch": 0.79, "eval_loss": 1.396906852722168, "eval_runtime": 49.8743, "eval_samples_per_second": 60.151, "eval_steps_per_second": 1.885, "step": 14700 }, { "epoch": 0.79, "learning_rate": 0.0001931031149561393, "loss": 1.3529, "step": 14720 }, { "epoch": 0.79, "learning_rate": 0.00019309237465396092, "loss": 1.357, "step": 14740 }, { "epoch": 0.79, "eval_loss": 1.3965973854064941, "eval_runtime": 49.9265, "eval_samples_per_second": 60.088, "eval_steps_per_second": 1.883, "step": 14750 }, { "epoch": 0.79, "learning_rate": 0.0001930816343517825, "loss": 1.3667, "step": 14760 }, { "epoch": 0.79, "learning_rate": 0.0001930708940496041, "loss": 1.387, "step": 14780 }, { "epoch": 0.79, "learning_rate": 0.0001930601537474257, "loss": 1.3874, "step": 14800 }, { "epoch": 0.79, "eval_loss": 1.3979121446609497, "eval_runtime": 49.9282, "eval_samples_per_second": 60.086, "eval_steps_per_second": 1.883, "step": 14800 }, { "epoch": 0.79, "learning_rate": 0.0001930494134452473, "loss": 1.3496, "step": 14820 }, { "epoch": 0.79, "learning_rate": 0.0001930386731430689, "loss": 1.368, "step": 14840 }, { "epoch": 0.79, "eval_loss": 1.3969990015029907, "eval_runtime": 49.921, "eval_samples_per_second": 60.095, "eval_steps_per_second": 1.883, "step": 14850 }, { "epoch": 0.79, "learning_rate": 0.0001930279328408905, "loss": 1.3156, "step": 14860 }, { "epoch": 0.8, "learning_rate": 0.00019301719253871208, "loss": 1.3375, "step": 14880 }, { "epoch": 0.8, "learning_rate": 0.0001930064522365337, "loss": 1.3167, "step": 14900 }, { "epoch": 0.8, "eval_loss": 1.3968135118484497, "eval_runtime": 49.8772, "eval_samples_per_second": 60.148, "eval_steps_per_second": 1.885, "step": 14900 }, { "epoch": 0.8, "learning_rate": 0.00019299571193435528, "loss": 1.3571, "step": 14920 }, { "epoch": 0.8, "learning_rate": 0.0001929849716321769, "loss": 1.3635, "step": 14940 }, { "epoch": 0.8, "eval_loss": 1.3962677717208862, "eval_runtime": 49.9213, "eval_samples_per_second": 60.095, "eval_steps_per_second": 1.883, "step": 14950 }, { "epoch": 0.8, "learning_rate": 0.00019297423132999848, "loss": 1.3213, "step": 14960 }, { "epoch": 0.8, "learning_rate": 0.00019296349102782006, "loss": 1.3479, "step": 14980 }, { "epoch": 0.8, "learning_rate": 0.00019295275072564168, "loss": 1.3337, "step": 15000 }, { "epoch": 0.8, "eval_loss": 1.3972392082214355, "eval_runtime": 49.9202, "eval_samples_per_second": 60.096, "eval_steps_per_second": 1.883, "step": 15000 }, { "epoch": 0.8, "learning_rate": 0.00019294201042346326, "loss": 1.3587, "step": 15020 }, { "epoch": 0.8, "learning_rate": 0.00019293127012128488, "loss": 1.3067, "step": 15040 }, { "epoch": 0.8, "eval_loss": 1.3960102796554565, "eval_runtime": 49.9235, "eval_samples_per_second": 60.092, "eval_steps_per_second": 1.883, "step": 15050 }, { "epoch": 0.8, "learning_rate": 0.00019292052981910646, "loss": 1.3598, "step": 15060 }, { "epoch": 0.81, "learning_rate": 0.00019290978951692805, "loss": 1.3731, "step": 15080 }, { "epoch": 0.81, "learning_rate": 0.00019289904921474966, "loss": 1.3441, "step": 15100 }, { "epoch": 0.81, "eval_loss": 1.3960813283920288, "eval_runtime": 49.8955, "eval_samples_per_second": 60.126, "eval_steps_per_second": 1.884, "step": 15100 }, { "epoch": 0.81, "learning_rate": 0.00019288830891257125, "loss": 1.4031, "step": 15120 }, { "epoch": 0.81, "learning_rate": 0.00019287756861039286, "loss": 1.3091, "step": 15140 }, { "epoch": 0.81, "eval_loss": 1.3964964151382446, "eval_runtime": 49.9392, "eval_samples_per_second": 60.073, "eval_steps_per_second": 1.882, "step": 15150 }, { "epoch": 0.81, "learning_rate": 0.00019286682830821445, "loss": 1.3879, "step": 15160 }, { "epoch": 0.81, "learning_rate": 0.00019285608800603606, "loss": 1.2988, "step": 15180 }, { "epoch": 0.81, "learning_rate": 0.00019284534770385765, "loss": 1.3283, "step": 15200 }, { "epoch": 0.81, "eval_loss": 1.3965301513671875, "eval_runtime": 49.8964, "eval_samples_per_second": 60.125, "eval_steps_per_second": 1.884, "step": 15200 }, { "epoch": 0.81, "learning_rate": 0.00019283460740167924, "loss": 1.296, "step": 15220 }, { "epoch": 0.81, "learning_rate": 0.00019282386709950085, "loss": 1.3221, "step": 15240 }, { "epoch": 0.81, "eval_loss": 1.396709680557251, "eval_runtime": 49.9295, "eval_samples_per_second": 60.085, "eval_steps_per_second": 1.883, "step": 15250 }, { "epoch": 0.82, "learning_rate": 0.00019281312679732244, "loss": 1.3727, "step": 15260 }, { "epoch": 0.82, "learning_rate": 0.00019280238649514405, "loss": 1.3346, "step": 15280 }, { "epoch": 0.82, "learning_rate": 0.00019279164619296566, "loss": 1.3356, "step": 15300 }, { "epoch": 0.82, "eval_loss": 1.3975436687469482, "eval_runtime": 49.8854, "eval_samples_per_second": 60.138, "eval_steps_per_second": 1.884, "step": 15300 }, { "epoch": 0.82, "learning_rate": 0.00019278090589078725, "loss": 1.3798, "step": 15320 }, { "epoch": 0.82, "learning_rate": 0.00019277016558860886, "loss": 1.3886, "step": 15340 }, { "epoch": 0.82, "eval_loss": 1.3962510824203491, "eval_runtime": 49.9207, "eval_samples_per_second": 60.095, "eval_steps_per_second": 1.883, "step": 15350 }, { "epoch": 0.82, "learning_rate": 0.00019275942528643045, "loss": 1.3468, "step": 15360 }, { "epoch": 0.82, "learning_rate": 0.00019274868498425204, "loss": 1.3112, "step": 15380 }, { "epoch": 0.82, "learning_rate": 0.00019273794468207365, "loss": 1.3781, "step": 15400 }, { "epoch": 0.82, "eval_loss": 1.396148920059204, "eval_runtime": 49.8966, "eval_samples_per_second": 60.124, "eval_steps_per_second": 1.884, "step": 15400 }, { "epoch": 0.82, "learning_rate": 0.00019272720437989524, "loss": 1.3551, "step": 15420 }, { "epoch": 0.82, "learning_rate": 0.00019271646407771685, "loss": 1.3576, "step": 15440 }, { "epoch": 0.83, "eval_loss": 1.3962191343307495, "eval_runtime": 49.9264, "eval_samples_per_second": 60.088, "eval_steps_per_second": 1.883, "step": 15450 }, { "epoch": 0.83, "learning_rate": 0.00019270572377553844, "loss": 1.3325, "step": 15460 }, { "epoch": 0.83, "learning_rate": 0.00019269498347336002, "loss": 1.3269, "step": 15480 }, { "epoch": 0.83, "learning_rate": 0.00019268424317118164, "loss": 1.3609, "step": 15500 }, { "epoch": 0.83, "eval_loss": 1.396767258644104, "eval_runtime": 49.8856, "eval_samples_per_second": 60.138, "eval_steps_per_second": 1.884, "step": 15500 }, { "epoch": 0.83, "learning_rate": 0.00019267350286900322, "loss": 1.3357, "step": 15520 }, { "epoch": 0.83, "learning_rate": 0.00019266276256682484, "loss": 1.3444, "step": 15540 }, { "epoch": 0.83, "eval_loss": 1.3966103792190552, "eval_runtime": 49.9428, "eval_samples_per_second": 60.069, "eval_steps_per_second": 1.882, "step": 15550 }, { "epoch": 0.83, "learning_rate": 0.00019265202226464642, "loss": 1.3193, "step": 15560 }, { "epoch": 0.83, "learning_rate": 0.000192641281962468, "loss": 1.3816, "step": 15580 }, { "epoch": 0.83, "learning_rate": 0.00019263054166028962, "loss": 1.3225, "step": 15600 }, { "epoch": 0.83, "eval_loss": 1.3972910642623901, "eval_runtime": 49.882, "eval_samples_per_second": 60.142, "eval_steps_per_second": 1.884, "step": 15600 }, { "epoch": 0.83, "learning_rate": 0.0001926198013581112, "loss": 1.3317, "step": 15620 }, { "epoch": 0.84, "learning_rate": 0.00019260906105593282, "loss": 1.3477, "step": 15640 }, { "epoch": 0.84, "eval_loss": 1.3958677053451538, "eval_runtime": 49.9085, "eval_samples_per_second": 60.11, "eval_steps_per_second": 1.883, "step": 15650 }, { "epoch": 0.84, "learning_rate": 0.0001925983207537544, "loss": 1.3987, "step": 15660 }, { "epoch": 0.84, "learning_rate": 0.00019258758045157602, "loss": 1.3268, "step": 15680 }, { "epoch": 0.84, "learning_rate": 0.0001925768401493976, "loss": 1.3587, "step": 15700 }, { "epoch": 0.84, "eval_loss": 1.3959128856658936, "eval_runtime": 49.9226, "eval_samples_per_second": 60.093, "eval_steps_per_second": 1.883, "step": 15700 }, { "epoch": 0.84, "learning_rate": 0.0001925660998472192, "loss": 1.3475, "step": 15720 }, { "epoch": 0.84, "learning_rate": 0.0001925553595450408, "loss": 1.3201, "step": 15740 }, { "epoch": 0.84, "eval_loss": 1.3962161540985107, "eval_runtime": 49.972, "eval_samples_per_second": 60.034, "eval_steps_per_second": 1.881, "step": 15750 }, { "epoch": 0.84, "learning_rate": 0.0001925446192428624, "loss": 1.3714, "step": 15760 }, { "epoch": 0.84, "learning_rate": 0.000192533878940684, "loss": 1.3036, "step": 15780 }, { "epoch": 0.84, "learning_rate": 0.0001925231386385056, "loss": 1.3653, "step": 15800 }, { "epoch": 0.84, "eval_loss": 1.395885705947876, "eval_runtime": 49.9021, "eval_samples_per_second": 60.118, "eval_steps_per_second": 1.884, "step": 15800 }, { "epoch": 0.85, "learning_rate": 0.00019251239833632718, "loss": 1.3571, "step": 15820 }, { "epoch": 0.85, "learning_rate": 0.0001925016580341488, "loss": 1.3387, "step": 15840 }, { "epoch": 0.85, "eval_loss": 1.3962838649749756, "eval_runtime": 49.9295, "eval_samples_per_second": 60.085, "eval_steps_per_second": 1.883, "step": 15850 }, { "epoch": 0.85, "learning_rate": 0.00019249091773197038, "loss": 1.3645, "step": 15860 }, { "epoch": 0.85, "learning_rate": 0.000192480177429792, "loss": 1.3541, "step": 15880 }, { "epoch": 0.85, "learning_rate": 0.0001924694371276136, "loss": 1.3139, "step": 15900 }, { "epoch": 0.85, "eval_loss": 1.3968653678894043, "eval_runtime": 49.9077, "eval_samples_per_second": 60.111, "eval_steps_per_second": 1.883, "step": 15900 }, { "epoch": 0.85, "learning_rate": 0.0001924586968254352, "loss": 1.4122, "step": 15920 }, { "epoch": 0.85, "learning_rate": 0.0001924479565232568, "loss": 1.3177, "step": 15940 }, { "epoch": 0.85, "eval_loss": 1.3964040279388428, "eval_runtime": 49.9666, "eval_samples_per_second": 60.04, "eval_steps_per_second": 1.881, "step": 15950 }, { "epoch": 0.85, "learning_rate": 0.0001924372162210784, "loss": 1.3521, "step": 15960 }, { "epoch": 0.85, "learning_rate": 0.00019242647591889998, "loss": 1.3372, "step": 15980 }, { "epoch": 0.85, "learning_rate": 0.0001924157356167216, "loss": 1.3797, "step": 16000 }, { "epoch": 0.85, "eval_loss": 1.3961364030838013, "eval_runtime": 49.8963, "eval_samples_per_second": 60.125, "eval_steps_per_second": 1.884, "step": 16000 }, { "epoch": 0.86, "learning_rate": 0.00019240499531454318, "loss": 1.339, "step": 16020 }, { "epoch": 0.86, "learning_rate": 0.0001923942550123648, "loss": 1.3424, "step": 16040 }, { "epoch": 0.86, "eval_loss": 1.3959347009658813, "eval_runtime": 49.9267, "eval_samples_per_second": 60.088, "eval_steps_per_second": 1.883, "step": 16050 }, { "epoch": 0.86, "learning_rate": 0.00019238351471018638, "loss": 1.3939, "step": 16060 }, { "epoch": 0.86, "learning_rate": 0.000192372774408008, "loss": 1.2875, "step": 16080 }, { "epoch": 0.86, "learning_rate": 0.00019236203410582958, "loss": 1.3484, "step": 16100 }, { "epoch": 0.86, "eval_loss": 1.3949804306030273, "eval_runtime": 49.8842, "eval_samples_per_second": 60.139, "eval_steps_per_second": 1.884, "step": 16100 }, { "epoch": 0.86, "learning_rate": 0.00019235129380365117, "loss": 1.3581, "step": 16120 }, { "epoch": 0.86, "learning_rate": 0.00019234055350147278, "loss": 1.3377, "step": 16140 }, { "epoch": 0.86, "eval_loss": 1.3949840068817139, "eval_runtime": 49.896, "eval_samples_per_second": 60.125, "eval_steps_per_second": 1.884, "step": 16150 }, { "epoch": 0.86, "learning_rate": 0.00019232981319929437, "loss": 1.3687, "step": 16160 }, { "epoch": 0.86, "learning_rate": 0.00019231907289711598, "loss": 1.3652, "step": 16180 }, { "epoch": 0.87, "learning_rate": 0.00019230833259493757, "loss": 1.354, "step": 16200 }, { "epoch": 0.87, "eval_loss": 1.394178867340088, "eval_runtime": 49.8937, "eval_samples_per_second": 60.128, "eval_steps_per_second": 1.884, "step": 16200 }, { "epoch": 0.87, "learning_rate": 0.00019229759229275916, "loss": 1.3611, "step": 16220 }, { "epoch": 0.87, "learning_rate": 0.00019228685199058077, "loss": 1.3312, "step": 16240 }, { "epoch": 0.87, "eval_loss": 1.395208716392517, "eval_runtime": 49.9448, "eval_samples_per_second": 60.066, "eval_steps_per_second": 1.882, "step": 16250 }, { "epoch": 0.87, "learning_rate": 0.00019227611168840236, "loss": 1.3424, "step": 16260 }, { "epoch": 0.87, "learning_rate": 0.00019226537138622397, "loss": 1.3186, "step": 16280 }, { "epoch": 0.87, "learning_rate": 0.00019225463108404556, "loss": 1.2937, "step": 16300 }, { "epoch": 0.87, "eval_loss": 1.3952096700668335, "eval_runtime": 49.8883, "eval_samples_per_second": 60.134, "eval_steps_per_second": 1.884, "step": 16300 }, { "epoch": 0.87, "learning_rate": 0.00019224389078186714, "loss": 1.3298, "step": 16320 }, { "epoch": 0.87, "learning_rate": 0.00019223315047968876, "loss": 1.3007, "step": 16340 }, { "epoch": 0.87, "eval_loss": 1.3952969312667847, "eval_runtime": 49.9449, "eval_samples_per_second": 60.066, "eval_steps_per_second": 1.882, "step": 16350 }, { "epoch": 0.87, "learning_rate": 0.00019222241017751034, "loss": 1.3539, "step": 16360 }, { "epoch": 0.88, "learning_rate": 0.00019221166987533196, "loss": 1.3331, "step": 16380 }, { "epoch": 0.88, "learning_rate": 0.00019220092957315354, "loss": 1.3799, "step": 16400 }, { "epoch": 0.88, "eval_loss": 1.3953661918640137, "eval_runtime": 49.9059, "eval_samples_per_second": 60.113, "eval_steps_per_second": 1.884, "step": 16400 }, { "epoch": 0.88, "learning_rate": 0.00019219018927097513, "loss": 1.3345, "step": 16420 }, { "epoch": 0.88, "learning_rate": 0.00019217944896879674, "loss": 1.349, "step": 16440 }, { "epoch": 0.88, "eval_loss": 1.3948606252670288, "eval_runtime": 49.9298, "eval_samples_per_second": 60.084, "eval_steps_per_second": 1.883, "step": 16450 }, { "epoch": 0.88, "learning_rate": 0.00019216870866661833, "loss": 1.32, "step": 16460 }, { "epoch": 0.88, "learning_rate": 0.00019215796836443994, "loss": 1.3409, "step": 16480 }, { "epoch": 0.88, "learning_rate": 0.00019214722806226153, "loss": 1.3658, "step": 16500 }, { "epoch": 0.88, "eval_loss": 1.3949049711227417, "eval_runtime": 49.8722, "eval_samples_per_second": 60.154, "eval_steps_per_second": 1.885, "step": 16500 }, { "epoch": 0.88, "learning_rate": 0.00019213648776008311, "loss": 1.3116, "step": 16520 }, { "epoch": 0.88, "learning_rate": 0.00019212574745790476, "loss": 1.3559, "step": 16540 }, { "epoch": 0.88, "eval_loss": 1.394840121269226, "eval_runtime": 49.9341, "eval_samples_per_second": 60.079, "eval_steps_per_second": 1.882, "step": 16550 }, { "epoch": 0.88, "learning_rate": 0.00019211500715572634, "loss": 1.3223, "step": 16560 }, { "epoch": 0.89, "learning_rate": 0.00019210426685354796, "loss": 1.3238, "step": 16580 }, { "epoch": 0.89, "learning_rate": 0.00019209352655136954, "loss": 1.3266, "step": 16600 }, { "epoch": 0.89, "eval_loss": 1.3949869871139526, "eval_runtime": 49.894, "eval_samples_per_second": 60.127, "eval_steps_per_second": 1.884, "step": 16600 }, { "epoch": 0.89, "learning_rate": 0.00019208278624919113, "loss": 1.3232, "step": 16620 }, { "epoch": 0.89, "learning_rate": 0.00019207204594701274, "loss": 1.3006, "step": 16640 }, { "epoch": 0.89, "eval_loss": 1.3941547870635986, "eval_runtime": 49.9595, "eval_samples_per_second": 60.049, "eval_steps_per_second": 1.882, "step": 16650 }, { "epoch": 0.89, "learning_rate": 0.00019206130564483433, "loss": 1.3501, "step": 16660 }, { "epoch": 0.89, "learning_rate": 0.00019205056534265594, "loss": 1.3213, "step": 16680 }, { "epoch": 0.89, "learning_rate": 0.00019203982504047753, "loss": 1.3421, "step": 16700 }, { "epoch": 0.89, "eval_loss": 1.394352674484253, "eval_runtime": 49.9085, "eval_samples_per_second": 60.11, "eval_steps_per_second": 1.883, "step": 16700 }, { "epoch": 0.89, "learning_rate": 0.00019202908473829911, "loss": 1.3582, "step": 16720 }, { "epoch": 0.89, "learning_rate": 0.00019201834443612073, "loss": 1.3153, "step": 16740 }, { "epoch": 0.89, "eval_loss": 1.394830584526062, "eval_runtime": 49.8738, "eval_samples_per_second": 60.152, "eval_steps_per_second": 1.885, "step": 16750 }, { "epoch": 0.9, "learning_rate": 0.00019200760413394231, "loss": 1.2934, "step": 16760 }, { "epoch": 0.9, "learning_rate": 0.00019199686383176393, "loss": 1.3533, "step": 16780 }, { "epoch": 0.9, "learning_rate": 0.00019198612352958551, "loss": 1.3527, "step": 16800 }, { "epoch": 0.9, "eval_loss": 1.3951090574264526, "eval_runtime": 49.8917, "eval_samples_per_second": 60.13, "eval_steps_per_second": 1.884, "step": 16800 }, { "epoch": 0.9, "learning_rate": 0.0001919753832274071, "loss": 1.3078, "step": 16820 }, { "epoch": 0.9, "learning_rate": 0.00019196464292522871, "loss": 1.3873, "step": 16840 }, { "epoch": 0.9, "eval_loss": 1.3950159549713135, "eval_runtime": 49.9075, "eval_samples_per_second": 60.111, "eval_steps_per_second": 1.883, "step": 16850 }, { "epoch": 0.9, "learning_rate": 0.0001919539026230503, "loss": 1.3461, "step": 16860 }, { "epoch": 0.9, "learning_rate": 0.00019194316232087191, "loss": 1.34, "step": 16880 }, { "epoch": 0.9, "learning_rate": 0.00019193295903380242, "loss": 1.2995, "step": 16900 }, { "epoch": 0.9, "eval_loss": 1.3943397998809814, "eval_runtime": 49.8823, "eval_samples_per_second": 60.142, "eval_steps_per_second": 1.884, "step": 16900 }, { "epoch": 0.9, "learning_rate": 0.000191922218731624, "loss": 1.3687, "step": 16920 }, { "epoch": 0.91, "learning_rate": 0.00019191147842944562, "loss": 1.3517, "step": 16940 }, { "epoch": 0.91, "eval_loss": 1.395455241203308, "eval_runtime": 49.9332, "eval_samples_per_second": 60.08, "eval_steps_per_second": 1.883, "step": 16950 }, { "epoch": 0.91, "learning_rate": 0.0001919007381272672, "loss": 1.3321, "step": 16960 }, { "epoch": 0.91, "learning_rate": 0.0001918899978250888, "loss": 1.2743, "step": 16980 }, { "epoch": 0.91, "learning_rate": 0.00019187925752291044, "loss": 1.3643, "step": 17000 }, { "epoch": 0.91, "eval_loss": 1.3946868181228638, "eval_runtime": 51.5397, "eval_samples_per_second": 58.208, "eval_steps_per_second": 1.824, "step": 17000 }, { "epoch": 0.91, "learning_rate": 0.00019186851722073202, "loss": 1.3694, "step": 17020 }, { "epoch": 0.91, "learning_rate": 0.0001918577769185536, "loss": 1.3556, "step": 17040 }, { "epoch": 0.91, "eval_loss": 1.3951468467712402, "eval_runtime": 53.3639, "eval_samples_per_second": 56.218, "eval_steps_per_second": 1.761, "step": 17050 }, { "epoch": 0.91, "learning_rate": 0.00019184703661637522, "loss": 1.29, "step": 17060 }, { "epoch": 0.91, "learning_rate": 0.0001918362963141968, "loss": 1.3841, "step": 17080 }, { "epoch": 0.91, "learning_rate": 0.00019182555601201842, "loss": 1.3496, "step": 17100 }, { "epoch": 0.91, "eval_loss": 1.3944398164749146, "eval_runtime": 53.236, "eval_samples_per_second": 56.353, "eval_steps_per_second": 1.766, "step": 17100 }, { "epoch": 0.91, "learning_rate": 0.00019181481570984, "loss": 1.3039, "step": 17120 }, { "epoch": 0.92, "learning_rate": 0.00019180407540766162, "loss": 1.3364, "step": 17140 }, { "epoch": 0.92, "eval_loss": 1.3947904109954834, "eval_runtime": 53.3188, "eval_samples_per_second": 56.265, "eval_steps_per_second": 1.763, "step": 17150 }, { "epoch": 0.92, "learning_rate": 0.0001917933351054832, "loss": 1.3622, "step": 17160 }, { "epoch": 0.92, "learning_rate": 0.0001917825948033048, "loss": 1.3147, "step": 17180 }, { "epoch": 0.92, "learning_rate": 0.0001917718545011264, "loss": 1.3449, "step": 17200 }, { "epoch": 0.92, "eval_loss": 1.3942961692810059, "eval_runtime": 53.2529, "eval_samples_per_second": 56.335, "eval_steps_per_second": 1.765, "step": 17200 }, { "epoch": 0.92, "learning_rate": 0.000191761114198948, "loss": 1.3413, "step": 17220 }, { "epoch": 0.92, "learning_rate": 0.0001917503738967696, "loss": 1.357, "step": 17240 }, { "epoch": 0.92, "eval_loss": 1.3949023485183716, "eval_runtime": 53.3355, "eval_samples_per_second": 56.248, "eval_steps_per_second": 1.762, "step": 17250 }, { "epoch": 0.92, "learning_rate": 0.0001917396335945912, "loss": 1.3537, "step": 17260 }, { "epoch": 0.92, "learning_rate": 0.00019172889329241278, "loss": 1.3437, "step": 17280 }, { "epoch": 0.92, "learning_rate": 0.0001917181529902344, "loss": 1.3773, "step": 17300 }, { "epoch": 0.92, "eval_loss": 1.395226240158081, "eval_runtime": 53.1999, "eval_samples_per_second": 56.391, "eval_steps_per_second": 1.767, "step": 17300 }, { "epoch": 0.93, "learning_rate": 0.00019170741268805598, "loss": 1.3632, "step": 17320 }, { "epoch": 0.93, "learning_rate": 0.0001916966723858776, "loss": 1.3362, "step": 17340 }, { "epoch": 0.93, "eval_loss": 1.3946948051452637, "eval_runtime": 53.2978, "eval_samples_per_second": 56.287, "eval_steps_per_second": 1.764, "step": 17350 }, { "epoch": 0.93, "learning_rate": 0.00019168593208369918, "loss": 1.3758, "step": 17360 }, { "epoch": 0.93, "learning_rate": 0.00019167519178152077, "loss": 1.3908, "step": 17380 }, { "epoch": 0.93, "learning_rate": 0.00019166445147934238, "loss": 1.328, "step": 17400 }, { "epoch": 0.93, "eval_loss": 1.3953857421875, "eval_runtime": 53.208, "eval_samples_per_second": 56.382, "eval_steps_per_second": 1.767, "step": 17400 }, { "epoch": 0.93, "learning_rate": 0.00019165371117716397, "loss": 1.32, "step": 17420 }, { "epoch": 0.93, "learning_rate": 0.00019164297087498558, "loss": 1.3451, "step": 17440 }, { "epoch": 0.93, "eval_loss": 1.395330786705017, "eval_runtime": 53.3471, "eval_samples_per_second": 56.235, "eval_steps_per_second": 1.762, "step": 17450 }, { "epoch": 0.93, "learning_rate": 0.00019163223057280717, "loss": 1.301, "step": 17460 }, { "epoch": 0.93, "learning_rate": 0.00019162149027062876, "loss": 1.3185, "step": 17480 }, { "epoch": 0.94, "learning_rate": 0.00019161074996845037, "loss": 1.3473, "step": 17500 }, { "epoch": 0.94, "eval_loss": 1.3948390483856201, "eval_runtime": 53.2209, "eval_samples_per_second": 56.369, "eval_steps_per_second": 1.766, "step": 17500 }, { "epoch": 0.94, "learning_rate": 0.00019160000966627196, "loss": 1.3131, "step": 17520 }, { "epoch": 0.94, "learning_rate": 0.00019158926936409357, "loss": 1.3577, "step": 17540 }, { "epoch": 0.94, "eval_loss": 1.3952394723892212, "eval_runtime": 53.3853, "eval_samples_per_second": 56.195, "eval_steps_per_second": 1.761, "step": 17550 }, { "epoch": 0.94, "learning_rate": 0.00019157852906191516, "loss": 1.3114, "step": 17560 }, { "epoch": 0.94, "learning_rate": 0.00019156778875973674, "loss": 1.301, "step": 17580 }, { "epoch": 0.94, "learning_rate": 0.00019155704845755836, "loss": 1.3607, "step": 17600 }, { "epoch": 0.94, "eval_loss": 1.3949131965637207, "eval_runtime": 53.3252, "eval_samples_per_second": 56.259, "eval_steps_per_second": 1.763, "step": 17600 }, { "epoch": 0.94, "learning_rate": 0.00019154630815537994, "loss": 1.3334, "step": 17620 }, { "epoch": 0.94, "learning_rate": 0.00019153556785320158, "loss": 1.3423, "step": 17640 }, { "epoch": 0.94, "eval_loss": 1.3943798542022705, "eval_runtime": 53.2682, "eval_samples_per_second": 56.319, "eval_steps_per_second": 1.765, "step": 17650 }, { "epoch": 0.94, "learning_rate": 0.00019152482755102317, "loss": 1.3629, "step": 17660 }, { "epoch": 0.94, "learning_rate": 0.00019151408724884476, "loss": 1.3538, "step": 17680 }, { "epoch": 0.95, "learning_rate": 0.00019150334694666637, "loss": 1.333, "step": 17700 }, { "epoch": 0.95, "eval_loss": 1.3944674730300903, "eval_runtime": 53.2835, "eval_samples_per_second": 56.303, "eval_steps_per_second": 1.764, "step": 17700 }, { "epoch": 0.95, "learning_rate": 0.00019149260664448796, "loss": 1.2652, "step": 17720 }, { "epoch": 0.95, "learning_rate": 0.00019148186634230957, "loss": 1.3316, "step": 17740 }, { "epoch": 0.95, "eval_loss": 1.3947995901107788, "eval_runtime": 53.3034, "eval_samples_per_second": 56.282, "eval_steps_per_second": 1.763, "step": 17750 }, { "epoch": 0.95, "learning_rate": 0.00019147112604013116, "loss": 1.3615, "step": 17760 }, { "epoch": 0.95, "learning_rate": 0.00019146038573795274, "loss": 1.3342, "step": 17780 }, { "epoch": 0.95, "learning_rate": 0.00019144964543577436, "loss": 1.3409, "step": 17800 }, { "epoch": 0.95, "eval_loss": 1.3946611881256104, "eval_runtime": 53.2382, "eval_samples_per_second": 56.351, "eval_steps_per_second": 1.766, "step": 17800 }, { "epoch": 0.95, "learning_rate": 0.00019143890513359594, "loss": 1.3304, "step": 17820 }, { "epoch": 0.95, "learning_rate": 0.00019142816483141756, "loss": 1.3312, "step": 17840 }, { "epoch": 0.95, "eval_loss": 1.3946311473846436, "eval_runtime": 53.3606, "eval_samples_per_second": 56.221, "eval_steps_per_second": 1.762, "step": 17850 }, { "epoch": 0.95, "learning_rate": 0.00019141742452923914, "loss": 1.269, "step": 17860 }, { "epoch": 0.96, "learning_rate": 0.00019140668422706073, "loss": 1.3386, "step": 17880 }, { "epoch": 0.96, "learning_rate": 0.00019139594392488234, "loss": 1.3227, "step": 17900 }, { "epoch": 0.96, "eval_loss": 1.3940836191177368, "eval_runtime": 53.3243, "eval_samples_per_second": 56.26, "eval_steps_per_second": 1.763, "step": 17900 }, { "epoch": 0.96, "learning_rate": 0.00019138520362270393, "loss": 1.3971, "step": 17920 }, { "epoch": 0.96, "learning_rate": 0.00019137446332052554, "loss": 1.3472, "step": 17940 }, { "epoch": 0.96, "eval_loss": 1.395328402519226, "eval_runtime": 53.2405, "eval_samples_per_second": 56.348, "eval_steps_per_second": 1.766, "step": 17950 }, { "epoch": 0.96, "learning_rate": 0.00019136372301834713, "loss": 1.3437, "step": 17960 }, { "epoch": 0.96, "learning_rate": 0.00019135298271616871, "loss": 1.3322, "step": 17980 }, { "epoch": 0.96, "learning_rate": 0.00019134224241399033, "loss": 1.3706, "step": 18000 }, { "epoch": 0.96, "eval_loss": 1.3958821296691895, "eval_runtime": 53.3164, "eval_samples_per_second": 56.268, "eval_steps_per_second": 1.763, "step": 18000 }, { "epoch": 0.96, "learning_rate": 0.00019133150211181191, "loss": 1.3879, "step": 18020 }, { "epoch": 0.96, "learning_rate": 0.00019132076180963353, "loss": 1.3791, "step": 18040 }, { "epoch": 0.96, "eval_loss": 1.3955086469650269, "eval_runtime": 53.3188, "eval_samples_per_second": 56.265, "eval_steps_per_second": 1.763, "step": 18050 }, { "epoch": 0.96, "learning_rate": 0.00019131002150745511, "loss": 1.3205, "step": 18060 }, { "epoch": 0.97, "learning_rate": 0.0001912992812052767, "loss": 1.3406, "step": 18080 }, { "epoch": 0.97, "learning_rate": 0.00019128854090309831, "loss": 1.3155, "step": 18100 }, { "epoch": 0.97, "eval_loss": 1.3944337368011475, "eval_runtime": 53.2487, "eval_samples_per_second": 56.339, "eval_steps_per_second": 1.765, "step": 18100 }, { "epoch": 0.97, "learning_rate": 0.0001912778006009199, "loss": 1.3242, "step": 18120 }, { "epoch": 0.97, "learning_rate": 0.00019126706029874151, "loss": 1.3449, "step": 18140 }, { "epoch": 0.97, "eval_loss": 1.3948968648910522, "eval_runtime": 53.2872, "eval_samples_per_second": 56.299, "eval_steps_per_second": 1.764, "step": 18150 }, { "epoch": 0.97, "learning_rate": 0.0001912563199965631, "loss": 1.3425, "step": 18160 }, { "epoch": 0.97, "learning_rate": 0.00019124557969438471, "loss": 1.3077, "step": 18180 }, { "epoch": 0.97, "learning_rate": 0.0001912348393922063, "loss": 1.363, "step": 18200 }, { "epoch": 0.97, "eval_loss": 1.3944798707962036, "eval_runtime": 53.2867, "eval_samples_per_second": 56.299, "eval_steps_per_second": 1.764, "step": 18200 }, { "epoch": 0.97, "learning_rate": 0.0001912240990900279, "loss": 1.3584, "step": 18220 }, { "epoch": 0.97, "learning_rate": 0.0001912133587878495, "loss": 1.3348, "step": 18240 }, { "epoch": 0.98, "eval_loss": 1.3941653966903687, "eval_runtime": 53.3131, "eval_samples_per_second": 56.271, "eval_steps_per_second": 1.763, "step": 18250 }, { "epoch": 0.98, "learning_rate": 0.00019120261848567111, "loss": 1.3377, "step": 18260 }, { "epoch": 0.98, "learning_rate": 0.0001911918781834927, "loss": 1.3091, "step": 18280 }, { "epoch": 0.98, "learning_rate": 0.00019118113788131431, "loss": 1.3352, "step": 18300 }, { "epoch": 0.98, "eval_loss": 1.3937102556228638, "eval_runtime": 53.2965, "eval_samples_per_second": 56.289, "eval_steps_per_second": 1.764, "step": 18300 }, { "epoch": 0.98, "learning_rate": 0.0001911703975791359, "loss": 1.3669, "step": 18320 }, { "epoch": 0.98, "learning_rate": 0.00019115965727695751, "loss": 1.3617, "step": 18340 }, { "epoch": 0.98, "eval_loss": 1.3937897682189941, "eval_runtime": 53.3594, "eval_samples_per_second": 56.223, "eval_steps_per_second": 1.762, "step": 18350 }, { "epoch": 0.98, "learning_rate": 0.0001911489169747791, "loss": 1.3196, "step": 18360 }, { "epoch": 0.98, "learning_rate": 0.0001911381766726007, "loss": 1.3334, "step": 18380 }, { "epoch": 0.98, "learning_rate": 0.0001911274363704223, "loss": 1.3616, "step": 18400 }, { "epoch": 0.98, "eval_loss": 1.3939082622528076, "eval_runtime": 53.248, "eval_samples_per_second": 56.34, "eval_steps_per_second": 1.765, "step": 18400 }, { "epoch": 0.98, "learning_rate": 0.0001911166960682439, "loss": 1.3552, "step": 18420 }, { "epoch": 0.99, "learning_rate": 0.0001911059557660655, "loss": 1.3178, "step": 18440 }, { "epoch": 0.99, "eval_loss": 1.3946744203567505, "eval_runtime": 53.3048, "eval_samples_per_second": 56.28, "eval_steps_per_second": 1.763, "step": 18450 }, { "epoch": 0.99, "learning_rate": 0.0001910952154638871, "loss": 1.3254, "step": 18460 }, { "epoch": 0.99, "learning_rate": 0.00019108447516170867, "loss": 1.3107, "step": 18480 }, { "epoch": 0.99, "learning_rate": 0.0001910737348595303, "loss": 1.3284, "step": 18500 }, { "epoch": 0.99, "eval_loss": 1.3947073221206665, "eval_runtime": 53.2779, "eval_samples_per_second": 56.309, "eval_steps_per_second": 1.764, "step": 18500 }, { "epoch": 0.99, "learning_rate": 0.00019106299455735187, "loss": 1.3478, "step": 18520 }, { "epoch": 0.99, "learning_rate": 0.0001910522542551735, "loss": 1.3324, "step": 18540 }, { "epoch": 0.99, "eval_loss": 1.3934414386749268, "eval_runtime": 53.3115, "eval_samples_per_second": 56.273, "eval_steps_per_second": 1.763, "step": 18550 }, { "epoch": 0.99, "learning_rate": 0.00019104151395299507, "loss": 1.3273, "step": 18560 }, { "epoch": 0.99, "learning_rate": 0.0001910307736508167, "loss": 1.3493, "step": 18580 }, { "epoch": 0.99, "learning_rate": 0.00019102003334863827, "loss": 1.3364, "step": 18600 }, { "epoch": 0.99, "eval_loss": 1.3933128118515015, "eval_runtime": 53.3449, "eval_samples_per_second": 56.238, "eval_steps_per_second": 1.762, "step": 18600 }, { "epoch": 0.99, "learning_rate": 0.00019100929304645986, "loss": 1.3448, "step": 18620 }, { "epoch": 1.0, "learning_rate": 0.00019099855274428147, "loss": 1.3668, "step": 18640 }, { "epoch": 1.0, "eval_loss": 1.3934011459350586, "eval_runtime": 53.3012, "eval_samples_per_second": 56.284, "eval_steps_per_second": 1.764, "step": 18650 }, { "epoch": 1.0, "learning_rate": 0.00019098781244210306, "loss": 1.3405, "step": 18660 }, { "epoch": 1.0, "learning_rate": 0.00019097707213992467, "loss": 1.3624, "step": 18680 }, { "epoch": 1.0, "learning_rate": 0.00019096633183774626, "loss": 1.335, "step": 18700 }, { "epoch": 1.0, "eval_loss": 1.3939635753631592, "eval_runtime": 53.2501, "eval_samples_per_second": 56.338, "eval_steps_per_second": 1.765, "step": 18700 }, { "epoch": 1.0, "learning_rate": 0.00019095559153556785, "loss": 1.3386, "step": 18720 }, { "epoch": 1.0, "learning_rate": 0.00019094485123338946, "loss": 1.3055, "step": 18740 }, { "epoch": 1.0, "eval_loss": 1.3944073915481567, "eval_runtime": 53.2526, "eval_samples_per_second": 56.335, "eval_steps_per_second": 1.765, "step": 18750 }, { "epoch": 1.0, "learning_rate": 0.00019093411093121105, "loss": 1.3419, "step": 18760 }, { "epoch": 1.0, "learning_rate": 0.00019092337062903266, "loss": 1.3359, "step": 18780 }, { "epoch": 1.0, "learning_rate": 0.00019091263032685425, "loss": 1.3629, "step": 18800 }, { "epoch": 1.0, "eval_loss": 1.3944207429885864, "eval_runtime": 53.2849, "eval_samples_per_second": 56.301, "eval_steps_per_second": 1.764, "step": 18800 }, { "epoch": 1.01, "learning_rate": 0.00019090189002467583, "loss": 1.3263, "step": 18820 }, { "epoch": 1.01, "learning_rate": 0.00019089114972249745, "loss": 1.3364, "step": 18840 }, { "epoch": 1.01, "eval_loss": 1.394610047340393, "eval_runtime": 53.2573, "eval_samples_per_second": 56.33, "eval_steps_per_second": 1.765, "step": 18850 }, { "epoch": 1.01, "learning_rate": 0.00019088040942031903, "loss": 1.3744, "step": 18860 }, { "epoch": 1.01, "learning_rate": 0.00019086966911814065, "loss": 1.343, "step": 18880 }, { "epoch": 1.01, "learning_rate": 0.00019085892881596226, "loss": 1.3077, "step": 18900 }, { "epoch": 1.01, "eval_loss": 1.393605351448059, "eval_runtime": 53.3174, "eval_samples_per_second": 56.267, "eval_steps_per_second": 1.763, "step": 18900 }, { "epoch": 1.01, "learning_rate": 0.00019084818851378385, "loss": 1.3005, "step": 18920 }, { "epoch": 1.01, "learning_rate": 0.00019083744821160546, "loss": 1.3271, "step": 18940 }, { "epoch": 1.01, "eval_loss": 1.3948262929916382, "eval_runtime": 53.304, "eval_samples_per_second": 56.281, "eval_steps_per_second": 1.763, "step": 18950 }, { "epoch": 1.01, "learning_rate": 0.00019082670790942705, "loss": 1.3305, "step": 18960 }, { "epoch": 1.01, "learning_rate": 0.00019081596760724863, "loss": 1.3139, "step": 18980 }, { "epoch": 1.02, "learning_rate": 0.00019080522730507025, "loss": 1.3215, "step": 19000 }, { "epoch": 1.02, "eval_loss": 1.3937315940856934, "eval_runtime": 53.2895, "eval_samples_per_second": 56.296, "eval_steps_per_second": 1.764, "step": 19000 }, { "epoch": 1.02, "learning_rate": 0.00019079448700289183, "loss": 1.3198, "step": 19020 }, { "epoch": 1.02, "learning_rate": 0.00019078374670071345, "loss": 1.3102, "step": 19040 }, { "epoch": 1.02, "eval_loss": 1.3946858644485474, "eval_runtime": 53.307, "eval_samples_per_second": 56.278, "eval_steps_per_second": 1.763, "step": 19050 }, { "epoch": 1.02, "learning_rate": 0.00019077300639853503, "loss": 1.3445, "step": 19060 }, { "epoch": 1.02, "learning_rate": 0.00019076226609635665, "loss": 1.3052, "step": 19080 }, { "epoch": 1.02, "learning_rate": 0.00019075152579417823, "loss": 1.3661, "step": 19100 }, { "epoch": 1.02, "eval_loss": 1.3941792249679565, "eval_runtime": 53.2371, "eval_samples_per_second": 56.352, "eval_steps_per_second": 1.766, "step": 19100 }, { "epoch": 1.02, "learning_rate": 0.00019074078549199982, "loss": 1.3194, "step": 19120 }, { "epoch": 1.02, "learning_rate": 0.00019073004518982143, "loss": 1.3014, "step": 19140 }, { "epoch": 1.02, "eval_loss": 1.3942538499832153, "eval_runtime": 53.3452, "eval_samples_per_second": 56.237, "eval_steps_per_second": 1.762, "step": 19150 }, { "epoch": 1.02, "learning_rate": 0.00019071930488764302, "loss": 1.3853, "step": 19160 }, { "epoch": 1.02, "learning_rate": 0.00019070856458546463, "loss": 1.3465, "step": 19180 }, { "epoch": 1.03, "learning_rate": 0.00019069782428328622, "loss": 1.3253, "step": 19200 }, { "epoch": 1.03, "eval_loss": 1.395198106765747, "eval_runtime": 53.2693, "eval_samples_per_second": 56.318, "eval_steps_per_second": 1.765, "step": 19200 }, { "epoch": 1.03, "learning_rate": 0.0001906870839811078, "loss": 1.3793, "step": 19220 }, { "epoch": 1.03, "learning_rate": 0.00019067688069403834, "loss": 1.3506, "step": 19240 }, { "epoch": 1.03, "eval_loss": 1.3944871425628662, "eval_runtime": 53.3343, "eval_samples_per_second": 56.249, "eval_steps_per_second": 1.762, "step": 19250 }, { "epoch": 1.03, "learning_rate": 0.00019066614039185993, "loss": 1.3374, "step": 19260 }, { "epoch": 1.03, "learning_rate": 0.00019065540008968151, "loss": 1.309, "step": 19280 }, { "epoch": 1.03, "learning_rate": 0.00019064465978750313, "loss": 1.3585, "step": 19300 }, { "epoch": 1.03, "eval_loss": 1.395226240158081, "eval_runtime": 53.2928, "eval_samples_per_second": 56.293, "eval_steps_per_second": 1.764, "step": 19300 }, { "epoch": 1.03, "learning_rate": 0.00019063391948532471, "loss": 1.3599, "step": 19320 }, { "epoch": 1.03, "learning_rate": 0.00019062317918314633, "loss": 1.3141, "step": 19340 }, { "epoch": 1.03, "eval_loss": 1.3945953845977783, "eval_runtime": 53.2692, "eval_samples_per_second": 56.318, "eval_steps_per_second": 1.765, "step": 19350 }, { "epoch": 1.03, "learning_rate": 0.00019061243888096794, "loss": 1.3673, "step": 19360 }, { "epoch": 1.04, "learning_rate": 0.00019060169857878953, "loss": 1.338, "step": 19380 }, { "epoch": 1.04, "learning_rate": 0.00019059095827661114, "loss": 1.3623, "step": 19400 }, { "epoch": 1.04, "eval_loss": 1.3945705890655518, "eval_runtime": 53.2984, "eval_samples_per_second": 56.287, "eval_steps_per_second": 1.764, "step": 19400 }, { "epoch": 1.04, "learning_rate": 0.00019058021797443273, "loss": 1.3656, "step": 19420 }, { "epoch": 1.04, "learning_rate": 0.00019056947767225431, "loss": 1.3009, "step": 19440 }, { "epoch": 1.04, "eval_loss": 1.3948311805725098, "eval_runtime": 53.2755, "eval_samples_per_second": 56.311, "eval_steps_per_second": 1.764, "step": 19450 }, { "epoch": 1.04, "learning_rate": 0.00019055873737007593, "loss": 1.3065, "step": 19460 }, { "epoch": 1.04, "learning_rate": 0.00019054799706789751, "loss": 1.3115, "step": 19480 }, { "epoch": 1.04, "learning_rate": 0.00019053725676571913, "loss": 1.3531, "step": 19500 }, { "epoch": 1.04, "eval_loss": 1.3938305377960205, "eval_runtime": 53.2725, "eval_samples_per_second": 56.314, "eval_steps_per_second": 1.765, "step": 19500 }, { "epoch": 1.04, "learning_rate": 0.00019052651646354071, "loss": 1.3467, "step": 19520 }, { "epoch": 1.04, "learning_rate": 0.0001905157761613623, "loss": 1.3106, "step": 19540 }, { "epoch": 1.04, "eval_loss": 1.3940069675445557, "eval_runtime": 53.2839, "eval_samples_per_second": 56.302, "eval_steps_per_second": 1.764, "step": 19550 }, { "epoch": 1.05, "learning_rate": 0.00019050503585918391, "loss": 1.345, "step": 19560 }, { "epoch": 1.05, "learning_rate": 0.0001904942955570055, "loss": 1.3822, "step": 19580 }, { "epoch": 1.05, "learning_rate": 0.00019048355525482711, "loss": 1.3011, "step": 19600 }, { "epoch": 1.05, "eval_loss": 1.3947548866271973, "eval_runtime": 53.327, "eval_samples_per_second": 56.257, "eval_steps_per_second": 1.763, "step": 19600 }, { "epoch": 1.05, "learning_rate": 0.0001904728149526487, "loss": 1.3236, "step": 19620 }, { "epoch": 1.05, "learning_rate": 0.0001904620746504703, "loss": 1.3328, "step": 19640 }, { "epoch": 1.05, "eval_loss": 1.3952617645263672, "eval_runtime": 53.2948, "eval_samples_per_second": 56.291, "eval_steps_per_second": 1.764, "step": 19650 }, { "epoch": 1.05, "learning_rate": 0.0001904513343482919, "loss": 1.3472, "step": 19660 }, { "epoch": 1.05, "learning_rate": 0.0001904405940461135, "loss": 1.3257, "step": 19680 }, { "epoch": 1.05, "learning_rate": 0.0001904298537439351, "loss": 1.3337, "step": 19700 }, { "epoch": 1.05, "eval_loss": 1.3953675031661987, "eval_runtime": 53.266, "eval_samples_per_second": 56.321, "eval_steps_per_second": 1.765, "step": 19700 }, { "epoch": 1.05, "learning_rate": 0.0001904191134417567, "loss": 1.3463, "step": 19720 }, { "epoch": 1.05, "learning_rate": 0.0001904083731395783, "loss": 1.3198, "step": 19740 }, { "epoch": 1.06, "eval_loss": 1.3945834636688232, "eval_runtime": 53.313, "eval_samples_per_second": 56.271, "eval_steps_per_second": 1.763, "step": 19750 }, { "epoch": 1.06, "learning_rate": 0.0001903976328373999, "loss": 1.3042, "step": 19760 }, { "epoch": 1.06, "learning_rate": 0.00019038689253522147, "loss": 1.3546, "step": 19780 }, { "epoch": 1.06, "learning_rate": 0.0001903761522330431, "loss": 1.3307, "step": 19800 }, { "epoch": 1.06, "eval_loss": 1.3953876495361328, "eval_runtime": 53.3066, "eval_samples_per_second": 56.278, "eval_steps_per_second": 1.763, "step": 19800 }, { "epoch": 1.06, "learning_rate": 0.00019036541193086467, "loss": 1.3741, "step": 19820 }, { "epoch": 1.06, "learning_rate": 0.0001903546716286863, "loss": 1.3176, "step": 19840 }, { "epoch": 1.06, "eval_loss": 1.3955553770065308, "eval_runtime": 53.2937, "eval_samples_per_second": 56.292, "eval_steps_per_second": 1.764, "step": 19850 }, { "epoch": 1.06, "learning_rate": 0.00019034393132650787, "loss": 1.3586, "step": 19860 }, { "epoch": 1.06, "learning_rate": 0.00019033319102432946, "loss": 1.3176, "step": 19880 }, { "epoch": 1.06, "learning_rate": 0.00019032245072215107, "loss": 1.3215, "step": 19900 }, { "epoch": 1.06, "eval_loss": 1.3952226638793945, "eval_runtime": 53.2682, "eval_samples_per_second": 56.319, "eval_steps_per_second": 1.765, "step": 19900 }, { "epoch": 1.06, "learning_rate": 0.00019031171041997266, "loss": 1.3255, "step": 19920 }, { "epoch": 1.07, "learning_rate": 0.00019030097011779427, "loss": 1.3072, "step": 19940 }, { "epoch": 1.07, "eval_loss": 1.3949054479599, "eval_runtime": 53.2964, "eval_samples_per_second": 56.289, "eval_steps_per_second": 1.764, "step": 19950 }, { "epoch": 1.07, "learning_rate": 0.00019029022981561586, "loss": 1.3479, "step": 19960 }, { "epoch": 1.07, "learning_rate": 0.00019027948951343745, "loss": 1.317, "step": 19980 }, { "epoch": 1.07, "learning_rate": 0.0001902687492112591, "loss": 1.3354, "step": 20000 }, { "epoch": 1.07, "eval_loss": 1.39454185962677, "eval_runtime": 53.2971, "eval_samples_per_second": 56.288, "eval_steps_per_second": 1.764, "step": 20000 }, { "epoch": 1.07, "learning_rate": 0.00019025800890908067, "loss": 1.3244, "step": 20020 }, { "epoch": 1.07, "learning_rate": 0.00019024726860690226, "loss": 1.3621, "step": 20040 }, { "epoch": 1.07, "eval_loss": 1.3944255113601685, "eval_runtime": 53.3682, "eval_samples_per_second": 56.213, "eval_steps_per_second": 1.761, "step": 20050 }, { "epoch": 1.07, "learning_rate": 0.00019023652830472387, "loss": 1.3042, "step": 20060 }, { "epoch": 1.07, "learning_rate": 0.00019022578800254546, "loss": 1.2605, "step": 20080 }, { "epoch": 1.07, "learning_rate": 0.00019021504770036707, "loss": 1.3017, "step": 20100 }, { "epoch": 1.07, "eval_loss": 1.395467758178711, "eval_runtime": 53.2868, "eval_samples_per_second": 56.299, "eval_steps_per_second": 1.764, "step": 20100 }, { "epoch": 1.08, "learning_rate": 0.00019020430739818866, "loss": 1.3345, "step": 20120 }, { "epoch": 1.08, "learning_rate": 0.00019019356709601027, "loss": 1.3217, "step": 20140 }, { "epoch": 1.08, "eval_loss": 1.3947306871414185, "eval_runtime": 53.3338, "eval_samples_per_second": 56.25, "eval_steps_per_second": 1.762, "step": 20150 }, { "epoch": 1.08, "learning_rate": 0.00019018282679383186, "loss": 1.3015, "step": 20160 }, { "epoch": 1.08, "learning_rate": 0.00019017208649165345, "loss": 1.3573, "step": 20180 }, { "epoch": 1.08, "learning_rate": 0.00019016134618947506, "loss": 1.3239, "step": 20200 }, { "epoch": 1.08, "eval_loss": 1.394304633140564, "eval_runtime": 53.3159, "eval_samples_per_second": 56.268, "eval_steps_per_second": 1.763, "step": 20200 }, { "epoch": 1.08, "learning_rate": 0.00019015060588729665, "loss": 1.3383, "step": 20220 }, { "epoch": 1.08, "learning_rate": 0.00019013986558511826, "loss": 1.3567, "step": 20240 }, { "epoch": 1.08, "eval_loss": 1.3949447870254517, "eval_runtime": 53.286, "eval_samples_per_second": 56.3, "eval_steps_per_second": 1.764, "step": 20250 }, { "epoch": 1.08, "learning_rate": 0.00019012912528293985, "loss": 1.3843, "step": 20260 }, { "epoch": 1.08, "learning_rate": 0.00019011838498076143, "loss": 1.3357, "step": 20280 }, { "epoch": 1.08, "learning_rate": 0.00019010764467858305, "loss": 1.3138, "step": 20300 }, { "epoch": 1.08, "eval_loss": 1.3950196504592896, "eval_runtime": 53.2602, "eval_samples_per_second": 56.327, "eval_steps_per_second": 1.765, "step": 20300 }, { "epoch": 1.09, "learning_rate": 0.00019009690437640463, "loss": 1.2994, "step": 20320 }, { "epoch": 1.09, "learning_rate": 0.00019008616407422625, "loss": 1.3119, "step": 20340 }, { "epoch": 1.09, "eval_loss": 1.3942936658859253, "eval_runtime": 53.3464, "eval_samples_per_second": 56.236, "eval_steps_per_second": 1.762, "step": 20350 }, { "epoch": 1.09, "learning_rate": 0.00019007542377204783, "loss": 1.3429, "step": 20360 }, { "epoch": 1.09, "learning_rate": 0.00019006468346986942, "loss": 1.3365, "step": 20380 }, { "epoch": 1.09, "learning_rate": 0.00019005394316769103, "loss": 1.3155, "step": 20400 }, { "epoch": 1.09, "eval_loss": 1.3946025371551514, "eval_runtime": 53.3252, "eval_samples_per_second": 56.259, "eval_steps_per_second": 1.763, "step": 20400 }, { "epoch": 1.09, "learning_rate": 0.00019004320286551262, "loss": 1.315, "step": 20420 }, { "epoch": 1.09, "learning_rate": 0.00019003246256333423, "loss": 1.3131, "step": 20440 }, { "epoch": 1.09, "eval_loss": 1.394667625427246, "eval_runtime": 53.3505, "eval_samples_per_second": 56.232, "eval_steps_per_second": 1.762, "step": 20450 }, { "epoch": 1.09, "learning_rate": 0.00019002172226115582, "loss": 1.3526, "step": 20460 }, { "epoch": 1.09, "learning_rate": 0.0001900109819589774, "loss": 1.3267, "step": 20480 }, { "epoch": 1.1, "learning_rate": 0.00019000024165679902, "loss": 1.3102, "step": 20500 }, { "epoch": 1.1, "eval_loss": 1.394671082496643, "eval_runtime": 53.3613, "eval_samples_per_second": 56.22, "eval_steps_per_second": 1.762, "step": 20500 }, { "epoch": 1.1, "learning_rate": 0.0001899895013546206, "loss": 1.3495, "step": 20520 }, { "epoch": 1.1, "learning_rate": 0.00018997876105244222, "loss": 1.3449, "step": 20540 }, { "epoch": 1.1, "eval_loss": 1.39447820186615, "eval_runtime": 53.3328, "eval_samples_per_second": 56.251, "eval_steps_per_second": 1.763, "step": 20550 }, { "epoch": 1.1, "learning_rate": 0.0001899680207502638, "loss": 1.3332, "step": 20560 }, { "epoch": 1.1, "learning_rate": 0.0001899572804480854, "loss": 1.3432, "step": 20580 }, { "epoch": 1.1, "learning_rate": 0.000189946540145907, "loss": 1.312, "step": 20600 }, { "epoch": 1.1, "eval_loss": 1.394857406616211, "eval_runtime": 53.318, "eval_samples_per_second": 56.266, "eval_steps_per_second": 1.763, "step": 20600 }, { "epoch": 1.1, "learning_rate": 0.00018993633685883754, "loss": 1.3569, "step": 20620 }, { "epoch": 1.1, "learning_rate": 0.00018992559655665913, "loss": 1.2998, "step": 20640 }, { "epoch": 1.1, "eval_loss": 1.3954495191574097, "eval_runtime": 53.3831, "eval_samples_per_second": 56.198, "eval_steps_per_second": 1.761, "step": 20650 }, { "epoch": 1.1, "learning_rate": 0.00018991485625448074, "loss": 1.3425, "step": 20660 }, { "epoch": 1.1, "learning_rate": 0.00018990411595230233, "loss": 1.3333, "step": 20680 }, { "epoch": 1.11, "learning_rate": 0.00018989337565012391, "loss": 1.3849, "step": 20700 }, { "epoch": 1.11, "eval_loss": 1.3940949440002441, "eval_runtime": 53.2672, "eval_samples_per_second": 56.32, "eval_steps_per_second": 1.765, "step": 20700 }, { "epoch": 1.11, "learning_rate": 0.00018988263534794553, "loss": 1.2803, "step": 20720 }, { "epoch": 1.11, "learning_rate": 0.00018987189504576711, "loss": 1.3092, "step": 20740 }, { "epoch": 1.11, "eval_loss": 1.3946436643600464, "eval_runtime": 53.3576, "eval_samples_per_second": 56.224, "eval_steps_per_second": 1.762, "step": 20750 }, { "epoch": 1.11, "learning_rate": 0.00018986115474358873, "loss": 1.3563, "step": 20760 }, { "epoch": 1.11, "learning_rate": 0.00018985041444141031, "loss": 1.2947, "step": 20780 }, { "epoch": 1.11, "learning_rate": 0.00018983967413923193, "loss": 1.3516, "step": 20800 }, { "epoch": 1.11, "eval_loss": 1.3943217992782593, "eval_runtime": 53.3099, "eval_samples_per_second": 56.275, "eval_steps_per_second": 1.763, "step": 20800 }, { "epoch": 1.11, "learning_rate": 0.00018982893383705351, "loss": 1.3147, "step": 20820 }, { "epoch": 1.11, "learning_rate": 0.0001898181935348751, "loss": 1.3344, "step": 20840 }, { "epoch": 1.11, "eval_loss": 1.3943856954574585, "eval_runtime": 49.9339, "eval_samples_per_second": 60.079, "eval_steps_per_second": 1.882, "step": 20850 }, { "epoch": 1.11, "learning_rate": 0.00018980745323269671, "loss": 1.3453, "step": 20860 }, { "epoch": 1.12, "learning_rate": 0.0001897967129305183, "loss": 1.2903, "step": 20880 }, { "epoch": 1.12, "learning_rate": 0.00018978597262833991, "loss": 1.3583, "step": 20900 }, { "epoch": 1.12, "eval_loss": 1.3942972421646118, "eval_runtime": 49.8709, "eval_samples_per_second": 60.155, "eval_steps_per_second": 1.885, "step": 20900 }, { "epoch": 1.12, "learning_rate": 0.0001897752323261615, "loss": 1.312, "step": 20920 }, { "epoch": 1.12, "learning_rate": 0.0001897644920239831, "loss": 1.3049, "step": 20940 }, { "epoch": 1.12, "eval_loss": 1.393723964691162, "eval_runtime": 49.9513, "eval_samples_per_second": 60.058, "eval_steps_per_second": 1.882, "step": 20950 }, { "epoch": 1.12, "learning_rate": 0.0001897537517218047, "loss": 1.3463, "step": 20960 }, { "epoch": 1.12, "learning_rate": 0.0001897430114196263, "loss": 1.3335, "step": 20980 }, { "epoch": 1.12, "learning_rate": 0.0001897322711174479, "loss": 1.332, "step": 21000 }, { "epoch": 1.12, "eval_loss": 1.3948601484298706, "eval_runtime": 49.8828, "eval_samples_per_second": 60.141, "eval_steps_per_second": 1.884, "step": 21000 }, { "epoch": 1.12, "learning_rate": 0.0001897215308152695, "loss": 1.3074, "step": 21020 }, { "epoch": 1.12, "learning_rate": 0.00018971079051309107, "loss": 1.3419, "step": 21040 }, { "epoch": 1.12, "eval_loss": 1.3940149545669556, "eval_runtime": 49.9346, "eval_samples_per_second": 60.079, "eval_steps_per_second": 1.882, "step": 21050 }, { "epoch": 1.13, "learning_rate": 0.0001897000502109127, "loss": 1.3513, "step": 21060 }, { "epoch": 1.13, "learning_rate": 0.00018968930990873427, "loss": 1.3396, "step": 21080 }, { "epoch": 1.13, "learning_rate": 0.0001896785696065559, "loss": 1.3321, "step": 21100 }, { "epoch": 1.13, "eval_loss": 1.3944331407546997, "eval_runtime": 49.9103, "eval_samples_per_second": 60.108, "eval_steps_per_second": 1.883, "step": 21100 }, { "epoch": 1.13, "learning_rate": 0.0001896678293043775, "loss": 1.3044, "step": 21120 }, { "epoch": 1.13, "learning_rate": 0.0001896570890021991, "loss": 1.3447, "step": 21140 }, { "epoch": 1.13, "eval_loss": 1.394074559211731, "eval_runtime": 49.931, "eval_samples_per_second": 60.083, "eval_steps_per_second": 1.883, "step": 21150 }, { "epoch": 1.13, "learning_rate": 0.0001896463487000207, "loss": 1.3281, "step": 21160 }, { "epoch": 1.13, "learning_rate": 0.0001896356083978423, "loss": 1.3708, "step": 21180 }, { "epoch": 1.13, "learning_rate": 0.0001896248680956639, "loss": 1.329, "step": 21200 }, { "epoch": 1.13, "eval_loss": 1.3945494890213013, "eval_runtime": 49.8915, "eval_samples_per_second": 60.13, "eval_steps_per_second": 1.884, "step": 21200 }, { "epoch": 1.13, "learning_rate": 0.0001896141277934855, "loss": 1.3313, "step": 21220 }, { "epoch": 1.13, "learning_rate": 0.00018960338749130707, "loss": 1.3064, "step": 21240 }, { "epoch": 1.14, "eval_loss": 1.3943963050842285, "eval_runtime": 49.9422, "eval_samples_per_second": 60.069, "eval_steps_per_second": 1.882, "step": 21250 }, { "epoch": 1.14, "learning_rate": 0.0001895926471891287, "loss": 1.3677, "step": 21260 }, { "epoch": 1.14, "learning_rate": 0.00018958190688695027, "loss": 1.3212, "step": 21280 }, { "epoch": 1.14, "learning_rate": 0.0001895711665847719, "loss": 1.321, "step": 21300 }, { "epoch": 1.14, "eval_loss": 1.3941173553466797, "eval_runtime": 49.8756, "eval_samples_per_second": 60.15, "eval_steps_per_second": 1.885, "step": 21300 }, { "epoch": 1.14, "learning_rate": 0.00018956042628259347, "loss": 1.3148, "step": 21320 }, { "epoch": 1.14, "learning_rate": 0.00018954968598041506, "loss": 1.3394, "step": 21340 }, { "epoch": 1.14, "eval_loss": 1.3952860832214355, "eval_runtime": 49.9435, "eval_samples_per_second": 60.068, "eval_steps_per_second": 1.882, "step": 21350 }, { "epoch": 1.14, "learning_rate": 0.00018953894567823667, "loss": 1.3219, "step": 21360 }, { "epoch": 1.14, "learning_rate": 0.00018952820537605826, "loss": 1.3146, "step": 21380 }, { "epoch": 1.14, "learning_rate": 0.00018951746507387987, "loss": 1.3589, "step": 21400 }, { "epoch": 1.14, "eval_loss": 1.394696593284607, "eval_runtime": 49.9086, "eval_samples_per_second": 60.11, "eval_steps_per_second": 1.883, "step": 21400 }, { "epoch": 1.14, "learning_rate": 0.00018950672477170146, "loss": 1.3228, "step": 21420 }, { "epoch": 1.15, "learning_rate": 0.00018949598446952305, "loss": 1.2984, "step": 21440 }, { "epoch": 1.15, "eval_loss": 1.395227074623108, "eval_runtime": 49.9256, "eval_samples_per_second": 60.089, "eval_steps_per_second": 1.883, "step": 21450 }, { "epoch": 1.15, "learning_rate": 0.00018948524416734466, "loss": 1.3303, "step": 21460 }, { "epoch": 1.15, "learning_rate": 0.00018947450386516625, "loss": 1.339, "step": 21480 }, { "epoch": 1.15, "learning_rate": 0.00018946376356298786, "loss": 1.3297, "step": 21500 }, { "epoch": 1.15, "eval_loss": 1.3939003944396973, "eval_runtime": 49.8908, "eval_samples_per_second": 60.131, "eval_steps_per_second": 1.884, "step": 21500 }, { "epoch": 1.15, "learning_rate": 0.00018945302326080945, "loss": 1.308, "step": 21520 }, { "epoch": 1.15, "learning_rate": 0.00018944228295863103, "loss": 1.3539, "step": 21540 }, { "epoch": 1.15, "eval_loss": 1.3942874670028687, "eval_runtime": 49.9923, "eval_samples_per_second": 60.009, "eval_steps_per_second": 1.88, "step": 21550 }, { "epoch": 1.15, "learning_rate": 0.00018943154265645265, "loss": 1.3464, "step": 21560 }, { "epoch": 1.15, "learning_rate": 0.00018942080235427423, "loss": 1.3452, "step": 21580 }, { "epoch": 1.15, "learning_rate": 0.00018941006205209585, "loss": 1.3701, "step": 21600 }, { "epoch": 1.15, "eval_loss": 1.3938068151474, "eval_runtime": 49.9117, "eval_samples_per_second": 60.106, "eval_steps_per_second": 1.883, "step": 21600 }, { "epoch": 1.16, "learning_rate": 0.00018939932174991743, "loss": 1.3529, "step": 21620 }, { "epoch": 1.16, "learning_rate": 0.00018938858144773902, "loss": 1.3452, "step": 21640 }, { "epoch": 1.16, "eval_loss": 1.3937488794326782, "eval_runtime": 49.9344, "eval_samples_per_second": 60.079, "eval_steps_per_second": 1.882, "step": 21650 }, { "epoch": 1.16, "learning_rate": 0.00018937784114556063, "loss": 1.3243, "step": 21660 }, { "epoch": 1.16, "learning_rate": 0.00018936710084338222, "loss": 1.3161, "step": 21680 }, { "epoch": 1.16, "learning_rate": 0.00018935636054120383, "loss": 1.3178, "step": 21700 }, { "epoch": 1.16, "eval_loss": 1.3939458131790161, "eval_runtime": 49.8961, "eval_samples_per_second": 60.125, "eval_steps_per_second": 1.884, "step": 21700 }, { "epoch": 1.16, "learning_rate": 0.00018934562023902542, "loss": 1.3084, "step": 21720 }, { "epoch": 1.16, "learning_rate": 0.00018933487993684703, "loss": 1.3273, "step": 21740 }, { "epoch": 1.16, "eval_loss": 1.394255518913269, "eval_runtime": 49.8969, "eval_samples_per_second": 60.124, "eval_steps_per_second": 1.884, "step": 21750 }, { "epoch": 1.16, "learning_rate": 0.00018932413963466865, "loss": 1.299, "step": 21760 }, { "epoch": 1.16, "learning_rate": 0.00018931339933249023, "loss": 1.3206, "step": 21780 }, { "epoch": 1.16, "learning_rate": 0.00018930265903031185, "loss": 1.3403, "step": 21800 }, { "epoch": 1.16, "eval_loss": 1.393869161605835, "eval_runtime": 49.8802, "eval_samples_per_second": 60.144, "eval_steps_per_second": 1.885, "step": 21800 }, { "epoch": 1.17, "learning_rate": 0.00018929191872813343, "loss": 1.3328, "step": 21820 }, { "epoch": 1.17, "learning_rate": 0.00018928117842595502, "loss": 1.2996, "step": 21840 }, { "epoch": 1.17, "eval_loss": 1.3937753438949585, "eval_runtime": 49.9775, "eval_samples_per_second": 60.027, "eval_steps_per_second": 1.881, "step": 21850 }, { "epoch": 1.17, "learning_rate": 0.00018927043812377663, "loss": 1.3158, "step": 21860 }, { "epoch": 1.17, "learning_rate": 0.00018925969782159822, "loss": 1.34, "step": 21880 }, { "epoch": 1.17, "learning_rate": 0.00018924895751941983, "loss": 1.315, "step": 21900 }, { "epoch": 1.17, "eval_loss": 1.393856406211853, "eval_runtime": 49.8879, "eval_samples_per_second": 60.135, "eval_steps_per_second": 1.884, "step": 21900 }, { "epoch": 1.17, "learning_rate": 0.00018923821721724142, "loss": 1.3036, "step": 21920 }, { "epoch": 1.17, "learning_rate": 0.000189227476915063, "loss": 1.324, "step": 21940 }, { "epoch": 1.17, "eval_loss": 1.3935225009918213, "eval_runtime": 49.9272, "eval_samples_per_second": 60.088, "eval_steps_per_second": 1.883, "step": 21950 }, { "epoch": 1.17, "learning_rate": 0.00018921673661288462, "loss": 1.2792, "step": 21960 }, { "epoch": 1.17, "learning_rate": 0.0001892059963107062, "loss": 1.285, "step": 21980 }, { "epoch": 1.18, "learning_rate": 0.00018919525600852782, "loss": 1.3465, "step": 22000 }, { "epoch": 1.18, "eval_loss": 1.3932766914367676, "eval_runtime": 49.8918, "eval_samples_per_second": 60.13, "eval_steps_per_second": 1.884, "step": 22000 }, { "epoch": 1.18, "learning_rate": 0.0001891845157063494, "loss": 1.3557, "step": 22020 }, { "epoch": 1.18, "learning_rate": 0.000189173775404171, "loss": 1.3295, "step": 22040 }, { "epoch": 1.18, "eval_loss": 1.3936799764633179, "eval_runtime": 49.9318, "eval_samples_per_second": 60.082, "eval_steps_per_second": 1.883, "step": 22050 }, { "epoch": 1.18, "learning_rate": 0.0001891630351019926, "loss": 1.273, "step": 22060 }, { "epoch": 1.18, "learning_rate": 0.0001891522947998142, "loss": 1.3366, "step": 22080 }, { "epoch": 1.18, "learning_rate": 0.0001891415544976358, "loss": 1.3403, "step": 22100 }, { "epoch": 1.18, "eval_loss": 1.3937078714370728, "eval_runtime": 49.8853, "eval_samples_per_second": 60.138, "eval_steps_per_second": 1.884, "step": 22100 }, { "epoch": 1.18, "learning_rate": 0.0001891308141954574, "loss": 1.3315, "step": 22120 }, { "epoch": 1.18, "learning_rate": 0.00018912007389327898, "loss": 1.2958, "step": 22140 }, { "epoch": 1.18, "eval_loss": 1.3935654163360596, "eval_runtime": 49.9374, "eval_samples_per_second": 60.075, "eval_steps_per_second": 1.882, "step": 22150 }, { "epoch": 1.18, "learning_rate": 0.0001891093335911006, "loss": 1.3404, "step": 22160 }, { "epoch": 1.19, "learning_rate": 0.00018909859328892218, "loss": 1.3616, "step": 22180 }, { "epoch": 1.19, "learning_rate": 0.0001890878529867438, "loss": 1.346, "step": 22200 }, { "epoch": 1.19, "eval_loss": 1.393470048904419, "eval_runtime": 49.816, "eval_samples_per_second": 60.222, "eval_steps_per_second": 1.887, "step": 22200 }, { "epoch": 1.19, "learning_rate": 0.00018907711268456538, "loss": 1.3741, "step": 22220 }, { "epoch": 1.19, "learning_rate": 0.000189066372382387, "loss": 1.3211, "step": 22240 }, { "epoch": 1.19, "eval_loss": 1.393375277519226, "eval_runtime": 49.9237, "eval_samples_per_second": 60.092, "eval_steps_per_second": 1.883, "step": 22250 }, { "epoch": 1.19, "learning_rate": 0.00018905563208020858, "loss": 1.3711, "step": 22260 }, { "epoch": 1.19, "learning_rate": 0.00018904489177803017, "loss": 1.3552, "step": 22280 }, { "epoch": 1.19, "learning_rate": 0.00018903415147585178, "loss": 1.3305, "step": 22300 }, { "epoch": 1.19, "eval_loss": 1.393662452697754, "eval_runtime": 49.8797, "eval_samples_per_second": 60.145, "eval_steps_per_second": 1.885, "step": 22300 }, { "epoch": 1.19, "learning_rate": 0.00018902341117367337, "loss": 1.3194, "step": 22320 }, { "epoch": 1.19, "learning_rate": 0.00018901267087149498, "loss": 1.3355, "step": 22340 }, { "epoch": 1.19, "eval_loss": 1.3936777114868164, "eval_runtime": 49.9571, "eval_samples_per_second": 60.052, "eval_steps_per_second": 1.882, "step": 22350 }, { "epoch": 1.19, "learning_rate": 0.0001890019305693166, "loss": 1.3547, "step": 22360 }, { "epoch": 1.2, "learning_rate": 0.00018899119026713818, "loss": 1.3148, "step": 22380 }, { "epoch": 1.2, "learning_rate": 0.0001889804499649598, "loss": 1.3456, "step": 22400 }, { "epoch": 1.2, "eval_loss": 1.3941611051559448, "eval_runtime": 49.9059, "eval_samples_per_second": 60.113, "eval_steps_per_second": 1.884, "step": 22400 }, { "epoch": 1.2, "learning_rate": 0.00018896970966278138, "loss": 1.3419, "step": 22420 }, { "epoch": 1.2, "learning_rate": 0.00018895896936060297, "loss": 1.3451, "step": 22440 }, { "epoch": 1.2, "eval_loss": 1.394294261932373, "eval_runtime": 49.9338, "eval_samples_per_second": 60.079, "eval_steps_per_second": 1.882, "step": 22450 }, { "epoch": 1.2, "learning_rate": 0.00018894822905842458, "loss": 1.346, "step": 22460 }, { "epoch": 1.2, "learning_rate": 0.00018893748875624617, "loss": 1.3597, "step": 22480 }, { "epoch": 1.2, "learning_rate": 0.00018892674845406778, "loss": 1.3069, "step": 22500 }, { "epoch": 1.2, "eval_loss": 1.3939677476882935, "eval_runtime": 49.9063, "eval_samples_per_second": 60.113, "eval_steps_per_second": 1.884, "step": 22500 }, { "epoch": 1.2, "learning_rate": 0.00018891600815188937, "loss": 1.3063, "step": 22520 }, { "epoch": 1.2, "learning_rate": 0.00018890526784971095, "loss": 1.3387, "step": 22540 }, { "epoch": 1.2, "eval_loss": 1.3937537670135498, "eval_runtime": 49.9009, "eval_samples_per_second": 60.119, "eval_steps_per_second": 1.884, "step": 22550 }, { "epoch": 1.21, "learning_rate": 0.00018889452754753257, "loss": 1.3412, "step": 22560 }, { "epoch": 1.21, "learning_rate": 0.00018888378724535415, "loss": 1.323, "step": 22580 }, { "epoch": 1.21, "learning_rate": 0.00018887304694317577, "loss": 1.2893, "step": 22600 }, { "epoch": 1.21, "eval_loss": 1.3940309286117554, "eval_runtime": 49.8846, "eval_samples_per_second": 60.139, "eval_steps_per_second": 1.884, "step": 22600 }, { "epoch": 1.21, "learning_rate": 0.00018886230664099735, "loss": 1.3087, "step": 22620 }, { "epoch": 1.21, "learning_rate": 0.00018885156633881897, "loss": 1.3158, "step": 22640 }, { "epoch": 1.21, "eval_loss": 1.393900752067566, "eval_runtime": 49.9827, "eval_samples_per_second": 60.021, "eval_steps_per_second": 1.881, "step": 22650 }, { "epoch": 1.21, "learning_rate": 0.00018884082603664055, "loss": 1.3096, "step": 22660 }, { "epoch": 1.21, "learning_rate": 0.00018883008573446214, "loss": 1.2929, "step": 22680 }, { "epoch": 1.21, "learning_rate": 0.00018881934543228375, "loss": 1.3141, "step": 22700 }, { "epoch": 1.21, "eval_loss": 1.3940834999084473, "eval_runtime": 49.8973, "eval_samples_per_second": 60.123, "eval_steps_per_second": 1.884, "step": 22700 }, { "epoch": 1.21, "learning_rate": 0.00018880860513010534, "loss": 1.3392, "step": 22720 }, { "epoch": 1.22, "learning_rate": 0.00018879786482792695, "loss": 1.2776, "step": 22740 }, { "epoch": 1.22, "eval_loss": 1.3944599628448486, "eval_runtime": 49.9134, "eval_samples_per_second": 60.104, "eval_steps_per_second": 1.883, "step": 22750 }, { "epoch": 1.22, "learning_rate": 0.00018878712452574854, "loss": 1.3268, "step": 22760 }, { "epoch": 1.22, "learning_rate": 0.00018877638422357013, "loss": 1.3086, "step": 22780 }, { "epoch": 1.22, "learning_rate": 0.00018876564392139174, "loss": 1.2932, "step": 22800 }, { "epoch": 1.22, "eval_loss": 1.39402437210083, "eval_runtime": 49.8649, "eval_samples_per_second": 60.163, "eval_steps_per_second": 1.885, "step": 22800 }, { "epoch": 1.22, "learning_rate": 0.00018875490361921333, "loss": 1.3709, "step": 22820 }, { "epoch": 1.22, "learning_rate": 0.00018874416331703494, "loss": 1.381, "step": 22840 }, { "epoch": 1.22, "eval_loss": 1.3939707279205322, "eval_runtime": 49.9399, "eval_samples_per_second": 60.072, "eval_steps_per_second": 1.882, "step": 22850 }, { "epoch": 1.22, "learning_rate": 0.00018873342301485653, "loss": 1.318, "step": 22860 }, { "epoch": 1.22, "learning_rate": 0.0001887226827126781, "loss": 1.3515, "step": 22880 }, { "epoch": 1.22, "learning_rate": 0.00018871194241049973, "loss": 1.332, "step": 22900 }, { "epoch": 1.22, "eval_loss": 1.3941855430603027, "eval_runtime": 49.8977, "eval_samples_per_second": 60.123, "eval_steps_per_second": 1.884, "step": 22900 }, { "epoch": 1.22, "learning_rate": 0.0001887012021083213, "loss": 1.3107, "step": 22920 }, { "epoch": 1.23, "learning_rate": 0.00018869046180614293, "loss": 1.2985, "step": 22940 }, { "epoch": 1.23, "eval_loss": 1.393490195274353, "eval_runtime": 49.9127, "eval_samples_per_second": 60.105, "eval_steps_per_second": 1.883, "step": 22950 }, { "epoch": 1.23, "learning_rate": 0.0001886797215039645, "loss": 1.3311, "step": 22960 }, { "epoch": 1.23, "learning_rate": 0.0001886689812017861, "loss": 1.3189, "step": 22980 }, { "epoch": 1.23, "learning_rate": 0.00018865824089960774, "loss": 1.3045, "step": 23000 }, { "epoch": 1.23, "eval_loss": 1.3936126232147217, "eval_runtime": 49.8816, "eval_samples_per_second": 60.142, "eval_steps_per_second": 1.884, "step": 23000 }, { "epoch": 1.23, "learning_rate": 0.00018864750059742933, "loss": 1.3236, "step": 23020 }, { "epoch": 1.23, "learning_rate": 0.0001886367602952509, "loss": 1.3055, "step": 23040 }, { "epoch": 1.23, "eval_loss": 1.3945151567459106, "eval_runtime": 49.9284, "eval_samples_per_second": 60.086, "eval_steps_per_second": 1.883, "step": 23050 }, { "epoch": 1.23, "learning_rate": 0.00018862601999307253, "loss": 1.3144, "step": 23060 }, { "epoch": 1.23, "learning_rate": 0.0001886152796908941, "loss": 1.3563, "step": 23080 }, { "epoch": 1.23, "learning_rate": 0.00018860453938871573, "loss": 1.3262, "step": 23100 }, { "epoch": 1.23, "eval_loss": 1.3944690227508545, "eval_runtime": 49.917, "eval_samples_per_second": 60.1, "eval_steps_per_second": 1.883, "step": 23100 }, { "epoch": 1.24, "learning_rate": 0.0001885937990865373, "loss": 1.2865, "step": 23120 }, { "epoch": 1.24, "learning_rate": 0.00018858305878435893, "loss": 1.358, "step": 23140 }, { "epoch": 1.24, "eval_loss": 1.3941535949707031, "eval_runtime": 49.9063, "eval_samples_per_second": 60.113, "eval_steps_per_second": 1.884, "step": 23150 }, { "epoch": 1.24, "learning_rate": 0.0001885723184821805, "loss": 1.327, "step": 23160 }, { "epoch": 1.24, "learning_rate": 0.0001885615781800021, "loss": 1.3815, "step": 23180 }, { "epoch": 1.24, "learning_rate": 0.0001885508378778237, "loss": 1.3377, "step": 23200 }, { "epoch": 1.24, "eval_loss": 1.3935812711715698, "eval_runtime": 49.8915, "eval_samples_per_second": 60.13, "eval_steps_per_second": 1.884, "step": 23200 }, { "epoch": 1.24, "learning_rate": 0.0001885400975756453, "loss": 1.3281, "step": 23220 }, { "epoch": 1.24, "learning_rate": 0.0001885293572734669, "loss": 1.3178, "step": 23240 }, { "epoch": 1.24, "eval_loss": 1.3938349485397339, "eval_runtime": 49.9329, "eval_samples_per_second": 60.081, "eval_steps_per_second": 1.883, "step": 23250 }, { "epoch": 1.24, "learning_rate": 0.0001885186169712885, "loss": 1.3363, "step": 23260 }, { "epoch": 1.24, "learning_rate": 0.00018850787666911008, "loss": 1.3189, "step": 23280 }, { "epoch": 1.24, "learning_rate": 0.0001884971363669317, "loss": 1.3143, "step": 23300 }, { "epoch": 1.24, "eval_loss": 1.3938268423080444, "eval_runtime": 49.8546, "eval_samples_per_second": 60.175, "eval_steps_per_second": 1.885, "step": 23300 }, { "epoch": 1.25, "learning_rate": 0.00018848639606475328, "loss": 1.3378, "step": 23320 }, { "epoch": 1.25, "learning_rate": 0.0001884756557625749, "loss": 1.3521, "step": 23340 }, { "epoch": 1.25, "eval_loss": 1.3935599327087402, "eval_runtime": 49.8847, "eval_samples_per_second": 60.139, "eval_steps_per_second": 1.884, "step": 23350 }, { "epoch": 1.25, "learning_rate": 0.00018846491546039648, "loss": 1.331, "step": 23360 }, { "epoch": 1.25, "learning_rate": 0.00018845417515821807, "loss": 1.3347, "step": 23380 }, { "epoch": 1.25, "learning_rate": 0.00018844343485603968, "loss": 1.2857, "step": 23400 }, { "epoch": 1.25, "eval_loss": 1.3932883739471436, "eval_runtime": 49.905, "eval_samples_per_second": 60.114, "eval_steps_per_second": 1.884, "step": 23400 }, { "epoch": 1.25, "learning_rate": 0.00018843269455386127, "loss": 1.3413, "step": 23420 }, { "epoch": 1.25, "learning_rate": 0.00018842195425168288, "loss": 1.3461, "step": 23440 }, { "epoch": 1.25, "eval_loss": 1.3934977054595947, "eval_runtime": 49.912, "eval_samples_per_second": 60.106, "eval_steps_per_second": 1.883, "step": 23450 }, { "epoch": 1.25, "learning_rate": 0.00018841121394950447, "loss": 1.3177, "step": 23460 }, { "epoch": 1.25, "learning_rate": 0.00018840047364732606, "loss": 1.3664, "step": 23480 }, { "epoch": 1.26, "learning_rate": 0.00018838973334514767, "loss": 1.371, "step": 23500 }, { "epoch": 1.26, "eval_loss": 1.3936991691589355, "eval_runtime": 49.8492, "eval_samples_per_second": 60.182, "eval_steps_per_second": 1.886, "step": 23500 }, { "epoch": 1.26, "learning_rate": 0.00018837899304296926, "loss": 1.3165, "step": 23520 }, { "epoch": 1.26, "learning_rate": 0.00018836825274079087, "loss": 1.338, "step": 23540 }, { "epoch": 1.26, "eval_loss": 1.3927234411239624, "eval_runtime": 49.9193, "eval_samples_per_second": 60.097, "eval_steps_per_second": 1.883, "step": 23550 }, { "epoch": 1.26, "learning_rate": 0.00018835751243861246, "loss": 1.2839, "step": 23560 }, { "epoch": 1.26, "learning_rate": 0.00018834677213643407, "loss": 1.3623, "step": 23580 }, { "epoch": 1.26, "learning_rate": 0.00018833603183425566, "loss": 1.3299, "step": 23600 }, { "epoch": 1.26, "eval_loss": 1.392892599105835, "eval_runtime": 49.8594, "eval_samples_per_second": 60.169, "eval_steps_per_second": 1.885, "step": 23600 }, { "epoch": 1.26, "learning_rate": 0.00018832529153207727, "loss": 1.326, "step": 23620 }, { "epoch": 1.26, "learning_rate": 0.00018831455122989888, "loss": 1.3473, "step": 23640 }, { "epoch": 1.26, "eval_loss": 1.392406940460205, "eval_runtime": 49.9562, "eval_samples_per_second": 60.053, "eval_steps_per_second": 1.882, "step": 23650 }, { "epoch": 1.26, "learning_rate": 0.00018830381092772047, "loss": 1.341, "step": 23660 }, { "epoch": 1.27, "learning_rate": 0.00018829307062554206, "loss": 1.2912, "step": 23680 }, { "epoch": 1.27, "learning_rate": 0.00018828233032336367, "loss": 1.3121, "step": 23700 }, { "epoch": 1.27, "eval_loss": 1.3924084901809692, "eval_runtime": 49.8918, "eval_samples_per_second": 60.13, "eval_steps_per_second": 1.884, "step": 23700 }, { "epoch": 1.27, "learning_rate": 0.00018827159002118526, "loss": 1.3299, "step": 23720 }, { "epoch": 1.27, "learning_rate": 0.00018826084971900687, "loss": 1.3266, "step": 23740 }, { "epoch": 1.27, "eval_loss": 1.3923590183258057, "eval_runtime": 49.9073, "eval_samples_per_second": 60.111, "eval_steps_per_second": 1.883, "step": 23750 }, { "epoch": 1.27, "learning_rate": 0.00018825010941682846, "loss": 1.3247, "step": 23760 }, { "epoch": 1.27, "learning_rate": 0.00018823936911465004, "loss": 1.3357, "step": 23780 }, { "epoch": 1.27, "learning_rate": 0.00018822862881247166, "loss": 1.3269, "step": 23800 }, { "epoch": 1.27, "eval_loss": 1.3921170234680176, "eval_runtime": 49.8632, "eval_samples_per_second": 60.165, "eval_steps_per_second": 1.885, "step": 23800 }, { "epoch": 1.27, "learning_rate": 0.00018821788851029324, "loss": 1.2837, "step": 23820 }, { "epoch": 1.27, "learning_rate": 0.00018820714820811486, "loss": 1.3065, "step": 23840 }, { "epoch": 1.27, "eval_loss": 1.3927005529403687, "eval_runtime": 49.9104, "eval_samples_per_second": 60.108, "eval_steps_per_second": 1.883, "step": 23850 }, { "epoch": 1.27, "learning_rate": 0.00018819640790593644, "loss": 1.3199, "step": 23860 }, { "epoch": 1.28, "learning_rate": 0.00018818566760375803, "loss": 1.3366, "step": 23880 }, { "epoch": 1.28, "learning_rate": 0.00018817492730157964, "loss": 1.3241, "step": 23900 }, { "epoch": 1.28, "eval_loss": 1.3931777477264404, "eval_runtime": 49.8758, "eval_samples_per_second": 60.149, "eval_steps_per_second": 1.885, "step": 23900 }, { "epoch": 1.28, "learning_rate": 0.00018816418699940123, "loss": 1.2909, "step": 23920 }, { "epoch": 1.28, "learning_rate": 0.00018815344669722284, "loss": 1.3167, "step": 23940 }, { "epoch": 1.28, "eval_loss": 1.3930566310882568, "eval_runtime": 49.9278, "eval_samples_per_second": 60.087, "eval_steps_per_second": 1.883, "step": 23950 }, { "epoch": 1.28, "learning_rate": 0.00018814270639504443, "loss": 1.3626, "step": 23960 }, { "epoch": 1.28, "learning_rate": 0.00018813196609286602, "loss": 1.3775, "step": 23980 }, { "epoch": 1.28, "learning_rate": 0.00018812122579068763, "loss": 1.289, "step": 24000 }, { "epoch": 1.28, "eval_loss": 1.3926464319229126, "eval_runtime": 49.8874, "eval_samples_per_second": 60.135, "eval_steps_per_second": 1.884, "step": 24000 }, { "epoch": 1.28, "learning_rate": 0.00018811048548850922, "loss": 1.3247, "step": 24020 }, { "epoch": 1.28, "learning_rate": 0.00018809974518633083, "loss": 1.387, "step": 24040 }, { "epoch": 1.29, "eval_loss": 1.3932690620422363, "eval_runtime": 49.9239, "eval_samples_per_second": 60.091, "eval_steps_per_second": 1.883, "step": 24050 }, { "epoch": 1.29, "learning_rate": 0.00018808900488415242, "loss": 1.3109, "step": 24060 }, { "epoch": 1.29, "learning_rate": 0.00018807826458197403, "loss": 1.3242, "step": 24080 }, { "epoch": 1.29, "learning_rate": 0.00018806752427979562, "loss": 1.3412, "step": 24100 }, { "epoch": 1.29, "eval_loss": 1.3931074142456055, "eval_runtime": 49.8986, "eval_samples_per_second": 60.122, "eval_steps_per_second": 1.884, "step": 24100 }, { "epoch": 1.29, "learning_rate": 0.0001880567839776172, "loss": 1.3798, "step": 24120 }, { "epoch": 1.29, "learning_rate": 0.00018804604367543882, "loss": 1.3796, "step": 24140 }, { "epoch": 1.29, "eval_loss": 1.3929234743118286, "eval_runtime": 49.9091, "eval_samples_per_second": 60.109, "eval_steps_per_second": 1.883, "step": 24150 }, { "epoch": 1.29, "learning_rate": 0.0001880353033732604, "loss": 1.3133, "step": 24160 }, { "epoch": 1.29, "learning_rate": 0.00018802456307108202, "loss": 1.3307, "step": 24180 }, { "epoch": 1.29, "learning_rate": 0.0001880138227689036, "loss": 1.3138, "step": 24200 }, { "epoch": 1.29, "eval_loss": 1.3924319744110107, "eval_runtime": 49.897, "eval_samples_per_second": 60.124, "eval_steps_per_second": 1.884, "step": 24200 }, { "epoch": 1.29, "learning_rate": 0.0001880030824667252, "loss": 1.3307, "step": 24220 }, { "epoch": 1.3, "learning_rate": 0.0001879923421645468, "loss": 1.3521, "step": 24240 }, { "epoch": 1.3, "eval_loss": 1.3926700353622437, "eval_runtime": 49.9085, "eval_samples_per_second": 60.11, "eval_steps_per_second": 1.883, "step": 24250 }, { "epoch": 1.3, "learning_rate": 0.00018798160186236842, "loss": 1.3207, "step": 24260 }, { "epoch": 1.3, "learning_rate": 0.00018797086156019, "loss": 1.2797, "step": 24280 }, { "epoch": 1.3, "learning_rate": 0.00018796012125801162, "loss": 1.3234, "step": 24300 }, { "epoch": 1.3, "eval_loss": 1.392791509628296, "eval_runtime": 49.8988, "eval_samples_per_second": 60.122, "eval_steps_per_second": 1.884, "step": 24300 }, { "epoch": 1.3, "learning_rate": 0.0001879493809558332, "loss": 1.3436, "step": 24320 }, { "epoch": 1.3, "learning_rate": 0.00018793864065365482, "loss": 1.3685, "step": 24340 }, { "epoch": 1.3, "eval_loss": 1.3929072618484497, "eval_runtime": 49.9294, "eval_samples_per_second": 60.085, "eval_steps_per_second": 1.883, "step": 24350 }, { "epoch": 1.3, "learning_rate": 0.0001879279003514764, "loss": 1.3149, "step": 24360 }, { "epoch": 1.3, "learning_rate": 0.000187917160049298, "loss": 1.341, "step": 24380 }, { "epoch": 1.3, "learning_rate": 0.0001879064197471196, "loss": 1.3232, "step": 24400 }, { "epoch": 1.3, "eval_loss": 1.3928802013397217, "eval_runtime": 49.9403, "eval_samples_per_second": 60.072, "eval_steps_per_second": 1.882, "step": 24400 }, { "epoch": 1.3, "learning_rate": 0.0001878956794449412, "loss": 1.3352, "step": 24420 }, { "epoch": 1.31, "learning_rate": 0.0001878849391427628, "loss": 1.3423, "step": 24440 }, { "epoch": 1.31, "eval_loss": 1.3924449682235718, "eval_runtime": 49.9373, "eval_samples_per_second": 60.075, "eval_steps_per_second": 1.882, "step": 24450 }, { "epoch": 1.31, "learning_rate": 0.0001878741988405844, "loss": 1.3055, "step": 24460 }, { "epoch": 1.31, "learning_rate": 0.000187863458538406, "loss": 1.3349, "step": 24480 }, { "epoch": 1.31, "learning_rate": 0.0001878527182362276, "loss": 1.3377, "step": 24500 }, { "epoch": 1.31, "eval_loss": 1.3933476209640503, "eval_runtime": 49.8969, "eval_samples_per_second": 60.124, "eval_steps_per_second": 1.884, "step": 24500 }, { "epoch": 1.31, "learning_rate": 0.00018784197793404918, "loss": 1.336, "step": 24520 }, { "epoch": 1.31, "learning_rate": 0.0001878312376318708, "loss": 1.3474, "step": 24540 }, { "epoch": 1.31, "eval_loss": 1.3931814432144165, "eval_runtime": 49.9181, "eval_samples_per_second": 60.098, "eval_steps_per_second": 1.883, "step": 24550 }, { "epoch": 1.31, "learning_rate": 0.00018782049732969238, "loss": 1.3556, "step": 24560 }, { "epoch": 1.31, "learning_rate": 0.000187809757027514, "loss": 1.3238, "step": 24580 }, { "epoch": 1.31, "learning_rate": 0.00018779901672533558, "loss": 1.3364, "step": 24600 }, { "epoch": 1.31, "eval_loss": 1.3931456804275513, "eval_runtime": 49.9019, "eval_samples_per_second": 60.118, "eval_steps_per_second": 1.884, "step": 24600 }, { "epoch": 1.32, "learning_rate": 0.00018778827642315716, "loss": 1.2976, "step": 24620 }, { "epoch": 1.32, "learning_rate": 0.00018777753612097878, "loss": 1.3303, "step": 24640 }, { "epoch": 1.32, "eval_loss": 1.3942646980285645, "eval_runtime": 49.9029, "eval_samples_per_second": 60.117, "eval_steps_per_second": 1.884, "step": 24650 }, { "epoch": 1.32, "learning_rate": 0.00018776679581880036, "loss": 1.3133, "step": 24660 }, { "epoch": 1.32, "learning_rate": 0.00018775605551662198, "loss": 1.2781, "step": 24680 }, { "epoch": 1.32, "learning_rate": 0.00018774531521444356, "loss": 1.3457, "step": 24700 }, { "epoch": 1.32, "eval_loss": 1.393941044807434, "eval_runtime": 49.8674, "eval_samples_per_second": 60.16, "eval_steps_per_second": 1.885, "step": 24700 }, { "epoch": 1.32, "learning_rate": 0.00018773457491226515, "loss": 1.3208, "step": 24720 }, { "epoch": 1.32, "learning_rate": 0.00018772383461008676, "loss": 1.3013, "step": 24740 }, { "epoch": 1.32, "eval_loss": 1.393032431602478, "eval_runtime": 49.9451, "eval_samples_per_second": 60.066, "eval_steps_per_second": 1.882, "step": 24750 }, { "epoch": 1.32, "learning_rate": 0.00018771309430790835, "loss": 1.3752, "step": 24760 }, { "epoch": 1.32, "learning_rate": 0.00018770235400572996, "loss": 1.3651, "step": 24780 }, { "epoch": 1.33, "learning_rate": 0.00018769161370355155, "loss": 1.2917, "step": 24800 }, { "epoch": 1.33, "eval_loss": 1.3928227424621582, "eval_runtime": 49.9307, "eval_samples_per_second": 60.083, "eval_steps_per_second": 1.883, "step": 24800 }, { "epoch": 1.33, "learning_rate": 0.00018768087340137314, "loss": 1.3655, "step": 24820 }, { "epoch": 1.33, "learning_rate": 0.00018767013309919475, "loss": 1.3621, "step": 24840 }, { "epoch": 1.33, "eval_loss": 1.3928583860397339, "eval_runtime": 49.9119, "eval_samples_per_second": 60.106, "eval_steps_per_second": 1.883, "step": 24850 }, { "epoch": 1.33, "learning_rate": 0.00018765939279701634, "loss": 1.3057, "step": 24860 }, { "epoch": 1.33, "learning_rate": 0.00018764865249483795, "loss": 1.3112, "step": 24880 }, { "epoch": 1.33, "learning_rate": 0.00018763791219265956, "loss": 1.3373, "step": 24900 }, { "epoch": 1.33, "eval_loss": 1.39380943775177, "eval_runtime": 49.9554, "eval_samples_per_second": 60.054, "eval_steps_per_second": 1.882, "step": 24900 }, { "epoch": 1.33, "learning_rate": 0.00018762717189048115, "loss": 1.3207, "step": 24920 }, { "epoch": 1.33, "learning_rate": 0.00018761643158830276, "loss": 1.3272, "step": 24940 }, { "epoch": 1.33, "eval_loss": 1.3933907747268677, "eval_runtime": 49.9328, "eval_samples_per_second": 60.081, "eval_steps_per_second": 1.883, "step": 24950 }, { "epoch": 1.33, "learning_rate": 0.00018760622830123327, "loss": 1.3419, "step": 24960 }, { "epoch": 1.33, "learning_rate": 0.00018759548799905486, "loss": 1.3051, "step": 24980 }, { "epoch": 1.34, "learning_rate": 0.00018758474769687647, "loss": 1.2969, "step": 25000 }, { "epoch": 1.34, "eval_loss": 1.3930931091308594, "eval_runtime": 49.9142, "eval_samples_per_second": 60.103, "eval_steps_per_second": 1.883, "step": 25000 }, { "epoch": 1.34, "learning_rate": 0.00018757400739469806, "loss": 1.332, "step": 25020 }, { "epoch": 1.34, "learning_rate": 0.00018756326709251964, "loss": 1.3231, "step": 25040 }, { "epoch": 1.34, "eval_loss": 1.3935271501541138, "eval_runtime": 49.9445, "eval_samples_per_second": 60.067, "eval_steps_per_second": 1.882, "step": 25050 }, { "epoch": 1.34, "learning_rate": 0.00018755252679034126, "loss": 1.3348, "step": 25060 }, { "epoch": 1.34, "learning_rate": 0.00018754178648816284, "loss": 1.3147, "step": 25080 }, { "epoch": 1.34, "learning_rate": 0.00018753104618598446, "loss": 1.3067, "step": 25100 }, { "epoch": 1.34, "eval_loss": 1.3923522233963013, "eval_runtime": 49.8579, "eval_samples_per_second": 60.171, "eval_steps_per_second": 1.885, "step": 25100 }, { "epoch": 1.34, "learning_rate": 0.00018752030588380604, "loss": 1.3578, "step": 25120 }, { "epoch": 1.34, "learning_rate": 0.00018750956558162766, "loss": 1.3334, "step": 25140 }, { "epoch": 1.34, "eval_loss": 1.3927327394485474, "eval_runtime": 49.9361, "eval_samples_per_second": 60.077, "eval_steps_per_second": 1.882, "step": 25150 }, { "epoch": 1.34, "learning_rate": 0.00018749882527944924, "loss": 1.3283, "step": 25160 }, { "epoch": 1.35, "learning_rate": 0.00018748808497727083, "loss": 1.3298, "step": 25180 }, { "epoch": 1.35, "learning_rate": 0.00018747734467509244, "loss": 1.3133, "step": 25200 }, { "epoch": 1.35, "eval_loss": 1.3926618099212646, "eval_runtime": 49.8902, "eval_samples_per_second": 60.132, "eval_steps_per_second": 1.884, "step": 25200 }, { "epoch": 1.35, "learning_rate": 0.00018746660437291403, "loss": 1.3015, "step": 25220 }, { "epoch": 1.35, "learning_rate": 0.00018745586407073564, "loss": 1.3212, "step": 25240 }, { "epoch": 1.35, "eval_loss": 1.3931440114974976, "eval_runtime": 49.9256, "eval_samples_per_second": 60.089, "eval_steps_per_second": 1.883, "step": 25250 }, { "epoch": 1.35, "learning_rate": 0.00018744512376855723, "loss": 1.3574, "step": 25260 }, { "epoch": 1.35, "learning_rate": 0.00018743438346637882, "loss": 1.3192, "step": 25280 }, { "epoch": 1.35, "learning_rate": 0.00018742364316420043, "loss": 1.3267, "step": 25300 }, { "epoch": 1.35, "eval_loss": 1.3930535316467285, "eval_runtime": 49.9314, "eval_samples_per_second": 60.082, "eval_steps_per_second": 1.883, "step": 25300 }, { "epoch": 1.35, "learning_rate": 0.00018741290286202202, "loss": 1.3137, "step": 25320 }, { "epoch": 1.35, "learning_rate": 0.00018740216255984363, "loss": 1.2821, "step": 25340 }, { "epoch": 1.35, "eval_loss": 1.3924797773361206, "eval_runtime": 49.9197, "eval_samples_per_second": 60.097, "eval_steps_per_second": 1.883, "step": 25350 }, { "epoch": 1.36, "learning_rate": 0.00018739142225766524, "loss": 1.3467, "step": 25360 }, { "epoch": 1.36, "learning_rate": 0.00018738068195548683, "loss": 1.3472, "step": 25380 }, { "epoch": 1.36, "learning_rate": 0.00018736994165330844, "loss": 1.3496, "step": 25400 }, { "epoch": 1.36, "eval_loss": 1.3928779363632202, "eval_runtime": 49.9035, "eval_samples_per_second": 60.116, "eval_steps_per_second": 1.884, "step": 25400 }, { "epoch": 1.36, "learning_rate": 0.00018735920135113003, "loss": 1.3438, "step": 25420 }, { "epoch": 1.36, "learning_rate": 0.00018734846104895162, "loss": 1.2587, "step": 25440 }, { "epoch": 1.36, "eval_loss": 1.3934276103973389, "eval_runtime": 49.9277, "eval_samples_per_second": 60.087, "eval_steps_per_second": 1.883, "step": 25450 }, { "epoch": 1.36, "learning_rate": 0.00018733772074677323, "loss": 1.3466, "step": 25460 }, { "epoch": 1.36, "learning_rate": 0.00018732698044459482, "loss": 1.3482, "step": 25480 }, { "epoch": 1.36, "learning_rate": 0.00018731624014241643, "loss": 1.3167, "step": 25500 }, { "epoch": 1.36, "eval_loss": 1.3931827545166016, "eval_runtime": 49.8943, "eval_samples_per_second": 60.127, "eval_steps_per_second": 1.884, "step": 25500 }, { "epoch": 1.36, "learning_rate": 0.00018730549984023802, "loss": 1.3419, "step": 25520 }, { "epoch": 1.36, "learning_rate": 0.00018729529655316853, "loss": 1.3171, "step": 25540 }, { "epoch": 1.37, "eval_loss": 1.393099308013916, "eval_runtime": 49.9382, "eval_samples_per_second": 60.074, "eval_steps_per_second": 1.882, "step": 25550 }, { "epoch": 1.37, "learning_rate": 0.00018728455625099014, "loss": 1.3, "step": 25560 }, { "epoch": 1.37, "learning_rate": 0.00018727381594881173, "loss": 1.3199, "step": 25580 }, { "epoch": 1.37, "learning_rate": 0.0001872630756466333, "loss": 1.3207, "step": 25600 }, { "epoch": 1.37, "eval_loss": 1.3924485445022583, "eval_runtime": 49.915, "eval_samples_per_second": 60.102, "eval_steps_per_second": 1.883, "step": 25600 }, { "epoch": 1.37, "learning_rate": 0.00018725233534445493, "loss": 1.2997, "step": 25620 }, { "epoch": 1.37, "learning_rate": 0.0001872415950422765, "loss": 1.3398, "step": 25640 }, { "epoch": 1.37, "eval_loss": 1.3940945863723755, "eval_runtime": 49.9277, "eval_samples_per_second": 60.087, "eval_steps_per_second": 1.883, "step": 25650 }, { "epoch": 1.37, "learning_rate": 0.00018723085474009813, "loss": 1.3315, "step": 25660 }, { "epoch": 1.37, "learning_rate": 0.0001872201144379197, "loss": 1.3389, "step": 25680 }, { "epoch": 1.37, "learning_rate": 0.0001872093741357413, "loss": 1.3481, "step": 25700 }, { "epoch": 1.37, "eval_loss": 1.3938196897506714, "eval_runtime": 49.8983, "eval_samples_per_second": 60.122, "eval_steps_per_second": 1.884, "step": 25700 }, { "epoch": 1.37, "learning_rate": 0.0001871986338335629, "loss": 1.3122, "step": 25720 }, { "epoch": 1.38, "learning_rate": 0.0001871878935313845, "loss": 1.3473, "step": 25740 }, { "epoch": 1.38, "eval_loss": 1.3930983543395996, "eval_runtime": 49.9502, "eval_samples_per_second": 60.06, "eval_steps_per_second": 1.882, "step": 25750 }, { "epoch": 1.38, "learning_rate": 0.0001871771532292061, "loss": 1.3158, "step": 25760 }, { "epoch": 1.38, "learning_rate": 0.0001871664129270277, "loss": 1.2729, "step": 25780 }, { "epoch": 1.38, "learning_rate": 0.0001871556726248493, "loss": 1.3306, "step": 25800 }, { "epoch": 1.38, "eval_loss": 1.3933429718017578, "eval_runtime": 49.8803, "eval_samples_per_second": 60.144, "eval_steps_per_second": 1.885, "step": 25800 }, { "epoch": 1.38, "learning_rate": 0.00018714493232267093, "loss": 1.3314, "step": 25820 }, { "epoch": 1.38, "learning_rate": 0.0001871341920204925, "loss": 1.2822, "step": 25840 }, { "epoch": 1.38, "eval_loss": 1.3929928541183472, "eval_runtime": 49.9522, "eval_samples_per_second": 60.057, "eval_steps_per_second": 1.882, "step": 25850 }, { "epoch": 1.38, "learning_rate": 0.00018712345171831413, "loss": 1.3469, "step": 25860 }, { "epoch": 1.38, "learning_rate": 0.0001871127114161357, "loss": 1.3227, "step": 25880 }, { "epoch": 1.38, "learning_rate": 0.0001871019711139573, "loss": 1.3566, "step": 25900 }, { "epoch": 1.38, "eval_loss": 1.393803596496582, "eval_runtime": 49.9071, "eval_samples_per_second": 60.112, "eval_steps_per_second": 1.883, "step": 25900 }, { "epoch": 1.38, "learning_rate": 0.0001870912308117789, "loss": 1.3172, "step": 25920 }, { "epoch": 1.39, "learning_rate": 0.0001870804905096005, "loss": 1.3102, "step": 25940 }, { "epoch": 1.39, "eval_loss": 1.3934656381607056, "eval_runtime": 49.9694, "eval_samples_per_second": 60.037, "eval_steps_per_second": 1.881, "step": 25950 }, { "epoch": 1.39, "learning_rate": 0.0001870697502074221, "loss": 1.3202, "step": 25960 }, { "epoch": 1.39, "learning_rate": 0.0001870590099052437, "loss": 1.3437, "step": 25980 }, { "epoch": 1.39, "learning_rate": 0.00018704826960306528, "loss": 1.2984, "step": 26000 }, { "epoch": 1.39, "eval_loss": 1.3933109045028687, "eval_runtime": 49.9288, "eval_samples_per_second": 60.086, "eval_steps_per_second": 1.883, "step": 26000 }, { "epoch": 1.39, "learning_rate": 0.0001870375293008869, "loss": 1.3126, "step": 26020 }, { "epoch": 1.39, "learning_rate": 0.00018702678899870848, "loss": 1.3076, "step": 26040 }, { "epoch": 1.39, "eval_loss": 1.393373727798462, "eval_runtime": 49.9405, "eval_samples_per_second": 60.071, "eval_steps_per_second": 1.882, "step": 26050 }, { "epoch": 1.39, "learning_rate": 0.0001870160486965301, "loss": 1.3448, "step": 26060 }, { "epoch": 1.39, "learning_rate": 0.00018700530839435168, "loss": 1.31, "step": 26080 }, { "epoch": 1.39, "learning_rate": 0.00018699456809217327, "loss": 1.2794, "step": 26100 }, { "epoch": 1.39, "eval_loss": 1.3935492038726807, "eval_runtime": 49.9018, "eval_samples_per_second": 60.118, "eval_steps_per_second": 1.884, "step": 26100 }, { "epoch": 1.4, "learning_rate": 0.00018698382778999488, "loss": 1.3262, "step": 26120 }, { "epoch": 1.4, "learning_rate": 0.00018697308748781647, "loss": 1.3433, "step": 26140 }, { "epoch": 1.4, "eval_loss": 1.393388032913208, "eval_runtime": 49.9565, "eval_samples_per_second": 60.052, "eval_steps_per_second": 1.882, "step": 26150 }, { "epoch": 1.4, "learning_rate": 0.00018696234718563808, "loss": 1.3313, "step": 26160 }, { "epoch": 1.4, "learning_rate": 0.00018695160688345967, "loss": 1.3358, "step": 26180 }, { "epoch": 1.4, "learning_rate": 0.00018694086658128128, "loss": 1.3343, "step": 26200 }, { "epoch": 1.4, "eval_loss": 1.3940303325653076, "eval_runtime": 49.921, "eval_samples_per_second": 60.095, "eval_steps_per_second": 1.883, "step": 26200 }, { "epoch": 1.4, "learning_rate": 0.00018693012627910287, "loss": 1.3278, "step": 26220 }, { "epoch": 1.4, "learning_rate": 0.00018691938597692446, "loss": 1.3649, "step": 26240 }, { "epoch": 1.4, "eval_loss": 1.3933554887771606, "eval_runtime": 49.9527, "eval_samples_per_second": 60.057, "eval_steps_per_second": 1.882, "step": 26250 }, { "epoch": 1.4, "learning_rate": 0.00018690864567474607, "loss": 1.3367, "step": 26260 }, { "epoch": 1.4, "learning_rate": 0.00018689790537256766, "loss": 1.3136, "step": 26280 }, { "epoch": 1.41, "learning_rate": 0.00018688716507038927, "loss": 1.322, "step": 26300 }, { "epoch": 1.41, "eval_loss": 1.393583059310913, "eval_runtime": 49.897, "eval_samples_per_second": 60.124, "eval_steps_per_second": 1.884, "step": 26300 }, { "epoch": 1.41, "learning_rate": 0.00018687642476821086, "loss": 1.3264, "step": 26320 }, { "epoch": 1.41, "learning_rate": 0.00018686568446603244, "loss": 1.3185, "step": 26340 }, { "epoch": 1.41, "eval_loss": 1.3927003145217896, "eval_runtime": 49.8674, "eval_samples_per_second": 60.16, "eval_steps_per_second": 1.885, "step": 26350 }, { "epoch": 1.41, "learning_rate": 0.00018685494416385406, "loss": 1.3418, "step": 26360 }, { "epoch": 1.41, "learning_rate": 0.00018684420386167564, "loss": 1.3065, "step": 26380 }, { "epoch": 1.41, "learning_rate": 0.00018683346355949726, "loss": 1.3206, "step": 26400 }, { "epoch": 1.41, "eval_loss": 1.3931444883346558, "eval_runtime": 49.9132, "eval_samples_per_second": 60.104, "eval_steps_per_second": 1.883, "step": 26400 }, { "epoch": 1.41, "learning_rate": 0.00018682272325731884, "loss": 1.306, "step": 26420 }, { "epoch": 1.41, "learning_rate": 0.00018681198295514043, "loss": 1.3244, "step": 26440 }, { "epoch": 1.41, "eval_loss": 1.3925822973251343, "eval_runtime": 49.9477, "eval_samples_per_second": 60.063, "eval_steps_per_second": 1.882, "step": 26450 }, { "epoch": 1.41, "learning_rate": 0.00018680124265296207, "loss": 1.3014, "step": 26460 }, { "epoch": 1.41, "learning_rate": 0.00018679050235078366, "loss": 1.3444, "step": 26480 }, { "epoch": 1.42, "learning_rate": 0.00018677976204860524, "loss": 1.2831, "step": 26500 }, { "epoch": 1.42, "eval_loss": 1.3929316997528076, "eval_runtime": 49.8972, "eval_samples_per_second": 60.124, "eval_steps_per_second": 1.884, "step": 26500 }, { "epoch": 1.42, "learning_rate": 0.00018676902174642686, "loss": 1.3343, "step": 26520 }, { "epoch": 1.42, "learning_rate": 0.00018675828144424844, "loss": 1.3284, "step": 26540 }, { "epoch": 1.42, "eval_loss": 1.392746090888977, "eval_runtime": 49.9365, "eval_samples_per_second": 60.076, "eval_steps_per_second": 1.882, "step": 26550 }, { "epoch": 1.42, "learning_rate": 0.00018674754114207006, "loss": 1.2949, "step": 26560 }, { "epoch": 1.42, "learning_rate": 0.00018673680083989164, "loss": 1.2962, "step": 26580 }, { "epoch": 1.42, "learning_rate": 0.00018672606053771323, "loss": 1.3729, "step": 26600 }, { "epoch": 1.42, "eval_loss": 1.393019676208496, "eval_runtime": 49.8964, "eval_samples_per_second": 60.125, "eval_steps_per_second": 1.884, "step": 26600 }, { "epoch": 1.42, "learning_rate": 0.00018671532023553484, "loss": 1.334, "step": 26620 }, { "epoch": 1.42, "learning_rate": 0.00018670457993335643, "loss": 1.2929, "step": 26640 }, { "epoch": 1.42, "eval_loss": 1.3938348293304443, "eval_runtime": 49.9577, "eval_samples_per_second": 60.051, "eval_steps_per_second": 1.882, "step": 26650 }, { "epoch": 1.42, "learning_rate": 0.00018669383963117804, "loss": 1.3346, "step": 26660 }, { "epoch": 1.43, "learning_rate": 0.00018668309932899963, "loss": 1.3547, "step": 26680 }, { "epoch": 1.43, "learning_rate": 0.00018667235902682124, "loss": 1.3088, "step": 26700 }, { "epoch": 1.43, "eval_loss": 1.3936799764633179, "eval_runtime": 49.8956, "eval_samples_per_second": 60.126, "eval_steps_per_second": 1.884, "step": 26700 }, { "epoch": 1.43, "learning_rate": 0.00018666161872464283, "loss": 1.3428, "step": 26720 }, { "epoch": 1.43, "learning_rate": 0.00018665087842246442, "loss": 1.3654, "step": 26740 }, { "epoch": 1.43, "eval_loss": 1.3933039903640747, "eval_runtime": 49.9078, "eval_samples_per_second": 60.111, "eval_steps_per_second": 1.883, "step": 26750 }, { "epoch": 1.43, "learning_rate": 0.00018664013812028603, "loss": 1.3661, "step": 26760 }, { "epoch": 1.43, "learning_rate": 0.00018662939781810762, "loss": 1.3255, "step": 26780 }, { "epoch": 1.43, "learning_rate": 0.00018661865751592923, "loss": 1.3279, "step": 26800 }, { "epoch": 1.43, "eval_loss": 1.3933026790618896, "eval_runtime": 49.8749, "eval_samples_per_second": 60.15, "eval_steps_per_second": 1.885, "step": 26800 }, { "epoch": 1.43, "learning_rate": 0.00018660791721375082, "loss": 1.3453, "step": 26820 }, { "epoch": 1.43, "learning_rate": 0.0001865971769115724, "loss": 1.3479, "step": 26840 }, { "epoch": 1.43, "eval_loss": 1.3933632373809814, "eval_runtime": 49.9185, "eval_samples_per_second": 60.098, "eval_steps_per_second": 1.883, "step": 26850 }, { "epoch": 1.44, "learning_rate": 0.00018658643660939402, "loss": 1.348, "step": 26860 }, { "epoch": 1.44, "learning_rate": 0.0001865756963072156, "loss": 1.2823, "step": 26880 }, { "epoch": 1.44, "learning_rate": 0.00018656495600503722, "loss": 1.3635, "step": 26900 }, { "epoch": 1.44, "eval_loss": 1.39280366897583, "eval_runtime": 49.9035, "eval_samples_per_second": 60.116, "eval_steps_per_second": 1.884, "step": 26900 }, { "epoch": 1.44, "learning_rate": 0.0001865542157028588, "loss": 1.3419, "step": 26920 }, { "epoch": 1.44, "learning_rate": 0.0001865434754006804, "loss": 1.3187, "step": 26940 }, { "epoch": 1.44, "eval_loss": 1.3931427001953125, "eval_runtime": 49.9404, "eval_samples_per_second": 60.072, "eval_steps_per_second": 1.882, "step": 26950 }, { "epoch": 1.44, "learning_rate": 0.000186532735098502, "loss": 1.3267, "step": 26960 }, { "epoch": 1.44, "learning_rate": 0.0001865219947963236, "loss": 1.2934, "step": 26980 }, { "epoch": 1.44, "learning_rate": 0.0001865112544941452, "loss": 1.3212, "step": 27000 }, { "epoch": 1.44, "eval_loss": 1.3930130004882812, "eval_runtime": 49.8854, "eval_samples_per_second": 60.138, "eval_steps_per_second": 1.884, "step": 27000 }, { "epoch": 1.44, "learning_rate": 0.0001865005141919668, "loss": 1.3249, "step": 27020 }, { "epoch": 1.44, "learning_rate": 0.00018648977388978838, "loss": 1.3197, "step": 27040 }, { "epoch": 1.45, "eval_loss": 1.392832636833191, "eval_runtime": 49.9646, "eval_samples_per_second": 60.043, "eval_steps_per_second": 1.881, "step": 27050 }, { "epoch": 1.45, "learning_rate": 0.00018647903358761, "loss": 1.3284, "step": 27060 }, { "epoch": 1.45, "learning_rate": 0.0001864682932854316, "loss": 1.2691, "step": 27080 }, { "epoch": 1.45, "learning_rate": 0.00018645755298325322, "loss": 1.3149, "step": 27100 }, { "epoch": 1.45, "eval_loss": 1.3940879106521606, "eval_runtime": 49.9065, "eval_samples_per_second": 60.112, "eval_steps_per_second": 1.884, "step": 27100 }, { "epoch": 1.45, "learning_rate": 0.0001864468126810748, "loss": 1.3257, "step": 27120 }, { "epoch": 1.45, "learning_rate": 0.0001864360723788964, "loss": 1.3285, "step": 27140 }, { "epoch": 1.45, "eval_loss": 1.3934521675109863, "eval_runtime": 49.9347, "eval_samples_per_second": 60.078, "eval_steps_per_second": 1.882, "step": 27150 }, { "epoch": 1.45, "learning_rate": 0.000186425332076718, "loss": 1.2978, "step": 27160 }, { "epoch": 1.45, "learning_rate": 0.0001864145917745396, "loss": 1.3298, "step": 27180 }, { "epoch": 1.45, "learning_rate": 0.0001864038514723612, "loss": 1.3334, "step": 27200 }, { "epoch": 1.45, "eval_loss": 1.3929413557052612, "eval_runtime": 49.9409, "eval_samples_per_second": 60.071, "eval_steps_per_second": 1.882, "step": 27200 }, { "epoch": 1.45, "learning_rate": 0.0001863931111701828, "loss": 1.3099, "step": 27220 }, { "epoch": 1.46, "learning_rate": 0.00018638237086800438, "loss": 1.3228, "step": 27240 }, { "epoch": 1.46, "eval_loss": 1.393315076828003, "eval_runtime": 49.8386, "eval_samples_per_second": 60.194, "eval_steps_per_second": 1.886, "step": 27250 }, { "epoch": 1.46, "learning_rate": 0.000186371630565826, "loss": 1.3152, "step": 27260 }, { "epoch": 1.46, "learning_rate": 0.00018636089026364758, "loss": 1.3366, "step": 27280 }, { "epoch": 1.46, "learning_rate": 0.0001863501499614692, "loss": 1.3362, "step": 27300 }, { "epoch": 1.46, "eval_loss": 1.3930094242095947, "eval_runtime": 49.8861, "eval_samples_per_second": 60.137, "eval_steps_per_second": 1.884, "step": 27300 }, { "epoch": 1.46, "learning_rate": 0.00018633940965929078, "loss": 1.2964, "step": 27320 }, { "epoch": 1.46, "learning_rate": 0.00018632866935711236, "loss": 1.3114, "step": 27340 }, { "epoch": 1.46, "eval_loss": 1.3929059505462646, "eval_runtime": 49.9407, "eval_samples_per_second": 60.071, "eval_steps_per_second": 1.882, "step": 27350 }, { "epoch": 1.46, "learning_rate": 0.00018631792905493398, "loss": 1.2916, "step": 27360 }, { "epoch": 1.46, "learning_rate": 0.00018630718875275556, "loss": 1.3073, "step": 27380 }, { "epoch": 1.46, "learning_rate": 0.00018629644845057718, "loss": 1.3526, "step": 27400 }, { "epoch": 1.46, "eval_loss": 1.3936972618103027, "eval_runtime": 49.9004, "eval_samples_per_second": 60.12, "eval_steps_per_second": 1.884, "step": 27400 }, { "epoch": 1.47, "learning_rate": 0.00018628570814839876, "loss": 1.3262, "step": 27420 }, { "epoch": 1.47, "learning_rate": 0.00018627496784622035, "loss": 1.3273, "step": 27440 }, { "epoch": 1.47, "eval_loss": 1.3936001062393188, "eval_runtime": 49.94, "eval_samples_per_second": 60.072, "eval_steps_per_second": 1.882, "step": 27450 }, { "epoch": 1.47, "learning_rate": 0.00018626422754404196, "loss": 1.3579, "step": 27460 }, { "epoch": 1.47, "learning_rate": 0.00018625348724186355, "loss": 1.3286, "step": 27480 }, { "epoch": 1.47, "learning_rate": 0.00018624274693968516, "loss": 1.2916, "step": 27500 }, { "epoch": 1.47, "eval_loss": 1.3938214778900146, "eval_runtime": 49.8847, "eval_samples_per_second": 60.139, "eval_steps_per_second": 1.884, "step": 27500 }, { "epoch": 1.47, "learning_rate": 0.00018623200663750675, "loss": 1.3314, "step": 27520 }, { "epoch": 1.47, "learning_rate": 0.00018622126633532834, "loss": 1.341, "step": 27540 }, { "epoch": 1.47, "eval_loss": 1.3936923742294312, "eval_runtime": 49.9482, "eval_samples_per_second": 60.062, "eval_steps_per_second": 1.882, "step": 27550 }, { "epoch": 1.47, "learning_rate": 0.00018621052603314995, "loss": 1.3155, "step": 27560 }, { "epoch": 1.47, "learning_rate": 0.00018619978573097154, "loss": 1.345, "step": 27580 }, { "epoch": 1.47, "learning_rate": 0.00018618904542879315, "loss": 1.3249, "step": 27600 }, { "epoch": 1.47, "eval_loss": 1.3934848308563232, "eval_runtime": 49.8694, "eval_samples_per_second": 60.157, "eval_steps_per_second": 1.885, "step": 27600 }, { "epoch": 1.48, "learning_rate": 0.00018617830512661474, "loss": 1.3474, "step": 27620 }, { "epoch": 1.48, "learning_rate": 0.00018616756482443635, "loss": 1.3055, "step": 27640 }, { "epoch": 1.48, "eval_loss": 1.3937422037124634, "eval_runtime": 49.9232, "eval_samples_per_second": 60.092, "eval_steps_per_second": 1.883, "step": 27650 }, { "epoch": 1.48, "learning_rate": 0.00018615682452225794, "loss": 1.3026, "step": 27660 }, { "epoch": 1.48, "learning_rate": 0.00018614608422007952, "loss": 1.3693, "step": 27680 }, { "epoch": 1.48, "learning_rate": 0.00018613534391790114, "loss": 1.3237, "step": 27700 }, { "epoch": 1.48, "eval_loss": 1.3935511112213135, "eval_runtime": 49.9057, "eval_samples_per_second": 60.113, "eval_steps_per_second": 1.884, "step": 27700 }, { "epoch": 1.48, "learning_rate": 0.00018612460361572275, "loss": 1.3571, "step": 27720 }, { "epoch": 1.48, "learning_rate": 0.00018611386331354434, "loss": 1.363, "step": 27740 }, { "epoch": 1.48, "eval_loss": 1.3939462900161743, "eval_runtime": 49.9303, "eval_samples_per_second": 60.084, "eval_steps_per_second": 1.883, "step": 27750 }, { "epoch": 1.48, "learning_rate": 0.00018610312301136595, "loss": 1.3343, "step": 27760 }, { "epoch": 1.48, "learning_rate": 0.00018609238270918754, "loss": 1.3462, "step": 27780 }, { "epoch": 1.49, "learning_rate": 0.00018608164240700915, "loss": 1.2996, "step": 27800 }, { "epoch": 1.49, "eval_loss": 1.393791675567627, "eval_runtime": 49.8552, "eval_samples_per_second": 60.174, "eval_steps_per_second": 1.885, "step": 27800 }, { "epoch": 1.49, "learning_rate": 0.00018607090210483074, "loss": 1.3455, "step": 27820 }, { "epoch": 1.49, "learning_rate": 0.00018606016180265232, "loss": 1.3544, "step": 27840 }, { "epoch": 1.49, "eval_loss": 1.3946839570999146, "eval_runtime": 49.9147, "eval_samples_per_second": 60.103, "eval_steps_per_second": 1.883, "step": 27850 }, { "epoch": 1.49, "learning_rate": 0.00018604942150047394, "loss": 1.3101, "step": 27860 }, { "epoch": 1.49, "learning_rate": 0.00018603868119829552, "loss": 1.3247, "step": 27880 }, { "epoch": 1.49, "learning_rate": 0.00018602794089611714, "loss": 1.3394, "step": 27900 }, { "epoch": 1.49, "eval_loss": 1.3933064937591553, "eval_runtime": 49.8942, "eval_samples_per_second": 60.127, "eval_steps_per_second": 1.884, "step": 27900 }, { "epoch": 1.49, "learning_rate": 0.00018601720059393872, "loss": 1.3662, "step": 27920 }, { "epoch": 1.49, "learning_rate": 0.0001860064602917603, "loss": 1.2861, "step": 27940 }, { "epoch": 1.49, "eval_loss": 1.3942046165466309, "eval_runtime": 49.9129, "eval_samples_per_second": 60.105, "eval_steps_per_second": 1.883, "step": 27950 }, { "epoch": 1.49, "learning_rate": 0.00018599571998958192, "loss": 1.3252, "step": 27960 }, { "epoch": 1.5, "learning_rate": 0.0001859849796874035, "loss": 1.2789, "step": 27980 }, { "epoch": 1.5, "learning_rate": 0.00018597423938522512, "loss": 1.31, "step": 28000 }, { "epoch": 1.5, "eval_loss": 1.3933945894241333, "eval_runtime": 49.8928, "eval_samples_per_second": 60.129, "eval_steps_per_second": 1.884, "step": 28000 }, { "epoch": 1.5, "learning_rate": 0.0001859634990830467, "loss": 1.3311, "step": 28020 }, { "epoch": 1.5, "learning_rate": 0.0001859527587808683, "loss": 1.348, "step": 28040 }, { "epoch": 1.5, "eval_loss": 1.3926706314086914, "eval_runtime": 49.9283, "eval_samples_per_second": 60.086, "eval_steps_per_second": 1.883, "step": 28050 }, { "epoch": 1.5, "learning_rate": 0.0001859420184786899, "loss": 1.3212, "step": 28060 }, { "epoch": 1.5, "learning_rate": 0.0001859312781765115, "loss": 1.3279, "step": 28080 }, { "epoch": 1.5, "learning_rate": 0.0001859205378743331, "loss": 1.3175, "step": 28100 }, { "epoch": 1.5, "eval_loss": 1.3925825357437134, "eval_runtime": 49.9118, "eval_samples_per_second": 60.106, "eval_steps_per_second": 1.883, "step": 28100 }, { "epoch": 1.5, "learning_rate": 0.0001859097975721547, "loss": 1.3251, "step": 28120 }, { "epoch": 1.5, "learning_rate": 0.0001858990572699763, "loss": 1.3301, "step": 28140 }, { "epoch": 1.5, "eval_loss": 1.3925762176513672, "eval_runtime": 49.9246, "eval_samples_per_second": 60.091, "eval_steps_per_second": 1.883, "step": 28150 }, { "epoch": 1.5, "learning_rate": 0.0001858883169677979, "loss": 1.313, "step": 28160 }, { "epoch": 1.51, "learning_rate": 0.00018587757666561948, "loss": 1.3034, "step": 28180 }, { "epoch": 1.51, "learning_rate": 0.0001858668363634411, "loss": 1.339, "step": 28200 }, { "epoch": 1.51, "eval_loss": 1.392657995223999, "eval_runtime": 49.8943, "eval_samples_per_second": 60.127, "eval_steps_per_second": 1.884, "step": 28200 }, { "epoch": 1.51, "learning_rate": 0.00018585609606126268, "loss": 1.3148, "step": 28220 }, { "epoch": 1.51, "learning_rate": 0.0001858453557590843, "loss": 1.3727, "step": 28240 }, { "epoch": 1.51, "eval_loss": 1.3924033641815186, "eval_runtime": 49.919, "eval_samples_per_second": 60.097, "eval_steps_per_second": 1.883, "step": 28250 }, { "epoch": 1.51, "learning_rate": 0.00018583461545690588, "loss": 1.3346, "step": 28260 }, { "epoch": 1.51, "learning_rate": 0.00018582387515472747, "loss": 1.3133, "step": 28280 }, { "epoch": 1.51, "learning_rate": 0.00018581313485254908, "loss": 1.3107, "step": 28300 }, { "epoch": 1.51, "eval_loss": 1.3919031620025635, "eval_runtime": 49.9062, "eval_samples_per_second": 60.113, "eval_steps_per_second": 1.884, "step": 28300 }, { "epoch": 1.51, "learning_rate": 0.00018580239455037067, "loss": 1.2881, "step": 28320 }, { "epoch": 1.51, "learning_rate": 0.00018579165424819228, "loss": 1.3474, "step": 28340 }, { "epoch": 1.51, "eval_loss": 1.392065405845642, "eval_runtime": 49.9338, "eval_samples_per_second": 60.08, "eval_steps_per_second": 1.882, "step": 28350 }, { "epoch": 1.52, "learning_rate": 0.0001857809139460139, "loss": 1.3724, "step": 28360 }, { "epoch": 1.52, "learning_rate": 0.00018577017364383548, "loss": 1.3168, "step": 28380 }, { "epoch": 1.52, "learning_rate": 0.0001857594333416571, "loss": 1.3254, "step": 28400 }, { "epoch": 1.52, "eval_loss": 1.392147421836853, "eval_runtime": 49.8898, "eval_samples_per_second": 60.132, "eval_steps_per_second": 1.884, "step": 28400 }, { "epoch": 1.52, "learning_rate": 0.00018574869303947868, "loss": 1.3542, "step": 28420 }, { "epoch": 1.52, "learning_rate": 0.00018573795273730027, "loss": 1.3569, "step": 28440 }, { "epoch": 1.52, "eval_loss": 1.3924622535705566, "eval_runtime": 49.9258, "eval_samples_per_second": 60.089, "eval_steps_per_second": 1.883, "step": 28450 }, { "epoch": 1.52, "learning_rate": 0.00018572721243512188, "loss": 1.3139, "step": 28460 }, { "epoch": 1.52, "learning_rate": 0.00018571647213294347, "loss": 1.3265, "step": 28480 }, { "epoch": 1.52, "learning_rate": 0.00018570573183076508, "loss": 1.3044, "step": 28500 }, { "epoch": 1.52, "eval_loss": 1.3926573991775513, "eval_runtime": 49.9088, "eval_samples_per_second": 60.11, "eval_steps_per_second": 1.883, "step": 28500 }, { "epoch": 1.52, "learning_rate": 0.00018569499152858667, "loss": 1.3366, "step": 28520 }, { "epoch": 1.52, "learning_rate": 0.00018568425122640828, "loss": 1.3042, "step": 28540 }, { "epoch": 1.53, "eval_loss": 1.3924212455749512, "eval_runtime": 49.9477, "eval_samples_per_second": 60.063, "eval_steps_per_second": 1.882, "step": 28550 }, { "epoch": 1.53, "learning_rate": 0.00018567351092422987, "loss": 1.3323, "step": 28560 }, { "epoch": 1.53, "learning_rate": 0.00018566277062205145, "loss": 1.3481, "step": 28580 }, { "epoch": 1.53, "learning_rate": 0.00018565203031987307, "loss": 1.3516, "step": 28600 }, { "epoch": 1.53, "eval_loss": 1.392707347869873, "eval_runtime": 49.881, "eval_samples_per_second": 60.143, "eval_steps_per_second": 1.884, "step": 28600 }, { "epoch": 1.53, "learning_rate": 0.00018564129001769465, "loss": 1.3056, "step": 28620 }, { "epoch": 1.53, "learning_rate": 0.00018563054971551627, "loss": 1.3222, "step": 28640 }, { "epoch": 1.53, "eval_loss": 1.3928587436676025, "eval_runtime": 49.9368, "eval_samples_per_second": 60.076, "eval_steps_per_second": 1.882, "step": 28650 }, { "epoch": 1.53, "learning_rate": 0.00018561980941333785, "loss": 1.3017, "step": 28660 }, { "epoch": 1.53, "learning_rate": 0.00018560906911115944, "loss": 1.3136, "step": 28680 }, { "epoch": 1.53, "learning_rate": 0.00018559832880898105, "loss": 1.3092, "step": 28700 }, { "epoch": 1.53, "eval_loss": 1.39238440990448, "eval_runtime": 49.9003, "eval_samples_per_second": 60.12, "eval_steps_per_second": 1.884, "step": 28700 }, { "epoch": 1.53, "learning_rate": 0.00018558758850680264, "loss": 1.2594, "step": 28720 }, { "epoch": 1.54, "learning_rate": 0.00018557684820462425, "loss": 1.3427, "step": 28740 }, { "epoch": 1.54, "eval_loss": 1.3922100067138672, "eval_runtime": 49.9586, "eval_samples_per_second": 60.05, "eval_steps_per_second": 1.882, "step": 28750 }, { "epoch": 1.54, "learning_rate": 0.00018556610790244584, "loss": 1.3591, "step": 28760 }, { "epoch": 1.54, "learning_rate": 0.00018555536760026743, "loss": 1.3285, "step": 28780 }, { "epoch": 1.54, "learning_rate": 0.00018554462729808904, "loss": 1.2918, "step": 28800 }, { "epoch": 1.54, "eval_loss": 1.3918845653533936, "eval_runtime": 49.8909, "eval_samples_per_second": 60.131, "eval_steps_per_second": 1.884, "step": 28800 }, { "epoch": 1.54, "learning_rate": 0.00018553388699591063, "loss": 1.2951, "step": 28820 }, { "epoch": 1.54, "learning_rate": 0.00018552314669373224, "loss": 1.3428, "step": 28840 }, { "epoch": 1.54, "eval_loss": 1.3924897909164429, "eval_runtime": 49.9365, "eval_samples_per_second": 60.076, "eval_steps_per_second": 1.882, "step": 28850 }, { "epoch": 1.54, "learning_rate": 0.00018551240639155383, "loss": 1.3393, "step": 28860 }, { "epoch": 1.54, "learning_rate": 0.0001855016660893754, "loss": 1.3361, "step": 28880 }, { "epoch": 1.54, "learning_rate": 0.00018549092578719703, "loss": 1.3527, "step": 28900 }, { "epoch": 1.54, "eval_loss": 1.3922792673110962, "eval_runtime": 49.9247, "eval_samples_per_second": 60.091, "eval_steps_per_second": 1.883, "step": 28900 }, { "epoch": 1.55, "learning_rate": 0.0001854801854850186, "loss": 1.3291, "step": 28920 }, { "epoch": 1.55, "learning_rate": 0.00018546944518284023, "loss": 1.3284, "step": 28940 }, { "epoch": 1.55, "eval_loss": 1.392605185508728, "eval_runtime": 49.9456, "eval_samples_per_second": 60.065, "eval_steps_per_second": 1.882, "step": 28950 }, { "epoch": 1.55, "learning_rate": 0.0001854587048806618, "loss": 1.3219, "step": 28960 }, { "epoch": 1.55, "learning_rate": 0.00018544796457848343, "loss": 1.31, "step": 28980 }, { "epoch": 1.55, "learning_rate": 0.00018543722427630504, "loss": 1.3238, "step": 29000 }, { "epoch": 1.55, "eval_loss": 1.3933876752853394, "eval_runtime": 49.9144, "eval_samples_per_second": 60.103, "eval_steps_per_second": 1.883, "step": 29000 }, { "epoch": 1.55, "learning_rate": 0.00018542648397412663, "loss": 1.3252, "step": 29020 }, { "epoch": 1.55, "learning_rate": 0.00018541574367194824, "loss": 1.2978, "step": 29040 }, { "epoch": 1.55, "eval_loss": 1.393264651298523, "eval_runtime": 49.9314, "eval_samples_per_second": 60.082, "eval_steps_per_second": 1.883, "step": 29050 }, { "epoch": 1.55, "learning_rate": 0.00018540500336976983, "loss": 1.3229, "step": 29060 }, { "epoch": 1.55, "learning_rate": 0.0001853942630675914, "loss": 1.3471, "step": 29080 }, { "epoch": 1.55, "learning_rate": 0.00018538352276541303, "loss": 1.2733, "step": 29100 }, { "epoch": 1.55, "eval_loss": 1.3934080600738525, "eval_runtime": 49.9242, "eval_samples_per_second": 60.091, "eval_steps_per_second": 1.883, "step": 29100 }, { "epoch": 1.56, "learning_rate": 0.0001853727824632346, "loss": 1.3456, "step": 29120 }, { "epoch": 1.56, "learning_rate": 0.00018536204216105623, "loss": 1.3596, "step": 29140 }, { "epoch": 1.56, "eval_loss": 1.3926467895507812, "eval_runtime": 49.9137, "eval_samples_per_second": 60.104, "eval_steps_per_second": 1.883, "step": 29150 }, { "epoch": 1.56, "learning_rate": 0.0001853513018588778, "loss": 1.305, "step": 29160 }, { "epoch": 1.56, "learning_rate": 0.0001853405615566994, "loss": 1.3292, "step": 29180 }, { "epoch": 1.56, "learning_rate": 0.000185329821254521, "loss": 1.2943, "step": 29200 }, { "epoch": 1.56, "eval_loss": 1.3923407793045044, "eval_runtime": 49.9073, "eval_samples_per_second": 60.111, "eval_steps_per_second": 1.883, "step": 29200 }, { "epoch": 1.56, "learning_rate": 0.0001853190809523426, "loss": 1.3187, "step": 29220 }, { "epoch": 1.56, "learning_rate": 0.0001853083406501642, "loss": 1.299, "step": 29240 }, { "epoch": 1.56, "eval_loss": 1.3922613859176636, "eval_runtime": 49.9241, "eval_samples_per_second": 60.091, "eval_steps_per_second": 1.883, "step": 29250 }, { "epoch": 1.56, "learning_rate": 0.0001852976003479858, "loss": 1.2977, "step": 29260 }, { "epoch": 1.56, "learning_rate": 0.00018528686004580739, "loss": 1.3288, "step": 29280 }, { "epoch": 1.57, "learning_rate": 0.000185276119743629, "loss": 1.3166, "step": 29300 }, { "epoch": 1.57, "eval_loss": 1.3935879468917847, "eval_runtime": 49.9027, "eval_samples_per_second": 60.117, "eval_steps_per_second": 1.884, "step": 29300 }, { "epoch": 1.57, "learning_rate": 0.00018526537944145059, "loss": 1.3237, "step": 29320 }, { "epoch": 1.57, "learning_rate": 0.0001852546391392722, "loss": 1.3144, "step": 29340 }, { "epoch": 1.57, "eval_loss": 1.3930631875991821, "eval_runtime": 49.9398, "eval_samples_per_second": 60.072, "eval_steps_per_second": 1.882, "step": 29350 }, { "epoch": 1.57, "learning_rate": 0.00018524389883709379, "loss": 1.3102, "step": 29360 }, { "epoch": 1.57, "learning_rate": 0.00018523315853491537, "loss": 1.335, "step": 29380 }, { "epoch": 1.57, "learning_rate": 0.00018522241823273699, "loss": 1.3373, "step": 29400 }, { "epoch": 1.57, "eval_loss": 1.3933336734771729, "eval_runtime": 49.9162, "eval_samples_per_second": 60.101, "eval_steps_per_second": 1.883, "step": 29400 }, { "epoch": 1.57, "learning_rate": 0.00018521167793055857, "loss": 1.332, "step": 29420 }, { "epoch": 1.57, "learning_rate": 0.00018520093762838019, "loss": 1.3363, "step": 29440 }, { "epoch": 1.57, "eval_loss": 1.3931363821029663, "eval_runtime": 49.9165, "eval_samples_per_second": 60.1, "eval_steps_per_second": 1.883, "step": 29450 }, { "epoch": 1.57, "learning_rate": 0.00018519019732620177, "loss": 1.3197, "step": 29460 }, { "epoch": 1.58, "learning_rate": 0.00018517945702402339, "loss": 1.2887, "step": 29480 }, { "epoch": 1.58, "learning_rate": 0.00018516871672184497, "loss": 1.3117, "step": 29500 }, { "epoch": 1.58, "eval_loss": 1.3927063941955566, "eval_runtime": 49.9535, "eval_samples_per_second": 60.056, "eval_steps_per_second": 1.882, "step": 29500 }, { "epoch": 1.58, "learning_rate": 0.00018515797641966656, "loss": 1.3437, "step": 29520 }, { "epoch": 1.58, "learning_rate": 0.00018514723611748817, "loss": 1.3171, "step": 29540 }, { "epoch": 1.58, "eval_loss": 1.392856478691101, "eval_runtime": 49.9414, "eval_samples_per_second": 60.07, "eval_steps_per_second": 1.882, "step": 29550 }, { "epoch": 1.58, "learning_rate": 0.00018513649581530976, "loss": 1.2841, "step": 29560 }, { "epoch": 1.58, "learning_rate": 0.00018512575551313137, "loss": 1.3138, "step": 29580 }, { "epoch": 1.58, "learning_rate": 0.00018511501521095299, "loss": 1.3238, "step": 29600 }, { "epoch": 1.58, "eval_loss": 1.3924871683120728, "eval_runtime": 49.8958, "eval_samples_per_second": 60.125, "eval_steps_per_second": 1.884, "step": 29600 }, { "epoch": 1.58, "learning_rate": 0.00018510427490877457, "loss": 1.3225, "step": 29620 }, { "epoch": 1.58, "learning_rate": 0.00018509353460659619, "loss": 1.3532, "step": 29640 }, { "epoch": 1.58, "eval_loss": 1.3923962116241455, "eval_runtime": 49.9475, "eval_samples_per_second": 60.063, "eval_steps_per_second": 1.882, "step": 29650 }, { "epoch": 1.58, "learning_rate": 0.00018508279430441777, "loss": 1.311, "step": 29660 }, { "epoch": 1.59, "learning_rate": 0.00018507205400223936, "loss": 1.3255, "step": 29680 }, { "epoch": 1.59, "learning_rate": 0.00018506131370006097, "loss": 1.2946, "step": 29700 }, { "epoch": 1.59, "eval_loss": 1.3928083181381226, "eval_runtime": 49.938, "eval_samples_per_second": 60.075, "eval_steps_per_second": 1.882, "step": 29700 }, { "epoch": 1.59, "learning_rate": 0.00018505111041299148, "loss": 1.3049, "step": 29720 }, { "epoch": 1.59, "learning_rate": 0.00018504037011081307, "loss": 1.3325, "step": 29740 }, { "epoch": 1.59, "eval_loss": 1.3928927183151245, "eval_runtime": 49.9328, "eval_samples_per_second": 60.081, "eval_steps_per_second": 1.883, "step": 29750 }, { "epoch": 1.59, "learning_rate": 0.00018502962980863468, "loss": 1.2984, "step": 29760 }, { "epoch": 1.59, "learning_rate": 0.00018501888950645627, "loss": 1.3365, "step": 29780 }, { "epoch": 1.59, "learning_rate": 0.00018500814920427788, "loss": 1.3203, "step": 29800 }, { "epoch": 1.59, "eval_loss": 1.3930357694625854, "eval_runtime": 49.8837, "eval_samples_per_second": 60.14, "eval_steps_per_second": 1.884, "step": 29800 }, { "epoch": 1.59, "learning_rate": 0.00018499740890209947, "loss": 1.3326, "step": 29820 }, { "epoch": 1.59, "learning_rate": 0.00018498666859992105, "loss": 1.3452, "step": 29840 }, { "epoch": 1.59, "eval_loss": 1.3932480812072754, "eval_runtime": 49.9401, "eval_samples_per_second": 60.072, "eval_steps_per_second": 1.882, "step": 29850 }, { "epoch": 1.6, "learning_rate": 0.00018497592829774267, "loss": 1.3253, "step": 29860 }, { "epoch": 1.6, "learning_rate": 0.00018496518799556425, "loss": 1.3311, "step": 29880 }, { "epoch": 1.6, "learning_rate": 0.00018495444769338587, "loss": 1.3038, "step": 29900 }, { "epoch": 1.6, "eval_loss": 1.393734097480774, "eval_runtime": 49.8662, "eval_samples_per_second": 60.161, "eval_steps_per_second": 1.885, "step": 29900 }, { "epoch": 1.6, "learning_rate": 0.00018494370739120745, "loss": 1.2999, "step": 29920 }, { "epoch": 1.6, "learning_rate": 0.00018493296708902904, "loss": 1.3208, "step": 29940 }, { "epoch": 1.6, "eval_loss": 1.3928980827331543, "eval_runtime": 49.94, "eval_samples_per_second": 60.072, "eval_steps_per_second": 1.882, "step": 29950 }, { "epoch": 1.6, "learning_rate": 0.00018492222678685065, "loss": 1.3317, "step": 29960 }, { "epoch": 1.6, "learning_rate": 0.00018491148648467224, "loss": 1.3216, "step": 29980 }, { "epoch": 1.6, "learning_rate": 0.00018490074618249385, "loss": 1.3013, "step": 30000 }, { "epoch": 1.6, "eval_loss": 1.3931738138198853, "eval_runtime": 49.8997, "eval_samples_per_second": 60.121, "eval_steps_per_second": 1.884, "step": 30000 }, { "epoch": 1.6, "learning_rate": 0.00018489000588031544, "loss": 1.2852, "step": 30020 }, { "epoch": 1.61, "learning_rate": 0.00018487926557813703, "loss": 1.2774, "step": 30040 }, { "epoch": 1.61, "eval_loss": 1.3928226232528687, "eval_runtime": 49.9408, "eval_samples_per_second": 60.071, "eval_steps_per_second": 1.882, "step": 30050 }, { "epoch": 1.61, "learning_rate": 0.00018486852527595864, "loss": 1.2834, "step": 30060 }, { "epoch": 1.61, "learning_rate": 0.00018485778497378025, "loss": 1.3385, "step": 30080 }, { "epoch": 1.61, "learning_rate": 0.00018484704467160187, "loss": 1.3576, "step": 30100 }, { "epoch": 1.61, "eval_loss": 1.3927522897720337, "eval_runtime": 49.9392, "eval_samples_per_second": 60.073, "eval_steps_per_second": 1.882, "step": 30100 }, { "epoch": 1.61, "learning_rate": 0.00018483630436942345, "loss": 1.3092, "step": 30120 }, { "epoch": 1.61, "learning_rate": 0.00018482556406724504, "loss": 1.3149, "step": 30140 }, { "epoch": 1.61, "eval_loss": 1.3941123485565186, "eval_runtime": 49.9459, "eval_samples_per_second": 60.065, "eval_steps_per_second": 1.882, "step": 30150 }, { "epoch": 1.61, "learning_rate": 0.00018481482376506665, "loss": 1.3414, "step": 30160 }, { "epoch": 1.61, "learning_rate": 0.00018480408346288824, "loss": 1.3644, "step": 30180 }, { "epoch": 1.61, "learning_rate": 0.00018479334316070985, "loss": 1.3482, "step": 30200 }, { "epoch": 1.61, "eval_loss": 1.3934115171432495, "eval_runtime": 49.9088, "eval_samples_per_second": 60.11, "eval_steps_per_second": 1.883, "step": 30200 }, { "epoch": 1.61, "learning_rate": 0.00018478260285853144, "loss": 1.3446, "step": 30220 }, { "epoch": 1.62, "learning_rate": 0.00018477186255635303, "loss": 1.3208, "step": 30240 }, { "epoch": 1.62, "eval_loss": 1.3933861255645752, "eval_runtime": 49.9422, "eval_samples_per_second": 60.069, "eval_steps_per_second": 1.882, "step": 30250 }, { "epoch": 1.62, "learning_rate": 0.00018476112225417464, "loss": 1.2915, "step": 30260 }, { "epoch": 1.62, "learning_rate": 0.00018475038195199623, "loss": 1.2714, "step": 30280 }, { "epoch": 1.62, "learning_rate": 0.00018473964164981784, "loss": 1.2847, "step": 30300 }, { "epoch": 1.62, "eval_loss": 1.3934121131896973, "eval_runtime": 49.906, "eval_samples_per_second": 60.113, "eval_steps_per_second": 1.884, "step": 30300 }, { "epoch": 1.62, "learning_rate": 0.00018472890134763943, "loss": 1.3169, "step": 30320 }, { "epoch": 1.62, "learning_rate": 0.000184718161045461, "loss": 1.3239, "step": 30340 }, { "epoch": 1.62, "eval_loss": 1.3946083784103394, "eval_runtime": 49.9402, "eval_samples_per_second": 60.072, "eval_steps_per_second": 1.882, "step": 30350 }, { "epoch": 1.62, "learning_rate": 0.00018470742074328263, "loss": 1.3185, "step": 30360 }, { "epoch": 1.62, "learning_rate": 0.0001846966804411042, "loss": 1.3229, "step": 30380 }, { "epoch": 1.62, "learning_rate": 0.00018468594013892583, "loss": 1.2981, "step": 30400 }, { "epoch": 1.62, "eval_loss": 1.3945001363754272, "eval_runtime": 49.8812, "eval_samples_per_second": 60.143, "eval_steps_per_second": 1.884, "step": 30400 }, { "epoch": 1.63, "learning_rate": 0.0001846751998367474, "loss": 1.3203, "step": 30420 }, { "epoch": 1.63, "learning_rate": 0.000184664459534569, "loss": 1.2967, "step": 30440 }, { "epoch": 1.63, "eval_loss": 1.393681287765503, "eval_runtime": 49.94, "eval_samples_per_second": 60.072, "eval_steps_per_second": 1.882, "step": 30450 }, { "epoch": 1.63, "learning_rate": 0.0001846537192323906, "loss": 1.2815, "step": 30460 }, { "epoch": 1.63, "learning_rate": 0.0001846429789302122, "loss": 1.3216, "step": 30480 }, { "epoch": 1.63, "learning_rate": 0.0001846322386280338, "loss": 1.3504, "step": 30500 }, { "epoch": 1.63, "eval_loss": 1.3937861919403076, "eval_runtime": 49.8911, "eval_samples_per_second": 60.131, "eval_steps_per_second": 1.884, "step": 30500 }, { "epoch": 1.63, "learning_rate": 0.0001846214983258554, "loss": 1.2858, "step": 30520 }, { "epoch": 1.63, "learning_rate": 0.00018461075802367699, "loss": 1.322, "step": 30540 }, { "epoch": 1.63, "eval_loss": 1.3939573764801025, "eval_runtime": 49.8953, "eval_samples_per_second": 60.126, "eval_steps_per_second": 1.884, "step": 30550 }, { "epoch": 1.63, "learning_rate": 0.0001846000177214986, "loss": 1.2909, "step": 30560 }, { "epoch": 1.63, "learning_rate": 0.00018458927741932019, "loss": 1.2957, "step": 30580 }, { "epoch": 1.64, "learning_rate": 0.0001845785371171418, "loss": 1.3201, "step": 30600 }, { "epoch": 1.64, "eval_loss": 1.3934041261672974, "eval_runtime": 49.8947, "eval_samples_per_second": 60.127, "eval_steps_per_second": 1.884, "step": 30600 }, { "epoch": 1.64, "learning_rate": 0.00018456779681496339, "loss": 1.2852, "step": 30620 }, { "epoch": 1.64, "learning_rate": 0.000184557056512785, "loss": 1.3183, "step": 30640 }, { "epoch": 1.64, "eval_loss": 1.3938255310058594, "eval_runtime": 49.9299, "eval_samples_per_second": 60.084, "eval_steps_per_second": 1.883, "step": 30650 }, { "epoch": 1.64, "learning_rate": 0.00018454631621060659, "loss": 1.3321, "step": 30660 }, { "epoch": 1.64, "learning_rate": 0.00018453557590842817, "loss": 1.321, "step": 30680 }, { "epoch": 1.64, "learning_rate": 0.00018452483560624979, "loss": 1.304, "step": 30700 }, { "epoch": 1.64, "eval_loss": 1.3930375576019287, "eval_runtime": 49.9022, "eval_samples_per_second": 60.118, "eval_steps_per_second": 1.884, "step": 30700 }, { "epoch": 1.64, "learning_rate": 0.0001845140953040714, "loss": 1.2811, "step": 30720 }, { "epoch": 1.64, "learning_rate": 0.00018450335500189299, "loss": 1.3484, "step": 30740 }, { "epoch": 1.64, "eval_loss": 1.3932937383651733, "eval_runtime": 49.8794, "eval_samples_per_second": 60.145, "eval_steps_per_second": 1.885, "step": 30750 }, { "epoch": 1.64, "learning_rate": 0.0001844926146997146, "loss": 1.3324, "step": 30760 }, { "epoch": 1.64, "learning_rate": 0.00018448187439753619, "loss": 1.3314, "step": 30780 }, { "epoch": 1.65, "learning_rate": 0.0001844711340953578, "loss": 1.3306, "step": 30800 }, { "epoch": 1.65, "eval_loss": 1.3924168348312378, "eval_runtime": 49.8668, "eval_samples_per_second": 60.16, "eval_steps_per_second": 1.885, "step": 30800 }, { "epoch": 1.65, "learning_rate": 0.00018446039379317939, "loss": 1.3495, "step": 30820 }, { "epoch": 1.65, "learning_rate": 0.00018444965349100097, "loss": 1.2725, "step": 30840 }, { "epoch": 1.65, "eval_loss": 1.3924477100372314, "eval_runtime": 49.9112, "eval_samples_per_second": 60.107, "eval_steps_per_second": 1.883, "step": 30850 }, { "epoch": 1.65, "learning_rate": 0.00018443891318882259, "loss": 1.3156, "step": 30860 }, { "epoch": 1.65, "learning_rate": 0.00018442817288664417, "loss": 1.2995, "step": 30880 }, { "epoch": 1.65, "learning_rate": 0.00018441743258446579, "loss": 1.3582, "step": 30900 }, { "epoch": 1.65, "eval_loss": 1.3932455778121948, "eval_runtime": 49.9019, "eval_samples_per_second": 60.118, "eval_steps_per_second": 1.884, "step": 30900 }, { "epoch": 1.65, "learning_rate": 0.00018440669228228737, "loss": 1.3256, "step": 30920 }, { "epoch": 1.65, "learning_rate": 0.00018439595198010896, "loss": 1.2901, "step": 30940 }, { "epoch": 1.65, "eval_loss": 1.3927903175354004, "eval_runtime": 49.9444, "eval_samples_per_second": 60.067, "eval_steps_per_second": 1.882, "step": 30950 }, { "epoch": 1.65, "learning_rate": 0.00018438521167793057, "loss": 1.3528, "step": 30960 }, { "epoch": 1.66, "learning_rate": 0.00018437447137575216, "loss": 1.2902, "step": 30980 }, { "epoch": 1.66, "learning_rate": 0.00018436373107357377, "loss": 1.3044, "step": 31000 }, { "epoch": 1.66, "eval_loss": 1.3926588296890259, "eval_runtime": 49.8979, "eval_samples_per_second": 60.123, "eval_steps_per_second": 1.884, "step": 31000 }, { "epoch": 1.66, "learning_rate": 0.00018435299077139536, "loss": 1.33, "step": 31020 }, { "epoch": 1.66, "learning_rate": 0.00018434225046921697, "loss": 1.3354, "step": 31040 }, { "epoch": 1.66, "eval_loss": 1.3930628299713135, "eval_runtime": 49.9406, "eval_samples_per_second": 60.071, "eval_steps_per_second": 1.882, "step": 31050 }, { "epoch": 1.66, "learning_rate": 0.00018433151016703856, "loss": 1.3041, "step": 31060 }, { "epoch": 1.66, "learning_rate": 0.00018432076986486015, "loss": 1.3329, "step": 31080 }, { "epoch": 1.66, "learning_rate": 0.00018431002956268176, "loss": 1.2903, "step": 31100 }, { "epoch": 1.66, "eval_loss": 1.393136739730835, "eval_runtime": 49.9088, "eval_samples_per_second": 60.11, "eval_steps_per_second": 1.883, "step": 31100 }, { "epoch": 1.66, "learning_rate": 0.00018429928926050335, "loss": 1.3196, "step": 31120 }, { "epoch": 1.66, "learning_rate": 0.00018428854895832496, "loss": 1.3596, "step": 31140 }, { "epoch": 1.66, "eval_loss": 1.3931183815002441, "eval_runtime": 49.9578, "eval_samples_per_second": 60.051, "eval_steps_per_second": 1.882, "step": 31150 }, { "epoch": 1.66, "learning_rate": 0.00018427780865614655, "loss": 1.2731, "step": 31160 }, { "epoch": 1.67, "learning_rate": 0.00018426706835396813, "loss": 1.3034, "step": 31180 }, { "epoch": 1.67, "learning_rate": 0.00018425632805178975, "loss": 1.3271, "step": 31200 }, { "epoch": 1.67, "eval_loss": 1.3934111595153809, "eval_runtime": 49.8674, "eval_samples_per_second": 60.16, "eval_steps_per_second": 1.885, "step": 31200 }, { "epoch": 1.67, "learning_rate": 0.00018424558774961133, "loss": 1.2989, "step": 31220 }, { "epoch": 1.67, "learning_rate": 0.00018423484744743295, "loss": 1.3109, "step": 31240 }, { "epoch": 1.67, "eval_loss": 1.3935177326202393, "eval_runtime": 49.9074, "eval_samples_per_second": 60.111, "eval_steps_per_second": 1.883, "step": 31250 }, { "epoch": 1.67, "learning_rate": 0.00018422410714525453, "loss": 1.3119, "step": 31260 }, { "epoch": 1.67, "learning_rate": 0.00018421336684307612, "loss": 1.3452, "step": 31280 }, { "epoch": 1.67, "learning_rate": 0.00018420262654089773, "loss": 1.3161, "step": 31300 }, { "epoch": 1.67, "eval_loss": 1.3936994075775146, "eval_runtime": 49.8829, "eval_samples_per_second": 60.141, "eval_steps_per_second": 1.884, "step": 31300 }, { "epoch": 1.67, "learning_rate": 0.00018419188623871932, "loss": 1.3162, "step": 31320 }, { "epoch": 1.67, "learning_rate": 0.00018418114593654093, "loss": 1.3291, "step": 31340 }, { "epoch": 1.68, "eval_loss": 1.3944883346557617, "eval_runtime": 49.9276, "eval_samples_per_second": 60.087, "eval_steps_per_second": 1.883, "step": 31350 }, { "epoch": 1.68, "learning_rate": 0.00018417040563436255, "loss": 1.3424, "step": 31360 }, { "epoch": 1.68, "learning_rate": 0.00018415966533218413, "loss": 1.3338, "step": 31380 }, { "epoch": 1.68, "learning_rate": 0.00018414892503000575, "loss": 1.337, "step": 31400 }, { "epoch": 1.68, "eval_loss": 1.39397394657135, "eval_runtime": 49.9112, "eval_samples_per_second": 60.107, "eval_steps_per_second": 1.883, "step": 31400 }, { "epoch": 1.68, "learning_rate": 0.00018413818472782733, "loss": 1.3138, "step": 31420 }, { "epoch": 1.68, "learning_rate": 0.00018412744442564892, "loss": 1.3364, "step": 31440 }, { "epoch": 1.68, "eval_loss": 1.3931034803390503, "eval_runtime": 49.952, "eval_samples_per_second": 60.058, "eval_steps_per_second": 1.882, "step": 31450 }, { "epoch": 1.68, "learning_rate": 0.00018411670412347053, "loss": 1.2953, "step": 31460 }, { "epoch": 1.68, "learning_rate": 0.00018410596382129212, "loss": 1.3017, "step": 31480 }, { "epoch": 1.68, "learning_rate": 0.00018409522351911373, "loss": 1.349, "step": 31500 }, { "epoch": 1.68, "eval_loss": 1.3928076028823853, "eval_runtime": 49.926, "eval_samples_per_second": 60.089, "eval_steps_per_second": 1.883, "step": 31500 }, { "epoch": 1.68, "learning_rate": 0.00018408448321693532, "loss": 1.3271, "step": 31520 }, { "epoch": 1.69, "learning_rate": 0.00018407374291475693, "loss": 1.3007, "step": 31540 }, { "epoch": 1.69, "eval_loss": 1.3933926820755005, "eval_runtime": 49.9339, "eval_samples_per_second": 60.079, "eval_steps_per_second": 1.882, "step": 31550 }, { "epoch": 1.69, "learning_rate": 0.00018406300261257852, "loss": 1.2924, "step": 31560 }, { "epoch": 1.69, "learning_rate": 0.0001840522623104001, "loss": 1.3428, "step": 31580 }, { "epoch": 1.69, "learning_rate": 0.00018404152200822172, "loss": 1.283, "step": 31600 }, { "epoch": 1.69, "eval_loss": 1.3929836750030518, "eval_runtime": 49.882, "eval_samples_per_second": 60.142, "eval_steps_per_second": 1.884, "step": 31600 }, { "epoch": 1.69, "learning_rate": 0.0001840307817060433, "loss": 1.256, "step": 31620 }, { "epoch": 1.69, "learning_rate": 0.00018402004140386492, "loss": 1.3488, "step": 31640 }, { "epoch": 1.69, "eval_loss": 1.3922340869903564, "eval_runtime": 49.9187, "eval_samples_per_second": 60.098, "eval_steps_per_second": 1.883, "step": 31650 }, { "epoch": 1.69, "learning_rate": 0.0001840093011016865, "loss": 1.3199, "step": 31660 }, { "epoch": 1.69, "learning_rate": 0.0001839985607995081, "loss": 1.3469, "step": 31680 }, { "epoch": 1.69, "learning_rate": 0.0001839878204973297, "loss": 1.3593, "step": 31700 }, { "epoch": 1.69, "eval_loss": 1.3929144144058228, "eval_runtime": 49.8826, "eval_samples_per_second": 60.141, "eval_steps_per_second": 1.884, "step": 31700 }, { "epoch": 1.69, "learning_rate": 0.0001839770801951513, "loss": 1.3439, "step": 31720 }, { "epoch": 1.7, "learning_rate": 0.0001839668769080818, "loss": 1.2887, "step": 31740 }, { "epoch": 1.7, "eval_loss": 1.3930996656417847, "eval_runtime": 49.9311, "eval_samples_per_second": 60.083, "eval_steps_per_second": 1.883, "step": 31750 }, { "epoch": 1.7, "learning_rate": 0.0001839561366059034, "loss": 1.2978, "step": 31760 }, { "epoch": 1.7, "learning_rate": 0.000183945396303725, "loss": 1.3095, "step": 31780 }, { "epoch": 1.7, "learning_rate": 0.0001839346560015466, "loss": 1.2788, "step": 31800 }, { "epoch": 1.7, "eval_loss": 1.3926992416381836, "eval_runtime": 49.8745, "eval_samples_per_second": 60.151, "eval_steps_per_second": 1.885, "step": 31800 }, { "epoch": 1.7, "learning_rate": 0.00018392391569936823, "loss": 1.3313, "step": 31820 }, { "epoch": 1.7, "learning_rate": 0.0001839131753971898, "loss": 1.3414, "step": 31840 }, { "epoch": 1.7, "eval_loss": 1.392523169517517, "eval_runtime": 49.9178, "eval_samples_per_second": 60.099, "eval_steps_per_second": 1.883, "step": 31850 }, { "epoch": 1.7, "learning_rate": 0.00018390243509501143, "loss": 1.339, "step": 31860 }, { "epoch": 1.7, "learning_rate": 0.000183891694792833, "loss": 1.3102, "step": 31880 }, { "epoch": 1.7, "learning_rate": 0.0001838809544906546, "loss": 1.3429, "step": 31900 }, { "epoch": 1.7, "eval_loss": 1.3921146392822266, "eval_runtime": 49.9058, "eval_samples_per_second": 60.113, "eval_steps_per_second": 1.884, "step": 31900 }, { "epoch": 1.71, "learning_rate": 0.0001838702141884762, "loss": 1.2851, "step": 31920 }, { "epoch": 1.71, "learning_rate": 0.00018386001090140672, "loss": 1.3249, "step": 31940 }, { "epoch": 1.71, "eval_loss": 1.3931465148925781, "eval_runtime": 49.9509, "eval_samples_per_second": 60.059, "eval_steps_per_second": 1.882, "step": 31950 }, { "epoch": 1.71, "learning_rate": 0.0001838492705992283, "loss": 1.3222, "step": 31960 }, { "epoch": 1.71, "learning_rate": 0.00018383853029704992, "loss": 1.3342, "step": 31980 }, { "epoch": 1.71, "learning_rate": 0.0001838277899948715, "loss": 1.3299, "step": 32000 }, { "epoch": 1.71, "eval_loss": 1.3919731378555298, "eval_runtime": 49.8835, "eval_samples_per_second": 60.14, "eval_steps_per_second": 1.884, "step": 32000 }, { "epoch": 1.71, "learning_rate": 0.00018381704969269312, "loss": 1.3588, "step": 32020 }, { "epoch": 1.71, "learning_rate": 0.0001838063093905147, "loss": 1.3282, "step": 32040 }, { "epoch": 1.71, "eval_loss": 1.3922117948532104, "eval_runtime": 49.9432, "eval_samples_per_second": 60.068, "eval_steps_per_second": 1.882, "step": 32050 }, { "epoch": 1.71, "learning_rate": 0.0001837955690883363, "loss": 1.3059, "step": 32060 }, { "epoch": 1.71, "learning_rate": 0.0001837848287861579, "loss": 1.3546, "step": 32080 }, { "epoch": 1.72, "learning_rate": 0.0001837740884839795, "loss": 1.3122, "step": 32100 }, { "epoch": 1.72, "eval_loss": 1.3919049501419067, "eval_runtime": 49.8831, "eval_samples_per_second": 60.141, "eval_steps_per_second": 1.884, "step": 32100 }, { "epoch": 1.72, "learning_rate": 0.0001837633481818011, "loss": 1.3176, "step": 32120 }, { "epoch": 1.72, "learning_rate": 0.0001837526078796227, "loss": 1.3534, "step": 32140 }, { "epoch": 1.72, "eval_loss": 1.3918403387069702, "eval_runtime": 49.906, "eval_samples_per_second": 60.113, "eval_steps_per_second": 1.884, "step": 32150 }, { "epoch": 1.72, "learning_rate": 0.00018374186757744428, "loss": 1.3272, "step": 32160 }, { "epoch": 1.72, "learning_rate": 0.0001837311272752659, "loss": 1.3243, "step": 32180 }, { "epoch": 1.72, "learning_rate": 0.00018372038697308748, "loss": 1.3227, "step": 32200 }, { "epoch": 1.72, "eval_loss": 1.392651081085205, "eval_runtime": 49.8869, "eval_samples_per_second": 60.136, "eval_steps_per_second": 1.884, "step": 32200 }, { "epoch": 1.72, "learning_rate": 0.0001837096466709091, "loss": 1.2965, "step": 32220 }, { "epoch": 1.72, "learning_rate": 0.00018369890636873068, "loss": 1.2821, "step": 32240 }, { "epoch": 1.72, "eval_loss": 1.393078088760376, "eval_runtime": 49.917, "eval_samples_per_second": 60.1, "eval_steps_per_second": 1.883, "step": 32250 }, { "epoch": 1.72, "learning_rate": 0.00018368816606655227, "loss": 1.3525, "step": 32260 }, { "epoch": 1.72, "learning_rate": 0.0001836774257643739, "loss": 1.3203, "step": 32280 }, { "epoch": 1.73, "learning_rate": 0.0001836666854621955, "loss": 1.2915, "step": 32300 }, { "epoch": 1.73, "eval_loss": 1.392730951309204, "eval_runtime": 49.9003, "eval_samples_per_second": 60.12, "eval_steps_per_second": 1.884, "step": 32300 }, { "epoch": 1.73, "learning_rate": 0.0001836559451600171, "loss": 1.2898, "step": 32320 }, { "epoch": 1.73, "learning_rate": 0.0001836452048578387, "loss": 1.3075, "step": 32340 }, { "epoch": 1.73, "eval_loss": 1.3931732177734375, "eval_runtime": 49.939, "eval_samples_per_second": 60.073, "eval_steps_per_second": 1.882, "step": 32350 }, { "epoch": 1.73, "learning_rate": 0.00018363446455566028, "loss": 1.3341, "step": 32360 }, { "epoch": 1.73, "learning_rate": 0.0001836237242534819, "loss": 1.2627, "step": 32380 }, { "epoch": 1.73, "learning_rate": 0.00018361298395130348, "loss": 1.3147, "step": 32400 }, { "epoch": 1.73, "eval_loss": 1.3935468196868896, "eval_runtime": 49.9064, "eval_samples_per_second": 60.113, "eval_steps_per_second": 1.884, "step": 32400 }, { "epoch": 1.73, "learning_rate": 0.0001836022436491251, "loss": 1.3023, "step": 32420 }, { "epoch": 1.73, "learning_rate": 0.00018359150334694668, "loss": 1.3501, "step": 32440 }, { "epoch": 1.73, "eval_loss": 1.394555687904358, "eval_runtime": 49.9489, "eval_samples_per_second": 60.061, "eval_steps_per_second": 1.882, "step": 32450 }, { "epoch": 1.73, "learning_rate": 0.00018358076304476827, "loss": 1.3262, "step": 32460 }, { "epoch": 1.74, "learning_rate": 0.00018357002274258988, "loss": 1.3019, "step": 32480 }, { "epoch": 1.74, "learning_rate": 0.00018355928244041147, "loss": 1.3435, "step": 32500 }, { "epoch": 1.74, "eval_loss": 1.393541693687439, "eval_runtime": 49.8892, "eval_samples_per_second": 60.133, "eval_steps_per_second": 1.884, "step": 32500 }, { "epoch": 1.74, "learning_rate": 0.00018354854213823308, "loss": 1.3309, "step": 32520 }, { "epoch": 1.74, "learning_rate": 0.00018353780183605467, "loss": 1.3178, "step": 32540 }, { "epoch": 1.74, "eval_loss": 1.393633246421814, "eval_runtime": 49.9132, "eval_samples_per_second": 60.104, "eval_steps_per_second": 1.883, "step": 32550 }, { "epoch": 1.74, "learning_rate": 0.00018352706153387625, "loss": 1.3195, "step": 32560 }, { "epoch": 1.74, "learning_rate": 0.00018351632123169787, "loss": 1.3246, "step": 32580 }, { "epoch": 1.74, "learning_rate": 0.00018350558092951945, "loss": 1.334, "step": 32600 }, { "epoch": 1.74, "eval_loss": 1.3931056261062622, "eval_runtime": 49.9067, "eval_samples_per_second": 60.112, "eval_steps_per_second": 1.884, "step": 32600 }, { "epoch": 1.74, "learning_rate": 0.00018349484062734107, "loss": 1.3078, "step": 32620 }, { "epoch": 1.74, "learning_rate": 0.00018348410032516265, "loss": 1.3127, "step": 32640 }, { "epoch": 1.74, "eval_loss": 1.3928685188293457, "eval_runtime": 49.9522, "eval_samples_per_second": 60.057, "eval_steps_per_second": 1.882, "step": 32650 }, { "epoch": 1.75, "learning_rate": 0.00018347336002298424, "loss": 1.335, "step": 32660 }, { "epoch": 1.75, "learning_rate": 0.00018346261972080585, "loss": 1.3122, "step": 32680 }, { "epoch": 1.75, "learning_rate": 0.00018345187941862744, "loss": 1.3307, "step": 32700 }, { "epoch": 1.75, "eval_loss": 1.3928115367889404, "eval_runtime": 49.8507, "eval_samples_per_second": 60.18, "eval_steps_per_second": 1.886, "step": 32700 }, { "epoch": 1.75, "learning_rate": 0.00018344113911644905, "loss": 1.3359, "step": 32720 }, { "epoch": 1.75, "learning_rate": 0.00018343039881427064, "loss": 1.3388, "step": 32740 }, { "epoch": 1.75, "eval_loss": 1.394007921218872, "eval_runtime": 49.8839, "eval_samples_per_second": 60.14, "eval_steps_per_second": 1.884, "step": 32750 }, { "epoch": 1.75, "learning_rate": 0.00018341965851209225, "loss": 1.2486, "step": 32760 }, { "epoch": 1.75, "learning_rate": 0.00018340891820991384, "loss": 1.3449, "step": 32780 }, { "epoch": 1.75, "learning_rate": 0.00018339817790773543, "loss": 1.3296, "step": 32800 }, { "epoch": 1.75, "eval_loss": 1.3931111097335815, "eval_runtime": 49.8634, "eval_samples_per_second": 60.164, "eval_steps_per_second": 1.885, "step": 32800 }, { "epoch": 1.75, "learning_rate": 0.00018338743760555704, "loss": 1.3501, "step": 32820 }, { "epoch": 1.75, "learning_rate": 0.00018337669730337863, "loss": 1.321, "step": 32840 }, { "epoch": 1.76, "eval_loss": 1.3937820196151733, "eval_runtime": 49.9247, "eval_samples_per_second": 60.09, "eval_steps_per_second": 1.883, "step": 32850 }, { "epoch": 1.76, "learning_rate": 0.00018336595700120024, "loss": 1.3138, "step": 32860 }, { "epoch": 1.76, "learning_rate": 0.00018335521669902183, "loss": 1.3292, "step": 32880 }, { "epoch": 1.76, "learning_rate": 0.0001833444763968434, "loss": 1.2905, "step": 32900 }, { "epoch": 1.76, "eval_loss": 1.3924466371536255, "eval_runtime": 49.9006, "eval_samples_per_second": 60.12, "eval_steps_per_second": 1.884, "step": 32900 }, { "epoch": 1.76, "learning_rate": 0.00018333373609466505, "loss": 1.3164, "step": 32920 }, { "epoch": 1.76, "learning_rate": 0.00018332299579248664, "loss": 1.3677, "step": 32940 }, { "epoch": 1.76, "eval_loss": 1.3932099342346191, "eval_runtime": 49.9247, "eval_samples_per_second": 60.09, "eval_steps_per_second": 1.883, "step": 32950 }, { "epoch": 1.76, "learning_rate": 0.00018331225549030823, "loss": 1.2867, "step": 32960 }, { "epoch": 1.76, "learning_rate": 0.00018330151518812984, "loss": 1.302, "step": 32980 }, { "epoch": 1.76, "learning_rate": 0.00018329077488595143, "loss": 1.3096, "step": 33000 }, { "epoch": 1.76, "eval_loss": 1.3928850889205933, "eval_runtime": 49.889, "eval_samples_per_second": 60.133, "eval_steps_per_second": 1.884, "step": 33000 }, { "epoch": 1.76, "learning_rate": 0.00018328003458377304, "loss": 1.3185, "step": 33020 }, { "epoch": 1.77, "learning_rate": 0.00018326929428159463, "loss": 1.325, "step": 33040 }, { "epoch": 1.77, "eval_loss": 1.3931729793548584, "eval_runtime": 49.9041, "eval_samples_per_second": 60.115, "eval_steps_per_second": 1.884, "step": 33050 }, { "epoch": 1.77, "learning_rate": 0.0001832585539794162, "loss": 1.2995, "step": 33060 }, { "epoch": 1.77, "learning_rate": 0.00018324781367723783, "loss": 1.3011, "step": 33080 }, { "epoch": 1.77, "learning_rate": 0.0001832370733750594, "loss": 1.3019, "step": 33100 }, { "epoch": 1.77, "eval_loss": 1.3940869569778442, "eval_runtime": 49.8445, "eval_samples_per_second": 60.187, "eval_steps_per_second": 1.886, "step": 33100 }, { "epoch": 1.77, "learning_rate": 0.00018322633307288103, "loss": 1.3061, "step": 33120 }, { "epoch": 1.77, "learning_rate": 0.0001832155927707026, "loss": 1.329, "step": 33140 }, { "epoch": 1.77, "eval_loss": 1.393612265586853, "eval_runtime": 49.8664, "eval_samples_per_second": 60.161, "eval_steps_per_second": 1.885, "step": 33150 }, { "epoch": 1.77, "learning_rate": 0.0001832048524685242, "loss": 1.3326, "step": 33160 }, { "epoch": 1.77, "learning_rate": 0.0001831941121663458, "loss": 1.343, "step": 33180 }, { "epoch": 1.77, "learning_rate": 0.0001831833718641674, "loss": 1.3023, "step": 33200 }, { "epoch": 1.77, "eval_loss": 1.393221139907837, "eval_runtime": 49.7506, "eval_samples_per_second": 60.301, "eval_steps_per_second": 1.889, "step": 33200 }, { "epoch": 1.77, "learning_rate": 0.000183172631561989, "loss": 1.3442, "step": 33220 }, { "epoch": 1.78, "learning_rate": 0.0001831618912598106, "loss": 1.3143, "step": 33240 }, { "epoch": 1.78, "eval_loss": 1.3926904201507568, "eval_runtime": 49.9123, "eval_samples_per_second": 60.105, "eval_steps_per_second": 1.883, "step": 33250 }, { "epoch": 1.78, "learning_rate": 0.0001831511509576322, "loss": 1.3522, "step": 33260 }, { "epoch": 1.78, "learning_rate": 0.0001831404106554538, "loss": 1.3147, "step": 33280 }, { "epoch": 1.78, "learning_rate": 0.00018312967035327539, "loss": 1.3086, "step": 33300 }, { "epoch": 1.78, "eval_loss": 1.3926082849502563, "eval_runtime": 49.9413, "eval_samples_per_second": 60.071, "eval_steps_per_second": 1.882, "step": 33300 }, { "epoch": 1.78, "learning_rate": 0.000183118930051097, "loss": 1.3294, "step": 33320 }, { "epoch": 1.78, "learning_rate": 0.00018310818974891859, "loss": 1.3597, "step": 33340 }, { "epoch": 1.78, "eval_loss": 1.3932656049728394, "eval_runtime": 49.9378, "eval_samples_per_second": 60.075, "eval_steps_per_second": 1.882, "step": 33350 }, { "epoch": 1.78, "learning_rate": 0.0001830974494467402, "loss": 1.2729, "step": 33360 }, { "epoch": 1.78, "learning_rate": 0.00018308670914456179, "loss": 1.3344, "step": 33380 }, { "epoch": 1.78, "learning_rate": 0.00018307596884238337, "loss": 1.3442, "step": 33400 }, { "epoch": 1.78, "eval_loss": 1.39275062084198, "eval_runtime": 49.874, "eval_samples_per_second": 60.152, "eval_steps_per_second": 1.885, "step": 33400 }, { "epoch": 1.79, "learning_rate": 0.00018306522854020499, "loss": 1.3094, "step": 33420 }, { "epoch": 1.79, "learning_rate": 0.00018305448823802657, "loss": 1.315, "step": 33440 }, { "epoch": 1.79, "eval_loss": 1.392531394958496, "eval_runtime": 49.9096, "eval_samples_per_second": 60.109, "eval_steps_per_second": 1.883, "step": 33450 }, { "epoch": 1.79, "learning_rate": 0.00018304374793584819, "loss": 1.3088, "step": 33460 }, { "epoch": 1.79, "learning_rate": 0.00018303300763366977, "loss": 1.3537, "step": 33480 }, { "epoch": 1.79, "learning_rate": 0.00018302226733149136, "loss": 1.3193, "step": 33500 }, { "epoch": 1.79, "eval_loss": 1.391752004623413, "eval_runtime": 49.9064, "eval_samples_per_second": 60.113, "eval_steps_per_second": 1.884, "step": 33500 }, { "epoch": 1.79, "learning_rate": 0.00018301152702931297, "loss": 1.305, "step": 33520 }, { "epoch": 1.79, "learning_rate": 0.00018300078672713459, "loss": 1.3429, "step": 33540 }, { "epoch": 1.79, "eval_loss": 1.3929482698440552, "eval_runtime": 49.9527, "eval_samples_per_second": 60.057, "eval_steps_per_second": 1.882, "step": 33550 }, { "epoch": 1.79, "learning_rate": 0.00018299004642495617, "loss": 1.2902, "step": 33560 }, { "epoch": 1.79, "learning_rate": 0.00018297930612277779, "loss": 1.3509, "step": 33580 }, { "epoch": 1.8, "learning_rate": 0.00018296856582059937, "loss": 1.298, "step": 33600 }, { "epoch": 1.8, "eval_loss": 1.3924405574798584, "eval_runtime": 49.8811, "eval_samples_per_second": 60.143, "eval_steps_per_second": 1.884, "step": 33600 }, { "epoch": 1.8, "learning_rate": 0.00018295782551842099, "loss": 1.3535, "step": 33620 }, { "epoch": 1.8, "learning_rate": 0.00018294708521624257, "loss": 1.3262, "step": 33640 }, { "epoch": 1.8, "eval_loss": 1.3920501470565796, "eval_runtime": 49.9199, "eval_samples_per_second": 60.096, "eval_steps_per_second": 1.883, "step": 33650 }, { "epoch": 1.8, "learning_rate": 0.00018293634491406419, "loss": 1.3319, "step": 33660 }, { "epoch": 1.8, "learning_rate": 0.00018292560461188577, "loss": 1.3148, "step": 33680 }, { "epoch": 1.8, "learning_rate": 0.00018291486430970736, "loss": 1.3394, "step": 33700 }, { "epoch": 1.8, "eval_loss": 1.392612338066101, "eval_runtime": 49.9021, "eval_samples_per_second": 60.118, "eval_steps_per_second": 1.884, "step": 33700 }, { "epoch": 1.8, "learning_rate": 0.00018290412400752897, "loss": 1.3368, "step": 33720 }, { "epoch": 1.8, "learning_rate": 0.00018289338370535056, "loss": 1.3229, "step": 33740 }, { "epoch": 1.8, "eval_loss": 1.392616868019104, "eval_runtime": 49.9073, "eval_samples_per_second": 60.112, "eval_steps_per_second": 1.883, "step": 33750 }, { "epoch": 1.8, "learning_rate": 0.00018288264340317217, "loss": 1.2909, "step": 33760 }, { "epoch": 1.8, "learning_rate": 0.00018287190310099376, "loss": 1.3625, "step": 33780 }, { "epoch": 1.81, "learning_rate": 0.00018286116279881535, "loss": 1.2939, "step": 33800 }, { "epoch": 1.81, "eval_loss": 1.3921815156936646, "eval_runtime": 49.9158, "eval_samples_per_second": 60.101, "eval_steps_per_second": 1.883, "step": 33800 }, { "epoch": 1.81, "learning_rate": 0.00018285042249663696, "loss": 1.3279, "step": 33820 }, { "epoch": 1.81, "learning_rate": 0.00018283968219445855, "loss": 1.3285, "step": 33840 }, { "epoch": 1.81, "eval_loss": 1.3925936222076416, "eval_runtime": 49.9425, "eval_samples_per_second": 60.069, "eval_steps_per_second": 1.882, "step": 33850 }, { "epoch": 1.81, "learning_rate": 0.00018282894189228016, "loss": 1.2904, "step": 33860 }, { "epoch": 1.81, "learning_rate": 0.00018281820159010175, "loss": 1.3316, "step": 33880 }, { "epoch": 1.81, "learning_rate": 0.00018280746128792333, "loss": 1.2796, "step": 33900 }, { "epoch": 1.81, "eval_loss": 1.3928660154342651, "eval_runtime": 49.915, "eval_samples_per_second": 60.102, "eval_steps_per_second": 1.883, "step": 33900 }, { "epoch": 1.81, "learning_rate": 0.00018279672098574495, "loss": 1.3583, "step": 33920 }, { "epoch": 1.81, "learning_rate": 0.00018278598068356653, "loss": 1.3029, "step": 33940 }, { "epoch": 1.81, "eval_loss": 1.3938392400741577, "eval_runtime": 49.9412, "eval_samples_per_second": 60.071, "eval_steps_per_second": 1.882, "step": 33950 }, { "epoch": 1.81, "learning_rate": 0.00018277524038138815, "loss": 1.2918, "step": 33960 }, { "epoch": 1.82, "learning_rate": 0.00018276450007920973, "loss": 1.2994, "step": 33980 }, { "epoch": 1.82, "learning_rate": 0.00018275375977703132, "loss": 1.3251, "step": 34000 }, { "epoch": 1.82, "eval_loss": 1.3923838138580322, "eval_runtime": 49.9075, "eval_samples_per_second": 60.111, "eval_steps_per_second": 1.883, "step": 34000 }, { "epoch": 1.82, "learning_rate": 0.00018274301947485293, "loss": 1.3138, "step": 34020 }, { "epoch": 1.82, "learning_rate": 0.00018273227917267452, "loss": 1.2666, "step": 34040 }, { "epoch": 1.82, "eval_loss": 1.3924466371536255, "eval_runtime": 49.9932, "eval_samples_per_second": 60.008, "eval_steps_per_second": 1.88, "step": 34050 }, { "epoch": 1.82, "learning_rate": 0.00018272153887049613, "loss": 1.3173, "step": 34060 }, { "epoch": 1.82, "learning_rate": 0.00018271079856831772, "loss": 1.3349, "step": 34080 }, { "epoch": 1.82, "learning_rate": 0.0001827000582661393, "loss": 1.3318, "step": 34100 }, { "epoch": 1.82, "eval_loss": 1.3935117721557617, "eval_runtime": 49.9078, "eval_samples_per_second": 60.111, "eval_steps_per_second": 1.883, "step": 34100 }, { "epoch": 1.82, "learning_rate": 0.00018268931796396092, "loss": 1.2808, "step": 34120 }, { "epoch": 1.82, "learning_rate": 0.0001826785776617825, "loss": 1.3121, "step": 34140 }, { "epoch": 1.82, "eval_loss": 1.3931491374969482, "eval_runtime": 49.9165, "eval_samples_per_second": 60.1, "eval_steps_per_second": 1.883, "step": 34150 }, { "epoch": 1.83, "learning_rate": 0.00018266837437471304, "loss": 1.3206, "step": 34160 }, { "epoch": 1.83, "learning_rate": 0.00018265763407253465, "loss": 1.2922, "step": 34180 }, { "epoch": 1.83, "learning_rate": 0.00018264689377035624, "loss": 1.351, "step": 34200 }, { "epoch": 1.83, "eval_loss": 1.3933879137039185, "eval_runtime": 49.872, "eval_samples_per_second": 60.154, "eval_steps_per_second": 1.885, "step": 34200 }, { "epoch": 1.83, "learning_rate": 0.00018263615346817783, "loss": 1.3538, "step": 34220 }, { "epoch": 1.83, "learning_rate": 0.00018262541316599944, "loss": 1.3316, "step": 34240 }, { "epoch": 1.83, "eval_loss": 1.393222689628601, "eval_runtime": 49.8959, "eval_samples_per_second": 60.125, "eval_steps_per_second": 1.884, "step": 34250 }, { "epoch": 1.83, "learning_rate": 0.00018261467286382103, "loss": 1.3685, "step": 34260 }, { "epoch": 1.83, "learning_rate": 0.00018260393256164264, "loss": 1.2776, "step": 34280 }, { "epoch": 1.83, "learning_rate": 0.00018259319225946423, "loss": 1.2839, "step": 34300 }, { "epoch": 1.83, "eval_loss": 1.3942714929580688, "eval_runtime": 49.8823, "eval_samples_per_second": 60.142, "eval_steps_per_second": 1.884, "step": 34300 }, { "epoch": 1.83, "learning_rate": 0.00018258245195728584, "loss": 1.3426, "step": 34320 }, { "epoch": 1.83, "learning_rate": 0.00018257171165510743, "loss": 1.3647, "step": 34340 }, { "epoch": 1.84, "eval_loss": 1.393857479095459, "eval_runtime": 49.9322, "eval_samples_per_second": 60.081, "eval_steps_per_second": 1.883, "step": 34350 }, { "epoch": 1.84, "learning_rate": 0.000182560971352929, "loss": 1.2837, "step": 34360 }, { "epoch": 1.84, "learning_rate": 0.00018255023105075063, "loss": 1.3124, "step": 34380 }, { "epoch": 1.84, "learning_rate": 0.0001825394907485722, "loss": 1.3185, "step": 34400 }, { "epoch": 1.84, "eval_loss": 1.3932018280029297, "eval_runtime": 49.8908, "eval_samples_per_second": 60.131, "eval_steps_per_second": 1.884, "step": 34400 }, { "epoch": 1.84, "learning_rate": 0.00018252875044639383, "loss": 1.2794, "step": 34420 }, { "epoch": 1.84, "learning_rate": 0.0001825180101442154, "loss": 1.3282, "step": 34440 }, { "epoch": 1.84, "eval_loss": 1.3929858207702637, "eval_runtime": 49.9429, "eval_samples_per_second": 60.069, "eval_steps_per_second": 1.882, "step": 34450 }, { "epoch": 1.84, "learning_rate": 0.000182507269842037, "loss": 1.306, "step": 34460 }, { "epoch": 1.84, "learning_rate": 0.0001824965295398586, "loss": 1.2997, "step": 34480 }, { "epoch": 1.84, "learning_rate": 0.0001824857892376802, "loss": 1.3054, "step": 34500 }, { "epoch": 1.84, "eval_loss": 1.3928805589675903, "eval_runtime": 49.8978, "eval_samples_per_second": 60.123, "eval_steps_per_second": 1.884, "step": 34500 }, { "epoch": 1.84, "learning_rate": 0.0001824750489355018, "loss": 1.2943, "step": 34520 }, { "epoch": 1.85, "learning_rate": 0.0001824643086333234, "loss": 1.3219, "step": 34540 }, { "epoch": 1.85, "eval_loss": 1.3927184343338013, "eval_runtime": 49.9209, "eval_samples_per_second": 60.095, "eval_steps_per_second": 1.883, "step": 34550 }, { "epoch": 1.85, "learning_rate": 0.00018245356833114499, "loss": 1.3242, "step": 34560 }, { "epoch": 1.85, "learning_rate": 0.0001824428280289666, "loss": 1.3142, "step": 34580 }, { "epoch": 1.85, "learning_rate": 0.00018243208772678819, "loss": 1.3121, "step": 34600 }, { "epoch": 1.85, "eval_loss": 1.39323890209198, "eval_runtime": 49.894, "eval_samples_per_second": 60.128, "eval_steps_per_second": 1.884, "step": 34600 }, { "epoch": 1.85, "learning_rate": 0.0001824213474246098, "loss": 1.3113, "step": 34620 }, { "epoch": 1.85, "learning_rate": 0.00018241060712243139, "loss": 1.3263, "step": 34640 }, { "epoch": 1.85, "eval_loss": 1.393105149269104, "eval_runtime": 49.9328, "eval_samples_per_second": 60.081, "eval_steps_per_second": 1.883, "step": 34650 }, { "epoch": 1.85, "learning_rate": 0.000182399866820253, "loss": 1.3155, "step": 34660 }, { "epoch": 1.85, "learning_rate": 0.0001823891265180746, "loss": 1.3247, "step": 34680 }, { "epoch": 1.85, "learning_rate": 0.0001823783862158962, "loss": 1.332, "step": 34700 }, { "epoch": 1.85, "eval_loss": 1.3934316635131836, "eval_runtime": 49.9008, "eval_samples_per_second": 60.119, "eval_steps_per_second": 1.884, "step": 34700 }, { "epoch": 1.86, "learning_rate": 0.00018236764591371779, "loss": 1.3021, "step": 34720 }, { "epoch": 1.86, "learning_rate": 0.0001823569056115394, "loss": 1.3075, "step": 34740 }, { "epoch": 1.86, "eval_loss": 1.3924082517623901, "eval_runtime": 49.9454, "eval_samples_per_second": 60.066, "eval_steps_per_second": 1.882, "step": 34750 }, { "epoch": 1.86, "learning_rate": 0.00018234616530936099, "loss": 1.3134, "step": 34760 }, { "epoch": 1.86, "learning_rate": 0.0001823354250071826, "loss": 1.3305, "step": 34780 }, { "epoch": 1.86, "learning_rate": 0.00018232468470500419, "loss": 1.292, "step": 34800 }, { "epoch": 1.86, "eval_loss": 1.3928916454315186, "eval_runtime": 49.9539, "eval_samples_per_second": 60.055, "eval_steps_per_second": 1.882, "step": 34800 }, { "epoch": 1.86, "learning_rate": 0.0001823139444028258, "loss": 1.3188, "step": 34820 }, { "epoch": 1.86, "learning_rate": 0.00018230320410064739, "loss": 1.2984, "step": 34840 }, { "epoch": 1.86, "eval_loss": 1.3925448656082153, "eval_runtime": 49.9779, "eval_samples_per_second": 60.027, "eval_steps_per_second": 1.881, "step": 34850 }, { "epoch": 1.86, "learning_rate": 0.00018229246379846897, "loss": 1.3219, "step": 34860 }, { "epoch": 1.86, "learning_rate": 0.00018228172349629059, "loss": 1.3085, "step": 34880 }, { "epoch": 1.86, "learning_rate": 0.00018227098319411217, "loss": 1.2877, "step": 34900 }, { "epoch": 1.86, "eval_loss": 1.392621397972107, "eval_runtime": 49.9427, "eval_samples_per_second": 60.069, "eval_steps_per_second": 1.882, "step": 34900 }, { "epoch": 1.87, "learning_rate": 0.00018226024289193379, "loss": 1.3283, "step": 34920 }, { "epoch": 1.87, "learning_rate": 0.00018224950258975537, "loss": 1.3525, "step": 34940 }, { "epoch": 1.87, "eval_loss": 1.3935717344284058, "eval_runtime": 49.9434, "eval_samples_per_second": 60.068, "eval_steps_per_second": 1.882, "step": 34950 }, { "epoch": 1.87, "learning_rate": 0.00018223876228757696, "loss": 1.3074, "step": 34960 }, { "epoch": 1.87, "learning_rate": 0.00018222802198539857, "loss": 1.321, "step": 34980 }, { "epoch": 1.87, "learning_rate": 0.00018221728168322016, "loss": 1.2963, "step": 35000 }, { "epoch": 1.87, "eval_loss": 1.393733024597168, "eval_runtime": 49.896, "eval_samples_per_second": 60.125, "eval_steps_per_second": 1.884, "step": 35000 }, { "epoch": 1.87, "learning_rate": 0.00018220654138104177, "loss": 1.3222, "step": 35020 }, { "epoch": 1.87, "learning_rate": 0.00018219580107886336, "loss": 1.3408, "step": 35040 }, { "epoch": 1.87, "eval_loss": 1.3933889865875244, "eval_runtime": 49.9267, "eval_samples_per_second": 60.088, "eval_steps_per_second": 1.883, "step": 35050 }, { "epoch": 1.87, "learning_rate": 0.00018218506077668495, "loss": 1.2484, "step": 35060 }, { "epoch": 1.87, "learning_rate": 0.00018217432047450656, "loss": 1.3439, "step": 35080 }, { "epoch": 1.88, "learning_rate": 0.00018216358017232815, "loss": 1.303, "step": 35100 }, { "epoch": 1.88, "eval_loss": 1.3932467699050903, "eval_runtime": 49.9093, "eval_samples_per_second": 60.109, "eval_steps_per_second": 1.883, "step": 35100 }, { "epoch": 1.88, "learning_rate": 0.00018215283987014976, "loss": 1.3138, "step": 35120 }, { "epoch": 1.88, "learning_rate": 0.00018214209956797135, "loss": 1.3562, "step": 35140 }, { "epoch": 1.88, "eval_loss": 1.3933935165405273, "eval_runtime": 49.9354, "eval_samples_per_second": 60.078, "eval_steps_per_second": 1.882, "step": 35150 }, { "epoch": 1.88, "learning_rate": 0.00018213135926579293, "loss": 1.3609, "step": 35160 }, { "epoch": 1.88, "learning_rate": 0.00018212061896361455, "loss": 1.305, "step": 35180 }, { "epoch": 1.88, "learning_rate": 0.00018210987866143613, "loss": 1.265, "step": 35200 }, { "epoch": 1.88, "eval_loss": 1.393653154373169, "eval_runtime": 49.9016, "eval_samples_per_second": 60.118, "eval_steps_per_second": 1.884, "step": 35200 }, { "epoch": 1.88, "learning_rate": 0.00018209913835925775, "loss": 1.339, "step": 35220 }, { "epoch": 1.88, "learning_rate": 0.00018208839805707933, "loss": 1.3163, "step": 35240 }, { "epoch": 1.88, "eval_loss": 1.3932678699493408, "eval_runtime": 49.9449, "eval_samples_per_second": 60.066, "eval_steps_per_second": 1.882, "step": 35250 }, { "epoch": 1.88, "learning_rate": 0.00018207765775490095, "loss": 1.3401, "step": 35260 }, { "epoch": 1.89, "learning_rate": 0.00018206691745272256, "loss": 1.3001, "step": 35280 }, { "epoch": 1.89, "learning_rate": 0.00018205617715054415, "loss": 1.2924, "step": 35300 }, { "epoch": 1.89, "eval_loss": 1.393690586090088, "eval_runtime": 49.912, "eval_samples_per_second": 60.106, "eval_steps_per_second": 1.883, "step": 35300 }, { "epoch": 1.89, "learning_rate": 0.00018204543684836576, "loss": 1.2857, "step": 35320 }, { "epoch": 1.89, "learning_rate": 0.00018203469654618735, "loss": 1.3263, "step": 35340 }, { "epoch": 1.89, "eval_loss": 1.3942912817001343, "eval_runtime": 49.8166, "eval_samples_per_second": 60.221, "eval_steps_per_second": 1.887, "step": 35350 }, { "epoch": 1.89, "learning_rate": 0.00018202395624400893, "loss": 1.2991, "step": 35360 }, { "epoch": 1.89, "learning_rate": 0.00018201321594183055, "loss": 1.2858, "step": 35380 }, { "epoch": 1.89, "learning_rate": 0.00018200247563965213, "loss": 1.2958, "step": 35400 }, { "epoch": 1.89, "eval_loss": 1.3931233882904053, "eval_runtime": 49.8931, "eval_samples_per_second": 60.129, "eval_steps_per_second": 1.884, "step": 35400 }, { "epoch": 1.89, "learning_rate": 0.00018199173533747375, "loss": 1.3178, "step": 35420 }, { "epoch": 1.89, "learning_rate": 0.00018198099503529533, "loss": 1.3069, "step": 35440 }, { "epoch": 1.89, "eval_loss": 1.3929921388626099, "eval_runtime": 49.9464, "eval_samples_per_second": 60.064, "eval_steps_per_second": 1.882, "step": 35450 }, { "epoch": 1.89, "learning_rate": 0.00018197025473311692, "loss": 1.3325, "step": 35460 }, { "epoch": 1.9, "learning_rate": 0.00018195951443093853, "loss": 1.2918, "step": 35480 }, { "epoch": 1.9, "learning_rate": 0.00018194877412876012, "loss": 1.2896, "step": 35500 }, { "epoch": 1.9, "eval_loss": 1.393085241317749, "eval_runtime": 49.8964, "eval_samples_per_second": 60.125, "eval_steps_per_second": 1.884, "step": 35500 }, { "epoch": 1.9, "learning_rate": 0.00018193803382658173, "loss": 1.3011, "step": 35520 }, { "epoch": 1.9, "learning_rate": 0.00018192729352440332, "loss": 1.2965, "step": 35540 }, { "epoch": 1.9, "eval_loss": 1.3940566778182983, "eval_runtime": 49.9327, "eval_samples_per_second": 60.081, "eval_steps_per_second": 1.883, "step": 35550 }, { "epoch": 1.9, "learning_rate": 0.0001819165532222249, "loss": 1.3188, "step": 35560 }, { "epoch": 1.9, "learning_rate": 0.00018190581292004652, "loss": 1.3119, "step": 35580 }, { "epoch": 1.9, "learning_rate": 0.0001818950726178681, "loss": 1.3274, "step": 35600 }, { "epoch": 1.9, "eval_loss": 1.3926782608032227, "eval_runtime": 49.9185, "eval_samples_per_second": 60.098, "eval_steps_per_second": 1.883, "step": 35600 }, { "epoch": 1.9, "learning_rate": 0.00018188433231568972, "loss": 1.348, "step": 35620 }, { "epoch": 1.9, "learning_rate": 0.0001818735920135113, "loss": 1.3444, "step": 35640 }, { "epoch": 1.9, "eval_loss": 1.3924182653427124, "eval_runtime": 49.9557, "eval_samples_per_second": 60.053, "eval_steps_per_second": 1.882, "step": 35650 }, { "epoch": 1.91, "learning_rate": 0.0001818628517113329, "loss": 1.2899, "step": 35660 }, { "epoch": 1.91, "learning_rate": 0.0001818521114091545, "loss": 1.2777, "step": 35680 }, { "epoch": 1.91, "learning_rate": 0.0001818413711069761, "loss": 1.3048, "step": 35700 }, { "epoch": 1.91, "eval_loss": 1.393597960472107, "eval_runtime": 49.9014, "eval_samples_per_second": 60.119, "eval_steps_per_second": 1.884, "step": 35700 }, { "epoch": 1.91, "learning_rate": 0.0001818306308047977, "loss": 1.3098, "step": 35720 }, { "epoch": 1.91, "learning_rate": 0.0001818198905026193, "loss": 1.3244, "step": 35740 }, { "epoch": 1.91, "eval_loss": 1.3939756155014038, "eval_runtime": 49.9147, "eval_samples_per_second": 60.103, "eval_steps_per_second": 1.883, "step": 35750 }, { "epoch": 1.91, "learning_rate": 0.0001818091502004409, "loss": 1.3246, "step": 35760 }, { "epoch": 1.91, "learning_rate": 0.0001817984098982625, "loss": 1.3302, "step": 35780 }, { "epoch": 1.91, "learning_rate": 0.00018178766959608408, "loss": 1.3189, "step": 35800 }, { "epoch": 1.91, "eval_loss": 1.393617868423462, "eval_runtime": 49.9014, "eval_samples_per_second": 60.119, "eval_steps_per_second": 1.884, "step": 35800 }, { "epoch": 1.91, "learning_rate": 0.0001817769292939057, "loss": 1.2832, "step": 35820 }, { "epoch": 1.91, "learning_rate": 0.00018176618899172728, "loss": 1.3016, "step": 35840 }, { "epoch": 1.92, "eval_loss": 1.3936024904251099, "eval_runtime": 49.9571, "eval_samples_per_second": 60.052, "eval_steps_per_second": 1.882, "step": 35850 }, { "epoch": 1.92, "learning_rate": 0.0001817554486895489, "loss": 1.3014, "step": 35860 }, { "epoch": 1.92, "learning_rate": 0.00018174470838737048, "loss": 1.2865, "step": 35880 }, { "epoch": 1.92, "learning_rate": 0.00018173396808519206, "loss": 1.2856, "step": 35900 }, { "epoch": 1.92, "eval_loss": 1.393366813659668, "eval_runtime": 49.8692, "eval_samples_per_second": 60.157, "eval_steps_per_second": 1.885, "step": 35900 }, { "epoch": 1.92, "learning_rate": 0.0001817232277830137, "loss": 1.3443, "step": 35920 }, { "epoch": 1.92, "learning_rate": 0.0001817124874808353, "loss": 1.3438, "step": 35940 }, { "epoch": 1.92, "eval_loss": 1.3926905393600464, "eval_runtime": 49.8999, "eval_samples_per_second": 60.12, "eval_steps_per_second": 1.884, "step": 35950 }, { "epoch": 1.92, "learning_rate": 0.00018170174717865688, "loss": 1.3326, "step": 35960 }, { "epoch": 1.92, "learning_rate": 0.0001816910068764785, "loss": 1.2869, "step": 35980 }, { "epoch": 1.92, "learning_rate": 0.00018168026657430008, "loss": 1.3337, "step": 36000 }, { "epoch": 1.92, "eval_loss": 1.3924248218536377, "eval_runtime": 49.869, "eval_samples_per_second": 60.158, "eval_steps_per_second": 1.885, "step": 36000 }, { "epoch": 1.92, "learning_rate": 0.0001816695262721217, "loss": 1.3459, "step": 36020 }, { "epoch": 1.93, "learning_rate": 0.00018165878596994328, "loss": 1.3044, "step": 36040 }, { "epoch": 1.93, "eval_loss": 1.3930872678756714, "eval_runtime": 49.9165, "eval_samples_per_second": 60.1, "eval_steps_per_second": 1.883, "step": 36050 }, { "epoch": 1.93, "learning_rate": 0.00018164804566776486, "loss": 1.3095, "step": 36060 }, { "epoch": 1.93, "learning_rate": 0.00018163730536558648, "loss": 1.3382, "step": 36080 }, { "epoch": 1.93, "learning_rate": 0.00018162656506340806, "loss": 1.3207, "step": 36100 }, { "epoch": 1.93, "eval_loss": 1.3932032585144043, "eval_runtime": 49.8839, "eval_samples_per_second": 60.14, "eval_steps_per_second": 1.884, "step": 36100 }, { "epoch": 1.93, "learning_rate": 0.00018161582476122968, "loss": 1.3144, "step": 36120 }, { "epoch": 1.93, "learning_rate": 0.00018160508445905126, "loss": 1.3294, "step": 36140 }, { "epoch": 1.93, "eval_loss": 1.392491102218628, "eval_runtime": 49.925, "eval_samples_per_second": 60.09, "eval_steps_per_second": 1.883, "step": 36150 }, { "epoch": 1.93, "learning_rate": 0.00018159434415687288, "loss": 1.3302, "step": 36160 }, { "epoch": 1.93, "learning_rate": 0.00018158360385469446, "loss": 1.3235, "step": 36180 }, { "epoch": 1.93, "learning_rate": 0.00018157286355251605, "loss": 1.3333, "step": 36200 }, { "epoch": 1.93, "eval_loss": 1.3923717737197876, "eval_runtime": 49.912, "eval_samples_per_second": 60.106, "eval_steps_per_second": 1.883, "step": 36200 }, { "epoch": 1.94, "learning_rate": 0.00018156212325033766, "loss": 1.3188, "step": 36220 }, { "epoch": 1.94, "learning_rate": 0.00018155138294815925, "loss": 1.3098, "step": 36240 }, { "epoch": 1.94, "eval_loss": 1.3927510976791382, "eval_runtime": 49.9002, "eval_samples_per_second": 60.12, "eval_steps_per_second": 1.884, "step": 36250 }, { "epoch": 1.94, "learning_rate": 0.00018154064264598086, "loss": 1.3403, "step": 36260 }, { "epoch": 1.94, "learning_rate": 0.00018152990234380245, "loss": 1.3093, "step": 36280 }, { "epoch": 1.94, "learning_rate": 0.00018151916204162404, "loss": 1.3128, "step": 36300 }, { "epoch": 1.94, "eval_loss": 1.391266107559204, "eval_runtime": 49.963, "eval_samples_per_second": 60.044, "eval_steps_per_second": 1.881, "step": 36300 } ], "max_steps": 374300, "num_train_epochs": 20, "total_flos": 4.634609115302619e+19, "trial_name": null, "trial_params": null }