{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.006297236406637901, "global_step": 10000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 0.0002, "loss": 4.3689, "step": 10 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 3.0166, "step": 20 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.6587, "step": 30 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.7002, "step": 40 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.8247, "step": 50 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.7709, "step": 60 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.5964, "step": 70 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.5285, "step": 80 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.6166, "step": 90 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.7509, "step": 100 }, { "epoch": 0.0, "eval_loss": 2.828864097595215, "eval_runtime": 25015.5353, "eval_samples_per_second": 4.048, "eval_steps_per_second": 2.024, "step": 100 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.6819, "step": 110 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.5314, "step": 120 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.4678, "step": 130 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.5079, "step": 140 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.6501, "step": 150 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.6981, "step": 160 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.5262, "step": 170 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.4819, "step": 180 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.4657, "step": 190 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.6072, "step": 200 }, { "epoch": 0.0, "eval_loss": 2.7019286155700684, "eval_runtime": 25020.9542, "eval_samples_per_second": 4.048, "eval_steps_per_second": 2.024, "step": 200 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.5656, "step": 210 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.5185, "step": 220 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.4271, "step": 230 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.4286, "step": 240 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.5728, "step": 250 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.5597, "step": 260 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.4337, "step": 270 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.4491, "step": 280 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.4494, "step": 290 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.5075, "step": 300 }, { "epoch": 0.0, "eval_loss": 2.6475937366485596, "eval_runtime": 25026.694, "eval_samples_per_second": 4.047, "eval_steps_per_second": 2.023, "step": 300 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.5654, "step": 310 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.445, "step": 320 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.4604, "step": 330 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.4279, "step": 340 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.545, "step": 350 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.579, "step": 360 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.4052, "step": 370 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3807, "step": 380 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.4444, "step": 390 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.4991, "step": 400 }, { "epoch": 0.0, "eval_loss": 2.606245279312134, "eval_runtime": 25030.0633, "eval_samples_per_second": 4.046, "eval_steps_per_second": 2.023, "step": 400 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.6451, "step": 410 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.4054, "step": 420 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.4127, "step": 430 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3986, "step": 440 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.4556, "step": 450 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.531, "step": 460 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.4173, "step": 470 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.4098, "step": 480 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3917, "step": 490 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.5342, "step": 500 }, { "epoch": 0.0, "eval_loss": 2.553729772567749, "eval_runtime": 25028.6332, "eval_samples_per_second": 4.046, "eval_steps_per_second": 2.023, "step": 500 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.5335, "step": 510 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.4117, "step": 520 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3891, "step": 530 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3963, "step": 540 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.4859, "step": 550 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.5574, "step": 560 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.4218, "step": 570 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3816, "step": 580 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3866, "step": 590 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.4992, "step": 600 }, { "epoch": 0.0, "eval_loss": 2.5366227626800537, "eval_runtime": 25013.3524, "eval_samples_per_second": 4.049, "eval_steps_per_second": 2.024, "step": 600 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.5315, "step": 610 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.4261, "step": 620 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3737, "step": 630 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.385, "step": 640 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.405, "step": 650 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.5182, "step": 660 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3498, "step": 670 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3539, "step": 680 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3276, "step": 690 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.4602, "step": 700 }, { "epoch": 0.0, "eval_loss": 2.5101232528686523, "eval_runtime": 25025.24, "eval_samples_per_second": 4.047, "eval_steps_per_second": 2.023, "step": 700 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.5107, "step": 710 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3919, "step": 720 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3818, "step": 730 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3866, "step": 740 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.4079, "step": 750 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.4908, "step": 760 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3965, "step": 770 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.323, "step": 780 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3136, "step": 790 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3786, "step": 800 }, { "epoch": 0.0, "eval_loss": 2.50821590423584, "eval_runtime": 25013.6917, "eval_samples_per_second": 4.049, "eval_steps_per_second": 2.024, "step": 800 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.5106, "step": 810 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3971, "step": 820 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3305, "step": 830 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3711, "step": 840 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.4803, "step": 850 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.4738, "step": 860 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3712, "step": 870 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3463, "step": 880 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2843, "step": 890 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3725, "step": 900 }, { "epoch": 0.0, "eval_loss": 2.494638204574585, "eval_runtime": 25014.3413, "eval_samples_per_second": 4.049, "eval_steps_per_second": 2.024, "step": 900 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.5215, "step": 910 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3802, "step": 920 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3499, "step": 930 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3199, "step": 940 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3491, "step": 950 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.4885, "step": 960 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3633, "step": 970 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3226, "step": 980 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2587, "step": 990 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3803, "step": 1000 }, { "epoch": 0.0, "eval_loss": 2.458460807800293, "eval_runtime": 25021.0121, "eval_samples_per_second": 4.048, "eval_steps_per_second": 2.024, "step": 1000 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3921, "step": 1010 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3305, "step": 1020 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2907, "step": 1030 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.319, "step": 1040 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.4507, "step": 1050 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.467, "step": 1060 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3639, "step": 1070 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3189, "step": 1080 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2974, "step": 1090 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3331, "step": 1100 }, { "epoch": 0.0, "eval_loss": 2.454998254776001, "eval_runtime": 25014.8749, "eval_samples_per_second": 4.049, "eval_steps_per_second": 2.024, "step": 1100 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.4715, "step": 1110 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3549, "step": 1120 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3196, "step": 1130 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2633, "step": 1140 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3661, "step": 1150 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.4783, "step": 1160 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3531, "step": 1170 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2943, "step": 1180 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2409, "step": 1190 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3605, "step": 1200 }, { "epoch": 0.0, "eval_loss": 2.448063611984253, "eval_runtime": 25013.9619, "eval_samples_per_second": 4.049, "eval_steps_per_second": 2.024, "step": 1200 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.4612, "step": 1210 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3746, "step": 1220 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.304, "step": 1230 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2925, "step": 1240 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2926, "step": 1250 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.4145, "step": 1260 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3593, "step": 1270 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2417, "step": 1280 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.301, "step": 1290 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3336, "step": 1300 }, { "epoch": 0.0, "eval_loss": 2.424609661102295, "eval_runtime": 25025.8125, "eval_samples_per_second": 4.047, "eval_steps_per_second": 2.023, "step": 1300 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.4126, "step": 1310 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.308, "step": 1320 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3052, "step": 1330 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2798, "step": 1340 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3208, "step": 1350 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3309, "step": 1360 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.331, "step": 1370 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3068, "step": 1380 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.1971, "step": 1390 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3114, "step": 1400 }, { "epoch": 0.0, "eval_loss": 2.4197137355804443, "eval_runtime": 25013.8484, "eval_samples_per_second": 4.049, "eval_steps_per_second": 2.024, "step": 1400 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.4611, "step": 1410 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2463, "step": 1420 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2844, "step": 1430 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2544, "step": 1440 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3002, "step": 1450 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.4263, "step": 1460 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.328, "step": 1470 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2609, "step": 1480 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2854, "step": 1490 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3257, "step": 1500 }, { "epoch": 0.0, "eval_loss": 2.407323122024536, "eval_runtime": 25024.2416, "eval_samples_per_second": 4.047, "eval_steps_per_second": 2.024, "step": 1500 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.4174, "step": 1510 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2817, "step": 1520 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2796, "step": 1530 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2834, "step": 1540 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2803, "step": 1550 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.39, "step": 1560 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2672, "step": 1570 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3054, "step": 1580 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2576, "step": 1590 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3099, "step": 1600 }, { "epoch": 0.0, "eval_loss": 2.401474952697754, "eval_runtime": 25023.95, "eval_samples_per_second": 4.047, "eval_steps_per_second": 2.024, "step": 1600 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.4225, "step": 1610 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3135, "step": 1620 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2705, "step": 1630 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2075, "step": 1640 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3811, "step": 1650 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.4239, "step": 1660 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3319, "step": 1670 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3106, "step": 1680 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2231, "step": 1690 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.311, "step": 1700 }, { "epoch": 0.0, "eval_loss": 2.3863916397094727, "eval_runtime": 25021.8392, "eval_samples_per_second": 4.047, "eval_steps_per_second": 2.024, "step": 1700 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.4332, "step": 1710 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3183, "step": 1720 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2177, "step": 1730 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3045, "step": 1740 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2627, "step": 1750 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.4087, "step": 1760 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3026, "step": 1770 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3316, "step": 1780 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.256, "step": 1790 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3276, "step": 1800 }, { "epoch": 0.0, "eval_loss": 2.3831562995910645, "eval_runtime": 25030.8877, "eval_samples_per_second": 4.046, "eval_steps_per_second": 2.023, "step": 1800 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3993, "step": 1810 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3138, "step": 1820 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2652, "step": 1830 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2848, "step": 1840 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3009, "step": 1850 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3942, "step": 1860 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2951, "step": 1870 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2457, "step": 1880 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2514, "step": 1890 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2656, "step": 1900 }, { "epoch": 0.0, "eval_loss": 2.3777658939361572, "eval_runtime": 25024.5627, "eval_samples_per_second": 4.047, "eval_steps_per_second": 2.023, "step": 1900 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3702, "step": 1910 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2896, "step": 1920 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2635, "step": 1930 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2506, "step": 1940 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2108, "step": 1950 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3469, "step": 1960 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2863, "step": 1970 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3095, "step": 1980 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2594, "step": 1990 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3124, "step": 2000 }, { "epoch": 0.0, "eval_loss": 2.3710832595825195, "eval_runtime": 25009.9377, "eval_samples_per_second": 4.049, "eval_steps_per_second": 2.025, "step": 2000 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.42, "step": 2010 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2725, "step": 2020 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.243, "step": 2030 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.1696, "step": 2040 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2764, "step": 2050 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.4149, "step": 2060 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3201, "step": 2070 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2489, "step": 2080 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.1856, "step": 2090 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.295, "step": 2100 }, { "epoch": 0.0, "eval_loss": 2.3622617721557617, "eval_runtime": 25023.6158, "eval_samples_per_second": 4.047, "eval_steps_per_second": 2.024, "step": 2100 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3851, "step": 2110 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2984, "step": 2120 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.189, "step": 2130 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2425, "step": 2140 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.312, "step": 2150 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3269, "step": 2160 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3006, "step": 2170 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2974, "step": 2180 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.1995, "step": 2190 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3021, "step": 2200 }, { "epoch": 0.0, "eval_loss": 2.356457471847534, "eval_runtime": 25014.0777, "eval_samples_per_second": 4.049, "eval_steps_per_second": 2.024, "step": 2200 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3836, "step": 2210 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2795, "step": 2220 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.196, "step": 2230 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2284, "step": 2240 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2464, "step": 2250 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.406, "step": 2260 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2814, "step": 2270 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2595, "step": 2280 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2578, "step": 2290 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2471, "step": 2300 }, { "epoch": 0.0, "eval_loss": 2.334439516067505, "eval_runtime": 25021.2061, "eval_samples_per_second": 4.048, "eval_steps_per_second": 2.024, "step": 2300 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3846, "step": 2310 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2797, "step": 2320 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2136, "step": 2330 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2303, "step": 2340 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3312, "step": 2350 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3484, "step": 2360 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3012, "step": 2370 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2255, "step": 2380 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.1879, "step": 2390 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2432, "step": 2400 }, { "epoch": 0.0, "eval_loss": 2.3361294269561768, "eval_runtime": 25017.0273, "eval_samples_per_second": 4.048, "eval_steps_per_second": 2.024, "step": 2400 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3223, "step": 2410 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2756, "step": 2420 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2312, "step": 2430 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2296, "step": 2440 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2083, "step": 2450 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3684, "step": 2460 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2336, "step": 2470 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2262, "step": 2480 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.1644, "step": 2490 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3096, "step": 2500 }, { "epoch": 0.0, "eval_loss": 2.336559772491455, "eval_runtime": 25015.5454, "eval_samples_per_second": 4.048, "eval_steps_per_second": 2.024, "step": 2500 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3185, "step": 2510 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2768, "step": 2520 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2151, "step": 2530 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2017, "step": 2540 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2616, "step": 2550 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3333, "step": 2560 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2404, "step": 2570 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.25, "step": 2580 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.22, "step": 2590 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2884, "step": 2600 }, { "epoch": 0.0, "eval_loss": 2.332972526550293, "eval_runtime": 25018.5289, "eval_samples_per_second": 4.048, "eval_steps_per_second": 2.024, "step": 2600 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3739, "step": 2610 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2611, "step": 2620 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2323, "step": 2630 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.1285, "step": 2640 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2504, "step": 2650 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3694, "step": 2660 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2317, "step": 2670 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2714, "step": 2680 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.1926, "step": 2690 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2617, "step": 2700 }, { "epoch": 0.0, "eval_loss": 2.325580596923828, "eval_runtime": 25014.9463, "eval_samples_per_second": 4.049, "eval_steps_per_second": 2.024, "step": 2700 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3665, "step": 2710 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2604, "step": 2720 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2129, "step": 2730 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2166, "step": 2740 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2418, "step": 2750 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3793, "step": 2760 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2684, "step": 2770 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.1758, "step": 2780 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2116, "step": 2790 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2753, "step": 2800 }, { "epoch": 0.0, "eval_loss": 2.3111133575439453, "eval_runtime": 25017.4138, "eval_samples_per_second": 4.048, "eval_steps_per_second": 2.024, "step": 2800 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3754, "step": 2810 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2508, "step": 2820 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.1793, "step": 2830 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.1444, "step": 2840 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2093, "step": 2850 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2917, "step": 2860 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2487, "step": 2870 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.1931, "step": 2880 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.1862, "step": 2890 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3007, "step": 2900 }, { "epoch": 0.0, "eval_loss": 2.3113691806793213, "eval_runtime": 25009.312, "eval_samples_per_second": 4.049, "eval_steps_per_second": 2.025, "step": 2900 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3637, "step": 2910 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2385, "step": 2920 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2092, "step": 2930 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.1755, "step": 2940 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2523, "step": 2950 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3127, "step": 2960 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2634, "step": 2970 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2487, "step": 2980 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.1269, "step": 2990 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.1926, "step": 3000 }, { "epoch": 0.0, "eval_loss": 2.2998297214508057, "eval_runtime": 25014.9441, "eval_samples_per_second": 4.049, "eval_steps_per_second": 2.024, "step": 3000 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3595, "step": 3010 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2325, "step": 3020 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.1953, "step": 3030 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.184, "step": 3040 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.1961, "step": 3050 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.3776, "step": 3060 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2627, "step": 3070 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2189, "step": 3080 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.1835, "step": 3090 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.2283, "step": 3100 }, { "epoch": 0.0, "eval_loss": 2.2964084148406982, "eval_runtime": 25012.2645, "eval_samples_per_second": 4.049, "eval_steps_per_second": 2.024, "step": 3100 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.1955, "step": 3150 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.1781, "step": 3200 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.1431, "step": 3250 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.1395, "step": 3300 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.1339, "step": 3350 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.1488, "step": 3400 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.1246, "step": 3450 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.1307, "step": 3500 }, { "epoch": 0.0, "eval_loss": 2.2042970657348633, "eval_runtime": 34108.0847, "eval_samples_per_second": 2.969, "eval_steps_per_second": 0.742, "step": 3500 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.1207, "step": 3550 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.1247, "step": 3600 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.1225, "step": 3650 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.0978, "step": 3700 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.1168, "step": 3750 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.1122, "step": 3800 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.1046, "step": 3850 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.1208, "step": 3900 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.1206, "step": 3950 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.1128, "step": 4000 }, { "epoch": 0.0, "eval_loss": 2.1652708053588867, "eval_runtime": 34110.436, "eval_samples_per_second": 2.969, "eval_steps_per_second": 0.742, "step": 4000 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.0952, "step": 4050 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.0932, "step": 4100 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.0884, "step": 4150 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.0733, "step": 4200 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.0889, "step": 4250 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.0845, "step": 4300 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.0734, "step": 4350 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.0829, "step": 4400 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.0784, "step": 4450 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.0669, "step": 4500 }, { "epoch": 0.0, "eval_loss": 2.144939661026001, "eval_runtime": 34106.0495, "eval_samples_per_second": 2.969, "eval_steps_per_second": 0.742, "step": 4500 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.0678, "step": 4550 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.0704, "step": 4600 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.0714, "step": 4650 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.0661, "step": 4700 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.0707, "step": 4750 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.0685, "step": 4800 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.0706, "step": 4850 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.0753, "step": 4900 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.056, "step": 4950 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.0646, "step": 5000 }, { "epoch": 0.0, "eval_loss": 2.1237690448760986, "eval_runtime": 34108.4877, "eval_samples_per_second": 2.969, "eval_steps_per_second": 0.742, "step": 5000 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.053, "step": 5050 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.0622, "step": 5100 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.0415, "step": 5150 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.0561, "step": 5200 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.0517, "step": 5250 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.0559, "step": 5300 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.0417, "step": 5350 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.0435, "step": 5400 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.0533, "step": 5450 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.0388, "step": 5500 }, { "epoch": 0.0, "eval_loss": 2.109766960144043, "eval_runtime": 34100.9613, "eval_samples_per_second": 2.97, "eval_steps_per_second": 0.742, "step": 5500 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.0524, "step": 5550 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.0521, "step": 5600 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.0356, "step": 5650 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.0565, "step": 5700 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.0341, "step": 5750 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.041, "step": 5800 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.0378, "step": 5850 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.0388, "step": 5900 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.0456, "step": 5950 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.0391, "step": 6000 }, { "epoch": 0.0, "eval_loss": 2.0924971103668213, "eval_runtime": 34094.327, "eval_samples_per_second": 2.97, "eval_steps_per_second": 0.743, "step": 6000 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.0364, "step": 6050 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.028, "step": 6100 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.0255, "step": 6150 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.0049, "step": 6200 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.0214, "step": 6250 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.0303, "step": 6300 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.0216, "step": 6350 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.0278, "step": 6400 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.04, "step": 6450 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.0146, "step": 6500 }, { "epoch": 0.0, "eval_loss": 2.082850217819214, "eval_runtime": 34130.627, "eval_samples_per_second": 2.967, "eval_steps_per_second": 0.742, "step": 6500 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.0299, "step": 6550 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.0249, "step": 6600 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.0169, "step": 6650 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.0043, "step": 6700 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.0169, "step": 6750 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.0146, "step": 6800 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.0178, "step": 6850 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.017, "step": 6900 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.0011, "step": 6950 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 1.9961, "step": 7000 }, { "epoch": 0.0, "eval_loss": 2.0665175914764404, "eval_runtime": 34103.0381, "eval_samples_per_second": 2.97, "eval_steps_per_second": 0.742, "step": 7000 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.0201, "step": 7050 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.0165, "step": 7100 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.0071, "step": 7150 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.0038, "step": 7200 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.0074, "step": 7250 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.0009, "step": 7300 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 1.9877, "step": 7350 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.006, "step": 7400 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 1.9915, "step": 7450 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 1.993, "step": 7500 }, { "epoch": 0.0, "eval_loss": 2.0603718757629395, "eval_runtime": 34086.5352, "eval_samples_per_second": 2.971, "eval_steps_per_second": 0.743, "step": 7500 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 1.9975, "step": 7550 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 1.994, "step": 7600 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.007, "step": 7650 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.0034, "step": 7700 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.0081, "step": 7750 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 1.9855, "step": 7800 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.0026, "step": 7850 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 2.0022, "step": 7900 }, { "epoch": 0.01, "learning_rate": 0.0002, "loss": 2.0013, "step": 7950 }, { "epoch": 0.01, "learning_rate": 0.0002, "loss": 2.0073, "step": 8000 }, { "epoch": 0.01, "eval_loss": 2.050844669342041, "eval_runtime": 34104.8601, "eval_samples_per_second": 2.969, "eval_steps_per_second": 0.742, "step": 8000 }, { "epoch": 0.01, "learning_rate": 0.0002, "loss": 1.9936, "step": 8050 }, { "epoch": 0.01, "learning_rate": 0.0002, "loss": 1.9682, "step": 8100 }, { "epoch": 0.01, "learning_rate": 0.0002, "loss": 2.0048, "step": 8150 }, { "epoch": 0.01, "learning_rate": 0.0002, "loss": 1.9783, "step": 8200 }, { "epoch": 0.01, "learning_rate": 0.0002, "loss": 1.9703, "step": 8250 }, { "epoch": 0.01, "learning_rate": 0.0002, "loss": 2.0042, "step": 8300 }, { "epoch": 0.01, "learning_rate": 0.0002, "loss": 1.9848, "step": 8350 }, { "epoch": 0.01, "learning_rate": 0.0002, "loss": 1.9808, "step": 8400 }, { "epoch": 0.01, "learning_rate": 0.0002, "loss": 1.9871, "step": 8450 }, { "epoch": 0.01, "learning_rate": 0.0002, "loss": 1.9733, "step": 8500 }, { "epoch": 0.01, "eval_loss": 2.0437986850738525, "eval_runtime": 34093.5093, "eval_samples_per_second": 2.97, "eval_steps_per_second": 0.743, "step": 8500 }, { "epoch": 0.01, "learning_rate": 0.0002, "loss": 1.9796, "step": 8550 }, { "epoch": 0.01, "learning_rate": 0.0002, "loss": 1.9943, "step": 8600 }, { "epoch": 0.01, "learning_rate": 0.0002, "loss": 1.9722, "step": 8650 }, { "epoch": 0.01, "learning_rate": 0.0002, "loss": 1.9835, "step": 8700 }, { "epoch": 0.01, "learning_rate": 0.0002, "loss": 1.963, "step": 8750 }, { "epoch": 0.01, "learning_rate": 0.0002, "loss": 2.0058, "step": 8800 }, { "epoch": 0.01, "learning_rate": 0.0002, "loss": 1.9804, "step": 8850 }, { "epoch": 0.01, "learning_rate": 0.0002, "loss": 1.9729, "step": 8900 }, { "epoch": 0.01, "learning_rate": 0.0002, "loss": 1.9867, "step": 8950 }, { "epoch": 0.01, "learning_rate": 0.0002, "loss": 1.976, "step": 9000 }, { "epoch": 0.01, "eval_loss": 2.041231155395508, "eval_runtime": 34089.8046, "eval_samples_per_second": 2.971, "eval_steps_per_second": 0.743, "step": 9000 }, { "epoch": 0.01, "learning_rate": 0.0002, "loss": 1.9773, "step": 9050 }, { "epoch": 0.01, "learning_rate": 0.0002, "loss": 1.964, "step": 9100 }, { "epoch": 0.01, "learning_rate": 0.0002, "loss": 1.9648, "step": 9150 }, { "epoch": 0.01, "learning_rate": 0.0002, "loss": 1.9843, "step": 9200 }, { "epoch": 0.01, "learning_rate": 0.0002, "loss": 1.9677, "step": 9250 }, { "epoch": 0.01, "learning_rate": 0.0002, "loss": 1.962, "step": 9300 }, { "epoch": 0.01, "learning_rate": 0.0002, "loss": 1.9953, "step": 9350 }, { "epoch": 0.01, "learning_rate": 0.0002, "loss": 1.9665, "step": 9400 }, { "epoch": 0.01, "learning_rate": 0.0002, "loss": 1.9724, "step": 9450 }, { "epoch": 0.01, "learning_rate": 0.0002, "loss": 1.9576, "step": 9500 }, { "epoch": 0.01, "eval_loss": 2.0336899757385254, "eval_runtime": 34110.7491, "eval_samples_per_second": 2.969, "eval_steps_per_second": 0.742, "step": 9500 }, { "epoch": 0.01, "learning_rate": 0.0002, "loss": 1.9817, "step": 9550 }, { "epoch": 0.01, "learning_rate": 0.0002, "loss": 1.952, "step": 9600 }, { "epoch": 0.01, "learning_rate": 0.0002, "loss": 1.9693, "step": 9650 }, { "epoch": 0.01, "learning_rate": 0.0002, "loss": 1.9827, "step": 9700 }, { "epoch": 0.01, "learning_rate": 0.0002, "loss": 1.979, "step": 9750 }, { "epoch": 0.01, "learning_rate": 0.0002, "loss": 1.9666, "step": 9800 }, { "epoch": 0.01, "learning_rate": 0.0002, "loss": 1.987, "step": 9850 }, { "epoch": 0.01, "learning_rate": 0.0002, "loss": 1.9596, "step": 9900 }, { "epoch": 0.01, "learning_rate": 0.0002, "loss": 1.9663, "step": 9950 }, { "epoch": 0.01, "learning_rate": 0.0002, "loss": 1.9783, "step": 10000 }, { "epoch": 0.01, "eval_loss": 2.0236124992370605, "eval_runtime": 34103.4942, "eval_samples_per_second": 2.97, "eval_steps_per_second": 0.742, "step": 10000 } ], "max_steps": 10000, "num_train_epochs": 1, "total_flos": 3.2626731178471956e+19, "trial_name": null, "trial_params": null }