{ "best_metric": null, "best_model_checkpoint": null, "epoch": 9.803921568627452, "eval_steps": 500, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1, "learning_rate": 0.0003996, "loss": 9.2736, "step": 10 }, { "epoch": 0.2, "learning_rate": 0.0003992, "loss": 7.7626, "step": 20 }, { "epoch": 0.29, "learning_rate": 0.00039880000000000004, "loss": 7.5202, "step": 30 }, { "epoch": 0.39, "learning_rate": 0.00039840000000000003, "loss": 7.371, "step": 40 }, { "epoch": 0.49, "learning_rate": 0.000398, "loss": 7.2352, "step": 50 }, { "epoch": 0.59, "learning_rate": 0.0003976, "loss": 7.1352, "step": 60 }, { "epoch": 0.69, "learning_rate": 0.0003972, "loss": 7.0625, "step": 70 }, { "epoch": 0.78, "learning_rate": 0.0003968, "loss": 6.911, "step": 80 }, { "epoch": 0.88, "learning_rate": 0.00039640000000000004, "loss": 6.7717, "step": 90 }, { "epoch": 0.98, "learning_rate": 0.00039600000000000003, "loss": 6.6484, "step": 100 }, { "epoch": 1.08, "learning_rate": 0.0003956, "loss": 6.5492, "step": 110 }, { "epoch": 1.18, "learning_rate": 0.0003952, "loss": 6.4417, "step": 120 }, { "epoch": 1.27, "learning_rate": 0.0003948, "loss": 6.3311, "step": 130 }, { "epoch": 1.37, "learning_rate": 0.0003944, "loss": 6.2206, "step": 140 }, { "epoch": 1.47, "learning_rate": 0.00039400000000000004, "loss": 6.1179, "step": 150 }, { "epoch": 1.57, "learning_rate": 0.0003936, "loss": 6.0151, "step": 160 }, { "epoch": 1.67, "learning_rate": 0.0003932, "loss": 5.9035, "step": 170 }, { "epoch": 1.76, "learning_rate": 0.0003928, "loss": 5.8222, "step": 180 }, { "epoch": 1.86, "learning_rate": 0.0003924, "loss": 5.7042, "step": 190 }, { "epoch": 1.96, "learning_rate": 0.000392, "loss": 5.6265, "step": 200 }, { "epoch": 2.06, "learning_rate": 0.00039160000000000003, "loss": 5.5338, "step": 210 }, { "epoch": 2.16, "learning_rate": 0.0003912, "loss": 5.4521, "step": 220 }, { "epoch": 2.25, "learning_rate": 0.0003908, "loss": 5.3552, "step": 230 }, { "epoch": 2.35, "learning_rate": 0.0003904, "loss": 5.2771, "step": 240 }, { "epoch": 2.45, "learning_rate": 0.00039, "loss": 5.1587, "step": 250 }, { "epoch": 2.55, "learning_rate": 0.0003896, "loss": 5.0899, "step": 260 }, { "epoch": 2.65, "learning_rate": 0.00038920000000000003, "loss": 5.0191, "step": 270 }, { "epoch": 2.75, "learning_rate": 0.0003888, "loss": 4.9602, "step": 280 }, { "epoch": 2.84, "learning_rate": 0.0003884, "loss": 4.8366, "step": 290 }, { "epoch": 2.94, "learning_rate": 0.000388, "loss": 4.7848, "step": 300 }, { "epoch": 3.04, "learning_rate": 0.0003876, "loss": 4.7199, "step": 310 }, { "epoch": 3.14, "learning_rate": 0.00038720000000000003, "loss": 4.6306, "step": 320 }, { "epoch": 3.24, "learning_rate": 0.0003868, "loss": 4.5337, "step": 330 }, { "epoch": 3.33, "learning_rate": 0.0003864, "loss": 4.4796, "step": 340 }, { "epoch": 3.43, "learning_rate": 0.000386, "loss": 4.3881, "step": 350 }, { "epoch": 3.53, "learning_rate": 0.0003856, "loss": 4.2989, "step": 360 }, { "epoch": 3.63, "learning_rate": 0.0003852, "loss": 4.2533, "step": 370 }, { "epoch": 3.73, "learning_rate": 0.00038480000000000003, "loss": 4.2379, "step": 380 }, { "epoch": 3.82, "learning_rate": 0.0003844, "loss": 4.142, "step": 390 }, { "epoch": 3.92, "learning_rate": 0.000384, "loss": 4.0793, "step": 400 }, { "epoch": 4.02, "learning_rate": 0.0003836, "loss": 4.0005, "step": 410 }, { "epoch": 4.12, "learning_rate": 0.0003832, "loss": 3.9619, "step": 420 }, { "epoch": 4.22, "learning_rate": 0.0003828, "loss": 3.8687, "step": 430 }, { "epoch": 4.31, "learning_rate": 0.0003824, "loss": 3.8486, "step": 440 }, { "epoch": 4.41, "learning_rate": 0.000382, "loss": 3.7684, "step": 450 }, { "epoch": 4.51, "learning_rate": 0.0003816, "loss": 3.7013, "step": 460 }, { "epoch": 4.61, "learning_rate": 0.0003812, "loss": 3.6851, "step": 470 }, { "epoch": 4.71, "learning_rate": 0.0003808, "loss": 3.6585, "step": 480 }, { "epoch": 4.8, "learning_rate": 0.0003804, "loss": 3.6172, "step": 490 }, { "epoch": 4.9, "learning_rate": 0.00038, "loss": 3.5557, "step": 500 }, { "epoch": 5.0, "learning_rate": 0.0003796, "loss": 3.4746, "step": 510 }, { "epoch": 5.1, "learning_rate": 0.0003792, "loss": 3.4473, "step": 520 }, { "epoch": 5.2, "learning_rate": 0.0003788, "loss": 3.3828, "step": 530 }, { "epoch": 5.29, "learning_rate": 0.0003784, "loss": 3.3868, "step": 540 }, { "epoch": 5.39, "learning_rate": 0.00037799999999999997, "loss": 3.31, "step": 550 }, { "epoch": 5.49, "learning_rate": 0.0003776, "loss": 3.2628, "step": 560 }, { "epoch": 5.59, "learning_rate": 0.0003772, "loss": 3.2541, "step": 570 }, { "epoch": 5.69, "learning_rate": 0.0003768, "loss": 3.223, "step": 580 }, { "epoch": 5.78, "learning_rate": 0.0003764, "loss": 3.2028, "step": 590 }, { "epoch": 5.88, "learning_rate": 0.000376, "loss": 3.1659, "step": 600 }, { "epoch": 5.98, "learning_rate": 0.0003756, "loss": 3.0847, "step": 610 }, { "epoch": 6.08, "learning_rate": 0.0003752, "loss": 3.0215, "step": 620 }, { "epoch": 6.18, "learning_rate": 0.00037480000000000006, "loss": 3.0149, "step": 630 }, { "epoch": 6.27, "learning_rate": 0.00037440000000000005, "loss": 3.0177, "step": 640 }, { "epoch": 6.37, "learning_rate": 0.00037400000000000004, "loss": 2.9474, "step": 650 }, { "epoch": 6.47, "learning_rate": 0.00037360000000000003, "loss": 2.9245, "step": 660 }, { "epoch": 6.57, "learning_rate": 0.0003732, "loss": 2.9218, "step": 670 }, { "epoch": 6.67, "learning_rate": 0.00037280000000000006, "loss": 2.8666, "step": 680 }, { "epoch": 6.76, "learning_rate": 0.00037240000000000005, "loss": 2.8821, "step": 690 }, { "epoch": 6.86, "learning_rate": 0.00037200000000000004, "loss": 2.8243, "step": 700 }, { "epoch": 6.96, "learning_rate": 0.00037160000000000003, "loss": 2.7753, "step": 710 }, { "epoch": 7.06, "learning_rate": 0.0003712, "loss": 2.7086, "step": 720 }, { "epoch": 7.16, "learning_rate": 0.0003708, "loss": 2.7104, "step": 730 }, { "epoch": 7.25, "learning_rate": 0.00037040000000000006, "loss": 2.7103, "step": 740 }, { "epoch": 7.35, "learning_rate": 0.00037000000000000005, "loss": 2.6707, "step": 750 }, { "epoch": 7.45, "learning_rate": 0.00036960000000000004, "loss": 2.6413, "step": 760 }, { "epoch": 7.55, "learning_rate": 0.00036920000000000003, "loss": 2.6359, "step": 770 }, { "epoch": 7.65, "learning_rate": 0.0003688, "loss": 2.5838, "step": 780 }, { "epoch": 7.75, "learning_rate": 0.0003684, "loss": 2.6212, "step": 790 }, { "epoch": 7.84, "learning_rate": 0.00036800000000000005, "loss": 2.5718, "step": 800 }, { "epoch": 7.94, "learning_rate": 0.00036760000000000004, "loss": 2.5348, "step": 810 }, { "epoch": 8.04, "learning_rate": 0.00036720000000000004, "loss": 2.4195, "step": 820 }, { "epoch": 8.14, "learning_rate": 0.0003668, "loss": 2.4938, "step": 830 }, { "epoch": 8.24, "learning_rate": 0.0003664, "loss": 2.4372, "step": 840 }, { "epoch": 8.33, "learning_rate": 0.000366, "loss": 2.4567, "step": 850 }, { "epoch": 8.43, "learning_rate": 0.00036560000000000005, "loss": 2.4108, "step": 860 }, { "epoch": 8.53, "learning_rate": 0.00036520000000000004, "loss": 2.3993, "step": 870 }, { "epoch": 8.63, "learning_rate": 0.00036480000000000003, "loss": 2.3739, "step": 880 }, { "epoch": 8.73, "learning_rate": 0.0003644, "loss": 2.391, "step": 890 }, { "epoch": 8.82, "learning_rate": 0.000364, "loss": 2.38, "step": 900 }, { "epoch": 8.92, "learning_rate": 0.00036360000000000006, "loss": 2.3257, "step": 910 }, { "epoch": 9.02, "learning_rate": 0.00036320000000000005, "loss": 2.1956, "step": 920 }, { "epoch": 9.12, "learning_rate": 0.00036280000000000004, "loss": 2.2944, "step": 930 }, { "epoch": 9.22, "learning_rate": 0.0003624, "loss": 2.2163, "step": 940 }, { "epoch": 9.31, "learning_rate": 0.000362, "loss": 2.2641, "step": 950 }, { "epoch": 9.41, "learning_rate": 0.0003616, "loss": 2.2035, "step": 960 }, { "epoch": 9.51, "learning_rate": 0.00036120000000000005, "loss": 2.188, "step": 970 }, { "epoch": 9.61, "learning_rate": 0.00036080000000000004, "loss": 2.197, "step": 980 }, { "epoch": 9.71, "learning_rate": 0.00036040000000000003, "loss": 2.18, "step": 990 }, { "epoch": 9.8, "learning_rate": 0.00036, "loss": 2.1994, "step": 1000 } ], "logging_steps": 10, "max_steps": 10000, "num_train_epochs": 99, "save_steps": 1000, "total_flos": 6.6810430881792e+16, "trial_name": null, "trial_params": null }