{ "best_metric": null, "best_model_checkpoint": null, "epoch": 9.94475138121547, "global_step": 1350, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07, "learning_rate": 0.0002, "loss": 1.9251, "step": 10 }, { "epoch": 0.15, "learning_rate": 0.0002, "loss": 1.1925, "step": 20 }, { "epoch": 0.22, "learning_rate": 0.0002, "loss": 0.6688, "step": 30 }, { "epoch": 0.29, "learning_rate": 0.0002, "loss": 0.778, "step": 40 }, { "epoch": 0.37, "learning_rate": 0.0002, "loss": 0.6743, "step": 50 }, { "epoch": 0.44, "learning_rate": 0.0002, "loss": 0.569, "step": 60 }, { "epoch": 0.52, "learning_rate": 0.0002, "loss": 0.598, "step": 70 }, { "epoch": 0.59, "learning_rate": 0.0002, "loss": 0.6713, "step": 80 }, { "epoch": 0.66, "learning_rate": 0.0002, "loss": 0.571, "step": 90 }, { "epoch": 0.74, "learning_rate": 0.0002, "loss": 0.503, "step": 100 }, { "epoch": 0.81, "learning_rate": 0.0002, "loss": 0.7059, "step": 110 }, { "epoch": 0.88, "learning_rate": 0.0002, "loss": 0.5989, "step": 120 }, { "epoch": 0.96, "learning_rate": 0.0002, "loss": 0.4895, "step": 130 }, { "epoch": 1.03, "learning_rate": 0.0002, "loss": 0.623, "step": 140 }, { "epoch": 1.1, "learning_rate": 0.0002, "loss": 0.637, "step": 150 }, { "epoch": 1.18, "learning_rate": 0.0002, "loss": 0.5461, "step": 160 }, { "epoch": 1.25, "learning_rate": 0.0002, "loss": 0.48, "step": 170 }, { "epoch": 1.33, "learning_rate": 0.0002, "loss": 0.7108, "step": 180 }, { "epoch": 1.4, "learning_rate": 0.0002, "loss": 0.5659, "step": 190 }, { "epoch": 1.47, "learning_rate": 0.0002, "loss": 0.4692, "step": 200 }, { "epoch": 1.55, "learning_rate": 0.0002, "loss": 0.674, "step": 210 }, { "epoch": 1.62, "learning_rate": 0.0002, "loss": 0.5979, "step": 220 }, { "epoch": 1.69, "learning_rate": 0.0002, "loss": 0.484, "step": 230 }, { "epoch": 1.77, "learning_rate": 0.0002, "loss": 0.5799, "step": 240 }, { "epoch": 1.84, "learning_rate": 0.0002, "loss": 0.6141, "step": 250 }, { "epoch": 1.92, "learning_rate": 0.0002, "loss": 0.5177, "step": 260 }, { "epoch": 1.99, "learning_rate": 0.0002, "loss": 0.4582, "step": 270 }, { "epoch": 2.06, "learning_rate": 0.0002, "loss": 0.6667, "step": 280 }, { "epoch": 2.14, "learning_rate": 0.0002, "loss": 0.56, "step": 290 }, { "epoch": 2.21, "learning_rate": 0.0002, "loss": 0.4628, "step": 300 }, { "epoch": 2.28, "learning_rate": 0.0002, "loss": 0.5623, "step": 310 }, { "epoch": 2.36, "learning_rate": 0.0002, "loss": 0.6027, "step": 320 }, { "epoch": 2.43, "learning_rate": 0.0002, "loss": 0.4787, "step": 330 }, { "epoch": 2.5, "learning_rate": 0.0002, "loss": 0.4718, "step": 340 }, { "epoch": 2.58, "learning_rate": 0.0002, "loss": 0.6168, "step": 350 }, { "epoch": 2.65, "learning_rate": 0.0002, "loss": 0.5156, "step": 360 }, { "epoch": 2.73, "learning_rate": 0.0002, "loss": 0.4076, "step": 370 }, { "epoch": 2.8, "learning_rate": 0.0002, "loss": 0.6857, "step": 380 }, { "epoch": 2.87, "learning_rate": 0.0002, "loss": 0.5429, "step": 390 }, { "epoch": 2.95, "learning_rate": 0.0002, "loss": 0.43, "step": 400 }, { "epoch": 3.02, "learning_rate": 0.0002, "loss": 0.5163, "step": 410 }, { "epoch": 3.09, "learning_rate": 0.0002, "loss": 0.6072, "step": 420 }, { "epoch": 3.17, "learning_rate": 0.0002, "loss": 0.4804, "step": 430 }, { "epoch": 3.24, "learning_rate": 0.0002, "loss": 0.3655, "step": 440 }, { "epoch": 3.31, "learning_rate": 0.0002, "loss": 0.6351, "step": 450 }, { "epoch": 3.39, "learning_rate": 0.0002, "loss": 0.512, "step": 460 }, { "epoch": 3.46, "learning_rate": 0.0002, "loss": 0.3907, "step": 470 }, { "epoch": 3.54, "learning_rate": 0.0002, "loss": 0.5736, "step": 480 }, { "epoch": 3.61, "learning_rate": 0.0002, "loss": 0.5337, "step": 490 }, { "epoch": 3.68, "learning_rate": 0.0002, "loss": 0.4324, "step": 500 }, { "epoch": 3.76, "learning_rate": 0.0002, "loss": 0.4844, "step": 510 }, { "epoch": 3.83, "learning_rate": 0.0002, "loss": 0.5886, "step": 520 }, { "epoch": 3.9, "learning_rate": 0.0002, "loss": 0.4723, "step": 530 }, { "epoch": 3.98, "learning_rate": 0.0002, "loss": 0.3895, "step": 540 }, { "epoch": 4.05, "learning_rate": 0.0002, "loss": 0.58, "step": 550 }, { "epoch": 4.13, "learning_rate": 0.0002, "loss": 0.4998, "step": 560 }, { "epoch": 4.2, "learning_rate": 0.0002, "loss": 0.3901, "step": 570 }, { "epoch": 4.27, "learning_rate": 0.0002, "loss": 0.4494, "step": 580 }, { "epoch": 4.35, "learning_rate": 0.0002, "loss": 0.5282, "step": 590 }, { "epoch": 4.42, "learning_rate": 0.0002, "loss": 0.4162, "step": 600 }, { "epoch": 4.49, "learning_rate": 0.0002, "loss": 0.3818, "step": 610 }, { "epoch": 4.57, "learning_rate": 0.0002, "loss": 0.594, "step": 620 }, { "epoch": 4.64, "learning_rate": 0.0002, "loss": 0.4547, "step": 630 }, { "epoch": 4.71, "learning_rate": 0.0002, "loss": 0.3598, "step": 640 }, { "epoch": 4.79, "learning_rate": 0.0002, "loss": 0.5721, "step": 650 }, { "epoch": 4.86, "learning_rate": 0.0002, "loss": 0.4997, "step": 660 }, { "epoch": 4.94, "learning_rate": 0.0002, "loss": 0.3974, "step": 670 }, { "epoch": 5.01, "learning_rate": 0.0002, "loss": 0.4133, "step": 680 }, { "epoch": 5.08, "learning_rate": 0.0002, "loss": 0.5438, "step": 690 }, { "epoch": 5.16, "learning_rate": 0.0002, "loss": 0.408, "step": 700 }, { "epoch": 5.23, "learning_rate": 0.0002, "loss": 0.3157, "step": 710 }, { "epoch": 5.3, "learning_rate": 0.0002, "loss": 0.5219, "step": 720 }, { "epoch": 5.38, "learning_rate": 0.0002, "loss": 0.4365, "step": 730 }, { "epoch": 5.45, "learning_rate": 0.0002, "loss": 0.3586, "step": 740 }, { "epoch": 5.52, "learning_rate": 0.0002, "loss": 0.462, "step": 750 }, { "epoch": 5.6, "learning_rate": 0.0002, "loss": 0.4815, "step": 760 }, { "epoch": 5.67, "learning_rate": 0.0002, "loss": 0.3818, "step": 770 }, { "epoch": 5.75, "learning_rate": 0.0002, "loss": 0.3744, "step": 780 }, { "epoch": 5.82, "learning_rate": 0.0002, "loss": 0.5412, "step": 790 }, { "epoch": 5.89, "learning_rate": 0.0002, "loss": 0.4341, "step": 800 }, { "epoch": 5.97, "learning_rate": 0.0002, "loss": 0.3258, "step": 810 }, { "epoch": 6.04, "learning_rate": 0.0002, "loss": 0.4872, "step": 820 }, { "epoch": 6.11, "learning_rate": 0.0002, "loss": 0.4313, "step": 830 }, { "epoch": 6.19, "learning_rate": 0.0002, "loss": 0.3199, "step": 840 }, { "epoch": 6.26, "learning_rate": 0.0002, "loss": 0.3386, "step": 850 }, { "epoch": 6.34, "learning_rate": 0.0002, "loss": 0.4872, "step": 860 }, { "epoch": 6.41, "learning_rate": 0.0002, "loss": 0.3744, "step": 870 }, { "epoch": 6.48, "learning_rate": 0.0002, "loss": 0.277, "step": 880 }, { "epoch": 6.56, "learning_rate": 0.0002, "loss": 0.5125, "step": 890 }, { "epoch": 6.63, "learning_rate": 0.0002, "loss": 0.4132, "step": 900 }, { "epoch": 6.7, "learning_rate": 0.0002, "loss": 0.31, "step": 910 }, { "epoch": 6.78, "learning_rate": 0.0002, "loss": 0.4628, "step": 920 }, { "epoch": 6.85, "learning_rate": 0.0002, "loss": 0.4471, "step": 930 }, { "epoch": 6.92, "learning_rate": 0.0002, "loss": 0.3348, "step": 940 }, { "epoch": 7.0, "learning_rate": 0.0002, "loss": 0.3292, "step": 950 }, { "epoch": 7.07, "learning_rate": 0.0002, "loss": 0.4596, "step": 960 }, { "epoch": 7.15, "learning_rate": 0.0002, "loss": 0.3506, "step": 970 }, { "epoch": 7.22, "learning_rate": 0.0002, "loss": 0.2606, "step": 980 }, { "epoch": 7.29, "learning_rate": 0.0002, "loss": 0.4052, "step": 990 }, { "epoch": 7.37, "learning_rate": 0.0002, "loss": 0.3891, "step": 1000 }, { "epoch": 7.44, "learning_rate": 0.0002, "loss": 0.3116, "step": 1010 }, { "epoch": 7.51, "learning_rate": 0.0002, "loss": 0.3483, "step": 1020 }, { "epoch": 7.59, "learning_rate": 0.0002, "loss": 0.4398, "step": 1030 }, { "epoch": 7.66, "learning_rate": 0.0002, "loss": 0.3157, "step": 1040 }, { "epoch": 7.73, "learning_rate": 0.0002, "loss": 0.268, "step": 1050 }, { "epoch": 7.81, "learning_rate": 0.0002, "loss": 0.4692, "step": 1060 }, { "epoch": 7.88, "learning_rate": 0.0002, "loss": 0.3724, "step": 1070 }, { "epoch": 7.96, "learning_rate": 0.0002, "loss": 0.2665, "step": 1080 }, { "epoch": 8.03, "learning_rate": 0.0002, "loss": 0.3718, "step": 1090 }, { "epoch": 8.1, "learning_rate": 0.0002, "loss": 0.372, "step": 1100 }, { "epoch": 8.18, "learning_rate": 0.0002, "loss": 0.2868, "step": 1110 }, { "epoch": 8.25, "learning_rate": 0.0002, "loss": 0.2351, "step": 1120 }, { "epoch": 8.32, "learning_rate": 0.0002, "loss": 0.4212, "step": 1130 }, { "epoch": 8.4, "learning_rate": 0.0002, "loss": 0.3118, "step": 1140 }, { "epoch": 8.47, "learning_rate": 0.0002, "loss": 0.2195, "step": 1150 }, { "epoch": 8.55, "learning_rate": 0.0002, "loss": 0.3907, "step": 1160 }, { "epoch": 8.62, "learning_rate": 0.0002, "loss": 0.335, "step": 1170 }, { "epoch": 8.69, "learning_rate": 0.0002, "loss": 0.26, "step": 1180 }, { "epoch": 8.77, "learning_rate": 0.0002, "loss": 0.3481, "step": 1190 }, { "epoch": 8.84, "learning_rate": 0.0002, "loss": 0.3802, "step": 1200 }, { "epoch": 8.91, "learning_rate": 0.0002, "loss": 0.2834, "step": 1210 }, { "epoch": 8.99, "learning_rate": 0.0002, "loss": 0.2545, "step": 1220 }, { "epoch": 9.06, "learning_rate": 0.0002, "loss": 0.3557, "step": 1230 }, { "epoch": 9.13, "learning_rate": 0.0002, "loss": 0.2917, "step": 1240 }, { "epoch": 9.21, "learning_rate": 0.0002, "loss": 0.2154, "step": 1250 }, { "epoch": 9.28, "learning_rate": 0.0002, "loss": 0.2994, "step": 1260 }, { "epoch": 9.36, "learning_rate": 0.0002, "loss": 0.321, "step": 1270 }, { "epoch": 9.43, "learning_rate": 0.0002, "loss": 0.2344, "step": 1280 }, { "epoch": 9.5, "learning_rate": 0.0002, "loss": 0.2356, "step": 1290 }, { "epoch": 9.58, "learning_rate": 0.0002, "loss": 0.3661, "step": 1300 }, { "epoch": 9.65, "learning_rate": 0.0002, "loss": 0.2786, "step": 1310 }, { "epoch": 9.72, "learning_rate": 0.0002, "loss": 0.1928, "step": 1320 }, { "epoch": 9.8, "learning_rate": 0.0002, "loss": 0.3897, "step": 1330 }, { "epoch": 9.87, "learning_rate": 0.0002, "loss": 0.2978, "step": 1340 }, { "epoch": 9.94, "learning_rate": 0.0002, "loss": 0.2234, "step": 1350 } ], "max_steps": 1350, "num_train_epochs": 10, "total_flos": 1.116092169148416e+17, "trial_name": null, "trial_params": null }