{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "global_step": 18750, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.13, "learning_rate": 1.961904761904762e-05, "loss": 2.4725, "step": 500 }, { "epoch": 0.27, "learning_rate": 1.923809523809524e-05, "loss": 2.2746, "step": 1000 }, { "epoch": 0.4, "learning_rate": 1.885714285714286e-05, "loss": 2.2761, "step": 1500 }, { "epoch": 0.53, "learning_rate": 1.8476190476190478e-05, "loss": 2.1547, "step": 2000 }, { "epoch": 0.67, "learning_rate": 1.8095238095238097e-05, "loss": 2.2002, "step": 2500 }, { "epoch": 0.8, "learning_rate": 1.7714285714285717e-05, "loss": 2.1066, "step": 3000 }, { "epoch": 0.93, "learning_rate": 1.7333333333333336e-05, "loss": 2.0066, "step": 3500 }, { "epoch": 1.0, "eval_loss": 2.0953896045684814, "eval_runtime": 5.7038, "eval_samples_per_second": 207.582, "step": 3750 }, { "epoch": 1.07, "learning_rate": 1.6952380952380955e-05, "loss": 1.9183, "step": 4000 }, { "epoch": 1.2, "learning_rate": 1.6571428571428574e-05, "loss": 2.0477, "step": 4500 }, { "epoch": 1.33, "learning_rate": 1.6190476190476193e-05, "loss": 1.8964, "step": 5000 }, { "epoch": 1.47, "learning_rate": 1.580952380952381e-05, "loss": 1.9112, "step": 5500 }, { "epoch": 1.6, "learning_rate": 1.542857142857143e-05, "loss": 1.9038, "step": 6000 }, { "epoch": 1.73, "learning_rate": 1.5047619047619049e-05, "loss": 1.9071, "step": 6500 }, { "epoch": 1.87, "learning_rate": 1.4666666666666666e-05, "loss": 1.8629, "step": 7000 }, { "epoch": 2.0, "learning_rate": 1.4285714285714287e-05, "loss": 1.8226, "step": 7500 }, { "epoch": 2.0, "eval_loss": 1.9552286863327026, "eval_runtime": 5.8934, "eval_samples_per_second": 200.902, "step": 7500 }, { "epoch": 2.13, "learning_rate": 1.3904761904761905e-05, "loss": 1.851, "step": 8000 }, { "epoch": 2.27, "learning_rate": 1.3523809523809525e-05, "loss": 1.6683, "step": 8500 }, { "epoch": 2.4, "learning_rate": 1.3142857142857145e-05, "loss": 1.8138, "step": 9000 }, { "epoch": 2.53, "learning_rate": 1.2761904761904762e-05, "loss": 1.7491, "step": 9500 }, { "epoch": 2.67, "learning_rate": 1.2380952380952383e-05, "loss": 1.7464, "step": 10000 }, { "epoch": 2.8, "learning_rate": 1.2e-05, "loss": 1.7883, "step": 10500 }, { "epoch": 2.93, "learning_rate": 1.1619047619047621e-05, "loss": 1.7698, "step": 11000 }, { "epoch": 3.0, "eval_loss": 1.8933346271514893, "eval_runtime": 5.5236, "eval_samples_per_second": 214.353, "step": 11250 }, { "epoch": 3.07, "learning_rate": 1.1238095238095239e-05, "loss": 1.7162, "step": 11500 }, { "epoch": 3.2, "learning_rate": 1.0857142857142858e-05, "loss": 1.6866, "step": 12000 }, { "epoch": 3.33, "learning_rate": 1.0476190476190477e-05, "loss": 1.731, "step": 12500 }, { "epoch": 3.47, "learning_rate": 1.0095238095238096e-05, "loss": 1.6949, "step": 13000 }, { "epoch": 3.6, "learning_rate": 9.714285714285715e-06, "loss": 1.5885, "step": 13500 }, { "epoch": 3.73, "learning_rate": 9.333333333333334e-06, "loss": 1.647, "step": 14000 }, { "epoch": 3.87, "learning_rate": 8.952380952380953e-06, "loss": 1.6989, "step": 14500 }, { "epoch": 4.0, "learning_rate": 8.571428571428571e-06, "loss": 1.5514, "step": 15000 }, { "epoch": 4.0, "eval_loss": 1.866198182106018, "eval_runtime": 5.5702, "eval_samples_per_second": 212.558, "step": 15000 }, { "epoch": 4.13, "learning_rate": 8.190476190476192e-06, "loss": 1.5816, "step": 15500 }, { "epoch": 4.27, "learning_rate": 7.809523809523811e-06, "loss": 1.5777, "step": 16000 }, { "epoch": 4.4, "learning_rate": 7.428571428571429e-06, "loss": 1.4894, "step": 16500 }, { "epoch": 4.53, "learning_rate": 7.047619047619048e-06, "loss": 1.5155, "step": 17000 }, { "epoch": 4.67, "learning_rate": 6.666666666666667e-06, "loss": 1.5695, "step": 17500 }, { "epoch": 4.8, "learning_rate": 6.285714285714286e-06, "loss": 1.5737, "step": 18000 }, { "epoch": 4.93, "learning_rate": 5.904761904761905e-06, "loss": 1.4525, "step": 18500 }, { "epoch": 5.0, "eval_loss": 1.7822632789611816, "eval_runtime": 5.6756, "eval_samples_per_second": 208.612, "step": 18750 } ], "max_steps": 26250, "num_train_epochs": 7, "total_flos": 881661582406872.0, "trial_name": null, "trial_params": null }