{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9983987189751802, "global_step": 624, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.996794871794872e-05, "loss": 2.5842, "step": 1 }, { "epoch": 0.03, "learning_rate": 1.967948717948718e-05, "loss": 2.511, "step": 10 }, { "epoch": 0.06, "learning_rate": 1.935897435897436e-05, "loss": 2.4961, "step": 20 }, { "epoch": 0.1, "learning_rate": 1.903846153846154e-05, "loss": 2.4203, "step": 30 }, { "epoch": 0.13, "learning_rate": 1.8717948717948718e-05, "loss": 2.3722, "step": 40 }, { "epoch": 0.16, "learning_rate": 1.8397435897435897e-05, "loss": 2.4072, "step": 50 }, { "epoch": 0.19, "learning_rate": 1.807692307692308e-05, "loss": 2.4484, "step": 60 }, { "epoch": 0.22, "learning_rate": 1.775641025641026e-05, "loss": 2.3817, "step": 70 }, { "epoch": 0.26, "learning_rate": 1.7435897435897438e-05, "loss": 2.4138, "step": 80 }, { "epoch": 0.29, "learning_rate": 1.7115384615384617e-05, "loss": 2.3693, "step": 90 }, { "epoch": 0.32, "learning_rate": 1.6794871794871796e-05, "loss": 2.3947, "step": 100 }, { "epoch": 0.35, "learning_rate": 1.6474358974358975e-05, "loss": 2.3659, "step": 110 }, { "epoch": 0.38, "learning_rate": 1.6153846153846154e-05, "loss": 2.3695, "step": 120 }, { "epoch": 0.42, "learning_rate": 1.5833333333333333e-05, "loss": 2.343, "step": 130 }, { "epoch": 0.45, "learning_rate": 1.5512820512820516e-05, "loss": 2.3434, "step": 140 }, { "epoch": 0.48, "learning_rate": 1.5192307692307693e-05, "loss": 2.3407, "step": 150 }, { "epoch": 0.51, "learning_rate": 1.4871794871794874e-05, "loss": 2.3595, "step": 160 }, { "epoch": 0.54, "learning_rate": 1.4551282051282051e-05, "loss": 2.3485, "step": 170 }, { "epoch": 0.58, "learning_rate": 1.4230769230769232e-05, "loss": 2.3488, "step": 180 }, { "epoch": 0.61, "learning_rate": 1.3910256410256411e-05, "loss": 2.4007, "step": 190 }, { "epoch": 0.64, "learning_rate": 1.3589743589743592e-05, "loss": 2.3197, "step": 200 }, { "epoch": 0.67, "learning_rate": 1.3269230769230769e-05, "loss": 2.324, "step": 210 }, { "epoch": 0.7, "learning_rate": 1.294871794871795e-05, "loss": 2.3546, "step": 220 }, { "epoch": 0.74, "learning_rate": 1.2628205128205129e-05, "loss": 2.3547, "step": 230 }, { "epoch": 0.77, "learning_rate": 1.230769230769231e-05, "loss": 2.3702, "step": 240 }, { "epoch": 0.8, "learning_rate": 1.1987179487179487e-05, "loss": 2.3071, "step": 250 }, { "epoch": 0.83, "learning_rate": 1.1666666666666668e-05, "loss": 2.3233, "step": 260 }, { "epoch": 0.86, "learning_rate": 1.1346153846153847e-05, "loss": 2.3361, "step": 270 }, { "epoch": 0.9, "learning_rate": 1.1025641025641028e-05, "loss": 2.3152, "step": 280 }, { "epoch": 0.93, "learning_rate": 1.0705128205128205e-05, "loss": 2.3653, "step": 290 }, { "epoch": 0.96, "learning_rate": 1.0384615384615386e-05, "loss": 2.3031, "step": 300 }, { "epoch": 0.99, "learning_rate": 1.0064102564102565e-05, "loss": 2.3035, "step": 310 }, { "epoch": 1.02, "learning_rate": 9.743589743589744e-06, "loss": 1.8449, "step": 320 }, { "epoch": 1.06, "learning_rate": 9.423076923076923e-06, "loss": 1.7192, "step": 330 }, { "epoch": 1.09, "learning_rate": 9.102564102564104e-06, "loss": 1.7361, "step": 340 }, { "epoch": 1.12, "learning_rate": 8.782051282051283e-06, "loss": 1.7584, "step": 350 }, { "epoch": 1.15, "learning_rate": 8.461538461538462e-06, "loss": 1.7269, "step": 360 }, { "epoch": 1.18, "learning_rate": 8.141025641025641e-06, "loss": 1.7208, "step": 370 }, { "epoch": 1.22, "learning_rate": 7.820512820512822e-06, "loss": 1.7485, "step": 380 }, { "epoch": 1.25, "learning_rate": 7.500000000000001e-06, "loss": 1.7506, "step": 390 }, { "epoch": 1.28, "learning_rate": 7.17948717948718e-06, "loss": 1.7193, "step": 400 }, { "epoch": 1.31, "learning_rate": 6.858974358974359e-06, "loss": 1.6919, "step": 410 }, { "epoch": 1.35, "learning_rate": 6.538461538461539e-06, "loss": 1.7516, "step": 420 }, { "epoch": 1.38, "learning_rate": 6.217948717948718e-06, "loss": 1.7129, "step": 430 }, { "epoch": 1.41, "learning_rate": 5.897435897435898e-06, "loss": 1.7253, "step": 440 }, { "epoch": 1.44, "learning_rate": 5.576923076923077e-06, "loss": 1.7259, "step": 450 }, { "epoch": 1.47, "learning_rate": 5.256410256410257e-06, "loss": 1.7065, "step": 460 }, { "epoch": 1.51, "learning_rate": 4.935897435897436e-06, "loss": 1.709, "step": 470 }, { "epoch": 1.54, "learning_rate": 4.615384615384616e-06, "loss": 1.6774, "step": 480 }, { "epoch": 1.57, "learning_rate": 4.294871794871795e-06, "loss": 1.7154, "step": 490 }, { "epoch": 1.6, "learning_rate": 3.974358974358974e-06, "loss": 1.6767, "step": 500 }, { "epoch": 1.63, "learning_rate": 3.653846153846154e-06, "loss": 1.7006, "step": 510 }, { "epoch": 1.67, "learning_rate": 3.3333333333333333e-06, "loss": 1.6998, "step": 520 }, { "epoch": 1.7, "learning_rate": 3.012820512820513e-06, "loss": 1.7088, "step": 530 }, { "epoch": 1.73, "learning_rate": 2.6923076923076923e-06, "loss": 1.6739, "step": 540 }, { "epoch": 1.76, "learning_rate": 2.371794871794872e-06, "loss": 1.6906, "step": 550 }, { "epoch": 1.79, "learning_rate": 2.0512820512820513e-06, "loss": 1.7699, "step": 560 }, { "epoch": 1.83, "learning_rate": 1.7307692307692308e-06, "loss": 1.6989, "step": 570 }, { "epoch": 1.86, "learning_rate": 1.4102564102564104e-06, "loss": 1.6954, "step": 580 }, { "epoch": 1.89, "learning_rate": 1.0897435897435899e-06, "loss": 1.7052, "step": 590 }, { "epoch": 1.92, "learning_rate": 7.692307692307694e-07, "loss": 1.7458, "step": 600 }, { "epoch": 1.95, "learning_rate": 4.4871794871794876e-07, "loss": 1.7088, "step": 610 }, { "epoch": 1.99, "learning_rate": 1.282051282051282e-07, "loss": 1.7122, "step": 620 }, { "epoch": 2.0, "step": 624, "total_flos": 1.810492168297513e+17, "train_loss": 2.042313535626118, "train_runtime": 4896.3607, "train_samples_per_second": 1.02, "train_steps_per_second": 0.127 } ], "max_steps": 624, "num_train_epochs": 2, "total_flos": 1.810492168297513e+17, "trial_name": null, "trial_params": null }