{ "best_metric": 1.5348279476165771, "best_model_checkpoint": "../saved_models_new/gptNEO_author_RB_epochs15_lr5e-05/checkpoint-4000", "epoch": 14.934660858743, "global_step": 24000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.24, "learning_rate": 3.3333333333333335e-05, "loss": 1.6896, "step": 2000 }, { "epoch": 1.24, "eval_loss": 1.5621881484985352, "eval_runtime": 14.0653, "eval_samples_per_second": 25.382, "eval_steps_per_second": 12.726, "step": 2000 }, { "epoch": 2.49, "learning_rate": 4.7630893153281216e-05, "loss": 1.3351, "step": 4000 }, { "epoch": 2.49, "eval_loss": 1.5348279476165771, "eval_runtime": 14.063, "eval_samples_per_second": 25.386, "eval_steps_per_second": 12.728, "step": 4000 }, { "epoch": 3.73, "learning_rate": 4.289267945984364e-05, "loss": 1.154, "step": 6000 }, { "epoch": 3.73, "eval_loss": 1.5707106590270996, "eval_runtime": 14.0614, "eval_samples_per_second": 25.389, "eval_steps_per_second": 12.73, "step": 6000 }, { "epoch": 4.98, "learning_rate": 3.815446576640607e-05, "loss": 0.9265, "step": 8000 }, { "epoch": 4.98, "eval_loss": 1.6517491340637207, "eval_runtime": 14.0589, "eval_samples_per_second": 25.393, "eval_steps_per_second": 12.732, "step": 8000 }, { "epoch": 6.22, "learning_rate": 3.341625207296849e-05, "loss": 0.6998, "step": 10000 }, { "epoch": 6.22, "eval_loss": 1.8329756259918213, "eval_runtime": 14.0588, "eval_samples_per_second": 25.393, "eval_steps_per_second": 12.732, "step": 10000 }, { "epoch": 7.47, "learning_rate": 2.867803837953092e-05, "loss": 0.5298, "step": 12000 }, { "epoch": 7.47, "eval_loss": 1.9407192468643188, "eval_runtime": 14.0594, "eval_samples_per_second": 25.392, "eval_steps_per_second": 12.732, "step": 12000 }, { "epoch": 8.71, "learning_rate": 2.3939824686093343e-05, "loss": 0.391, "step": 14000 }, { "epoch": 8.71, "eval_loss": 2.0445475578308105, "eval_runtime": 14.0538, "eval_samples_per_second": 25.402, "eval_steps_per_second": 12.737, "step": 14000 }, { "epoch": 9.96, "learning_rate": 1.920161099265577e-05, "loss": 0.2698, "step": 16000 }, { "epoch": 9.96, "eval_loss": 2.14052414894104, "eval_runtime": 14.0566, "eval_samples_per_second": 25.397, "eval_steps_per_second": 12.734, "step": 16000 }, { "epoch": 11.2, "learning_rate": 1.4463397299218195e-05, "loss": 0.1802, "step": 18000 }, { "epoch": 11.2, "eval_loss": 2.2577126026153564, "eval_runtime": 14.0589, "eval_samples_per_second": 25.393, "eval_steps_per_second": 12.732, "step": 18000 }, { "epoch": 12.45, "learning_rate": 9.725183605780622e-06, "loss": 0.1244, "step": 20000 }, { "epoch": 12.45, "eval_loss": 2.3212833404541016, "eval_runtime": 14.0654, "eval_samples_per_second": 25.381, "eval_steps_per_second": 12.726, "step": 20000 }, { "epoch": 13.69, "learning_rate": 4.986969912343047e-06, "loss": 0.0874, "step": 22000 }, { "epoch": 13.69, "eval_loss": 2.3692986965179443, "eval_runtime": 14.0588, "eval_samples_per_second": 25.393, "eval_steps_per_second": 12.732, "step": 22000 }, { "epoch": 14.93, "learning_rate": 2.4875621890547267e-07, "loss": 0.0666, "step": 24000 }, { "epoch": 14.93, "eval_loss": 2.398601531982422, "eval_runtime": 14.0655, "eval_samples_per_second": 25.381, "eval_steps_per_second": 12.726, "step": 24000 } ], "max_steps": 24105, "num_train_epochs": 15, "total_flos": 6267141886574592.0, "trial_name": null, "trial_params": null }