{ "best_metric": 1.6143786907196045, "best_model_checkpoint": "output/gspd/checkpoint-104", "epoch": 1.0, "global_step": 104, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "learning_rate": 0.00013641901727743684, "loss": 2.3995, "step": 5 }, { "epoch": 0.1, "learning_rate": 0.00013409385144248624, "loss": 2.0772, "step": 10 }, { "epoch": 0.14, "learning_rate": 0.00013027744460430822, "loss": 1.9236, "step": 15 }, { "epoch": 0.19, "learning_rate": 0.00012505669320030482, "loss": 1.9836, "step": 20 }, { "epoch": 0.24, "learning_rate": 0.00011855046943587908, "loss": 1.876, "step": 25 }, { "epoch": 0.29, "learning_rate": 0.00011090691466231807, "loss": 1.9239, "step": 30 }, { "epoch": 0.34, "learning_rate": 0.00010230006632035399, "loss": 1.8628, "step": 35 }, { "epoch": 0.38, "learning_rate": 9.292589525111794e-05, "loss": 1.6981, "step": 40 }, { "epoch": 0.43, "learning_rate": 8.299784360164853e-05, "loss": 1.7632, "step": 45 }, { "epoch": 0.48, "learning_rate": 7.274196492316882e-05, "loss": 1.8217, "step": 50 }, { "epoch": 0.53, "learning_rate": 6.239177711808685e-05, "loss": 1.7681, "step": 55 }, { "epoch": 0.58, "learning_rate": 5.218294542987356e-05, "loss": 1.7394, "step": 60 }, { "epoch": 0.62, "learning_rate": 4.2347916539754844e-05, "loss": 1.8022, "step": 65 }, { "epoch": 0.67, "learning_rate": 3.311062594741274e-05, "loss": 1.7416, "step": 70 }, { "epoch": 0.72, "learning_rate": 2.4681399144273327e-05, "loss": 1.7103, "step": 75 }, { "epoch": 0.77, "learning_rate": 1.725216267546246e-05, "loss": 1.7463, "step": 80 }, { "epoch": 0.82, "learning_rate": 1.0992074130600507e-05, "loss": 1.6627, "step": 85 }, { "epoch": 0.87, "learning_rate": 6.043670564942622e-06, "loss": 1.7961, "step": 90 }, { "epoch": 0.91, "learning_rate": 2.519623048140383e-06, "loss": 1.7855, "step": 95 }, { "epoch": 0.96, "learning_rate": 5.001712368734975e-07, "loss": 1.7266, "step": 100 }, { "epoch": 1.0, "eval_loss": 1.6143786907196045, "eval_runtime": 6.5105, "eval_samples_per_second": 23.04, "eval_steps_per_second": 2.918, "step": 104 } ], "max_steps": 104, "num_train_epochs": 1, "total_flos": 108566839296000.0, "trial_name": null, "trial_params": null }