{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9996257951852314, "eval_steps": 500, "global_step": 3006, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1, "learning_rate": 4.833666001330672e-05, "loss": 1.4437, "step": 100 }, { "epoch": 0.2, "learning_rate": 4.6673320026613444e-05, "loss": 1.3823, "step": 200 }, { "epoch": 0.3, "learning_rate": 4.5009980039920164e-05, "loss": 1.3499, "step": 300 }, { "epoch": 0.4, "learning_rate": 4.3346640053226885e-05, "loss": 1.3187, "step": 400 }, { "epoch": 0.5, "learning_rate": 4.1683300066533606e-05, "loss": 1.3217, "step": 500 }, { "epoch": 0.6, "learning_rate": 4.0019960079840326e-05, "loss": 1.3004, "step": 600 }, { "epoch": 0.7, "learning_rate": 3.835662009314704e-05, "loss": 1.2892, "step": 700 }, { "epoch": 0.8, "learning_rate": 3.669328010645376e-05, "loss": 1.2834, "step": 800 }, { "epoch": 0.9, "learning_rate": 3.502994011976048e-05, "loss": 1.274, "step": 900 }, { "epoch": 1.0, "learning_rate": 3.33666001330672e-05, "loss": 1.2676, "step": 1000 }, { "epoch": 1.1, "learning_rate": 3.170326014637392e-05, "loss": 1.2393, "step": 1100 }, { "epoch": 1.2, "learning_rate": 3.003992015968064e-05, "loss": 1.2381, "step": 1200 }, { "epoch": 1.3, "learning_rate": 2.837658017298736e-05, "loss": 1.2341, "step": 1300 }, { "epoch": 1.4, "learning_rate": 2.671324018629408e-05, "loss": 1.2248, "step": 1400 }, { "epoch": 1.5, "learning_rate": 2.5049900199600802e-05, "loss": 1.213, "step": 1500 }, { "epoch": 1.6, "learning_rate": 2.338656021290752e-05, "loss": 1.2051, "step": 1600 }, { "epoch": 1.7, "learning_rate": 2.172322022621424e-05, "loss": 1.2271, "step": 1700 }, { "epoch": 1.8, "learning_rate": 2.0059880239520957e-05, "loss": 1.2158, "step": 1800 }, { "epoch": 1.9, "learning_rate": 1.8396540252827678e-05, "loss": 1.2037, "step": 1900 }, { "epoch": 2.0, "learning_rate": 1.67332002661344e-05, "loss": 1.1981, "step": 2000 }, { "epoch": 2.1, "learning_rate": 1.506986027944112e-05, "loss": 1.1968, "step": 2100 }, { "epoch": 2.2, "learning_rate": 1.3406520292747837e-05, "loss": 1.1906, "step": 2200 }, { "epoch": 2.3, "learning_rate": 1.1743180306054557e-05, "loss": 1.1746, "step": 2300 }, { "epoch": 2.39, "learning_rate": 1.0079840319361278e-05, "loss": 1.1874, "step": 2400 }, { "epoch": 2.49, "learning_rate": 8.416500332667999e-06, "loss": 1.1763, "step": 2500 }, { "epoch": 2.59, "learning_rate": 6.753160345974717e-06, "loss": 1.1672, "step": 2600 }, { "epoch": 2.69, "learning_rate": 5.0898203592814375e-06, "loss": 1.1709, "step": 2700 }, { "epoch": 2.79, "learning_rate": 3.4264803725881573e-06, "loss": 1.1677, "step": 2800 }, { "epoch": 2.89, "learning_rate": 1.7631403858948771e-06, "loss": 1.1616, "step": 2900 }, { "epoch": 2.99, "learning_rate": 9.98003992015968e-08, "loss": 1.1609, "step": 3000 }, { "epoch": 3.0, "step": 3006, "total_flos": 5.378670905076864e+16, "train_loss": 1.2394252794231484, "train_runtime": 3602.0389, "train_samples_per_second": 106.833, "train_steps_per_second": 0.835 } ], "logging_steps": 100, "max_steps": 3006, "num_train_epochs": 3, "save_steps": 500, "total_flos": 5.378670905076864e+16, "trial_name": null, "trial_params": null }