{ "best_metric": 0.9637255072593689, "best_model_checkpoint": "./outputs/checkpoint-2200", "epoch": 2.9333333333333336, "eval_steps": 100, "global_step": 2200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.13, "learning_rate": 0.0002, "loss": 1.0609, "step": 100 }, { "epoch": 0.13, "eval_loss": 1.075240969657898, "eval_runtime": 354.5496, "eval_samples_per_second": 5.444, "eval_steps_per_second": 0.683, "step": 100 }, { "epoch": 0.27, "learning_rate": 0.0002, "loss": 0.9179, "step": 200 }, { "epoch": 0.27, "eval_loss": 1.0535345077514648, "eval_runtime": 353.5007, "eval_samples_per_second": 5.46, "eval_steps_per_second": 0.685, "step": 200 }, { "epoch": 0.4, "learning_rate": 0.0002, "loss": 0.9039, "step": 300 }, { "epoch": 0.4, "eval_loss": 1.0421228408813477, "eval_runtime": 353.3408, "eval_samples_per_second": 5.462, "eval_steps_per_second": 0.685, "step": 300 }, { "epoch": 0.53, "learning_rate": 0.0002, "loss": 0.8945, "step": 400 }, { "epoch": 0.53, "eval_loss": 1.033984661102295, "eval_runtime": 353.6405, "eval_samples_per_second": 5.458, "eval_steps_per_second": 0.684, "step": 400 }, { "epoch": 0.67, "learning_rate": 0.0002, "loss": 0.8815, "step": 500 }, { "epoch": 0.67, "eval_loss": 1.0270758867263794, "eval_runtime": 353.4888, "eval_samples_per_second": 5.46, "eval_steps_per_second": 0.685, "step": 500 }, { "epoch": 0.8, "learning_rate": 0.0002, "loss": 0.8818, "step": 600 }, { "epoch": 0.8, "eval_loss": 1.0182862281799316, "eval_runtime": 353.8429, "eval_samples_per_second": 5.454, "eval_steps_per_second": 0.684, "step": 600 }, { "epoch": 0.93, "learning_rate": 0.0002, "loss": 0.8787, "step": 700 }, { "epoch": 0.93, "eval_loss": 1.0158191919326782, "eval_runtime": 354.0822, "eval_samples_per_second": 5.451, "eval_steps_per_second": 0.683, "step": 700 }, { "epoch": 1.07, "learning_rate": 0.0002, "loss": 0.8685, "step": 800 }, { "epoch": 1.07, "eval_loss": 1.0099796056747437, "eval_runtime": 354.7588, "eval_samples_per_second": 5.44, "eval_steps_per_second": 0.682, "step": 800 }, { "epoch": 1.2, "learning_rate": 0.0002, "loss": 0.8637, "step": 900 }, { "epoch": 1.2, "eval_loss": 1.006648063659668, "eval_runtime": 354.057, "eval_samples_per_second": 5.451, "eval_steps_per_second": 0.684, "step": 900 }, { "epoch": 1.33, "learning_rate": 0.0002, "loss": 0.8554, "step": 1000 }, { "epoch": 1.33, "eval_loss": 1.0002124309539795, "eval_runtime": 354.0353, "eval_samples_per_second": 5.451, "eval_steps_per_second": 0.684, "step": 1000 }, { "epoch": 1.47, "learning_rate": 0.0002, "loss": 0.859, "step": 1100 }, { "epoch": 1.47, "eval_loss": 0.9991586804389954, "eval_runtime": 354.265, "eval_samples_per_second": 5.448, "eval_steps_per_second": 0.683, "step": 1100 }, { "epoch": 1.6, "learning_rate": 0.0002, "loss": 0.8584, "step": 1200 }, { "epoch": 1.6, "eval_loss": 0.9944009184837341, "eval_runtime": 353.9279, "eval_samples_per_second": 5.453, "eval_steps_per_second": 0.684, "step": 1200 }, { "epoch": 1.73, "learning_rate": 0.0002, "loss": 0.852, "step": 1300 }, { "epoch": 1.73, "eval_loss": 0.989990234375, "eval_runtime": 354.4704, "eval_samples_per_second": 5.445, "eval_steps_per_second": 0.683, "step": 1300 }, { "epoch": 1.87, "learning_rate": 0.0002, "loss": 0.846, "step": 1400 }, { "epoch": 1.87, "eval_loss": 0.9865836501121521, "eval_runtime": 358.9224, "eval_samples_per_second": 5.377, "eval_steps_per_second": 0.674, "step": 1400 }, { "epoch": 2.0, "learning_rate": 0.0002, "loss": 0.847, "step": 1500 }, { "epoch": 2.0, "eval_loss": 0.9840803742408752, "eval_runtime": 354.4171, "eval_samples_per_second": 5.446, "eval_steps_per_second": 0.683, "step": 1500 }, { "epoch": 2.13, "learning_rate": 0.0002, "loss": 0.8378, "step": 1600 }, { "epoch": 2.13, "eval_loss": 0.9801441431045532, "eval_runtime": 355.2231, "eval_samples_per_second": 5.433, "eval_steps_per_second": 0.681, "step": 1600 }, { "epoch": 2.27, "learning_rate": 0.0002, "loss": 0.8418, "step": 1700 }, { "epoch": 2.27, "eval_loss": 0.9762414693832397, "eval_runtime": 354.5929, "eval_samples_per_second": 5.443, "eval_steps_per_second": 0.682, "step": 1700 }, { "epoch": 2.4, "learning_rate": 0.0002, "loss": 0.831, "step": 1800 }, { "epoch": 2.4, "eval_loss": 0.975382924079895, "eval_runtime": 378.7821, "eval_samples_per_second": 5.095, "eval_steps_per_second": 0.639, "step": 1800 }, { "epoch": 2.53, "learning_rate": 0.0002, "loss": 0.8297, "step": 1900 }, { "epoch": 2.53, "eval_loss": 0.9727230072021484, "eval_runtime": 366.7965, "eval_samples_per_second": 5.262, "eval_steps_per_second": 0.66, "step": 1900 }, { "epoch": 2.67, "learning_rate": 0.0002, "loss": 0.8283, "step": 2000 }, { "epoch": 2.67, "eval_loss": 0.9686517715454102, "eval_runtime": 366.7742, "eval_samples_per_second": 5.262, "eval_steps_per_second": 0.66, "step": 2000 }, { "epoch": 2.8, "learning_rate": 0.0002, "loss": 0.8303, "step": 2100 }, { "epoch": 2.8, "eval_loss": 0.9657241106033325, "eval_runtime": 368.2461, "eval_samples_per_second": 5.241, "eval_steps_per_second": 0.657, "step": 2100 }, { "epoch": 2.93, "learning_rate": 0.0002, "loss": 0.8358, "step": 2200 }, { "epoch": 2.93, "eval_loss": 0.9637255072593689, "eval_runtime": 365.4459, "eval_samples_per_second": 5.281, "eval_steps_per_second": 0.662, "step": 2200 } ], "logging_steps": 100, "max_steps": 2250, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 100, "total_flos": 6.220289728654295e+17, "trial_name": null, "trial_params": null }