{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9323671497584543, "eval_steps": 25, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.24154589371980675, "grad_norm": 0.3504341244697571, "learning_rate": 0.000493936638139193, "loss": 1.2154, "step": 25 }, { "epoch": 0.24154589371980675, "eval_loss": 1.2845402956008911, "eval_runtime": 58.0708, "eval_samples_per_second": 3.565, "eval_steps_per_second": 0.448, "step": 25 }, { "epoch": 0.4830917874396135, "grad_norm": 0.11877840012311935, "learning_rate": 0.000463751348237005, "loss": 0.7638, "step": 50 }, { "epoch": 0.4830917874396135, "eval_loss": 1.255712628364563, "eval_runtime": 58.0315, "eval_samples_per_second": 3.567, "eval_steps_per_second": 0.448, "step": 50 }, { "epoch": 0.7246376811594203, "grad_norm": 0.09250932186841965, "learning_rate": 0.000411248712216741, "loss": 0.7477, "step": 75 }, { "epoch": 0.7246376811594203, "eval_loss": 1.1869494915008545, "eval_runtime": 57.9221, "eval_samples_per_second": 3.574, "eval_steps_per_second": 0.449, "step": 75 }, { "epoch": 0.966183574879227, "grad_norm": 0.10278703272342682, "learning_rate": 0.00034191042415818, "loss": 0.7249, "step": 100 }, { "epoch": 0.966183574879227, "eval_loss": 1.317003607749939, "eval_runtime": 57.9128, "eval_samples_per_second": 3.574, "eval_steps_per_second": 0.449, "step": 100 }, { "epoch": 1.2077294685990339, "grad_norm": 0.09411193430423737, "learning_rate": 0.00026297595453297494, "loss": 0.7095, "step": 125 }, { "epoch": 1.2077294685990339, "eval_loss": 1.3763236999511719, "eval_runtime": 57.9829, "eval_samples_per_second": 3.57, "eval_steps_per_second": 0.448, "step": 125 }, { "epoch": 1.4492753623188406, "grad_norm": 0.08513357490301132, "learning_rate": 0.00018268669172909137, "loss": 0.708, "step": 150 }, { "epoch": 1.4492753623188406, "eval_loss": 1.2287328243255615, "eval_runtime": 57.9704, "eval_samples_per_second": 3.571, "eval_steps_per_second": 0.449, "step": 150 }, { "epoch": 1.6908212560386473, "grad_norm": 0.08826680481433868, "learning_rate": 0.00010942547535123056, "loss": 0.7055, "step": 175 }, { "epoch": 1.6908212560386473, "eval_loss": 1.2410857677459717, "eval_runtime": 58.0223, "eval_samples_per_second": 3.568, "eval_steps_per_second": 0.448, "step": 175 }, { "epoch": 1.9323671497584543, "grad_norm": 0.0850740373134613, "learning_rate": 5.0841360885691e-05, "loss": 0.7017, "step": 200 }, { "epoch": 1.9323671497584543, "eval_loss": 1.2755271196365356, "eval_runtime": 57.8348, "eval_samples_per_second": 3.579, "eval_steps_per_second": 0.45, "step": 200 } ], "logging_steps": 25, "max_steps": 250, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.8667153521278976e+16, "train_batch_size": 1, "trial_name": null, "trial_params": null }