{ "best_metric": 0.8112931251525879, "best_model_checkpoint": "saves/LLaMA3-8B/lora/train_1/checkpoint-110", "epoch": 0.17500683620453925, "eval_steps": 10, "global_step": 160, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.010937927262783703, "grad_norm": 1.4765043258666992, "learning_rate": 0.00029999015487222375, "loss": 1.0078, "step": 10 }, { "epoch": 0.010937927262783703, "eval_loss": 0.9145990014076233, "eval_runtime": 2007.1957, "eval_samples_per_second": 23.476, "eval_steps_per_second": 0.734, "step": 10 }, { "epoch": 0.021875854525567406, "grad_norm": 0.5130007266998291, "learning_rate": 0.00029996062078124905, "loss": 0.5077, "step": 20 }, { "epoch": 0.021875854525567406, "eval_loss": 0.836322009563446, "eval_runtime": 2008.0976, "eval_samples_per_second": 23.465, "eval_steps_per_second": 0.734, "step": 20 }, { "epoch": 0.03281378178835111, "grad_norm": 0.6383033394813538, "learning_rate": 0.0002999114016039678, "loss": 0.4613, "step": 30 }, { "epoch": 0.03281378178835111, "eval_loss": 0.8337860703468323, "eval_runtime": 2007.8768, "eval_samples_per_second": 23.468, "eval_steps_per_second": 0.734, "step": 30 }, { "epoch": 0.04375170905113481, "grad_norm": 0.4186599552631378, "learning_rate": 0.00029984250380130117, "loss": 0.4794, "step": 40 }, { "epoch": 0.04375170905113481, "eval_loss": 0.8337470889091492, "eval_runtime": 2006.2999, "eval_samples_per_second": 23.486, "eval_steps_per_second": 0.734, "step": 40 }, { "epoch": 0.05468963631391851, "grad_norm": 0.39893779158592224, "learning_rate": 0.0002997539364173515, "loss": 0.4624, "step": 50 }, { "epoch": 0.05468963631391851, "eval_loss": 0.8181310892105103, "eval_runtime": 2006.8912, "eval_samples_per_second": 23.479, "eval_steps_per_second": 0.734, "step": 50 }, { "epoch": 0.06562756357670221, "grad_norm": 0.3842828571796417, "learning_rate": 0.00029964571107821494, "loss": 0.4366, "step": 60 }, { "epoch": 0.06562756357670221, "eval_loss": 0.8186683058738708, "eval_runtime": 2007.9415, "eval_samples_per_second": 23.467, "eval_steps_per_second": 0.734, "step": 60 }, { "epoch": 0.07656549083948591, "grad_norm": 0.36217156052589417, "learning_rate": 0.00029951784199045534, "loss": 0.4323, "step": 70 }, { "epoch": 0.07656549083948591, "eval_loss": 0.8172433972358704, "eval_runtime": 2006.1302, "eval_samples_per_second": 23.488, "eval_steps_per_second": 0.734, "step": 70 }, { "epoch": 0.08750341810226962, "grad_norm": 0.3985615074634552, "learning_rate": 0.0002993703459392396, "loss": 0.433, "step": 80 }, { "epoch": 0.08750341810226962, "eval_loss": 0.8255128860473633, "eval_runtime": 2007.4062, "eval_samples_per_second": 23.473, "eval_steps_per_second": 0.734, "step": 80 }, { "epoch": 0.09844134536505332, "grad_norm": 0.38471728563308716, "learning_rate": 0.00029920324228613376, "loss": 0.4401, "step": 90 }, { "epoch": 0.09844134536505332, "eval_loss": 0.8299794793128967, "eval_runtime": 2006.2366, "eval_samples_per_second": 23.487, "eval_steps_per_second": 0.734, "step": 90 }, { "epoch": 0.10937927262783702, "grad_norm": 0.4009046256542206, "learning_rate": 0.0002990165529665622, "loss": 0.4019, "step": 100 }, { "epoch": 0.10937927262783702, "eval_loss": 0.8478946089744568, "eval_runtime": 2006.4757, "eval_samples_per_second": 23.484, "eval_steps_per_second": 0.734, "step": 100 }, { "epoch": 0.12031719989062073, "grad_norm": 0.3922439515590668, "learning_rate": 0.0002988103024869277, "loss": 0.4112, "step": 110 }, { "epoch": 0.12031719989062073, "eval_loss": 0.8112931251525879, "eval_runtime": 2005.6033, "eval_samples_per_second": 23.494, "eval_steps_per_second": 0.734, "step": 110 }, { "epoch": 0.13125512715340443, "grad_norm": 0.37745875120162964, "learning_rate": 0.00029858451792139453, "loss": 0.4009, "step": 120 }, { "epoch": 0.13125512715340443, "eval_loss": 0.845430314540863, "eval_runtime": 2006.7483, "eval_samples_per_second": 23.481, "eval_steps_per_second": 0.734, "step": 120 }, { "epoch": 0.14219305441618812, "grad_norm": 0.3471999764442444, "learning_rate": 0.0002983392289083346, "loss": 0.4023, "step": 130 }, { "epoch": 0.14219305441618812, "eval_loss": 0.8435468077659607, "eval_runtime": 2005.5353, "eval_samples_per_second": 23.495, "eval_steps_per_second": 0.734, "step": 130 }, { "epoch": 0.15313098167897182, "grad_norm": 0.32064810395240784, "learning_rate": 0.0002980744676464371, "loss": 0.3924, "step": 140 }, { "epoch": 0.15313098167897182, "eval_loss": 0.8307648301124573, "eval_runtime": 2005.4412, "eval_samples_per_second": 23.496, "eval_steps_per_second": 0.735, "step": 140 }, { "epoch": 0.16406890894175555, "grad_norm": 0.31663310527801514, "learning_rate": 0.0002977902688904813, "loss": 0.4079, "step": 150 }, { "epoch": 0.16406890894175555, "eval_loss": 0.8372470140457153, "eval_runtime": 2005.2559, "eval_samples_per_second": 23.498, "eval_steps_per_second": 0.735, "step": 150 }, { "epoch": 0.17500683620453925, "grad_norm": 0.351032018661499, "learning_rate": 0.00029748666994677467, "loss": 0.3898, "step": 160 }, { "epoch": 0.17500683620453925, "eval_loss": 0.8541739583015442, "eval_runtime": 2005.8203, "eval_samples_per_second": 23.492, "eval_steps_per_second": 0.734, "step": 160 }, { "epoch": 0.17500683620453925, "step": 160, "total_flos": 2.8452078024759706e+17, "train_loss": 0.4666908696293831, "train_runtime": 39003.3227, "train_samples_per_second": 18.002, "train_steps_per_second": 0.07 } ], "logging_steps": 10, "max_steps": 2742, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 10, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.8452078024759706e+17, "train_batch_size": 32, "trial_name": null, "trial_params": null }