{
  "best_metric": 0.8112931251525879,
  "best_model_checkpoint": "saves/LLaMA3-8B/lora/train_1/checkpoint-110",
  "epoch": 0.17500683620453925,
  "eval_steps": 10,
  "global_step": 160,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.010937927262783703,
      "grad_norm": 1.4765043258666992,
      "learning_rate": 0.00029999015487222375,
      "loss": 1.0078,
      "step": 10
    },
    {
      "epoch": 0.010937927262783703,
      "eval_loss": 0.9145990014076233,
      "eval_runtime": 2007.1957,
      "eval_samples_per_second": 23.476,
      "eval_steps_per_second": 0.734,
      "step": 10
    },
    {
      "epoch": 0.021875854525567406,
      "grad_norm": 0.5130007266998291,
      "learning_rate": 0.00029996062078124905,
      "loss": 0.5077,
      "step": 20
    },
    {
      "epoch": 0.021875854525567406,
      "eval_loss": 0.836322009563446,
      "eval_runtime": 2008.0976,
      "eval_samples_per_second": 23.465,
      "eval_steps_per_second": 0.734,
      "step": 20
    },
    {
      "epoch": 0.03281378178835111,
      "grad_norm": 0.6383033394813538,
      "learning_rate": 0.0002999114016039678,
      "loss": 0.4613,
      "step": 30
    },
    {
      "epoch": 0.03281378178835111,
      "eval_loss": 0.8337860703468323,
      "eval_runtime": 2007.8768,
      "eval_samples_per_second": 23.468,
      "eval_steps_per_second": 0.734,
      "step": 30
    },
    {
      "epoch": 0.04375170905113481,
      "grad_norm": 0.4186599552631378,
      "learning_rate": 0.00029984250380130117,
      "loss": 0.4794,
      "step": 40
    },
    {
      "epoch": 0.04375170905113481,
      "eval_loss": 0.8337470889091492,
      "eval_runtime": 2006.2999,
      "eval_samples_per_second": 23.486,
      "eval_steps_per_second": 0.734,
      "step": 40
    },
    {
      "epoch": 0.05468963631391851,
      "grad_norm": 0.39893779158592224,
      "learning_rate": 0.0002997539364173515,
      "loss": 0.4624,
      "step": 50
    },
    {
      "epoch": 0.05468963631391851,
      "eval_loss": 0.8181310892105103,
      "eval_runtime": 2006.8912,
      "eval_samples_per_second": 23.479,
      "eval_steps_per_second": 0.734,
      "step": 50
    },
    {
      "epoch": 0.06562756357670221,
      "grad_norm": 0.3842828571796417,
      "learning_rate": 0.00029964571107821494,
      "loss": 0.4366,
      "step": 60
    },
    {
      "epoch": 0.06562756357670221,
      "eval_loss": 0.8186683058738708,
      "eval_runtime": 2007.9415,
      "eval_samples_per_second": 23.467,
      "eval_steps_per_second": 0.734,
      "step": 60
    },
    {
      "epoch": 0.07656549083948591,
      "grad_norm": 0.36217156052589417,
      "learning_rate": 0.00029951784199045534,
      "loss": 0.4323,
      "step": 70
    },
    {
      "epoch": 0.07656549083948591,
      "eval_loss": 0.8172433972358704,
      "eval_runtime": 2006.1302,
      "eval_samples_per_second": 23.488,
      "eval_steps_per_second": 0.734,
      "step": 70
    },
    {
      "epoch": 0.08750341810226962,
      "grad_norm": 0.3985615074634552,
      "learning_rate": 0.0002993703459392396,
      "loss": 0.433,
      "step": 80
    },
    {
      "epoch": 0.08750341810226962,
      "eval_loss": 0.8255128860473633,
      "eval_runtime": 2007.4062,
      "eval_samples_per_second": 23.473,
      "eval_steps_per_second": 0.734,
      "step": 80
    },
    {
      "epoch": 0.09844134536505332,
      "grad_norm": 0.38471728563308716,
      "learning_rate": 0.00029920324228613376,
      "loss": 0.4401,
      "step": 90
    },
    {
      "epoch": 0.09844134536505332,
      "eval_loss": 0.8299794793128967,
      "eval_runtime": 2006.2366,
      "eval_samples_per_second": 23.487,
      "eval_steps_per_second": 0.734,
      "step": 90
    },
    {
      "epoch": 0.10937927262783702,
      "grad_norm": 0.4009046256542206,
      "learning_rate": 0.0002990165529665622,
      "loss": 0.4019,
      "step": 100
    },
    {
      "epoch": 0.10937927262783702,
      "eval_loss": 0.8478946089744568,
      "eval_runtime": 2006.4757,
      "eval_samples_per_second": 23.484,
      "eval_steps_per_second": 0.734,
      "step": 100
    },
    {
      "epoch": 0.12031719989062073,
      "grad_norm": 0.3922439515590668,
      "learning_rate": 0.0002988103024869277,
      "loss": 0.4112,
      "step": 110
    },
    {
      "epoch": 0.12031719989062073,
      "eval_loss": 0.8112931251525879,
      "eval_runtime": 2005.6033,
      "eval_samples_per_second": 23.494,
      "eval_steps_per_second": 0.734,
      "step": 110
    },
    {
      "epoch": 0.13125512715340443,
      "grad_norm": 0.37745875120162964,
      "learning_rate": 0.00029858451792139453,
      "loss": 0.4009,
      "step": 120
    },
    {
      "epoch": 0.13125512715340443,
      "eval_loss": 0.845430314540863,
      "eval_runtime": 2006.7483,
      "eval_samples_per_second": 23.481,
      "eval_steps_per_second": 0.734,
      "step": 120
    },
    {
      "epoch": 0.14219305441618812,
      "grad_norm": 0.3471999764442444,
      "learning_rate": 0.0002983392289083346,
      "loss": 0.4023,
      "step": 130
    },
    {
      "epoch": 0.14219305441618812,
      "eval_loss": 0.8435468077659607,
      "eval_runtime": 2005.5353,
      "eval_samples_per_second": 23.495,
      "eval_steps_per_second": 0.734,
      "step": 130
    },
    {
      "epoch": 0.15313098167897182,
      "grad_norm": 0.32064810395240784,
      "learning_rate": 0.0002980744676464371,
      "loss": 0.3924,
      "step": 140
    },
    {
      "epoch": 0.15313098167897182,
      "eval_loss": 0.8307648301124573,
      "eval_runtime": 2005.4412,
      "eval_samples_per_second": 23.496,
      "eval_steps_per_second": 0.735,
      "step": 140
    },
    {
      "epoch": 0.16406890894175555,
      "grad_norm": 0.31663310527801514,
      "learning_rate": 0.0002977902688904813,
      "loss": 0.4079,
      "step": 150
    },
    {
      "epoch": 0.16406890894175555,
      "eval_loss": 0.8372470140457153,
      "eval_runtime": 2005.2559,
      "eval_samples_per_second": 23.498,
      "eval_steps_per_second": 0.735,
      "step": 150
    },
    {
      "epoch": 0.17500683620453925,
      "grad_norm": 0.351032018661499,
      "learning_rate": 0.00029748666994677467,
      "loss": 0.3898,
      "step": 160
    },
    {
      "epoch": 0.17500683620453925,
      "eval_loss": 0.8541739583015442,
      "eval_runtime": 2005.8203,
      "eval_samples_per_second": 23.492,
      "eval_steps_per_second": 0.734,
      "step": 160
    },
    {
      "epoch": 0.17500683620453925,
      "step": 160,
      "total_flos": 2.8452078024759706e+17,
      "train_loss": 0.4666908696293831,
      "train_runtime": 39003.3227,
      "train_samples_per_second": 18.002,
      "train_steps_per_second": 0.07
    }
  ],
  "logging_steps": 10,
  "max_steps": 2742,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 10,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.8452078024759706e+17,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": null
}