|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.5805515239477503, |
|
"eval_steps": 9, |
|
"global_step": 100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.005805515239477504, |
|
"eval_loss": 0.1446908712387085, |
|
"eval_runtime": 35.8549, |
|
"eval_samples_per_second": 8.088, |
|
"eval_steps_per_second": 1.032, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01741654571843251, |
|
"grad_norm": 8.589686393737793, |
|
"learning_rate": 3e-05, |
|
"loss": 0.6468, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.03483309143686502, |
|
"grad_norm": 2.196739435195923, |
|
"learning_rate": 6e-05, |
|
"loss": 0.3215, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.05224963715529753, |
|
"grad_norm": 1.0070608854293823, |
|
"learning_rate": 9e-05, |
|
"loss": 0.1349, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.05224963715529753, |
|
"eval_loss": 0.04279467836022377, |
|
"eval_runtime": 36.4605, |
|
"eval_samples_per_second": 7.954, |
|
"eval_steps_per_second": 1.015, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.06966618287373004, |
|
"grad_norm": 1.272331953048706, |
|
"learning_rate": 9.987820251299122e-05, |
|
"loss": 0.0889, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.08708272859216255, |
|
"grad_norm": 0.8801549077033997, |
|
"learning_rate": 9.924038765061042e-05, |
|
"loss": 0.1656, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.10449927431059507, |
|
"grad_norm": 1.045451045036316, |
|
"learning_rate": 9.806308479691595e-05, |
|
"loss": 0.0636, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.10449927431059507, |
|
"eval_loss": 0.031937576830387115, |
|
"eval_runtime": 36.4522, |
|
"eval_samples_per_second": 7.956, |
|
"eval_steps_per_second": 1.015, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.12191582002902758, |
|
"grad_norm": 0.48578259348869324, |
|
"learning_rate": 9.635919272833938e-05, |
|
"loss": 0.1489, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.13933236574746008, |
|
"grad_norm": 0.5960637927055359, |
|
"learning_rate": 9.414737964294636e-05, |
|
"loss": 0.0821, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.1567489114658926, |
|
"grad_norm": 0.6202490925788879, |
|
"learning_rate": 9.145187862775209e-05, |
|
"loss": 0.1026, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.1567489114658926, |
|
"eval_loss": 0.026807833462953568, |
|
"eval_runtime": 36.4704, |
|
"eval_samples_per_second": 7.952, |
|
"eval_steps_per_second": 1.015, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.1741654571843251, |
|
"grad_norm": 0.47882765531539917, |
|
"learning_rate": 8.83022221559489e-05, |
|
"loss": 0.0797, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.19158200290275762, |
|
"grad_norm": 0.5080346465110779, |
|
"learning_rate": 8.473291852294987e-05, |
|
"loss": 0.0137, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.20899854862119013, |
|
"grad_norm": 0.8254001140594482, |
|
"learning_rate": 8.07830737662829e-05, |
|
"loss": 0.0772, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.20899854862119013, |
|
"eval_loss": 0.02542661316692829, |
|
"eval_runtime": 36.4687, |
|
"eval_samples_per_second": 7.952, |
|
"eval_steps_per_second": 1.015, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.22641509433962265, |
|
"grad_norm": 0.47083836793899536, |
|
"learning_rate": 7.649596321166024e-05, |
|
"loss": 0.0673, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.24383164005805516, |
|
"grad_norm": 0.5346234440803528, |
|
"learning_rate": 7.191855733945387e-05, |
|
"loss": 0.0242, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.2612481857764877, |
|
"grad_norm": 0.42815378308296204, |
|
"learning_rate": 6.710100716628344e-05, |
|
"loss": 0.0727, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.2612481857764877, |
|
"eval_loss": 0.02381318248808384, |
|
"eval_runtime": 36.4961, |
|
"eval_samples_per_second": 7.946, |
|
"eval_steps_per_second": 1.014, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.27866473149492016, |
|
"grad_norm": 0.5860163569450378, |
|
"learning_rate": 6.209609477998338e-05, |
|
"loss": 0.0393, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.2960812772133527, |
|
"grad_norm": 0.3074454069137573, |
|
"learning_rate": 5.695865504800327e-05, |
|
"loss": 0.0321, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.3134978229317852, |
|
"grad_norm": 0.23264048993587494, |
|
"learning_rate": 5.174497483512506e-05, |
|
"loss": 0.0234, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.3134978229317852, |
|
"eval_loss": 0.022688375785946846, |
|
"eval_runtime": 36.4879, |
|
"eval_samples_per_second": 7.948, |
|
"eval_steps_per_second": 1.014, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.3309143686502177, |
|
"grad_norm": 0.21743811666965485, |
|
"learning_rate": 4.6512176312793736e-05, |
|
"loss": 0.0089, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.3483309143686502, |
|
"grad_norm": 0.5799148678779602, |
|
"learning_rate": 4.131759111665349e-05, |
|
"loss": 0.0938, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.36574746008708275, |
|
"grad_norm": 0.4093296527862549, |
|
"learning_rate": 3.6218132209150045e-05, |
|
"loss": 0.1154, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.36574746008708275, |
|
"eval_loss": 0.021804720163345337, |
|
"eval_runtime": 36.4988, |
|
"eval_samples_per_second": 7.945, |
|
"eval_steps_per_second": 1.014, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.38316400580551524, |
|
"grad_norm": 0.29445552825927734, |
|
"learning_rate": 3.12696703292044e-05, |
|
"loss": 0.0195, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.4005805515239477, |
|
"grad_norm": 0.19134579598903656, |
|
"learning_rate": 2.6526421860705473e-05, |
|
"loss": 0.0057, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.41799709724238027, |
|
"grad_norm": 0.5365853905677795, |
|
"learning_rate": 2.2040354826462668e-05, |
|
"loss": 0.0612, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.41799709724238027, |
|
"eval_loss": 0.02147766947746277, |
|
"eval_runtime": 36.4726, |
|
"eval_samples_per_second": 7.951, |
|
"eval_steps_per_second": 1.014, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.43541364296081275, |
|
"grad_norm": 0.14221055805683136, |
|
"learning_rate": 1.7860619515673033e-05, |
|
"loss": 0.0321, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.4528301886792453, |
|
"grad_norm": 0.52949059009552, |
|
"learning_rate": 1.4033009983067452e-05, |
|
"loss": 0.0412, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.4702467343976778, |
|
"grad_norm": 0.6387705206871033, |
|
"learning_rate": 1.0599462319663905e-05, |
|
"loss": 0.0287, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.4702467343976778, |
|
"eval_loss": 0.021191399544477463, |
|
"eval_runtime": 36.4534, |
|
"eval_samples_per_second": 7.955, |
|
"eval_steps_per_second": 1.015, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.4876632801161103, |
|
"grad_norm": 0.5022262334823608, |
|
"learning_rate": 7.597595192178702e-06, |
|
"loss": 0.1063, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.5050798258345428, |
|
"grad_norm": 0.40936022996902466, |
|
"learning_rate": 5.060297685041659e-06, |
|
"loss": 0.0505, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.5224963715529753, |
|
"grad_norm": 0.6321708559989929, |
|
"learning_rate": 3.0153689607045845e-06, |
|
"loss": 0.0997, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.5224963715529753, |
|
"eval_loss": 0.020932814106345177, |
|
"eval_runtime": 36.4786, |
|
"eval_samples_per_second": 7.95, |
|
"eval_steps_per_second": 1.014, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.5399129172714079, |
|
"grad_norm": 0.890493631362915, |
|
"learning_rate": 1.4852136862001764e-06, |
|
"loss": 0.0538, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.5573294629898403, |
|
"grad_norm": 0.6564047336578369, |
|
"learning_rate": 4.865965629214819e-07, |
|
"loss": 0.0611, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.5747460087082729, |
|
"grad_norm": 0.3723383843898773, |
|
"learning_rate": 3.04586490452119e-08, |
|
"loss": 0.0388, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.5747460087082729, |
|
"eval_loss": 0.020911818370223045, |
|
"eval_runtime": 36.4731, |
|
"eval_samples_per_second": 7.951, |
|
"eval_steps_per_second": 1.014, |
|
"step": 99 |
|
} |
|
], |
|
"logging_steps": 3, |
|
"max_steps": 100, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 9, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.383609000529756e+17, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|