auxyus's picture
Training in progress, step 100, checkpoint
2f14025 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.5805515239477503,
"eval_steps": 9,
"global_step": 100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.005805515239477504,
"eval_loss": 0.1446908712387085,
"eval_runtime": 35.8549,
"eval_samples_per_second": 8.088,
"eval_steps_per_second": 1.032,
"step": 1
},
{
"epoch": 0.01741654571843251,
"grad_norm": 8.589686393737793,
"learning_rate": 3e-05,
"loss": 0.6468,
"step": 3
},
{
"epoch": 0.03483309143686502,
"grad_norm": 2.196739435195923,
"learning_rate": 6e-05,
"loss": 0.3215,
"step": 6
},
{
"epoch": 0.05224963715529753,
"grad_norm": 1.0070608854293823,
"learning_rate": 9e-05,
"loss": 0.1349,
"step": 9
},
{
"epoch": 0.05224963715529753,
"eval_loss": 0.04279467836022377,
"eval_runtime": 36.4605,
"eval_samples_per_second": 7.954,
"eval_steps_per_second": 1.015,
"step": 9
},
{
"epoch": 0.06966618287373004,
"grad_norm": 1.272331953048706,
"learning_rate": 9.987820251299122e-05,
"loss": 0.0889,
"step": 12
},
{
"epoch": 0.08708272859216255,
"grad_norm": 0.8801549077033997,
"learning_rate": 9.924038765061042e-05,
"loss": 0.1656,
"step": 15
},
{
"epoch": 0.10449927431059507,
"grad_norm": 1.045451045036316,
"learning_rate": 9.806308479691595e-05,
"loss": 0.0636,
"step": 18
},
{
"epoch": 0.10449927431059507,
"eval_loss": 0.031937576830387115,
"eval_runtime": 36.4522,
"eval_samples_per_second": 7.956,
"eval_steps_per_second": 1.015,
"step": 18
},
{
"epoch": 0.12191582002902758,
"grad_norm": 0.48578259348869324,
"learning_rate": 9.635919272833938e-05,
"loss": 0.1489,
"step": 21
},
{
"epoch": 0.13933236574746008,
"grad_norm": 0.5960637927055359,
"learning_rate": 9.414737964294636e-05,
"loss": 0.0821,
"step": 24
},
{
"epoch": 0.1567489114658926,
"grad_norm": 0.6202490925788879,
"learning_rate": 9.145187862775209e-05,
"loss": 0.1026,
"step": 27
},
{
"epoch": 0.1567489114658926,
"eval_loss": 0.026807833462953568,
"eval_runtime": 36.4704,
"eval_samples_per_second": 7.952,
"eval_steps_per_second": 1.015,
"step": 27
},
{
"epoch": 0.1741654571843251,
"grad_norm": 0.47882765531539917,
"learning_rate": 8.83022221559489e-05,
"loss": 0.0797,
"step": 30
},
{
"epoch": 0.19158200290275762,
"grad_norm": 0.5080346465110779,
"learning_rate": 8.473291852294987e-05,
"loss": 0.0137,
"step": 33
},
{
"epoch": 0.20899854862119013,
"grad_norm": 0.8254001140594482,
"learning_rate": 8.07830737662829e-05,
"loss": 0.0772,
"step": 36
},
{
"epoch": 0.20899854862119013,
"eval_loss": 0.02542661316692829,
"eval_runtime": 36.4687,
"eval_samples_per_second": 7.952,
"eval_steps_per_second": 1.015,
"step": 36
},
{
"epoch": 0.22641509433962265,
"grad_norm": 0.47083836793899536,
"learning_rate": 7.649596321166024e-05,
"loss": 0.0673,
"step": 39
},
{
"epoch": 0.24383164005805516,
"grad_norm": 0.5346234440803528,
"learning_rate": 7.191855733945387e-05,
"loss": 0.0242,
"step": 42
},
{
"epoch": 0.2612481857764877,
"grad_norm": 0.42815378308296204,
"learning_rate": 6.710100716628344e-05,
"loss": 0.0727,
"step": 45
},
{
"epoch": 0.2612481857764877,
"eval_loss": 0.02381318248808384,
"eval_runtime": 36.4961,
"eval_samples_per_second": 7.946,
"eval_steps_per_second": 1.014,
"step": 45
},
{
"epoch": 0.27866473149492016,
"grad_norm": 0.5860163569450378,
"learning_rate": 6.209609477998338e-05,
"loss": 0.0393,
"step": 48
},
{
"epoch": 0.2960812772133527,
"grad_norm": 0.3074454069137573,
"learning_rate": 5.695865504800327e-05,
"loss": 0.0321,
"step": 51
},
{
"epoch": 0.3134978229317852,
"grad_norm": 0.23264048993587494,
"learning_rate": 5.174497483512506e-05,
"loss": 0.0234,
"step": 54
},
{
"epoch": 0.3134978229317852,
"eval_loss": 0.022688375785946846,
"eval_runtime": 36.4879,
"eval_samples_per_second": 7.948,
"eval_steps_per_second": 1.014,
"step": 54
},
{
"epoch": 0.3309143686502177,
"grad_norm": 0.21743811666965485,
"learning_rate": 4.6512176312793736e-05,
"loss": 0.0089,
"step": 57
},
{
"epoch": 0.3483309143686502,
"grad_norm": 0.5799148678779602,
"learning_rate": 4.131759111665349e-05,
"loss": 0.0938,
"step": 60
},
{
"epoch": 0.36574746008708275,
"grad_norm": 0.4093296527862549,
"learning_rate": 3.6218132209150045e-05,
"loss": 0.1154,
"step": 63
},
{
"epoch": 0.36574746008708275,
"eval_loss": 0.021804720163345337,
"eval_runtime": 36.4988,
"eval_samples_per_second": 7.945,
"eval_steps_per_second": 1.014,
"step": 63
},
{
"epoch": 0.38316400580551524,
"grad_norm": 0.29445552825927734,
"learning_rate": 3.12696703292044e-05,
"loss": 0.0195,
"step": 66
},
{
"epoch": 0.4005805515239477,
"grad_norm": 0.19134579598903656,
"learning_rate": 2.6526421860705473e-05,
"loss": 0.0057,
"step": 69
},
{
"epoch": 0.41799709724238027,
"grad_norm": 0.5365853905677795,
"learning_rate": 2.2040354826462668e-05,
"loss": 0.0612,
"step": 72
},
{
"epoch": 0.41799709724238027,
"eval_loss": 0.02147766947746277,
"eval_runtime": 36.4726,
"eval_samples_per_second": 7.951,
"eval_steps_per_second": 1.014,
"step": 72
},
{
"epoch": 0.43541364296081275,
"grad_norm": 0.14221055805683136,
"learning_rate": 1.7860619515673033e-05,
"loss": 0.0321,
"step": 75
},
{
"epoch": 0.4528301886792453,
"grad_norm": 0.52949059009552,
"learning_rate": 1.4033009983067452e-05,
"loss": 0.0412,
"step": 78
},
{
"epoch": 0.4702467343976778,
"grad_norm": 0.6387705206871033,
"learning_rate": 1.0599462319663905e-05,
"loss": 0.0287,
"step": 81
},
{
"epoch": 0.4702467343976778,
"eval_loss": 0.021191399544477463,
"eval_runtime": 36.4534,
"eval_samples_per_second": 7.955,
"eval_steps_per_second": 1.015,
"step": 81
},
{
"epoch": 0.4876632801161103,
"grad_norm": 0.5022262334823608,
"learning_rate": 7.597595192178702e-06,
"loss": 0.1063,
"step": 84
},
{
"epoch": 0.5050798258345428,
"grad_norm": 0.40936022996902466,
"learning_rate": 5.060297685041659e-06,
"loss": 0.0505,
"step": 87
},
{
"epoch": 0.5224963715529753,
"grad_norm": 0.6321708559989929,
"learning_rate": 3.0153689607045845e-06,
"loss": 0.0997,
"step": 90
},
{
"epoch": 0.5224963715529753,
"eval_loss": 0.020932814106345177,
"eval_runtime": 36.4786,
"eval_samples_per_second": 7.95,
"eval_steps_per_second": 1.014,
"step": 90
},
{
"epoch": 0.5399129172714079,
"grad_norm": 0.890493631362915,
"learning_rate": 1.4852136862001764e-06,
"loss": 0.0538,
"step": 93
},
{
"epoch": 0.5573294629898403,
"grad_norm": 0.6564047336578369,
"learning_rate": 4.865965629214819e-07,
"loss": 0.0611,
"step": 96
},
{
"epoch": 0.5747460087082729,
"grad_norm": 0.3723383843898773,
"learning_rate": 3.04586490452119e-08,
"loss": 0.0388,
"step": 99
},
{
"epoch": 0.5747460087082729,
"eval_loss": 0.020911818370223045,
"eval_runtime": 36.4731,
"eval_samples_per_second": 7.951,
"eval_steps_per_second": 1.014,
"step": 99
}
],
"logging_steps": 3,
"max_steps": 100,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 9,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.383609000529756e+17,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}