|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.3530364372469634, |
|
"eval_steps": 8, |
|
"global_step": 90, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.025910931174089068, |
|
"eval_loss": 0.21314890682697296, |
|
"eval_runtime": 12.1183, |
|
"eval_samples_per_second": 10.728, |
|
"eval_steps_per_second": 5.364, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.07773279352226721, |
|
"grad_norm": 6.452937126159668, |
|
"learning_rate": 3e-05, |
|
"loss": 6.5264, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.15546558704453442, |
|
"grad_norm": 6.785652160644531, |
|
"learning_rate": 6e-05, |
|
"loss": 6.3657, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.20728744939271254, |
|
"eval_loss": 0.10755843669176102, |
|
"eval_runtime": 12.1878, |
|
"eval_samples_per_second": 10.666, |
|
"eval_steps_per_second": 5.333, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.23319838056680162, |
|
"grad_norm": 5.717119216918945, |
|
"learning_rate": 9e-05, |
|
"loss": 4.5674, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.31093117408906884, |
|
"grad_norm": 6.126352787017822, |
|
"learning_rate": 0.00012, |
|
"loss": 1.7668, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.38866396761133604, |
|
"grad_norm": 2.648998498916626, |
|
"learning_rate": 0.00015000000000000001, |
|
"loss": 0.4109, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.4145748987854251, |
|
"eval_loss": 0.0012996116420254111, |
|
"eval_runtime": 12.1969, |
|
"eval_samples_per_second": 10.658, |
|
"eval_steps_per_second": 5.329, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.46639676113360323, |
|
"grad_norm": 0.33190277218818665, |
|
"learning_rate": 0.00018, |
|
"loss": 0.0473, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.5441295546558704, |
|
"grad_norm": 0.2983362078666687, |
|
"learning_rate": 0.00019989930665413147, |
|
"loss": 0.0116, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.6218623481781377, |
|
"grad_norm": 0.2238294929265976, |
|
"learning_rate": 0.00019839295885986296, |
|
"loss": 0.0077, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.6218623481781377, |
|
"eval_loss": 0.000773053674492985, |
|
"eval_runtime": 12.1863, |
|
"eval_samples_per_second": 10.668, |
|
"eval_steps_per_second": 5.334, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.6995951417004048, |
|
"grad_norm": 0.016547193750739098, |
|
"learning_rate": 0.00019510565162951537, |
|
"loss": 0.0142, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.7773279352226721, |
|
"grad_norm": 0.019047992303967476, |
|
"learning_rate": 0.0001900968867902419, |
|
"loss": 0.0045, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.8291497975708502, |
|
"eval_loss": 0.0002317978214705363, |
|
"eval_runtime": 12.2198, |
|
"eval_samples_per_second": 10.638, |
|
"eval_steps_per_second": 5.319, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.8550607287449393, |
|
"grad_norm": 1.0941699743270874, |
|
"learning_rate": 0.00018345732537213027, |
|
"loss": 0.0019, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.9327935222672065, |
|
"grad_norm": 1.774532675743103, |
|
"learning_rate": 0.00017530714660036112, |
|
"loss": 0.0047, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 1.0210526315789474, |
|
"grad_norm": 0.029302451759576797, |
|
"learning_rate": 0.00016579387259397127, |
|
"loss": 0.0021, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 1.0469635627530365, |
|
"eval_loss": 2.721450073295273e-05, |
|
"eval_runtime": 44.3093, |
|
"eval_samples_per_second": 2.934, |
|
"eval_steps_per_second": 1.467, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.0987854251012146, |
|
"grad_norm": 0.009189918637275696, |
|
"learning_rate": 0.00015508969814521025, |
|
"loss": 0.0005, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 1.1765182186234817, |
|
"grad_norm": 0.030586188659071922, |
|
"learning_rate": 0.00014338837391175582, |
|
"loss": 0.0006, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 1.2542510121457489, |
|
"grad_norm": 0.019615933299064636, |
|
"learning_rate": 0.00013090169943749476, |
|
"loss": 0.0008, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 1.2542510121457489, |
|
"eval_loss": 2.218412555521354e-05, |
|
"eval_runtime": 44.4013, |
|
"eval_samples_per_second": 2.928, |
|
"eval_steps_per_second": 1.464, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 1.3319838056680162, |
|
"grad_norm": 0.006924801040440798, |
|
"learning_rate": 0.00011785568947986367, |
|
"loss": 0.045, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 1.4097165991902834, |
|
"grad_norm": 0.02355915680527687, |
|
"learning_rate": 0.00010448648303505151, |
|
"loss": 0.0006, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 1.4615384615384617, |
|
"eval_loss": 2.361264887440484e-05, |
|
"eval_runtime": 44.3855, |
|
"eval_samples_per_second": 2.929, |
|
"eval_steps_per_second": 1.464, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 1.4874493927125507, |
|
"grad_norm": 1.601819396018982, |
|
"learning_rate": 9.103606910965666e-05, |
|
"loss": 0.0077, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 1.5651821862348179, |
|
"grad_norm": 0.09957171976566315, |
|
"learning_rate": 7.774790660436858e-05, |
|
"loss": 0.0016, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.642914979757085, |
|
"grad_norm": 0.03547385334968567, |
|
"learning_rate": 6.486251759186572e-05, |
|
"loss": 0.001, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 1.668825910931174, |
|
"eval_loss": 3.899199145962484e-05, |
|
"eval_runtime": 43.2962, |
|
"eval_samples_per_second": 3.003, |
|
"eval_steps_per_second": 1.501, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 1.7206477732793521, |
|
"grad_norm": 0.025507541373372078, |
|
"learning_rate": 5.261313375270014e-05, |
|
"loss": 0.0005, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 1.7983805668016193, |
|
"grad_norm": 0.009455524384975433, |
|
"learning_rate": 4.12214747707527e-05, |
|
"loss": 0.0004, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 1.8761133603238866, |
|
"grad_norm": 0.0038223874289542437, |
|
"learning_rate": 3.089373510131354e-05, |
|
"loss": 0.0009, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 1.8761133603238866, |
|
"eval_loss": 3.11742551275529e-05, |
|
"eval_runtime": 44.3844, |
|
"eval_samples_per_second": 2.929, |
|
"eval_steps_per_second": 1.464, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 1.953846153846154, |
|
"grad_norm": 0.005872055422514677, |
|
"learning_rate": 2.181685175319702e-05, |
|
"loss": 0.001, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 2.042105263157895, |
|
"grad_norm": 0.03851957991719246, |
|
"learning_rate": 1.415512063981339e-05, |
|
"loss": 0.0008, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 2.093927125506073, |
|
"eval_loss": 3.268746513640508e-05, |
|
"eval_runtime": 44.3546, |
|
"eval_samples_per_second": 2.931, |
|
"eval_steps_per_second": 1.465, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 2.119838056680162, |
|
"grad_norm": 0.009562190622091293, |
|
"learning_rate": 8.047222744854943e-06, |
|
"loss": 0.0003, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 2.197570850202429, |
|
"grad_norm": 0.00980624184012413, |
|
"learning_rate": 3.6037139304146762e-06, |
|
"loss": 0.0003, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 2.2753036437246963, |
|
"grad_norm": 0.010969814844429493, |
|
"learning_rate": 9.0502382320653e-07, |
|
"loss": 0.0003, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 2.3012145748987853, |
|
"eval_loss": 2.5345105314045213e-05, |
|
"eval_runtime": 44.4252, |
|
"eval_samples_per_second": 2.926, |
|
"eval_steps_per_second": 1.463, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 2.3530364372469634, |
|
"grad_norm": 2.2677857875823975, |
|
"learning_rate": 0.0, |
|
"loss": 0.009, |
|
"step": 90 |
|
} |
|
], |
|
"logging_steps": 3, |
|
"max_steps": 90, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 8, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.7332406838951936e+17, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|