|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.8514851485148514, |
|
"eval_steps": 3, |
|
"global_step": 36, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.07920792079207921, |
|
"grad_norm": 0.6195542216300964, |
|
"learning_rate": 5e-05, |
|
"loss": 2.7446, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.15841584158415842, |
|
"grad_norm": 0.6150830984115601, |
|
"learning_rate": 0.0001, |
|
"loss": 2.7192, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.2376237623762376, |
|
"grad_norm": 0.6174027323722839, |
|
"learning_rate": 9.705882352941177e-05, |
|
"loss": 2.7788, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.2376237623762376, |
|
"eval_loss": 2.845029830932617, |
|
"eval_runtime": 2.5957, |
|
"eval_samples_per_second": 0.771, |
|
"eval_steps_per_second": 0.771, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.31683168316831684, |
|
"grad_norm": 0.5594128966331482, |
|
"learning_rate": 9.411764705882353e-05, |
|
"loss": 2.6762, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.39603960396039606, |
|
"grad_norm": 0.4553978145122528, |
|
"learning_rate": 9.11764705882353e-05, |
|
"loss": 2.577, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.4752475247524752, |
|
"grad_norm": 0.31477224826812744, |
|
"learning_rate": 8.823529411764706e-05, |
|
"loss": 2.7767, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.4752475247524752, |
|
"eval_loss": 2.74169921875, |
|
"eval_runtime": 2.6084, |
|
"eval_samples_per_second": 0.767, |
|
"eval_steps_per_second": 0.767, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.5544554455445545, |
|
"grad_norm": 0.6173616051673889, |
|
"learning_rate": 8.529411764705883e-05, |
|
"loss": 2.5605, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.6336633663366337, |
|
"grad_norm": 0.7040618062019348, |
|
"learning_rate": 8.23529411764706e-05, |
|
"loss": 2.6044, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.7128712871287128, |
|
"grad_norm": 0.5611338019371033, |
|
"learning_rate": 7.941176470588235e-05, |
|
"loss": 2.6618, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.7128712871287128, |
|
"eval_loss": 2.7047266960144043, |
|
"eval_runtime": 2.596, |
|
"eval_samples_per_second": 0.77, |
|
"eval_steps_per_second": 0.77, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.7920792079207921, |
|
"grad_norm": 0.4442787766456604, |
|
"learning_rate": 7.647058823529411e-05, |
|
"loss": 2.5313, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.8712871287128713, |
|
"grad_norm": 0.292217880487442, |
|
"learning_rate": 7.352941176470589e-05, |
|
"loss": 2.6006, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.9504950495049505, |
|
"grad_norm": 0.2892855107784271, |
|
"learning_rate": 7.058823529411765e-05, |
|
"loss": 2.4469, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.9504950495049505, |
|
"eval_loss": 2.671910524368286, |
|
"eval_runtime": 2.5993, |
|
"eval_samples_per_second": 0.769, |
|
"eval_steps_per_second": 0.769, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 1.0297029702970297, |
|
"grad_norm": 0.2804577350616455, |
|
"learning_rate": 6.764705882352942e-05, |
|
"loss": 2.5202, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 1.108910891089109, |
|
"grad_norm": 0.29348304867744446, |
|
"learning_rate": 6.470588235294118e-05, |
|
"loss": 2.4257, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 1.188118811881188, |
|
"grad_norm": 0.27386218309402466, |
|
"learning_rate": 6.176470588235295e-05, |
|
"loss": 2.4943, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 1.188118811881188, |
|
"eval_loss": 2.6478095054626465, |
|
"eval_runtime": 2.6061, |
|
"eval_samples_per_second": 0.767, |
|
"eval_steps_per_second": 0.767, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 1.2673267326732673, |
|
"grad_norm": 0.28914758563041687, |
|
"learning_rate": 5.882352941176471e-05, |
|
"loss": 2.5296, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 1.3465346534653464, |
|
"grad_norm": 0.2648894786834717, |
|
"learning_rate": 5.588235294117647e-05, |
|
"loss": 2.5639, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 1.4257425742574257, |
|
"grad_norm": 0.2344515174627304, |
|
"learning_rate": 5.294117647058824e-05, |
|
"loss": 2.4419, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 1.4257425742574257, |
|
"eval_loss": 2.626154899597168, |
|
"eval_runtime": 2.6016, |
|
"eval_samples_per_second": 0.769, |
|
"eval_steps_per_second": 0.769, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 1.504950495049505, |
|
"grad_norm": 0.2299778014421463, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4946, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 1.5841584158415842, |
|
"grad_norm": 0.2452971488237381, |
|
"learning_rate": 4.705882352941177e-05, |
|
"loss": 2.4017, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 1.6633663366336635, |
|
"grad_norm": 0.2606826722621918, |
|
"learning_rate": 4.411764705882353e-05, |
|
"loss": 2.5081, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 1.6633663366336635, |
|
"eval_loss": 2.6083157062530518, |
|
"eval_runtime": 2.6059, |
|
"eval_samples_per_second": 0.767, |
|
"eval_steps_per_second": 0.767, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 1.7425742574257426, |
|
"grad_norm": 0.2621608376502991, |
|
"learning_rate": 4.11764705882353e-05, |
|
"loss": 2.5019, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 1.8217821782178216, |
|
"grad_norm": 0.24822287261486053, |
|
"learning_rate": 3.8235294117647055e-05, |
|
"loss": 2.4289, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 1.900990099009901, |
|
"grad_norm": 0.26251110434532166, |
|
"learning_rate": 3.529411764705883e-05, |
|
"loss": 2.4366, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 1.900990099009901, |
|
"eval_loss": 2.592751979827881, |
|
"eval_runtime": 2.6021, |
|
"eval_samples_per_second": 0.769, |
|
"eval_steps_per_second": 0.769, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 1.9801980198019802, |
|
"grad_norm": 0.29046395421028137, |
|
"learning_rate": 3.235294117647059e-05, |
|
"loss": 2.4076, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 2.0594059405940595, |
|
"grad_norm": 0.3276838958263397, |
|
"learning_rate": 2.9411764705882354e-05, |
|
"loss": 2.3266, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 2.1386138613861387, |
|
"grad_norm": 0.25077176094055176, |
|
"learning_rate": 2.647058823529412e-05, |
|
"loss": 2.3362, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 2.1386138613861387, |
|
"eval_loss": 2.579805374145508, |
|
"eval_runtime": 2.6029, |
|
"eval_samples_per_second": 0.768, |
|
"eval_steps_per_second": 0.768, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 2.217821782178218, |
|
"grad_norm": 0.2501392662525177, |
|
"learning_rate": 2.3529411764705884e-05, |
|
"loss": 2.3981, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 2.297029702970297, |
|
"grad_norm": 0.23780333995819092, |
|
"learning_rate": 2.058823529411765e-05, |
|
"loss": 2.4174, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 2.376237623762376, |
|
"grad_norm": 0.3056803047657013, |
|
"learning_rate": 1.7647058823529414e-05, |
|
"loss": 2.3146, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 2.376237623762376, |
|
"eval_loss": 2.5697760581970215, |
|
"eval_runtime": 2.6051, |
|
"eval_samples_per_second": 0.768, |
|
"eval_steps_per_second": 0.768, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 2.4554455445544554, |
|
"grad_norm": 0.22868944704532623, |
|
"learning_rate": 1.4705882352941177e-05, |
|
"loss": 2.3982, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 2.5346534653465347, |
|
"grad_norm": 0.23521411418914795, |
|
"learning_rate": 1.1764705882352942e-05, |
|
"loss": 2.3992, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 2.613861386138614, |
|
"grad_norm": 0.267884224653244, |
|
"learning_rate": 8.823529411764707e-06, |
|
"loss": 2.4405, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 2.613861386138614, |
|
"eval_loss": 2.5635159015655518, |
|
"eval_runtime": 2.5955, |
|
"eval_samples_per_second": 0.771, |
|
"eval_steps_per_second": 0.771, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 2.693069306930693, |
|
"grad_norm": 0.21910974383354187, |
|
"learning_rate": 5.882352941176471e-06, |
|
"loss": 2.4613, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 2.772277227722772, |
|
"grad_norm": 0.262515127658844, |
|
"learning_rate": 2.9411764705882355e-06, |
|
"loss": 2.3542, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 2.8514851485148514, |
|
"grad_norm": 0.2211056649684906, |
|
"learning_rate": 0.0, |
|
"loss": 2.4264, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 2.8514851485148514, |
|
"eval_loss": 2.5613255500793457, |
|
"eval_runtime": 2.603, |
|
"eval_samples_per_second": 0.768, |
|
"eval_steps_per_second": 0.768, |
|
"step": 36 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 36, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 3, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.4554329481230336e+17, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|