{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 9.75609756097561,
  "eval_steps": 500,
  "global_step": 500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.3902439024390244,
      "grad_norm": 0.9049399495124817,
      "learning_rate": 4.347826086956522e-05,
      "loss": 2.2562,
      "step": 20
    },
    {
      "epoch": 0.7804878048780488,
      "grad_norm": 0.8101117610931396,
      "learning_rate": 4.88544474393531e-05,
      "loss": 2.1366,
      "step": 40
    },
    {
      "epoch": 1.170731707317073,
      "grad_norm": 0.6929331421852112,
      "learning_rate": 4.750673854447439e-05,
      "loss": 1.9538,
      "step": 60
    },
    {
      "epoch": 1.5609756097560976,
      "grad_norm": 0.754576563835144,
      "learning_rate": 4.615902964959569e-05,
      "loss": 1.9039,
      "step": 80
    },
    {
      "epoch": 1.951219512195122,
      "grad_norm": 0.8291248679161072,
      "learning_rate": 4.4811320754716985e-05,
      "loss": 1.8062,
      "step": 100
    },
    {
      "epoch": 2.341463414634146,
      "grad_norm": 0.9486225247383118,
      "learning_rate": 4.3463611859838275e-05,
      "loss": 1.7765,
      "step": 120
    },
    {
      "epoch": 2.7317073170731705,
      "grad_norm": 1.1279809474945068,
      "learning_rate": 4.211590296495957e-05,
      "loss": 1.7288,
      "step": 140
    },
    {
      "epoch": 3.1219512195121952,
      "grad_norm": 1.1358485221862793,
      "learning_rate": 4.076819407008086e-05,
      "loss": 1.7136,
      "step": 160
    },
    {
      "epoch": 3.5121951219512195,
      "grad_norm": 1.2939114570617676,
      "learning_rate": 3.942048517520216e-05,
      "loss": 1.698,
      "step": 180
    },
    {
      "epoch": 3.902439024390244,
      "grad_norm": 1.3795692920684814,
      "learning_rate": 3.807277628032345e-05,
      "loss": 1.668,
      "step": 200
    },
    {
      "epoch": 4.2926829268292686,
      "grad_norm": 1.2294673919677734,
      "learning_rate": 3.672506738544474e-05,
      "loss": 1.6258,
      "step": 220
    },
    {
      "epoch": 4.682926829268292,
      "grad_norm": 1.4048880338668823,
      "learning_rate": 3.537735849056604e-05,
      "loss": 1.6133,
      "step": 240
    },
    {
      "epoch": 5.073170731707317,
      "grad_norm": 1.1704691648483276,
      "learning_rate": 3.4029649595687336e-05,
      "loss": 1.6349,
      "step": 260
    },
    {
      "epoch": 5.463414634146342,
      "grad_norm": 1.4525257349014282,
      "learning_rate": 3.2681940700808625e-05,
      "loss": 1.5788,
      "step": 280
    },
    {
      "epoch": 5.853658536585366,
      "grad_norm": 1.5394439697265625,
      "learning_rate": 3.133423180592992e-05,
      "loss": 1.5827,
      "step": 300
    },
    {
      "epoch": 6.2439024390243905,
      "grad_norm": 1.5792720317840576,
      "learning_rate": 2.998652291105121e-05,
      "loss": 1.5451,
      "step": 320
    },
    {
      "epoch": 6.634146341463414,
      "grad_norm": 1.6444499492645264,
      "learning_rate": 2.863881401617251e-05,
      "loss": 1.5757,
      "step": 340
    },
    {
      "epoch": 7.024390243902439,
      "grad_norm": 1.5749609470367432,
      "learning_rate": 2.7291105121293804e-05,
      "loss": 1.5414,
      "step": 360
    },
    {
      "epoch": 7.414634146341464,
      "grad_norm": 1.6040682792663574,
      "learning_rate": 2.5943396226415094e-05,
      "loss": 1.5311,
      "step": 380
    },
    {
      "epoch": 7.804878048780488,
      "grad_norm": 1.7397934198379517,
      "learning_rate": 2.459568733153639e-05,
      "loss": 1.534,
      "step": 400
    },
    {
      "epoch": 8.195121951219512,
      "grad_norm": 1.9339927434921265,
      "learning_rate": 2.3247978436657683e-05,
      "loss": 1.5277,
      "step": 420
    },
    {
      "epoch": 8.585365853658537,
      "grad_norm": 1.8686648607254028,
      "learning_rate": 2.1900269541778976e-05,
      "loss": 1.496,
      "step": 440
    },
    {
      "epoch": 8.975609756097562,
      "grad_norm": 1.8943285942077637,
      "learning_rate": 2.055256064690027e-05,
      "loss": 1.4955,
      "step": 460
    },
    {
      "epoch": 9.365853658536585,
      "grad_norm": 2.715195894241333,
      "learning_rate": 1.9204851752021562e-05,
      "loss": 1.4837,
      "step": 480
    },
    {
      "epoch": 9.75609756097561,
      "grad_norm": 2.464538097381592,
      "learning_rate": 1.785714285714286e-05,
      "loss": 1.5082,
      "step": 500
    }
  ],
  "logging_steps": 20,
  "max_steps": 765,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 15,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 3.137043399013171e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}