{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 15375,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0975609756097561,
      "grad_norm": 19.442411422729492,
      "learning_rate": 4.8373983739837406e-05,
      "loss": 6.7559,
      "step": 500
    },
    {
      "epoch": 0.1951219512195122,
      "grad_norm": 22.672739028930664,
      "learning_rate": 4.6747967479674795e-05,
      "loss": 6.6932,
      "step": 1000
    },
    {
      "epoch": 0.2926829268292683,
      "grad_norm": 21.795516967773438,
      "learning_rate": 4.51219512195122e-05,
      "loss": 6.6652,
      "step": 1500
    },
    {
      "epoch": 0.3902439024390244,
      "grad_norm": 19.84381866455078,
      "learning_rate": 4.3495934959349595e-05,
      "loss": 6.6335,
      "step": 2000
    },
    {
      "epoch": 0.4878048780487805,
      "grad_norm": 14.22912883758545,
      "learning_rate": 4.186991869918699e-05,
      "loss": 6.6248,
      "step": 2500
    },
    {
      "epoch": 0.5853658536585366,
      "grad_norm": 14.391462326049805,
      "learning_rate": 4.0243902439024395e-05,
      "loss": 6.5929,
      "step": 3000
    },
    {
      "epoch": 0.6829268292682927,
      "grad_norm": 19.81720733642578,
      "learning_rate": 3.861788617886179e-05,
      "loss": 6.5589,
      "step": 3500
    },
    {
      "epoch": 0.7804878048780488,
      "grad_norm": 15.33761978149414,
      "learning_rate": 3.699186991869919e-05,
      "loss": 6.5327,
      "step": 4000
    },
    {
      "epoch": 0.8780487804878049,
      "grad_norm": 14.190281867980957,
      "learning_rate": 3.5365853658536584e-05,
      "loss": 6.5175,
      "step": 4500
    },
    {
      "epoch": 0.975609756097561,
      "grad_norm": 16.57828712463379,
      "learning_rate": 3.373983739837399e-05,
      "loss": 6.5137,
      "step": 5000
    },
    {
      "epoch": 1.0731707317073171,
      "grad_norm": 16.75761604309082,
      "learning_rate": 3.2113821138211384e-05,
      "loss": 6.495,
      "step": 5500
    },
    {
      "epoch": 1.170731707317073,
      "grad_norm": 18.840726852416992,
      "learning_rate": 3.048780487804878e-05,
      "loss": 6.4757,
      "step": 6000
    },
    {
      "epoch": 1.2682926829268293,
      "grad_norm": 17.630483627319336,
      "learning_rate": 2.886178861788618e-05,
      "loss": 6.4633,
      "step": 6500
    },
    {
      "epoch": 1.3658536585365852,
      "grad_norm": 16.721818923950195,
      "learning_rate": 2.7235772357723577e-05,
      "loss": 6.4462,
      "step": 7000
    },
    {
      "epoch": 1.4634146341463414,
      "grad_norm": 14.650636672973633,
      "learning_rate": 2.5609756097560977e-05,
      "loss": 6.4404,
      "step": 7500
    },
    {
      "epoch": 1.5609756097560976,
      "grad_norm": 13.825970649719238,
      "learning_rate": 2.3983739837398377e-05,
      "loss": 6.4326,
      "step": 8000
    },
    {
      "epoch": 1.6585365853658538,
      "grad_norm": 11.85326862335205,
      "learning_rate": 2.2357723577235773e-05,
      "loss": 6.4239,
      "step": 8500
    },
    {
      "epoch": 1.7560975609756098,
      "grad_norm": 13.92196273803711,
      "learning_rate": 2.073170731707317e-05,
      "loss": 6.4098,
      "step": 9000
    },
    {
      "epoch": 1.8536585365853657,
      "grad_norm": 12.077308654785156,
      "learning_rate": 1.9105691056910573e-05,
      "loss": 6.3987,
      "step": 9500
    },
    {
      "epoch": 1.951219512195122,
      "grad_norm": 12.406614303588867,
      "learning_rate": 1.747967479674797e-05,
      "loss": 6.3957,
      "step": 10000
    },
    {
      "epoch": 2.048780487804878,
      "grad_norm": 14.001736640930176,
      "learning_rate": 1.5853658536585366e-05,
      "loss": 6.3752,
      "step": 10500
    },
    {
      "epoch": 2.1463414634146343,
      "grad_norm": 12.691810607910156,
      "learning_rate": 1.4227642276422764e-05,
      "loss": 6.3566,
      "step": 11000
    },
    {
      "epoch": 2.2439024390243905,
      "grad_norm": 10.062420845031738,
      "learning_rate": 1.2601626016260162e-05,
      "loss": 6.3492,
      "step": 11500
    },
    {
      "epoch": 2.341463414634146,
      "grad_norm": 11.78906536102295,
      "learning_rate": 1.0975609756097562e-05,
      "loss": 6.3447,
      "step": 12000
    },
    {
      "epoch": 2.4390243902439024,
      "grad_norm": 13.368131637573242,
      "learning_rate": 9.34959349593496e-06,
      "loss": 6.339,
      "step": 12500
    },
    {
      "epoch": 2.5365853658536586,
      "grad_norm": 12.125652313232422,
      "learning_rate": 7.723577235772358e-06,
      "loss": 6.3305,
      "step": 13000
    },
    {
      "epoch": 2.6341463414634148,
      "grad_norm": 13.748695373535156,
      "learning_rate": 6.0975609756097564e-06,
      "loss": 6.3205,
      "step": 13500
    },
    {
      "epoch": 2.7317073170731705,
      "grad_norm": 13.787367820739746,
      "learning_rate": 4.471544715447155e-06,
      "loss": 6.3196,
      "step": 14000
    },
    {
      "epoch": 2.8292682926829267,
      "grad_norm": 15.013029098510742,
      "learning_rate": 2.8455284552845528e-06,
      "loss": 6.3116,
      "step": 14500
    },
    {
      "epoch": 2.926829268292683,
      "grad_norm": 15.244904518127441,
      "learning_rate": 1.2195121951219514e-06,
      "loss": 6.3107,
      "step": 15000
    },
    {
      "epoch": 3.0,
      "step": 15375,
      "total_flos": 5764753863475200.0,
      "train_loss": 6.457221655868903,
      "train_runtime": 1165.3935,
      "train_samples_per_second": 105.536,
      "train_steps_per_second": 13.193
    }
  ],
  "logging_steps": 500,
  "max_steps": 15375,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 5764753863475200.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}