{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 9.968152866242038,
  "eval_steps": 500,
  "global_step": 190,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.5095541401273885,
      "grad_norm": 21.399980545043945,
      "learning_rate": 4.8157894736842105e-05,
      "loss": 41.5426,
      "step": 10
    },
    {
      "epoch": 1.0509554140127388,
      "grad_norm": 17.988994598388672,
      "learning_rate": 4.552631578947369e-05,
      "loss": 29.4127,
      "step": 20
    },
    {
      "epoch": 1.5605095541401273,
      "grad_norm": 33.763912200927734,
      "learning_rate": 4.289473684210527e-05,
      "loss": 25.5085,
      "step": 30
    },
    {
      "epoch": 2.1019108280254777,
      "grad_norm": 16.56460952758789,
      "learning_rate": 4.026315789473684e-05,
      "loss": 20.56,
      "step": 40
    },
    {
      "epoch": 2.611464968152866,
      "grad_norm": 10.511013984680176,
      "learning_rate": 3.7631578947368425e-05,
      "loss": 18.1336,
      "step": 50
    },
    {
      "epoch": 3.1528662420382165,
      "grad_norm": 10.428191184997559,
      "learning_rate": 3.5e-05,
      "loss": 17.3703,
      "step": 60
    },
    {
      "epoch": 3.662420382165605,
      "grad_norm": 14.895818710327148,
      "learning_rate": 3.236842105263158e-05,
      "loss": 15.4829,
      "step": 70
    },
    {
      "epoch": 4.203821656050955,
      "grad_norm": 14.849196434020996,
      "learning_rate": 2.9736842105263157e-05,
      "loss": 15.0444,
      "step": 80
    },
    {
      "epoch": 4.713375796178344,
      "grad_norm": 18.697587966918945,
      "learning_rate": 2.710526315789474e-05,
      "loss": 12.8919,
      "step": 90
    },
    {
      "epoch": 5.254777070063694,
      "grad_norm": 15.368135452270508,
      "learning_rate": 2.4473684210526318e-05,
      "loss": 12.3344,
      "step": 100
    },
    {
      "epoch": 5.764331210191083,
      "grad_norm": 12.972413063049316,
      "learning_rate": 2.1842105263157896e-05,
      "loss": 10.8471,
      "step": 110
    },
    {
      "epoch": 6.305732484076433,
      "grad_norm": 12.219730377197266,
      "learning_rate": 1.9210526315789474e-05,
      "loss": 10.4139,
      "step": 120
    },
    {
      "epoch": 6.8152866242038215,
      "grad_norm": 10.693161964416504,
      "learning_rate": 1.6578947368421053e-05,
      "loss": 9.3934,
      "step": 130
    },
    {
      "epoch": 7.356687898089172,
      "grad_norm": 7.562994956970215,
      "learning_rate": 1.3947368421052631e-05,
      "loss": 9.3898,
      "step": 140
    },
    {
      "epoch": 7.86624203821656,
      "grad_norm": 8.633628845214844,
      "learning_rate": 1.1315789473684212e-05,
      "loss": 8.2861,
      "step": 150
    },
    {
      "epoch": 8.40764331210191,
      "grad_norm": 5.833484649658203,
      "learning_rate": 8.68421052631579e-06,
      "loss": 8.2805,
      "step": 160
    },
    {
      "epoch": 8.9171974522293,
      "grad_norm": 5.1007280349731445,
      "learning_rate": 6.0526315789473685e-06,
      "loss": 7.5686,
      "step": 170
    },
    {
      "epoch": 9.45859872611465,
      "grad_norm": 3.2410666942596436,
      "learning_rate": 3.421052631578948e-06,
      "loss": 7.6358,
      "step": 180
    },
    {
      "epoch": 9.968152866242038,
      "grad_norm": 3.1020469665527344,
      "learning_rate": 7.894736842105264e-07,
      "loss": 7.4635,
      "step": 190
    }
  ],
  "logging_steps": 10,
  "max_steps": 190,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 4446215761035264.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}