|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 8218, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.06084205402774397, |
|
"grad_norm": 9.958785057067871, |
|
"learning_rate": 4.6957897298612804e-05, |
|
"loss": 2.8566, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.12168410805548795, |
|
"grad_norm": 6.4962873458862305, |
|
"learning_rate": 4.3915794597225605e-05, |
|
"loss": 2.445, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.18252616208323194, |
|
"grad_norm": 6.847861289978027, |
|
"learning_rate": 4.0873691895838406e-05, |
|
"loss": 2.2613, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.2433682161109759, |
|
"grad_norm": 7.166022300720215, |
|
"learning_rate": 3.783158919445121e-05, |
|
"loss": 2.1305, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.3042102701387199, |
|
"grad_norm": 7.0767316818237305, |
|
"learning_rate": 3.4789486493064e-05, |
|
"loss": 2.0557, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.3650523241664639, |
|
"grad_norm": 5.804446220397949, |
|
"learning_rate": 3.174738379167681e-05, |
|
"loss": 2.0253, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.42589437819420783, |
|
"grad_norm": 4.874488830566406, |
|
"learning_rate": 2.8705281090289608e-05, |
|
"loss": 1.9551, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.4867364322219518, |
|
"grad_norm": 5.902616024017334, |
|
"learning_rate": 2.566317838890241e-05, |
|
"loss": 1.8709, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.5475784862496957, |
|
"grad_norm": 6.240393161773682, |
|
"learning_rate": 2.262107568751521e-05, |
|
"loss": 1.8769, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.6084205402774397, |
|
"grad_norm": 6.906947135925293, |
|
"learning_rate": 1.9578972986128012e-05, |
|
"loss": 1.8443, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.6692625943051838, |
|
"grad_norm": 5.4539971351623535, |
|
"learning_rate": 1.6536870284740814e-05, |
|
"loss": 1.7751, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.7301046483329278, |
|
"grad_norm": 5.705854892730713, |
|
"learning_rate": 1.3494767583353615e-05, |
|
"loss": 1.7458, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.7909467023606717, |
|
"grad_norm": 5.6163649559021, |
|
"learning_rate": 1.0452664881966416e-05, |
|
"loss": 1.6901, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.8517887563884157, |
|
"grad_norm": 6.5511016845703125, |
|
"learning_rate": 7.410562180579217e-06, |
|
"loss": 1.6807, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.9126308104161597, |
|
"grad_norm": 5.792708396911621, |
|
"learning_rate": 4.368459479192018e-06, |
|
"loss": 1.6844, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.9734728644439036, |
|
"grad_norm": 6.069802761077881, |
|
"learning_rate": 1.3263567778048189e-06, |
|
"loss": 1.674, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 8218, |
|
"total_flos": 1.063927729790976e+16, |
|
"train_loss": 1.9656052448999624, |
|
"train_runtime": 5505.0716, |
|
"train_samples_per_second": 14.927, |
|
"train_steps_per_second": 1.493 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 8218, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 10000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.063927729790976e+16, |
|
"train_batch_size": 10, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|