|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 285, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 7.481125379048471, |
|
"learning_rate": 2.586206896551724e-05, |
|
"loss": 1.8917, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 6.142398799516201, |
|
"learning_rate": 2.75390625e-05, |
|
"loss": 1.5682, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 5.558830208571597, |
|
"learning_rate": 2.4609375e-05, |
|
"loss": 1.3842, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 7.89159551198774, |
|
"learning_rate": 2.16796875e-05, |
|
"loss": 1.1744, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"grad_norm": 5.537780558514368, |
|
"learning_rate": 1.8750000000000002e-05, |
|
"loss": 0.9399, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"grad_norm": 5.21225355478704, |
|
"learning_rate": 1.58203125e-05, |
|
"loss": 0.784, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"grad_norm": 4.6578317858188045, |
|
"learning_rate": 1.2890625e-05, |
|
"loss": 0.6828, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"grad_norm": 5.158776950034473, |
|
"learning_rate": 9.9609375e-06, |
|
"loss": 0.5401, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"grad_norm": 4.243348238906735, |
|
"learning_rate": 7.03125e-06, |
|
"loss": 0.4242, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"grad_norm": 4.101160351605037, |
|
"learning_rate": 4.1015625e-06, |
|
"loss": 0.3594, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"grad_norm": 3.8015979276087366, |
|
"learning_rate": 1.1718750000000001e-06, |
|
"loss": 0.3152, |
|
"step": 275 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 285, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 14905214238720.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|