|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.2381593389525131, |
|
"global_step": 11500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0019792904922649986, |
|
"loss": 0.1744, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0019585809845299976, |
|
"loss": 0.1699, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0019378714767949966, |
|
"loss": 0.1663, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0019171619690599956, |
|
"loss": 0.1502, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0018964524613249943, |
|
"loss": 0.1528, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0018757429535899933, |
|
"loss": 0.1716, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.001855033445854992, |
|
"loss": 0.1643, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.001834323938119991, |
|
"loss": 0.1735, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0018136144303849897, |
|
"loss": 0.1576, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0017929049226499886, |
|
"loss": 0.1652, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0017721954149149874, |
|
"loss": 0.1676, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0017514859071799864, |
|
"loss": 0.1612, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0017307763994449852, |
|
"loss": 0.1681, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0017100668917099842, |
|
"loss": 0.1696, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.001689357383974983, |
|
"loss": 0.1692, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0016686478762399817, |
|
"loss": 0.172, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0016479383685049807, |
|
"loss": 0.159, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0016272288607699795, |
|
"loss": 0.1675, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0016065193530349785, |
|
"loss": 0.16, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0015858098452999772, |
|
"loss": 0.1647, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0015651003375649762, |
|
"loss": 0.1572, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.001544390829829975, |
|
"loss": 0.1548, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.001523681322094974, |
|
"loss": 0.1642, |
|
"step": 11500 |
|
} |
|
], |
|
"max_steps": 48287, |
|
"num_train_epochs": 1, |
|
"total_flos": 3.7354337206272e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|