|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"global_step": 11049, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.7737351796542676e-05, |
|
"loss": 2.2042, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.547470359308535e-05, |
|
"loss": 2.087, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.321205538962802e-05, |
|
"loss": 2.0242, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.09494071861707e-05, |
|
"loss": 1.978, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.868675898271337e-05, |
|
"loss": 1.9701, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.6424110779256044e-05, |
|
"loss": 1.9814, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.416146257579871e-05, |
|
"loss": 1.9524, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.189881437234139e-05, |
|
"loss": 1.9124, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 2.963616616888406e-05, |
|
"loss": 1.8723, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 2.7373517965426738e-05, |
|
"loss": 1.8591, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.511086976196941e-05, |
|
"loss": 1.8822, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.2848221558512085e-05, |
|
"loss": 1.8402, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.058557335505476e-05, |
|
"loss": 1.8634, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.8322925151597432e-05, |
|
"loss": 1.8638, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.6060276948140106e-05, |
|
"loss": 1.8548, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.3797628744682778e-05, |
|
"loss": 1.8085, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 1.1534980541225452e-05, |
|
"loss": 1.8086, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 9.272332337768125e-06, |
|
"loss": 1.809, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 7.009684134310799e-06, |
|
"loss": 1.8238, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 4.747035930853471e-06, |
|
"loss": 1.7984, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 2.4843877273961445e-06, |
|
"loss": 1.7959, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 2.21739523938818e-07, |
|
"loss": 1.7938, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 11049, |
|
"total_flos": 3178708656685056.0, |
|
"train_loss": 1.8986893418320963, |
|
"train_runtime": 35727.9633, |
|
"train_samples_per_second": 1.237, |
|
"train_steps_per_second": 0.309 |
|
} |
|
], |
|
"max_steps": 11049, |
|
"num_train_epochs": 3, |
|
"total_flos": 3178708656685056.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|