|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"global_step": 12111, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.793576087854017e-05, |
|
"loss": 2.3279, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.5871521757080346e-05, |
|
"loss": 1.6773, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.380728263562051e-05, |
|
"loss": 1.4905, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.174304351416068e-05, |
|
"loss": 1.3552, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.9678804392700854e-05, |
|
"loss": 1.2864, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.761456527124102e-05, |
|
"loss": 1.2008, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.555032614978119e-05, |
|
"loss": 1.1758, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.348608702832136e-05, |
|
"loss": 1.122, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.1421847906861534e-05, |
|
"loss": 1.0693, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 2.9357608785401702e-05, |
|
"loss": 1.037, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 2.729336966394187e-05, |
|
"loss": 1.0164, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.5229130542482042e-05, |
|
"loss": 0.99, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.316489142102221e-05, |
|
"loss": 0.9796, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.1100652299562382e-05, |
|
"loss": 0.9223, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 1.9036413178102553e-05, |
|
"loss": 0.9296, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.697217405664272e-05, |
|
"loss": 0.8937, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.4907934935182893e-05, |
|
"loss": 0.876, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 1.2843695813723062e-05, |
|
"loss": 0.8717, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1.0779456692263231e-05, |
|
"loss": 0.8513, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 8.715217570803403e-06, |
|
"loss": 0.8391, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 6.650978449343573e-06, |
|
"loss": 0.8317, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 4.586739327883742e-06, |
|
"loss": 0.8231, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 2.522500206423912e-06, |
|
"loss": 0.8137, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 4.5826108496408225e-07, |
|
"loss": 0.8145, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 12111, |
|
"total_flos": 1.1295642461038182e+17, |
|
"train_loss": 1.0889082555776386, |
|
"train_runtime": 7447.3904, |
|
"train_samples_per_second": 16.262, |
|
"train_steps_per_second": 1.626 |
|
} |
|
], |
|
"max_steps": 12111, |
|
"num_train_epochs": 3, |
|
"total_flos": 1.1295642461038182e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|