|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"global_step": 2286, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.890638670166229e-05, |
|
"loss": 0.0389, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.781277340332459e-05, |
|
"loss": 0.0034, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.671916010498688e-05, |
|
"loss": 0.001, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.562554680664917e-05, |
|
"loss": 0.0008, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.453193350831146e-05, |
|
"loss": 0.0002, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.343832020997376e-05, |
|
"loss": 0.0002, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.2344706911636046e-05, |
|
"loss": 0.0015, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.125109361329834e-05, |
|
"loss": 0.0001, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.015748031496063e-05, |
|
"loss": 0.0001, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.906386701662293e-05, |
|
"loss": 0.0002, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.7970253718285216e-05, |
|
"loss": 0.0001, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 3.6876640419947505e-05, |
|
"loss": 0.0004, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.57830271216098e-05, |
|
"loss": 0.0002, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.468941382327209e-05, |
|
"loss": 0.0005, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.3595800524934386e-05, |
|
"loss": 0.0016, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.2502187226596675e-05, |
|
"loss": 0.0, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.1408573928258964e-05, |
|
"loss": 0.0001, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.0314960629921263e-05, |
|
"loss": 0.0007, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 2.9221347331583556e-05, |
|
"loss": 0.0001, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 2.8127734033245845e-05, |
|
"loss": 0.0001, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.7034120734908137e-05, |
|
"loss": 0.0007, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.5940507436570433e-05, |
|
"loss": 0.0001, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.4846894138232722e-05, |
|
"loss": 0.0, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.3753280839895015e-05, |
|
"loss": 0.0001, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.2659667541557307e-05, |
|
"loss": 0.0, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.15660542432196e-05, |
|
"loss": 0.0001, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.0472440944881892e-05, |
|
"loss": 0.0001, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9378827646544184e-05, |
|
"loss": 0.0001, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.8285214348206477e-05, |
|
"loss": 0.0001, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.7191601049868766e-05, |
|
"loss": 0.0003, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.6097987751531062e-05, |
|
"loss": 0.0, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.500437445319335e-05, |
|
"loss": 0.0001, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.3910761154855645e-05, |
|
"loss": 0.0003, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.2817147856517936e-05, |
|
"loss": 0.0, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.1723534558180228e-05, |
|
"loss": 0.0001, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.062992125984252e-05, |
|
"loss": 0.0001, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.536307961504811e-06, |
|
"loss": 0.0, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 8.442694663167104e-06, |
|
"loss": 0.0, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 7.349081364829396e-06, |
|
"loss": 0.0, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 6.255468066491689e-06, |
|
"loss": 0.0, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.16185476815398e-06, |
|
"loss": 0.0001, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.068241469816273e-06, |
|
"loss": 0.0005, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 2.9746281714785652e-06, |
|
"loss": 0.0, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.8810148731408575e-06, |
|
"loss": 0.0, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 7.874015748031496e-07, |
|
"loss": 0.0, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 2286, |
|
"total_flos": 1.813953060062208e+16, |
|
"train_loss": 0.00115985169203965, |
|
"train_runtime": 1172.8877, |
|
"train_samples_per_second": 124.724, |
|
"train_steps_per_second": 1.949 |
|
} |
|
], |
|
"max_steps": 2286, |
|
"num_train_epochs": 1, |
|
"total_flos": 1.813953060062208e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|