mt5-counter-narrative-eu / trainer_state.json
ragerri's picture
Upload 12 files
e3265ee verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 49.992,
"global_step": 3100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.61,
"learning_rate": 0.000967741935483871,
"loss": 5.0178,
"step": 100
},
{
"epoch": 3.22,
"learning_rate": 0.0009354838709677419,
"loss": 2.8191,
"step": 200
},
{
"epoch": 4.83,
"learning_rate": 0.0009032258064516129,
"loss": 2.0873,
"step": 300
},
{
"epoch": 6.45,
"learning_rate": 0.0008709677419354839,
"loss": 1.5077,
"step": 400
},
{
"epoch": 8.06,
"learning_rate": 0.0008387096774193549,
"loss": 1.1005,
"step": 500
},
{
"epoch": 9.67,
"learning_rate": 0.0008064516129032258,
"loss": 0.7513,
"step": 600
},
{
"epoch": 11.29,
"learning_rate": 0.0007741935483870968,
"loss": 0.5449,
"step": 700
},
{
"epoch": 12.9,
"learning_rate": 0.0007419354838709678,
"loss": 0.415,
"step": 800
},
{
"epoch": 14.51,
"learning_rate": 0.0007096774193548388,
"loss": 0.3257,
"step": 900
},
{
"epoch": 16.13,
"learning_rate": 0.0006774193548387097,
"loss": 0.2728,
"step": 1000
},
{
"epoch": 17.74,
"learning_rate": 0.0006451612903225806,
"loss": 0.2365,
"step": 1100
},
{
"epoch": 19.35,
"learning_rate": 0.0006129032258064516,
"loss": 0.2159,
"step": 1200
},
{
"epoch": 20.96,
"learning_rate": 0.0005806451612903226,
"loss": 0.1881,
"step": 1300
},
{
"epoch": 22.58,
"learning_rate": 0.0005483870967741935,
"loss": 0.1687,
"step": 1400
},
{
"epoch": 24.19,
"learning_rate": 0.0005161290322580645,
"loss": 0.1611,
"step": 1500
},
{
"epoch": 25.8,
"learning_rate": 0.0004838709677419355,
"loss": 0.1469,
"step": 1600
},
{
"epoch": 27.42,
"learning_rate": 0.00045161290322580643,
"loss": 0.1405,
"step": 1700
},
{
"epoch": 29.03,
"learning_rate": 0.00041935483870967743,
"loss": 0.136,
"step": 1800
},
{
"epoch": 30.64,
"learning_rate": 0.0003870967741935484,
"loss": 0.1266,
"step": 1900
},
{
"epoch": 32.26,
"learning_rate": 0.0003548387096774194,
"loss": 0.1247,
"step": 2000
},
{
"epoch": 32.26,
"eval_bleu": 4.2768,
"eval_gen_len": 18.2295,
"eval_loss": 0.07783249020576477,
"eval_runtime": 151.6677,
"eval_samples_per_second": 13.187,
"eval_steps_per_second": 3.297,
"step": 2000
},
{
"epoch": 33.86,
"learning_rate": 0.0003225806451612903,
"loss": 0.1175,
"step": 2100
},
{
"epoch": 35.48,
"learning_rate": 0.0002903225806451613,
"loss": 0.1128,
"step": 2200
},
{
"epoch": 37.1,
"learning_rate": 0.00025806451612903227,
"loss": 0.1127,
"step": 2300
},
{
"epoch": 38.7,
"learning_rate": 0.00022580645161290321,
"loss": 0.106,
"step": 2400
},
{
"epoch": 40.32,
"learning_rate": 0.0001935483870967742,
"loss": 0.1062,
"step": 2500
},
{
"epoch": 41.93,
"learning_rate": 0.00016129032258064516,
"loss": 0.1026,
"step": 2600
},
{
"epoch": 43.54,
"learning_rate": 0.00012903225806451613,
"loss": 0.0995,
"step": 2700
},
{
"epoch": 45.16,
"learning_rate": 9.67741935483871e-05,
"loss": 0.0985,
"step": 2800
},
{
"epoch": 46.77,
"learning_rate": 6.451612903225807e-05,
"loss": 0.0958,
"step": 2900
},
{
"epoch": 48.38,
"learning_rate": 3.2258064516129034e-05,
"loss": 0.0942,
"step": 3000
},
{
"epoch": 49.99,
"learning_rate": 0.0,
"loss": 0.092,
"step": 3100
},
{
"epoch": 49.99,
"step": 3100,
"total_flos": 2.628606767136768e+16,
"train_loss": 0.5685424493974255,
"train_runtime": 3913.436,
"train_samples_per_second": 25.553,
"train_steps_per_second": 0.792
}
],
"max_steps": 3100,
"num_train_epochs": 50,
"total_flos": 2.628606767136768e+16,
"trial_name": null,
"trial_params": null
}