abletobetable's picture
Training in progress, step 500
e126b28
raw
history blame
25.3 kB
{
"best_metric": 0.3778,
"best_model_checkpoint": "rut5-base-absum-tech-support-calls/checkpoint-2000",
"epoch": 144.44444444444446,
"global_step": 2600,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 2.78,
"learning_rate": 5e-06,
"loss": 2.7022,
"step": 50
},
{
"epoch": 2.78,
"eval_avg_rouge_f": 0.0,
"eval_gen_len": 6.875,
"eval_loss": 2.296959400177002,
"eval_rouge-1": 0.0,
"eval_rouge-2": 0.0,
"eval_rouge-l": 0.0,
"eval_runtime": 3.1977,
"eval_samples_per_second": 2.502,
"eval_steps_per_second": 2.502,
"step": 50
},
{
"epoch": 5.56,
"learning_rate": 1e-05,
"loss": 2.2932,
"step": 100
},
{
"epoch": 5.56,
"eval_avg_rouge_f": 0.0,
"eval_gen_len": 10.375,
"eval_loss": 1.8183443546295166,
"eval_rouge-1": 0.0,
"eval_rouge-2": 0.0,
"eval_rouge-l": 0.0,
"eval_runtime": 1.7212,
"eval_samples_per_second": 4.648,
"eval_steps_per_second": 4.648,
"step": 100
},
{
"epoch": 8.33,
"learning_rate": 1.5000000000000002e-05,
"loss": 1.8234,
"step": 150
},
{
"epoch": 8.33,
"eval_avg_rouge_f": 0.3018,
"eval_gen_len": 14.0,
"eval_loss": 1.4890449047088623,
"eval_rouge-1": 0.3588,
"eval_rouge-2": 0.2205,
"eval_rouge-l": 0.3262,
"eval_runtime": 2.5527,
"eval_samples_per_second": 3.134,
"eval_steps_per_second": 3.134,
"step": 150
},
{
"epoch": 11.11,
"learning_rate": 2e-05,
"loss": 1.3727,
"step": 200
},
{
"epoch": 11.11,
"eval_avg_rouge_f": 0.2771,
"eval_gen_len": 12.375,
"eval_loss": 1.373950719833374,
"eval_rouge-1": 0.3493,
"eval_rouge-2": 0.1653,
"eval_rouge-l": 0.3167,
"eval_runtime": 2.0484,
"eval_samples_per_second": 3.906,
"eval_steps_per_second": 3.906,
"step": 200
},
{
"epoch": 13.89,
"learning_rate": 1.9767441860465116e-05,
"loss": 1.0367,
"step": 250
},
{
"epoch": 13.89,
"eval_avg_rouge_f": 0.1974,
"eval_gen_len": 15.375,
"eval_loss": 1.3832566738128662,
"eval_rouge-1": 0.2607,
"eval_rouge-2": 0.0984,
"eval_rouge-l": 0.2331,
"eval_runtime": 3.0521,
"eval_samples_per_second": 2.621,
"eval_steps_per_second": 2.621,
"step": 250
},
{
"epoch": 16.67,
"learning_rate": 1.9534883720930235e-05,
"loss": 0.841,
"step": 300
},
{
"epoch": 16.67,
"eval_avg_rouge_f": 0.3055,
"eval_gen_len": 16.0,
"eval_loss": 1.3515713214874268,
"eval_rouge-1": 0.3713,
"eval_rouge-2": 0.1857,
"eval_rouge-l": 0.3594,
"eval_runtime": 2.4808,
"eval_samples_per_second": 3.225,
"eval_steps_per_second": 3.225,
"step": 300
},
{
"epoch": 19.44,
"learning_rate": 1.9302325581395353e-05,
"loss": 0.7182,
"step": 350
},
{
"epoch": 19.44,
"eval_avg_rouge_f": 0.2672,
"eval_gen_len": 16.125,
"eval_loss": 1.3606797456741333,
"eval_rouge-1": 0.3352,
"eval_rouge-2": 0.143,
"eval_rouge-l": 0.3233,
"eval_runtime": 2.5469,
"eval_samples_per_second": 3.141,
"eval_steps_per_second": 3.141,
"step": 350
},
{
"epoch": 22.22,
"learning_rate": 1.9069767441860468e-05,
"loss": 0.5102,
"step": 400
},
{
"epoch": 22.22,
"eval_avg_rouge_f": 0.2849,
"eval_gen_len": 16.625,
"eval_loss": 1.3673444986343384,
"eval_rouge-1": 0.36,
"eval_rouge-2": 0.1597,
"eval_rouge-l": 0.3349,
"eval_runtime": 2.6789,
"eval_samples_per_second": 2.986,
"eval_steps_per_second": 2.986,
"step": 400
},
{
"epoch": 25.0,
"learning_rate": 1.8837209302325582e-05,
"loss": 0.4595,
"step": 450
},
{
"epoch": 25.0,
"eval_avg_rouge_f": 0.3228,
"eval_gen_len": 17.125,
"eval_loss": 1.371541976928711,
"eval_rouge-1": 0.3892,
"eval_rouge-2": 0.2153,
"eval_rouge-l": 0.3641,
"eval_runtime": 2.6184,
"eval_samples_per_second": 3.055,
"eval_steps_per_second": 3.055,
"step": 450
},
{
"epoch": 27.78,
"learning_rate": 1.86046511627907e-05,
"loss": 0.3886,
"step": 500
},
{
"epoch": 27.78,
"eval_avg_rouge_f": 0.3252,
"eval_gen_len": 16.375,
"eval_loss": 1.4634039402008057,
"eval_rouge-1": 0.3801,
"eval_rouge-2": 0.2274,
"eval_rouge-l": 0.3682,
"eval_runtime": 2.8205,
"eval_samples_per_second": 2.836,
"eval_steps_per_second": 2.836,
"step": 500
},
{
"epoch": 30.56,
"learning_rate": 1.8372093023255815e-05,
"loss": 0.3158,
"step": 550
},
{
"epoch": 30.56,
"eval_avg_rouge_f": 0.331,
"eval_gen_len": 16.75,
"eval_loss": 1.5123608112335205,
"eval_rouge-1": 0.3938,
"eval_rouge-2": 0.2319,
"eval_rouge-l": 0.3672,
"eval_runtime": 3.0478,
"eval_samples_per_second": 2.625,
"eval_steps_per_second": 2.625,
"step": 550
},
{
"epoch": 33.33,
"learning_rate": 1.813953488372093e-05,
"loss": 0.2687,
"step": 600
},
{
"epoch": 33.33,
"eval_avg_rouge_f": 0.3468,
"eval_gen_len": 16.5,
"eval_loss": 1.5868151187896729,
"eval_rouge-1": 0.3987,
"eval_rouge-2": 0.2568,
"eval_rouge-l": 0.3848,
"eval_runtime": 2.5696,
"eval_samples_per_second": 3.113,
"eval_steps_per_second": 3.113,
"step": 600
},
{
"epoch": 36.11,
"learning_rate": 1.790697674418605e-05,
"loss": 0.2361,
"step": 650
},
{
"epoch": 36.11,
"eval_avg_rouge_f": 0.3163,
"eval_gen_len": 17.75,
"eval_loss": 1.6459990739822388,
"eval_rouge-1": 0.375,
"eval_rouge-2": 0.2107,
"eval_rouge-l": 0.3631,
"eval_runtime": 2.8706,
"eval_samples_per_second": 2.787,
"eval_steps_per_second": 2.787,
"step": 650
},
{
"epoch": 38.89,
"learning_rate": 1.7674418604651163e-05,
"loss": 0.1991,
"step": 700
},
{
"epoch": 38.89,
"eval_avg_rouge_f": 0.3085,
"eval_gen_len": 16.25,
"eval_loss": 1.6946561336517334,
"eval_rouge-1": 0.3605,
"eval_rouge-2": 0.2177,
"eval_rouge-l": 0.3474,
"eval_runtime": 2.519,
"eval_samples_per_second": 3.176,
"eval_steps_per_second": 3.176,
"step": 700
},
{
"epoch": 41.67,
"learning_rate": 1.744186046511628e-05,
"loss": 0.151,
"step": 750
},
{
"epoch": 41.67,
"eval_avg_rouge_f": 0.3222,
"eval_gen_len": 16.5,
"eval_loss": 1.8248298168182373,
"eval_rouge-1": 0.3832,
"eval_rouge-2": 0.2274,
"eval_rouge-l": 0.3559,
"eval_runtime": 2.6923,
"eval_samples_per_second": 2.971,
"eval_steps_per_second": 2.971,
"step": 750
},
{
"epoch": 44.44,
"learning_rate": 1.7209302325581396e-05,
"loss": 0.1517,
"step": 800
},
{
"epoch": 44.44,
"eval_avg_rouge_f": 0.3811,
"eval_gen_len": 16.875,
"eval_loss": 1.7883902788162231,
"eval_rouge-1": 0.4309,
"eval_rouge-2": 0.294,
"eval_rouge-l": 0.4184,
"eval_runtime": 2.5559,
"eval_samples_per_second": 3.13,
"eval_steps_per_second": 3.13,
"step": 800
},
{
"epoch": 47.22,
"learning_rate": 1.697674418604651e-05,
"loss": 0.1444,
"step": 850
},
{
"epoch": 47.22,
"eval_avg_rouge_f": 0.322,
"eval_gen_len": 17.125,
"eval_loss": 1.8518762588500977,
"eval_rouge-1": 0.3843,
"eval_rouge-2": 0.2107,
"eval_rouge-l": 0.3711,
"eval_runtime": 2.7195,
"eval_samples_per_second": 2.942,
"eval_steps_per_second": 2.942,
"step": 850
},
{
"epoch": 50.0,
"learning_rate": 1.674418604651163e-05,
"loss": 0.1106,
"step": 900
},
{
"epoch": 50.0,
"eval_avg_rouge_f": 0.3209,
"eval_gen_len": 17.5,
"eval_loss": 1.9637408256530762,
"eval_rouge-1": 0.383,
"eval_rouge-2": 0.2107,
"eval_rouge-l": 0.3691,
"eval_runtime": 2.6625,
"eval_samples_per_second": 3.005,
"eval_steps_per_second": 3.005,
"step": 900
},
{
"epoch": 52.78,
"learning_rate": 1.6511627906976747e-05,
"loss": 0.0961,
"step": 950
},
{
"epoch": 52.78,
"eval_avg_rouge_f": 0.3103,
"eval_gen_len": 16.75,
"eval_loss": 2.07181715965271,
"eval_rouge-1": 0.3645,
"eval_rouge-2": 0.2177,
"eval_rouge-l": 0.3488,
"eval_runtime": 2.7383,
"eval_samples_per_second": 2.921,
"eval_steps_per_second": 2.921,
"step": 950
},
{
"epoch": 55.56,
"learning_rate": 1.6279069767441862e-05,
"loss": 0.1131,
"step": 1000
},
{
"epoch": 55.56,
"eval_avg_rouge_f": 0.3067,
"eval_gen_len": 16.75,
"eval_loss": 1.9934816360473633,
"eval_rouge-1": 0.3602,
"eval_rouge-2": 0.2153,
"eval_rouge-l": 0.3446,
"eval_runtime": 2.6295,
"eval_samples_per_second": 3.042,
"eval_steps_per_second": 3.042,
"step": 1000
},
{
"epoch": 58.33,
"learning_rate": 1.6046511627906977e-05,
"loss": 0.0996,
"step": 1050
},
{
"epoch": 58.33,
"eval_avg_rouge_f": 0.3712,
"eval_gen_len": 16.0,
"eval_loss": 2.06162166595459,
"eval_rouge-1": 0.4153,
"eval_rouge-2": 0.2986,
"eval_rouge-l": 0.3996,
"eval_runtime": 3.0388,
"eval_samples_per_second": 2.633,
"eval_steps_per_second": 2.633,
"step": 1050
},
{
"epoch": 61.11,
"learning_rate": 1.5813953488372095e-05,
"loss": 0.0663,
"step": 1100
},
{
"epoch": 61.11,
"eval_avg_rouge_f": 0.3786,
"eval_gen_len": 14.625,
"eval_loss": 2.1466333866119385,
"eval_rouge-1": 0.4257,
"eval_rouge-2": 0.301,
"eval_rouge-l": 0.409,
"eval_runtime": 3.1902,
"eval_samples_per_second": 2.508,
"eval_steps_per_second": 2.508,
"step": 1100
},
{
"epoch": 63.89,
"learning_rate": 1.558139534883721e-05,
"loss": 0.0789,
"step": 1150
},
{
"epoch": 63.89,
"eval_avg_rouge_f": 0.3728,
"eval_gen_len": 16.0,
"eval_loss": 2.1657214164733887,
"eval_rouge-1": 0.4166,
"eval_rouge-2": 0.301,
"eval_rouge-l": 0.4009,
"eval_runtime": 2.4781,
"eval_samples_per_second": 3.228,
"eval_steps_per_second": 3.228,
"step": 1150
},
{
"epoch": 66.67,
"learning_rate": 1.5348837209302328e-05,
"loss": 0.073,
"step": 1200
},
{
"epoch": 66.67,
"eval_avg_rouge_f": 0.3713,
"eval_gen_len": 16.25,
"eval_loss": 2.251979351043701,
"eval_rouge-1": 0.4131,
"eval_rouge-2": 0.301,
"eval_rouge-l": 0.3999,
"eval_runtime": 3.4503,
"eval_samples_per_second": 2.319,
"eval_steps_per_second": 2.319,
"step": 1200
},
{
"epoch": 69.44,
"learning_rate": 1.5116279069767443e-05,
"loss": 0.0739,
"step": 1250
},
{
"epoch": 69.44,
"eval_avg_rouge_f": 0.3051,
"eval_gen_len": 17.0,
"eval_loss": 2.260244369506836,
"eval_rouge-1": 0.3582,
"eval_rouge-2": 0.2145,
"eval_rouge-l": 0.3426,
"eval_runtime": 2.5948,
"eval_samples_per_second": 3.083,
"eval_steps_per_second": 3.083,
"step": 1250
},
{
"epoch": 72.22,
"learning_rate": 1.488372093023256e-05,
"loss": 0.0799,
"step": 1300
},
{
"epoch": 72.22,
"eval_avg_rouge_f": 0.3156,
"eval_gen_len": 16.75,
"eval_loss": 2.3278074264526367,
"eval_rouge-1": 0.369,
"eval_rouge-2": 0.2242,
"eval_rouge-l": 0.3534,
"eval_runtime": 3.3341,
"eval_samples_per_second": 2.399,
"eval_steps_per_second": 2.399,
"step": 1300
},
{
"epoch": 75.0,
"learning_rate": 1.4651162790697674e-05,
"loss": 0.0546,
"step": 1350
},
{
"epoch": 75.0,
"eval_avg_rouge_f": 0.3164,
"eval_gen_len": 16.5,
"eval_loss": 2.402118444442749,
"eval_rouge-1": 0.369,
"eval_rouge-2": 0.2242,
"eval_rouge-l": 0.3559,
"eval_runtime": 2.5497,
"eval_samples_per_second": 3.138,
"eval_steps_per_second": 3.138,
"step": 1350
},
{
"epoch": 77.78,
"learning_rate": 1.441860465116279e-05,
"loss": 0.0674,
"step": 1400
},
{
"epoch": 77.78,
"eval_avg_rouge_f": 0.3697,
"eval_gen_len": 17.25,
"eval_loss": 2.3492679595947266,
"eval_rouge-1": 0.4149,
"eval_rouge-2": 0.2924,
"eval_rouge-l": 0.4017,
"eval_runtime": 3.1613,
"eval_samples_per_second": 2.531,
"eval_steps_per_second": 2.531,
"step": 1400
},
{
"epoch": 80.56,
"learning_rate": 1.4186046511627909e-05,
"loss": 0.0459,
"step": 1450
},
{
"epoch": 80.56,
"eval_avg_rouge_f": 0.3839,
"eval_gen_len": 16.125,
"eval_loss": 2.3503048419952393,
"eval_rouge-1": 0.426,
"eval_rouge-2": 0.3153,
"eval_rouge-l": 0.4104,
"eval_runtime": 2.4756,
"eval_samples_per_second": 3.232,
"eval_steps_per_second": 3.232,
"step": 1450
},
{
"epoch": 83.33,
"learning_rate": 1.3953488372093025e-05,
"loss": 0.0501,
"step": 1500
},
{
"epoch": 83.33,
"eval_avg_rouge_f": 0.3732,
"eval_gen_len": 15.375,
"eval_loss": 2.371870517730713,
"eval_rouge-1": 0.4172,
"eval_rouge-2": 0.301,
"eval_rouge-l": 0.4016,
"eval_runtime": 2.8658,
"eval_samples_per_second": 2.792,
"eval_steps_per_second": 2.792,
"step": 1500
},
{
"epoch": 86.11,
"learning_rate": 1.372093023255814e-05,
"loss": 0.0509,
"step": 1550
},
{
"epoch": 86.11,
"eval_avg_rouge_f": 0.3926,
"eval_gen_len": 16.375,
"eval_loss": 2.4419479370117188,
"eval_rouge-1": 0.4361,
"eval_rouge-2": 0.3188,
"eval_rouge-l": 0.4229,
"eval_runtime": 3.0315,
"eval_samples_per_second": 2.639,
"eval_steps_per_second": 2.639,
"step": 1550
},
{
"epoch": 88.89,
"learning_rate": 1.3488372093023257e-05,
"loss": 0.0449,
"step": 1600
},
{
"epoch": 88.89,
"eval_avg_rouge_f": 0.4026,
"eval_gen_len": 16.375,
"eval_loss": 2.3171658515930176,
"eval_rouge-1": 0.4514,
"eval_rouge-2": 0.3188,
"eval_rouge-l": 0.4375,
"eval_runtime": 3.636,
"eval_samples_per_second": 2.2,
"eval_steps_per_second": 2.2,
"step": 1600
},
{
"epoch": 91.67,
"learning_rate": 1.3255813953488372e-05,
"loss": 0.0408,
"step": 1650
},
{
"epoch": 91.67,
"eval_avg_rouge_f": 0.3906,
"eval_gen_len": 16.25,
"eval_loss": 2.4437549114227295,
"eval_rouge-1": 0.4349,
"eval_rouge-2": 0.3153,
"eval_rouge-l": 0.4217,
"eval_runtime": 2.4939,
"eval_samples_per_second": 3.208,
"eval_steps_per_second": 3.208,
"step": 1650
},
{
"epoch": 94.44,
"learning_rate": 1.302325581395349e-05,
"loss": 0.0357,
"step": 1700
},
{
"epoch": 94.44,
"eval_avg_rouge_f": 0.3831,
"eval_gen_len": 16.25,
"eval_loss": 2.540635108947754,
"eval_rouge-1": 0.4236,
"eval_rouge-2": 0.3153,
"eval_rouge-l": 0.4104,
"eval_runtime": 3.5767,
"eval_samples_per_second": 2.237,
"eval_steps_per_second": 2.237,
"step": 1700
},
{
"epoch": 97.22,
"learning_rate": 1.2790697674418606e-05,
"loss": 0.0403,
"step": 1750
},
{
"epoch": 97.22,
"eval_avg_rouge_f": 0.3748,
"eval_gen_len": 16.375,
"eval_loss": 2.4441065788269043,
"eval_rouge-1": 0.4111,
"eval_rouge-2": 0.3153,
"eval_rouge-l": 0.398,
"eval_runtime": 2.5203,
"eval_samples_per_second": 3.174,
"eval_steps_per_second": 3.174,
"step": 1750
},
{
"epoch": 100.0,
"learning_rate": 1.2558139534883723e-05,
"loss": 0.0489,
"step": 1800
},
{
"epoch": 100.0,
"eval_avg_rouge_f": 0.3768,
"eval_gen_len": 16.125,
"eval_loss": 2.459872245788574,
"eval_rouge-1": 0.4154,
"eval_rouge-2": 0.3153,
"eval_rouge-l": 0.3997,
"eval_runtime": 3.4884,
"eval_samples_per_second": 2.293,
"eval_steps_per_second": 2.293,
"step": 1800
},
{
"epoch": 102.78,
"learning_rate": 1.2325581395348838e-05,
"loss": 0.032,
"step": 1850
},
{
"epoch": 102.78,
"eval_avg_rouge_f": 0.407,
"eval_gen_len": 15.0,
"eval_loss": 2.623534679412842,
"eval_rouge-1": 0.4515,
"eval_rouge-2": 0.3335,
"eval_rouge-l": 0.4359,
"eval_runtime": 2.3389,
"eval_samples_per_second": 3.42,
"eval_steps_per_second": 3.42,
"step": 1850
},
{
"epoch": 105.56,
"learning_rate": 1.2093023255813954e-05,
"loss": 0.0379,
"step": 1900
},
{
"epoch": 105.56,
"eval_avg_rouge_f": 0.407,
"eval_gen_len": 15.125,
"eval_loss": 2.6058127880096436,
"eval_rouge-1": 0.4515,
"eval_rouge-2": 0.3335,
"eval_rouge-l": 0.4359,
"eval_runtime": 3.2938,
"eval_samples_per_second": 2.429,
"eval_steps_per_second": 2.429,
"step": 1900
},
{
"epoch": 108.33,
"learning_rate": 1.1860465116279072e-05,
"loss": 0.0466,
"step": 1950
},
{
"epoch": 108.33,
"eval_avg_rouge_f": 0.3768,
"eval_gen_len": 16.125,
"eval_loss": 2.5748019218444824,
"eval_rouge-1": 0.4154,
"eval_rouge-2": 0.3153,
"eval_rouge-l": 0.3997,
"eval_runtime": 2.4802,
"eval_samples_per_second": 3.226,
"eval_steps_per_second": 3.226,
"step": 1950
},
{
"epoch": 111.11,
"learning_rate": 1.1627906976744187e-05,
"loss": 0.0317,
"step": 2000
},
{
"epoch": 111.11,
"eval_avg_rouge_f": 0.3778,
"eval_gen_len": 16.125,
"eval_loss": 2.663809299468994,
"eval_rouge-1": 0.4169,
"eval_rouge-2": 0.3153,
"eval_rouge-l": 0.4013,
"eval_runtime": 3.2301,
"eval_samples_per_second": 2.477,
"eval_steps_per_second": 2.477,
"step": 2000
},
{
"epoch": 113.89,
"learning_rate": 1.1395348837209304e-05,
"loss": 0.0234,
"step": 2050
},
{
"epoch": 113.89,
"eval_avg_rouge_f": 0.3888,
"eval_gen_len": 15.5,
"eval_loss": 2.740657091140747,
"eval_rouge-1": 0.4334,
"eval_rouge-2": 0.3153,
"eval_rouge-l": 0.4178,
"eval_runtime": 4.1851,
"eval_samples_per_second": 1.912,
"eval_steps_per_second": 1.912,
"step": 2050
},
{
"epoch": 116.67,
"learning_rate": 1.116279069767442e-05,
"loss": 0.0308,
"step": 2100
},
{
"epoch": 116.67,
"eval_avg_rouge_f": 0.3799,
"eval_gen_len": 16.125,
"eval_loss": 2.70857572555542,
"eval_rouge-1": 0.4201,
"eval_rouge-2": 0.3153,
"eval_rouge-l": 0.4044,
"eval_runtime": 2.5313,
"eval_samples_per_second": 3.16,
"eval_steps_per_second": 3.16,
"step": 2100
},
{
"epoch": 119.44,
"learning_rate": 1.0930232558139535e-05,
"loss": 0.0305,
"step": 2150
},
{
"epoch": 119.44,
"eval_avg_rouge_f": 0.3598,
"eval_gen_len": 15.5,
"eval_loss": 2.7068228721618652,
"eval_rouge-1": 0.4059,
"eval_rouge-2": 0.2831,
"eval_rouge-l": 0.3902,
"eval_runtime": 2.7634,
"eval_samples_per_second": 2.895,
"eval_steps_per_second": 2.895,
"step": 2150
},
{
"epoch": 122.22,
"learning_rate": 1.0697674418604651e-05,
"loss": 0.0289,
"step": 2200
},
{
"epoch": 122.22,
"eval_avg_rouge_f": 0.3598,
"eval_gen_len": 15.5,
"eval_loss": 2.8503403663635254,
"eval_rouge-1": 0.4059,
"eval_rouge-2": 0.2831,
"eval_rouge-l": 0.3902,
"eval_runtime": 2.4338,
"eval_samples_per_second": 3.287,
"eval_steps_per_second": 3.287,
"step": 2200
},
{
"epoch": 125.0,
"learning_rate": 1.046511627906977e-05,
"loss": 0.0555,
"step": 2250
},
{
"epoch": 125.0,
"eval_avg_rouge_f": 0.3598,
"eval_gen_len": 15.5,
"eval_loss": 2.8522231578826904,
"eval_rouge-1": 0.4059,
"eval_rouge-2": 0.2831,
"eval_rouge-l": 0.3902,
"eval_runtime": 2.8814,
"eval_samples_per_second": 2.776,
"eval_steps_per_second": 2.776,
"step": 2250
},
{
"epoch": 127.78,
"learning_rate": 1.0232558139534884e-05,
"loss": 0.022,
"step": 2300
},
{
"epoch": 127.78,
"eval_avg_rouge_f": 0.3598,
"eval_gen_len": 15.5,
"eval_loss": 2.9057185649871826,
"eval_rouge-1": 0.4059,
"eval_rouge-2": 0.2831,
"eval_rouge-l": 0.3902,
"eval_runtime": 2.43,
"eval_samples_per_second": 3.292,
"eval_steps_per_second": 3.292,
"step": 2300
},
{
"epoch": 130.56,
"learning_rate": 1e-05,
"loss": 0.0369,
"step": 2350
},
{
"epoch": 130.56,
"eval_avg_rouge_f": 0.3598,
"eval_gen_len": 15.5,
"eval_loss": 2.8735642433166504,
"eval_rouge-1": 0.4059,
"eval_rouge-2": 0.2831,
"eval_rouge-l": 0.3902,
"eval_runtime": 3.0784,
"eval_samples_per_second": 2.599,
"eval_steps_per_second": 2.599,
"step": 2350
},
{
"epoch": 133.33,
"learning_rate": 9.767441860465117e-06,
"loss": 0.0195,
"step": 2400
},
{
"epoch": 133.33,
"eval_avg_rouge_f": 0.3598,
"eval_gen_len": 15.5,
"eval_loss": 2.7636728286743164,
"eval_rouge-1": 0.4059,
"eval_rouge-2": 0.2831,
"eval_rouge-l": 0.3902,
"eval_runtime": 2.4413,
"eval_samples_per_second": 3.277,
"eval_steps_per_second": 3.277,
"step": 2400
},
{
"epoch": 136.11,
"learning_rate": 9.534883720930234e-06,
"loss": 0.0387,
"step": 2450
},
{
"epoch": 136.11,
"eval_avg_rouge_f": 0.3598,
"eval_gen_len": 15.5,
"eval_loss": 2.743685007095337,
"eval_rouge-1": 0.4059,
"eval_rouge-2": 0.2831,
"eval_rouge-l": 0.3902,
"eval_runtime": 2.9809,
"eval_samples_per_second": 2.684,
"eval_steps_per_second": 2.684,
"step": 2450
},
{
"epoch": 138.89,
"learning_rate": 9.30232558139535e-06,
"loss": 0.0298,
"step": 2500
},
{
"epoch": 138.89,
"eval_avg_rouge_f": 0.3443,
"eval_gen_len": 16.25,
"eval_loss": 2.8817646503448486,
"eval_rouge-1": 0.391,
"eval_rouge-2": 0.2665,
"eval_rouge-l": 0.3754,
"eval_runtime": 2.5341,
"eval_samples_per_second": 3.157,
"eval_steps_per_second": 3.157,
"step": 2500
},
{
"epoch": 141.67,
"learning_rate": 9.069767441860465e-06,
"loss": 0.0265,
"step": 2550
},
{
"epoch": 141.67,
"eval_avg_rouge_f": 0.3353,
"eval_gen_len": 16.5,
"eval_loss": 2.834005355834961,
"eval_rouge-1": 0.3776,
"eval_rouge-2": 0.2665,
"eval_rouge-l": 0.362,
"eval_runtime": 3.3656,
"eval_samples_per_second": 2.377,
"eval_steps_per_second": 2.377,
"step": 2550
},
{
"epoch": 144.44,
"learning_rate": 8.837209302325582e-06,
"loss": 0.0182,
"step": 2600
},
{
"epoch": 144.44,
"eval_avg_rouge_f": 0.3598,
"eval_gen_len": 15.5,
"eval_loss": 2.873906135559082,
"eval_rouge-1": 0.4059,
"eval_rouge-2": 0.2831,
"eval_rouge-l": 0.3902,
"eval_runtime": 2.4328,
"eval_samples_per_second": 3.288,
"eval_steps_per_second": 3.288,
"step": 2600
},
{
"epoch": 144.44,
"step": 2600,
"total_flos": 3409770731258880.0,
"train_loss": 0.29600492647060983,
"train_runtime": 1522.0987,
"train_samples_per_second": 8.869,
"train_steps_per_second": 2.956
}
],
"max_steps": 4500,
"num_train_epochs": 250,
"total_flos": 3409770731258880.0,
"trial_name": null,
"trial_params": null
}