|
{ |
|
"best_metric": 0.3778, |
|
"best_model_checkpoint": "rut5-base-absum-tech-support-calls/checkpoint-2000", |
|
"epoch": 144.44444444444446, |
|
"global_step": 2600, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 5e-06, |
|
"loss": 2.7022, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"eval_avg_rouge_f": 0.0, |
|
"eval_gen_len": 6.875, |
|
"eval_loss": 2.296959400177002, |
|
"eval_rouge-1": 0.0, |
|
"eval_rouge-2": 0.0, |
|
"eval_rouge-l": 0.0, |
|
"eval_runtime": 3.1977, |
|
"eval_samples_per_second": 2.502, |
|
"eval_steps_per_second": 2.502, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 5.56, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2932, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 5.56, |
|
"eval_avg_rouge_f": 0.0, |
|
"eval_gen_len": 10.375, |
|
"eval_loss": 1.8183443546295166, |
|
"eval_rouge-1": 0.0, |
|
"eval_rouge-2": 0.0, |
|
"eval_rouge-l": 0.0, |
|
"eval_runtime": 1.7212, |
|
"eval_samples_per_second": 4.648, |
|
"eval_steps_per_second": 4.648, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 8.33, |
|
"learning_rate": 1.5000000000000002e-05, |
|
"loss": 1.8234, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 8.33, |
|
"eval_avg_rouge_f": 0.3018, |
|
"eval_gen_len": 14.0, |
|
"eval_loss": 1.4890449047088623, |
|
"eval_rouge-1": 0.3588, |
|
"eval_rouge-2": 0.2205, |
|
"eval_rouge-l": 0.3262, |
|
"eval_runtime": 2.5527, |
|
"eval_samples_per_second": 3.134, |
|
"eval_steps_per_second": 3.134, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 11.11, |
|
"learning_rate": 2e-05, |
|
"loss": 1.3727, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 11.11, |
|
"eval_avg_rouge_f": 0.2771, |
|
"eval_gen_len": 12.375, |
|
"eval_loss": 1.373950719833374, |
|
"eval_rouge-1": 0.3493, |
|
"eval_rouge-2": 0.1653, |
|
"eval_rouge-l": 0.3167, |
|
"eval_runtime": 2.0484, |
|
"eval_samples_per_second": 3.906, |
|
"eval_steps_per_second": 3.906, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 13.89, |
|
"learning_rate": 1.9767441860465116e-05, |
|
"loss": 1.0367, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 13.89, |
|
"eval_avg_rouge_f": 0.1974, |
|
"eval_gen_len": 15.375, |
|
"eval_loss": 1.3832566738128662, |
|
"eval_rouge-1": 0.2607, |
|
"eval_rouge-2": 0.0984, |
|
"eval_rouge-l": 0.2331, |
|
"eval_runtime": 3.0521, |
|
"eval_samples_per_second": 2.621, |
|
"eval_steps_per_second": 2.621, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 16.67, |
|
"learning_rate": 1.9534883720930235e-05, |
|
"loss": 0.841, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 16.67, |
|
"eval_avg_rouge_f": 0.3055, |
|
"eval_gen_len": 16.0, |
|
"eval_loss": 1.3515713214874268, |
|
"eval_rouge-1": 0.3713, |
|
"eval_rouge-2": 0.1857, |
|
"eval_rouge-l": 0.3594, |
|
"eval_runtime": 2.4808, |
|
"eval_samples_per_second": 3.225, |
|
"eval_steps_per_second": 3.225, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 19.44, |
|
"learning_rate": 1.9302325581395353e-05, |
|
"loss": 0.7182, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 19.44, |
|
"eval_avg_rouge_f": 0.2672, |
|
"eval_gen_len": 16.125, |
|
"eval_loss": 1.3606797456741333, |
|
"eval_rouge-1": 0.3352, |
|
"eval_rouge-2": 0.143, |
|
"eval_rouge-l": 0.3233, |
|
"eval_runtime": 2.5469, |
|
"eval_samples_per_second": 3.141, |
|
"eval_steps_per_second": 3.141, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 22.22, |
|
"learning_rate": 1.9069767441860468e-05, |
|
"loss": 0.5102, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 22.22, |
|
"eval_avg_rouge_f": 0.2849, |
|
"eval_gen_len": 16.625, |
|
"eval_loss": 1.3673444986343384, |
|
"eval_rouge-1": 0.36, |
|
"eval_rouge-2": 0.1597, |
|
"eval_rouge-l": 0.3349, |
|
"eval_runtime": 2.6789, |
|
"eval_samples_per_second": 2.986, |
|
"eval_steps_per_second": 2.986, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"learning_rate": 1.8837209302325582e-05, |
|
"loss": 0.4595, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_avg_rouge_f": 0.3228, |
|
"eval_gen_len": 17.125, |
|
"eval_loss": 1.371541976928711, |
|
"eval_rouge-1": 0.3892, |
|
"eval_rouge-2": 0.2153, |
|
"eval_rouge-l": 0.3641, |
|
"eval_runtime": 2.6184, |
|
"eval_samples_per_second": 3.055, |
|
"eval_steps_per_second": 3.055, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 27.78, |
|
"learning_rate": 1.86046511627907e-05, |
|
"loss": 0.3886, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 27.78, |
|
"eval_avg_rouge_f": 0.3252, |
|
"eval_gen_len": 16.375, |
|
"eval_loss": 1.4634039402008057, |
|
"eval_rouge-1": 0.3801, |
|
"eval_rouge-2": 0.2274, |
|
"eval_rouge-l": 0.3682, |
|
"eval_runtime": 2.8205, |
|
"eval_samples_per_second": 2.836, |
|
"eval_steps_per_second": 2.836, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 30.56, |
|
"learning_rate": 1.8372093023255815e-05, |
|
"loss": 0.3158, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 30.56, |
|
"eval_avg_rouge_f": 0.331, |
|
"eval_gen_len": 16.75, |
|
"eval_loss": 1.5123608112335205, |
|
"eval_rouge-1": 0.3938, |
|
"eval_rouge-2": 0.2319, |
|
"eval_rouge-l": 0.3672, |
|
"eval_runtime": 3.0478, |
|
"eval_samples_per_second": 2.625, |
|
"eval_steps_per_second": 2.625, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 33.33, |
|
"learning_rate": 1.813953488372093e-05, |
|
"loss": 0.2687, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 33.33, |
|
"eval_avg_rouge_f": 0.3468, |
|
"eval_gen_len": 16.5, |
|
"eval_loss": 1.5868151187896729, |
|
"eval_rouge-1": 0.3987, |
|
"eval_rouge-2": 0.2568, |
|
"eval_rouge-l": 0.3848, |
|
"eval_runtime": 2.5696, |
|
"eval_samples_per_second": 3.113, |
|
"eval_steps_per_second": 3.113, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 36.11, |
|
"learning_rate": 1.790697674418605e-05, |
|
"loss": 0.2361, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 36.11, |
|
"eval_avg_rouge_f": 0.3163, |
|
"eval_gen_len": 17.75, |
|
"eval_loss": 1.6459990739822388, |
|
"eval_rouge-1": 0.375, |
|
"eval_rouge-2": 0.2107, |
|
"eval_rouge-l": 0.3631, |
|
"eval_runtime": 2.8706, |
|
"eval_samples_per_second": 2.787, |
|
"eval_steps_per_second": 2.787, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 38.89, |
|
"learning_rate": 1.7674418604651163e-05, |
|
"loss": 0.1991, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 38.89, |
|
"eval_avg_rouge_f": 0.3085, |
|
"eval_gen_len": 16.25, |
|
"eval_loss": 1.6946561336517334, |
|
"eval_rouge-1": 0.3605, |
|
"eval_rouge-2": 0.2177, |
|
"eval_rouge-l": 0.3474, |
|
"eval_runtime": 2.519, |
|
"eval_samples_per_second": 3.176, |
|
"eval_steps_per_second": 3.176, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 41.67, |
|
"learning_rate": 1.744186046511628e-05, |
|
"loss": 0.151, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 41.67, |
|
"eval_avg_rouge_f": 0.3222, |
|
"eval_gen_len": 16.5, |
|
"eval_loss": 1.8248298168182373, |
|
"eval_rouge-1": 0.3832, |
|
"eval_rouge-2": 0.2274, |
|
"eval_rouge-l": 0.3559, |
|
"eval_runtime": 2.6923, |
|
"eval_samples_per_second": 2.971, |
|
"eval_steps_per_second": 2.971, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 44.44, |
|
"learning_rate": 1.7209302325581396e-05, |
|
"loss": 0.1517, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 44.44, |
|
"eval_avg_rouge_f": 0.3811, |
|
"eval_gen_len": 16.875, |
|
"eval_loss": 1.7883902788162231, |
|
"eval_rouge-1": 0.4309, |
|
"eval_rouge-2": 0.294, |
|
"eval_rouge-l": 0.4184, |
|
"eval_runtime": 2.5559, |
|
"eval_samples_per_second": 3.13, |
|
"eval_steps_per_second": 3.13, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 47.22, |
|
"learning_rate": 1.697674418604651e-05, |
|
"loss": 0.1444, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 47.22, |
|
"eval_avg_rouge_f": 0.322, |
|
"eval_gen_len": 17.125, |
|
"eval_loss": 1.8518762588500977, |
|
"eval_rouge-1": 0.3843, |
|
"eval_rouge-2": 0.2107, |
|
"eval_rouge-l": 0.3711, |
|
"eval_runtime": 2.7195, |
|
"eval_samples_per_second": 2.942, |
|
"eval_steps_per_second": 2.942, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"learning_rate": 1.674418604651163e-05, |
|
"loss": 0.1106, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_avg_rouge_f": 0.3209, |
|
"eval_gen_len": 17.5, |
|
"eval_loss": 1.9637408256530762, |
|
"eval_rouge-1": 0.383, |
|
"eval_rouge-2": 0.2107, |
|
"eval_rouge-l": 0.3691, |
|
"eval_runtime": 2.6625, |
|
"eval_samples_per_second": 3.005, |
|
"eval_steps_per_second": 3.005, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 52.78, |
|
"learning_rate": 1.6511627906976747e-05, |
|
"loss": 0.0961, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 52.78, |
|
"eval_avg_rouge_f": 0.3103, |
|
"eval_gen_len": 16.75, |
|
"eval_loss": 2.07181715965271, |
|
"eval_rouge-1": 0.3645, |
|
"eval_rouge-2": 0.2177, |
|
"eval_rouge-l": 0.3488, |
|
"eval_runtime": 2.7383, |
|
"eval_samples_per_second": 2.921, |
|
"eval_steps_per_second": 2.921, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 55.56, |
|
"learning_rate": 1.6279069767441862e-05, |
|
"loss": 0.1131, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 55.56, |
|
"eval_avg_rouge_f": 0.3067, |
|
"eval_gen_len": 16.75, |
|
"eval_loss": 1.9934816360473633, |
|
"eval_rouge-1": 0.3602, |
|
"eval_rouge-2": 0.2153, |
|
"eval_rouge-l": 0.3446, |
|
"eval_runtime": 2.6295, |
|
"eval_samples_per_second": 3.042, |
|
"eval_steps_per_second": 3.042, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 58.33, |
|
"learning_rate": 1.6046511627906977e-05, |
|
"loss": 0.0996, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 58.33, |
|
"eval_avg_rouge_f": 0.3712, |
|
"eval_gen_len": 16.0, |
|
"eval_loss": 2.06162166595459, |
|
"eval_rouge-1": 0.4153, |
|
"eval_rouge-2": 0.2986, |
|
"eval_rouge-l": 0.3996, |
|
"eval_runtime": 3.0388, |
|
"eval_samples_per_second": 2.633, |
|
"eval_steps_per_second": 2.633, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 61.11, |
|
"learning_rate": 1.5813953488372095e-05, |
|
"loss": 0.0663, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 61.11, |
|
"eval_avg_rouge_f": 0.3786, |
|
"eval_gen_len": 14.625, |
|
"eval_loss": 2.1466333866119385, |
|
"eval_rouge-1": 0.4257, |
|
"eval_rouge-2": 0.301, |
|
"eval_rouge-l": 0.409, |
|
"eval_runtime": 3.1902, |
|
"eval_samples_per_second": 2.508, |
|
"eval_steps_per_second": 2.508, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 63.89, |
|
"learning_rate": 1.558139534883721e-05, |
|
"loss": 0.0789, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 63.89, |
|
"eval_avg_rouge_f": 0.3728, |
|
"eval_gen_len": 16.0, |
|
"eval_loss": 2.1657214164733887, |
|
"eval_rouge-1": 0.4166, |
|
"eval_rouge-2": 0.301, |
|
"eval_rouge-l": 0.4009, |
|
"eval_runtime": 2.4781, |
|
"eval_samples_per_second": 3.228, |
|
"eval_steps_per_second": 3.228, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 66.67, |
|
"learning_rate": 1.5348837209302328e-05, |
|
"loss": 0.073, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 66.67, |
|
"eval_avg_rouge_f": 0.3713, |
|
"eval_gen_len": 16.25, |
|
"eval_loss": 2.251979351043701, |
|
"eval_rouge-1": 0.4131, |
|
"eval_rouge-2": 0.301, |
|
"eval_rouge-l": 0.3999, |
|
"eval_runtime": 3.4503, |
|
"eval_samples_per_second": 2.319, |
|
"eval_steps_per_second": 2.319, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 69.44, |
|
"learning_rate": 1.5116279069767443e-05, |
|
"loss": 0.0739, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 69.44, |
|
"eval_avg_rouge_f": 0.3051, |
|
"eval_gen_len": 17.0, |
|
"eval_loss": 2.260244369506836, |
|
"eval_rouge-1": 0.3582, |
|
"eval_rouge-2": 0.2145, |
|
"eval_rouge-l": 0.3426, |
|
"eval_runtime": 2.5948, |
|
"eval_samples_per_second": 3.083, |
|
"eval_steps_per_second": 3.083, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 72.22, |
|
"learning_rate": 1.488372093023256e-05, |
|
"loss": 0.0799, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 72.22, |
|
"eval_avg_rouge_f": 0.3156, |
|
"eval_gen_len": 16.75, |
|
"eval_loss": 2.3278074264526367, |
|
"eval_rouge-1": 0.369, |
|
"eval_rouge-2": 0.2242, |
|
"eval_rouge-l": 0.3534, |
|
"eval_runtime": 3.3341, |
|
"eval_samples_per_second": 2.399, |
|
"eval_steps_per_second": 2.399, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"learning_rate": 1.4651162790697674e-05, |
|
"loss": 0.0546, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_avg_rouge_f": 0.3164, |
|
"eval_gen_len": 16.5, |
|
"eval_loss": 2.402118444442749, |
|
"eval_rouge-1": 0.369, |
|
"eval_rouge-2": 0.2242, |
|
"eval_rouge-l": 0.3559, |
|
"eval_runtime": 2.5497, |
|
"eval_samples_per_second": 3.138, |
|
"eval_steps_per_second": 3.138, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 77.78, |
|
"learning_rate": 1.441860465116279e-05, |
|
"loss": 0.0674, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 77.78, |
|
"eval_avg_rouge_f": 0.3697, |
|
"eval_gen_len": 17.25, |
|
"eval_loss": 2.3492679595947266, |
|
"eval_rouge-1": 0.4149, |
|
"eval_rouge-2": 0.2924, |
|
"eval_rouge-l": 0.4017, |
|
"eval_runtime": 3.1613, |
|
"eval_samples_per_second": 2.531, |
|
"eval_steps_per_second": 2.531, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 80.56, |
|
"learning_rate": 1.4186046511627909e-05, |
|
"loss": 0.0459, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 80.56, |
|
"eval_avg_rouge_f": 0.3839, |
|
"eval_gen_len": 16.125, |
|
"eval_loss": 2.3503048419952393, |
|
"eval_rouge-1": 0.426, |
|
"eval_rouge-2": 0.3153, |
|
"eval_rouge-l": 0.4104, |
|
"eval_runtime": 2.4756, |
|
"eval_samples_per_second": 3.232, |
|
"eval_steps_per_second": 3.232, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 83.33, |
|
"learning_rate": 1.3953488372093025e-05, |
|
"loss": 0.0501, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 83.33, |
|
"eval_avg_rouge_f": 0.3732, |
|
"eval_gen_len": 15.375, |
|
"eval_loss": 2.371870517730713, |
|
"eval_rouge-1": 0.4172, |
|
"eval_rouge-2": 0.301, |
|
"eval_rouge-l": 0.4016, |
|
"eval_runtime": 2.8658, |
|
"eval_samples_per_second": 2.792, |
|
"eval_steps_per_second": 2.792, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 86.11, |
|
"learning_rate": 1.372093023255814e-05, |
|
"loss": 0.0509, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 86.11, |
|
"eval_avg_rouge_f": 0.3926, |
|
"eval_gen_len": 16.375, |
|
"eval_loss": 2.4419479370117188, |
|
"eval_rouge-1": 0.4361, |
|
"eval_rouge-2": 0.3188, |
|
"eval_rouge-l": 0.4229, |
|
"eval_runtime": 3.0315, |
|
"eval_samples_per_second": 2.639, |
|
"eval_steps_per_second": 2.639, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 88.89, |
|
"learning_rate": 1.3488372093023257e-05, |
|
"loss": 0.0449, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 88.89, |
|
"eval_avg_rouge_f": 0.4026, |
|
"eval_gen_len": 16.375, |
|
"eval_loss": 2.3171658515930176, |
|
"eval_rouge-1": 0.4514, |
|
"eval_rouge-2": 0.3188, |
|
"eval_rouge-l": 0.4375, |
|
"eval_runtime": 3.636, |
|
"eval_samples_per_second": 2.2, |
|
"eval_steps_per_second": 2.2, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 91.67, |
|
"learning_rate": 1.3255813953488372e-05, |
|
"loss": 0.0408, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 91.67, |
|
"eval_avg_rouge_f": 0.3906, |
|
"eval_gen_len": 16.25, |
|
"eval_loss": 2.4437549114227295, |
|
"eval_rouge-1": 0.4349, |
|
"eval_rouge-2": 0.3153, |
|
"eval_rouge-l": 0.4217, |
|
"eval_runtime": 2.4939, |
|
"eval_samples_per_second": 3.208, |
|
"eval_steps_per_second": 3.208, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 94.44, |
|
"learning_rate": 1.302325581395349e-05, |
|
"loss": 0.0357, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 94.44, |
|
"eval_avg_rouge_f": 0.3831, |
|
"eval_gen_len": 16.25, |
|
"eval_loss": 2.540635108947754, |
|
"eval_rouge-1": 0.4236, |
|
"eval_rouge-2": 0.3153, |
|
"eval_rouge-l": 0.4104, |
|
"eval_runtime": 3.5767, |
|
"eval_samples_per_second": 2.237, |
|
"eval_steps_per_second": 2.237, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 97.22, |
|
"learning_rate": 1.2790697674418606e-05, |
|
"loss": 0.0403, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 97.22, |
|
"eval_avg_rouge_f": 0.3748, |
|
"eval_gen_len": 16.375, |
|
"eval_loss": 2.4441065788269043, |
|
"eval_rouge-1": 0.4111, |
|
"eval_rouge-2": 0.3153, |
|
"eval_rouge-l": 0.398, |
|
"eval_runtime": 2.5203, |
|
"eval_samples_per_second": 3.174, |
|
"eval_steps_per_second": 3.174, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"learning_rate": 1.2558139534883723e-05, |
|
"loss": 0.0489, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_avg_rouge_f": 0.3768, |
|
"eval_gen_len": 16.125, |
|
"eval_loss": 2.459872245788574, |
|
"eval_rouge-1": 0.4154, |
|
"eval_rouge-2": 0.3153, |
|
"eval_rouge-l": 0.3997, |
|
"eval_runtime": 3.4884, |
|
"eval_samples_per_second": 2.293, |
|
"eval_steps_per_second": 2.293, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 102.78, |
|
"learning_rate": 1.2325581395348838e-05, |
|
"loss": 0.032, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 102.78, |
|
"eval_avg_rouge_f": 0.407, |
|
"eval_gen_len": 15.0, |
|
"eval_loss": 2.623534679412842, |
|
"eval_rouge-1": 0.4515, |
|
"eval_rouge-2": 0.3335, |
|
"eval_rouge-l": 0.4359, |
|
"eval_runtime": 2.3389, |
|
"eval_samples_per_second": 3.42, |
|
"eval_steps_per_second": 3.42, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 105.56, |
|
"learning_rate": 1.2093023255813954e-05, |
|
"loss": 0.0379, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 105.56, |
|
"eval_avg_rouge_f": 0.407, |
|
"eval_gen_len": 15.125, |
|
"eval_loss": 2.6058127880096436, |
|
"eval_rouge-1": 0.4515, |
|
"eval_rouge-2": 0.3335, |
|
"eval_rouge-l": 0.4359, |
|
"eval_runtime": 3.2938, |
|
"eval_samples_per_second": 2.429, |
|
"eval_steps_per_second": 2.429, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 108.33, |
|
"learning_rate": 1.1860465116279072e-05, |
|
"loss": 0.0466, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 108.33, |
|
"eval_avg_rouge_f": 0.3768, |
|
"eval_gen_len": 16.125, |
|
"eval_loss": 2.5748019218444824, |
|
"eval_rouge-1": 0.4154, |
|
"eval_rouge-2": 0.3153, |
|
"eval_rouge-l": 0.3997, |
|
"eval_runtime": 2.4802, |
|
"eval_samples_per_second": 3.226, |
|
"eval_steps_per_second": 3.226, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 111.11, |
|
"learning_rate": 1.1627906976744187e-05, |
|
"loss": 0.0317, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 111.11, |
|
"eval_avg_rouge_f": 0.3778, |
|
"eval_gen_len": 16.125, |
|
"eval_loss": 2.663809299468994, |
|
"eval_rouge-1": 0.4169, |
|
"eval_rouge-2": 0.3153, |
|
"eval_rouge-l": 0.4013, |
|
"eval_runtime": 3.2301, |
|
"eval_samples_per_second": 2.477, |
|
"eval_steps_per_second": 2.477, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 113.89, |
|
"learning_rate": 1.1395348837209304e-05, |
|
"loss": 0.0234, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 113.89, |
|
"eval_avg_rouge_f": 0.3888, |
|
"eval_gen_len": 15.5, |
|
"eval_loss": 2.740657091140747, |
|
"eval_rouge-1": 0.4334, |
|
"eval_rouge-2": 0.3153, |
|
"eval_rouge-l": 0.4178, |
|
"eval_runtime": 4.1851, |
|
"eval_samples_per_second": 1.912, |
|
"eval_steps_per_second": 1.912, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 116.67, |
|
"learning_rate": 1.116279069767442e-05, |
|
"loss": 0.0308, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 116.67, |
|
"eval_avg_rouge_f": 0.3799, |
|
"eval_gen_len": 16.125, |
|
"eval_loss": 2.70857572555542, |
|
"eval_rouge-1": 0.4201, |
|
"eval_rouge-2": 0.3153, |
|
"eval_rouge-l": 0.4044, |
|
"eval_runtime": 2.5313, |
|
"eval_samples_per_second": 3.16, |
|
"eval_steps_per_second": 3.16, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 119.44, |
|
"learning_rate": 1.0930232558139535e-05, |
|
"loss": 0.0305, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 119.44, |
|
"eval_avg_rouge_f": 0.3598, |
|
"eval_gen_len": 15.5, |
|
"eval_loss": 2.7068228721618652, |
|
"eval_rouge-1": 0.4059, |
|
"eval_rouge-2": 0.2831, |
|
"eval_rouge-l": 0.3902, |
|
"eval_runtime": 2.7634, |
|
"eval_samples_per_second": 2.895, |
|
"eval_steps_per_second": 2.895, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 122.22, |
|
"learning_rate": 1.0697674418604651e-05, |
|
"loss": 0.0289, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 122.22, |
|
"eval_avg_rouge_f": 0.3598, |
|
"eval_gen_len": 15.5, |
|
"eval_loss": 2.8503403663635254, |
|
"eval_rouge-1": 0.4059, |
|
"eval_rouge-2": 0.2831, |
|
"eval_rouge-l": 0.3902, |
|
"eval_runtime": 2.4338, |
|
"eval_samples_per_second": 3.287, |
|
"eval_steps_per_second": 3.287, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 125.0, |
|
"learning_rate": 1.046511627906977e-05, |
|
"loss": 0.0555, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 125.0, |
|
"eval_avg_rouge_f": 0.3598, |
|
"eval_gen_len": 15.5, |
|
"eval_loss": 2.8522231578826904, |
|
"eval_rouge-1": 0.4059, |
|
"eval_rouge-2": 0.2831, |
|
"eval_rouge-l": 0.3902, |
|
"eval_runtime": 2.8814, |
|
"eval_samples_per_second": 2.776, |
|
"eval_steps_per_second": 2.776, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 127.78, |
|
"learning_rate": 1.0232558139534884e-05, |
|
"loss": 0.022, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 127.78, |
|
"eval_avg_rouge_f": 0.3598, |
|
"eval_gen_len": 15.5, |
|
"eval_loss": 2.9057185649871826, |
|
"eval_rouge-1": 0.4059, |
|
"eval_rouge-2": 0.2831, |
|
"eval_rouge-l": 0.3902, |
|
"eval_runtime": 2.43, |
|
"eval_samples_per_second": 3.292, |
|
"eval_steps_per_second": 3.292, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 130.56, |
|
"learning_rate": 1e-05, |
|
"loss": 0.0369, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 130.56, |
|
"eval_avg_rouge_f": 0.3598, |
|
"eval_gen_len": 15.5, |
|
"eval_loss": 2.8735642433166504, |
|
"eval_rouge-1": 0.4059, |
|
"eval_rouge-2": 0.2831, |
|
"eval_rouge-l": 0.3902, |
|
"eval_runtime": 3.0784, |
|
"eval_samples_per_second": 2.599, |
|
"eval_steps_per_second": 2.599, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 133.33, |
|
"learning_rate": 9.767441860465117e-06, |
|
"loss": 0.0195, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 133.33, |
|
"eval_avg_rouge_f": 0.3598, |
|
"eval_gen_len": 15.5, |
|
"eval_loss": 2.7636728286743164, |
|
"eval_rouge-1": 0.4059, |
|
"eval_rouge-2": 0.2831, |
|
"eval_rouge-l": 0.3902, |
|
"eval_runtime": 2.4413, |
|
"eval_samples_per_second": 3.277, |
|
"eval_steps_per_second": 3.277, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 136.11, |
|
"learning_rate": 9.534883720930234e-06, |
|
"loss": 0.0387, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 136.11, |
|
"eval_avg_rouge_f": 0.3598, |
|
"eval_gen_len": 15.5, |
|
"eval_loss": 2.743685007095337, |
|
"eval_rouge-1": 0.4059, |
|
"eval_rouge-2": 0.2831, |
|
"eval_rouge-l": 0.3902, |
|
"eval_runtime": 2.9809, |
|
"eval_samples_per_second": 2.684, |
|
"eval_steps_per_second": 2.684, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 138.89, |
|
"learning_rate": 9.30232558139535e-06, |
|
"loss": 0.0298, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 138.89, |
|
"eval_avg_rouge_f": 0.3443, |
|
"eval_gen_len": 16.25, |
|
"eval_loss": 2.8817646503448486, |
|
"eval_rouge-1": 0.391, |
|
"eval_rouge-2": 0.2665, |
|
"eval_rouge-l": 0.3754, |
|
"eval_runtime": 2.5341, |
|
"eval_samples_per_second": 3.157, |
|
"eval_steps_per_second": 3.157, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 141.67, |
|
"learning_rate": 9.069767441860465e-06, |
|
"loss": 0.0265, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 141.67, |
|
"eval_avg_rouge_f": 0.3353, |
|
"eval_gen_len": 16.5, |
|
"eval_loss": 2.834005355834961, |
|
"eval_rouge-1": 0.3776, |
|
"eval_rouge-2": 0.2665, |
|
"eval_rouge-l": 0.362, |
|
"eval_runtime": 3.3656, |
|
"eval_samples_per_second": 2.377, |
|
"eval_steps_per_second": 2.377, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 144.44, |
|
"learning_rate": 8.837209302325582e-06, |
|
"loss": 0.0182, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 144.44, |
|
"eval_avg_rouge_f": 0.3598, |
|
"eval_gen_len": 15.5, |
|
"eval_loss": 2.873906135559082, |
|
"eval_rouge-1": 0.4059, |
|
"eval_rouge-2": 0.2831, |
|
"eval_rouge-l": 0.3902, |
|
"eval_runtime": 2.4328, |
|
"eval_samples_per_second": 3.288, |
|
"eval_steps_per_second": 3.288, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 144.44, |
|
"step": 2600, |
|
"total_flos": 3409770731258880.0, |
|
"train_loss": 0.29600492647060983, |
|
"train_runtime": 1522.0987, |
|
"train_samples_per_second": 8.869, |
|
"train_steps_per_second": 2.956 |
|
} |
|
], |
|
"max_steps": 4500, |
|
"num_train_epochs": 250, |
|
"total_flos": 3409770731258880.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|