{ "best_metric": 0.11113496124744415, "best_model_checkpoint": "uk-mt5-base-gec-synthetic/checkpoint-14400", "epoch": 14.937273198548471, "global_step": 14400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.31, "learning_rate": 0.000979253112033195, "loss": 1.9255, "step": 300 }, { "epoch": 0.31, "eval_google_bleu": 0.2703240096475649, "eval_loss": 0.9424494504928589, "eval_runtime": 347.3208, "eval_samples_per_second": 44.429, "eval_steps_per_second": 1.391, "step": 300 }, { "epoch": 0.62, "learning_rate": 0.0009585062240663901, "loss": 1.0814, "step": 600 }, { "epoch": 0.62, "eval_google_bleu": 0.2826040990979691, "eval_loss": 0.7668491005897522, "eval_runtime": 345.1153, "eval_samples_per_second": 44.713, "eval_steps_per_second": 1.4, "step": 600 }, { "epoch": 0.93, "learning_rate": 0.000937759336099585, "loss": 0.9355, "step": 900 }, { "epoch": 0.93, "eval_google_bleu": 0.2865646045540358, "eval_loss": 0.7197995185852051, "eval_runtime": 342.8829, "eval_samples_per_second": 45.004, "eval_steps_per_second": 1.409, "step": 900 }, { "epoch": 1.24, "learning_rate": 0.00091701244813278, "loss": 0.8279, "step": 1200 }, { "epoch": 1.24, "eval_google_bleu": 0.292388510379304, "eval_loss": 0.6961821913719177, "eval_runtime": 342.7478, "eval_samples_per_second": 45.021, "eval_steps_per_second": 1.409, "step": 1200 }, { "epoch": 1.56, "learning_rate": 0.0008962655601659752, "loss": 0.7798, "step": 1500 }, { "epoch": 1.56, "eval_google_bleu": 0.2966368793800289, "eval_loss": 0.6216537952423096, "eval_runtime": 341.7328, "eval_samples_per_second": 45.155, "eval_steps_per_second": 1.413, "step": 1500 }, { "epoch": 1.87, "learning_rate": 0.0008755186721991702, "loss": 0.7454, "step": 1800 }, { "epoch": 1.87, "eval_google_bleu": 0.2970095360433254, "eval_loss": 0.5832293629646301, "eval_runtime": 342.7772, "eval_samples_per_second": 45.018, "eval_steps_per_second": 1.409, "step": 1800 }, { "epoch": 2.18, "learning_rate": 0.0008547717842323651, "loss": 0.6828, "step": 2100 }, { "epoch": 2.18, "eval_google_bleu": 0.30616414078516274, "eval_loss": 0.5547264814376831, "eval_runtime": 342.5678, "eval_samples_per_second": 45.045, "eval_steps_per_second": 1.41, "step": 2100 }, { "epoch": 2.49, "learning_rate": 0.0008340248962655602, "loss": 0.6261, "step": 2400 }, { "epoch": 2.49, "eval_google_bleu": 0.30804651852418713, "eval_loss": 0.5183674693107605, "eval_runtime": 342.3924, "eval_samples_per_second": 45.068, "eval_steps_per_second": 1.411, "step": 2400 }, { "epoch": 2.8, "learning_rate": 0.0008132780082987552, "loss": 0.6187, "step": 2700 }, { "epoch": 2.8, "eval_google_bleu": 0.3114351154903824, "eval_loss": 0.4962822198867798, "eval_runtime": 344.2093, "eval_samples_per_second": 44.83, "eval_steps_per_second": 1.403, "step": 2700 }, { "epoch": 3.11, "learning_rate": 0.0007925311203319502, "loss": 0.5809, "step": 3000 }, { "epoch": 3.11, "eval_google_bleu": 0.3130147936777771, "eval_loss": 0.5069273114204407, "eval_runtime": 343.9406, "eval_samples_per_second": 44.865, "eval_steps_per_second": 1.404, "step": 3000 }, { "epoch": 3.42, "learning_rate": 0.0007717842323651453, "loss": 0.5366, "step": 3300 }, { "epoch": 3.42, "eval_google_bleu": 0.31680302377876657, "eval_loss": 0.46470168232917786, "eval_runtime": 343.1588, "eval_samples_per_second": 44.968, "eval_steps_per_second": 1.408, "step": 3300 }, { "epoch": 3.73, "learning_rate": 0.0007510373443983402, "loss": 0.526, "step": 3600 }, { "epoch": 3.73, "eval_google_bleu": 0.3175123454942687, "eval_loss": 0.4724844694137573, "eval_runtime": 344.2418, "eval_samples_per_second": 44.826, "eval_steps_per_second": 1.403, "step": 3600 }, { "epoch": 4.05, "learning_rate": 0.0007302904564315352, "loss": 0.5137, "step": 3900 }, { "epoch": 4.05, "eval_google_bleu": 0.32288427448475915, "eval_loss": 0.4414692223072052, "eval_runtime": 344.0594, "eval_samples_per_second": 44.85, "eval_steps_per_second": 1.404, "step": 3900 }, { "epoch": 4.36, "learning_rate": 0.0007095435684647303, "loss": 0.4603, "step": 4200 }, { "epoch": 4.36, "eval_google_bleu": 0.3268479743473163, "eval_loss": 0.43008026480674744, "eval_runtime": 342.6092, "eval_samples_per_second": 45.04, "eval_steps_per_second": 1.41, "step": 4200 }, { "epoch": 4.67, "learning_rate": 0.0006887966804979254, "loss": 0.4531, "step": 4500 }, { "epoch": 4.67, "eval_google_bleu": 0.3273518151404788, "eval_loss": 0.40794768929481506, "eval_runtime": 342.7787, "eval_samples_per_second": 45.017, "eval_steps_per_second": 1.409, "step": 4500 }, { "epoch": 4.98, "learning_rate": 0.0006680497925311203, "loss": 0.4555, "step": 4800 }, { "epoch": 4.98, "eval_google_bleu": 0.32905860892065497, "eval_loss": 0.39621850848197937, "eval_runtime": 343.6219, "eval_samples_per_second": 44.907, "eval_steps_per_second": 1.406, "step": 4800 }, { "epoch": 5.29, "learning_rate": 0.0006473029045643154, "loss": 0.392, "step": 5100 }, { "epoch": 5.29, "eval_google_bleu": 0.3336905264492333, "eval_loss": 0.3776128590106964, "eval_runtime": 343.634, "eval_samples_per_second": 44.905, "eval_steps_per_second": 1.406, "step": 5100 }, { "epoch": 5.6, "learning_rate": 0.0006265560165975104, "loss": 0.402, "step": 5400 }, { "epoch": 5.6, "eval_google_bleu": 0.3371762652161784, "eval_loss": 0.3662883937358856, "eval_runtime": 343.334, "eval_samples_per_second": 44.945, "eval_steps_per_second": 1.407, "step": 5400 }, { "epoch": 5.91, "learning_rate": 0.0006058091286307054, "loss": 0.3979, "step": 5700 }, { "epoch": 5.91, "eval_google_bleu": 0.34215661656562757, "eval_loss": 0.34634825587272644, "eval_runtime": 343.5776, "eval_samples_per_second": 44.913, "eval_steps_per_second": 1.406, "step": 5700 }, { "epoch": 6.22, "learning_rate": 0.0005850622406639005, "loss": 0.3545, "step": 6000 }, { "epoch": 6.22, "eval_google_bleu": 0.339056903098911, "eval_loss": 0.34026840329170227, "eval_runtime": 344.1962, "eval_samples_per_second": 44.832, "eval_steps_per_second": 1.403, "step": 6000 }, { "epoch": 6.53, "learning_rate": 0.0005643153526970954, "loss": 0.342, "step": 6300 }, { "epoch": 6.53, "eval_google_bleu": 0.34660828418236983, "eval_loss": 0.3360808789730072, "eval_runtime": 344.3987, "eval_samples_per_second": 44.806, "eval_steps_per_second": 1.402, "step": 6300 }, { "epoch": 6.85, "learning_rate": 0.0005435684647302904, "loss": 0.3462, "step": 6600 }, { "epoch": 6.85, "eval_google_bleu": 0.3519982269928361, "eval_loss": 0.3193075954914093, "eval_runtime": 343.4724, "eval_samples_per_second": 44.926, "eval_steps_per_second": 1.406, "step": 6600 }, { "epoch": 7.16, "learning_rate": 0.0005228215767634855, "loss": 0.3097, "step": 6900 }, { "epoch": 7.16, "eval_google_bleu": 0.35084156619797907, "eval_loss": 0.3339296281337738, "eval_runtime": 345.4283, "eval_samples_per_second": 44.672, "eval_steps_per_second": 1.398, "step": 6900 }, { "epoch": 7.47, "learning_rate": 0.0005020746887966805, "loss": 0.2937, "step": 7200 }, { "epoch": 7.47, "eval_google_bleu": 0.35751958227664965, "eval_loss": 0.29495447874069214, "eval_runtime": 344.049, "eval_samples_per_second": 44.851, "eval_steps_per_second": 1.404, "step": 7200 }, { "epoch": 7.78, "learning_rate": 0.00048132780082987554, "loss": 0.2962, "step": 7500 }, { "epoch": 7.78, "eval_google_bleu": 0.3584187047483623, "eval_loss": 0.27855053544044495, "eval_runtime": 344.1164, "eval_samples_per_second": 44.842, "eval_steps_per_second": 1.404, "step": 7500 }, { "epoch": 8.09, "learning_rate": 0.00046058091286307055, "loss": 0.2795, "step": 7800 }, { "epoch": 8.09, "eval_google_bleu": 0.3626892660498925, "eval_loss": 0.273299902677536, "eval_runtime": 344.7788, "eval_samples_per_second": 44.756, "eval_steps_per_second": 1.401, "step": 7800 }, { "epoch": 8.4, "learning_rate": 0.00043983402489626555, "loss": 0.2463, "step": 8100 }, { "epoch": 8.4, "eval_google_bleu": 0.36640212432731034, "eval_loss": 0.27249810099601746, "eval_runtime": 344.7536, "eval_samples_per_second": 44.76, "eval_steps_per_second": 1.401, "step": 8100 }, { "epoch": 8.71, "learning_rate": 0.0004190871369294606, "loss": 0.2473, "step": 8400 }, { "epoch": 8.71, "eval_google_bleu": 0.3680926990956975, "eval_loss": 0.25599655508995056, "eval_runtime": 343.8059, "eval_samples_per_second": 44.883, "eval_steps_per_second": 1.405, "step": 8400 }, { "epoch": 9.02, "learning_rate": 0.00039834024896265557, "loss": 0.2462, "step": 8700 }, { "epoch": 9.02, "eval_google_bleu": 0.3726668030252467, "eval_loss": 0.24074989557266235, "eval_runtime": 343.4854, "eval_samples_per_second": 44.925, "eval_steps_per_second": 1.406, "step": 8700 }, { "epoch": 9.34, "learning_rate": 0.00037759336099585063, "loss": 0.2003, "step": 9000 }, { "epoch": 9.34, "eval_google_bleu": 0.37575691510452164, "eval_loss": 0.23459354043006897, "eval_runtime": 343.9506, "eval_samples_per_second": 44.864, "eval_steps_per_second": 1.404, "step": 9000 }, { "epoch": 9.65, "learning_rate": 0.00035684647302904564, "loss": 0.2096, "step": 9300 }, { "epoch": 9.65, "eval_google_bleu": 0.3810405269728534, "eval_loss": 0.23272807896137238, "eval_runtime": 345.3533, "eval_samples_per_second": 44.682, "eval_steps_per_second": 1.399, "step": 9300 }, { "epoch": 9.96, "learning_rate": 0.0003360995850622407, "loss": 0.2109, "step": 9600 }, { "epoch": 9.96, "eval_google_bleu": 0.3860127254969551, "eval_loss": 0.2076478898525238, "eval_runtime": 345.3314, "eval_samples_per_second": 44.685, "eval_steps_per_second": 1.399, "step": 9600 }, { "epoch": 10.27, "learning_rate": 0.00031535269709543565, "loss": 0.1673, "step": 9900 }, { "epoch": 10.27, "eval_google_bleu": 0.39099070617947457, "eval_loss": 0.2026001662015915, "eval_runtime": 344.3009, "eval_samples_per_second": 44.818, "eval_steps_per_second": 1.403, "step": 9900 }, { "epoch": 10.58, "learning_rate": 0.0002946058091286307, "loss": 0.1722, "step": 10200 }, { "epoch": 10.58, "eval_google_bleu": 0.39423925473127097, "eval_loss": 0.19409596920013428, "eval_runtime": 345.022, "eval_samples_per_second": 44.725, "eval_steps_per_second": 1.4, "step": 10200 }, { "epoch": 10.89, "learning_rate": 0.0002738589211618258, "loss": 0.1692, "step": 10500 }, { "epoch": 10.89, "eval_google_bleu": 0.39808409620231944, "eval_loss": 0.18527193367481232, "eval_runtime": 344.9054, "eval_samples_per_second": 44.74, "eval_steps_per_second": 1.4, "step": 10500 }, { "epoch": 11.2, "learning_rate": 0.00025311203319502073, "loss": 0.1414, "step": 10800 }, { "epoch": 11.2, "eval_google_bleu": 0.3988704921153549, "eval_loss": 0.18296599388122559, "eval_runtime": 344.3914, "eval_samples_per_second": 44.807, "eval_steps_per_second": 1.402, "step": 10800 }, { "epoch": 11.51, "learning_rate": 0.00023236514522821577, "loss": 0.1357, "step": 11100 }, { "epoch": 11.51, "eval_google_bleu": 0.40408064479411665, "eval_loss": 0.17169393599033356, "eval_runtime": 345.2605, "eval_samples_per_second": 44.694, "eval_steps_per_second": 1.399, "step": 11100 }, { "epoch": 11.83, "learning_rate": 0.0002116182572614108, "loss": 0.1335, "step": 11400 }, { "epoch": 11.83, "eval_google_bleu": 0.4105103547726813, "eval_loss": 0.15817226469516754, "eval_runtime": 345.0386, "eval_samples_per_second": 44.723, "eval_steps_per_second": 1.4, "step": 11400 }, { "epoch": 12.14, "learning_rate": 0.0001908713692946058, "loss": 0.119, "step": 11700 }, { "epoch": 12.14, "eval_google_bleu": 0.4141623418407881, "eval_loss": 0.1521884799003601, "eval_runtime": 344.7777, "eval_samples_per_second": 44.756, "eval_steps_per_second": 1.401, "step": 11700 }, { "epoch": 12.45, "learning_rate": 0.00017012448132780084, "loss": 0.102, "step": 12000 }, { "epoch": 12.45, "eval_google_bleu": 0.4170916396229331, "eval_loss": 0.150096133351326, "eval_runtime": 344.5413, "eval_samples_per_second": 44.787, "eval_steps_per_second": 1.402, "step": 12000 }, { "epoch": 12.76, "learning_rate": 0.00014937759336099585, "loss": 0.1014, "step": 12300 }, { "epoch": 12.76, "eval_google_bleu": 0.4215356451396477, "eval_loss": 0.1389758437871933, "eval_runtime": 343.8578, "eval_samples_per_second": 44.876, "eval_steps_per_second": 1.405, "step": 12300 }, { "epoch": 13.07, "learning_rate": 0.00012863070539419086, "loss": 0.0938, "step": 12600 }, { "epoch": 13.07, "eval_google_bleu": 0.42419893182923646, "eval_loss": 0.13169047236442566, "eval_runtime": 345.236, "eval_samples_per_second": 44.697, "eval_steps_per_second": 1.399, "step": 12600 }, { "epoch": 13.38, "learning_rate": 0.0001078838174273859, "loss": 0.0756, "step": 12900 }, { "epoch": 13.38, "eval_google_bleu": 0.4277791960457889, "eval_loss": 0.12835000455379486, "eval_runtime": 345.2858, "eval_samples_per_second": 44.691, "eval_steps_per_second": 1.399, "step": 12900 }, { "epoch": 13.69, "learning_rate": 8.713692946058092e-05, "loss": 0.0746, "step": 13200 }, { "epoch": 13.69, "eval_google_bleu": 0.43310316226520934, "eval_loss": 0.12244871258735657, "eval_runtime": 344.2998, "eval_samples_per_second": 44.819, "eval_steps_per_second": 1.403, "step": 13200 }, { "epoch": 14.0, "learning_rate": 6.639004149377594e-05, "loss": 0.071, "step": 13500 }, { "epoch": 14.0, "eval_google_bleu": 0.4362318597352654, "eval_loss": 0.11750882863998413, "eval_runtime": 344.9812, "eval_samples_per_second": 44.73, "eval_steps_per_second": 1.4, "step": 13500 }, { "epoch": 14.32, "learning_rate": 4.564315352697095e-05, "loss": 0.0564, "step": 13800 }, { "epoch": 14.32, "eval_google_bleu": 0.43759814345546477, "eval_loss": 0.11728513240814209, "eval_runtime": 344.2434, "eval_samples_per_second": 44.826, "eval_steps_per_second": 1.403, "step": 13800 }, { "epoch": 14.63, "learning_rate": 2.4896265560165973e-05, "loss": 0.0533, "step": 14100 }, { "epoch": 14.63, "eval_google_bleu": 0.4387776387150605, "eval_loss": 0.11408372223377228, "eval_runtime": 343.7735, "eval_samples_per_second": 44.887, "eval_steps_per_second": 1.405, "step": 14100 }, { "epoch": 14.94, "learning_rate": 4.149377593360996e-06, "loss": 0.0535, "step": 14400 }, { "epoch": 14.94, "eval_google_bleu": 0.4404605929546923, "eval_loss": 0.11113496124744415, "eval_runtime": 344.4281, "eval_samples_per_second": 44.802, "eval_steps_per_second": 1.402, "step": 14400 } ], "max_steps": 14460, "num_train_epochs": 15, "total_flos": 2.7791905497716736e+16, "trial_name": null, "trial_params": null }