{
  "best_metric": 0.11113496124744415,
  "best_model_checkpoint": "uk-mt5-base-gec-synthetic/checkpoint-14400",
  "epoch": 14.937273198548471,
  "global_step": 14400,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.31,
      "learning_rate": 0.000979253112033195,
      "loss": 1.9255,
      "step": 300
    },
    {
      "epoch": 0.31,
      "eval_google_bleu": 0.2703240096475649,
      "eval_loss": 0.9424494504928589,
      "eval_runtime": 347.3208,
      "eval_samples_per_second": 44.429,
      "eval_steps_per_second": 1.391,
      "step": 300
    },
    {
      "epoch": 0.62,
      "learning_rate": 0.0009585062240663901,
      "loss": 1.0814,
      "step": 600
    },
    {
      "epoch": 0.62,
      "eval_google_bleu": 0.2826040990979691,
      "eval_loss": 0.7668491005897522,
      "eval_runtime": 345.1153,
      "eval_samples_per_second": 44.713,
      "eval_steps_per_second": 1.4,
      "step": 600
    },
    {
      "epoch": 0.93,
      "learning_rate": 0.000937759336099585,
      "loss": 0.9355,
      "step": 900
    },
    {
      "epoch": 0.93,
      "eval_google_bleu": 0.2865646045540358,
      "eval_loss": 0.7197995185852051,
      "eval_runtime": 342.8829,
      "eval_samples_per_second": 45.004,
      "eval_steps_per_second": 1.409,
      "step": 900
    },
    {
      "epoch": 1.24,
      "learning_rate": 0.00091701244813278,
      "loss": 0.8279,
      "step": 1200
    },
    {
      "epoch": 1.24,
      "eval_google_bleu": 0.292388510379304,
      "eval_loss": 0.6961821913719177,
      "eval_runtime": 342.7478,
      "eval_samples_per_second": 45.021,
      "eval_steps_per_second": 1.409,
      "step": 1200
    },
    {
      "epoch": 1.56,
      "learning_rate": 0.0008962655601659752,
      "loss": 0.7798,
      "step": 1500
    },
    {
      "epoch": 1.56,
      "eval_google_bleu": 0.2966368793800289,
      "eval_loss": 0.6216537952423096,
      "eval_runtime": 341.7328,
      "eval_samples_per_second": 45.155,
      "eval_steps_per_second": 1.413,
      "step": 1500
    },
    {
      "epoch": 1.87,
      "learning_rate": 0.0008755186721991702,
      "loss": 0.7454,
      "step": 1800
    },
    {
      "epoch": 1.87,
      "eval_google_bleu": 0.2970095360433254,
      "eval_loss": 0.5832293629646301,
      "eval_runtime": 342.7772,
      "eval_samples_per_second": 45.018,
      "eval_steps_per_second": 1.409,
      "step": 1800
    },
    {
      "epoch": 2.18,
      "learning_rate": 0.0008547717842323651,
      "loss": 0.6828,
      "step": 2100
    },
    {
      "epoch": 2.18,
      "eval_google_bleu": 0.30616414078516274,
      "eval_loss": 0.5547264814376831,
      "eval_runtime": 342.5678,
      "eval_samples_per_second": 45.045,
      "eval_steps_per_second": 1.41,
      "step": 2100
    },
    {
      "epoch": 2.49,
      "learning_rate": 0.0008340248962655602,
      "loss": 0.6261,
      "step": 2400
    },
    {
      "epoch": 2.49,
      "eval_google_bleu": 0.30804651852418713,
      "eval_loss": 0.5183674693107605,
      "eval_runtime": 342.3924,
      "eval_samples_per_second": 45.068,
      "eval_steps_per_second": 1.411,
      "step": 2400
    },
    {
      "epoch": 2.8,
      "learning_rate": 0.0008132780082987552,
      "loss": 0.6187,
      "step": 2700
    },
    {
      "epoch": 2.8,
      "eval_google_bleu": 0.3114351154903824,
      "eval_loss": 0.4962822198867798,
      "eval_runtime": 344.2093,
      "eval_samples_per_second": 44.83,
      "eval_steps_per_second": 1.403,
      "step": 2700
    },
    {
      "epoch": 3.11,
      "learning_rate": 0.0007925311203319502,
      "loss": 0.5809,
      "step": 3000
    },
    {
      "epoch": 3.11,
      "eval_google_bleu": 0.3130147936777771,
      "eval_loss": 0.5069273114204407,
      "eval_runtime": 343.9406,
      "eval_samples_per_second": 44.865,
      "eval_steps_per_second": 1.404,
      "step": 3000
    },
    {
      "epoch": 3.42,
      "learning_rate": 0.0007717842323651453,
      "loss": 0.5366,
      "step": 3300
    },
    {
      "epoch": 3.42,
      "eval_google_bleu": 0.31680302377876657,
      "eval_loss": 0.46470168232917786,
      "eval_runtime": 343.1588,
      "eval_samples_per_second": 44.968,
      "eval_steps_per_second": 1.408,
      "step": 3300
    },
    {
      "epoch": 3.73,
      "learning_rate": 0.0007510373443983402,
      "loss": 0.526,
      "step": 3600
    },
    {
      "epoch": 3.73,
      "eval_google_bleu": 0.3175123454942687,
      "eval_loss": 0.4724844694137573,
      "eval_runtime": 344.2418,
      "eval_samples_per_second": 44.826,
      "eval_steps_per_second": 1.403,
      "step": 3600
    },
    {
      "epoch": 4.05,
      "learning_rate": 0.0007302904564315352,
      "loss": 0.5137,
      "step": 3900
    },
    {
      "epoch": 4.05,
      "eval_google_bleu": 0.32288427448475915,
      "eval_loss": 0.4414692223072052,
      "eval_runtime": 344.0594,
      "eval_samples_per_second": 44.85,
      "eval_steps_per_second": 1.404,
      "step": 3900
    },
    {
      "epoch": 4.36,
      "learning_rate": 0.0007095435684647303,
      "loss": 0.4603,
      "step": 4200
    },
    {
      "epoch": 4.36,
      "eval_google_bleu": 0.3268479743473163,
      "eval_loss": 0.43008026480674744,
      "eval_runtime": 342.6092,
      "eval_samples_per_second": 45.04,
      "eval_steps_per_second": 1.41,
      "step": 4200
    },
    {
      "epoch": 4.67,
      "learning_rate": 0.0006887966804979254,
      "loss": 0.4531,
      "step": 4500
    },
    {
      "epoch": 4.67,
      "eval_google_bleu": 0.3273518151404788,
      "eval_loss": 0.40794768929481506,
      "eval_runtime": 342.7787,
      "eval_samples_per_second": 45.017,
      "eval_steps_per_second": 1.409,
      "step": 4500
    },
    {
      "epoch": 4.98,
      "learning_rate": 0.0006680497925311203,
      "loss": 0.4555,
      "step": 4800
    },
    {
      "epoch": 4.98,
      "eval_google_bleu": 0.32905860892065497,
      "eval_loss": 0.39621850848197937,
      "eval_runtime": 343.6219,
      "eval_samples_per_second": 44.907,
      "eval_steps_per_second": 1.406,
      "step": 4800
    },
    {
      "epoch": 5.29,
      "learning_rate": 0.0006473029045643154,
      "loss": 0.392,
      "step": 5100
    },
    {
      "epoch": 5.29,
      "eval_google_bleu": 0.3336905264492333,
      "eval_loss": 0.3776128590106964,
      "eval_runtime": 343.634,
      "eval_samples_per_second": 44.905,
      "eval_steps_per_second": 1.406,
      "step": 5100
    },
    {
      "epoch": 5.6,
      "learning_rate": 0.0006265560165975104,
      "loss": 0.402,
      "step": 5400
    },
    {
      "epoch": 5.6,
      "eval_google_bleu": 0.3371762652161784,
      "eval_loss": 0.3662883937358856,
      "eval_runtime": 343.334,
      "eval_samples_per_second": 44.945,
      "eval_steps_per_second": 1.407,
      "step": 5400
    },
    {
      "epoch": 5.91,
      "learning_rate": 0.0006058091286307054,
      "loss": 0.3979,
      "step": 5700
    },
    {
      "epoch": 5.91,
      "eval_google_bleu": 0.34215661656562757,
      "eval_loss": 0.34634825587272644,
      "eval_runtime": 343.5776,
      "eval_samples_per_second": 44.913,
      "eval_steps_per_second": 1.406,
      "step": 5700
    },
    {
      "epoch": 6.22,
      "learning_rate": 0.0005850622406639005,
      "loss": 0.3545,
      "step": 6000
    },
    {
      "epoch": 6.22,
      "eval_google_bleu": 0.339056903098911,
      "eval_loss": 0.34026840329170227,
      "eval_runtime": 344.1962,
      "eval_samples_per_second": 44.832,
      "eval_steps_per_second": 1.403,
      "step": 6000
    },
    {
      "epoch": 6.53,
      "learning_rate": 0.0005643153526970954,
      "loss": 0.342,
      "step": 6300
    },
    {
      "epoch": 6.53,
      "eval_google_bleu": 0.34660828418236983,
      "eval_loss": 0.3360808789730072,
      "eval_runtime": 344.3987,
      "eval_samples_per_second": 44.806,
      "eval_steps_per_second": 1.402,
      "step": 6300
    },
    {
      "epoch": 6.85,
      "learning_rate": 0.0005435684647302904,
      "loss": 0.3462,
      "step": 6600
    },
    {
      "epoch": 6.85,
      "eval_google_bleu": 0.3519982269928361,
      "eval_loss": 0.3193075954914093,
      "eval_runtime": 343.4724,
      "eval_samples_per_second": 44.926,
      "eval_steps_per_second": 1.406,
      "step": 6600
    },
    {
      "epoch": 7.16,
      "learning_rate": 0.0005228215767634855,
      "loss": 0.3097,
      "step": 6900
    },
    {
      "epoch": 7.16,
      "eval_google_bleu": 0.35084156619797907,
      "eval_loss": 0.3339296281337738,
      "eval_runtime": 345.4283,
      "eval_samples_per_second": 44.672,
      "eval_steps_per_second": 1.398,
      "step": 6900
    },
    {
      "epoch": 7.47,
      "learning_rate": 0.0005020746887966805,
      "loss": 0.2937,
      "step": 7200
    },
    {
      "epoch": 7.47,
      "eval_google_bleu": 0.35751958227664965,
      "eval_loss": 0.29495447874069214,
      "eval_runtime": 344.049,
      "eval_samples_per_second": 44.851,
      "eval_steps_per_second": 1.404,
      "step": 7200
    },
    {
      "epoch": 7.78,
      "learning_rate": 0.00048132780082987554,
      "loss": 0.2962,
      "step": 7500
    },
    {
      "epoch": 7.78,
      "eval_google_bleu": 0.3584187047483623,
      "eval_loss": 0.27855053544044495,
      "eval_runtime": 344.1164,
      "eval_samples_per_second": 44.842,
      "eval_steps_per_second": 1.404,
      "step": 7500
    },
    {
      "epoch": 8.09,
      "learning_rate": 0.00046058091286307055,
      "loss": 0.2795,
      "step": 7800
    },
    {
      "epoch": 8.09,
      "eval_google_bleu": 0.3626892660498925,
      "eval_loss": 0.273299902677536,
      "eval_runtime": 344.7788,
      "eval_samples_per_second": 44.756,
      "eval_steps_per_second": 1.401,
      "step": 7800
    },
    {
      "epoch": 8.4,
      "learning_rate": 0.00043983402489626555,
      "loss": 0.2463,
      "step": 8100
    },
    {
      "epoch": 8.4,
      "eval_google_bleu": 0.36640212432731034,
      "eval_loss": 0.27249810099601746,
      "eval_runtime": 344.7536,
      "eval_samples_per_second": 44.76,
      "eval_steps_per_second": 1.401,
      "step": 8100
    },
    {
      "epoch": 8.71,
      "learning_rate": 0.0004190871369294606,
      "loss": 0.2473,
      "step": 8400
    },
    {
      "epoch": 8.71,
      "eval_google_bleu": 0.3680926990956975,
      "eval_loss": 0.25599655508995056,
      "eval_runtime": 343.8059,
      "eval_samples_per_second": 44.883,
      "eval_steps_per_second": 1.405,
      "step": 8400
    },
    {
      "epoch": 9.02,
      "learning_rate": 0.00039834024896265557,
      "loss": 0.2462,
      "step": 8700
    },
    {
      "epoch": 9.02,
      "eval_google_bleu": 0.3726668030252467,
      "eval_loss": 0.24074989557266235,
      "eval_runtime": 343.4854,
      "eval_samples_per_second": 44.925,
      "eval_steps_per_second": 1.406,
      "step": 8700
    },
    {
      "epoch": 9.34,
      "learning_rate": 0.00037759336099585063,
      "loss": 0.2003,
      "step": 9000
    },
    {
      "epoch": 9.34,
      "eval_google_bleu": 0.37575691510452164,
      "eval_loss": 0.23459354043006897,
      "eval_runtime": 343.9506,
      "eval_samples_per_second": 44.864,
      "eval_steps_per_second": 1.404,
      "step": 9000
    },
    {
      "epoch": 9.65,
      "learning_rate": 0.00035684647302904564,
      "loss": 0.2096,
      "step": 9300
    },
    {
      "epoch": 9.65,
      "eval_google_bleu": 0.3810405269728534,
      "eval_loss": 0.23272807896137238,
      "eval_runtime": 345.3533,
      "eval_samples_per_second": 44.682,
      "eval_steps_per_second": 1.399,
      "step": 9300
    },
    {
      "epoch": 9.96,
      "learning_rate": 0.0003360995850622407,
      "loss": 0.2109,
      "step": 9600
    },
    {
      "epoch": 9.96,
      "eval_google_bleu": 0.3860127254969551,
      "eval_loss": 0.2076478898525238,
      "eval_runtime": 345.3314,
      "eval_samples_per_second": 44.685,
      "eval_steps_per_second": 1.399,
      "step": 9600
    },
    {
      "epoch": 10.27,
      "learning_rate": 0.00031535269709543565,
      "loss": 0.1673,
      "step": 9900
    },
    {
      "epoch": 10.27,
      "eval_google_bleu": 0.39099070617947457,
      "eval_loss": 0.2026001662015915,
      "eval_runtime": 344.3009,
      "eval_samples_per_second": 44.818,
      "eval_steps_per_second": 1.403,
      "step": 9900
    },
    {
      "epoch": 10.58,
      "learning_rate": 0.0002946058091286307,
      "loss": 0.1722,
      "step": 10200
    },
    {
      "epoch": 10.58,
      "eval_google_bleu": 0.39423925473127097,
      "eval_loss": 0.19409596920013428,
      "eval_runtime": 345.022,
      "eval_samples_per_second": 44.725,
      "eval_steps_per_second": 1.4,
      "step": 10200
    },
    {
      "epoch": 10.89,
      "learning_rate": 0.0002738589211618258,
      "loss": 0.1692,
      "step": 10500
    },
    {
      "epoch": 10.89,
      "eval_google_bleu": 0.39808409620231944,
      "eval_loss": 0.18527193367481232,
      "eval_runtime": 344.9054,
      "eval_samples_per_second": 44.74,
      "eval_steps_per_second": 1.4,
      "step": 10500
    },
    {
      "epoch": 11.2,
      "learning_rate": 0.00025311203319502073,
      "loss": 0.1414,
      "step": 10800
    },
    {
      "epoch": 11.2,
      "eval_google_bleu": 0.3988704921153549,
      "eval_loss": 0.18296599388122559,
      "eval_runtime": 344.3914,
      "eval_samples_per_second": 44.807,
      "eval_steps_per_second": 1.402,
      "step": 10800
    },
    {
      "epoch": 11.51,
      "learning_rate": 0.00023236514522821577,
      "loss": 0.1357,
      "step": 11100
    },
    {
      "epoch": 11.51,
      "eval_google_bleu": 0.40408064479411665,
      "eval_loss": 0.17169393599033356,
      "eval_runtime": 345.2605,
      "eval_samples_per_second": 44.694,
      "eval_steps_per_second": 1.399,
      "step": 11100
    },
    {
      "epoch": 11.83,
      "learning_rate": 0.0002116182572614108,
      "loss": 0.1335,
      "step": 11400
    },
    {
      "epoch": 11.83,
      "eval_google_bleu": 0.4105103547726813,
      "eval_loss": 0.15817226469516754,
      "eval_runtime": 345.0386,
      "eval_samples_per_second": 44.723,
      "eval_steps_per_second": 1.4,
      "step": 11400
    },
    {
      "epoch": 12.14,
      "learning_rate": 0.0001908713692946058,
      "loss": 0.119,
      "step": 11700
    },
    {
      "epoch": 12.14,
      "eval_google_bleu": 0.4141623418407881,
      "eval_loss": 0.1521884799003601,
      "eval_runtime": 344.7777,
      "eval_samples_per_second": 44.756,
      "eval_steps_per_second": 1.401,
      "step": 11700
    },
    {
      "epoch": 12.45,
      "learning_rate": 0.00017012448132780084,
      "loss": 0.102,
      "step": 12000
    },
    {
      "epoch": 12.45,
      "eval_google_bleu": 0.4170916396229331,
      "eval_loss": 0.150096133351326,
      "eval_runtime": 344.5413,
      "eval_samples_per_second": 44.787,
      "eval_steps_per_second": 1.402,
      "step": 12000
    },
    {
      "epoch": 12.76,
      "learning_rate": 0.00014937759336099585,
      "loss": 0.1014,
      "step": 12300
    },
    {
      "epoch": 12.76,
      "eval_google_bleu": 0.4215356451396477,
      "eval_loss": 0.1389758437871933,
      "eval_runtime": 343.8578,
      "eval_samples_per_second": 44.876,
      "eval_steps_per_second": 1.405,
      "step": 12300
    },
    {
      "epoch": 13.07,
      "learning_rate": 0.00012863070539419086,
      "loss": 0.0938,
      "step": 12600
    },
    {
      "epoch": 13.07,
      "eval_google_bleu": 0.42419893182923646,
      "eval_loss": 0.13169047236442566,
      "eval_runtime": 345.236,
      "eval_samples_per_second": 44.697,
      "eval_steps_per_second": 1.399,
      "step": 12600
    },
    {
      "epoch": 13.38,
      "learning_rate": 0.0001078838174273859,
      "loss": 0.0756,
      "step": 12900
    },
    {
      "epoch": 13.38,
      "eval_google_bleu": 0.4277791960457889,
      "eval_loss": 0.12835000455379486,
      "eval_runtime": 345.2858,
      "eval_samples_per_second": 44.691,
      "eval_steps_per_second": 1.399,
      "step": 12900
    },
    {
      "epoch": 13.69,
      "learning_rate": 8.713692946058092e-05,
      "loss": 0.0746,
      "step": 13200
    },
    {
      "epoch": 13.69,
      "eval_google_bleu": 0.43310316226520934,
      "eval_loss": 0.12244871258735657,
      "eval_runtime": 344.2998,
      "eval_samples_per_second": 44.819,
      "eval_steps_per_second": 1.403,
      "step": 13200
    },
    {
      "epoch": 14.0,
      "learning_rate": 6.639004149377594e-05,
      "loss": 0.071,
      "step": 13500
    },
    {
      "epoch": 14.0,
      "eval_google_bleu": 0.4362318597352654,
      "eval_loss": 0.11750882863998413,
      "eval_runtime": 344.9812,
      "eval_samples_per_second": 44.73,
      "eval_steps_per_second": 1.4,
      "step": 13500
    },
    {
      "epoch": 14.32,
      "learning_rate": 4.564315352697095e-05,
      "loss": 0.0564,
      "step": 13800
    },
    {
      "epoch": 14.32,
      "eval_google_bleu": 0.43759814345546477,
      "eval_loss": 0.11728513240814209,
      "eval_runtime": 344.2434,
      "eval_samples_per_second": 44.826,
      "eval_steps_per_second": 1.403,
      "step": 13800
    },
    {
      "epoch": 14.63,
      "learning_rate": 2.4896265560165973e-05,
      "loss": 0.0533,
      "step": 14100
    },
    {
      "epoch": 14.63,
      "eval_google_bleu": 0.4387776387150605,
      "eval_loss": 0.11408372223377228,
      "eval_runtime": 343.7735,
      "eval_samples_per_second": 44.887,
      "eval_steps_per_second": 1.405,
      "step": 14100
    },
    {
      "epoch": 14.94,
      "learning_rate": 4.149377593360996e-06,
      "loss": 0.0535,
      "step": 14400
    },
    {
      "epoch": 14.94,
      "eval_google_bleu": 0.4404605929546923,
      "eval_loss": 0.11113496124744415,
      "eval_runtime": 344.4281,
      "eval_samples_per_second": 44.802,
      "eval_steps_per_second": 1.402,
      "step": 14400
    }
  ],
  "max_steps": 14460,
  "num_train_epochs": 15,
  "total_flos": 2.7791905497716736e+16,
  "trial_name": null,
  "trial_params": null
}