{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9696311523096615,
  "global_step": 25000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02,
      "learning_rate": 4.980607376953807e-05,
      "loss": 0.2765,
      "step": 500
    },
    {
      "epoch": 0.02,
      "eval_bleu": 31.7549,
      "eval_chrf++": 58.7253,
      "eval_gen_len": 22.1982,
      "eval_loss": 1.1465204954147339,
      "eval_runtime": 2452.853,
      "eval_samples_per_second": 6.832,
      "eval_spbleu": 45.4525,
      "eval_steps_per_second": 0.683,
      "eval_ter": 56.1665,
      "step": 500
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.9612147539076136e-05,
      "loss": 0.2494,
      "step": 1000
    },
    {
      "epoch": 0.04,
      "eval_bleu": 30.6155,
      "eval_chrf++": 58.4159,
      "eval_gen_len": 22.5912,
      "eval_loss": 1.1881319284439087,
      "eval_runtime": 2483.4163,
      "eval_samples_per_second": 6.748,
      "eval_spbleu": 43.8898,
      "eval_steps_per_second": 0.675,
      "eval_ter": 57.8655,
      "step": 1000
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.94182213086142e-05,
      "loss": 0.2576,
      "step": 1500
    },
    {
      "epoch": 0.06,
      "eval_bleu": 29.789,
      "eval_chrf++": 57.4443,
      "eval_gen_len": 22.689,
      "eval_loss": 1.1570631265640259,
      "eval_runtime": 2516.9829,
      "eval_samples_per_second": 6.658,
      "eval_spbleu": 43.1911,
      "eval_steps_per_second": 0.666,
      "eval_ter": 59.1299,
      "step": 1500
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.922429507815227e-05,
      "loss": 0.2624,
      "step": 2000
    },
    {
      "epoch": 0.08,
      "eval_bleu": 31.0288,
      "eval_chrf++": 58.4502,
      "eval_gen_len": 22.467,
      "eval_loss": 1.1633208990097046,
      "eval_runtime": 2478.0629,
      "eval_samples_per_second": 6.763,
      "eval_spbleu": 44.5894,
      "eval_steps_per_second": 0.676,
      "eval_ter": 57.6651,
      "step": 2000
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.9030368847690336e-05,
      "loss": 0.2284,
      "step": 2500
    },
    {
      "epoch": 0.1,
      "eval_bleu": 31.2885,
      "eval_chrf++": 58.0527,
      "eval_gen_len": 22.7715,
      "eval_loss": 1.1723754405975342,
      "eval_runtime": 2538.3187,
      "eval_samples_per_second": 6.602,
      "eval_spbleu": 44.5546,
      "eval_steps_per_second": 0.66,
      "eval_ter": 57.8341,
      "step": 2500
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.88364426172284e-05,
      "loss": 0.2314,
      "step": 3000
    },
    {
      "epoch": 0.12,
      "eval_bleu": 30.7414,
      "eval_chrf++": 58.1161,
      "eval_gen_len": 22.5233,
      "eval_loss": 1.1770201921463013,
      "eval_runtime": 2476.3422,
      "eval_samples_per_second": 6.767,
      "eval_spbleu": 44.334,
      "eval_steps_per_second": 0.677,
      "eval_ter": 58.228,
      "step": 3000
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.8642516386766476e-05,
      "loss": 0.2294,
      "step": 3500
    },
    {
      "epoch": 0.14,
      "eval_bleu": 31.7781,
      "eval_chrf++": 58.9642,
      "eval_gen_len": 22.4762,
      "eval_loss": 1.2094552516937256,
      "eval_runtime": 2449.7863,
      "eval_samples_per_second": 6.841,
      "eval_spbleu": 45.3044,
      "eval_steps_per_second": 0.684,
      "eval_ter": 56.8016,
      "step": 3500
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.844859015630454e-05,
      "loss": 0.2457,
      "step": 4000
    },
    {
      "epoch": 0.16,
      "eval_bleu": 31.0111,
      "eval_chrf++": 58.203,
      "eval_gen_len": 22.8377,
      "eval_loss": 1.1406781673431396,
      "eval_runtime": 2532.244,
      "eval_samples_per_second": 6.618,
      "eval_spbleu": 44.1543,
      "eval_steps_per_second": 0.662,
      "eval_ter": 57.7781,
      "step": 4000
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.825466392584262e-05,
      "loss": 0.5335,
      "step": 4500
    },
    {
      "epoch": 0.17,
      "eval_bleu": 31.2577,
      "eval_chrf++": 58.4688,
      "eval_gen_len": 22.6196,
      "eval_loss": 1.0519380569458008,
      "eval_runtime": 2534.3052,
      "eval_samples_per_second": 6.612,
      "eval_spbleu": 44.8062,
      "eval_steps_per_second": 0.661,
      "eval_ter": 57.333,
      "step": 4500
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.8060737695380683e-05,
      "loss": 0.5162,
      "step": 5000
    },
    {
      "epoch": 0.19,
      "eval_bleu": 32.2483,
      "eval_chrf++": 59.2532,
      "eval_gen_len": 22.461,
      "eval_loss": 1.0528730154037476,
      "eval_runtime": 2496.711,
      "eval_samples_per_second": 6.712,
      "eval_spbleu": 45.5671,
      "eval_steps_per_second": 0.671,
      "eval_ter": 56.2061,
      "step": 5000
    },
    {
      "epoch": 0.21,
      "learning_rate": 4.786681146491875e-05,
      "loss": 0.5135,
      "step": 5500
    },
    {
      "epoch": 0.21,
      "eval_bleu": 32.273,
      "eval_chrf++": 59.7056,
      "eval_gen_len": 22.429,
      "eval_loss": 1.0336111783981323,
      "eval_runtime": 2482.8365,
      "eval_samples_per_second": 6.75,
      "eval_spbleu": 46.0401,
      "eval_steps_per_second": 0.675,
      "eval_ter": 56.4706,
      "step": 5500
    },
    {
      "epoch": 0.23,
      "learning_rate": 4.767288523445682e-05,
      "loss": 0.5227,
      "step": 6000
    },
    {
      "epoch": 0.23,
      "eval_bleu": 32.9242,
      "eval_chrf++": 59.8998,
      "eval_gen_len": 22.6151,
      "eval_loss": 1.0207685232162476,
      "eval_runtime": 2489.8807,
      "eval_samples_per_second": 6.73,
      "eval_spbleu": 46.3519,
      "eval_steps_per_second": 0.673,
      "eval_ter": 55.7539,
      "step": 6000
    },
    {
      "epoch": 0.25,
      "learning_rate": 4.7478959003994884e-05,
      "loss": 0.5181,
      "step": 6500
    },
    {
      "epoch": 0.25,
      "eval_bleu": 33.276,
      "eval_chrf++": 60.0419,
      "eval_gen_len": 22.3479,
      "eval_loss": 1.0149798393249512,
      "eval_runtime": 2483.325,
      "eval_samples_per_second": 6.748,
      "eval_spbleu": 46.8525,
      "eval_steps_per_second": 0.675,
      "eval_ter": 55.268,
      "step": 6500
    },
    {
      "epoch": 0.27,
      "learning_rate": 4.728503277353295e-05,
      "loss": 0.4933,
      "step": 7000
    },
    {
      "epoch": 0.27,
      "eval_bleu": 33.8673,
      "eval_chrf++": 60.2337,
      "eval_gen_len": 22.3286,
      "eval_loss": 1.013890027999878,
      "eval_runtime": 2447.8332,
      "eval_samples_per_second": 6.846,
      "eval_spbleu": 47.5698,
      "eval_steps_per_second": 0.685,
      "eval_ter": 54.9242,
      "step": 7000
    },
    {
      "epoch": 0.29,
      "learning_rate": 4.709110654307102e-05,
      "loss": 0.5043,
      "step": 7500
    },
    {
      "epoch": 0.29,
      "eval_bleu": 33.4038,
      "eval_chrf++": 60.1428,
      "eval_gen_len": 22.5602,
      "eval_loss": 1.0105745792388916,
      "eval_runtime": 2487.4813,
      "eval_samples_per_second": 6.737,
      "eval_spbleu": 47.2035,
      "eval_steps_per_second": 0.674,
      "eval_ter": 55.3892,
      "step": 7500
    },
    {
      "epoch": 0.31,
      "learning_rate": 4.6897180312609084e-05,
      "loss": 0.5013,
      "step": 8000
    },
    {
      "epoch": 0.31,
      "eval_bleu": 34.3818,
      "eval_chrf++": 60.7806,
      "eval_gen_len": 22.365,
      "eval_loss": 1.0027358531951904,
      "eval_runtime": 2471.8949,
      "eval_samples_per_second": 6.779,
      "eval_spbleu": 47.8553,
      "eval_steps_per_second": 0.678,
      "eval_ter": 54.3031,
      "step": 8000
    },
    {
      "epoch": 0.33,
      "learning_rate": 4.670325408214715e-05,
      "loss": 0.4898,
      "step": 8500
    },
    {
      "epoch": 0.33,
      "eval_bleu": 34.5081,
      "eval_chrf++": 60.6054,
      "eval_gen_len": 22.1483,
      "eval_loss": 0.9998334646224976,
      "eval_runtime": 2464.0531,
      "eval_samples_per_second": 6.801,
      "eval_spbleu": 48.0651,
      "eval_steps_per_second": 0.68,
      "eval_ter": 53.7379,
      "step": 8500
    },
    {
      "epoch": 0.35,
      "learning_rate": 4.650932785168522e-05,
      "loss": 0.5011,
      "step": 9000
    },
    {
      "epoch": 0.35,
      "eval_bleu": 33.9543,
      "eval_chrf++": 60.3165,
      "eval_gen_len": 22.2106,
      "eval_loss": 0.9939271211624146,
      "eval_runtime": 2455.6708,
      "eval_samples_per_second": 6.824,
      "eval_spbleu": 47.6339,
      "eval_steps_per_second": 0.683,
      "eval_ter": 54.3159,
      "step": 9000
    },
    {
      "epoch": 0.37,
      "learning_rate": 4.6315401621223284e-05,
      "loss": 0.5014,
      "step": 9500
    },
    {
      "epoch": 0.37,
      "eval_bleu": 35.12,
      "eval_chrf++": 61.0072,
      "eval_gen_len": 22.2949,
      "eval_loss": 0.9882155060768127,
      "eval_runtime": 2466.3803,
      "eval_samples_per_second": 6.795,
      "eval_spbleu": 48.5338,
      "eval_steps_per_second": 0.68,
      "eval_ter": 53.6108,
      "step": 9500
    },
    {
      "epoch": 0.39,
      "learning_rate": 4.612147539076135e-05,
      "loss": 0.4861,
      "step": 10000
    },
    {
      "epoch": 0.39,
      "eval_bleu": 34.2002,
      "eval_chrf++": 60.9191,
      "eval_gen_len": 22.472,
      "eval_loss": 0.9832409620285034,
      "eval_runtime": 2528.4848,
      "eval_samples_per_second": 6.628,
      "eval_spbleu": 47.7142,
      "eval_steps_per_second": 0.663,
      "eval_ter": 54.9125,
      "step": 10000
    },
    {
      "epoch": 0.41,
      "learning_rate": 4.5927549160299424e-05,
      "loss": 0.4777,
      "step": 10500
    },
    {
      "epoch": 0.41,
      "eval_bleu": 35.0653,
      "eval_chrf++": 61.3647,
      "eval_gen_len": 22.1853,
      "eval_loss": 0.9780123829841614,
      "eval_runtime": 2478.2202,
      "eval_samples_per_second": 6.762,
      "eval_spbleu": 48.6202,
      "eval_steps_per_second": 0.676,
      "eval_ter": 53.7728,
      "step": 10500
    },
    {
      "epoch": 0.43,
      "learning_rate": 4.573362292983749e-05,
      "loss": 0.4882,
      "step": 11000
    },
    {
      "epoch": 0.43,
      "eval_bleu": 34.4099,
      "eval_chrf++": 60.8297,
      "eval_gen_len": 22.4734,
      "eval_loss": 0.9755488038063049,
      "eval_runtime": 2481.0601,
      "eval_samples_per_second": 6.754,
      "eval_spbleu": 48.1346,
      "eval_steps_per_second": 0.676,
      "eval_ter": 54.8636,
      "step": 11000
    },
    {
      "epoch": 0.45,
      "learning_rate": 4.5539696699375565e-05,
      "loss": 0.489,
      "step": 11500
    },
    {
      "epoch": 0.45,
      "eval_bleu": 35.3644,
      "eval_chrf++": 61.3208,
      "eval_gen_len": 22.2351,
      "eval_loss": 0.9737293720245361,
      "eval_runtime": 2438.4039,
      "eval_samples_per_second": 6.873,
      "eval_spbleu": 49.0035,
      "eval_steps_per_second": 0.687,
      "eval_ter": 53.0538,
      "step": 11500
    },
    {
      "epoch": 0.47,
      "learning_rate": 4.534577046891363e-05,
      "loss": 0.4778,
      "step": 12000
    },
    {
      "epoch": 0.47,
      "eval_bleu": 35.2536,
      "eval_chrf++": 61.1847,
      "eval_gen_len": 22.3455,
      "eval_loss": 0.9639460444450378,
      "eval_runtime": 2456.4058,
      "eval_samples_per_second": 6.822,
      "eval_spbleu": 48.8273,
      "eval_steps_per_second": 0.682,
      "eval_ter": 53.7647,
      "step": 12000
    },
    {
      "epoch": 0.48,
      "learning_rate": 4.51518442384517e-05,
      "loss": 0.4659,
      "step": 12500
    },
    {
      "epoch": 0.48,
      "eval_bleu": 35.0392,
      "eval_chrf++": 61.2274,
      "eval_gen_len": 22.6852,
      "eval_loss": 0.9616146683692932,
      "eval_runtime": 2525.3804,
      "eval_samples_per_second": 6.636,
      "eval_spbleu": 48.3755,
      "eval_steps_per_second": 0.664,
      "eval_ter": 54.1807,
      "step": 12500
    },
    {
      "epoch": 0.5,
      "learning_rate": 4.4957918007989765e-05,
      "loss": 0.4882,
      "step": 13000
    },
    {
      "epoch": 0.5,
      "eval_bleu": 35.2876,
      "eval_chrf++": 61.5126,
      "eval_gen_len": 22.4171,
      "eval_loss": 0.9623438715934753,
      "eval_runtime": 2448.0204,
      "eval_samples_per_second": 6.846,
      "eval_spbleu": 48.9762,
      "eval_steps_per_second": 0.685,
      "eval_ter": 53.5118,
      "step": 13000
    },
    {
      "epoch": 0.52,
      "learning_rate": 4.476399177752783e-05,
      "loss": 0.4757,
      "step": 13500
    },
    {
      "epoch": 0.52,
      "eval_bleu": 36.1928,
      "eval_chrf++": 61.7736,
      "eval_gen_len": 22.2772,
      "eval_loss": 0.9600822925567627,
      "eval_runtime": 2458.1658,
      "eval_samples_per_second": 6.817,
      "eval_spbleu": 49.4709,
      "eval_steps_per_second": 0.682,
      "eval_ter": 52.9163,
      "step": 13500
    },
    {
      "epoch": 0.54,
      "learning_rate": 4.45700655470659e-05,
      "loss": 0.4532,
      "step": 14000
    },
    {
      "epoch": 0.54,
      "eval_bleu": 35.3757,
      "eval_chrf++": 61.4145,
      "eval_gen_len": 22.4656,
      "eval_loss": 0.9569535255432129,
      "eval_runtime": 2509.366,
      "eval_samples_per_second": 6.678,
      "eval_spbleu": 48.8161,
      "eval_steps_per_second": 0.668,
      "eval_ter": 53.6143,
      "step": 14000
    },
    {
      "epoch": 0.56,
      "learning_rate": 4.4376139316603965e-05,
      "loss": 0.4624,
      "step": 14500
    },
    {
      "epoch": 0.56,
      "eval_bleu": 35.5809,
      "eval_chrf++": 61.5561,
      "eval_gen_len": 22.6749,
      "eval_loss": 0.9505798816680908,
      "eval_runtime": 2539.8539,
      "eval_samples_per_second": 6.598,
      "eval_spbleu": 48.7951,
      "eval_steps_per_second": 0.66,
      "eval_ter": 53.478,
      "step": 14500
    },
    {
      "epoch": 0.58,
      "learning_rate": 4.418221308614203e-05,
      "loss": 0.4731,
      "step": 15000
    },
    {
      "epoch": 0.58,
      "eval_bleu": 36.0873,
      "eval_chrf++": 61.7264,
      "eval_gen_len": 22.5004,
      "eval_loss": 0.9501732587814331,
      "eval_runtime": 2492.0241,
      "eval_samples_per_second": 6.725,
      "eval_spbleu": 49.4647,
      "eval_steps_per_second": 0.673,
      "eval_ter": 52.7939,
      "step": 15000
    },
    {
      "epoch": 0.6,
      "learning_rate": 4.39882868556801e-05,
      "loss": 0.4794,
      "step": 15500
    },
    {
      "epoch": 0.6,
      "eval_bleu": 36.1453,
      "eval_chrf++": 61.9504,
      "eval_gen_len": 22.3361,
      "eval_loss": 0.9433434009552002,
      "eval_runtime": 2532.9385,
      "eval_samples_per_second": 6.616,
      "eval_spbleu": 49.3902,
      "eval_steps_per_second": 0.662,
      "eval_ter": 52.9862,
      "step": 15500
    },
    {
      "epoch": 0.62,
      "learning_rate": 4.3794360625218165e-05,
      "loss": 0.4616,
      "step": 16000
    },
    {
      "epoch": 0.62,
      "eval_bleu": 36.4514,
      "eval_chrf++": 62.2395,
      "eval_gen_len": 22.4107,
      "eval_loss": 0.9410406351089478,
      "eval_runtime": 2518.4187,
      "eval_samples_per_second": 6.654,
      "eval_spbleu": 49.7739,
      "eval_steps_per_second": 0.665,
      "eval_ter": 52.5317,
      "step": 16000
    },
    {
      "epoch": 0.64,
      "learning_rate": 4.360043439475623e-05,
      "loss": 0.4768,
      "step": 16500
    },
    {
      "epoch": 0.64,
      "eval_bleu": 36.8462,
      "eval_chrf++": 62.2425,
      "eval_gen_len": 22.348,
      "eval_loss": 0.9391294717788696,
      "eval_runtime": 2464.2252,
      "eval_samples_per_second": 6.801,
      "eval_spbleu": 50.2231,
      "eval_steps_per_second": 0.68,
      "eval_ter": 52.1938,
      "step": 16500
    },
    {
      "epoch": 0.66,
      "learning_rate": 4.3406508164294306e-05,
      "loss": 0.4482,
      "step": 17000
    },
    {
      "epoch": 0.66,
      "eval_bleu": 36.9137,
      "eval_chrf++": 62.5127,
      "eval_gen_len": 22.5448,
      "eval_loss": 0.9357725381851196,
      "eval_runtime": 2549.3781,
      "eval_samples_per_second": 6.573,
      "eval_spbleu": 49.9168,
      "eval_steps_per_second": 0.657,
      "eval_ter": 52.3604,
      "step": 17000
    },
    {
      "epoch": 0.68,
      "learning_rate": 4.321258193383237e-05,
      "loss": 0.4648,
      "step": 17500
    },
    {
      "epoch": 0.68,
      "eval_bleu": 37.1733,
      "eval_chrf++": 62.7256,
      "eval_gen_len": 22.4406,
      "eval_loss": 0.935612678527832,
      "eval_runtime": 2499.4089,
      "eval_samples_per_second": 6.705,
      "eval_spbleu": 50.417,
      "eval_steps_per_second": 0.671,
      "eval_ter": 51.7731,
      "step": 17500
    },
    {
      "epoch": 0.7,
      "learning_rate": 4.301865570337044e-05,
      "loss": 0.4642,
      "step": 18000
    },
    {
      "epoch": 0.7,
      "eval_bleu": 37.1839,
      "eval_chrf++": 62.648,
      "eval_gen_len": 22.4244,
      "eval_loss": 0.9331343770027161,
      "eval_runtime": 2493.1006,
      "eval_samples_per_second": 6.722,
      "eval_spbleu": 50.1902,
      "eval_steps_per_second": 0.672,
      "eval_ter": 51.702,
      "step": 18000
    },
    {
      "epoch": 0.72,
      "learning_rate": 4.2824729472908506e-05,
      "loss": 0.4691,
      "step": 18500
    },
    {
      "epoch": 0.72,
      "eval_bleu": 37.1915,
      "eval_chrf++": 62.7489,
      "eval_gen_len": 22.4675,
      "eval_loss": 0.9320312142372131,
      "eval_runtime": 2469.7829,
      "eval_samples_per_second": 6.785,
      "eval_spbleu": 50.4819,
      "eval_steps_per_second": 0.679,
      "eval_ter": 51.5528,
      "step": 18500
    },
    {
      "epoch": 0.74,
      "learning_rate": 4.263080324244658e-05,
      "loss": 0.4676,
      "step": 19000
    },
    {
      "epoch": 0.74,
      "eval_bleu": 36.4381,
      "eval_chrf++": 62.2257,
      "eval_gen_len": 22.7873,
      "eval_loss": 0.9254695177078247,
      "eval_runtime": 2555.4331,
      "eval_samples_per_second": 6.558,
      "eval_spbleu": 49.2678,
      "eval_steps_per_second": 0.656,
      "eval_ter": 52.7193,
      "step": 19000
    },
    {
      "epoch": 0.76,
      "learning_rate": 4.2436877011984646e-05,
      "loss": 0.4515,
      "step": 19500
    },
    {
      "epoch": 0.76,
      "eval_bleu": 37.2335,
      "eval_chrf++": 62.7349,
      "eval_gen_len": 22.3555,
      "eval_loss": 0.9193410277366638,
      "eval_runtime": 2451.9368,
      "eval_samples_per_second": 6.835,
      "eval_spbleu": 50.8378,
      "eval_steps_per_second": 0.684,
      "eval_ter": 51.103,
      "step": 19500
    },
    {
      "epoch": 0.78,
      "learning_rate": 4.224295078152271e-05,
      "loss": 0.4605,
      "step": 20000
    },
    {
      "epoch": 0.78,
      "eval_bleu": 37.615,
      "eval_chrf++": 62.9994,
      "eval_gen_len": 22.4271,
      "eval_loss": 0.920886218547821,
      "eval_runtime": 2460.502,
      "eval_samples_per_second": 6.811,
      "eval_spbleu": 50.9187,
      "eval_steps_per_second": 0.681,
      "eval_ter": 51.1974,
      "step": 20000
    },
    {
      "epoch": 0.8,
      "learning_rate": 4.204902455106078e-05,
      "loss": 0.462,
      "step": 20500
    },
    {
      "epoch": 0.8,
      "eval_bleu": 37.4618,
      "eval_chrf++": 62.7306,
      "eval_gen_len": 22.3868,
      "eval_loss": 0.9150309562683105,
      "eval_runtime": 2485.0566,
      "eval_samples_per_second": 6.744,
      "eval_spbleu": 50.7521,
      "eval_steps_per_second": 0.674,
      "eval_ter": 51.4503,
      "step": 20500
    },
    {
      "epoch": 0.81,
      "learning_rate": 4.1855098320598846e-05,
      "loss": 0.4584,
      "step": 21000
    },
    {
      "epoch": 0.81,
      "eval_bleu": 37.6302,
      "eval_chrf++": 62.7543,
      "eval_gen_len": 22.2999,
      "eval_loss": 0.9146909713745117,
      "eval_runtime": 2475.6351,
      "eval_samples_per_second": 6.769,
      "eval_spbleu": 50.9152,
      "eval_steps_per_second": 0.677,
      "eval_ter": 51.2347,
      "step": 21000
    },
    {
      "epoch": 0.83,
      "learning_rate": 4.166117209013691e-05,
      "loss": 0.4511,
      "step": 21500
    },
    {
      "epoch": 0.83,
      "eval_bleu": 37.4586,
      "eval_chrf++": 62.7716,
      "eval_gen_len": 22.491,
      "eval_loss": 0.9128248691558838,
      "eval_runtime": 2475.0464,
      "eval_samples_per_second": 6.771,
      "eval_spbleu": 50.7685,
      "eval_steps_per_second": 0.677,
      "eval_ter": 51.1974,
      "step": 21500
    },
    {
      "epoch": 0.85,
      "learning_rate": 4.146724585967498e-05,
      "loss": 0.4463,
      "step": 22000
    },
    {
      "epoch": 0.85,
      "eval_bleu": 37.352,
      "eval_chrf++": 62.9395,
      "eval_gen_len": 22.5926,
      "eval_loss": 0.9129999876022339,
      "eval_runtime": 2497.6416,
      "eval_samples_per_second": 6.71,
      "eval_spbleu": 50.4575,
      "eval_steps_per_second": 0.671,
      "eval_ter": 51.5074,
      "step": 22000
    },
    {
      "epoch": 0.87,
      "learning_rate": 4.1273319629213047e-05,
      "loss": 0.4442,
      "step": 22500
    },
    {
      "epoch": 0.87,
      "eval_bleu": 37.2191,
      "eval_chrf++": 62.6919,
      "eval_gen_len": 22.3296,
      "eval_loss": 0.913128137588501,
      "eval_runtime": 2479.804,
      "eval_samples_per_second": 6.758,
      "eval_spbleu": 50.5573,
      "eval_steps_per_second": 0.676,
      "eval_ter": 51.8057,
      "step": 22500
    },
    {
      "epoch": 0.89,
      "learning_rate": 4.107939339875111e-05,
      "loss": 0.4398,
      "step": 23000
    },
    {
      "epoch": 0.89,
      "eval_bleu": 37.4159,
      "eval_chrf++": 62.7847,
      "eval_gen_len": 22.3621,
      "eval_loss": 0.9087494611740112,
      "eval_runtime": 2492.9389,
      "eval_samples_per_second": 6.722,
      "eval_spbleu": 50.7329,
      "eval_steps_per_second": 0.672,
      "eval_ter": 51.1589,
      "step": 23000
    },
    {
      "epoch": 0.91,
      "learning_rate": 4.088546716828918e-05,
      "loss": 0.4601,
      "step": 23500
    },
    {
      "epoch": 0.91,
      "eval_bleu": 37.6768,
      "eval_chrf++": 63.0053,
      "eval_gen_len": 22.2636,
      "eval_loss": 0.9055464267730713,
      "eval_runtime": 2494.326,
      "eval_samples_per_second": 6.718,
      "eval_spbleu": 50.7562,
      "eval_steps_per_second": 0.672,
      "eval_ter": 51.2394,
      "step": 23500
    },
    {
      "epoch": 0.93,
      "learning_rate": 4.0691540937827254e-05,
      "loss": 0.4436,
      "step": 24000
    },
    {
      "epoch": 0.93,
      "eval_bleu": 37.7386,
      "eval_chrf++": 63.1409,
      "eval_gen_len": 22.6223,
      "eval_loss": 0.9039002656936646,
      "eval_runtime": 2526.9249,
      "eval_samples_per_second": 6.632,
      "eval_spbleu": 50.7251,
      "eval_steps_per_second": 0.663,
      "eval_ter": 51.3804,
      "step": 24000
    },
    {
      "epoch": 0.95,
      "learning_rate": 4.049761470736532e-05,
      "loss": 0.4654,
      "step": 24500
    },
    {
      "epoch": 0.95,
      "eval_bleu": 38.0304,
      "eval_chrf++": 63.2108,
      "eval_gen_len": 22.343,
      "eval_loss": 0.9021787047386169,
      "eval_runtime": 2462.9496,
      "eval_samples_per_second": 6.804,
      "eval_spbleu": 51.3142,
      "eval_steps_per_second": 0.68,
      "eval_ter": 50.4096,
      "step": 24500
    },
    {
      "epoch": 0.97,
      "learning_rate": 4.030368847690339e-05,
      "loss": 0.4485,
      "step": 25000
    },
    {
      "epoch": 0.97,
      "eval_bleu": 38.1296,
      "eval_chrf++": 63.3962,
      "eval_gen_len": 22.518,
      "eval_loss": 0.9031027555465698,
      "eval_runtime": 2500.9301,
      "eval_samples_per_second": 6.701,
      "eval_spbleu": 51.1976,
      "eval_steps_per_second": 0.67,
      "eval_ter": 51.0377,
      "step": 25000
    }
  ],
  "max_steps": 128915,
  "num_train_epochs": 5,
  "total_flos": 2.127904100057088e+17,
  "trial_name": null,
  "trial_params": null
}