finetuned-nllb-1.3B / trainer_state.json
Kleber's picture
Upload folder using huggingface_hub
6ec1322
raw
history blame
23.9 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9696311523096615,
"global_step": 25000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02,
"learning_rate": 4.980607376953807e-05,
"loss": 0.2765,
"step": 500
},
{
"epoch": 0.02,
"eval_bleu": 31.7549,
"eval_chrf++": 58.7253,
"eval_gen_len": 22.1982,
"eval_loss": 1.1465204954147339,
"eval_runtime": 2452.853,
"eval_samples_per_second": 6.832,
"eval_spbleu": 45.4525,
"eval_steps_per_second": 0.683,
"eval_ter": 56.1665,
"step": 500
},
{
"epoch": 0.04,
"learning_rate": 4.9612147539076136e-05,
"loss": 0.2494,
"step": 1000
},
{
"epoch": 0.04,
"eval_bleu": 30.6155,
"eval_chrf++": 58.4159,
"eval_gen_len": 22.5912,
"eval_loss": 1.1881319284439087,
"eval_runtime": 2483.4163,
"eval_samples_per_second": 6.748,
"eval_spbleu": 43.8898,
"eval_steps_per_second": 0.675,
"eval_ter": 57.8655,
"step": 1000
},
{
"epoch": 0.06,
"learning_rate": 4.94182213086142e-05,
"loss": 0.2576,
"step": 1500
},
{
"epoch": 0.06,
"eval_bleu": 29.789,
"eval_chrf++": 57.4443,
"eval_gen_len": 22.689,
"eval_loss": 1.1570631265640259,
"eval_runtime": 2516.9829,
"eval_samples_per_second": 6.658,
"eval_spbleu": 43.1911,
"eval_steps_per_second": 0.666,
"eval_ter": 59.1299,
"step": 1500
},
{
"epoch": 0.08,
"learning_rate": 4.922429507815227e-05,
"loss": 0.2624,
"step": 2000
},
{
"epoch": 0.08,
"eval_bleu": 31.0288,
"eval_chrf++": 58.4502,
"eval_gen_len": 22.467,
"eval_loss": 1.1633208990097046,
"eval_runtime": 2478.0629,
"eval_samples_per_second": 6.763,
"eval_spbleu": 44.5894,
"eval_steps_per_second": 0.676,
"eval_ter": 57.6651,
"step": 2000
},
{
"epoch": 0.1,
"learning_rate": 4.9030368847690336e-05,
"loss": 0.2284,
"step": 2500
},
{
"epoch": 0.1,
"eval_bleu": 31.2885,
"eval_chrf++": 58.0527,
"eval_gen_len": 22.7715,
"eval_loss": 1.1723754405975342,
"eval_runtime": 2538.3187,
"eval_samples_per_second": 6.602,
"eval_spbleu": 44.5546,
"eval_steps_per_second": 0.66,
"eval_ter": 57.8341,
"step": 2500
},
{
"epoch": 0.12,
"learning_rate": 4.88364426172284e-05,
"loss": 0.2314,
"step": 3000
},
{
"epoch": 0.12,
"eval_bleu": 30.7414,
"eval_chrf++": 58.1161,
"eval_gen_len": 22.5233,
"eval_loss": 1.1770201921463013,
"eval_runtime": 2476.3422,
"eval_samples_per_second": 6.767,
"eval_spbleu": 44.334,
"eval_steps_per_second": 0.677,
"eval_ter": 58.228,
"step": 3000
},
{
"epoch": 0.14,
"learning_rate": 4.8642516386766476e-05,
"loss": 0.2294,
"step": 3500
},
{
"epoch": 0.14,
"eval_bleu": 31.7781,
"eval_chrf++": 58.9642,
"eval_gen_len": 22.4762,
"eval_loss": 1.2094552516937256,
"eval_runtime": 2449.7863,
"eval_samples_per_second": 6.841,
"eval_spbleu": 45.3044,
"eval_steps_per_second": 0.684,
"eval_ter": 56.8016,
"step": 3500
},
{
"epoch": 0.16,
"learning_rate": 4.844859015630454e-05,
"loss": 0.2457,
"step": 4000
},
{
"epoch": 0.16,
"eval_bleu": 31.0111,
"eval_chrf++": 58.203,
"eval_gen_len": 22.8377,
"eval_loss": 1.1406781673431396,
"eval_runtime": 2532.244,
"eval_samples_per_second": 6.618,
"eval_spbleu": 44.1543,
"eval_steps_per_second": 0.662,
"eval_ter": 57.7781,
"step": 4000
},
{
"epoch": 0.17,
"learning_rate": 4.825466392584262e-05,
"loss": 0.5335,
"step": 4500
},
{
"epoch": 0.17,
"eval_bleu": 31.2577,
"eval_chrf++": 58.4688,
"eval_gen_len": 22.6196,
"eval_loss": 1.0519380569458008,
"eval_runtime": 2534.3052,
"eval_samples_per_second": 6.612,
"eval_spbleu": 44.8062,
"eval_steps_per_second": 0.661,
"eval_ter": 57.333,
"step": 4500
},
{
"epoch": 0.19,
"learning_rate": 4.8060737695380683e-05,
"loss": 0.5162,
"step": 5000
},
{
"epoch": 0.19,
"eval_bleu": 32.2483,
"eval_chrf++": 59.2532,
"eval_gen_len": 22.461,
"eval_loss": 1.0528730154037476,
"eval_runtime": 2496.711,
"eval_samples_per_second": 6.712,
"eval_spbleu": 45.5671,
"eval_steps_per_second": 0.671,
"eval_ter": 56.2061,
"step": 5000
},
{
"epoch": 0.21,
"learning_rate": 4.786681146491875e-05,
"loss": 0.5135,
"step": 5500
},
{
"epoch": 0.21,
"eval_bleu": 32.273,
"eval_chrf++": 59.7056,
"eval_gen_len": 22.429,
"eval_loss": 1.0336111783981323,
"eval_runtime": 2482.8365,
"eval_samples_per_second": 6.75,
"eval_spbleu": 46.0401,
"eval_steps_per_second": 0.675,
"eval_ter": 56.4706,
"step": 5500
},
{
"epoch": 0.23,
"learning_rate": 4.767288523445682e-05,
"loss": 0.5227,
"step": 6000
},
{
"epoch": 0.23,
"eval_bleu": 32.9242,
"eval_chrf++": 59.8998,
"eval_gen_len": 22.6151,
"eval_loss": 1.0207685232162476,
"eval_runtime": 2489.8807,
"eval_samples_per_second": 6.73,
"eval_spbleu": 46.3519,
"eval_steps_per_second": 0.673,
"eval_ter": 55.7539,
"step": 6000
},
{
"epoch": 0.25,
"learning_rate": 4.7478959003994884e-05,
"loss": 0.5181,
"step": 6500
},
{
"epoch": 0.25,
"eval_bleu": 33.276,
"eval_chrf++": 60.0419,
"eval_gen_len": 22.3479,
"eval_loss": 1.0149798393249512,
"eval_runtime": 2483.325,
"eval_samples_per_second": 6.748,
"eval_spbleu": 46.8525,
"eval_steps_per_second": 0.675,
"eval_ter": 55.268,
"step": 6500
},
{
"epoch": 0.27,
"learning_rate": 4.728503277353295e-05,
"loss": 0.4933,
"step": 7000
},
{
"epoch": 0.27,
"eval_bleu": 33.8673,
"eval_chrf++": 60.2337,
"eval_gen_len": 22.3286,
"eval_loss": 1.013890027999878,
"eval_runtime": 2447.8332,
"eval_samples_per_second": 6.846,
"eval_spbleu": 47.5698,
"eval_steps_per_second": 0.685,
"eval_ter": 54.9242,
"step": 7000
},
{
"epoch": 0.29,
"learning_rate": 4.709110654307102e-05,
"loss": 0.5043,
"step": 7500
},
{
"epoch": 0.29,
"eval_bleu": 33.4038,
"eval_chrf++": 60.1428,
"eval_gen_len": 22.5602,
"eval_loss": 1.0105745792388916,
"eval_runtime": 2487.4813,
"eval_samples_per_second": 6.737,
"eval_spbleu": 47.2035,
"eval_steps_per_second": 0.674,
"eval_ter": 55.3892,
"step": 7500
},
{
"epoch": 0.31,
"learning_rate": 4.6897180312609084e-05,
"loss": 0.5013,
"step": 8000
},
{
"epoch": 0.31,
"eval_bleu": 34.3818,
"eval_chrf++": 60.7806,
"eval_gen_len": 22.365,
"eval_loss": 1.0027358531951904,
"eval_runtime": 2471.8949,
"eval_samples_per_second": 6.779,
"eval_spbleu": 47.8553,
"eval_steps_per_second": 0.678,
"eval_ter": 54.3031,
"step": 8000
},
{
"epoch": 0.33,
"learning_rate": 4.670325408214715e-05,
"loss": 0.4898,
"step": 8500
},
{
"epoch": 0.33,
"eval_bleu": 34.5081,
"eval_chrf++": 60.6054,
"eval_gen_len": 22.1483,
"eval_loss": 0.9998334646224976,
"eval_runtime": 2464.0531,
"eval_samples_per_second": 6.801,
"eval_spbleu": 48.0651,
"eval_steps_per_second": 0.68,
"eval_ter": 53.7379,
"step": 8500
},
{
"epoch": 0.35,
"learning_rate": 4.650932785168522e-05,
"loss": 0.5011,
"step": 9000
},
{
"epoch": 0.35,
"eval_bleu": 33.9543,
"eval_chrf++": 60.3165,
"eval_gen_len": 22.2106,
"eval_loss": 0.9939271211624146,
"eval_runtime": 2455.6708,
"eval_samples_per_second": 6.824,
"eval_spbleu": 47.6339,
"eval_steps_per_second": 0.683,
"eval_ter": 54.3159,
"step": 9000
},
{
"epoch": 0.37,
"learning_rate": 4.6315401621223284e-05,
"loss": 0.5014,
"step": 9500
},
{
"epoch": 0.37,
"eval_bleu": 35.12,
"eval_chrf++": 61.0072,
"eval_gen_len": 22.2949,
"eval_loss": 0.9882155060768127,
"eval_runtime": 2466.3803,
"eval_samples_per_second": 6.795,
"eval_spbleu": 48.5338,
"eval_steps_per_second": 0.68,
"eval_ter": 53.6108,
"step": 9500
},
{
"epoch": 0.39,
"learning_rate": 4.612147539076135e-05,
"loss": 0.4861,
"step": 10000
},
{
"epoch": 0.39,
"eval_bleu": 34.2002,
"eval_chrf++": 60.9191,
"eval_gen_len": 22.472,
"eval_loss": 0.9832409620285034,
"eval_runtime": 2528.4848,
"eval_samples_per_second": 6.628,
"eval_spbleu": 47.7142,
"eval_steps_per_second": 0.663,
"eval_ter": 54.9125,
"step": 10000
},
{
"epoch": 0.41,
"learning_rate": 4.5927549160299424e-05,
"loss": 0.4777,
"step": 10500
},
{
"epoch": 0.41,
"eval_bleu": 35.0653,
"eval_chrf++": 61.3647,
"eval_gen_len": 22.1853,
"eval_loss": 0.9780123829841614,
"eval_runtime": 2478.2202,
"eval_samples_per_second": 6.762,
"eval_spbleu": 48.6202,
"eval_steps_per_second": 0.676,
"eval_ter": 53.7728,
"step": 10500
},
{
"epoch": 0.43,
"learning_rate": 4.573362292983749e-05,
"loss": 0.4882,
"step": 11000
},
{
"epoch": 0.43,
"eval_bleu": 34.4099,
"eval_chrf++": 60.8297,
"eval_gen_len": 22.4734,
"eval_loss": 0.9755488038063049,
"eval_runtime": 2481.0601,
"eval_samples_per_second": 6.754,
"eval_spbleu": 48.1346,
"eval_steps_per_second": 0.676,
"eval_ter": 54.8636,
"step": 11000
},
{
"epoch": 0.45,
"learning_rate": 4.5539696699375565e-05,
"loss": 0.489,
"step": 11500
},
{
"epoch": 0.45,
"eval_bleu": 35.3644,
"eval_chrf++": 61.3208,
"eval_gen_len": 22.2351,
"eval_loss": 0.9737293720245361,
"eval_runtime": 2438.4039,
"eval_samples_per_second": 6.873,
"eval_spbleu": 49.0035,
"eval_steps_per_second": 0.687,
"eval_ter": 53.0538,
"step": 11500
},
{
"epoch": 0.47,
"learning_rate": 4.534577046891363e-05,
"loss": 0.4778,
"step": 12000
},
{
"epoch": 0.47,
"eval_bleu": 35.2536,
"eval_chrf++": 61.1847,
"eval_gen_len": 22.3455,
"eval_loss": 0.9639460444450378,
"eval_runtime": 2456.4058,
"eval_samples_per_second": 6.822,
"eval_spbleu": 48.8273,
"eval_steps_per_second": 0.682,
"eval_ter": 53.7647,
"step": 12000
},
{
"epoch": 0.48,
"learning_rate": 4.51518442384517e-05,
"loss": 0.4659,
"step": 12500
},
{
"epoch": 0.48,
"eval_bleu": 35.0392,
"eval_chrf++": 61.2274,
"eval_gen_len": 22.6852,
"eval_loss": 0.9616146683692932,
"eval_runtime": 2525.3804,
"eval_samples_per_second": 6.636,
"eval_spbleu": 48.3755,
"eval_steps_per_second": 0.664,
"eval_ter": 54.1807,
"step": 12500
},
{
"epoch": 0.5,
"learning_rate": 4.4957918007989765e-05,
"loss": 0.4882,
"step": 13000
},
{
"epoch": 0.5,
"eval_bleu": 35.2876,
"eval_chrf++": 61.5126,
"eval_gen_len": 22.4171,
"eval_loss": 0.9623438715934753,
"eval_runtime": 2448.0204,
"eval_samples_per_second": 6.846,
"eval_spbleu": 48.9762,
"eval_steps_per_second": 0.685,
"eval_ter": 53.5118,
"step": 13000
},
{
"epoch": 0.52,
"learning_rate": 4.476399177752783e-05,
"loss": 0.4757,
"step": 13500
},
{
"epoch": 0.52,
"eval_bleu": 36.1928,
"eval_chrf++": 61.7736,
"eval_gen_len": 22.2772,
"eval_loss": 0.9600822925567627,
"eval_runtime": 2458.1658,
"eval_samples_per_second": 6.817,
"eval_spbleu": 49.4709,
"eval_steps_per_second": 0.682,
"eval_ter": 52.9163,
"step": 13500
},
{
"epoch": 0.54,
"learning_rate": 4.45700655470659e-05,
"loss": 0.4532,
"step": 14000
},
{
"epoch": 0.54,
"eval_bleu": 35.3757,
"eval_chrf++": 61.4145,
"eval_gen_len": 22.4656,
"eval_loss": 0.9569535255432129,
"eval_runtime": 2509.366,
"eval_samples_per_second": 6.678,
"eval_spbleu": 48.8161,
"eval_steps_per_second": 0.668,
"eval_ter": 53.6143,
"step": 14000
},
{
"epoch": 0.56,
"learning_rate": 4.4376139316603965e-05,
"loss": 0.4624,
"step": 14500
},
{
"epoch": 0.56,
"eval_bleu": 35.5809,
"eval_chrf++": 61.5561,
"eval_gen_len": 22.6749,
"eval_loss": 0.9505798816680908,
"eval_runtime": 2539.8539,
"eval_samples_per_second": 6.598,
"eval_spbleu": 48.7951,
"eval_steps_per_second": 0.66,
"eval_ter": 53.478,
"step": 14500
},
{
"epoch": 0.58,
"learning_rate": 4.418221308614203e-05,
"loss": 0.4731,
"step": 15000
},
{
"epoch": 0.58,
"eval_bleu": 36.0873,
"eval_chrf++": 61.7264,
"eval_gen_len": 22.5004,
"eval_loss": 0.9501732587814331,
"eval_runtime": 2492.0241,
"eval_samples_per_second": 6.725,
"eval_spbleu": 49.4647,
"eval_steps_per_second": 0.673,
"eval_ter": 52.7939,
"step": 15000
},
{
"epoch": 0.6,
"learning_rate": 4.39882868556801e-05,
"loss": 0.4794,
"step": 15500
},
{
"epoch": 0.6,
"eval_bleu": 36.1453,
"eval_chrf++": 61.9504,
"eval_gen_len": 22.3361,
"eval_loss": 0.9433434009552002,
"eval_runtime": 2532.9385,
"eval_samples_per_second": 6.616,
"eval_spbleu": 49.3902,
"eval_steps_per_second": 0.662,
"eval_ter": 52.9862,
"step": 15500
},
{
"epoch": 0.62,
"learning_rate": 4.3794360625218165e-05,
"loss": 0.4616,
"step": 16000
},
{
"epoch": 0.62,
"eval_bleu": 36.4514,
"eval_chrf++": 62.2395,
"eval_gen_len": 22.4107,
"eval_loss": 0.9410406351089478,
"eval_runtime": 2518.4187,
"eval_samples_per_second": 6.654,
"eval_spbleu": 49.7739,
"eval_steps_per_second": 0.665,
"eval_ter": 52.5317,
"step": 16000
},
{
"epoch": 0.64,
"learning_rate": 4.360043439475623e-05,
"loss": 0.4768,
"step": 16500
},
{
"epoch": 0.64,
"eval_bleu": 36.8462,
"eval_chrf++": 62.2425,
"eval_gen_len": 22.348,
"eval_loss": 0.9391294717788696,
"eval_runtime": 2464.2252,
"eval_samples_per_second": 6.801,
"eval_spbleu": 50.2231,
"eval_steps_per_second": 0.68,
"eval_ter": 52.1938,
"step": 16500
},
{
"epoch": 0.66,
"learning_rate": 4.3406508164294306e-05,
"loss": 0.4482,
"step": 17000
},
{
"epoch": 0.66,
"eval_bleu": 36.9137,
"eval_chrf++": 62.5127,
"eval_gen_len": 22.5448,
"eval_loss": 0.9357725381851196,
"eval_runtime": 2549.3781,
"eval_samples_per_second": 6.573,
"eval_spbleu": 49.9168,
"eval_steps_per_second": 0.657,
"eval_ter": 52.3604,
"step": 17000
},
{
"epoch": 0.68,
"learning_rate": 4.321258193383237e-05,
"loss": 0.4648,
"step": 17500
},
{
"epoch": 0.68,
"eval_bleu": 37.1733,
"eval_chrf++": 62.7256,
"eval_gen_len": 22.4406,
"eval_loss": 0.935612678527832,
"eval_runtime": 2499.4089,
"eval_samples_per_second": 6.705,
"eval_spbleu": 50.417,
"eval_steps_per_second": 0.671,
"eval_ter": 51.7731,
"step": 17500
},
{
"epoch": 0.7,
"learning_rate": 4.301865570337044e-05,
"loss": 0.4642,
"step": 18000
},
{
"epoch": 0.7,
"eval_bleu": 37.1839,
"eval_chrf++": 62.648,
"eval_gen_len": 22.4244,
"eval_loss": 0.9331343770027161,
"eval_runtime": 2493.1006,
"eval_samples_per_second": 6.722,
"eval_spbleu": 50.1902,
"eval_steps_per_second": 0.672,
"eval_ter": 51.702,
"step": 18000
},
{
"epoch": 0.72,
"learning_rate": 4.2824729472908506e-05,
"loss": 0.4691,
"step": 18500
},
{
"epoch": 0.72,
"eval_bleu": 37.1915,
"eval_chrf++": 62.7489,
"eval_gen_len": 22.4675,
"eval_loss": 0.9320312142372131,
"eval_runtime": 2469.7829,
"eval_samples_per_second": 6.785,
"eval_spbleu": 50.4819,
"eval_steps_per_second": 0.679,
"eval_ter": 51.5528,
"step": 18500
},
{
"epoch": 0.74,
"learning_rate": 4.263080324244658e-05,
"loss": 0.4676,
"step": 19000
},
{
"epoch": 0.74,
"eval_bleu": 36.4381,
"eval_chrf++": 62.2257,
"eval_gen_len": 22.7873,
"eval_loss": 0.9254695177078247,
"eval_runtime": 2555.4331,
"eval_samples_per_second": 6.558,
"eval_spbleu": 49.2678,
"eval_steps_per_second": 0.656,
"eval_ter": 52.7193,
"step": 19000
},
{
"epoch": 0.76,
"learning_rate": 4.2436877011984646e-05,
"loss": 0.4515,
"step": 19500
},
{
"epoch": 0.76,
"eval_bleu": 37.2335,
"eval_chrf++": 62.7349,
"eval_gen_len": 22.3555,
"eval_loss": 0.9193410277366638,
"eval_runtime": 2451.9368,
"eval_samples_per_second": 6.835,
"eval_spbleu": 50.8378,
"eval_steps_per_second": 0.684,
"eval_ter": 51.103,
"step": 19500
},
{
"epoch": 0.78,
"learning_rate": 4.224295078152271e-05,
"loss": 0.4605,
"step": 20000
},
{
"epoch": 0.78,
"eval_bleu": 37.615,
"eval_chrf++": 62.9994,
"eval_gen_len": 22.4271,
"eval_loss": 0.920886218547821,
"eval_runtime": 2460.502,
"eval_samples_per_second": 6.811,
"eval_spbleu": 50.9187,
"eval_steps_per_second": 0.681,
"eval_ter": 51.1974,
"step": 20000
},
{
"epoch": 0.8,
"learning_rate": 4.204902455106078e-05,
"loss": 0.462,
"step": 20500
},
{
"epoch": 0.8,
"eval_bleu": 37.4618,
"eval_chrf++": 62.7306,
"eval_gen_len": 22.3868,
"eval_loss": 0.9150309562683105,
"eval_runtime": 2485.0566,
"eval_samples_per_second": 6.744,
"eval_spbleu": 50.7521,
"eval_steps_per_second": 0.674,
"eval_ter": 51.4503,
"step": 20500
},
{
"epoch": 0.81,
"learning_rate": 4.1855098320598846e-05,
"loss": 0.4584,
"step": 21000
},
{
"epoch": 0.81,
"eval_bleu": 37.6302,
"eval_chrf++": 62.7543,
"eval_gen_len": 22.2999,
"eval_loss": 0.9146909713745117,
"eval_runtime": 2475.6351,
"eval_samples_per_second": 6.769,
"eval_spbleu": 50.9152,
"eval_steps_per_second": 0.677,
"eval_ter": 51.2347,
"step": 21000
},
{
"epoch": 0.83,
"learning_rate": 4.166117209013691e-05,
"loss": 0.4511,
"step": 21500
},
{
"epoch": 0.83,
"eval_bleu": 37.4586,
"eval_chrf++": 62.7716,
"eval_gen_len": 22.491,
"eval_loss": 0.9128248691558838,
"eval_runtime": 2475.0464,
"eval_samples_per_second": 6.771,
"eval_spbleu": 50.7685,
"eval_steps_per_second": 0.677,
"eval_ter": 51.1974,
"step": 21500
},
{
"epoch": 0.85,
"learning_rate": 4.146724585967498e-05,
"loss": 0.4463,
"step": 22000
},
{
"epoch": 0.85,
"eval_bleu": 37.352,
"eval_chrf++": 62.9395,
"eval_gen_len": 22.5926,
"eval_loss": 0.9129999876022339,
"eval_runtime": 2497.6416,
"eval_samples_per_second": 6.71,
"eval_spbleu": 50.4575,
"eval_steps_per_second": 0.671,
"eval_ter": 51.5074,
"step": 22000
},
{
"epoch": 0.87,
"learning_rate": 4.1273319629213047e-05,
"loss": 0.4442,
"step": 22500
},
{
"epoch": 0.87,
"eval_bleu": 37.2191,
"eval_chrf++": 62.6919,
"eval_gen_len": 22.3296,
"eval_loss": 0.913128137588501,
"eval_runtime": 2479.804,
"eval_samples_per_second": 6.758,
"eval_spbleu": 50.5573,
"eval_steps_per_second": 0.676,
"eval_ter": 51.8057,
"step": 22500
},
{
"epoch": 0.89,
"learning_rate": 4.107939339875111e-05,
"loss": 0.4398,
"step": 23000
},
{
"epoch": 0.89,
"eval_bleu": 37.4159,
"eval_chrf++": 62.7847,
"eval_gen_len": 22.3621,
"eval_loss": 0.9087494611740112,
"eval_runtime": 2492.9389,
"eval_samples_per_second": 6.722,
"eval_spbleu": 50.7329,
"eval_steps_per_second": 0.672,
"eval_ter": 51.1589,
"step": 23000
},
{
"epoch": 0.91,
"learning_rate": 4.088546716828918e-05,
"loss": 0.4601,
"step": 23500
},
{
"epoch": 0.91,
"eval_bleu": 37.6768,
"eval_chrf++": 63.0053,
"eval_gen_len": 22.2636,
"eval_loss": 0.9055464267730713,
"eval_runtime": 2494.326,
"eval_samples_per_second": 6.718,
"eval_spbleu": 50.7562,
"eval_steps_per_second": 0.672,
"eval_ter": 51.2394,
"step": 23500
},
{
"epoch": 0.93,
"learning_rate": 4.0691540937827254e-05,
"loss": 0.4436,
"step": 24000
},
{
"epoch": 0.93,
"eval_bleu": 37.7386,
"eval_chrf++": 63.1409,
"eval_gen_len": 22.6223,
"eval_loss": 0.9039002656936646,
"eval_runtime": 2526.9249,
"eval_samples_per_second": 6.632,
"eval_spbleu": 50.7251,
"eval_steps_per_second": 0.663,
"eval_ter": 51.3804,
"step": 24000
},
{
"epoch": 0.95,
"learning_rate": 4.049761470736532e-05,
"loss": 0.4654,
"step": 24500
},
{
"epoch": 0.95,
"eval_bleu": 38.0304,
"eval_chrf++": 63.2108,
"eval_gen_len": 22.343,
"eval_loss": 0.9021787047386169,
"eval_runtime": 2462.9496,
"eval_samples_per_second": 6.804,
"eval_spbleu": 51.3142,
"eval_steps_per_second": 0.68,
"eval_ter": 50.4096,
"step": 24500
},
{
"epoch": 0.97,
"learning_rate": 4.030368847690339e-05,
"loss": 0.4485,
"step": 25000
},
{
"epoch": 0.97,
"eval_bleu": 38.1296,
"eval_chrf++": 63.3962,
"eval_gen_len": 22.518,
"eval_loss": 0.9031027555465698,
"eval_runtime": 2500.9301,
"eval_samples_per_second": 6.701,
"eval_spbleu": 51.1976,
"eval_steps_per_second": 0.67,
"eval_ter": 51.0377,
"step": 25000
}
],
"max_steps": 128915,
"num_train_epochs": 5,
"total_flos": 2.127904100057088e+17,
"trial_name": null,
"trial_params": null
}