{
  "best_metric": 2.9252257347106934,
  "best_model_checkpoint": "output-mul-en/checkpoint-880",
  "epoch": 2.8949691883503337,
  "global_step": 1280,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.09,
      "eval_BLEU_ach": 4.5891,
      "eval_BLEU_lgg": 3.9017,
      "eval_BLEU_lug": 22.3101,
      "eval_BLEU_mean": 10.173,
      "eval_BLEU_nyn": 16.477,
      "eval_BLEU_teo": 3.5869,
      "eval_loss": 3.6784207820892334,
      "eval_runtime": 98.5338,
      "eval_samples_per_second": 25.372,
      "eval_steps_per_second": 1.593,
      "step": 40
    },
    {
      "epoch": 0.18,
      "eval_BLEU_ach": 7.2647,
      "eval_BLEU_lgg": 6.811,
      "eval_BLEU_lug": 27.4383,
      "eval_BLEU_mean": 13.5142,
      "eval_BLEU_nyn": 19.2461,
      "eval_BLEU_teo": 6.8109,
      "eval_loss": 3.4569008350372314,
      "eval_runtime": 98.3425,
      "eval_samples_per_second": 25.421,
      "eval_steps_per_second": 1.596,
      "step": 80
    },
    {
      "epoch": 0.27,
      "eval_BLEU_ach": 10.4342,
      "eval_BLEU_lgg": 8.5273,
      "eval_BLEU_lug": 28.6575,
      "eval_BLEU_mean": 15.7642,
      "eval_BLEU_nyn": 20.282,
      "eval_BLEU_teo": 10.92,
      "eval_loss": 3.3055758476257324,
      "eval_runtime": 89.5488,
      "eval_samples_per_second": 27.918,
      "eval_steps_per_second": 1.753,
      "step": 120
    },
    {
      "epoch": 0.36,
      "eval_BLEU_ach": 13.0113,
      "eval_BLEU_lgg": 11.5233,
      "eval_BLEU_lug": 30.7874,
      "eval_BLEU_mean": 18.3694,
      "eval_BLEU_nyn": 22.0821,
      "eval_BLEU_teo": 14.4427,
      "eval_loss": 3.195366859436035,
      "eval_runtime": 101.5394,
      "eval_samples_per_second": 24.621,
      "eval_steps_per_second": 1.546,
      "step": 160
    },
    {
      "epoch": 0.45,
      "eval_BLEU_ach": 14.9353,
      "eval_BLEU_lgg": 13.4147,
      "eval_BLEU_lug": 29.6307,
      "eval_BLEU_mean": 19.4572,
      "eval_BLEU_nyn": 22.1543,
      "eval_BLEU_teo": 17.1511,
      "eval_loss": 3.1346113681793213,
      "eval_runtime": 100.0696,
      "eval_samples_per_second": 24.983,
      "eval_steps_per_second": 1.569,
      "step": 200
    },
    {
      "epoch": 0.54,
      "eval_BLEU_ach": 16.3908,
      "eval_BLEU_lgg": 15.0127,
      "eval_BLEU_lug": 30.5639,
      "eval_BLEU_mean": 20.7352,
      "eval_BLEU_nyn": 23.3006,
      "eval_BLEU_teo": 18.4081,
      "eval_loss": 3.0843381881713867,
      "eval_runtime": 95.8679,
      "eval_samples_per_second": 26.078,
      "eval_steps_per_second": 1.638,
      "step": 240
    },
    {
      "epoch": 0.63,
      "eval_BLEU_ach": 16.8683,
      "eval_BLEU_lgg": 16.0119,
      "eval_BLEU_lug": 30.7078,
      "eval_BLEU_mean": 21.2384,
      "eval_BLEU_nyn": 23.4815,
      "eval_BLEU_teo": 19.1225,
      "eval_loss": 3.0460867881774902,
      "eval_runtime": 89.2688,
      "eval_samples_per_second": 28.005,
      "eval_steps_per_second": 1.759,
      "step": 280
    },
    {
      "epoch": 0.72,
      "eval_BLEU_ach": 18.3074,
      "eval_BLEU_lgg": 17.1009,
      "eval_BLEU_lug": 30.8855,
      "eval_BLEU_mean": 22.1453,
      "eval_BLEU_nyn": 23.955,
      "eval_BLEU_teo": 20.4778,
      "eval_loss": 3.020615339279175,
      "eval_runtime": 89.8109,
      "eval_samples_per_second": 27.836,
      "eval_steps_per_second": 1.748,
      "step": 320
    },
    {
      "epoch": 0.81,
      "eval_BLEU_ach": 18.3052,
      "eval_BLEU_lgg": 18.8868,
      "eval_BLEU_lug": 31.7406,
      "eval_BLEU_mean": 22.8623,
      "eval_BLEU_nyn": 24.0257,
      "eval_BLEU_teo": 21.353,
      "eval_loss": 3.0001604557037354,
      "eval_runtime": 89.2107,
      "eval_samples_per_second": 28.024,
      "eval_steps_per_second": 1.76,
      "step": 360
    },
    {
      "epoch": 0.9,
      "eval_BLEU_ach": 19.0031,
      "eval_BLEU_lgg": 19.9177,
      "eval_BLEU_lug": 32.4372,
      "eval_BLEU_mean": 23.6027,
      "eval_BLEU_nyn": 24.527,
      "eval_BLEU_teo": 22.1284,
      "eval_loss": 2.977238893508911,
      "eval_runtime": 88.8288,
      "eval_samples_per_second": 28.144,
      "eval_steps_per_second": 1.767,
      "step": 400
    },
    {
      "epoch": 0.99,
      "eval_BLEU_ach": 19.5689,
      "eval_BLEU_lgg": 20.7627,
      "eval_BLEU_lug": 31.6969,
      "eval_BLEU_mean": 23.6419,
      "eval_BLEU_nyn": 23.8603,
      "eval_BLEU_teo": 22.3207,
      "eval_loss": 2.9661757946014404,
      "eval_runtime": 89.2406,
      "eval_samples_per_second": 28.014,
      "eval_steps_per_second": 1.759,
      "step": 440
    },
    {
      "epoch": 1.09,
      "eval_BLEU_ach": 19.6735,
      "eval_BLEU_lgg": 20.6252,
      "eval_BLEU_lug": 32.1966,
      "eval_BLEU_mean": 23.9914,
      "eval_BLEU_nyn": 24.6766,
      "eval_BLEU_teo": 22.7851,
      "eval_loss": 2.9583656787872314,
      "eval_runtime": 89.2693,
      "eval_samples_per_second": 28.005,
      "eval_steps_per_second": 1.759,
      "step": 480
    },
    {
      "epoch": 1.13,
      "learning_rate": 8.868778280542987e-05,
      "loss": 3.0071,
      "step": 500
    },
    {
      "epoch": 1.18,
      "eval_BLEU_ach": 20.0282,
      "eval_BLEU_lgg": 20.745,
      "eval_BLEU_lug": 32.1863,
      "eval_BLEU_mean": 24.4004,
      "eval_BLEU_nyn": 25.0994,
      "eval_BLEU_teo": 23.9434,
      "eval_loss": 2.9503962993621826,
      "eval_runtime": 90.2812,
      "eval_samples_per_second": 27.691,
      "eval_steps_per_second": 1.739,
      "step": 520
    },
    {
      "epoch": 1.27,
      "eval_BLEU_ach": 20.9723,
      "eval_BLEU_lgg": 21.7365,
      "eval_BLEU_lug": 32.5492,
      "eval_BLEU_mean": 25.072,
      "eval_BLEU_nyn": 25.1544,
      "eval_BLEU_teo": 24.9475,
      "eval_loss": 2.937408208847046,
      "eval_runtime": 89.2147,
      "eval_samples_per_second": 28.022,
      "eval_steps_per_second": 1.76,
      "step": 560
    },
    {
      "epoch": 1.36,
      "eval_BLEU_ach": 21.0079,
      "eval_BLEU_lgg": 22.3347,
      "eval_BLEU_lug": 32.5314,
      "eval_BLEU_mean": 25.0517,
      "eval_BLEU_nyn": 24.8982,
      "eval_BLEU_teo": 24.4864,
      "eval_loss": 2.9393210411071777,
      "eval_runtime": 89.3095,
      "eval_samples_per_second": 27.993,
      "eval_steps_per_second": 1.758,
      "step": 600
    },
    {
      "epoch": 1.45,
      "eval_BLEU_ach": 21.1477,
      "eval_BLEU_lgg": 21.9452,
      "eval_BLEU_lug": 32.4218,
      "eval_BLEU_mean": 25.2757,
      "eval_BLEU_nyn": 25.4418,
      "eval_BLEU_teo": 25.4219,
      "eval_loss": 2.932978630065918,
      "eval_runtime": 88.9606,
      "eval_samples_per_second": 28.102,
      "eval_steps_per_second": 1.765,
      "step": 640
    },
    {
      "epoch": 1.54,
      "eval_BLEU_ach": 21.1259,
      "eval_BLEU_lgg": 23.2482,
      "eval_BLEU_lug": 33.2059,
      "eval_BLEU_mean": 25.5329,
      "eval_BLEU_nyn": 25.6245,
      "eval_BLEU_teo": 24.4601,
      "eval_loss": 2.934147834777832,
      "eval_runtime": 88.209,
      "eval_samples_per_second": 28.342,
      "eval_steps_per_second": 1.78,
      "step": 680
    },
    {
      "epoch": 1.63,
      "eval_BLEU_ach": 21.1098,
      "eval_BLEU_lgg": 22.4299,
      "eval_BLEU_lug": 32.0142,
      "eval_BLEU_mean": 25.2204,
      "eval_BLEU_nyn": 25.4877,
      "eval_BLEU_teo": 25.0606,
      "eval_loss": 2.933361053466797,
      "eval_runtime": 89.478,
      "eval_samples_per_second": 27.94,
      "eval_steps_per_second": 1.755,
      "step": 720
    },
    {
      "epoch": 1.72,
      "eval_BLEU_ach": 21.1135,
      "eval_BLEU_lgg": 23.2845,
      "eval_BLEU_lug": 33.3865,
      "eval_BLEU_mean": 25.714,
      "eval_BLEU_nyn": 25.2436,
      "eval_BLEU_teo": 25.5417,
      "eval_loss": 2.929994583129883,
      "eval_runtime": 88.926,
      "eval_samples_per_second": 28.113,
      "eval_steps_per_second": 1.766,
      "step": 760
    },
    {
      "epoch": 1.81,
      "eval_BLEU_ach": 20.9468,
      "eval_BLEU_lgg": 23.4982,
      "eval_BLEU_lug": 33.1763,
      "eval_BLEU_mean": 25.794,
      "eval_BLEU_nyn": 25.7664,
      "eval_BLEU_teo": 25.5825,
      "eval_loss": 2.9271128177642822,
      "eval_runtime": 88.3831,
      "eval_samples_per_second": 28.286,
      "eval_steps_per_second": 1.776,
      "step": 800
    },
    {
      "epoch": 1.9,
      "eval_BLEU_ach": 21.4617,
      "eval_BLEU_lgg": 22.9345,
      "eval_BLEU_lug": 33.0767,
      "eval_BLEU_mean": 25.966,
      "eval_BLEU_nyn": 26.4419,
      "eval_BLEU_teo": 25.9155,
      "eval_loss": 2.9278223514556885,
      "eval_runtime": 89.3776,
      "eval_samples_per_second": 27.971,
      "eval_steps_per_second": 1.757,
      "step": 840
    },
    {
      "epoch": 1.99,
      "eval_BLEU_ach": 21.7326,
      "eval_BLEU_lgg": 23.4004,
      "eval_BLEU_lug": 32.8355,
      "eval_BLEU_mean": 26.0083,
      "eval_BLEU_nyn": 25.9382,
      "eval_BLEU_teo": 26.1347,
      "eval_loss": 2.9252257347106934,
      "eval_runtime": 89.2878,
      "eval_samples_per_second": 27.999,
      "eval_steps_per_second": 1.758,
      "step": 880
    },
    {
      "epoch": 2.08,
      "eval_BLEU_ach": 21.166,
      "eval_BLEU_lgg": 23.5027,
      "eval_BLEU_lug": 32.6492,
      "eval_BLEU_mean": 25.7616,
      "eval_BLEU_nyn": 26.319,
      "eval_BLEU_teo": 25.1708,
      "eval_loss": 2.932344436645508,
      "eval_runtime": 88.932,
      "eval_samples_per_second": 28.111,
      "eval_steps_per_second": 1.765,
      "step": 920
    },
    {
      "epoch": 2.17,
      "eval_BLEU_ach": 22.0146,
      "eval_BLEU_lgg": 23.7447,
      "eval_BLEU_lug": 32.5611,
      "eval_BLEU_mean": 26.1863,
      "eval_BLEU_nyn": 26.7487,
      "eval_BLEU_teo": 25.8622,
      "eval_loss": 2.9332921504974365,
      "eval_runtime": 88.3499,
      "eval_samples_per_second": 28.297,
      "eval_steps_per_second": 1.777,
      "step": 960
    },
    {
      "epoch": 2.26,
      "learning_rate": 7.737556561085974e-05,
      "loss": 2.6023,
      "step": 1000
    },
    {
      "epoch": 2.26,
      "eval_BLEU_ach": 21.8859,
      "eval_BLEU_lgg": 23.2485,
      "eval_BLEU_lug": 32.9259,
      "eval_BLEU_mean": 25.6918,
      "eval_BLEU_nyn": 25.1293,
      "eval_BLEU_teo": 25.2696,
      "eval_loss": 2.9421749114990234,
      "eval_runtime": 87.8065,
      "eval_samples_per_second": 28.472,
      "eval_steps_per_second": 1.788,
      "step": 1000
    },
    {
      "epoch": 2.35,
      "eval_BLEU_ach": 21.8249,
      "eval_BLEU_lgg": 23.8957,
      "eval_BLEU_lug": 33.012,
      "eval_BLEU_mean": 26.0203,
      "eval_BLEU_nyn": 25.7039,
      "eval_BLEU_teo": 25.665,
      "eval_loss": 2.9367637634277344,
      "eval_runtime": 89.1365,
      "eval_samples_per_second": 28.047,
      "eval_steps_per_second": 1.761,
      "step": 1040
    },
    {
      "epoch": 2.44,
      "eval_BLEU_ach": 21.1719,
      "eval_BLEU_lgg": 24.1201,
      "eval_BLEU_lug": 33.5237,
      "eval_BLEU_mean": 26.0272,
      "eval_BLEU_nyn": 25.4232,
      "eval_BLEU_teo": 25.8972,
      "eval_loss": 2.9365782737731934,
      "eval_runtime": 88.6821,
      "eval_samples_per_second": 28.191,
      "eval_steps_per_second": 1.77,
      "step": 1080
    },
    {
      "epoch": 2.53,
      "eval_BLEU_ach": 21.3613,
      "eval_BLEU_lgg": 24.3655,
      "eval_BLEU_lug": 32.9166,
      "eval_BLEU_mean": 25.9118,
      "eval_BLEU_nyn": 25.7681,
      "eval_BLEU_teo": 25.1473,
      "eval_loss": 2.945089101791382,
      "eval_runtime": 88.4188,
      "eval_samples_per_second": 28.275,
      "eval_steps_per_second": 1.776,
      "step": 1120
    },
    {
      "epoch": 2.62,
      "eval_BLEU_ach": 21.3138,
      "eval_BLEU_lgg": 24.2363,
      "eval_BLEU_lug": 33.5684,
      "eval_BLEU_mean": 26.1225,
      "eval_BLEU_nyn": 25.8827,
      "eval_BLEU_teo": 25.6111,
      "eval_loss": 2.943458318710327,
      "eval_runtime": 93.8652,
      "eval_samples_per_second": 26.634,
      "eval_steps_per_second": 1.673,
      "step": 1160
    },
    {
      "epoch": 2.71,
      "eval_BLEU_ach": 21.2108,
      "eval_BLEU_lgg": 24.543,
      "eval_BLEU_lug": 33.6907,
      "eval_BLEU_mean": 26.2053,
      "eval_BLEU_nyn": 25.9792,
      "eval_BLEU_teo": 25.6029,
      "eval_loss": 2.937652826309204,
      "eval_runtime": 88.5263,
      "eval_samples_per_second": 28.24,
      "eval_steps_per_second": 1.773,
      "step": 1200
    },
    {
      "epoch": 2.8,
      "eval_BLEU_ach": 21.4273,
      "eval_BLEU_lgg": 24.1568,
      "eval_BLEU_lug": 33.2002,
      "eval_BLEU_mean": 26.2056,
      "eval_BLEU_nyn": 26.3045,
      "eval_BLEU_teo": 25.9391,
      "eval_loss": 2.9444987773895264,
      "eval_runtime": 89.2883,
      "eval_samples_per_second": 27.999,
      "eval_steps_per_second": 1.758,
      "step": 1240
    },
    {
      "epoch": 2.89,
      "eval_BLEU_ach": 21.4991,
      "eval_BLEU_lgg": 24.7875,
      "eval_BLEU_lug": 33.2228,
      "eval_BLEU_mean": 26.2522,
      "eval_BLEU_nyn": 26.402,
      "eval_BLEU_teo": 25.3493,
      "eval_loss": 2.957012891769409,
      "eval_runtime": 89.5054,
      "eval_samples_per_second": 27.931,
      "eval_steps_per_second": 1.754,
      "step": 1280
    }
  ],
  "max_steps": 4420,
  "num_train_epochs": 10,
  "total_flos": 1.249876610629632e+17,
  "trial_name": null,
  "trial_params": null
}