{ "best_metric": 2.9252257347106934, "best_model_checkpoint": "output-mul-en/checkpoint-880", "epoch": 2.8949691883503337, "global_step": 1280, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.09, "eval_BLEU_ach": 4.5891, "eval_BLEU_lgg": 3.9017, "eval_BLEU_lug": 22.3101, "eval_BLEU_mean": 10.173, "eval_BLEU_nyn": 16.477, "eval_BLEU_teo": 3.5869, "eval_loss": 3.6784207820892334, "eval_runtime": 98.5338, "eval_samples_per_second": 25.372, "eval_steps_per_second": 1.593, "step": 40 }, { "epoch": 0.18, "eval_BLEU_ach": 7.2647, "eval_BLEU_lgg": 6.811, "eval_BLEU_lug": 27.4383, "eval_BLEU_mean": 13.5142, "eval_BLEU_nyn": 19.2461, "eval_BLEU_teo": 6.8109, "eval_loss": 3.4569008350372314, "eval_runtime": 98.3425, "eval_samples_per_second": 25.421, "eval_steps_per_second": 1.596, "step": 80 }, { "epoch": 0.27, "eval_BLEU_ach": 10.4342, "eval_BLEU_lgg": 8.5273, "eval_BLEU_lug": 28.6575, "eval_BLEU_mean": 15.7642, "eval_BLEU_nyn": 20.282, "eval_BLEU_teo": 10.92, "eval_loss": 3.3055758476257324, "eval_runtime": 89.5488, "eval_samples_per_second": 27.918, "eval_steps_per_second": 1.753, "step": 120 }, { "epoch": 0.36, "eval_BLEU_ach": 13.0113, "eval_BLEU_lgg": 11.5233, "eval_BLEU_lug": 30.7874, "eval_BLEU_mean": 18.3694, "eval_BLEU_nyn": 22.0821, "eval_BLEU_teo": 14.4427, "eval_loss": 3.195366859436035, "eval_runtime": 101.5394, "eval_samples_per_second": 24.621, "eval_steps_per_second": 1.546, "step": 160 }, { "epoch": 0.45, "eval_BLEU_ach": 14.9353, "eval_BLEU_lgg": 13.4147, "eval_BLEU_lug": 29.6307, "eval_BLEU_mean": 19.4572, "eval_BLEU_nyn": 22.1543, "eval_BLEU_teo": 17.1511, "eval_loss": 3.1346113681793213, "eval_runtime": 100.0696, "eval_samples_per_second": 24.983, "eval_steps_per_second": 1.569, "step": 200 }, { "epoch": 0.54, "eval_BLEU_ach": 16.3908, "eval_BLEU_lgg": 15.0127, "eval_BLEU_lug": 30.5639, "eval_BLEU_mean": 20.7352, "eval_BLEU_nyn": 23.3006, "eval_BLEU_teo": 18.4081, "eval_loss": 3.0843381881713867, "eval_runtime": 95.8679, "eval_samples_per_second": 26.078, "eval_steps_per_second": 1.638, "step": 240 }, { "epoch": 0.63, "eval_BLEU_ach": 16.8683, "eval_BLEU_lgg": 16.0119, "eval_BLEU_lug": 30.7078, "eval_BLEU_mean": 21.2384, "eval_BLEU_nyn": 23.4815, "eval_BLEU_teo": 19.1225, "eval_loss": 3.0460867881774902, "eval_runtime": 89.2688, "eval_samples_per_second": 28.005, "eval_steps_per_second": 1.759, "step": 280 }, { "epoch": 0.72, "eval_BLEU_ach": 18.3074, "eval_BLEU_lgg": 17.1009, "eval_BLEU_lug": 30.8855, "eval_BLEU_mean": 22.1453, "eval_BLEU_nyn": 23.955, "eval_BLEU_teo": 20.4778, "eval_loss": 3.020615339279175, "eval_runtime": 89.8109, "eval_samples_per_second": 27.836, "eval_steps_per_second": 1.748, "step": 320 }, { "epoch": 0.81, "eval_BLEU_ach": 18.3052, "eval_BLEU_lgg": 18.8868, "eval_BLEU_lug": 31.7406, "eval_BLEU_mean": 22.8623, "eval_BLEU_nyn": 24.0257, "eval_BLEU_teo": 21.353, "eval_loss": 3.0001604557037354, "eval_runtime": 89.2107, "eval_samples_per_second": 28.024, "eval_steps_per_second": 1.76, "step": 360 }, { "epoch": 0.9, "eval_BLEU_ach": 19.0031, "eval_BLEU_lgg": 19.9177, "eval_BLEU_lug": 32.4372, "eval_BLEU_mean": 23.6027, "eval_BLEU_nyn": 24.527, "eval_BLEU_teo": 22.1284, "eval_loss": 2.977238893508911, "eval_runtime": 88.8288, "eval_samples_per_second": 28.144, "eval_steps_per_second": 1.767, "step": 400 }, { "epoch": 0.99, "eval_BLEU_ach": 19.5689, "eval_BLEU_lgg": 20.7627, "eval_BLEU_lug": 31.6969, "eval_BLEU_mean": 23.6419, "eval_BLEU_nyn": 23.8603, "eval_BLEU_teo": 22.3207, "eval_loss": 2.9661757946014404, "eval_runtime": 89.2406, "eval_samples_per_second": 28.014, "eval_steps_per_second": 1.759, "step": 440 }, { "epoch": 1.09, "eval_BLEU_ach": 19.6735, "eval_BLEU_lgg": 20.6252, "eval_BLEU_lug": 32.1966, "eval_BLEU_mean": 23.9914, "eval_BLEU_nyn": 24.6766, "eval_BLEU_teo": 22.7851, "eval_loss": 2.9583656787872314, "eval_runtime": 89.2693, "eval_samples_per_second": 28.005, "eval_steps_per_second": 1.759, "step": 480 }, { "epoch": 1.13, "learning_rate": 8.868778280542987e-05, "loss": 3.0071, "step": 500 }, { "epoch": 1.18, "eval_BLEU_ach": 20.0282, "eval_BLEU_lgg": 20.745, "eval_BLEU_lug": 32.1863, "eval_BLEU_mean": 24.4004, "eval_BLEU_nyn": 25.0994, "eval_BLEU_teo": 23.9434, "eval_loss": 2.9503962993621826, "eval_runtime": 90.2812, "eval_samples_per_second": 27.691, "eval_steps_per_second": 1.739, "step": 520 }, { "epoch": 1.27, "eval_BLEU_ach": 20.9723, "eval_BLEU_lgg": 21.7365, "eval_BLEU_lug": 32.5492, "eval_BLEU_mean": 25.072, "eval_BLEU_nyn": 25.1544, "eval_BLEU_teo": 24.9475, "eval_loss": 2.937408208847046, "eval_runtime": 89.2147, "eval_samples_per_second": 28.022, "eval_steps_per_second": 1.76, "step": 560 }, { "epoch": 1.36, "eval_BLEU_ach": 21.0079, "eval_BLEU_lgg": 22.3347, "eval_BLEU_lug": 32.5314, "eval_BLEU_mean": 25.0517, "eval_BLEU_nyn": 24.8982, "eval_BLEU_teo": 24.4864, "eval_loss": 2.9393210411071777, "eval_runtime": 89.3095, "eval_samples_per_second": 27.993, "eval_steps_per_second": 1.758, "step": 600 }, { "epoch": 1.45, "eval_BLEU_ach": 21.1477, "eval_BLEU_lgg": 21.9452, "eval_BLEU_lug": 32.4218, "eval_BLEU_mean": 25.2757, "eval_BLEU_nyn": 25.4418, "eval_BLEU_teo": 25.4219, "eval_loss": 2.932978630065918, "eval_runtime": 88.9606, "eval_samples_per_second": 28.102, "eval_steps_per_second": 1.765, "step": 640 }, { "epoch": 1.54, "eval_BLEU_ach": 21.1259, "eval_BLEU_lgg": 23.2482, "eval_BLEU_lug": 33.2059, "eval_BLEU_mean": 25.5329, "eval_BLEU_nyn": 25.6245, "eval_BLEU_teo": 24.4601, "eval_loss": 2.934147834777832, "eval_runtime": 88.209, "eval_samples_per_second": 28.342, "eval_steps_per_second": 1.78, "step": 680 }, { "epoch": 1.63, "eval_BLEU_ach": 21.1098, "eval_BLEU_lgg": 22.4299, "eval_BLEU_lug": 32.0142, "eval_BLEU_mean": 25.2204, "eval_BLEU_nyn": 25.4877, "eval_BLEU_teo": 25.0606, "eval_loss": 2.933361053466797, "eval_runtime": 89.478, "eval_samples_per_second": 27.94, "eval_steps_per_second": 1.755, "step": 720 }, { "epoch": 1.72, "eval_BLEU_ach": 21.1135, "eval_BLEU_lgg": 23.2845, "eval_BLEU_lug": 33.3865, "eval_BLEU_mean": 25.714, "eval_BLEU_nyn": 25.2436, "eval_BLEU_teo": 25.5417, "eval_loss": 2.929994583129883, "eval_runtime": 88.926, "eval_samples_per_second": 28.113, "eval_steps_per_second": 1.766, "step": 760 }, { "epoch": 1.81, "eval_BLEU_ach": 20.9468, "eval_BLEU_lgg": 23.4982, "eval_BLEU_lug": 33.1763, "eval_BLEU_mean": 25.794, "eval_BLEU_nyn": 25.7664, "eval_BLEU_teo": 25.5825, "eval_loss": 2.9271128177642822, "eval_runtime": 88.3831, "eval_samples_per_second": 28.286, "eval_steps_per_second": 1.776, "step": 800 }, { "epoch": 1.9, "eval_BLEU_ach": 21.4617, "eval_BLEU_lgg": 22.9345, "eval_BLEU_lug": 33.0767, "eval_BLEU_mean": 25.966, "eval_BLEU_nyn": 26.4419, "eval_BLEU_teo": 25.9155, "eval_loss": 2.9278223514556885, "eval_runtime": 89.3776, "eval_samples_per_second": 27.971, "eval_steps_per_second": 1.757, "step": 840 }, { "epoch": 1.99, "eval_BLEU_ach": 21.7326, "eval_BLEU_lgg": 23.4004, "eval_BLEU_lug": 32.8355, "eval_BLEU_mean": 26.0083, "eval_BLEU_nyn": 25.9382, "eval_BLEU_teo": 26.1347, "eval_loss": 2.9252257347106934, "eval_runtime": 89.2878, "eval_samples_per_second": 27.999, "eval_steps_per_second": 1.758, "step": 880 }, { "epoch": 2.08, "eval_BLEU_ach": 21.166, "eval_BLEU_lgg": 23.5027, "eval_BLEU_lug": 32.6492, "eval_BLEU_mean": 25.7616, "eval_BLEU_nyn": 26.319, "eval_BLEU_teo": 25.1708, "eval_loss": 2.932344436645508, "eval_runtime": 88.932, "eval_samples_per_second": 28.111, "eval_steps_per_second": 1.765, "step": 920 }, { "epoch": 2.17, "eval_BLEU_ach": 22.0146, "eval_BLEU_lgg": 23.7447, "eval_BLEU_lug": 32.5611, "eval_BLEU_mean": 26.1863, "eval_BLEU_nyn": 26.7487, "eval_BLEU_teo": 25.8622, "eval_loss": 2.9332921504974365, "eval_runtime": 88.3499, "eval_samples_per_second": 28.297, "eval_steps_per_second": 1.777, "step": 960 }, { "epoch": 2.26, "learning_rate": 7.737556561085974e-05, "loss": 2.6023, "step": 1000 }, { "epoch": 2.26, "eval_BLEU_ach": 21.8859, "eval_BLEU_lgg": 23.2485, "eval_BLEU_lug": 32.9259, "eval_BLEU_mean": 25.6918, "eval_BLEU_nyn": 25.1293, "eval_BLEU_teo": 25.2696, "eval_loss": 2.9421749114990234, "eval_runtime": 87.8065, "eval_samples_per_second": 28.472, "eval_steps_per_second": 1.788, "step": 1000 }, { "epoch": 2.35, "eval_BLEU_ach": 21.8249, "eval_BLEU_lgg": 23.8957, "eval_BLEU_lug": 33.012, "eval_BLEU_mean": 26.0203, "eval_BLEU_nyn": 25.7039, "eval_BLEU_teo": 25.665, "eval_loss": 2.9367637634277344, "eval_runtime": 89.1365, "eval_samples_per_second": 28.047, "eval_steps_per_second": 1.761, "step": 1040 }, { "epoch": 2.44, "eval_BLEU_ach": 21.1719, "eval_BLEU_lgg": 24.1201, "eval_BLEU_lug": 33.5237, "eval_BLEU_mean": 26.0272, "eval_BLEU_nyn": 25.4232, "eval_BLEU_teo": 25.8972, "eval_loss": 2.9365782737731934, "eval_runtime": 88.6821, "eval_samples_per_second": 28.191, "eval_steps_per_second": 1.77, "step": 1080 }, { "epoch": 2.53, "eval_BLEU_ach": 21.3613, "eval_BLEU_lgg": 24.3655, "eval_BLEU_lug": 32.9166, "eval_BLEU_mean": 25.9118, "eval_BLEU_nyn": 25.7681, "eval_BLEU_teo": 25.1473, "eval_loss": 2.945089101791382, "eval_runtime": 88.4188, "eval_samples_per_second": 28.275, "eval_steps_per_second": 1.776, "step": 1120 }, { "epoch": 2.62, "eval_BLEU_ach": 21.3138, "eval_BLEU_lgg": 24.2363, "eval_BLEU_lug": 33.5684, "eval_BLEU_mean": 26.1225, "eval_BLEU_nyn": 25.8827, "eval_BLEU_teo": 25.6111, "eval_loss": 2.943458318710327, "eval_runtime": 93.8652, "eval_samples_per_second": 26.634, "eval_steps_per_second": 1.673, "step": 1160 }, { "epoch": 2.71, "eval_BLEU_ach": 21.2108, "eval_BLEU_lgg": 24.543, "eval_BLEU_lug": 33.6907, "eval_BLEU_mean": 26.2053, "eval_BLEU_nyn": 25.9792, "eval_BLEU_teo": 25.6029, "eval_loss": 2.937652826309204, "eval_runtime": 88.5263, "eval_samples_per_second": 28.24, "eval_steps_per_second": 1.773, "step": 1200 }, { "epoch": 2.8, "eval_BLEU_ach": 21.4273, "eval_BLEU_lgg": 24.1568, "eval_BLEU_lug": 33.2002, "eval_BLEU_mean": 26.2056, "eval_BLEU_nyn": 26.3045, "eval_BLEU_teo": 25.9391, "eval_loss": 2.9444987773895264, "eval_runtime": 89.2883, "eval_samples_per_second": 27.999, "eval_steps_per_second": 1.758, "step": 1240 }, { "epoch": 2.89, "eval_BLEU_ach": 21.4991, "eval_BLEU_lgg": 24.7875, "eval_BLEU_lug": 33.2228, "eval_BLEU_mean": 26.2522, "eval_BLEU_nyn": 26.402, "eval_BLEU_teo": 25.3493, "eval_loss": 2.957012891769409, "eval_runtime": 89.5054, "eval_samples_per_second": 27.931, "eval_steps_per_second": 1.754, "step": 1280 } ], "max_steps": 4420, "num_train_epochs": 10, "total_flos": 1.249876610629632e+17, "trial_name": null, "trial_params": null }