{ "best_metric": null, "best_model_checkpoint": null, "epoch": 49.99777942264989, "global_step": 16850, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_bleu": 0.8119, "eval_gen_len": 15.295, "eval_loss": 3.3328945636749268, "eval_runtime": 64.1416, "eval_samples_per_second": 88.102, "eval_steps_per_second": 4.412, "step": 337 }, { "epoch": 1.48, "learning_rate": 4.851632047477745e-05, "loss": 3.528, "step": 500 }, { "epoch": 2.0, "eval_bleu": 0.9197, "eval_gen_len": 15.3056, "eval_loss": 3.283737897872925, "eval_runtime": 63.9504, "eval_samples_per_second": 88.365, "eval_steps_per_second": 4.425, "step": 674 }, { "epoch": 2.97, "learning_rate": 4.70326409495549e-05, "loss": 3.3932, "step": 1000 }, { "epoch": 3.0, "eval_bleu": 0.9536, "eval_gen_len": 15.6238, "eval_loss": 3.2553622722625732, "eval_runtime": 62.6609, "eval_samples_per_second": 90.184, "eval_steps_per_second": 4.516, "step": 1011 }, { "epoch": 4.0, "eval_bleu": 1.0082, "eval_gen_len": 15.5254, "eval_loss": 3.236884117126465, "eval_runtime": 62.9428, "eval_samples_per_second": 89.78, "eval_steps_per_second": 4.496, "step": 1348 }, { "epoch": 4.45, "learning_rate": 4.554896142433235e-05, "loss": 3.3394, "step": 1500 }, { "epoch": 5.0, "eval_bleu": 1.088, "eval_gen_len": 15.7466, "eval_loss": 3.222892999649048, "eval_runtime": 64.2741, "eval_samples_per_second": 87.92, "eval_steps_per_second": 4.403, "step": 1685 }, { "epoch": 5.93, "learning_rate": 4.4065281899109794e-05, "loss": 3.2982, "step": 2000 }, { "epoch": 6.0, "eval_bleu": 1.1295, "eval_gen_len": 15.7866, "eval_loss": 3.210378885269165, "eval_runtime": 64.1493, "eval_samples_per_second": 88.091, "eval_steps_per_second": 4.412, "step": 2022 }, { "epoch": 7.0, "eval_bleu": 1.1648, "eval_gen_len": 15.6852, "eval_loss": 3.200908899307251, "eval_runtime": 63.0315, "eval_samples_per_second": 89.654, "eval_steps_per_second": 4.49, "step": 2359 }, { "epoch": 7.42, "learning_rate": 4.258160237388724e-05, "loss": 3.2658, "step": 2500 }, { "epoch": 8.0, "eval_bleu": 1.1548, "eval_gen_len": 15.9358, "eval_loss": 3.191558361053467, "eval_runtime": 64.0034, "eval_samples_per_second": 88.292, "eval_steps_per_second": 4.422, "step": 2696 }, { "epoch": 8.9, "learning_rate": 4.109792284866469e-05, "loss": 3.2397, "step": 3000 }, { "epoch": 9.0, "eval_bleu": 1.0651, "eval_gen_len": 16.0241, "eval_loss": 3.1847681999206543, "eval_runtime": 64.3166, "eval_samples_per_second": 87.862, "eval_steps_per_second": 4.4, "step": 3033 }, { "epoch": 10.0, "eval_bleu": 1.1443, "eval_gen_len": 16.0409, "eval_loss": 3.1797027587890625, "eval_runtime": 62.6803, "eval_samples_per_second": 90.156, "eval_steps_per_second": 4.515, "step": 3370 }, { "epoch": 10.38, "learning_rate": 3.961424332344214e-05, "loss": 3.2203, "step": 3500 }, { "epoch": 11.0, "eval_bleu": 1.1562, "eval_gen_len": 15.9648, "eval_loss": 3.1735124588012695, "eval_runtime": 64.1808, "eval_samples_per_second": 88.048, "eval_steps_per_second": 4.409, "step": 3707 }, { "epoch": 11.87, "learning_rate": 3.8130563798219586e-05, "loss": 3.1935, "step": 4000 }, { "epoch": 12.0, "eval_bleu": 1.145, "eval_gen_len": 15.7811, "eval_loss": 3.1689915657043457, "eval_runtime": 62.6765, "eval_samples_per_second": 90.161, "eval_steps_per_second": 4.515, "step": 4044 }, { "epoch": 13.0, "eval_bleu": 1.1534, "eval_gen_len": 15.9582, "eval_loss": 3.1667888164520264, "eval_runtime": 64.2839, "eval_samples_per_second": 87.907, "eval_steps_per_second": 4.402, "step": 4381 }, { "epoch": 13.35, "learning_rate": 3.664688427299703e-05, "loss": 3.1791, "step": 4500 }, { "epoch": 14.0, "eval_bleu": 1.1476, "eval_gen_len": 15.9605, "eval_loss": 3.1629080772399902, "eval_runtime": 63.0085, "eval_samples_per_second": 89.686, "eval_steps_per_second": 4.491, "step": 4718 }, { "epoch": 14.83, "learning_rate": 3.516320474777448e-05, "loss": 3.1562, "step": 5000 }, { "epoch": 15.0, "eval_bleu": 1.1504, "eval_gen_len": 15.9747, "eval_loss": 3.1592817306518555, "eval_runtime": 63.8036, "eval_samples_per_second": 88.569, "eval_steps_per_second": 4.435, "step": 5055 }, { "epoch": 16.0, "eval_bleu": 1.2003, "eval_gen_len": 15.6548, "eval_loss": 3.1569018363952637, "eval_runtime": 63.7567, "eval_samples_per_second": 88.634, "eval_steps_per_second": 4.439, "step": 5392 }, { "epoch": 16.32, "learning_rate": 3.3679525222551934e-05, "loss": 3.1461, "step": 5500 }, { "epoch": 17.0, "eval_bleu": 1.187, "eval_gen_len": 16.2831, "eval_loss": 3.1528096199035645, "eval_runtime": 62.8317, "eval_samples_per_second": 89.939, "eval_steps_per_second": 4.504, "step": 5729 }, { "epoch": 17.8, "learning_rate": 3.219584569732938e-05, "loss": 3.1301, "step": 6000 }, { "epoch": 18.0, "eval_bleu": 1.1908, "eval_gen_len": 16.2078, "eval_loss": 3.15116024017334, "eval_runtime": 62.7949, "eval_samples_per_second": 89.991, "eval_steps_per_second": 4.507, "step": 6066 }, { "epoch": 19.0, "eval_bleu": 1.2164, "eval_gen_len": 16.1816, "eval_loss": 3.149709463119507, "eval_runtime": 63.3277, "eval_samples_per_second": 89.234, "eval_steps_per_second": 4.469, "step": 6403 }, { "epoch": 19.29, "learning_rate": 3.071216617210683e-05, "loss": 3.1199, "step": 6500 }, { "epoch": 20.0, "eval_bleu": 1.2493, "eval_gen_len": 16.0138, "eval_loss": 3.147409200668335, "eval_runtime": 62.5603, "eval_samples_per_second": 90.329, "eval_steps_per_second": 4.524, "step": 6740 }, { "epoch": 20.77, "learning_rate": 2.9228486646884274e-05, "loss": 3.1012, "step": 7000 }, { "epoch": 21.0, "eval_bleu": 1.2189, "eval_gen_len": 15.9667, "eval_loss": 3.1471338272094727, "eval_runtime": 62.6224, "eval_samples_per_second": 90.239, "eval_steps_per_second": 4.519, "step": 7077 }, { "epoch": 22.0, "eval_bleu": 1.2535, "eval_gen_len": 16.0239, "eval_loss": 3.145897150039673, "eval_runtime": 62.7278, "eval_samples_per_second": 90.088, "eval_steps_per_second": 4.512, "step": 7414 }, { "epoch": 22.25, "learning_rate": 2.774480712166172e-05, "loss": 3.0953, "step": 7500 }, { "epoch": 23.0, "eval_bleu": 1.2554, "eval_gen_len": 15.7225, "eval_loss": 3.143968343734741, "eval_runtime": 63.12, "eval_samples_per_second": 89.528, "eval_steps_per_second": 4.484, "step": 7751 }, { "epoch": 23.74, "learning_rate": 2.6261127596439174e-05, "loss": 3.0761, "step": 8000 }, { "epoch": 24.0, "eval_bleu": 1.2086, "eval_gen_len": 16.0069, "eval_loss": 3.141495943069458, "eval_runtime": 62.7045, "eval_samples_per_second": 90.121, "eval_steps_per_second": 4.513, "step": 8088 }, { "epoch": 25.0, "eval_bleu": 1.305, "eval_gen_len": 15.9912, "eval_loss": 3.1416375637054443, "eval_runtime": 62.5621, "eval_samples_per_second": 90.326, "eval_steps_per_second": 4.524, "step": 8425 }, { "epoch": 25.22, "learning_rate": 2.4777448071216618e-05, "loss": 3.0822, "step": 8500 }, { "epoch": 26.0, "eval_bleu": 1.248, "eval_gen_len": 15.8406, "eval_loss": 3.1405673027038574, "eval_runtime": 62.6069, "eval_samples_per_second": 90.262, "eval_steps_per_second": 4.52, "step": 8762 }, { "epoch": 26.7, "learning_rate": 2.3293768545994066e-05, "loss": 3.0614, "step": 9000 }, { "epoch": 27.0, "eval_bleu": 1.2888, "eval_gen_len": 16.3189, "eval_loss": 3.140249252319336, "eval_runtime": 62.6544, "eval_samples_per_second": 90.193, "eval_steps_per_second": 4.517, "step": 9099 }, { "epoch": 28.0, "eval_bleu": 1.3016, "eval_gen_len": 16.0096, "eval_loss": 3.1379010677337646, "eval_runtime": 62.5617, "eval_samples_per_second": 90.327, "eval_steps_per_second": 4.524, "step": 9436 }, { "epoch": 28.19, "learning_rate": 2.1810089020771514e-05, "loss": 3.0594, "step": 9500 }, { "epoch": 29.0, "eval_bleu": 1.2268, "eval_gen_len": 16.3702, "eval_loss": 3.139193058013916, "eval_runtime": 62.7397, "eval_samples_per_second": 90.071, "eval_steps_per_second": 4.511, "step": 9773 }, { "epoch": 29.67, "learning_rate": 2.0326409495548962e-05, "loss": 3.0453, "step": 10000 }, { "epoch": 30.0, "eval_bleu": 1.278, "eval_gen_len": 15.9722, "eval_loss": 3.1378531455993652, "eval_runtime": 62.634, "eval_samples_per_second": 90.223, "eval_steps_per_second": 4.518, "step": 10110 }, { "epoch": 31.0, "eval_bleu": 1.2832, "eval_gen_len": 15.9759, "eval_loss": 3.136270046234131, "eval_runtime": 62.6958, "eval_samples_per_second": 90.134, "eval_steps_per_second": 4.514, "step": 10447 }, { "epoch": 31.16, "learning_rate": 1.884272997032641e-05, "loss": 3.0425, "step": 10500 }, { "epoch": 32.0, "eval_bleu": 1.3089, "eval_gen_len": 16.1499, "eval_loss": 3.136704206466675, "eval_runtime": 62.6194, "eval_samples_per_second": 90.244, "eval_steps_per_second": 4.519, "step": 10784 }, { "epoch": 32.64, "learning_rate": 1.7359050445103858e-05, "loss": 3.0339, "step": 11000 }, { "epoch": 33.0, "eval_bleu": 1.2718, "eval_gen_len": 16.1051, "eval_loss": 3.1367685794830322, "eval_runtime": 62.5638, "eval_samples_per_second": 90.324, "eval_steps_per_second": 4.523, "step": 11121 }, { "epoch": 34.0, "eval_bleu": 1.2683, "eval_gen_len": 16.0159, "eval_loss": 3.1349706649780273, "eval_runtime": 62.675, "eval_samples_per_second": 90.164, "eval_steps_per_second": 4.515, "step": 11458 }, { "epoch": 34.12, "learning_rate": 1.5875370919881306e-05, "loss": 3.0301, "step": 11500 }, { "epoch": 35.0, "eval_bleu": 1.286, "eval_gen_len": 16.1807, "eval_loss": 3.135624408721924, "eval_runtime": 62.61, "eval_samples_per_second": 90.257, "eval_steps_per_second": 4.52, "step": 11795 }, { "epoch": 35.61, "learning_rate": 1.4391691394658754e-05, "loss": 3.0197, "step": 12000 }, { "epoch": 36.0, "eval_bleu": 1.267, "eval_gen_len": 16.0966, "eval_loss": 3.1348326206207275, "eval_runtime": 62.637, "eval_samples_per_second": 90.218, "eval_steps_per_second": 4.518, "step": 12132 }, { "epoch": 37.0, "eval_bleu": 1.3013, "eval_gen_len": 15.9283, "eval_loss": 3.1351914405822754, "eval_runtime": 62.7389, "eval_samples_per_second": 90.072, "eval_steps_per_second": 4.511, "step": 12469 }, { "epoch": 37.09, "learning_rate": 1.29080118694362e-05, "loss": 3.0204, "step": 12500 }, { "epoch": 38.0, "eval_bleu": 1.282, "eval_gen_len": 16.0099, "eval_loss": 3.1345412731170654, "eval_runtime": 62.6541, "eval_samples_per_second": 90.194, "eval_steps_per_second": 4.517, "step": 12806 }, { "epoch": 38.57, "learning_rate": 1.142433234421365e-05, "loss": 3.0169, "step": 13000 }, { "epoch": 39.0, "eval_bleu": 1.3076, "eval_gen_len": 16.1136, "eval_loss": 3.1348047256469727, "eval_runtime": 62.7366, "eval_samples_per_second": 90.075, "eval_steps_per_second": 4.511, "step": 13143 }, { "epoch": 40.0, "eval_bleu": 1.2947, "eval_gen_len": 16.0117, "eval_loss": 3.135272979736328, "eval_runtime": 62.894, "eval_samples_per_second": 89.85, "eval_steps_per_second": 4.5, "step": 13480 }, { "epoch": 40.06, "learning_rate": 9.940652818991098e-06, "loss": 3.0093, "step": 13500 }, { "epoch": 41.0, "eval_bleu": 1.3047, "eval_gen_len": 16.0662, "eval_loss": 3.134445905685425, "eval_runtime": 62.7291, "eval_samples_per_second": 90.086, "eval_steps_per_second": 4.511, "step": 13817 }, { "epoch": 41.54, "learning_rate": 8.456973293768548e-06, "loss": 3.0009, "step": 14000 }, { "epoch": 42.0, "eval_bleu": 1.3017, "eval_gen_len": 16.1531, "eval_loss": 3.134453535079956, "eval_runtime": 62.7311, "eval_samples_per_second": 90.083, "eval_steps_per_second": 4.511, "step": 14154 }, { "epoch": 43.0, "eval_bleu": 1.2762, "eval_gen_len": 16.0741, "eval_loss": 3.1347758769989014, "eval_runtime": 62.6827, "eval_samples_per_second": 90.152, "eval_steps_per_second": 4.515, "step": 14491 }, { "epoch": 43.03, "learning_rate": 6.973293768545995e-06, "loss": 3.0066, "step": 14500 }, { "epoch": 44.0, "eval_bleu": 1.2743, "eval_gen_len": 16.069, "eval_loss": 3.1343321800231934, "eval_runtime": 62.7369, "eval_samples_per_second": 90.075, "eval_steps_per_second": 4.511, "step": 14828 }, { "epoch": 44.51, "learning_rate": 5.489614243323442e-06, "loss": 3.0014, "step": 15000 }, { "epoch": 45.0, "eval_bleu": 1.264, "eval_gen_len": 16.0232, "eval_loss": 3.1345059871673584, "eval_runtime": 62.8103, "eval_samples_per_second": 89.969, "eval_steps_per_second": 4.506, "step": 15165 }, { "epoch": 45.99, "learning_rate": 4.005934718100891e-06, "loss": 2.9948, "step": 15500 }, { "epoch": 46.0, "eval_bleu": 1.2856, "eval_gen_len": 16.1014, "eval_loss": 3.134138345718384, "eval_runtime": 62.6814, "eval_samples_per_second": 90.154, "eval_steps_per_second": 4.515, "step": 15502 }, { "epoch": 47.0, "eval_bleu": 1.2889, "eval_gen_len": 16.0956, "eval_loss": 3.1343159675598145, "eval_runtime": 62.7663, "eval_samples_per_second": 90.032, "eval_steps_per_second": 4.509, "step": 15839 }, { "epoch": 47.48, "learning_rate": 2.5222551928783385e-06, "loss": 3.0016, "step": 16000 }, { "epoch": 48.0, "eval_bleu": 1.2823, "eval_gen_len": 16.0342, "eval_loss": 3.13439679145813, "eval_runtime": 62.7228, "eval_samples_per_second": 90.095, "eval_steps_per_second": 4.512, "step": 16176 }, { "epoch": 48.96, "learning_rate": 1.0385756676557863e-06, "loss": 2.992, "step": 16500 }, { "epoch": 49.0, "eval_bleu": 1.2903, "eval_gen_len": 16.0894, "eval_loss": 3.134230136871338, "eval_runtime": 62.7414, "eval_samples_per_second": 90.068, "eval_steps_per_second": 4.511, "step": 16513 }, { "epoch": 50.0, "eval_bleu": 1.279, "eval_gen_len": 16.0568, "eval_loss": 3.1342360973358154, "eval_runtime": 62.8254, "eval_samples_per_second": 89.948, "eval_steps_per_second": 4.505, "step": 16850 }, { "epoch": 50.0, "step": 16850, "total_flos": 8.49568285776937e+16, "train_loss": 3.1156047950088093, "train_runtime": 18325.9577, "train_samples_per_second": 73.715, "train_steps_per_second": 0.919 } ], "max_steps": 16850, "num_train_epochs": 50, "total_flos": 8.49568285776937e+16, "trial_name": null, "trial_params": null }