{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 49.99777942264989,
  "global_step": 16850,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "eval_bleu": 0.8119,
      "eval_gen_len": 15.295,
      "eval_loss": 3.3328945636749268,
      "eval_runtime": 64.1416,
      "eval_samples_per_second": 88.102,
      "eval_steps_per_second": 4.412,
      "step": 337
    },
    {
      "epoch": 1.48,
      "learning_rate": 4.851632047477745e-05,
      "loss": 3.528,
      "step": 500
    },
    {
      "epoch": 2.0,
      "eval_bleu": 0.9197,
      "eval_gen_len": 15.3056,
      "eval_loss": 3.283737897872925,
      "eval_runtime": 63.9504,
      "eval_samples_per_second": 88.365,
      "eval_steps_per_second": 4.425,
      "step": 674
    },
    {
      "epoch": 2.97,
      "learning_rate": 4.70326409495549e-05,
      "loss": 3.3932,
      "step": 1000
    },
    {
      "epoch": 3.0,
      "eval_bleu": 0.9536,
      "eval_gen_len": 15.6238,
      "eval_loss": 3.2553622722625732,
      "eval_runtime": 62.6609,
      "eval_samples_per_second": 90.184,
      "eval_steps_per_second": 4.516,
      "step": 1011
    },
    {
      "epoch": 4.0,
      "eval_bleu": 1.0082,
      "eval_gen_len": 15.5254,
      "eval_loss": 3.236884117126465,
      "eval_runtime": 62.9428,
      "eval_samples_per_second": 89.78,
      "eval_steps_per_second": 4.496,
      "step": 1348
    },
    {
      "epoch": 4.45,
      "learning_rate": 4.554896142433235e-05,
      "loss": 3.3394,
      "step": 1500
    },
    {
      "epoch": 5.0,
      "eval_bleu": 1.088,
      "eval_gen_len": 15.7466,
      "eval_loss": 3.222892999649048,
      "eval_runtime": 64.2741,
      "eval_samples_per_second": 87.92,
      "eval_steps_per_second": 4.403,
      "step": 1685
    },
    {
      "epoch": 5.93,
      "learning_rate": 4.4065281899109794e-05,
      "loss": 3.2982,
      "step": 2000
    },
    {
      "epoch": 6.0,
      "eval_bleu": 1.1295,
      "eval_gen_len": 15.7866,
      "eval_loss": 3.210378885269165,
      "eval_runtime": 64.1493,
      "eval_samples_per_second": 88.091,
      "eval_steps_per_second": 4.412,
      "step": 2022
    },
    {
      "epoch": 7.0,
      "eval_bleu": 1.1648,
      "eval_gen_len": 15.6852,
      "eval_loss": 3.200908899307251,
      "eval_runtime": 63.0315,
      "eval_samples_per_second": 89.654,
      "eval_steps_per_second": 4.49,
      "step": 2359
    },
    {
      "epoch": 7.42,
      "learning_rate": 4.258160237388724e-05,
      "loss": 3.2658,
      "step": 2500
    },
    {
      "epoch": 8.0,
      "eval_bleu": 1.1548,
      "eval_gen_len": 15.9358,
      "eval_loss": 3.191558361053467,
      "eval_runtime": 64.0034,
      "eval_samples_per_second": 88.292,
      "eval_steps_per_second": 4.422,
      "step": 2696
    },
    {
      "epoch": 8.9,
      "learning_rate": 4.109792284866469e-05,
      "loss": 3.2397,
      "step": 3000
    },
    {
      "epoch": 9.0,
      "eval_bleu": 1.0651,
      "eval_gen_len": 16.0241,
      "eval_loss": 3.1847681999206543,
      "eval_runtime": 64.3166,
      "eval_samples_per_second": 87.862,
      "eval_steps_per_second": 4.4,
      "step": 3033
    },
    {
      "epoch": 10.0,
      "eval_bleu": 1.1443,
      "eval_gen_len": 16.0409,
      "eval_loss": 3.1797027587890625,
      "eval_runtime": 62.6803,
      "eval_samples_per_second": 90.156,
      "eval_steps_per_second": 4.515,
      "step": 3370
    },
    {
      "epoch": 10.38,
      "learning_rate": 3.961424332344214e-05,
      "loss": 3.2203,
      "step": 3500
    },
    {
      "epoch": 11.0,
      "eval_bleu": 1.1562,
      "eval_gen_len": 15.9648,
      "eval_loss": 3.1735124588012695,
      "eval_runtime": 64.1808,
      "eval_samples_per_second": 88.048,
      "eval_steps_per_second": 4.409,
      "step": 3707
    },
    {
      "epoch": 11.87,
      "learning_rate": 3.8130563798219586e-05,
      "loss": 3.1935,
      "step": 4000
    },
    {
      "epoch": 12.0,
      "eval_bleu": 1.145,
      "eval_gen_len": 15.7811,
      "eval_loss": 3.1689915657043457,
      "eval_runtime": 62.6765,
      "eval_samples_per_second": 90.161,
      "eval_steps_per_second": 4.515,
      "step": 4044
    },
    {
      "epoch": 13.0,
      "eval_bleu": 1.1534,
      "eval_gen_len": 15.9582,
      "eval_loss": 3.1667888164520264,
      "eval_runtime": 64.2839,
      "eval_samples_per_second": 87.907,
      "eval_steps_per_second": 4.402,
      "step": 4381
    },
    {
      "epoch": 13.35,
      "learning_rate": 3.664688427299703e-05,
      "loss": 3.1791,
      "step": 4500
    },
    {
      "epoch": 14.0,
      "eval_bleu": 1.1476,
      "eval_gen_len": 15.9605,
      "eval_loss": 3.1629080772399902,
      "eval_runtime": 63.0085,
      "eval_samples_per_second": 89.686,
      "eval_steps_per_second": 4.491,
      "step": 4718
    },
    {
      "epoch": 14.83,
      "learning_rate": 3.516320474777448e-05,
      "loss": 3.1562,
      "step": 5000
    },
    {
      "epoch": 15.0,
      "eval_bleu": 1.1504,
      "eval_gen_len": 15.9747,
      "eval_loss": 3.1592817306518555,
      "eval_runtime": 63.8036,
      "eval_samples_per_second": 88.569,
      "eval_steps_per_second": 4.435,
      "step": 5055
    },
    {
      "epoch": 16.0,
      "eval_bleu": 1.2003,
      "eval_gen_len": 15.6548,
      "eval_loss": 3.1569018363952637,
      "eval_runtime": 63.7567,
      "eval_samples_per_second": 88.634,
      "eval_steps_per_second": 4.439,
      "step": 5392
    },
    {
      "epoch": 16.32,
      "learning_rate": 3.3679525222551934e-05,
      "loss": 3.1461,
      "step": 5500
    },
    {
      "epoch": 17.0,
      "eval_bleu": 1.187,
      "eval_gen_len": 16.2831,
      "eval_loss": 3.1528096199035645,
      "eval_runtime": 62.8317,
      "eval_samples_per_second": 89.939,
      "eval_steps_per_second": 4.504,
      "step": 5729
    },
    {
      "epoch": 17.8,
      "learning_rate": 3.219584569732938e-05,
      "loss": 3.1301,
      "step": 6000
    },
    {
      "epoch": 18.0,
      "eval_bleu": 1.1908,
      "eval_gen_len": 16.2078,
      "eval_loss": 3.15116024017334,
      "eval_runtime": 62.7949,
      "eval_samples_per_second": 89.991,
      "eval_steps_per_second": 4.507,
      "step": 6066
    },
    {
      "epoch": 19.0,
      "eval_bleu": 1.2164,
      "eval_gen_len": 16.1816,
      "eval_loss": 3.149709463119507,
      "eval_runtime": 63.3277,
      "eval_samples_per_second": 89.234,
      "eval_steps_per_second": 4.469,
      "step": 6403
    },
    {
      "epoch": 19.29,
      "learning_rate": 3.071216617210683e-05,
      "loss": 3.1199,
      "step": 6500
    },
    {
      "epoch": 20.0,
      "eval_bleu": 1.2493,
      "eval_gen_len": 16.0138,
      "eval_loss": 3.147409200668335,
      "eval_runtime": 62.5603,
      "eval_samples_per_second": 90.329,
      "eval_steps_per_second": 4.524,
      "step": 6740
    },
    {
      "epoch": 20.77,
      "learning_rate": 2.9228486646884274e-05,
      "loss": 3.1012,
      "step": 7000
    },
    {
      "epoch": 21.0,
      "eval_bleu": 1.2189,
      "eval_gen_len": 15.9667,
      "eval_loss": 3.1471338272094727,
      "eval_runtime": 62.6224,
      "eval_samples_per_second": 90.239,
      "eval_steps_per_second": 4.519,
      "step": 7077
    },
    {
      "epoch": 22.0,
      "eval_bleu": 1.2535,
      "eval_gen_len": 16.0239,
      "eval_loss": 3.145897150039673,
      "eval_runtime": 62.7278,
      "eval_samples_per_second": 90.088,
      "eval_steps_per_second": 4.512,
      "step": 7414
    },
    {
      "epoch": 22.25,
      "learning_rate": 2.774480712166172e-05,
      "loss": 3.0953,
      "step": 7500
    },
    {
      "epoch": 23.0,
      "eval_bleu": 1.2554,
      "eval_gen_len": 15.7225,
      "eval_loss": 3.143968343734741,
      "eval_runtime": 63.12,
      "eval_samples_per_second": 89.528,
      "eval_steps_per_second": 4.484,
      "step": 7751
    },
    {
      "epoch": 23.74,
      "learning_rate": 2.6261127596439174e-05,
      "loss": 3.0761,
      "step": 8000
    },
    {
      "epoch": 24.0,
      "eval_bleu": 1.2086,
      "eval_gen_len": 16.0069,
      "eval_loss": 3.141495943069458,
      "eval_runtime": 62.7045,
      "eval_samples_per_second": 90.121,
      "eval_steps_per_second": 4.513,
      "step": 8088
    },
    {
      "epoch": 25.0,
      "eval_bleu": 1.305,
      "eval_gen_len": 15.9912,
      "eval_loss": 3.1416375637054443,
      "eval_runtime": 62.5621,
      "eval_samples_per_second": 90.326,
      "eval_steps_per_second": 4.524,
      "step": 8425
    },
    {
      "epoch": 25.22,
      "learning_rate": 2.4777448071216618e-05,
      "loss": 3.0822,
      "step": 8500
    },
    {
      "epoch": 26.0,
      "eval_bleu": 1.248,
      "eval_gen_len": 15.8406,
      "eval_loss": 3.1405673027038574,
      "eval_runtime": 62.6069,
      "eval_samples_per_second": 90.262,
      "eval_steps_per_second": 4.52,
      "step": 8762
    },
    {
      "epoch": 26.7,
      "learning_rate": 2.3293768545994066e-05,
      "loss": 3.0614,
      "step": 9000
    },
    {
      "epoch": 27.0,
      "eval_bleu": 1.2888,
      "eval_gen_len": 16.3189,
      "eval_loss": 3.140249252319336,
      "eval_runtime": 62.6544,
      "eval_samples_per_second": 90.193,
      "eval_steps_per_second": 4.517,
      "step": 9099
    },
    {
      "epoch": 28.0,
      "eval_bleu": 1.3016,
      "eval_gen_len": 16.0096,
      "eval_loss": 3.1379010677337646,
      "eval_runtime": 62.5617,
      "eval_samples_per_second": 90.327,
      "eval_steps_per_second": 4.524,
      "step": 9436
    },
    {
      "epoch": 28.19,
      "learning_rate": 2.1810089020771514e-05,
      "loss": 3.0594,
      "step": 9500
    },
    {
      "epoch": 29.0,
      "eval_bleu": 1.2268,
      "eval_gen_len": 16.3702,
      "eval_loss": 3.139193058013916,
      "eval_runtime": 62.7397,
      "eval_samples_per_second": 90.071,
      "eval_steps_per_second": 4.511,
      "step": 9773
    },
    {
      "epoch": 29.67,
      "learning_rate": 2.0326409495548962e-05,
      "loss": 3.0453,
      "step": 10000
    },
    {
      "epoch": 30.0,
      "eval_bleu": 1.278,
      "eval_gen_len": 15.9722,
      "eval_loss": 3.1378531455993652,
      "eval_runtime": 62.634,
      "eval_samples_per_second": 90.223,
      "eval_steps_per_second": 4.518,
      "step": 10110
    },
    {
      "epoch": 31.0,
      "eval_bleu": 1.2832,
      "eval_gen_len": 15.9759,
      "eval_loss": 3.136270046234131,
      "eval_runtime": 62.6958,
      "eval_samples_per_second": 90.134,
      "eval_steps_per_second": 4.514,
      "step": 10447
    },
    {
      "epoch": 31.16,
      "learning_rate": 1.884272997032641e-05,
      "loss": 3.0425,
      "step": 10500
    },
    {
      "epoch": 32.0,
      "eval_bleu": 1.3089,
      "eval_gen_len": 16.1499,
      "eval_loss": 3.136704206466675,
      "eval_runtime": 62.6194,
      "eval_samples_per_second": 90.244,
      "eval_steps_per_second": 4.519,
      "step": 10784
    },
    {
      "epoch": 32.64,
      "learning_rate": 1.7359050445103858e-05,
      "loss": 3.0339,
      "step": 11000
    },
    {
      "epoch": 33.0,
      "eval_bleu": 1.2718,
      "eval_gen_len": 16.1051,
      "eval_loss": 3.1367685794830322,
      "eval_runtime": 62.5638,
      "eval_samples_per_second": 90.324,
      "eval_steps_per_second": 4.523,
      "step": 11121
    },
    {
      "epoch": 34.0,
      "eval_bleu": 1.2683,
      "eval_gen_len": 16.0159,
      "eval_loss": 3.1349706649780273,
      "eval_runtime": 62.675,
      "eval_samples_per_second": 90.164,
      "eval_steps_per_second": 4.515,
      "step": 11458
    },
    {
      "epoch": 34.12,
      "learning_rate": 1.5875370919881306e-05,
      "loss": 3.0301,
      "step": 11500
    },
    {
      "epoch": 35.0,
      "eval_bleu": 1.286,
      "eval_gen_len": 16.1807,
      "eval_loss": 3.135624408721924,
      "eval_runtime": 62.61,
      "eval_samples_per_second": 90.257,
      "eval_steps_per_second": 4.52,
      "step": 11795
    },
    {
      "epoch": 35.61,
      "learning_rate": 1.4391691394658754e-05,
      "loss": 3.0197,
      "step": 12000
    },
    {
      "epoch": 36.0,
      "eval_bleu": 1.267,
      "eval_gen_len": 16.0966,
      "eval_loss": 3.1348326206207275,
      "eval_runtime": 62.637,
      "eval_samples_per_second": 90.218,
      "eval_steps_per_second": 4.518,
      "step": 12132
    },
    {
      "epoch": 37.0,
      "eval_bleu": 1.3013,
      "eval_gen_len": 15.9283,
      "eval_loss": 3.1351914405822754,
      "eval_runtime": 62.7389,
      "eval_samples_per_second": 90.072,
      "eval_steps_per_second": 4.511,
      "step": 12469
    },
    {
      "epoch": 37.09,
      "learning_rate": 1.29080118694362e-05,
      "loss": 3.0204,
      "step": 12500
    },
    {
      "epoch": 38.0,
      "eval_bleu": 1.282,
      "eval_gen_len": 16.0099,
      "eval_loss": 3.1345412731170654,
      "eval_runtime": 62.6541,
      "eval_samples_per_second": 90.194,
      "eval_steps_per_second": 4.517,
      "step": 12806
    },
    {
      "epoch": 38.57,
      "learning_rate": 1.142433234421365e-05,
      "loss": 3.0169,
      "step": 13000
    },
    {
      "epoch": 39.0,
      "eval_bleu": 1.3076,
      "eval_gen_len": 16.1136,
      "eval_loss": 3.1348047256469727,
      "eval_runtime": 62.7366,
      "eval_samples_per_second": 90.075,
      "eval_steps_per_second": 4.511,
      "step": 13143
    },
    {
      "epoch": 40.0,
      "eval_bleu": 1.2947,
      "eval_gen_len": 16.0117,
      "eval_loss": 3.135272979736328,
      "eval_runtime": 62.894,
      "eval_samples_per_second": 89.85,
      "eval_steps_per_second": 4.5,
      "step": 13480
    },
    {
      "epoch": 40.06,
      "learning_rate": 9.940652818991098e-06,
      "loss": 3.0093,
      "step": 13500
    },
    {
      "epoch": 41.0,
      "eval_bleu": 1.3047,
      "eval_gen_len": 16.0662,
      "eval_loss": 3.134445905685425,
      "eval_runtime": 62.7291,
      "eval_samples_per_second": 90.086,
      "eval_steps_per_second": 4.511,
      "step": 13817
    },
    {
      "epoch": 41.54,
      "learning_rate": 8.456973293768548e-06,
      "loss": 3.0009,
      "step": 14000
    },
    {
      "epoch": 42.0,
      "eval_bleu": 1.3017,
      "eval_gen_len": 16.1531,
      "eval_loss": 3.134453535079956,
      "eval_runtime": 62.7311,
      "eval_samples_per_second": 90.083,
      "eval_steps_per_second": 4.511,
      "step": 14154
    },
    {
      "epoch": 43.0,
      "eval_bleu": 1.2762,
      "eval_gen_len": 16.0741,
      "eval_loss": 3.1347758769989014,
      "eval_runtime": 62.6827,
      "eval_samples_per_second": 90.152,
      "eval_steps_per_second": 4.515,
      "step": 14491
    },
    {
      "epoch": 43.03,
      "learning_rate": 6.973293768545995e-06,
      "loss": 3.0066,
      "step": 14500
    },
    {
      "epoch": 44.0,
      "eval_bleu": 1.2743,
      "eval_gen_len": 16.069,
      "eval_loss": 3.1343321800231934,
      "eval_runtime": 62.7369,
      "eval_samples_per_second": 90.075,
      "eval_steps_per_second": 4.511,
      "step": 14828
    },
    {
      "epoch": 44.51,
      "learning_rate": 5.489614243323442e-06,
      "loss": 3.0014,
      "step": 15000
    },
    {
      "epoch": 45.0,
      "eval_bleu": 1.264,
      "eval_gen_len": 16.0232,
      "eval_loss": 3.1345059871673584,
      "eval_runtime": 62.8103,
      "eval_samples_per_second": 89.969,
      "eval_steps_per_second": 4.506,
      "step": 15165
    },
    {
      "epoch": 45.99,
      "learning_rate": 4.005934718100891e-06,
      "loss": 2.9948,
      "step": 15500
    },
    {
      "epoch": 46.0,
      "eval_bleu": 1.2856,
      "eval_gen_len": 16.1014,
      "eval_loss": 3.134138345718384,
      "eval_runtime": 62.6814,
      "eval_samples_per_second": 90.154,
      "eval_steps_per_second": 4.515,
      "step": 15502
    },
    {
      "epoch": 47.0,
      "eval_bleu": 1.2889,
      "eval_gen_len": 16.0956,
      "eval_loss": 3.1343159675598145,
      "eval_runtime": 62.7663,
      "eval_samples_per_second": 90.032,
      "eval_steps_per_second": 4.509,
      "step": 15839
    },
    {
      "epoch": 47.48,
      "learning_rate": 2.5222551928783385e-06,
      "loss": 3.0016,
      "step": 16000
    },
    {
      "epoch": 48.0,
      "eval_bleu": 1.2823,
      "eval_gen_len": 16.0342,
      "eval_loss": 3.13439679145813,
      "eval_runtime": 62.7228,
      "eval_samples_per_second": 90.095,
      "eval_steps_per_second": 4.512,
      "step": 16176
    },
    {
      "epoch": 48.96,
      "learning_rate": 1.0385756676557863e-06,
      "loss": 2.992,
      "step": 16500
    },
    {
      "epoch": 49.0,
      "eval_bleu": 1.2903,
      "eval_gen_len": 16.0894,
      "eval_loss": 3.134230136871338,
      "eval_runtime": 62.7414,
      "eval_samples_per_second": 90.068,
      "eval_steps_per_second": 4.511,
      "step": 16513
    },
    {
      "epoch": 50.0,
      "eval_bleu": 1.279,
      "eval_gen_len": 16.0568,
      "eval_loss": 3.1342360973358154,
      "eval_runtime": 62.8254,
      "eval_samples_per_second": 89.948,
      "eval_steps_per_second": 4.505,
      "step": 16850
    },
    {
      "epoch": 50.0,
      "step": 16850,
      "total_flos": 8.49568285776937e+16,
      "train_loss": 3.1156047950088093,
      "train_runtime": 18325.9577,
      "train_samples_per_second": 73.715,
      "train_steps_per_second": 0.919
    }
  ],
  "max_steps": 16850,
  "num_train_epochs": 50,
  "total_flos": 8.49568285776937e+16,
  "trial_name": null,
  "trial_params": null
}