|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 24.91103202846975, |
|
"eval_steps": 500, |
|
"global_step": 14000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.9288256227758007e-05, |
|
"loss": 0.7021, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_bp": 0.4478803932550057, |
|
"eval_counts": [ |
|
951, |
|
851, |
|
761, |
|
672 |
|
], |
|
"eval_loss": 0.1726560890674591, |
|
"eval_precisions": [ |
|
95.96367305751765, |
|
92.19934994582881, |
|
89.00584795321637, |
|
85.38754764930114 |
|
], |
|
"eval_ref_len": 1787, |
|
"eval_runtime": 18.1569, |
|
"eval_samples_per_second": 3.745, |
|
"eval_score": 40.55776382580726, |
|
"eval_steps_per_second": 1.873, |
|
"eval_sys_len": 991, |
|
"eval_totals": [ |
|
991, |
|
923, |
|
855, |
|
787 |
|
], |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 1.8576512455516017e-05, |
|
"loss": 0.2854, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_bp": 0.441355602158059, |
|
"eval_counts": [ |
|
962, |
|
883, |
|
802, |
|
721 |
|
], |
|
"eval_loss": 0.11405563354492188, |
|
"eval_precisions": [ |
|
97.86368260427264, |
|
96.50273224043715, |
|
94.68713105076742, |
|
92.55455712451861 |
|
], |
|
"eval_ref_len": 1787, |
|
"eval_runtime": 16.4762, |
|
"eval_samples_per_second": 4.127, |
|
"eval_score": 42.09699789518687, |
|
"eval_steps_per_second": 2.064, |
|
"eval_sys_len": 983, |
|
"eval_totals": [ |
|
983, |
|
915, |
|
847, |
|
779 |
|
], |
|
"step": 1124 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 1.7864768683274022e-05, |
|
"loss": 0.1956, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_bp": 0.44380356964488166, |
|
"eval_counts": [ |
|
965, |
|
886, |
|
806, |
|
726 |
|
], |
|
"eval_loss": 0.10203403234481812, |
|
"eval_precisions": [ |
|
97.87018255578093, |
|
96.51416122004358, |
|
94.82352941176471, |
|
92.8388746803069 |
|
], |
|
"eval_ref_len": 1787, |
|
"eval_runtime": 16.5425, |
|
"eval_samples_per_second": 4.111, |
|
"eval_score": 42.380165345955355, |
|
"eval_steps_per_second": 2.055, |
|
"eval_sys_len": 986, |
|
"eval_totals": [ |
|
986, |
|
918, |
|
850, |
|
782 |
|
], |
|
"step": 1686 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 1.715302491103203e-05, |
|
"loss": 0.1441, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_bp": 0.437272650606809, |
|
"eval_counts": [ |
|
966, |
|
894, |
|
816, |
|
739 |
|
], |
|
"eval_loss": 0.05576588958501816, |
|
"eval_precisions": [ |
|
98.77300613496932, |
|
98.24175824175825, |
|
96.91211401425178, |
|
95.4780361757106 |
|
], |
|
"eval_ref_len": 1787, |
|
"eval_runtime": 16.5385, |
|
"eval_samples_per_second": 4.112, |
|
"eval_score": 42.565359021286106, |
|
"eval_steps_per_second": 2.056, |
|
"eval_sys_len": 978, |
|
"eval_totals": [ |
|
978, |
|
910, |
|
842, |
|
774 |
|
], |
|
"step": 2248 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"learning_rate": 1.6441281138790037e-05, |
|
"loss": 0.1082, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_bp": 0.425821753267709, |
|
"eval_counts": [ |
|
956, |
|
882, |
|
809, |
|
736 |
|
], |
|
"eval_loss": 0.0341346338391304, |
|
"eval_precisions": [ |
|
99.1701244813278, |
|
98.4375, |
|
97.70531400966183, |
|
96.84210526315789 |
|
], |
|
"eval_ref_len": 1787, |
|
"eval_runtime": 16.4383, |
|
"eval_samples_per_second": 4.137, |
|
"eval_score": 41.745417030738814, |
|
"eval_steps_per_second": 2.068, |
|
"eval_sys_len": 964, |
|
"eval_totals": [ |
|
964, |
|
896, |
|
828, |
|
760 |
|
], |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 5.34, |
|
"learning_rate": 1.5729537366548043e-05, |
|
"loss": 0.0942, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_bp": 0.4356384499188918, |
|
"eval_counts": [ |
|
973, |
|
899, |
|
824, |
|
749 |
|
], |
|
"eval_loss": 0.05856137350201607, |
|
"eval_precisions": [ |
|
99.69262295081967, |
|
99.00881057268722, |
|
98.0952380952381, |
|
97.02072538860104 |
|
], |
|
"eval_ref_len": 1787, |
|
"eval_runtime": 16.3313, |
|
"eval_samples_per_second": 4.164, |
|
"eval_score": 42.888270294493864, |
|
"eval_steps_per_second": 2.082, |
|
"eval_sys_len": 976, |
|
"eval_totals": [ |
|
976, |
|
908, |
|
840, |
|
772 |
|
], |
|
"step": 3372 |
|
}, |
|
{ |
|
"epoch": 6.23, |
|
"learning_rate": 1.5017793594306052e-05, |
|
"loss": 0.0713, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_bp": 0.42745918009113226, |
|
"eval_counts": [ |
|
964, |
|
894, |
|
824, |
|
754 |
|
], |
|
"eval_loss": 0.02425399236381054, |
|
"eval_precisions": [ |
|
99.79296066252589, |
|
99.55456570155901, |
|
99.27710843373494, |
|
98.9501312335958 |
|
], |
|
"eval_ref_len": 1787, |
|
"eval_runtime": 16.4255, |
|
"eval_samples_per_second": 4.14, |
|
"eval_score": 42.486533114975146, |
|
"eval_steps_per_second": 2.07, |
|
"eval_sys_len": 966, |
|
"eval_totals": [ |
|
966, |
|
898, |
|
830, |
|
762 |
|
], |
|
"step": 3934 |
|
}, |
|
{ |
|
"epoch": 7.12, |
|
"learning_rate": 1.4306049822064058e-05, |
|
"loss": 0.0599, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_bp": 0.43073249568232586, |
|
"eval_counts": [ |
|
961, |
|
890, |
|
820, |
|
751 |
|
], |
|
"eval_loss": 0.015419703908264637, |
|
"eval_precisions": [ |
|
99.0721649484536, |
|
98.66962305986696, |
|
98.32134292565948, |
|
98.04177545691905 |
|
], |
|
"eval_ref_len": 1787, |
|
"eval_runtime": 16.187, |
|
"eval_samples_per_second": 4.201, |
|
"eval_score": 42.438122327030186, |
|
"eval_steps_per_second": 2.1, |
|
"eval_sys_len": 970, |
|
"eval_totals": [ |
|
970, |
|
902, |
|
834, |
|
766 |
|
], |
|
"step": 4496 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 1.3594306049822066e-05, |
|
"loss": 0.048, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 8.9, |
|
"learning_rate": 1.2882562277580073e-05, |
|
"loss": 0.0392, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_bp": 0.4250028527550296, |
|
"eval_counts": [ |
|
959, |
|
889, |
|
818, |
|
747 |
|
], |
|
"eval_loss": 0.019724518060684204, |
|
"eval_precisions": [ |
|
99.5846313603323, |
|
99.32960893854748, |
|
98.9117291414752, |
|
98.41897233201581 |
|
], |
|
"eval_ref_len": 1787, |
|
"eval_runtime": 16.8132, |
|
"eval_samples_per_second": 4.044, |
|
"eval_score": 42.10088852964188, |
|
"eval_steps_per_second": 2.022, |
|
"eval_sys_len": 963, |
|
"eval_totals": [ |
|
963, |
|
895, |
|
827, |
|
759 |
|
], |
|
"step": 5058 |
|
}, |
|
{ |
|
"epoch": 9.79, |
|
"learning_rate": 1.217081850533808e-05, |
|
"loss": 0.0319, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_bp": 0.42909609813393135, |
|
"eval_counts": [ |
|
966, |
|
898, |
|
830, |
|
762 |
|
], |
|
"eval_loss": 0.005858541466295719, |
|
"eval_precisions": [ |
|
99.79338842975207, |
|
99.77777777777777, |
|
99.75961538461539, |
|
99.73821989528795 |
|
], |
|
"eval_ref_len": 1787, |
|
"eval_runtime": 16.1653, |
|
"eval_samples_per_second": 4.207, |
|
"eval_score": 42.809736944779694, |
|
"eval_steps_per_second": 2.103, |
|
"eval_sys_len": 968, |
|
"eval_totals": [ |
|
968, |
|
900, |
|
832, |
|
764 |
|
], |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 10.68, |
|
"learning_rate": 1.1459074733096086e-05, |
|
"loss": 0.026, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_bp": 0.42909609813393135, |
|
"eval_counts": [ |
|
966, |
|
898, |
|
829, |
|
760 |
|
], |
|
"eval_loss": 0.010834704153239727, |
|
"eval_precisions": [ |
|
99.79338842975207, |
|
99.77777777777777, |
|
99.63942307692308, |
|
99.47643979057591 |
|
], |
|
"eval_ref_len": 1787, |
|
"eval_runtime": 16.4611, |
|
"eval_samples_per_second": 4.131, |
|
"eval_score": 42.768727019910926, |
|
"eval_steps_per_second": 2.065, |
|
"eval_sys_len": 968, |
|
"eval_totals": [ |
|
968, |
|
900, |
|
832, |
|
764 |
|
], |
|
"step": 6182 |
|
}, |
|
{ |
|
"epoch": 11.57, |
|
"learning_rate": 1.0747330960854094e-05, |
|
"loss": 0.025, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_bp": 0.42909609813393135, |
|
"eval_counts": [ |
|
966, |
|
898, |
|
830, |
|
762 |
|
], |
|
"eval_loss": 0.0013562627136707306, |
|
"eval_precisions": [ |
|
99.79338842975207, |
|
99.77777777777777, |
|
99.75961538461539, |
|
99.73821989528795 |
|
], |
|
"eval_ref_len": 1787, |
|
"eval_runtime": 16.1168, |
|
"eval_samples_per_second": 4.219, |
|
"eval_score": 42.809736944779694, |
|
"eval_steps_per_second": 2.11, |
|
"eval_sys_len": 968, |
|
"eval_totals": [ |
|
968, |
|
900, |
|
832, |
|
764 |
|
], |
|
"step": 6744 |
|
}, |
|
{ |
|
"epoch": 12.46, |
|
"learning_rate": 1.0035587188612101e-05, |
|
"loss": 0.0182, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_bp": 0.42909609813393135, |
|
"eval_counts": [ |
|
966, |
|
898, |
|
830, |
|
762 |
|
], |
|
"eval_loss": 0.0013860436156392097, |
|
"eval_precisions": [ |
|
99.79338842975207, |
|
99.77777777777777, |
|
99.75961538461539, |
|
99.73821989528795 |
|
], |
|
"eval_ref_len": 1787, |
|
"eval_runtime": 16.6416, |
|
"eval_samples_per_second": 4.086, |
|
"eval_score": 42.809736944779694, |
|
"eval_steps_per_second": 2.043, |
|
"eval_sys_len": 968, |
|
"eval_totals": [ |
|
968, |
|
900, |
|
832, |
|
764 |
|
], |
|
"step": 7306 |
|
}, |
|
{ |
|
"epoch": 13.35, |
|
"learning_rate": 9.323843416370107e-06, |
|
"loss": 0.0148, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_bp": 0.4315504956415897, |
|
"eval_counts": [ |
|
963, |
|
894, |
|
825, |
|
756 |
|
], |
|
"eval_loss": 0.004028095863759518, |
|
"eval_precisions": [ |
|
99.17610710607622, |
|
99.00332225913621, |
|
98.80239520958084, |
|
98.56584093872229 |
|
], |
|
"eval_ref_len": 1787, |
|
"eval_runtime": 16.2486, |
|
"eval_samples_per_second": 4.185, |
|
"eval_score": 42.674584599098594, |
|
"eval_steps_per_second": 2.092, |
|
"eval_sys_len": 971, |
|
"eval_totals": [ |
|
971, |
|
903, |
|
835, |
|
767 |
|
], |
|
"step": 7868 |
|
}, |
|
{ |
|
"epoch": 14.23, |
|
"learning_rate": 8.612099644128115e-06, |
|
"loss": 0.0149, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_bp": 0.4299143626976017, |
|
"eval_counts": [ |
|
967, |
|
899, |
|
830, |
|
761 |
|
], |
|
"eval_loss": 0.004267631098628044, |
|
"eval_precisions": [ |
|
99.79360165118679, |
|
99.7780244173141, |
|
99.63985594237695, |
|
99.47712418300654 |
|
], |
|
"eval_ref_len": 1787, |
|
"eval_runtime": 16.2443, |
|
"eval_samples_per_second": 4.186, |
|
"eval_score": 42.85045442963625, |
|
"eval_steps_per_second": 2.093, |
|
"eval_sys_len": 969, |
|
"eval_totals": [ |
|
969, |
|
901, |
|
833, |
|
765 |
|
], |
|
"step": 8430 |
|
}, |
|
{ |
|
"epoch": 15.12, |
|
"learning_rate": 7.900355871886122e-06, |
|
"loss": 0.0144, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_bp": 0.42909609813393135, |
|
"eval_counts": [ |
|
966, |
|
898, |
|
830, |
|
762 |
|
], |
|
"eval_loss": 0.00286501320078969, |
|
"eval_precisions": [ |
|
99.79338842975207, |
|
99.77777777777777, |
|
99.75961538461539, |
|
99.73821989528795 |
|
], |
|
"eval_ref_len": 1787, |
|
"eval_runtime": 16.5144, |
|
"eval_samples_per_second": 4.118, |
|
"eval_score": 42.809736944779694, |
|
"eval_steps_per_second": 2.059, |
|
"eval_sys_len": 968, |
|
"eval_totals": [ |
|
968, |
|
900, |
|
832, |
|
764 |
|
], |
|
"step": 8992 |
|
}, |
|
{ |
|
"epoch": 16.01, |
|
"learning_rate": 7.188612099644129e-06, |
|
"loss": 0.0112, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 16.9, |
|
"learning_rate": 6.476868327402136e-06, |
|
"loss": 0.0098, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_bp": 0.42909609813393135, |
|
"eval_counts": [ |
|
966, |
|
898, |
|
830, |
|
762 |
|
], |
|
"eval_loss": 0.0009978804737329483, |
|
"eval_precisions": [ |
|
99.79338842975207, |
|
99.77777777777777, |
|
99.75961538461539, |
|
99.73821989528795 |
|
], |
|
"eval_ref_len": 1787, |
|
"eval_runtime": 16.1028, |
|
"eval_samples_per_second": 4.223, |
|
"eval_score": 42.809736944779694, |
|
"eval_steps_per_second": 2.111, |
|
"eval_sys_len": 968, |
|
"eval_totals": [ |
|
968, |
|
900, |
|
832, |
|
764 |
|
], |
|
"step": 9554 |
|
}, |
|
{ |
|
"epoch": 17.79, |
|
"learning_rate": 5.765124555160143e-06, |
|
"loss": 0.0089, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_bp": 0.42909609813393135, |
|
"eval_counts": [ |
|
966, |
|
898, |
|
830, |
|
762 |
|
], |
|
"eval_loss": 0.00037646759301424026, |
|
"eval_precisions": [ |
|
99.79338842975207, |
|
99.77777777777777, |
|
99.75961538461539, |
|
99.73821989528795 |
|
], |
|
"eval_ref_len": 1787, |
|
"eval_runtime": 16.4684, |
|
"eval_samples_per_second": 4.129, |
|
"eval_score": 42.809736944779694, |
|
"eval_steps_per_second": 2.065, |
|
"eval_sys_len": 968, |
|
"eval_totals": [ |
|
968, |
|
900, |
|
832, |
|
764 |
|
], |
|
"step": 10116 |
|
}, |
|
{ |
|
"epoch": 18.68, |
|
"learning_rate": 5.05338078291815e-06, |
|
"loss": 0.0075, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_bp": 0.42909609813393135, |
|
"eval_counts": [ |
|
966, |
|
898, |
|
830, |
|
762 |
|
], |
|
"eval_loss": 0.00018596854351926595, |
|
"eval_precisions": [ |
|
99.79338842975207, |
|
99.77777777777777, |
|
99.75961538461539, |
|
99.73821989528795 |
|
], |
|
"eval_ref_len": 1787, |
|
"eval_runtime": 16.3451, |
|
"eval_samples_per_second": 4.16, |
|
"eval_score": 42.809736944779694, |
|
"eval_steps_per_second": 2.08, |
|
"eval_sys_len": 968, |
|
"eval_totals": [ |
|
968, |
|
900, |
|
832, |
|
764 |
|
], |
|
"step": 10678 |
|
}, |
|
{ |
|
"epoch": 19.57, |
|
"learning_rate": 4.341637010676157e-06, |
|
"loss": 0.0068, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_bp": 0.42909609813393135, |
|
"eval_counts": [ |
|
966, |
|
898, |
|
830, |
|
762 |
|
], |
|
"eval_loss": 7.006935447861906e-06, |
|
"eval_precisions": [ |
|
99.79338842975207, |
|
99.77777777777777, |
|
99.75961538461539, |
|
99.73821989528795 |
|
], |
|
"eval_ref_len": 1787, |
|
"eval_runtime": 16.0905, |
|
"eval_samples_per_second": 4.226, |
|
"eval_score": 42.809736944779694, |
|
"eval_steps_per_second": 2.113, |
|
"eval_sys_len": 968, |
|
"eval_totals": [ |
|
968, |
|
900, |
|
832, |
|
764 |
|
], |
|
"step": 11240 |
|
}, |
|
{ |
|
"epoch": 20.46, |
|
"learning_rate": 3.629893238434164e-06, |
|
"loss": 0.0056, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_bp": 0.42909609813393135, |
|
"eval_counts": [ |
|
966, |
|
898, |
|
830, |
|
762 |
|
], |
|
"eval_loss": 1.5906211046967655e-05, |
|
"eval_precisions": [ |
|
99.79338842975207, |
|
99.77777777777777, |
|
99.75961538461539, |
|
99.73821989528795 |
|
], |
|
"eval_ref_len": 1787, |
|
"eval_runtime": 16.3937, |
|
"eval_samples_per_second": 4.148, |
|
"eval_score": 42.809736944779694, |
|
"eval_steps_per_second": 2.074, |
|
"eval_sys_len": 968, |
|
"eval_totals": [ |
|
968, |
|
900, |
|
832, |
|
764 |
|
], |
|
"step": 11802 |
|
}, |
|
{ |
|
"epoch": 21.35, |
|
"learning_rate": 2.918149466192171e-06, |
|
"loss": 0.0041, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_bp": 0.42909609813393135, |
|
"eval_counts": [ |
|
966, |
|
898, |
|
830, |
|
762 |
|
], |
|
"eval_loss": 1.3131144442013465e-05, |
|
"eval_precisions": [ |
|
99.79338842975207, |
|
99.77777777777777, |
|
99.75961538461539, |
|
99.73821989528795 |
|
], |
|
"eval_ref_len": 1787, |
|
"eval_runtime": 16.4474, |
|
"eval_samples_per_second": 4.134, |
|
"eval_score": 42.809736944779694, |
|
"eval_steps_per_second": 2.067, |
|
"eval_sys_len": 968, |
|
"eval_totals": [ |
|
968, |
|
900, |
|
832, |
|
764 |
|
], |
|
"step": 12364 |
|
}, |
|
{ |
|
"epoch": 22.24, |
|
"learning_rate": 2.2064056939501782e-06, |
|
"loss": 0.0033, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_bp": 0.42909609813393135, |
|
"eval_counts": [ |
|
966, |
|
898, |
|
830, |
|
762 |
|
], |
|
"eval_loss": 5.197064183448674e-06, |
|
"eval_precisions": [ |
|
99.79338842975207, |
|
99.77777777777777, |
|
99.75961538461539, |
|
99.73821989528795 |
|
], |
|
"eval_ref_len": 1787, |
|
"eval_runtime": 16.1321, |
|
"eval_samples_per_second": 4.215, |
|
"eval_score": 42.809736944779694, |
|
"eval_steps_per_second": 2.108, |
|
"eval_sys_len": 968, |
|
"eval_totals": [ |
|
968, |
|
900, |
|
832, |
|
764 |
|
], |
|
"step": 12926 |
|
}, |
|
{ |
|
"epoch": 23.13, |
|
"learning_rate": 1.494661921708185e-06, |
|
"loss": 0.0033, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_bp": 0.42909609813393135, |
|
"eval_counts": [ |
|
966, |
|
898, |
|
830, |
|
762 |
|
], |
|
"eval_loss": 2.9162031296436908e-06, |
|
"eval_precisions": [ |
|
99.79338842975207, |
|
99.77777777777777, |
|
99.75961538461539, |
|
99.73821989528795 |
|
], |
|
"eval_ref_len": 1787, |
|
"eval_runtime": 16.4335, |
|
"eval_samples_per_second": 4.138, |
|
"eval_score": 42.809736944779694, |
|
"eval_steps_per_second": 2.069, |
|
"eval_sys_len": 968, |
|
"eval_totals": [ |
|
968, |
|
900, |
|
832, |
|
764 |
|
], |
|
"step": 13488 |
|
}, |
|
{ |
|
"epoch": 24.02, |
|
"learning_rate": 7.829181494661923e-07, |
|
"loss": 0.0025, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 24.91, |
|
"learning_rate": 7.117437722419929e-08, |
|
"loss": 0.0023, |
|
"step": 14000 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 14050, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 25, |
|
"save_steps": 500, |
|
"total_flos": 2476170265006080.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|