|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 46.51162790697674, |
|
"eval_steps": 500, |
|
"global_step": 6000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_bp": 0.8731878729849682, |
|
"eval_counts": [ |
|
4461, |
|
3931, |
|
3406, |
|
2881 |
|
], |
|
"eval_loss": 0.004611688666045666, |
|
"eval_precisions": [ |
|
99.33199732798931, |
|
99.14249684741488, |
|
99.04041872637394, |
|
98.90147614143494 |
|
], |
|
"eval_ref_len": 5100, |
|
"eval_runtime": 16.8, |
|
"eval_samples_per_second": 31.31, |
|
"eval_score": 86.5363874068898, |
|
"eval_steps_per_second": 1.012, |
|
"eval_sys_len": 4491, |
|
"eval_totals": [ |
|
4491, |
|
3965, |
|
3439, |
|
2913 |
|
], |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_bp": 0.869649689238195, |
|
"eval_counts": [ |
|
4458, |
|
3930, |
|
3403, |
|
2876 |
|
], |
|
"eval_loss": 0.00696711428463459, |
|
"eval_precisions": [ |
|
99.62011173184358, |
|
99.51886553557863, |
|
99.41571720712825, |
|
99.27511218501898 |
|
], |
|
"eval_ref_len": 5100, |
|
"eval_runtime": 13.788, |
|
"eval_samples_per_second": 38.149, |
|
"eval_score": 86.49307063670169, |
|
"eval_steps_per_second": 1.233, |
|
"eval_sys_len": 4475, |
|
"eval_totals": [ |
|
4475, |
|
3949, |
|
3423, |
|
2897 |
|
], |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_bp": 0.8727461954138965, |
|
"eval_counts": [ |
|
4463, |
|
3936, |
|
3409, |
|
2882 |
|
], |
|
"eval_loss": 0.0054754531010985374, |
|
"eval_precisions": [ |
|
99.42080641568278, |
|
99.31869795609387, |
|
99.18533604887983, |
|
99.00377877018207 |
|
], |
|
"eval_ref_len": 5100, |
|
"eval_runtime": 13.887, |
|
"eval_samples_per_second": 37.877, |
|
"eval_score": 86.60437841944254, |
|
"eval_steps_per_second": 1.224, |
|
"eval_sys_len": 4489, |
|
"eval_totals": [ |
|
4489, |
|
3963, |
|
3437, |
|
2911 |
|
], |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 1.8449612403100777e-05, |
|
"loss": 0.0338, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_bp": 0.8703139283627056, |
|
"eval_counts": [ |
|
4464, |
|
3938, |
|
3409, |
|
2881 |
|
], |
|
"eval_loss": 0.0021970090456306934, |
|
"eval_precisions": [ |
|
99.68736042876284, |
|
99.64574898785425, |
|
99.50379451255108, |
|
99.34482758620689 |
|
], |
|
"eval_ref_len": 5100, |
|
"eval_runtime": 14.1368, |
|
"eval_samples_per_second": 37.208, |
|
"eval_score": 86.63569782205596, |
|
"eval_steps_per_second": 1.203, |
|
"eval_sys_len": 4478, |
|
"eval_totals": [ |
|
4478, |
|
3952, |
|
3426, |
|
2900 |
|
], |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_bp": 0.8707565416150743, |
|
"eval_counts": [ |
|
4461, |
|
3933, |
|
3406, |
|
2880 |
|
], |
|
"eval_loss": 0.0019001211039721966, |
|
"eval_precisions": [ |
|
99.57589285714286, |
|
99.46889226100151, |
|
99.35822637106185, |
|
99.24190213645761 |
|
], |
|
"eval_ref_len": 5100, |
|
"eval_runtime": 13.879, |
|
"eval_samples_per_second": 37.899, |
|
"eval_score": 86.56290964643597, |
|
"eval_steps_per_second": 1.225, |
|
"eval_sys_len": 4480, |
|
"eval_totals": [ |
|
4480, |
|
3954, |
|
3428, |
|
2902 |
|
], |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_bp": 0.8700925578924447, |
|
"eval_counts": [ |
|
4463, |
|
3937, |
|
3411, |
|
2885 |
|
], |
|
"eval_loss": 0.0021749669685959816, |
|
"eval_precisions": [ |
|
99.6872905963815, |
|
99.64565932675272, |
|
99.5912408759124, |
|
99.51707485339773 |
|
], |
|
"eval_ref_len": 5100, |
|
"eval_runtime": 14.1959, |
|
"eval_samples_per_second": 37.053, |
|
"eval_score": 86.6701772747815, |
|
"eval_steps_per_second": 1.198, |
|
"eval_sys_len": 4477, |
|
"eval_totals": [ |
|
4477, |
|
3951, |
|
3425, |
|
2899 |
|
], |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_bp": 0.8703139283627056, |
|
"eval_counts": [ |
|
4464, |
|
3938, |
|
3412, |
|
2886 |
|
], |
|
"eval_loss": 0.0006092642433941364, |
|
"eval_precisions": [ |
|
99.68736042876284, |
|
99.64574898785425, |
|
99.59136018680677, |
|
99.51724137931035 |
|
], |
|
"eval_ref_len": 5100, |
|
"eval_runtime": 13.778, |
|
"eval_samples_per_second": 38.177, |
|
"eval_score": 86.69232496739014, |
|
"eval_steps_per_second": 1.234, |
|
"eval_sys_len": 4478, |
|
"eval_totals": [ |
|
4478, |
|
3952, |
|
3426, |
|
2900 |
|
], |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 7.75, |
|
"learning_rate": 1.689922480620155e-05, |
|
"loss": 0.0059, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_bp": 0.8714201423501817, |
|
"eval_counts": [ |
|
4460, |
|
3930, |
|
3404, |
|
2878 |
|
], |
|
"eval_loss": 0.0026030810549855232, |
|
"eval_precisions": [ |
|
99.48695070265447, |
|
99.31766489764973, |
|
99.21305741766248, |
|
99.07056798623064 |
|
], |
|
"eval_ref_len": 5100, |
|
"eval_runtime": 14.1647, |
|
"eval_samples_per_second": 37.135, |
|
"eval_score": 86.50757156803533, |
|
"eval_steps_per_second": 1.2, |
|
"eval_sys_len": 4483, |
|
"eval_totals": [ |
|
4483, |
|
3957, |
|
3431, |
|
2905 |
|
], |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_bp": 0.8711989846507546, |
|
"eval_counts": [ |
|
4459, |
|
3929, |
|
3403, |
|
2877 |
|
], |
|
"eval_loss": 0.00256777903996408, |
|
"eval_precisions": [ |
|
99.48683623382419, |
|
99.31749241658241, |
|
99.21282798833819, |
|
99.0702479338843 |
|
], |
|
"eval_ref_len": 5100, |
|
"eval_runtime": 13.7621, |
|
"eval_samples_per_second": 38.221, |
|
"eval_score": 86.4854345384993, |
|
"eval_steps_per_second": 1.235, |
|
"eval_sys_len": 4482, |
|
"eval_totals": [ |
|
4482, |
|
3956, |
|
3430, |
|
2904 |
|
], |
|
"step": 1161 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_bp": 0.8700925578924447, |
|
"eval_counts": [ |
|
4463, |
|
3937, |
|
3410, |
|
2883 |
|
], |
|
"eval_loss": 0.0008762977086007595, |
|
"eval_precisions": [ |
|
99.6872905963815, |
|
99.64565932675272, |
|
99.56204379562044, |
|
99.44808554674026 |
|
], |
|
"eval_ref_len": 5100, |
|
"eval_runtime": 14.033, |
|
"eval_samples_per_second": 37.483, |
|
"eval_score": 86.64880068746125, |
|
"eval_steps_per_second": 1.211, |
|
"eval_sys_len": 4477, |
|
"eval_totals": [ |
|
4477, |
|
3951, |
|
3425, |
|
2899 |
|
], |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_bp": 0.8707565416150743, |
|
"eval_counts": [ |
|
4462, |
|
3935, |
|
3408, |
|
2881 |
|
], |
|
"eval_loss": 0.0005689110257662833, |
|
"eval_precisions": [ |
|
99.59821428571429, |
|
99.51947395042994, |
|
99.41656942823803, |
|
99.276361130255 |
|
], |
|
"eval_ref_len": 5100, |
|
"eval_runtime": 13.8477, |
|
"eval_samples_per_second": 37.985, |
|
"eval_score": 86.5989861295871, |
|
"eval_steps_per_second": 1.228, |
|
"eval_sys_len": 4480, |
|
"eval_totals": [ |
|
4480, |
|
3954, |
|
3428, |
|
2902 |
|
], |
|
"step": 1419 |
|
}, |
|
{ |
|
"epoch": 11.63, |
|
"learning_rate": 1.5348837209302328e-05, |
|
"loss": 0.0042, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_bp": 0.8711989846507546, |
|
"eval_counts": [ |
|
4459, |
|
3929, |
|
3403, |
|
2877 |
|
], |
|
"eval_loss": 0.0013145459815859795, |
|
"eval_precisions": [ |
|
99.48683623382419, |
|
99.31749241658241, |
|
99.21282798833819, |
|
99.0702479338843 |
|
], |
|
"eval_ref_len": 5100, |
|
"eval_runtime": 13.7281, |
|
"eval_samples_per_second": 38.316, |
|
"eval_score": 86.4854345384993, |
|
"eval_steps_per_second": 1.238, |
|
"eval_sys_len": 4482, |
|
"eval_totals": [ |
|
4482, |
|
3956, |
|
3430, |
|
2904 |
|
], |
|
"step": 1548 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_bp": 0.8700925578924447, |
|
"eval_counts": [ |
|
4463, |
|
3937, |
|
3410, |
|
2883 |
|
], |
|
"eval_loss": 0.0008758930489420891, |
|
"eval_precisions": [ |
|
99.6872905963815, |
|
99.64565932675272, |
|
99.56204379562044, |
|
99.44808554674026 |
|
], |
|
"eval_ref_len": 5100, |
|
"eval_runtime": 13.731, |
|
"eval_samples_per_second": 38.308, |
|
"eval_score": 86.64880068746125, |
|
"eval_steps_per_second": 1.238, |
|
"eval_sys_len": 4477, |
|
"eval_totals": [ |
|
4477, |
|
3951, |
|
3425, |
|
2899 |
|
], |
|
"step": 1677 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_bp": 0.8700925578924447, |
|
"eval_counts": [ |
|
4463, |
|
3937, |
|
3411, |
|
2885 |
|
], |
|
"eval_loss": 0.0003304094134364277, |
|
"eval_precisions": [ |
|
99.6872905963815, |
|
99.64565932675272, |
|
99.5912408759124, |
|
99.51707485339773 |
|
], |
|
"eval_ref_len": 5100, |
|
"eval_runtime": 14.021, |
|
"eval_samples_per_second": 37.515, |
|
"eval_score": 86.6701772747815, |
|
"eval_steps_per_second": 1.212, |
|
"eval_sys_len": 4477, |
|
"eval_totals": [ |
|
4477, |
|
3951, |
|
3425, |
|
2899 |
|
], |
|
"step": 1806 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_bp": 0.8700925578924447, |
|
"eval_counts": [ |
|
4463, |
|
3937, |
|
3411, |
|
2885 |
|
], |
|
"eval_loss": 0.0008369011338800192, |
|
"eval_precisions": [ |
|
99.6872905963815, |
|
99.64565932675272, |
|
99.5912408759124, |
|
99.51707485339773 |
|
], |
|
"eval_ref_len": 5100, |
|
"eval_runtime": 13.8925, |
|
"eval_samples_per_second": 37.862, |
|
"eval_score": 86.6701772747815, |
|
"eval_steps_per_second": 1.224, |
|
"eval_sys_len": 4477, |
|
"eval_totals": [ |
|
4477, |
|
3951, |
|
3425, |
|
2899 |
|
], |
|
"step": 1935 |
|
}, |
|
{ |
|
"epoch": 15.5, |
|
"learning_rate": 1.3798449612403102e-05, |
|
"loss": 0.0027, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_bp": 0.8700925578924447, |
|
"eval_counts": [ |
|
4463, |
|
3937, |
|
3411, |
|
2885 |
|
], |
|
"eval_loss": 0.00020534679060801864, |
|
"eval_precisions": [ |
|
99.6872905963815, |
|
99.64565932675272, |
|
99.5912408759124, |
|
99.51707485339773 |
|
], |
|
"eval_ref_len": 5100, |
|
"eval_runtime": 13.9128, |
|
"eval_samples_per_second": 37.807, |
|
"eval_score": 86.6701772747815, |
|
"eval_steps_per_second": 1.222, |
|
"eval_sys_len": 4477, |
|
"eval_totals": [ |
|
4477, |
|
3951, |
|
3425, |
|
2899 |
|
], |
|
"step": 2064 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_bp": 0.8700925578924447, |
|
"eval_counts": [ |
|
4463, |
|
3937, |
|
3411, |
|
2885 |
|
], |
|
"eval_loss": 0.001328099868260324, |
|
"eval_precisions": [ |
|
99.6872905963815, |
|
99.64565932675272, |
|
99.5912408759124, |
|
99.51707485339773 |
|
], |
|
"eval_ref_len": 5100, |
|
"eval_runtime": 13.6909, |
|
"eval_samples_per_second": 38.42, |
|
"eval_score": 86.6701772747815, |
|
"eval_steps_per_second": 1.242, |
|
"eval_sys_len": 4477, |
|
"eval_totals": [ |
|
4477, |
|
3951, |
|
3425, |
|
2899 |
|
], |
|
"step": 2193 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_bp": 0.8700925578924447, |
|
"eval_counts": [ |
|
4463, |
|
3937, |
|
3411, |
|
2885 |
|
], |
|
"eval_loss": 0.0003365726734045893, |
|
"eval_precisions": [ |
|
99.6872905963815, |
|
99.64565932675272, |
|
99.5912408759124, |
|
99.51707485339773 |
|
], |
|
"eval_ref_len": 5100, |
|
"eval_runtime": 13.7109, |
|
"eval_samples_per_second": 38.364, |
|
"eval_score": 86.6701772747815, |
|
"eval_steps_per_second": 1.24, |
|
"eval_sys_len": 4477, |
|
"eval_totals": [ |
|
4477, |
|
3951, |
|
3425, |
|
2899 |
|
], |
|
"step": 2322 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_bp": 0.8700925578924447, |
|
"eval_counts": [ |
|
4463, |
|
3937, |
|
3411, |
|
2885 |
|
], |
|
"eval_loss": 8.775618334766477e-05, |
|
"eval_precisions": [ |
|
99.6872905963815, |
|
99.64565932675272, |
|
99.5912408759124, |
|
99.51707485339773 |
|
], |
|
"eval_ref_len": 5100, |
|
"eval_runtime": 14.0994, |
|
"eval_samples_per_second": 37.307, |
|
"eval_score": 86.6701772747815, |
|
"eval_steps_per_second": 1.206, |
|
"eval_sys_len": 4477, |
|
"eval_totals": [ |
|
4477, |
|
3951, |
|
3425, |
|
2899 |
|
], |
|
"step": 2451 |
|
}, |
|
{ |
|
"epoch": 19.38, |
|
"learning_rate": 1.2248062015503876e-05, |
|
"loss": 0.002, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_bp": 0.8700925578924447, |
|
"eval_counts": [ |
|
4463, |
|
3937, |
|
3411, |
|
2885 |
|
], |
|
"eval_loss": 8.96168130566366e-05, |
|
"eval_precisions": [ |
|
99.6872905963815, |
|
99.64565932675272, |
|
99.5912408759124, |
|
99.51707485339773 |
|
], |
|
"eval_ref_len": 5100, |
|
"eval_runtime": 13.8102, |
|
"eval_samples_per_second": 38.088, |
|
"eval_score": 86.6701772747815, |
|
"eval_steps_per_second": 1.231, |
|
"eval_sys_len": 4477, |
|
"eval_totals": [ |
|
4477, |
|
3951, |
|
3425, |
|
2899 |
|
], |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_bp": 0.8700925578924447, |
|
"eval_counts": [ |
|
4463, |
|
3937, |
|
3411, |
|
2885 |
|
], |
|
"eval_loss": 0.0007285438477993011, |
|
"eval_precisions": [ |
|
99.6872905963815, |
|
99.64565932675272, |
|
99.5912408759124, |
|
99.51707485339773 |
|
], |
|
"eval_ref_len": 5100, |
|
"eval_runtime": 13.7462, |
|
"eval_samples_per_second": 38.265, |
|
"eval_score": 86.6701772747815, |
|
"eval_steps_per_second": 1.237, |
|
"eval_sys_len": 4477, |
|
"eval_totals": [ |
|
4477, |
|
3951, |
|
3425, |
|
2899 |
|
], |
|
"step": 2709 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_bp": 0.8703139283627056, |
|
"eval_counts": [ |
|
4464, |
|
3938, |
|
3412, |
|
2886 |
|
], |
|
"eval_loss": 0.0018083422910422087, |
|
"eval_precisions": [ |
|
99.68736042876284, |
|
99.64574898785425, |
|
99.59136018680677, |
|
99.51724137931035 |
|
], |
|
"eval_ref_len": 5100, |
|
"eval_runtime": 13.8084, |
|
"eval_samples_per_second": 38.093, |
|
"eval_score": 86.69232496739014, |
|
"eval_steps_per_second": 1.231, |
|
"eval_sys_len": 4478, |
|
"eval_totals": [ |
|
4478, |
|
3952, |
|
3426, |
|
2900 |
|
], |
|
"step": 2838 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_bp": 0.8700925578924447, |
|
"eval_counts": [ |
|
4463, |
|
3937, |
|
3411, |
|
2885 |
|
], |
|
"eval_loss": 0.000638504687231034, |
|
"eval_precisions": [ |
|
99.6872905963815, |
|
99.64565932675272, |
|
99.5912408759124, |
|
99.51707485339773 |
|
], |
|
"eval_ref_len": 5100, |
|
"eval_runtime": 13.995, |
|
"eval_samples_per_second": 37.585, |
|
"eval_score": 86.6701772747815, |
|
"eval_steps_per_second": 1.215, |
|
"eval_sys_len": 4477, |
|
"eval_totals": [ |
|
4477, |
|
3951, |
|
3425, |
|
2899 |
|
], |
|
"step": 2967 |
|
}, |
|
{ |
|
"epoch": 23.26, |
|
"learning_rate": 1.0697674418604651e-05, |
|
"loss": 0.0015, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_bp": 0.8700925578924447, |
|
"eval_counts": [ |
|
4463, |
|
3937, |
|
3411, |
|
2885 |
|
], |
|
"eval_loss": 0.00022906862432137132, |
|
"eval_precisions": [ |
|
99.6872905963815, |
|
99.64565932675272, |
|
99.5912408759124, |
|
99.51707485339773 |
|
], |
|
"eval_ref_len": 5100, |
|
"eval_runtime": 13.7342, |
|
"eval_samples_per_second": 38.299, |
|
"eval_score": 86.6701772747815, |
|
"eval_steps_per_second": 1.238, |
|
"eval_sys_len": 4477, |
|
"eval_totals": [ |
|
4477, |
|
3951, |
|
3425, |
|
2899 |
|
], |
|
"step": 3096 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_bp": 0.8700925578924447, |
|
"eval_counts": [ |
|
4463, |
|
3937, |
|
3411, |
|
2885 |
|
], |
|
"eval_loss": 0.000275774480542168, |
|
"eval_precisions": [ |
|
99.6872905963815, |
|
99.64565932675272, |
|
99.5912408759124, |
|
99.51707485339773 |
|
], |
|
"eval_ref_len": 5100, |
|
"eval_runtime": 14.1769, |
|
"eval_samples_per_second": 37.103, |
|
"eval_score": 86.6701772747815, |
|
"eval_steps_per_second": 1.199, |
|
"eval_sys_len": 4477, |
|
"eval_totals": [ |
|
4477, |
|
3951, |
|
3425, |
|
2899 |
|
], |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_bp": 0.8700925578924447, |
|
"eval_counts": [ |
|
4463, |
|
3937, |
|
3411, |
|
2885 |
|
], |
|
"eval_loss": 0.00025378959253430367, |
|
"eval_precisions": [ |
|
99.6872905963815, |
|
99.64565932675272, |
|
99.5912408759124, |
|
99.51707485339773 |
|
], |
|
"eval_ref_len": 5100, |
|
"eval_runtime": 13.944, |
|
"eval_samples_per_second": 37.722, |
|
"eval_score": 86.6701772747815, |
|
"eval_steps_per_second": 1.219, |
|
"eval_sys_len": 4477, |
|
"eval_totals": [ |
|
4477, |
|
3951, |
|
3425, |
|
2899 |
|
], |
|
"step": 3354 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_bp": 0.8700925578924447, |
|
"eval_counts": [ |
|
4463, |
|
3937, |
|
3411, |
|
2885 |
|
], |
|
"eval_loss": 2.7398107704357244e-05, |
|
"eval_precisions": [ |
|
99.6872905963815, |
|
99.64565932675272, |
|
99.5912408759124, |
|
99.51707485339773 |
|
], |
|
"eval_ref_len": 5100, |
|
"eval_runtime": 13.9791, |
|
"eval_samples_per_second": 37.628, |
|
"eval_score": 86.6701772747815, |
|
"eval_steps_per_second": 1.216, |
|
"eval_sys_len": 4477, |
|
"eval_totals": [ |
|
4477, |
|
3951, |
|
3425, |
|
2899 |
|
], |
|
"step": 3483 |
|
}, |
|
{ |
|
"epoch": 27.13, |
|
"learning_rate": 9.147286821705427e-06, |
|
"loss": 0.0014, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_bp": 0.8700925578924447, |
|
"eval_counts": [ |
|
4463, |
|
3937, |
|
3411, |
|
2885 |
|
], |
|
"eval_loss": 6.660177314188331e-05, |
|
"eval_precisions": [ |
|
99.6872905963815, |
|
99.64565932675272, |
|
99.5912408759124, |
|
99.51707485339773 |
|
], |
|
"eval_ref_len": 5100, |
|
"eval_runtime": 14.1404, |
|
"eval_samples_per_second": 37.198, |
|
"eval_score": 86.6701772747815, |
|
"eval_steps_per_second": 1.202, |
|
"eval_sys_len": 4477, |
|
"eval_totals": [ |
|
4477, |
|
3951, |
|
3425, |
|
2899 |
|
], |
|
"step": 3612 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_bp": 0.8700925578924447, |
|
"eval_counts": [ |
|
4463, |
|
3937, |
|
3411, |
|
2885 |
|
], |
|
"eval_loss": 0.00022675798390991986, |
|
"eval_precisions": [ |
|
99.6872905963815, |
|
99.64565932675272, |
|
99.5912408759124, |
|
99.51707485339773 |
|
], |
|
"eval_ref_len": 5100, |
|
"eval_runtime": 13.9231, |
|
"eval_samples_per_second": 37.779, |
|
"eval_score": 86.6701772747815, |
|
"eval_steps_per_second": 1.221, |
|
"eval_sys_len": 4477, |
|
"eval_totals": [ |
|
4477, |
|
3951, |
|
3425, |
|
2899 |
|
], |
|
"step": 3741 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_bp": 0.8700925578924447, |
|
"eval_counts": [ |
|
4463, |
|
3937, |
|
3411, |
|
2885 |
|
], |
|
"eval_loss": 5.096692984807305e-05, |
|
"eval_precisions": [ |
|
99.6872905963815, |
|
99.64565932675272, |
|
99.5912408759124, |
|
99.51707485339773 |
|
], |
|
"eval_ref_len": 5100, |
|
"eval_runtime": 14.3573, |
|
"eval_samples_per_second": 36.636, |
|
"eval_score": 86.6701772747815, |
|
"eval_steps_per_second": 1.184, |
|
"eval_sys_len": 4477, |
|
"eval_totals": [ |
|
4477, |
|
3951, |
|
3425, |
|
2899 |
|
], |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_bp": 0.8700925578924447, |
|
"eval_counts": [ |
|
4463, |
|
3937, |
|
3411, |
|
2885 |
|
], |
|
"eval_loss": 9.942305041477084e-05, |
|
"eval_precisions": [ |
|
99.6872905963815, |
|
99.64565932675272, |
|
99.5912408759124, |
|
99.51707485339773 |
|
], |
|
"eval_ref_len": 5100, |
|
"eval_runtime": 13.7812, |
|
"eval_samples_per_second": 38.168, |
|
"eval_score": 86.6701772747815, |
|
"eval_steps_per_second": 1.234, |
|
"eval_sys_len": 4477, |
|
"eval_totals": [ |
|
4477, |
|
3951, |
|
3425, |
|
2899 |
|
], |
|
"step": 3999 |
|
}, |
|
{ |
|
"epoch": 31.01, |
|
"learning_rate": 7.596899224806202e-06, |
|
"loss": 0.0009, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_bp": 0.8700925578924447, |
|
"eval_counts": [ |
|
4463, |
|
3937, |
|
3411, |
|
2885 |
|
], |
|
"eval_loss": 0.00020378571934998035, |
|
"eval_precisions": [ |
|
99.6872905963815, |
|
99.64565932675272, |
|
99.5912408759124, |
|
99.51707485339773 |
|
], |
|
"eval_ref_len": 5100, |
|
"eval_runtime": 14.1117, |
|
"eval_samples_per_second": 37.274, |
|
"eval_score": 86.6701772747815, |
|
"eval_steps_per_second": 1.205, |
|
"eval_sys_len": 4477, |
|
"eval_totals": [ |
|
4477, |
|
3951, |
|
3425, |
|
2899 |
|
], |
|
"step": 4128 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_bp": 0.8700925578924447, |
|
"eval_counts": [ |
|
4463, |
|
3937, |
|
3411, |
|
2885 |
|
], |
|
"eval_loss": 4.814989733858965e-05, |
|
"eval_precisions": [ |
|
99.6872905963815, |
|
99.64565932675272, |
|
99.5912408759124, |
|
99.51707485339773 |
|
], |
|
"eval_ref_len": 5100, |
|
"eval_runtime": 13.7623, |
|
"eval_samples_per_second": 38.22, |
|
"eval_score": 86.6701772747815, |
|
"eval_steps_per_second": 1.235, |
|
"eval_sys_len": 4477, |
|
"eval_totals": [ |
|
4477, |
|
3951, |
|
3425, |
|
2899 |
|
], |
|
"step": 4257 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_bp": 0.8700925578924447, |
|
"eval_counts": [ |
|
4463, |
|
3937, |
|
3411, |
|
2885 |
|
], |
|
"eval_loss": 5.921188130741939e-05, |
|
"eval_precisions": [ |
|
99.6872905963815, |
|
99.64565932675272, |
|
99.5912408759124, |
|
99.51707485339773 |
|
], |
|
"eval_ref_len": 5100, |
|
"eval_runtime": 13.904, |
|
"eval_samples_per_second": 37.831, |
|
"eval_score": 86.6701772747815, |
|
"eval_steps_per_second": 1.223, |
|
"eval_sys_len": 4477, |
|
"eval_totals": [ |
|
4477, |
|
3951, |
|
3425, |
|
2899 |
|
], |
|
"step": 4386 |
|
}, |
|
{ |
|
"epoch": 34.88, |
|
"learning_rate": 6.046511627906977e-06, |
|
"loss": 0.0008, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_bp": 0.8700925578924447, |
|
"eval_counts": [ |
|
4463, |
|
3937, |
|
3411, |
|
2885 |
|
], |
|
"eval_loss": 2.948127621493768e-05, |
|
"eval_precisions": [ |
|
99.6872905963815, |
|
99.64565932675272, |
|
99.5912408759124, |
|
99.51707485339773 |
|
], |
|
"eval_ref_len": 5100, |
|
"eval_runtime": 14.005, |
|
"eval_samples_per_second": 37.558, |
|
"eval_score": 86.6701772747815, |
|
"eval_steps_per_second": 1.214, |
|
"eval_sys_len": 4477, |
|
"eval_totals": [ |
|
4477, |
|
3951, |
|
3425, |
|
2899 |
|
], |
|
"step": 4515 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_bp": 0.8700925578924447, |
|
"eval_counts": [ |
|
4463, |
|
3937, |
|
3411, |
|
2885 |
|
], |
|
"eval_loss": 1.501874066889286e-05, |
|
"eval_precisions": [ |
|
99.6872905963815, |
|
99.64565932675272, |
|
99.5912408759124, |
|
99.51707485339773 |
|
], |
|
"eval_ref_len": 5100, |
|
"eval_runtime": 13.8563, |
|
"eval_samples_per_second": 37.961, |
|
"eval_score": 86.6701772747815, |
|
"eval_steps_per_second": 1.227, |
|
"eval_sys_len": 4477, |
|
"eval_totals": [ |
|
4477, |
|
3951, |
|
3425, |
|
2899 |
|
], |
|
"step": 4644 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_bp": 0.8700925578924447, |
|
"eval_counts": [ |
|
4463, |
|
3937, |
|
3411, |
|
2885 |
|
], |
|
"eval_loss": 3.573419598978944e-05, |
|
"eval_precisions": [ |
|
99.6872905963815, |
|
99.64565932675272, |
|
99.5912408759124, |
|
99.51707485339773 |
|
], |
|
"eval_ref_len": 5100, |
|
"eval_runtime": 14.0819, |
|
"eval_samples_per_second": 37.353, |
|
"eval_score": 86.6701772747815, |
|
"eval_steps_per_second": 1.207, |
|
"eval_sys_len": 4477, |
|
"eval_totals": [ |
|
4477, |
|
3951, |
|
3425, |
|
2899 |
|
], |
|
"step": 4773 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_bp": 0.8700925578924447, |
|
"eval_counts": [ |
|
4463, |
|
3937, |
|
3411, |
|
2885 |
|
], |
|
"eval_loss": 1.292789511353476e-05, |
|
"eval_precisions": [ |
|
99.6872905963815, |
|
99.64565932675272, |
|
99.5912408759124, |
|
99.51707485339773 |
|
], |
|
"eval_ref_len": 5100, |
|
"eval_runtime": 13.8393, |
|
"eval_samples_per_second": 38.008, |
|
"eval_score": 86.6701772747815, |
|
"eval_steps_per_second": 1.228, |
|
"eval_sys_len": 4477, |
|
"eval_totals": [ |
|
4477, |
|
3951, |
|
3425, |
|
2899 |
|
], |
|
"step": 4902 |
|
}, |
|
{ |
|
"epoch": 38.76, |
|
"learning_rate": 4.4961240310077525e-06, |
|
"loss": 0.0006, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_bp": 0.8700925578924447, |
|
"eval_counts": [ |
|
4463, |
|
3937, |
|
3411, |
|
2885 |
|
], |
|
"eval_loss": 2.826396666932851e-05, |
|
"eval_precisions": [ |
|
99.6872905963815, |
|
99.64565932675272, |
|
99.5912408759124, |
|
99.51707485339773 |
|
], |
|
"eval_ref_len": 5100, |
|
"eval_runtime": 14.2505, |
|
"eval_samples_per_second": 36.911, |
|
"eval_score": 86.6701772747815, |
|
"eval_steps_per_second": 1.193, |
|
"eval_sys_len": 4477, |
|
"eval_totals": [ |
|
4477, |
|
3951, |
|
3425, |
|
2899 |
|
], |
|
"step": 5031 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_bp": 0.8700925578924447, |
|
"eval_counts": [ |
|
4463, |
|
3937, |
|
3411, |
|
2885 |
|
], |
|
"eval_loss": 9.735503226693254e-06, |
|
"eval_precisions": [ |
|
99.6872905963815, |
|
99.64565932675272, |
|
99.5912408759124, |
|
99.51707485339773 |
|
], |
|
"eval_ref_len": 5100, |
|
"eval_runtime": 13.8339, |
|
"eval_samples_per_second": 38.023, |
|
"eval_score": 86.6701772747815, |
|
"eval_steps_per_second": 1.229, |
|
"eval_sys_len": 4477, |
|
"eval_totals": [ |
|
4477, |
|
3951, |
|
3425, |
|
2899 |
|
], |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_bp": 0.8700925578924447, |
|
"eval_counts": [ |
|
4463, |
|
3937, |
|
3411, |
|
2885 |
|
], |
|
"eval_loss": 5.421350579126738e-06, |
|
"eval_precisions": [ |
|
99.6872905963815, |
|
99.64565932675272, |
|
99.5912408759124, |
|
99.51707485339773 |
|
], |
|
"eval_ref_len": 5100, |
|
"eval_runtime": 13.9445, |
|
"eval_samples_per_second": 37.721, |
|
"eval_score": 86.6701772747815, |
|
"eval_steps_per_second": 1.219, |
|
"eval_sys_len": 4477, |
|
"eval_totals": [ |
|
4477, |
|
3951, |
|
3425, |
|
2899 |
|
], |
|
"step": 5289 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_bp": 0.8700925578924447, |
|
"eval_counts": [ |
|
4463, |
|
3937, |
|
3411, |
|
2885 |
|
], |
|
"eval_loss": 1.1437254215707071e-05, |
|
"eval_precisions": [ |
|
99.6872905963815, |
|
99.64565932675272, |
|
99.5912408759124, |
|
99.51707485339773 |
|
], |
|
"eval_ref_len": 5100, |
|
"eval_runtime": 13.8327, |
|
"eval_samples_per_second": 38.026, |
|
"eval_score": 86.6701772747815, |
|
"eval_steps_per_second": 1.229, |
|
"eval_sys_len": 4477, |
|
"eval_totals": [ |
|
4477, |
|
3951, |
|
3425, |
|
2899 |
|
], |
|
"step": 5418 |
|
}, |
|
{ |
|
"epoch": 42.64, |
|
"learning_rate": 2.9457364341085276e-06, |
|
"loss": 0.0004, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_bp": 0.8700925578924447, |
|
"eval_counts": [ |
|
4463, |
|
3937, |
|
3411, |
|
2885 |
|
], |
|
"eval_loss": 1.4262999684433453e-05, |
|
"eval_precisions": [ |
|
99.6872905963815, |
|
99.64565932675272, |
|
99.5912408759124, |
|
99.51707485339773 |
|
], |
|
"eval_ref_len": 5100, |
|
"eval_runtime": 13.8768, |
|
"eval_samples_per_second": 37.905, |
|
"eval_score": 86.6701772747815, |
|
"eval_steps_per_second": 1.225, |
|
"eval_sys_len": 4477, |
|
"eval_totals": [ |
|
4477, |
|
3951, |
|
3425, |
|
2899 |
|
], |
|
"step": 5547 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_bp": 0.8700925578924447, |
|
"eval_counts": [ |
|
4463, |
|
3937, |
|
3411, |
|
2885 |
|
], |
|
"eval_loss": 3.849239874398336e-06, |
|
"eval_precisions": [ |
|
99.6872905963815, |
|
99.64565932675272, |
|
99.5912408759124, |
|
99.51707485339773 |
|
], |
|
"eval_ref_len": 5100, |
|
"eval_runtime": 13.9608, |
|
"eval_samples_per_second": 37.677, |
|
"eval_score": 86.6701772747815, |
|
"eval_steps_per_second": 1.218, |
|
"eval_sys_len": 4477, |
|
"eval_totals": [ |
|
4477, |
|
3951, |
|
3425, |
|
2899 |
|
], |
|
"step": 5676 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_bp": 0.8700925578924447, |
|
"eval_counts": [ |
|
4463, |
|
3937, |
|
3411, |
|
2885 |
|
], |
|
"eval_loss": 1.1063038982683793e-05, |
|
"eval_precisions": [ |
|
99.6872905963815, |
|
99.64565932675272, |
|
99.5912408759124, |
|
99.51707485339773 |
|
], |
|
"eval_ref_len": 5100, |
|
"eval_runtime": 13.8436, |
|
"eval_samples_per_second": 37.996, |
|
"eval_score": 86.6701772747815, |
|
"eval_steps_per_second": 1.228, |
|
"eval_sys_len": 4477, |
|
"eval_totals": [ |
|
4477, |
|
3951, |
|
3425, |
|
2899 |
|
], |
|
"step": 5805 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_bp": 0.8700925578924447, |
|
"eval_counts": [ |
|
4463, |
|
3937, |
|
3411, |
|
2885 |
|
], |
|
"eval_loss": 7.035921953502111e-06, |
|
"eval_precisions": [ |
|
99.6872905963815, |
|
99.64565932675272, |
|
99.5912408759124, |
|
99.51707485339773 |
|
], |
|
"eval_ref_len": 5100, |
|
"eval_runtime": 14.1871, |
|
"eval_samples_per_second": 37.076, |
|
"eval_score": 86.6701772747815, |
|
"eval_steps_per_second": 1.198, |
|
"eval_sys_len": 4477, |
|
"eval_totals": [ |
|
4477, |
|
3951, |
|
3425, |
|
2899 |
|
], |
|
"step": 5934 |
|
}, |
|
{ |
|
"epoch": 46.51, |
|
"learning_rate": 1.3953488372093025e-06, |
|
"loss": 0.0002, |
|
"step": 6000 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 6450, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"total_flos": 2.06928054010368e+16, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|