new-vit5base / trainer_state.json
duyvu8373's picture
Upload 12 files
f0e9492 verified
raw
history blame contribute delete
No virus
19.3 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 24.91103202846975,
"eval_steps": 500,
"global_step": 14000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.89,
"learning_rate": 1.9288256227758007e-05,
"loss": 0.7021,
"step": 500
},
{
"epoch": 1.0,
"eval_bp": 0.4478803932550057,
"eval_counts": [
951,
851,
761,
672
],
"eval_loss": 0.1726560890674591,
"eval_precisions": [
95.96367305751765,
92.19934994582881,
89.00584795321637,
85.38754764930114
],
"eval_ref_len": 1787,
"eval_runtime": 18.1569,
"eval_samples_per_second": 3.745,
"eval_score": 40.55776382580726,
"eval_steps_per_second": 1.873,
"eval_sys_len": 991,
"eval_totals": [
991,
923,
855,
787
],
"step": 562
},
{
"epoch": 1.78,
"learning_rate": 1.8576512455516017e-05,
"loss": 0.2854,
"step": 1000
},
{
"epoch": 2.0,
"eval_bp": 0.441355602158059,
"eval_counts": [
962,
883,
802,
721
],
"eval_loss": 0.11405563354492188,
"eval_precisions": [
97.86368260427264,
96.50273224043715,
94.68713105076742,
92.55455712451861
],
"eval_ref_len": 1787,
"eval_runtime": 16.4762,
"eval_samples_per_second": 4.127,
"eval_score": 42.09699789518687,
"eval_steps_per_second": 2.064,
"eval_sys_len": 983,
"eval_totals": [
983,
915,
847,
779
],
"step": 1124
},
{
"epoch": 2.67,
"learning_rate": 1.7864768683274022e-05,
"loss": 0.1956,
"step": 1500
},
{
"epoch": 3.0,
"eval_bp": 0.44380356964488166,
"eval_counts": [
965,
886,
806,
726
],
"eval_loss": 0.10203403234481812,
"eval_precisions": [
97.87018255578093,
96.51416122004358,
94.82352941176471,
92.8388746803069
],
"eval_ref_len": 1787,
"eval_runtime": 16.5425,
"eval_samples_per_second": 4.111,
"eval_score": 42.380165345955355,
"eval_steps_per_second": 2.055,
"eval_sys_len": 986,
"eval_totals": [
986,
918,
850,
782
],
"step": 1686
},
{
"epoch": 3.56,
"learning_rate": 1.715302491103203e-05,
"loss": 0.1441,
"step": 2000
},
{
"epoch": 4.0,
"eval_bp": 0.437272650606809,
"eval_counts": [
966,
894,
816,
739
],
"eval_loss": 0.05576588958501816,
"eval_precisions": [
98.77300613496932,
98.24175824175825,
96.91211401425178,
95.4780361757106
],
"eval_ref_len": 1787,
"eval_runtime": 16.5385,
"eval_samples_per_second": 4.112,
"eval_score": 42.565359021286106,
"eval_steps_per_second": 2.056,
"eval_sys_len": 978,
"eval_totals": [
978,
910,
842,
774
],
"step": 2248
},
{
"epoch": 4.45,
"learning_rate": 1.6441281138790037e-05,
"loss": 0.1082,
"step": 2500
},
{
"epoch": 5.0,
"eval_bp": 0.425821753267709,
"eval_counts": [
956,
882,
809,
736
],
"eval_loss": 0.0341346338391304,
"eval_precisions": [
99.1701244813278,
98.4375,
97.70531400966183,
96.84210526315789
],
"eval_ref_len": 1787,
"eval_runtime": 16.4383,
"eval_samples_per_second": 4.137,
"eval_score": 41.745417030738814,
"eval_steps_per_second": 2.068,
"eval_sys_len": 964,
"eval_totals": [
964,
896,
828,
760
],
"step": 2810
},
{
"epoch": 5.34,
"learning_rate": 1.5729537366548043e-05,
"loss": 0.0942,
"step": 3000
},
{
"epoch": 6.0,
"eval_bp": 0.4356384499188918,
"eval_counts": [
973,
899,
824,
749
],
"eval_loss": 0.05856137350201607,
"eval_precisions": [
99.69262295081967,
99.00881057268722,
98.0952380952381,
97.02072538860104
],
"eval_ref_len": 1787,
"eval_runtime": 16.3313,
"eval_samples_per_second": 4.164,
"eval_score": 42.888270294493864,
"eval_steps_per_second": 2.082,
"eval_sys_len": 976,
"eval_totals": [
976,
908,
840,
772
],
"step": 3372
},
{
"epoch": 6.23,
"learning_rate": 1.5017793594306052e-05,
"loss": 0.0713,
"step": 3500
},
{
"epoch": 7.0,
"eval_bp": 0.42745918009113226,
"eval_counts": [
964,
894,
824,
754
],
"eval_loss": 0.02425399236381054,
"eval_precisions": [
99.79296066252589,
99.55456570155901,
99.27710843373494,
98.9501312335958
],
"eval_ref_len": 1787,
"eval_runtime": 16.4255,
"eval_samples_per_second": 4.14,
"eval_score": 42.486533114975146,
"eval_steps_per_second": 2.07,
"eval_sys_len": 966,
"eval_totals": [
966,
898,
830,
762
],
"step": 3934
},
{
"epoch": 7.12,
"learning_rate": 1.4306049822064058e-05,
"loss": 0.0599,
"step": 4000
},
{
"epoch": 8.0,
"eval_bp": 0.43073249568232586,
"eval_counts": [
961,
890,
820,
751
],
"eval_loss": 0.015419703908264637,
"eval_precisions": [
99.0721649484536,
98.66962305986696,
98.32134292565948,
98.04177545691905
],
"eval_ref_len": 1787,
"eval_runtime": 16.187,
"eval_samples_per_second": 4.201,
"eval_score": 42.438122327030186,
"eval_steps_per_second": 2.1,
"eval_sys_len": 970,
"eval_totals": [
970,
902,
834,
766
],
"step": 4496
},
{
"epoch": 8.01,
"learning_rate": 1.3594306049822066e-05,
"loss": 0.048,
"step": 4500
},
{
"epoch": 8.9,
"learning_rate": 1.2882562277580073e-05,
"loss": 0.0392,
"step": 5000
},
{
"epoch": 9.0,
"eval_bp": 0.4250028527550296,
"eval_counts": [
959,
889,
818,
747
],
"eval_loss": 0.019724518060684204,
"eval_precisions": [
99.5846313603323,
99.32960893854748,
98.9117291414752,
98.41897233201581
],
"eval_ref_len": 1787,
"eval_runtime": 16.8132,
"eval_samples_per_second": 4.044,
"eval_score": 42.10088852964188,
"eval_steps_per_second": 2.022,
"eval_sys_len": 963,
"eval_totals": [
963,
895,
827,
759
],
"step": 5058
},
{
"epoch": 9.79,
"learning_rate": 1.217081850533808e-05,
"loss": 0.0319,
"step": 5500
},
{
"epoch": 10.0,
"eval_bp": 0.42909609813393135,
"eval_counts": [
966,
898,
830,
762
],
"eval_loss": 0.005858541466295719,
"eval_precisions": [
99.79338842975207,
99.77777777777777,
99.75961538461539,
99.73821989528795
],
"eval_ref_len": 1787,
"eval_runtime": 16.1653,
"eval_samples_per_second": 4.207,
"eval_score": 42.809736944779694,
"eval_steps_per_second": 2.103,
"eval_sys_len": 968,
"eval_totals": [
968,
900,
832,
764
],
"step": 5620
},
{
"epoch": 10.68,
"learning_rate": 1.1459074733096086e-05,
"loss": 0.026,
"step": 6000
},
{
"epoch": 11.0,
"eval_bp": 0.42909609813393135,
"eval_counts": [
966,
898,
829,
760
],
"eval_loss": 0.010834704153239727,
"eval_precisions": [
99.79338842975207,
99.77777777777777,
99.63942307692308,
99.47643979057591
],
"eval_ref_len": 1787,
"eval_runtime": 16.4611,
"eval_samples_per_second": 4.131,
"eval_score": 42.768727019910926,
"eval_steps_per_second": 2.065,
"eval_sys_len": 968,
"eval_totals": [
968,
900,
832,
764
],
"step": 6182
},
{
"epoch": 11.57,
"learning_rate": 1.0747330960854094e-05,
"loss": 0.025,
"step": 6500
},
{
"epoch": 12.0,
"eval_bp": 0.42909609813393135,
"eval_counts": [
966,
898,
830,
762
],
"eval_loss": 0.0013562627136707306,
"eval_precisions": [
99.79338842975207,
99.77777777777777,
99.75961538461539,
99.73821989528795
],
"eval_ref_len": 1787,
"eval_runtime": 16.1168,
"eval_samples_per_second": 4.219,
"eval_score": 42.809736944779694,
"eval_steps_per_second": 2.11,
"eval_sys_len": 968,
"eval_totals": [
968,
900,
832,
764
],
"step": 6744
},
{
"epoch": 12.46,
"learning_rate": 1.0035587188612101e-05,
"loss": 0.0182,
"step": 7000
},
{
"epoch": 13.0,
"eval_bp": 0.42909609813393135,
"eval_counts": [
966,
898,
830,
762
],
"eval_loss": 0.0013860436156392097,
"eval_precisions": [
99.79338842975207,
99.77777777777777,
99.75961538461539,
99.73821989528795
],
"eval_ref_len": 1787,
"eval_runtime": 16.6416,
"eval_samples_per_second": 4.086,
"eval_score": 42.809736944779694,
"eval_steps_per_second": 2.043,
"eval_sys_len": 968,
"eval_totals": [
968,
900,
832,
764
],
"step": 7306
},
{
"epoch": 13.35,
"learning_rate": 9.323843416370107e-06,
"loss": 0.0148,
"step": 7500
},
{
"epoch": 14.0,
"eval_bp": 0.4315504956415897,
"eval_counts": [
963,
894,
825,
756
],
"eval_loss": 0.004028095863759518,
"eval_precisions": [
99.17610710607622,
99.00332225913621,
98.80239520958084,
98.56584093872229
],
"eval_ref_len": 1787,
"eval_runtime": 16.2486,
"eval_samples_per_second": 4.185,
"eval_score": 42.674584599098594,
"eval_steps_per_second": 2.092,
"eval_sys_len": 971,
"eval_totals": [
971,
903,
835,
767
],
"step": 7868
},
{
"epoch": 14.23,
"learning_rate": 8.612099644128115e-06,
"loss": 0.0149,
"step": 8000
},
{
"epoch": 15.0,
"eval_bp": 0.4299143626976017,
"eval_counts": [
967,
899,
830,
761
],
"eval_loss": 0.004267631098628044,
"eval_precisions": [
99.79360165118679,
99.7780244173141,
99.63985594237695,
99.47712418300654
],
"eval_ref_len": 1787,
"eval_runtime": 16.2443,
"eval_samples_per_second": 4.186,
"eval_score": 42.85045442963625,
"eval_steps_per_second": 2.093,
"eval_sys_len": 969,
"eval_totals": [
969,
901,
833,
765
],
"step": 8430
},
{
"epoch": 15.12,
"learning_rate": 7.900355871886122e-06,
"loss": 0.0144,
"step": 8500
},
{
"epoch": 16.0,
"eval_bp": 0.42909609813393135,
"eval_counts": [
966,
898,
830,
762
],
"eval_loss": 0.00286501320078969,
"eval_precisions": [
99.79338842975207,
99.77777777777777,
99.75961538461539,
99.73821989528795
],
"eval_ref_len": 1787,
"eval_runtime": 16.5144,
"eval_samples_per_second": 4.118,
"eval_score": 42.809736944779694,
"eval_steps_per_second": 2.059,
"eval_sys_len": 968,
"eval_totals": [
968,
900,
832,
764
],
"step": 8992
},
{
"epoch": 16.01,
"learning_rate": 7.188612099644129e-06,
"loss": 0.0112,
"step": 9000
},
{
"epoch": 16.9,
"learning_rate": 6.476868327402136e-06,
"loss": 0.0098,
"step": 9500
},
{
"epoch": 17.0,
"eval_bp": 0.42909609813393135,
"eval_counts": [
966,
898,
830,
762
],
"eval_loss": 0.0009978804737329483,
"eval_precisions": [
99.79338842975207,
99.77777777777777,
99.75961538461539,
99.73821989528795
],
"eval_ref_len": 1787,
"eval_runtime": 16.1028,
"eval_samples_per_second": 4.223,
"eval_score": 42.809736944779694,
"eval_steps_per_second": 2.111,
"eval_sys_len": 968,
"eval_totals": [
968,
900,
832,
764
],
"step": 9554
},
{
"epoch": 17.79,
"learning_rate": 5.765124555160143e-06,
"loss": 0.0089,
"step": 10000
},
{
"epoch": 18.0,
"eval_bp": 0.42909609813393135,
"eval_counts": [
966,
898,
830,
762
],
"eval_loss": 0.00037646759301424026,
"eval_precisions": [
99.79338842975207,
99.77777777777777,
99.75961538461539,
99.73821989528795
],
"eval_ref_len": 1787,
"eval_runtime": 16.4684,
"eval_samples_per_second": 4.129,
"eval_score": 42.809736944779694,
"eval_steps_per_second": 2.065,
"eval_sys_len": 968,
"eval_totals": [
968,
900,
832,
764
],
"step": 10116
},
{
"epoch": 18.68,
"learning_rate": 5.05338078291815e-06,
"loss": 0.0075,
"step": 10500
},
{
"epoch": 19.0,
"eval_bp": 0.42909609813393135,
"eval_counts": [
966,
898,
830,
762
],
"eval_loss": 0.00018596854351926595,
"eval_precisions": [
99.79338842975207,
99.77777777777777,
99.75961538461539,
99.73821989528795
],
"eval_ref_len": 1787,
"eval_runtime": 16.3451,
"eval_samples_per_second": 4.16,
"eval_score": 42.809736944779694,
"eval_steps_per_second": 2.08,
"eval_sys_len": 968,
"eval_totals": [
968,
900,
832,
764
],
"step": 10678
},
{
"epoch": 19.57,
"learning_rate": 4.341637010676157e-06,
"loss": 0.0068,
"step": 11000
},
{
"epoch": 20.0,
"eval_bp": 0.42909609813393135,
"eval_counts": [
966,
898,
830,
762
],
"eval_loss": 7.006935447861906e-06,
"eval_precisions": [
99.79338842975207,
99.77777777777777,
99.75961538461539,
99.73821989528795
],
"eval_ref_len": 1787,
"eval_runtime": 16.0905,
"eval_samples_per_second": 4.226,
"eval_score": 42.809736944779694,
"eval_steps_per_second": 2.113,
"eval_sys_len": 968,
"eval_totals": [
968,
900,
832,
764
],
"step": 11240
},
{
"epoch": 20.46,
"learning_rate": 3.629893238434164e-06,
"loss": 0.0056,
"step": 11500
},
{
"epoch": 21.0,
"eval_bp": 0.42909609813393135,
"eval_counts": [
966,
898,
830,
762
],
"eval_loss": 1.5906211046967655e-05,
"eval_precisions": [
99.79338842975207,
99.77777777777777,
99.75961538461539,
99.73821989528795
],
"eval_ref_len": 1787,
"eval_runtime": 16.3937,
"eval_samples_per_second": 4.148,
"eval_score": 42.809736944779694,
"eval_steps_per_second": 2.074,
"eval_sys_len": 968,
"eval_totals": [
968,
900,
832,
764
],
"step": 11802
},
{
"epoch": 21.35,
"learning_rate": 2.918149466192171e-06,
"loss": 0.0041,
"step": 12000
},
{
"epoch": 22.0,
"eval_bp": 0.42909609813393135,
"eval_counts": [
966,
898,
830,
762
],
"eval_loss": 1.3131144442013465e-05,
"eval_precisions": [
99.79338842975207,
99.77777777777777,
99.75961538461539,
99.73821989528795
],
"eval_ref_len": 1787,
"eval_runtime": 16.4474,
"eval_samples_per_second": 4.134,
"eval_score": 42.809736944779694,
"eval_steps_per_second": 2.067,
"eval_sys_len": 968,
"eval_totals": [
968,
900,
832,
764
],
"step": 12364
},
{
"epoch": 22.24,
"learning_rate": 2.2064056939501782e-06,
"loss": 0.0033,
"step": 12500
},
{
"epoch": 23.0,
"eval_bp": 0.42909609813393135,
"eval_counts": [
966,
898,
830,
762
],
"eval_loss": 5.197064183448674e-06,
"eval_precisions": [
99.79338842975207,
99.77777777777777,
99.75961538461539,
99.73821989528795
],
"eval_ref_len": 1787,
"eval_runtime": 16.1321,
"eval_samples_per_second": 4.215,
"eval_score": 42.809736944779694,
"eval_steps_per_second": 2.108,
"eval_sys_len": 968,
"eval_totals": [
968,
900,
832,
764
],
"step": 12926
},
{
"epoch": 23.13,
"learning_rate": 1.494661921708185e-06,
"loss": 0.0033,
"step": 13000
},
{
"epoch": 24.0,
"eval_bp": 0.42909609813393135,
"eval_counts": [
966,
898,
830,
762
],
"eval_loss": 2.9162031296436908e-06,
"eval_precisions": [
99.79338842975207,
99.77777777777777,
99.75961538461539,
99.73821989528795
],
"eval_ref_len": 1787,
"eval_runtime": 16.4335,
"eval_samples_per_second": 4.138,
"eval_score": 42.809736944779694,
"eval_steps_per_second": 2.069,
"eval_sys_len": 968,
"eval_totals": [
968,
900,
832,
764
],
"step": 13488
},
{
"epoch": 24.02,
"learning_rate": 7.829181494661923e-07,
"loss": 0.0025,
"step": 13500
},
{
"epoch": 24.91,
"learning_rate": 7.117437722419929e-08,
"loss": 0.0023,
"step": 14000
}
],
"logging_steps": 500,
"max_steps": 14050,
"num_input_tokens_seen": 0,
"num_train_epochs": 25,
"save_steps": 500,
"total_flos": 2476170265006080.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}