{
  "best_metric": 0.8990687138182734,
  "best_model_checkpoint": "vit-cxr4-384/checkpoint-2400",
  "epoch": 2.0,
  "eval_steps": 100,
  "global_step": 2552,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "learning_rate": 4.982366771159875e-05,
      "loss": 0.7452,
      "step": 10
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.9627742946708465e-05,
      "loss": 0.5821,
      "step": 20
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.943181818181818e-05,
      "loss": 0.5277,
      "step": 30
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.92358934169279e-05,
      "loss": 0.5076,
      "step": 40
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.903996865203762e-05,
      "loss": 0.4483,
      "step": 50
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.8844043887147336e-05,
      "loss": 0.5492,
      "step": 60
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.864811912225706e-05,
      "loss": 0.3997,
      "step": 70
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.845219435736678e-05,
      "loss": 0.4182,
      "step": 80
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.827586206896552e-05,
      "loss": 0.4935,
      "step": 90
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.807993730407524e-05,
      "loss": 0.3694,
      "step": 100
    },
    {
      "epoch": 0.08,
      "eval_accuracy": 0.8233603344656388,
      "eval_f1": 0.8251422659079152,
      "eval_loss": 0.3850725293159485,
      "eval_precision": 0.8075949367088607,
      "eval_recall": 0.8434690639873083,
      "eval_runtime": 172.1875,
      "eval_samples_per_second": 22.226,
      "eval_steps_per_second": 0.929,
      "step": 100
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.788401253918495e-05,
      "loss": 0.3867,
      "step": 110
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.768808777429467e-05,
      "loss": 0.4248,
      "step": 120
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.749216300940439e-05,
      "loss": 0.331,
      "step": 130
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.7296238244514106e-05,
      "loss": 0.3978,
      "step": 140
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.7100313479623823e-05,
      "loss": 0.3176,
      "step": 150
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.690438871473354e-05,
      "loss": 0.5361,
      "step": 160
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.670846394984326e-05,
      "loss": 0.3897,
      "step": 170
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.651253918495298e-05,
      "loss": 0.3066,
      "step": 180
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.63166144200627e-05,
      "loss": 0.3042,
      "step": 190
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.612068965517242e-05,
      "loss": 0.3084,
      "step": 200
    },
    {
      "epoch": 0.16,
      "eval_accuracy": 0.8364253984844525,
      "eval_f1": 0.8566193311956023,
      "eval_loss": 0.4478375017642975,
      "eval_precision": 0.7555555555555555,
      "eval_recall": 0.9888947646747752,
      "eval_runtime": 168.684,
      "eval_samples_per_second": 22.687,
      "eval_steps_per_second": 0.949,
      "step": 200
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.592476489028214e-05,
      "loss": 0.4338,
      "step": 210
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.5728840125391855e-05,
      "loss": 0.3653,
      "step": 220
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.553291536050157e-05,
      "loss": 0.3125,
      "step": 230
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.533699059561129e-05,
      "loss": 0.4054,
      "step": 240
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.514106583072101e-05,
      "loss": 0.3616,
      "step": 250
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.4945141065830726e-05,
      "loss": 0.3286,
      "step": 260
    },
    {
      "epoch": 0.21,
      "learning_rate": 4.474921630094044e-05,
      "loss": 0.3067,
      "step": 270
    },
    {
      "epoch": 0.22,
      "learning_rate": 4.4553291536050155e-05,
      "loss": 0.3698,
      "step": 280
    },
    {
      "epoch": 0.23,
      "learning_rate": 4.435736677115987e-05,
      "loss": 0.3418,
      "step": 290
    },
    {
      "epoch": 0.24,
      "learning_rate": 4.416144200626959e-05,
      "loss": 0.3177,
      "step": 300
    },
    {
      "epoch": 0.24,
      "eval_accuracy": 0.8651685393258427,
      "eval_f1": 0.8752417794970986,
      "eval_loss": 0.30726760625839233,
      "eval_precision": 0.8062360801781737,
      "eval_recall": 0.9571655208884188,
      "eval_runtime": 167.4202,
      "eval_samples_per_second": 22.859,
      "eval_steps_per_second": 0.956,
      "step": 300
    },
    {
      "epoch": 0.24,
      "learning_rate": 4.396551724137931e-05,
      "loss": 0.2805,
      "step": 310
    },
    {
      "epoch": 0.25,
      "learning_rate": 4.3769592476489026e-05,
      "loss": 0.3783,
      "step": 320
    },
    {
      "epoch": 0.26,
      "learning_rate": 4.357366771159875e-05,
      "loss": 0.2961,
      "step": 330
    },
    {
      "epoch": 0.27,
      "learning_rate": 4.337774294670847e-05,
      "loss": 0.2732,
      "step": 340
    },
    {
      "epoch": 0.27,
      "learning_rate": 4.318181818181819e-05,
      "loss": 0.2852,
      "step": 350
    },
    {
      "epoch": 0.28,
      "learning_rate": 4.2985893416927904e-05,
      "loss": 0.307,
      "step": 360
    },
    {
      "epoch": 0.29,
      "learning_rate": 4.278996865203762e-05,
      "loss": 0.3855,
      "step": 370
    },
    {
      "epoch": 0.3,
      "learning_rate": 4.259404388714734e-05,
      "loss": 0.3382,
      "step": 380
    },
    {
      "epoch": 0.31,
      "learning_rate": 4.239811912225706e-05,
      "loss": 0.2779,
      "step": 390
    },
    {
      "epoch": 0.31,
      "learning_rate": 4.2202194357366776e-05,
      "loss": 0.3413,
      "step": 400
    },
    {
      "epoch": 0.31,
      "eval_accuracy": 0.8677815521296054,
      "eval_f1": 0.8757977417771232,
      "eval_loss": 0.2936372458934784,
      "eval_precision": 0.8172240036646816,
      "eval_recall": 0.9434161819143311,
      "eval_runtime": 166.7592,
      "eval_samples_per_second": 22.949,
      "eval_steps_per_second": 0.959,
      "step": 400
    },
    {
      "epoch": 0.32,
      "learning_rate": 4.2006269592476494e-05,
      "loss": 0.266,
      "step": 410
    },
    {
      "epoch": 0.33,
      "learning_rate": 4.1810344827586205e-05,
      "loss": 0.3177,
      "step": 420
    },
    {
      "epoch": 0.34,
      "learning_rate": 4.161442006269592e-05,
      "loss": 0.2613,
      "step": 430
    },
    {
      "epoch": 0.34,
      "learning_rate": 4.141849529780564e-05,
      "loss": 0.3307,
      "step": 440
    },
    {
      "epoch": 0.35,
      "learning_rate": 4.122257053291536e-05,
      "loss": 0.2463,
      "step": 450
    },
    {
      "epoch": 0.36,
      "learning_rate": 4.1026645768025076e-05,
      "loss": 0.2588,
      "step": 460
    },
    {
      "epoch": 0.37,
      "learning_rate": 4.08307210031348e-05,
      "loss": 0.4268,
      "step": 470
    },
    {
      "epoch": 0.38,
      "learning_rate": 4.063479623824452e-05,
      "loss": 0.3498,
      "step": 480
    },
    {
      "epoch": 0.38,
      "learning_rate": 4.0438871473354236e-05,
      "loss": 0.3102,
      "step": 490
    },
    {
      "epoch": 0.39,
      "learning_rate": 4.0242946708463954e-05,
      "loss": 0.2612,
      "step": 500
    },
    {
      "epoch": 0.39,
      "eval_accuracy": 0.8709171674941207,
      "eval_f1": 0.8747464503042597,
      "eval_loss": 0.29362666606903076,
      "eval_precision": 0.8402338041889917,
      "eval_recall": 0.9122157588577472,
      "eval_runtime": 166.9694,
      "eval_samples_per_second": 22.92,
      "eval_steps_per_second": 0.958,
      "step": 500
    },
    {
      "epoch": 0.4,
      "learning_rate": 4.004702194357367e-05,
      "loss": 0.3364,
      "step": 510
    },
    {
      "epoch": 0.41,
      "learning_rate": 3.985109717868339e-05,
      "loss": 0.2959,
      "step": 520
    },
    {
      "epoch": 0.42,
      "learning_rate": 3.965517241379311e-05,
      "loss": 0.289,
      "step": 530
    },
    {
      "epoch": 0.42,
      "learning_rate": 3.9459247648902825e-05,
      "loss": 0.3726,
      "step": 540
    },
    {
      "epoch": 0.43,
      "learning_rate": 3.926332288401254e-05,
      "loss": 0.2789,
      "step": 550
    },
    {
      "epoch": 0.44,
      "learning_rate": 3.906739811912226e-05,
      "loss": 0.3125,
      "step": 560
    },
    {
      "epoch": 0.45,
      "learning_rate": 3.887147335423197e-05,
      "loss": 0.3008,
      "step": 570
    },
    {
      "epoch": 0.45,
      "learning_rate": 3.867554858934169e-05,
      "loss": 0.3505,
      "step": 580
    },
    {
      "epoch": 0.46,
      "learning_rate": 3.847962382445141e-05,
      "loss": 0.3043,
      "step": 590
    },
    {
      "epoch": 0.47,
      "learning_rate": 3.828369905956113e-05,
      "loss": 0.3607,
      "step": 600
    },
    {
      "epoch": 0.47,
      "eval_accuracy": 0.876927096942775,
      "eval_f1": 0.8852059468681452,
      "eval_loss": 0.2716532349586487,
      "eval_precision": 0.8209764918625678,
      "eval_recall": 0.9603384452670545,
      "eval_runtime": 165.7826,
      "eval_samples_per_second": 23.084,
      "eval_steps_per_second": 0.965,
      "step": 600
    },
    {
      "epoch": 0.48,
      "learning_rate": 3.808777429467085e-05,
      "loss": 0.3092,
      "step": 610
    },
    {
      "epoch": 0.49,
      "learning_rate": 3.789184952978057e-05,
      "loss": 0.2916,
      "step": 620
    },
    {
      "epoch": 0.49,
      "learning_rate": 3.7695924764890286e-05,
      "loss": 0.2446,
      "step": 630
    },
    {
      "epoch": 0.5,
      "learning_rate": 3.7500000000000003e-05,
      "loss": 0.3083,
      "step": 640
    },
    {
      "epoch": 0.51,
      "learning_rate": 3.730407523510972e-05,
      "loss": 0.2633,
      "step": 650
    },
    {
      "epoch": 0.52,
      "learning_rate": 3.710815047021944e-05,
      "loss": 0.2867,
      "step": 660
    },
    {
      "epoch": 0.53,
      "learning_rate": 3.691222570532916e-05,
      "loss": 0.2652,
      "step": 670
    },
    {
      "epoch": 0.53,
      "learning_rate": 3.6716300940438875e-05,
      "loss": 0.2317,
      "step": 680
    },
    {
      "epoch": 0.54,
      "learning_rate": 3.652037617554859e-05,
      "loss": 0.2298,
      "step": 690
    },
    {
      "epoch": 0.55,
      "learning_rate": 3.632445141065831e-05,
      "loss": 0.274,
      "step": 700
    },
    {
      "epoch": 0.55,
      "eval_accuracy": 0.8719623726156258,
      "eval_f1": 0.8765120967741936,
      "eval_loss": 0.2875473201274872,
      "eval_precision": 0.8372652864708714,
      "eval_recall": 0.9196192490745637,
      "eval_runtime": 165.0637,
      "eval_samples_per_second": 23.185,
      "eval_steps_per_second": 0.969,
      "step": 700
    },
    {
      "epoch": 0.56,
      "learning_rate": 3.612852664576803e-05,
      "loss": 0.2862,
      "step": 710
    },
    {
      "epoch": 0.56,
      "learning_rate": 3.593260188087774e-05,
      "loss": 0.2746,
      "step": 720
    },
    {
      "epoch": 0.57,
      "learning_rate": 3.573667711598746e-05,
      "loss": 0.2948,
      "step": 730
    },
    {
      "epoch": 0.58,
      "learning_rate": 3.554075235109718e-05,
      "loss": 0.2878,
      "step": 740
    },
    {
      "epoch": 0.59,
      "learning_rate": 3.53448275862069e-05,
      "loss": 0.2381,
      "step": 750
    },
    {
      "epoch": 0.6,
      "learning_rate": 3.514890282131662e-05,
      "loss": 0.2854,
      "step": 760
    },
    {
      "epoch": 0.6,
      "learning_rate": 3.4952978056426335e-05,
      "loss": 0.2571,
      "step": 770
    },
    {
      "epoch": 0.61,
      "learning_rate": 3.475705329153605e-05,
      "loss": 0.2587,
      "step": 780
    },
    {
      "epoch": 0.62,
      "learning_rate": 3.456112852664577e-05,
      "loss": 0.2867,
      "step": 790
    },
    {
      "epoch": 0.63,
      "learning_rate": 3.436520376175549e-05,
      "loss": 0.3127,
      "step": 800
    },
    {
      "epoch": 0.63,
      "eval_accuracy": 0.8761431931016462,
      "eval_f1": 0.8853965183752417,
      "eval_loss": 0.26641419529914856,
      "eval_precision": 0.8155902004454343,
      "eval_recall": 0.9682707562136436,
      "eval_runtime": 175.8421,
      "eval_samples_per_second": 21.764,
      "eval_steps_per_second": 0.91,
      "step": 800
    },
    {
      "epoch": 0.63,
      "learning_rate": 3.4169278996865206e-05,
      "loss": 0.3393,
      "step": 810
    },
    {
      "epoch": 0.64,
      "learning_rate": 3.3973354231974924e-05,
      "loss": 0.2543,
      "step": 820
    },
    {
      "epoch": 0.65,
      "learning_rate": 3.377742946708464e-05,
      "loss": 0.2367,
      "step": 830
    },
    {
      "epoch": 0.66,
      "learning_rate": 3.358150470219436e-05,
      "loss": 0.3035,
      "step": 840
    },
    {
      "epoch": 0.67,
      "learning_rate": 3.338557993730408e-05,
      "loss": 0.3645,
      "step": 850
    },
    {
      "epoch": 0.67,
      "learning_rate": 3.3189655172413796e-05,
      "loss": 0.2589,
      "step": 860
    },
    {
      "epoch": 0.68,
      "learning_rate": 3.299373040752351e-05,
      "loss": 0.244,
      "step": 870
    },
    {
      "epoch": 0.69,
      "learning_rate": 3.279780564263323e-05,
      "loss": 0.2688,
      "step": 880
    },
    {
      "epoch": 0.7,
      "learning_rate": 3.260188087774295e-05,
      "loss": 0.2616,
      "step": 890
    },
    {
      "epoch": 0.71,
      "learning_rate": 3.240595611285267e-05,
      "loss": 0.2875,
      "step": 900
    },
    {
      "epoch": 0.71,
      "eval_accuracy": 0.8771883982231513,
      "eval_f1": 0.8825,
      "eval_loss": 0.2643195688724518,
      "eval_precision": 0.8368895211000474,
      "eval_recall": 0.9333685880486515,
      "eval_runtime": 172.9293,
      "eval_samples_per_second": 22.13,
      "eval_steps_per_second": 0.925,
      "step": 900
    },
    {
      "epoch": 0.71,
      "learning_rate": 3.2210031347962385e-05,
      "loss": 0.2741,
      "step": 910
    },
    {
      "epoch": 0.72,
      "learning_rate": 3.20141065830721e-05,
      "loss": 0.2502,
      "step": 920
    },
    {
      "epoch": 0.73,
      "learning_rate": 3.181818181818182e-05,
      "loss": 0.2983,
      "step": 930
    },
    {
      "epoch": 0.74,
      "learning_rate": 3.162225705329154e-05,
      "loss": 0.26,
      "step": 940
    },
    {
      "epoch": 0.74,
      "learning_rate": 3.1426332288401256e-05,
      "loss": 0.2931,
      "step": 950
    },
    {
      "epoch": 0.75,
      "learning_rate": 3.1230407523510974e-05,
      "loss": 0.2665,
      "step": 960
    },
    {
      "epoch": 0.76,
      "learning_rate": 3.103448275862069e-05,
      "loss": 0.2586,
      "step": 970
    },
    {
      "epoch": 0.77,
      "learning_rate": 3.083855799373041e-05,
      "loss": 0.2245,
      "step": 980
    },
    {
      "epoch": 0.78,
      "learning_rate": 3.064263322884013e-05,
      "loss": 0.2973,
      "step": 990
    },
    {
      "epoch": 0.78,
      "learning_rate": 3.044670846394985e-05,
      "loss": 0.2652,
      "step": 1000
    },
    {
      "epoch": 0.78,
      "eval_accuracy": 0.8745753854193885,
      "eval_f1": 0.8841139546112989,
      "eval_loss": 0.2659239172935486,
      "eval_precision": 0.8134162594402488,
      "eval_recall": 0.9682707562136436,
      "eval_runtime": 172.1185,
      "eval_samples_per_second": 22.235,
      "eval_steps_per_second": 0.93,
      "step": 1000
    },
    {
      "epoch": 0.79,
      "learning_rate": 3.0250783699059566e-05,
      "loss": 0.2408,
      "step": 1010
    },
    {
      "epoch": 0.8,
      "learning_rate": 3.0054858934169277e-05,
      "loss": 0.284,
      "step": 1020
    },
    {
      "epoch": 0.81,
      "learning_rate": 2.9858934169278995e-05,
      "loss": 0.2906,
      "step": 1030
    },
    {
      "epoch": 0.82,
      "learning_rate": 2.9663009404388713e-05,
      "loss": 0.2415,
      "step": 1040
    },
    {
      "epoch": 0.82,
      "learning_rate": 2.946708463949843e-05,
      "loss": 0.2642,
      "step": 1050
    },
    {
      "epoch": 0.83,
      "learning_rate": 2.9271159874608152e-05,
      "loss": 0.2606,
      "step": 1060
    },
    {
      "epoch": 0.84,
      "learning_rate": 2.907523510971787e-05,
      "loss": 0.3017,
      "step": 1070
    },
    {
      "epoch": 0.85,
      "learning_rate": 2.8879310344827588e-05,
      "loss": 0.2525,
      "step": 1080
    },
    {
      "epoch": 0.85,
      "learning_rate": 2.8683385579937305e-05,
      "loss": 0.2373,
      "step": 1090
    },
    {
      "epoch": 0.86,
      "learning_rate": 2.8487460815047023e-05,
      "loss": 0.2661,
      "step": 1100
    },
    {
      "epoch": 0.86,
      "eval_accuracy": 0.8792788084661615,
      "eval_f1": 0.885473475458602,
      "eval_loss": 0.2590779662132263,
      "eval_precision": 0.8334111059262715,
      "eval_recall": 0.9444738233738763,
      "eval_runtime": 172.4587,
      "eval_samples_per_second": 22.191,
      "eval_steps_per_second": 0.928,
      "step": 1100
    },
    {
      "epoch": 0.87,
      "learning_rate": 2.829153605015674e-05,
      "loss": 0.2587,
      "step": 1110
    },
    {
      "epoch": 0.88,
      "learning_rate": 2.809561128526646e-05,
      "loss": 0.2987,
      "step": 1120
    },
    {
      "epoch": 0.89,
      "learning_rate": 2.7899686520376177e-05,
      "loss": 0.2422,
      "step": 1130
    },
    {
      "epoch": 0.89,
      "learning_rate": 2.7703761755485898e-05,
      "loss": 0.2469,
      "step": 1140
    },
    {
      "epoch": 0.9,
      "learning_rate": 2.7507836990595616e-05,
      "loss": 0.2884,
      "step": 1150
    },
    {
      "epoch": 0.91,
      "learning_rate": 2.7311912225705334e-05,
      "loss": 0.264,
      "step": 1160
    },
    {
      "epoch": 0.92,
      "learning_rate": 2.711598746081505e-05,
      "loss": 0.3332,
      "step": 1170
    },
    {
      "epoch": 0.92,
      "learning_rate": 2.6920062695924762e-05,
      "loss": 0.241,
      "step": 1180
    },
    {
      "epoch": 0.93,
      "learning_rate": 2.672413793103448e-05,
      "loss": 0.2405,
      "step": 1190
    },
    {
      "epoch": 0.94,
      "learning_rate": 2.65282131661442e-05,
      "loss": 0.3019,
      "step": 1200
    },
    {
      "epoch": 0.94,
      "eval_accuracy": 0.8756205905408937,
      "eval_f1": 0.8723175965665235,
      "eval_loss": 0.27287837862968445,
      "eval_precision": 0.8851388132825259,
      "eval_recall": 0.8598625066102591,
      "eval_runtime": 172.6345,
      "eval_samples_per_second": 22.168,
      "eval_steps_per_second": 0.927,
      "step": 1200
    },
    {
      "epoch": 0.95,
      "learning_rate": 2.633228840125392e-05,
      "loss": 0.241,
      "step": 1210
    },
    {
      "epoch": 0.96,
      "learning_rate": 2.6136363636363637e-05,
      "loss": 0.1991,
      "step": 1220
    },
    {
      "epoch": 0.96,
      "learning_rate": 2.5940438871473355e-05,
      "loss": 0.2499,
      "step": 1230
    },
    {
      "epoch": 0.97,
      "learning_rate": 2.5744514106583073e-05,
      "loss": 0.335,
      "step": 1240
    },
    {
      "epoch": 0.98,
      "learning_rate": 2.554858934169279e-05,
      "loss": 0.2865,
      "step": 1250
    },
    {
      "epoch": 0.99,
      "learning_rate": 2.535266457680251e-05,
      "loss": 0.2706,
      "step": 1260
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.5156739811912226e-05,
      "loss": 0.3153,
      "step": 1270
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.4960815047021947e-05,
      "loss": 0.2667,
      "step": 1280
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.4764890282131662e-05,
      "loss": 0.2463,
      "step": 1290
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.456896551724138e-05,
      "loss": 0.229,
      "step": 1300
    },
    {
      "epoch": 1.02,
      "eval_accuracy": 0.8871178468774497,
      "eval_f1": 0.8937007874015748,
      "eval_loss": 0.25479769706726074,
      "eval_precision": 0.8357109986194201,
      "eval_recall": 0.9603384452670545,
      "eval_runtime": 173.7953,
      "eval_samples_per_second": 22.02,
      "eval_steps_per_second": 0.921,
      "step": 1300
    },
    {
      "epoch": 1.03,
      "learning_rate": 2.4373040752351098e-05,
      "loss": 0.2328,
      "step": 1310
    },
    {
      "epoch": 1.03,
      "learning_rate": 2.4177115987460815e-05,
      "loss": 0.1678,
      "step": 1320
    },
    {
      "epoch": 1.04,
      "learning_rate": 2.3981191222570533e-05,
      "loss": 0.2616,
      "step": 1330
    },
    {
      "epoch": 1.05,
      "learning_rate": 2.378526645768025e-05,
      "loss": 0.2398,
      "step": 1340
    },
    {
      "epoch": 1.06,
      "learning_rate": 2.3589341692789972e-05,
      "loss": 0.275,
      "step": 1350
    },
    {
      "epoch": 1.07,
      "learning_rate": 2.339341692789969e-05,
      "loss": 0.1851,
      "step": 1360
    },
    {
      "epoch": 1.07,
      "learning_rate": 2.3197492163009404e-05,
      "loss": 0.2828,
      "step": 1370
    },
    {
      "epoch": 1.08,
      "learning_rate": 2.3001567398119122e-05,
      "loss": 0.2727,
      "step": 1380
    },
    {
      "epoch": 1.09,
      "learning_rate": 2.280564263322884e-05,
      "loss": 0.259,
      "step": 1390
    },
    {
      "epoch": 1.1,
      "learning_rate": 2.2609717868338558e-05,
      "loss": 0.1841,
      "step": 1400
    },
    {
      "epoch": 1.1,
      "eval_accuracy": 0.8863339430363208,
      "eval_f1": 0.8890589135424637,
      "eval_loss": 0.24377945065498352,
      "eval_precision": 0.8586206896551725,
      "eval_recall": 0.9217345319936542,
      "eval_runtime": 176.1028,
      "eval_samples_per_second": 21.732,
      "eval_steps_per_second": 0.909,
      "step": 1400
    },
    {
      "epoch": 1.11,
      "learning_rate": 2.2413793103448276e-05,
      "loss": 0.2321,
      "step": 1410
    },
    {
      "epoch": 1.11,
      "learning_rate": 2.2217868338557997e-05,
      "loss": 0.2112,
      "step": 1420
    },
    {
      "epoch": 1.12,
      "learning_rate": 2.2021943573667715e-05,
      "loss": 0.1736,
      "step": 1430
    },
    {
      "epoch": 1.13,
      "learning_rate": 2.182601880877743e-05,
      "loss": 0.2211,
      "step": 1440
    },
    {
      "epoch": 1.14,
      "learning_rate": 2.1630094043887147e-05,
      "loss": 0.1855,
      "step": 1450
    },
    {
      "epoch": 1.14,
      "learning_rate": 2.1434169278996865e-05,
      "loss": 0.2346,
      "step": 1460
    },
    {
      "epoch": 1.15,
      "learning_rate": 2.1238244514106583e-05,
      "loss": 0.1662,
      "step": 1470
    },
    {
      "epoch": 1.16,
      "learning_rate": 2.10423197492163e-05,
      "loss": 0.2776,
      "step": 1480
    },
    {
      "epoch": 1.17,
      "learning_rate": 2.0846394984326022e-05,
      "loss": 0.2281,
      "step": 1490
    },
    {
      "epoch": 1.18,
      "learning_rate": 2.065047021943574e-05,
      "loss": 0.2257,
      "step": 1500
    },
    {
      "epoch": 1.18,
      "eval_accuracy": 0.8905147635223413,
      "eval_f1": 0.8930849706557795,
      "eval_loss": 0.2364816665649414,
      "eval_precision": 0.8629191321499013,
      "eval_recall": 0.9254362771020624,
      "eval_runtime": 173.2602,
      "eval_samples_per_second": 22.088,
      "eval_steps_per_second": 0.923,
      "step": 1500
    },
    {
      "epoch": 1.18,
      "learning_rate": 2.0454545454545457e-05,
      "loss": 0.2546,
      "step": 1510
    },
    {
      "epoch": 1.19,
      "learning_rate": 2.0258620689655172e-05,
      "loss": 0.139,
      "step": 1520
    },
    {
      "epoch": 1.2,
      "learning_rate": 2.006269592476489e-05,
      "loss": 0.2382,
      "step": 1530
    },
    {
      "epoch": 1.21,
      "learning_rate": 1.9866771159874607e-05,
      "loss": 0.2475,
      "step": 1540
    },
    {
      "epoch": 1.21,
      "learning_rate": 1.9670846394984325e-05,
      "loss": 0.262,
      "step": 1550
    },
    {
      "epoch": 1.22,
      "learning_rate": 1.9474921630094046e-05,
      "loss": 0.211,
      "step": 1560
    },
    {
      "epoch": 1.23,
      "learning_rate": 1.9278996865203764e-05,
      "loss": 0.2436,
      "step": 1570
    },
    {
      "epoch": 1.24,
      "learning_rate": 1.9083072100313482e-05,
      "loss": 0.1954,
      "step": 1580
    },
    {
      "epoch": 1.25,
      "learning_rate": 1.8887147335423197e-05,
      "loss": 0.174,
      "step": 1590
    },
    {
      "epoch": 1.25,
      "learning_rate": 1.8691222570532914e-05,
      "loss": 0.2217,
      "step": 1600
    },
    {
      "epoch": 1.25,
      "eval_accuracy": 0.8803240135876665,
      "eval_f1": 0.8773433315479378,
      "eval_loss": 0.25091663002967834,
      "eval_precision": 0.8887683125339121,
      "eval_recall": 0.8662083553675304,
      "eval_runtime": 168.9717,
      "eval_samples_per_second": 22.649,
      "eval_steps_per_second": 0.947,
      "step": 1600
    },
    {
      "epoch": 1.26,
      "learning_rate": 1.8495297805642632e-05,
      "loss": 0.2478,
      "step": 1610
    },
    {
      "epoch": 1.27,
      "learning_rate": 1.829937304075235e-05,
      "loss": 0.2255,
      "step": 1620
    },
    {
      "epoch": 1.28,
      "learning_rate": 1.810344827586207e-05,
      "loss": 0.1823,
      "step": 1630
    },
    {
      "epoch": 1.29,
      "learning_rate": 1.790752351097179e-05,
      "loss": 0.1716,
      "step": 1640
    },
    {
      "epoch": 1.29,
      "learning_rate": 1.7711598746081507e-05,
      "loss": 0.2434,
      "step": 1650
    },
    {
      "epoch": 1.3,
      "learning_rate": 1.7515673981191225e-05,
      "loss": 0.1945,
      "step": 1660
    },
    {
      "epoch": 1.31,
      "learning_rate": 1.731974921630094e-05,
      "loss": 0.1695,
      "step": 1670
    },
    {
      "epoch": 1.32,
      "learning_rate": 1.7123824451410657e-05,
      "loss": 0.181,
      "step": 1680
    },
    {
      "epoch": 1.32,
      "learning_rate": 1.6927899686520378e-05,
      "loss": 0.2047,
      "step": 1690
    },
    {
      "epoch": 1.33,
      "learning_rate": 1.6731974921630096e-05,
      "loss": 0.2619,
      "step": 1700
    },
    {
      "epoch": 1.33,
      "eval_accuracy": 0.8873791481578259,
      "eval_f1": 0.893711467324291,
      "eval_loss": 0.25879478454589844,
      "eval_precision": 0.8373382624768947,
      "eval_recall": 0.958223162347964,
      "eval_runtime": 167.0043,
      "eval_samples_per_second": 22.916,
      "eval_steps_per_second": 0.958,
      "step": 1700
    },
    {
      "epoch": 1.34,
      "learning_rate": 1.6536050156739814e-05,
      "loss": 0.2562,
      "step": 1710
    },
    {
      "epoch": 1.35,
      "learning_rate": 1.634012539184953e-05,
      "loss": 0.2448,
      "step": 1720
    },
    {
      "epoch": 1.36,
      "learning_rate": 1.614420062695925e-05,
      "loss": 0.2452,
      "step": 1730
    },
    {
      "epoch": 1.36,
      "learning_rate": 1.5948275862068967e-05,
      "loss": 0.225,
      "step": 1740
    },
    {
      "epoch": 1.37,
      "learning_rate": 1.5752351097178682e-05,
      "loss": 0.1817,
      "step": 1750
    },
    {
      "epoch": 1.38,
      "learning_rate": 1.5556426332288403e-05,
      "loss": 0.1978,
      "step": 1760
    },
    {
      "epoch": 1.39,
      "learning_rate": 1.536050156739812e-05,
      "loss": 0.1928,
      "step": 1770
    },
    {
      "epoch": 1.39,
      "learning_rate": 1.5164576802507839e-05,
      "loss": 0.2085,
      "step": 1780
    },
    {
      "epoch": 1.4,
      "learning_rate": 1.4968652037617556e-05,
      "loss": 0.1174,
      "step": 1790
    },
    {
      "epoch": 1.41,
      "learning_rate": 1.4772727272727274e-05,
      "loss": 0.2222,
      "step": 1800
    },
    {
      "epoch": 1.41,
      "eval_accuracy": 0.8907760648027175,
      "eval_f1": 0.8931492842535788,
      "eval_loss": 0.2520654797554016,
      "eval_precision": 0.8644235526966848,
      "eval_recall": 0.9238498149127445,
      "eval_runtime": 168.4799,
      "eval_samples_per_second": 22.715,
      "eval_steps_per_second": 0.95,
      "step": 1800
    },
    {
      "epoch": 1.42,
      "learning_rate": 1.4576802507836992e-05,
      "loss": 0.2499,
      "step": 1810
    },
    {
      "epoch": 1.43,
      "learning_rate": 1.4380877742946708e-05,
      "loss": 0.231,
      "step": 1820
    },
    {
      "epoch": 1.43,
      "learning_rate": 1.4184952978056426e-05,
      "loss": 0.2446,
      "step": 1830
    },
    {
      "epoch": 1.44,
      "learning_rate": 1.3989028213166144e-05,
      "loss": 0.2212,
      "step": 1840
    },
    {
      "epoch": 1.45,
      "learning_rate": 1.3793103448275863e-05,
      "loss": 0.1873,
      "step": 1850
    },
    {
      "epoch": 1.46,
      "learning_rate": 1.3597178683385581e-05,
      "loss": 0.1655,
      "step": 1860
    },
    {
      "epoch": 1.47,
      "learning_rate": 1.3401253918495299e-05,
      "loss": 0.1798,
      "step": 1870
    },
    {
      "epoch": 1.47,
      "learning_rate": 1.3205329153605017e-05,
      "loss": 0.212,
      "step": 1880
    },
    {
      "epoch": 1.48,
      "learning_rate": 1.3009404388714735e-05,
      "loss": 0.2339,
      "step": 1890
    },
    {
      "epoch": 1.49,
      "learning_rate": 1.281347962382445e-05,
      "loss": 0.2044,
      "step": 1900
    },
    {
      "epoch": 1.49,
      "eval_accuracy": 0.8899921609615887,
      "eval_f1": 0.895972325179145,
      "eval_loss": 0.2598423957824707,
      "eval_precision": 0.8409090909090909,
      "eval_recall": 0.9587519830777367,
      "eval_runtime": 168.1275,
      "eval_samples_per_second": 22.762,
      "eval_steps_per_second": 0.952,
      "step": 1900
    },
    {
      "epoch": 1.5,
      "learning_rate": 1.2617554858934169e-05,
      "loss": 0.1993,
      "step": 1910
    },
    {
      "epoch": 1.5,
      "learning_rate": 1.2421630094043888e-05,
      "loss": 0.2176,
      "step": 1920
    },
    {
      "epoch": 1.51,
      "learning_rate": 1.2225705329153606e-05,
      "loss": 0.1554,
      "step": 1930
    },
    {
      "epoch": 1.52,
      "learning_rate": 1.2029780564263324e-05,
      "loss": 0.2332,
      "step": 1940
    },
    {
      "epoch": 1.53,
      "learning_rate": 1.1833855799373042e-05,
      "loss": 0.1662,
      "step": 1950
    },
    {
      "epoch": 1.54,
      "learning_rate": 1.163793103448276e-05,
      "loss": 0.1932,
      "step": 1960
    },
    {
      "epoch": 1.54,
      "learning_rate": 1.1442006269592477e-05,
      "loss": 0.1996,
      "step": 1970
    },
    {
      "epoch": 1.55,
      "learning_rate": 1.1246081504702195e-05,
      "loss": 0.1516,
      "step": 1980
    },
    {
      "epoch": 1.56,
      "learning_rate": 1.1050156739811913e-05,
      "loss": 0.2517,
      "step": 1990
    },
    {
      "epoch": 1.57,
      "learning_rate": 1.085423197492163e-05,
      "loss": 0.2238,
      "step": 2000
    },
    {
      "epoch": 1.57,
      "eval_accuracy": 0.8764044943820225,
      "eval_f1": 0.8690838638250762,
      "eval_loss": 0.264072448015213,
      "eval_precision": 0.9117305458768873,
      "eval_recall": 0.8302485457429931,
      "eval_runtime": 166.9078,
      "eval_samples_per_second": 22.929,
      "eval_steps_per_second": 0.959,
      "step": 2000
    },
    {
      "epoch": 1.58,
      "learning_rate": 1.0658307210031348e-05,
      "loss": 0.2006,
      "step": 2010
    },
    {
      "epoch": 1.58,
      "learning_rate": 1.0462382445141066e-05,
      "loss": 0.1282,
      "step": 2020
    },
    {
      "epoch": 1.59,
      "learning_rate": 1.0266457680250784e-05,
      "loss": 0.1714,
      "step": 2030
    },
    {
      "epoch": 1.6,
      "learning_rate": 1.0070532915360502e-05,
      "loss": 0.1795,
      "step": 2040
    },
    {
      "epoch": 1.61,
      "learning_rate": 9.87460815047022e-06,
      "loss": 0.1915,
      "step": 2050
    },
    {
      "epoch": 1.61,
      "learning_rate": 9.678683385579938e-06,
      "loss": 0.2153,
      "step": 2060
    },
    {
      "epoch": 1.62,
      "learning_rate": 9.482758620689655e-06,
      "loss": 0.2176,
      "step": 2070
    },
    {
      "epoch": 1.63,
      "learning_rate": 9.286833855799373e-06,
      "loss": 0.2194,
      "step": 2080
    },
    {
      "epoch": 1.64,
      "learning_rate": 9.090909090909091e-06,
      "loss": 0.1527,
      "step": 2090
    },
    {
      "epoch": 1.65,
      "learning_rate": 8.89498432601881e-06,
      "loss": 0.249,
      "step": 2100
    },
    {
      "epoch": 1.65,
      "eval_accuracy": 0.8926051737653514,
      "eval_f1": 0.8979389123416935,
      "eval_loss": 0.2367783486843109,
      "eval_precision": 0.846441947565543,
      "eval_recall": 0.9561078794288737,
      "eval_runtime": 167.7276,
      "eval_samples_per_second": 22.817,
      "eval_steps_per_second": 0.954,
      "step": 2100
    },
    {
      "epoch": 1.65,
      "learning_rate": 8.699059561128527e-06,
      "loss": 0.175,
      "step": 2110
    },
    {
      "epoch": 1.66,
      "learning_rate": 8.503134796238245e-06,
      "loss": 0.2238,
      "step": 2120
    },
    {
      "epoch": 1.67,
      "learning_rate": 8.307210031347962e-06,
      "loss": 0.2249,
      "step": 2130
    },
    {
      "epoch": 1.68,
      "learning_rate": 8.111285266457682e-06,
      "loss": 0.2395,
      "step": 2140
    },
    {
      "epoch": 1.68,
      "learning_rate": 7.915360501567398e-06,
      "loss": 0.1775,
      "step": 2150
    },
    {
      "epoch": 1.69,
      "learning_rate": 7.719435736677116e-06,
      "loss": 0.2111,
      "step": 2160
    },
    {
      "epoch": 1.7,
      "learning_rate": 7.5235109717868345e-06,
      "loss": 0.2092,
      "step": 2170
    },
    {
      "epoch": 1.71,
      "learning_rate": 7.3275862068965514e-06,
      "loss": 0.2313,
      "step": 2180
    },
    {
      "epoch": 1.72,
      "learning_rate": 7.131661442006269e-06,
      "loss": 0.1508,
      "step": 2190
    },
    {
      "epoch": 1.72,
      "learning_rate": 6.935736677115988e-06,
      "loss": 0.1773,
      "step": 2200
    },
    {
      "epoch": 1.72,
      "eval_accuracy": 0.894172981447609,
      "eval_f1": 0.8963929393706831,
      "eval_loss": 0.22330810129642487,
      "eval_precision": 0.8681863230921705,
      "eval_recall": 0.9264939185616076,
      "eval_runtime": 166.0964,
      "eval_samples_per_second": 23.041,
      "eval_steps_per_second": 0.963,
      "step": 2200
    },
    {
      "epoch": 1.73,
      "learning_rate": 6.739811912225706e-06,
      "loss": 0.165,
      "step": 2210
    },
    {
      "epoch": 1.74,
      "learning_rate": 6.543887147335423e-06,
      "loss": 0.2013,
      "step": 2220
    },
    {
      "epoch": 1.75,
      "learning_rate": 6.347962382445141e-06,
      "loss": 0.2334,
      "step": 2230
    },
    {
      "epoch": 1.76,
      "learning_rate": 6.152037617554859e-06,
      "loss": 0.1951,
      "step": 2240
    },
    {
      "epoch": 1.76,
      "learning_rate": 5.956112852664577e-06,
      "loss": 0.165,
      "step": 2250
    },
    {
      "epoch": 1.77,
      "learning_rate": 5.760188087774295e-06,
      "loss": 0.1469,
      "step": 2260
    },
    {
      "epoch": 1.78,
      "learning_rate": 5.564263322884013e-06,
      "loss": 0.2499,
      "step": 2270
    },
    {
      "epoch": 1.79,
      "learning_rate": 5.368338557993731e-06,
      "loss": 0.1992,
      "step": 2280
    },
    {
      "epoch": 1.79,
      "learning_rate": 5.172413793103448e-06,
      "loss": 0.2433,
      "step": 2290
    },
    {
      "epoch": 1.8,
      "learning_rate": 4.976489028213167e-06,
      "loss": 0.1447,
      "step": 2300
    },
    {
      "epoch": 1.8,
      "eval_accuracy": 0.8957407891298668,
      "eval_f1": 0.8970322580645161,
      "eval_loss": 0.22690534591674805,
      "eval_precision": 0.876008064516129,
      "eval_recall": 0.9190904283447912,
      "eval_runtime": 166.3913,
      "eval_samples_per_second": 23.0,
      "eval_steps_per_second": 0.962,
      "step": 2300
    },
    {
      "epoch": 1.81,
      "learning_rate": 4.780564263322884e-06,
      "loss": 0.2265,
      "step": 2310
    },
    {
      "epoch": 1.82,
      "learning_rate": 4.584639498432603e-06,
      "loss": 0.1754,
      "step": 2320
    },
    {
      "epoch": 1.83,
      "learning_rate": 4.38871473354232e-06,
      "loss": 0.1462,
      "step": 2330
    },
    {
      "epoch": 1.83,
      "learning_rate": 4.1927899686520374e-06,
      "loss": 0.1796,
      "step": 2340
    },
    {
      "epoch": 1.84,
      "learning_rate": 3.996865203761756e-06,
      "loss": 0.1956,
      "step": 2350
    },
    {
      "epoch": 1.85,
      "learning_rate": 3.800940438871473e-06,
      "loss": 0.2103,
      "step": 2360
    },
    {
      "epoch": 1.86,
      "learning_rate": 3.6050156739811913e-06,
      "loss": 0.1791,
      "step": 2370
    },
    {
      "epoch": 1.87,
      "learning_rate": 3.409090909090909e-06,
      "loss": 0.1634,
      "step": 2380
    },
    {
      "epoch": 1.87,
      "learning_rate": 3.2131661442006274e-06,
      "loss": 0.1923,
      "step": 2390
    },
    {
      "epoch": 1.88,
      "learning_rate": 3.017241379310345e-06,
      "loss": 0.245,
      "step": 2400
    },
    {
      "epoch": 1.88,
      "eval_accuracy": 0.8952181865691142,
      "eval_f1": 0.8990687138182734,
      "eval_loss": 0.23547519743442535,
      "eval_precision": 0.8578290105667628,
      "eval_recall": 0.9444738233738763,
      "eval_runtime": 166.9651,
      "eval_samples_per_second": 22.921,
      "eval_steps_per_second": 0.958,
      "step": 2400
    },
    {
      "epoch": 1.89,
      "learning_rate": 2.821316614420063e-06,
      "loss": 0.1853,
      "step": 2410
    },
    {
      "epoch": 1.9,
      "learning_rate": 2.625391849529781e-06,
      "loss": 0.1623,
      "step": 2420
    },
    {
      "epoch": 1.9,
      "learning_rate": 2.4294670846394982e-06,
      "loss": 0.2096,
      "step": 2430
    },
    {
      "epoch": 1.91,
      "learning_rate": 2.233542319749216e-06,
      "loss": 0.183,
      "step": 2440
    },
    {
      "epoch": 1.92,
      "learning_rate": 2.0376175548589343e-06,
      "loss": 0.1839,
      "step": 2450
    },
    {
      "epoch": 1.93,
      "learning_rate": 1.841692789968652e-06,
      "loss": 0.2006,
      "step": 2460
    },
    {
      "epoch": 1.94,
      "learning_rate": 1.64576802507837e-06,
      "loss": 0.179,
      "step": 2470
    },
    {
      "epoch": 1.94,
      "learning_rate": 1.4498432601880878e-06,
      "loss": 0.1804,
      "step": 2480
    },
    {
      "epoch": 1.95,
      "learning_rate": 1.2539184952978056e-06,
      "loss": 0.1566,
      "step": 2490
    },
    {
      "epoch": 1.96,
      "learning_rate": 1.0579937304075236e-06,
      "loss": 0.1685,
      "step": 2500
    },
    {
      "epoch": 1.96,
      "eval_accuracy": 0.8933890776064802,
      "eval_f1": 0.8964992389649924,
      "eval_loss": 0.23118489980697632,
      "eval_precision": 0.8615309605070697,
      "eval_recall": 0.9344262295081968,
      "eval_runtime": 171.1147,
      "eval_samples_per_second": 22.365,
      "eval_steps_per_second": 0.935,
      "step": 2500
    },
    {
      "epoch": 1.97,
      "learning_rate": 8.620689655172415e-07,
      "loss": 0.1461,
      "step": 2510
    },
    {
      "epoch": 1.97,
      "learning_rate": 6.661442006269593e-07,
      "loss": 0.2458,
      "step": 2520
    },
    {
      "epoch": 1.98,
      "learning_rate": 4.7021943573667715e-07,
      "loss": 0.1939,
      "step": 2530
    },
    {
      "epoch": 1.99,
      "learning_rate": 2.74294670846395e-07,
      "loss": 0.1551,
      "step": 2540
    },
    {
      "epoch": 2.0,
      "learning_rate": 7.836990595611285e-08,
      "loss": 0.1953,
      "step": 2550
    },
    {
      "epoch": 2.0,
      "step": 2552,
      "total_flos": 1.3995088604806644e+19,
      "train_loss": 0.259534664043345,
      "train_runtime": 9346.2499,
      "train_samples_per_second": 6.553,
      "train_steps_per_second": 0.273
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.8926332288401254,
      "eval_f1": 0.8949654996166624,
      "eval_loss": 0.24127539992332458,
      "eval_precision": 0.8524829600778968,
      "eval_recall": 0.9419042495965573,
      "eval_runtime": 177.3711,
      "eval_samples_per_second": 21.582,
      "eval_steps_per_second": 0.902,
      "step": 2552
    }
  ],
  "logging_steps": 10,
  "max_steps": 2552,
  "num_train_epochs": 2,
  "save_steps": 100,
  "total_flos": 1.3995088604806644e+19,
  "trial_name": null,
  "trial_params": null
}