{ "best_metric": 0.18881116431984588, "best_model_checkpoint": "/home/azureuser/workspace/mounts/blobs/blob-deeplm22/v-youmohamed/BUS15100_MB2_RAII_20epoch_notweettokenizer_fp16/checkpoint-22848", "epoch": 9.810055940866345, "global_step": 22848, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 0.0002, "loss": 3.784, "step": 5 }, { "epoch": 0.0, "learning_rate": 0.0002, "loss": 3.424, "step": 10 }, { "epoch": 0.01, "learning_rate": 0.0002, "loss": 3.3057, "step": 15 }, { "epoch": 0.01, "learning_rate": 0.0002, "loss": 3.2421, "step": 20 }, { "epoch": 0.01, "learning_rate": 0.0002, "loss": 3.2215, "step": 25 }, { "epoch": 0.01, "learning_rate": 0.0002, "loss": 3.1779, "step": 30 }, { "epoch": 0.02, "learning_rate": 0.0002, "loss": 3.1602, "step": 35 }, { "epoch": 0.02, "learning_rate": 0.0002, "loss": 3.1559, "step": 40 }, { "epoch": 0.02, "learning_rate": 0.0002, "loss": 3.1296, "step": 45 }, { "epoch": 0.02, "learning_rate": 0.0002, "loss": 3.1443, "step": 50 }, { "epoch": 0.02, "learning_rate": 0.0002, "loss": 3.1196, "step": 55 }, { "epoch": 0.03, "learning_rate": 0.0002, "loss": 3.0972, "step": 60 }, { "epoch": 0.03, "eval_loss": 3.0696167945861816, "eval_runtime": 5.2553, "eval_samples_per_second": 1879.426, "eval_steps_per_second": 14.842, "eval_top3_3_weighted_f1_score ": 0.37920731422345033, "eval_top_1_macro_f1_score": 0.020407498859125326, "eval_top_1_weighted_f1score": 0.1768537842540722, "eval_top_3_macro_f1_score": 0.07428603231314447, "step": 64 }, { "epoch": 0.03, "learning_rate": 0.0002, "loss": 3.113, "step": 65 }, { "epoch": 0.03, "learning_rate": 0.0002, "loss": 3.0864, "step": 70 }, { "epoch": 0.03, "learning_rate": 0.0002, "loss": 3.0839, "step": 75 }, { "epoch": 0.03, "learning_rate": 0.0002, "loss": 3.0558, "step": 80 }, { "epoch": 0.04, "learning_rate": 0.0002, "loss": 3.0555, "step": 85 }, { "epoch": 0.04, "learning_rate": 0.0002, "loss": 3.0654, "step": 90 }, { "epoch": 0.04, "learning_rate": 0.0002, "loss": 3.0673, "step": 95 }, { "epoch": 0.04, "learning_rate": 0.0002, "loss": 3.065, "step": 100 }, { "epoch": 0.05, "learning_rate": 0.0002, "loss": 3.054, "step": 105 }, { "epoch": 0.05, "learning_rate": 0.0002, "loss": 3.0582, "step": 110 }, { "epoch": 0.05, "learning_rate": 0.0002, "loss": 3.0719, "step": 115 }, { "epoch": 0.05, "learning_rate": 0.0002, "loss": 3.0522, "step": 120 }, { "epoch": 0.05, "learning_rate": 0.0002, "loss": 3.0465, "step": 125 }, { "epoch": 0.05, "eval_loss": 3.0220658779144287, "eval_runtime": 5.1808, "eval_samples_per_second": 1906.477, "eval_steps_per_second": 15.056, "eval_top3_3_weighted_f1_score ": 0.3920861263441107, "eval_top_1_macro_f1_score": 0.028020399250865188, "eval_top_1_weighted_f1score": 0.17802180543353585, "eval_top_3_macro_f1_score": 0.09612917294995578, "step": 128 }, { "epoch": 0.06, "learning_rate": 0.0002, "loss": 3.0548, "step": 130 }, { "epoch": 0.06, "learning_rate": 0.0002, "loss": 3.0408, "step": 135 }, { "epoch": 0.06, "learning_rate": 0.0002, "loss": 3.032, "step": 140 }, { "epoch": 0.06, "learning_rate": 0.0002, "loss": 3.0339, "step": 145 }, { "epoch": 0.06, "learning_rate": 0.0002, "loss": 3.0306, "step": 150 }, { "epoch": 0.07, "learning_rate": 0.0002, "loss": 3.0257, "step": 155 }, { "epoch": 0.07, "learning_rate": 0.0002, "loss": 3.0252, "step": 160 }, { "epoch": 0.07, "learning_rate": 0.0002, "loss": 3.0324, "step": 165 }, { "epoch": 0.07, "learning_rate": 0.0002, "loss": 3.0434, "step": 170 }, { "epoch": 0.08, "learning_rate": 0.0002, "loss": 3.0395, "step": 175 }, { "epoch": 0.08, "learning_rate": 0.0002, "loss": 3.0243, "step": 180 }, { "epoch": 0.08, "learning_rate": 0.0002, "loss": 3.0207, "step": 185 }, { "epoch": 0.08, "learning_rate": 0.0002, "loss": 3.0134, "step": 190 }, { "epoch": 0.08, "eval_loss": 2.9999098777770996, "eval_runtime": 5.5948, "eval_samples_per_second": 1765.374, "eval_steps_per_second": 13.941, "eval_top3_3_weighted_f1_score ": 0.3942607393594228, "eval_top_1_macro_f1_score": 0.03024064981912809, "eval_top_1_weighted_f1score": 0.18151954856828598, "eval_top_3_macro_f1_score": 0.10555039816849211, "step": 192 }, { "epoch": 0.08, "learning_rate": 0.0002, "loss": 3.0304, "step": 195 }, { "epoch": 0.09, "learning_rate": 0.0002, "loss": 3.0166, "step": 200 }, { "epoch": 0.09, "learning_rate": 0.0002, "loss": 3.0238, "step": 205 }, { "epoch": 0.09, "learning_rate": 0.0002, "loss": 3.0408, "step": 210 }, { "epoch": 0.09, "learning_rate": 0.0002, "loss": 3.0097, "step": 215 }, { "epoch": 0.09, "learning_rate": 0.0002, "loss": 3.0377, "step": 220 }, { "epoch": 0.1, "learning_rate": 0.0002, "loss": 3.0076, "step": 225 }, { "epoch": 0.1, "learning_rate": 0.0002, "loss": 3.0003, "step": 230 }, { "epoch": 0.1, "learning_rate": 0.0002, "loss": 3.0113, "step": 235 }, { "epoch": 0.1, "learning_rate": 0.0002, "loss": 2.9959, "step": 240 }, { "epoch": 0.11, "learning_rate": 0.0002, "loss": 3.0143, "step": 245 }, { "epoch": 0.11, "learning_rate": 0.0002, "loss": 2.9976, "step": 250 }, { "epoch": 0.11, "learning_rate": 0.0002, "loss": 3.0203, "step": 255 }, { "epoch": 0.11, "eval_loss": 2.9871039390563965, "eval_runtime": 5.1769, "eval_samples_per_second": 1907.885, "eval_steps_per_second": 15.067, "eval_top3_3_weighted_f1_score ": 0.3969199243149213, "eval_top_1_macro_f1_score": 0.037170653331203264, "eval_top_1_weighted_f1score": 0.185406119539436, "eval_top_3_macro_f1_score": 0.11274159780578662, "step": 256 }, { "epoch": 0.11, "learning_rate": 0.0002, "loss": 3.0089, "step": 260 }, { "epoch": 0.11, "learning_rate": 0.0002, "loss": 3.0067, "step": 265 }, { "epoch": 0.12, "learning_rate": 0.0002, "loss": 3.0025, "step": 270 }, { "epoch": 0.12, "learning_rate": 0.0002, "loss": 3.006, "step": 275 }, { "epoch": 0.12, "learning_rate": 0.0002, "loss": 2.9925, "step": 280 }, { "epoch": 0.12, "learning_rate": 0.0002, "loss": 2.9842, "step": 285 }, { "epoch": 0.12, "learning_rate": 0.0002, "loss": 3.0024, "step": 290 }, { "epoch": 0.13, "learning_rate": 0.0002, "loss": 2.9809, "step": 295 }, { "epoch": 0.13, "learning_rate": 0.0002, "loss": 2.9923, "step": 300 }, { "epoch": 0.13, "learning_rate": 0.0002, "loss": 2.9932, "step": 305 }, { "epoch": 0.13, "learning_rate": 0.0002, "loss": 2.9727, "step": 310 }, { "epoch": 0.14, "learning_rate": 0.0002, "loss": 2.9862, "step": 315 }, { "epoch": 0.14, "learning_rate": 0.0002, "loss": 2.9879, "step": 320 }, { "epoch": 0.14, "eval_loss": 2.9748592376708984, "eval_runtime": 5.2664, "eval_samples_per_second": 1875.471, "eval_steps_per_second": 14.811, "eval_top3_3_weighted_f1_score ": 0.4006357227793805, "eval_top_1_macro_f1_score": 0.039023552155804886, "eval_top_1_weighted_f1score": 0.18467489427014783, "eval_top_3_macro_f1_score": 0.12391745887899196, "step": 320 }, { "epoch": 0.14, "learning_rate": 0.0002, "loss": 3.0016, "step": 325 }, { "epoch": 0.14, "learning_rate": 0.0002, "loss": 2.9895, "step": 330 }, { "epoch": 0.14, "learning_rate": 0.0002, "loss": 2.9644, "step": 335 }, { "epoch": 0.15, "learning_rate": 0.0002, "loss": 2.966, "step": 340 }, { "epoch": 0.15, "learning_rate": 0.0002, "loss": 2.9903, "step": 345 }, { "epoch": 0.15, "learning_rate": 0.0002, "loss": 2.9934, "step": 350 }, { "epoch": 0.15, "learning_rate": 0.0002, "loss": 2.9849, "step": 355 }, { "epoch": 0.15, "learning_rate": 0.0002, "loss": 3.0056, "step": 360 }, { "epoch": 0.16, "learning_rate": 0.0002, "loss": 2.9924, "step": 365 }, { "epoch": 0.16, "learning_rate": 0.0002, "loss": 3.0068, "step": 370 }, { "epoch": 0.16, "learning_rate": 0.0002, "loss": 3.004, "step": 375 }, { "epoch": 0.16, "learning_rate": 0.0002, "loss": 2.9847, "step": 380 }, { "epoch": 0.16, "eval_loss": 2.965792417526245, "eval_runtime": 5.2467, "eval_samples_per_second": 1882.515, "eval_steps_per_second": 14.866, "eval_top3_3_weighted_f1_score ": 0.4021244172567648, "eval_top_1_macro_f1_score": 0.04106761958718834, "eval_top_1_weighted_f1score": 0.1866042980893108, "eval_top_3_macro_f1_score": 0.12467798211027789, "step": 384 }, { "epoch": 0.17, "learning_rate": 0.0002, "loss": 2.984, "step": 385 }, { "epoch": 0.17, "learning_rate": 0.0002, "loss": 2.9918, "step": 390 }, { "epoch": 0.17, "learning_rate": 0.0002, "loss": 2.9835, "step": 395 }, { "epoch": 0.17, "learning_rate": 0.0002, "loss": 2.9888, "step": 400 }, { "epoch": 0.17, "learning_rate": 0.0002, "loss": 2.9929, "step": 405 }, { "epoch": 0.18, "learning_rate": 0.0002, "loss": 2.9953, "step": 410 }, { "epoch": 0.18, "learning_rate": 0.0002, "loss": 3.0103, "step": 415 }, { "epoch": 0.18, "learning_rate": 0.0002, "loss": 2.9881, "step": 420 }, { "epoch": 0.18, "learning_rate": 0.0002, "loss": 2.9919, "step": 425 }, { "epoch": 0.18, "learning_rate": 0.0002, "loss": 2.9959, "step": 430 }, { "epoch": 0.19, "learning_rate": 0.0002, "loss": 2.991, "step": 435 }, { "epoch": 0.19, "learning_rate": 0.0002, "loss": 2.9878, "step": 440 }, { "epoch": 0.19, "learning_rate": 0.0002, "loss": 2.9748, "step": 445 }, { "epoch": 0.19, "eval_loss": 2.963198184967041, "eval_runtime": 5.205, "eval_samples_per_second": 1897.6, "eval_steps_per_second": 14.986, "eval_top3_3_weighted_f1_score ": 0.40011703728872183, "eval_top_1_macro_f1_score": 0.047993869870430424, "eval_top_1_weighted_f1score": 0.1872302731003996, "eval_top_3_macro_f1_score": 0.12995078978564845, "step": 448 }, { "epoch": 0.19, "learning_rate": 0.0002, "loss": 2.9749, "step": 450 }, { "epoch": 0.2, "learning_rate": 0.0002, "loss": 2.9785, "step": 455 }, { "epoch": 0.2, "learning_rate": 0.0002, "loss": 2.9724, "step": 460 }, { "epoch": 0.2, "learning_rate": 0.0002, "loss": 2.9701, "step": 465 }, { "epoch": 0.2, "learning_rate": 0.0002, "loss": 2.9911, "step": 470 }, { "epoch": 0.2, "learning_rate": 0.0002, "loss": 2.9835, "step": 475 }, { "epoch": 0.21, "learning_rate": 0.0002, "loss": 2.9906, "step": 480 }, { "epoch": 0.21, "learning_rate": 0.0002, "loss": 2.9914, "step": 485 }, { "epoch": 0.21, "learning_rate": 0.0002, "loss": 2.9798, "step": 490 }, { "epoch": 0.21, "learning_rate": 0.0002, "loss": 2.9851, "step": 495 }, { "epoch": 0.21, "learning_rate": 0.0002, "loss": 2.9703, "step": 500 }, { "epoch": 0.22, "learning_rate": 0.0002, "loss": 2.9708, "step": 505 }, { "epoch": 0.22, "learning_rate": 0.0002, "loss": 2.9796, "step": 510 }, { "epoch": 0.22, "eval_loss": 2.9612252712249756, "eval_runtime": 5.2355, "eval_samples_per_second": 1886.559, "eval_steps_per_second": 14.898, "eval_top3_3_weighted_f1_score ": 0.4014014489333963, "eval_top_1_macro_f1_score": 0.04680478656720424, "eval_top_1_weighted_f1score": 0.18789703224926918, "eval_top_3_macro_f1_score": 0.13950661353435131, "step": 512 }, { "epoch": 0.22, "learning_rate": 0.0002, "loss": 2.9854, "step": 515 }, { "epoch": 0.22, "learning_rate": 0.0002, "loss": 2.9795, "step": 520 }, { "epoch": 0.23, "learning_rate": 0.0002, "loss": 2.9649, "step": 525 }, { "epoch": 0.23, "learning_rate": 0.0002, "loss": 2.9881, "step": 530 }, { "epoch": 0.23, "learning_rate": 0.0002, "loss": 2.9729, "step": 535 }, { "epoch": 0.23, "learning_rate": 0.0002, "loss": 2.9667, "step": 540 }, { "epoch": 0.23, "learning_rate": 0.0002, "loss": 2.9752, "step": 545 }, { "epoch": 0.24, "learning_rate": 0.0002, "loss": 2.9637, "step": 550 }, { "epoch": 0.24, "learning_rate": 0.0002, "loss": 2.9961, "step": 555 }, { "epoch": 0.24, "learning_rate": 0.0002, "loss": 2.9586, "step": 560 }, { "epoch": 0.24, "learning_rate": 0.0002, "loss": 2.9549, "step": 565 }, { "epoch": 0.24, "learning_rate": 0.0002, "loss": 2.9843, "step": 570 }, { "epoch": 0.25, "learning_rate": 0.0002, "loss": 2.971, "step": 575 }, { "epoch": 0.25, "eval_loss": 2.954483985900879, "eval_runtime": 5.2537, "eval_samples_per_second": 1880.015, "eval_steps_per_second": 14.847, "eval_top3_3_weighted_f1_score ": 0.4037452311007219, "eval_top_1_macro_f1_score": 0.05065078032503657, "eval_top_1_weighted_f1score": 0.19158153341467882, "eval_top_3_macro_f1_score": 0.14248352888683585, "step": 576 }, { "epoch": 0.25, "learning_rate": 0.0002, "loss": 2.9896, "step": 580 }, { "epoch": 0.25, "learning_rate": 0.0002, "loss": 2.9595, "step": 585 }, { "epoch": 0.25, "learning_rate": 0.0002, "loss": 2.9752, "step": 590 }, { "epoch": 0.26, "learning_rate": 0.0002, "loss": 2.9757, "step": 595 }, { "epoch": 0.26, "learning_rate": 0.0002, "loss": 2.9737, "step": 600 }, { "epoch": 0.26, "learning_rate": 0.0002, "loss": 2.99, "step": 605 }, { "epoch": 0.26, "learning_rate": 0.0002, "loss": 2.9835, "step": 610 }, { "epoch": 0.26, "learning_rate": 0.0002, "loss": 2.966, "step": 615 }, { "epoch": 0.27, "learning_rate": 0.0002, "loss": 2.9672, "step": 620 }, { "epoch": 0.27, "learning_rate": 0.0002, "loss": 2.9609, "step": 625 }, { "epoch": 0.27, "learning_rate": 0.0002, "loss": 2.9971, "step": 630 }, { "epoch": 0.27, "learning_rate": 0.0002, "loss": 2.9622, "step": 635 }, { "epoch": 0.27, "learning_rate": 0.0002, "loss": 2.9651, "step": 640 }, { "epoch": 0.27, "eval_loss": 2.953902244567871, "eval_runtime": 5.539, "eval_samples_per_second": 1783.186, "eval_steps_per_second": 14.082, "eval_top3_3_weighted_f1_score ": 0.4038882116356172, "eval_top_1_macro_f1_score": 0.050013153177604464, "eval_top_1_weighted_f1score": 0.1915424883657993, "eval_top_3_macro_f1_score": 0.13810582785349604, "step": 640 }, { "epoch": 0.28, "learning_rate": 0.0002, "loss": 2.9598, "step": 645 }, { "epoch": 0.28, "learning_rate": 0.0002, "loss": 2.9606, "step": 650 }, { "epoch": 0.28, "learning_rate": 0.0002, "loss": 2.9801, "step": 655 }, { "epoch": 0.28, "learning_rate": 0.0002, "loss": 2.9759, "step": 660 }, { "epoch": 0.29, "learning_rate": 0.0002, "loss": 2.9598, "step": 665 }, { "epoch": 0.29, "learning_rate": 0.0002, "loss": 2.9723, "step": 670 }, { "epoch": 0.29, "learning_rate": 0.0002, "loss": 2.9861, "step": 675 }, { "epoch": 0.29, "learning_rate": 0.0002, "loss": 2.9542, "step": 680 }, { "epoch": 0.29, "learning_rate": 0.0002, "loss": 2.9579, "step": 685 }, { "epoch": 0.3, "learning_rate": 0.0002, "loss": 2.9817, "step": 690 }, { "epoch": 0.3, "learning_rate": 0.0002, "loss": 2.9556, "step": 695 }, { "epoch": 0.3, "learning_rate": 0.0002, "loss": 2.9718, "step": 700 }, { "epoch": 0.3, "eval_loss": 2.9501540660858154, "eval_runtime": 5.1853, "eval_samples_per_second": 1904.791, "eval_steps_per_second": 15.042, "eval_top3_3_weighted_f1_score ": 0.40385563418101295, "eval_top_1_macro_f1_score": 0.05046209013926654, "eval_top_1_weighted_f1score": 0.18921842462280833, "eval_top_3_macro_f1_score": 0.13372864819392832, "step": 704 }, { "epoch": 0.3, "learning_rate": 0.0002, "loss": 2.96, "step": 705 }, { "epoch": 0.3, "learning_rate": 0.0002, "loss": 2.9647, "step": 710 }, { "epoch": 0.31, "learning_rate": 0.0002, "loss": 2.9719, "step": 715 }, { "epoch": 0.31, "learning_rate": 0.0002, "loss": 2.9711, "step": 720 }, { "epoch": 0.31, "learning_rate": 0.0002, "loss": 2.9726, "step": 725 }, { "epoch": 0.31, "learning_rate": 0.0002, "loss": 2.9719, "step": 730 }, { "epoch": 0.32, "learning_rate": 0.0002, "loss": 2.9694, "step": 735 }, { "epoch": 0.32, "learning_rate": 0.0002, "loss": 2.9623, "step": 740 }, { "epoch": 0.32, "learning_rate": 0.0002, "loss": 2.9862, "step": 745 }, { "epoch": 0.32, "learning_rate": 0.0002, "loss": 2.9744, "step": 750 }, { "epoch": 0.32, "learning_rate": 0.0002, "loss": 2.973, "step": 755 }, { "epoch": 0.33, "learning_rate": 0.0002, "loss": 2.9739, "step": 760 }, { "epoch": 0.33, "learning_rate": 0.0002, "loss": 2.9621, "step": 765 }, { "epoch": 0.33, "eval_loss": 2.9458868503570557, "eval_runtime": 5.152, "eval_samples_per_second": 1917.107, "eval_steps_per_second": 15.14, "eval_top3_3_weighted_f1_score ": 0.4047092459025178, "eval_top_1_macro_f1_score": 0.05353345372204659, "eval_top_1_weighted_f1score": 0.187248655714094, "eval_top_3_macro_f1_score": 0.14077859018850478, "step": 768 }, { "epoch": 0.33, "learning_rate": 0.0002, "loss": 2.9841, "step": 770 }, { "epoch": 0.33, "learning_rate": 0.0002, "loss": 2.9716, "step": 775 }, { "epoch": 0.33, "learning_rate": 0.0002, "loss": 2.9685, "step": 780 }, { "epoch": 0.34, "learning_rate": 0.0002, "loss": 2.983, "step": 785 }, { "epoch": 0.34, "learning_rate": 0.0002, "loss": 2.962, "step": 790 }, { "epoch": 0.34, "learning_rate": 0.0002, "loss": 2.9715, "step": 795 }, { "epoch": 0.34, "learning_rate": 0.0002, "loss": 2.9698, "step": 800 }, { "epoch": 0.35, "learning_rate": 0.0002, "loss": 2.9408, "step": 805 }, { "epoch": 0.35, "learning_rate": 0.0002, "loss": 2.9695, "step": 810 }, { "epoch": 0.35, "learning_rate": 0.0002, "loss": 2.9706, "step": 815 }, { "epoch": 0.35, "learning_rate": 0.0002, "loss": 2.9626, "step": 820 }, { "epoch": 0.35, "learning_rate": 0.0002, "loss": 2.956, "step": 825 }, { "epoch": 0.36, "learning_rate": 0.0002, "loss": 2.984, "step": 830 }, { "epoch": 0.36, "eval_loss": 2.9440317153930664, "eval_runtime": 5.2175, "eval_samples_per_second": 1893.038, "eval_steps_per_second": 14.95, "eval_top3_3_weighted_f1_score ": 0.405362919359186, "eval_top_1_macro_f1_score": 0.0504562500575876, "eval_top_1_weighted_f1score": 0.18343072088628645, "eval_top_3_macro_f1_score": 0.13591648502699705, "step": 832 }, { "epoch": 0.36, "learning_rate": 0.0002, "loss": 2.9437, "step": 835 }, { "epoch": 0.36, "learning_rate": 0.0002, "loss": 2.995, "step": 840 }, { "epoch": 0.36, "learning_rate": 0.0002, "loss": 2.9771, "step": 845 }, { "epoch": 0.36, "learning_rate": 0.0002, "loss": 2.9567, "step": 850 }, { "epoch": 0.37, "learning_rate": 0.0002, "loss": 2.973, "step": 855 }, { "epoch": 0.37, "learning_rate": 0.0002, "loss": 2.9657, "step": 860 }, { "epoch": 0.37, "learning_rate": 0.0002, "loss": 2.9627, "step": 865 }, { "epoch": 0.37, "learning_rate": 0.0002, "loss": 2.9428, "step": 870 }, { "epoch": 0.38, "learning_rate": 0.0002, "loss": 2.9718, "step": 875 }, { "epoch": 0.38, "learning_rate": 0.0002, "loss": 2.9623, "step": 880 }, { "epoch": 0.38, "learning_rate": 0.0002, "loss": 2.9516, "step": 885 }, { "epoch": 0.38, "learning_rate": 0.0002, "loss": 2.9539, "step": 890 }, { "epoch": 0.38, "learning_rate": 0.0002, "loss": 2.958, "step": 895 }, { "epoch": 0.38, "eval_loss": 2.9393413066864014, "eval_runtime": 5.3066, "eval_samples_per_second": 1861.255, "eval_steps_per_second": 14.699, "eval_top3_3_weighted_f1_score ": 0.4115333814510633, "eval_top_1_macro_f1_score": 0.05314498607987754, "eval_top_1_weighted_f1score": 0.1859384539986762, "eval_top_3_macro_f1_score": 0.14574054221754426, "step": 896 }, { "epoch": 0.39, "learning_rate": 0.0002, "loss": 2.9523, "step": 900 }, { "epoch": 0.39, "learning_rate": 0.0002, "loss": 2.9482, "step": 905 }, { "epoch": 0.39, "learning_rate": 0.0002, "loss": 2.9693, "step": 910 }, { "epoch": 0.39, "learning_rate": 0.0002, "loss": 2.9594, "step": 915 }, { "epoch": 0.39, "learning_rate": 0.0002, "loss": 2.9805, "step": 920 }, { "epoch": 0.4, "learning_rate": 0.0002, "loss": 2.9655, "step": 925 }, { "epoch": 0.4, "learning_rate": 0.0002, "loss": 2.963, "step": 930 }, { "epoch": 0.4, "learning_rate": 0.0002, "loss": 2.9678, "step": 935 }, { "epoch": 0.4, "learning_rate": 0.0002, "loss": 2.9783, "step": 940 }, { "epoch": 0.41, "learning_rate": 0.0002, "loss": 2.9668, "step": 945 }, { "epoch": 0.41, "learning_rate": 0.0002, "loss": 2.9748, "step": 950 }, { "epoch": 0.41, "learning_rate": 0.0002, "loss": 2.9671, "step": 955 }, { "epoch": 0.41, "learning_rate": 0.0002, "loss": 2.943, "step": 960 }, { "epoch": 0.41, "eval_loss": 2.9382472038269043, "eval_runtime": 5.2569, "eval_samples_per_second": 1878.847, "eval_steps_per_second": 14.838, "eval_top3_3_weighted_f1_score ": 0.4070953829436608, "eval_top_1_macro_f1_score": 0.0566477657462707, "eval_top_1_weighted_f1score": 0.19010243182786668, "eval_top_3_macro_f1_score": 0.14185244247312048, "step": 960 }, { "epoch": 0.41, "learning_rate": 0.0002, "loss": 2.97, "step": 965 }, { "epoch": 0.42, "learning_rate": 0.0002, "loss": 2.9586, "step": 970 }, { "epoch": 0.42, "learning_rate": 0.0002, "loss": 2.9601, "step": 975 }, { "epoch": 0.42, "learning_rate": 0.0002, "loss": 2.9669, "step": 980 }, { "epoch": 0.42, "learning_rate": 0.0002, "loss": 2.9548, "step": 985 }, { "epoch": 0.42, "learning_rate": 0.0002, "loss": 2.9478, "step": 990 }, { "epoch": 0.43, "learning_rate": 0.0002, "loss": 2.9489, "step": 995 }, { "epoch": 0.43, "learning_rate": 0.0002, "loss": 2.9437, "step": 1000 }, { "epoch": 0.43, "learning_rate": 0.0002, "loss": 2.9588, "step": 1005 }, { "epoch": 0.43, "learning_rate": 0.0002, "loss": 2.9625, "step": 1010 }, { "epoch": 0.44, "learning_rate": 0.0002, "loss": 2.97, "step": 1015 }, { "epoch": 0.44, "learning_rate": 0.0002, "loss": 2.9745, "step": 1020 }, { "epoch": 0.44, "eval_loss": 2.9444897174835205, "eval_runtime": 5.212, "eval_samples_per_second": 1895.057, "eval_steps_per_second": 14.966, "eval_top3_3_weighted_f1_score ": 0.40828849035898257, "eval_top_1_macro_f1_score": 0.056020443190640944, "eval_top_1_weighted_f1score": 0.19234719626068883, "eval_top_3_macro_f1_score": 0.14346199622671688, "step": 1024 }, { "epoch": 0.44, "learning_rate": 0.0002, "loss": 2.9545, "step": 1025 }, { "epoch": 0.44, "learning_rate": 0.0002, "loss": 2.9525, "step": 1030 }, { "epoch": 0.44, "learning_rate": 0.0002, "loss": 2.9498, "step": 1035 }, { "epoch": 0.45, "learning_rate": 0.0002, "loss": 2.9585, "step": 1040 }, { "epoch": 0.45, "learning_rate": 0.0002, "loss": 2.964, "step": 1045 }, { "epoch": 0.45, "learning_rate": 0.0002, "loss": 2.9636, "step": 1050 }, { "epoch": 0.45, "learning_rate": 0.0002, "loss": 2.9333, "step": 1055 }, { "epoch": 0.46, "learning_rate": 0.0002, "loss": 2.9628, "step": 1060 }, { "epoch": 0.46, "learning_rate": 0.0002, "loss": 2.9565, "step": 1065 }, { "epoch": 0.46, "learning_rate": 0.0002, "loss": 2.9596, "step": 1070 }, { "epoch": 0.46, "learning_rate": 0.0002, "loss": 2.957, "step": 1075 }, { "epoch": 0.46, "learning_rate": 0.0002, "loss": 2.9798, "step": 1080 }, { "epoch": 0.47, "learning_rate": 0.0002, "loss": 2.9657, "step": 1085 }, { "epoch": 0.47, "eval_loss": 2.9426662921905518, "eval_runtime": 5.6668, "eval_samples_per_second": 1742.966, "eval_steps_per_second": 13.764, "eval_top3_3_weighted_f1_score ": 0.40339180646168105, "eval_top_1_macro_f1_score": 0.05765999455627635, "eval_top_1_weighted_f1score": 0.1923372887115798, "eval_top_3_macro_f1_score": 0.13777971543818865, "step": 1088 }, { "epoch": 0.47, "learning_rate": 0.0002, "loss": 2.9568, "step": 1090 }, { "epoch": 0.47, "learning_rate": 0.0002, "loss": 2.9686, "step": 1095 }, { "epoch": 0.47, "learning_rate": 0.0002, "loss": 2.9574, "step": 1100 }, { "epoch": 0.47, "learning_rate": 0.0002, "loss": 2.9382, "step": 1105 }, { "epoch": 0.48, "learning_rate": 0.0002, "loss": 2.9588, "step": 1110 }, { "epoch": 0.48, "learning_rate": 0.0002, "loss": 2.9431, "step": 1115 }, { "epoch": 0.48, "learning_rate": 0.0002, "loss": 2.9496, "step": 1120 }, { "epoch": 0.48, "learning_rate": 0.0002, "loss": 2.9554, "step": 1125 }, { "epoch": 0.49, "learning_rate": 0.0002, "loss": 2.9468, "step": 1130 }, { "epoch": 0.49, "learning_rate": 0.0002, "loss": 2.9416, "step": 1135 }, { "epoch": 0.49, "learning_rate": 0.0002, "loss": 2.95, "step": 1140 }, { "epoch": 0.49, "learning_rate": 0.0002, "loss": 2.9422, "step": 1145 }, { "epoch": 0.49, "learning_rate": 0.0002, "loss": 2.9473, "step": 1150 }, { "epoch": 0.49, "eval_loss": 2.9419898986816406, "eval_runtime": 5.2315, "eval_samples_per_second": 1888.003, "eval_steps_per_second": 14.91, "eval_top3_3_weighted_f1_score ": 0.40865085111737687, "eval_top_1_macro_f1_score": 0.05391141724799508, "eval_top_1_weighted_f1score": 0.1920279224053727, "eval_top_3_macro_f1_score": 0.14288060798438232, "step": 1152 }, { "epoch": 0.5, "learning_rate": 0.0002, "loss": 2.9533, "step": 1155 }, { "epoch": 0.5, "learning_rate": 0.0002, "loss": 2.955, "step": 1160 }, { "epoch": 0.5, "learning_rate": 0.0002, "loss": 2.9692, "step": 1165 }, { "epoch": 0.5, "learning_rate": 0.0002, "loss": 2.9594, "step": 1170 }, { "epoch": 0.5, "learning_rate": 0.0002, "loss": 2.9603, "step": 1175 }, { "epoch": 0.51, "learning_rate": 0.0002, "loss": 2.9575, "step": 1180 }, { "epoch": 0.51, "learning_rate": 0.0002, "loss": 2.9564, "step": 1185 }, { "epoch": 0.51, "learning_rate": 0.0002, "loss": 2.9645, "step": 1190 }, { "epoch": 0.51, "learning_rate": 0.0002, "loss": 2.9553, "step": 1195 }, { "epoch": 0.52, "learning_rate": 0.0002, "loss": 2.9523, "step": 1200 }, { "epoch": 0.52, "learning_rate": 0.0002, "loss": 2.9438, "step": 1205 }, { "epoch": 0.52, "learning_rate": 0.0002, "loss": 2.9815, "step": 1210 }, { "epoch": 0.52, "learning_rate": 0.0002, "loss": 2.961, "step": 1215 }, { "epoch": 0.52, "eval_loss": 2.9412014484405518, "eval_runtime": 5.2238, "eval_samples_per_second": 1890.778, "eval_steps_per_second": 14.932, "eval_top3_3_weighted_f1_score ": 0.40786636359138395, "eval_top_1_macro_f1_score": 0.0474824971739334, "eval_top_1_weighted_f1score": 0.18867625959655746, "eval_top_3_macro_f1_score": 0.14364241717026507, "step": 1216 }, { "epoch": 0.52, "learning_rate": 0.0002, "loss": 2.9478, "step": 1220 }, { "epoch": 0.53, "learning_rate": 0.0002, "loss": 2.9594, "step": 1225 }, { "epoch": 0.53, "learning_rate": 0.0002, "loss": 2.9485, "step": 1230 }, { "epoch": 0.53, "learning_rate": 0.0002, "loss": 2.9543, "step": 1235 }, { "epoch": 0.53, "learning_rate": 0.0002, "loss": 2.9607, "step": 1240 }, { "epoch": 0.53, "learning_rate": 0.0002, "loss": 2.9439, "step": 1245 }, { "epoch": 0.54, "learning_rate": 0.0002, "loss": 2.9231, "step": 1250 }, { "epoch": 0.54, "learning_rate": 0.0002, "loss": 2.9409, "step": 1255 }, { "epoch": 0.54, "learning_rate": 0.0002, "loss": 2.9574, "step": 1260 }, { "epoch": 0.54, "learning_rate": 0.0002, "loss": 2.937, "step": 1265 }, { "epoch": 0.55, "learning_rate": 0.0002, "loss": 2.9455, "step": 1270 }, { "epoch": 0.55, "learning_rate": 0.0002, "loss": 2.94, "step": 1275 }, { "epoch": 0.55, "learning_rate": 0.0002, "loss": 2.9752, "step": 1280 }, { "epoch": 0.55, "eval_loss": 2.93802809715271, "eval_runtime": 5.3062, "eval_samples_per_second": 1861.394, "eval_steps_per_second": 14.7, "eval_top3_3_weighted_f1_score ": 0.41160079192129195, "eval_top_1_macro_f1_score": 0.05795090891818421, "eval_top_1_weighted_f1score": 0.19012240817937326, "eval_top_3_macro_f1_score": 0.1487031524147606, "step": 1280 }, { "epoch": 0.55, "learning_rate": 0.0002, "loss": 2.9508, "step": 1285 }, { "epoch": 0.55, "learning_rate": 0.0002, "loss": 2.9331, "step": 1290 }, { "epoch": 0.56, "learning_rate": 0.0002, "loss": 2.948, "step": 1295 }, { "epoch": 0.56, "learning_rate": 0.0002, "loss": 2.9513, "step": 1300 }, { "epoch": 0.56, "learning_rate": 0.0002, "loss": 2.9614, "step": 1305 }, { "epoch": 0.56, "learning_rate": 0.0002, "loss": 2.9509, "step": 1310 }, { "epoch": 0.56, "learning_rate": 0.0002, "loss": 2.9331, "step": 1315 }, { "epoch": 0.57, "learning_rate": 0.0002, "loss": 2.9668, "step": 1320 }, { "epoch": 0.57, "learning_rate": 0.0002, "loss": 2.9467, "step": 1325 }, { "epoch": 0.57, "learning_rate": 0.0002, "loss": 2.9578, "step": 1330 }, { "epoch": 0.57, "learning_rate": 0.0002, "loss": 2.9483, "step": 1335 }, { "epoch": 0.58, "learning_rate": 0.0002, "loss": 2.9415, "step": 1340 }, { "epoch": 0.58, "eval_loss": 2.941464900970459, "eval_runtime": 5.2866, "eval_samples_per_second": 1868.309, "eval_steps_per_second": 14.754, "eval_top3_3_weighted_f1_score ": 0.4069475337150649, "eval_top_1_macro_f1_score": 0.052197365761238596, "eval_top_1_weighted_f1score": 0.18994939652129228, "eval_top_3_macro_f1_score": 0.14622829923307734, "step": 1344 }, { "epoch": 0.58, "learning_rate": 0.0002, "loss": 2.965, "step": 1345 }, { "epoch": 0.58, "learning_rate": 0.0002, "loss": 2.9499, "step": 1350 }, { "epoch": 0.58, "learning_rate": 0.0002, "loss": 2.9565, "step": 1355 }, { "epoch": 0.58, "learning_rate": 0.0002, "loss": 2.9463, "step": 1360 }, { "epoch": 0.59, "learning_rate": 0.0002, "loss": 2.9597, "step": 1365 }, { "epoch": 0.59, "learning_rate": 0.0002, "loss": 2.9346, "step": 1370 }, { "epoch": 0.59, "learning_rate": 0.0002, "loss": 2.9643, "step": 1375 }, { "epoch": 0.59, "learning_rate": 0.0002, "loss": 2.928, "step": 1380 }, { "epoch": 0.59, "learning_rate": 0.0002, "loss": 2.9518, "step": 1385 }, { "epoch": 0.6, "learning_rate": 0.0002, "loss": 2.9471, "step": 1390 }, { "epoch": 0.6, "learning_rate": 0.0002, "loss": 2.9582, "step": 1395 }, { "epoch": 0.6, "learning_rate": 0.0002, "loss": 2.9396, "step": 1400 }, { "epoch": 0.6, "learning_rate": 0.0002, "loss": 2.9633, "step": 1405 }, { "epoch": 0.6, "eval_loss": 2.9351675510406494, "eval_runtime": 5.2046, "eval_samples_per_second": 1897.729, "eval_steps_per_second": 14.987, "eval_top3_3_weighted_f1_score ": 0.40903510064773524, "eval_top_1_macro_f1_score": 0.058491334775530215, "eval_top_1_weighted_f1score": 0.1931870648145974, "eval_top_3_macro_f1_score": 0.14995626358660458, "step": 1408 }, { "epoch": 0.61, "learning_rate": 0.0002, "loss": 2.9465, "step": 1410 }, { "epoch": 0.61, "learning_rate": 0.0002, "loss": 2.9488, "step": 1415 }, { "epoch": 0.61, "learning_rate": 0.0002, "loss": 2.9451, "step": 1420 }, { "epoch": 0.61, "learning_rate": 0.0002, "loss": 2.9526, "step": 1425 }, { "epoch": 0.61, "learning_rate": 0.0002, "loss": 2.9276, "step": 1430 }, { "epoch": 0.62, "learning_rate": 0.0002, "loss": 2.9354, "step": 1435 }, { "epoch": 0.62, "learning_rate": 0.0002, "loss": 2.9543, "step": 1440 }, { "epoch": 0.62, "learning_rate": 0.0002, "loss": 2.9433, "step": 1445 }, { "epoch": 0.62, "learning_rate": 0.0002, "loss": 2.9343, "step": 1450 }, { "epoch": 0.62, "learning_rate": 0.0002, "loss": 2.9643, "step": 1455 }, { "epoch": 0.63, "learning_rate": 0.0002, "loss": 2.9505, "step": 1460 }, { "epoch": 0.63, "learning_rate": 0.0002, "loss": 2.9454, "step": 1465 }, { "epoch": 0.63, "learning_rate": 0.0002, "loss": 2.9607, "step": 1470 }, { "epoch": 0.63, "eval_loss": 2.929859161376953, "eval_runtime": 5.188, "eval_samples_per_second": 1903.819, "eval_steps_per_second": 15.035, "eval_top3_3_weighted_f1_score ": 0.4112572271075635, "eval_top_1_macro_f1_score": 0.05492674184491993, "eval_top_1_weighted_f1score": 0.1874419660454753, "eval_top_3_macro_f1_score": 0.15057532260912443, "step": 1472 }, { "epoch": 0.63, "learning_rate": 0.0002, "loss": 2.9707, "step": 1475 }, { "epoch": 0.64, "learning_rate": 0.0002, "loss": 2.9234, "step": 1480 }, { "epoch": 0.64, "learning_rate": 0.0002, "loss": 2.9706, "step": 1485 }, { "epoch": 0.64, "learning_rate": 0.0002, "loss": 2.924, "step": 1490 }, { "epoch": 0.64, "learning_rate": 0.0002, "loss": 2.9582, "step": 1495 }, { "epoch": 0.64, "learning_rate": 0.0002, "loss": 2.9376, "step": 1500 }, { "epoch": 0.65, "learning_rate": 0.0002, "loss": 2.9393, "step": 1505 }, { "epoch": 0.65, "learning_rate": 0.0002, "loss": 2.9135, "step": 1510 }, { "epoch": 0.65, "learning_rate": 0.0002, "loss": 2.9541, "step": 1515 }, { "epoch": 0.65, "learning_rate": 0.0002, "loss": 2.9508, "step": 1520 }, { "epoch": 0.65, "learning_rate": 0.0002, "loss": 2.9382, "step": 1525 }, { "epoch": 0.66, "learning_rate": 0.0002, "loss": 2.9257, "step": 1530 }, { "epoch": 0.66, "learning_rate": 0.0002, "loss": 2.9421, "step": 1535 }, { "epoch": 0.66, "eval_loss": 2.935636043548584, "eval_runtime": 5.498, "eval_samples_per_second": 1796.478, "eval_steps_per_second": 14.187, "eval_top3_3_weighted_f1_score ": 0.4054698890830444, "eval_top_1_macro_f1_score": 0.0557953259862022, "eval_top_1_weighted_f1score": 0.19142192923965634, "eval_top_3_macro_f1_score": 0.14438591551016072, "step": 1536 }, { "epoch": 0.66, "learning_rate": 0.0002, "loss": 2.928, "step": 1540 }, { "epoch": 0.66, "learning_rate": 0.0002, "loss": 2.963, "step": 1545 }, { "epoch": 0.67, "learning_rate": 0.0002, "loss": 2.9523, "step": 1550 }, { "epoch": 0.67, "learning_rate": 0.0002, "loss": 2.9275, "step": 1555 }, { "epoch": 0.67, "learning_rate": 0.0002, "loss": 2.9429, "step": 1560 }, { "epoch": 0.67, "learning_rate": 0.0002, "loss": 2.9386, "step": 1565 }, { "epoch": 0.67, "learning_rate": 0.0002, "loss": 2.9524, "step": 1570 }, { "epoch": 0.68, "learning_rate": 0.0002, "loss": 2.9344, "step": 1575 }, { "epoch": 0.68, "learning_rate": 0.0002, "loss": 2.9353, "step": 1580 }, { "epoch": 0.68, "learning_rate": 0.0002, "loss": 2.9316, "step": 1585 }, { "epoch": 0.68, "learning_rate": 0.0002, "loss": 2.96, "step": 1590 }, { "epoch": 0.68, "learning_rate": 0.0002, "loss": 2.9282, "step": 1595 }, { "epoch": 0.69, "learning_rate": 0.0002, "loss": 2.9194, "step": 1600 }, { "epoch": 0.69, "eval_loss": 2.9320883750915527, "eval_runtime": 5.2444, "eval_samples_per_second": 1883.348, "eval_steps_per_second": 14.873, "eval_top3_3_weighted_f1_score ": 0.40821638906631724, "eval_top_1_macro_f1_score": 0.05321140707012678, "eval_top_1_weighted_f1score": 0.190021300619809, "eval_top_3_macro_f1_score": 0.14259731701282546, "step": 1600 }, { "epoch": 0.69, "learning_rate": 0.0002, "loss": 2.9443, "step": 1605 }, { "epoch": 0.69, "learning_rate": 0.0002, "loss": 2.9445, "step": 1610 }, { "epoch": 0.69, "learning_rate": 0.0002, "loss": 2.9489, "step": 1615 }, { "epoch": 0.7, "learning_rate": 0.0002, "loss": 2.945, "step": 1620 }, { "epoch": 0.7, "learning_rate": 0.0002, "loss": 2.9427, "step": 1625 }, { "epoch": 0.7, "learning_rate": 0.0002, "loss": 2.9204, "step": 1630 }, { "epoch": 0.7, "learning_rate": 0.0002, "loss": 2.9226, "step": 1635 }, { "epoch": 0.7, "learning_rate": 0.0002, "loss": 2.9547, "step": 1640 }, { "epoch": 0.71, "learning_rate": 0.0002, "loss": 2.9377, "step": 1645 }, { "epoch": 0.71, "learning_rate": 0.0002, "loss": 2.9445, "step": 1650 }, { "epoch": 0.71, "learning_rate": 0.0002, "loss": 2.9534, "step": 1655 }, { "epoch": 0.71, "learning_rate": 0.0002, "loss": 2.9466, "step": 1660 }, { "epoch": 0.71, "eval_loss": 2.9336674213409424, "eval_runtime": 5.2704, "eval_samples_per_second": 1874.037, "eval_steps_per_second": 14.8, "eval_top3_3_weighted_f1_score ": 0.41047552952278205, "eval_top_1_macro_f1_score": 0.0536502267865967, "eval_top_1_weighted_f1score": 0.19126675755018152, "eval_top_3_macro_f1_score": 0.1456769847671719, "step": 1664 }, { "epoch": 0.71, "learning_rate": 0.0002, "loss": 2.9518, "step": 1665 }, { "epoch": 0.72, "learning_rate": 0.0002, "loss": 2.9646, "step": 1670 }, { "epoch": 0.72, "learning_rate": 0.0002, "loss": 2.9389, "step": 1675 }, { "epoch": 0.72, "learning_rate": 0.0002, "loss": 2.9527, "step": 1680 }, { "epoch": 0.72, "learning_rate": 0.0002, "loss": 2.9399, "step": 1685 }, { "epoch": 0.73, "learning_rate": 0.0002, "loss": 2.9312, "step": 1690 }, { "epoch": 0.73, "learning_rate": 0.0002, "loss": 2.9517, "step": 1695 }, { "epoch": 0.73, "learning_rate": 0.0002, "loss": 2.9561, "step": 1700 }, { "epoch": 0.73, "learning_rate": 0.0002, "loss": 2.9535, "step": 1705 }, { "epoch": 0.73, "learning_rate": 0.0002, "loss": 2.939, "step": 1710 }, { "epoch": 0.74, "learning_rate": 0.0002, "loss": 2.9429, "step": 1715 }, { "epoch": 0.74, "learning_rate": 0.0002, "loss": 2.938, "step": 1720 }, { "epoch": 0.74, "learning_rate": 0.0002, "loss": 2.9497, "step": 1725 }, { "epoch": 0.74, "eval_loss": 2.9341320991516113, "eval_runtime": 5.2886, "eval_samples_per_second": 1867.59, "eval_steps_per_second": 14.749, "eval_top3_3_weighted_f1_score ": 0.4057059914344135, "eval_top_1_macro_f1_score": 0.05370325541314189, "eval_top_1_weighted_f1score": 0.19225534007309714, "eval_top_3_macro_f1_score": 0.13987396780712794, "step": 1728 }, { "epoch": 0.74, "learning_rate": 0.0002, "loss": 2.9489, "step": 1730 }, { "epoch": 0.74, "learning_rate": 0.0002, "loss": 2.955, "step": 1735 }, { "epoch": 0.75, "learning_rate": 0.0002, "loss": 2.9453, "step": 1740 }, { "epoch": 0.75, "learning_rate": 0.0002, "loss": 2.9324, "step": 1745 }, { "epoch": 0.75, "learning_rate": 0.0002, "loss": 2.942, "step": 1750 }, { "epoch": 0.75, "learning_rate": 0.0002, "loss": 2.9264, "step": 1755 }, { "epoch": 0.76, "learning_rate": 0.0002, "loss": 2.9291, "step": 1760 }, { "epoch": 0.76, "learning_rate": 0.0002, "loss": 2.9455, "step": 1765 }, { "epoch": 0.76, "learning_rate": 0.0002, "loss": 2.9395, "step": 1770 }, { "epoch": 0.76, "learning_rate": 0.0002, "loss": 2.9584, "step": 1775 }, { "epoch": 0.76, "learning_rate": 0.0002, "loss": 2.9371, "step": 1780 }, { "epoch": 0.77, "learning_rate": 0.0002, "loss": 2.947, "step": 1785 }, { "epoch": 0.77, "learning_rate": 0.0002, "loss": 2.9429, "step": 1790 }, { "epoch": 0.77, "eval_loss": 2.9273977279663086, "eval_runtime": 5.296, "eval_samples_per_second": 1864.99, "eval_steps_per_second": 14.728, "eval_top3_3_weighted_f1_score ": 0.4131031890088074, "eval_top_1_macro_f1_score": 0.05607100541398047, "eval_top_1_weighted_f1score": 0.18862150823940385, "eval_top_3_macro_f1_score": 0.14317950642794616, "step": 1792 }, { "epoch": 0.77, "learning_rate": 0.0002, "loss": 2.9382, "step": 1795 }, { "epoch": 0.77, "learning_rate": 0.0002, "loss": 2.9594, "step": 1800 }, { "epoch": 0.77, "learning_rate": 0.0002, "loss": 2.9377, "step": 1805 }, { "epoch": 0.78, "learning_rate": 0.0002, "loss": 2.944, "step": 1810 }, { "epoch": 0.78, "learning_rate": 0.0002, "loss": 2.9318, "step": 1815 }, { "epoch": 0.78, "learning_rate": 0.0002, "loss": 2.9465, "step": 1820 }, { "epoch": 0.78, "learning_rate": 0.0002, "loss": 2.9327, "step": 1825 }, { "epoch": 0.79, "learning_rate": 0.0002, "loss": 2.9411, "step": 1830 }, { "epoch": 0.79, "learning_rate": 0.0002, "loss": 2.9453, "step": 1835 }, { "epoch": 0.79, "learning_rate": 0.0002, "loss": 2.9551, "step": 1840 }, { "epoch": 0.79, "learning_rate": 0.0002, "loss": 2.9551, "step": 1845 }, { "epoch": 0.79, "learning_rate": 0.0002, "loss": 2.9496, "step": 1850 }, { "epoch": 0.8, "learning_rate": 0.0002, "loss": 2.9507, "step": 1855 }, { "epoch": 0.8, "eval_loss": 2.9243032932281494, "eval_runtime": 5.2513, "eval_samples_per_second": 1880.88, "eval_steps_per_second": 14.854, "eval_top3_3_weighted_f1_score ": 0.4053991556785117, "eval_top_1_macro_f1_score": 0.05826966485471442, "eval_top_1_weighted_f1score": 0.19437289584739018, "eval_top_3_macro_f1_score": 0.13632058202701589, "step": 1856 }, { "epoch": 0.8, "learning_rate": 0.0002, "loss": 2.9426, "step": 1860 }, { "epoch": 0.8, "learning_rate": 0.0002, "loss": 2.9279, "step": 1865 }, { "epoch": 0.8, "learning_rate": 0.0002, "loss": 2.9562, "step": 1870 }, { "epoch": 0.8, "learning_rate": 0.0002, "loss": 2.931, "step": 1875 }, { "epoch": 0.81, "learning_rate": 0.0002, "loss": 2.9338, "step": 1880 }, { "epoch": 0.81, "learning_rate": 0.0002, "loss": 2.9204, "step": 1885 }, { "epoch": 0.81, "learning_rate": 0.0002, "loss": 2.964, "step": 1890 }, { "epoch": 0.81, "learning_rate": 0.0002, "loss": 2.9598, "step": 1895 }, { "epoch": 0.82, "learning_rate": 0.0002, "loss": 2.941, "step": 1900 }, { "epoch": 0.82, "learning_rate": 0.0002, "loss": 2.9377, "step": 1905 }, { "epoch": 0.82, "learning_rate": 0.0002, "loss": 2.942, "step": 1910 }, { "epoch": 0.82, "learning_rate": 0.0002, "loss": 2.9381, "step": 1915 }, { "epoch": 0.82, "learning_rate": 0.0002, "loss": 2.9409, "step": 1920 }, { "epoch": 0.82, "eval_loss": 2.9285144805908203, "eval_runtime": 5.3302, "eval_samples_per_second": 1853.038, "eval_steps_per_second": 14.634, "eval_top3_3_weighted_f1_score ": 0.4124377696571018, "eval_top_1_macro_f1_score": 0.06190878574489557, "eval_top_1_weighted_f1score": 0.19263435856385583, "eval_top_3_macro_f1_score": 0.14077472295761495, "step": 1920 }, { "epoch": 0.83, "learning_rate": 0.0002, "loss": 2.9466, "step": 1925 }, { "epoch": 0.83, "learning_rate": 0.0002, "loss": 2.9365, "step": 1930 }, { "epoch": 0.83, "learning_rate": 0.0002, "loss": 2.915, "step": 1935 }, { "epoch": 0.83, "learning_rate": 0.0002, "loss": 2.9284, "step": 1940 }, { "epoch": 0.83, "learning_rate": 0.0002, "loss": 2.9357, "step": 1945 }, { "epoch": 0.84, "learning_rate": 0.0002, "loss": 2.9482, "step": 1950 }, { "epoch": 0.84, "learning_rate": 0.0002, "loss": 2.9476, "step": 1955 }, { "epoch": 0.84, "learning_rate": 0.0002, "loss": 2.9314, "step": 1960 }, { "epoch": 0.84, "learning_rate": 0.0002, "loss": 2.9429, "step": 1965 }, { "epoch": 0.85, "learning_rate": 0.0002, "loss": 2.9536, "step": 1970 }, { "epoch": 0.85, "learning_rate": 0.0002, "loss": 2.9512, "step": 1975 }, { "epoch": 0.85, "learning_rate": 0.0002, "loss": 2.9371, "step": 1980 }, { "epoch": 0.85, "eval_loss": 2.928523540496826, "eval_runtime": 5.2792, "eval_samples_per_second": 1870.915, "eval_steps_per_second": 14.775, "eval_top3_3_weighted_f1_score ": 0.41511573211461816, "eval_top_1_macro_f1_score": 0.06132345745002284, "eval_top_1_weighted_f1score": 0.19199397596078466, "eval_top_3_macro_f1_score": 0.15076717368083384, "step": 1984 }, { "epoch": 0.85, "learning_rate": 0.0002, "loss": 2.9442, "step": 1985 }, { "epoch": 0.85, "learning_rate": 0.0002, "loss": 2.931, "step": 1990 }, { "epoch": 0.86, "learning_rate": 0.0002, "loss": 2.9765, "step": 1995 }, { "epoch": 0.86, "learning_rate": 0.0002, "loss": 2.9434, "step": 2000 }, { "epoch": 0.86, "learning_rate": 0.0002, "loss": 2.9254, "step": 2005 }, { "epoch": 0.86, "learning_rate": 0.0002, "loss": 2.9435, "step": 2010 }, { "epoch": 0.87, "learning_rate": 0.0002, "loss": 2.9263, "step": 2015 }, { "epoch": 0.87, "learning_rate": 0.0002, "loss": 2.9388, "step": 2020 }, { "epoch": 0.87, "learning_rate": 0.0002, "loss": 2.9447, "step": 2025 }, { "epoch": 0.87, "learning_rate": 0.0002, "loss": 2.9527, "step": 2030 }, { "epoch": 0.87, "learning_rate": 0.0002, "loss": 2.9114, "step": 2035 }, { "epoch": 0.88, "learning_rate": 0.0002, "loss": 2.9575, "step": 2040 }, { "epoch": 0.88, "learning_rate": 0.0002, "loss": 2.9203, "step": 2045 }, { "epoch": 0.88, "eval_loss": 2.921570062637329, "eval_runtime": 5.2505, "eval_samples_per_second": 1881.171, "eval_steps_per_second": 14.856, "eval_top3_3_weighted_f1_score ": 0.4120243601453162, "eval_top_1_macro_f1_score": 0.05856737068306764, "eval_top_1_weighted_f1score": 0.19277721536806952, "eval_top_3_macro_f1_score": 0.14630634863938272, "step": 2048 }, { "epoch": 0.88, "learning_rate": 0.0002, "loss": 2.9216, "step": 2050 }, { "epoch": 0.88, "learning_rate": 0.0002, "loss": 2.9413, "step": 2055 }, { "epoch": 0.88, "learning_rate": 0.0002, "loss": 2.9394, "step": 2060 }, { "epoch": 0.89, "learning_rate": 0.0002, "loss": 2.94, "step": 2065 }, { "epoch": 0.89, "learning_rate": 0.0002, "loss": 2.9602, "step": 2070 }, { "epoch": 0.89, "learning_rate": 0.0002, "loss": 2.9339, "step": 2075 }, { "epoch": 0.89, "learning_rate": 0.0002, "loss": 2.9453, "step": 2080 }, { "epoch": 0.9, "learning_rate": 0.0002, "loss": 2.9346, "step": 2085 }, { "epoch": 0.9, "learning_rate": 0.0002, "loss": 2.9427, "step": 2090 }, { "epoch": 0.9, "learning_rate": 0.0002, "loss": 2.932, "step": 2095 }, { "epoch": 0.9, "learning_rate": 0.0002, "loss": 2.9356, "step": 2100 }, { "epoch": 0.9, "learning_rate": 0.0002, "loss": 2.9558, "step": 2105 }, { "epoch": 0.91, "learning_rate": 0.0002, "loss": 2.949, "step": 2110 }, { "epoch": 0.91, "eval_loss": 2.9211103916168213, "eval_runtime": 5.8002, "eval_samples_per_second": 1702.864, "eval_steps_per_second": 13.448, "eval_top3_3_weighted_f1_score ": 0.411140072973993, "eval_top_1_macro_f1_score": 0.05827460613661236, "eval_top_1_weighted_f1score": 0.1900658924831603, "eval_top_3_macro_f1_score": 0.14356756002606255, "step": 2112 }, { "epoch": 0.91, "learning_rate": 0.0002, "loss": 2.9237, "step": 2115 }, { "epoch": 0.91, "learning_rate": 0.0002, "loss": 2.9368, "step": 2120 }, { "epoch": 0.91, "learning_rate": 0.0002, "loss": 2.924, "step": 2125 }, { "epoch": 0.91, "learning_rate": 0.0002, "loss": 2.9344, "step": 2130 }, { "epoch": 0.92, "learning_rate": 0.0002, "loss": 2.9425, "step": 2135 }, { "epoch": 0.92, "learning_rate": 0.0002, "loss": 2.9303, "step": 2140 }, { "epoch": 0.92, "learning_rate": 0.0002, "loss": 2.9222, "step": 2145 }, { "epoch": 0.92, "learning_rate": 0.0002, "loss": 2.9271, "step": 2150 }, { "epoch": 0.93, "learning_rate": 0.0002, "loss": 2.9429, "step": 2155 }, { "epoch": 0.93, "learning_rate": 0.0002, "loss": 2.9412, "step": 2160 }, { "epoch": 0.93, "learning_rate": 0.0002, "loss": 2.9398, "step": 2165 }, { "epoch": 0.93, "learning_rate": 0.0002, "loss": 2.9253, "step": 2170 }, { "epoch": 0.93, "learning_rate": 0.0002, "loss": 2.9378, "step": 2175 }, { "epoch": 0.93, "eval_loss": 2.9222617149353027, "eval_runtime": 5.2532, "eval_samples_per_second": 1880.202, "eval_steps_per_second": 14.848, "eval_top3_3_weighted_f1_score ": 0.4077832403009119, "eval_top_1_macro_f1_score": 0.06079385373525116, "eval_top_1_weighted_f1score": 0.19627489088203884, "eval_top_3_macro_f1_score": 0.14590453185761695, "step": 2176 }, { "epoch": 0.94, "learning_rate": 0.0002, "loss": 2.9355, "step": 2180 }, { "epoch": 0.94, "learning_rate": 0.0002, "loss": 2.9415, "step": 2185 }, { "epoch": 0.94, "learning_rate": 0.0002, "loss": 2.9342, "step": 2190 }, { "epoch": 0.94, "learning_rate": 0.0002, "loss": 2.9168, "step": 2195 }, { "epoch": 0.94, "learning_rate": 0.0002, "loss": 2.9586, "step": 2200 }, { "epoch": 0.95, "learning_rate": 0.0002, "loss": 2.9345, "step": 2205 }, { "epoch": 0.95, "learning_rate": 0.0002, "loss": 2.9422, "step": 2210 }, { "epoch": 0.95, "learning_rate": 0.0002, "loss": 2.9417, "step": 2215 }, { "epoch": 0.95, "learning_rate": 0.0002, "loss": 2.9322, "step": 2220 }, { "epoch": 0.96, "learning_rate": 0.0002, "loss": 2.94, "step": 2225 }, { "epoch": 0.96, "learning_rate": 0.0002, "loss": 2.9378, "step": 2230 }, { "epoch": 0.96, "learning_rate": 0.0002, "loss": 2.9127, "step": 2235 }, { "epoch": 0.96, "learning_rate": 0.0002, "loss": 2.9423, "step": 2240 }, { "epoch": 0.96, "eval_loss": 2.9238317012786865, "eval_runtime": 5.2712, "eval_samples_per_second": 1873.771, "eval_steps_per_second": 14.797, "eval_top3_3_weighted_f1_score ": 0.41065264958740455, "eval_top_1_macro_f1_score": 0.06100532477583297, "eval_top_1_weighted_f1score": 0.19239137323566624, "eval_top_3_macro_f1_score": 0.14523224439471996, "step": 2240 }, { "epoch": 0.96, "learning_rate": 0.0002, "loss": 2.9176, "step": 2245 }, { "epoch": 0.97, "learning_rate": 0.0002, "loss": 2.9513, "step": 2250 }, { "epoch": 0.97, "learning_rate": 0.0002, "loss": 2.9281, "step": 2255 }, { "epoch": 0.97, "learning_rate": 0.0002, "loss": 2.9392, "step": 2260 }, { "epoch": 0.97, "learning_rate": 0.0002, "loss": 2.9205, "step": 2265 }, { "epoch": 0.97, "learning_rate": 0.0002, "loss": 2.9252, "step": 2270 }, { "epoch": 0.98, "learning_rate": 0.0002, "loss": 2.9274, "step": 2275 }, { "epoch": 0.98, "learning_rate": 0.0002, "loss": 2.925, "step": 2280 }, { "epoch": 0.98, "learning_rate": 0.0002, "loss": 2.9356, "step": 2285 }, { "epoch": 0.98, "learning_rate": 0.0002, "loss": 2.932, "step": 2290 }, { "epoch": 0.99, "learning_rate": 0.0002, "loss": 2.9383, "step": 2295 }, { "epoch": 0.99, "learning_rate": 0.0002, "loss": 2.9329, "step": 2300 }, { "epoch": 0.99, "eval_loss": 2.9221272468566895, "eval_runtime": 5.3659, "eval_samples_per_second": 1840.711, "eval_steps_per_second": 14.536, "eval_top3_3_weighted_f1_score ": 0.4150667448633111, "eval_top_1_macro_f1_score": 0.05624215902445944, "eval_top_1_weighted_f1score": 0.1924437688190861, "eval_top_3_macro_f1_score": 0.14481328991643022, "step": 2304 }, { "epoch": 0.99, "learning_rate": 0.0002, "loss": 2.943, "step": 2305 }, { "epoch": 0.99, "learning_rate": 0.0002, "loss": 2.9167, "step": 2310 }, { "epoch": 0.99, "learning_rate": 0.0002, "loss": 2.9415, "step": 2315 }, { "epoch": 1.0, "learning_rate": 0.0002, "loss": 2.9308, "step": 2320 }, { "epoch": 1.0, "learning_rate": 0.0002, "loss": 2.9441, "step": 2325 }, { "epoch": 1.0, "learning_rate": 0.0002, "loss": 3.1956, "step": 2330 }, { "epoch": 1.0, "learning_rate": 0.0002, "loss": 2.9391, "step": 2335 }, { "epoch": 1.0, "learning_rate": 0.0002, "loss": 2.9307, "step": 2340 }, { "epoch": 1.01, "learning_rate": 0.0002, "loss": 2.9192, "step": 2345 }, { "epoch": 1.01, "learning_rate": 0.0002, "loss": 2.9519, "step": 2350 }, { "epoch": 1.01, "learning_rate": 0.0002, "loss": 2.934, "step": 2355 }, { "epoch": 1.01, "learning_rate": 0.0002, "loss": 2.9205, "step": 2360 }, { "epoch": 1.02, "learning_rate": 0.0002, "loss": 2.9126, "step": 2365 }, { "epoch": 1.02, "eval_loss": 2.9245643615722656, "eval_runtime": 5.3574, "eval_samples_per_second": 1843.634, "eval_steps_per_second": 14.559, "eval_top3_3_weighted_f1_score ": 0.41190062778999437, "eval_top_1_macro_f1_score": 0.05170441427634679, "eval_top_1_weighted_f1score": 0.1915950465826955, "eval_top_3_macro_f1_score": 0.14541768101651006, "step": 2368 }, { "epoch": 1.02, "learning_rate": 0.0002, "loss": 2.923, "step": 2370 }, { "epoch": 1.02, "learning_rate": 0.0002, "loss": 2.9124, "step": 2375 }, { "epoch": 1.02, "learning_rate": 0.0002, "loss": 2.9111, "step": 2380 }, { "epoch": 1.02, "learning_rate": 0.0002, "loss": 2.9083, "step": 2385 }, { "epoch": 1.03, "learning_rate": 0.0002, "loss": 2.9273, "step": 2390 }, { "epoch": 1.03, "learning_rate": 0.0002, "loss": 2.9231, "step": 2395 }, { "epoch": 1.03, "learning_rate": 0.0002, "loss": 2.938, "step": 2400 }, { "epoch": 1.03, "learning_rate": 0.0002, "loss": 2.9343, "step": 2405 }, { "epoch": 1.03, "learning_rate": 0.0002, "loss": 2.936, "step": 2410 }, { "epoch": 1.04, "learning_rate": 0.0002, "loss": 2.9219, "step": 2415 }, { "epoch": 1.04, "learning_rate": 0.0002, "loss": 2.9297, "step": 2420 }, { "epoch": 1.04, "learning_rate": 0.0002, "loss": 2.942, "step": 2425 }, { "epoch": 1.04, "learning_rate": 0.0002, "loss": 2.9285, "step": 2430 }, { "epoch": 1.04, "eval_loss": 2.9210715293884277, "eval_runtime": 5.2926, "eval_samples_per_second": 1866.205, "eval_steps_per_second": 14.738, "eval_top3_3_weighted_f1_score ": 0.4104425810756134, "eval_top_1_macro_f1_score": 0.058897211609453624, "eval_top_1_weighted_f1score": 0.19441356472244228, "eval_top_3_macro_f1_score": 0.14331504611626866, "step": 2432 }, { "epoch": 1.05, "learning_rate": 0.0002, "loss": 2.9341, "step": 2435 }, { "epoch": 1.05, "learning_rate": 0.0002, "loss": 2.918, "step": 2440 }, { "epoch": 1.05, "learning_rate": 0.0002, "loss": 2.9321, "step": 2445 }, { "epoch": 1.05, "learning_rate": 0.0002, "loss": 2.9285, "step": 2450 }, { "epoch": 1.05, "learning_rate": 0.0002, "loss": 2.9324, "step": 2455 }, { "epoch": 1.06, "learning_rate": 0.0002, "loss": 2.942, "step": 2460 }, { "epoch": 1.06, "learning_rate": 0.0002, "loss": 2.909, "step": 2465 }, { "epoch": 1.06, "learning_rate": 0.0002, "loss": 2.9444, "step": 2470 }, { "epoch": 1.06, "learning_rate": 0.0002, "loss": 2.9411, "step": 2475 }, { "epoch": 1.06, "learning_rate": 0.0002, "loss": 2.9211, "step": 2480 }, { "epoch": 1.07, "learning_rate": 0.0002, "loss": 2.9457, "step": 2485 }, { "epoch": 1.07, "learning_rate": 0.0002, "loss": 2.931, "step": 2490 }, { "epoch": 1.07, "learning_rate": 0.0002, "loss": 2.9121, "step": 2495 }, { "epoch": 1.07, "eval_loss": 2.9219577312469482, "eval_runtime": 5.3428, "eval_samples_per_second": 1848.645, "eval_steps_per_second": 14.599, "eval_top3_3_weighted_f1_score ": 0.4126210764843451, "eval_top_1_macro_f1_score": 0.055206132881277424, "eval_top_1_weighted_f1score": 0.19580194845230464, "eval_top_3_macro_f1_score": 0.14175695902025756, "step": 2496 }, { "epoch": 1.07, "learning_rate": 0.0002, "loss": 2.9203, "step": 2500 }, { "epoch": 1.08, "learning_rate": 0.0002, "loss": 2.9403, "step": 2505 }, { "epoch": 1.08, "learning_rate": 0.0002, "loss": 2.9353, "step": 2510 }, { "epoch": 1.08, "learning_rate": 0.0002, "loss": 2.9345, "step": 2515 }, { "epoch": 1.08, "learning_rate": 0.0002, "loss": 2.9246, "step": 2520 }, { "epoch": 1.08, "learning_rate": 0.0002, "loss": 2.9118, "step": 2525 }, { "epoch": 1.09, "learning_rate": 0.0002, "loss": 2.9406, "step": 2530 }, { "epoch": 1.09, "learning_rate": 0.0002, "loss": 2.9261, "step": 2535 }, { "epoch": 1.09, "learning_rate": 0.0002, "loss": 2.9172, "step": 2540 }, { "epoch": 1.09, "learning_rate": 0.0002, "loss": 2.9372, "step": 2545 }, { "epoch": 1.09, "learning_rate": 0.0002, "loss": 2.9009, "step": 2550 }, { "epoch": 1.1, "learning_rate": 0.0002, "loss": 2.9144, "step": 2555 }, { "epoch": 1.1, "learning_rate": 0.0002, "loss": 2.9015, "step": 2560 }, { "epoch": 1.1, "eval_loss": 2.9242866039276123, "eval_runtime": 5.7071, "eval_samples_per_second": 1730.647, "eval_steps_per_second": 13.667, "eval_top3_3_weighted_f1_score ": 0.4107174514165816, "eval_top_1_macro_f1_score": 0.05746469658311381, "eval_top_1_weighted_f1score": 0.19074469006616215, "eval_top_3_macro_f1_score": 0.1424298406329182, "step": 2560 }, { "epoch": 1.1, "learning_rate": 0.0002, "loss": 2.933, "step": 2565 }, { "epoch": 1.1, "learning_rate": 0.0002, "loss": 2.9143, "step": 2570 }, { "epoch": 1.11, "learning_rate": 0.0002, "loss": 2.9446, "step": 2575 }, { "epoch": 1.11, "learning_rate": 0.0002, "loss": 2.9453, "step": 2580 }, { "epoch": 1.11, "learning_rate": 0.0002, "loss": 2.9048, "step": 2585 }, { "epoch": 1.11, "learning_rate": 0.0002, "loss": 2.9319, "step": 2590 }, { "epoch": 1.11, "learning_rate": 0.0002, "loss": 2.9199, "step": 2595 }, { "epoch": 1.12, "learning_rate": 0.0002, "loss": 2.9281, "step": 2600 }, { "epoch": 1.12, "learning_rate": 0.0002, "loss": 2.9218, "step": 2605 }, { "epoch": 1.12, "learning_rate": 0.0002, "loss": 2.9381, "step": 2610 }, { "epoch": 1.12, "learning_rate": 0.0002, "loss": 2.935, "step": 2615 }, { "epoch": 1.12, "learning_rate": 0.0002, "loss": 2.9473, "step": 2620 }, { "epoch": 1.13, "eval_loss": 2.9131009578704834, "eval_runtime": 5.3565, "eval_samples_per_second": 1843.945, "eval_steps_per_second": 14.562, "eval_top3_3_weighted_f1_score ": 0.41318059902803694, "eval_top_1_macro_f1_score": 0.05385593199473167, "eval_top_1_weighted_f1score": 0.19077652700730305, "eval_top_3_macro_f1_score": 0.1408254102673545, "step": 2624 }, { "epoch": 1.13, "learning_rate": 0.0002, "loss": 2.9366, "step": 2625 }, { "epoch": 1.13, "learning_rate": 0.0002, "loss": 2.9195, "step": 2630 }, { "epoch": 1.13, "learning_rate": 0.0002, "loss": 2.9383, "step": 2635 }, { "epoch": 1.13, "learning_rate": 0.0002, "loss": 2.9316, "step": 2640 }, { "epoch": 1.14, "learning_rate": 0.0002, "loss": 2.9528, "step": 2645 }, { "epoch": 1.14, "learning_rate": 0.0002, "loss": 2.939, "step": 2650 }, { "epoch": 1.14, "learning_rate": 0.0002, "loss": 2.9093, "step": 2655 }, { "epoch": 1.14, "learning_rate": 0.0002, "loss": 2.9258, "step": 2660 }, { "epoch": 1.14, "learning_rate": 0.0002, "loss": 2.9124, "step": 2665 }, { "epoch": 1.15, "learning_rate": 0.0002, "loss": 2.9377, "step": 2670 }, { "epoch": 1.15, "learning_rate": 0.0002, "loss": 2.9199, "step": 2675 }, { "epoch": 1.15, "learning_rate": 0.0002, "loss": 2.9306, "step": 2680 }, { "epoch": 1.15, "learning_rate": 0.0002, "loss": 2.9142, "step": 2685 }, { "epoch": 1.15, "eval_loss": 2.9189188480377197, "eval_runtime": 5.3137, "eval_samples_per_second": 1858.775, "eval_steps_per_second": 14.679, "eval_top3_3_weighted_f1_score ": 0.4110172769165812, "eval_top_1_macro_f1_score": 0.059839117057030516, "eval_top_1_weighted_f1score": 0.19338477249901195, "eval_top_3_macro_f1_score": 0.14382223501334376, "step": 2688 }, { "epoch": 1.15, "learning_rate": 0.0002, "loss": 2.9331, "step": 2690 }, { "epoch": 1.16, "learning_rate": 0.0002, "loss": 2.9393, "step": 2695 }, { "epoch": 1.16, "learning_rate": 0.0002, "loss": 2.9215, "step": 2700 }, { "epoch": 1.16, "learning_rate": 0.0002, "loss": 2.9379, "step": 2705 }, { "epoch": 1.16, "learning_rate": 0.0002, "loss": 2.906, "step": 2710 }, { "epoch": 1.17, "learning_rate": 0.0002, "loss": 2.9306, "step": 2715 }, { "epoch": 1.17, "learning_rate": 0.0002, "loss": 2.9323, "step": 2720 }, { "epoch": 1.17, "learning_rate": 0.0002, "loss": 2.928, "step": 2725 }, { "epoch": 1.17, "learning_rate": 0.0002, "loss": 2.9304, "step": 2730 }, { "epoch": 1.17, "learning_rate": 0.0002, "loss": 2.9281, "step": 2735 }, { "epoch": 1.18, "learning_rate": 0.0002, "loss": 2.9403, "step": 2740 }, { "epoch": 1.18, "learning_rate": 0.0002, "loss": 2.9239, "step": 2745 }, { "epoch": 1.18, "learning_rate": 0.0002, "loss": 2.9297, "step": 2750 }, { "epoch": 1.18, "eval_loss": 2.9221882820129395, "eval_runtime": 5.2478, "eval_samples_per_second": 1882.135, "eval_steps_per_second": 14.863, "eval_top3_3_weighted_f1_score ": 0.41274163601888564, "eval_top_1_macro_f1_score": 0.05734115522215458, "eval_top_1_weighted_f1score": 0.19576214347043555, "eval_top_3_macro_f1_score": 0.14691401943928395, "step": 2752 }, { "epoch": 1.18, "learning_rate": 0.0002, "loss": 2.9317, "step": 2755 }, { "epoch": 1.19, "learning_rate": 0.0002, "loss": 2.941, "step": 2760 }, { "epoch": 1.19, "learning_rate": 0.0002, "loss": 2.9057, "step": 2765 }, { "epoch": 1.19, "learning_rate": 0.0002, "loss": 2.9468, "step": 2770 }, { "epoch": 1.19, "learning_rate": 0.0002, "loss": 2.9253, "step": 2775 }, { "epoch": 1.19, "learning_rate": 0.0002, "loss": 2.9225, "step": 2780 }, { "epoch": 1.2, "learning_rate": 0.0002, "loss": 2.9352, "step": 2785 }, { "epoch": 1.2, "learning_rate": 0.0002, "loss": 2.9171, "step": 2790 }, { "epoch": 1.2, "learning_rate": 0.0002, "loss": 2.9241, "step": 2795 }, { "epoch": 1.2, "learning_rate": 0.0002, "loss": 2.9361, "step": 2800 }, { "epoch": 1.2, "learning_rate": 0.0002, "loss": 2.9227, "step": 2805 }, { "epoch": 1.21, "learning_rate": 0.0002, "loss": 2.9213, "step": 2810 }, { "epoch": 1.21, "learning_rate": 0.0002, "loss": 2.9134, "step": 2815 }, { "epoch": 1.21, "eval_loss": 2.9217724800109863, "eval_runtime": 5.2627, "eval_samples_per_second": 1876.799, "eval_steps_per_second": 14.821, "eval_top3_3_weighted_f1_score ": 0.4107645977708775, "eval_top_1_macro_f1_score": 0.056444541131062925, "eval_top_1_weighted_f1score": 0.19637409796491542, "eval_top_3_macro_f1_score": 0.1400984673549117, "step": 2816 }, { "epoch": 1.21, "learning_rate": 0.0002, "loss": 2.9225, "step": 2820 }, { "epoch": 1.21, "learning_rate": 0.0002, "loss": 2.958, "step": 2825 }, { "epoch": 1.22, "learning_rate": 0.0002, "loss": 2.9031, "step": 2830 }, { "epoch": 1.22, "learning_rate": 0.0002, "loss": 2.9381, "step": 2835 }, { "epoch": 1.22, "learning_rate": 0.0002, "loss": 2.9127, "step": 2840 }, { "epoch": 1.22, "learning_rate": 0.0002, "loss": 2.9378, "step": 2845 }, { "epoch": 1.22, "learning_rate": 0.0002, "loss": 2.911, "step": 2850 }, { "epoch": 1.23, "learning_rate": 0.0002, "loss": 2.9222, "step": 2855 }, { "epoch": 1.23, "learning_rate": 0.0002, "loss": 2.9215, "step": 2860 }, { "epoch": 1.23, "learning_rate": 0.0002, "loss": 2.9375, "step": 2865 }, { "epoch": 1.23, "learning_rate": 0.0002, "loss": 2.9454, "step": 2870 }, { "epoch": 1.23, "learning_rate": 0.0002, "loss": 2.9096, "step": 2875 }, { "epoch": 1.24, "learning_rate": 0.0002, "loss": 2.9152, "step": 2880 }, { "epoch": 1.24, "eval_loss": 2.9204282760620117, "eval_runtime": 5.3058, "eval_samples_per_second": 1861.544, "eval_steps_per_second": 14.701, "eval_top3_3_weighted_f1_score ": 0.40930973081815303, "eval_top_1_macro_f1_score": 0.05561365452286687, "eval_top_1_weighted_f1score": 0.19327404630127948, "eval_top_3_macro_f1_score": 0.14019544772441375, "step": 2880 }, { "epoch": 1.24, "learning_rate": 0.0002, "loss": 2.9167, "step": 2885 }, { "epoch": 1.24, "learning_rate": 0.0002, "loss": 2.922, "step": 2890 }, { "epoch": 1.24, "learning_rate": 0.0002, "loss": 2.9246, "step": 2895 }, { "epoch": 1.25, "learning_rate": 0.0002, "loss": 2.9248, "step": 2900 }, { "epoch": 1.25, "learning_rate": 0.0002, "loss": 2.9297, "step": 2905 }, { "epoch": 1.25, "learning_rate": 0.0002, "loss": 2.9257, "step": 2910 }, { "epoch": 1.25, "learning_rate": 0.0002, "loss": 2.9427, "step": 2915 }, { "epoch": 1.25, "learning_rate": 0.0002, "loss": 2.9243, "step": 2920 }, { "epoch": 1.26, "learning_rate": 0.0002, "loss": 2.9384, "step": 2925 }, { "epoch": 1.26, "learning_rate": 0.0002, "loss": 2.9396, "step": 2930 }, { "epoch": 1.26, "learning_rate": 0.0002, "loss": 2.929, "step": 2935 }, { "epoch": 1.26, "learning_rate": 0.0002, "loss": 2.9284, "step": 2940 }, { "epoch": 1.26, "eval_loss": 2.91880464553833, "eval_runtime": 5.2529, "eval_samples_per_second": 1880.281, "eval_steps_per_second": 14.849, "eval_top3_3_weighted_f1_score ": 0.41468391257401327, "eval_top_1_macro_f1_score": 0.05407696011087027, "eval_top_1_weighted_f1score": 0.19182727350240894, "eval_top_3_macro_f1_score": 0.14511553350894063, "step": 2944 }, { "epoch": 1.26, "learning_rate": 0.0002, "loss": 2.9168, "step": 2945 }, { "epoch": 1.27, "learning_rate": 0.0002, "loss": 2.9287, "step": 2950 }, { "epoch": 1.27, "learning_rate": 0.0002, "loss": 2.9231, "step": 2955 }, { "epoch": 1.27, "learning_rate": 0.0002, "loss": 2.9235, "step": 2960 }, { "epoch": 1.27, "learning_rate": 0.0002, "loss": 2.9083, "step": 2965 }, { "epoch": 1.28, "learning_rate": 0.0002, "loss": 2.9331, "step": 2970 }, { "epoch": 1.28, "learning_rate": 0.0002, "loss": 2.9207, "step": 2975 }, { "epoch": 1.28, "learning_rate": 0.0002, "loss": 2.9197, "step": 2980 }, { "epoch": 1.28, "learning_rate": 0.0002, "loss": 2.9176, "step": 2985 }, { "epoch": 1.28, "learning_rate": 0.0002, "loss": 2.9332, "step": 2990 }, { "epoch": 1.29, "learning_rate": 0.0002, "loss": 2.9367, "step": 2995 }, { "epoch": 1.29, "learning_rate": 0.0002, "loss": 2.9198, "step": 3000 }, { "epoch": 1.29, "learning_rate": 0.0002, "loss": 2.9148, "step": 3005 }, { "epoch": 1.29, "eval_loss": 2.919055461883545, "eval_runtime": 5.8139, "eval_samples_per_second": 1698.868, "eval_steps_per_second": 13.416, "eval_top3_3_weighted_f1_score ": 0.41247529392678284, "eval_top_1_macro_f1_score": 0.054934720407290305, "eval_top_1_weighted_f1score": 0.19640278010652523, "eval_top_3_macro_f1_score": 0.1467724167948877, "step": 3008 }, { "epoch": 1.29, "learning_rate": 0.0002, "loss": 2.9363, "step": 3010 }, { "epoch": 1.29, "learning_rate": 0.0002, "loss": 2.9326, "step": 3015 }, { "epoch": 1.3, "learning_rate": 0.0002, "loss": 2.9316, "step": 3020 }, { "epoch": 1.3, "learning_rate": 0.0002, "loss": 2.9109, "step": 3025 }, { "epoch": 1.3, "learning_rate": 0.0002, "loss": 2.9285, "step": 3030 }, { "epoch": 1.3, "learning_rate": 0.0002, "loss": 2.9181, "step": 3035 }, { "epoch": 1.31, "learning_rate": 0.0002, "loss": 2.9333, "step": 3040 }, { "epoch": 1.31, "learning_rate": 0.0002, "loss": 2.9034, "step": 3045 }, { "epoch": 1.31, "learning_rate": 0.0002, "loss": 2.9173, "step": 3050 }, { "epoch": 1.31, "learning_rate": 0.0002, "loss": 2.9323, "step": 3055 }, { "epoch": 1.31, "learning_rate": 0.0002, "loss": 2.9288, "step": 3060 }, { "epoch": 1.32, "learning_rate": 0.0002, "loss": 2.9114, "step": 3065 }, { "epoch": 1.32, "learning_rate": 0.0002, "loss": 2.9247, "step": 3070 }, { "epoch": 1.32, "eval_loss": 2.9235095977783203, "eval_runtime": 5.3056, "eval_samples_per_second": 1861.629, "eval_steps_per_second": 14.702, "eval_top3_3_weighted_f1_score ": 0.4146372781603772, "eval_top_1_macro_f1_score": 0.05380511361692963, "eval_top_1_weighted_f1score": 0.19638734169374839, "eval_top_3_macro_f1_score": 0.13671213989889544, "step": 3072 }, { "epoch": 1.32, "learning_rate": 0.0002, "loss": 2.9449, "step": 3075 }, { "epoch": 1.32, "learning_rate": 0.0002, "loss": 2.9157, "step": 3080 }, { "epoch": 1.32, "learning_rate": 0.0002, "loss": 2.9232, "step": 3085 }, { "epoch": 1.33, "learning_rate": 0.0002, "loss": 2.9436, "step": 3090 }, { "epoch": 1.33, "learning_rate": 0.0002, "loss": 2.9291, "step": 3095 }, { "epoch": 1.33, "learning_rate": 0.0002, "loss": 2.94, "step": 3100 }, { "epoch": 1.33, "learning_rate": 0.0002, "loss": 2.9169, "step": 3105 }, { "epoch": 1.34, "learning_rate": 0.0002, "loss": 2.9195, "step": 3110 }, { "epoch": 1.34, "learning_rate": 0.0002, "loss": 2.9095, "step": 3115 }, { "epoch": 1.34, "learning_rate": 0.0002, "loss": 2.9367, "step": 3120 }, { "epoch": 1.34, "learning_rate": 0.0002, "loss": 2.9214, "step": 3125 }, { "epoch": 1.34, "learning_rate": 0.0002, "loss": 2.9208, "step": 3130 }, { "epoch": 1.35, "learning_rate": 0.0002, "loss": 2.9269, "step": 3135 }, { "epoch": 1.35, "eval_loss": 2.918182849884033, "eval_runtime": 5.3357, "eval_samples_per_second": 1851.121, "eval_steps_per_second": 14.619, "eval_top3_3_weighted_f1_score ": 0.41396850463298607, "eval_top_1_macro_f1_score": 0.05368109841264443, "eval_top_1_weighted_f1score": 0.19213040928604935, "eval_top_3_macro_f1_score": 0.13911494630172772, "step": 3136 }, { "epoch": 1.35, "learning_rate": 0.0002, "loss": 2.9295, "step": 3140 }, { "epoch": 1.35, "learning_rate": 0.0002, "loss": 2.9234, "step": 3145 }, { "epoch": 1.35, "learning_rate": 0.0002, "loss": 2.9329, "step": 3150 }, { "epoch": 1.35, "learning_rate": 0.0002, "loss": 2.9057, "step": 3155 }, { "epoch": 1.36, "learning_rate": 0.0002, "loss": 2.919, "step": 3160 }, { "epoch": 1.36, "learning_rate": 0.0002, "loss": 2.9219, "step": 3165 }, { "epoch": 1.36, "learning_rate": 0.0002, "loss": 2.9228, "step": 3170 }, { "epoch": 1.36, "learning_rate": 0.0002, "loss": 2.9145, "step": 3175 }, { "epoch": 1.37, "learning_rate": 0.0002, "loss": 2.9346, "step": 3180 }, { "epoch": 1.37, "learning_rate": 0.0002, "loss": 2.9324, "step": 3185 }, { "epoch": 1.37, "learning_rate": 0.0002, "loss": 2.919, "step": 3190 }, { "epoch": 1.37, "learning_rate": 0.0002, "loss": 2.9182, "step": 3195 }, { "epoch": 1.37, "learning_rate": 0.0002, "loss": 2.9074, "step": 3200 }, { "epoch": 1.37, "eval_loss": 2.9170238971710205, "eval_runtime": 169.2426, "eval_samples_per_second": 58.36, "eval_steps_per_second": 0.461, "eval_top3_3_weighted_f1_score ": 0.41064106083390567, "eval_top_1_macro_f1_score": 0.049563680065994195, "eval_top_1_weighted_f1score": 0.1943582042639729, "eval_top_3_macro_f1_score": 0.13638503687999215, "step": 3200 }, { "epoch": 1.38, "learning_rate": 0.0002, "loss": 2.92, "step": 3205 }, { "epoch": 1.38, "learning_rate": 0.0002, "loss": 2.9165, "step": 3210 }, { "epoch": 1.38, "learning_rate": 0.0002, "loss": 2.9199, "step": 3215 }, { "epoch": 1.38, "learning_rate": 0.0002, "loss": 2.8932, "step": 3220 }, { "epoch": 1.38, "learning_rate": 0.0002, "loss": 2.9242, "step": 3225 }, { "epoch": 1.39, "learning_rate": 0.0002, "loss": 2.9407, "step": 3230 }, { "epoch": 1.39, "learning_rate": 0.0002, "loss": 2.9265, "step": 3235 }, { "epoch": 1.39, "learning_rate": 0.0002, "loss": 2.9165, "step": 3240 }, { "epoch": 1.39, "learning_rate": 0.0002, "loss": 2.9259, "step": 3245 }, { "epoch": 1.4, "learning_rate": 0.0002, "loss": 2.9007, "step": 3250 }, { "epoch": 1.4, "learning_rate": 0.0002, "loss": 2.9461, "step": 3255 }, { "epoch": 1.4, "learning_rate": 0.0002, "loss": 2.9216, "step": 3260 }, { "epoch": 1.4, "eval_loss": 2.9117391109466553, "eval_runtime": 5.3119, "eval_samples_per_second": 1859.4, "eval_steps_per_second": 14.684, "eval_top3_3_weighted_f1_score ": 0.41026852363425426, "eval_top_1_macro_f1_score": 0.05343009155164555, "eval_top_1_weighted_f1score": 0.1928369190755077, "eval_top_3_macro_f1_score": 0.14031584306958347, "step": 3264 }, { "epoch": 1.4, "learning_rate": 0.0002, "loss": 2.922, "step": 3265 }, { "epoch": 1.4, "learning_rate": 0.0002, "loss": 2.9084, "step": 3270 }, { "epoch": 1.41, "learning_rate": 0.0002, "loss": 2.9289, "step": 3275 }, { "epoch": 1.41, "learning_rate": 0.0002, "loss": 2.9356, "step": 3280 }, { "epoch": 1.41, "learning_rate": 0.0002, "loss": 2.9163, "step": 3285 }, { "epoch": 1.41, "learning_rate": 0.0002, "loss": 2.9038, "step": 3290 }, { "epoch": 1.41, "learning_rate": 0.0002, "loss": 2.9152, "step": 3295 }, { "epoch": 1.42, "learning_rate": 0.0002, "loss": 2.8888, "step": 3300 }, { "epoch": 1.42, "learning_rate": 0.0002, "loss": 2.9314, "step": 3305 }, { "epoch": 1.42, "learning_rate": 0.0002, "loss": 2.9263, "step": 3310 }, { "epoch": 1.42, "learning_rate": 0.0002, "loss": 2.9225, "step": 3315 }, { "epoch": 1.43, "learning_rate": 0.0002, "loss": 2.9033, "step": 3320 }, { "epoch": 1.43, "learning_rate": 0.0002, "loss": 2.9206, "step": 3325 }, { "epoch": 1.43, "eval_loss": 2.9116451740264893, "eval_runtime": 5.2924, "eval_samples_per_second": 1866.277, "eval_steps_per_second": 14.738, "eval_top3_3_weighted_f1_score ": 0.4140711332372438, "eval_top_1_macro_f1_score": 0.05488237610711219, "eval_top_1_weighted_f1score": 0.19281961541929482, "eval_top_3_macro_f1_score": 0.13829503016111905, "step": 3328 }, { "epoch": 1.43, "learning_rate": 0.0002, "loss": 2.9357, "step": 3330 }, { "epoch": 1.43, "learning_rate": 0.0002, "loss": 2.9191, "step": 3335 }, { "epoch": 1.43, "learning_rate": 0.0002, "loss": 2.9082, "step": 3340 }, { "epoch": 1.44, "learning_rate": 0.0002, "loss": 2.9097, "step": 3345 }, { "epoch": 1.44, "learning_rate": 0.0002, "loss": 2.9385, "step": 3350 }, { "epoch": 1.44, "learning_rate": 0.0002, "loss": 2.9219, "step": 3355 }, { "epoch": 1.44, "learning_rate": 0.0002, "loss": 2.9229, "step": 3360 }, { "epoch": 1.44, "learning_rate": 0.0002, "loss": 2.9249, "step": 3365 }, { "epoch": 1.45, "learning_rate": 0.0002, "loss": 2.9073, "step": 3370 }, { "epoch": 1.45, "learning_rate": 0.0002, "loss": 2.9148, "step": 3375 }, { "epoch": 1.45, "learning_rate": 0.0002, "loss": 2.9249, "step": 3380 }, { "epoch": 1.45, "learning_rate": 0.0002, "loss": 2.9124, "step": 3385 }, { "epoch": 1.46, "learning_rate": 0.0002, "loss": 2.9198, "step": 3390 }, { "epoch": 1.46, "eval_loss": 2.914485454559326, "eval_runtime": 5.2782, "eval_samples_per_second": 1871.297, "eval_steps_per_second": 14.778, "eval_top3_3_weighted_f1_score ": 0.41599164658249094, "eval_top_1_macro_f1_score": 0.057787282618300566, "eval_top_1_weighted_f1score": 0.19886535679954948, "eval_top_3_macro_f1_score": 0.14757469973464918, "step": 3392 }, { "epoch": 1.46, "learning_rate": 0.0002, "loss": 2.9276, "step": 3395 }, { "epoch": 1.46, "learning_rate": 0.0002, "loss": 2.9286, "step": 3400 }, { "epoch": 1.46, "learning_rate": 0.0002, "loss": 2.9303, "step": 3405 }, { "epoch": 1.46, "learning_rate": 0.0002, "loss": 2.9116, "step": 3410 }, { "epoch": 1.47, "learning_rate": 0.0002, "loss": 2.9331, "step": 3415 }, { "epoch": 1.47, "learning_rate": 0.0002, "loss": 2.9166, "step": 3420 }, { "epoch": 1.47, "learning_rate": 0.0002, "loss": 2.9202, "step": 3425 }, { "epoch": 1.47, "learning_rate": 0.0002, "loss": 2.9077, "step": 3430 }, { "epoch": 1.47, "learning_rate": 0.0002, "loss": 2.9384, "step": 3435 }, { "epoch": 1.48, "learning_rate": 0.0002, "loss": 2.9223, "step": 3440 }, { "epoch": 1.48, "learning_rate": 0.0002, "loss": 2.9459, "step": 3445 }, { "epoch": 1.48, "learning_rate": 0.0002, "loss": 2.8982, "step": 3450 }, { "epoch": 1.48, "learning_rate": 0.0002, "loss": 2.9291, "step": 3455 }, { "epoch": 1.48, "eval_loss": 2.9148874282836914, "eval_runtime": 5.3031, "eval_samples_per_second": 1862.488, "eval_steps_per_second": 14.708, "eval_top3_3_weighted_f1_score ": 0.41277012168510674, "eval_top_1_macro_f1_score": 0.050621606205600723, "eval_top_1_weighted_f1score": 0.19398352345475858, "eval_top_3_macro_f1_score": 0.1402377319399804, "step": 3456 }, { "epoch": 1.49, "learning_rate": 0.0002, "loss": 2.9347, "step": 3460 }, { "epoch": 1.49, "learning_rate": 0.0002, "loss": 2.9317, "step": 3465 }, { "epoch": 1.49, "learning_rate": 0.0002, "loss": 2.9001, "step": 3470 }, { "epoch": 1.49, "learning_rate": 0.0002, "loss": 2.9291, "step": 3475 }, { "epoch": 1.49, "learning_rate": 0.0002, "loss": 2.9027, "step": 3480 }, { "epoch": 1.5, "learning_rate": 0.0002, "loss": 2.9321, "step": 3485 }, { "epoch": 1.5, "learning_rate": 0.0002, "loss": 2.9156, "step": 3490 }, { "epoch": 1.5, "learning_rate": 0.0002, "loss": 2.9191, "step": 3495 }, { "epoch": 1.5, "learning_rate": 0.0002, "loss": 2.8953, "step": 3500 }, { "epoch": 1.5, "learning_rate": 0.0002, "loss": 2.9173, "step": 3505 }, { "epoch": 1.51, "learning_rate": 0.0002, "loss": 2.9088, "step": 3510 }, { "epoch": 1.51, "learning_rate": 0.0002, "loss": 2.902, "step": 3515 }, { "epoch": 1.51, "learning_rate": 0.0002, "loss": 2.9155, "step": 3520 }, { "epoch": 1.51, "eval_loss": 2.9106876850128174, "eval_runtime": 5.3153, "eval_samples_per_second": 1858.229, "eval_steps_per_second": 14.675, "eval_top3_3_weighted_f1_score ": 0.4138086157567288, "eval_top_1_macro_f1_score": 0.05603082682374438, "eval_top_1_weighted_f1score": 0.19434091264811978, "eval_top_3_macro_f1_score": 0.14252124615802533, "step": 3520 }, { "epoch": 1.51, "learning_rate": 0.0002, "loss": 2.9075, "step": 3525 }, { "epoch": 1.52, "learning_rate": 0.0002, "loss": 2.9301, "step": 3530 }, { "epoch": 1.52, "learning_rate": 0.0002, "loss": 2.9149, "step": 3535 }, { "epoch": 1.52, "learning_rate": 0.0002, "loss": 2.9142, "step": 3540 }, { "epoch": 1.52, "learning_rate": 0.0002, "loss": 2.9143, "step": 3545 }, { "epoch": 1.52, "learning_rate": 0.0002, "loss": 2.9038, "step": 3550 }, { "epoch": 1.53, "learning_rate": 0.0002, "loss": 2.9133, "step": 3555 }, { "epoch": 1.53, "learning_rate": 0.0002, "loss": 2.9394, "step": 3560 }, { "epoch": 1.53, "learning_rate": 0.0002, "loss": 2.9279, "step": 3565 }, { "epoch": 1.53, "learning_rate": 0.0002, "loss": 2.9155, "step": 3570 }, { "epoch": 1.53, "learning_rate": 0.0002, "loss": 2.8947, "step": 3575 }, { "epoch": 1.54, "learning_rate": 0.0002, "loss": 2.9055, "step": 3580 }, { "epoch": 1.54, "eval_loss": 2.9097824096679688, "eval_runtime": 5.2753, "eval_samples_per_second": 1872.293, "eval_steps_per_second": 14.786, "eval_top3_3_weighted_f1_score ": 0.4117600222372489, "eval_top_1_macro_f1_score": 0.05823863667346763, "eval_top_1_weighted_f1score": 0.19705568177663993, "eval_top_3_macro_f1_score": 0.1393118750761268, "step": 3584 }, { "epoch": 1.54, "learning_rate": 0.0002, "loss": 2.9048, "step": 3585 }, { "epoch": 1.54, "learning_rate": 0.0002, "loss": 2.9182, "step": 3590 }, { "epoch": 1.54, "learning_rate": 0.0002, "loss": 2.9238, "step": 3595 }, { "epoch": 1.55, "learning_rate": 0.0002, "loss": 2.9116, "step": 3600 }, { "epoch": 1.55, "learning_rate": 0.0002, "loss": 2.9165, "step": 3605 }, { "epoch": 1.55, "learning_rate": 0.0002, "loss": 2.9338, "step": 3610 }, { "epoch": 1.55, "learning_rate": 0.0002, "loss": 2.9333, "step": 3615 }, { "epoch": 1.55, "learning_rate": 0.0002, "loss": 2.9424, "step": 3620 }, { "epoch": 1.56, "learning_rate": 0.0002, "loss": 2.9053, "step": 3625 }, { "epoch": 1.56, "learning_rate": 0.0002, "loss": 2.9238, "step": 3630 }, { "epoch": 1.56, "learning_rate": 0.0002, "loss": 2.9277, "step": 3635 }, { "epoch": 1.56, "learning_rate": 0.0002, "loss": 2.9378, "step": 3640 }, { "epoch": 1.56, "learning_rate": 0.0002, "loss": 2.9047, "step": 3645 }, { "epoch": 1.57, "eval_loss": 2.9116811752319336, "eval_runtime": 5.2902, "eval_samples_per_second": 1867.023, "eval_steps_per_second": 14.744, "eval_top3_3_weighted_f1_score ": 0.41820266362901887, "eval_top_1_macro_f1_score": 0.058255851936218704, "eval_top_1_weighted_f1score": 0.20220022982345096, "eval_top_3_macro_f1_score": 0.14668049823990248, "step": 3648 }, { "epoch": 1.57, "learning_rate": 0.0002, "loss": 2.9164, "step": 3650 }, { "epoch": 1.57, "learning_rate": 0.0002, "loss": 2.9039, "step": 3655 }, { "epoch": 1.57, "learning_rate": 0.0002, "loss": 2.8993, "step": 3660 }, { "epoch": 1.57, "learning_rate": 0.0002, "loss": 2.9134, "step": 3665 }, { "epoch": 1.58, "learning_rate": 0.0002, "loss": 2.9097, "step": 3670 }, { "epoch": 1.58, "learning_rate": 0.0002, "loss": 2.9115, "step": 3675 }, { "epoch": 1.58, "learning_rate": 0.0002, "loss": 2.9007, "step": 3680 }, { "epoch": 1.58, "learning_rate": 0.0002, "loss": 2.9212, "step": 3685 }, { "epoch": 1.58, "learning_rate": 0.0002, "loss": 2.9029, "step": 3690 }, { "epoch": 1.59, "learning_rate": 0.0002, "loss": 2.9087, "step": 3695 }, { "epoch": 1.59, "learning_rate": 0.0002, "loss": 2.9374, "step": 3700 }, { "epoch": 1.59, "learning_rate": 0.0002, "loss": 2.9146, "step": 3705 }, { "epoch": 1.59, "learning_rate": 0.0002, "loss": 2.9014, "step": 3710 }, { "epoch": 1.59, "eval_loss": 2.9136781692504883, "eval_runtime": 5.3288, "eval_samples_per_second": 1853.506, "eval_steps_per_second": 14.637, "eval_top3_3_weighted_f1_score ": 0.4111978805220285, "eval_top_1_macro_f1_score": 0.057007157073673036, "eval_top_1_weighted_f1score": 0.1984773001189914, "eval_top_3_macro_f1_score": 0.1429177656884473, "step": 3712 }, { "epoch": 1.59, "learning_rate": 0.0002, "loss": 2.9181, "step": 3715 }, { "epoch": 1.6, "learning_rate": 0.0002, "loss": 2.8939, "step": 3720 }, { "epoch": 1.6, "learning_rate": 0.0002, "loss": 2.9005, "step": 3725 }, { "epoch": 1.6, "learning_rate": 0.0002, "loss": 2.9201, "step": 3730 }, { "epoch": 1.6, "learning_rate": 0.0002, "loss": 2.9124, "step": 3735 }, { "epoch": 1.61, "learning_rate": 0.0002, "loss": 2.9138, "step": 3740 }, { "epoch": 1.61, "learning_rate": 0.0002, "loss": 2.9055, "step": 3745 }, { "epoch": 1.61, "learning_rate": 0.0002, "loss": 2.9123, "step": 3750 }, { "epoch": 1.61, "learning_rate": 0.0002, "loss": 2.9223, "step": 3755 }, { "epoch": 1.61, "learning_rate": 0.0002, "loss": 2.9131, "step": 3760 }, { "epoch": 1.62, "learning_rate": 0.0002, "loss": 2.9128, "step": 3765 }, { "epoch": 1.62, "learning_rate": 0.0002, "loss": 2.9017, "step": 3770 }, { "epoch": 1.62, "learning_rate": 0.0002, "loss": 2.914, "step": 3775 }, { "epoch": 1.62, "eval_loss": 2.9097282886505127, "eval_runtime": 5.359, "eval_samples_per_second": 1843.064, "eval_steps_per_second": 14.555, "eval_top3_3_weighted_f1_score ": 0.4174773301607924, "eval_top_1_macro_f1_score": 0.05349445280856316, "eval_top_1_weighted_f1score": 0.1935677243995519, "eval_top_3_macro_f1_score": 0.14339050964397831, "step": 3776 }, { "epoch": 1.62, "learning_rate": 0.0002, "loss": 2.9215, "step": 3780 }, { "epoch": 1.63, "learning_rate": 0.0002, "loss": 2.9247, "step": 3785 }, { "epoch": 1.63, "learning_rate": 0.0002, "loss": 2.9118, "step": 3790 }, { "epoch": 1.63, "learning_rate": 0.0002, "loss": 2.8899, "step": 3795 }, { "epoch": 1.63, "learning_rate": 0.0002, "loss": 2.9191, "step": 3800 }, { "epoch": 1.63, "learning_rate": 0.0002, "loss": 2.9125, "step": 3805 }, { "epoch": 1.64, "learning_rate": 0.0002, "loss": 2.896, "step": 3810 }, { "epoch": 1.64, "learning_rate": 0.0002, "loss": 2.9089, "step": 3815 }, { "epoch": 1.64, "learning_rate": 0.0002, "loss": 2.9144, "step": 3820 }, { "epoch": 1.64, "learning_rate": 0.0002, "loss": 2.9069, "step": 3825 }, { "epoch": 1.64, "learning_rate": 0.0002, "loss": 2.9029, "step": 3830 }, { "epoch": 1.65, "learning_rate": 0.0002, "loss": 2.8979, "step": 3835 }, { "epoch": 1.65, "learning_rate": 0.0002, "loss": 2.9067, "step": 3840 }, { "epoch": 1.65, "eval_loss": 2.9060206413269043, "eval_runtime": 5.3734, "eval_samples_per_second": 1838.137, "eval_steps_per_second": 14.516, "eval_top3_3_weighted_f1_score ": 0.4185748244118136, "eval_top_1_macro_f1_score": 0.05513820508046533, "eval_top_1_weighted_f1score": 0.19501145432684122, "eval_top_3_macro_f1_score": 0.14302491472446657, "step": 3840 }, { "epoch": 1.65, "learning_rate": 0.0002, "loss": 2.9024, "step": 3845 }, { "epoch": 1.65, "learning_rate": 0.0002, "loss": 2.9259, "step": 3850 }, { "epoch": 1.66, "learning_rate": 0.0002, "loss": 2.915, "step": 3855 }, { "epoch": 1.66, "learning_rate": 0.0002, "loss": 2.8975, "step": 3860 }, { "epoch": 1.66, "learning_rate": 0.0002, "loss": 2.9105, "step": 3865 }, { "epoch": 1.66, "learning_rate": 0.0002, "loss": 2.9098, "step": 3870 }, { "epoch": 1.66, "learning_rate": 0.0002, "loss": 2.916, "step": 3875 }, { "epoch": 1.67, "learning_rate": 0.0002, "loss": 2.9133, "step": 3880 }, { "epoch": 1.67, "learning_rate": 0.0002, "loss": 2.9186, "step": 3885 }, { "epoch": 1.67, "learning_rate": 0.0002, "loss": 2.9075, "step": 3890 }, { "epoch": 1.67, "learning_rate": 0.0002, "loss": 2.9257, "step": 3895 }, { "epoch": 1.67, "learning_rate": 0.0002, "loss": 2.9012, "step": 3900 }, { "epoch": 1.68, "eval_loss": 2.909200668334961, "eval_runtime": 5.3297, "eval_samples_per_second": 1853.197, "eval_steps_per_second": 14.635, "eval_top3_3_weighted_f1_score ": 0.4143734814141679, "eval_top_1_macro_f1_score": 0.0537386594991731, "eval_top_1_weighted_f1score": 0.19279945061011616, "eval_top_3_macro_f1_score": 0.14795670029194763, "step": 3904 }, { "epoch": 1.68, "learning_rate": 0.0002, "loss": 2.9048, "step": 3905 }, { "epoch": 1.68, "learning_rate": 0.0002, "loss": 2.915, "step": 3910 }, { "epoch": 1.68, "learning_rate": 0.0002, "loss": 2.914, "step": 3915 }, { "epoch": 1.68, "learning_rate": 0.0002, "loss": 2.9222, "step": 3920 }, { "epoch": 1.69, "learning_rate": 0.0002, "loss": 2.9121, "step": 3925 }, { "epoch": 1.69, "learning_rate": 0.0002, "loss": 2.9194, "step": 3930 }, { "epoch": 1.69, "learning_rate": 0.0002, "loss": 2.9276, "step": 3935 }, { "epoch": 1.69, "learning_rate": 0.0002, "loss": 2.9291, "step": 3940 }, { "epoch": 1.69, "learning_rate": 0.0002, "loss": 2.8963, "step": 3945 }, { "epoch": 1.7, "learning_rate": 0.0002, "loss": 2.9213, "step": 3950 }, { "epoch": 1.7, "learning_rate": 0.0002, "loss": 2.908, "step": 3955 }, { "epoch": 1.7, "learning_rate": 0.0002, "loss": 2.9139, "step": 3960 }, { "epoch": 1.7, "learning_rate": 0.0002, "loss": 2.9147, "step": 3965 }, { "epoch": 1.7, "eval_loss": 2.904391288757324, "eval_runtime": 5.3339, "eval_samples_per_second": 1851.742, "eval_steps_per_second": 14.623, "eval_top3_3_weighted_f1_score ": 0.41579837645088025, "eval_top_1_macro_f1_score": 0.05622983577240496, "eval_top_1_weighted_f1score": 0.19530923139465425, "eval_top_3_macro_f1_score": 0.1397693104499722, "step": 3968 }, { "epoch": 1.7, "learning_rate": 0.0002, "loss": 2.9195, "step": 3970 }, { "epoch": 1.71, "learning_rate": 0.0002, "loss": 2.9397, "step": 3975 }, { "epoch": 1.71, "learning_rate": 0.0002, "loss": 2.9116, "step": 3980 }, { "epoch": 1.71, "learning_rate": 0.0002, "loss": 2.9376, "step": 3985 }, { "epoch": 1.71, "learning_rate": 0.0002, "loss": 2.9237, "step": 3990 }, { "epoch": 1.72, "learning_rate": 0.0002, "loss": 2.9208, "step": 3995 }, { "epoch": 1.72, "learning_rate": 0.0002, "loss": 2.9068, "step": 4000 }, { "epoch": 1.72, "learning_rate": 0.0002, "loss": 2.9086, "step": 4005 }, { "epoch": 1.72, "learning_rate": 0.0002, "loss": 2.8963, "step": 4010 }, { "epoch": 1.72, "learning_rate": 0.0002, "loss": 2.8877, "step": 4015 }, { "epoch": 1.73, "learning_rate": 0.0002, "loss": 2.928, "step": 4020 }, { "epoch": 1.73, "learning_rate": 0.0002, "loss": 2.9135, "step": 4025 }, { "epoch": 1.73, "learning_rate": 0.0002, "loss": 2.908, "step": 4030 }, { "epoch": 1.73, "eval_loss": 2.90883207321167, "eval_runtime": 5.3154, "eval_samples_per_second": 1858.194, "eval_steps_per_second": 14.674, "eval_top3_3_weighted_f1_score ": 0.416650888423843, "eval_top_1_macro_f1_score": 0.05452347270506178, "eval_top_1_weighted_f1score": 0.1954259208665436, "eval_top_3_macro_f1_score": 0.14547676692371397, "step": 4032 }, { "epoch": 1.73, "learning_rate": 0.0002, "loss": 2.9015, "step": 4035 }, { "epoch": 1.73, "learning_rate": 0.0002, "loss": 2.9234, "step": 4040 }, { "epoch": 1.74, "learning_rate": 0.0002, "loss": 2.9126, "step": 4045 }, { "epoch": 1.74, "learning_rate": 0.0002, "loss": 2.905, "step": 4050 }, { "epoch": 1.74, "learning_rate": 0.0002, "loss": 2.9202, "step": 4055 }, { "epoch": 1.74, "learning_rate": 0.0002, "loss": 2.9229, "step": 4060 }, { "epoch": 1.75, "learning_rate": 0.0002, "loss": 2.9296, "step": 4065 }, { "epoch": 1.75, "learning_rate": 0.0002, "loss": 2.9249, "step": 4070 }, { "epoch": 1.75, "learning_rate": 0.0002, "loss": 2.9094, "step": 4075 }, { "epoch": 1.75, "learning_rate": 0.0002, "loss": 2.9157, "step": 4080 }, { "epoch": 1.75, "learning_rate": 0.0002, "loss": 2.9287, "step": 4085 }, { "epoch": 1.76, "learning_rate": 0.0002, "loss": 2.9212, "step": 4090 }, { "epoch": 1.76, "learning_rate": 0.0002, "loss": 2.8952, "step": 4095 }, { "epoch": 1.76, "eval_loss": 2.914785385131836, "eval_runtime": 5.3312, "eval_samples_per_second": 1852.677, "eval_steps_per_second": 14.631, "eval_top3_3_weighted_f1_score ": 0.4185348765090556, "eval_top_1_macro_f1_score": 0.05764061973952746, "eval_top_1_weighted_f1score": 0.19691281408017583, "eval_top_3_macro_f1_score": 0.1456156190028161, "step": 4096 }, { "epoch": 1.76, "learning_rate": 0.0002, "loss": 2.9214, "step": 4100 }, { "epoch": 1.76, "learning_rate": 0.0002, "loss": 2.9131, "step": 4105 }, { "epoch": 1.76, "learning_rate": 0.0002, "loss": 2.9001, "step": 4110 }, { "epoch": 1.77, "learning_rate": 0.0002, "loss": 2.9173, "step": 4115 }, { "epoch": 1.77, "learning_rate": 0.0002, "loss": 2.9069, "step": 4120 }, { "epoch": 1.77, "learning_rate": 0.0002, "loss": 2.9344, "step": 4125 }, { "epoch": 1.77, "learning_rate": 0.0002, "loss": 2.9176, "step": 4130 }, { "epoch": 1.78, "learning_rate": 0.0002, "loss": 2.932, "step": 4135 }, { "epoch": 1.78, "learning_rate": 0.0002, "loss": 2.9112, "step": 4140 }, { "epoch": 1.78, "learning_rate": 0.0002, "loss": 2.896, "step": 4145 }, { "epoch": 1.78, "learning_rate": 0.0002, "loss": 2.926, "step": 4150 }, { "epoch": 1.78, "learning_rate": 0.0002, "loss": 2.9119, "step": 4155 }, { "epoch": 1.79, "learning_rate": 0.0002, "loss": 2.9204, "step": 4160 }, { "epoch": 1.79, "eval_loss": 2.9073331356048584, "eval_runtime": 5.3361, "eval_samples_per_second": 1850.992, "eval_steps_per_second": 14.618, "eval_top3_3_weighted_f1_score ": 0.4166796433939981, "eval_top_1_macro_f1_score": 0.05722567454289957, "eval_top_1_weighted_f1score": 0.19412432161186186, "eval_top_3_macro_f1_score": 0.14723111807796124, "step": 4160 }, { "epoch": 1.79, "learning_rate": 0.0002, "loss": 2.8993, "step": 4165 }, { "epoch": 1.79, "learning_rate": 0.0002, "loss": 2.9186, "step": 4170 }, { "epoch": 1.79, "learning_rate": 0.0002, "loss": 2.9047, "step": 4175 }, { "epoch": 1.79, "learning_rate": 0.0002, "loss": 2.9204, "step": 4180 }, { "epoch": 1.8, "learning_rate": 0.0002, "loss": 2.908, "step": 4185 }, { "epoch": 1.8, "learning_rate": 0.0002, "loss": 2.9282, "step": 4190 }, { "epoch": 1.8, "learning_rate": 0.0002, "loss": 2.9121, "step": 4195 }, { "epoch": 1.8, "learning_rate": 0.0002, "loss": 2.9177, "step": 4200 }, { "epoch": 1.81, "learning_rate": 0.0002, "loss": 2.9038, "step": 4205 }, { "epoch": 1.81, "learning_rate": 0.0002, "loss": 2.9246, "step": 4210 }, { "epoch": 1.81, "learning_rate": 0.0002, "loss": 2.9083, "step": 4215 }, { "epoch": 1.81, "learning_rate": 0.0002, "loss": 2.9188, "step": 4220 }, { "epoch": 1.81, "eval_loss": 2.9065234661102295, "eval_runtime": 5.3649, "eval_samples_per_second": 1841.057, "eval_steps_per_second": 14.539, "eval_top3_3_weighted_f1_score ": 0.41645565051543537, "eval_top_1_macro_f1_score": 0.056695468234468124, "eval_top_1_weighted_f1score": 0.1949375757721395, "eval_top_3_macro_f1_score": 0.14736203326563801, "step": 4224 }, { "epoch": 1.81, "learning_rate": 0.0002, "loss": 2.924, "step": 4225 }, { "epoch": 1.82, "learning_rate": 0.0002, "loss": 2.9285, "step": 4230 }, { "epoch": 1.82, "learning_rate": 0.0002, "loss": 2.9264, "step": 4235 }, { "epoch": 1.82, "learning_rate": 0.0002, "loss": 2.9202, "step": 4240 }, { "epoch": 1.82, "learning_rate": 0.0002, "loss": 2.9263, "step": 4245 }, { "epoch": 1.82, "learning_rate": 0.0002, "loss": 2.923, "step": 4250 }, { "epoch": 1.83, "learning_rate": 0.0002, "loss": 2.9035, "step": 4255 }, { "epoch": 1.83, "learning_rate": 0.0002, "loss": 2.9033, "step": 4260 }, { "epoch": 1.83, "learning_rate": 0.0002, "loss": 2.9267, "step": 4265 }, { "epoch": 1.83, "learning_rate": 0.0002, "loss": 2.9168, "step": 4270 }, { "epoch": 1.84, "learning_rate": 0.0002, "loss": 2.9215, "step": 4275 }, { "epoch": 1.84, "learning_rate": 0.0002, "loss": 2.8942, "step": 4280 }, { "epoch": 1.84, "learning_rate": 0.0002, "loss": 2.9227, "step": 4285 }, { "epoch": 1.84, "eval_loss": 2.9160542488098145, "eval_runtime": 5.2816, "eval_samples_per_second": 1870.065, "eval_steps_per_second": 14.768, "eval_top3_3_weighted_f1_score ": 0.4112509098612428, "eval_top_1_macro_f1_score": 0.05835205008822769, "eval_top_1_weighted_f1score": 0.19920753039269692, "eval_top_3_macro_f1_score": 0.13974934867905217, "step": 4288 }, { "epoch": 1.84, "learning_rate": 0.0002, "loss": 2.9039, "step": 4290 }, { "epoch": 1.84, "learning_rate": 0.0002, "loss": 2.8846, "step": 4295 }, { "epoch": 1.85, "learning_rate": 0.0002, "loss": 2.8988, "step": 4300 }, { "epoch": 1.85, "learning_rate": 0.0002, "loss": 2.9182, "step": 4305 }, { "epoch": 1.85, "learning_rate": 0.0002, "loss": 2.9194, "step": 4310 }, { "epoch": 1.85, "learning_rate": 0.0002, "loss": 2.9185, "step": 4315 }, { "epoch": 1.85, "learning_rate": 0.0002, "loss": 2.92, "step": 4320 }, { "epoch": 1.86, "learning_rate": 0.0002, "loss": 2.907, "step": 4325 }, { "epoch": 1.86, "learning_rate": 0.0002, "loss": 2.9049, "step": 4330 }, { "epoch": 1.86, "learning_rate": 0.0002, "loss": 2.9168, "step": 4335 }, { "epoch": 1.86, "learning_rate": 0.0002, "loss": 2.9012, "step": 4340 }, { "epoch": 1.87, "learning_rate": 0.0002, "loss": 2.9008, "step": 4345 }, { "epoch": 1.87, "learning_rate": 0.0002, "loss": 2.9114, "step": 4350 }, { "epoch": 1.87, "eval_loss": 2.9069080352783203, "eval_runtime": 5.2449, "eval_samples_per_second": 1883.177, "eval_steps_per_second": 14.872, "eval_top3_3_weighted_f1_score ": 0.4131753084295918, "eval_top_1_macro_f1_score": 0.06237738522108177, "eval_top_1_weighted_f1score": 0.20188181913152448, "eval_top_3_macro_f1_score": 0.1425914449331181, "step": 4352 }, { "epoch": 1.87, "learning_rate": 0.0002, "loss": 2.9061, "step": 4355 }, { "epoch": 1.87, "learning_rate": 0.0002, "loss": 2.9113, "step": 4360 }, { "epoch": 1.87, "learning_rate": 0.0002, "loss": 2.8873, "step": 4365 }, { "epoch": 1.88, "learning_rate": 0.0002, "loss": 2.9297, "step": 4370 }, { "epoch": 1.88, "learning_rate": 0.0002, "loss": 2.8953, "step": 4375 }, { "epoch": 1.88, "learning_rate": 0.0002, "loss": 2.9105, "step": 4380 }, { "epoch": 1.88, "learning_rate": 0.0002, "loss": 2.9281, "step": 4385 }, { "epoch": 1.88, "learning_rate": 0.0002, "loss": 2.904, "step": 4390 }, { "epoch": 1.89, "learning_rate": 0.0002, "loss": 2.9272, "step": 4395 }, { "epoch": 1.89, "learning_rate": 0.0002, "loss": 2.8938, "step": 4400 }, { "epoch": 1.89, "learning_rate": 0.0002, "loss": 2.9084, "step": 4405 }, { "epoch": 1.89, "learning_rate": 0.0002, "loss": 2.9135, "step": 4410 }, { "epoch": 1.9, "learning_rate": 0.0002, "loss": 2.909, "step": 4415 }, { "epoch": 1.9, "eval_loss": 2.908174514770508, "eval_runtime": 5.3274, "eval_samples_per_second": 1854.013, "eval_steps_per_second": 14.641, "eval_top3_3_weighted_f1_score ": 0.4165512120544975, "eval_top_1_macro_f1_score": 0.059023123561221094, "eval_top_1_weighted_f1score": 0.19620342691966175, "eval_top_3_macro_f1_score": 0.14350421344968878, "step": 4416 }, { "epoch": 1.9, "learning_rate": 0.0002, "loss": 2.9153, "step": 4420 }, { "epoch": 1.9, "learning_rate": 0.0002, "loss": 2.9189, "step": 4425 }, { "epoch": 1.9, "learning_rate": 0.0002, "loss": 2.9199, "step": 4430 }, { "epoch": 1.9, "learning_rate": 0.0002, "loss": 2.9202, "step": 4435 }, { "epoch": 1.91, "learning_rate": 0.0002, "loss": 2.9097, "step": 4440 }, { "epoch": 1.91, "learning_rate": 0.0002, "loss": 2.9136, "step": 4445 }, { "epoch": 1.91, "learning_rate": 0.0002, "loss": 2.9187, "step": 4450 }, { "epoch": 1.91, "learning_rate": 0.0002, "loss": 2.9051, "step": 4455 }, { "epoch": 1.91, "learning_rate": 0.0002, "loss": 2.9131, "step": 4460 }, { "epoch": 1.92, "learning_rate": 0.0002, "loss": 2.9089, "step": 4465 }, { "epoch": 1.92, "learning_rate": 0.0002, "loss": 2.8953, "step": 4470 }, { "epoch": 1.92, "learning_rate": 0.0002, "loss": 2.9303, "step": 4475 }, { "epoch": 1.92, "learning_rate": 0.0002, "loss": 2.9149, "step": 4480 }, { "epoch": 1.92, "eval_loss": 2.9029014110565186, "eval_runtime": 5.3399, "eval_samples_per_second": 1849.662, "eval_steps_per_second": 14.607, "eval_top3_3_weighted_f1_score ": 0.41880335718795425, "eval_top_1_macro_f1_score": 0.05744559670095288, "eval_top_1_weighted_f1score": 0.1989887507512129, "eval_top_3_macro_f1_score": 0.14449459246824883, "step": 4480 }, { "epoch": 1.93, "learning_rate": 0.0002, "loss": 2.9025, "step": 4485 }, { "epoch": 1.93, "learning_rate": 0.0002, "loss": 2.9341, "step": 4490 }, { "epoch": 1.93, "learning_rate": 0.0002, "loss": 2.9269, "step": 4495 }, { "epoch": 1.93, "learning_rate": 0.0002, "loss": 2.8972, "step": 4500 }, { "epoch": 1.93, "learning_rate": 0.0002, "loss": 2.8998, "step": 4505 }, { "epoch": 1.94, "learning_rate": 0.0002, "loss": 2.9039, "step": 4510 }, { "epoch": 1.94, "learning_rate": 0.0002, "loss": 2.9145, "step": 4515 }, { "epoch": 1.94, "learning_rate": 0.0002, "loss": 2.9066, "step": 4520 }, { "epoch": 1.94, "learning_rate": 0.0002, "loss": 2.9148, "step": 4525 }, { "epoch": 1.94, "learning_rate": 0.0002, "loss": 2.9176, "step": 4530 }, { "epoch": 1.95, "learning_rate": 0.0002, "loss": 2.8787, "step": 4535 }, { "epoch": 1.95, "learning_rate": 0.0002, "loss": 2.9088, "step": 4540 }, { "epoch": 1.95, "eval_loss": 2.905123233795166, "eval_runtime": 5.3029, "eval_samples_per_second": 1862.574, "eval_steps_per_second": 14.709, "eval_top3_3_weighted_f1_score ": 0.41605559730748176, "eval_top_1_macro_f1_score": 0.05698845252098233, "eval_top_1_weighted_f1score": 0.19595302817016025, "eval_top_3_macro_f1_score": 0.1486403646875358, "step": 4544 }, { "epoch": 1.95, "learning_rate": 0.0002, "loss": 2.9087, "step": 4545 }, { "epoch": 1.95, "learning_rate": 0.0002, "loss": 2.9306, "step": 4550 }, { "epoch": 1.96, "learning_rate": 0.0002, "loss": 2.9084, "step": 4555 }, { "epoch": 1.96, "learning_rate": 0.0002, "loss": 2.9113, "step": 4560 }, { "epoch": 1.96, "learning_rate": 0.0002, "loss": 2.9034, "step": 4565 }, { "epoch": 1.96, "learning_rate": 0.0002, "loss": 2.891, "step": 4570 }, { "epoch": 1.96, "learning_rate": 0.0002, "loss": 2.9184, "step": 4575 }, { "epoch": 1.97, "learning_rate": 0.0002, "loss": 2.9196, "step": 4580 }, { "epoch": 1.97, "learning_rate": 0.0002, "loss": 2.9025, "step": 4585 }, { "epoch": 1.97, "learning_rate": 0.0002, "loss": 2.9052, "step": 4590 }, { "epoch": 1.97, "learning_rate": 0.0002, "loss": 2.9056, "step": 4595 }, { "epoch": 1.97, "learning_rate": 0.0002, "loss": 2.9216, "step": 4600 }, { "epoch": 1.98, "learning_rate": 0.0002, "loss": 2.8998, "step": 4605 }, { "epoch": 1.98, "eval_loss": 2.901381492614746, "eval_runtime": 5.244, "eval_samples_per_second": 1883.497, "eval_steps_per_second": 14.874, "eval_top3_3_weighted_f1_score ": 0.4190062695557982, "eval_top_1_macro_f1_score": 0.059787971401313846, "eval_top_1_weighted_f1score": 0.19889101422706393, "eval_top_3_macro_f1_score": 0.15426000566225975, "step": 4608 }, { "epoch": 1.98, "learning_rate": 0.0002, "loss": 2.9209, "step": 4610 }, { "epoch": 1.98, "learning_rate": 0.0002, "loss": 2.9142, "step": 4615 }, { "epoch": 1.98, "learning_rate": 0.0002, "loss": 2.9056, "step": 4620 }, { "epoch": 1.99, "learning_rate": 0.0002, "loss": 2.9182, "step": 4625 }, { "epoch": 1.99, "learning_rate": 0.0002, "loss": 2.9028, "step": 4630 }, { "epoch": 1.99, "learning_rate": 0.0002, "loss": 2.9051, "step": 4635 }, { "epoch": 1.99, "learning_rate": 0.0002, "loss": 2.9102, "step": 4640 }, { "epoch": 1.99, "learning_rate": 0.0002, "loss": 2.9053, "step": 4645 }, { "epoch": 2.0, "learning_rate": 0.0002, "loss": 2.9247, "step": 4650 }, { "epoch": 2.0, "learning_rate": 0.0002, "loss": 2.9123, "step": 4655 }, { "epoch": 2.0, "learning_rate": 0.0002, "loss": 3.1869, "step": 4660 }, { "epoch": 2.0, "learning_rate": 0.0002, "loss": 2.897, "step": 4665 }, { "epoch": 2.01, "learning_rate": 0.0002, "loss": 2.8913, "step": 4670 }, { "epoch": 2.01, "eval_loss": 2.902728319168091, "eval_runtime": 5.2917, "eval_samples_per_second": 1866.498, "eval_steps_per_second": 14.74, "eval_top3_3_weighted_f1_score ": 0.4154482191451159, "eval_top_1_macro_f1_score": 0.05910348194925984, "eval_top_1_weighted_f1score": 0.20223672449236257, "eval_top_3_macro_f1_score": 0.14626048197345107, "step": 4672 }, { "epoch": 2.01, "learning_rate": 0.0002, "loss": 2.9062, "step": 4675 }, { "epoch": 2.01, "learning_rate": 0.0002, "loss": 2.9174, "step": 4680 }, { "epoch": 2.01, "learning_rate": 0.0002, "loss": 2.8912, "step": 4685 }, { "epoch": 2.01, "learning_rate": 0.0002, "loss": 2.8863, "step": 4690 }, { "epoch": 2.02, "learning_rate": 0.0002, "loss": 2.898, "step": 4695 }, { "epoch": 2.02, "learning_rate": 0.0002, "loss": 2.8964, "step": 4700 }, { "epoch": 2.02, "learning_rate": 0.0002, "loss": 2.9035, "step": 4705 }, { "epoch": 2.02, "learning_rate": 0.0002, "loss": 2.9061, "step": 4710 }, { "epoch": 2.02, "learning_rate": 0.0002, "loss": 2.9078, "step": 4715 }, { "epoch": 2.03, "learning_rate": 0.0002, "loss": 2.9121, "step": 4720 }, { "epoch": 2.03, "learning_rate": 0.0002, "loss": 2.9054, "step": 4725 }, { "epoch": 2.03, "learning_rate": 0.0002, "loss": 2.9076, "step": 4730 }, { "epoch": 2.03, "learning_rate": 0.0002, "loss": 2.9018, "step": 4735 }, { "epoch": 2.03, "eval_loss": 2.903496503829956, "eval_runtime": 5.3745, "eval_samples_per_second": 1837.739, "eval_steps_per_second": 14.513, "eval_top3_3_weighted_f1_score ": 0.4163058624724811, "eval_top_1_macro_f1_score": 0.05647649082182299, "eval_top_1_weighted_f1score": 0.19924151899797926, "eval_top_3_macro_f1_score": 0.1405979459406439, "step": 4736 }, { "epoch": 2.04, "learning_rate": 0.0002, "loss": 2.9071, "step": 4740 }, { "epoch": 2.04, "learning_rate": 0.0002, "loss": 2.9004, "step": 4745 }, { "epoch": 2.04, "learning_rate": 0.0002, "loss": 2.9195, "step": 4750 }, { "epoch": 2.04, "learning_rate": 0.0002, "loss": 2.9051, "step": 4755 }, { "epoch": 2.04, "learning_rate": 0.0002, "loss": 2.8896, "step": 4760 }, { "epoch": 2.05, "learning_rate": 0.0002, "loss": 2.9106, "step": 4765 }, { "epoch": 2.05, "learning_rate": 0.0002, "loss": 2.8991, "step": 4770 }, { "epoch": 2.05, "learning_rate": 0.0002, "loss": 2.9019, "step": 4775 }, { "epoch": 2.05, "learning_rate": 0.0002, "loss": 2.8923, "step": 4780 }, { "epoch": 2.05, "learning_rate": 0.0002, "loss": 2.9036, "step": 4785 }, { "epoch": 2.06, "learning_rate": 0.0002, "loss": 2.9053, "step": 4790 }, { "epoch": 2.06, "learning_rate": 0.0002, "loss": 2.8904, "step": 4795 }, { "epoch": 2.06, "learning_rate": 0.0002, "loss": 2.8997, "step": 4800 }, { "epoch": 2.06, "eval_loss": 2.909992218017578, "eval_runtime": 5.3247, "eval_samples_per_second": 1854.946, "eval_steps_per_second": 14.649, "eval_top3_3_weighted_f1_score ": 0.41880815422019485, "eval_top_1_macro_f1_score": 0.05827192013803967, "eval_top_1_weighted_f1score": 0.19750961372028833, "eval_top_3_macro_f1_score": 0.14495982688137427, "step": 4800 }, { "epoch": 2.06, "learning_rate": 0.0002, "loss": 2.9132, "step": 4805 }, { "epoch": 2.07, "learning_rate": 0.0002, "loss": 2.8989, "step": 4810 }, { "epoch": 2.07, "learning_rate": 0.0002, "loss": 2.8843, "step": 4815 }, { "epoch": 2.07, "learning_rate": 0.0002, "loss": 2.8965, "step": 4820 }, { "epoch": 2.07, "learning_rate": 0.0002, "loss": 2.914, "step": 4825 }, { "epoch": 2.07, "learning_rate": 0.0002, "loss": 2.8799, "step": 4830 }, { "epoch": 2.08, "learning_rate": 0.0002, "loss": 2.8981, "step": 4835 }, { "epoch": 2.08, "learning_rate": 0.0002, "loss": 2.9123, "step": 4840 }, { "epoch": 2.08, "learning_rate": 0.0002, "loss": 2.9262, "step": 4845 }, { "epoch": 2.08, "learning_rate": 0.0002, "loss": 2.8943, "step": 4850 }, { "epoch": 2.08, "learning_rate": 0.0002, "loss": 2.9001, "step": 4855 }, { "epoch": 2.09, "learning_rate": 0.0002, "loss": 2.917, "step": 4860 }, { "epoch": 2.09, "eval_loss": 2.9023990631103516, "eval_runtime": 5.2937, "eval_samples_per_second": 1865.817, "eval_steps_per_second": 14.735, "eval_top3_3_weighted_f1_score ": 0.417123946923505, "eval_top_1_macro_f1_score": 0.05402152788915577, "eval_top_1_weighted_f1score": 0.19808512530123618, "eval_top_3_macro_f1_score": 0.1564014984350641, "step": 4864 }, { "epoch": 2.09, "learning_rate": 0.0002, "loss": 2.9091, "step": 4865 }, { "epoch": 2.09, "learning_rate": 0.0002, "loss": 2.9159, "step": 4870 }, { "epoch": 2.09, "learning_rate": 0.0002, "loss": 2.9097, "step": 4875 }, { "epoch": 2.1, "learning_rate": 0.0002, "loss": 2.8911, "step": 4880 }, { "epoch": 2.1, "learning_rate": 0.0002, "loss": 2.9067, "step": 4885 }, { "epoch": 2.1, "learning_rate": 0.0002, "loss": 2.9007, "step": 4890 }, { "epoch": 2.1, "learning_rate": 0.0002, "loss": 2.8923, "step": 4895 }, { "epoch": 2.1, "learning_rate": 0.0002, "loss": 2.9066, "step": 4900 }, { "epoch": 2.11, "learning_rate": 0.0002, "loss": 2.9114, "step": 4905 }, { "epoch": 2.11, "learning_rate": 0.0002, "loss": 2.9082, "step": 4910 }, { "epoch": 2.11, "learning_rate": 0.0002, "loss": 2.9178, "step": 4915 }, { "epoch": 2.11, "learning_rate": 0.0002, "loss": 2.9244, "step": 4920 }, { "epoch": 2.11, "learning_rate": 0.0002, "loss": 2.9009, "step": 4925 }, { "epoch": 2.12, "eval_loss": 2.8972628116607666, "eval_runtime": 5.3426, "eval_samples_per_second": 1848.731, "eval_steps_per_second": 14.6, "eval_top3_3_weighted_f1_score ": 0.4156100206555792, "eval_top_1_macro_f1_score": 0.05458305463384416, "eval_top_1_weighted_f1score": 0.19470342306471872, "eval_top_3_macro_f1_score": 0.14190048695385177, "step": 4928 }, { "epoch": 2.12, "learning_rate": 0.0002, "loss": 2.9053, "step": 4930 }, { "epoch": 2.12, "learning_rate": 0.0002, "loss": 2.8988, "step": 4935 }, { "epoch": 2.12, "learning_rate": 0.0002, "loss": 2.9056, "step": 4940 }, { "epoch": 2.12, "learning_rate": 0.0002, "loss": 2.915, "step": 4945 }, { "epoch": 2.13, "learning_rate": 0.0002, "loss": 2.8977, "step": 4950 }, { "epoch": 2.13, "learning_rate": 0.0002, "loss": 2.9033, "step": 4955 }, { "epoch": 2.13, "learning_rate": 0.0002, "loss": 2.8917, "step": 4960 }, { "epoch": 2.13, "learning_rate": 0.0002, "loss": 2.9232, "step": 4965 }, { "epoch": 2.13, "learning_rate": 0.0002, "loss": 2.8904, "step": 4970 }, { "epoch": 2.14, "learning_rate": 0.0002, "loss": 2.8912, "step": 4975 }, { "epoch": 2.14, "learning_rate": 0.0002, "loss": 2.8886, "step": 4980 }, { "epoch": 2.14, "learning_rate": 0.0002, "loss": 2.9094, "step": 4985 }, { "epoch": 2.14, "learning_rate": 0.0002, "loss": 2.9144, "step": 4990 }, { "epoch": 2.14, "eval_loss": 2.9059925079345703, "eval_runtime": 5.3147, "eval_samples_per_second": 1858.424, "eval_steps_per_second": 14.676, "eval_top3_3_weighted_f1_score ": 0.41711598390450566, "eval_top_1_macro_f1_score": 0.055409555982394415, "eval_top_1_weighted_f1score": 0.19777402153595952, "eval_top_3_macro_f1_score": 0.1464816808654842, "step": 4992 }, { "epoch": 2.14, "learning_rate": 0.0002, "loss": 2.8881, "step": 4995 }, { "epoch": 2.15, "learning_rate": 0.0002, "loss": 2.8905, "step": 5000 }, { "epoch": 2.15, "learning_rate": 0.0002, "loss": 2.9009, "step": 5005 }, { "epoch": 2.15, "learning_rate": 0.0002, "loss": 2.9233, "step": 5010 }, { "epoch": 2.15, "learning_rate": 0.0002, "loss": 2.9139, "step": 5015 }, { "epoch": 2.16, "learning_rate": 0.0002, "loss": 2.9036, "step": 5020 }, { "epoch": 2.16, "learning_rate": 0.0002, "loss": 2.8985, "step": 5025 }, { "epoch": 2.16, "learning_rate": 0.0002, "loss": 2.8937, "step": 5030 }, { "epoch": 2.16, "learning_rate": 0.0002, "loss": 2.8878, "step": 5035 }, { "epoch": 2.16, "learning_rate": 0.0002, "loss": 2.898, "step": 5040 }, { "epoch": 2.17, "learning_rate": 0.0002, "loss": 2.8962, "step": 5045 }, { "epoch": 2.17, "learning_rate": 0.0002, "loss": 2.912, "step": 5050 }, { "epoch": 2.17, "learning_rate": 0.0002, "loss": 2.9028, "step": 5055 }, { "epoch": 2.17, "eval_loss": 2.898975133895874, "eval_runtime": 5.291, "eval_samples_per_second": 1866.74, "eval_steps_per_second": 14.742, "eval_top3_3_weighted_f1_score ": 0.42027650116928694, "eval_top_1_macro_f1_score": 0.056066861668324454, "eval_top_1_weighted_f1score": 0.20134187579837567, "eval_top_3_macro_f1_score": 0.14763695642950755, "step": 5056 }, { "epoch": 2.17, "learning_rate": 0.0002, "loss": 2.8855, "step": 5060 }, { "epoch": 2.17, "learning_rate": 0.0002, "loss": 2.9033, "step": 5065 }, { "epoch": 2.18, "learning_rate": 0.0002, "loss": 2.9056, "step": 5070 }, { "epoch": 2.18, "learning_rate": 0.0002, "loss": 2.9207, "step": 5075 }, { "epoch": 2.18, "learning_rate": 0.0002, "loss": 2.9318, "step": 5080 }, { "epoch": 2.18, "learning_rate": 0.0002, "loss": 2.9121, "step": 5085 }, { "epoch": 2.19, "learning_rate": 0.0002, "loss": 2.8943, "step": 5090 }, { "epoch": 2.19, "learning_rate": 0.0002, "loss": 2.9054, "step": 5095 }, { "epoch": 2.19, "learning_rate": 0.0002, "loss": 2.9012, "step": 5100 }, { "epoch": 2.19, "learning_rate": 0.0002, "loss": 2.9149, "step": 5105 }, { "epoch": 2.19, "learning_rate": 0.0002, "loss": 2.9038, "step": 5110 }, { "epoch": 2.2, "learning_rate": 0.0002, "loss": 2.8955, "step": 5115 }, { "epoch": 2.2, "learning_rate": 0.0002, "loss": 2.8892, "step": 5120 }, { "epoch": 2.2, "eval_loss": 2.892855167388916, "eval_runtime": 5.2995, "eval_samples_per_second": 1863.764, "eval_steps_per_second": 14.718, "eval_top3_3_weighted_f1_score ": 0.4157954024117956, "eval_top_1_macro_f1_score": 0.05936697956810241, "eval_top_1_weighted_f1score": 0.20247573523572931, "eval_top_3_macro_f1_score": 0.14266020318390252, "step": 5120 }, { "epoch": 2.2, "learning_rate": 0.0002, "loss": 2.8818, "step": 5125 }, { "epoch": 2.2, "learning_rate": 0.0002, "loss": 2.9041, "step": 5130 }, { "epoch": 2.2, "learning_rate": 0.0002, "loss": 2.9043, "step": 5135 }, { "epoch": 2.21, "learning_rate": 0.0002, "loss": 2.9055, "step": 5140 }, { "epoch": 2.21, "learning_rate": 0.0002, "loss": 2.913, "step": 5145 }, { "epoch": 2.21, "learning_rate": 0.0002, "loss": 2.8993, "step": 5150 }, { "epoch": 2.21, "learning_rate": 0.0002, "loss": 2.8972, "step": 5155 }, { "epoch": 2.22, "learning_rate": 0.0002, "loss": 2.8794, "step": 5160 }, { "epoch": 2.22, "learning_rate": 0.0002, "loss": 2.902, "step": 5165 }, { "epoch": 2.22, "learning_rate": 0.0002, "loss": 2.8963, "step": 5170 }, { "epoch": 2.22, "learning_rate": 0.0002, "loss": 2.8895, "step": 5175 }, { "epoch": 2.22, "learning_rate": 0.0002, "loss": 2.8859, "step": 5180 }, { "epoch": 2.23, "eval_loss": 2.89302921295166, "eval_runtime": 5.396, "eval_samples_per_second": 1830.429, "eval_steps_per_second": 14.455, "eval_top3_3_weighted_f1_score ": 0.41303606443703067, "eval_top_1_macro_f1_score": 0.05968216401094024, "eval_top_1_weighted_f1score": 0.20331060932366576, "eval_top_3_macro_f1_score": 0.14440810187347125, "step": 5184 }, { "epoch": 2.23, "learning_rate": 0.0002, "loss": 2.8696, "step": 5185 }, { "epoch": 2.23, "learning_rate": 0.0002, "loss": 2.9068, "step": 5190 }, { "epoch": 2.23, "learning_rate": 0.0002, "loss": 2.8954, "step": 5195 }, { "epoch": 2.23, "learning_rate": 0.0002, "loss": 2.9057, "step": 5200 }, { "epoch": 2.23, "learning_rate": 0.0002, "loss": 2.8911, "step": 5205 }, { "epoch": 2.24, "learning_rate": 0.0002, "loss": 2.9077, "step": 5210 }, { "epoch": 2.24, "learning_rate": 0.0002, "loss": 2.9102, "step": 5215 }, { "epoch": 2.24, "learning_rate": 0.0002, "loss": 2.8988, "step": 5220 }, { "epoch": 2.24, "learning_rate": 0.0002, "loss": 2.8987, "step": 5225 }, { "epoch": 2.25, "learning_rate": 0.0002, "loss": 2.9164, "step": 5230 }, { "epoch": 2.25, "learning_rate": 0.0002, "loss": 2.8974, "step": 5235 }, { "epoch": 2.25, "learning_rate": 0.0002, "loss": 2.8952, "step": 5240 }, { "epoch": 2.25, "learning_rate": 0.0002, "loss": 2.9294, "step": 5245 }, { "epoch": 2.25, "eval_loss": 2.899155616760254, "eval_runtime": 5.3508, "eval_samples_per_second": 1845.891, "eval_steps_per_second": 14.577, "eval_top3_3_weighted_f1_score ": 0.42197230106720923, "eval_top_1_macro_f1_score": 0.06035559277068504, "eval_top_1_weighted_f1score": 0.19955784494082673, "eval_top_3_macro_f1_score": 0.14700216854763365, "step": 5248 }, { "epoch": 2.25, "learning_rate": 0.0002, "loss": 2.9087, "step": 5250 }, { "epoch": 2.26, "learning_rate": 0.0002, "loss": 2.8955, "step": 5255 }, { "epoch": 2.26, "learning_rate": 0.0002, "loss": 2.8916, "step": 5260 }, { "epoch": 2.26, "learning_rate": 0.0002, "loss": 2.8943, "step": 5265 }, { "epoch": 2.26, "learning_rate": 0.0002, "loss": 2.903, "step": 5270 }, { "epoch": 2.26, "learning_rate": 0.0002, "loss": 2.8978, "step": 5275 }, { "epoch": 2.27, "learning_rate": 0.0002, "loss": 2.91, "step": 5280 }, { "epoch": 2.27, "learning_rate": 0.0002, "loss": 2.9157, "step": 5285 }, { "epoch": 2.27, "learning_rate": 0.0002, "loss": 2.9086, "step": 5290 }, { "epoch": 2.27, "learning_rate": 0.0002, "loss": 2.9067, "step": 5295 }, { "epoch": 2.28, "learning_rate": 0.0002, "loss": 2.9071, "step": 5300 }, { "epoch": 2.28, "learning_rate": 0.0002, "loss": 2.913, "step": 5305 }, { "epoch": 2.28, "learning_rate": 0.0002, "loss": 2.8918, "step": 5310 }, { "epoch": 2.28, "eval_loss": 2.8990283012390137, "eval_runtime": 5.29, "eval_samples_per_second": 1867.109, "eval_steps_per_second": 14.745, "eval_top3_3_weighted_f1_score ": 0.420600106378898, "eval_top_1_macro_f1_score": 0.05742805002784499, "eval_top_1_weighted_f1score": 0.19965523388204837, "eval_top_3_macro_f1_score": 0.1505476498909347, "step": 5312 }, { "epoch": 2.28, "learning_rate": 0.0002, "loss": 2.9106, "step": 5315 }, { "epoch": 2.28, "learning_rate": 0.0002, "loss": 2.8789, "step": 5320 }, { "epoch": 2.29, "learning_rate": 0.0002, "loss": 2.8928, "step": 5325 }, { "epoch": 2.29, "learning_rate": 0.0002, "loss": 2.8814, "step": 5330 }, { "epoch": 2.29, "learning_rate": 0.0002, "loss": 2.8978, "step": 5335 }, { "epoch": 2.29, "learning_rate": 0.0002, "loss": 2.875, "step": 5340 }, { "epoch": 2.29, "learning_rate": 0.0002, "loss": 2.9026, "step": 5345 }, { "epoch": 2.3, "learning_rate": 0.0002, "loss": 2.8901, "step": 5350 }, { "epoch": 2.3, "learning_rate": 0.0002, "loss": 2.9217, "step": 5355 }, { "epoch": 2.3, "learning_rate": 0.0002, "loss": 2.914, "step": 5360 }, { "epoch": 2.3, "learning_rate": 0.0002, "loss": 2.8997, "step": 5365 }, { "epoch": 2.31, "learning_rate": 0.0002, "loss": 2.901, "step": 5370 }, { "epoch": 2.31, "learning_rate": 0.0002, "loss": 2.8975, "step": 5375 }, { "epoch": 2.31, "eval_loss": 2.9051334857940674, "eval_runtime": 5.3756, "eval_samples_per_second": 1837.369, "eval_steps_per_second": 14.51, "eval_top3_3_weighted_f1_score ": 0.4154097299787692, "eval_top_1_macro_f1_score": 0.0580655682817357, "eval_top_1_weighted_f1score": 0.19945941252814092, "eval_top_3_macro_f1_score": 0.14492591699598248, "step": 5376 }, { "epoch": 2.31, "learning_rate": 0.0002, "loss": 2.8853, "step": 5380 }, { "epoch": 2.31, "learning_rate": 0.0002, "loss": 2.9137, "step": 5385 }, { "epoch": 2.31, "learning_rate": 0.0002, "loss": 2.9018, "step": 5390 }, { "epoch": 2.32, "learning_rate": 0.0002, "loss": 2.9064, "step": 5395 }, { "epoch": 2.32, "learning_rate": 0.0002, "loss": 2.8952, "step": 5400 }, { "epoch": 2.32, "learning_rate": 0.0002, "loss": 2.906, "step": 5405 }, { "epoch": 2.32, "learning_rate": 0.0002, "loss": 2.8936, "step": 5410 }, { "epoch": 2.32, "learning_rate": 0.0002, "loss": 2.8944, "step": 5415 }, { "epoch": 2.33, "learning_rate": 0.0002, "loss": 2.8801, "step": 5420 }, { "epoch": 2.33, "learning_rate": 0.0002, "loss": 2.93, "step": 5425 }, { "epoch": 2.33, "learning_rate": 0.0002, "loss": 2.8827, "step": 5430 }, { "epoch": 2.33, "learning_rate": 0.0002, "loss": 2.8971, "step": 5435 }, { "epoch": 2.34, "learning_rate": 0.0002, "loss": 2.9031, "step": 5440 }, { "epoch": 2.34, "eval_loss": 2.9040162563323975, "eval_runtime": 5.444, "eval_samples_per_second": 1814.282, "eval_steps_per_second": 14.328, "eval_top3_3_weighted_f1_score ": 0.41358052554168034, "eval_top_1_macro_f1_score": 0.05707689836126898, "eval_top_1_weighted_f1score": 0.19804938443363193, "eval_top_3_macro_f1_score": 0.14201305208124318, "step": 5440 }, { "epoch": 2.34, "learning_rate": 0.0002, "loss": 2.9092, "step": 5445 }, { "epoch": 2.34, "learning_rate": 0.0002, "loss": 2.9033, "step": 5450 }, { "epoch": 2.34, "learning_rate": 0.0002, "loss": 2.9152, "step": 5455 }, { "epoch": 2.34, "learning_rate": 0.0002, "loss": 2.9021, "step": 5460 }, { "epoch": 2.35, "learning_rate": 0.0002, "loss": 2.9093, "step": 5465 }, { "epoch": 2.35, "learning_rate": 0.0002, "loss": 2.8977, "step": 5470 }, { "epoch": 2.35, "learning_rate": 0.0002, "loss": 2.8999, "step": 5475 }, { "epoch": 2.35, "learning_rate": 0.0002, "loss": 2.9113, "step": 5480 }, { "epoch": 2.36, "learning_rate": 0.0002, "loss": 2.8996, "step": 5485 }, { "epoch": 2.36, "learning_rate": 0.0002, "loss": 2.9033, "step": 5490 }, { "epoch": 2.36, "learning_rate": 0.0002, "loss": 2.9038, "step": 5495 }, { "epoch": 2.36, "learning_rate": 0.0002, "loss": 2.9253, "step": 5500 }, { "epoch": 2.36, "eval_loss": 2.8994529247283936, "eval_runtime": 5.3486, "eval_samples_per_second": 1846.655, "eval_steps_per_second": 14.583, "eval_top3_3_weighted_f1_score ": 0.41753233385444316, "eval_top_1_macro_f1_score": 0.05349162501424304, "eval_top_1_weighted_f1score": 0.19682846206528506, "eval_top_3_macro_f1_score": 0.15138121336524843, "step": 5504 }, { "epoch": 2.36, "learning_rate": 0.0002, "loss": 2.8881, "step": 5505 }, { "epoch": 2.37, "learning_rate": 0.0002, "loss": 2.8859, "step": 5510 }, { "epoch": 2.37, "learning_rate": 0.0002, "loss": 2.9088, "step": 5515 }, { "epoch": 2.37, "learning_rate": 0.0002, "loss": 2.9117, "step": 5520 }, { "epoch": 2.37, "learning_rate": 0.0002, "loss": 2.9069, "step": 5525 }, { "epoch": 2.37, "learning_rate": 0.0002, "loss": 2.9107, "step": 5530 }, { "epoch": 2.38, "learning_rate": 0.0002, "loss": 2.9007, "step": 5535 }, { "epoch": 2.38, "learning_rate": 0.0002, "loss": 2.9023, "step": 5540 }, { "epoch": 2.38, "learning_rate": 0.0002, "loss": 2.8929, "step": 5545 }, { "epoch": 2.38, "learning_rate": 0.0002, "loss": 2.8813, "step": 5550 }, { "epoch": 2.39, "learning_rate": 0.0002, "loss": 2.8936, "step": 5555 }, { "epoch": 2.39, "learning_rate": 0.0002, "loss": 2.8711, "step": 5560 }, { "epoch": 2.39, "learning_rate": 0.0002, "loss": 2.9015, "step": 5565 }, { "epoch": 2.39, "eval_loss": 2.900458812713623, "eval_runtime": 5.326, "eval_samples_per_second": 1854.48, "eval_steps_per_second": 14.645, "eval_top3_3_weighted_f1_score ": 0.41880679197562753, "eval_top_1_macro_f1_score": 0.05999635911721816, "eval_top_1_weighted_f1score": 0.19856166956788507, "eval_top_3_macro_f1_score": 0.1492237171872263, "step": 5568 }, { "epoch": 2.39, "learning_rate": 0.0002, "loss": 2.9005, "step": 5570 }, { "epoch": 2.39, "learning_rate": 0.0002, "loss": 2.884, "step": 5575 }, { "epoch": 2.4, "learning_rate": 0.0002, "loss": 2.9037, "step": 5580 }, { "epoch": 2.4, "learning_rate": 0.0002, "loss": 2.9133, "step": 5585 }, { "epoch": 2.4, "learning_rate": 0.0002, "loss": 2.8886, "step": 5590 }, { "epoch": 2.4, "learning_rate": 0.0002, "loss": 2.9296, "step": 5595 }, { "epoch": 2.4, "learning_rate": 0.0002, "loss": 2.9045, "step": 5600 }, { "epoch": 2.41, "learning_rate": 0.0002, "loss": 2.8971, "step": 5605 }, { "epoch": 2.41, "learning_rate": 0.0002, "loss": 2.9326, "step": 5610 }, { "epoch": 2.41, "learning_rate": 0.0002, "loss": 2.8866, "step": 5615 }, { "epoch": 2.41, "learning_rate": 0.0002, "loss": 2.9046, "step": 5620 }, { "epoch": 2.42, "learning_rate": 0.0002, "loss": 2.8954, "step": 5625 }, { "epoch": 2.42, "learning_rate": 0.0002, "loss": 2.8775, "step": 5630 }, { "epoch": 2.42, "eval_loss": 2.9008569717407227, "eval_runtime": 5.377, "eval_samples_per_second": 1836.901, "eval_steps_per_second": 14.506, "eval_top3_3_weighted_f1_score ": 0.4188094807665909, "eval_top_1_macro_f1_score": 0.056329276543405866, "eval_top_1_weighted_f1score": 0.19788854247420265, "eval_top_3_macro_f1_score": 0.15469545654298356, "step": 5632 }, { "epoch": 2.42, "learning_rate": 0.0002, "loss": 2.892, "step": 5635 }, { "epoch": 2.42, "learning_rate": 0.0002, "loss": 2.8939, "step": 5640 }, { "epoch": 2.42, "learning_rate": 0.0002, "loss": 2.8942, "step": 5645 }, { "epoch": 2.43, "learning_rate": 0.0002, "loss": 2.8904, "step": 5650 }, { "epoch": 2.43, "learning_rate": 0.0002, "loss": 2.9093, "step": 5655 }, { "epoch": 2.43, "learning_rate": 0.0002, "loss": 2.9126, "step": 5660 }, { "epoch": 2.43, "learning_rate": 0.0002, "loss": 2.9113, "step": 5665 }, { "epoch": 2.43, "learning_rate": 0.0002, "loss": 2.9051, "step": 5670 }, { "epoch": 2.44, "learning_rate": 0.0002, "loss": 2.8914, "step": 5675 }, { "epoch": 2.44, "learning_rate": 0.0002, "loss": 2.8887, "step": 5680 }, { "epoch": 2.44, "learning_rate": 0.0002, "loss": 2.8919, "step": 5685 }, { "epoch": 2.44, "learning_rate": 0.0002, "loss": 2.8829, "step": 5690 }, { "epoch": 2.45, "learning_rate": 0.0002, "loss": 2.9113, "step": 5695 }, { "epoch": 2.45, "eval_loss": 2.8912224769592285, "eval_runtime": 5.2948, "eval_samples_per_second": 1865.409, "eval_steps_per_second": 14.731, "eval_top3_3_weighted_f1_score ": 0.42107093582619226, "eval_top_1_macro_f1_score": 0.06742872368412635, "eval_top_1_weighted_f1score": 0.20246055579316635, "eval_top_3_macro_f1_score": 0.15704454539282703, "step": 5696 }, { "epoch": 2.45, "learning_rate": 0.0002, "loss": 2.8917, "step": 5700 }, { "epoch": 2.45, "learning_rate": 0.0002, "loss": 2.8794, "step": 5705 }, { "epoch": 2.45, "learning_rate": 0.0002, "loss": 2.9101, "step": 5710 }, { "epoch": 2.45, "learning_rate": 0.0002, "loss": 2.8895, "step": 5715 }, { "epoch": 2.46, "learning_rate": 0.0002, "loss": 2.9059, "step": 5720 }, { "epoch": 2.46, "learning_rate": 0.0002, "loss": 2.8941, "step": 5725 }, { "epoch": 2.46, "learning_rate": 0.0002, "loss": 2.8942, "step": 5730 }, { "epoch": 2.46, "learning_rate": 0.0002, "loss": 2.8922, "step": 5735 }, { "epoch": 2.46, "learning_rate": 0.0002, "loss": 2.9017, "step": 5740 }, { "epoch": 2.47, "learning_rate": 0.0002, "loss": 2.9096, "step": 5745 }, { "epoch": 2.47, "learning_rate": 0.0002, "loss": 2.8884, "step": 5750 }, { "epoch": 2.47, "learning_rate": 0.0002, "loss": 2.9138, "step": 5755 }, { "epoch": 2.47, "learning_rate": 0.0002, "loss": 2.895, "step": 5760 }, { "epoch": 2.47, "eval_loss": 2.8928873538970947, "eval_runtime": 5.3553, "eval_samples_per_second": 1844.355, "eval_steps_per_second": 14.565, "eval_top3_3_weighted_f1_score ": 0.4188124182838734, "eval_top_1_macro_f1_score": 0.05698105154490517, "eval_top_1_weighted_f1score": 0.1969784382902619, "eval_top_3_macro_f1_score": 0.1468489374120009, "step": 5760 }, { "epoch": 2.48, "learning_rate": 0.0002, "loss": 2.8968, "step": 5765 }, { "epoch": 2.48, "learning_rate": 0.0002, "loss": 2.8991, "step": 5770 }, { "epoch": 2.48, "learning_rate": 0.0002, "loss": 2.91, "step": 5775 }, { "epoch": 2.48, "learning_rate": 0.0002, "loss": 2.9031, "step": 5780 }, { "epoch": 2.48, "learning_rate": 0.0002, "loss": 2.8902, "step": 5785 }, { "epoch": 2.49, "learning_rate": 0.0002, "loss": 2.8994, "step": 5790 }, { "epoch": 2.49, "learning_rate": 0.0002, "loss": 2.9227, "step": 5795 }, { "epoch": 2.49, "learning_rate": 0.0002, "loss": 2.9144, "step": 5800 }, { "epoch": 2.49, "learning_rate": 0.0002, "loss": 2.9039, "step": 5805 }, { "epoch": 2.49, "learning_rate": 0.0002, "loss": 2.8977, "step": 5810 }, { "epoch": 2.5, "learning_rate": 0.0002, "loss": 2.902, "step": 5815 }, { "epoch": 2.5, "learning_rate": 0.0002, "loss": 2.8849, "step": 5820 }, { "epoch": 2.5, "eval_loss": 2.8987321853637695, "eval_runtime": 5.3523, "eval_samples_per_second": 1845.359, "eval_steps_per_second": 14.573, "eval_top3_3_weighted_f1_score ": 0.41866179584826513, "eval_top_1_macro_f1_score": 0.05674525605488127, "eval_top_1_weighted_f1score": 0.19825922860539377, "eval_top_3_macro_f1_score": 0.1463405234615811, "step": 5824 }, { "epoch": 2.5, "learning_rate": 0.0002, "loss": 2.895, "step": 5825 }, { "epoch": 2.5, "learning_rate": 0.0002, "loss": 2.9044, "step": 5830 }, { "epoch": 2.51, "learning_rate": 0.0002, "loss": 2.9172, "step": 5835 }, { "epoch": 2.51, "learning_rate": 0.0002, "loss": 2.8954, "step": 5840 }, { "epoch": 2.51, "learning_rate": 0.0002, "loss": 2.8852, "step": 5845 }, { "epoch": 2.51, "learning_rate": 0.0002, "loss": 2.9082, "step": 5850 }, { "epoch": 2.51, "learning_rate": 0.0002, "loss": 2.8783, "step": 5855 }, { "epoch": 2.52, "learning_rate": 0.0002, "loss": 2.8873, "step": 5860 }, { "epoch": 2.52, "learning_rate": 0.0002, "loss": 2.9091, "step": 5865 }, { "epoch": 2.52, "learning_rate": 0.0002, "loss": 2.9112, "step": 5870 }, { "epoch": 2.52, "learning_rate": 0.0002, "loss": 2.8916, "step": 5875 }, { "epoch": 2.52, "learning_rate": 0.0002, "loss": 2.9009, "step": 5880 }, { "epoch": 2.53, "learning_rate": 0.0002, "loss": 2.9031, "step": 5885 }, { "epoch": 2.53, "eval_loss": 2.8963630199432373, "eval_runtime": 5.3128, "eval_samples_per_second": 1859.093, "eval_steps_per_second": 14.682, "eval_top3_3_weighted_f1_score ": 0.42364414548351786, "eval_top_1_macro_f1_score": 0.06334344575414973, "eval_top_1_weighted_f1score": 0.20622342804341917, "eval_top_3_macro_f1_score": 0.16110020412647388, "step": 5888 }, { "epoch": 2.53, "learning_rate": 0.0002, "loss": 2.9168, "step": 5890 }, { "epoch": 2.53, "learning_rate": 0.0002, "loss": 2.9014, "step": 5895 }, { "epoch": 2.53, "learning_rate": 0.0002, "loss": 2.9104, "step": 5900 }, { "epoch": 2.54, "learning_rate": 0.0002, "loss": 2.894, "step": 5905 }, { "epoch": 2.54, "learning_rate": 0.0002, "loss": 2.9175, "step": 5910 }, { "epoch": 2.54, "learning_rate": 0.0002, "loss": 2.9081, "step": 5915 }, { "epoch": 2.54, "learning_rate": 0.0002, "loss": 2.8837, "step": 5920 }, { "epoch": 2.54, "learning_rate": 0.0002, "loss": 2.886, "step": 5925 }, { "epoch": 2.55, "learning_rate": 0.0002, "loss": 2.8903, "step": 5930 }, { "epoch": 2.55, "learning_rate": 0.0002, "loss": 2.8903, "step": 5935 }, { "epoch": 2.55, "learning_rate": 0.0002, "loss": 2.9187, "step": 5940 }, { "epoch": 2.55, "learning_rate": 0.0002, "loss": 2.8963, "step": 5945 }, { "epoch": 2.55, "learning_rate": 0.0002, "loss": 2.8996, "step": 5950 }, { "epoch": 2.56, "eval_loss": 2.8969385623931885, "eval_runtime": 5.3385, "eval_samples_per_second": 1850.153, "eval_steps_per_second": 14.611, "eval_top3_3_weighted_f1_score ": 0.42083135452031184, "eval_top_1_macro_f1_score": 0.06035260452585553, "eval_top_1_weighted_f1score": 0.20025903908754178, "eval_top_3_macro_f1_score": 0.16009983415987541, "step": 5952 }, { "epoch": 2.56, "learning_rate": 0.0002, "loss": 2.9023, "step": 5955 }, { "epoch": 2.56, "learning_rate": 0.0002, "loss": 2.8774, "step": 5960 }, { "epoch": 2.56, "learning_rate": 0.0002, "loss": 2.8883, "step": 5965 }, { "epoch": 2.56, "learning_rate": 0.0002, "loss": 2.9057, "step": 5970 }, { "epoch": 2.57, "learning_rate": 0.0002, "loss": 2.884, "step": 5975 }, { "epoch": 2.57, "learning_rate": 0.0002, "loss": 2.9052, "step": 5980 }, { "epoch": 2.57, "learning_rate": 0.0002, "loss": 2.8892, "step": 5985 }, { "epoch": 2.57, "learning_rate": 0.0002, "loss": 2.9064, "step": 5990 }, { "epoch": 2.57, "learning_rate": 0.0002, "loss": 2.8888, "step": 5995 }, { "epoch": 2.58, "learning_rate": 0.0002, "loss": 2.8808, "step": 6000 }, { "epoch": 2.58, "learning_rate": 0.0002, "loss": 2.9019, "step": 6005 }, { "epoch": 2.58, "learning_rate": 0.0002, "loss": 2.8862, "step": 6010 }, { "epoch": 2.58, "learning_rate": 0.0002, "loss": 2.8906, "step": 6015 }, { "epoch": 2.58, "eval_loss": 2.8960154056549072, "eval_runtime": 5.3477, "eval_samples_per_second": 1846.951, "eval_steps_per_second": 14.586, "eval_top3_3_weighted_f1_score ": 0.4202857211737356, "eval_top_1_macro_f1_score": 0.05629361685201398, "eval_top_1_weighted_f1score": 0.19886688542729833, "eval_top_3_macro_f1_score": 0.15739346501860904, "step": 6016 }, { "epoch": 2.58, "learning_rate": 0.0002, "loss": 2.8876, "step": 6020 }, { "epoch": 2.59, "learning_rate": 0.0002, "loss": 2.9022, "step": 6025 }, { "epoch": 2.59, "learning_rate": 0.0002, "loss": 2.9002, "step": 6030 }, { "epoch": 2.59, "learning_rate": 0.0002, "loss": 2.8757, "step": 6035 }, { "epoch": 2.59, "learning_rate": 0.0002, "loss": 2.889, "step": 6040 }, { "epoch": 2.6, "learning_rate": 0.0002, "loss": 2.8832, "step": 6045 }, { "epoch": 2.6, "learning_rate": 0.0002, "loss": 2.8912, "step": 6050 }, { "epoch": 2.6, "learning_rate": 0.0002, "loss": 2.9111, "step": 6055 }, { "epoch": 2.6, "learning_rate": 0.0002, "loss": 2.8951, "step": 6060 }, { "epoch": 2.6, "learning_rate": 0.0002, "loss": 2.8937, "step": 6065 }, { "epoch": 2.61, "learning_rate": 0.0002, "loss": 2.905, "step": 6070 }, { "epoch": 2.61, "learning_rate": 0.0002, "loss": 2.8915, "step": 6075 }, { "epoch": 2.61, "learning_rate": 0.0002, "loss": 2.8974, "step": 6080 }, { "epoch": 2.61, "eval_loss": 2.895582675933838, "eval_runtime": 5.3637, "eval_samples_per_second": 1841.439, "eval_steps_per_second": 14.542, "eval_top3_3_weighted_f1_score ": 0.4176598072491688, "eval_top_1_macro_f1_score": 0.057114300468531984, "eval_top_1_weighted_f1score": 0.19814815553081722, "eval_top_3_macro_f1_score": 0.15260389700126822, "step": 6080 }, { "epoch": 2.61, "learning_rate": 0.0002, "loss": 2.9064, "step": 6085 }, { "epoch": 2.61, "learning_rate": 0.0002, "loss": 2.8916, "step": 6090 }, { "epoch": 2.62, "learning_rate": 0.0002, "loss": 2.8959, "step": 6095 }, { "epoch": 2.62, "learning_rate": 0.0002, "loss": 2.8922, "step": 6100 }, { "epoch": 2.62, "learning_rate": 0.0002, "loss": 2.8873, "step": 6105 }, { "epoch": 2.62, "learning_rate": 0.0002, "loss": 2.9052, "step": 6110 }, { "epoch": 2.63, "learning_rate": 0.0002, "loss": 2.8922, "step": 6115 }, { "epoch": 2.63, "learning_rate": 0.0002, "loss": 2.8906, "step": 6120 }, { "epoch": 2.63, "learning_rate": 0.0002, "loss": 2.9078, "step": 6125 }, { "epoch": 2.63, "learning_rate": 0.0002, "loss": 2.9076, "step": 6130 }, { "epoch": 2.63, "learning_rate": 0.0002, "loss": 2.8806, "step": 6135 }, { "epoch": 2.64, "learning_rate": 0.0002, "loss": 2.9068, "step": 6140 }, { "epoch": 2.64, "eval_loss": 2.8937771320343018, "eval_runtime": 5.3873, "eval_samples_per_second": 1833.398, "eval_steps_per_second": 14.479, "eval_top3_3_weighted_f1_score ": 0.4152767953063901, "eval_top_1_macro_f1_score": 0.05652294168706159, "eval_top_1_weighted_f1score": 0.1984693966121817, "eval_top_3_macro_f1_score": 0.14713473047869802, "step": 6144 }, { "epoch": 2.64, "learning_rate": 0.0002, "loss": 2.8899, "step": 6145 }, { "epoch": 2.64, "learning_rate": 0.0002, "loss": 2.9021, "step": 6150 }, { "epoch": 2.64, "learning_rate": 0.0002, "loss": 2.9115, "step": 6155 }, { "epoch": 2.64, "learning_rate": 0.0002, "loss": 2.8954, "step": 6160 }, { "epoch": 2.65, "learning_rate": 0.0002, "loss": 2.9016, "step": 6165 }, { "epoch": 2.65, "learning_rate": 0.0002, "loss": 2.8823, "step": 6170 }, { "epoch": 2.65, "learning_rate": 0.0002, "loss": 2.8805, "step": 6175 }, { "epoch": 2.65, "learning_rate": 0.0002, "loss": 2.924, "step": 6180 }, { "epoch": 2.66, "learning_rate": 0.0002, "loss": 2.9038, "step": 6185 }, { "epoch": 2.66, "learning_rate": 0.0002, "loss": 2.8957, "step": 6190 }, { "epoch": 2.66, "learning_rate": 0.0002, "loss": 2.8875, "step": 6195 }, { "epoch": 2.66, "learning_rate": 0.0002, "loss": 2.897, "step": 6200 }, { "epoch": 2.66, "learning_rate": 0.0002, "loss": 2.8915, "step": 6205 }, { "epoch": 2.67, "eval_loss": 2.896385669708252, "eval_runtime": 5.3292, "eval_samples_per_second": 1853.389, "eval_steps_per_second": 14.636, "eval_top3_3_weighted_f1_score ": 0.4153291557850485, "eval_top_1_macro_f1_score": 0.057094784382916375, "eval_top_1_weighted_f1score": 0.20055240712210706, "eval_top_3_macro_f1_score": 0.15311951608430358, "step": 6208 }, { "epoch": 2.67, "learning_rate": 0.0002, "loss": 2.8894, "step": 6210 }, { "epoch": 2.67, "learning_rate": 0.0002, "loss": 2.9108, "step": 6215 }, { "epoch": 2.67, "learning_rate": 0.0002, "loss": 2.8983, "step": 6220 }, { "epoch": 2.67, "learning_rate": 0.0002, "loss": 2.9117, "step": 6225 }, { "epoch": 2.67, "learning_rate": 0.0002, "loss": 2.8931, "step": 6230 }, { "epoch": 2.68, "learning_rate": 0.0002, "loss": 2.896, "step": 6235 }, { "epoch": 2.68, "learning_rate": 0.0002, "loss": 2.8996, "step": 6240 }, { "epoch": 2.68, "learning_rate": 0.0002, "loss": 2.8913, "step": 6245 }, { "epoch": 2.68, "learning_rate": 0.0002, "loss": 2.9074, "step": 6250 }, { "epoch": 2.69, "learning_rate": 0.0002, "loss": 2.8939, "step": 6255 }, { "epoch": 2.69, "learning_rate": 0.0002, "loss": 2.8737, "step": 6260 }, { "epoch": 2.69, "learning_rate": 0.0002, "loss": 2.8954, "step": 6265 }, { "epoch": 2.69, "learning_rate": 0.0002, "loss": 2.8976, "step": 6270 }, { "epoch": 2.69, "eval_loss": 2.8949334621429443, "eval_runtime": 5.4818, "eval_samples_per_second": 1801.796, "eval_steps_per_second": 14.229, "eval_top3_3_weighted_f1_score ": 0.41843966740612304, "eval_top_1_macro_f1_score": 0.060669798002958124, "eval_top_1_weighted_f1score": 0.2023168005976943, "eval_top_3_macro_f1_score": 0.15022535908324353, "step": 6272 }, { "epoch": 2.69, "learning_rate": 0.0002, "loss": 2.9054, "step": 6275 }, { "epoch": 2.7, "learning_rate": 0.0002, "loss": 2.8725, "step": 6280 }, { "epoch": 2.7, "learning_rate": 0.0002, "loss": 2.8898, "step": 6285 }, { "epoch": 2.7, "learning_rate": 0.0002, "loss": 2.8948, "step": 6290 }, { "epoch": 2.7, "learning_rate": 0.0002, "loss": 2.9011, "step": 6295 }, { "epoch": 2.7, "learning_rate": 0.0002, "loss": 2.882, "step": 6300 }, { "epoch": 2.71, "learning_rate": 0.0002, "loss": 2.9061, "step": 6305 }, { "epoch": 2.71, "learning_rate": 0.0002, "loss": 2.8826, "step": 6310 }, { "epoch": 2.71, "learning_rate": 0.0002, "loss": 2.9004, "step": 6315 }, { "epoch": 2.71, "learning_rate": 0.0002, "loss": 2.8911, "step": 6320 }, { "epoch": 2.72, "learning_rate": 0.0002, "loss": 2.8974, "step": 6325 }, { "epoch": 2.72, "learning_rate": 0.0002, "loss": 2.8867, "step": 6330 }, { "epoch": 2.72, "learning_rate": 0.0002, "loss": 2.8868, "step": 6335 }, { "epoch": 2.72, "eval_loss": 2.895129680633545, "eval_runtime": 5.3357, "eval_samples_per_second": 1851.133, "eval_steps_per_second": 14.619, "eval_top3_3_weighted_f1_score ": 0.41598930533361117, "eval_top_1_macro_f1_score": 0.0586003021627697, "eval_top_1_weighted_f1score": 0.1990979731060376, "eval_top_3_macro_f1_score": 0.15169411305263725, "step": 6336 }, { "epoch": 2.72, "learning_rate": 0.0002, "loss": 2.9008, "step": 6340 }, { "epoch": 2.72, "learning_rate": 0.0002, "loss": 2.9026, "step": 6345 }, { "epoch": 2.73, "learning_rate": 0.0002, "loss": 2.8753, "step": 6350 }, { "epoch": 2.73, "learning_rate": 0.0002, "loss": 2.9147, "step": 6355 }, { "epoch": 2.73, "learning_rate": 0.0002, "loss": 2.8997, "step": 6360 }, { "epoch": 2.73, "learning_rate": 0.0002, "loss": 2.9068, "step": 6365 }, { "epoch": 2.73, "learning_rate": 0.0002, "loss": 2.8695, "step": 6370 }, { "epoch": 2.74, "learning_rate": 0.0002, "loss": 2.8845, "step": 6375 }, { "epoch": 2.74, "learning_rate": 0.0002, "loss": 2.892, "step": 6380 }, { "epoch": 2.74, "learning_rate": 0.0002, "loss": 2.8928, "step": 6385 }, { "epoch": 2.74, "learning_rate": 0.0002, "loss": 2.8795, "step": 6390 }, { "epoch": 2.75, "learning_rate": 0.0002, "loss": 2.905, "step": 6395 }, { "epoch": 2.75, "learning_rate": 0.0002, "loss": 2.9009, "step": 6400 }, { "epoch": 2.75, "eval_loss": 2.899210214614868, "eval_runtime": 169.1747, "eval_samples_per_second": 58.383, "eval_steps_per_second": 0.461, "eval_top3_3_weighted_f1_score ": 0.4218197411776494, "eval_top_1_macro_f1_score": 0.05912854442318141, "eval_top_1_weighted_f1score": 0.20171983817246605, "eval_top_3_macro_f1_score": 0.15132117088198244, "step": 6400 }, { "epoch": 2.75, "learning_rate": 0.0002, "loss": 2.8763, "step": 6405 }, { "epoch": 2.75, "learning_rate": 0.0002, "loss": 2.8917, "step": 6410 }, { "epoch": 2.75, "learning_rate": 0.0002, "loss": 2.9015, "step": 6415 }, { "epoch": 2.76, "learning_rate": 0.0002, "loss": 2.8771, "step": 6420 }, { "epoch": 2.76, "learning_rate": 0.0002, "loss": 2.9113, "step": 6425 }, { "epoch": 2.76, "learning_rate": 0.0002, "loss": 2.8838, "step": 6430 }, { "epoch": 2.76, "learning_rate": 0.0002, "loss": 2.9129, "step": 6435 }, { "epoch": 2.76, "learning_rate": 0.0002, "loss": 2.8798, "step": 6440 }, { "epoch": 2.77, "learning_rate": 0.0002, "loss": 2.9101, "step": 6445 }, { "epoch": 2.77, "learning_rate": 0.0002, "loss": 2.8899, "step": 6450 }, { "epoch": 2.77, "learning_rate": 0.0002, "loss": 2.9031, "step": 6455 }, { "epoch": 2.77, "learning_rate": 0.0002, "loss": 2.8787, "step": 6460 }, { "epoch": 2.78, "eval_loss": 2.892040252685547, "eval_runtime": 5.3482, "eval_samples_per_second": 1846.806, "eval_steps_per_second": 14.584, "eval_top3_3_weighted_f1_score ": 0.4170486755456548, "eval_top_1_macro_f1_score": 0.05793422592556205, "eval_top_1_weighted_f1score": 0.19810835071144503, "eval_top_3_macro_f1_score": 0.1505479607296611, "step": 6464 }, { "epoch": 2.78, "learning_rate": 0.0002, "loss": 2.8964, "step": 6465 }, { "epoch": 2.78, "learning_rate": 0.0002, "loss": 2.8934, "step": 6470 }, { "epoch": 2.78, "learning_rate": 0.0002, "loss": 2.9021, "step": 6475 }, { "epoch": 2.78, "learning_rate": 0.0002, "loss": 2.9039, "step": 6480 }, { "epoch": 2.78, "learning_rate": 0.0002, "loss": 2.8895, "step": 6485 }, { "epoch": 2.79, "learning_rate": 0.0002, "loss": 2.8927, "step": 6490 }, { "epoch": 2.79, "learning_rate": 0.0002, "loss": 2.8772, "step": 6495 }, { "epoch": 2.79, "learning_rate": 0.0002, "loss": 2.901, "step": 6500 }, { "epoch": 2.79, "learning_rate": 0.0002, "loss": 2.9051, "step": 6505 }, { "epoch": 2.8, "learning_rate": 0.0002, "loss": 2.8878, "step": 6510 }, { "epoch": 2.8, "learning_rate": 0.0002, "loss": 2.9254, "step": 6515 }, { "epoch": 2.8, "learning_rate": 0.0002, "loss": 2.8969, "step": 6520 }, { "epoch": 2.8, "learning_rate": 0.0002, "loss": 2.9099, "step": 6525 }, { "epoch": 2.8, "eval_loss": 2.8924155235290527, "eval_runtime": 5.3573, "eval_samples_per_second": 1843.663, "eval_steps_per_second": 14.56, "eval_top3_3_weighted_f1_score ": 0.41909111506627666, "eval_top_1_macro_f1_score": 0.05690534821638667, "eval_top_1_weighted_f1score": 0.20215670730100305, "eval_top_3_macro_f1_score": 0.14864920864192577, "step": 6528 }, { "epoch": 2.8, "learning_rate": 0.0002, "loss": 2.8965, "step": 6530 }, { "epoch": 2.81, "learning_rate": 0.0002, "loss": 2.9013, "step": 6535 }, { "epoch": 2.81, "learning_rate": 0.0002, "loss": 2.9026, "step": 6540 }, { "epoch": 2.81, "learning_rate": 0.0002, "loss": 2.8956, "step": 6545 }, { "epoch": 2.81, "learning_rate": 0.0002, "loss": 2.8937, "step": 6550 }, { "epoch": 2.81, "learning_rate": 0.0002, "loss": 2.9018, "step": 6555 }, { "epoch": 2.82, "learning_rate": 0.0002, "loss": 2.8946, "step": 6560 }, { "epoch": 2.82, "learning_rate": 0.0002, "loss": 2.8994, "step": 6565 }, { "epoch": 2.82, "learning_rate": 0.0002, "loss": 2.899, "step": 6570 }, { "epoch": 2.82, "learning_rate": 0.0002, "loss": 2.8887, "step": 6575 }, { "epoch": 2.83, "learning_rate": 0.0002, "loss": 2.8928, "step": 6580 }, { "epoch": 2.83, "learning_rate": 0.0002, "loss": 2.8894, "step": 6585 }, { "epoch": 2.83, "learning_rate": 0.0002, "loss": 2.8895, "step": 6590 }, { "epoch": 2.83, "eval_loss": 2.897266149520874, "eval_runtime": 5.6923, "eval_samples_per_second": 1735.139, "eval_steps_per_second": 13.703, "eval_top3_3_weighted_f1_score ": 0.4202645153745453, "eval_top_1_macro_f1_score": 0.05525966538027519, "eval_top_1_weighted_f1score": 0.19904882008303484, "eval_top_3_macro_f1_score": 0.1464249193729017, "step": 6592 }, { "epoch": 2.83, "learning_rate": 0.0002, "loss": 2.9131, "step": 6595 }, { "epoch": 2.83, "learning_rate": 0.0002, "loss": 2.9116, "step": 6600 }, { "epoch": 2.84, "learning_rate": 0.0002, "loss": 2.8903, "step": 6605 }, { "epoch": 2.84, "learning_rate": 0.0002, "loss": 2.9147, "step": 6610 }, { "epoch": 2.84, "learning_rate": 0.0002, "loss": 2.9038, "step": 6615 }, { "epoch": 2.84, "learning_rate": 0.0002, "loss": 2.8877, "step": 6620 }, { "epoch": 2.84, "learning_rate": 0.0002, "loss": 2.9054, "step": 6625 }, { "epoch": 2.85, "learning_rate": 0.0002, "loss": 2.8871, "step": 6630 }, { "epoch": 2.85, "learning_rate": 0.0002, "loss": 2.9061, "step": 6635 }, { "epoch": 2.85, "learning_rate": 0.0002, "loss": 2.892, "step": 6640 }, { "epoch": 2.85, "learning_rate": 0.0002, "loss": 2.8879, "step": 6645 }, { "epoch": 2.86, "learning_rate": 0.0002, "loss": 2.8866, "step": 6650 }, { "epoch": 2.86, "learning_rate": 0.0002, "loss": 2.9116, "step": 6655 }, { "epoch": 2.86, "eval_loss": 2.8881497383117676, "eval_runtime": 5.3811, "eval_samples_per_second": 1835.5, "eval_steps_per_second": 14.495, "eval_top3_3_weighted_f1_score ": 0.4166262160847077, "eval_top_1_macro_f1_score": 0.05515885760505369, "eval_top_1_weighted_f1score": 0.19965289233016073, "eval_top_3_macro_f1_score": 0.15554556026877253, "step": 6656 }, { "epoch": 2.86, "learning_rate": 0.0002, "loss": 2.8949, "step": 6660 }, { "epoch": 2.86, "learning_rate": 0.0002, "loss": 2.8776, "step": 6665 }, { "epoch": 2.86, "learning_rate": 0.0002, "loss": 2.9125, "step": 6670 }, { "epoch": 2.87, "learning_rate": 0.0002, "loss": 2.8886, "step": 6675 }, { "epoch": 2.87, "learning_rate": 0.0002, "loss": 2.9188, "step": 6680 }, { "epoch": 2.87, "learning_rate": 0.0002, "loss": 2.9127, "step": 6685 }, { "epoch": 2.87, "learning_rate": 0.0002, "loss": 2.8796, "step": 6690 }, { "epoch": 2.87, "learning_rate": 0.0002, "loss": 2.8876, "step": 6695 }, { "epoch": 2.88, "learning_rate": 0.0002, "loss": 2.8901, "step": 6700 }, { "epoch": 2.88, "learning_rate": 0.0002, "loss": 2.9042, "step": 6705 }, { "epoch": 2.88, "learning_rate": 0.0002, "loss": 2.9133, "step": 6710 }, { "epoch": 2.88, "learning_rate": 0.0002, "loss": 2.8916, "step": 6715 }, { "epoch": 2.89, "learning_rate": 0.0002, "loss": 2.9015, "step": 6720 }, { "epoch": 2.89, "eval_loss": 2.8918018341064453, "eval_runtime": 5.4058, "eval_samples_per_second": 1827.113, "eval_steps_per_second": 14.429, "eval_top3_3_weighted_f1_score ": 0.42017445310857415, "eval_top_1_macro_f1_score": 0.05860894422257588, "eval_top_1_weighted_f1score": 0.20191660438221576, "eval_top_3_macro_f1_score": 0.15352878658061833, "step": 6720 }, { "epoch": 2.89, "learning_rate": 0.0002, "loss": 2.8796, "step": 6725 }, { "epoch": 2.89, "learning_rate": 0.0002, "loss": 2.9098, "step": 6730 }, { "epoch": 2.89, "learning_rate": 0.0002, "loss": 2.8912, "step": 6735 }, { "epoch": 2.89, "learning_rate": 0.0002, "loss": 2.8922, "step": 6740 }, { "epoch": 2.9, "learning_rate": 0.0002, "loss": 2.9009, "step": 6745 }, { "epoch": 2.9, "learning_rate": 0.0002, "loss": 2.8948, "step": 6750 }, { "epoch": 2.9, "learning_rate": 0.0002, "loss": 2.8842, "step": 6755 }, { "epoch": 2.9, "learning_rate": 0.0002, "loss": 2.8905, "step": 6760 }, { "epoch": 2.9, "learning_rate": 0.0002, "loss": 2.8984, "step": 6765 }, { "epoch": 2.91, "learning_rate": 0.0002, "loss": 2.871, "step": 6770 }, { "epoch": 2.91, "learning_rate": 0.0002, "loss": 2.898, "step": 6775 }, { "epoch": 2.91, "learning_rate": 0.0002, "loss": 2.8749, "step": 6780 }, { "epoch": 2.91, "eval_loss": 2.892881393432617, "eval_runtime": 5.4106, "eval_samples_per_second": 1825.483, "eval_steps_per_second": 14.416, "eval_top3_3_weighted_f1_score ": 0.4206111838061315, "eval_top_1_macro_f1_score": 0.058917812133703555, "eval_top_1_weighted_f1score": 0.20102460044812717, "eval_top_3_macro_f1_score": 0.14910007170370257, "step": 6784 }, { "epoch": 2.91, "learning_rate": 0.0002, "loss": 2.8994, "step": 6785 }, { "epoch": 2.92, "learning_rate": 0.0002, "loss": 2.9037, "step": 6790 }, { "epoch": 2.92, "learning_rate": 0.0002, "loss": 2.8989, "step": 6795 }, { "epoch": 2.92, "learning_rate": 0.0002, "loss": 2.8828, "step": 6800 }, { "epoch": 2.92, "learning_rate": 0.0002, "loss": 2.9208, "step": 6805 }, { "epoch": 2.92, "learning_rate": 0.0002, "loss": 2.896, "step": 6810 }, { "epoch": 2.93, "learning_rate": 0.0002, "loss": 2.8883, "step": 6815 }, { "epoch": 2.93, "learning_rate": 0.0002, "loss": 2.8927, "step": 6820 }, { "epoch": 2.93, "learning_rate": 0.0002, "loss": 2.9005, "step": 6825 }, { "epoch": 2.93, "learning_rate": 0.0002, "loss": 2.8785, "step": 6830 }, { "epoch": 2.93, "learning_rate": 0.0002, "loss": 2.9018, "step": 6835 }, { "epoch": 2.94, "learning_rate": 0.0002, "loss": 2.9038, "step": 6840 }, { "epoch": 2.94, "learning_rate": 0.0002, "loss": 2.8796, "step": 6845 }, { "epoch": 2.94, "eval_loss": 2.8879458904266357, "eval_runtime": 5.326, "eval_samples_per_second": 1854.496, "eval_steps_per_second": 14.645, "eval_top3_3_weighted_f1_score ": 0.42427756331941385, "eval_top_1_macro_f1_score": 0.06002926605638728, "eval_top_1_weighted_f1score": 0.19956929238546744, "eval_top_3_macro_f1_score": 0.15784711325428397, "step": 6848 }, { "epoch": 2.94, "learning_rate": 0.0002, "loss": 2.899, "step": 6850 }, { "epoch": 2.94, "learning_rate": 0.0002, "loss": 2.884, "step": 6855 }, { "epoch": 2.95, "learning_rate": 0.0002, "loss": 2.8927, "step": 6860 }, { "epoch": 2.95, "learning_rate": 0.0002, "loss": 2.8962, "step": 6865 }, { "epoch": 2.95, "learning_rate": 0.0002, "loss": 2.8889, "step": 6870 }, { "epoch": 2.95, "learning_rate": 0.0002, "loss": 2.8822, "step": 6875 }, { "epoch": 2.95, "learning_rate": 0.0002, "loss": 2.8929, "step": 6880 }, { "epoch": 2.96, "learning_rate": 0.0002, "loss": 2.8961, "step": 6885 }, { "epoch": 2.96, "learning_rate": 0.0002, "loss": 2.8792, "step": 6890 }, { "epoch": 2.96, "learning_rate": 0.0002, "loss": 2.9299, "step": 6895 }, { "epoch": 2.96, "learning_rate": 0.0002, "loss": 2.9027, "step": 6900 }, { "epoch": 2.96, "learning_rate": 0.0002, "loss": 2.8931, "step": 6905 }, { "epoch": 2.97, "learning_rate": 0.0002, "loss": 2.9024, "step": 6910 }, { "epoch": 2.97, "eval_loss": 2.889580488204956, "eval_runtime": 5.3297, "eval_samples_per_second": 1853.184, "eval_steps_per_second": 14.635, "eval_top3_3_weighted_f1_score ": 0.42118660139022474, "eval_top_1_macro_f1_score": 0.061585736587491564, "eval_top_1_weighted_f1score": 0.1995878237563215, "eval_top_3_macro_f1_score": 0.16217028756043067, "step": 6912 }, { "epoch": 2.97, "learning_rate": 0.0002, "loss": 2.8887, "step": 6915 }, { "epoch": 2.97, "learning_rate": 0.0002, "loss": 2.8887, "step": 6920 }, { "epoch": 2.97, "learning_rate": 0.0002, "loss": 2.8983, "step": 6925 }, { "epoch": 2.98, "learning_rate": 0.0002, "loss": 2.8955, "step": 6930 }, { "epoch": 2.98, "learning_rate": 0.0002, "loss": 2.8833, "step": 6935 }, { "epoch": 2.98, "learning_rate": 0.0002, "loss": 2.8877, "step": 6940 }, { "epoch": 2.98, "learning_rate": 0.0002, "loss": 2.9031, "step": 6945 }, { "epoch": 2.98, "learning_rate": 0.0002, "loss": 2.8919, "step": 6950 }, { "epoch": 2.99, "learning_rate": 0.0002, "loss": 2.9103, "step": 6955 }, { "epoch": 2.99, "learning_rate": 0.0002, "loss": 2.8871, "step": 6960 }, { "epoch": 2.99, "learning_rate": 0.0002, "loss": 2.9042, "step": 6965 }, { "epoch": 2.99, "learning_rate": 0.0002, "loss": 2.9028, "step": 6970 }, { "epoch": 2.99, "learning_rate": 0.0002, "loss": 2.8965, "step": 6975 }, { "epoch": 3.0, "eval_loss": 2.894157648086548, "eval_runtime": 5.7095, "eval_samples_per_second": 1729.922, "eval_steps_per_second": 13.661, "eval_top3_3_weighted_f1_score ": 0.415873483256903, "eval_top_1_macro_f1_score": 0.05969227264761643, "eval_top_1_weighted_f1score": 0.2007634987366759, "eval_top_3_macro_f1_score": 0.14123113694671527, "step": 6976 }, { "epoch": 3.0, "learning_rate": 0.0002, "loss": 2.8804, "step": 6980 }, { "epoch": 3.0, "learning_rate": 0.0002, "loss": 2.9018, "step": 6985 }, { "epoch": 3.0, "learning_rate": 0.0002, "loss": 3.1866, "step": 6990 }, { "epoch": 3.0, "learning_rate": 0.0002, "loss": 2.8849, "step": 6995 }, { "epoch": 3.01, "learning_rate": 0.0002, "loss": 2.8799, "step": 7000 }, { "epoch": 3.01, "learning_rate": 0.0002, "loss": 2.8856, "step": 7005 }, { "epoch": 3.01, "learning_rate": 0.0002, "loss": 2.878, "step": 7010 }, { "epoch": 3.01, "learning_rate": 0.0002, "loss": 2.8811, "step": 7015 }, { "epoch": 3.01, "learning_rate": 0.0002, "loss": 2.9049, "step": 7020 }, { "epoch": 3.02, "learning_rate": 0.0002, "loss": 2.8864, "step": 7025 }, { "epoch": 3.02, "learning_rate": 0.0002, "loss": 2.8818, "step": 7030 }, { "epoch": 3.02, "learning_rate": 0.0002, "loss": 2.8784, "step": 7035 }, { "epoch": 3.02, "learning_rate": 0.0002, "loss": 2.885, "step": 7040 }, { "epoch": 3.02, "eval_loss": 2.888051986694336, "eval_runtime": 5.3058, "eval_samples_per_second": 1861.536, "eval_steps_per_second": 14.701, "eval_top3_3_weighted_f1_score ": 0.41846760208054623, "eval_top_1_macro_f1_score": 0.05625990712567595, "eval_top_1_weighted_f1score": 0.20095345295287725, "eval_top_3_macro_f1_score": 0.15870986664918688, "step": 7040 }, { "epoch": 3.02, "learning_rate": 0.0002, "loss": 2.8778, "step": 7045 }, { "epoch": 3.03, "learning_rate": 0.0002, "loss": 2.8966, "step": 7050 }, { "epoch": 3.03, "learning_rate": 0.0002, "loss": 2.882, "step": 7055 }, { "epoch": 3.03, "learning_rate": 0.0002, "loss": 2.9075, "step": 7060 }, { "epoch": 3.03, "learning_rate": 0.0002, "loss": 2.8859, "step": 7065 }, { "epoch": 3.04, "learning_rate": 0.0002, "loss": 2.8881, "step": 7070 }, { "epoch": 3.04, "learning_rate": 0.0002, "loss": 2.8851, "step": 7075 }, { "epoch": 3.04, "learning_rate": 0.0002, "loss": 2.8995, "step": 7080 }, { "epoch": 3.04, "learning_rate": 0.0002, "loss": 2.9107, "step": 7085 }, { "epoch": 3.04, "learning_rate": 0.0002, "loss": 2.8927, "step": 7090 }, { "epoch": 3.05, "learning_rate": 0.0002, "loss": 2.8799, "step": 7095 }, { "epoch": 3.05, "learning_rate": 0.0002, "loss": 2.8879, "step": 7100 }, { "epoch": 3.05, "eval_loss": 2.8877265453338623, "eval_runtime": 5.3303, "eval_samples_per_second": 1853.0, "eval_steps_per_second": 14.633, "eval_top3_3_weighted_f1_score ": 0.42163813405919864, "eval_top_1_macro_f1_score": 0.05934196938210321, "eval_top_1_weighted_f1score": 0.20608303826374583, "eval_top_3_macro_f1_score": 0.1563438316036981, "step": 7104 }, { "epoch": 3.05, "learning_rate": 0.0002, "loss": 2.8737, "step": 7105 }, { "epoch": 3.05, "learning_rate": 0.0002, "loss": 2.8776, "step": 7110 }, { "epoch": 3.05, "learning_rate": 0.0002, "loss": 2.8626, "step": 7115 }, { "epoch": 3.06, "learning_rate": 0.0002, "loss": 2.8891, "step": 7120 }, { "epoch": 3.06, "learning_rate": 0.0002, "loss": 2.8948, "step": 7125 }, { "epoch": 3.06, "learning_rate": 0.0002, "loss": 2.8976, "step": 7130 }, { "epoch": 3.06, "learning_rate": 0.0002, "loss": 2.8962, "step": 7135 }, { "epoch": 3.07, "learning_rate": 0.0002, "loss": 2.9038, "step": 7140 }, { "epoch": 3.07, "learning_rate": 0.0002, "loss": 2.8992, "step": 7145 }, { "epoch": 3.07, "learning_rate": 0.0002, "loss": 2.8897, "step": 7150 }, { "epoch": 3.07, "learning_rate": 0.0002, "loss": 2.8659, "step": 7155 }, { "epoch": 3.07, "learning_rate": 0.0002, "loss": 2.8967, "step": 7160 }, { "epoch": 3.08, "learning_rate": 0.0002, "loss": 2.8697, "step": 7165 }, { "epoch": 3.08, "eval_loss": 2.8849167823791504, "eval_runtime": 5.313, "eval_samples_per_second": 1859.021, "eval_steps_per_second": 14.681, "eval_top3_3_weighted_f1_score ": 0.41884094061088384, "eval_top_1_macro_f1_score": 0.06004807726120719, "eval_top_1_weighted_f1score": 0.20160694888346856, "eval_top_3_macro_f1_score": 0.1554945377775778, "step": 7168 }, { "epoch": 3.08, "learning_rate": 0.0002, "loss": 2.8904, "step": 7170 }, { "epoch": 3.08, "learning_rate": 0.0002, "loss": 2.8731, "step": 7175 }, { "epoch": 3.08, "learning_rate": 0.0002, "loss": 2.9, "step": 7180 }, { "epoch": 3.08, "learning_rate": 0.0002, "loss": 2.8707, "step": 7185 }, { "epoch": 3.09, "learning_rate": 0.0002, "loss": 2.8841, "step": 7190 }, { "epoch": 3.09, "learning_rate": 0.0002, "loss": 2.8873, "step": 7195 }, { "epoch": 3.09, "learning_rate": 0.0002, "loss": 2.8837, "step": 7200 }, { "epoch": 3.09, "learning_rate": 0.0002, "loss": 2.8919, "step": 7205 }, { "epoch": 3.1, "learning_rate": 0.0002, "loss": 2.8859, "step": 7210 }, { "epoch": 3.1, "learning_rate": 0.0002, "loss": 2.9038, "step": 7215 }, { "epoch": 3.1, "learning_rate": 0.0002, "loss": 2.904, "step": 7220 }, { "epoch": 3.1, "learning_rate": 0.0002, "loss": 2.8747, "step": 7225 }, { "epoch": 3.1, "learning_rate": 0.0002, "loss": 2.8997, "step": 7230 }, { "epoch": 3.11, "eval_loss": 2.884518623352051, "eval_runtime": 5.3065, "eval_samples_per_second": 1861.319, "eval_steps_per_second": 14.699, "eval_top3_3_weighted_f1_score ": 0.42023967321832556, "eval_top_1_macro_f1_score": 0.06522960041748352, "eval_top_1_weighted_f1score": 0.20200750846480445, "eval_top_3_macro_f1_score": 0.1600667571620107, "step": 7232 }, { "epoch": 3.11, "learning_rate": 0.0002, "loss": 2.8931, "step": 7235 }, { "epoch": 3.11, "learning_rate": 0.0002, "loss": 2.8625, "step": 7240 }, { "epoch": 3.11, "learning_rate": 0.0002, "loss": 2.8752, "step": 7245 }, { "epoch": 3.11, "learning_rate": 0.0002, "loss": 2.9108, "step": 7250 }, { "epoch": 3.12, "learning_rate": 0.0002, "loss": 2.8663, "step": 7255 }, { "epoch": 3.12, "learning_rate": 0.0002, "loss": 2.8862, "step": 7260 }, { "epoch": 3.12, "learning_rate": 0.0002, "loss": 2.8968, "step": 7265 }, { "epoch": 3.12, "learning_rate": 0.0002, "loss": 2.8676, "step": 7270 }, { "epoch": 3.12, "learning_rate": 0.0002, "loss": 2.8632, "step": 7275 }, { "epoch": 3.13, "learning_rate": 0.0002, "loss": 2.8851, "step": 7280 }, { "epoch": 3.13, "learning_rate": 0.0002, "loss": 2.8964, "step": 7285 }, { "epoch": 3.13, "learning_rate": 0.0002, "loss": 2.8879, "step": 7290 }, { "epoch": 3.13, "learning_rate": 0.0002, "loss": 2.9061, "step": 7295 }, { "epoch": 3.13, "eval_loss": 2.886598587036133, "eval_runtime": 5.3853, "eval_samples_per_second": 1834.071, "eval_steps_per_second": 14.484, "eval_top3_3_weighted_f1_score ": 0.42203774980654735, "eval_top_1_macro_f1_score": 0.06814627498070826, "eval_top_1_weighted_f1score": 0.20365995015379834, "eval_top_3_macro_f1_score": 0.15943133680115384, "step": 7296 }, { "epoch": 3.13, "learning_rate": 0.0002, "loss": 2.8975, "step": 7300 }, { "epoch": 3.14, "learning_rate": 0.0002, "loss": 2.8634, "step": 7305 }, { "epoch": 3.14, "learning_rate": 0.0002, "loss": 2.8952, "step": 7310 }, { "epoch": 3.14, "learning_rate": 0.0002, "loss": 2.8895, "step": 7315 }, { "epoch": 3.14, "learning_rate": 0.0002, "loss": 2.8759, "step": 7320 }, { "epoch": 3.15, "learning_rate": 0.0002, "loss": 2.8962, "step": 7325 }, { "epoch": 3.15, "learning_rate": 0.0002, "loss": 2.8806, "step": 7330 }, { "epoch": 3.15, "learning_rate": 0.0002, "loss": 2.8977, "step": 7335 }, { "epoch": 3.15, "learning_rate": 0.0002, "loss": 2.8829, "step": 7340 }, { "epoch": 3.15, "learning_rate": 0.0002, "loss": 2.8727, "step": 7345 }, { "epoch": 3.16, "learning_rate": 0.0002, "loss": 2.9073, "step": 7350 }, { "epoch": 3.16, "learning_rate": 0.0002, "loss": 2.8897, "step": 7355 }, { "epoch": 3.16, "learning_rate": 0.0002, "loss": 2.9069, "step": 7360 }, { "epoch": 3.16, "eval_loss": 2.8906261920928955, "eval_runtime": 5.3997, "eval_samples_per_second": 1829.178, "eval_steps_per_second": 14.445, "eval_top3_3_weighted_f1_score ": 0.4215247529697737, "eval_top_1_macro_f1_score": 0.057807715800752876, "eval_top_1_weighted_f1score": 0.20332823261700023, "eval_top_3_macro_f1_score": 0.15819296969443752, "step": 7360 }, { "epoch": 3.16, "learning_rate": 0.0002, "loss": 2.8936, "step": 7365 }, { "epoch": 3.16, "learning_rate": 0.0002, "loss": 2.9033, "step": 7370 }, { "epoch": 3.17, "learning_rate": 0.0002, "loss": 2.8953, "step": 7375 }, { "epoch": 3.17, "learning_rate": 0.0002, "loss": 2.8828, "step": 7380 }, { "epoch": 3.17, "learning_rate": 0.0002, "loss": 2.8767, "step": 7385 }, { "epoch": 3.17, "learning_rate": 0.0002, "loss": 2.8889, "step": 7390 }, { "epoch": 3.18, "learning_rate": 0.0002, "loss": 2.8801, "step": 7395 }, { "epoch": 3.18, "learning_rate": 0.0002, "loss": 2.8718, "step": 7400 }, { "epoch": 3.18, "learning_rate": 0.0002, "loss": 2.8986, "step": 7405 }, { "epoch": 3.18, "learning_rate": 0.0002, "loss": 2.8828, "step": 7410 }, { "epoch": 3.18, "learning_rate": 0.0002, "loss": 2.877, "step": 7415 }, { "epoch": 3.19, "learning_rate": 0.0002, "loss": 2.9114, "step": 7420 }, { "epoch": 3.19, "eval_loss": 2.892573595046997, "eval_runtime": 5.7812, "eval_samples_per_second": 1708.454, "eval_steps_per_second": 13.492, "eval_top3_3_weighted_f1_score ": 0.418720602900631, "eval_top_1_macro_f1_score": 0.05863396154566181, "eval_top_1_weighted_f1score": 0.1981049993087968, "eval_top_3_macro_f1_score": 0.15423932435130192, "step": 7424 }, { "epoch": 3.19, "learning_rate": 0.0002, "loss": 2.883, "step": 7425 }, { "epoch": 3.19, "learning_rate": 0.0002, "loss": 2.8825, "step": 7430 }, { "epoch": 3.19, "learning_rate": 0.0002, "loss": 2.8918, "step": 7435 }, { "epoch": 3.19, "learning_rate": 0.0002, "loss": 2.8921, "step": 7440 }, { "epoch": 3.2, "learning_rate": 0.0002, "loss": 2.8748, "step": 7445 }, { "epoch": 3.2, "learning_rate": 0.0002, "loss": 2.8846, "step": 7450 }, { "epoch": 3.2, "learning_rate": 0.0002, "loss": 2.8992, "step": 7455 }, { "epoch": 3.2, "learning_rate": 0.0002, "loss": 2.8784, "step": 7460 }, { "epoch": 3.21, "learning_rate": 0.0002, "loss": 2.8865, "step": 7465 }, { "epoch": 3.21, "learning_rate": 0.0002, "loss": 2.8897, "step": 7470 }, { "epoch": 3.21, "learning_rate": 0.0002, "loss": 2.8738, "step": 7475 }, { "epoch": 3.21, "learning_rate": 0.0002, "loss": 2.8643, "step": 7480 }, { "epoch": 3.21, "learning_rate": 0.0002, "loss": 2.8745, "step": 7485 }, { "epoch": 3.22, "eval_loss": 2.8910961151123047, "eval_runtime": 5.3508, "eval_samples_per_second": 1845.882, "eval_steps_per_second": 14.577, "eval_top3_3_weighted_f1_score ": 0.4186955517891209, "eval_top_1_macro_f1_score": 0.0612936693324126, "eval_top_1_weighted_f1score": 0.19950173195710216, "eval_top_3_macro_f1_score": 0.15471827358614995, "step": 7488 }, { "epoch": 3.22, "learning_rate": 0.0002, "loss": 2.8887, "step": 7490 }, { "epoch": 3.22, "learning_rate": 0.0002, "loss": 2.891, "step": 7495 }, { "epoch": 3.22, "learning_rate": 0.0002, "loss": 2.8935, "step": 7500 }, { "epoch": 3.22, "learning_rate": 0.0002, "loss": 2.8885, "step": 7505 }, { "epoch": 3.22, "learning_rate": 0.0002, "loss": 2.8802, "step": 7510 }, { "epoch": 3.23, "learning_rate": 0.0002, "loss": 2.892, "step": 7515 }, { "epoch": 3.23, "learning_rate": 0.0002, "loss": 2.9136, "step": 7520 }, { "epoch": 3.23, "learning_rate": 0.0002, "loss": 2.8675, "step": 7525 }, { "epoch": 3.23, "learning_rate": 0.0002, "loss": 2.8879, "step": 7530 }, { "epoch": 3.24, "learning_rate": 0.0002, "loss": 2.9036, "step": 7535 }, { "epoch": 3.24, "learning_rate": 0.0002, "loss": 2.8844, "step": 7540 }, { "epoch": 3.24, "learning_rate": 0.0002, "loss": 2.8935, "step": 7545 }, { "epoch": 3.24, "learning_rate": 0.0002, "loss": 2.8796, "step": 7550 }, { "epoch": 3.24, "eval_loss": 2.8844926357269287, "eval_runtime": 5.3149, "eval_samples_per_second": 1858.373, "eval_steps_per_second": 14.676, "eval_top3_3_weighted_f1_score ": 0.4227162451195223, "eval_top_1_macro_f1_score": 0.06140110811329589, "eval_top_1_weighted_f1score": 0.20275439396155656, "eval_top_3_macro_f1_score": 0.16255781638160421, "step": 7552 }, { "epoch": 3.24, "learning_rate": 0.0002, "loss": 2.881, "step": 7555 }, { "epoch": 3.25, "learning_rate": 0.0002, "loss": 2.8826, "step": 7560 }, { "epoch": 3.25, "learning_rate": 0.0002, "loss": 2.885, "step": 7565 }, { "epoch": 3.25, "learning_rate": 0.0002, "loss": 2.8933, "step": 7570 }, { "epoch": 3.25, "learning_rate": 0.0002, "loss": 2.8866, "step": 7575 }, { "epoch": 3.25, "learning_rate": 0.0002, "loss": 2.902, "step": 7580 }, { "epoch": 3.26, "learning_rate": 0.0002, "loss": 2.8856, "step": 7585 }, { "epoch": 3.26, "learning_rate": 0.0002, "loss": 2.8819, "step": 7590 }, { "epoch": 3.26, "learning_rate": 0.0002, "loss": 2.877, "step": 7595 }, { "epoch": 3.26, "learning_rate": 0.0002, "loss": 2.8728, "step": 7600 }, { "epoch": 3.27, "learning_rate": 0.0002, "loss": 2.9072, "step": 7605 }, { "epoch": 3.27, "learning_rate": 0.0002, "loss": 2.8713, "step": 7610 }, { "epoch": 3.27, "learning_rate": 0.0002, "loss": 2.8873, "step": 7615 }, { "epoch": 3.27, "eval_loss": 2.8841776847839355, "eval_runtime": 5.327, "eval_samples_per_second": 1854.134, "eval_steps_per_second": 14.642, "eval_top3_3_weighted_f1_score ": 0.4187903482484385, "eval_top_1_macro_f1_score": 0.05984177228173996, "eval_top_1_weighted_f1score": 0.20498038698864343, "eval_top_3_macro_f1_score": 0.15411286732053833, "step": 7616 }, { "epoch": 3.27, "learning_rate": 0.0002, "loss": 2.8939, "step": 7620 }, { "epoch": 3.27, "learning_rate": 0.0002, "loss": 2.8701, "step": 7625 }, { "epoch": 3.28, "learning_rate": 0.0002, "loss": 2.8888, "step": 7630 }, { "epoch": 3.28, "learning_rate": 0.0002, "loss": 2.8777, "step": 7635 }, { "epoch": 3.28, "learning_rate": 0.0002, "loss": 2.8815, "step": 7640 }, { "epoch": 3.28, "learning_rate": 0.0002, "loss": 2.8801, "step": 7645 }, { "epoch": 3.28, "learning_rate": 0.0002, "loss": 2.8839, "step": 7650 }, { "epoch": 3.29, "learning_rate": 0.0002, "loss": 2.8764, "step": 7655 }, { "epoch": 3.29, "learning_rate": 0.0002, "loss": 2.8993, "step": 7660 }, { "epoch": 3.29, "learning_rate": 0.0002, "loss": 2.8922, "step": 7665 }, { "epoch": 3.29, "learning_rate": 0.0002, "loss": 2.8862, "step": 7670 }, { "epoch": 3.3, "learning_rate": 0.0002, "loss": 2.8891, "step": 7675 }, { "epoch": 3.3, "learning_rate": 0.0002, "loss": 2.8825, "step": 7680 }, { "epoch": 3.3, "eval_loss": 2.894038438796997, "eval_runtime": 5.3658, "eval_samples_per_second": 1840.748, "eval_steps_per_second": 14.537, "eval_top3_3_weighted_f1_score ": 0.4202346727377731, "eval_top_1_macro_f1_score": 0.06032218990545729, "eval_top_1_weighted_f1score": 0.2011031684607638, "eval_top_3_macro_f1_score": 0.15071342049635814, "step": 7680 }, { "epoch": 3.3, "learning_rate": 0.0002, "loss": 2.898, "step": 7685 }, { "epoch": 3.3, "learning_rate": 0.0002, "loss": 2.8607, "step": 7690 }, { "epoch": 3.3, "learning_rate": 0.0002, "loss": 2.8781, "step": 7695 }, { "epoch": 3.31, "learning_rate": 0.0002, "loss": 2.874, "step": 7700 }, { "epoch": 3.31, "learning_rate": 0.0002, "loss": 2.8886, "step": 7705 }, { "epoch": 3.31, "learning_rate": 0.0002, "loss": 2.8876, "step": 7710 }, { "epoch": 3.31, "learning_rate": 0.0002, "loss": 2.8901, "step": 7715 }, { "epoch": 3.31, "learning_rate": 0.0002, "loss": 2.8837, "step": 7720 }, { "epoch": 3.32, "learning_rate": 0.0002, "loss": 2.885, "step": 7725 }, { "epoch": 3.32, "learning_rate": 0.0002, "loss": 2.8722, "step": 7730 }, { "epoch": 3.32, "learning_rate": 0.0002, "loss": 2.8758, "step": 7735 }, { "epoch": 3.32, "learning_rate": 0.0002, "loss": 2.9031, "step": 7740 }, { "epoch": 3.32, "eval_loss": 2.882852792739868, "eval_runtime": 5.3107, "eval_samples_per_second": 1859.817, "eval_steps_per_second": 14.687, "eval_top3_3_weighted_f1_score ": 0.4190313630044371, "eval_top_1_macro_f1_score": 0.061544251912979984, "eval_top_1_weighted_f1score": 0.205241513146368, "eval_top_3_macro_f1_score": 0.14671600365597517, "step": 7744 }, { "epoch": 3.33, "learning_rate": 0.0002, "loss": 2.8854, "step": 7745 }, { "epoch": 3.33, "learning_rate": 0.0002, "loss": 2.8721, "step": 7750 }, { "epoch": 3.33, "learning_rate": 0.0002, "loss": 2.8899, "step": 7755 }, { "epoch": 3.33, "learning_rate": 0.0002, "loss": 2.8927, "step": 7760 }, { "epoch": 3.33, "learning_rate": 0.0002, "loss": 2.8745, "step": 7765 }, { "epoch": 3.34, "learning_rate": 0.0002, "loss": 2.8924, "step": 7770 }, { "epoch": 3.34, "learning_rate": 0.0002, "loss": 2.8806, "step": 7775 }, { "epoch": 3.34, "learning_rate": 0.0002, "loss": 2.8661, "step": 7780 }, { "epoch": 3.34, "learning_rate": 0.0002, "loss": 2.9012, "step": 7785 }, { "epoch": 3.34, "learning_rate": 0.0002, "loss": 2.8762, "step": 7790 }, { "epoch": 3.35, "learning_rate": 0.0002, "loss": 2.8795, "step": 7795 }, { "epoch": 3.35, "learning_rate": 0.0002, "loss": 2.8862, "step": 7800 }, { "epoch": 3.35, "learning_rate": 0.0002, "loss": 2.8786, "step": 7805 }, { "epoch": 3.35, "eval_loss": 2.8905601501464844, "eval_runtime": 5.2935, "eval_samples_per_second": 1865.862, "eval_steps_per_second": 14.735, "eval_top3_3_weighted_f1_score ": 0.41593301042000663, "eval_top_1_macro_f1_score": 0.06133824832129849, "eval_top_1_weighted_f1score": 0.20215134462916098, "eval_top_3_macro_f1_score": 0.15742207883543302, "step": 7808 }, { "epoch": 3.35, "learning_rate": 0.0002, "loss": 2.8712, "step": 7810 }, { "epoch": 3.36, "learning_rate": 0.0002, "loss": 2.8843, "step": 7815 }, { "epoch": 3.36, "learning_rate": 0.0002, "loss": 2.884, "step": 7820 }, { "epoch": 3.36, "learning_rate": 0.0002, "loss": 2.898, "step": 7825 }, { "epoch": 3.36, "learning_rate": 0.0002, "loss": 2.8884, "step": 7830 }, { "epoch": 3.36, "learning_rate": 0.0002, "loss": 2.8606, "step": 7835 }, { "epoch": 3.37, "learning_rate": 0.0002, "loss": 2.8791, "step": 7840 }, { "epoch": 3.37, "learning_rate": 0.0002, "loss": 2.8691, "step": 7845 }, { "epoch": 3.37, "learning_rate": 0.0002, "loss": 2.895, "step": 7850 }, { "epoch": 3.37, "learning_rate": 0.0002, "loss": 2.8872, "step": 7855 }, { "epoch": 3.37, "learning_rate": 0.0002, "loss": 2.8708, "step": 7860 }, { "epoch": 3.38, "learning_rate": 0.0002, "loss": 2.9043, "step": 7865 }, { "epoch": 3.38, "learning_rate": 0.0002, "loss": 2.8964, "step": 7870 }, { "epoch": 3.38, "eval_loss": 2.88686466217041, "eval_runtime": 31.8739, "eval_samples_per_second": 309.878, "eval_steps_per_second": 2.447, "eval_top3_3_weighted_f1_score ": 0.4230829780304739, "eval_top_1_macro_f1_score": 0.054228979253503506, "eval_top_1_weighted_f1score": 0.19922267969873778, "eval_top_3_macro_f1_score": 0.15943393088965593, "step": 7872 }, { "epoch": 3.38, "learning_rate": 0.0002, "loss": 2.8811, "step": 7875 }, { "epoch": 3.38, "learning_rate": 0.0002, "loss": 2.8766, "step": 7880 }, { "epoch": 3.39, "learning_rate": 0.0002, "loss": 2.8873, "step": 7885 }, { "epoch": 3.39, "learning_rate": 0.0002, "loss": 2.8824, "step": 7890 }, { "epoch": 3.39, "learning_rate": 0.0002, "loss": 2.8831, "step": 7895 }, { "epoch": 3.39, "learning_rate": 0.0002, "loss": 2.8913, "step": 7900 }, { "epoch": 3.39, "learning_rate": 0.0002, "loss": 2.9162, "step": 7905 }, { "epoch": 3.4, "learning_rate": 0.0002, "loss": 2.8883, "step": 7910 }, { "epoch": 3.4, "learning_rate": 0.0002, "loss": 2.8627, "step": 7915 }, { "epoch": 3.4, "learning_rate": 0.0002, "loss": 2.8923, "step": 7920 }, { "epoch": 3.4, "learning_rate": 0.0002, "loss": 2.9033, "step": 7925 }, { "epoch": 3.4, "learning_rate": 0.0002, "loss": 2.8813, "step": 7930 }, { "epoch": 3.41, "learning_rate": 0.0002, "loss": 2.9054, "step": 7935 }, { "epoch": 3.41, "eval_loss": 2.885826587677002, "eval_runtime": 5.3867, "eval_samples_per_second": 1833.603, "eval_steps_per_second": 14.48, "eval_top3_3_weighted_f1_score ": 0.42314543248296244, "eval_top_1_macro_f1_score": 0.0681717331051118, "eval_top_1_weighted_f1score": 0.19846969594949262, "eval_top_3_macro_f1_score": 0.15816567728101838, "step": 7936 }, { "epoch": 3.41, "learning_rate": 0.0002, "loss": 2.8999, "step": 7940 }, { "epoch": 3.41, "learning_rate": 0.0002, "loss": 2.8825, "step": 7945 }, { "epoch": 3.41, "learning_rate": 0.0002, "loss": 2.8626, "step": 7950 }, { "epoch": 3.42, "learning_rate": 0.0002, "loss": 2.882, "step": 7955 }, { "epoch": 3.42, "learning_rate": 0.0002, "loss": 2.8873, "step": 7960 }, { "epoch": 3.42, "learning_rate": 0.0002, "loss": 2.8941, "step": 7965 }, { "epoch": 3.42, "learning_rate": 0.0002, "loss": 2.9129, "step": 7970 }, { "epoch": 3.42, "learning_rate": 0.0002, "loss": 2.8854, "step": 7975 }, { "epoch": 3.43, "learning_rate": 0.0002, "loss": 2.8872, "step": 7980 }, { "epoch": 3.43, "learning_rate": 0.0002, "loss": 2.8721, "step": 7985 }, { "epoch": 3.43, "learning_rate": 0.0002, "loss": 2.887, "step": 7990 }, { "epoch": 3.43, "learning_rate": 0.0002, "loss": 2.8783, "step": 7995 }, { "epoch": 3.43, "learning_rate": 0.0002, "loss": 2.8847, "step": 8000 }, { "epoch": 3.43, "eval_loss": 2.8894846439361572, "eval_runtime": 5.3414, "eval_samples_per_second": 1849.136, "eval_steps_per_second": 14.603, "eval_top3_3_weighted_f1_score ": 0.4191980028620256, "eval_top_1_macro_f1_score": 0.06801016994886046, "eval_top_1_weighted_f1score": 0.2038280543824952, "eval_top_3_macro_f1_score": 0.16341329875150717, "step": 8000 }, { "epoch": 3.44, "learning_rate": 0.0002, "loss": 2.902, "step": 8005 }, { "epoch": 3.44, "learning_rate": 0.0002, "loss": 2.8807, "step": 8010 }, { "epoch": 3.44, "learning_rate": 0.0002, "loss": 2.9025, "step": 8015 }, { "epoch": 3.44, "learning_rate": 0.0002, "loss": 2.8807, "step": 8020 }, { "epoch": 3.45, "learning_rate": 0.0002, "loss": 2.8845, "step": 8025 }, { "epoch": 3.45, "learning_rate": 0.0002, "loss": 2.9069, "step": 8030 }, { "epoch": 3.45, "learning_rate": 0.0002, "loss": 2.8704, "step": 8035 }, { "epoch": 3.45, "learning_rate": 0.0002, "loss": 2.8993, "step": 8040 }, { "epoch": 3.45, "learning_rate": 0.0002, "loss": 2.8888, "step": 8045 }, { "epoch": 3.46, "learning_rate": 0.0002, "loss": 2.8918, "step": 8050 }, { "epoch": 3.46, "learning_rate": 0.0002, "loss": 2.9014, "step": 8055 }, { "epoch": 3.46, "learning_rate": 0.0002, "loss": 2.8809, "step": 8060 }, { "epoch": 3.46, "eval_loss": 2.883298873901367, "eval_runtime": 5.3415, "eval_samples_per_second": 1849.094, "eval_steps_per_second": 14.603, "eval_top3_3_weighted_f1_score ": 0.4232752392927697, "eval_top_1_macro_f1_score": 0.062447123808691926, "eval_top_1_weighted_f1score": 0.20257688143489982, "eval_top_3_macro_f1_score": 0.16734324172721946, "step": 8064 }, { "epoch": 3.46, "learning_rate": 0.0002, "loss": 2.8984, "step": 8065 }, { "epoch": 3.46, "learning_rate": 0.0002, "loss": 2.8893, "step": 8070 }, { "epoch": 3.47, "learning_rate": 0.0002, "loss": 2.8649, "step": 8075 }, { "epoch": 3.47, "learning_rate": 0.0002, "loss": 2.8817, "step": 8080 }, { "epoch": 3.47, "learning_rate": 0.0002, "loss": 2.8859, "step": 8085 }, { "epoch": 3.47, "learning_rate": 0.0002, "loss": 2.8785, "step": 8090 }, { "epoch": 3.48, "learning_rate": 0.0002, "loss": 2.8918, "step": 8095 }, { "epoch": 3.48, "learning_rate": 0.0002, "loss": 2.8707, "step": 8100 }, { "epoch": 3.48, "learning_rate": 0.0002, "loss": 2.8799, "step": 8105 }, { "epoch": 3.48, "learning_rate": 0.0002, "loss": 2.8773, "step": 8110 }, { "epoch": 3.48, "learning_rate": 0.0002, "loss": 2.8845, "step": 8115 }, { "epoch": 3.49, "learning_rate": 0.0002, "loss": 2.8925, "step": 8120 }, { "epoch": 3.49, "learning_rate": 0.0002, "loss": 2.8915, "step": 8125 }, { "epoch": 3.49, "eval_loss": 2.885603427886963, "eval_runtime": 5.4426, "eval_samples_per_second": 1814.745, "eval_steps_per_second": 14.331, "eval_top3_3_weighted_f1_score ": 0.4233867206076715, "eval_top_1_macro_f1_score": 0.05447611955443182, "eval_top_1_weighted_f1score": 0.20162142002690994, "eval_top_3_macro_f1_score": 0.16188996546475656, "step": 8128 }, { "epoch": 3.49, "learning_rate": 0.0002, "loss": 2.8799, "step": 8130 }, { "epoch": 3.49, "learning_rate": 0.0002, "loss": 2.8855, "step": 8135 }, { "epoch": 3.49, "learning_rate": 0.0002, "loss": 2.889, "step": 8140 }, { "epoch": 3.5, "learning_rate": 0.0002, "loss": 2.8756, "step": 8145 }, { "epoch": 3.5, "learning_rate": 0.0002, "loss": 2.8852, "step": 8150 }, { "epoch": 3.5, "learning_rate": 0.0002, "loss": 2.9, "step": 8155 }, { "epoch": 3.5, "learning_rate": 0.0002, "loss": 2.8899, "step": 8160 }, { "epoch": 3.51, "learning_rate": 0.0002, "loss": 2.8748, "step": 8165 }, { "epoch": 3.51, "learning_rate": 0.0002, "loss": 2.8777, "step": 8170 }, { "epoch": 3.51, "learning_rate": 0.0002, "loss": 2.8726, "step": 8175 }, { "epoch": 3.51, "learning_rate": 0.0002, "loss": 2.8666, "step": 8180 }, { "epoch": 3.51, "learning_rate": 0.0002, "loss": 2.8982, "step": 8185 }, { "epoch": 3.52, "learning_rate": 0.0002, "loss": 2.8709, "step": 8190 }, { "epoch": 3.52, "eval_loss": 2.8865466117858887, "eval_runtime": 5.3554, "eval_samples_per_second": 1844.29, "eval_steps_per_second": 14.565, "eval_top3_3_weighted_f1_score ": 0.4203827795263443, "eval_top_1_macro_f1_score": 0.06852860836345402, "eval_top_1_weighted_f1score": 0.20701276419199244, "eval_top_3_macro_f1_score": 0.15979011951298183, "step": 8192 }, { "epoch": 3.52, "learning_rate": 0.0002, "loss": 2.8828, "step": 8195 }, { "epoch": 3.52, "learning_rate": 0.0002, "loss": 2.8876, "step": 8200 }, { "epoch": 3.52, "learning_rate": 0.0002, "loss": 2.8804, "step": 8205 }, { "epoch": 3.53, "learning_rate": 0.0002, "loss": 2.8728, "step": 8210 }, { "epoch": 3.53, "learning_rate": 0.0002, "loss": 2.8694, "step": 8215 }, { "epoch": 3.53, "learning_rate": 0.0002, "loss": 2.8962, "step": 8220 }, { "epoch": 3.53, "learning_rate": 0.0002, "loss": 2.8765, "step": 8225 }, { "epoch": 3.53, "learning_rate": 0.0002, "loss": 2.8933, "step": 8230 }, { "epoch": 3.54, "learning_rate": 0.0002, "loss": 2.869, "step": 8235 }, { "epoch": 3.54, "learning_rate": 0.0002, "loss": 2.9021, "step": 8240 }, { "epoch": 3.54, "learning_rate": 0.0002, "loss": 2.8953, "step": 8245 }, { "epoch": 3.54, "learning_rate": 0.0002, "loss": 2.8961, "step": 8250 }, { "epoch": 3.54, "learning_rate": 0.0002, "loss": 2.8899, "step": 8255 }, { "epoch": 3.54, "eval_loss": 2.88881254196167, "eval_runtime": 10.47, "eval_samples_per_second": 943.365, "eval_steps_per_second": 7.45, "eval_top3_3_weighted_f1_score ": 0.421615034340461, "eval_top_1_macro_f1_score": 0.05707255432075041, "eval_top_1_weighted_f1score": 0.20236393382029402, "eval_top_3_macro_f1_score": 0.16403338219761618, "step": 8256 }, { "epoch": 3.55, "learning_rate": 0.0002, "loss": 2.8778, "step": 8260 }, { "epoch": 3.55, "learning_rate": 0.0002, "loss": 2.8824, "step": 8265 }, { "epoch": 3.55, "learning_rate": 0.0002, "loss": 2.8672, "step": 8270 }, { "epoch": 3.55, "learning_rate": 0.0002, "loss": 2.8754, "step": 8275 }, { "epoch": 3.56, "learning_rate": 0.0002, "loss": 2.8962, "step": 8280 }, { "epoch": 3.56, "learning_rate": 0.0002, "loss": 2.8939, "step": 8285 }, { "epoch": 3.56, "learning_rate": 0.0002, "loss": 2.8706, "step": 8290 }, { "epoch": 3.56, "learning_rate": 0.0002, "loss": 2.8633, "step": 8295 }, { "epoch": 3.56, "learning_rate": 0.0002, "loss": 2.8647, "step": 8300 }, { "epoch": 3.57, "learning_rate": 0.0002, "loss": 2.8836, "step": 8305 }, { "epoch": 3.57, "learning_rate": 0.0002, "loss": 2.894, "step": 8310 }, { "epoch": 3.57, "learning_rate": 0.0002, "loss": 2.887, "step": 8315 }, { "epoch": 3.57, "learning_rate": 0.0002, "loss": 2.8758, "step": 8320 }, { "epoch": 3.57, "eval_loss": 2.882431983947754, "eval_runtime": 5.7726, "eval_samples_per_second": 1711.007, "eval_steps_per_second": 13.512, "eval_top3_3_weighted_f1_score ": 0.4276064007964306, "eval_top_1_macro_f1_score": 0.0631328266883076, "eval_top_1_weighted_f1score": 0.204925576803142, "eval_top_3_macro_f1_score": 0.16421739218602505, "step": 8320 }, { "epoch": 3.57, "learning_rate": 0.0002, "loss": 2.8648, "step": 8325 }, { "epoch": 3.58, "learning_rate": 0.0002, "loss": 2.876, "step": 8330 }, { "epoch": 3.58, "learning_rate": 0.0002, "loss": 2.8822, "step": 8335 }, { "epoch": 3.58, "learning_rate": 0.0002, "loss": 2.8788, "step": 8340 }, { "epoch": 3.58, "learning_rate": 0.0002, "loss": 2.8747, "step": 8345 }, { "epoch": 3.59, "learning_rate": 0.0002, "loss": 2.8772, "step": 8350 }, { "epoch": 3.59, "learning_rate": 0.0002, "loss": 2.8802, "step": 8355 }, { "epoch": 3.59, "learning_rate": 0.0002, "loss": 2.8786, "step": 8360 }, { "epoch": 3.59, "learning_rate": 0.0002, "loss": 2.8756, "step": 8365 }, { "epoch": 3.59, "learning_rate": 0.0002, "loss": 2.8815, "step": 8370 }, { "epoch": 3.6, "learning_rate": 0.0002, "loss": 2.8888, "step": 8375 }, { "epoch": 3.6, "learning_rate": 0.0002, "loss": 2.8708, "step": 8380 }, { "epoch": 3.6, "eval_loss": 2.8809597492218018, "eval_runtime": 5.341, "eval_samples_per_second": 1849.276, "eval_steps_per_second": 14.604, "eval_top3_3_weighted_f1_score ": 0.4246114037313046, "eval_top_1_macro_f1_score": 0.05977962775940492, "eval_top_1_weighted_f1score": 0.19956349021000716, "eval_top_3_macro_f1_score": 0.1684172253348305, "step": 8384 }, { "epoch": 3.6, "learning_rate": 0.0002, "loss": 2.8783, "step": 8385 }, { "epoch": 3.6, "learning_rate": 0.0002, "loss": 2.8975, "step": 8390 }, { "epoch": 3.6, "learning_rate": 0.0002, "loss": 2.8774, "step": 8395 }, { "epoch": 3.61, "learning_rate": 0.0002, "loss": 2.8734, "step": 8400 }, { "epoch": 3.61, "learning_rate": 0.0002, "loss": 2.8875, "step": 8405 }, { "epoch": 3.61, "learning_rate": 0.0002, "loss": 2.8743, "step": 8410 }, { "epoch": 3.61, "learning_rate": 0.0002, "loss": 2.8687, "step": 8415 }, { "epoch": 3.62, "learning_rate": 0.0002, "loss": 2.8858, "step": 8420 }, { "epoch": 3.62, "learning_rate": 0.0002, "loss": 2.898, "step": 8425 }, { "epoch": 3.62, "learning_rate": 0.0002, "loss": 2.9, "step": 8430 }, { "epoch": 3.62, "learning_rate": 0.0002, "loss": 2.8913, "step": 8435 }, { "epoch": 3.62, "learning_rate": 0.0002, "loss": 2.8613, "step": 8440 }, { "epoch": 3.63, "learning_rate": 0.0002, "loss": 2.9039, "step": 8445 }, { "epoch": 3.63, "eval_loss": 2.879713535308838, "eval_runtime": 5.3117, "eval_samples_per_second": 1859.497, "eval_steps_per_second": 14.685, "eval_top3_3_weighted_f1_score ": 0.422907351453079, "eval_top_1_macro_f1_score": 0.061077590558727204, "eval_top_1_weighted_f1score": 0.2037311984234322, "eval_top_3_macro_f1_score": 0.15977641394382092, "step": 8448 }, { "epoch": 3.63, "learning_rate": 0.0002, "loss": 2.8776, "step": 8450 }, { "epoch": 3.63, "learning_rate": 0.0002, "loss": 2.8751, "step": 8455 }, { "epoch": 3.63, "learning_rate": 0.0002, "loss": 2.8622, "step": 8460 }, { "epoch": 3.63, "learning_rate": 0.0002, "loss": 2.8913, "step": 8465 }, { "epoch": 3.64, "learning_rate": 0.0002, "loss": 2.87, "step": 8470 }, { "epoch": 3.64, "learning_rate": 0.0002, "loss": 2.8828, "step": 8475 }, { "epoch": 3.64, "learning_rate": 0.0002, "loss": 2.875, "step": 8480 }, { "epoch": 3.64, "learning_rate": 0.0002, "loss": 2.8747, "step": 8485 }, { "epoch": 3.65, "learning_rate": 0.0002, "loss": 2.8767, "step": 8490 }, { "epoch": 3.65, "learning_rate": 0.0002, "loss": 2.8958, "step": 8495 }, { "epoch": 3.65, "learning_rate": 0.0002, "loss": 2.8598, "step": 8500 }, { "epoch": 3.65, "learning_rate": 0.0002, "loss": 2.8764, "step": 8505 }, { "epoch": 3.65, "learning_rate": 0.0002, "loss": 2.9056, "step": 8510 }, { "epoch": 3.65, "eval_loss": 2.879021644592285, "eval_runtime": 5.3893, "eval_samples_per_second": 1832.701, "eval_steps_per_second": 14.473, "eval_top3_3_weighted_f1_score ": 0.42083999002881417, "eval_top_1_macro_f1_score": 0.05751107770938145, "eval_top_1_weighted_f1score": 0.20063172811985464, "eval_top_3_macro_f1_score": 0.15231224977727817, "step": 8512 }, { "epoch": 3.66, "learning_rate": 0.0002, "loss": 2.8772, "step": 8515 }, { "epoch": 3.66, "learning_rate": 0.0002, "loss": 2.858, "step": 8520 }, { "epoch": 3.66, "learning_rate": 0.0002, "loss": 2.8674, "step": 8525 }, { "epoch": 3.66, "learning_rate": 0.0002, "loss": 2.8663, "step": 8530 }, { "epoch": 3.66, "learning_rate": 0.0002, "loss": 2.8912, "step": 8535 }, { "epoch": 3.67, "learning_rate": 0.0002, "loss": 2.8946, "step": 8540 }, { "epoch": 3.67, "learning_rate": 0.0002, "loss": 2.8685, "step": 8545 }, { "epoch": 3.67, "learning_rate": 0.0002, "loss": 2.8856, "step": 8550 }, { "epoch": 3.67, "learning_rate": 0.0002, "loss": 2.8764, "step": 8555 }, { "epoch": 3.68, "learning_rate": 0.0002, "loss": 2.8753, "step": 8560 }, { "epoch": 3.68, "learning_rate": 0.0002, "loss": 2.8829, "step": 8565 }, { "epoch": 3.68, "learning_rate": 0.0002, "loss": 2.872, "step": 8570 }, { "epoch": 3.68, "learning_rate": 0.0002, "loss": 2.8762, "step": 8575 }, { "epoch": 3.68, "eval_loss": 2.8791356086730957, "eval_runtime": 5.5245, "eval_samples_per_second": 1787.855, "eval_steps_per_second": 14.119, "eval_top3_3_weighted_f1_score ": 0.4259180292541815, "eval_top_1_macro_f1_score": 0.0614787145421599, "eval_top_1_weighted_f1score": 0.20327243243179235, "eval_top_3_macro_f1_score": 0.15667803624411272, "step": 8576 }, { "epoch": 3.68, "learning_rate": 0.0002, "loss": 2.912, "step": 8580 }, { "epoch": 3.69, "learning_rate": 0.0002, "loss": 2.8959, "step": 8585 }, { "epoch": 3.69, "learning_rate": 0.0002, "loss": 2.8912, "step": 8590 }, { "epoch": 3.69, "learning_rate": 0.0002, "loss": 2.871, "step": 8595 }, { "epoch": 3.69, "learning_rate": 0.0002, "loss": 2.8803, "step": 8600 }, { "epoch": 3.69, "learning_rate": 0.0002, "loss": 2.8931, "step": 8605 }, { "epoch": 3.7, "learning_rate": 0.0002, "loss": 2.8736, "step": 8610 }, { "epoch": 3.7, "learning_rate": 0.0002, "loss": 2.8696, "step": 8615 }, { "epoch": 3.7, "learning_rate": 0.0002, "loss": 2.8816, "step": 8620 }, { "epoch": 3.7, "learning_rate": 0.0002, "loss": 2.8843, "step": 8625 }, { "epoch": 3.71, "learning_rate": 0.0002, "loss": 2.8798, "step": 8630 }, { "epoch": 3.71, "learning_rate": 0.0002, "loss": 2.8836, "step": 8635 }, { "epoch": 3.71, "learning_rate": 0.0002, "loss": 2.8925, "step": 8640 }, { "epoch": 3.71, "eval_loss": 2.878877878189087, "eval_runtime": 5.3054, "eval_samples_per_second": 1861.705, "eval_steps_per_second": 14.702, "eval_top3_3_weighted_f1_score ": 0.4211972428050605, "eval_top_1_macro_f1_score": 0.05865004403272165, "eval_top_1_weighted_f1score": 0.20404542161091987, "eval_top_3_macro_f1_score": 0.15737159298688402, "step": 8640 }, { "epoch": 3.71, "learning_rate": 0.0002, "loss": 2.8764, "step": 8645 }, { "epoch": 3.71, "learning_rate": 0.0002, "loss": 2.8805, "step": 8650 }, { "epoch": 3.72, "learning_rate": 0.0002, "loss": 2.8693, "step": 8655 }, { "epoch": 3.72, "learning_rate": 0.0002, "loss": 2.8963, "step": 8660 }, { "epoch": 3.72, "learning_rate": 0.0002, "loss": 2.8858, "step": 8665 }, { "epoch": 3.72, "learning_rate": 0.0002, "loss": 2.8833, "step": 8670 }, { "epoch": 3.72, "learning_rate": 0.0002, "loss": 2.8643, "step": 8675 }, { "epoch": 3.73, "learning_rate": 0.0002, "loss": 2.8789, "step": 8680 }, { "epoch": 3.73, "learning_rate": 0.0002, "loss": 2.8966, "step": 8685 }, { "epoch": 3.73, "learning_rate": 0.0002, "loss": 2.878, "step": 8690 }, { "epoch": 3.73, "learning_rate": 0.0002, "loss": 2.8832, "step": 8695 }, { "epoch": 3.74, "learning_rate": 0.0002, "loss": 2.8737, "step": 8700 }, { "epoch": 3.74, "eval_loss": 2.8780808448791504, "eval_runtime": 5.458, "eval_samples_per_second": 1809.643, "eval_steps_per_second": 14.291, "eval_top3_3_weighted_f1_score ": 0.42025603415139495, "eval_top_1_macro_f1_score": 0.06226039136025313, "eval_top_1_weighted_f1score": 0.2016970828248235, "eval_top_3_macro_f1_score": 0.16186434784527917, "step": 8704 }, { "epoch": 3.74, "learning_rate": 0.0002, "loss": 2.8802, "step": 8705 }, { "epoch": 3.74, "learning_rate": 0.0002, "loss": 2.899, "step": 8710 }, { "epoch": 3.74, "learning_rate": 0.0002, "loss": 2.8758, "step": 8715 }, { "epoch": 3.74, "learning_rate": 0.0002, "loss": 2.8736, "step": 8720 }, { "epoch": 3.75, "learning_rate": 0.0002, "loss": 2.8747, "step": 8725 }, { "epoch": 3.75, "learning_rate": 0.0002, "loss": 2.8799, "step": 8730 }, { "epoch": 3.75, "learning_rate": 0.0002, "loss": 2.8711, "step": 8735 }, { "epoch": 3.75, "learning_rate": 0.0002, "loss": 2.8604, "step": 8740 }, { "epoch": 3.75, "learning_rate": 0.0002, "loss": 2.8926, "step": 8745 }, { "epoch": 3.76, "learning_rate": 0.0002, "loss": 2.8734, "step": 8750 }, { "epoch": 3.76, "learning_rate": 0.0002, "loss": 2.8876, "step": 8755 }, { "epoch": 3.76, "learning_rate": 0.0002, "loss": 2.8947, "step": 8760 }, { "epoch": 3.76, "learning_rate": 0.0002, "loss": 2.8779, "step": 8765 }, { "epoch": 3.76, "eval_loss": 2.8843703269958496, "eval_runtime": 5.7769, "eval_samples_per_second": 1709.743, "eval_steps_per_second": 13.502, "eval_top3_3_weighted_f1_score ": 0.4199394713414115, "eval_top_1_macro_f1_score": 0.061616820845209974, "eval_top_1_weighted_f1score": 0.20079067358787378, "eval_top_3_macro_f1_score": 0.1595062776335458, "step": 8768 }, { "epoch": 3.77, "learning_rate": 0.0002, "loss": 2.8758, "step": 8770 }, { "epoch": 3.77, "learning_rate": 0.0002, "loss": 2.8699, "step": 8775 }, { "epoch": 3.77, "learning_rate": 0.0002, "loss": 2.8794, "step": 8780 }, { "epoch": 3.77, "learning_rate": 0.0002, "loss": 2.8799, "step": 8785 }, { "epoch": 3.77, "learning_rate": 0.0002, "loss": 2.8858, "step": 8790 }, { "epoch": 3.78, "learning_rate": 0.0002, "loss": 2.8732, "step": 8795 }, { "epoch": 3.78, "learning_rate": 0.0002, "loss": 2.8762, "step": 8800 }, { "epoch": 3.78, "learning_rate": 0.0002, "loss": 2.8577, "step": 8805 }, { "epoch": 3.78, "learning_rate": 0.0002, "loss": 2.874, "step": 8810 }, { "epoch": 3.78, "learning_rate": 0.0002, "loss": 2.9012, "step": 8815 }, { "epoch": 3.79, "learning_rate": 0.0002, "loss": 2.8745, "step": 8820 }, { "epoch": 3.79, "learning_rate": 0.0002, "loss": 2.8711, "step": 8825 }, { "epoch": 3.79, "learning_rate": 0.0002, "loss": 2.8715, "step": 8830 }, { "epoch": 3.79, "eval_loss": 2.886676073074341, "eval_runtime": 5.3544, "eval_samples_per_second": 1844.649, "eval_steps_per_second": 14.567, "eval_top3_3_weighted_f1_score ": 0.4182482749371767, "eval_top_1_macro_f1_score": 0.06164296965374765, "eval_top_1_weighted_f1score": 0.20366121612865182, "eval_top_3_macro_f1_score": 0.157679707876537, "step": 8832 }, { "epoch": 3.79, "learning_rate": 0.0002, "loss": 2.871, "step": 8835 }, { "epoch": 3.8, "learning_rate": 0.0002, "loss": 2.8605, "step": 8840 }, { "epoch": 3.8, "learning_rate": 0.0002, "loss": 2.8915, "step": 8845 }, { "epoch": 3.8, "learning_rate": 0.0002, "loss": 2.8595, "step": 8850 }, { "epoch": 3.8, "learning_rate": 0.0002, "loss": 2.8813, "step": 8855 }, { "epoch": 3.8, "learning_rate": 0.0002, "loss": 2.878, "step": 8860 }, { "epoch": 3.81, "learning_rate": 0.0002, "loss": 2.8922, "step": 8865 }, { "epoch": 3.81, "learning_rate": 0.0002, "loss": 2.8643, "step": 8870 }, { "epoch": 3.81, "learning_rate": 0.0002, "loss": 2.8853, "step": 8875 }, { "epoch": 3.81, "learning_rate": 0.0002, "loss": 2.8897, "step": 8880 }, { "epoch": 3.81, "learning_rate": 0.0002, "loss": 2.8917, "step": 8885 }, { "epoch": 3.82, "learning_rate": 0.0002, "loss": 2.8748, "step": 8890 }, { "epoch": 3.82, "learning_rate": 0.0002, "loss": 2.891, "step": 8895 }, { "epoch": 3.82, "eval_loss": 2.885464668273926, "eval_runtime": 5.294, "eval_samples_per_second": 1865.685, "eval_steps_per_second": 14.734, "eval_top3_3_weighted_f1_score ": 0.42080617949576915, "eval_top_1_macro_f1_score": 0.056517574070011053, "eval_top_1_weighted_f1score": 0.2049458290344821, "eval_top_3_macro_f1_score": 0.15343116095089573, "step": 8896 }, { "epoch": 3.82, "learning_rate": 0.0002, "loss": 2.8686, "step": 8900 }, { "epoch": 3.82, "learning_rate": 0.0002, "loss": 2.8812, "step": 8905 }, { "epoch": 3.83, "learning_rate": 0.0002, "loss": 2.8742, "step": 8910 }, { "epoch": 3.83, "learning_rate": 0.0002, "loss": 2.8812, "step": 8915 }, { "epoch": 3.83, "learning_rate": 0.0002, "loss": 2.888, "step": 8920 }, { "epoch": 3.83, "learning_rate": 0.0002, "loss": 2.887, "step": 8925 }, { "epoch": 3.83, "learning_rate": 0.0002, "loss": 2.8786, "step": 8930 }, { "epoch": 3.84, "learning_rate": 0.0002, "loss": 2.8878, "step": 8935 }, { "epoch": 3.84, "learning_rate": 0.0002, "loss": 2.8726, "step": 8940 }, { "epoch": 3.84, "learning_rate": 0.0002, "loss": 2.8626, "step": 8945 }, { "epoch": 3.84, "learning_rate": 0.0002, "loss": 2.8748, "step": 8950 }, { "epoch": 3.84, "learning_rate": 0.0002, "loss": 2.8728, "step": 8955 }, { "epoch": 3.85, "learning_rate": 0.0002, "loss": 2.8796, "step": 8960 }, { "epoch": 3.85, "eval_loss": 2.880286931991577, "eval_runtime": 5.3448, "eval_samples_per_second": 1847.978, "eval_steps_per_second": 14.594, "eval_top3_3_weighted_f1_score ": 0.4218945381783791, "eval_top_1_macro_f1_score": 0.06158161969228803, "eval_top_1_weighted_f1score": 0.20375764386011855, "eval_top_3_macro_f1_score": 0.1568737935791377, "step": 8960 }, { "epoch": 3.85, "learning_rate": 0.0002, "loss": 2.8809, "step": 8965 }, { "epoch": 3.85, "learning_rate": 0.0002, "loss": 2.8947, "step": 8970 }, { "epoch": 3.85, "learning_rate": 0.0002, "loss": 2.8879, "step": 8975 }, { "epoch": 3.86, "learning_rate": 0.0002, "loss": 2.9108, "step": 8980 }, { "epoch": 3.86, "learning_rate": 0.0002, "loss": 2.8772, "step": 8985 }, { "epoch": 3.86, "learning_rate": 0.0002, "loss": 2.8653, "step": 8990 }, { "epoch": 3.86, "learning_rate": 0.0002, "loss": 2.8982, "step": 8995 }, { "epoch": 3.86, "learning_rate": 0.0002, "loss": 2.8927, "step": 9000 }, { "epoch": 3.87, "learning_rate": 0.0002, "loss": 2.897, "step": 9005 }, { "epoch": 3.87, "learning_rate": 0.0002, "loss": 2.8872, "step": 9010 }, { "epoch": 3.87, "learning_rate": 0.0002, "loss": 2.8918, "step": 9015 }, { "epoch": 3.87, "learning_rate": 0.0002, "loss": 2.8942, "step": 9020 }, { "epoch": 3.87, "eval_loss": 2.879659652709961, "eval_runtime": 5.3314, "eval_samples_per_second": 1852.603, "eval_steps_per_second": 14.63, "eval_top3_3_weighted_f1_score ": 0.42280799021925125, "eval_top_1_macro_f1_score": 0.0616648047561624, "eval_top_1_weighted_f1score": 0.20212917796376262, "eval_top_3_macro_f1_score": 0.1632725285893378, "step": 9024 }, { "epoch": 3.87, "learning_rate": 0.0002, "loss": 2.8579, "step": 9025 }, { "epoch": 3.88, "learning_rate": 0.0002, "loss": 2.8849, "step": 9030 }, { "epoch": 3.88, "learning_rate": 0.0002, "loss": 2.8805, "step": 9035 }, { "epoch": 3.88, "learning_rate": 0.0002, "loss": 2.8848, "step": 9040 }, { "epoch": 3.88, "learning_rate": 0.0002, "loss": 2.8812, "step": 9045 }, { "epoch": 3.89, "learning_rate": 0.0002, "loss": 2.8676, "step": 9050 }, { "epoch": 3.89, "learning_rate": 0.0002, "loss": 2.8795, "step": 9055 }, { "epoch": 3.89, "learning_rate": 0.0002, "loss": 2.8658, "step": 9060 }, { "epoch": 3.89, "learning_rate": 0.0002, "loss": 2.9151, "step": 9065 }, { "epoch": 3.89, "learning_rate": 0.0002, "loss": 2.8678, "step": 9070 }, { "epoch": 3.9, "learning_rate": 0.0002, "loss": 2.864, "step": 9075 }, { "epoch": 3.9, "learning_rate": 0.0002, "loss": 2.8792, "step": 9080 }, { "epoch": 3.9, "learning_rate": 0.0002, "loss": 2.8661, "step": 9085 }, { "epoch": 3.9, "eval_loss": 2.878302812576294, "eval_runtime": 5.3561, "eval_samples_per_second": 1844.062, "eval_steps_per_second": 14.563, "eval_top3_3_weighted_f1_score ": 0.4218642511499269, "eval_top_1_macro_f1_score": 0.06042573034503991, "eval_top_1_weighted_f1score": 0.20167938826181162, "eval_top_3_macro_f1_score": 0.15948181652047955, "step": 9088 }, { "epoch": 3.9, "learning_rate": 0.0002, "loss": 2.8864, "step": 9090 }, { "epoch": 3.9, "learning_rate": 0.0002, "loss": 2.864, "step": 9095 }, { "epoch": 3.91, "learning_rate": 0.0002, "loss": 2.8964, "step": 9100 }, { "epoch": 3.91, "learning_rate": 0.0002, "loss": 2.8743, "step": 9105 }, { "epoch": 3.91, "learning_rate": 0.0002, "loss": 2.8914, "step": 9110 }, { "epoch": 3.91, "learning_rate": 0.0002, "loss": 2.8743, "step": 9115 }, { "epoch": 3.92, "learning_rate": 0.0002, "loss": 2.8617, "step": 9120 }, { "epoch": 3.92, "learning_rate": 0.0002, "loss": 2.8921, "step": 9125 }, { "epoch": 3.92, "learning_rate": 0.0002, "loss": 2.8898, "step": 9130 }, { "epoch": 3.92, "learning_rate": 0.0002, "loss": 2.869, "step": 9135 }, { "epoch": 3.92, "learning_rate": 0.0002, "loss": 2.8837, "step": 9140 }, { "epoch": 3.93, "learning_rate": 0.0002, "loss": 2.8513, "step": 9145 }, { "epoch": 3.93, "learning_rate": 0.0002, "loss": 2.8895, "step": 9150 }, { "epoch": 3.93, "eval_loss": 2.87255597114563, "eval_runtime": 5.3612, "eval_samples_per_second": 1842.319, "eval_steps_per_second": 14.549, "eval_top3_3_weighted_f1_score ": 0.4240820028763008, "eval_top_1_macro_f1_score": 0.0618924953113334, "eval_top_1_weighted_f1score": 0.20417364990423967, "eval_top_3_macro_f1_score": 0.16038084358769286, "step": 9152 }, { "epoch": 3.93, "learning_rate": 0.0002, "loss": 2.8824, "step": 9155 }, { "epoch": 3.93, "learning_rate": 0.0002, "loss": 2.8778, "step": 9160 }, { "epoch": 3.93, "learning_rate": 0.0002, "loss": 2.88, "step": 9165 }, { "epoch": 3.94, "learning_rate": 0.0002, "loss": 2.88, "step": 9170 }, { "epoch": 3.94, "learning_rate": 0.0002, "loss": 2.8837, "step": 9175 }, { "epoch": 3.94, "learning_rate": 0.0002, "loss": 2.8647, "step": 9180 }, { "epoch": 3.94, "learning_rate": 0.0002, "loss": 2.8689, "step": 9185 }, { "epoch": 3.95, "learning_rate": 0.0002, "loss": 2.9013, "step": 9190 }, { "epoch": 3.95, "learning_rate": 0.0002, "loss": 2.8834, "step": 9195 }, { "epoch": 3.95, "learning_rate": 0.0002, "loss": 2.8805, "step": 9200 }, { "epoch": 3.95, "learning_rate": 0.0002, "loss": 2.8832, "step": 9205 }, { "epoch": 3.95, "learning_rate": 0.0002, "loss": 2.8956, "step": 9210 }, { "epoch": 3.96, "learning_rate": 0.0002, "loss": 2.8945, "step": 9215 }, { "epoch": 3.96, "eval_loss": 2.8825714588165283, "eval_runtime": 5.3312, "eval_samples_per_second": 1852.669, "eval_steps_per_second": 14.631, "eval_top3_3_weighted_f1_score ": 0.4190590262717862, "eval_top_1_macro_f1_score": 0.06269843842904875, "eval_top_1_weighted_f1score": 0.2079376903452088, "eval_top_3_macro_f1_score": 0.16065438728345302, "step": 9216 }, { "epoch": 3.96, "learning_rate": 0.0002, "loss": 2.8708, "step": 9220 }, { "epoch": 3.96, "learning_rate": 0.0002, "loss": 2.8795, "step": 9225 }, { "epoch": 3.96, "learning_rate": 0.0002, "loss": 2.8845, "step": 9230 }, { "epoch": 3.97, "learning_rate": 0.0002, "loss": 2.869, "step": 9235 }, { "epoch": 3.97, "learning_rate": 0.0002, "loss": 2.8712, "step": 9240 }, { "epoch": 3.97, "learning_rate": 0.0002, "loss": 2.8997, "step": 9245 }, { "epoch": 3.97, "learning_rate": 0.0002, "loss": 2.8736, "step": 9250 }, { "epoch": 3.97, "learning_rate": 0.0002, "loss": 2.8864, "step": 9255 }, { "epoch": 3.98, "learning_rate": 0.0002, "loss": 2.8914, "step": 9260 }, { "epoch": 3.98, "learning_rate": 0.0002, "loss": 2.881, "step": 9265 }, { "epoch": 3.98, "learning_rate": 0.0002, "loss": 2.893, "step": 9270 }, { "epoch": 3.98, "learning_rate": 0.0002, "loss": 2.9017, "step": 9275 }, { "epoch": 3.98, "learning_rate": 0.0002, "loss": 2.879, "step": 9280 }, { "epoch": 3.98, "eval_loss": 2.8842124938964844, "eval_runtime": 5.3466, "eval_samples_per_second": 1847.352, "eval_steps_per_second": 14.589, "eval_top3_3_weighted_f1_score ": 0.42577666209976406, "eval_top_1_macro_f1_score": 0.06101519782251127, "eval_top_1_weighted_f1score": 0.20089361137055256, "eval_top_3_macro_f1_score": 0.1615738776964954, "step": 9280 }, { "epoch": 3.99, "learning_rate": 0.0002, "loss": 2.8843, "step": 9285 }, { "epoch": 3.99, "learning_rate": 0.0002, "loss": 2.8643, "step": 9290 }, { "epoch": 3.99, "learning_rate": 0.0002, "loss": 2.8929, "step": 9295 }, { "epoch": 3.99, "learning_rate": 0.0002, "loss": 2.8731, "step": 9300 }, { "epoch": 4.0, "learning_rate": 0.0002, "loss": 2.8743, "step": 9305 }, { "epoch": 4.0, "learning_rate": 0.0002, "loss": 2.8696, "step": 9310 }, { "epoch": 4.0, "learning_rate": 0.0002, "loss": 2.8812, "step": 9315 }, { "epoch": 4.0, "learning_rate": 0.0002, "loss": 3.1464, "step": 9320 }, { "epoch": 4.0, "learning_rate": 0.0002, "loss": 2.8617, "step": 9325 }, { "epoch": 4.01, "learning_rate": 0.0002, "loss": 2.8698, "step": 9330 }, { "epoch": 4.01, "learning_rate": 0.0002, "loss": 2.8647, "step": 9335 }, { "epoch": 4.01, "learning_rate": 0.0002, "loss": 2.8597, "step": 9340 }, { "epoch": 4.01, "eval_loss": 2.883638620376587, "eval_runtime": 5.7093, "eval_samples_per_second": 1729.988, "eval_steps_per_second": 13.662, "eval_top3_3_weighted_f1_score ": 0.4215743958712875, "eval_top_1_macro_f1_score": 0.06326665146672523, "eval_top_1_weighted_f1score": 0.20706063459634225, "eval_top_3_macro_f1_score": 0.164675083738132, "step": 9344 }, { "epoch": 4.01, "learning_rate": 0.0002, "loss": 2.87, "step": 9345 }, { "epoch": 4.01, "learning_rate": 0.0002, "loss": 2.8808, "step": 9350 }, { "epoch": 4.02, "learning_rate": 0.0002, "loss": 2.8727, "step": 9355 }, { "epoch": 4.02, "learning_rate": 0.0002, "loss": 2.8586, "step": 9360 }, { "epoch": 4.02, "learning_rate": 0.0002, "loss": 2.8823, "step": 9365 }, { "epoch": 4.02, "learning_rate": 0.0002, "loss": 2.8571, "step": 9370 }, { "epoch": 4.03, "learning_rate": 0.0002, "loss": 2.8712, "step": 9375 }, { "epoch": 4.03, "learning_rate": 0.0002, "loss": 2.8675, "step": 9380 }, { "epoch": 4.03, "learning_rate": 0.0002, "loss": 2.8744, "step": 9385 }, { "epoch": 4.03, "learning_rate": 0.0002, "loss": 2.8751, "step": 9390 }, { "epoch": 4.03, "learning_rate": 0.0002, "loss": 2.8718, "step": 9395 }, { "epoch": 4.04, "learning_rate": 0.0002, "loss": 2.8751, "step": 9400 }, { "epoch": 4.04, "learning_rate": 0.0002, "loss": 2.8854, "step": 9405 }, { "epoch": 4.04, "eval_loss": 2.8765015602111816, "eval_runtime": 5.3743, "eval_samples_per_second": 1837.804, "eval_steps_per_second": 14.513, "eval_top3_3_weighted_f1_score ": 0.41924298785763464, "eval_top_1_macro_f1_score": 0.06146762957996142, "eval_top_1_weighted_f1score": 0.20146868730144363, "eval_top_3_macro_f1_score": 0.16860583910350077, "step": 9408 }, { "epoch": 4.04, "learning_rate": 0.0002, "loss": 2.88, "step": 9410 }, { "epoch": 4.04, "learning_rate": 0.0002, "loss": 2.8792, "step": 9415 }, { "epoch": 4.04, "learning_rate": 0.0002, "loss": 2.8433, "step": 9420 }, { "epoch": 4.05, "learning_rate": 0.0002, "loss": 2.8556, "step": 9425 }, { "epoch": 4.05, "learning_rate": 0.0002, "loss": 2.856, "step": 9430 }, { "epoch": 4.05, "learning_rate": 0.0002, "loss": 2.8753, "step": 9435 }, { "epoch": 4.05, "learning_rate": 0.0002, "loss": 2.8768, "step": 9440 }, { "epoch": 4.06, "learning_rate": 0.0002, "loss": 2.8665, "step": 9445 }, { "epoch": 4.06, "learning_rate": 0.0002, "loss": 2.8778, "step": 9450 }, { "epoch": 4.06, "learning_rate": 0.0002, "loss": 2.8614, "step": 9455 }, { "epoch": 4.06, "learning_rate": 0.0002, "loss": 2.8824, "step": 9460 }, { "epoch": 4.06, "learning_rate": 0.0002, "loss": 2.8729, "step": 9465 }, { "epoch": 4.07, "learning_rate": 0.0002, "loss": 2.8771, "step": 9470 }, { "epoch": 4.07, "eval_loss": 2.8779726028442383, "eval_runtime": 5.4094, "eval_samples_per_second": 1825.88, "eval_steps_per_second": 14.419, "eval_top3_3_weighted_f1_score ": 0.4252106970458161, "eval_top_1_macro_f1_score": 0.0641577432963229, "eval_top_1_weighted_f1score": 0.20700521183069107, "eval_top_3_macro_f1_score": 0.1656110416323952, "step": 9472 }, { "epoch": 4.07, "learning_rate": 0.0002, "loss": 2.8591, "step": 9475 }, { "epoch": 4.07, "learning_rate": 0.0002, "loss": 2.9054, "step": 9480 }, { "epoch": 4.07, "learning_rate": 0.0002, "loss": 2.8546, "step": 9485 }, { "epoch": 4.07, "learning_rate": 0.0002, "loss": 2.867, "step": 9490 }, { "epoch": 4.08, "learning_rate": 0.0002, "loss": 2.8687, "step": 9495 }, { "epoch": 4.08, "learning_rate": 0.0002, "loss": 2.8776, "step": 9500 }, { "epoch": 4.08, "learning_rate": 0.0002, "loss": 2.864, "step": 9505 }, { "epoch": 4.08, "learning_rate": 0.0002, "loss": 2.8747, "step": 9510 }, { "epoch": 4.09, "learning_rate": 0.0002, "loss": 2.8784, "step": 9515 }, { "epoch": 4.09, "learning_rate": 0.0002, "loss": 2.8825, "step": 9520 }, { "epoch": 4.09, "learning_rate": 0.0002, "loss": 2.8616, "step": 9525 }, { "epoch": 4.09, "learning_rate": 0.0002, "loss": 2.8754, "step": 9530 }, { "epoch": 4.09, "learning_rate": 0.0002, "loss": 2.8616, "step": 9535 }, { "epoch": 4.09, "eval_loss": 2.8782520294189453, "eval_runtime": 5.3961, "eval_samples_per_second": 1830.397, "eval_steps_per_second": 14.455, "eval_top3_3_weighted_f1_score ": 0.4217188679247572, "eval_top_1_macro_f1_score": 0.059938420057228194, "eval_top_1_weighted_f1score": 0.20490516714063806, "eval_top_3_macro_f1_score": 0.15982189116819304, "step": 9536 }, { "epoch": 4.1, "learning_rate": 0.0002, "loss": 2.882, "step": 9540 }, { "epoch": 4.1, "learning_rate": 0.0002, "loss": 2.8954, "step": 9545 }, { "epoch": 4.1, "learning_rate": 0.0002, "loss": 2.8895, "step": 9550 }, { "epoch": 4.1, "learning_rate": 0.0002, "loss": 2.8846, "step": 9555 }, { "epoch": 4.1, "learning_rate": 0.0002, "loss": 2.8692, "step": 9560 }, { "epoch": 4.11, "learning_rate": 0.0002, "loss": 2.889, "step": 9565 }, { "epoch": 4.11, "learning_rate": 0.0002, "loss": 2.8744, "step": 9570 }, { "epoch": 4.11, "learning_rate": 0.0002, "loss": 2.8817, "step": 9575 }, { "epoch": 4.11, "learning_rate": 0.0002, "loss": 2.8755, "step": 9580 }, { "epoch": 4.12, "learning_rate": 0.0002, "loss": 2.8817, "step": 9585 }, { "epoch": 4.12, "learning_rate": 0.0002, "loss": 2.8787, "step": 9590 }, { "epoch": 4.12, "learning_rate": 0.0002, "loss": 2.8649, "step": 9595 }, { "epoch": 4.12, "learning_rate": 0.0002, "loss": 2.8577, "step": 9600 }, { "epoch": 4.12, "eval_loss": 2.87673020362854, "eval_runtime": 5.3716, "eval_samples_per_second": 1838.757, "eval_steps_per_second": 14.521, "eval_top3_3_weighted_f1_score ": 0.4241371909438834, "eval_top_1_macro_f1_score": 0.06164446683494285, "eval_top_1_weighted_f1score": 0.2063257533667319, "eval_top_3_macro_f1_score": 0.14961892013967493, "step": 9600 }, { "epoch": 4.12, "learning_rate": 0.0002, "loss": 2.866, "step": 9605 }, { "epoch": 4.13, "learning_rate": 0.0002, "loss": 2.8934, "step": 9610 }, { "epoch": 4.13, "learning_rate": 0.0002, "loss": 2.8551, "step": 9615 }, { "epoch": 4.13, "learning_rate": 0.0002, "loss": 2.88, "step": 9620 }, { "epoch": 4.13, "learning_rate": 0.0002, "loss": 2.8691, "step": 9625 }, { "epoch": 4.13, "learning_rate": 0.0002, "loss": 2.8748, "step": 9630 }, { "epoch": 4.14, "learning_rate": 0.0002, "loss": 2.8476, "step": 9635 }, { "epoch": 4.14, "learning_rate": 0.0002, "loss": 2.8511, "step": 9640 }, { "epoch": 4.14, "learning_rate": 0.0002, "loss": 2.8548, "step": 9645 }, { "epoch": 4.14, "learning_rate": 0.0002, "loss": 2.8843, "step": 9650 }, { "epoch": 4.15, "learning_rate": 0.0002, "loss": 2.8641, "step": 9655 }, { "epoch": 4.15, "learning_rate": 0.0002, "loss": 2.8908, "step": 9660 }, { "epoch": 4.15, "eval_loss": 2.879879951477051, "eval_runtime": 5.3414, "eval_samples_per_second": 1849.131, "eval_steps_per_second": 14.603, "eval_top3_3_weighted_f1_score ": 0.42225217111103863, "eval_top_1_macro_f1_score": 0.060374192075967156, "eval_top_1_weighted_f1score": 0.20219577753251478, "eval_top_3_macro_f1_score": 0.16262665440212856, "step": 9664 }, { "epoch": 4.15, "learning_rate": 0.0002, "loss": 2.8659, "step": 9665 }, { "epoch": 4.15, "learning_rate": 0.0002, "loss": 2.8792, "step": 9670 }, { "epoch": 4.15, "learning_rate": 0.0002, "loss": 2.8814, "step": 9675 }, { "epoch": 4.16, "learning_rate": 0.0002, "loss": 2.8846, "step": 9680 }, { "epoch": 4.16, "learning_rate": 0.0002, "loss": 2.8615, "step": 9685 }, { "epoch": 4.16, "learning_rate": 0.0002, "loss": 2.861, "step": 9690 }, { "epoch": 4.16, "learning_rate": 0.0002, "loss": 2.8919, "step": 9695 }, { "epoch": 4.16, "learning_rate": 0.0002, "loss": 2.863, "step": 9700 }, { "epoch": 4.17, "learning_rate": 0.0002, "loss": 2.8653, "step": 9705 }, { "epoch": 4.17, "learning_rate": 0.0002, "loss": 2.8543, "step": 9710 }, { "epoch": 4.17, "learning_rate": 0.0002, "loss": 2.8794, "step": 9715 }, { "epoch": 4.17, "learning_rate": 0.0002, "loss": 2.8426, "step": 9720 }, { "epoch": 4.18, "learning_rate": 0.0002, "loss": 2.8828, "step": 9725 }, { "epoch": 4.18, "eval_loss": 2.8804783821105957, "eval_runtime": 5.386, "eval_samples_per_second": 1833.837, "eval_steps_per_second": 14.482, "eval_top3_3_weighted_f1_score ": 0.42565165650821607, "eval_top_1_macro_f1_score": 0.060127552219321874, "eval_top_1_weighted_f1score": 0.20383335370478492, "eval_top_3_macro_f1_score": 0.1701485885248088, "step": 9728 }, { "epoch": 4.18, "learning_rate": 0.0002, "loss": 2.8662, "step": 9730 }, { "epoch": 4.18, "learning_rate": 0.0002, "loss": 2.8886, "step": 9735 }, { "epoch": 4.18, "learning_rate": 0.0002, "loss": 2.8564, "step": 9740 }, { "epoch": 4.18, "learning_rate": 0.0002, "loss": 2.8761, "step": 9745 }, { "epoch": 4.19, "learning_rate": 0.0002, "loss": 2.8767, "step": 9750 }, { "epoch": 4.19, "learning_rate": 0.0002, "loss": 2.8746, "step": 9755 }, { "epoch": 4.19, "learning_rate": 0.0002, "loss": 2.8851, "step": 9760 }, { "epoch": 4.19, "learning_rate": 0.0002, "loss": 2.8975, "step": 9765 }, { "epoch": 4.19, "learning_rate": 0.0002, "loss": 2.8617, "step": 9770 }, { "epoch": 4.2, "learning_rate": 0.0002, "loss": 2.8709, "step": 9775 }, { "epoch": 4.2, "learning_rate": 0.0002, "loss": 2.874, "step": 9780 }, { "epoch": 4.2, "learning_rate": 0.0002, "loss": 2.87, "step": 9785 }, { "epoch": 4.2, "learning_rate": 0.0002, "loss": 2.8744, "step": 9790 }, { "epoch": 4.2, "eval_loss": 2.8811566829681396, "eval_runtime": 5.3688, "eval_samples_per_second": 1839.718, "eval_steps_per_second": 14.529, "eval_top3_3_weighted_f1_score ": 0.4227489382471628, "eval_top_1_macro_f1_score": 0.062052743274787756, "eval_top_1_weighted_f1score": 0.20170422612536257, "eval_top_3_macro_f1_score": 0.15961342505729212, "step": 9792 }, { "epoch": 4.21, "learning_rate": 0.0002, "loss": 2.8511, "step": 9795 }, { "epoch": 4.21, "learning_rate": 0.0002, "loss": 2.8552, "step": 9800 }, { "epoch": 4.21, "learning_rate": 0.0002, "loss": 2.9049, "step": 9805 }, { "epoch": 4.21, "learning_rate": 0.0002, "loss": 2.8756, "step": 9810 }, { "epoch": 4.21, "learning_rate": 0.0002, "loss": 2.8647, "step": 9815 }, { "epoch": 4.22, "learning_rate": 0.0002, "loss": 2.8916, "step": 9820 }, { "epoch": 4.22, "learning_rate": 0.0002, "loss": 2.8717, "step": 9825 }, { "epoch": 4.22, "learning_rate": 0.0002, "loss": 2.8545, "step": 9830 }, { "epoch": 4.22, "learning_rate": 0.0002, "loss": 2.8712, "step": 9835 }, { "epoch": 4.22, "learning_rate": 0.0002, "loss": 2.8932, "step": 9840 }, { "epoch": 4.23, "learning_rate": 0.0002, "loss": 2.8731, "step": 9845 }, { "epoch": 4.23, "learning_rate": 0.0002, "loss": 2.8679, "step": 9850 }, { "epoch": 4.23, "learning_rate": 0.0002, "loss": 2.8706, "step": 9855 }, { "epoch": 4.23, "eval_loss": 2.879899024963379, "eval_runtime": 5.3621, "eval_samples_per_second": 1841.988, "eval_steps_per_second": 14.546, "eval_top3_3_weighted_f1_score ": 0.4279835897973105, "eval_top_1_macro_f1_score": 0.05889265059198331, "eval_top_1_weighted_f1score": 0.205083467453559, "eval_top_3_macro_f1_score": 0.15827034485660252, "step": 9856 }, { "epoch": 4.23, "learning_rate": 0.0002, "loss": 2.8669, "step": 9860 }, { "epoch": 4.24, "learning_rate": 0.0002, "loss": 2.8931, "step": 9865 }, { "epoch": 4.24, "learning_rate": 0.0002, "loss": 2.8669, "step": 9870 }, { "epoch": 4.24, "learning_rate": 0.0002, "loss": 2.873, "step": 9875 }, { "epoch": 4.24, "learning_rate": 0.0002, "loss": 2.861, "step": 9880 }, { "epoch": 4.24, "learning_rate": 0.0002, "loss": 2.8835, "step": 9885 }, { "epoch": 4.25, "learning_rate": 0.0002, "loss": 2.8886, "step": 9890 }, { "epoch": 4.25, "learning_rate": 0.0002, "loss": 2.885, "step": 9895 }, { "epoch": 4.25, "learning_rate": 0.0002, "loss": 2.8952, "step": 9900 }, { "epoch": 4.25, "learning_rate": 0.0002, "loss": 2.8859, "step": 9905 }, { "epoch": 4.25, "learning_rate": 0.0002, "loss": 2.8761, "step": 9910 }, { "epoch": 4.26, "learning_rate": 0.0002, "loss": 2.8874, "step": 9915 }, { "epoch": 4.26, "learning_rate": 0.0002, "loss": 2.8745, "step": 9920 }, { "epoch": 4.26, "eval_loss": 2.8851325511932373, "eval_runtime": 5.3333, "eval_samples_per_second": 1851.933, "eval_steps_per_second": 14.625, "eval_top3_3_weighted_f1_score ": 0.422462627894626, "eval_top_1_macro_f1_score": 0.06341811301595834, "eval_top_1_weighted_f1score": 0.20387972505464994, "eval_top_3_macro_f1_score": 0.16272604470885255, "step": 9920 }, { "epoch": 4.26, "learning_rate": 0.0002, "loss": 2.8729, "step": 9925 }, { "epoch": 4.26, "learning_rate": 0.0002, "loss": 2.8885, "step": 9930 }, { "epoch": 4.27, "learning_rate": 0.0002, "loss": 2.8657, "step": 9935 }, { "epoch": 4.27, "learning_rate": 0.0002, "loss": 2.8847, "step": 9940 }, { "epoch": 4.27, "learning_rate": 0.0002, "loss": 2.8715, "step": 9945 }, { "epoch": 4.27, "learning_rate": 0.0002, "loss": 2.8726, "step": 9950 }, { "epoch": 4.27, "learning_rate": 0.0002, "loss": 2.8639, "step": 9955 }, { "epoch": 4.28, "learning_rate": 0.0002, "loss": 2.868, "step": 9960 }, { "epoch": 4.28, "learning_rate": 0.0002, "loss": 2.8602, "step": 9965 }, { "epoch": 4.28, "learning_rate": 0.0002, "loss": 2.8617, "step": 9970 }, { "epoch": 4.28, "learning_rate": 0.0002, "loss": 2.8811, "step": 9975 }, { "epoch": 4.29, "learning_rate": 0.0002, "loss": 2.8497, "step": 9980 }, { "epoch": 4.29, "eval_loss": 2.8782029151916504, "eval_runtime": 5.3368, "eval_samples_per_second": 1850.72, "eval_steps_per_second": 14.615, "eval_top3_3_weighted_f1_score ": 0.42270002035339793, "eval_top_1_macro_f1_score": 0.05897566452561095, "eval_top_1_weighted_f1score": 0.20188161923476727, "eval_top_3_macro_f1_score": 0.16398597778615756, "step": 9984 }, { "epoch": 4.29, "learning_rate": 0.0002, "loss": 2.862, "step": 9985 }, { "epoch": 4.29, "learning_rate": 0.0002, "loss": 2.8789, "step": 9990 }, { "epoch": 4.29, "learning_rate": 0.0002, "loss": 2.8553, "step": 9995 }, { "epoch": 4.29, "learning_rate": 0.0002, "loss": 2.8945, "step": 10000 }, { "epoch": 4.3, "learning_rate": 0.0002, "loss": 2.873, "step": 10005 }, { "epoch": 4.3, "learning_rate": 0.0002, "loss": 2.8449, "step": 10010 }, { "epoch": 4.3, "learning_rate": 0.0002, "loss": 2.8841, "step": 10015 }, { "epoch": 4.3, "learning_rate": 0.0002, "loss": 2.8727, "step": 10020 }, { "epoch": 4.3, "learning_rate": 0.0002, "loss": 2.8746, "step": 10025 }, { "epoch": 4.31, "learning_rate": 0.0002, "loss": 2.8733, "step": 10030 }, { "epoch": 4.31, "learning_rate": 0.0002, "loss": 2.8732, "step": 10035 }, { "epoch": 4.31, "learning_rate": 0.0002, "loss": 2.872, "step": 10040 }, { "epoch": 4.31, "learning_rate": 0.0002, "loss": 2.8799, "step": 10045 }, { "epoch": 4.31, "eval_loss": 2.880223512649536, "eval_runtime": 5.4218, "eval_samples_per_second": 1821.72, "eval_steps_per_second": 14.386, "eval_top3_3_weighted_f1_score ": 0.4247351525921999, "eval_top_1_macro_f1_score": 0.06044784376357685, "eval_top_1_weighted_f1score": 0.20083124943581332, "eval_top_3_macro_f1_score": 0.1675721411499172, "step": 10048 }, { "epoch": 4.32, "learning_rate": 0.0002, "loss": 2.8721, "step": 10050 }, { "epoch": 4.32, "learning_rate": 0.0002, "loss": 2.8763, "step": 10055 }, { "epoch": 4.32, "learning_rate": 0.0002, "loss": 2.8823, "step": 10060 }, { "epoch": 4.32, "learning_rate": 0.0002, "loss": 2.8615, "step": 10065 }, { "epoch": 4.32, "learning_rate": 0.0002, "loss": 2.8798, "step": 10070 }, { "epoch": 4.33, "learning_rate": 0.0002, "loss": 2.8715, "step": 10075 }, { "epoch": 4.33, "learning_rate": 0.0002, "loss": 2.8782, "step": 10080 }, { "epoch": 4.33, "learning_rate": 0.0002, "loss": 2.8802, "step": 10085 }, { "epoch": 4.33, "learning_rate": 0.0002, "loss": 2.873, "step": 10090 }, { "epoch": 4.33, "learning_rate": 0.0002, "loss": 2.8872, "step": 10095 }, { "epoch": 4.34, "learning_rate": 0.0002, "loss": 2.8498, "step": 10100 }, { "epoch": 4.34, "learning_rate": 0.0002, "loss": 2.8765, "step": 10105 }, { "epoch": 4.34, "learning_rate": 0.0002, "loss": 2.884, "step": 10110 }, { "epoch": 4.34, "eval_loss": 2.8734776973724365, "eval_runtime": 139.1467, "eval_samples_per_second": 70.983, "eval_steps_per_second": 0.561, "eval_top3_3_weighted_f1_score ": 0.4308448068453806, "eval_top_1_macro_f1_score": 0.06091145198219989, "eval_top_1_weighted_f1score": 0.20389321132048763, "eval_top_3_macro_f1_score": 0.16629805337811587, "step": 10112 }, { "epoch": 4.34, "learning_rate": 0.0002, "loss": 2.8456, "step": 10115 }, { "epoch": 4.35, "learning_rate": 0.0002, "loss": 2.864, "step": 10120 }, { "epoch": 4.35, "learning_rate": 0.0002, "loss": 2.8585, "step": 10125 }, { "epoch": 4.35, "learning_rate": 0.0002, "loss": 2.8713, "step": 10130 }, { "epoch": 4.35, "learning_rate": 0.0002, "loss": 2.866, "step": 10135 }, { "epoch": 4.35, "learning_rate": 0.0002, "loss": 2.8866, "step": 10140 }, { "epoch": 4.36, "learning_rate": 0.0002, "loss": 2.8774, "step": 10145 }, { "epoch": 4.36, "learning_rate": 0.0002, "loss": 2.8601, "step": 10150 }, { "epoch": 4.36, "learning_rate": 0.0002, "loss": 2.8647, "step": 10155 }, { "epoch": 4.36, "learning_rate": 0.0002, "loss": 2.8821, "step": 10160 }, { "epoch": 4.36, "learning_rate": 0.0002, "loss": 2.865, "step": 10165 }, { "epoch": 4.37, "learning_rate": 0.0002, "loss": 2.8653, "step": 10170 }, { "epoch": 4.37, "learning_rate": 0.0002, "loss": 2.8674, "step": 10175 }, { "epoch": 4.37, "eval_loss": 2.879244327545166, "eval_runtime": 5.4222, "eval_samples_per_second": 1821.597, "eval_steps_per_second": 14.385, "eval_top3_3_weighted_f1_score ": 0.4266599016089825, "eval_top_1_macro_f1_score": 0.06294538988470444, "eval_top_1_weighted_f1score": 0.206026998323378, "eval_top_3_macro_f1_score": 0.17407376498147326, "step": 10176 }, { "epoch": 4.37, "learning_rate": 0.0002, "loss": 2.8591, "step": 10180 }, { "epoch": 4.37, "learning_rate": 0.0002, "loss": 2.8701, "step": 10185 }, { "epoch": 4.38, "learning_rate": 0.0002, "loss": 2.8676, "step": 10190 }, { "epoch": 4.38, "learning_rate": 0.0002, "loss": 2.8613, "step": 10195 }, { "epoch": 4.38, "learning_rate": 0.0002, "loss": 2.8336, "step": 10200 }, { "epoch": 4.38, "learning_rate": 0.0002, "loss": 2.8824, "step": 10205 }, { "epoch": 4.38, "learning_rate": 0.0002, "loss": 2.8491, "step": 10210 }, { "epoch": 4.39, "learning_rate": 0.0002, "loss": 2.8734, "step": 10215 }, { "epoch": 4.39, "learning_rate": 0.0002, "loss": 2.8711, "step": 10220 }, { "epoch": 4.39, "learning_rate": 0.0002, "loss": 2.8716, "step": 10225 }, { "epoch": 4.39, "learning_rate": 0.0002, "loss": 2.8491, "step": 10230 }, { "epoch": 4.39, "learning_rate": 0.0002, "loss": 2.8757, "step": 10235 }, { "epoch": 4.4, "learning_rate": 0.0002, "loss": 2.8691, "step": 10240 }, { "epoch": 4.4, "eval_loss": 2.879953622817993, "eval_runtime": 5.3715, "eval_samples_per_second": 1838.77, "eval_steps_per_second": 14.521, "eval_top3_3_weighted_f1_score ": 0.4198328743907584, "eval_top_1_macro_f1_score": 0.06120930163098784, "eval_top_1_weighted_f1score": 0.20363865109484638, "eval_top_3_macro_f1_score": 0.16543035959168473, "step": 10240 }, { "epoch": 4.4, "learning_rate": 0.0002, "loss": 2.8769, "step": 10245 }, { "epoch": 4.4, "learning_rate": 0.0002, "loss": 2.8655, "step": 10250 }, { "epoch": 4.4, "learning_rate": 0.0002, "loss": 2.8938, "step": 10255 }, { "epoch": 4.41, "learning_rate": 0.0002, "loss": 2.8763, "step": 10260 }, { "epoch": 4.41, "learning_rate": 0.0002, "loss": 2.8786, "step": 10265 }, { "epoch": 4.41, "learning_rate": 0.0002, "loss": 2.855, "step": 10270 }, { "epoch": 4.41, "learning_rate": 0.0002, "loss": 2.8589, "step": 10275 }, { "epoch": 4.41, "learning_rate": 0.0002, "loss": 2.8651, "step": 10280 }, { "epoch": 4.42, "learning_rate": 0.0002, "loss": 2.8724, "step": 10285 }, { "epoch": 4.42, "learning_rate": 0.0002, "loss": 2.8898, "step": 10290 }, { "epoch": 4.42, "learning_rate": 0.0002, "loss": 2.881, "step": 10295 }, { "epoch": 4.42, "learning_rate": 0.0002, "loss": 2.8583, "step": 10300 }, { "epoch": 4.42, "eval_loss": 2.8827550411224365, "eval_runtime": 5.4789, "eval_samples_per_second": 1802.738, "eval_steps_per_second": 14.236, "eval_top3_3_weighted_f1_score ": 0.42347784477898887, "eval_top_1_macro_f1_score": 0.06256916446524377, "eval_top_1_weighted_f1score": 0.20833558425133877, "eval_top_3_macro_f1_score": 0.16414994500053223, "step": 10304 }, { "epoch": 4.42, "learning_rate": 0.0002, "loss": 2.8842, "step": 10305 }, { "epoch": 4.43, "learning_rate": 0.0002, "loss": 2.8786, "step": 10310 }, { "epoch": 4.43, "learning_rate": 0.0002, "loss": 2.8845, "step": 10315 }, { "epoch": 4.43, "learning_rate": 0.0002, "loss": 2.8607, "step": 10320 }, { "epoch": 4.43, "learning_rate": 0.0002, "loss": 2.8784, "step": 10325 }, { "epoch": 4.44, "learning_rate": 0.0002, "loss": 2.8683, "step": 10330 }, { "epoch": 4.44, "learning_rate": 0.0002, "loss": 2.8775, "step": 10335 }, { "epoch": 4.44, "learning_rate": 0.0002, "loss": 2.8632, "step": 10340 }, { "epoch": 4.44, "learning_rate": 0.0002, "loss": 2.8847, "step": 10345 }, { "epoch": 4.44, "learning_rate": 0.0002, "loss": 2.8616, "step": 10350 }, { "epoch": 4.45, "learning_rate": 0.0002, "loss": 2.8863, "step": 10355 }, { "epoch": 4.45, "learning_rate": 0.0002, "loss": 2.8621, "step": 10360 }, { "epoch": 4.45, "learning_rate": 0.0002, "loss": 2.8814, "step": 10365 }, { "epoch": 4.45, "eval_loss": 2.8773937225341797, "eval_runtime": 5.3195, "eval_samples_per_second": 1856.755, "eval_steps_per_second": 14.663, "eval_top3_3_weighted_f1_score ": 0.4253007826286828, "eval_top_1_macro_f1_score": 0.06625233119299727, "eval_top_1_weighted_f1score": 0.20676790414176083, "eval_top_3_macro_f1_score": 0.15646460587288663, "step": 10368 }, { "epoch": 4.45, "learning_rate": 0.0002, "loss": 2.8721, "step": 10370 }, { "epoch": 4.45, "learning_rate": 0.0002, "loss": 2.8784, "step": 10375 }, { "epoch": 4.46, "learning_rate": 0.0002, "loss": 2.8737, "step": 10380 }, { "epoch": 4.46, "learning_rate": 0.0002, "loss": 2.8854, "step": 10385 }, { "epoch": 4.46, "learning_rate": 0.0002, "loss": 2.8788, "step": 10390 }, { "epoch": 4.46, "learning_rate": 0.0002, "loss": 2.8855, "step": 10395 }, { "epoch": 4.47, "learning_rate": 0.0002, "loss": 2.8558, "step": 10400 }, { "epoch": 4.47, "learning_rate": 0.0002, "loss": 2.8643, "step": 10405 }, { "epoch": 4.47, "learning_rate": 0.0002, "loss": 2.8695, "step": 10410 }, { "epoch": 4.47, "learning_rate": 0.0002, "loss": 2.8685, "step": 10415 }, { "epoch": 4.47, "learning_rate": 0.0002, "loss": 2.8605, "step": 10420 }, { "epoch": 4.48, "learning_rate": 0.0002, "loss": 2.8704, "step": 10425 }, { "epoch": 4.48, "learning_rate": 0.0002, "loss": 2.8757, "step": 10430 }, { "epoch": 4.48, "eval_loss": 2.874392032623291, "eval_runtime": 5.3568, "eval_samples_per_second": 1843.813, "eval_steps_per_second": 14.561, "eval_top3_3_weighted_f1_score ": 0.4241647649747962, "eval_top_1_macro_f1_score": 0.06217161351615617, "eval_top_1_weighted_f1score": 0.20898908527186977, "eval_top_3_macro_f1_score": 0.1573498335384943, "step": 10432 }, { "epoch": 4.48, "learning_rate": 0.0002, "loss": 2.8822, "step": 10435 }, { "epoch": 4.48, "learning_rate": 0.0002, "loss": 2.8756, "step": 10440 }, { "epoch": 4.48, "learning_rate": 0.0002, "loss": 2.8591, "step": 10445 }, { "epoch": 4.49, "learning_rate": 0.0002, "loss": 2.8642, "step": 10450 }, { "epoch": 4.49, "learning_rate": 0.0002, "loss": 2.8757, "step": 10455 }, { "epoch": 4.49, "learning_rate": 0.0002, "loss": 2.8917, "step": 10460 }, { "epoch": 4.49, "learning_rate": 0.0002, "loss": 2.8637, "step": 10465 }, { "epoch": 4.5, "learning_rate": 0.0002, "loss": 2.8601, "step": 10470 }, { "epoch": 4.5, "learning_rate": 0.0002, "loss": 2.8885, "step": 10475 }, { "epoch": 4.5, "learning_rate": 0.0002, "loss": 2.8794, "step": 10480 }, { "epoch": 4.5, "learning_rate": 0.0002, "loss": 2.8707, "step": 10485 }, { "epoch": 4.5, "learning_rate": 0.0002, "loss": 2.8813, "step": 10490 }, { "epoch": 4.51, "learning_rate": 0.0002, "loss": 2.8719, "step": 10495 }, { "epoch": 4.51, "eval_loss": 2.8797762393951416, "eval_runtime": 5.403, "eval_samples_per_second": 1828.074, "eval_steps_per_second": 14.437, "eval_top3_3_weighted_f1_score ": 0.4272453879809695, "eval_top_1_macro_f1_score": 0.06766294229572796, "eval_top_1_weighted_f1score": 0.20884783338044238, "eval_top_3_macro_f1_score": 0.1722198495559193, "step": 10496 }, { "epoch": 4.51, "learning_rate": 0.0002, "loss": 2.8728, "step": 10500 }, { "epoch": 4.51, "learning_rate": 0.0002, "loss": 2.8593, "step": 10505 }, { "epoch": 4.51, "learning_rate": 0.0002, "loss": 2.8696, "step": 10510 }, { "epoch": 4.51, "learning_rate": 0.0002, "loss": 2.8629, "step": 10515 }, { "epoch": 4.52, "learning_rate": 0.0002, "loss": 2.863, "step": 10520 }, { "epoch": 4.52, "learning_rate": 0.0002, "loss": 2.8742, "step": 10525 }, { "epoch": 4.52, "learning_rate": 0.0002, "loss": 2.8855, "step": 10530 }, { "epoch": 4.52, "learning_rate": 0.0002, "loss": 2.8857, "step": 10535 }, { "epoch": 4.53, "learning_rate": 0.0002, "loss": 2.8721, "step": 10540 }, { "epoch": 4.53, "learning_rate": 0.0002, "loss": 2.868, "step": 10545 }, { "epoch": 4.53, "learning_rate": 0.0002, "loss": 2.8471, "step": 10550 }, { "epoch": 4.53, "learning_rate": 0.0002, "loss": 2.8562, "step": 10555 }, { "epoch": 4.53, "learning_rate": 0.0002, "loss": 2.8585, "step": 10560 }, { "epoch": 4.53, "eval_loss": 2.8755102157592773, "eval_runtime": 5.5079, "eval_samples_per_second": 1793.255, "eval_steps_per_second": 14.162, "eval_top3_3_weighted_f1_score ": 0.4226135577632516, "eval_top_1_macro_f1_score": 0.060655061707233185, "eval_top_1_weighted_f1score": 0.2049577373490113, "eval_top_3_macro_f1_score": 0.16153021748453586, "step": 10560 }, { "epoch": 4.54, "learning_rate": 0.0002, "loss": 2.8577, "step": 10565 }, { "epoch": 4.54, "learning_rate": 0.0002, "loss": 2.8732, "step": 10570 }, { "epoch": 4.54, "learning_rate": 0.0002, "loss": 2.8692, "step": 10575 }, { "epoch": 4.54, "learning_rate": 0.0002, "loss": 2.8647, "step": 10580 }, { "epoch": 4.54, "learning_rate": 0.0002, "loss": 2.8605, "step": 10585 }, { "epoch": 4.55, "learning_rate": 0.0002, "loss": 2.865, "step": 10590 }, { "epoch": 4.55, "learning_rate": 0.0002, "loss": 2.8874, "step": 10595 }, { "epoch": 4.55, "learning_rate": 0.0002, "loss": 2.8683, "step": 10600 }, { "epoch": 4.55, "learning_rate": 0.0002, "loss": 2.8801, "step": 10605 }, { "epoch": 4.56, "learning_rate": 0.0002, "loss": 2.8742, "step": 10610 }, { "epoch": 4.56, "learning_rate": 0.0002, "loss": 2.8899, "step": 10615 }, { "epoch": 4.56, "learning_rate": 0.0002, "loss": 2.8946, "step": 10620 }, { "epoch": 4.56, "eval_loss": 2.8786725997924805, "eval_runtime": 5.3244, "eval_samples_per_second": 1855.061, "eval_steps_per_second": 14.65, "eval_top3_3_weighted_f1_score ": 0.42379557588689853, "eval_top_1_macro_f1_score": 0.05847571229304535, "eval_top_1_weighted_f1score": 0.2037950547430509, "eval_top_3_macro_f1_score": 0.16553631749588468, "step": 10624 }, { "epoch": 4.56, "learning_rate": 0.0002, "loss": 2.8632, "step": 10625 }, { "epoch": 4.56, "learning_rate": 0.0002, "loss": 2.8722, "step": 10630 }, { "epoch": 4.57, "learning_rate": 0.0002, "loss": 2.8608, "step": 10635 }, { "epoch": 4.57, "learning_rate": 0.0002, "loss": 2.8459, "step": 10640 }, { "epoch": 4.57, "learning_rate": 0.0002, "loss": 2.87, "step": 10645 }, { "epoch": 4.57, "learning_rate": 0.0002, "loss": 2.8787, "step": 10650 }, { "epoch": 4.57, "learning_rate": 0.0002, "loss": 2.8491, "step": 10655 }, { "epoch": 4.58, "learning_rate": 0.0002, "loss": 2.8665, "step": 10660 }, { "epoch": 4.58, "learning_rate": 0.0002, "loss": 2.8937, "step": 10665 }, { "epoch": 4.58, "learning_rate": 0.0002, "loss": 2.8568, "step": 10670 }, { "epoch": 4.58, "learning_rate": 0.0002, "loss": 2.8775, "step": 10675 }, { "epoch": 4.59, "learning_rate": 0.0002, "loss": 2.8685, "step": 10680 }, { "epoch": 4.59, "learning_rate": 0.0002, "loss": 2.8719, "step": 10685 }, { "epoch": 4.59, "eval_loss": 2.8753890991210938, "eval_runtime": 5.3311, "eval_samples_per_second": 1852.715, "eval_steps_per_second": 14.631, "eval_top3_3_weighted_f1_score ": 0.42260784607554397, "eval_top_1_macro_f1_score": 0.06287292865160847, "eval_top_1_weighted_f1score": 0.2094794253009507, "eval_top_3_macro_f1_score": 0.1610328994366965, "step": 10688 }, { "epoch": 4.59, "learning_rate": 0.0002, "loss": 2.8832, "step": 10690 }, { "epoch": 4.59, "learning_rate": 0.0002, "loss": 2.8564, "step": 10695 }, { "epoch": 4.59, "learning_rate": 0.0002, "loss": 2.8732, "step": 10700 }, { "epoch": 4.6, "learning_rate": 0.0002, "loss": 2.847, "step": 10705 }, { "epoch": 4.6, "learning_rate": 0.0002, "loss": 2.8663, "step": 10710 }, { "epoch": 4.6, "learning_rate": 0.0002, "loss": 2.8617, "step": 10715 }, { "epoch": 4.6, "learning_rate": 0.0002, "loss": 2.8748, "step": 10720 }, { "epoch": 4.6, "learning_rate": 0.0002, "loss": 2.8826, "step": 10725 }, { "epoch": 4.61, "learning_rate": 0.0002, "loss": 2.8555, "step": 10730 }, { "epoch": 4.61, "learning_rate": 0.0002, "loss": 2.8914, "step": 10735 }, { "epoch": 4.61, "learning_rate": 0.0002, "loss": 2.8661, "step": 10740 }, { "epoch": 4.61, "learning_rate": 0.0002, "loss": 2.8907, "step": 10745 }, { "epoch": 4.62, "learning_rate": 0.0002, "loss": 2.8685, "step": 10750 }, { "epoch": 4.62, "eval_loss": 2.879948377609253, "eval_runtime": 5.4339, "eval_samples_per_second": 1817.655, "eval_steps_per_second": 14.354, "eval_top3_3_weighted_f1_score ": 0.4235817003770342, "eval_top_1_macro_f1_score": 0.06705393736968994, "eval_top_1_weighted_f1score": 0.20217185922297795, "eval_top_3_macro_f1_score": 0.17065551134269813, "step": 10752 }, { "epoch": 4.62, "learning_rate": 0.0002, "loss": 2.8665, "step": 10755 }, { "epoch": 4.62, "learning_rate": 0.0002, "loss": 2.8751, "step": 10760 }, { "epoch": 4.62, "learning_rate": 0.0002, "loss": 2.863, "step": 10765 }, { "epoch": 4.62, "learning_rate": 0.0002, "loss": 2.8561, "step": 10770 }, { "epoch": 4.63, "learning_rate": 0.0002, "loss": 2.8703, "step": 10775 }, { "epoch": 4.63, "learning_rate": 0.0002, "loss": 2.874, "step": 10780 }, { "epoch": 4.63, "learning_rate": 0.0002, "loss": 2.87, "step": 10785 }, { "epoch": 4.63, "learning_rate": 0.0002, "loss": 2.8626, "step": 10790 }, { "epoch": 4.63, "learning_rate": 0.0002, "loss": 2.8662, "step": 10795 }, { "epoch": 4.64, "learning_rate": 0.0002, "loss": 2.8836, "step": 10800 }, { "epoch": 4.64, "learning_rate": 0.0002, "loss": 2.8696, "step": 10805 }, { "epoch": 4.64, "learning_rate": 0.0002, "loss": 2.8687, "step": 10810 }, { "epoch": 4.64, "learning_rate": 0.0002, "loss": 2.8697, "step": 10815 }, { "epoch": 4.64, "eval_loss": 2.875290870666504, "eval_runtime": 5.4802, "eval_samples_per_second": 1802.315, "eval_steps_per_second": 14.233, "eval_top3_3_weighted_f1_score ": 0.4247873291971013, "eval_top_1_macro_f1_score": 0.06214410102775452, "eval_top_1_weighted_f1score": 0.2042170409560508, "eval_top_3_macro_f1_score": 0.17027586444340612, "step": 10816 }, { "epoch": 4.65, "learning_rate": 0.0002, "loss": 2.8723, "step": 10820 }, { "epoch": 4.65, "learning_rate": 0.0002, "loss": 2.8745, "step": 10825 }, { "epoch": 4.65, "learning_rate": 0.0002, "loss": 2.8828, "step": 10830 }, { "epoch": 4.65, "learning_rate": 0.0002, "loss": 2.8813, "step": 10835 }, { "epoch": 4.65, "learning_rate": 0.0002, "loss": 2.8754, "step": 10840 }, { "epoch": 4.66, "learning_rate": 0.0002, "loss": 2.8569, "step": 10845 }, { "epoch": 4.66, "learning_rate": 0.0002, "loss": 2.8773, "step": 10850 }, { "epoch": 4.66, "learning_rate": 0.0002, "loss": 2.8842, "step": 10855 }, { "epoch": 4.66, "learning_rate": 0.0002, "loss": 2.8441, "step": 10860 }, { "epoch": 4.66, "learning_rate": 0.0002, "loss": 2.8704, "step": 10865 }, { "epoch": 4.67, "learning_rate": 0.0002, "loss": 2.8872, "step": 10870 }, { "epoch": 4.67, "learning_rate": 0.0002, "loss": 2.8667, "step": 10875 }, { "epoch": 4.67, "learning_rate": 0.0002, "loss": 2.8772, "step": 10880 }, { "epoch": 4.67, "eval_loss": 2.8746321201324463, "eval_runtime": 5.3791, "eval_samples_per_second": 1836.176, "eval_steps_per_second": 14.501, "eval_top3_3_weighted_f1_score ": 0.4204443013055093, "eval_top_1_macro_f1_score": 0.0693137360049466, "eval_top_1_weighted_f1score": 0.2055956875544066, "eval_top_3_macro_f1_score": 0.16231714288231683, "step": 10880 }, { "epoch": 4.67, "learning_rate": 0.0002, "loss": 2.8689, "step": 10885 }, { "epoch": 4.68, "learning_rate": 0.0002, "loss": 2.8821, "step": 10890 }, { "epoch": 4.68, "learning_rate": 0.0002, "loss": 2.8656, "step": 10895 }, { "epoch": 4.68, "learning_rate": 0.0002, "loss": 2.8871, "step": 10900 }, { "epoch": 4.68, "learning_rate": 0.0002, "loss": 2.8863, "step": 10905 }, { "epoch": 4.68, "learning_rate": 0.0002, "loss": 2.868, "step": 10910 }, { "epoch": 4.69, "learning_rate": 0.0002, "loss": 2.8785, "step": 10915 }, { "epoch": 4.69, "learning_rate": 0.0002, "loss": 2.8675, "step": 10920 }, { "epoch": 4.69, "learning_rate": 0.0002, "loss": 2.879, "step": 10925 }, { "epoch": 4.69, "learning_rate": 0.0002, "loss": 2.8627, "step": 10930 }, { "epoch": 4.7, "learning_rate": 0.0002, "loss": 2.8582, "step": 10935 }, { "epoch": 4.7, "learning_rate": 0.0002, "loss": 2.8894, "step": 10940 }, { "epoch": 4.7, "eval_loss": 2.8700804710388184, "eval_runtime": 5.4134, "eval_samples_per_second": 1824.558, "eval_steps_per_second": 14.409, "eval_top3_3_weighted_f1_score ": 0.42589136835292835, "eval_top_1_macro_f1_score": 0.059708047360599786, "eval_top_1_weighted_f1score": 0.20443041923958735, "eval_top_3_macro_f1_score": 0.16975798713822587, "step": 10944 }, { "epoch": 4.7, "learning_rate": 0.0002, "loss": 2.8855, "step": 10945 }, { "epoch": 4.7, "learning_rate": 0.0002, "loss": 2.858, "step": 10950 }, { "epoch": 4.7, "learning_rate": 0.0002, "loss": 2.8793, "step": 10955 }, { "epoch": 4.71, "learning_rate": 0.0002, "loss": 2.889, "step": 10960 }, { "epoch": 4.71, "learning_rate": 0.0002, "loss": 2.8608, "step": 10965 }, { "epoch": 4.71, "learning_rate": 0.0002, "loss": 2.8833, "step": 10970 }, { "epoch": 4.71, "learning_rate": 0.0002, "loss": 2.8788, "step": 10975 }, { "epoch": 4.71, "learning_rate": 0.0002, "loss": 2.8621, "step": 10980 }, { "epoch": 4.72, "learning_rate": 0.0002, "loss": 2.8855, "step": 10985 }, { "epoch": 4.72, "learning_rate": 0.0002, "loss": 2.8735, "step": 10990 }, { "epoch": 4.72, "learning_rate": 0.0002, "loss": 2.8818, "step": 10995 }, { "epoch": 4.72, "learning_rate": 0.0002, "loss": 2.8554, "step": 11000 }, { "epoch": 4.73, "learning_rate": 0.0002, "loss": 2.8757, "step": 11005 }, { "epoch": 4.73, "eval_loss": 2.871457099914551, "eval_runtime": 5.416, "eval_samples_per_second": 1823.672, "eval_steps_per_second": 14.402, "eval_top3_3_weighted_f1_score ": 0.42616063103307444, "eval_top_1_macro_f1_score": 0.06361175134933073, "eval_top_1_weighted_f1score": 0.20556103154493974, "eval_top_3_macro_f1_score": 0.17060698685157544, "step": 11008 }, { "epoch": 4.73, "learning_rate": 0.0002, "loss": 2.8743, "step": 11010 }, { "epoch": 4.73, "learning_rate": 0.0002, "loss": 2.8618, "step": 11015 }, { "epoch": 4.73, "learning_rate": 0.0002, "loss": 2.8839, "step": 11020 }, { "epoch": 4.73, "learning_rate": 0.0002, "loss": 2.8524, "step": 11025 }, { "epoch": 4.74, "learning_rate": 0.0002, "loss": 2.8652, "step": 11030 }, { "epoch": 4.74, "learning_rate": 0.0002, "loss": 2.8531, "step": 11035 }, { "epoch": 4.74, "learning_rate": 0.0002, "loss": 2.8837, "step": 11040 }, { "epoch": 4.74, "learning_rate": 0.0002, "loss": 2.882, "step": 11045 }, { "epoch": 4.74, "learning_rate": 0.0002, "loss": 2.8549, "step": 11050 }, { "epoch": 4.75, "learning_rate": 0.0002, "loss": 2.8925, "step": 11055 }, { "epoch": 4.75, "learning_rate": 0.0002, "loss": 2.8674, "step": 11060 }, { "epoch": 4.75, "learning_rate": 0.0002, "loss": 2.8573, "step": 11065 }, { "epoch": 4.75, "learning_rate": 0.0002, "loss": 2.8725, "step": 11070 }, { "epoch": 4.75, "eval_loss": 2.870499610900879, "eval_runtime": 5.3558, "eval_samples_per_second": 1844.158, "eval_steps_per_second": 14.564, "eval_top3_3_weighted_f1_score ": 0.4288978052226948, "eval_top_1_macro_f1_score": 0.07176387029488746, "eval_top_1_weighted_f1score": 0.20843436611601748, "eval_top_3_macro_f1_score": 0.17093716666685138, "step": 11072 }, { "epoch": 4.76, "learning_rate": 0.0002, "loss": 2.8613, "step": 11075 }, { "epoch": 4.76, "learning_rate": 0.0002, "loss": 2.8542, "step": 11080 }, { "epoch": 4.76, "learning_rate": 0.0002, "loss": 2.867, "step": 11085 }, { "epoch": 4.76, "learning_rate": 0.0002, "loss": 2.8616, "step": 11090 }, { "epoch": 4.76, "learning_rate": 0.0002, "loss": 2.8561, "step": 11095 }, { "epoch": 4.77, "learning_rate": 0.0002, "loss": 2.8804, "step": 11100 }, { "epoch": 4.77, "learning_rate": 0.0002, "loss": 2.8846, "step": 11105 }, { "epoch": 4.77, "learning_rate": 0.0002, "loss": 2.879, "step": 11110 }, { "epoch": 4.77, "learning_rate": 0.0002, "loss": 2.8512, "step": 11115 }, { "epoch": 4.77, "learning_rate": 0.0002, "loss": 2.8774, "step": 11120 }, { "epoch": 4.78, "learning_rate": 0.0002, "loss": 2.8659, "step": 11125 }, { "epoch": 4.78, "learning_rate": 0.0002, "loss": 2.8751, "step": 11130 }, { "epoch": 4.78, "learning_rate": 0.0002, "loss": 2.8807, "step": 11135 }, { "epoch": 4.78, "eval_loss": 2.8669276237487793, "eval_runtime": 5.4397, "eval_samples_per_second": 1815.721, "eval_steps_per_second": 14.339, "eval_top3_3_weighted_f1_score ": 0.4257387530117306, "eval_top_1_macro_f1_score": 0.06590494818333718, "eval_top_1_weighted_f1score": 0.20400630100061296, "eval_top_3_macro_f1_score": 0.17254735306962476, "step": 11136 }, { "epoch": 4.78, "learning_rate": 0.0002, "loss": 2.8525, "step": 11140 }, { "epoch": 4.79, "learning_rate": 0.0002, "loss": 2.877, "step": 11145 }, { "epoch": 4.79, "learning_rate": 0.0002, "loss": 2.8589, "step": 11150 }, { "epoch": 4.79, "learning_rate": 0.0002, "loss": 2.8678, "step": 11155 }, { "epoch": 4.79, "learning_rate": 0.0002, "loss": 2.8517, "step": 11160 }, { "epoch": 4.79, "learning_rate": 0.0002, "loss": 2.8519, "step": 11165 }, { "epoch": 4.8, "learning_rate": 0.0002, "loss": 2.8652, "step": 11170 }, { "epoch": 4.8, "learning_rate": 0.0002, "loss": 2.8647, "step": 11175 }, { "epoch": 4.8, "learning_rate": 0.0002, "loss": 2.8527, "step": 11180 }, { "epoch": 4.8, "learning_rate": 0.0002, "loss": 2.8604, "step": 11185 }, { "epoch": 4.8, "learning_rate": 0.0002, "loss": 2.864, "step": 11190 }, { "epoch": 4.81, "learning_rate": 0.0002, "loss": 2.854, "step": 11195 }, { "epoch": 4.81, "learning_rate": 0.0002, "loss": 2.8817, "step": 11200 }, { "epoch": 4.81, "eval_loss": 2.8746719360351562, "eval_runtime": 5.3723, "eval_samples_per_second": 1838.521, "eval_steps_per_second": 14.519, "eval_top3_3_weighted_f1_score ": 0.42587045814788116, "eval_top_1_macro_f1_score": 0.07313648022622676, "eval_top_1_weighted_f1score": 0.2102847351298883, "eval_top_3_macro_f1_score": 0.170707313462331, "step": 11200 }, { "epoch": 4.81, "learning_rate": 0.0002, "loss": 2.8673, "step": 11205 }, { "epoch": 4.81, "learning_rate": 0.0002, "loss": 2.8628, "step": 11210 }, { "epoch": 4.82, "learning_rate": 0.0002, "loss": 2.8783, "step": 11215 }, { "epoch": 4.82, "learning_rate": 0.0002, "loss": 2.8721, "step": 11220 }, { "epoch": 4.82, "learning_rate": 0.0002, "loss": 2.8644, "step": 11225 }, { "epoch": 4.82, "learning_rate": 0.0002, "loss": 2.8755, "step": 11230 }, { "epoch": 4.82, "learning_rate": 0.0002, "loss": 2.8512, "step": 11235 }, { "epoch": 4.83, "learning_rate": 0.0002, "loss": 2.8712, "step": 11240 }, { "epoch": 4.83, "learning_rate": 0.0002, "loss": 2.8686, "step": 11245 }, { "epoch": 4.83, "learning_rate": 0.0002, "loss": 2.8566, "step": 11250 }, { "epoch": 4.83, "learning_rate": 0.0002, "loss": 2.8607, "step": 11255 }, { "epoch": 4.83, "learning_rate": 0.0002, "loss": 2.8644, "step": 11260 }, { "epoch": 4.84, "eval_loss": 2.8743584156036377, "eval_runtime": 5.3747, "eval_samples_per_second": 1837.672, "eval_steps_per_second": 14.512, "eval_top3_3_weighted_f1_score ": 0.42675392687670083, "eval_top_1_macro_f1_score": 0.06248300017997619, "eval_top_1_weighted_f1score": 0.20552355444489265, "eval_top_3_macro_f1_score": 0.15496691610870802, "step": 11264 }, { "epoch": 4.84, "learning_rate": 0.0002, "loss": 2.8604, "step": 11265 }, { "epoch": 4.84, "learning_rate": 0.0002, "loss": 2.8624, "step": 11270 }, { "epoch": 4.84, "learning_rate": 0.0002, "loss": 2.8777, "step": 11275 }, { "epoch": 4.84, "learning_rate": 0.0002, "loss": 2.8722, "step": 11280 }, { "epoch": 4.85, "learning_rate": 0.0002, "loss": 2.8666, "step": 11285 }, { "epoch": 4.85, "learning_rate": 0.0002, "loss": 2.86, "step": 11290 }, { "epoch": 4.85, "learning_rate": 0.0002, "loss": 2.8707, "step": 11295 }, { "epoch": 4.85, "learning_rate": 0.0002, "loss": 2.8704, "step": 11300 }, { "epoch": 4.85, "learning_rate": 0.0002, "loss": 2.8898, "step": 11305 }, { "epoch": 4.86, "learning_rate": 0.0002, "loss": 2.8673, "step": 11310 }, { "epoch": 4.86, "learning_rate": 0.0002, "loss": 2.8565, "step": 11315 }, { "epoch": 4.86, "learning_rate": 0.0002, "loss": 2.8564, "step": 11320 }, { "epoch": 4.86, "learning_rate": 0.0002, "loss": 2.8595, "step": 11325 }, { "epoch": 4.86, "eval_loss": 2.867732524871826, "eval_runtime": 5.444, "eval_samples_per_second": 1814.291, "eval_steps_per_second": 14.328, "eval_top3_3_weighted_f1_score ": 0.4309978290330031, "eval_top_1_macro_f1_score": 0.061983701728155746, "eval_top_1_weighted_f1score": 0.20225068121052153, "eval_top_3_macro_f1_score": 0.16265779170233347, "step": 11328 }, { "epoch": 4.86, "learning_rate": 0.0002, "loss": 2.87, "step": 11330 }, { "epoch": 4.87, "learning_rate": 0.0002, "loss": 2.8574, "step": 11335 }, { "epoch": 4.87, "learning_rate": 0.0002, "loss": 2.8744, "step": 11340 }, { "epoch": 4.87, "learning_rate": 0.0002, "loss": 2.8887, "step": 11345 }, { "epoch": 4.87, "learning_rate": 0.0002, "loss": 2.8804, "step": 11350 }, { "epoch": 4.88, "learning_rate": 0.0002, "loss": 2.8715, "step": 11355 }, { "epoch": 4.88, "learning_rate": 0.0002, "loss": 2.8649, "step": 11360 }, { "epoch": 4.88, "learning_rate": 0.0002, "loss": 2.8839, "step": 11365 }, { "epoch": 4.88, "learning_rate": 0.0002, "loss": 2.8783, "step": 11370 }, { "epoch": 4.88, "learning_rate": 0.0002, "loss": 2.8696, "step": 11375 }, { "epoch": 4.89, "learning_rate": 0.0002, "loss": 2.8499, "step": 11380 }, { "epoch": 4.89, "learning_rate": 0.0002, "loss": 2.87, "step": 11385 }, { "epoch": 4.89, "learning_rate": 0.0002, "loss": 2.8564, "step": 11390 }, { "epoch": 4.89, "eval_loss": 2.8717246055603027, "eval_runtime": 5.4032, "eval_samples_per_second": 1828.006, "eval_steps_per_second": 14.436, "eval_top3_3_weighted_f1_score ": 0.4209417191823971, "eval_top_1_macro_f1_score": 0.05910583218241548, "eval_top_1_weighted_f1score": 0.2016748822248444, "eval_top_3_macro_f1_score": 0.15671989541896147, "step": 11392 }, { "epoch": 4.89, "learning_rate": 0.0002, "loss": 2.8842, "step": 11395 }, { "epoch": 4.89, "learning_rate": 0.0002, "loss": 2.8713, "step": 11400 }, { "epoch": 4.9, "learning_rate": 0.0002, "loss": 2.8689, "step": 11405 }, { "epoch": 4.9, "learning_rate": 0.0002, "loss": 2.8837, "step": 11410 }, { "epoch": 4.9, "learning_rate": 0.0002, "loss": 2.8813, "step": 11415 }, { "epoch": 4.9, "learning_rate": 0.0002, "loss": 2.8727, "step": 11420 }, { "epoch": 4.91, "learning_rate": 0.0002, "loss": 2.8642, "step": 11425 }, { "epoch": 4.91, "learning_rate": 0.0002, "loss": 2.8925, "step": 11430 }, { "epoch": 4.91, "learning_rate": 0.0002, "loss": 2.8666, "step": 11435 }, { "epoch": 4.91, "learning_rate": 0.0002, "loss": 2.8687, "step": 11440 }, { "epoch": 4.91, "learning_rate": 0.0002, "loss": 2.8731, "step": 11445 }, { "epoch": 4.92, "learning_rate": 0.0002, "loss": 2.8668, "step": 11450 }, { "epoch": 4.92, "learning_rate": 0.0002, "loss": 2.8671, "step": 11455 }, { "epoch": 4.92, "eval_loss": 2.870335102081299, "eval_runtime": 5.3908, "eval_samples_per_second": 1832.184, "eval_steps_per_second": 14.469, "eval_top3_3_weighted_f1_score ": 0.4259999648606368, "eval_top_1_macro_f1_score": 0.06792783069712435, "eval_top_1_weighted_f1score": 0.20425705865041188, "eval_top_3_macro_f1_score": 0.1648241576211881, "step": 11456 }, { "epoch": 4.92, "learning_rate": 0.0002, "loss": 2.8905, "step": 11460 }, { "epoch": 4.92, "learning_rate": 0.0002, "loss": 2.8711, "step": 11465 }, { "epoch": 4.92, "learning_rate": 0.0002, "loss": 2.8592, "step": 11470 }, { "epoch": 4.93, "learning_rate": 0.0002, "loss": 2.8706, "step": 11475 }, { "epoch": 4.93, "learning_rate": 0.0002, "loss": 2.8644, "step": 11480 }, { "epoch": 4.93, "learning_rate": 0.0002, "loss": 2.8637, "step": 11485 }, { "epoch": 4.93, "learning_rate": 0.0002, "loss": 2.8692, "step": 11490 }, { "epoch": 4.94, "learning_rate": 0.0002, "loss": 2.8631, "step": 11495 }, { "epoch": 4.94, "learning_rate": 0.0002, "loss": 2.8631, "step": 11500 }, { "epoch": 4.94, "learning_rate": 0.0002, "loss": 2.8639, "step": 11505 }, { "epoch": 4.94, "learning_rate": 0.0002, "loss": 2.8825, "step": 11510 }, { "epoch": 4.94, "learning_rate": 0.0002, "loss": 2.8785, "step": 11515 }, { "epoch": 4.95, "learning_rate": 0.0002, "loss": 2.8823, "step": 11520 }, { "epoch": 4.95, "eval_loss": 2.8750851154327393, "eval_runtime": 5.3705, "eval_samples_per_second": 1839.112, "eval_steps_per_second": 14.524, "eval_top3_3_weighted_f1_score ": 0.4242109118570131, "eval_top_1_macro_f1_score": 0.06375210790313554, "eval_top_1_weighted_f1score": 0.20754066622135697, "eval_top_3_macro_f1_score": 0.16606030329682445, "step": 11520 }, { "epoch": 4.95, "learning_rate": 0.0002, "loss": 2.8884, "step": 11525 }, { "epoch": 4.95, "learning_rate": 0.0002, "loss": 2.8572, "step": 11530 }, { "epoch": 4.95, "learning_rate": 0.0002, "loss": 2.874, "step": 11535 }, { "epoch": 4.95, "learning_rate": 0.0002, "loss": 2.8651, "step": 11540 }, { "epoch": 4.96, "learning_rate": 0.0002, "loss": 2.8744, "step": 11545 }, { "epoch": 4.96, "learning_rate": 0.0002, "loss": 2.8714, "step": 11550 }, { "epoch": 4.96, "learning_rate": 0.0002, "loss": 2.8696, "step": 11555 }, { "epoch": 4.96, "learning_rate": 0.0002, "loss": 2.8759, "step": 11560 }, { "epoch": 4.97, "learning_rate": 0.0002, "loss": 2.8748, "step": 11565 }, { "epoch": 4.97, "learning_rate": 0.0002, "loss": 2.8702, "step": 11570 }, { "epoch": 4.97, "learning_rate": 0.0002, "loss": 2.8682, "step": 11575 }, { "epoch": 4.97, "learning_rate": 0.0002, "loss": 2.8846, "step": 11580 }, { "epoch": 4.97, "eval_loss": 2.867961883544922, "eval_runtime": 5.4009, "eval_samples_per_second": 1828.759, "eval_steps_per_second": 14.442, "eval_top3_3_weighted_f1_score ": 0.42672444215232747, "eval_top_1_macro_f1_score": 0.06347691449250102, "eval_top_1_weighted_f1score": 0.20503581963070142, "eval_top_3_macro_f1_score": 0.1667686899827073, "step": 11584 }, { "epoch": 4.97, "learning_rate": 0.0002, "loss": 2.8562, "step": 11585 }, { "epoch": 4.98, "learning_rate": 0.0002, "loss": 2.8612, "step": 11590 }, { "epoch": 4.98, "learning_rate": 0.0002, "loss": 2.8549, "step": 11595 }, { "epoch": 4.98, "learning_rate": 0.0002, "loss": 2.8825, "step": 11600 }, { "epoch": 4.98, "learning_rate": 0.0002, "loss": 2.8552, "step": 11605 }, { "epoch": 4.98, "learning_rate": 0.0002, "loss": 2.8804, "step": 11610 }, { "epoch": 4.99, "learning_rate": 0.0002, "loss": 2.8684, "step": 11615 }, { "epoch": 4.99, "learning_rate": 0.0002, "loss": 2.8593, "step": 11620 }, { "epoch": 4.99, "learning_rate": 0.0002, "loss": 2.8777, "step": 11625 }, { "epoch": 4.99, "learning_rate": 0.0002, "loss": 2.8714, "step": 11630 }, { "epoch": 5.0, "learning_rate": 0.0002, "loss": 2.8632, "step": 11635 }, { "epoch": 5.0, "learning_rate": 0.0002, "loss": 2.8629, "step": 11640 }, { "epoch": 5.0, "learning_rate": 0.0002, "loss": 2.86, "step": 11645 }, { "epoch": 5.0, "eval_loss": 2.8654212951660156, "eval_runtime": 5.6035, "eval_samples_per_second": 1762.633, "eval_steps_per_second": 13.92, "eval_top3_3_weighted_f1_score ": 0.4220382255574687, "eval_top_1_macro_f1_score": 0.06684360445045137, "eval_top_1_weighted_f1score": 0.20898039023299905, "eval_top_3_macro_f1_score": 0.17073237497455263, "step": 11648 }, { "epoch": 5.0, "learning_rate": 0.0002, "loss": 3.129, "step": 11650 }, { "epoch": 5.0, "learning_rate": 0.0002, "loss": 2.8614, "step": 11655 }, { "epoch": 5.01, "learning_rate": 0.0002, "loss": 2.8601, "step": 11660 }, { "epoch": 5.01, "learning_rate": 0.0002, "loss": 2.8554, "step": 11665 }, { "epoch": 5.01, "learning_rate": 0.0002, "loss": 2.8457, "step": 11670 }, { "epoch": 5.01, "learning_rate": 0.0002, "loss": 2.8544, "step": 11675 }, { "epoch": 5.02, "learning_rate": 0.0002, "loss": 2.8652, "step": 11680 }, { "epoch": 5.02, "learning_rate": 0.0002, "loss": 2.8651, "step": 11685 }, { "epoch": 5.02, "learning_rate": 0.0002, "loss": 2.8725, "step": 11690 }, { "epoch": 5.02, "learning_rate": 0.0002, "loss": 2.8634, "step": 11695 }, { "epoch": 5.02, "learning_rate": 0.0002, "loss": 2.8744, "step": 11700 }, { "epoch": 5.03, "learning_rate": 0.0002, "loss": 2.855, "step": 11705 }, { "epoch": 5.03, "learning_rate": 0.0002, "loss": 2.8393, "step": 11710 }, { "epoch": 5.03, "eval_loss": 2.8707430362701416, "eval_runtime": 5.3598, "eval_samples_per_second": 1842.778, "eval_steps_per_second": 14.553, "eval_top3_3_weighted_f1_score ": 0.42300889856606255, "eval_top_1_macro_f1_score": 0.0596400632884743, "eval_top_1_weighted_f1score": 0.20651925870463048, "eval_top_3_macro_f1_score": 0.1609654670119019, "step": 11712 }, { "epoch": 5.03, "learning_rate": 0.0002, "loss": 2.8609, "step": 11715 }, { "epoch": 5.03, "learning_rate": 0.0002, "loss": 2.8567, "step": 11720 }, { "epoch": 5.03, "learning_rate": 0.0002, "loss": 2.8314, "step": 11725 }, { "epoch": 5.04, "learning_rate": 0.0002, "loss": 2.8734, "step": 11730 }, { "epoch": 5.04, "learning_rate": 0.0002, "loss": 2.8838, "step": 11735 }, { "epoch": 5.04, "learning_rate": 0.0002, "loss": 2.8456, "step": 11740 }, { "epoch": 5.04, "learning_rate": 0.0002, "loss": 2.8792, "step": 11745 }, { "epoch": 5.05, "learning_rate": 0.0002, "loss": 2.8758, "step": 11750 }, { "epoch": 5.05, "learning_rate": 0.0002, "loss": 2.8634, "step": 11755 }, { "epoch": 5.05, "learning_rate": 0.0002, "loss": 2.8499, "step": 11760 }, { "epoch": 5.05, "learning_rate": 0.0002, "loss": 2.8573, "step": 11765 }, { "epoch": 5.05, "learning_rate": 0.0002, "loss": 2.8563, "step": 11770 }, { "epoch": 5.06, "learning_rate": 0.0002, "loss": 2.8518, "step": 11775 }, { "epoch": 5.06, "eval_loss": 2.874115228652954, "eval_runtime": 5.4346, "eval_samples_per_second": 1817.431, "eval_steps_per_second": 14.352, "eval_top3_3_weighted_f1_score ": 0.42229709993671566, "eval_top_1_macro_f1_score": 0.06295362765695227, "eval_top_1_weighted_f1score": 0.20956336439790296, "eval_top_3_macro_f1_score": 0.16666229223268295, "step": 11776 }, { "epoch": 5.06, "learning_rate": 0.0002, "loss": 2.8844, "step": 11780 }, { "epoch": 5.06, "learning_rate": 0.0002, "loss": 2.8602, "step": 11785 }, { "epoch": 5.06, "learning_rate": 0.0002, "loss": 2.8671, "step": 11790 }, { "epoch": 5.06, "learning_rate": 0.0002, "loss": 2.8627, "step": 11795 }, { "epoch": 5.07, "learning_rate": 0.0002, "loss": 2.8666, "step": 11800 }, { "epoch": 5.07, "learning_rate": 0.0002, "loss": 2.8503, "step": 11805 }, { "epoch": 5.07, "learning_rate": 0.0002, "loss": 2.8662, "step": 11810 }, { "epoch": 5.07, "learning_rate": 0.0002, "loss": 2.8648, "step": 11815 }, { "epoch": 5.08, "learning_rate": 0.0002, "loss": 2.8638, "step": 11820 }, { "epoch": 5.08, "learning_rate": 0.0002, "loss": 2.8403, "step": 11825 }, { "epoch": 5.08, "learning_rate": 0.0002, "loss": 2.8703, "step": 11830 }, { "epoch": 5.08, "learning_rate": 0.0002, "loss": 2.8691, "step": 11835 }, { "epoch": 5.08, "learning_rate": 0.0002, "loss": 2.8768, "step": 11840 }, { "epoch": 5.08, "eval_loss": 2.8761074542999268, "eval_runtime": 5.4132, "eval_samples_per_second": 1824.612, "eval_steps_per_second": 14.409, "eval_top3_3_weighted_f1_score ": 0.4203521589308624, "eval_top_1_macro_f1_score": 0.06184167382652768, "eval_top_1_weighted_f1score": 0.20401684997897046, "eval_top_3_macro_f1_score": 0.15860838499071558, "step": 11840 }, { "epoch": 5.09, "learning_rate": 0.0002, "loss": 2.8447, "step": 11845 }, { "epoch": 5.09, "learning_rate": 0.0002, "loss": 2.8608, "step": 11850 }, { "epoch": 5.09, "learning_rate": 0.0002, "loss": 2.872, "step": 11855 }, { "epoch": 5.09, "learning_rate": 0.0002, "loss": 2.867, "step": 11860 }, { "epoch": 5.09, "learning_rate": 0.0002, "loss": 2.8593, "step": 11865 }, { "epoch": 5.1, "learning_rate": 0.0002, "loss": 2.8622, "step": 11870 }, { "epoch": 5.1, "learning_rate": 0.0002, "loss": 2.8737, "step": 11875 }, { "epoch": 5.1, "learning_rate": 0.0002, "loss": 2.8491, "step": 11880 }, { "epoch": 5.1, "learning_rate": 0.0002, "loss": 2.8556, "step": 11885 }, { "epoch": 5.11, "learning_rate": 0.0002, "loss": 2.8807, "step": 11890 }, { "epoch": 5.11, "learning_rate": 0.0002, "loss": 2.8684, "step": 11895 }, { "epoch": 5.11, "learning_rate": 0.0002, "loss": 2.8505, "step": 11900 }, { "epoch": 5.11, "eval_loss": 2.8665826320648193, "eval_runtime": 5.4113, "eval_samples_per_second": 1825.258, "eval_steps_per_second": 14.414, "eval_top3_3_weighted_f1_score ": 0.4268338631196219, "eval_top_1_macro_f1_score": 0.06435100153173876, "eval_top_1_weighted_f1score": 0.20520514027260486, "eval_top_3_macro_f1_score": 0.1676481366511563, "step": 11904 }, { "epoch": 5.11, "learning_rate": 0.0002, "loss": 2.8601, "step": 11905 }, { "epoch": 5.11, "learning_rate": 0.0002, "loss": 2.8628, "step": 11910 }, { "epoch": 5.12, "learning_rate": 0.0002, "loss": 2.8658, "step": 11915 }, { "epoch": 5.12, "learning_rate": 0.0002, "loss": 2.871, "step": 11920 }, { "epoch": 5.12, "learning_rate": 0.0002, "loss": 2.8513, "step": 11925 }, { "epoch": 5.12, "learning_rate": 0.0002, "loss": 2.8764, "step": 11930 }, { "epoch": 5.12, "learning_rate": 0.0002, "loss": 2.868, "step": 11935 }, { "epoch": 5.13, "learning_rate": 0.0002, "loss": 2.8653, "step": 11940 }, { "epoch": 5.13, "learning_rate": 0.0002, "loss": 2.8553, "step": 11945 }, { "epoch": 5.13, "learning_rate": 0.0002, "loss": 2.8699, "step": 11950 }, { "epoch": 5.13, "learning_rate": 0.0002, "loss": 2.8497, "step": 11955 }, { "epoch": 5.14, "learning_rate": 0.0002, "loss": 2.8776, "step": 11960 }, { "epoch": 5.14, "learning_rate": 0.0002, "loss": 2.8804, "step": 11965 }, { "epoch": 5.14, "eval_loss": 2.869213104248047, "eval_runtime": 5.3698, "eval_samples_per_second": 1839.376, "eval_steps_per_second": 14.526, "eval_top3_3_weighted_f1_score ": 0.4286770986392775, "eval_top_1_macro_f1_score": 0.06212153921303334, "eval_top_1_weighted_f1score": 0.20747741095249192, "eval_top_3_macro_f1_score": 0.16919109524391723, "step": 11968 }, { "epoch": 5.14, "learning_rate": 0.0002, "loss": 2.8778, "step": 11970 }, { "epoch": 5.14, "learning_rate": 0.0002, "loss": 2.8382, "step": 11975 }, { "epoch": 5.14, "learning_rate": 0.0002, "loss": 2.8638, "step": 11980 }, { "epoch": 5.15, "learning_rate": 0.0002, "loss": 2.8789, "step": 11985 }, { "epoch": 5.15, "learning_rate": 0.0002, "loss": 2.8688, "step": 11990 }, { "epoch": 5.15, "learning_rate": 0.0002, "loss": 2.8525, "step": 11995 }, { "epoch": 5.15, "learning_rate": 0.0002, "loss": 2.8605, "step": 12000 }, { "epoch": 5.15, "learning_rate": 0.0002, "loss": 2.8528, "step": 12005 }, { "epoch": 5.16, "learning_rate": 0.0002, "loss": 2.8558, "step": 12010 }, { "epoch": 5.16, "learning_rate": 0.0002, "loss": 2.8442, "step": 12015 }, { "epoch": 5.16, "learning_rate": 0.0002, "loss": 2.8441, "step": 12020 }, { "epoch": 5.16, "learning_rate": 0.0002, "loss": 2.8646, "step": 12025 }, { "epoch": 5.17, "learning_rate": 0.0002, "loss": 2.8699, "step": 12030 }, { "epoch": 5.17, "eval_loss": 2.8711791038513184, "eval_runtime": 5.4033, "eval_samples_per_second": 1827.959, "eval_steps_per_second": 14.436, "eval_top3_3_weighted_f1_score ": 0.42502434310512177, "eval_top_1_macro_f1_score": 0.06390669240183022, "eval_top_1_weighted_f1score": 0.20611401320076303, "eval_top_3_macro_f1_score": 0.172394356924741, "step": 12032 }, { "epoch": 5.17, "learning_rate": 0.0002, "loss": 2.8558, "step": 12035 }, { "epoch": 5.17, "learning_rate": 0.0002, "loss": 2.885, "step": 12040 }, { "epoch": 5.17, "learning_rate": 0.0002, "loss": 2.848, "step": 12045 }, { "epoch": 5.17, "learning_rate": 0.0002, "loss": 2.8417, "step": 12050 }, { "epoch": 5.18, "learning_rate": 0.0002, "loss": 2.8463, "step": 12055 }, { "epoch": 5.18, "learning_rate": 0.0002, "loss": 2.8496, "step": 12060 }, { "epoch": 5.18, "learning_rate": 0.0002, "loss": 2.8607, "step": 12065 }, { "epoch": 5.18, "learning_rate": 0.0002, "loss": 2.8511, "step": 12070 }, { "epoch": 5.18, "learning_rate": 0.0002, "loss": 2.8565, "step": 12075 }, { "epoch": 5.19, "learning_rate": 0.0002, "loss": 2.8628, "step": 12080 }, { "epoch": 5.19, "learning_rate": 0.0002, "loss": 2.8676, "step": 12085 }, { "epoch": 5.19, "learning_rate": 0.0002, "loss": 2.8555, "step": 12090 }, { "epoch": 5.19, "learning_rate": 0.0002, "loss": 2.8641, "step": 12095 }, { "epoch": 5.19, "eval_loss": 2.8671929836273193, "eval_runtime": 5.3803, "eval_samples_per_second": 1835.788, "eval_steps_per_second": 14.497, "eval_top3_3_weighted_f1_score ": 0.42276656929871903, "eval_top_1_macro_f1_score": 0.06505845030432661, "eval_top_1_weighted_f1score": 0.20797779672348793, "eval_top_3_macro_f1_score": 0.1596413030435833, "step": 12096 }, { "epoch": 5.2, "learning_rate": 0.0002, "loss": 2.8645, "step": 12100 }, { "epoch": 5.2, "learning_rate": 0.0002, "loss": 2.8814, "step": 12105 }, { "epoch": 5.2, "learning_rate": 0.0002, "loss": 2.862, "step": 12110 }, { "epoch": 5.2, "learning_rate": 0.0002, "loss": 2.8611, "step": 12115 }, { "epoch": 5.2, "learning_rate": 0.0002, "loss": 2.87, "step": 12120 }, { "epoch": 5.21, "learning_rate": 0.0002, "loss": 2.8739, "step": 12125 }, { "epoch": 5.21, "learning_rate": 0.0002, "loss": 2.861, "step": 12130 }, { "epoch": 5.21, "learning_rate": 0.0002, "loss": 2.8722, "step": 12135 }, { "epoch": 5.21, "learning_rate": 0.0002, "loss": 2.8651, "step": 12140 }, { "epoch": 5.21, "learning_rate": 0.0002, "loss": 2.8709, "step": 12145 }, { "epoch": 5.22, "learning_rate": 0.0002, "loss": 2.8778, "step": 12150 }, { "epoch": 5.22, "learning_rate": 0.0002, "loss": 2.8481, "step": 12155 }, { "epoch": 5.22, "learning_rate": 0.0002, "loss": 2.8723, "step": 12160 }, { "epoch": 5.22, "eval_loss": 2.8729920387268066, "eval_runtime": 5.3265, "eval_samples_per_second": 1854.322, "eval_steps_per_second": 14.644, "eval_top3_3_weighted_f1_score ": 0.4286138900543716, "eval_top_1_macro_f1_score": 0.06630210480419188, "eval_top_1_weighted_f1score": 0.20918349320253848, "eval_top_3_macro_f1_score": 0.16540882860527564, "step": 12160 }, { "epoch": 5.22, "learning_rate": 0.0002, "loss": 2.85, "step": 12165 }, { "epoch": 5.23, "learning_rate": 0.0002, "loss": 2.8639, "step": 12170 }, { "epoch": 5.23, "learning_rate": 0.0002, "loss": 2.851, "step": 12175 }, { "epoch": 5.23, "learning_rate": 0.0002, "loss": 2.871, "step": 12180 }, { "epoch": 5.23, "learning_rate": 0.0002, "loss": 2.847, "step": 12185 }, { "epoch": 5.23, "learning_rate": 0.0002, "loss": 2.8488, "step": 12190 }, { "epoch": 5.24, "learning_rate": 0.0002, "loss": 2.8658, "step": 12195 }, { "epoch": 5.24, "learning_rate": 0.0002, "loss": 2.876, "step": 12200 }, { "epoch": 5.24, "learning_rate": 0.0002, "loss": 2.8622, "step": 12205 }, { "epoch": 5.24, "learning_rate": 0.0002, "loss": 2.8678, "step": 12210 }, { "epoch": 5.24, "learning_rate": 0.0002, "loss": 2.8712, "step": 12215 }, { "epoch": 5.25, "learning_rate": 0.0002, "loss": 2.8565, "step": 12220 }, { "epoch": 5.25, "eval_loss": 2.8733251094818115, "eval_runtime": 5.3808, "eval_samples_per_second": 1835.59, "eval_steps_per_second": 14.496, "eval_top3_3_weighted_f1_score ": 0.4274854960455037, "eval_top_1_macro_f1_score": 0.06317168004233445, "eval_top_1_weighted_f1score": 0.20456381211628516, "eval_top_3_macro_f1_score": 0.16196828106224206, "step": 12224 }, { "epoch": 5.25, "learning_rate": 0.0002, "loss": 2.874, "step": 12225 }, { "epoch": 5.25, "learning_rate": 0.0002, "loss": 2.8648, "step": 12230 }, { "epoch": 5.25, "learning_rate": 0.0002, "loss": 2.8733, "step": 12235 }, { "epoch": 5.26, "learning_rate": 0.0002, "loss": 2.8735, "step": 12240 }, { "epoch": 5.26, "learning_rate": 0.0002, "loss": 2.85, "step": 12245 }, { "epoch": 5.26, "learning_rate": 0.0002, "loss": 2.8569, "step": 12250 }, { "epoch": 5.26, "learning_rate": 0.0002, "loss": 2.8573, "step": 12255 }, { "epoch": 5.26, "learning_rate": 0.0002, "loss": 2.8682, "step": 12260 }, { "epoch": 5.27, "learning_rate": 0.0002, "loss": 2.8525, "step": 12265 }, { "epoch": 5.27, "learning_rate": 0.0002, "loss": 2.8592, "step": 12270 }, { "epoch": 5.27, "learning_rate": 0.0002, "loss": 2.8631, "step": 12275 }, { "epoch": 5.27, "learning_rate": 0.0002, "loss": 2.8686, "step": 12280 }, { "epoch": 5.27, "learning_rate": 0.0002, "loss": 2.8631, "step": 12285 }, { "epoch": 5.28, "eval_loss": 2.8703885078430176, "eval_runtime": 5.3983, "eval_samples_per_second": 1829.664, "eval_steps_per_second": 14.449, "eval_top3_3_weighted_f1_score ": 0.4263112461317659, "eval_top_1_macro_f1_score": 0.06456388348788861, "eval_top_1_weighted_f1score": 0.20418942800250817, "eval_top_3_macro_f1_score": 0.16130626570251105, "step": 12288 }, { "epoch": 5.28, "learning_rate": 0.0002, "loss": 2.8709, "step": 12290 }, { "epoch": 5.28, "learning_rate": 0.0002, "loss": 2.8683, "step": 12295 }, { "epoch": 5.28, "learning_rate": 0.0002, "loss": 2.8414, "step": 12300 }, { "epoch": 5.28, "learning_rate": 0.0002, "loss": 2.8632, "step": 12305 }, { "epoch": 5.29, "learning_rate": 0.0002, "loss": 2.8566, "step": 12310 }, { "epoch": 5.29, "learning_rate": 0.0002, "loss": 2.8758, "step": 12315 }, { "epoch": 5.29, "learning_rate": 0.0002, "loss": 2.8512, "step": 12320 }, { "epoch": 5.29, "learning_rate": 0.0002, "loss": 2.8551, "step": 12325 }, { "epoch": 5.29, "learning_rate": 0.0002, "loss": 2.873, "step": 12330 }, { "epoch": 5.3, "learning_rate": 0.0002, "loss": 2.8599, "step": 12335 }, { "epoch": 5.3, "learning_rate": 0.0002, "loss": 2.8492, "step": 12340 }, { "epoch": 5.3, "learning_rate": 0.0002, "loss": 2.8622, "step": 12345 }, { "epoch": 5.3, "learning_rate": 0.0002, "loss": 2.8536, "step": 12350 }, { "epoch": 5.3, "eval_loss": 2.863574504852295, "eval_runtime": 5.3774, "eval_samples_per_second": 1836.757, "eval_steps_per_second": 14.505, "eval_top3_3_weighted_f1_score ": 0.4223760703791298, "eval_top_1_macro_f1_score": 0.06263228412679026, "eval_top_1_weighted_f1score": 0.20472715153654886, "eval_top_3_macro_f1_score": 0.160187916726336, "step": 12352 }, { "epoch": 5.3, "learning_rate": 0.0002, "loss": 2.8635, "step": 12355 }, { "epoch": 5.31, "learning_rate": 0.0002, "loss": 2.869, "step": 12360 }, { "epoch": 5.31, "learning_rate": 0.0002, "loss": 2.8512, "step": 12365 }, { "epoch": 5.31, "learning_rate": 0.0002, "loss": 2.8566, "step": 12370 }, { "epoch": 5.31, "learning_rate": 0.0002, "loss": 2.8495, "step": 12375 }, { "epoch": 5.32, "learning_rate": 0.0002, "loss": 2.8607, "step": 12380 }, { "epoch": 5.32, "learning_rate": 0.0002, "loss": 2.8804, "step": 12385 }, { "epoch": 5.32, "learning_rate": 0.0002, "loss": 2.8378, "step": 12390 }, { "epoch": 5.32, "learning_rate": 0.0002, "loss": 2.8562, "step": 12395 }, { "epoch": 5.32, "learning_rate": 0.0002, "loss": 2.8595, "step": 12400 }, { "epoch": 5.33, "learning_rate": 0.0002, "loss": 2.8836, "step": 12405 }, { "epoch": 5.33, "learning_rate": 0.0002, "loss": 2.8741, "step": 12410 }, { "epoch": 5.33, "learning_rate": 0.0002, "loss": 2.8704, "step": 12415 }, { "epoch": 5.33, "eval_loss": 2.8663103580474854, "eval_runtime": 5.4585, "eval_samples_per_second": 1809.458, "eval_steps_per_second": 14.29, "eval_top3_3_weighted_f1_score ": 0.4265369230250399, "eval_top_1_macro_f1_score": 0.06631147155356475, "eval_top_1_weighted_f1score": 0.20835882539610368, "eval_top_3_macro_f1_score": 0.16525838505297283, "step": 12416 }, { "epoch": 5.33, "learning_rate": 0.0002, "loss": 2.8569, "step": 12420 }, { "epoch": 5.33, "learning_rate": 0.0002, "loss": 2.8438, "step": 12425 }, { "epoch": 5.34, "learning_rate": 0.0002, "loss": 2.8783, "step": 12430 }, { "epoch": 5.34, "learning_rate": 0.0002, "loss": 2.8545, "step": 12435 }, { "epoch": 5.34, "learning_rate": 0.0002, "loss": 2.8474, "step": 12440 }, { "epoch": 5.34, "learning_rate": 0.0002, "loss": 2.8513, "step": 12445 }, { "epoch": 5.35, "learning_rate": 0.0002, "loss": 2.8518, "step": 12450 }, { "epoch": 5.35, "learning_rate": 0.0002, "loss": 2.877, "step": 12455 }, { "epoch": 5.35, "learning_rate": 0.0002, "loss": 2.8537, "step": 12460 }, { "epoch": 5.35, "learning_rate": 0.0002, "loss": 2.854, "step": 12465 }, { "epoch": 5.35, "learning_rate": 0.0002, "loss": 2.8581, "step": 12470 }, { "epoch": 5.36, "learning_rate": 0.0002, "loss": 2.8619, "step": 12475 }, { "epoch": 5.36, "learning_rate": 0.0002, "loss": 2.8626, "step": 12480 }, { "epoch": 5.36, "eval_loss": 2.876800775527954, "eval_runtime": 5.3362, "eval_samples_per_second": 1850.953, "eval_steps_per_second": 14.617, "eval_top3_3_weighted_f1_score ": 0.42567207437957555, "eval_top_1_macro_f1_score": 0.06345758787819845, "eval_top_1_weighted_f1score": 0.20840399941031143, "eval_top_3_macro_f1_score": 0.16604798262384834, "step": 12480 }, { "epoch": 5.36, "learning_rate": 0.0002, "loss": 2.8579, "step": 12485 }, { "epoch": 5.36, "learning_rate": 0.0002, "loss": 2.8536, "step": 12490 }, { "epoch": 5.36, "learning_rate": 0.0002, "loss": 2.8633, "step": 12495 }, { "epoch": 5.37, "learning_rate": 0.0002, "loss": 2.8632, "step": 12500 }, { "epoch": 5.37, "learning_rate": 0.0002, "loss": 2.8443, "step": 12505 }, { "epoch": 5.37, "learning_rate": 0.0002, "loss": 2.8394, "step": 12510 }, { "epoch": 5.37, "learning_rate": 0.0002, "loss": 2.8773, "step": 12515 }, { "epoch": 5.38, "learning_rate": 0.0002, "loss": 2.8668, "step": 12520 }, { "epoch": 5.38, "learning_rate": 0.0002, "loss": 2.8709, "step": 12525 }, { "epoch": 5.38, "learning_rate": 0.0002, "loss": 2.864, "step": 12530 }, { "epoch": 5.38, "learning_rate": 0.0002, "loss": 2.8664, "step": 12535 }, { "epoch": 5.38, "learning_rate": 0.0002, "loss": 2.8633, "step": 12540 }, { "epoch": 5.39, "eval_loss": 2.87705397605896, "eval_runtime": 5.3971, "eval_samples_per_second": 1830.044, "eval_steps_per_second": 14.452, "eval_top3_3_weighted_f1_score ": 0.42581772075435337, "eval_top_1_macro_f1_score": 0.06089987140851035, "eval_top_1_weighted_f1score": 0.20790260048900666, "eval_top_3_macro_f1_score": 0.1678851592543872, "step": 12544 }, { "epoch": 5.39, "learning_rate": 0.0002, "loss": 2.8694, "step": 12545 }, { "epoch": 5.39, "learning_rate": 0.0002, "loss": 2.8583, "step": 12550 }, { "epoch": 5.39, "learning_rate": 0.0002, "loss": 2.8645, "step": 12555 }, { "epoch": 5.39, "learning_rate": 0.0002, "loss": 2.8641, "step": 12560 }, { "epoch": 5.39, "learning_rate": 0.0002, "loss": 2.8456, "step": 12565 }, { "epoch": 5.4, "learning_rate": 0.0002, "loss": 2.8604, "step": 12570 }, { "epoch": 5.4, "learning_rate": 0.0002, "loss": 2.8678, "step": 12575 }, { "epoch": 5.4, "learning_rate": 0.0002, "loss": 2.8494, "step": 12580 }, { "epoch": 5.4, "learning_rate": 0.0002, "loss": 2.8537, "step": 12585 }, { "epoch": 5.41, "learning_rate": 0.0002, "loss": 2.8827, "step": 12590 }, { "epoch": 5.41, "learning_rate": 0.0002, "loss": 2.8531, "step": 12595 }, { "epoch": 5.41, "learning_rate": 0.0002, "loss": 2.8692, "step": 12600 }, { "epoch": 5.41, "learning_rate": 0.0002, "loss": 2.8673, "step": 12605 }, { "epoch": 5.41, "eval_loss": 2.8670997619628906, "eval_runtime": 5.4492, "eval_samples_per_second": 1812.558, "eval_steps_per_second": 14.314, "eval_top3_3_weighted_f1_score ": 0.4297177427482825, "eval_top_1_macro_f1_score": 0.06382875173672384, "eval_top_1_weighted_f1score": 0.20357842086656863, "eval_top_3_macro_f1_score": 0.17012538987789935, "step": 12608 }, { "epoch": 5.41, "learning_rate": 0.0002, "loss": 2.8615, "step": 12610 }, { "epoch": 5.42, "learning_rate": 0.0002, "loss": 2.8554, "step": 12615 }, { "epoch": 5.42, "learning_rate": 0.0002, "loss": 2.8455, "step": 12620 }, { "epoch": 5.42, "learning_rate": 0.0002, "loss": 2.8468, "step": 12625 }, { "epoch": 5.42, "learning_rate": 0.0002, "loss": 2.8411, "step": 12630 }, { "epoch": 5.42, "learning_rate": 0.0002, "loss": 2.8594, "step": 12635 }, { "epoch": 5.43, "learning_rate": 0.0002, "loss": 2.84, "step": 12640 }, { "epoch": 5.43, "learning_rate": 0.0002, "loss": 2.8653, "step": 12645 }, { "epoch": 5.43, "learning_rate": 0.0002, "loss": 2.85, "step": 12650 }, { "epoch": 5.43, "learning_rate": 0.0002, "loss": 2.8549, "step": 12655 }, { "epoch": 5.44, "learning_rate": 0.0002, "loss": 2.8647, "step": 12660 }, { "epoch": 5.44, "learning_rate": 0.0002, "loss": 2.8719, "step": 12665 }, { "epoch": 5.44, "learning_rate": 0.0002, "loss": 2.8792, "step": 12670 }, { "epoch": 5.44, "eval_loss": 2.868391752243042, "eval_runtime": 5.3487, "eval_samples_per_second": 1846.602, "eval_steps_per_second": 14.583, "eval_top3_3_weighted_f1_score ": 0.4267632240110778, "eval_top_1_macro_f1_score": 0.06546434917081703, "eval_top_1_weighted_f1score": 0.20801387882004196, "eval_top_3_macro_f1_score": 0.15873036291281165, "step": 12672 }, { "epoch": 5.44, "learning_rate": 0.0002, "loss": 2.8626, "step": 12675 }, { "epoch": 5.44, "learning_rate": 0.0002, "loss": 2.8761, "step": 12680 }, { "epoch": 5.45, "learning_rate": 0.0002, "loss": 2.8667, "step": 12685 }, { "epoch": 5.45, "learning_rate": 0.0002, "loss": 2.851, "step": 12690 }, { "epoch": 5.45, "learning_rate": 0.0002, "loss": 2.8605, "step": 12695 }, { "epoch": 5.45, "learning_rate": 0.0002, "loss": 2.858, "step": 12700 }, { "epoch": 5.46, "learning_rate": 0.0002, "loss": 2.8593, "step": 12705 }, { "epoch": 5.46, "learning_rate": 0.0002, "loss": 2.8702, "step": 12710 }, { "epoch": 5.46, "learning_rate": 0.0002, "loss": 2.8716, "step": 12715 }, { "epoch": 5.46, "learning_rate": 0.0002, "loss": 2.8562, "step": 12720 }, { "epoch": 5.46, "learning_rate": 0.0002, "loss": 2.8254, "step": 12725 }, { "epoch": 5.47, "learning_rate": 0.0002, "loss": 2.869, "step": 12730 }, { "epoch": 5.47, "learning_rate": 0.0002, "loss": 2.8661, "step": 12735 }, { "epoch": 5.47, "eval_loss": 2.8712151050567627, "eval_runtime": 5.4522, "eval_samples_per_second": 1811.55, "eval_steps_per_second": 14.306, "eval_top3_3_weighted_f1_score ": 0.42248428596315507, "eval_top_1_macro_f1_score": 0.06338137711067554, "eval_top_1_weighted_f1score": 0.2052704347027335, "eval_top_3_macro_f1_score": 0.15724683781010063, "step": 12736 }, { "epoch": 5.47, "learning_rate": 0.0002, "loss": 2.8729, "step": 12740 }, { "epoch": 5.47, "learning_rate": 0.0002, "loss": 2.858, "step": 12745 }, { "epoch": 5.47, "learning_rate": 0.0002, "loss": 2.8738, "step": 12750 }, { "epoch": 5.48, "learning_rate": 0.0002, "loss": 2.8715, "step": 12755 }, { "epoch": 5.48, "learning_rate": 0.0002, "loss": 2.8754, "step": 12760 }, { "epoch": 5.48, "learning_rate": 0.0002, "loss": 2.8586, "step": 12765 }, { "epoch": 5.48, "learning_rate": 0.0002, "loss": 2.8447, "step": 12770 }, { "epoch": 5.49, "learning_rate": 0.0002, "loss": 2.861, "step": 12775 }, { "epoch": 5.49, "learning_rate": 0.0002, "loss": 2.8458, "step": 12780 }, { "epoch": 5.49, "learning_rate": 0.0002, "loss": 2.8616, "step": 12785 }, { "epoch": 5.49, "learning_rate": 0.0002, "loss": 2.884, "step": 12790 }, { "epoch": 5.49, "learning_rate": 0.0002, "loss": 2.8502, "step": 12795 }, { "epoch": 5.5, "learning_rate": 0.0002, "loss": 2.8394, "step": 12800 }, { "epoch": 5.5, "eval_loss": 2.879948854446411, "eval_runtime": 5.6176, "eval_samples_per_second": 1758.219, "eval_steps_per_second": 13.885, "eval_top3_3_weighted_f1_score ": 0.42693128085143395, "eval_top_1_macro_f1_score": 0.06255348634396939, "eval_top_1_weighted_f1score": 0.20712040547374516, "eval_top_3_macro_f1_score": 0.16143149477749164, "step": 12800 }, { "epoch": 5.5, "learning_rate": 0.0002, "loss": 2.8694, "step": 12805 }, { "epoch": 5.5, "learning_rate": 0.0002, "loss": 2.8559, "step": 12810 }, { "epoch": 5.5, "learning_rate": 0.0002, "loss": 2.8812, "step": 12815 }, { "epoch": 5.5, "learning_rate": 0.0002, "loss": 2.861, "step": 12820 }, { "epoch": 5.51, "learning_rate": 0.0002, "loss": 2.8573, "step": 12825 }, { "epoch": 5.51, "learning_rate": 0.0002, "loss": 2.8635, "step": 12830 }, { "epoch": 5.51, "learning_rate": 0.0002, "loss": 2.848, "step": 12835 }, { "epoch": 5.51, "learning_rate": 0.0002, "loss": 2.8531, "step": 12840 }, { "epoch": 5.52, "learning_rate": 0.0002, "loss": 2.8421, "step": 12845 }, { "epoch": 5.52, "learning_rate": 0.0002, "loss": 2.8666, "step": 12850 }, { "epoch": 5.52, "learning_rate": 0.0002, "loss": 2.8783, "step": 12855 }, { "epoch": 5.52, "learning_rate": 0.0002, "loss": 2.8746, "step": 12860 }, { "epoch": 5.52, "eval_loss": 2.86928129196167, "eval_runtime": 5.7863, "eval_samples_per_second": 1706.969, "eval_steps_per_second": 13.48, "eval_top3_3_weighted_f1_score ": 0.4290956311025736, "eval_top_1_macro_f1_score": 0.058589711765819036, "eval_top_1_weighted_f1score": 0.2056205317016191, "eval_top_3_macro_f1_score": 0.15132675835803747, "step": 12864 }, { "epoch": 5.52, "learning_rate": 0.0002, "loss": 2.8748, "step": 12865 }, { "epoch": 5.53, "learning_rate": 0.0002, "loss": 2.8679, "step": 12870 }, { "epoch": 5.53, "learning_rate": 0.0002, "loss": 2.8584, "step": 12875 }, { "epoch": 5.53, "learning_rate": 0.0002, "loss": 2.8602, "step": 12880 }, { "epoch": 5.53, "learning_rate": 0.0002, "loss": 2.869, "step": 12885 }, { "epoch": 5.53, "learning_rate": 0.0002, "loss": 2.8737, "step": 12890 }, { "epoch": 5.54, "learning_rate": 0.0002, "loss": 2.8369, "step": 12895 }, { "epoch": 5.54, "learning_rate": 0.0002, "loss": 2.8664, "step": 12900 }, { "epoch": 5.54, "learning_rate": 0.0002, "loss": 2.8638, "step": 12905 }, { "epoch": 5.54, "learning_rate": 0.0002, "loss": 2.8647, "step": 12910 }, { "epoch": 5.55, "learning_rate": 0.0002, "loss": 2.8533, "step": 12915 }, { "epoch": 5.55, "learning_rate": 0.0002, "loss": 2.8467, "step": 12920 }, { "epoch": 5.55, "learning_rate": 0.0002, "loss": 2.867, "step": 12925 }, { "epoch": 5.55, "eval_loss": 2.8681602478027344, "eval_runtime": 5.4459, "eval_samples_per_second": 1813.666, "eval_steps_per_second": 14.323, "eval_top3_3_weighted_f1_score ": 0.4273838793747354, "eval_top_1_macro_f1_score": 0.061575625026085906, "eval_top_1_weighted_f1score": 0.21100168044007095, "eval_top_3_macro_f1_score": 0.15884318808664932, "step": 12928 }, { "epoch": 5.55, "learning_rate": 0.0002, "loss": 2.8561, "step": 12930 }, { "epoch": 5.55, "learning_rate": 0.0002, "loss": 2.8804, "step": 12935 }, { "epoch": 5.56, "learning_rate": 0.0002, "loss": 2.876, "step": 12940 }, { "epoch": 5.56, "learning_rate": 0.0002, "loss": 2.8807, "step": 12945 }, { "epoch": 5.56, "learning_rate": 0.0002, "loss": 2.8474, "step": 12950 }, { "epoch": 5.56, "learning_rate": 0.0002, "loss": 2.8466, "step": 12955 }, { "epoch": 5.56, "learning_rate": 0.0002, "loss": 2.8551, "step": 12960 }, { "epoch": 5.57, "learning_rate": 0.0002, "loss": 2.8674, "step": 12965 }, { "epoch": 5.57, "learning_rate": 0.0002, "loss": 2.8457, "step": 12970 }, { "epoch": 5.57, "learning_rate": 0.0002, "loss": 2.8403, "step": 12975 }, { "epoch": 5.57, "learning_rate": 0.0002, "loss": 2.868, "step": 12980 }, { "epoch": 5.58, "learning_rate": 0.0002, "loss": 2.8596, "step": 12985 }, { "epoch": 5.58, "learning_rate": 0.0002, "loss": 2.8549, "step": 12990 }, { "epoch": 5.58, "eval_loss": 2.8629438877105713, "eval_runtime": 5.3775, "eval_samples_per_second": 1836.741, "eval_steps_per_second": 14.505, "eval_top3_3_weighted_f1_score ": 0.4308889259952943, "eval_top_1_macro_f1_score": 0.062038629347178215, "eval_top_1_weighted_f1score": 0.20715269808731004, "eval_top_3_macro_f1_score": 0.17147488703095864, "step": 12992 }, { "epoch": 5.58, "learning_rate": 0.0002, "loss": 2.8672, "step": 12995 }, { "epoch": 5.58, "learning_rate": 0.0002, "loss": 2.8647, "step": 13000 }, { "epoch": 5.58, "learning_rate": 0.0002, "loss": 2.8485, "step": 13005 }, { "epoch": 5.59, "learning_rate": 0.0002, "loss": 2.859, "step": 13010 }, { "epoch": 5.59, "learning_rate": 0.0002, "loss": 2.8562, "step": 13015 }, { "epoch": 5.59, "learning_rate": 0.0002, "loss": 2.8596, "step": 13020 }, { "epoch": 5.59, "learning_rate": 0.0002, "loss": 2.8635, "step": 13025 }, { "epoch": 5.59, "learning_rate": 0.0002, "loss": 2.8669, "step": 13030 }, { "epoch": 5.6, "learning_rate": 0.0002, "loss": 2.8497, "step": 13035 }, { "epoch": 5.6, "learning_rate": 0.0002, "loss": 2.8684, "step": 13040 }, { "epoch": 5.6, "learning_rate": 0.0002, "loss": 2.8629, "step": 13045 }, { "epoch": 5.6, "learning_rate": 0.0002, "loss": 2.8443, "step": 13050 }, { "epoch": 5.61, "learning_rate": 0.0002, "loss": 2.8626, "step": 13055 }, { "epoch": 5.61, "eval_loss": 2.8713433742523193, "eval_runtime": 5.4068, "eval_samples_per_second": 1826.774, "eval_steps_per_second": 14.426, "eval_top3_3_weighted_f1_score ": 0.425654833318534, "eval_top_1_macro_f1_score": 0.06096395287879924, "eval_top_1_weighted_f1score": 0.1998952162609984, "eval_top_3_macro_f1_score": 0.15487317312853313, "step": 13056 }, { "epoch": 5.61, "learning_rate": 0.0002, "loss": 2.8616, "step": 13060 }, { "epoch": 5.61, "learning_rate": 0.0002, "loss": 2.8593, "step": 13065 }, { "epoch": 5.61, "learning_rate": 0.0002, "loss": 2.8718, "step": 13070 }, { "epoch": 5.61, "learning_rate": 0.0002, "loss": 2.8668, "step": 13075 }, { "epoch": 5.62, "learning_rate": 0.0002, "loss": 2.8575, "step": 13080 }, { "epoch": 5.62, "learning_rate": 0.0002, "loss": 2.8803, "step": 13085 }, { "epoch": 5.62, "learning_rate": 0.0002, "loss": 2.8554, "step": 13090 }, { "epoch": 5.62, "learning_rate": 0.0002, "loss": 2.8676, "step": 13095 }, { "epoch": 5.62, "learning_rate": 0.0002, "loss": 2.8642, "step": 13100 }, { "epoch": 5.63, "learning_rate": 0.0002, "loss": 2.835, "step": 13105 }, { "epoch": 5.63, "learning_rate": 0.0002, "loss": 2.8682, "step": 13110 }, { "epoch": 5.63, "learning_rate": 0.0002, "loss": 2.8657, "step": 13115 }, { "epoch": 5.63, "learning_rate": 0.0002, "loss": 2.8387, "step": 13120 }, { "epoch": 5.63, "eval_loss": 2.858103036880493, "eval_runtime": 5.429, "eval_samples_per_second": 1819.302, "eval_steps_per_second": 14.367, "eval_top3_3_weighted_f1_score ": 0.42880534706887957, "eval_top_1_macro_f1_score": 0.06353459620981412, "eval_top_1_weighted_f1score": 0.20656542151091475, "eval_top_3_macro_f1_score": 0.17523647602647105, "step": 13120 }, { "epoch": 5.64, "learning_rate": 0.0002, "loss": 2.8561, "step": 13125 }, { "epoch": 5.64, "learning_rate": 0.0002, "loss": 2.8387, "step": 13130 }, { "epoch": 5.64, "learning_rate": 0.0002, "loss": 2.8498, "step": 13135 }, { "epoch": 5.64, "learning_rate": 0.0002, "loss": 2.8696, "step": 13140 }, { "epoch": 5.64, "learning_rate": 0.0002, "loss": 2.8669, "step": 13145 }, { "epoch": 5.65, "learning_rate": 0.0002, "loss": 2.8558, "step": 13150 }, { "epoch": 5.65, "learning_rate": 0.0002, "loss": 2.8622, "step": 13155 }, { "epoch": 5.65, "learning_rate": 0.0002, "loss": 2.8537, "step": 13160 }, { "epoch": 5.65, "learning_rate": 0.0002, "loss": 2.8831, "step": 13165 }, { "epoch": 5.65, "learning_rate": 0.0002, "loss": 2.8523, "step": 13170 }, { "epoch": 5.66, "learning_rate": 0.0002, "loss": 2.8511, "step": 13175 }, { "epoch": 5.66, "learning_rate": 0.0002, "loss": 2.875, "step": 13180 }, { "epoch": 5.66, "eval_loss": 2.86613392829895, "eval_runtime": 5.3406, "eval_samples_per_second": 1849.415, "eval_steps_per_second": 14.605, "eval_top3_3_weighted_f1_score ": 0.4274260278298021, "eval_top_1_macro_f1_score": 0.06387742834131617, "eval_top_1_weighted_f1score": 0.20790766898744287, "eval_top_3_macro_f1_score": 0.17308233668545236, "step": 13184 }, { "epoch": 5.66, "learning_rate": 0.0002, "loss": 2.8571, "step": 13185 }, { "epoch": 5.66, "learning_rate": 0.0002, "loss": 2.8537, "step": 13190 }, { "epoch": 5.67, "learning_rate": 0.0002, "loss": 2.8505, "step": 13195 }, { "epoch": 5.67, "learning_rate": 0.0002, "loss": 2.8668, "step": 13200 }, { "epoch": 5.67, "learning_rate": 0.0002, "loss": 2.8539, "step": 13205 }, { "epoch": 5.67, "learning_rate": 0.0002, "loss": 2.8382, "step": 13210 }, { "epoch": 5.67, "learning_rate": 0.0002, "loss": 2.8745, "step": 13215 }, { "epoch": 5.68, "learning_rate": 0.0002, "loss": 2.8717, "step": 13220 }, { "epoch": 5.68, "learning_rate": 0.0002, "loss": 2.8712, "step": 13225 }, { "epoch": 5.68, "learning_rate": 0.0002, "loss": 2.8648, "step": 13230 }, { "epoch": 5.68, "learning_rate": 0.0002, "loss": 2.8517, "step": 13235 }, { "epoch": 5.68, "learning_rate": 0.0002, "loss": 2.8547, "step": 13240 }, { "epoch": 5.69, "learning_rate": 0.0002, "loss": 2.8668, "step": 13245 }, { "epoch": 5.69, "eval_loss": 2.870232343673706, "eval_runtime": 5.3867, "eval_samples_per_second": 1833.582, "eval_steps_per_second": 14.48, "eval_top3_3_weighted_f1_score ": 0.4259613585170209, "eval_top_1_macro_f1_score": 0.06270662591985765, "eval_top_1_weighted_f1score": 0.20832308964286858, "eval_top_3_macro_f1_score": 0.16006006125049677, "step": 13248 }, { "epoch": 5.69, "learning_rate": 0.0002, "loss": 2.8652, "step": 13250 }, { "epoch": 5.69, "learning_rate": 0.0002, "loss": 2.8643, "step": 13255 }, { "epoch": 5.69, "learning_rate": 0.0002, "loss": 2.8483, "step": 13260 }, { "epoch": 5.7, "learning_rate": 0.0002, "loss": 2.8709, "step": 13265 }, { "epoch": 5.7, "learning_rate": 0.0002, "loss": 2.8656, "step": 13270 }, { "epoch": 5.7, "learning_rate": 0.0002, "loss": 2.8435, "step": 13275 }, { "epoch": 5.7, "learning_rate": 0.0002, "loss": 2.8586, "step": 13280 }, { "epoch": 5.7, "learning_rate": 0.0002, "loss": 2.8718, "step": 13285 }, { "epoch": 5.71, "learning_rate": 0.0002, "loss": 2.8669, "step": 13290 }, { "epoch": 5.71, "learning_rate": 0.0002, "loss": 2.856, "step": 13295 }, { "epoch": 5.71, "learning_rate": 0.0002, "loss": 2.8685, "step": 13300 }, { "epoch": 5.71, "learning_rate": 0.0002, "loss": 2.8447, "step": 13305 }, { "epoch": 5.71, "learning_rate": 0.0002, "loss": 2.8545, "step": 13310 }, { "epoch": 5.72, "eval_loss": 2.863187551498413, "eval_runtime": 5.7315, "eval_samples_per_second": 1723.298, "eval_steps_per_second": 13.609, "eval_top3_3_weighted_f1_score ": 0.42690298388808606, "eval_top_1_macro_f1_score": 0.06395728694989959, "eval_top_1_weighted_f1score": 0.20885784572072683, "eval_top_3_macro_f1_score": 0.16412125306640024, "step": 13312 }, { "epoch": 5.72, "learning_rate": 0.0002, "loss": 2.8498, "step": 13315 }, { "epoch": 5.72, "learning_rate": 0.0002, "loss": 2.8525, "step": 13320 }, { "epoch": 5.72, "learning_rate": 0.0002, "loss": 2.8392, "step": 13325 }, { "epoch": 5.72, "learning_rate": 0.0002, "loss": 2.864, "step": 13330 }, { "epoch": 5.73, "learning_rate": 0.0002, "loss": 2.8534, "step": 13335 }, { "epoch": 5.73, "learning_rate": 0.0002, "loss": 2.8429, "step": 13340 }, { "epoch": 5.73, "learning_rate": 0.0002, "loss": 2.8672, "step": 13345 }, { "epoch": 5.73, "learning_rate": 0.0002, "loss": 2.8466, "step": 13350 }, { "epoch": 5.73, "learning_rate": 0.0002, "loss": 2.85, "step": 13355 }, { "epoch": 5.74, "learning_rate": 0.0002, "loss": 2.8391, "step": 13360 }, { "epoch": 5.74, "learning_rate": 0.0002, "loss": 2.8524, "step": 13365 }, { "epoch": 5.74, "learning_rate": 0.0002, "loss": 2.8589, "step": 13370 }, { "epoch": 5.74, "learning_rate": 0.0002, "loss": 2.867, "step": 13375 }, { "epoch": 5.74, "eval_loss": 2.8663976192474365, "eval_runtime": 5.3525, "eval_samples_per_second": 1845.314, "eval_steps_per_second": 14.573, "eval_top3_3_weighted_f1_score ": 0.4253532176538349, "eval_top_1_macro_f1_score": 0.06061484272739991, "eval_top_1_weighted_f1score": 0.2047296327024241, "eval_top_3_macro_f1_score": 0.1562257377230752, "step": 13376 }, { "epoch": 5.74, "learning_rate": 0.0002, "loss": 2.8514, "step": 13380 }, { "epoch": 5.75, "learning_rate": 0.0002, "loss": 2.861, "step": 13385 }, { "epoch": 5.75, "learning_rate": 0.0002, "loss": 2.8382, "step": 13390 }, { "epoch": 5.75, "learning_rate": 0.0002, "loss": 2.8601, "step": 13395 }, { "epoch": 5.75, "learning_rate": 0.0002, "loss": 2.8605, "step": 13400 }, { "epoch": 5.76, "learning_rate": 0.0002, "loss": 2.8605, "step": 13405 }, { "epoch": 5.76, "learning_rate": 0.0002, "loss": 2.8144, "step": 13410 }, { "epoch": 5.76, "learning_rate": 0.0002, "loss": 2.8521, "step": 13415 }, { "epoch": 5.76, "learning_rate": 0.0002, "loss": 2.8664, "step": 13420 }, { "epoch": 5.76, "learning_rate": 0.0002, "loss": 2.8566, "step": 13425 }, { "epoch": 5.77, "learning_rate": 0.0002, "loss": 2.8653, "step": 13430 }, { "epoch": 5.77, "learning_rate": 0.0002, "loss": 2.867, "step": 13435 }, { "epoch": 5.77, "learning_rate": 0.0002, "loss": 2.874, "step": 13440 }, { "epoch": 5.77, "eval_loss": 2.8693718910217285, "eval_runtime": 5.4136, "eval_samples_per_second": 1824.483, "eval_steps_per_second": 14.408, "eval_top3_3_weighted_f1_score ": 0.4248694503646851, "eval_top_1_macro_f1_score": 0.06314351867333298, "eval_top_1_weighted_f1score": 0.20468529657027187, "eval_top_3_macro_f1_score": 0.16571741276693988, "step": 13440 }, { "epoch": 5.77, "learning_rate": 0.0002, "loss": 2.8584, "step": 13445 }, { "epoch": 5.77, "learning_rate": 0.0002, "loss": 2.8642, "step": 13450 }, { "epoch": 5.78, "learning_rate": 0.0002, "loss": 2.8578, "step": 13455 }, { "epoch": 5.78, "learning_rate": 0.0002, "loss": 2.8515, "step": 13460 }, { "epoch": 5.78, "learning_rate": 0.0002, "loss": 2.8659, "step": 13465 }, { "epoch": 5.78, "learning_rate": 0.0002, "loss": 2.8775, "step": 13470 }, { "epoch": 5.79, "learning_rate": 0.0002, "loss": 2.8643, "step": 13475 }, { "epoch": 5.79, "learning_rate": 0.0002, "loss": 2.8599, "step": 13480 }, { "epoch": 5.79, "learning_rate": 0.0002, "loss": 2.8759, "step": 13485 }, { "epoch": 5.79, "learning_rate": 0.0002, "loss": 2.851, "step": 13490 }, { "epoch": 5.79, "learning_rate": 0.0002, "loss": 2.8371, "step": 13495 }, { "epoch": 5.8, "learning_rate": 0.0002, "loss": 2.8475, "step": 13500 }, { "epoch": 5.8, "eval_loss": 2.8693103790283203, "eval_runtime": 5.3423, "eval_samples_per_second": 1848.844, "eval_steps_per_second": 14.601, "eval_top3_3_weighted_f1_score ": 0.4260684239751329, "eval_top_1_macro_f1_score": 0.06399201002672834, "eval_top_1_weighted_f1score": 0.20817983183427266, "eval_top_3_macro_f1_score": 0.17214746946434742, "step": 13504 }, { "epoch": 5.8, "learning_rate": 0.0002, "loss": 2.8702, "step": 13505 }, { "epoch": 5.8, "learning_rate": 0.0002, "loss": 2.8573, "step": 13510 }, { "epoch": 5.8, "learning_rate": 0.0002, "loss": 2.8647, "step": 13515 }, { "epoch": 5.8, "learning_rate": 0.0002, "loss": 2.8717, "step": 13520 }, { "epoch": 5.81, "learning_rate": 0.0002, "loss": 2.8741, "step": 13525 }, { "epoch": 5.81, "learning_rate": 0.0002, "loss": 2.8809, "step": 13530 }, { "epoch": 5.81, "learning_rate": 0.0002, "loss": 2.8765, "step": 13535 }, { "epoch": 5.81, "learning_rate": 0.0002, "loss": 2.8739, "step": 13540 }, { "epoch": 5.82, "learning_rate": 0.0002, "loss": 2.8675, "step": 13545 }, { "epoch": 5.82, "learning_rate": 0.0002, "loss": 2.8628, "step": 13550 }, { "epoch": 5.82, "learning_rate": 0.0002, "loss": 2.8534, "step": 13555 }, { "epoch": 5.82, "learning_rate": 0.0002, "loss": 2.8823, "step": 13560 }, { "epoch": 5.82, "learning_rate": 0.0002, "loss": 2.8322, "step": 13565 }, { "epoch": 5.83, "eval_loss": 2.8661611080169678, "eval_runtime": 5.3918, "eval_samples_per_second": 1831.85, "eval_steps_per_second": 14.466, "eval_top3_3_weighted_f1_score ": 0.42730270541070525, "eval_top_1_macro_f1_score": 0.06618373886191772, "eval_top_1_weighted_f1score": 0.20648184144746817, "eval_top_3_macro_f1_score": 0.16505583558797157, "step": 13568 }, { "epoch": 5.83, "learning_rate": 0.0002, "loss": 2.847, "step": 13570 }, { "epoch": 5.83, "learning_rate": 0.0002, "loss": 2.862, "step": 13575 }, { "epoch": 5.83, "learning_rate": 0.0002, "loss": 2.8567, "step": 13580 }, { "epoch": 5.83, "learning_rate": 0.0002, "loss": 2.8323, "step": 13585 }, { "epoch": 5.83, "learning_rate": 0.0002, "loss": 2.8641, "step": 13590 }, { "epoch": 5.84, "learning_rate": 0.0002, "loss": 2.846, "step": 13595 }, { "epoch": 5.84, "learning_rate": 0.0002, "loss": 2.8376, "step": 13600 }, { "epoch": 5.84, "learning_rate": 0.0002, "loss": 2.8823, "step": 13605 }, { "epoch": 5.84, "learning_rate": 0.0002, "loss": 2.842, "step": 13610 }, { "epoch": 5.85, "learning_rate": 0.0002, "loss": 2.8359, "step": 13615 }, { "epoch": 5.85, "learning_rate": 0.0002, "loss": 2.8554, "step": 13620 }, { "epoch": 5.85, "learning_rate": 0.0002, "loss": 2.8706, "step": 13625 }, { "epoch": 5.85, "learning_rate": 0.0002, "loss": 2.8745, "step": 13630 }, { "epoch": 5.85, "eval_loss": 2.864320993423462, "eval_runtime": 5.3584, "eval_samples_per_second": 1843.28, "eval_steps_per_second": 14.557, "eval_top3_3_weighted_f1_score ": 0.42378643624582846, "eval_top_1_macro_f1_score": 0.06515805059497368, "eval_top_1_weighted_f1score": 0.20768668382202293, "eval_top_3_macro_f1_score": 0.17012690134722805, "step": 13632 }, { "epoch": 5.85, "learning_rate": 0.0002, "loss": 2.8395, "step": 13635 }, { "epoch": 5.86, "learning_rate": 0.0002, "loss": 2.8744, "step": 13640 }, { "epoch": 5.86, "learning_rate": 0.0002, "loss": 2.864, "step": 13645 }, { "epoch": 5.86, "learning_rate": 0.0002, "loss": 2.8372, "step": 13650 }, { "epoch": 5.86, "learning_rate": 0.0002, "loss": 2.8665, "step": 13655 }, { "epoch": 5.87, "learning_rate": 0.0002, "loss": 2.8614, "step": 13660 }, { "epoch": 5.87, "learning_rate": 0.0002, "loss": 2.8614, "step": 13665 }, { "epoch": 5.87, "learning_rate": 0.0002, "loss": 2.8686, "step": 13670 }, { "epoch": 5.87, "learning_rate": 0.0002, "loss": 2.8848, "step": 13675 }, { "epoch": 5.87, "learning_rate": 0.0002, "loss": 2.8652, "step": 13680 }, { "epoch": 5.88, "learning_rate": 0.0002, "loss": 2.8721, "step": 13685 }, { "epoch": 5.88, "learning_rate": 0.0002, "loss": 2.8481, "step": 13690 }, { "epoch": 5.88, "learning_rate": 0.0002, "loss": 2.8708, "step": 13695 }, { "epoch": 5.88, "eval_loss": 2.8583221435546875, "eval_runtime": 5.3333, "eval_samples_per_second": 1851.944, "eval_steps_per_second": 14.625, "eval_top3_3_weighted_f1_score ": 0.42629623086863955, "eval_top_1_macro_f1_score": 0.0642516151445838, "eval_top_1_weighted_f1score": 0.20652017273950096, "eval_top_3_macro_f1_score": 0.1631062990693639, "step": 13696 }, { "epoch": 5.88, "learning_rate": 0.0002, "loss": 2.8788, "step": 13700 }, { "epoch": 5.88, "learning_rate": 0.0002, "loss": 2.8682, "step": 13705 }, { "epoch": 5.89, "learning_rate": 0.0002, "loss": 2.8777, "step": 13710 }, { "epoch": 5.89, "learning_rate": 0.0002, "loss": 2.8642, "step": 13715 }, { "epoch": 5.89, "learning_rate": 0.0002, "loss": 2.873, "step": 13720 }, { "epoch": 5.89, "learning_rate": 0.0002, "loss": 2.8731, "step": 13725 }, { "epoch": 5.9, "learning_rate": 0.0002, "loss": 2.8629, "step": 13730 }, { "epoch": 5.9, "learning_rate": 0.0002, "loss": 2.8553, "step": 13735 }, { "epoch": 5.9, "learning_rate": 0.0002, "loss": 2.875, "step": 13740 }, { "epoch": 5.9, "learning_rate": 0.0002, "loss": 2.8715, "step": 13745 }, { "epoch": 5.9, "learning_rate": 0.0002, "loss": 2.8548, "step": 13750 }, { "epoch": 5.91, "learning_rate": 0.0002, "loss": 2.8645, "step": 13755 }, { "epoch": 5.91, "learning_rate": 0.0002, "loss": 2.8673, "step": 13760 }, { "epoch": 5.91, "eval_loss": 2.8654608726501465, "eval_runtime": 5.4259, "eval_samples_per_second": 1820.348, "eval_steps_per_second": 14.376, "eval_top3_3_weighted_f1_score ": 0.42425758776269407, "eval_top_1_macro_f1_score": 0.06367910049044434, "eval_top_1_weighted_f1score": 0.2089364923158843, "eval_top_3_macro_f1_score": 0.1582568048780128, "step": 13760 }, { "epoch": 5.91, "learning_rate": 0.0002, "loss": 2.8668, "step": 13765 }, { "epoch": 5.91, "learning_rate": 0.0002, "loss": 2.8458, "step": 13770 }, { "epoch": 5.91, "learning_rate": 0.0002, "loss": 2.8654, "step": 13775 }, { "epoch": 5.92, "learning_rate": 0.0002, "loss": 2.8592, "step": 13780 }, { "epoch": 5.92, "learning_rate": 0.0002, "loss": 2.8478, "step": 13785 }, { "epoch": 5.92, "learning_rate": 0.0002, "loss": 2.8547, "step": 13790 }, { "epoch": 5.92, "learning_rate": 0.0002, "loss": 2.862, "step": 13795 }, { "epoch": 5.93, "learning_rate": 0.0002, "loss": 2.8509, "step": 13800 }, { "epoch": 5.93, "learning_rate": 0.0002, "loss": 2.8399, "step": 13805 }, { "epoch": 5.93, "learning_rate": 0.0002, "loss": 2.8533, "step": 13810 }, { "epoch": 5.93, "learning_rate": 0.0002, "loss": 2.8816, "step": 13815 }, { "epoch": 5.93, "learning_rate": 0.0002, "loss": 2.8541, "step": 13820 }, { "epoch": 5.94, "eval_loss": 2.8690874576568604, "eval_runtime": 5.3822, "eval_samples_per_second": 1835.138, "eval_steps_per_second": 14.492, "eval_top3_3_weighted_f1_score ": 0.42912869704184087, "eval_top_1_macro_f1_score": 0.06746140343210294, "eval_top_1_weighted_f1score": 0.20455814407819742, "eval_top_3_macro_f1_score": 0.17004323228141258, "step": 13824 }, { "epoch": 5.94, "learning_rate": 0.0002, "loss": 2.8377, "step": 13825 }, { "epoch": 5.94, "learning_rate": 0.0002, "loss": 2.8498, "step": 13830 }, { "epoch": 5.94, "learning_rate": 0.0002, "loss": 2.8494, "step": 13835 }, { "epoch": 5.94, "learning_rate": 0.0002, "loss": 2.8549, "step": 13840 }, { "epoch": 5.94, "learning_rate": 0.0002, "loss": 2.8533, "step": 13845 }, { "epoch": 5.95, "learning_rate": 0.0002, "loss": 2.843, "step": 13850 }, { "epoch": 5.95, "learning_rate": 0.0002, "loss": 2.8539, "step": 13855 }, { "epoch": 5.95, "learning_rate": 0.0002, "loss": 2.8525, "step": 13860 }, { "epoch": 5.95, "learning_rate": 0.0002, "loss": 2.8571, "step": 13865 }, { "epoch": 5.96, "learning_rate": 0.0002, "loss": 2.8651, "step": 13870 }, { "epoch": 5.96, "learning_rate": 0.0002, "loss": 2.8529, "step": 13875 }, { "epoch": 5.96, "learning_rate": 0.0002, "loss": 2.8557, "step": 13880 }, { "epoch": 5.96, "learning_rate": 0.0002, "loss": 2.8675, "step": 13885 }, { "epoch": 5.96, "eval_loss": 2.8727800846099854, "eval_runtime": 5.3875, "eval_samples_per_second": 1833.315, "eval_steps_per_second": 14.478, "eval_top3_3_weighted_f1_score ": 0.4274189365310889, "eval_top_1_macro_f1_score": 0.061235624823663806, "eval_top_1_weighted_f1score": 0.2056026869490293, "eval_top_3_macro_f1_score": 0.1648534533740258, "step": 13888 }, { "epoch": 5.96, "learning_rate": 0.0002, "loss": 2.8544, "step": 13890 }, { "epoch": 5.97, "learning_rate": 0.0002, "loss": 2.8626, "step": 13895 }, { "epoch": 5.97, "learning_rate": 0.0002, "loss": 2.855, "step": 13900 }, { "epoch": 5.97, "learning_rate": 0.0002, "loss": 2.8777, "step": 13905 }, { "epoch": 5.97, "learning_rate": 0.0002, "loss": 2.8709, "step": 13910 }, { "epoch": 5.97, "learning_rate": 0.0002, "loss": 2.8398, "step": 13915 }, { "epoch": 5.98, "learning_rate": 0.0002, "loss": 2.8632, "step": 13920 }, { "epoch": 5.98, "learning_rate": 0.0002, "loss": 2.8581, "step": 13925 }, { "epoch": 5.98, "learning_rate": 0.0002, "loss": 2.8505, "step": 13930 }, { "epoch": 5.98, "learning_rate": 0.0002, "loss": 2.8522, "step": 13935 }, { "epoch": 5.99, "learning_rate": 0.0002, "loss": 2.8594, "step": 13940 }, { "epoch": 5.99, "learning_rate": 0.0002, "loss": 2.8659, "step": 13945 }, { "epoch": 5.99, "learning_rate": 0.0002, "loss": 2.8697, "step": 13950 }, { "epoch": 5.99, "eval_loss": 2.8647892475128174, "eval_runtime": 5.46, "eval_samples_per_second": 1808.988, "eval_steps_per_second": 14.286, "eval_top3_3_weighted_f1_score ": 0.42687586035584, "eval_top_1_macro_f1_score": 0.060179487166277516, "eval_top_1_weighted_f1score": 0.20831999698694495, "eval_top_3_macro_f1_score": 0.17113203575304348, "step": 13952 }, { "epoch": 5.99, "learning_rate": 0.0002, "loss": 2.872, "step": 13955 }, { "epoch": 5.99, "learning_rate": 0.0002, "loss": 2.8515, "step": 13960 }, { "epoch": 6.0, "learning_rate": 0.0002, "loss": 2.8577, "step": 13965 }, { "epoch": 6.0, "learning_rate": 0.0002, "loss": 2.8516, "step": 13970 }, { "epoch": 6.0, "learning_rate": 0.0002, "loss": 3.1273, "step": 13975 }, { "epoch": 6.0, "learning_rate": 0.0002, "loss": 2.8336, "step": 13980 }, { "epoch": 6.0, "learning_rate": 0.0002, "loss": 2.8612, "step": 13985 }, { "epoch": 6.01, "learning_rate": 0.0002, "loss": 2.8514, "step": 13990 }, { "epoch": 6.01, "learning_rate": 0.0002, "loss": 2.8519, "step": 13995 }, { "epoch": 6.01, "learning_rate": 0.0002, "loss": 2.8403, "step": 14000 }, { "epoch": 6.01, "learning_rate": 0.0002, "loss": 2.8412, "step": 14005 }, { "epoch": 6.02, "learning_rate": 0.0002, "loss": 2.851, "step": 14010 }, { "epoch": 6.02, "learning_rate": 0.0002, "loss": 2.873, "step": 14015 }, { "epoch": 6.02, "eval_loss": 2.860562562942505, "eval_runtime": 5.383, "eval_samples_per_second": 1834.841, "eval_steps_per_second": 14.49, "eval_top3_3_weighted_f1_score ": 0.4255769719365458, "eval_top_1_macro_f1_score": 0.059525963490135815, "eval_top_1_weighted_f1score": 0.20327920220186296, "eval_top_3_macro_f1_score": 0.15675116269801012, "step": 14016 }, { "epoch": 6.02, "learning_rate": 0.0002, "loss": 2.8514, "step": 14020 }, { "epoch": 6.02, "learning_rate": 0.0002, "loss": 2.839, "step": 14025 }, { "epoch": 6.02, "learning_rate": 0.0002, "loss": 2.8482, "step": 14030 }, { "epoch": 6.03, "learning_rate": 0.0002, "loss": 2.8526, "step": 14035 }, { "epoch": 6.03, "learning_rate": 0.0002, "loss": 2.8607, "step": 14040 }, { "epoch": 6.03, "learning_rate": 0.0002, "loss": 2.8564, "step": 14045 }, { "epoch": 6.03, "learning_rate": 0.0002, "loss": 2.8436, "step": 14050 }, { "epoch": 6.03, "learning_rate": 0.0002, "loss": 2.8322, "step": 14055 }, { "epoch": 6.04, "learning_rate": 0.0002, "loss": 2.8414, "step": 14060 }, { "epoch": 6.04, "learning_rate": 0.0002, "loss": 2.8601, "step": 14065 }, { "epoch": 6.04, "learning_rate": 0.0002, "loss": 2.8483, "step": 14070 }, { "epoch": 6.04, "learning_rate": 0.0002, "loss": 2.8478, "step": 14075 }, { "epoch": 6.05, "learning_rate": 0.0002, "loss": 2.8451, "step": 14080 }, { "epoch": 6.05, "eval_loss": 2.8654625415802, "eval_runtime": 5.384, "eval_samples_per_second": 1834.514, "eval_steps_per_second": 14.487, "eval_top3_3_weighted_f1_score ": 0.4253389481140167, "eval_top_1_macro_f1_score": 0.062891722866952, "eval_top_1_weighted_f1score": 0.21179422766158557, "eval_top_3_macro_f1_score": 0.17613893427212401, "step": 14080 }, { "epoch": 6.05, "learning_rate": 0.0002, "loss": 2.8549, "step": 14085 }, { "epoch": 6.05, "learning_rate": 0.0002, "loss": 2.8633, "step": 14090 }, { "epoch": 6.05, "learning_rate": 0.0002, "loss": 2.8657, "step": 14095 }, { "epoch": 6.05, "learning_rate": 0.0002, "loss": 2.8451, "step": 14100 }, { "epoch": 6.06, "learning_rate": 0.0002, "loss": 2.863, "step": 14105 }, { "epoch": 6.06, "learning_rate": 0.0002, "loss": 2.8503, "step": 14110 }, { "epoch": 6.06, "learning_rate": 0.0002, "loss": 2.8591, "step": 14115 }, { "epoch": 6.06, "learning_rate": 0.0002, "loss": 2.8525, "step": 14120 }, { "epoch": 6.06, "learning_rate": 0.0002, "loss": 2.8641, "step": 14125 }, { "epoch": 6.07, "learning_rate": 0.0002, "loss": 2.8659, "step": 14130 }, { "epoch": 6.07, "learning_rate": 0.0002, "loss": 2.845, "step": 14135 }, { "epoch": 6.07, "learning_rate": 0.0002, "loss": 2.8598, "step": 14140 }, { "epoch": 6.07, "eval_loss": 2.8648641109466553, "eval_runtime": 5.374, "eval_samples_per_second": 1837.934, "eval_steps_per_second": 14.514, "eval_top3_3_weighted_f1_score ": 0.42864839485569883, "eval_top_1_macro_f1_score": 0.06324209484409811, "eval_top_1_weighted_f1score": 0.20698305107306783, "eval_top_3_macro_f1_score": 0.16745562944778383, "step": 14144 }, { "epoch": 6.07, "learning_rate": 0.0002, "loss": 2.8452, "step": 14145 }, { "epoch": 6.08, "learning_rate": 0.0002, "loss": 2.8544, "step": 14150 }, { "epoch": 6.08, "learning_rate": 0.0002, "loss": 2.8478, "step": 14155 }, { "epoch": 6.08, "learning_rate": 0.0002, "loss": 2.8495, "step": 14160 }, { "epoch": 6.08, "learning_rate": 0.0002, "loss": 2.8603, "step": 14165 }, { "epoch": 6.08, "learning_rate": 0.0002, "loss": 2.8549, "step": 14170 }, { "epoch": 6.09, "learning_rate": 0.0002, "loss": 2.8509, "step": 14175 }, { "epoch": 6.09, "learning_rate": 0.0002, "loss": 2.8535, "step": 14180 }, { "epoch": 6.09, "learning_rate": 0.0002, "loss": 2.8548, "step": 14185 }, { "epoch": 6.09, "learning_rate": 0.0002, "loss": 2.8528, "step": 14190 }, { "epoch": 6.09, "learning_rate": 0.0002, "loss": 2.8439, "step": 14195 }, { "epoch": 6.1, "learning_rate": 0.0002, "loss": 2.844, "step": 14200 }, { "epoch": 6.1, "learning_rate": 0.0002, "loss": 2.8517, "step": 14205 }, { "epoch": 6.1, "eval_loss": 2.861860752105713, "eval_runtime": 5.4817, "eval_samples_per_second": 1801.805, "eval_steps_per_second": 14.229, "eval_top3_3_weighted_f1_score ": 0.43031760463309665, "eval_top_1_macro_f1_score": 0.06874036906878012, "eval_top_1_weighted_f1score": 0.20905465756024647, "eval_top_3_macro_f1_score": 0.18252730462500377, "step": 14208 }, { "epoch": 6.1, "learning_rate": 0.0002, "loss": 2.8319, "step": 14210 }, { "epoch": 6.1, "learning_rate": 0.0002, "loss": 2.8515, "step": 14215 }, { "epoch": 6.11, "learning_rate": 0.0002, "loss": 2.8413, "step": 14220 }, { "epoch": 6.11, "learning_rate": 0.0002, "loss": 2.8309, "step": 14225 }, { "epoch": 6.11, "learning_rate": 0.0002, "loss": 2.8593, "step": 14230 }, { "epoch": 6.11, "learning_rate": 0.0002, "loss": 2.8585, "step": 14235 }, { "epoch": 6.11, "learning_rate": 0.0002, "loss": 2.8681, "step": 14240 }, { "epoch": 6.12, "learning_rate": 0.0002, "loss": 2.8532, "step": 14245 }, { "epoch": 6.12, "learning_rate": 0.0002, "loss": 2.8486, "step": 14250 }, { "epoch": 6.12, "learning_rate": 0.0002, "loss": 2.8548, "step": 14255 }, { "epoch": 6.12, "learning_rate": 0.0002, "loss": 2.8656, "step": 14260 }, { "epoch": 6.12, "learning_rate": 0.0002, "loss": 2.8558, "step": 14265 }, { "epoch": 6.13, "learning_rate": 0.0002, "loss": 2.8538, "step": 14270 }, { "epoch": 6.13, "eval_loss": 2.866154193878174, "eval_runtime": 5.5182, "eval_samples_per_second": 1789.9, "eval_steps_per_second": 14.135, "eval_top3_3_weighted_f1_score ": 0.42693272860639986, "eval_top_1_macro_f1_score": 0.0656622919165084, "eval_top_1_weighted_f1score": 0.20984535680148877, "eval_top_3_macro_f1_score": 0.16282466912359944, "step": 14272 }, { "epoch": 6.13, "learning_rate": 0.0002, "loss": 2.854, "step": 14275 }, { "epoch": 6.13, "learning_rate": 0.0002, "loss": 2.8605, "step": 14280 }, { "epoch": 6.13, "learning_rate": 0.0002, "loss": 2.8363, "step": 14285 }, { "epoch": 6.14, "learning_rate": 0.0002, "loss": 2.8364, "step": 14290 }, { "epoch": 6.14, "learning_rate": 0.0002, "loss": 2.8547, "step": 14295 }, { "epoch": 6.14, "learning_rate": 0.0002, "loss": 2.8332, "step": 14300 }, { "epoch": 6.14, "learning_rate": 0.0002, "loss": 2.8482, "step": 14305 }, { "epoch": 6.14, "learning_rate": 0.0002, "loss": 2.8299, "step": 14310 }, { "epoch": 6.15, "learning_rate": 0.0002, "loss": 2.8676, "step": 14315 }, { "epoch": 6.15, "learning_rate": 0.0002, "loss": 2.8554, "step": 14320 }, { "epoch": 6.15, "learning_rate": 0.0002, "loss": 2.8615, "step": 14325 }, { "epoch": 6.15, "learning_rate": 0.0002, "loss": 2.8541, "step": 14330 }, { "epoch": 6.15, "learning_rate": 0.0002, "loss": 2.8821, "step": 14335 }, { "epoch": 6.16, "eval_loss": 2.8681774139404297, "eval_runtime": 5.4556, "eval_samples_per_second": 1810.427, "eval_steps_per_second": 14.297, "eval_top3_3_weighted_f1_score ": 0.427189588569116, "eval_top_1_macro_f1_score": 0.0599012626709618, "eval_top_1_weighted_f1score": 0.20990918717178195, "eval_top_3_macro_f1_score": 0.16753857269223285, "step": 14336 }, { "epoch": 6.16, "learning_rate": 0.0002, "loss": 2.8474, "step": 14340 }, { "epoch": 6.16, "learning_rate": 0.0002, "loss": 2.8398, "step": 14345 }, { "epoch": 6.16, "learning_rate": 0.0002, "loss": 2.8699, "step": 14350 }, { "epoch": 6.16, "learning_rate": 0.0002, "loss": 2.8539, "step": 14355 }, { "epoch": 6.17, "learning_rate": 0.0002, "loss": 2.8414, "step": 14360 }, { "epoch": 6.17, "learning_rate": 0.0002, "loss": 2.8796, "step": 14365 }, { "epoch": 6.17, "learning_rate": 0.0002, "loss": 2.8634, "step": 14370 }, { "epoch": 6.17, "learning_rate": 0.0002, "loss": 2.8476, "step": 14375 }, { "epoch": 6.17, "learning_rate": 0.0002, "loss": 2.86, "step": 14380 }, { "epoch": 6.18, "learning_rate": 0.0002, "loss": 2.8563, "step": 14385 }, { "epoch": 6.18, "learning_rate": 0.0002, "loss": 2.8348, "step": 14390 }, { "epoch": 6.18, "learning_rate": 0.0002, "loss": 2.8344, "step": 14395 }, { "epoch": 6.18, "learning_rate": 0.0002, "loss": 2.8441, "step": 14400 }, { "epoch": 6.18, "eval_loss": 2.8655192852020264, "eval_runtime": 5.6848, "eval_samples_per_second": 1737.435, "eval_steps_per_second": 13.721, "eval_top3_3_weighted_f1_score ": 0.4245790732996531, "eval_top_1_macro_f1_score": 0.06231000189655607, "eval_top_1_weighted_f1score": 0.20954993066502497, "eval_top_3_macro_f1_score": 0.1644070791871058, "step": 14400 }, { "epoch": 6.19, "learning_rate": 0.0002, "loss": 2.8468, "step": 14405 }, { "epoch": 6.19, "learning_rate": 0.0002, "loss": 2.8375, "step": 14410 }, { "epoch": 6.19, "learning_rate": 0.0002, "loss": 2.8371, "step": 14415 }, { "epoch": 6.19, "learning_rate": 0.0002, "loss": 2.8513, "step": 14420 }, { "epoch": 6.19, "learning_rate": 0.0002, "loss": 2.8461, "step": 14425 }, { "epoch": 6.2, "learning_rate": 0.0002, "loss": 2.8437, "step": 14430 }, { "epoch": 6.2, "learning_rate": 0.0002, "loss": 2.8686, "step": 14435 }, { "epoch": 6.2, "learning_rate": 0.0002, "loss": 2.8461, "step": 14440 }, { "epoch": 6.2, "learning_rate": 0.0002, "loss": 2.856, "step": 14445 }, { "epoch": 6.2, "learning_rate": 0.0002, "loss": 2.8638, "step": 14450 }, { "epoch": 6.21, "learning_rate": 0.0002, "loss": 2.8623, "step": 14455 }, { "epoch": 6.21, "learning_rate": 0.0002, "loss": 2.8547, "step": 14460 }, { "epoch": 6.21, "eval_loss": 2.8639345169067383, "eval_runtime": 5.4197, "eval_samples_per_second": 1822.416, "eval_steps_per_second": 14.392, "eval_top3_3_weighted_f1_score ": 0.42709454392903934, "eval_top_1_macro_f1_score": 0.06288798968966343, "eval_top_1_weighted_f1score": 0.2075210786928968, "eval_top_3_macro_f1_score": 0.16917322997926354, "step": 14464 }, { "epoch": 6.21, "learning_rate": 0.0002, "loss": 2.8408, "step": 14465 }, { "epoch": 6.21, "learning_rate": 0.0002, "loss": 2.8427, "step": 14470 }, { "epoch": 6.22, "learning_rate": 0.0002, "loss": 2.856, "step": 14475 }, { "epoch": 6.22, "learning_rate": 0.0002, "loss": 2.8593, "step": 14480 }, { "epoch": 6.22, "learning_rate": 0.0002, "loss": 2.8465, "step": 14485 }, { "epoch": 6.22, "learning_rate": 0.0002, "loss": 2.8452, "step": 14490 }, { "epoch": 6.22, "learning_rate": 0.0002, "loss": 2.8483, "step": 14495 }, { "epoch": 6.23, "learning_rate": 0.0002, "loss": 2.8479, "step": 14500 }, { "epoch": 6.23, "learning_rate": 0.0002, "loss": 2.8418, "step": 14505 }, { "epoch": 6.23, "learning_rate": 0.0002, "loss": 2.8389, "step": 14510 }, { "epoch": 6.23, "learning_rate": 0.0002, "loss": 2.851, "step": 14515 }, { "epoch": 6.23, "learning_rate": 0.0002, "loss": 2.8498, "step": 14520 }, { "epoch": 6.24, "learning_rate": 0.0002, "loss": 2.86, "step": 14525 }, { "epoch": 6.24, "eval_loss": 2.8650901317596436, "eval_runtime": 5.3566, "eval_samples_per_second": 1843.888, "eval_steps_per_second": 14.561, "eval_top3_3_weighted_f1_score ": 0.4270264516778953, "eval_top_1_macro_f1_score": 0.06016463468597995, "eval_top_1_weighted_f1score": 0.20816923117935468, "eval_top_3_macro_f1_score": 0.1681092323881849, "step": 14528 }, { "epoch": 6.24, "learning_rate": 0.0002, "loss": 2.8442, "step": 14530 }, { "epoch": 6.24, "learning_rate": 0.0002, "loss": 2.8518, "step": 14535 }, { "epoch": 6.24, "learning_rate": 0.0002, "loss": 2.8626, "step": 14540 }, { "epoch": 6.25, "learning_rate": 0.0002, "loss": 2.8593, "step": 14545 }, { "epoch": 6.25, "learning_rate": 0.0002, "loss": 2.8491, "step": 14550 }, { "epoch": 6.25, "learning_rate": 0.0002, "loss": 2.8635, "step": 14555 }, { "epoch": 6.25, "learning_rate": 0.0002, "loss": 2.8411, "step": 14560 }, { "epoch": 6.25, "learning_rate": 0.0002, "loss": 2.8618, "step": 14565 }, { "epoch": 6.26, "learning_rate": 0.0002, "loss": 2.8505, "step": 14570 }, { "epoch": 6.26, "learning_rate": 0.0002, "loss": 2.8768, "step": 14575 }, { "epoch": 6.26, "learning_rate": 0.0002, "loss": 2.8765, "step": 14580 }, { "epoch": 6.26, "learning_rate": 0.0002, "loss": 2.8384, "step": 14585 }, { "epoch": 6.26, "learning_rate": 0.0002, "loss": 2.8706, "step": 14590 }, { "epoch": 6.27, "eval_loss": 2.858391523361206, "eval_runtime": 5.6623, "eval_samples_per_second": 1744.358, "eval_steps_per_second": 13.775, "eval_top3_3_weighted_f1_score ": 0.4299116980347578, "eval_top_1_macro_f1_score": 0.06511235020674873, "eval_top_1_weighted_f1score": 0.20653598410434315, "eval_top_3_macro_f1_score": 0.16354742919613674, "step": 14592 }, { "epoch": 6.27, "learning_rate": 0.0002, "loss": 2.8357, "step": 14595 }, { "epoch": 6.27, "learning_rate": 0.0002, "loss": 2.8531, "step": 14600 }, { "epoch": 6.27, "learning_rate": 0.0002, "loss": 2.8593, "step": 14605 }, { "epoch": 6.27, "learning_rate": 0.0002, "loss": 2.8504, "step": 14610 }, { "epoch": 6.28, "learning_rate": 0.0002, "loss": 2.8411, "step": 14615 }, { "epoch": 6.28, "learning_rate": 0.0002, "loss": 2.8343, "step": 14620 }, { "epoch": 6.28, "learning_rate": 0.0002, "loss": 2.8452, "step": 14625 }, { "epoch": 6.28, "learning_rate": 0.0002, "loss": 2.8424, "step": 14630 }, { "epoch": 6.28, "learning_rate": 0.0002, "loss": 2.833, "step": 14635 }, { "epoch": 6.29, "learning_rate": 0.0002, "loss": 2.8476, "step": 14640 }, { "epoch": 6.29, "learning_rate": 0.0002, "loss": 2.848, "step": 14645 }, { "epoch": 6.29, "learning_rate": 0.0002, "loss": 2.8517, "step": 14650 }, { "epoch": 6.29, "learning_rate": 0.0002, "loss": 2.8433, "step": 14655 }, { "epoch": 6.29, "eval_loss": 2.8645198345184326, "eval_runtime": 5.3318, "eval_samples_per_second": 1852.462, "eval_steps_per_second": 14.629, "eval_top3_3_weighted_f1_score ": 0.4262237669359407, "eval_top_1_macro_f1_score": 0.05589004399179858, "eval_top_1_weighted_f1score": 0.20421429552342285, "eval_top_3_macro_f1_score": 0.1550932169485424, "step": 14656 }, { "epoch": 6.29, "learning_rate": 0.0002, "loss": 2.8627, "step": 14660 }, { "epoch": 6.3, "learning_rate": 0.0002, "loss": 2.8622, "step": 14665 }, { "epoch": 6.3, "learning_rate": 0.0002, "loss": 2.8479, "step": 14670 }, { "epoch": 6.3, "learning_rate": 0.0002, "loss": 2.8494, "step": 14675 }, { "epoch": 6.3, "learning_rate": 0.0002, "loss": 2.8575, "step": 14680 }, { "epoch": 6.31, "learning_rate": 0.0002, "loss": 2.8432, "step": 14685 }, { "epoch": 6.31, "learning_rate": 0.0002, "loss": 2.8668, "step": 14690 }, { "epoch": 6.31, "learning_rate": 0.0002, "loss": 2.8546, "step": 14695 }, { "epoch": 6.31, "learning_rate": 0.0002, "loss": 2.8488, "step": 14700 }, { "epoch": 6.31, "learning_rate": 0.0002, "loss": 2.8841, "step": 14705 }, { "epoch": 6.32, "learning_rate": 0.0002, "loss": 2.8385, "step": 14710 }, { "epoch": 6.32, "learning_rate": 0.0002, "loss": 2.8552, "step": 14715 }, { "epoch": 6.32, "learning_rate": 0.0002, "loss": 2.8619, "step": 14720 }, { "epoch": 6.32, "eval_loss": 2.8671915531158447, "eval_runtime": 5.3091, "eval_samples_per_second": 1860.382, "eval_steps_per_second": 14.692, "eval_top3_3_weighted_f1_score ": 0.42512401694508484, "eval_top_1_macro_f1_score": 0.06521271931683321, "eval_top_1_weighted_f1score": 0.21140881058230832, "eval_top_3_macro_f1_score": 0.16005632792300933, "step": 14720 }, { "epoch": 6.32, "learning_rate": 0.0002, "loss": 2.8389, "step": 14725 }, { "epoch": 6.32, "learning_rate": 0.0002, "loss": 2.843, "step": 14730 }, { "epoch": 6.33, "learning_rate": 0.0002, "loss": 2.8362, "step": 14735 }, { "epoch": 6.33, "learning_rate": 0.0002, "loss": 2.8522, "step": 14740 }, { "epoch": 6.33, "learning_rate": 0.0002, "loss": 2.8569, "step": 14745 }, { "epoch": 6.33, "learning_rate": 0.0002, "loss": 2.863, "step": 14750 }, { "epoch": 6.34, "learning_rate": 0.0002, "loss": 2.8645, "step": 14755 }, { "epoch": 6.34, "learning_rate": 0.0002, "loss": 2.8531, "step": 14760 }, { "epoch": 6.34, "learning_rate": 0.0002, "loss": 2.8377, "step": 14765 }, { "epoch": 6.34, "learning_rate": 0.0002, "loss": 2.8377, "step": 14770 }, { "epoch": 6.34, "learning_rate": 0.0002, "loss": 2.8447, "step": 14775 }, { "epoch": 6.35, "learning_rate": 0.0002, "loss": 2.846, "step": 14780 }, { "epoch": 6.35, "eval_loss": 2.8662109375, "eval_runtime": 5.2681, "eval_samples_per_second": 1874.854, "eval_steps_per_second": 14.806, "eval_top3_3_weighted_f1_score ": 0.4297477209216371, "eval_top_1_macro_f1_score": 0.060755136969351614, "eval_top_1_weighted_f1score": 0.20593723360268085, "eval_top_3_macro_f1_score": 0.17352720917964556, "step": 14784 }, { "epoch": 6.35, "learning_rate": 0.0002, "loss": 2.8464, "step": 14785 }, { "epoch": 6.35, "learning_rate": 0.0002, "loss": 2.8563, "step": 14790 }, { "epoch": 6.35, "learning_rate": 0.0002, "loss": 2.8697, "step": 14795 }, { "epoch": 6.35, "learning_rate": 0.0002, "loss": 2.8512, "step": 14800 }, { "epoch": 6.36, "learning_rate": 0.0002, "loss": 2.8609, "step": 14805 }, { "epoch": 6.36, "learning_rate": 0.0002, "loss": 2.8332, "step": 14810 }, { "epoch": 6.36, "learning_rate": 0.0002, "loss": 2.8574, "step": 14815 }, { "epoch": 6.36, "learning_rate": 0.0002, "loss": 2.8542, "step": 14820 }, { "epoch": 6.37, "learning_rate": 0.0002, "loss": 2.8372, "step": 14825 }, { "epoch": 6.37, "learning_rate": 0.0002, "loss": 2.8395, "step": 14830 }, { "epoch": 6.37, "learning_rate": 0.0002, "loss": 2.8292, "step": 14835 }, { "epoch": 6.37, "learning_rate": 0.0002, "loss": 2.8662, "step": 14840 }, { "epoch": 6.37, "learning_rate": 0.0002, "loss": 2.8382, "step": 14845 }, { "epoch": 6.38, "eval_loss": 2.859290599822998, "eval_runtime": 5.7805, "eval_samples_per_second": 1708.674, "eval_steps_per_second": 13.494, "eval_top3_3_weighted_f1_score ": 0.43133298735934217, "eval_top_1_macro_f1_score": 0.0652974806153142, "eval_top_1_weighted_f1score": 0.2092498162086767, "eval_top_3_macro_f1_score": 0.1750329939017237, "step": 14848 }, { "epoch": 6.38, "learning_rate": 0.0002, "loss": 2.8341, "step": 14850 }, { "epoch": 6.38, "learning_rate": 0.0002, "loss": 2.8683, "step": 14855 }, { "epoch": 6.38, "learning_rate": 0.0002, "loss": 2.8435, "step": 14860 }, { "epoch": 6.38, "learning_rate": 0.0002, "loss": 2.8594, "step": 14865 }, { "epoch": 6.38, "learning_rate": 0.0002, "loss": 2.8539, "step": 14870 }, { "epoch": 6.39, "learning_rate": 0.0002, "loss": 2.8618, "step": 14875 }, { "epoch": 6.39, "learning_rate": 0.0002, "loss": 2.8378, "step": 14880 }, { "epoch": 6.39, "learning_rate": 0.0002, "loss": 2.8657, "step": 14885 }, { "epoch": 6.39, "learning_rate": 0.0002, "loss": 2.8537, "step": 14890 }, { "epoch": 6.4, "learning_rate": 0.0002, "loss": 2.8498, "step": 14895 }, { "epoch": 6.4, "learning_rate": 0.0002, "loss": 2.8519, "step": 14900 }, { "epoch": 6.4, "learning_rate": 0.0002, "loss": 2.8498, "step": 14905 }, { "epoch": 6.4, "learning_rate": 0.0002, "loss": 2.8615, "step": 14910 }, { "epoch": 6.4, "eval_loss": 2.86201810836792, "eval_runtime": 5.5374, "eval_samples_per_second": 1783.685, "eval_steps_per_second": 14.086, "eval_top3_3_weighted_f1_score ": 0.4316349888539969, "eval_top_1_macro_f1_score": 0.06433221584881162, "eval_top_1_weighted_f1score": 0.2086131824809885, "eval_top_3_macro_f1_score": 0.184924268898213, "step": 14912 }, { "epoch": 6.4, "learning_rate": 0.0002, "loss": 2.8604, "step": 14915 }, { "epoch": 6.41, "learning_rate": 0.0002, "loss": 2.836, "step": 14920 }, { "epoch": 6.41, "learning_rate": 0.0002, "loss": 2.8746, "step": 14925 }, { "epoch": 6.41, "learning_rate": 0.0002, "loss": 2.837, "step": 14930 }, { "epoch": 6.41, "learning_rate": 0.0002, "loss": 2.856, "step": 14935 }, { "epoch": 6.41, "learning_rate": 0.0002, "loss": 2.8432, "step": 14940 }, { "epoch": 6.42, "learning_rate": 0.0002, "loss": 2.8475, "step": 14945 }, { "epoch": 6.42, "learning_rate": 0.0002, "loss": 2.8412, "step": 14950 }, { "epoch": 6.42, "learning_rate": 0.0002, "loss": 2.8392, "step": 14955 }, { "epoch": 6.42, "learning_rate": 0.0002, "loss": 2.8678, "step": 14960 }, { "epoch": 6.43, "learning_rate": 0.0002, "loss": 2.862, "step": 14965 }, { "epoch": 6.43, "learning_rate": 0.0002, "loss": 2.8524, "step": 14970 }, { "epoch": 6.43, "learning_rate": 0.0002, "loss": 2.8481, "step": 14975 }, { "epoch": 6.43, "eval_loss": 2.8615529537200928, "eval_runtime": 5.447, "eval_samples_per_second": 1813.276, "eval_steps_per_second": 14.32, "eval_top3_3_weighted_f1_score ": 0.4279112433014357, "eval_top_1_macro_f1_score": 0.06308184819464147, "eval_top_1_weighted_f1score": 0.2073751147238242, "eval_top_3_macro_f1_score": 0.17409335405517545, "step": 14976 }, { "epoch": 6.43, "learning_rate": 0.0002, "loss": 2.8486, "step": 14980 }, { "epoch": 6.43, "learning_rate": 0.0002, "loss": 2.8595, "step": 14985 }, { "epoch": 6.44, "learning_rate": 0.0002, "loss": 2.8336, "step": 14990 }, { "epoch": 6.44, "learning_rate": 0.0002, "loss": 2.8386, "step": 14995 }, { "epoch": 6.44, "learning_rate": 0.0002, "loss": 2.8618, "step": 15000 }, { "epoch": 6.44, "learning_rate": 0.0002, "loss": 2.8466, "step": 15005 }, { "epoch": 6.44, "learning_rate": 0.0002, "loss": 2.8611, "step": 15010 }, { "epoch": 6.45, "learning_rate": 0.0002, "loss": 2.8674, "step": 15015 }, { "epoch": 6.45, "learning_rate": 0.0002, "loss": 2.8358, "step": 15020 }, { "epoch": 6.45, "learning_rate": 0.0002, "loss": 2.8402, "step": 15025 }, { "epoch": 6.45, "learning_rate": 0.0002, "loss": 2.8661, "step": 15030 }, { "epoch": 6.46, "learning_rate": 0.0002, "loss": 2.8504, "step": 15035 }, { "epoch": 6.46, "learning_rate": 0.0002, "loss": 2.8301, "step": 15040 }, { "epoch": 6.46, "eval_loss": 2.8677945137023926, "eval_runtime": 5.3988, "eval_samples_per_second": 1829.48, "eval_steps_per_second": 14.448, "eval_top3_3_weighted_f1_score ": 0.42455007539366635, "eval_top_1_macro_f1_score": 0.058996169356064575, "eval_top_1_weighted_f1score": 0.20544254012800425, "eval_top_3_macro_f1_score": 0.16829896470607966, "step": 15040 }, { "epoch": 6.46, "learning_rate": 0.0002, "loss": 2.8494, "step": 15045 }, { "epoch": 6.46, "learning_rate": 0.0002, "loss": 2.8606, "step": 15050 }, { "epoch": 6.46, "learning_rate": 0.0002, "loss": 2.8466, "step": 15055 }, { "epoch": 6.47, "learning_rate": 0.0002, "loss": 2.8531, "step": 15060 }, { "epoch": 6.47, "learning_rate": 0.0002, "loss": 2.8617, "step": 15065 }, { "epoch": 6.47, "learning_rate": 0.0002, "loss": 2.8485, "step": 15070 }, { "epoch": 6.47, "learning_rate": 0.0002, "loss": 2.8492, "step": 15075 }, { "epoch": 6.47, "learning_rate": 0.0002, "loss": 2.845, "step": 15080 }, { "epoch": 6.48, "learning_rate": 0.0002, "loss": 2.8405, "step": 15085 }, { "epoch": 6.48, "learning_rate": 0.0002, "loss": 2.8466, "step": 15090 }, { "epoch": 6.48, "learning_rate": 0.0002, "loss": 2.8636, "step": 15095 }, { "epoch": 6.48, "learning_rate": 0.0002, "loss": 2.8454, "step": 15100 }, { "epoch": 6.49, "eval_loss": 2.861515522003174, "eval_runtime": 5.3588, "eval_samples_per_second": 1843.121, "eval_steps_per_second": 14.555, "eval_top3_3_weighted_f1_score ": 0.4294943102080586, "eval_top_1_macro_f1_score": 0.0604777460693949, "eval_top_1_weighted_f1score": 0.21113439617484422, "eval_top_3_macro_f1_score": 0.1744706343059108, "step": 15104 }, { "epoch": 6.49, "learning_rate": 0.0002, "loss": 2.8714, "step": 15105 }, { "epoch": 6.49, "learning_rate": 0.0002, "loss": 2.8188, "step": 15110 }, { "epoch": 6.49, "learning_rate": 0.0002, "loss": 2.8437, "step": 15115 }, { "epoch": 6.49, "learning_rate": 0.0002, "loss": 2.8525, "step": 15120 }, { "epoch": 6.49, "learning_rate": 0.0002, "loss": 2.8415, "step": 15125 }, { "epoch": 6.5, "learning_rate": 0.0002, "loss": 2.8485, "step": 15130 }, { "epoch": 6.5, "learning_rate": 0.0002, "loss": 2.8497, "step": 15135 }, { "epoch": 6.5, "learning_rate": 0.0002, "loss": 2.8489, "step": 15140 }, { "epoch": 6.5, "learning_rate": 0.0002, "loss": 2.8427, "step": 15145 }, { "epoch": 6.5, "learning_rate": 0.0002, "loss": 2.8462, "step": 15150 }, { "epoch": 6.51, "learning_rate": 0.0002, "loss": 2.8597, "step": 15155 }, { "epoch": 6.51, "learning_rate": 0.0002, "loss": 2.8521, "step": 15160 }, { "epoch": 6.51, "learning_rate": 0.0002, "loss": 2.817, "step": 15165 }, { "epoch": 6.51, "eval_loss": 2.8600211143493652, "eval_runtime": 5.32, "eval_samples_per_second": 1856.568, "eval_steps_per_second": 14.662, "eval_top3_3_weighted_f1_score ": 0.4284376689033966, "eval_top_1_macro_f1_score": 0.06438466999821016, "eval_top_1_weighted_f1score": 0.20780307893755443, "eval_top_3_macro_f1_score": 0.16777399572464152, "step": 15168 }, { "epoch": 6.51, "learning_rate": 0.0002, "loss": 2.846, "step": 15170 }, { "epoch": 6.52, "learning_rate": 0.0002, "loss": 2.8572, "step": 15175 }, { "epoch": 6.52, "learning_rate": 0.0002, "loss": 2.8324, "step": 15180 }, { "epoch": 6.52, "learning_rate": 0.0002, "loss": 2.8406, "step": 15185 }, { "epoch": 6.52, "learning_rate": 0.0002, "loss": 2.8641, "step": 15190 }, { "epoch": 6.52, "learning_rate": 0.0002, "loss": 2.8615, "step": 15195 }, { "epoch": 6.53, "learning_rate": 0.0002, "loss": 2.843, "step": 15200 }, { "epoch": 6.53, "learning_rate": 0.0002, "loss": 2.8466, "step": 15205 }, { "epoch": 6.53, "learning_rate": 0.0002, "loss": 2.8429, "step": 15210 }, { "epoch": 6.53, "learning_rate": 0.0002, "loss": 2.844, "step": 15215 }, { "epoch": 6.53, "learning_rate": 0.0002, "loss": 2.8594, "step": 15220 }, { "epoch": 6.54, "learning_rate": 0.0002, "loss": 2.8429, "step": 15225 }, { "epoch": 6.54, "learning_rate": 0.0002, "loss": 2.8423, "step": 15230 }, { "epoch": 6.54, "eval_loss": 2.863083839416504, "eval_runtime": 5.3545, "eval_samples_per_second": 1844.601, "eval_steps_per_second": 14.567, "eval_top3_3_weighted_f1_score ": 0.4300403285250032, "eval_top_1_macro_f1_score": 0.06085514466902401, "eval_top_1_weighted_f1score": 0.20511851127430486, "eval_top_3_macro_f1_score": 0.17172974713983308, "step": 15232 }, { "epoch": 6.54, "learning_rate": 0.0002, "loss": 2.8535, "step": 15235 }, { "epoch": 6.54, "learning_rate": 0.0002, "loss": 2.8695, "step": 15240 }, { "epoch": 6.55, "learning_rate": 0.0002, "loss": 2.8507, "step": 15245 }, { "epoch": 6.55, "learning_rate": 0.0002, "loss": 2.8498, "step": 15250 }, { "epoch": 6.55, "learning_rate": 0.0002, "loss": 2.8427, "step": 15255 }, { "epoch": 6.55, "learning_rate": 0.0002, "loss": 2.8357, "step": 15260 }, { "epoch": 6.55, "learning_rate": 0.0002, "loss": 2.8614, "step": 15265 }, { "epoch": 6.56, "learning_rate": 0.0002, "loss": 2.8473, "step": 15270 }, { "epoch": 6.56, "learning_rate": 0.0002, "loss": 2.849, "step": 15275 }, { "epoch": 6.56, "learning_rate": 0.0002, "loss": 2.8605, "step": 15280 }, { "epoch": 6.56, "learning_rate": 0.0002, "loss": 2.8493, "step": 15285 }, { "epoch": 6.56, "learning_rate": 0.0002, "loss": 2.8494, "step": 15290 }, { "epoch": 6.57, "learning_rate": 0.0002, "loss": 2.8545, "step": 15295 }, { "epoch": 6.57, "eval_loss": 2.8594741821289062, "eval_runtime": 5.7069, "eval_samples_per_second": 1730.709, "eval_steps_per_second": 13.668, "eval_top3_3_weighted_f1_score ": 0.4306786416508773, "eval_top_1_macro_f1_score": 0.06689709710684523, "eval_top_1_weighted_f1score": 0.20801576964445317, "eval_top_3_macro_f1_score": 0.1785273189858423, "step": 15296 }, { "epoch": 6.57, "learning_rate": 0.0002, "loss": 2.8281, "step": 15300 }, { "epoch": 6.57, "learning_rate": 0.0002, "loss": 2.8605, "step": 15305 }, { "epoch": 6.57, "learning_rate": 0.0002, "loss": 2.8659, "step": 15310 }, { "epoch": 6.58, "learning_rate": 0.0002, "loss": 2.8617, "step": 15315 }, { "epoch": 6.58, "learning_rate": 0.0002, "loss": 2.873, "step": 15320 }, { "epoch": 6.58, "learning_rate": 0.0002, "loss": 2.8651, "step": 15325 }, { "epoch": 6.58, "learning_rate": 0.0002, "loss": 2.8484, "step": 15330 }, { "epoch": 6.58, "learning_rate": 0.0002, "loss": 2.8604, "step": 15335 }, { "epoch": 6.59, "learning_rate": 0.0002, "loss": 2.8542, "step": 15340 }, { "epoch": 6.59, "learning_rate": 0.0002, "loss": 2.8507, "step": 15345 }, { "epoch": 6.59, "learning_rate": 0.0002, "loss": 2.8628, "step": 15350 }, { "epoch": 6.59, "learning_rate": 0.0002, "loss": 2.8512, "step": 15355 }, { "epoch": 6.59, "learning_rate": 0.0002, "loss": 2.8585, "step": 15360 }, { "epoch": 6.59, "eval_loss": 2.8582818508148193, "eval_runtime": 5.3443, "eval_samples_per_second": 1848.126, "eval_steps_per_second": 14.595, "eval_top3_3_weighted_f1_score ": 0.4305267363864232, "eval_top_1_macro_f1_score": 0.06304345759792579, "eval_top_1_weighted_f1score": 0.20939826484016782, "eval_top_3_macro_f1_score": 0.17486988539878526, "step": 15360 }, { "epoch": 6.6, "learning_rate": 0.0002, "loss": 2.8642, "step": 15365 }, { "epoch": 6.6, "learning_rate": 0.0002, "loss": 2.836, "step": 15370 }, { "epoch": 6.6, "learning_rate": 0.0002, "loss": 2.8651, "step": 15375 }, { "epoch": 6.6, "learning_rate": 0.0002, "loss": 2.8432, "step": 15380 }, { "epoch": 6.61, "learning_rate": 0.0002, "loss": 2.8479, "step": 15385 }, { "epoch": 6.61, "learning_rate": 0.0002, "loss": 2.8469, "step": 15390 }, { "epoch": 6.61, "learning_rate": 0.0002, "loss": 2.8572, "step": 15395 }, { "epoch": 6.61, "learning_rate": 0.0002, "loss": 2.8398, "step": 15400 }, { "epoch": 6.61, "learning_rate": 0.0002, "loss": 2.8419, "step": 15405 }, { "epoch": 6.62, "learning_rate": 0.0002, "loss": 2.8463, "step": 15410 }, { "epoch": 6.62, "learning_rate": 0.0002, "loss": 2.8609, "step": 15415 }, { "epoch": 6.62, "learning_rate": 0.0002, "loss": 2.8418, "step": 15420 }, { "epoch": 6.62, "eval_loss": 2.8603336811065674, "eval_runtime": 5.4654, "eval_samples_per_second": 1807.183, "eval_steps_per_second": 14.272, "eval_top3_3_weighted_f1_score ": 0.4294540806516274, "eval_top_1_macro_f1_score": 0.0642858580569476, "eval_top_1_weighted_f1score": 0.2125765022957422, "eval_top_3_macro_f1_score": 0.17009768405843356, "step": 15424 }, { "epoch": 6.62, "learning_rate": 0.0002, "loss": 2.8605, "step": 15425 }, { "epoch": 6.63, "learning_rate": 0.0002, "loss": 2.8363, "step": 15430 }, { "epoch": 6.63, "learning_rate": 0.0002, "loss": 2.8557, "step": 15435 }, { "epoch": 6.63, "learning_rate": 0.0002, "loss": 2.8503, "step": 15440 }, { "epoch": 6.63, "learning_rate": 0.0002, "loss": 2.86, "step": 15445 }, { "epoch": 6.63, "learning_rate": 0.0002, "loss": 2.8626, "step": 15450 }, { "epoch": 6.64, "learning_rate": 0.0002, "loss": 2.8514, "step": 15455 }, { "epoch": 6.64, "learning_rate": 0.0002, "loss": 2.8542, "step": 15460 }, { "epoch": 6.64, "learning_rate": 0.0002, "loss": 2.8435, "step": 15465 }, { "epoch": 6.64, "learning_rate": 0.0002, "loss": 2.8607, "step": 15470 }, { "epoch": 6.64, "learning_rate": 0.0002, "loss": 2.8245, "step": 15475 }, { "epoch": 6.65, "learning_rate": 0.0002, "loss": 2.8375, "step": 15480 }, { "epoch": 6.65, "learning_rate": 0.0002, "loss": 2.8646, "step": 15485 }, { "epoch": 6.65, "eval_loss": 2.8616693019866943, "eval_runtime": 5.3635, "eval_samples_per_second": 1841.536, "eval_steps_per_second": 14.543, "eval_top3_3_weighted_f1_score ": 0.4317643351334253, "eval_top_1_macro_f1_score": 0.06857994824263079, "eval_top_1_weighted_f1score": 0.20923647646907634, "eval_top_3_macro_f1_score": 0.17278554096226317, "step": 15488 }, { "epoch": 6.65, "learning_rate": 0.0002, "loss": 2.8726, "step": 15490 }, { "epoch": 6.65, "learning_rate": 0.0002, "loss": 2.8421, "step": 15495 }, { "epoch": 6.66, "learning_rate": 0.0002, "loss": 2.8466, "step": 15500 }, { "epoch": 6.66, "learning_rate": 0.0002, "loss": 2.852, "step": 15505 }, { "epoch": 6.66, "learning_rate": 0.0002, "loss": 2.8677, "step": 15510 }, { "epoch": 6.66, "learning_rate": 0.0002, "loss": 2.8664, "step": 15515 }, { "epoch": 6.66, "learning_rate": 0.0002, "loss": 2.84, "step": 15520 }, { "epoch": 6.67, "learning_rate": 0.0002, "loss": 2.8589, "step": 15525 }, { "epoch": 6.67, "learning_rate": 0.0002, "loss": 2.8703, "step": 15530 }, { "epoch": 6.67, "learning_rate": 0.0002, "loss": 2.8393, "step": 15535 }, { "epoch": 6.67, "learning_rate": 0.0002, "loss": 2.8345, "step": 15540 }, { "epoch": 6.67, "learning_rate": 0.0002, "loss": 2.8366, "step": 15545 }, { "epoch": 6.68, "learning_rate": 0.0002, "loss": 2.8498, "step": 15550 }, { "epoch": 6.68, "eval_loss": 2.8605828285217285, "eval_runtime": 5.4526, "eval_samples_per_second": 1811.443, "eval_steps_per_second": 14.305, "eval_top3_3_weighted_f1_score ": 0.4249779934053836, "eval_top_1_macro_f1_score": 0.06433192614509126, "eval_top_1_weighted_f1score": 0.20726134325996243, "eval_top_3_macro_f1_score": 0.17178580038675367, "step": 15552 }, { "epoch": 6.68, "learning_rate": 0.0002, "loss": 2.836, "step": 15555 }, { "epoch": 6.68, "learning_rate": 0.0002, "loss": 2.8728, "step": 15560 }, { "epoch": 6.68, "learning_rate": 0.0002, "loss": 2.8697, "step": 15565 }, { "epoch": 6.69, "learning_rate": 0.0002, "loss": 2.8513, "step": 15570 }, { "epoch": 6.69, "learning_rate": 0.0002, "loss": 2.8389, "step": 15575 }, { "epoch": 6.69, "learning_rate": 0.0002, "loss": 2.8436, "step": 15580 }, { "epoch": 6.69, "learning_rate": 0.0002, "loss": 2.8607, "step": 15585 }, { "epoch": 6.69, "learning_rate": 0.0002, "loss": 2.8615, "step": 15590 }, { "epoch": 6.7, "learning_rate": 0.0002, "loss": 2.8428, "step": 15595 }, { "epoch": 6.7, "learning_rate": 0.0002, "loss": 2.848, "step": 15600 }, { "epoch": 6.7, "learning_rate": 0.0002, "loss": 2.8401, "step": 15605 }, { "epoch": 6.7, "learning_rate": 0.0002, "loss": 2.8456, "step": 15610 }, { "epoch": 6.7, "learning_rate": 0.0002, "loss": 2.8484, "step": 15615 }, { "epoch": 6.7, "eval_loss": 2.861997604370117, "eval_runtime": 5.332, "eval_samples_per_second": 1852.407, "eval_steps_per_second": 14.629, "eval_top3_3_weighted_f1_score ": 0.4288646100979513, "eval_top_1_macro_f1_score": 0.06356551941777129, "eval_top_1_weighted_f1score": 0.21000433410632016, "eval_top_3_macro_f1_score": 0.17679677287315787, "step": 15616 }, { "epoch": 6.71, "learning_rate": 0.0002, "loss": 2.8472, "step": 15620 }, { "epoch": 6.71, "learning_rate": 0.0002, "loss": 2.857, "step": 15625 }, { "epoch": 6.71, "learning_rate": 0.0002, "loss": 2.8587, "step": 15630 }, { "epoch": 6.71, "learning_rate": 0.0002, "loss": 2.8302, "step": 15635 }, { "epoch": 6.72, "learning_rate": 0.0002, "loss": 2.8559, "step": 15640 }, { "epoch": 6.72, "learning_rate": 0.0002, "loss": 2.8624, "step": 15645 }, { "epoch": 6.72, "learning_rate": 0.0002, "loss": 2.8345, "step": 15650 }, { "epoch": 6.72, "learning_rate": 0.0002, "loss": 2.8756, "step": 15655 }, { "epoch": 6.72, "learning_rate": 0.0002, "loss": 2.8705, "step": 15660 }, { "epoch": 6.73, "learning_rate": 0.0002, "loss": 2.8654, "step": 15665 }, { "epoch": 6.73, "learning_rate": 0.0002, "loss": 2.8623, "step": 15670 }, { "epoch": 6.73, "learning_rate": 0.0002, "loss": 2.8496, "step": 15675 }, { "epoch": 6.73, "learning_rate": 0.0002, "loss": 2.8585, "step": 15680 }, { "epoch": 6.73, "eval_loss": 2.855945348739624, "eval_runtime": 5.2544, "eval_samples_per_second": 1879.767, "eval_steps_per_second": 14.845, "eval_top3_3_weighted_f1_score ": 0.430248813419073, "eval_top_1_macro_f1_score": 0.0606318984427841, "eval_top_1_weighted_f1score": 0.20736633714019903, "eval_top_3_macro_f1_score": 0.17073038099669996, "step": 15680 }, { "epoch": 6.73, "learning_rate": 0.0002, "loss": 2.8563, "step": 15685 }, { "epoch": 6.74, "learning_rate": 0.0002, "loss": 2.8636, "step": 15690 }, { "epoch": 6.74, "learning_rate": 0.0002, "loss": 2.8634, "step": 15695 }, { "epoch": 6.74, "learning_rate": 0.0002, "loss": 2.8457, "step": 15700 }, { "epoch": 6.74, "learning_rate": 0.0002, "loss": 2.8517, "step": 15705 }, { "epoch": 6.75, "learning_rate": 0.0002, "loss": 2.8528, "step": 15710 }, { "epoch": 6.75, "learning_rate": 0.0002, "loss": 2.8508, "step": 15715 }, { "epoch": 6.75, "learning_rate": 0.0002, "loss": 2.8598, "step": 15720 }, { "epoch": 6.75, "learning_rate": 0.0002, "loss": 2.8656, "step": 15725 }, { "epoch": 6.75, "learning_rate": 0.0002, "loss": 2.8538, "step": 15730 }, { "epoch": 6.76, "learning_rate": 0.0002, "loss": 2.8402, "step": 15735 }, { "epoch": 6.76, "learning_rate": 0.0002, "loss": 2.8551, "step": 15740 }, { "epoch": 6.76, "eval_loss": 2.8596832752227783, "eval_runtime": 5.7028, "eval_samples_per_second": 1731.955, "eval_steps_per_second": 13.677, "eval_top3_3_weighted_f1_score ": 0.428473242206308, "eval_top_1_macro_f1_score": 0.0657145511200022, "eval_top_1_weighted_f1score": 0.21096293119397097, "eval_top_3_macro_f1_score": 0.17008036575110236, "step": 15744 }, { "epoch": 6.76, "learning_rate": 0.0002, "loss": 2.8366, "step": 15745 }, { "epoch": 6.76, "learning_rate": 0.0002, "loss": 2.8622, "step": 15750 }, { "epoch": 6.76, "learning_rate": 0.0002, "loss": 2.8522, "step": 15755 }, { "epoch": 6.77, "learning_rate": 0.0002, "loss": 2.85, "step": 15760 }, { "epoch": 6.77, "learning_rate": 0.0002, "loss": 2.8458, "step": 15765 }, { "epoch": 6.77, "learning_rate": 0.0002, "loss": 2.8364, "step": 15770 }, { "epoch": 6.77, "learning_rate": 0.0002, "loss": 2.8436, "step": 15775 }, { "epoch": 6.78, "learning_rate": 0.0002, "loss": 2.8549, "step": 15780 }, { "epoch": 6.78, "learning_rate": 0.0002, "loss": 2.8475, "step": 15785 }, { "epoch": 6.78, "learning_rate": 0.0002, "loss": 2.839, "step": 15790 }, { "epoch": 6.78, "learning_rate": 0.0002, "loss": 2.8635, "step": 15795 }, { "epoch": 6.78, "learning_rate": 0.0002, "loss": 2.8578, "step": 15800 }, { "epoch": 6.79, "learning_rate": 0.0002, "loss": 2.8419, "step": 15805 }, { "epoch": 6.79, "eval_loss": 2.8585236072540283, "eval_runtime": 5.3958, "eval_samples_per_second": 1830.508, "eval_steps_per_second": 14.456, "eval_top3_3_weighted_f1_score ": 0.4266768311266503, "eval_top_1_macro_f1_score": 0.0707578405812454, "eval_top_1_weighted_f1score": 0.21065535159659776, "eval_top_3_macro_f1_score": 0.16743608079872865, "step": 15808 }, { "epoch": 6.79, "learning_rate": 0.0002, "loss": 2.8572, "step": 15810 }, { "epoch": 6.79, "learning_rate": 0.0002, "loss": 2.8292, "step": 15815 }, { "epoch": 6.79, "learning_rate": 0.0002, "loss": 2.8566, "step": 15820 }, { "epoch": 6.79, "learning_rate": 0.0002, "loss": 2.8441, "step": 15825 }, { "epoch": 6.8, "learning_rate": 0.0002, "loss": 2.8666, "step": 15830 }, { "epoch": 6.8, "learning_rate": 0.0002, "loss": 2.845, "step": 15835 }, { "epoch": 6.8, "learning_rate": 0.0002, "loss": 2.8519, "step": 15840 }, { "epoch": 6.8, "learning_rate": 0.0002, "loss": 2.849, "step": 15845 }, { "epoch": 6.81, "learning_rate": 0.0002, "loss": 2.8519, "step": 15850 }, { "epoch": 6.81, "learning_rate": 0.0002, "loss": 2.8682, "step": 15855 }, { "epoch": 6.81, "learning_rate": 0.0002, "loss": 2.8507, "step": 15860 }, { "epoch": 6.81, "learning_rate": 0.0002, "loss": 2.8448, "step": 15865 }, { "epoch": 6.81, "learning_rate": 0.0002, "loss": 2.862, "step": 15870 }, { "epoch": 6.81, "eval_loss": 2.8602190017700195, "eval_runtime": 5.3689, "eval_samples_per_second": 1839.652, "eval_steps_per_second": 14.528, "eval_top3_3_weighted_f1_score ": 0.42848811567997996, "eval_top_1_macro_f1_score": 0.06972770009692326, "eval_top_1_weighted_f1score": 0.2064827795339559, "eval_top_3_macro_f1_score": 0.167845057594611, "step": 15872 }, { "epoch": 6.82, "learning_rate": 0.0002, "loss": 2.834, "step": 15875 }, { "epoch": 6.82, "learning_rate": 0.0002, "loss": 2.8586, "step": 15880 }, { "epoch": 6.82, "learning_rate": 0.0002, "loss": 2.8485, "step": 15885 }, { "epoch": 6.82, "learning_rate": 0.0002, "loss": 2.8545, "step": 15890 }, { "epoch": 6.82, "learning_rate": 0.0002, "loss": 2.8605, "step": 15895 }, { "epoch": 6.83, "learning_rate": 0.0002, "loss": 2.8455, "step": 15900 }, { "epoch": 6.83, "learning_rate": 0.0002, "loss": 2.8602, "step": 15905 }, { "epoch": 6.83, "learning_rate": 0.0002, "loss": 2.8512, "step": 15910 }, { "epoch": 6.83, "learning_rate": 0.0002, "loss": 2.8305, "step": 15915 }, { "epoch": 6.84, "learning_rate": 0.0002, "loss": 2.8506, "step": 15920 }, { "epoch": 6.84, "learning_rate": 0.0002, "loss": 2.82, "step": 15925 }, { "epoch": 6.84, "learning_rate": 0.0002, "loss": 2.8432, "step": 15930 }, { "epoch": 6.84, "learning_rate": 0.0002, "loss": 2.8565, "step": 15935 }, { "epoch": 6.84, "eval_loss": 2.863837242126465, "eval_runtime": 5.4053, "eval_samples_per_second": 1827.283, "eval_steps_per_second": 14.43, "eval_top3_3_weighted_f1_score ": 0.4302442452852865, "eval_top_1_macro_f1_score": 0.060992147784006606, "eval_top_1_weighted_f1score": 0.20896368893525422, "eval_top_3_macro_f1_score": 0.18208826275398493, "step": 15936 }, { "epoch": 6.84, "learning_rate": 0.0002, "loss": 2.8579, "step": 15940 }, { "epoch": 6.85, "learning_rate": 0.0002, "loss": 2.8354, "step": 15945 }, { "epoch": 6.85, "learning_rate": 0.0002, "loss": 2.8569, "step": 15950 }, { "epoch": 6.85, "learning_rate": 0.0002, "loss": 2.8482, "step": 15955 }, { "epoch": 6.85, "learning_rate": 0.0002, "loss": 2.8542, "step": 15960 }, { "epoch": 6.85, "learning_rate": 0.0002, "loss": 2.8643, "step": 15965 }, { "epoch": 6.86, "learning_rate": 0.0002, "loss": 2.857, "step": 15970 }, { "epoch": 6.86, "learning_rate": 0.0002, "loss": 2.8319, "step": 15975 }, { "epoch": 6.86, "learning_rate": 0.0002, "loss": 2.8735, "step": 15980 }, { "epoch": 6.86, "learning_rate": 0.0002, "loss": 2.8331, "step": 15985 }, { "epoch": 6.87, "learning_rate": 0.0002, "loss": 2.8428, "step": 15990 }, { "epoch": 6.87, "learning_rate": 0.0002, "loss": 2.8471, "step": 15995 }, { "epoch": 6.87, "learning_rate": 0.0002, "loss": 2.8721, "step": 16000 }, { "epoch": 6.87, "eval_loss": 2.8585147857666016, "eval_runtime": 5.3275, "eval_samples_per_second": 1853.98, "eval_steps_per_second": 14.641, "eval_top3_3_weighted_f1_score ": 0.43199057777332744, "eval_top_1_macro_f1_score": 0.06341408243519375, "eval_top_1_weighted_f1score": 0.20974999878504372, "eval_top_3_macro_f1_score": 0.16988259006005924, "step": 16000 }, { "epoch": 6.87, "learning_rate": 0.0002, "loss": 2.8619, "step": 16005 }, { "epoch": 6.87, "learning_rate": 0.0002, "loss": 2.8547, "step": 16010 }, { "epoch": 6.88, "learning_rate": 0.0002, "loss": 2.8292, "step": 16015 }, { "epoch": 6.88, "learning_rate": 0.0002, "loss": 2.8334, "step": 16020 }, { "epoch": 6.88, "learning_rate": 0.0002, "loss": 2.8664, "step": 16025 }, { "epoch": 6.88, "learning_rate": 0.0002, "loss": 2.8531, "step": 16030 }, { "epoch": 6.88, "learning_rate": 0.0002, "loss": 2.8523, "step": 16035 }, { "epoch": 6.89, "learning_rate": 0.0002, "loss": 2.8483, "step": 16040 }, { "epoch": 6.89, "learning_rate": 0.0002, "loss": 2.8438, "step": 16045 }, { "epoch": 6.89, "learning_rate": 0.0002, "loss": 2.8472, "step": 16050 }, { "epoch": 6.89, "learning_rate": 0.0002, "loss": 2.8462, "step": 16055 }, { "epoch": 6.9, "learning_rate": 0.0002, "loss": 2.8341, "step": 16060 }, { "epoch": 6.9, "eval_loss": 2.851205825805664, "eval_runtime": 5.3121, "eval_samples_per_second": 1859.328, "eval_steps_per_second": 14.683, "eval_top3_3_weighted_f1_score ": 0.4297521068243642, "eval_top_1_macro_f1_score": 0.061757631764458934, "eval_top_1_weighted_f1score": 0.20677145621707246, "eval_top_3_macro_f1_score": 0.1682712038279633, "step": 16064 }, { "epoch": 6.9, "learning_rate": 0.0002, "loss": 2.835, "step": 16065 }, { "epoch": 6.9, "learning_rate": 0.0002, "loss": 2.8624, "step": 16070 }, { "epoch": 6.9, "learning_rate": 0.0002, "loss": 2.8467, "step": 16075 }, { "epoch": 6.9, "learning_rate": 0.0002, "loss": 2.8478, "step": 16080 }, { "epoch": 6.91, "learning_rate": 0.0002, "loss": 2.8466, "step": 16085 }, { "epoch": 6.91, "learning_rate": 0.0002, "loss": 2.8503, "step": 16090 }, { "epoch": 6.91, "learning_rate": 0.0002, "loss": 2.85, "step": 16095 }, { "epoch": 6.91, "learning_rate": 0.0002, "loss": 2.8702, "step": 16100 }, { "epoch": 6.91, "learning_rate": 0.0002, "loss": 2.8443, "step": 16105 }, { "epoch": 6.92, "learning_rate": 0.0002, "loss": 2.8466, "step": 16110 }, { "epoch": 6.92, "learning_rate": 0.0002, "loss": 2.8426, "step": 16115 }, { "epoch": 6.92, "learning_rate": 0.0002, "loss": 2.8482, "step": 16120 }, { "epoch": 6.92, "learning_rate": 0.0002, "loss": 2.8273, "step": 16125 }, { "epoch": 6.92, "eval_loss": 2.8544063568115234, "eval_runtime": 5.3059, "eval_samples_per_second": 1861.518, "eval_steps_per_second": 14.701, "eval_top3_3_weighted_f1_score ": 0.43016403994162344, "eval_top_1_macro_f1_score": 0.06482697306747977, "eval_top_1_weighted_f1score": 0.2089395643737166, "eval_top_3_macro_f1_score": 0.17654727126251615, "step": 16128 }, { "epoch": 6.93, "learning_rate": 0.0002, "loss": 2.87, "step": 16130 }, { "epoch": 6.93, "learning_rate": 0.0002, "loss": 2.8672, "step": 16135 }, { "epoch": 6.93, "learning_rate": 0.0002, "loss": 2.8547, "step": 16140 }, { "epoch": 6.93, "learning_rate": 0.0002, "loss": 2.8488, "step": 16145 }, { "epoch": 6.93, "learning_rate": 0.0002, "loss": 2.8757, "step": 16150 }, { "epoch": 6.94, "learning_rate": 0.0002, "loss": 2.8401, "step": 16155 }, { "epoch": 6.94, "learning_rate": 0.0002, "loss": 2.8322, "step": 16160 }, { "epoch": 6.94, "learning_rate": 0.0002, "loss": 2.8423, "step": 16165 }, { "epoch": 6.94, "learning_rate": 0.0002, "loss": 2.8396, "step": 16170 }, { "epoch": 6.94, "learning_rate": 0.0002, "loss": 2.8585, "step": 16175 }, { "epoch": 6.95, "learning_rate": 0.0002, "loss": 2.8481, "step": 16180 }, { "epoch": 6.95, "learning_rate": 0.0002, "loss": 2.8782, "step": 16185 }, { "epoch": 6.95, "learning_rate": 0.0002, "loss": 2.8561, "step": 16190 }, { "epoch": 6.95, "eval_loss": 2.861961603164673, "eval_runtime": 5.2066, "eval_samples_per_second": 1897.024, "eval_steps_per_second": 14.981, "eval_top3_3_weighted_f1_score ": 0.428182266021052, "eval_top_1_macro_f1_score": 0.06571064136595867, "eval_top_1_weighted_f1score": 0.21030290543500288, "eval_top_3_macro_f1_score": 0.171269395433393, "step": 16192 }, { "epoch": 6.95, "learning_rate": 0.0002, "loss": 2.8533, "step": 16195 }, { "epoch": 6.96, "learning_rate": 0.0002, "loss": 2.8391, "step": 16200 }, { "epoch": 6.96, "learning_rate": 0.0002, "loss": 2.85, "step": 16205 }, { "epoch": 6.96, "learning_rate": 0.0002, "loss": 2.8592, "step": 16210 }, { "epoch": 6.96, "learning_rate": 0.0002, "loss": 2.829, "step": 16215 }, { "epoch": 6.96, "learning_rate": 0.0002, "loss": 2.8483, "step": 16220 }, { "epoch": 6.97, "learning_rate": 0.0002, "loss": 2.8658, "step": 16225 }, { "epoch": 6.97, "learning_rate": 0.0002, "loss": 2.8496, "step": 16230 }, { "epoch": 6.97, "learning_rate": 0.0002, "loss": 2.8422, "step": 16235 }, { "epoch": 6.97, "learning_rate": 0.0002, "loss": 2.8596, "step": 16240 }, { "epoch": 6.97, "learning_rate": 0.0002, "loss": 2.8472, "step": 16245 }, { "epoch": 6.98, "learning_rate": 0.0002, "loss": 2.8338, "step": 16250 }, { "epoch": 6.98, "learning_rate": 0.0002, "loss": 2.8435, "step": 16255 }, { "epoch": 6.98, "eval_loss": 2.8606724739074707, "eval_runtime": 5.4097, "eval_samples_per_second": 1825.793, "eval_steps_per_second": 14.419, "eval_top3_3_weighted_f1_score ": 0.4300385572016085, "eval_top_1_macro_f1_score": 0.06157440821152848, "eval_top_1_weighted_f1score": 0.20951877862069976, "eval_top_3_macro_f1_score": 0.1660304825701084, "step": 16256 }, { "epoch": 6.98, "learning_rate": 0.0002, "loss": 2.8539, "step": 16260 }, { "epoch": 6.98, "learning_rate": 0.0002, "loss": 2.8364, "step": 16265 }, { "epoch": 6.99, "learning_rate": 0.0002, "loss": 2.8498, "step": 16270 }, { "epoch": 6.99, "learning_rate": 0.0002, "loss": 2.8502, "step": 16275 }, { "epoch": 6.99, "learning_rate": 0.0002, "loss": 2.8586, "step": 16280 }, { "epoch": 6.99, "learning_rate": 0.0002, "loss": 2.8518, "step": 16285 }, { "epoch": 6.99, "learning_rate": 0.0002, "loss": 2.8463, "step": 16290 }, { "epoch": 7.0, "learning_rate": 0.0002, "loss": 2.8537, "step": 16295 }, { "epoch": 7.0, "learning_rate": 0.0002, "loss": 2.8591, "step": 16300 }, { "epoch": 7.0, "learning_rate": 0.0002, "loss": 3.1218, "step": 16305 }, { "epoch": 7.0, "learning_rate": 0.0002, "loss": 2.8544, "step": 16310 }, { "epoch": 7.01, "learning_rate": 0.0002, "loss": 2.8176, "step": 16315 }, { "epoch": 7.01, "learning_rate": 0.0002, "loss": 2.8156, "step": 16320 }, { "epoch": 7.01, "eval_loss": 2.855952262878418, "eval_runtime": 5.5545, "eval_samples_per_second": 1778.202, "eval_steps_per_second": 14.043, "eval_top3_3_weighted_f1_score ": 0.4283237284132631, "eval_top_1_macro_f1_score": 0.0575789324921102, "eval_top_1_weighted_f1score": 0.20848049741441527, "eval_top_3_macro_f1_score": 0.17694628353914202, "step": 16320 }, { "epoch": 7.01, "learning_rate": 0.0002, "loss": 2.8242, "step": 16325 }, { "epoch": 7.01, "learning_rate": 0.0002, "loss": 2.847, "step": 16330 }, { "epoch": 7.01, "learning_rate": 0.0002, "loss": 2.8236, "step": 16335 }, { "epoch": 7.02, "learning_rate": 0.0002, "loss": 2.8283, "step": 16340 }, { "epoch": 7.02, "learning_rate": 0.0002, "loss": 2.8517, "step": 16345 }, { "epoch": 7.02, "learning_rate": 0.0002, "loss": 2.8402, "step": 16350 }, { "epoch": 7.02, "learning_rate": 0.0002, "loss": 2.8323, "step": 16355 }, { "epoch": 7.02, "learning_rate": 0.0002, "loss": 2.8671, "step": 16360 }, { "epoch": 7.03, "learning_rate": 0.0002, "loss": 2.8417, "step": 16365 }, { "epoch": 7.03, "learning_rate": 0.0002, "loss": 2.8531, "step": 16370 }, { "epoch": 7.03, "learning_rate": 0.0002, "loss": 2.8315, "step": 16375 }, { "epoch": 7.03, "learning_rate": 0.0002, "loss": 2.8595, "step": 16380 }, { "epoch": 7.03, "eval_loss": 2.8588004112243652, "eval_runtime": 5.7346, "eval_samples_per_second": 1722.354, "eval_steps_per_second": 13.602, "eval_top3_3_weighted_f1_score ": 0.4273591248370392, "eval_top_1_macro_f1_score": 0.061884711993587124, "eval_top_1_weighted_f1score": 0.20787224118291617, "eval_top_3_macro_f1_score": 0.16624012084061907, "step": 16384 }, { "epoch": 7.04, "learning_rate": 0.0002, "loss": 2.8403, "step": 16385 }, { "epoch": 7.04, "learning_rate": 0.0002, "loss": 2.8371, "step": 16390 }, { "epoch": 7.04, "learning_rate": 0.0002, "loss": 2.8387, "step": 16395 }, { "epoch": 7.04, "learning_rate": 0.0002, "loss": 2.8429, "step": 16400 }, { "epoch": 7.04, "learning_rate": 0.0002, "loss": 2.8273, "step": 16405 }, { "epoch": 7.05, "learning_rate": 0.0002, "loss": 2.8444, "step": 16410 }, { "epoch": 7.05, "learning_rate": 0.0002, "loss": 2.8494, "step": 16415 }, { "epoch": 7.05, "learning_rate": 0.0002, "loss": 2.8589, "step": 16420 }, { "epoch": 7.05, "learning_rate": 0.0002, "loss": 2.8403, "step": 16425 }, { "epoch": 7.05, "learning_rate": 0.0002, "loss": 2.8445, "step": 16430 }, { "epoch": 7.06, "learning_rate": 0.0002, "loss": 2.8309, "step": 16435 }, { "epoch": 7.06, "learning_rate": 0.0002, "loss": 2.8604, "step": 16440 }, { "epoch": 7.06, "learning_rate": 0.0002, "loss": 2.8379, "step": 16445 }, { "epoch": 7.06, "eval_loss": 2.8595006465911865, "eval_runtime": 5.3939, "eval_samples_per_second": 1831.154, "eval_steps_per_second": 14.461, "eval_top3_3_weighted_f1_score ": 0.4305098625205523, "eval_top_1_macro_f1_score": 0.06630078967768736, "eval_top_1_weighted_f1score": 0.20840423447435372, "eval_top_3_macro_f1_score": 0.17737793347878275, "step": 16448 }, { "epoch": 7.06, "learning_rate": 0.0002, "loss": 2.8491, "step": 16450 }, { "epoch": 7.07, "learning_rate": 0.0002, "loss": 2.8588, "step": 16455 }, { "epoch": 7.07, "learning_rate": 0.0002, "loss": 2.8379, "step": 16460 }, { "epoch": 7.07, "learning_rate": 0.0002, "loss": 2.8526, "step": 16465 }, { "epoch": 7.07, "learning_rate": 0.0002, "loss": 2.8304, "step": 16470 }, { "epoch": 7.07, "learning_rate": 0.0002, "loss": 2.8533, "step": 16475 }, { "epoch": 7.08, "learning_rate": 0.0002, "loss": 2.8459, "step": 16480 }, { "epoch": 7.08, "learning_rate": 0.0002, "loss": 2.8561, "step": 16485 }, { "epoch": 7.08, "learning_rate": 0.0002, "loss": 2.8452, "step": 16490 }, { "epoch": 7.08, "learning_rate": 0.0002, "loss": 2.8329, "step": 16495 }, { "epoch": 7.08, "learning_rate": 0.0002, "loss": 2.8503, "step": 16500 }, { "epoch": 7.09, "learning_rate": 0.0002, "loss": 2.8499, "step": 16505 }, { "epoch": 7.09, "learning_rate": 0.0002, "loss": 2.8469, "step": 16510 }, { "epoch": 7.09, "eval_loss": 2.857330799102783, "eval_runtime": 5.4246, "eval_samples_per_second": 1820.782, "eval_steps_per_second": 14.379, "eval_top3_3_weighted_f1_score ": 0.42866429123980676, "eval_top_1_macro_f1_score": 0.06555160336361505, "eval_top_1_weighted_f1score": 0.2098092332953534, "eval_top_3_macro_f1_score": 0.1806800909084033, "step": 16512 }, { "epoch": 7.09, "learning_rate": 0.0002, "loss": 2.8308, "step": 16515 }, { "epoch": 7.09, "learning_rate": 0.0002, "loss": 2.8235, "step": 16520 }, { "epoch": 7.1, "learning_rate": 0.0002, "loss": 2.8311, "step": 16525 }, { "epoch": 7.1, "learning_rate": 0.0002, "loss": 2.8636, "step": 16530 }, { "epoch": 7.1, "learning_rate": 0.0002, "loss": 2.8419, "step": 16535 }, { "epoch": 7.1, "learning_rate": 0.0002, "loss": 2.8332, "step": 16540 }, { "epoch": 7.1, "learning_rate": 0.0002, "loss": 2.8366, "step": 16545 }, { "epoch": 7.11, "learning_rate": 0.0002, "loss": 2.8257, "step": 16550 }, { "epoch": 7.11, "learning_rate": 0.0002, "loss": 2.8402, "step": 16555 }, { "epoch": 7.11, "learning_rate": 0.0002, "loss": 2.8381, "step": 16560 }, { "epoch": 7.11, "learning_rate": 0.0002, "loss": 2.8469, "step": 16565 }, { "epoch": 7.11, "learning_rate": 0.0002, "loss": 2.8324, "step": 16570 }, { "epoch": 7.12, "learning_rate": 0.0002, "loss": 2.8331, "step": 16575 }, { "epoch": 7.12, "eval_loss": 2.858490467071533, "eval_runtime": 5.3889, "eval_samples_per_second": 1832.848, "eval_steps_per_second": 14.474, "eval_top3_3_weighted_f1_score ": 0.4254713711423142, "eval_top_1_macro_f1_score": 0.06492993178622292, "eval_top_1_weighted_f1score": 0.20903476903188825, "eval_top_3_macro_f1_score": 0.1717600858332048, "step": 16576 }, { "epoch": 7.12, "learning_rate": 0.0002, "loss": 2.8436, "step": 16580 }, { "epoch": 7.12, "learning_rate": 0.0002, "loss": 2.8354, "step": 16585 }, { "epoch": 7.12, "learning_rate": 0.0002, "loss": 2.8447, "step": 16590 }, { "epoch": 7.13, "learning_rate": 0.0002, "loss": 2.8325, "step": 16595 }, { "epoch": 7.13, "learning_rate": 0.0002, "loss": 2.8526, "step": 16600 }, { "epoch": 7.13, "learning_rate": 0.0002, "loss": 2.8381, "step": 16605 }, { "epoch": 7.13, "learning_rate": 0.0002, "loss": 2.8632, "step": 16610 }, { "epoch": 7.13, "learning_rate": 0.0002, "loss": 2.8383, "step": 16615 }, { "epoch": 7.14, "learning_rate": 0.0002, "loss": 2.839, "step": 16620 }, { "epoch": 7.14, "learning_rate": 0.0002, "loss": 2.87, "step": 16625 }, { "epoch": 7.14, "learning_rate": 0.0002, "loss": 2.838, "step": 16630 }, { "epoch": 7.14, "learning_rate": 0.0002, "loss": 2.8497, "step": 16635 }, { "epoch": 7.14, "learning_rate": 0.0002, "loss": 2.8525, "step": 16640 }, { "epoch": 7.14, "eval_loss": 2.857104539871216, "eval_runtime": 5.3992, "eval_samples_per_second": 1829.344, "eval_steps_per_second": 14.447, "eval_top3_3_weighted_f1_score ": 0.431505093360038, "eval_top_1_macro_f1_score": 0.05961979391858313, "eval_top_1_weighted_f1score": 0.20848881834889726, "eval_top_3_macro_f1_score": 0.17628531216244345, "step": 16640 }, { "epoch": 7.15, "learning_rate": 0.0002, "loss": 2.84, "step": 16645 }, { "epoch": 7.15, "learning_rate": 0.0002, "loss": 2.8469, "step": 16650 }, { "epoch": 7.15, "learning_rate": 0.0002, "loss": 2.8372, "step": 16655 }, { "epoch": 7.15, "learning_rate": 0.0002, "loss": 2.8435, "step": 16660 }, { "epoch": 7.16, "learning_rate": 0.0002, "loss": 2.8504, "step": 16665 }, { "epoch": 7.16, "learning_rate": 0.0002, "loss": 2.8377, "step": 16670 }, { "epoch": 7.16, "learning_rate": 0.0002, "loss": 2.8392, "step": 16675 }, { "epoch": 7.16, "learning_rate": 0.0002, "loss": 2.8468, "step": 16680 }, { "epoch": 7.16, "learning_rate": 0.0002, "loss": 2.8301, "step": 16685 }, { "epoch": 7.17, "learning_rate": 0.0002, "loss": 2.8279, "step": 16690 }, { "epoch": 7.17, "learning_rate": 0.0002, "loss": 2.8477, "step": 16695 }, { "epoch": 7.17, "learning_rate": 0.0002, "loss": 2.8488, "step": 16700 }, { "epoch": 7.17, "eval_loss": 2.8567821979522705, "eval_runtime": 5.3288, "eval_samples_per_second": 1853.515, "eval_steps_per_second": 14.637, "eval_top3_3_weighted_f1_score ": 0.426813939517681, "eval_top_1_macro_f1_score": 0.060002998836245686, "eval_top_1_weighted_f1score": 0.20760508995764726, "eval_top_3_macro_f1_score": 0.16427234238860938, "step": 16704 }, { "epoch": 7.17, "learning_rate": 0.0002, "loss": 2.8347, "step": 16705 }, { "epoch": 7.17, "learning_rate": 0.0002, "loss": 2.8574, "step": 16710 }, { "epoch": 7.18, "learning_rate": 0.0002, "loss": 2.8526, "step": 16715 }, { "epoch": 7.18, "learning_rate": 0.0002, "loss": 2.835, "step": 16720 }, { "epoch": 7.18, "learning_rate": 0.0002, "loss": 2.8329, "step": 16725 }, { "epoch": 7.18, "learning_rate": 0.0002, "loss": 2.8123, "step": 16730 }, { "epoch": 7.19, "learning_rate": 0.0002, "loss": 2.8416, "step": 16735 }, { "epoch": 7.19, "learning_rate": 0.0002, "loss": 2.8577, "step": 16740 }, { "epoch": 7.19, "learning_rate": 0.0002, "loss": 2.8483, "step": 16745 }, { "epoch": 7.19, "learning_rate": 0.0002, "loss": 2.8313, "step": 16750 }, { "epoch": 7.19, "learning_rate": 0.0002, "loss": 2.8366, "step": 16755 }, { "epoch": 7.2, "learning_rate": 0.0002, "loss": 2.8497, "step": 16760 }, { "epoch": 7.2, "learning_rate": 0.0002, "loss": 2.8475, "step": 16765 }, { "epoch": 7.2, "eval_loss": 2.860691547393799, "eval_runtime": 5.302, "eval_samples_per_second": 1862.895, "eval_steps_per_second": 14.712, "eval_top3_3_weighted_f1_score ": 0.4339193446571937, "eval_top_1_macro_f1_score": 0.06711008630203333, "eval_top_1_weighted_f1score": 0.21108482590150213, "eval_top_3_macro_f1_score": 0.17314573020296042, "step": 16768 }, { "epoch": 7.2, "learning_rate": 0.0002, "loss": 2.8298, "step": 16770 }, { "epoch": 7.2, "learning_rate": 0.0002, "loss": 2.8429, "step": 16775 }, { "epoch": 7.2, "learning_rate": 0.0002, "loss": 2.8503, "step": 16780 }, { "epoch": 7.21, "learning_rate": 0.0002, "loss": 2.8411, "step": 16785 }, { "epoch": 7.21, "learning_rate": 0.0002, "loss": 2.8575, "step": 16790 }, { "epoch": 7.21, "learning_rate": 0.0002, "loss": 2.8367, "step": 16795 }, { "epoch": 7.21, "learning_rate": 0.0002, "loss": 2.8588, "step": 16800 }, { "epoch": 7.22, "learning_rate": 0.0002, "loss": 2.8326, "step": 16805 }, { "epoch": 7.22, "learning_rate": 0.0002, "loss": 2.8387, "step": 16810 }, { "epoch": 7.22, "learning_rate": 0.0002, "loss": 2.8564, "step": 16815 }, { "epoch": 7.22, "learning_rate": 0.0002, "loss": 2.8235, "step": 16820 }, { "epoch": 7.22, "learning_rate": 0.0002, "loss": 2.8385, "step": 16825 }, { "epoch": 7.23, "learning_rate": 0.0002, "loss": 2.8305, "step": 16830 }, { "epoch": 7.23, "eval_loss": 2.8646328449249268, "eval_runtime": 5.8268, "eval_samples_per_second": 1695.101, "eval_steps_per_second": 13.386, "eval_top3_3_weighted_f1_score ": 0.43004454176978774, "eval_top_1_macro_f1_score": 0.06530522520674749, "eval_top_1_weighted_f1score": 0.20867859060781246, "eval_top_3_macro_f1_score": 0.17294844551379238, "step": 16832 }, { "epoch": 7.23, "learning_rate": 0.0002, "loss": 2.8323, "step": 16835 }, { "epoch": 7.23, "learning_rate": 0.0002, "loss": 2.8682, "step": 16840 }, { "epoch": 7.23, "learning_rate": 0.0002, "loss": 2.847, "step": 16845 }, { "epoch": 7.23, "learning_rate": 0.0002, "loss": 2.8425, "step": 16850 }, { "epoch": 7.24, "learning_rate": 0.0002, "loss": 2.8319, "step": 16855 }, { "epoch": 7.24, "learning_rate": 0.0002, "loss": 2.8519, "step": 16860 }, { "epoch": 7.24, "learning_rate": 0.0002, "loss": 2.8505, "step": 16865 }, { "epoch": 7.24, "learning_rate": 0.0002, "loss": 2.8421, "step": 16870 }, { "epoch": 7.25, "learning_rate": 0.0002, "loss": 2.8664, "step": 16875 }, { "epoch": 7.25, "learning_rate": 0.0002, "loss": 2.8495, "step": 16880 }, { "epoch": 7.25, "learning_rate": 0.0002, "loss": 2.8372, "step": 16885 }, { "epoch": 7.25, "learning_rate": 0.0002, "loss": 2.8415, "step": 16890 }, { "epoch": 7.25, "learning_rate": 0.0002, "loss": 2.8473, "step": 16895 }, { "epoch": 7.25, "eval_loss": 2.854012966156006, "eval_runtime": 5.3389, "eval_samples_per_second": 1849.997, "eval_steps_per_second": 14.61, "eval_top3_3_weighted_f1_score ": 0.4284043207078249, "eval_top_1_macro_f1_score": 0.06462930759729849, "eval_top_1_weighted_f1score": 0.20830027842242396, "eval_top_3_macro_f1_score": 0.1627043973691934, "step": 16896 }, { "epoch": 7.26, "learning_rate": 0.0002, "loss": 2.8372, "step": 16900 }, { "epoch": 7.26, "learning_rate": 0.0002, "loss": 2.8373, "step": 16905 }, { "epoch": 7.26, "learning_rate": 0.0002, "loss": 2.8642, "step": 16910 }, { "epoch": 7.26, "learning_rate": 0.0002, "loss": 2.8456, "step": 16915 }, { "epoch": 7.26, "learning_rate": 0.0002, "loss": 2.8349, "step": 16920 }, { "epoch": 7.27, "learning_rate": 0.0002, "loss": 2.8274, "step": 16925 }, { "epoch": 7.27, "learning_rate": 0.0002, "loss": 2.8398, "step": 16930 }, { "epoch": 7.27, "learning_rate": 0.0002, "loss": 2.8426, "step": 16935 }, { "epoch": 7.27, "learning_rate": 0.0002, "loss": 2.856, "step": 16940 }, { "epoch": 7.28, "learning_rate": 0.0002, "loss": 2.8443, "step": 16945 }, { "epoch": 7.28, "learning_rate": 0.0002, "loss": 2.8323, "step": 16950 }, { "epoch": 7.28, "learning_rate": 0.0002, "loss": 2.8192, "step": 16955 }, { "epoch": 7.28, "learning_rate": 0.0002, "loss": 2.8599, "step": 16960 }, { "epoch": 7.28, "eval_loss": 2.8601081371307373, "eval_runtime": 5.3679, "eval_samples_per_second": 1840.028, "eval_steps_per_second": 14.531, "eval_top3_3_weighted_f1_score ": 0.42730106890012653, "eval_top_1_macro_f1_score": 0.06571170098153195, "eval_top_1_weighted_f1score": 0.21113712987875882, "eval_top_3_macro_f1_score": 0.16392468907892946, "step": 16960 }, { "epoch": 7.28, "learning_rate": 0.0002, "loss": 2.8394, "step": 16965 }, { "epoch": 7.29, "learning_rate": 0.0002, "loss": 2.8547, "step": 16970 }, { "epoch": 7.29, "learning_rate": 0.0002, "loss": 2.8244, "step": 16975 }, { "epoch": 7.29, "learning_rate": 0.0002, "loss": 2.8388, "step": 16980 }, { "epoch": 7.29, "learning_rate": 0.0002, "loss": 2.8523, "step": 16985 }, { "epoch": 7.29, "learning_rate": 0.0002, "loss": 2.8568, "step": 16990 }, { "epoch": 7.3, "learning_rate": 0.0002, "loss": 2.8267, "step": 16995 }, { "epoch": 7.3, "learning_rate": 0.0002, "loss": 2.8374, "step": 17000 }, { "epoch": 7.3, "learning_rate": 0.0002, "loss": 2.8228, "step": 17005 }, { "epoch": 7.3, "learning_rate": 0.0002, "loss": 2.8479, "step": 17010 }, { "epoch": 7.31, "learning_rate": 0.0002, "loss": 2.8194, "step": 17015 }, { "epoch": 7.31, "learning_rate": 0.0002, "loss": 2.8335, "step": 17020 }, { "epoch": 7.31, "eval_loss": 2.8549346923828125, "eval_runtime": 5.3656, "eval_samples_per_second": 1840.786, "eval_steps_per_second": 14.537, "eval_top3_3_weighted_f1_score ": 0.42883865358982237, "eval_top_1_macro_f1_score": 0.06524114841337716, "eval_top_1_weighted_f1score": 0.20846580646556187, "eval_top_3_macro_f1_score": 0.16890002329164136, "step": 17024 }, { "epoch": 7.31, "learning_rate": 0.0002, "loss": 2.8383, "step": 17025 }, { "epoch": 7.31, "learning_rate": 0.0002, "loss": 2.8451, "step": 17030 }, { "epoch": 7.31, "learning_rate": 0.0002, "loss": 2.8595, "step": 17035 }, { "epoch": 7.32, "learning_rate": 0.0002, "loss": 2.8535, "step": 17040 }, { "epoch": 7.32, "learning_rate": 0.0002, "loss": 2.8801, "step": 17045 }, { "epoch": 7.32, "learning_rate": 0.0002, "loss": 2.8346, "step": 17050 }, { "epoch": 7.32, "learning_rate": 0.0002, "loss": 2.8629, "step": 17055 }, { "epoch": 7.32, "learning_rate": 0.0002, "loss": 2.838, "step": 17060 }, { "epoch": 7.33, "learning_rate": 0.0002, "loss": 2.8382, "step": 17065 }, { "epoch": 7.33, "learning_rate": 0.0002, "loss": 2.8502, "step": 17070 }, { "epoch": 7.33, "learning_rate": 0.0002, "loss": 2.8392, "step": 17075 }, { "epoch": 7.33, "learning_rate": 0.0002, "loss": 2.8272, "step": 17080 }, { "epoch": 7.34, "learning_rate": 0.0002, "loss": 2.8537, "step": 17085 }, { "epoch": 7.34, "eval_loss": 2.865147829055786, "eval_runtime": 5.3509, "eval_samples_per_second": 1845.85, "eval_steps_per_second": 14.577, "eval_top3_3_weighted_f1_score ": 0.42743006298211117, "eval_top_1_macro_f1_score": 0.06463209285279022, "eval_top_1_weighted_f1score": 0.2067056738977208, "eval_top_3_macro_f1_score": 0.1742909513278907, "step": 17088 }, { "epoch": 7.34, "learning_rate": 0.0002, "loss": 2.8331, "step": 17090 }, { "epoch": 7.34, "learning_rate": 0.0002, "loss": 2.8404, "step": 17095 }, { "epoch": 7.34, "learning_rate": 0.0002, "loss": 2.8499, "step": 17100 }, { "epoch": 7.34, "learning_rate": 0.0002, "loss": 2.8553, "step": 17105 }, { "epoch": 7.35, "learning_rate": 0.0002, "loss": 2.8517, "step": 17110 }, { "epoch": 7.35, "learning_rate": 0.0002, "loss": 2.8432, "step": 17115 }, { "epoch": 7.35, "learning_rate": 0.0002, "loss": 2.855, "step": 17120 }, { "epoch": 7.35, "learning_rate": 0.0002, "loss": 2.8396, "step": 17125 }, { "epoch": 7.36, "learning_rate": 0.0002, "loss": 2.8374, "step": 17130 }, { "epoch": 7.36, "learning_rate": 0.0002, "loss": 2.8232, "step": 17135 }, { "epoch": 7.36, "learning_rate": 0.0002, "loss": 2.8471, "step": 17140 }, { "epoch": 7.36, "learning_rate": 0.0002, "loss": 2.8489, "step": 17145 }, { "epoch": 7.36, "learning_rate": 0.0002, "loss": 2.8263, "step": 17150 }, { "epoch": 7.36, "eval_loss": 2.8595502376556396, "eval_runtime": 5.289, "eval_samples_per_second": 1867.477, "eval_steps_per_second": 14.748, "eval_top3_3_weighted_f1_score ": 0.43148654677842535, "eval_top_1_macro_f1_score": 0.061337160560468444, "eval_top_1_weighted_f1score": 0.2061431426812684, "eval_top_3_macro_f1_score": 0.1739050577609029, "step": 17152 }, { "epoch": 7.37, "learning_rate": 0.0002, "loss": 2.8307, "step": 17155 }, { "epoch": 7.37, "learning_rate": 0.0002, "loss": 2.8373, "step": 17160 }, { "epoch": 7.37, "learning_rate": 0.0002, "loss": 2.8351, "step": 17165 }, { "epoch": 7.37, "learning_rate": 0.0002, "loss": 2.8361, "step": 17170 }, { "epoch": 7.37, "learning_rate": 0.0002, "loss": 2.8239, "step": 17175 }, { "epoch": 7.38, "learning_rate": 0.0002, "loss": 2.8395, "step": 17180 }, { "epoch": 7.38, "learning_rate": 0.0002, "loss": 2.8387, "step": 17185 }, { "epoch": 7.38, "learning_rate": 0.0002, "loss": 2.8442, "step": 17190 }, { "epoch": 7.38, "learning_rate": 0.0002, "loss": 2.837, "step": 17195 }, { "epoch": 7.39, "learning_rate": 0.0002, "loss": 2.8311, "step": 17200 }, { "epoch": 7.39, "learning_rate": 0.0002, "loss": 2.85, "step": 17205 }, { "epoch": 7.39, "learning_rate": 0.0002, "loss": 2.8385, "step": 17210 }, { "epoch": 7.39, "learning_rate": 0.0002, "loss": 2.8498, "step": 17215 }, { "epoch": 7.39, "eval_loss": 2.8654897212982178, "eval_runtime": 5.3215, "eval_samples_per_second": 1856.072, "eval_steps_per_second": 14.658, "eval_top3_3_weighted_f1_score ": 0.42720531625659613, "eval_top_1_macro_f1_score": 0.057758238120331054, "eval_top_1_weighted_f1score": 0.2050961597199169, "eval_top_3_macro_f1_score": 0.17000776791912028, "step": 17216 }, { "epoch": 7.39, "learning_rate": 0.0002, "loss": 2.8393, "step": 17220 }, { "epoch": 7.4, "learning_rate": 0.0002, "loss": 2.8624, "step": 17225 }, { "epoch": 7.4, "learning_rate": 0.0002, "loss": 2.8341, "step": 17230 }, { "epoch": 7.4, "learning_rate": 0.0002, "loss": 2.8406, "step": 17235 }, { "epoch": 7.4, "learning_rate": 0.0002, "loss": 2.8272, "step": 17240 }, { "epoch": 7.4, "learning_rate": 0.0002, "loss": 2.8519, "step": 17245 }, { "epoch": 7.41, "learning_rate": 0.0002, "loss": 2.854, "step": 17250 }, { "epoch": 7.41, "learning_rate": 0.0002, "loss": 2.8476, "step": 17255 }, { "epoch": 7.41, "learning_rate": 0.0002, "loss": 2.8548, "step": 17260 }, { "epoch": 7.41, "learning_rate": 0.0002, "loss": 2.8343, "step": 17265 }, { "epoch": 7.42, "learning_rate": 0.0002, "loss": 2.8313, "step": 17270 }, { "epoch": 7.42, "learning_rate": 0.0002, "loss": 2.8501, "step": 17275 }, { "epoch": 7.42, "learning_rate": 0.0002, "loss": 2.8392, "step": 17280 }, { "epoch": 7.42, "eval_loss": 2.8601627349853516, "eval_runtime": 5.333, "eval_samples_per_second": 1852.043, "eval_steps_per_second": 14.626, "eval_top3_3_weighted_f1_score ": 0.4274173263101556, "eval_top_1_macro_f1_score": 0.061797421242248705, "eval_top_1_weighted_f1score": 0.2106343646505144, "eval_top_3_macro_f1_score": 0.17457318219501852, "step": 17280 }, { "epoch": 7.42, "learning_rate": 0.0002, "loss": 2.8385, "step": 17285 }, { "epoch": 7.42, "learning_rate": 0.0002, "loss": 2.8343, "step": 17290 }, { "epoch": 7.43, "learning_rate": 0.0002, "loss": 2.8471, "step": 17295 }, { "epoch": 7.43, "learning_rate": 0.0002, "loss": 2.8546, "step": 17300 }, { "epoch": 7.43, "learning_rate": 0.0002, "loss": 2.858, "step": 17305 }, { "epoch": 7.43, "learning_rate": 0.0002, "loss": 2.8404, "step": 17310 }, { "epoch": 7.43, "learning_rate": 0.0002, "loss": 2.8447, "step": 17315 }, { "epoch": 7.44, "learning_rate": 0.0002, "loss": 2.848, "step": 17320 }, { "epoch": 7.44, "learning_rate": 0.0002, "loss": 2.8269, "step": 17325 }, { "epoch": 7.44, "learning_rate": 0.0002, "loss": 2.8559, "step": 17330 }, { "epoch": 7.44, "learning_rate": 0.0002, "loss": 2.8374, "step": 17335 }, { "epoch": 7.45, "learning_rate": 0.0002, "loss": 2.8388, "step": 17340 }, { "epoch": 7.45, "eval_loss": 2.8564577102661133, "eval_runtime": 5.3497, "eval_samples_per_second": 1846.288, "eval_steps_per_second": 14.58, "eval_top3_3_weighted_f1_score ": 0.43055422937916166, "eval_top_1_macro_f1_score": 0.06300539966389757, "eval_top_1_weighted_f1score": 0.20401126849456902, "eval_top_3_macro_f1_score": 0.17896822590948389, "step": 17344 }, { "epoch": 7.45, "learning_rate": 0.0002, "loss": 2.8579, "step": 17345 }, { "epoch": 7.45, "learning_rate": 0.0002, "loss": 2.8498, "step": 17350 }, { "epoch": 7.45, "learning_rate": 0.0002, "loss": 2.8427, "step": 17355 }, { "epoch": 7.45, "learning_rate": 0.0002, "loss": 2.8675, "step": 17360 }, { "epoch": 7.46, "learning_rate": 0.0002, "loss": 2.837, "step": 17365 }, { "epoch": 7.46, "learning_rate": 0.0002, "loss": 2.8395, "step": 17370 }, { "epoch": 7.46, "learning_rate": 0.0002, "loss": 2.8582, "step": 17375 }, { "epoch": 7.46, "learning_rate": 0.0002, "loss": 2.8453, "step": 17380 }, { "epoch": 7.46, "learning_rate": 0.0002, "loss": 2.8357, "step": 17385 }, { "epoch": 7.47, "learning_rate": 0.0002, "loss": 2.8518, "step": 17390 }, { "epoch": 7.47, "learning_rate": 0.0002, "loss": 2.8498, "step": 17395 }, { "epoch": 7.47, "learning_rate": 0.0002, "loss": 2.8369, "step": 17400 }, { "epoch": 7.47, "learning_rate": 0.0002, "loss": 2.8482, "step": 17405 }, { "epoch": 7.47, "eval_loss": 2.8515002727508545, "eval_runtime": 5.3558, "eval_samples_per_second": 1844.163, "eval_steps_per_second": 14.564, "eval_top3_3_weighted_f1_score ": 0.434874154355208, "eval_top_1_macro_f1_score": 0.06739339181520489, "eval_top_1_weighted_f1score": 0.20936395426218773, "eval_top_3_macro_f1_score": 0.17809085616540352, "step": 17408 }, { "epoch": 7.48, "learning_rate": 0.0002, "loss": 2.8336, "step": 17410 }, { "epoch": 7.48, "learning_rate": 0.0002, "loss": 2.8546, "step": 17415 }, { "epoch": 7.48, "learning_rate": 0.0002, "loss": 2.8422, "step": 17420 }, { "epoch": 7.48, "learning_rate": 0.0002, "loss": 2.8387, "step": 17425 }, { "epoch": 7.48, "learning_rate": 0.0002, "loss": 2.851, "step": 17430 }, { "epoch": 7.49, "learning_rate": 0.0002, "loss": 2.8567, "step": 17435 }, { "epoch": 7.49, "learning_rate": 0.0002, "loss": 2.8496, "step": 17440 }, { "epoch": 7.49, "learning_rate": 0.0002, "loss": 2.8493, "step": 17445 }, { "epoch": 7.49, "learning_rate": 0.0002, "loss": 2.8474, "step": 17450 }, { "epoch": 7.49, "learning_rate": 0.0002, "loss": 2.8485, "step": 17455 }, { "epoch": 7.5, "learning_rate": 0.0002, "loss": 2.8462, "step": 17460 }, { "epoch": 7.5, "learning_rate": 0.0002, "loss": 2.8646, "step": 17465 }, { "epoch": 7.5, "learning_rate": 0.0002, "loss": 2.833, "step": 17470 }, { "epoch": 7.5, "eval_loss": 2.8604257106781006, "eval_runtime": 5.415, "eval_samples_per_second": 1824.014, "eval_steps_per_second": 14.404, "eval_top3_3_weighted_f1_score ": 0.4266952661163531, "eval_top_1_macro_f1_score": 0.06480621602723906, "eval_top_1_weighted_f1score": 0.210137807720952, "eval_top_3_macro_f1_score": 0.17397079150143166, "step": 17472 }, { "epoch": 7.5, "learning_rate": 0.0002, "loss": 2.8367, "step": 17475 }, { "epoch": 7.51, "learning_rate": 0.0002, "loss": 2.8534, "step": 17480 }, { "epoch": 7.51, "learning_rate": 0.0002, "loss": 2.8538, "step": 17485 }, { "epoch": 7.51, "learning_rate": 0.0002, "loss": 2.8477, "step": 17490 }, { "epoch": 7.51, "learning_rate": 0.0002, "loss": 2.8335, "step": 17495 }, { "epoch": 7.51, "learning_rate": 0.0002, "loss": 2.8667, "step": 17500 }, { "epoch": 7.52, "learning_rate": 0.0002, "loss": 2.8323, "step": 17505 }, { "epoch": 7.52, "learning_rate": 0.0002, "loss": 2.8407, "step": 17510 }, { "epoch": 7.52, "learning_rate": 0.0002, "loss": 2.8334, "step": 17515 }, { "epoch": 7.52, "learning_rate": 0.0002, "loss": 2.8359, "step": 17520 }, { "epoch": 7.52, "learning_rate": 0.0002, "loss": 2.8693, "step": 17525 }, { "epoch": 7.53, "learning_rate": 0.0002, "loss": 2.856, "step": 17530 }, { "epoch": 7.53, "learning_rate": 0.0002, "loss": 2.8486, "step": 17535 }, { "epoch": 7.53, "eval_loss": 2.8551135063171387, "eval_runtime": 5.4053, "eval_samples_per_second": 1827.293, "eval_steps_per_second": 14.43, "eval_top3_3_weighted_f1_score ": 0.43152797707811985, "eval_top_1_macro_f1_score": 0.06773384679351183, "eval_top_1_weighted_f1score": 0.21062981726584978, "eval_top_3_macro_f1_score": 0.18280062824655052, "step": 17536 }, { "epoch": 7.53, "learning_rate": 0.0002, "loss": 2.8527, "step": 17540 }, { "epoch": 7.53, "learning_rate": 0.0002, "loss": 2.8275, "step": 17545 }, { "epoch": 7.54, "learning_rate": 0.0002, "loss": 2.8423, "step": 17550 }, { "epoch": 7.54, "learning_rate": 0.0002, "loss": 2.8408, "step": 17555 }, { "epoch": 7.54, "learning_rate": 0.0002, "loss": 2.8475, "step": 17560 }, { "epoch": 7.54, "learning_rate": 0.0002, "loss": 2.8513, "step": 17565 }, { "epoch": 7.54, "learning_rate": 0.0002, "loss": 2.8439, "step": 17570 }, { "epoch": 7.55, "learning_rate": 0.0002, "loss": 2.8475, "step": 17575 }, { "epoch": 7.55, "learning_rate": 0.0002, "loss": 2.8394, "step": 17580 }, { "epoch": 7.55, "learning_rate": 0.0002, "loss": 2.8424, "step": 17585 }, { "epoch": 7.55, "learning_rate": 0.0002, "loss": 2.8415, "step": 17590 }, { "epoch": 7.55, "learning_rate": 0.0002, "loss": 2.8319, "step": 17595 }, { "epoch": 7.56, "learning_rate": 0.0002, "loss": 2.8255, "step": 17600 }, { "epoch": 7.56, "eval_loss": 2.8550779819488525, "eval_runtime": 5.4082, "eval_samples_per_second": 1826.293, "eval_steps_per_second": 14.422, "eval_top3_3_weighted_f1_score ": 0.42816611431330714, "eval_top_1_macro_f1_score": 0.07119551719576099, "eval_top_1_weighted_f1score": 0.2091762216346021, "eval_top_3_macro_f1_score": 0.17398042630222224, "step": 17600 }, { "epoch": 7.56, "learning_rate": 0.0002, "loss": 2.8293, "step": 17605 }, { "epoch": 7.56, "learning_rate": 0.0002, "loss": 2.8567, "step": 17610 }, { "epoch": 7.56, "learning_rate": 0.0002, "loss": 2.8346, "step": 17615 }, { "epoch": 7.57, "learning_rate": 0.0002, "loss": 2.8368, "step": 17620 }, { "epoch": 7.57, "learning_rate": 0.0002, "loss": 2.8357, "step": 17625 }, { "epoch": 7.57, "learning_rate": 0.0002, "loss": 2.8602, "step": 17630 }, { "epoch": 7.57, "learning_rate": 0.0002, "loss": 2.8676, "step": 17635 }, { "epoch": 7.57, "learning_rate": 0.0002, "loss": 2.845, "step": 17640 }, { "epoch": 7.58, "learning_rate": 0.0002, "loss": 2.8414, "step": 17645 }, { "epoch": 7.58, "learning_rate": 0.0002, "loss": 2.8414, "step": 17650 }, { "epoch": 7.58, "learning_rate": 0.0002, "loss": 2.8216, "step": 17655 }, { "epoch": 7.58, "learning_rate": 0.0002, "loss": 2.8362, "step": 17660 }, { "epoch": 7.58, "eval_loss": 2.857983350753784, "eval_runtime": 5.3725, "eval_samples_per_second": 1838.432, "eval_steps_per_second": 14.518, "eval_top3_3_weighted_f1_score ": 0.4318687991757832, "eval_top_1_macro_f1_score": 0.06183854898403182, "eval_top_1_weighted_f1score": 0.20706907494516977, "eval_top_3_macro_f1_score": 0.18083329218727304, "step": 17664 }, { "epoch": 7.58, "learning_rate": 0.0002, "loss": 2.834, "step": 17665 }, { "epoch": 7.59, "learning_rate": 0.0002, "loss": 2.8465, "step": 17670 }, { "epoch": 7.59, "learning_rate": 0.0002, "loss": 2.8367, "step": 17675 }, { "epoch": 7.59, "learning_rate": 0.0002, "loss": 2.8234, "step": 17680 }, { "epoch": 7.59, "learning_rate": 0.0002, "loss": 2.8321, "step": 17685 }, { "epoch": 7.6, "learning_rate": 0.0002, "loss": 2.8487, "step": 17690 }, { "epoch": 7.6, "learning_rate": 0.0002, "loss": 2.823, "step": 17695 }, { "epoch": 7.6, "learning_rate": 0.0002, "loss": 2.8287, "step": 17700 }, { "epoch": 7.6, "learning_rate": 0.0002, "loss": 2.8645, "step": 17705 }, { "epoch": 7.6, "learning_rate": 0.0002, "loss": 2.8486, "step": 17710 }, { "epoch": 7.61, "learning_rate": 0.0002, "loss": 2.8471, "step": 17715 }, { "epoch": 7.61, "learning_rate": 0.0002, "loss": 2.8504, "step": 17720 }, { "epoch": 7.61, "learning_rate": 0.0002, "loss": 2.8385, "step": 17725 }, { "epoch": 7.61, "eval_loss": 2.861382246017456, "eval_runtime": 5.3217, "eval_samples_per_second": 1855.997, "eval_steps_per_second": 14.657, "eval_top3_3_weighted_f1_score ": 0.4320698808780226, "eval_top_1_macro_f1_score": 0.06481853703279937, "eval_top_1_weighted_f1score": 0.21251613785550227, "eval_top_3_macro_f1_score": 0.17647934637597118, "step": 17728 }, { "epoch": 7.61, "learning_rate": 0.0002, "loss": 2.8437, "step": 17730 }, { "epoch": 7.61, "learning_rate": 0.0002, "loss": 2.8271, "step": 17735 }, { "epoch": 7.62, "learning_rate": 0.0002, "loss": 2.8377, "step": 17740 }, { "epoch": 7.62, "learning_rate": 0.0002, "loss": 2.836, "step": 17745 }, { "epoch": 7.62, "learning_rate": 0.0002, "loss": 2.8472, "step": 17750 }, { "epoch": 7.62, "learning_rate": 0.0002, "loss": 2.862, "step": 17755 }, { "epoch": 7.63, "learning_rate": 0.0002, "loss": 2.8414, "step": 17760 }, { "epoch": 7.63, "learning_rate": 0.0002, "loss": 2.8407, "step": 17765 }, { "epoch": 7.63, "learning_rate": 0.0002, "loss": 2.8361, "step": 17770 }, { "epoch": 7.63, "learning_rate": 0.0002, "loss": 2.8445, "step": 17775 }, { "epoch": 7.63, "learning_rate": 0.0002, "loss": 2.8516, "step": 17780 }, { "epoch": 7.64, "learning_rate": 0.0002, "loss": 2.872, "step": 17785 }, { "epoch": 7.64, "learning_rate": 0.0002, "loss": 2.8308, "step": 17790 }, { "epoch": 7.64, "eval_loss": 2.8573529720306396, "eval_runtime": 5.3564, "eval_samples_per_second": 1843.959, "eval_steps_per_second": 14.562, "eval_top3_3_weighted_f1_score ": 0.43103753460109756, "eval_top_1_macro_f1_score": 0.06978424722369772, "eval_top_1_weighted_f1score": 0.20948118371885413, "eval_top_3_macro_f1_score": 0.18157377242094067, "step": 17792 }, { "epoch": 7.64, "learning_rate": 0.0002, "loss": 2.842, "step": 17795 }, { "epoch": 7.64, "learning_rate": 0.0002, "loss": 2.8266, "step": 17800 }, { "epoch": 7.64, "learning_rate": 0.0002, "loss": 2.8391, "step": 17805 }, { "epoch": 7.65, "learning_rate": 0.0002, "loss": 2.8482, "step": 17810 }, { "epoch": 7.65, "learning_rate": 0.0002, "loss": 2.8195, "step": 17815 }, { "epoch": 7.65, "learning_rate": 0.0002, "loss": 2.8669, "step": 17820 }, { "epoch": 7.65, "learning_rate": 0.0002, "loss": 2.8397, "step": 17825 }, { "epoch": 7.66, "learning_rate": 0.0002, "loss": 2.8409, "step": 17830 }, { "epoch": 7.66, "learning_rate": 0.0002, "loss": 2.8332, "step": 17835 }, { "epoch": 7.66, "learning_rate": 0.0002, "loss": 2.846, "step": 17840 }, { "epoch": 7.66, "learning_rate": 0.0002, "loss": 2.8637, "step": 17845 }, { "epoch": 7.66, "learning_rate": 0.0002, "loss": 2.8468, "step": 17850 }, { "epoch": 7.67, "learning_rate": 0.0002, "loss": 2.8444, "step": 17855 }, { "epoch": 7.67, "eval_loss": 2.853611707687378, "eval_runtime": 5.3499, "eval_samples_per_second": 1846.216, "eval_steps_per_second": 14.58, "eval_top3_3_weighted_f1_score ": 0.4262529393811172, "eval_top_1_macro_f1_score": 0.07395114953054589, "eval_top_1_weighted_f1score": 0.21340532479024657, "eval_top_3_macro_f1_score": 0.18009092560892373, "step": 17856 }, { "epoch": 7.67, "learning_rate": 0.0002, "loss": 2.8518, "step": 17860 }, { "epoch": 7.67, "learning_rate": 0.0002, "loss": 2.8395, "step": 17865 }, { "epoch": 7.67, "learning_rate": 0.0002, "loss": 2.85, "step": 17870 }, { "epoch": 7.67, "learning_rate": 0.0002, "loss": 2.8294, "step": 17875 }, { "epoch": 7.68, "learning_rate": 0.0002, "loss": 2.839, "step": 17880 }, { "epoch": 7.68, "learning_rate": 0.0002, "loss": 2.8436, "step": 17885 }, { "epoch": 7.68, "learning_rate": 0.0002, "loss": 2.8153, "step": 17890 }, { "epoch": 7.68, "learning_rate": 0.0002, "loss": 2.834, "step": 17895 }, { "epoch": 7.69, "learning_rate": 0.0002, "loss": 2.8368, "step": 17900 }, { "epoch": 7.69, "learning_rate": 0.0002, "loss": 2.8428, "step": 17905 }, { "epoch": 7.69, "learning_rate": 0.0002, "loss": 2.8276, "step": 17910 }, { "epoch": 7.69, "learning_rate": 0.0002, "loss": 2.8422, "step": 17915 }, { "epoch": 7.69, "learning_rate": 0.0002, "loss": 2.8503, "step": 17920 }, { "epoch": 7.69, "eval_loss": 2.8571088314056396, "eval_runtime": 5.3514, "eval_samples_per_second": 1845.67, "eval_steps_per_second": 14.576, "eval_top3_3_weighted_f1_score ": 0.4373222408633935, "eval_top_1_macro_f1_score": 0.0749892769259755, "eval_top_1_weighted_f1score": 0.21236818566394777, "eval_top_3_macro_f1_score": 0.1865552557620529, "step": 17920 }, { "epoch": 7.7, "learning_rate": 0.0002, "loss": 2.8488, "step": 17925 }, { "epoch": 7.7, "learning_rate": 0.0002, "loss": 2.8269, "step": 17930 }, { "epoch": 7.7, "learning_rate": 0.0002, "loss": 2.8434, "step": 17935 }, { "epoch": 7.7, "learning_rate": 0.0002, "loss": 2.8441, "step": 17940 }, { "epoch": 7.7, "learning_rate": 0.0002, "loss": 2.8485, "step": 17945 }, { "epoch": 7.71, "learning_rate": 0.0002, "loss": 2.8192, "step": 17950 }, { "epoch": 7.71, "learning_rate": 0.0002, "loss": 2.8557, "step": 17955 }, { "epoch": 7.71, "learning_rate": 0.0002, "loss": 2.8694, "step": 17960 }, { "epoch": 7.71, "learning_rate": 0.0002, "loss": 2.8372, "step": 17965 }, { "epoch": 7.72, "learning_rate": 0.0002, "loss": 2.8251, "step": 17970 }, { "epoch": 7.72, "learning_rate": 0.0002, "loss": 2.8558, "step": 17975 }, { "epoch": 7.72, "learning_rate": 0.0002, "loss": 2.8445, "step": 17980 }, { "epoch": 7.72, "eval_loss": 2.8570690155029297, "eval_runtime": 5.301, "eval_samples_per_second": 1863.224, "eval_steps_per_second": 14.714, "eval_top3_3_weighted_f1_score ": 0.4313267960058733, "eval_top_1_macro_f1_score": 0.06703520627019759, "eval_top_1_weighted_f1score": 0.21127194701671587, "eval_top_3_macro_f1_score": 0.17693727592084227, "step": 17984 }, { "epoch": 7.72, "learning_rate": 0.0002, "loss": 2.8495, "step": 17985 }, { "epoch": 7.72, "learning_rate": 0.0002, "loss": 2.8478, "step": 17990 }, { "epoch": 7.73, "learning_rate": 0.0002, "loss": 2.8384, "step": 17995 }, { "epoch": 7.73, "learning_rate": 0.0002, "loss": 2.8477, "step": 18000 }, { "epoch": 7.73, "learning_rate": 0.0002, "loss": 2.8507, "step": 18005 }, { "epoch": 7.73, "learning_rate": 0.0002, "loss": 2.8489, "step": 18010 }, { "epoch": 7.73, "learning_rate": 0.0002, "loss": 2.8318, "step": 18015 }, { "epoch": 7.74, "learning_rate": 0.0002, "loss": 2.8793, "step": 18020 }, { "epoch": 7.74, "learning_rate": 0.0002, "loss": 2.8545, "step": 18025 }, { "epoch": 7.74, "learning_rate": 0.0002, "loss": 2.8333, "step": 18030 }, { "epoch": 7.74, "learning_rate": 0.0002, "loss": 2.8325, "step": 18035 }, { "epoch": 7.75, "learning_rate": 0.0002, "loss": 2.8399, "step": 18040 }, { "epoch": 7.75, "learning_rate": 0.0002, "loss": 2.8254, "step": 18045 }, { "epoch": 7.75, "eval_loss": 2.860426902770996, "eval_runtime": 5.3043, "eval_samples_per_second": 1862.084, "eval_steps_per_second": 14.705, "eval_top3_3_weighted_f1_score ": 0.4305783468865011, "eval_top_1_macro_f1_score": 0.06261944572563972, "eval_top_1_weighted_f1score": 0.20871575977838583, "eval_top_3_macro_f1_score": 0.16743002794593007, "step": 18048 }, { "epoch": 7.75, "learning_rate": 0.0002, "loss": 2.8591, "step": 18050 }, { "epoch": 7.75, "learning_rate": 0.0002, "loss": 2.8379, "step": 18055 }, { "epoch": 7.75, "learning_rate": 0.0002, "loss": 2.8346, "step": 18060 }, { "epoch": 7.76, "learning_rate": 0.0002, "loss": 2.8247, "step": 18065 }, { "epoch": 7.76, "learning_rate": 0.0002, "loss": 2.85, "step": 18070 }, { "epoch": 7.76, "learning_rate": 0.0002, "loss": 2.8206, "step": 18075 }, { "epoch": 7.76, "learning_rate": 0.0002, "loss": 2.8467, "step": 18080 }, { "epoch": 7.76, "learning_rate": 0.0002, "loss": 2.8542, "step": 18085 }, { "epoch": 7.77, "learning_rate": 0.0002, "loss": 2.8319, "step": 18090 }, { "epoch": 7.77, "learning_rate": 0.0002, "loss": 2.8095, "step": 18095 }, { "epoch": 7.77, "learning_rate": 0.0002, "loss": 2.8545, "step": 18100 }, { "epoch": 7.77, "learning_rate": 0.0002, "loss": 2.856, "step": 18105 }, { "epoch": 7.78, "learning_rate": 0.0002, "loss": 2.839, "step": 18110 }, { "epoch": 7.78, "eval_loss": 2.8566136360168457, "eval_runtime": 5.3679, "eval_samples_per_second": 1840.015, "eval_steps_per_second": 14.531, "eval_top3_3_weighted_f1_score ": 0.42925124869710385, "eval_top_1_macro_f1_score": 0.06889615487223762, "eval_top_1_weighted_f1score": 0.2145421984986216, "eval_top_3_macro_f1_score": 0.17001836793742864, "step": 18112 }, { "epoch": 7.78, "learning_rate": 0.0002, "loss": 2.8465, "step": 18115 }, { "epoch": 7.78, "learning_rate": 0.0002, "loss": 2.8317, "step": 18120 }, { "epoch": 7.78, "learning_rate": 0.0002, "loss": 2.8478, "step": 18125 }, { "epoch": 7.78, "learning_rate": 0.0002, "loss": 2.8657, "step": 18130 }, { "epoch": 7.79, "learning_rate": 0.0002, "loss": 2.8261, "step": 18135 }, { "epoch": 7.79, "learning_rate": 0.0002, "loss": 2.84, "step": 18140 }, { "epoch": 7.79, "learning_rate": 0.0002, "loss": 2.8378, "step": 18145 }, { "epoch": 7.79, "learning_rate": 0.0002, "loss": 2.8539, "step": 18150 }, { "epoch": 7.8, "learning_rate": 0.0002, "loss": 2.829, "step": 18155 }, { "epoch": 7.8, "learning_rate": 0.0002, "loss": 2.8471, "step": 18160 }, { "epoch": 7.8, "learning_rate": 0.0002, "loss": 2.8295, "step": 18165 }, { "epoch": 7.8, "learning_rate": 0.0002, "loss": 2.8408, "step": 18170 }, { "epoch": 7.8, "learning_rate": 0.0002, "loss": 2.8316, "step": 18175 }, { "epoch": 7.8, "eval_loss": 2.8526108264923096, "eval_runtime": 5.3873, "eval_samples_per_second": 1833.37, "eval_steps_per_second": 14.478, "eval_top3_3_weighted_f1_score ": 0.43403000494809485, "eval_top_1_macro_f1_score": 0.06221753481705046, "eval_top_1_weighted_f1score": 0.20882870929826783, "eval_top_3_macro_f1_score": 0.16983619616315163, "step": 18176 }, { "epoch": 7.81, "learning_rate": 0.0002, "loss": 2.8651, "step": 18180 }, { "epoch": 7.81, "learning_rate": 0.0002, "loss": 2.8491, "step": 18185 }, { "epoch": 7.81, "learning_rate": 0.0002, "loss": 2.8227, "step": 18190 }, { "epoch": 7.81, "learning_rate": 0.0002, "loss": 2.8139, "step": 18195 }, { "epoch": 7.81, "learning_rate": 0.0002, "loss": 2.8494, "step": 18200 }, { "epoch": 7.82, "learning_rate": 0.0002, "loss": 2.8414, "step": 18205 }, { "epoch": 7.82, "learning_rate": 0.0002, "loss": 2.8545, "step": 18210 }, { "epoch": 7.82, "learning_rate": 0.0002, "loss": 2.8594, "step": 18215 }, { "epoch": 7.82, "learning_rate": 0.0002, "loss": 2.8326, "step": 18220 }, { "epoch": 7.83, "learning_rate": 0.0002, "loss": 2.8349, "step": 18225 }, { "epoch": 7.83, "learning_rate": 0.0002, "loss": 2.8324, "step": 18230 }, { "epoch": 7.83, "learning_rate": 0.0002, "loss": 2.8484, "step": 18235 }, { "epoch": 7.83, "learning_rate": 0.0002, "loss": 2.8556, "step": 18240 }, { "epoch": 7.83, "eval_loss": 2.8567590713500977, "eval_runtime": 5.4229, "eval_samples_per_second": 1821.352, "eval_steps_per_second": 14.383, "eval_top3_3_weighted_f1_score ": 0.43131154879888567, "eval_top_1_macro_f1_score": 0.0663159777747228, "eval_top_1_weighted_f1score": 0.2103576812917169, "eval_top_3_macro_f1_score": 0.17838595104232294, "step": 18240 }, { "epoch": 7.83, "learning_rate": 0.0002, "loss": 2.8416, "step": 18245 }, { "epoch": 7.84, "learning_rate": 0.0002, "loss": 2.8255, "step": 18250 }, { "epoch": 7.84, "learning_rate": 0.0002, "loss": 2.861, "step": 18255 }, { "epoch": 7.84, "learning_rate": 0.0002, "loss": 2.8423, "step": 18260 }, { "epoch": 7.84, "learning_rate": 0.0002, "loss": 2.8465, "step": 18265 }, { "epoch": 7.84, "learning_rate": 0.0002, "loss": 2.8381, "step": 18270 }, { "epoch": 7.85, "learning_rate": 0.0002, "loss": 2.8375, "step": 18275 }, { "epoch": 7.85, "learning_rate": 0.0002, "loss": 2.8542, "step": 18280 }, { "epoch": 7.85, "learning_rate": 0.0002, "loss": 2.8351, "step": 18285 }, { "epoch": 7.85, "learning_rate": 0.0002, "loss": 2.8369, "step": 18290 }, { "epoch": 7.86, "learning_rate": 0.0002, "loss": 2.8228, "step": 18295 }, { "epoch": 7.86, "learning_rate": 0.0002, "loss": 2.8367, "step": 18300 }, { "epoch": 7.86, "eval_loss": 2.8563692569732666, "eval_runtime": 5.4208, "eval_samples_per_second": 1822.061, "eval_steps_per_second": 14.389, "eval_top3_3_weighted_f1_score ": 0.4336133923446643, "eval_top_1_macro_f1_score": 0.06405406043198943, "eval_top_1_weighted_f1score": 0.20811731854029558, "eval_top_3_macro_f1_score": 0.17326654889218854, "step": 18304 }, { "epoch": 7.86, "learning_rate": 0.0002, "loss": 2.8452, "step": 18305 }, { "epoch": 7.86, "learning_rate": 0.0002, "loss": 2.8498, "step": 18310 }, { "epoch": 7.86, "learning_rate": 0.0002, "loss": 2.8376, "step": 18315 }, { "epoch": 7.87, "learning_rate": 0.0002, "loss": 2.8498, "step": 18320 }, { "epoch": 7.87, "learning_rate": 0.0002, "loss": 2.8484, "step": 18325 }, { "epoch": 7.87, "learning_rate": 0.0002, "loss": 2.86, "step": 18330 }, { "epoch": 7.87, "learning_rate": 0.0002, "loss": 2.8683, "step": 18335 }, { "epoch": 7.87, "learning_rate": 0.0002, "loss": 2.8415, "step": 18340 }, { "epoch": 7.88, "learning_rate": 0.0002, "loss": 2.8455, "step": 18345 }, { "epoch": 7.88, "learning_rate": 0.0002, "loss": 2.8442, "step": 18350 }, { "epoch": 7.88, "learning_rate": 0.0002, "loss": 2.8319, "step": 18355 }, { "epoch": 7.88, "learning_rate": 0.0002, "loss": 2.8413, "step": 18360 }, { "epoch": 7.89, "learning_rate": 0.0002, "loss": 2.8429, "step": 18365 }, { "epoch": 7.89, "eval_loss": 2.851510763168335, "eval_runtime": 5.3567, "eval_samples_per_second": 1843.851, "eval_steps_per_second": 14.561, "eval_top3_3_weighted_f1_score ": 0.42833027638318966, "eval_top_1_macro_f1_score": 0.06899612112792806, "eval_top_1_weighted_f1score": 0.20976544117055287, "eval_top_3_macro_f1_score": 0.17218828481472864, "step": 18368 }, { "epoch": 7.89, "learning_rate": 0.0002, "loss": 2.8466, "step": 18370 }, { "epoch": 7.89, "learning_rate": 0.0002, "loss": 2.8636, "step": 18375 }, { "epoch": 7.89, "learning_rate": 0.0002, "loss": 2.8507, "step": 18380 }, { "epoch": 7.89, "learning_rate": 0.0002, "loss": 2.8384, "step": 18385 }, { "epoch": 7.9, "learning_rate": 0.0002, "loss": 2.8591, "step": 18390 }, { "epoch": 7.9, "learning_rate": 0.0002, "loss": 2.8563, "step": 18395 }, { "epoch": 7.9, "learning_rate": 0.0002, "loss": 2.8483, "step": 18400 }, { "epoch": 7.9, "learning_rate": 0.0002, "loss": 2.8525, "step": 18405 }, { "epoch": 7.9, "learning_rate": 0.0002, "loss": 2.8306, "step": 18410 }, { "epoch": 7.91, "learning_rate": 0.0002, "loss": 2.8536, "step": 18415 }, { "epoch": 7.91, "learning_rate": 0.0002, "loss": 2.8541, "step": 18420 }, { "epoch": 7.91, "learning_rate": 0.0002, "loss": 2.8557, "step": 18425 }, { "epoch": 7.91, "learning_rate": 0.0002, "loss": 2.8333, "step": 18430 }, { "epoch": 7.91, "eval_loss": 2.850963830947876, "eval_runtime": 5.4114, "eval_samples_per_second": 1825.218, "eval_steps_per_second": 14.414, "eval_top3_3_weighted_f1_score ": 0.430849103412485, "eval_top_1_macro_f1_score": 0.059133958162772844, "eval_top_1_weighted_f1score": 0.2083677341411514, "eval_top_3_macro_f1_score": 0.1701283381965286, "step": 18432 }, { "epoch": 7.92, "learning_rate": 0.0002, "loss": 2.8163, "step": 18435 }, { "epoch": 7.92, "learning_rate": 0.0002, "loss": 2.8414, "step": 18440 }, { "epoch": 7.92, "learning_rate": 0.0002, "loss": 2.8409, "step": 18445 }, { "epoch": 7.92, "learning_rate": 0.0002, "loss": 2.856, "step": 18450 }, { "epoch": 7.92, "learning_rate": 0.0002, "loss": 2.8315, "step": 18455 }, { "epoch": 7.93, "learning_rate": 0.0002, "loss": 2.8286, "step": 18460 }, { "epoch": 7.93, "learning_rate": 0.0002, "loss": 2.8562, "step": 18465 }, { "epoch": 7.93, "learning_rate": 0.0002, "loss": 2.8319, "step": 18470 }, { "epoch": 7.93, "learning_rate": 0.0002, "loss": 2.8521, "step": 18475 }, { "epoch": 7.93, "learning_rate": 0.0002, "loss": 2.8579, "step": 18480 }, { "epoch": 7.94, "learning_rate": 0.0002, "loss": 2.8376, "step": 18485 }, { "epoch": 7.94, "learning_rate": 0.0002, "loss": 2.8553, "step": 18490 }, { "epoch": 7.94, "learning_rate": 0.0002, "loss": 2.8424, "step": 18495 }, { "epoch": 7.94, "eval_loss": 2.85726261138916, "eval_runtime": 5.3679, "eval_samples_per_second": 1839.996, "eval_steps_per_second": 14.531, "eval_top3_3_weighted_f1_score ": 0.4352311699556431, "eval_top_1_macro_f1_score": 0.06514683461849503, "eval_top_1_weighted_f1score": 0.20901840271096092, "eval_top_3_macro_f1_score": 0.1825441574341896, "step": 18496 }, { "epoch": 7.94, "learning_rate": 0.0002, "loss": 2.8344, "step": 18500 }, { "epoch": 7.95, "learning_rate": 0.0002, "loss": 2.8308, "step": 18505 }, { "epoch": 7.95, "learning_rate": 0.0002, "loss": 2.8429, "step": 18510 }, { "epoch": 7.95, "learning_rate": 0.0002, "loss": 2.8609, "step": 18515 }, { "epoch": 7.95, "learning_rate": 0.0002, "loss": 2.8437, "step": 18520 }, { "epoch": 7.95, "learning_rate": 0.0002, "loss": 2.8463, "step": 18525 }, { "epoch": 7.96, "learning_rate": 0.0002, "loss": 2.8299, "step": 18530 }, { "epoch": 7.96, "learning_rate": 0.0002, "loss": 2.8522, "step": 18535 }, { "epoch": 7.96, "learning_rate": 0.0002, "loss": 2.8434, "step": 18540 }, { "epoch": 7.96, "learning_rate": 0.0002, "loss": 2.8441, "step": 18545 }, { "epoch": 7.96, "learning_rate": 0.0002, "loss": 2.8359, "step": 18550 }, { "epoch": 7.97, "learning_rate": 0.0002, "loss": 2.8344, "step": 18555 }, { "epoch": 7.97, "learning_rate": 0.0002, "loss": 2.84, "step": 18560 }, { "epoch": 7.97, "eval_loss": 2.856273889541626, "eval_runtime": 5.4152, "eval_samples_per_second": 1823.941, "eval_steps_per_second": 14.404, "eval_top3_3_weighted_f1_score ": 0.4279170248532264, "eval_top_1_macro_f1_score": 0.06686860193563618, "eval_top_1_weighted_f1score": 0.2119040248259937, "eval_top_3_macro_f1_score": 0.1735783335625637, "step": 18560 }, { "epoch": 7.97, "learning_rate": 0.0002, "loss": 2.8732, "step": 18565 }, { "epoch": 7.97, "learning_rate": 0.0002, "loss": 2.8395, "step": 18570 }, { "epoch": 7.98, "learning_rate": 0.0002, "loss": 2.8353, "step": 18575 }, { "epoch": 7.98, "learning_rate": 0.0002, "loss": 2.8383, "step": 18580 }, { "epoch": 7.98, "learning_rate": 0.0002, "loss": 2.8234, "step": 18585 }, { "epoch": 7.98, "learning_rate": 0.0002, "loss": 2.828, "step": 18590 }, { "epoch": 7.98, "learning_rate": 0.0002, "loss": 2.841, "step": 18595 }, { "epoch": 7.99, "learning_rate": 0.0002, "loss": 2.835, "step": 18600 }, { "epoch": 7.99, "learning_rate": 0.0002, "loss": 2.8447, "step": 18605 }, { "epoch": 7.99, "learning_rate": 0.0002, "loss": 2.8348, "step": 18610 }, { "epoch": 7.99, "learning_rate": 0.0002, "loss": 2.8295, "step": 18615 }, { "epoch": 7.99, "learning_rate": 0.0002, "loss": 2.8351, "step": 18620 }, { "epoch": 8.0, "eval_loss": 2.8579623699188232, "eval_runtime": 5.3757, "eval_samples_per_second": 1837.347, "eval_steps_per_second": 14.51, "eval_top3_3_weighted_f1_score ": 0.43013772806028433, "eval_top_1_macro_f1_score": 0.06952676378462946, "eval_top_1_weighted_f1score": 0.21015821301808751, "eval_top_3_macro_f1_score": 0.18382542806366886, "step": 18624 }, { "epoch": 8.0, "learning_rate": 0.0002, "loss": 2.8244, "step": 18625 }, { "epoch": 8.0, "learning_rate": 0.0002, "loss": 2.8217, "step": 18630 }, { "epoch": 8.0, "learning_rate": 0.0002, "loss": 3.1162, "step": 18635 }, { "epoch": 8.0, "learning_rate": 0.0002, "loss": 2.8311, "step": 18640 }, { "epoch": 8.01, "learning_rate": 0.0002, "loss": 2.8119, "step": 18645 }, { "epoch": 8.01, "learning_rate": 0.0002, "loss": 2.8401, "step": 18650 }, { "epoch": 8.01, "learning_rate": 0.0002, "loss": 2.8329, "step": 18655 }, { "epoch": 8.01, "learning_rate": 0.0002, "loss": 2.8304, "step": 18660 }, { "epoch": 8.01, "learning_rate": 0.0002, "loss": 2.8249, "step": 18665 }, { "epoch": 8.02, "learning_rate": 0.0002, "loss": 2.8305, "step": 18670 }, { "epoch": 8.02, "learning_rate": 0.0002, "loss": 2.8421, "step": 18675 }, { "epoch": 8.02, "learning_rate": 0.0002, "loss": 2.8378, "step": 18680 }, { "epoch": 8.02, "learning_rate": 0.0002, "loss": 2.8456, "step": 18685 }, { "epoch": 8.02, "eval_loss": 2.8500025272369385, "eval_runtime": 5.4191, "eval_samples_per_second": 1822.637, "eval_steps_per_second": 14.394, "eval_top3_3_weighted_f1_score ": 0.4337105301444936, "eval_top_1_macro_f1_score": 0.06945696759471479, "eval_top_1_weighted_f1score": 0.21319932658089386, "eval_top_3_macro_f1_score": 0.17186180065786436, "step": 18688 }, { "epoch": 8.02, "learning_rate": 0.0002, "loss": 2.8445, "step": 18690 }, { "epoch": 8.03, "learning_rate": 0.0002, "loss": 2.8188, "step": 18695 }, { "epoch": 8.03, "learning_rate": 0.0002, "loss": 2.8428, "step": 18700 }, { "epoch": 8.03, "learning_rate": 0.0002, "loss": 2.8293, "step": 18705 }, { "epoch": 8.03, "learning_rate": 0.0002, "loss": 2.8368, "step": 18710 }, { "epoch": 8.04, "learning_rate": 0.0002, "loss": 2.8482, "step": 18715 }, { "epoch": 8.04, "learning_rate": 0.0002, "loss": 2.8299, "step": 18720 }, { "epoch": 8.04, "learning_rate": 0.0002, "loss": 2.8352, "step": 18725 }, { "epoch": 8.04, "learning_rate": 0.0002, "loss": 2.8205, "step": 18730 }, { "epoch": 8.04, "learning_rate": 0.0002, "loss": 2.8329, "step": 18735 }, { "epoch": 8.05, "learning_rate": 0.0002, "loss": 2.81, "step": 18740 }, { "epoch": 8.05, "learning_rate": 0.0002, "loss": 2.8359, "step": 18745 }, { "epoch": 8.05, "learning_rate": 0.0002, "loss": 2.8255, "step": 18750 }, { "epoch": 8.05, "eval_loss": 2.8510630130767822, "eval_runtime": 5.8799, "eval_samples_per_second": 1679.792, "eval_steps_per_second": 13.266, "eval_top3_3_weighted_f1_score ": 0.43241645510398236, "eval_top_1_macro_f1_score": 0.0626539668077727, "eval_top_1_weighted_f1score": 0.21252954983858618, "eval_top_3_macro_f1_score": 0.17364294148805745, "step": 18752 }, { "epoch": 8.05, "learning_rate": 0.0002, "loss": 2.8283, "step": 18755 }, { "epoch": 8.05, "learning_rate": 0.0002, "loss": 2.8327, "step": 18760 }, { "epoch": 8.06, "learning_rate": 0.0002, "loss": 2.8379, "step": 18765 }, { "epoch": 8.06, "learning_rate": 0.0002, "loss": 2.8265, "step": 18770 }, { "epoch": 8.06, "learning_rate": 0.0002, "loss": 2.8493, "step": 18775 }, { "epoch": 8.06, "learning_rate": 0.0002, "loss": 2.8323, "step": 18780 }, { "epoch": 8.07, "learning_rate": 0.0002, "loss": 2.8333, "step": 18785 }, { "epoch": 8.07, "learning_rate": 0.0002, "loss": 2.8442, "step": 18790 }, { "epoch": 8.07, "learning_rate": 0.0002, "loss": 2.8252, "step": 18795 }, { "epoch": 8.07, "learning_rate": 0.0002, "loss": 2.8213, "step": 18800 }, { "epoch": 8.07, "learning_rate": 0.0002, "loss": 2.8389, "step": 18805 }, { "epoch": 8.08, "learning_rate": 0.0002, "loss": 2.8281, "step": 18810 }, { "epoch": 8.08, "learning_rate": 0.0002, "loss": 2.8431, "step": 18815 }, { "epoch": 8.08, "eval_loss": 2.8637490272521973, "eval_runtime": 5.4628, "eval_samples_per_second": 1808.041, "eval_steps_per_second": 14.278, "eval_top3_3_weighted_f1_score ": 0.42713120105614755, "eval_top_1_macro_f1_score": 0.06592746106594652, "eval_top_1_weighted_f1score": 0.2145307261266979, "eval_top_3_macro_f1_score": 0.17053777198677741, "step": 18816 }, { "epoch": 8.08, "learning_rate": 0.0002, "loss": 2.8538, "step": 18820 }, { "epoch": 8.08, "learning_rate": 0.0002, "loss": 2.844, "step": 18825 }, { "epoch": 8.08, "learning_rate": 0.0002, "loss": 2.832, "step": 18830 }, { "epoch": 8.09, "learning_rate": 0.0002, "loss": 2.8244, "step": 18835 }, { "epoch": 8.09, "learning_rate": 0.0002, "loss": 2.8194, "step": 18840 }, { "epoch": 8.09, "learning_rate": 0.0002, "loss": 2.828, "step": 18845 }, { "epoch": 8.09, "learning_rate": 0.0002, "loss": 2.8327, "step": 18850 }, { "epoch": 8.1, "learning_rate": 0.0002, "loss": 2.8255, "step": 18855 }, { "epoch": 8.1, "learning_rate": 0.0002, "loss": 2.8415, "step": 18860 }, { "epoch": 8.1, "learning_rate": 0.0002, "loss": 2.8184, "step": 18865 }, { "epoch": 8.1, "learning_rate": 0.0002, "loss": 2.8489, "step": 18870 }, { "epoch": 8.1, "learning_rate": 0.0002, "loss": 2.8427, "step": 18875 }, { "epoch": 8.11, "learning_rate": 0.0002, "loss": 2.8472, "step": 18880 }, { "epoch": 8.11, "eval_loss": 2.8579342365264893, "eval_runtime": 5.4463, "eval_samples_per_second": 1813.524, "eval_steps_per_second": 14.322, "eval_top3_3_weighted_f1_score ": 0.43202022231306336, "eval_top_1_macro_f1_score": 0.06720262193925525, "eval_top_1_weighted_f1score": 0.21366382711413187, "eval_top_3_macro_f1_score": 0.1870978869165953, "step": 18880 }, { "epoch": 8.11, "learning_rate": 0.0002, "loss": 2.823, "step": 18885 }, { "epoch": 8.11, "learning_rate": 0.0002, "loss": 2.8301, "step": 18890 }, { "epoch": 8.11, "learning_rate": 0.0002, "loss": 2.8216, "step": 18895 }, { "epoch": 8.12, "learning_rate": 0.0002, "loss": 2.8462, "step": 18900 }, { "epoch": 8.12, "learning_rate": 0.0002, "loss": 2.8409, "step": 18905 }, { "epoch": 8.12, "learning_rate": 0.0002, "loss": 2.8372, "step": 18910 }, { "epoch": 8.12, "learning_rate": 0.0002, "loss": 2.8489, "step": 18915 }, { "epoch": 8.12, "learning_rate": 0.0002, "loss": 2.8178, "step": 18920 }, { "epoch": 8.13, "learning_rate": 0.0002, "loss": 2.8139, "step": 18925 }, { "epoch": 8.13, "learning_rate": 0.0002, "loss": 2.8313, "step": 18930 }, { "epoch": 8.13, "learning_rate": 0.0002, "loss": 2.8428, "step": 18935 }, { "epoch": 8.13, "learning_rate": 0.0002, "loss": 2.8321, "step": 18940 }, { "epoch": 8.13, "eval_loss": 2.853893995285034, "eval_runtime": 5.4045, "eval_samples_per_second": 1827.541, "eval_steps_per_second": 14.432, "eval_top3_3_weighted_f1_score ": 0.43363298161690594, "eval_top_1_macro_f1_score": 0.06796439537460448, "eval_top_1_weighted_f1score": 0.20954028319159415, "eval_top_3_macro_f1_score": 0.18018650141645645, "step": 18944 }, { "epoch": 8.13, "learning_rate": 0.0002, "loss": 2.8378, "step": 18945 }, { "epoch": 8.14, "learning_rate": 0.0002, "loss": 2.8358, "step": 18950 }, { "epoch": 8.14, "learning_rate": 0.0002, "loss": 2.8285, "step": 18955 }, { "epoch": 8.14, "learning_rate": 0.0002, "loss": 2.8359, "step": 18960 }, { "epoch": 8.14, "learning_rate": 0.0002, "loss": 2.8241, "step": 18965 }, { "epoch": 8.15, "learning_rate": 0.0002, "loss": 2.8292, "step": 18970 }, { "epoch": 8.15, "learning_rate": 0.0002, "loss": 2.8352, "step": 18975 }, { "epoch": 8.15, "learning_rate": 0.0002, "loss": 2.8379, "step": 18980 }, { "epoch": 8.15, "learning_rate": 0.0002, "loss": 2.8272, "step": 18985 }, { "epoch": 8.15, "learning_rate": 0.0002, "loss": 2.8298, "step": 18990 }, { "epoch": 8.16, "learning_rate": 0.0002, "loss": 2.8477, "step": 18995 }, { "epoch": 8.16, "learning_rate": 0.0002, "loss": 2.818, "step": 19000 }, { "epoch": 8.16, "learning_rate": 0.0002, "loss": 2.8352, "step": 19005 }, { "epoch": 8.16, "eval_loss": 2.85410737991333, "eval_runtime": 5.4082, "eval_samples_per_second": 1826.301, "eval_steps_per_second": 14.423, "eval_top3_3_weighted_f1_score ": 0.43263465736946743, "eval_top_1_macro_f1_score": 0.06272062959897279, "eval_top_1_weighted_f1score": 0.21361910459006775, "eval_top_3_macro_f1_score": 0.17537673383327287, "step": 19008 }, { "epoch": 8.16, "learning_rate": 0.0002, "loss": 2.8282, "step": 19010 }, { "epoch": 8.16, "learning_rate": 0.0002, "loss": 2.8275, "step": 19015 }, { "epoch": 8.17, "learning_rate": 0.0002, "loss": 2.8249, "step": 19020 }, { "epoch": 8.17, "learning_rate": 0.0002, "loss": 2.8326, "step": 19025 }, { "epoch": 8.17, "learning_rate": 0.0002, "loss": 2.828, "step": 19030 }, { "epoch": 8.17, "learning_rate": 0.0002, "loss": 2.8375, "step": 19035 }, { "epoch": 8.18, "learning_rate": 0.0002, "loss": 2.8474, "step": 19040 }, { "epoch": 8.18, "learning_rate": 0.0002, "loss": 2.823, "step": 19045 }, { "epoch": 8.18, "learning_rate": 0.0002, "loss": 2.8401, "step": 19050 }, { "epoch": 8.18, "learning_rate": 0.0002, "loss": 2.8147, "step": 19055 }, { "epoch": 8.18, "learning_rate": 0.0002, "loss": 2.8169, "step": 19060 }, { "epoch": 8.19, "learning_rate": 0.0002, "loss": 2.8384, "step": 19065 }, { "epoch": 8.19, "learning_rate": 0.0002, "loss": 2.8394, "step": 19070 }, { "epoch": 8.19, "eval_loss": 2.8559343814849854, "eval_runtime": 5.3943, "eval_samples_per_second": 1831.018, "eval_steps_per_second": 14.46, "eval_top3_3_weighted_f1_score ": 0.4317591661287136, "eval_top_1_macro_f1_score": 0.06449347914155465, "eval_top_1_weighted_f1score": 0.20687341605906204, "eval_top_3_macro_f1_score": 0.17633157286886114, "step": 19072 }, { "epoch": 8.19, "learning_rate": 0.0002, "loss": 2.8361, "step": 19075 }, { "epoch": 8.19, "learning_rate": 0.0002, "loss": 2.8305, "step": 19080 }, { "epoch": 8.19, "learning_rate": 0.0002, "loss": 2.8386, "step": 19085 }, { "epoch": 8.2, "learning_rate": 0.0002, "loss": 2.8342, "step": 19090 }, { "epoch": 8.2, "learning_rate": 0.0002, "loss": 2.8268, "step": 19095 }, { "epoch": 8.2, "learning_rate": 0.0002, "loss": 2.8336, "step": 19100 }, { "epoch": 8.2, "learning_rate": 0.0002, "loss": 2.8249, "step": 19105 }, { "epoch": 8.21, "learning_rate": 0.0002, "loss": 2.8376, "step": 19110 }, { "epoch": 8.21, "learning_rate": 0.0002, "loss": 2.8399, "step": 19115 }, { "epoch": 8.21, "learning_rate": 0.0002, "loss": 2.8374, "step": 19120 }, { "epoch": 8.21, "learning_rate": 0.0002, "loss": 2.839, "step": 19125 }, { "epoch": 8.21, "learning_rate": 0.0002, "loss": 2.8173, "step": 19130 }, { "epoch": 8.22, "learning_rate": 0.0002, "loss": 2.8368, "step": 19135 }, { "epoch": 8.22, "eval_loss": 2.8506228923797607, "eval_runtime": 5.4809, "eval_samples_per_second": 1802.072, "eval_steps_per_second": 14.231, "eval_top3_3_weighted_f1_score ": 0.4331842785247634, "eval_top_1_macro_f1_score": 0.06250492468490476, "eval_top_1_weighted_f1score": 0.20867825153577363, "eval_top_3_macro_f1_score": 0.1768876667317683, "step": 19136 }, { "epoch": 8.22, "learning_rate": 0.0002, "loss": 2.8282, "step": 19140 }, { "epoch": 8.22, "learning_rate": 0.0002, "loss": 2.845, "step": 19145 }, { "epoch": 8.22, "learning_rate": 0.0002, "loss": 2.848, "step": 19150 }, { "epoch": 8.22, "learning_rate": 0.0002, "loss": 2.8351, "step": 19155 }, { "epoch": 8.23, "learning_rate": 0.0002, "loss": 2.8321, "step": 19160 }, { "epoch": 8.23, "learning_rate": 0.0002, "loss": 2.8321, "step": 19165 }, { "epoch": 8.23, "learning_rate": 0.0002, "loss": 2.831, "step": 19170 }, { "epoch": 8.23, "learning_rate": 0.0002, "loss": 2.8441, "step": 19175 }, { "epoch": 8.24, "learning_rate": 0.0002, "loss": 2.8412, "step": 19180 }, { "epoch": 8.24, "learning_rate": 0.0002, "loss": 2.8408, "step": 19185 }, { "epoch": 8.24, "learning_rate": 0.0002, "loss": 2.8389, "step": 19190 }, { "epoch": 8.24, "learning_rate": 0.0002, "loss": 2.835, "step": 19195 }, { "epoch": 8.24, "learning_rate": 0.0002, "loss": 2.847, "step": 19200 }, { "epoch": 8.24, "eval_loss": 2.854755163192749, "eval_runtime": 5.4547, "eval_samples_per_second": 1810.726, "eval_steps_per_second": 14.3, "eval_top3_3_weighted_f1_score ": 0.43055610174109016, "eval_top_1_macro_f1_score": 0.0635001065286571, "eval_top_1_weighted_f1score": 0.21100021654073248, "eval_top_3_macro_f1_score": 0.1696295469752659, "step": 19200 }, { "epoch": 8.25, "learning_rate": 0.0002, "loss": 2.8358, "step": 19205 }, { "epoch": 8.25, "learning_rate": 0.0002, "loss": 2.8163, "step": 19210 }, { "epoch": 8.25, "learning_rate": 0.0002, "loss": 2.856, "step": 19215 }, { "epoch": 8.25, "learning_rate": 0.0002, "loss": 2.8352, "step": 19220 }, { "epoch": 8.25, "learning_rate": 0.0002, "loss": 2.828, "step": 19225 }, { "epoch": 8.26, "learning_rate": 0.0002, "loss": 2.8547, "step": 19230 }, { "epoch": 8.26, "learning_rate": 0.0002, "loss": 2.8305, "step": 19235 }, { "epoch": 8.26, "learning_rate": 0.0002, "loss": 2.8485, "step": 19240 }, { "epoch": 8.26, "learning_rate": 0.0002, "loss": 2.8362, "step": 19245 }, { "epoch": 8.27, "learning_rate": 0.0002, "loss": 2.8369, "step": 19250 }, { "epoch": 8.27, "learning_rate": 0.0002, "loss": 2.837, "step": 19255 }, { "epoch": 8.27, "learning_rate": 0.0002, "loss": 2.8321, "step": 19260 }, { "epoch": 8.27, "eval_loss": 2.8574471473693848, "eval_runtime": 5.3874, "eval_samples_per_second": 1833.353, "eval_steps_per_second": 14.478, "eval_top3_3_weighted_f1_score ": 0.4297157811002323, "eval_top_1_macro_f1_score": 0.06663708955187947, "eval_top_1_weighted_f1score": 0.20993324481778222, "eval_top_3_macro_f1_score": 0.177119064241403, "step": 19264 }, { "epoch": 8.27, "learning_rate": 0.0002, "loss": 2.8311, "step": 19265 }, { "epoch": 8.27, "learning_rate": 0.0002, "loss": 2.8374, "step": 19270 }, { "epoch": 8.28, "learning_rate": 0.0002, "loss": 2.8374, "step": 19275 }, { "epoch": 8.28, "learning_rate": 0.0002, "loss": 2.8371, "step": 19280 }, { "epoch": 8.28, "learning_rate": 0.0002, "loss": 2.8548, "step": 19285 }, { "epoch": 8.28, "learning_rate": 0.0002, "loss": 2.8516, "step": 19290 }, { "epoch": 8.28, "learning_rate": 0.0002, "loss": 2.8305, "step": 19295 }, { "epoch": 8.29, "learning_rate": 0.0002, "loss": 2.8651, "step": 19300 }, { "epoch": 8.29, "learning_rate": 0.0002, "loss": 2.8431, "step": 19305 }, { "epoch": 8.29, "learning_rate": 0.0002, "loss": 2.8336, "step": 19310 }, { "epoch": 8.29, "learning_rate": 0.0002, "loss": 2.8387, "step": 19315 }, { "epoch": 8.3, "learning_rate": 0.0002, "loss": 2.8259, "step": 19320 }, { "epoch": 8.3, "learning_rate": 0.0002, "loss": 2.8423, "step": 19325 }, { "epoch": 8.3, "eval_loss": 2.853882074356079, "eval_runtime": 5.6442, "eval_samples_per_second": 1749.924, "eval_steps_per_second": 13.819, "eval_top3_3_weighted_f1_score ": 0.43012953339607285, "eval_top_1_macro_f1_score": 0.06517454054415779, "eval_top_1_weighted_f1score": 0.20759689547411692, "eval_top_3_macro_f1_score": 0.16976490472592393, "step": 19328 }, { "epoch": 8.3, "learning_rate": 0.0002, "loss": 2.8384, "step": 19330 }, { "epoch": 8.3, "learning_rate": 0.0002, "loss": 2.8206, "step": 19335 }, { "epoch": 8.3, "learning_rate": 0.0002, "loss": 2.8321, "step": 19340 }, { "epoch": 8.31, "learning_rate": 0.0002, "loss": 2.8482, "step": 19345 }, { "epoch": 8.31, "learning_rate": 0.0002, "loss": 2.8277, "step": 19350 }, { "epoch": 8.31, "learning_rate": 0.0002, "loss": 2.8656, "step": 19355 }, { "epoch": 8.31, "learning_rate": 0.0002, "loss": 2.8476, "step": 19360 }, { "epoch": 8.31, "learning_rate": 0.0002, "loss": 2.8275, "step": 19365 }, { "epoch": 8.32, "learning_rate": 0.0002, "loss": 2.8334, "step": 19370 }, { "epoch": 8.32, "learning_rate": 0.0002, "loss": 2.8598, "step": 19375 }, { "epoch": 8.32, "learning_rate": 0.0002, "loss": 2.8308, "step": 19380 }, { "epoch": 8.32, "learning_rate": 0.0002, "loss": 2.8359, "step": 19385 }, { "epoch": 8.33, "learning_rate": 0.0002, "loss": 2.8489, "step": 19390 }, { "epoch": 8.33, "eval_loss": 2.85400652885437, "eval_runtime": 5.3549, "eval_samples_per_second": 1844.472, "eval_steps_per_second": 14.566, "eval_top3_3_weighted_f1_score ": 0.4319273729595622, "eval_top_1_macro_f1_score": 0.06498108418862693, "eval_top_1_weighted_f1score": 0.2148537004192246, "eval_top_3_macro_f1_score": 0.17358489391881235, "step": 19392 }, { "epoch": 8.33, "learning_rate": 0.0002, "loss": 2.8266, "step": 19395 }, { "epoch": 8.33, "learning_rate": 0.0002, "loss": 2.8398, "step": 19400 }, { "epoch": 8.33, "learning_rate": 0.0002, "loss": 2.8361, "step": 19405 }, { "epoch": 8.33, "learning_rate": 0.0002, "loss": 2.8331, "step": 19410 }, { "epoch": 8.34, "learning_rate": 0.0002, "loss": 2.8458, "step": 19415 }, { "epoch": 8.34, "learning_rate": 0.0002, "loss": 2.8197, "step": 19420 }, { "epoch": 8.34, "learning_rate": 0.0002, "loss": 2.8347, "step": 19425 }, { "epoch": 8.34, "learning_rate": 0.0002, "loss": 2.8368, "step": 19430 }, { "epoch": 8.34, "learning_rate": 0.0002, "loss": 2.8354, "step": 19435 }, { "epoch": 8.35, "learning_rate": 0.0002, "loss": 2.8447, "step": 19440 }, { "epoch": 8.35, "learning_rate": 0.0002, "loss": 2.8472, "step": 19445 }, { "epoch": 8.35, "learning_rate": 0.0002, "loss": 2.8329, "step": 19450 }, { "epoch": 8.35, "learning_rate": 0.0002, "loss": 2.829, "step": 19455 }, { "epoch": 8.35, "eval_loss": 2.8535819053649902, "eval_runtime": 5.399, "eval_samples_per_second": 1829.409, "eval_steps_per_second": 14.447, "eval_top3_3_weighted_f1_score ": 0.4313302234369109, "eval_top_1_macro_f1_score": 0.0752702175481044, "eval_top_1_weighted_f1score": 0.20996350239635977, "eval_top_3_macro_f1_score": 0.18139335229578923, "step": 19456 }, { "epoch": 8.36, "learning_rate": 0.0002, "loss": 2.844, "step": 19460 }, { "epoch": 8.36, "learning_rate": 0.0002, "loss": 2.8342, "step": 19465 }, { "epoch": 8.36, "learning_rate": 0.0002, "loss": 2.8224, "step": 19470 }, { "epoch": 8.36, "learning_rate": 0.0002, "loss": 2.8198, "step": 19475 }, { "epoch": 8.36, "learning_rate": 0.0002, "loss": 2.8426, "step": 19480 }, { "epoch": 8.37, "learning_rate": 0.0002, "loss": 2.8374, "step": 19485 }, { "epoch": 8.37, "learning_rate": 0.0002, "loss": 2.8521, "step": 19490 }, { "epoch": 8.37, "learning_rate": 0.0002, "loss": 2.8315, "step": 19495 }, { "epoch": 8.37, "learning_rate": 0.0002, "loss": 2.8425, "step": 19500 }, { "epoch": 8.37, "learning_rate": 0.0002, "loss": 2.8376, "step": 19505 }, { "epoch": 8.38, "learning_rate": 0.0002, "loss": 2.8233, "step": 19510 }, { "epoch": 8.38, "learning_rate": 0.0002, "loss": 2.8257, "step": 19515 }, { "epoch": 8.38, "learning_rate": 0.0002, "loss": 2.8347, "step": 19520 }, { "epoch": 8.38, "eval_loss": 2.853299617767334, "eval_runtime": 5.3715, "eval_samples_per_second": 1838.789, "eval_steps_per_second": 14.521, "eval_top3_3_weighted_f1_score ": 0.4320093917224778, "eval_top_1_macro_f1_score": 0.06644228036784446, "eval_top_1_weighted_f1score": 0.20949961738693795, "eval_top_3_macro_f1_score": 0.1840076991094436, "step": 19520 }, { "epoch": 8.38, "learning_rate": 0.0002, "loss": 2.8314, "step": 19525 }, { "epoch": 8.39, "learning_rate": 0.0002, "loss": 2.8439, "step": 19530 }, { "epoch": 8.39, "learning_rate": 0.0002, "loss": 2.8226, "step": 19535 }, { "epoch": 8.39, "learning_rate": 0.0002, "loss": 2.8342, "step": 19540 }, { "epoch": 8.39, "learning_rate": 0.0002, "loss": 2.8424, "step": 19545 }, { "epoch": 8.39, "learning_rate": 0.0002, "loss": 2.822, "step": 19550 }, { "epoch": 8.4, "learning_rate": 0.0002, "loss": 2.8421, "step": 19555 }, { "epoch": 8.4, "learning_rate": 0.0002, "loss": 2.8321, "step": 19560 }, { "epoch": 8.4, "learning_rate": 0.0002, "loss": 2.8192, "step": 19565 }, { "epoch": 8.4, "learning_rate": 0.0002, "loss": 2.8418, "step": 19570 }, { "epoch": 8.4, "learning_rate": 0.0002, "loss": 2.8484, "step": 19575 }, { "epoch": 8.41, "learning_rate": 0.0002, "loss": 2.8314, "step": 19580 }, { "epoch": 8.41, "eval_loss": 2.8602147102355957, "eval_runtime": 5.3751, "eval_samples_per_second": 1837.561, "eval_steps_per_second": 14.511, "eval_top3_3_weighted_f1_score ": 0.4269702864997268, "eval_top_1_macro_f1_score": 0.0700457680521881, "eval_top_1_weighted_f1score": 0.21204503998643293, "eval_top_3_macro_f1_score": 0.17994195026074286, "step": 19584 }, { "epoch": 8.41, "learning_rate": 0.0002, "loss": 2.8422, "step": 19585 }, { "epoch": 8.41, "learning_rate": 0.0002, "loss": 2.8316, "step": 19590 }, { "epoch": 8.41, "learning_rate": 0.0002, "loss": 2.8193, "step": 19595 }, { "epoch": 8.42, "learning_rate": 0.0002, "loss": 2.8462, "step": 19600 }, { "epoch": 8.42, "learning_rate": 0.0002, "loss": 2.8263, "step": 19605 }, { "epoch": 8.42, "learning_rate": 0.0002, "loss": 2.8465, "step": 19610 }, { "epoch": 8.42, "learning_rate": 0.0002, "loss": 2.8213, "step": 19615 }, { "epoch": 8.42, "learning_rate": 0.0002, "loss": 2.84, "step": 19620 }, { "epoch": 8.43, "learning_rate": 0.0002, "loss": 2.8208, "step": 19625 }, { "epoch": 8.43, "learning_rate": 0.0002, "loss": 2.8587, "step": 19630 }, { "epoch": 8.43, "learning_rate": 0.0002, "loss": 2.8466, "step": 19635 }, { "epoch": 8.43, "learning_rate": 0.0002, "loss": 2.8328, "step": 19640 }, { "epoch": 8.43, "learning_rate": 0.0002, "loss": 2.843, "step": 19645 }, { "epoch": 8.44, "eval_loss": 2.85685396194458, "eval_runtime": 5.4434, "eval_samples_per_second": 1814.498, "eval_steps_per_second": 14.329, "eval_top3_3_weighted_f1_score ": 0.43339844212291834, "eval_top_1_macro_f1_score": 0.06649034474737178, "eval_top_1_weighted_f1score": 0.2085780776842864, "eval_top_3_macro_f1_score": 0.17763883837651623, "step": 19648 }, { "epoch": 8.44, "learning_rate": 0.0002, "loss": 2.836, "step": 19650 }, { "epoch": 8.44, "learning_rate": 0.0002, "loss": 2.8353, "step": 19655 }, { "epoch": 8.44, "learning_rate": 0.0002, "loss": 2.828, "step": 19660 }, { "epoch": 8.44, "learning_rate": 0.0002, "loss": 2.8359, "step": 19665 }, { "epoch": 8.45, "learning_rate": 0.0002, "loss": 2.8407, "step": 19670 }, { "epoch": 8.45, "learning_rate": 0.0002, "loss": 2.8429, "step": 19675 }, { "epoch": 8.45, "learning_rate": 0.0002, "loss": 2.8351, "step": 19680 }, { "epoch": 8.45, "learning_rate": 0.0002, "loss": 2.8458, "step": 19685 }, { "epoch": 8.45, "learning_rate": 0.0002, "loss": 2.8136, "step": 19690 }, { "epoch": 8.46, "learning_rate": 0.0002, "loss": 2.8428, "step": 19695 }, { "epoch": 8.46, "learning_rate": 0.0002, "loss": 2.8403, "step": 19700 }, { "epoch": 8.46, "learning_rate": 0.0002, "loss": 2.8398, "step": 19705 }, { "epoch": 8.46, "learning_rate": 0.0002, "loss": 2.8402, "step": 19710 }, { "epoch": 8.46, "eval_loss": 2.8591010570526123, "eval_runtime": 5.4576, "eval_samples_per_second": 1809.764, "eval_steps_per_second": 14.292, "eval_top3_3_weighted_f1_score ": 0.42780982053014366, "eval_top_1_macro_f1_score": 0.06383286805381078, "eval_top_1_weighted_f1score": 0.21159359693828264, "eval_top_3_macro_f1_score": 0.16860038129077684, "step": 19712 }, { "epoch": 8.46, "learning_rate": 0.0002, "loss": 2.8338, "step": 19715 }, { "epoch": 8.47, "learning_rate": 0.0002, "loss": 2.8248, "step": 19720 }, { "epoch": 8.47, "learning_rate": 0.0002, "loss": 2.8464, "step": 19725 }, { "epoch": 8.47, "learning_rate": 0.0002, "loss": 2.8248, "step": 19730 }, { "epoch": 8.47, "learning_rate": 0.0002, "loss": 2.8232, "step": 19735 }, { "epoch": 8.48, "learning_rate": 0.0002, "loss": 2.8319, "step": 19740 }, { "epoch": 8.48, "learning_rate": 0.0002, "loss": 2.854, "step": 19745 }, { "epoch": 8.48, "learning_rate": 0.0002, "loss": 2.8439, "step": 19750 }, { "epoch": 8.48, "learning_rate": 0.0002, "loss": 2.8221, "step": 19755 }, { "epoch": 8.48, "learning_rate": 0.0002, "loss": 2.8253, "step": 19760 }, { "epoch": 8.49, "learning_rate": 0.0002, "loss": 2.8374, "step": 19765 }, { "epoch": 8.49, "learning_rate": 0.0002, "loss": 2.8457, "step": 19770 }, { "epoch": 8.49, "learning_rate": 0.0002, "loss": 2.8446, "step": 19775 }, { "epoch": 8.49, "eval_loss": 2.853350877761841, "eval_runtime": 5.7598, "eval_samples_per_second": 1714.83, "eval_steps_per_second": 13.542, "eval_top3_3_weighted_f1_score ": 0.42941646595194505, "eval_top_1_macro_f1_score": 0.06501042798199738, "eval_top_1_weighted_f1score": 0.21478077419947375, "eval_top_3_macro_f1_score": 0.1626845115439078, "step": 19776 }, { "epoch": 8.49, "learning_rate": 0.0002, "loss": 2.8385, "step": 19780 }, { "epoch": 8.49, "learning_rate": 0.0002, "loss": 2.8462, "step": 19785 }, { "epoch": 8.5, "learning_rate": 0.0002, "loss": 2.8443, "step": 19790 }, { "epoch": 8.5, "learning_rate": 0.0002, "loss": 2.8409, "step": 19795 }, { "epoch": 8.5, "learning_rate": 0.0002, "loss": 2.8506, "step": 19800 }, { "epoch": 8.5, "learning_rate": 0.0002, "loss": 2.8418, "step": 19805 }, { "epoch": 8.51, "learning_rate": 0.0002, "loss": 2.8459, "step": 19810 }, { "epoch": 8.51, "learning_rate": 0.0002, "loss": 2.8261, "step": 19815 }, { "epoch": 8.51, "learning_rate": 0.0002, "loss": 2.8281, "step": 19820 }, { "epoch": 8.51, "learning_rate": 0.0002, "loss": 2.8418, "step": 19825 }, { "epoch": 8.51, "learning_rate": 0.0002, "loss": 2.8225, "step": 19830 }, { "epoch": 8.52, "learning_rate": 0.0002, "loss": 2.8432, "step": 19835 }, { "epoch": 8.52, "learning_rate": 0.0002, "loss": 2.8311, "step": 19840 }, { "epoch": 8.52, "eval_loss": 2.854144334793091, "eval_runtime": 5.3638, "eval_samples_per_second": 1841.435, "eval_steps_per_second": 14.542, "eval_top3_3_weighted_f1_score ": 0.43159365451982973, "eval_top_1_macro_f1_score": 0.06598902755694942, "eval_top_1_weighted_f1score": 0.206736239520406, "eval_top_3_macro_f1_score": 0.17230136260454781, "step": 19840 }, { "epoch": 8.52, "learning_rate": 0.0002, "loss": 2.8188, "step": 19845 }, { "epoch": 8.52, "learning_rate": 0.0002, "loss": 2.8321, "step": 19850 }, { "epoch": 8.53, "learning_rate": 0.0002, "loss": 2.8326, "step": 19855 }, { "epoch": 8.53, "learning_rate": 0.0002, "loss": 2.8259, "step": 19860 }, { "epoch": 8.53, "learning_rate": 0.0002, "loss": 2.8292, "step": 19865 }, { "epoch": 8.53, "learning_rate": 0.0002, "loss": 2.8334, "step": 19870 }, { "epoch": 8.53, "learning_rate": 0.0002, "loss": 2.8367, "step": 19875 }, { "epoch": 8.54, "learning_rate": 0.0002, "loss": 2.8444, "step": 19880 }, { "epoch": 8.54, "learning_rate": 0.0002, "loss": 2.8333, "step": 19885 }, { "epoch": 8.54, "learning_rate": 0.0002, "loss": 2.8253, "step": 19890 }, { "epoch": 8.54, "learning_rate": 0.0002, "loss": 2.8226, "step": 19895 }, { "epoch": 8.54, "learning_rate": 0.0002, "loss": 2.8468, "step": 19900 }, { "epoch": 8.55, "eval_loss": 2.85261607170105, "eval_runtime": 5.3816, "eval_samples_per_second": 1835.334, "eval_steps_per_second": 14.494, "eval_top3_3_weighted_f1_score ": 0.4352208097228637, "eval_top_1_macro_f1_score": 0.06523791958540971, "eval_top_1_weighted_f1score": 0.20880045846759562, "eval_top_3_macro_f1_score": 0.18290744854017976, "step": 19904 }, { "epoch": 8.55, "learning_rate": 0.0002, "loss": 2.8367, "step": 19905 }, { "epoch": 8.55, "learning_rate": 0.0002, "loss": 2.8336, "step": 19910 }, { "epoch": 8.55, "learning_rate": 0.0002, "loss": 2.8462, "step": 19915 }, { "epoch": 8.55, "learning_rate": 0.0002, "loss": 2.8208, "step": 19920 }, { "epoch": 8.56, "learning_rate": 0.0002, "loss": 2.8297, "step": 19925 }, { "epoch": 8.56, "learning_rate": 0.0002, "loss": 2.8514, "step": 19930 }, { "epoch": 8.56, "learning_rate": 0.0002, "loss": 2.8288, "step": 19935 }, { "epoch": 8.56, "learning_rate": 0.0002, "loss": 2.8159, "step": 19940 }, { "epoch": 8.56, "learning_rate": 0.0002, "loss": 2.8222, "step": 19945 }, { "epoch": 8.57, "learning_rate": 0.0002, "loss": 2.8423, "step": 19950 }, { "epoch": 8.57, "learning_rate": 0.0002, "loss": 2.8326, "step": 19955 }, { "epoch": 8.57, "learning_rate": 0.0002, "loss": 2.8548, "step": 19960 }, { "epoch": 8.57, "learning_rate": 0.0002, "loss": 2.8384, "step": 19965 }, { "epoch": 8.57, "eval_loss": 2.851257562637329, "eval_runtime": 5.3655, "eval_samples_per_second": 1840.844, "eval_steps_per_second": 14.537, "eval_top3_3_weighted_f1_score ": 0.4299533869655781, "eval_top_1_macro_f1_score": 0.06771736772473094, "eval_top_1_weighted_f1score": 0.21492597330227603, "eval_top_3_macro_f1_score": 0.18193064565020653, "step": 19968 }, { "epoch": 8.57, "learning_rate": 0.0002, "loss": 2.8149, "step": 19970 }, { "epoch": 8.58, "learning_rate": 0.0002, "loss": 2.8542, "step": 19975 }, { "epoch": 8.58, "learning_rate": 0.0002, "loss": 2.8327, "step": 19980 }, { "epoch": 8.58, "learning_rate": 0.0002, "loss": 2.8409, "step": 19985 }, { "epoch": 8.58, "learning_rate": 0.0002, "loss": 2.8427, "step": 19990 }, { "epoch": 8.59, "learning_rate": 0.0002, "loss": 2.8305, "step": 19995 }, { "epoch": 8.59, "learning_rate": 0.0002, "loss": 2.8304, "step": 20000 }, { "epoch": 8.59, "learning_rate": 0.0002, "loss": 2.8322, "step": 20005 }, { "epoch": 8.59, "learning_rate": 0.0002, "loss": 2.8362, "step": 20010 }, { "epoch": 8.59, "learning_rate": 0.0002, "loss": 2.8327, "step": 20015 }, { "epoch": 8.6, "learning_rate": 0.0002, "loss": 2.831, "step": 20020 }, { "epoch": 8.6, "learning_rate": 0.0002, "loss": 2.837, "step": 20025 }, { "epoch": 8.6, "learning_rate": 0.0002, "loss": 2.8361, "step": 20030 }, { "epoch": 8.6, "eval_loss": 2.8540263175964355, "eval_runtime": 5.3347, "eval_samples_per_second": 1851.452, "eval_steps_per_second": 14.621, "eval_top3_3_weighted_f1_score ": 0.4341394672567419, "eval_top_1_macro_f1_score": 0.06408709442512175, "eval_top_1_weighted_f1score": 0.2087100711924547, "eval_top_3_macro_f1_score": 0.18112913049426627, "step": 20032 }, { "epoch": 8.6, "learning_rate": 0.0002, "loss": 2.8324, "step": 20035 }, { "epoch": 8.6, "learning_rate": 0.0002, "loss": 2.8269, "step": 20040 }, { "epoch": 8.61, "learning_rate": 0.0002, "loss": 2.8605, "step": 20045 }, { "epoch": 8.61, "learning_rate": 0.0002, "loss": 2.8267, "step": 20050 }, { "epoch": 8.61, "learning_rate": 0.0002, "loss": 2.8079, "step": 20055 }, { "epoch": 8.61, "learning_rate": 0.0002, "loss": 2.8415, "step": 20060 }, { "epoch": 8.62, "learning_rate": 0.0002, "loss": 2.8371, "step": 20065 }, { "epoch": 8.62, "learning_rate": 0.0002, "loss": 2.8626, "step": 20070 }, { "epoch": 8.62, "learning_rate": 0.0002, "loss": 2.8272, "step": 20075 }, { "epoch": 8.62, "learning_rate": 0.0002, "loss": 2.8198, "step": 20080 }, { "epoch": 8.62, "learning_rate": 0.0002, "loss": 2.8148, "step": 20085 }, { "epoch": 8.63, "learning_rate": 0.0002, "loss": 2.8292, "step": 20090 }, { "epoch": 8.63, "learning_rate": 0.0002, "loss": 2.8567, "step": 20095 }, { "epoch": 8.63, "eval_loss": 2.856651544570923, "eval_runtime": 5.402, "eval_samples_per_second": 1828.381, "eval_steps_per_second": 14.439, "eval_top3_3_weighted_f1_score ": 0.4282551261947818, "eval_top_1_macro_f1_score": 0.0644853415655072, "eval_top_1_weighted_f1score": 0.21121483324746365, "eval_top_3_macro_f1_score": 0.18130103612404322, "step": 20096 }, { "epoch": 8.63, "learning_rate": 0.0002, "loss": 2.8408, "step": 20100 }, { "epoch": 8.63, "learning_rate": 0.0002, "loss": 2.8365, "step": 20105 }, { "epoch": 8.63, "learning_rate": 0.0002, "loss": 2.8453, "step": 20110 }, { "epoch": 8.64, "learning_rate": 0.0002, "loss": 2.8165, "step": 20115 }, { "epoch": 8.64, "learning_rate": 0.0002, "loss": 2.8284, "step": 20120 }, { "epoch": 8.64, "learning_rate": 0.0002, "loss": 2.829, "step": 20125 }, { "epoch": 8.64, "learning_rate": 0.0002, "loss": 2.8144, "step": 20130 }, { "epoch": 8.65, "learning_rate": 0.0002, "loss": 2.8423, "step": 20135 }, { "epoch": 8.65, "learning_rate": 0.0002, "loss": 2.8309, "step": 20140 }, { "epoch": 8.65, "learning_rate": 0.0002, "loss": 2.8246, "step": 20145 }, { "epoch": 8.65, "learning_rate": 0.0002, "loss": 2.8331, "step": 20150 }, { "epoch": 8.65, "learning_rate": 0.0002, "loss": 2.8482, "step": 20155 }, { "epoch": 8.66, "learning_rate": 0.0002, "loss": 2.8177, "step": 20160 }, { "epoch": 8.66, "eval_loss": 2.8480165004730225, "eval_runtime": 5.5042, "eval_samples_per_second": 1794.449, "eval_steps_per_second": 14.171, "eval_top3_3_weighted_f1_score ": 0.43337207017074075, "eval_top_1_macro_f1_score": 0.06480975786761277, "eval_top_1_weighted_f1score": 0.21026838409829918, "eval_top_3_macro_f1_score": 0.17932563774639834, "step": 20160 }, { "epoch": 8.66, "learning_rate": 0.0002, "loss": 2.8395, "step": 20165 }, { "epoch": 8.66, "learning_rate": 0.0002, "loss": 2.8484, "step": 20170 }, { "epoch": 8.66, "learning_rate": 0.0002, "loss": 2.8321, "step": 20175 }, { "epoch": 8.66, "learning_rate": 0.0002, "loss": 2.8314, "step": 20180 }, { "epoch": 8.67, "learning_rate": 0.0002, "loss": 2.8154, "step": 20185 }, { "epoch": 8.67, "learning_rate": 0.0002, "loss": 2.8542, "step": 20190 }, { "epoch": 8.67, "learning_rate": 0.0002, "loss": 2.8317, "step": 20195 }, { "epoch": 8.67, "learning_rate": 0.0002, "loss": 2.8338, "step": 20200 }, { "epoch": 8.68, "learning_rate": 0.0002, "loss": 2.8433, "step": 20205 }, { "epoch": 8.68, "learning_rate": 0.0002, "loss": 2.8187, "step": 20210 }, { "epoch": 8.68, "learning_rate": 0.0002, "loss": 2.8124, "step": 20215 }, { "epoch": 8.68, "learning_rate": 0.0002, "loss": 2.8611, "step": 20220 }, { "epoch": 8.68, "eval_loss": 2.8530097007751465, "eval_runtime": 5.7222, "eval_samples_per_second": 1726.071, "eval_steps_per_second": 13.631, "eval_top3_3_weighted_f1_score ": 0.43243047676301666, "eval_top_1_macro_f1_score": 0.06911414138931672, "eval_top_1_weighted_f1score": 0.21065873269725666, "eval_top_3_macro_f1_score": 0.18240826040338734, "step": 20224 }, { "epoch": 8.68, "learning_rate": 0.0002, "loss": 2.825, "step": 20225 }, { "epoch": 8.69, "learning_rate": 0.0002, "loss": 2.833, "step": 20230 }, { "epoch": 8.69, "learning_rate": 0.0002, "loss": 2.8455, "step": 20235 }, { "epoch": 8.69, "learning_rate": 0.0002, "loss": 2.8362, "step": 20240 }, { "epoch": 8.69, "learning_rate": 0.0002, "loss": 2.8433, "step": 20245 }, { "epoch": 8.69, "learning_rate": 0.0002, "loss": 2.8191, "step": 20250 }, { "epoch": 8.7, "learning_rate": 0.0002, "loss": 2.8268, "step": 20255 }, { "epoch": 8.7, "learning_rate": 0.0002, "loss": 2.8234, "step": 20260 }, { "epoch": 8.7, "learning_rate": 0.0002, "loss": 2.848, "step": 20265 }, { "epoch": 8.7, "learning_rate": 0.0002, "loss": 2.8324, "step": 20270 }, { "epoch": 8.71, "learning_rate": 0.0002, "loss": 2.8212, "step": 20275 }, { "epoch": 8.71, "learning_rate": 0.0002, "loss": 2.8213, "step": 20280 }, { "epoch": 8.71, "learning_rate": 0.0002, "loss": 2.8327, "step": 20285 }, { "epoch": 8.71, "eval_loss": 2.8486971855163574, "eval_runtime": 5.5563, "eval_samples_per_second": 1777.621, "eval_steps_per_second": 14.038, "eval_top3_3_weighted_f1_score ": 0.4321731847599356, "eval_top_1_macro_f1_score": 0.06616773147903071, "eval_top_1_weighted_f1score": 0.20913986319352837, "eval_top_3_macro_f1_score": 0.1849872128022307, "step": 20288 }, { "epoch": 8.71, "learning_rate": 0.0002, "loss": 2.83, "step": 20290 }, { "epoch": 8.71, "learning_rate": 0.0002, "loss": 2.8372, "step": 20295 }, { "epoch": 8.72, "learning_rate": 0.0002, "loss": 2.8508, "step": 20300 }, { "epoch": 8.72, "learning_rate": 0.0002, "loss": 2.8209, "step": 20305 }, { "epoch": 8.72, "learning_rate": 0.0002, "loss": 2.8513, "step": 20310 }, { "epoch": 8.72, "learning_rate": 0.0002, "loss": 2.8444, "step": 20315 }, { "epoch": 8.72, "learning_rate": 0.0002, "loss": 2.8394, "step": 20320 }, { "epoch": 8.73, "learning_rate": 0.0002, "loss": 2.8323, "step": 20325 }, { "epoch": 8.73, "learning_rate": 0.0002, "loss": 2.8308, "step": 20330 }, { "epoch": 8.73, "learning_rate": 0.0002, "loss": 2.8421, "step": 20335 }, { "epoch": 8.73, "learning_rate": 0.0002, "loss": 2.8274, "step": 20340 }, { "epoch": 8.74, "learning_rate": 0.0002, "loss": 2.83, "step": 20345 }, { "epoch": 8.74, "learning_rate": 0.0002, "loss": 2.8313, "step": 20350 }, { "epoch": 8.74, "eval_loss": 2.8561837673187256, "eval_runtime": 5.4564, "eval_samples_per_second": 1810.166, "eval_steps_per_second": 14.295, "eval_top3_3_weighted_f1_score ": 0.4329660646002272, "eval_top_1_macro_f1_score": 0.06357845100388941, "eval_top_1_weighted_f1score": 0.20728980571997332, "eval_top_3_macro_f1_score": 0.1785247873659485, "step": 20352 }, { "epoch": 8.74, "learning_rate": 0.0002, "loss": 2.8452, "step": 20355 }, { "epoch": 8.74, "learning_rate": 0.0002, "loss": 2.8215, "step": 20360 }, { "epoch": 8.74, "learning_rate": 0.0002, "loss": 2.8143, "step": 20365 }, { "epoch": 8.75, "learning_rate": 0.0002, "loss": 2.8374, "step": 20370 }, { "epoch": 8.75, "learning_rate": 0.0002, "loss": 2.8293, "step": 20375 }, { "epoch": 8.75, "learning_rate": 0.0002, "loss": 2.8326, "step": 20380 }, { "epoch": 8.75, "learning_rate": 0.0002, "loss": 2.8355, "step": 20385 }, { "epoch": 8.75, "learning_rate": 0.0002, "loss": 2.8294, "step": 20390 }, { "epoch": 8.76, "learning_rate": 0.0002, "loss": 2.8186, "step": 20395 }, { "epoch": 8.76, "learning_rate": 0.0002, "loss": 2.8395, "step": 20400 }, { "epoch": 8.76, "learning_rate": 0.0002, "loss": 2.8409, "step": 20405 }, { "epoch": 8.76, "learning_rate": 0.0002, "loss": 2.8485, "step": 20410 }, { "epoch": 8.77, "learning_rate": 0.0002, "loss": 2.8467, "step": 20415 }, { "epoch": 8.77, "eval_loss": 2.853515148162842, "eval_runtime": 5.8751, "eval_samples_per_second": 1681.162, "eval_steps_per_second": 13.276, "eval_top3_3_weighted_f1_score ": 0.42716829290360553, "eval_top_1_macro_f1_score": 0.062442550388123255, "eval_top_1_weighted_f1score": 0.211065276644501, "eval_top_3_macro_f1_score": 0.17624991701191145, "step": 20416 }, { "epoch": 8.77, "learning_rate": 0.0002, "loss": 2.8272, "step": 20420 }, { "epoch": 8.77, "learning_rate": 0.0002, "loss": 2.834, "step": 20425 }, { "epoch": 8.77, "learning_rate": 0.0002, "loss": 2.8309, "step": 20430 }, { "epoch": 8.77, "learning_rate": 0.0002, "loss": 2.8284, "step": 20435 }, { "epoch": 8.78, "learning_rate": 0.0002, "loss": 2.8232, "step": 20440 }, { "epoch": 8.78, "learning_rate": 0.0002, "loss": 2.8365, "step": 20445 }, { "epoch": 8.78, "learning_rate": 0.0002, "loss": 2.8449, "step": 20450 }, { "epoch": 8.78, "learning_rate": 0.0002, "loss": 2.8339, "step": 20455 }, { "epoch": 8.78, "learning_rate": 0.0002, "loss": 2.8371, "step": 20460 }, { "epoch": 8.79, "learning_rate": 0.0002, "loss": 2.8224, "step": 20465 }, { "epoch": 8.79, "learning_rate": 0.0002, "loss": 2.8379, "step": 20470 }, { "epoch": 8.79, "learning_rate": 0.0002, "loss": 2.8253, "step": 20475 }, { "epoch": 8.79, "learning_rate": 0.0002, "loss": 2.8317, "step": 20480 }, { "epoch": 8.79, "eval_loss": 2.8553853034973145, "eval_runtime": 5.8394, "eval_samples_per_second": 1691.432, "eval_steps_per_second": 13.357, "eval_top3_3_weighted_f1_score ": 0.4326537990865509, "eval_top_1_macro_f1_score": 0.0681194421241324, "eval_top_1_weighted_f1score": 0.20830574540627783, "eval_top_3_macro_f1_score": 0.1794247304551217, "step": 20480 }, { "epoch": 8.8, "learning_rate": 0.0002, "loss": 2.8429, "step": 20485 }, { "epoch": 8.8, "learning_rate": 0.0002, "loss": 2.8486, "step": 20490 }, { "epoch": 8.8, "learning_rate": 0.0002, "loss": 2.854, "step": 20495 }, { "epoch": 8.8, "learning_rate": 0.0002, "loss": 2.8296, "step": 20500 }, { "epoch": 8.8, "learning_rate": 0.0002, "loss": 2.8269, "step": 20505 }, { "epoch": 8.81, "learning_rate": 0.0002, "loss": 2.8257, "step": 20510 }, { "epoch": 8.81, "learning_rate": 0.0002, "loss": 2.8182, "step": 20515 }, { "epoch": 8.81, "learning_rate": 0.0002, "loss": 2.8464, "step": 20520 }, { "epoch": 8.81, "learning_rate": 0.0002, "loss": 2.8264, "step": 20525 }, { "epoch": 8.81, "learning_rate": 0.0002, "loss": 2.8184, "step": 20530 }, { "epoch": 8.82, "learning_rate": 0.0002, "loss": 2.8397, "step": 20535 }, { "epoch": 8.82, "learning_rate": 0.0002, "loss": 2.8166, "step": 20540 }, { "epoch": 8.82, "eval_loss": 2.8540971279144287, "eval_runtime": 5.5966, "eval_samples_per_second": 1764.818, "eval_steps_per_second": 13.937, "eval_top3_3_weighted_f1_score ": 0.4336968251093306, "eval_top_1_macro_f1_score": 0.06616061236652564, "eval_top_1_weighted_f1score": 0.2100926076889365, "eval_top_3_macro_f1_score": 0.17929303771621424, "step": 20544 }, { "epoch": 8.82, "learning_rate": 0.0002, "loss": 2.8528, "step": 20545 }, { "epoch": 8.82, "learning_rate": 0.0002, "loss": 2.8265, "step": 20550 }, { "epoch": 8.83, "learning_rate": 0.0002, "loss": 2.8391, "step": 20555 }, { "epoch": 8.83, "learning_rate": 0.0002, "loss": 2.8103, "step": 20560 }, { "epoch": 8.83, "learning_rate": 0.0002, "loss": 2.836, "step": 20565 }, { "epoch": 8.83, "learning_rate": 0.0002, "loss": 2.8286, "step": 20570 }, { "epoch": 8.83, "learning_rate": 0.0002, "loss": 2.846, "step": 20575 }, { "epoch": 8.84, "learning_rate": 0.0002, "loss": 2.834, "step": 20580 }, { "epoch": 8.84, "learning_rate": 0.0002, "loss": 2.8305, "step": 20585 }, { "epoch": 8.84, "learning_rate": 0.0002, "loss": 2.8161, "step": 20590 }, { "epoch": 8.84, "learning_rate": 0.0002, "loss": 2.8352, "step": 20595 }, { "epoch": 8.84, "learning_rate": 0.0002, "loss": 2.8411, "step": 20600 }, { "epoch": 8.85, "learning_rate": 0.0002, "loss": 2.8442, "step": 20605 }, { "epoch": 8.85, "eval_loss": 2.8550314903259277, "eval_runtime": 6.1337, "eval_samples_per_second": 1610.297, "eval_steps_per_second": 12.717, "eval_top3_3_weighted_f1_score ": 0.4326305080382614, "eval_top_1_macro_f1_score": 0.07372309765671434, "eval_top_1_weighted_f1score": 0.2139504239199311, "eval_top_3_macro_f1_score": 0.1799990482204693, "step": 20608 }, { "epoch": 8.85, "learning_rate": 0.0002, "loss": 2.8244, "step": 20610 }, { "epoch": 8.85, "learning_rate": 0.0002, "loss": 2.8222, "step": 20615 }, { "epoch": 8.85, "learning_rate": 0.0002, "loss": 2.8622, "step": 20620 }, { "epoch": 8.86, "learning_rate": 0.0002, "loss": 2.8437, "step": 20625 }, { "epoch": 8.86, "learning_rate": 0.0002, "loss": 2.832, "step": 20630 }, { "epoch": 8.86, "learning_rate": 0.0002, "loss": 2.8459, "step": 20635 }, { "epoch": 8.86, "learning_rate": 0.0002, "loss": 2.8278, "step": 20640 }, { "epoch": 8.86, "learning_rate": 0.0002, "loss": 2.841, "step": 20645 }, { "epoch": 8.87, "learning_rate": 0.0002, "loss": 2.8278, "step": 20650 }, { "epoch": 8.87, "learning_rate": 0.0002, "loss": 2.8249, "step": 20655 }, { "epoch": 8.87, "learning_rate": 0.0002, "loss": 2.832, "step": 20660 }, { "epoch": 8.87, "learning_rate": 0.0002, "loss": 2.8463, "step": 20665 }, { "epoch": 8.87, "learning_rate": 0.0002, "loss": 2.8365, "step": 20670 }, { "epoch": 8.88, "eval_loss": 2.8521645069122314, "eval_runtime": 5.5183, "eval_samples_per_second": 1789.869, "eval_steps_per_second": 14.135, "eval_top3_3_weighted_f1_score ": 0.4288653183056263, "eval_top_1_macro_f1_score": 0.06953153388721477, "eval_top_1_weighted_f1score": 0.21221184352327754, "eval_top_3_macro_f1_score": 0.18016623182443242, "step": 20672 }, { "epoch": 8.88, "learning_rate": 0.0002, "loss": 2.8273, "step": 20675 }, { "epoch": 8.88, "learning_rate": 0.0002, "loss": 2.8244, "step": 20680 }, { "epoch": 8.88, "learning_rate": 0.0002, "loss": 2.8392, "step": 20685 }, { "epoch": 8.88, "learning_rate": 0.0002, "loss": 2.8325, "step": 20690 }, { "epoch": 8.89, "learning_rate": 0.0002, "loss": 2.8501, "step": 20695 }, { "epoch": 8.89, "learning_rate": 0.0002, "loss": 2.8462, "step": 20700 }, { "epoch": 8.89, "learning_rate": 0.0002, "loss": 2.8094, "step": 20705 }, { "epoch": 8.89, "learning_rate": 0.0002, "loss": 2.832, "step": 20710 }, { "epoch": 8.89, "learning_rate": 0.0002, "loss": 2.8426, "step": 20715 }, { "epoch": 8.9, "learning_rate": 0.0002, "loss": 2.8392, "step": 20720 }, { "epoch": 8.9, "learning_rate": 0.0002, "loss": 2.8419, "step": 20725 }, { "epoch": 8.9, "learning_rate": 0.0002, "loss": 2.8351, "step": 20730 }, { "epoch": 8.9, "learning_rate": 0.0002, "loss": 2.8333, "step": 20735 }, { "epoch": 8.9, "eval_loss": 2.8555121421813965, "eval_runtime": 5.4467, "eval_samples_per_second": 1813.399, "eval_steps_per_second": 14.321, "eval_top3_3_weighted_f1_score ": 0.43004321131135825, "eval_top_1_macro_f1_score": 0.06994283542784271, "eval_top_1_weighted_f1score": 0.21361213602172482, "eval_top_3_macro_f1_score": 0.17755904404896694, "step": 20736 }, { "epoch": 8.9, "learning_rate": 0.0002, "loss": 2.8459, "step": 20740 }, { "epoch": 8.91, "learning_rate": 0.0002, "loss": 2.8369, "step": 20745 }, { "epoch": 8.91, "learning_rate": 0.0002, "loss": 2.8176, "step": 20750 }, { "epoch": 8.91, "learning_rate": 0.0002, "loss": 2.8389, "step": 20755 }, { "epoch": 8.91, "learning_rate": 0.0002, "loss": 2.8273, "step": 20760 }, { "epoch": 8.92, "learning_rate": 0.0002, "loss": 2.8363, "step": 20765 }, { "epoch": 8.92, "learning_rate": 0.0002, "loss": 2.8606, "step": 20770 }, { "epoch": 8.92, "learning_rate": 0.0002, "loss": 2.8383, "step": 20775 }, { "epoch": 8.92, "learning_rate": 0.0002, "loss": 2.8403, "step": 20780 }, { "epoch": 8.92, "learning_rate": 0.0002, "loss": 2.8345, "step": 20785 }, { "epoch": 8.93, "learning_rate": 0.0002, "loss": 2.8098, "step": 20790 }, { "epoch": 8.93, "learning_rate": 0.0002, "loss": 2.841, "step": 20795 }, { "epoch": 8.93, "learning_rate": 0.0002, "loss": 2.8349, "step": 20800 }, { "epoch": 8.93, "eval_loss": 2.8507375717163086, "eval_runtime": 5.4642, "eval_samples_per_second": 1807.593, "eval_steps_per_second": 14.275, "eval_top3_3_weighted_f1_score ": 0.4316537377435325, "eval_top_1_macro_f1_score": 0.06610656325564745, "eval_top_1_weighted_f1score": 0.20977739744610047, "eval_top_3_macro_f1_score": 0.18079424819850132, "step": 20800 }, { "epoch": 8.93, "learning_rate": 0.0002, "loss": 2.8301, "step": 20805 }, { "epoch": 8.93, "learning_rate": 0.0002, "loss": 2.8341, "step": 20810 }, { "epoch": 8.94, "learning_rate": 0.0002, "loss": 2.8262, "step": 20815 }, { "epoch": 8.94, "learning_rate": 0.0002, "loss": 2.8387, "step": 20820 }, { "epoch": 8.94, "learning_rate": 0.0002, "loss": 2.8353, "step": 20825 }, { "epoch": 8.94, "learning_rate": 0.0002, "loss": 2.8426, "step": 20830 }, { "epoch": 8.95, "learning_rate": 0.0002, "loss": 2.8469, "step": 20835 }, { "epoch": 8.95, "learning_rate": 0.0002, "loss": 2.8444, "step": 20840 }, { "epoch": 8.95, "learning_rate": 0.0002, "loss": 2.8151, "step": 20845 }, { "epoch": 8.95, "learning_rate": 0.0002, "loss": 2.8364, "step": 20850 }, { "epoch": 8.95, "learning_rate": 0.0002, "loss": 2.8322, "step": 20855 }, { "epoch": 8.96, "learning_rate": 0.0002, "loss": 2.8369, "step": 20860 }, { "epoch": 8.96, "eval_loss": 2.847623825073242, "eval_runtime": 5.9068, "eval_samples_per_second": 1672.139, "eval_steps_per_second": 13.205, "eval_top3_3_weighted_f1_score ": 0.4303577125006042, "eval_top_1_macro_f1_score": 0.06564107838098014, "eval_top_1_weighted_f1score": 0.2122386192591822, "eval_top_3_macro_f1_score": 0.17716679614969025, "step": 20864 }, { "epoch": 8.96, "learning_rate": 0.0002, "loss": 2.8349, "step": 20865 }, { "epoch": 8.96, "learning_rate": 0.0002, "loss": 2.8263, "step": 20870 }, { "epoch": 8.96, "learning_rate": 0.0002, "loss": 2.835, "step": 20875 }, { "epoch": 8.97, "learning_rate": 0.0002, "loss": 2.8356, "step": 20880 }, { "epoch": 8.97, "learning_rate": 0.0002, "loss": 2.836, "step": 20885 }, { "epoch": 8.97, "learning_rate": 0.0002, "loss": 2.8329, "step": 20890 }, { "epoch": 8.97, "learning_rate": 0.0002, "loss": 2.842, "step": 20895 }, { "epoch": 8.97, "learning_rate": 0.0002, "loss": 2.8187, "step": 20900 }, { "epoch": 8.98, "learning_rate": 0.0002, "loss": 2.8351, "step": 20905 }, { "epoch": 8.98, "learning_rate": 0.0002, "loss": 2.8364, "step": 20910 }, { "epoch": 8.98, "learning_rate": 0.0002, "loss": 2.8261, "step": 20915 }, { "epoch": 8.98, "learning_rate": 0.0002, "loss": 2.8202, "step": 20920 }, { "epoch": 8.98, "learning_rate": 0.0002, "loss": 2.8255, "step": 20925 }, { "epoch": 8.99, "eval_loss": 2.8509836196899414, "eval_runtime": 5.4318, "eval_samples_per_second": 1818.367, "eval_steps_per_second": 14.36, "eval_top3_3_weighted_f1_score ": 0.432648431099353, "eval_top_1_macro_f1_score": 0.06447750563996495, "eval_top_1_weighted_f1score": 0.21153075665082782, "eval_top_3_macro_f1_score": 0.17601003529362458, "step": 20928 }, { "epoch": 8.99, "learning_rate": 0.0002, "loss": 2.8201, "step": 20930 }, { "epoch": 8.99, "learning_rate": 0.0002, "loss": 2.8235, "step": 20935 }, { "epoch": 8.99, "learning_rate": 0.0002, "loss": 2.8373, "step": 20940 }, { "epoch": 8.99, "learning_rate": 0.0002, "loss": 2.8281, "step": 20945 }, { "epoch": 9.0, "learning_rate": 0.0002, "loss": 2.8441, "step": 20950 }, { "epoch": 9.0, "learning_rate": 0.0002, "loss": 2.8454, "step": 20955 }, { "epoch": 9.0, "learning_rate": 0.0002, "loss": 2.8238, "step": 20960 }, { "epoch": 9.0, "learning_rate": 0.0002, "loss": 3.0734, "step": 20965 }, { "epoch": 9.0, "learning_rate": 0.0002, "loss": 2.8411, "step": 20970 }, { "epoch": 9.01, "learning_rate": 0.0002, "loss": 2.8194, "step": 20975 }, { "epoch": 9.01, "learning_rate": 0.0002, "loss": 2.8416, "step": 20980 }, { "epoch": 9.01, "learning_rate": 0.0002, "loss": 2.8319, "step": 20985 }, { "epoch": 9.01, "learning_rate": 0.0002, "loss": 2.8336, "step": 20990 }, { "epoch": 9.01, "eval_loss": 2.8559987545013428, "eval_runtime": 5.5014, "eval_samples_per_second": 1795.368, "eval_steps_per_second": 14.178, "eval_top3_3_weighted_f1_score ": 0.4289481852333947, "eval_top_1_macro_f1_score": 0.06149344404550756, "eval_top_1_weighted_f1score": 0.20998515238494447, "eval_top_3_macro_f1_score": 0.17319624072739703, "step": 20992 }, { "epoch": 9.01, "learning_rate": 0.0002, "loss": 2.8448, "step": 20995 }, { "epoch": 9.02, "learning_rate": 0.0002, "loss": 2.8188, "step": 21000 }, { "epoch": 9.02, "learning_rate": 0.0002, "loss": 2.8309, "step": 21005 }, { "epoch": 9.02, "learning_rate": 0.0002, "loss": 2.8224, "step": 21010 }, { "epoch": 9.02, "learning_rate": 0.0002, "loss": 2.8382, "step": 21015 }, { "epoch": 9.03, "learning_rate": 0.0002, "loss": 2.8143, "step": 21020 }, { "epoch": 9.03, "learning_rate": 0.0002, "loss": 2.8152, "step": 21025 }, { "epoch": 9.03, "learning_rate": 0.0002, "loss": 2.8295, "step": 21030 }, { "epoch": 9.03, "learning_rate": 0.0002, "loss": 2.83, "step": 21035 }, { "epoch": 9.03, "learning_rate": 0.0002, "loss": 2.8305, "step": 21040 }, { "epoch": 9.04, "learning_rate": 0.0002, "loss": 2.837, "step": 21045 }, { "epoch": 9.04, "learning_rate": 0.0002, "loss": 2.8118, "step": 21050 }, { "epoch": 9.04, "learning_rate": 0.0002, "loss": 2.8202, "step": 21055 }, { "epoch": 9.04, "eval_loss": 2.854757785797119, "eval_runtime": 10.7174, "eval_samples_per_second": 921.582, "eval_steps_per_second": 7.278, "eval_top3_3_weighted_f1_score ": 0.4290146721185138, "eval_top_1_macro_f1_score": 0.0650658667462059, "eval_top_1_weighted_f1score": 0.21017897554909437, "eval_top_3_macro_f1_score": 0.1747978277239454, "step": 21056 }, { "epoch": 9.04, "learning_rate": 0.0002, "loss": 2.8215, "step": 21060 }, { "epoch": 9.04, "learning_rate": 0.0002, "loss": 2.8141, "step": 21065 }, { "epoch": 9.05, "learning_rate": 0.0002, "loss": 2.852, "step": 21070 }, { "epoch": 9.05, "learning_rate": 0.0002, "loss": 2.835, "step": 21075 }, { "epoch": 9.05, "learning_rate": 0.0002, "loss": 2.8165, "step": 21080 }, { "epoch": 9.05, "learning_rate": 0.0002, "loss": 2.8322, "step": 21085 }, { "epoch": 9.06, "learning_rate": 0.0002, "loss": 2.8475, "step": 21090 }, { "epoch": 9.06, "learning_rate": 0.0002, "loss": 2.8225, "step": 21095 }, { "epoch": 9.06, "learning_rate": 0.0002, "loss": 2.8358, "step": 21100 }, { "epoch": 9.06, "learning_rate": 0.0002, "loss": 2.8176, "step": 21105 }, { "epoch": 9.06, "learning_rate": 0.0002, "loss": 2.8112, "step": 21110 }, { "epoch": 9.07, "learning_rate": 0.0002, "loss": 2.8296, "step": 21115 }, { "epoch": 9.07, "learning_rate": 0.0002, "loss": 2.8352, "step": 21120 }, { "epoch": 9.07, "eval_loss": 2.8495287895202637, "eval_runtime": 5.4299, "eval_samples_per_second": 1819.009, "eval_steps_per_second": 14.365, "eval_top3_3_weighted_f1_score ": 0.4308293286894479, "eval_top_1_macro_f1_score": 0.06681364780074087, "eval_top_1_weighted_f1score": 0.21217147307987427, "eval_top_3_macro_f1_score": 0.17689882794775566, "step": 21120 }, { "epoch": 9.07, "learning_rate": 0.0002, "loss": 2.8436, "step": 21125 }, { "epoch": 9.07, "learning_rate": 0.0002, "loss": 2.8252, "step": 21130 }, { "epoch": 9.07, "learning_rate": 0.0002, "loss": 2.8181, "step": 21135 }, { "epoch": 9.08, "learning_rate": 0.0002, "loss": 2.8324, "step": 21140 }, { "epoch": 9.08, "learning_rate": 0.0002, "loss": 2.8365, "step": 21145 }, { "epoch": 9.08, "learning_rate": 0.0002, "loss": 2.8105, "step": 21150 }, { "epoch": 9.08, "learning_rate": 0.0002, "loss": 2.8363, "step": 21155 }, { "epoch": 9.09, "learning_rate": 0.0002, "loss": 2.8243, "step": 21160 }, { "epoch": 9.09, "learning_rate": 0.0002, "loss": 2.8268, "step": 21165 }, { "epoch": 9.09, "learning_rate": 0.0002, "loss": 2.8319, "step": 21170 }, { "epoch": 9.09, "learning_rate": 0.0002, "loss": 2.8289, "step": 21175 }, { "epoch": 9.09, "learning_rate": 0.0002, "loss": 2.8203, "step": 21180 }, { "epoch": 9.1, "eval_loss": 2.8540923595428467, "eval_runtime": 6.1385, "eval_samples_per_second": 1609.032, "eval_steps_per_second": 12.707, "eval_top3_3_weighted_f1_score ": 0.43406388949070634, "eval_top_1_macro_f1_score": 0.06571266282943601, "eval_top_1_weighted_f1score": 0.2099046559634731, "eval_top_3_macro_f1_score": 0.18371348679155983, "step": 21184 }, { "epoch": 9.1, "learning_rate": 0.0002, "loss": 2.8121, "step": 21185 }, { "epoch": 9.1, "learning_rate": 0.0002, "loss": 2.834, "step": 21190 }, { "epoch": 9.1, "learning_rate": 0.0002, "loss": 2.8026, "step": 21195 }, { "epoch": 9.1, "learning_rate": 0.0002, "loss": 2.8086, "step": 21200 }, { "epoch": 9.1, "learning_rate": 0.0002, "loss": 2.825, "step": 21205 }, { "epoch": 9.11, "learning_rate": 0.0002, "loss": 2.8041, "step": 21210 }, { "epoch": 9.11, "learning_rate": 0.0002, "loss": 2.84, "step": 21215 }, { "epoch": 9.11, "learning_rate": 0.0002, "loss": 2.8324, "step": 21220 }, { "epoch": 9.11, "learning_rate": 0.0002, "loss": 2.8166, "step": 21225 }, { "epoch": 9.12, "learning_rate": 0.0002, "loss": 2.8193, "step": 21230 }, { "epoch": 9.12, "learning_rate": 0.0002, "loss": 2.8049, "step": 21235 }, { "epoch": 9.12, "learning_rate": 0.0002, "loss": 2.8227, "step": 21240 }, { "epoch": 9.12, "learning_rate": 0.0002, "loss": 2.8306, "step": 21245 }, { "epoch": 9.12, "eval_loss": 2.8501055240631104, "eval_runtime": 5.5695, "eval_samples_per_second": 1773.395, "eval_steps_per_second": 14.005, "eval_top3_3_weighted_f1_score ": 0.43584761181094744, "eval_top_1_macro_f1_score": 0.06445290422447014, "eval_top_1_weighted_f1score": 0.2088455615308982, "eval_top_3_macro_f1_score": 0.1864230245826315, "step": 21248 }, { "epoch": 9.12, "learning_rate": 0.0002, "loss": 2.8164, "step": 21250 }, { "epoch": 9.13, "learning_rate": 0.0002, "loss": 2.819, "step": 21255 }, { "epoch": 9.13, "learning_rate": 0.0002, "loss": 2.8414, "step": 21260 }, { "epoch": 9.13, "learning_rate": 0.0002, "loss": 2.8397, "step": 21265 }, { "epoch": 9.13, "learning_rate": 0.0002, "loss": 2.827, "step": 21270 }, { "epoch": 9.13, "learning_rate": 0.0002, "loss": 2.8136, "step": 21275 }, { "epoch": 9.14, "learning_rate": 0.0002, "loss": 2.8306, "step": 21280 }, { "epoch": 9.14, "learning_rate": 0.0002, "loss": 2.8243, "step": 21285 }, { "epoch": 9.14, "learning_rate": 0.0002, "loss": 2.8259, "step": 21290 }, { "epoch": 9.14, "learning_rate": 0.0002, "loss": 2.8153, "step": 21295 }, { "epoch": 9.15, "learning_rate": 0.0002, "loss": 2.8321, "step": 21300 }, { "epoch": 9.15, "learning_rate": 0.0002, "loss": 2.8507, "step": 21305 }, { "epoch": 9.15, "learning_rate": 0.0002, "loss": 2.8251, "step": 21310 }, { "epoch": 9.15, "eval_loss": 2.851583242416382, "eval_runtime": 5.5855, "eval_samples_per_second": 1768.321, "eval_steps_per_second": 13.965, "eval_top3_3_weighted_f1_score ": 0.43293469928532674, "eval_top_1_macro_f1_score": 0.06847001353805812, "eval_top_1_weighted_f1score": 0.21222443480614808, "eval_top_3_macro_f1_score": 0.18320235700441048, "step": 21312 }, { "epoch": 9.15, "learning_rate": 0.0002, "loss": 2.8166, "step": 21315 }, { "epoch": 9.15, "learning_rate": 0.0002, "loss": 2.8306, "step": 21320 }, { "epoch": 9.16, "learning_rate": 0.0002, "loss": 2.8238, "step": 21325 }, { "epoch": 9.16, "learning_rate": 0.0002, "loss": 2.8382, "step": 21330 }, { "epoch": 9.16, "learning_rate": 0.0002, "loss": 2.8357, "step": 21335 }, { "epoch": 9.16, "learning_rate": 0.0002, "loss": 2.8219, "step": 21340 }, { "epoch": 9.16, "learning_rate": 0.0002, "loss": 2.8116, "step": 21345 }, { "epoch": 9.17, "learning_rate": 0.0002, "loss": 2.8126, "step": 21350 }, { "epoch": 9.17, "learning_rate": 0.0002, "loss": 2.8103, "step": 21355 }, { "epoch": 9.17, "learning_rate": 0.0002, "loss": 2.8098, "step": 21360 }, { "epoch": 9.17, "learning_rate": 0.0002, "loss": 2.8175, "step": 21365 }, { "epoch": 9.18, "learning_rate": 0.0002, "loss": 2.8548, "step": 21370 }, { "epoch": 9.18, "learning_rate": 0.0002, "loss": 2.8343, "step": 21375 }, { "epoch": 9.18, "eval_loss": 2.8495962619781494, "eval_runtime": 25.4192, "eval_samples_per_second": 388.565, "eval_steps_per_second": 3.069, "eval_top3_3_weighted_f1_score ": 0.4305444508763087, "eval_top_1_macro_f1_score": 0.061936007639464566, "eval_top_1_weighted_f1score": 0.20609522102782227, "eval_top_3_macro_f1_score": 0.17489965966499793, "step": 21376 }, { "epoch": 9.18, "learning_rate": 0.0002, "loss": 2.8336, "step": 21380 }, { "epoch": 9.18, "learning_rate": 0.0002, "loss": 2.821, "step": 21385 }, { "epoch": 9.18, "learning_rate": 0.0002, "loss": 2.8326, "step": 21390 }, { "epoch": 9.19, "learning_rate": 0.0002, "loss": 2.8436, "step": 21395 }, { "epoch": 9.19, "learning_rate": 0.0002, "loss": 2.8187, "step": 21400 }, { "epoch": 9.19, "learning_rate": 0.0002, "loss": 2.8061, "step": 21405 }, { "epoch": 9.19, "learning_rate": 0.0002, "loss": 2.8322, "step": 21410 }, { "epoch": 9.19, "learning_rate": 0.0002, "loss": 2.8005, "step": 21415 }, { "epoch": 9.2, "learning_rate": 0.0002, "loss": 2.825, "step": 21420 }, { "epoch": 9.2, "learning_rate": 0.0002, "loss": 2.8152, "step": 21425 }, { "epoch": 9.2, "learning_rate": 0.0002, "loss": 2.8234, "step": 21430 }, { "epoch": 9.2, "learning_rate": 0.0002, "loss": 2.8286, "step": 21435 }, { "epoch": 9.21, "learning_rate": 0.0002, "loss": 2.8279, "step": 21440 }, { "epoch": 9.21, "eval_loss": 2.85322642326355, "eval_runtime": 5.5819, "eval_samples_per_second": 1769.456, "eval_steps_per_second": 13.974, "eval_top3_3_weighted_f1_score ": 0.43037269133521056, "eval_top_1_macro_f1_score": 0.06291767301115053, "eval_top_1_weighted_f1score": 0.2086298451004576, "eval_top_3_macro_f1_score": 0.17855019495783017, "step": 21440 }, { "epoch": 9.21, "learning_rate": 0.0002, "loss": 2.8342, "step": 21445 }, { "epoch": 9.21, "learning_rate": 0.0002, "loss": 2.8205, "step": 21450 }, { "epoch": 9.21, "learning_rate": 0.0002, "loss": 2.8328, "step": 21455 }, { "epoch": 9.21, "learning_rate": 0.0002, "loss": 2.8341, "step": 21460 }, { "epoch": 9.22, "learning_rate": 0.0002, "loss": 2.8249, "step": 21465 }, { "epoch": 9.22, "learning_rate": 0.0002, "loss": 2.8306, "step": 21470 }, { "epoch": 9.22, "learning_rate": 0.0002, "loss": 2.8461, "step": 21475 }, { "epoch": 9.22, "learning_rate": 0.0002, "loss": 2.8158, "step": 21480 }, { "epoch": 9.22, "learning_rate": 0.0002, "loss": 2.8276, "step": 21485 }, { "epoch": 9.23, "learning_rate": 0.0002, "loss": 2.8324, "step": 21490 }, { "epoch": 9.23, "learning_rate": 0.0002, "loss": 2.8111, "step": 21495 }, { "epoch": 9.23, "learning_rate": 0.0002, "loss": 2.8185, "step": 21500 }, { "epoch": 9.23, "eval_loss": 2.8582258224487305, "eval_runtime": 5.912, "eval_samples_per_second": 1670.667, "eval_steps_per_second": 13.193, "eval_top3_3_weighted_f1_score ": 0.4274626782562602, "eval_top_1_macro_f1_score": 0.06607128955554938, "eval_top_1_weighted_f1score": 0.21184014436558643, "eval_top_3_macro_f1_score": 0.18036024703073375, "step": 21504 }, { "epoch": 9.23, "learning_rate": 0.0002, "loss": 2.8181, "step": 21505 }, { "epoch": 9.24, "learning_rate": 0.0002, "loss": 2.8322, "step": 21510 }, { "epoch": 9.24, "learning_rate": 0.0002, "loss": 2.8098, "step": 21515 }, { "epoch": 9.24, "learning_rate": 0.0002, "loss": 2.8165, "step": 21520 }, { "epoch": 9.24, "learning_rate": 0.0002, "loss": 2.8232, "step": 21525 }, { "epoch": 9.24, "learning_rate": 0.0002, "loss": 2.8163, "step": 21530 }, { "epoch": 9.25, "learning_rate": 0.0002, "loss": 2.8181, "step": 21535 }, { "epoch": 9.25, "learning_rate": 0.0002, "loss": 2.8469, "step": 21540 }, { "epoch": 9.25, "learning_rate": 0.0002, "loss": 2.8473, "step": 21545 }, { "epoch": 9.25, "learning_rate": 0.0002, "loss": 2.8175, "step": 21550 }, { "epoch": 9.25, "learning_rate": 0.0002, "loss": 2.827, "step": 21555 }, { "epoch": 9.26, "learning_rate": 0.0002, "loss": 2.814, "step": 21560 }, { "epoch": 9.26, "learning_rate": 0.0002, "loss": 2.8258, "step": 21565 }, { "epoch": 9.26, "eval_loss": 2.8505871295928955, "eval_runtime": 5.4807, "eval_samples_per_second": 1802.15, "eval_steps_per_second": 14.232, "eval_top3_3_weighted_f1_score ": 0.43250106565599944, "eval_top_1_macro_f1_score": 0.06029549034961428, "eval_top_1_weighted_f1score": 0.2089473526350904, "eval_top_3_macro_f1_score": 0.17339420729037616, "step": 21568 }, { "epoch": 9.26, "learning_rate": 0.0002, "loss": 2.8296, "step": 21570 }, { "epoch": 9.26, "learning_rate": 0.0002, "loss": 2.8234, "step": 21575 }, { "epoch": 9.27, "learning_rate": 0.0002, "loss": 2.8388, "step": 21580 }, { "epoch": 9.27, "learning_rate": 0.0002, "loss": 2.8288, "step": 21585 }, { "epoch": 9.27, "learning_rate": 0.0002, "loss": 2.8039, "step": 21590 }, { "epoch": 9.27, "learning_rate": 0.0002, "loss": 2.8232, "step": 21595 }, { "epoch": 9.27, "learning_rate": 0.0002, "loss": 2.8336, "step": 21600 }, { "epoch": 9.28, "learning_rate": 0.0002, "loss": 2.8202, "step": 21605 }, { "epoch": 9.28, "learning_rate": 0.0002, "loss": 2.8094, "step": 21610 }, { "epoch": 9.28, "learning_rate": 0.0002, "loss": 2.8303, "step": 21615 }, { "epoch": 9.28, "learning_rate": 0.0002, "loss": 2.8379, "step": 21620 }, { "epoch": 9.29, "learning_rate": 0.0002, "loss": 2.8256, "step": 21625 }, { "epoch": 9.29, "learning_rate": 0.0002, "loss": 2.8461, "step": 21630 }, { "epoch": 9.29, "eval_loss": 2.851837396621704, "eval_runtime": 5.5414, "eval_samples_per_second": 1782.391, "eval_steps_per_second": 14.076, "eval_top3_3_weighted_f1_score ": 0.43157979362043053, "eval_top_1_macro_f1_score": 0.0714168071148363, "eval_top_1_weighted_f1score": 0.21458997690049192, "eval_top_3_macro_f1_score": 0.17546914981613587, "step": 21632 }, { "epoch": 9.29, "learning_rate": 0.0002, "loss": 2.847, "step": 21635 }, { "epoch": 9.29, "learning_rate": 0.0002, "loss": 2.8119, "step": 21640 }, { "epoch": 9.29, "learning_rate": 0.0002, "loss": 2.8436, "step": 21645 }, { "epoch": 9.3, "learning_rate": 0.0002, "loss": 2.8369, "step": 21650 }, { "epoch": 9.3, "learning_rate": 0.0002, "loss": 2.8357, "step": 21655 }, { "epoch": 9.3, "learning_rate": 0.0002, "loss": 2.845, "step": 21660 }, { "epoch": 9.3, "learning_rate": 0.0002, "loss": 2.8565, "step": 21665 }, { "epoch": 9.3, "learning_rate": 0.0002, "loss": 2.8261, "step": 21670 }, { "epoch": 9.31, "learning_rate": 0.0002, "loss": 2.8452, "step": 21675 }, { "epoch": 9.31, "learning_rate": 0.0002, "loss": 2.83, "step": 21680 }, { "epoch": 9.31, "learning_rate": 0.0002, "loss": 2.8472, "step": 21685 }, { "epoch": 9.31, "learning_rate": 0.0002, "loss": 2.8485, "step": 21690 }, { "epoch": 9.32, "learning_rate": 0.0002, "loss": 2.8218, "step": 21695 }, { "epoch": 9.32, "eval_loss": 2.847691535949707, "eval_runtime": 5.5922, "eval_samples_per_second": 1766.197, "eval_steps_per_second": 13.948, "eval_top3_3_weighted_f1_score ": 0.43518061084283816, "eval_top_1_macro_f1_score": 0.06834749791042832, "eval_top_1_weighted_f1score": 0.21213859890679648, "eval_top_3_macro_f1_score": 0.17734608601908802, "step": 21696 }, { "epoch": 9.32, "learning_rate": 0.0002, "loss": 2.8276, "step": 21700 }, { "epoch": 9.32, "learning_rate": 0.0002, "loss": 2.8248, "step": 21705 }, { "epoch": 9.32, "learning_rate": 0.0002, "loss": 2.8303, "step": 21710 }, { "epoch": 9.32, "learning_rate": 0.0002, "loss": 2.8197, "step": 21715 }, { "epoch": 9.33, "learning_rate": 0.0002, "loss": 2.7987, "step": 21720 }, { "epoch": 9.33, "learning_rate": 0.0002, "loss": 2.8109, "step": 21725 }, { "epoch": 9.33, "learning_rate": 0.0002, "loss": 2.8255, "step": 21730 }, { "epoch": 9.33, "learning_rate": 0.0002, "loss": 2.8302, "step": 21735 }, { "epoch": 9.33, "learning_rate": 0.0002, "loss": 2.813, "step": 21740 }, { "epoch": 9.34, "learning_rate": 0.0002, "loss": 2.8251, "step": 21745 }, { "epoch": 9.34, "learning_rate": 0.0002, "loss": 2.8225, "step": 21750 }, { "epoch": 9.34, "learning_rate": 0.0002, "loss": 2.8303, "step": 21755 }, { "epoch": 9.34, "learning_rate": 0.0002, "loss": 2.8168, "step": 21760 }, { "epoch": 9.34, "eval_loss": 2.8492608070373535, "eval_runtime": 5.5571, "eval_samples_per_second": 1777.38, "eval_steps_per_second": 14.036, "eval_top3_3_weighted_f1_score ": 0.43120071411956107, "eval_top_1_macro_f1_score": 0.06795811905068046, "eval_top_1_weighted_f1score": 0.20868870727325284, "eval_top_3_macro_f1_score": 0.17405456349897896, "step": 21760 }, { "epoch": 9.35, "learning_rate": 0.0002, "loss": 2.828, "step": 21765 }, { "epoch": 9.35, "learning_rate": 0.0002, "loss": 2.8502, "step": 21770 }, { "epoch": 9.35, "learning_rate": 0.0002, "loss": 2.8378, "step": 21775 }, { "epoch": 9.35, "learning_rate": 0.0002, "loss": 2.8266, "step": 21780 }, { "epoch": 9.35, "learning_rate": 0.0002, "loss": 2.8167, "step": 21785 }, { "epoch": 9.36, "learning_rate": 0.0002, "loss": 2.8502, "step": 21790 }, { "epoch": 9.36, "learning_rate": 0.0002, "loss": 2.8251, "step": 21795 }, { "epoch": 9.36, "learning_rate": 0.0002, "loss": 2.8313, "step": 21800 }, { "epoch": 9.36, "learning_rate": 0.0002, "loss": 2.8291, "step": 21805 }, { "epoch": 9.36, "learning_rate": 0.0002, "loss": 2.829, "step": 21810 }, { "epoch": 9.37, "learning_rate": 0.0002, "loss": 2.8318, "step": 21815 }, { "epoch": 9.37, "learning_rate": 0.0002, "loss": 2.808, "step": 21820 }, { "epoch": 9.37, "eval_loss": 2.8524250984191895, "eval_runtime": 7.1815, "eval_samples_per_second": 1375.334, "eval_steps_per_second": 10.861, "eval_top3_3_weighted_f1_score ": 0.430441569957755, "eval_top_1_macro_f1_score": 0.07308010153184245, "eval_top_1_weighted_f1score": 0.21382525358508753, "eval_top_3_macro_f1_score": 0.17174174769716297, "step": 21824 }, { "epoch": 9.37, "learning_rate": 0.0002, "loss": 2.8469, "step": 21825 }, { "epoch": 9.37, "learning_rate": 0.0002, "loss": 2.8196, "step": 21830 }, { "epoch": 9.38, "learning_rate": 0.0002, "loss": 2.831, "step": 21835 }, { "epoch": 9.38, "learning_rate": 0.0002, "loss": 2.8362, "step": 21840 }, { "epoch": 9.38, "learning_rate": 0.0002, "loss": 2.8416, "step": 21845 }, { "epoch": 9.38, "learning_rate": 0.0002, "loss": 2.8243, "step": 21850 }, { "epoch": 9.38, "learning_rate": 0.0002, "loss": 2.8218, "step": 21855 }, { "epoch": 9.39, "learning_rate": 0.0002, "loss": 2.8279, "step": 21860 }, { "epoch": 9.39, "learning_rate": 0.0002, "loss": 2.8244, "step": 21865 }, { "epoch": 9.39, "learning_rate": 0.0002, "loss": 2.8265, "step": 21870 }, { "epoch": 9.39, "learning_rate": 0.0002, "loss": 2.827, "step": 21875 }, { "epoch": 9.39, "learning_rate": 0.0002, "loss": 2.8126, "step": 21880 }, { "epoch": 9.4, "learning_rate": 0.0002, "loss": 2.8269, "step": 21885 }, { "epoch": 9.4, "eval_loss": 2.852505683898926, "eval_runtime": 5.4688, "eval_samples_per_second": 1806.055, "eval_steps_per_second": 14.263, "eval_top3_3_weighted_f1_score ": 0.4300055491500529, "eval_top_1_macro_f1_score": 0.07128116741764888, "eval_top_1_weighted_f1score": 0.21187664243099974, "eval_top_3_macro_f1_score": 0.1793112644216981, "step": 21888 }, { "epoch": 9.4, "learning_rate": 0.0002, "loss": 2.8452, "step": 21890 }, { "epoch": 9.4, "learning_rate": 0.0002, "loss": 2.8257, "step": 21895 }, { "epoch": 9.4, "learning_rate": 0.0002, "loss": 2.8259, "step": 21900 }, { "epoch": 9.41, "learning_rate": 0.0002, "loss": 2.82, "step": 21905 }, { "epoch": 9.41, "learning_rate": 0.0002, "loss": 2.835, "step": 21910 }, { "epoch": 9.41, "learning_rate": 0.0002, "loss": 2.8246, "step": 21915 }, { "epoch": 9.41, "learning_rate": 0.0002, "loss": 2.8155, "step": 21920 }, { "epoch": 9.41, "learning_rate": 0.0002, "loss": 2.8231, "step": 21925 }, { "epoch": 9.42, "learning_rate": 0.0002, "loss": 2.8415, "step": 21930 }, { "epoch": 9.42, "learning_rate": 0.0002, "loss": 2.8179, "step": 21935 }, { "epoch": 9.42, "learning_rate": 0.0002, "loss": 2.8314, "step": 21940 }, { "epoch": 9.42, "learning_rate": 0.0002, "loss": 2.8223, "step": 21945 }, { "epoch": 9.42, "learning_rate": 0.0002, "loss": 2.8283, "step": 21950 }, { "epoch": 9.43, "eval_loss": 2.856452465057373, "eval_runtime": 5.957, "eval_samples_per_second": 1658.052, "eval_steps_per_second": 13.094, "eval_top3_3_weighted_f1_score ": 0.4333480847886804, "eval_top_1_macro_f1_score": 0.07014983787727506, "eval_top_1_weighted_f1score": 0.2144048069881346, "eval_top_3_macro_f1_score": 0.18613721082481452, "step": 21952 }, { "epoch": 9.43, "learning_rate": 0.0002, "loss": 2.8348, "step": 21955 }, { "epoch": 9.43, "learning_rate": 0.0002, "loss": 2.8178, "step": 21960 }, { "epoch": 9.43, "learning_rate": 0.0002, "loss": 2.8175, "step": 21965 }, { "epoch": 9.43, "learning_rate": 0.0002, "loss": 2.8415, "step": 21970 }, { "epoch": 9.44, "learning_rate": 0.0002, "loss": 2.8158, "step": 21975 }, { "epoch": 9.44, "learning_rate": 0.0002, "loss": 2.8273, "step": 21980 }, { "epoch": 9.44, "learning_rate": 0.0002, "loss": 2.8177, "step": 21985 }, { "epoch": 9.44, "learning_rate": 0.0002, "loss": 2.8303, "step": 21990 }, { "epoch": 9.44, "learning_rate": 0.0002, "loss": 2.8278, "step": 21995 }, { "epoch": 9.45, "learning_rate": 0.0002, "loss": 2.8423, "step": 22000 }, { "epoch": 9.45, "learning_rate": 0.0002, "loss": 2.8302, "step": 22005 }, { "epoch": 9.45, "learning_rate": 0.0002, "loss": 2.8423, "step": 22010 }, { "epoch": 9.45, "learning_rate": 0.0002, "loss": 2.8265, "step": 22015 }, { "epoch": 9.45, "eval_loss": 2.8534743785858154, "eval_runtime": 162.204, "eval_samples_per_second": 60.892, "eval_steps_per_second": 0.481, "eval_top3_3_weighted_f1_score ": 0.43166440461259736, "eval_top_1_macro_f1_score": 0.06529782059485596, "eval_top_1_weighted_f1score": 0.20582071646299285, "eval_top_3_macro_f1_score": 0.17866792501545486, "step": 22016 }, { "epoch": 9.45, "learning_rate": 0.0002, "loss": 2.8452, "step": 22020 }, { "epoch": 9.46, "learning_rate": 0.0002, "loss": 2.83, "step": 22025 }, { "epoch": 9.46, "learning_rate": 0.0002, "loss": 2.8341, "step": 22030 }, { "epoch": 9.46, "learning_rate": 0.0002, "loss": 2.823, "step": 22035 }, { "epoch": 9.46, "learning_rate": 0.0002, "loss": 2.8163, "step": 22040 }, { "epoch": 9.47, "learning_rate": 0.0002, "loss": 2.8293, "step": 22045 }, { "epoch": 9.47, "learning_rate": 0.0002, "loss": 2.8245, "step": 22050 }, { "epoch": 9.47, "learning_rate": 0.0002, "loss": 2.8175, "step": 22055 }, { "epoch": 9.47, "learning_rate": 0.0002, "loss": 2.8455, "step": 22060 }, { "epoch": 9.47, "learning_rate": 0.0002, "loss": 2.8299, "step": 22065 }, { "epoch": 9.48, "learning_rate": 0.0002, "loss": 2.8204, "step": 22070 }, { "epoch": 9.48, "learning_rate": 0.0002, "loss": 2.8138, "step": 22075 }, { "epoch": 9.48, "learning_rate": 0.0002, "loss": 2.8309, "step": 22080 }, { "epoch": 9.48, "eval_loss": 2.85726261138916, "eval_runtime": 5.3407, "eval_samples_per_second": 1849.38, "eval_steps_per_second": 14.605, "eval_top3_3_weighted_f1_score ": 0.4286655454078363, "eval_top_1_macro_f1_score": 0.06709038385470456, "eval_top_1_weighted_f1score": 0.21081449479089667, "eval_top_3_macro_f1_score": 0.1730030726530344, "step": 22080 }, { "epoch": 9.48, "learning_rate": 0.0002, "loss": 2.828, "step": 22085 }, { "epoch": 9.48, "learning_rate": 0.0002, "loss": 2.8352, "step": 22090 }, { "epoch": 9.49, "learning_rate": 0.0002, "loss": 2.8116, "step": 22095 }, { "epoch": 9.49, "learning_rate": 0.0002, "loss": 2.849, "step": 22100 }, { "epoch": 9.49, "learning_rate": 0.0002, "loss": 2.8063, "step": 22105 }, { "epoch": 9.49, "learning_rate": 0.0002, "loss": 2.8206, "step": 22110 }, { "epoch": 9.5, "learning_rate": 0.0002, "loss": 2.8206, "step": 22115 }, { "epoch": 9.5, "learning_rate": 0.0002, "loss": 2.8322, "step": 22120 }, { "epoch": 9.5, "learning_rate": 0.0002, "loss": 2.8226, "step": 22125 }, { "epoch": 9.5, "learning_rate": 0.0002, "loss": 2.8469, "step": 22130 }, { "epoch": 9.5, "learning_rate": 0.0002, "loss": 2.8194, "step": 22135 }, { "epoch": 9.51, "learning_rate": 0.0002, "loss": 2.8375, "step": 22140 }, { "epoch": 9.51, "eval_loss": 2.8557488918304443, "eval_runtime": 5.4862, "eval_samples_per_second": 1800.328, "eval_steps_per_second": 14.217, "eval_top3_3_weighted_f1_score ": 0.4268182211630273, "eval_top_1_macro_f1_score": 0.06563339841606039, "eval_top_1_weighted_f1score": 0.2131272297426529, "eval_top_3_macro_f1_score": 0.17459416251668647, "step": 22144 }, { "epoch": 9.51, "learning_rate": 0.0002, "loss": 2.8423, "step": 22145 }, { "epoch": 9.51, "learning_rate": 0.0002, "loss": 2.8401, "step": 22150 }, { "epoch": 9.51, "learning_rate": 0.0002, "loss": 2.8376, "step": 22155 }, { "epoch": 9.51, "learning_rate": 0.0002, "loss": 2.8359, "step": 22160 }, { "epoch": 9.52, "learning_rate": 0.0002, "loss": 2.8281, "step": 22165 }, { "epoch": 9.52, "learning_rate": 0.0002, "loss": 2.8337, "step": 22170 }, { "epoch": 9.52, "learning_rate": 0.0002, "loss": 2.8145, "step": 22175 }, { "epoch": 9.52, "learning_rate": 0.0002, "loss": 2.833, "step": 22180 }, { "epoch": 9.53, "learning_rate": 0.0002, "loss": 2.8276, "step": 22185 }, { "epoch": 9.53, "learning_rate": 0.0002, "loss": 2.8406, "step": 22190 }, { "epoch": 9.53, "learning_rate": 0.0002, "loss": 2.8246, "step": 22195 }, { "epoch": 9.53, "learning_rate": 0.0002, "loss": 2.8288, "step": 22200 }, { "epoch": 9.53, "learning_rate": 0.0002, "loss": 2.8213, "step": 22205 }, { "epoch": 9.54, "eval_loss": 2.8494086265563965, "eval_runtime": 5.2945, "eval_samples_per_second": 1865.532, "eval_steps_per_second": 14.732, "eval_top3_3_weighted_f1_score ": 0.4315943580943734, "eval_top_1_macro_f1_score": 0.07372850905722614, "eval_top_1_weighted_f1score": 0.21281821345479093, "eval_top_3_macro_f1_score": 0.18265632089833317, "step": 22208 }, { "epoch": 9.54, "learning_rate": 0.0002, "loss": 2.8358, "step": 22210 }, { "epoch": 9.54, "learning_rate": 0.0002, "loss": 2.8284, "step": 22215 }, { "epoch": 9.54, "learning_rate": 0.0002, "loss": 2.8315, "step": 22220 }, { "epoch": 9.54, "learning_rate": 0.0002, "loss": 2.8489, "step": 22225 }, { "epoch": 9.54, "learning_rate": 0.0002, "loss": 2.8261, "step": 22230 }, { "epoch": 9.55, "learning_rate": 0.0002, "loss": 2.8295, "step": 22235 }, { "epoch": 9.55, "learning_rate": 0.0002, "loss": 2.8134, "step": 22240 }, { "epoch": 9.55, "learning_rate": 0.0002, "loss": 2.8074, "step": 22245 }, { "epoch": 9.55, "learning_rate": 0.0002, "loss": 2.8203, "step": 22250 }, { "epoch": 9.56, "learning_rate": 0.0002, "loss": 2.8459, "step": 22255 }, { "epoch": 9.56, "learning_rate": 0.0002, "loss": 2.8342, "step": 22260 }, { "epoch": 9.56, "learning_rate": 0.0002, "loss": 2.8209, "step": 22265 }, { "epoch": 9.56, "learning_rate": 0.0002, "loss": 2.8238, "step": 22270 }, { "epoch": 9.56, "eval_loss": 2.8474643230438232, "eval_runtime": 5.4026, "eval_samples_per_second": 1828.2, "eval_steps_per_second": 14.438, "eval_top3_3_weighted_f1_score ": 0.43126150796226115, "eval_top_1_macro_f1_score": 0.07045744025919749, "eval_top_1_weighted_f1score": 0.21364646849437402, "eval_top_3_macro_f1_score": 0.1791263113839099, "step": 22272 }, { "epoch": 9.56, "learning_rate": 0.0002, "loss": 2.8111, "step": 22275 }, { "epoch": 9.57, "learning_rate": 0.0002, "loss": 2.8235, "step": 22280 }, { "epoch": 9.57, "learning_rate": 0.0002, "loss": 2.8053, "step": 22285 }, { "epoch": 9.57, "learning_rate": 0.0002, "loss": 2.8362, "step": 22290 }, { "epoch": 9.57, "learning_rate": 0.0002, "loss": 2.8404, "step": 22295 }, { "epoch": 9.57, "learning_rate": 0.0002, "loss": 2.8111, "step": 22300 }, { "epoch": 9.58, "learning_rate": 0.0002, "loss": 2.8196, "step": 22305 }, { "epoch": 9.58, "learning_rate": 0.0002, "loss": 2.8273, "step": 22310 }, { "epoch": 9.58, "learning_rate": 0.0002, "loss": 2.8171, "step": 22315 }, { "epoch": 9.58, "learning_rate": 0.0002, "loss": 2.8298, "step": 22320 }, { "epoch": 9.59, "learning_rate": 0.0002, "loss": 2.8231, "step": 22325 }, { "epoch": 9.59, "learning_rate": 0.0002, "loss": 2.8299, "step": 22330 }, { "epoch": 9.59, "learning_rate": 0.0002, "loss": 2.8276, "step": 22335 }, { "epoch": 9.59, "eval_loss": 2.847917318344116, "eval_runtime": 5.3857, "eval_samples_per_second": 1833.93, "eval_steps_per_second": 14.483, "eval_top3_3_weighted_f1_score ": 0.436237582005185, "eval_top_1_macro_f1_score": 0.06547436603574881, "eval_top_1_weighted_f1score": 0.2072242509479481, "eval_top_3_macro_f1_score": 0.17854312296656763, "step": 22336 }, { "epoch": 9.59, "learning_rate": 0.0002, "loss": 2.8275, "step": 22340 }, { "epoch": 9.59, "learning_rate": 0.0002, "loss": 2.8365, "step": 22345 }, { "epoch": 9.6, "learning_rate": 0.0002, "loss": 2.838, "step": 22350 }, { "epoch": 9.6, "learning_rate": 0.0002, "loss": 2.8305, "step": 22355 }, { "epoch": 9.6, "learning_rate": 0.0002, "loss": 2.828, "step": 22360 }, { "epoch": 9.6, "learning_rate": 0.0002, "loss": 2.8228, "step": 22365 }, { "epoch": 9.6, "learning_rate": 0.0002, "loss": 2.8146, "step": 22370 }, { "epoch": 9.61, "learning_rate": 0.0002, "loss": 2.8268, "step": 22375 }, { "epoch": 9.61, "learning_rate": 0.0002, "loss": 2.8265, "step": 22380 }, { "epoch": 9.61, "learning_rate": 0.0002, "loss": 2.827, "step": 22385 }, { "epoch": 9.61, "learning_rate": 0.0002, "loss": 2.8183, "step": 22390 }, { "epoch": 9.62, "learning_rate": 0.0002, "loss": 2.8385, "step": 22395 }, { "epoch": 9.62, "learning_rate": 0.0002, "loss": 2.8141, "step": 22400 }, { "epoch": 9.62, "eval_loss": 2.852114677429199, "eval_runtime": 5.6974, "eval_samples_per_second": 1733.609, "eval_steps_per_second": 13.691, "eval_top3_3_weighted_f1_score ": 0.4325883484046382, "eval_top_1_macro_f1_score": 0.07310835748462423, "eval_top_1_weighted_f1score": 0.20887421005502937, "eval_top_3_macro_f1_score": 0.18169750082095731, "step": 22400 }, { "epoch": 9.62, "learning_rate": 0.0002, "loss": 2.8212, "step": 22405 }, { "epoch": 9.62, "learning_rate": 0.0002, "loss": 2.8337, "step": 22410 }, { "epoch": 9.62, "learning_rate": 0.0002, "loss": 2.8198, "step": 22415 }, { "epoch": 9.63, "learning_rate": 0.0002, "loss": 2.82, "step": 22420 }, { "epoch": 9.63, "learning_rate": 0.0002, "loss": 2.834, "step": 22425 }, { "epoch": 9.63, "learning_rate": 0.0002, "loss": 2.8306, "step": 22430 }, { "epoch": 9.63, "learning_rate": 0.0002, "loss": 2.8419, "step": 22435 }, { "epoch": 9.63, "learning_rate": 0.0002, "loss": 2.8332, "step": 22440 }, { "epoch": 9.64, "learning_rate": 0.0002, "loss": 2.8348, "step": 22445 }, { "epoch": 9.64, "learning_rate": 0.0002, "loss": 2.8367, "step": 22450 }, { "epoch": 9.64, "learning_rate": 0.0002, "loss": 2.8361, "step": 22455 }, { "epoch": 9.64, "learning_rate": 0.0002, "loss": 2.8091, "step": 22460 }, { "epoch": 9.65, "eval_loss": 2.853534460067749, "eval_runtime": 5.3887, "eval_samples_per_second": 1832.895, "eval_steps_per_second": 14.475, "eval_top3_3_weighted_f1_score ": 0.4305856628206878, "eval_top_1_macro_f1_score": 0.06318847528797358, "eval_top_1_weighted_f1score": 0.20984362953446536, "eval_top_3_macro_f1_score": 0.17533045615218756, "step": 22464 }, { "epoch": 9.65, "learning_rate": 0.0002, "loss": 2.8239, "step": 22465 }, { "epoch": 9.65, "learning_rate": 0.0002, "loss": 2.8335, "step": 22470 }, { "epoch": 9.65, "learning_rate": 0.0002, "loss": 2.8246, "step": 22475 }, { "epoch": 9.65, "learning_rate": 0.0002, "loss": 2.8269, "step": 22480 }, { "epoch": 9.65, "learning_rate": 0.0002, "loss": 2.8542, "step": 22485 }, { "epoch": 9.66, "learning_rate": 0.0002, "loss": 2.821, "step": 22490 }, { "epoch": 9.66, "learning_rate": 0.0002, "loss": 2.8174, "step": 22495 }, { "epoch": 9.66, "learning_rate": 0.0002, "loss": 2.8198, "step": 22500 }, { "epoch": 9.66, "learning_rate": 0.0002, "loss": 2.8363, "step": 22505 }, { "epoch": 9.66, "learning_rate": 0.0002, "loss": 2.8401, "step": 22510 }, { "epoch": 9.67, "learning_rate": 0.0002, "loss": 2.8382, "step": 22515 }, { "epoch": 9.67, "learning_rate": 0.0002, "loss": 2.8427, "step": 22520 }, { "epoch": 9.67, "learning_rate": 0.0002, "loss": 2.8378, "step": 22525 }, { "epoch": 9.67, "eval_loss": 2.851630449295044, "eval_runtime": 5.437, "eval_samples_per_second": 1816.626, "eval_steps_per_second": 14.346, "eval_top3_3_weighted_f1_score ": 0.43217284214512847, "eval_top_1_macro_f1_score": 0.0669065728068694, "eval_top_1_weighted_f1score": 0.20909471808589813, "eval_top_3_macro_f1_score": 0.18281573863659614, "step": 22528 }, { "epoch": 9.67, "learning_rate": 0.0002, "loss": 2.8307, "step": 22530 }, { "epoch": 9.68, "learning_rate": 0.0002, "loss": 2.826, "step": 22535 }, { "epoch": 9.68, "learning_rate": 0.0002, "loss": 2.8305, "step": 22540 }, { "epoch": 9.68, "learning_rate": 0.0002, "loss": 2.8193, "step": 22545 }, { "epoch": 9.68, "learning_rate": 0.0002, "loss": 2.8253, "step": 22550 }, { "epoch": 9.68, "learning_rate": 0.0002, "loss": 2.8231, "step": 22555 }, { "epoch": 9.69, "learning_rate": 0.0002, "loss": 2.8477, "step": 22560 }, { "epoch": 9.69, "learning_rate": 0.0002, "loss": 2.8311, "step": 22565 }, { "epoch": 9.69, "learning_rate": 0.0002, "loss": 2.8283, "step": 22570 }, { "epoch": 9.69, "learning_rate": 0.0002, "loss": 2.8125, "step": 22575 }, { "epoch": 9.7, "learning_rate": 0.0002, "loss": 2.8378, "step": 22580 }, { "epoch": 9.7, "learning_rate": 0.0002, "loss": 2.8241, "step": 22585 }, { "epoch": 9.7, "learning_rate": 0.0002, "loss": 2.8404, "step": 22590 }, { "epoch": 9.7, "eval_loss": 2.8550713062286377, "eval_runtime": 5.7527, "eval_samples_per_second": 1716.929, "eval_steps_per_second": 13.559, "eval_top3_3_weighted_f1_score ": 0.4319429275812067, "eval_top_1_macro_f1_score": 0.0680740631387194, "eval_top_1_weighted_f1score": 0.21071631271646857, "eval_top_3_macro_f1_score": 0.1813096685504854, "step": 22592 }, { "epoch": 9.7, "learning_rate": 0.0002, "loss": 2.8184, "step": 22595 }, { "epoch": 9.7, "learning_rate": 0.0002, "loss": 2.8108, "step": 22600 }, { "epoch": 9.71, "learning_rate": 0.0002, "loss": 2.8173, "step": 22605 }, { "epoch": 9.71, "learning_rate": 0.0002, "loss": 2.84, "step": 22610 }, { "epoch": 9.71, "learning_rate": 0.0002, "loss": 2.8089, "step": 22615 }, { "epoch": 9.71, "learning_rate": 0.0002, "loss": 2.8204, "step": 22620 }, { "epoch": 9.71, "learning_rate": 0.0002, "loss": 2.8181, "step": 22625 }, { "epoch": 9.72, "learning_rate": 0.0002, "loss": 2.8546, "step": 22630 }, { "epoch": 9.72, "learning_rate": 0.0002, "loss": 2.8349, "step": 22635 }, { "epoch": 9.72, "learning_rate": 0.0002, "loss": 2.8365, "step": 22640 }, { "epoch": 9.72, "learning_rate": 0.0002, "loss": 2.8004, "step": 22645 }, { "epoch": 9.73, "learning_rate": 0.0002, "loss": 2.8291, "step": 22650 }, { "epoch": 9.73, "learning_rate": 0.0002, "loss": 2.8167, "step": 22655 }, { "epoch": 9.73, "eval_loss": 2.847604990005493, "eval_runtime": 5.3717, "eval_samples_per_second": 1838.701, "eval_steps_per_second": 14.52, "eval_top3_3_weighted_f1_score ": 0.4296377629392317, "eval_top_1_macro_f1_score": 0.0751315435133377, "eval_top_1_weighted_f1score": 0.21394146061309774, "eval_top_3_macro_f1_score": 0.1780066666602597, "step": 22656 }, { "epoch": 9.73, "learning_rate": 0.0002, "loss": 2.8369, "step": 22660 }, { "epoch": 9.73, "learning_rate": 0.0002, "loss": 2.8226, "step": 22665 }, { "epoch": 9.73, "learning_rate": 0.0002, "loss": 2.819, "step": 22670 }, { "epoch": 9.74, "learning_rate": 0.0002, "loss": 2.8188, "step": 22675 }, { "epoch": 9.74, "learning_rate": 0.0002, "loss": 2.8105, "step": 22680 }, { "epoch": 9.74, "learning_rate": 0.0002, "loss": 2.8384, "step": 22685 }, { "epoch": 9.74, "learning_rate": 0.0002, "loss": 2.8155, "step": 22690 }, { "epoch": 9.74, "learning_rate": 0.0002, "loss": 2.8317, "step": 22695 }, { "epoch": 9.75, "learning_rate": 0.0002, "loss": 2.8236, "step": 22700 }, { "epoch": 9.75, "learning_rate": 0.0002, "loss": 2.8264, "step": 22705 }, { "epoch": 9.75, "learning_rate": 0.0002, "loss": 2.823, "step": 22710 }, { "epoch": 9.75, "learning_rate": 0.0002, "loss": 2.8361, "step": 22715 }, { "epoch": 9.76, "learning_rate": 0.0002, "loss": 2.8274, "step": 22720 }, { "epoch": 9.76, "eval_loss": 2.847738742828369, "eval_runtime": 5.4086, "eval_samples_per_second": 1826.165, "eval_steps_per_second": 14.421, "eval_top3_3_weighted_f1_score ": 0.4323342268436191, "eval_top_1_macro_f1_score": 0.06781506604704406, "eval_top_1_weighted_f1score": 0.2109029706071375, "eval_top_3_macro_f1_score": 0.17692047586267784, "step": 22720 }, { "epoch": 9.76, "learning_rate": 0.0002, "loss": 2.8207, "step": 22725 }, { "epoch": 9.76, "learning_rate": 0.0002, "loss": 2.8415, "step": 22730 }, { "epoch": 9.76, "learning_rate": 0.0002, "loss": 2.821, "step": 22735 }, { "epoch": 9.76, "learning_rate": 0.0002, "loss": 2.8306, "step": 22740 }, { "epoch": 9.77, "learning_rate": 0.0002, "loss": 2.829, "step": 22745 }, { "epoch": 9.77, "learning_rate": 0.0002, "loss": 2.8446, "step": 22750 }, { "epoch": 9.77, "learning_rate": 0.0002, "loss": 2.8467, "step": 22755 }, { "epoch": 9.77, "learning_rate": 0.0002, "loss": 2.8425, "step": 22760 }, { "epoch": 9.77, "learning_rate": 0.0002, "loss": 2.8356, "step": 22765 }, { "epoch": 9.78, "learning_rate": 0.0002, "loss": 2.8316, "step": 22770 }, { "epoch": 9.78, "learning_rate": 0.0002, "loss": 2.8151, "step": 22775 }, { "epoch": 9.78, "learning_rate": 0.0002, "loss": 2.8337, "step": 22780 }, { "epoch": 9.78, "eval_loss": 2.8500022888183594, "eval_runtime": 5.4145, "eval_samples_per_second": 1824.184, "eval_steps_per_second": 14.406, "eval_top3_3_weighted_f1_score ": 0.43283902112300215, "eval_top_1_macro_f1_score": 0.06821030623830515, "eval_top_1_weighted_f1score": 0.21041662920364143, "eval_top_3_macro_f1_score": 0.1764320095292666, "step": 22784 }, { "epoch": 9.78, "learning_rate": 0.0002, "loss": 2.8228, "step": 22785 }, { "epoch": 9.79, "learning_rate": 0.0002, "loss": 2.8269, "step": 22790 }, { "epoch": 9.79, "learning_rate": 0.0002, "loss": 2.8381, "step": 22795 }, { "epoch": 9.79, "learning_rate": 0.0002, "loss": 2.8279, "step": 22800 }, { "epoch": 9.79, "learning_rate": 0.0002, "loss": 2.8441, "step": 22805 }, { "epoch": 9.79, "learning_rate": 0.0002, "loss": 2.8308, "step": 22810 }, { "epoch": 9.8, "learning_rate": 0.0002, "loss": 2.8261, "step": 22815 }, { "epoch": 9.8, "learning_rate": 0.0002, "loss": 2.8219, "step": 22820 }, { "epoch": 9.8, "learning_rate": 0.0002, "loss": 2.8216, "step": 22825 }, { "epoch": 9.8, "learning_rate": 0.0002, "loss": 2.8363, "step": 22830 }, { "epoch": 9.8, "learning_rate": 0.0002, "loss": 2.8465, "step": 22835 }, { "epoch": 9.81, "learning_rate": 0.0002, "loss": 2.8268, "step": 22840 }, { "epoch": 9.81, "learning_rate": 0.0002, "loss": 2.8296, "step": 22845 }, { "epoch": 9.81, "eval_loss": 2.8474063873291016, "eval_runtime": 5.4254, "eval_samples_per_second": 1820.525, "eval_steps_per_second": 14.377, "eval_top3_3_weighted_f1_score ": 0.43618021259645307, "eval_top_1_macro_f1_score": 0.06937867083371499, "eval_top_1_weighted_f1score": 0.20832137094757502, "eval_top_3_macro_f1_score": 0.18881116431984588, "step": 22848 } ], "max_steps": 23290, "num_train_epochs": 10, "total_flos": 5.64324213028251e+16, "trial_name": null, "trial_params": null }