diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,30298 @@ +{ + "best_metric": 0.8810526315789474, + "best_model_checkpoint": "videomae-finetuned-nba-5-class-4-batch-8000-vid-multiclass-4/checkpoint-40000", + "epoch": 24.04, + "eval_steps": 500, + "global_step": 50000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 3.0000000000000004e-08, + "loss": 1.6289, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 6.000000000000001e-08, + "loss": 1.6849, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 9e-08, + "loss": 1.7074, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 1.2000000000000002e-07, + "loss": 1.657, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 1.5000000000000002e-07, + "loss": 1.6217, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 1.8e-07, + "loss": 1.6704, + "step": 60 + }, + { + "epoch": 0.0, + "learning_rate": 2.1e-07, + "loss": 1.6519, + "step": 70 + }, + { + "epoch": 0.0, + "learning_rate": 2.4000000000000003e-07, + "loss": 1.7045, + "step": 80 + }, + { + "epoch": 0.0, + "learning_rate": 2.7e-07, + "loss": 1.6178, + "step": 90 + }, + { + "epoch": 0.0, + "learning_rate": 3.0000000000000004e-07, + "loss": 1.6117, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 3.2999999999999996e-07, + "loss": 1.614, + "step": 110 + }, + { + "epoch": 0.0, + "learning_rate": 3.6e-07, + "loss": 1.6564, + "step": 120 + }, + { + "epoch": 0.0, + "learning_rate": 3.8999999999999997e-07, + "loss": 1.6711, + "step": 130 + }, + { + "epoch": 0.0, + "learning_rate": 4.2e-07, + "loss": 1.6176, + "step": 140 + }, + { + "epoch": 0.0, + "learning_rate": 4.5e-07, + "loss": 1.5739, + "step": 150 + }, + { + "epoch": 0.0, + "learning_rate": 4.800000000000001e-07, + "loss": 1.6065, + "step": 160 + }, + { + "epoch": 0.0, + "learning_rate": 5.100000000000001e-07, + "loss": 1.611, + "step": 170 + }, + { + "epoch": 0.0, + "learning_rate": 5.4e-07, + "loss": 1.6273, + "step": 180 + }, + { + "epoch": 0.0, + "learning_rate": 5.7e-07, + "loss": 1.6327, + "step": 190 + }, + { + "epoch": 0.0, + "learning_rate": 6.000000000000001e-07, + "loss": 1.5965, + "step": 200 + }, + { + "epoch": 0.0, + "learning_rate": 6.3e-07, + "loss": 1.5956, + "step": 210 + }, + { + "epoch": 0.0, + "learning_rate": 6.599999999999999e-07, + "loss": 1.611, + "step": 220 + }, + { + "epoch": 0.0, + "learning_rate": 6.9e-07, + "loss": 1.6605, + "step": 230 + }, + { + "epoch": 0.0, + "learning_rate": 7.2e-07, + "loss": 1.6057, + "step": 240 + }, + { + "epoch": 0.01, + "learning_rate": 7.5e-07, + "loss": 1.6235, + "step": 250 + }, + { + "epoch": 0.01, + "learning_rate": 7.799999999999999e-07, + "loss": 1.6241, + "step": 260 + }, + { + "epoch": 0.01, + "learning_rate": 8.1e-07, + "loss": 1.5955, + "step": 270 + }, + { + "epoch": 0.01, + "learning_rate": 8.4e-07, + "loss": 1.5911, + "step": 280 + }, + { + "epoch": 0.01, + "learning_rate": 8.7e-07, + "loss": 1.628, + "step": 290 + }, + { + "epoch": 0.01, + "learning_rate": 9e-07, + "loss": 1.6224, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 9.3e-07, + "loss": 1.6257, + "step": 310 + }, + { + "epoch": 0.01, + "learning_rate": 9.600000000000001e-07, + "loss": 1.6915, + "step": 320 + }, + { + "epoch": 0.01, + "learning_rate": 9.9e-07, + "loss": 1.63, + "step": 330 + }, + { + "epoch": 0.01, + "learning_rate": 1.0200000000000002e-06, + "loss": 1.6068, + "step": 340 + }, + { + "epoch": 0.01, + "learning_rate": 1.0500000000000001e-06, + "loss": 1.6432, + "step": 350 + }, + { + "epoch": 0.01, + "learning_rate": 1.08e-06, + "loss": 1.636, + "step": 360 + }, + { + "epoch": 0.01, + "learning_rate": 1.11e-06, + "loss": 1.6221, + "step": 370 + }, + { + "epoch": 0.01, + "learning_rate": 1.14e-06, + "loss": 1.6367, + "step": 380 + }, + { + "epoch": 0.01, + "learning_rate": 1.17e-06, + "loss": 1.6197, + "step": 390 + }, + { + "epoch": 0.01, + "learning_rate": 1.2000000000000002e-06, + "loss": 1.6448, + "step": 400 + }, + { + "epoch": 0.01, + "learning_rate": 1.23e-06, + "loss": 1.6023, + "step": 410 + }, + { + "epoch": 0.01, + "learning_rate": 1.26e-06, + "loss": 1.5916, + "step": 420 + }, + { + "epoch": 0.01, + "learning_rate": 1.29e-06, + "loss": 1.6068, + "step": 430 + }, + { + "epoch": 0.01, + "learning_rate": 1.3199999999999999e-06, + "loss": 1.6125, + "step": 440 + }, + { + "epoch": 0.01, + "learning_rate": 1.35e-06, + "loss": 1.5929, + "step": 450 + }, + { + "epoch": 0.01, + "learning_rate": 1.38e-06, + "loss": 1.5875, + "step": 460 + }, + { + "epoch": 0.01, + "learning_rate": 1.41e-06, + "loss": 1.6399, + "step": 470 + }, + { + "epoch": 0.01, + "learning_rate": 1.44e-06, + "loss": 1.6119, + "step": 480 + }, + { + "epoch": 0.01, + "learning_rate": 1.4700000000000001e-06, + "loss": 1.6221, + "step": 490 + }, + { + "epoch": 0.01, + "learning_rate": 1.5e-06, + "loss": 1.5873, + "step": 500 + }, + { + "epoch": 0.01, + "learning_rate": 1.53e-06, + "loss": 1.6285, + "step": 510 + }, + { + "epoch": 0.01, + "learning_rate": 1.5599999999999999e-06, + "loss": 1.6254, + "step": 520 + }, + { + "epoch": 0.01, + "learning_rate": 1.59e-06, + "loss": 1.6067, + "step": 530 + }, + { + "epoch": 0.01, + "learning_rate": 1.62e-06, + "loss": 1.573, + "step": 540 + }, + { + "epoch": 0.01, + "learning_rate": 1.65e-06, + "loss": 1.6053, + "step": 550 + }, + { + "epoch": 0.01, + "learning_rate": 1.68e-06, + "loss": 1.5985, + "step": 560 + }, + { + "epoch": 0.01, + "learning_rate": 1.7100000000000001e-06, + "loss": 1.5918, + "step": 570 + }, + { + "epoch": 0.01, + "learning_rate": 1.74e-06, + "loss": 1.5986, + "step": 580 + }, + { + "epoch": 0.01, + "learning_rate": 1.77e-06, + "loss": 1.613, + "step": 590 + }, + { + "epoch": 0.01, + "learning_rate": 1.8e-06, + "loss": 1.6021, + "step": 600 + }, + { + "epoch": 0.01, + "learning_rate": 1.83e-06, + "loss": 1.5799, + "step": 610 + }, + { + "epoch": 0.01, + "learning_rate": 1.86e-06, + "loss": 1.59, + "step": 620 + }, + { + "epoch": 0.01, + "learning_rate": 1.8900000000000001e-06, + "loss": 1.6096, + "step": 630 + }, + { + "epoch": 0.01, + "learning_rate": 1.9200000000000003e-06, + "loss": 1.5695, + "step": 640 + }, + { + "epoch": 0.01, + "learning_rate": 1.95e-06, + "loss": 1.5877, + "step": 650 + }, + { + "epoch": 0.01, + "learning_rate": 1.98e-06, + "loss": 1.6079, + "step": 660 + }, + { + "epoch": 0.01, + "learning_rate": 2.0100000000000002e-06, + "loss": 1.5655, + "step": 670 + }, + { + "epoch": 0.01, + "learning_rate": 2.0400000000000004e-06, + "loss": 1.6314, + "step": 680 + }, + { + "epoch": 0.01, + "learning_rate": 2.07e-06, + "loss": 1.5892, + "step": 690 + }, + { + "epoch": 0.01, + "learning_rate": 2.1000000000000002e-06, + "loss": 1.6097, + "step": 700 + }, + { + "epoch": 0.01, + "learning_rate": 2.13e-06, + "loss": 1.6052, + "step": 710 + }, + { + "epoch": 0.01, + "learning_rate": 2.16e-06, + "loss": 1.6101, + "step": 720 + }, + { + "epoch": 0.01, + "learning_rate": 2.1899999999999998e-06, + "loss": 1.5883, + "step": 730 + }, + { + "epoch": 0.01, + "learning_rate": 2.22e-06, + "loss": 1.6043, + "step": 740 + }, + { + "epoch": 0.01, + "learning_rate": 2.25e-06, + "loss": 1.6058, + "step": 750 + }, + { + "epoch": 0.02, + "learning_rate": 2.28e-06, + "loss": 1.6058, + "step": 760 + }, + { + "epoch": 0.02, + "learning_rate": 2.31e-06, + "loss": 1.583, + "step": 770 + }, + { + "epoch": 0.02, + "learning_rate": 2.34e-06, + "loss": 1.5749, + "step": 780 + }, + { + "epoch": 0.02, + "learning_rate": 2.37e-06, + "loss": 1.5734, + "step": 790 + }, + { + "epoch": 0.02, + "learning_rate": 2.4000000000000003e-06, + "loss": 1.5619, + "step": 800 + }, + { + "epoch": 0.02, + "learning_rate": 2.43e-06, + "loss": 1.5821, + "step": 810 + }, + { + "epoch": 0.02, + "learning_rate": 2.46e-06, + "loss": 1.5731, + "step": 820 + }, + { + "epoch": 0.02, + "learning_rate": 2.4900000000000003e-06, + "loss": 1.5712, + "step": 830 + }, + { + "epoch": 0.02, + "learning_rate": 2.52e-06, + "loss": 1.5662, + "step": 840 + }, + { + "epoch": 0.02, + "learning_rate": 2.55e-06, + "loss": 1.5786, + "step": 850 + }, + { + "epoch": 0.02, + "learning_rate": 2.58e-06, + "loss": 1.5821, + "step": 860 + }, + { + "epoch": 0.02, + "learning_rate": 2.61e-06, + "loss": 1.5779, + "step": 870 + }, + { + "epoch": 0.02, + "learning_rate": 2.6399999999999997e-06, + "loss": 1.568, + "step": 880 + }, + { + "epoch": 0.02, + "learning_rate": 2.67e-06, + "loss": 1.5414, + "step": 890 + }, + { + "epoch": 0.02, + "learning_rate": 2.7e-06, + "loss": 1.5311, + "step": 900 + }, + { + "epoch": 0.02, + "learning_rate": 2.73e-06, + "loss": 1.5791, + "step": 910 + }, + { + "epoch": 0.02, + "learning_rate": 2.76e-06, + "loss": 1.5277, + "step": 920 + }, + { + "epoch": 0.02, + "learning_rate": 2.79e-06, + "loss": 1.5659, + "step": 930 + }, + { + "epoch": 0.02, + "learning_rate": 2.82e-06, + "loss": 1.5636, + "step": 940 + }, + { + "epoch": 0.02, + "learning_rate": 2.8500000000000002e-06, + "loss": 1.5084, + "step": 950 + }, + { + "epoch": 0.02, + "learning_rate": 2.88e-06, + "loss": 1.5494, + "step": 960 + }, + { + "epoch": 0.02, + "learning_rate": 2.91e-06, + "loss": 1.5576, + "step": 970 + }, + { + "epoch": 0.02, + "learning_rate": 2.9400000000000002e-06, + "loss": 1.5472, + "step": 980 + }, + { + "epoch": 0.02, + "learning_rate": 2.9700000000000004e-06, + "loss": 1.5201, + "step": 990 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 1.5265, + "step": 1000 + }, + { + "epoch": 0.02, + "learning_rate": 3.0300000000000002e-06, + "loss": 1.5452, + "step": 1010 + }, + { + "epoch": 0.02, + "learning_rate": 3.06e-06, + "loss": 1.5058, + "step": 1020 + }, + { + "epoch": 0.02, + "learning_rate": 3.09e-06, + "loss": 1.5153, + "step": 1030 + }, + { + "epoch": 0.02, + "learning_rate": 3.1199999999999998e-06, + "loss": 1.5483, + "step": 1040 + }, + { + "epoch": 0.02, + "learning_rate": 3.15e-06, + "loss": 1.5126, + "step": 1050 + }, + { + "epoch": 0.02, + "learning_rate": 3.18e-06, + "loss": 1.4588, + "step": 1060 + }, + { + "epoch": 0.02, + "learning_rate": 3.21e-06, + "loss": 1.4918, + "step": 1070 + }, + { + "epoch": 0.02, + "learning_rate": 3.24e-06, + "loss": 1.4994, + "step": 1080 + }, + { + "epoch": 0.02, + "learning_rate": 3.27e-06, + "loss": 1.4209, + "step": 1090 + }, + { + "epoch": 0.02, + "learning_rate": 3.3e-06, + "loss": 1.5078, + "step": 1100 + }, + { + "epoch": 0.02, + "learning_rate": 3.3300000000000003e-06, + "loss": 1.5444, + "step": 1110 + }, + { + "epoch": 0.02, + "learning_rate": 3.36e-06, + "loss": 1.5468, + "step": 1120 + }, + { + "epoch": 0.02, + "learning_rate": 3.39e-06, + "loss": 1.4958, + "step": 1130 + }, + { + "epoch": 0.02, + "learning_rate": 3.4200000000000003e-06, + "loss": 1.4897, + "step": 1140 + }, + { + "epoch": 0.02, + "learning_rate": 3.4500000000000004e-06, + "loss": 1.4517, + "step": 1150 + }, + { + "epoch": 0.02, + "learning_rate": 3.48e-06, + "loss": 1.3506, + "step": 1160 + }, + { + "epoch": 0.02, + "learning_rate": 3.5100000000000003e-06, + "loss": 1.4706, + "step": 1170 + }, + { + "epoch": 0.02, + "learning_rate": 3.54e-06, + "loss": 1.4046, + "step": 1180 + }, + { + "epoch": 0.02, + "learning_rate": 3.57e-06, + "loss": 1.498, + "step": 1190 + }, + { + "epoch": 0.02, + "learning_rate": 3.6e-06, + "loss": 1.4703, + "step": 1200 + }, + { + "epoch": 0.02, + "learning_rate": 3.63e-06, + "loss": 1.4965, + "step": 1210 + }, + { + "epoch": 0.02, + "learning_rate": 3.66e-06, + "loss": 1.3704, + "step": 1220 + }, + { + "epoch": 0.02, + "learning_rate": 3.6900000000000002e-06, + "loss": 1.3918, + "step": 1230 + }, + { + "epoch": 0.02, + "learning_rate": 3.72e-06, + "loss": 1.3952, + "step": 1240 + }, + { + "epoch": 0.03, + "learning_rate": 3.75e-06, + "loss": 1.3897, + "step": 1250 + }, + { + "epoch": 0.03, + "learning_rate": 3.7800000000000002e-06, + "loss": 1.3657, + "step": 1260 + }, + { + "epoch": 0.03, + "learning_rate": 3.81e-06, + "loss": 1.3527, + "step": 1270 + }, + { + "epoch": 0.03, + "learning_rate": 3.8400000000000005e-06, + "loss": 1.39, + "step": 1280 + }, + { + "epoch": 0.03, + "learning_rate": 3.87e-06, + "loss": 1.3892, + "step": 1290 + }, + { + "epoch": 0.03, + "learning_rate": 3.9e-06, + "loss": 1.4569, + "step": 1300 + }, + { + "epoch": 0.03, + "learning_rate": 3.9300000000000005e-06, + "loss": 1.3253, + "step": 1310 + }, + { + "epoch": 0.03, + "learning_rate": 3.96e-06, + "loss": 1.5201, + "step": 1320 + }, + { + "epoch": 0.03, + "learning_rate": 3.99e-06, + "loss": 1.4022, + "step": 1330 + }, + { + "epoch": 0.03, + "learning_rate": 4.0200000000000005e-06, + "loss": 1.3751, + "step": 1340 + }, + { + "epoch": 0.03, + "learning_rate": 4.05e-06, + "loss": 1.3118, + "step": 1350 + }, + { + "epoch": 0.03, + "learning_rate": 4.080000000000001e-06, + "loss": 1.3506, + "step": 1360 + }, + { + "epoch": 0.03, + "learning_rate": 4.1100000000000005e-06, + "loss": 1.522, + "step": 1370 + }, + { + "epoch": 0.03, + "learning_rate": 4.14e-06, + "loss": 1.4244, + "step": 1380 + }, + { + "epoch": 0.03, + "learning_rate": 4.170000000000001e-06, + "loss": 1.4063, + "step": 1390 + }, + { + "epoch": 0.03, + "learning_rate": 4.2000000000000004e-06, + "loss": 1.48, + "step": 1400 + }, + { + "epoch": 0.03, + "learning_rate": 4.229999999999999e-06, + "loss": 1.359, + "step": 1410 + }, + { + "epoch": 0.03, + "learning_rate": 4.26e-06, + "loss": 1.323, + "step": 1420 + }, + { + "epoch": 0.03, + "learning_rate": 4.29e-06, + "loss": 1.4078, + "step": 1430 + }, + { + "epoch": 0.03, + "learning_rate": 4.32e-06, + "loss": 1.4117, + "step": 1440 + }, + { + "epoch": 0.03, + "learning_rate": 4.35e-06, + "loss": 1.3358, + "step": 1450 + }, + { + "epoch": 0.03, + "learning_rate": 4.3799999999999996e-06, + "loss": 1.3316, + "step": 1460 + }, + { + "epoch": 0.03, + "learning_rate": 4.41e-06, + "loss": 1.2612, + "step": 1470 + }, + { + "epoch": 0.03, + "learning_rate": 4.44e-06, + "loss": 1.3363, + "step": 1480 + }, + { + "epoch": 0.03, + "learning_rate": 4.4699999999999996e-06, + "loss": 1.4093, + "step": 1490 + }, + { + "epoch": 0.03, + "learning_rate": 4.5e-06, + "loss": 1.2774, + "step": 1500 + }, + { + "epoch": 0.03, + "learning_rate": 4.53e-06, + "loss": 1.2604, + "step": 1510 + }, + { + "epoch": 0.03, + "learning_rate": 4.56e-06, + "loss": 1.2288, + "step": 1520 + }, + { + "epoch": 0.03, + "learning_rate": 4.59e-06, + "loss": 1.3362, + "step": 1530 + }, + { + "epoch": 0.03, + "learning_rate": 4.62e-06, + "loss": 1.4124, + "step": 1540 + }, + { + "epoch": 0.03, + "learning_rate": 4.65e-06, + "loss": 1.2563, + "step": 1550 + }, + { + "epoch": 0.03, + "learning_rate": 4.68e-06, + "loss": 1.2945, + "step": 1560 + }, + { + "epoch": 0.03, + "learning_rate": 4.71e-06, + "loss": 1.4438, + "step": 1570 + }, + { + "epoch": 0.03, + "learning_rate": 4.74e-06, + "loss": 1.2614, + "step": 1580 + }, + { + "epoch": 0.03, + "learning_rate": 4.77e-06, + "loss": 1.313, + "step": 1590 + }, + { + "epoch": 0.03, + "learning_rate": 4.800000000000001e-06, + "loss": 1.0942, + "step": 1600 + }, + { + "epoch": 0.03, + "learning_rate": 4.83e-06, + "loss": 1.2324, + "step": 1610 + }, + { + "epoch": 0.03, + "learning_rate": 4.86e-06, + "loss": 1.5338, + "step": 1620 + }, + { + "epoch": 0.03, + "learning_rate": 4.890000000000001e-06, + "loss": 1.4801, + "step": 1630 + }, + { + "epoch": 0.03, + "learning_rate": 4.92e-06, + "loss": 1.2899, + "step": 1640 + }, + { + "epoch": 0.03, + "learning_rate": 4.95e-06, + "loss": 1.3281, + "step": 1650 + }, + { + "epoch": 0.03, + "learning_rate": 4.980000000000001e-06, + "loss": 1.2799, + "step": 1660 + }, + { + "epoch": 0.03, + "learning_rate": 5.01e-06, + "loss": 1.2776, + "step": 1670 + }, + { + "epoch": 0.03, + "learning_rate": 5.04e-06, + "loss": 1.1707, + "step": 1680 + }, + { + "epoch": 0.03, + "learning_rate": 5.070000000000001e-06, + "loss": 1.2293, + "step": 1690 + }, + { + "epoch": 0.03, + "learning_rate": 5.1e-06, + "loss": 1.2953, + "step": 1700 + }, + { + "epoch": 0.03, + "learning_rate": 5.130000000000001e-06, + "loss": 1.2206, + "step": 1710 + }, + { + "epoch": 0.03, + "learning_rate": 5.16e-06, + "loss": 1.2309, + "step": 1720 + }, + { + "epoch": 0.03, + "learning_rate": 5.1899999999999994e-06, + "loss": 1.0757, + "step": 1730 + }, + { + "epoch": 0.03, + "learning_rate": 5.22e-06, + "loss": 1.4371, + "step": 1740 + }, + { + "epoch": 0.04, + "learning_rate": 5.25e-06, + "loss": 1.0968, + "step": 1750 + }, + { + "epoch": 0.04, + "learning_rate": 5.279999999999999e-06, + "loss": 1.2251, + "step": 1760 + }, + { + "epoch": 0.04, + "learning_rate": 5.31e-06, + "loss": 1.3717, + "step": 1770 + }, + { + "epoch": 0.04, + "learning_rate": 5.34e-06, + "loss": 1.2957, + "step": 1780 + }, + { + "epoch": 0.04, + "learning_rate": 5.37e-06, + "loss": 1.2934, + "step": 1790 + }, + { + "epoch": 0.04, + "learning_rate": 5.4e-06, + "loss": 1.2453, + "step": 1800 + }, + { + "epoch": 0.04, + "learning_rate": 5.43e-06, + "loss": 1.1924, + "step": 1810 + }, + { + "epoch": 0.04, + "learning_rate": 5.46e-06, + "loss": 1.1958, + "step": 1820 + }, + { + "epoch": 0.04, + "learning_rate": 5.49e-06, + "loss": 1.1375, + "step": 1830 + }, + { + "epoch": 0.04, + "learning_rate": 5.52e-06, + "loss": 1.2275, + "step": 1840 + }, + { + "epoch": 0.04, + "learning_rate": 5.55e-06, + "loss": 1.2237, + "step": 1850 + }, + { + "epoch": 0.04, + "learning_rate": 5.58e-06, + "loss": 1.3711, + "step": 1860 + }, + { + "epoch": 0.04, + "learning_rate": 5.6100000000000005e-06, + "loss": 1.1037, + "step": 1870 + }, + { + "epoch": 0.04, + "learning_rate": 5.64e-06, + "loss": 1.2853, + "step": 1880 + }, + { + "epoch": 0.04, + "learning_rate": 5.67e-06, + "loss": 1.2894, + "step": 1890 + }, + { + "epoch": 0.04, + "learning_rate": 5.7000000000000005e-06, + "loss": 1.0479, + "step": 1900 + }, + { + "epoch": 0.04, + "learning_rate": 5.73e-06, + "loss": 1.3781, + "step": 1910 + }, + { + "epoch": 0.04, + "learning_rate": 5.76e-06, + "loss": 1.313, + "step": 1920 + }, + { + "epoch": 0.04, + "learning_rate": 5.7900000000000005e-06, + "loss": 1.2074, + "step": 1930 + }, + { + "epoch": 0.04, + "learning_rate": 5.82e-06, + "loss": 1.1394, + "step": 1940 + }, + { + "epoch": 0.04, + "learning_rate": 5.850000000000001e-06, + "loss": 1.0924, + "step": 1950 + }, + { + "epoch": 0.04, + "learning_rate": 5.8800000000000005e-06, + "loss": 1.1553, + "step": 1960 + }, + { + "epoch": 0.04, + "learning_rate": 5.91e-06, + "loss": 1.3285, + "step": 1970 + }, + { + "epoch": 0.04, + "learning_rate": 5.940000000000001e-06, + "loss": 1.4713, + "step": 1980 + }, + { + "epoch": 0.04, + "learning_rate": 5.9700000000000004e-06, + "loss": 0.9996, + "step": 1990 + }, + { + "epoch": 0.04, + "learning_rate": 6e-06, + "loss": 1.3802, + "step": 2000 + }, + { + "epoch": 0.04, + "eval_accuracy": 0.52, + "eval_f1": 0.52, + "eval_loss": 1.23806631565094, + "eval_runtime": 786.4493, + "eval_samples_per_second": 6.04, + "eval_steps_per_second": 1.511, + "step": 2000 + }, + { + "epoch": 1.0, + "learning_rate": 6.030000000000001e-06, + "loss": 1.4493, + "step": 2010 + }, + { + "epoch": 1.0, + "learning_rate": 6.0600000000000004e-06, + "loss": 1.3382, + "step": 2020 + }, + { + "epoch": 1.0, + "learning_rate": 6.090000000000001e-06, + "loss": 1.2994, + "step": 2030 + }, + { + "epoch": 1.0, + "learning_rate": 6.12e-06, + "loss": 1.1561, + "step": 2040 + }, + { + "epoch": 1.0, + "learning_rate": 6.1499999999999996e-06, + "loss": 1.2175, + "step": 2050 + }, + { + "epoch": 1.0, + "learning_rate": 6.18e-06, + "loss": 1.2732, + "step": 2060 + }, + { + "epoch": 1.0, + "learning_rate": 6.21e-06, + "loss": 1.1406, + "step": 2070 + }, + { + "epoch": 1.0, + "learning_rate": 6.2399999999999995e-06, + "loss": 1.2093, + "step": 2080 + }, + { + "epoch": 1.0, + "learning_rate": 6.27e-06, + "loss": 1.0776, + "step": 2090 + }, + { + "epoch": 1.0, + "learning_rate": 6.3e-06, + "loss": 1.3075, + "step": 2100 + }, + { + "epoch": 1.0, + "learning_rate": 6.3299999999999995e-06, + "loss": 1.2189, + "step": 2110 + }, + { + "epoch": 1.0, + "learning_rate": 6.36e-06, + "loss": 1.1371, + "step": 2120 + }, + { + "epoch": 1.0, + "learning_rate": 6.39e-06, + "loss": 1.2426, + "step": 2130 + }, + { + "epoch": 1.0, + "learning_rate": 6.42e-06, + "loss": 1.1542, + "step": 2140 + }, + { + "epoch": 1.0, + "learning_rate": 6.45e-06, + "loss": 1.2668, + "step": 2150 + }, + { + "epoch": 1.0, + "learning_rate": 6.48e-06, + "loss": 1.1043, + "step": 2160 + }, + { + "epoch": 1.0, + "learning_rate": 6.51e-06, + "loss": 1.3451, + "step": 2170 + }, + { + "epoch": 1.0, + "learning_rate": 6.54e-06, + "loss": 1.1112, + "step": 2180 + }, + { + "epoch": 1.0, + "learning_rate": 6.57e-06, + "loss": 0.9727, + "step": 2190 + }, + { + "epoch": 1.0, + "learning_rate": 6.6e-06, + "loss": 1.1381, + "step": 2200 + }, + { + "epoch": 1.0, + "learning_rate": 6.63e-06, + "loss": 1.1848, + "step": 2210 + }, + { + "epoch": 1.0, + "learning_rate": 6.660000000000001e-06, + "loss": 1.2371, + "step": 2220 + }, + { + "epoch": 1.0, + "learning_rate": 6.69e-06, + "loss": 1.4057, + "step": 2230 + }, + { + "epoch": 1.0, + "learning_rate": 6.72e-06, + "loss": 1.2255, + "step": 2240 + }, + { + "epoch": 1.0, + "learning_rate": 6.750000000000001e-06, + "loss": 1.2957, + "step": 2250 + }, + { + "epoch": 1.01, + "learning_rate": 6.78e-06, + "loss": 1.1859, + "step": 2260 + }, + { + "epoch": 1.01, + "learning_rate": 6.81e-06, + "loss": 1.3443, + "step": 2270 + }, + { + "epoch": 1.01, + "learning_rate": 6.840000000000001e-06, + "loss": 1.2337, + "step": 2280 + }, + { + "epoch": 1.01, + "learning_rate": 6.87e-06, + "loss": 1.0896, + "step": 2290 + }, + { + "epoch": 1.01, + "learning_rate": 6.900000000000001e-06, + "loss": 1.1986, + "step": 2300 + }, + { + "epoch": 1.01, + "learning_rate": 6.9300000000000006e-06, + "loss": 1.2643, + "step": 2310 + }, + { + "epoch": 1.01, + "learning_rate": 6.96e-06, + "loss": 1.0731, + "step": 2320 + }, + { + "epoch": 1.01, + "learning_rate": 6.990000000000001e-06, + "loss": 1.0705, + "step": 2330 + }, + { + "epoch": 1.01, + "learning_rate": 7.0200000000000006e-06, + "loss": 1.141, + "step": 2340 + }, + { + "epoch": 1.01, + "learning_rate": 7.049999999999999e-06, + "loss": 0.9496, + "step": 2350 + }, + { + "epoch": 1.01, + "learning_rate": 7.08e-06, + "loss": 1.2506, + "step": 2360 + }, + { + "epoch": 1.01, + "learning_rate": 7.11e-06, + "loss": 1.3169, + "step": 2370 + }, + { + "epoch": 1.01, + "learning_rate": 7.14e-06, + "loss": 1.2174, + "step": 2380 + }, + { + "epoch": 1.01, + "learning_rate": 7.17e-06, + "loss": 1.3699, + "step": 2390 + }, + { + "epoch": 1.01, + "learning_rate": 7.2e-06, + "loss": 1.2294, + "step": 2400 + }, + { + "epoch": 1.01, + "learning_rate": 7.23e-06, + "loss": 1.0979, + "step": 2410 + }, + { + "epoch": 1.01, + "learning_rate": 7.26e-06, + "loss": 1.1005, + "step": 2420 + }, + { + "epoch": 1.01, + "learning_rate": 7.29e-06, + "loss": 1.0107, + "step": 2430 + }, + { + "epoch": 1.01, + "learning_rate": 7.32e-06, + "loss": 1.187, + "step": 2440 + }, + { + "epoch": 1.01, + "learning_rate": 7.35e-06, + "loss": 0.9485, + "step": 2450 + }, + { + "epoch": 1.01, + "learning_rate": 7.3800000000000005e-06, + "loss": 1.2392, + "step": 2460 + }, + { + "epoch": 1.01, + "learning_rate": 7.41e-06, + "loss": 1.1229, + "step": 2470 + }, + { + "epoch": 1.01, + "learning_rate": 7.44e-06, + "loss": 1.0925, + "step": 2480 + }, + { + "epoch": 1.01, + "learning_rate": 7.4700000000000005e-06, + "loss": 1.1106, + "step": 2490 + }, + { + "epoch": 1.01, + "learning_rate": 7.5e-06, + "loss": 1.2757, + "step": 2500 + }, + { + "epoch": 1.01, + "learning_rate": 7.53e-06, + "loss": 1.0142, + "step": 2510 + }, + { + "epoch": 1.01, + "learning_rate": 7.5600000000000005e-06, + "loss": 1.3197, + "step": 2520 + }, + { + "epoch": 1.01, + "learning_rate": 7.59e-06, + "loss": 1.1721, + "step": 2530 + }, + { + "epoch": 1.01, + "learning_rate": 7.62e-06, + "loss": 1.2866, + "step": 2540 + }, + { + "epoch": 1.01, + "learning_rate": 7.65e-06, + "loss": 1.2752, + "step": 2550 + }, + { + "epoch": 1.01, + "learning_rate": 7.680000000000001e-06, + "loss": 1.1496, + "step": 2560 + }, + { + "epoch": 1.01, + "learning_rate": 7.71e-06, + "loss": 1.0845, + "step": 2570 + }, + { + "epoch": 1.01, + "learning_rate": 7.74e-06, + "loss": 1.0634, + "step": 2580 + }, + { + "epoch": 1.01, + "learning_rate": 7.77e-06, + "loss": 1.179, + "step": 2590 + }, + { + "epoch": 1.01, + "learning_rate": 7.8e-06, + "loss": 1.2051, + "step": 2600 + }, + { + "epoch": 1.01, + "learning_rate": 7.830000000000001e-06, + "loss": 1.062, + "step": 2610 + }, + { + "epoch": 1.01, + "learning_rate": 7.860000000000001e-06, + "loss": 1.2288, + "step": 2620 + }, + { + "epoch": 1.01, + "learning_rate": 7.89e-06, + "loss": 1.4461, + "step": 2630 + }, + { + "epoch": 1.01, + "learning_rate": 7.92e-06, + "loss": 1.0729, + "step": 2640 + }, + { + "epoch": 1.01, + "learning_rate": 7.95e-06, + "loss": 1.1626, + "step": 2650 + }, + { + "epoch": 1.01, + "learning_rate": 7.98e-06, + "loss": 1.1844, + "step": 2660 + }, + { + "epoch": 1.01, + "learning_rate": 8.010000000000001e-06, + "loss": 1.2165, + "step": 2670 + }, + { + "epoch": 1.01, + "learning_rate": 8.040000000000001e-06, + "loss": 1.0876, + "step": 2680 + }, + { + "epoch": 1.01, + "learning_rate": 8.07e-06, + "loss": 1.2056, + "step": 2690 + }, + { + "epoch": 1.01, + "learning_rate": 8.1e-06, + "loss": 1.3396, + "step": 2700 + }, + { + "epoch": 1.01, + "learning_rate": 8.13e-06, + "loss": 1.165, + "step": 2710 + }, + { + "epoch": 1.01, + "learning_rate": 8.160000000000001e-06, + "loss": 0.9852, + "step": 2720 + }, + { + "epoch": 1.01, + "learning_rate": 8.190000000000001e-06, + "loss": 0.7788, + "step": 2730 + }, + { + "epoch": 1.01, + "learning_rate": 8.220000000000001e-06, + "loss": 1.1942, + "step": 2740 + }, + { + "epoch": 1.01, + "learning_rate": 8.25e-06, + "loss": 1.2522, + "step": 2750 + }, + { + "epoch": 1.02, + "learning_rate": 8.28e-06, + "loss": 1.2626, + "step": 2760 + }, + { + "epoch": 1.02, + "learning_rate": 8.310000000000002e-06, + "loss": 1.1067, + "step": 2770 + }, + { + "epoch": 1.02, + "learning_rate": 8.340000000000001e-06, + "loss": 1.0825, + "step": 2780 + }, + { + "epoch": 1.02, + "learning_rate": 8.370000000000001e-06, + "loss": 1.1901, + "step": 2790 + }, + { + "epoch": 1.02, + "learning_rate": 8.400000000000001e-06, + "loss": 1.2596, + "step": 2800 + }, + { + "epoch": 1.02, + "learning_rate": 8.43e-06, + "loss": 1.2528, + "step": 2810 + }, + { + "epoch": 1.02, + "learning_rate": 8.459999999999999e-06, + "loss": 1.1476, + "step": 2820 + }, + { + "epoch": 1.02, + "learning_rate": 8.49e-06, + "loss": 1.2297, + "step": 2830 + }, + { + "epoch": 1.02, + "learning_rate": 8.52e-06, + "loss": 1.0016, + "step": 2840 + }, + { + "epoch": 1.02, + "learning_rate": 8.55e-06, + "loss": 1.2551, + "step": 2850 + }, + { + "epoch": 1.02, + "learning_rate": 8.58e-06, + "loss": 0.9679, + "step": 2860 + }, + { + "epoch": 1.02, + "learning_rate": 8.609999999999999e-06, + "loss": 1.2398, + "step": 2870 + }, + { + "epoch": 1.02, + "learning_rate": 8.64e-06, + "loss": 1.0966, + "step": 2880 + }, + { + "epoch": 1.02, + "learning_rate": 8.67e-06, + "loss": 0.9264, + "step": 2890 + }, + { + "epoch": 1.02, + "learning_rate": 8.7e-06, + "loss": 1.1826, + "step": 2900 + }, + { + "epoch": 1.02, + "learning_rate": 8.73e-06, + "loss": 1.0171, + "step": 2910 + }, + { + "epoch": 1.02, + "learning_rate": 8.759999999999999e-06, + "loss": 1.0463, + "step": 2920 + }, + { + "epoch": 1.02, + "learning_rate": 8.79e-06, + "loss": 1.2805, + "step": 2930 + }, + { + "epoch": 1.02, + "learning_rate": 8.82e-06, + "loss": 1.1559, + "step": 2940 + }, + { + "epoch": 1.02, + "learning_rate": 8.85e-06, + "loss": 1.1426, + "step": 2950 + }, + { + "epoch": 1.02, + "learning_rate": 8.88e-06, + "loss": 1.3127, + "step": 2960 + }, + { + "epoch": 1.02, + "learning_rate": 8.91e-06, + "loss": 1.2136, + "step": 2970 + }, + { + "epoch": 1.02, + "learning_rate": 8.939999999999999e-06, + "loss": 1.2175, + "step": 2980 + }, + { + "epoch": 1.02, + "learning_rate": 8.97e-06, + "loss": 1.3208, + "step": 2990 + }, + { + "epoch": 1.02, + "learning_rate": 9e-06, + "loss": 1.2648, + "step": 3000 + }, + { + "epoch": 1.02, + "learning_rate": 9.03e-06, + "loss": 1.0219, + "step": 3010 + }, + { + "epoch": 1.02, + "learning_rate": 9.06e-06, + "loss": 1.1467, + "step": 3020 + }, + { + "epoch": 1.02, + "learning_rate": 9.09e-06, + "loss": 0.9967, + "step": 3030 + }, + { + "epoch": 1.02, + "learning_rate": 9.12e-06, + "loss": 0.9672, + "step": 3040 + }, + { + "epoch": 1.02, + "learning_rate": 9.15e-06, + "loss": 0.9971, + "step": 3050 + }, + { + "epoch": 1.02, + "learning_rate": 9.18e-06, + "loss": 1.2074, + "step": 3060 + }, + { + "epoch": 1.02, + "learning_rate": 9.21e-06, + "loss": 1.2956, + "step": 3070 + }, + { + "epoch": 1.02, + "learning_rate": 9.24e-06, + "loss": 1.0696, + "step": 3080 + }, + { + "epoch": 1.02, + "learning_rate": 9.27e-06, + "loss": 1.1306, + "step": 3090 + }, + { + "epoch": 1.02, + "learning_rate": 9.3e-06, + "loss": 0.9367, + "step": 3100 + }, + { + "epoch": 1.02, + "learning_rate": 9.33e-06, + "loss": 0.9672, + "step": 3110 + }, + { + "epoch": 1.02, + "learning_rate": 9.36e-06, + "loss": 1.0834, + "step": 3120 + }, + { + "epoch": 1.02, + "learning_rate": 9.39e-06, + "loss": 1.0347, + "step": 3130 + }, + { + "epoch": 1.02, + "learning_rate": 9.42e-06, + "loss": 0.881, + "step": 3140 + }, + { + "epoch": 1.02, + "learning_rate": 9.450000000000001e-06, + "loss": 0.8191, + "step": 3150 + }, + { + "epoch": 1.02, + "learning_rate": 9.48e-06, + "loss": 1.0785, + "step": 3160 + }, + { + "epoch": 1.02, + "learning_rate": 9.51e-06, + "loss": 1.1429, + "step": 3170 + }, + { + "epoch": 1.02, + "learning_rate": 9.54e-06, + "loss": 0.9583, + "step": 3180 + }, + { + "epoch": 1.02, + "learning_rate": 9.57e-06, + "loss": 1.3133, + "step": 3190 + }, + { + "epoch": 1.02, + "learning_rate": 9.600000000000001e-06, + "loss": 1.1462, + "step": 3200 + }, + { + "epoch": 1.02, + "learning_rate": 9.630000000000001e-06, + "loss": 1.1374, + "step": 3210 + }, + { + "epoch": 1.02, + "learning_rate": 9.66e-06, + "loss": 1.0824, + "step": 3220 + }, + { + "epoch": 1.02, + "learning_rate": 9.69e-06, + "loss": 1.2885, + "step": 3230 + }, + { + "epoch": 1.02, + "learning_rate": 9.72e-06, + "loss": 1.2692, + "step": 3240 + }, + { + "epoch": 1.02, + "learning_rate": 9.75e-06, + "loss": 1.4844, + "step": 3250 + }, + { + "epoch": 1.03, + "learning_rate": 9.780000000000001e-06, + "loss": 1.2084, + "step": 3260 + }, + { + "epoch": 1.03, + "learning_rate": 9.810000000000001e-06, + "loss": 1.1797, + "step": 3270 + }, + { + "epoch": 1.03, + "learning_rate": 9.84e-06, + "loss": 1.0209, + "step": 3280 + }, + { + "epoch": 1.03, + "learning_rate": 9.87e-06, + "loss": 1.1393, + "step": 3290 + }, + { + "epoch": 1.03, + "learning_rate": 9.9e-06, + "loss": 1.023, + "step": 3300 + }, + { + "epoch": 1.03, + "learning_rate": 9.930000000000001e-06, + "loss": 1.0185, + "step": 3310 + }, + { + "epoch": 1.03, + "learning_rate": 9.960000000000001e-06, + "loss": 1.0343, + "step": 3320 + }, + { + "epoch": 1.03, + "learning_rate": 9.990000000000001e-06, + "loss": 1.0599, + "step": 3330 + }, + { + "epoch": 1.03, + "learning_rate": 1.002e-05, + "loss": 0.9796, + "step": 3340 + }, + { + "epoch": 1.03, + "learning_rate": 1.005e-05, + "loss": 1.016, + "step": 3350 + }, + { + "epoch": 1.03, + "learning_rate": 1.008e-05, + "loss": 1.3834, + "step": 3360 + }, + { + "epoch": 1.03, + "learning_rate": 1.0110000000000001e-05, + "loss": 1.0219, + "step": 3370 + }, + { + "epoch": 1.03, + "learning_rate": 1.0140000000000001e-05, + "loss": 0.7479, + "step": 3380 + }, + { + "epoch": 1.03, + "learning_rate": 1.0170000000000001e-05, + "loss": 1.2832, + "step": 3390 + }, + { + "epoch": 1.03, + "learning_rate": 1.02e-05, + "loss": 1.2133, + "step": 3400 + }, + { + "epoch": 1.03, + "learning_rate": 1.023e-05, + "loss": 1.3182, + "step": 3410 + }, + { + "epoch": 1.03, + "learning_rate": 1.0260000000000002e-05, + "loss": 0.9505, + "step": 3420 + }, + { + "epoch": 1.03, + "learning_rate": 1.0290000000000001e-05, + "loss": 0.8651, + "step": 3430 + }, + { + "epoch": 1.03, + "learning_rate": 1.032e-05, + "loss": 0.9899, + "step": 3440 + }, + { + "epoch": 1.03, + "learning_rate": 1.035e-05, + "loss": 1.1628, + "step": 3450 + }, + { + "epoch": 1.03, + "learning_rate": 1.0379999999999999e-05, + "loss": 1.2309, + "step": 3460 + }, + { + "epoch": 1.03, + "learning_rate": 1.041e-05, + "loss": 1.0505, + "step": 3470 + }, + { + "epoch": 1.03, + "learning_rate": 1.044e-05, + "loss": 1.042, + "step": 3480 + }, + { + "epoch": 1.03, + "learning_rate": 1.047e-05, + "loss": 1.1551, + "step": 3490 + }, + { + "epoch": 1.03, + "learning_rate": 1.05e-05, + "loss": 1.0644, + "step": 3500 + }, + { + "epoch": 1.03, + "learning_rate": 1.0529999999999999e-05, + "loss": 1.0462, + "step": 3510 + }, + { + "epoch": 1.03, + "learning_rate": 1.0559999999999999e-05, + "loss": 1.0319, + "step": 3520 + }, + { + "epoch": 1.03, + "learning_rate": 1.059e-05, + "loss": 1.0456, + "step": 3530 + }, + { + "epoch": 1.03, + "learning_rate": 1.062e-05, + "loss": 0.9852, + "step": 3540 + }, + { + "epoch": 1.03, + "learning_rate": 1.065e-05, + "loss": 1.0286, + "step": 3550 + }, + { + "epoch": 1.03, + "learning_rate": 1.068e-05, + "loss": 1.0501, + "step": 3560 + }, + { + "epoch": 1.03, + "learning_rate": 1.0709999999999999e-05, + "loss": 1.0495, + "step": 3570 + }, + { + "epoch": 1.03, + "learning_rate": 1.074e-05, + "loss": 0.947, + "step": 3580 + }, + { + "epoch": 1.03, + "learning_rate": 1.077e-05, + "loss": 0.9354, + "step": 3590 + }, + { + "epoch": 1.03, + "learning_rate": 1.08e-05, + "loss": 0.9716, + "step": 3600 + }, + { + "epoch": 1.03, + "learning_rate": 1.083e-05, + "loss": 1.0102, + "step": 3610 + }, + { + "epoch": 1.03, + "learning_rate": 1.086e-05, + "loss": 1.2956, + "step": 3620 + }, + { + "epoch": 1.03, + "learning_rate": 1.089e-05, + "loss": 1.311, + "step": 3630 + }, + { + "epoch": 1.03, + "learning_rate": 1.092e-05, + "loss": 1.2648, + "step": 3640 + }, + { + "epoch": 1.03, + "learning_rate": 1.095e-05, + "loss": 1.1586, + "step": 3650 + }, + { + "epoch": 1.03, + "learning_rate": 1.098e-05, + "loss": 1.0152, + "step": 3660 + }, + { + "epoch": 1.03, + "learning_rate": 1.101e-05, + "loss": 1.0581, + "step": 3670 + }, + { + "epoch": 1.03, + "learning_rate": 1.104e-05, + "loss": 1.0796, + "step": 3680 + }, + { + "epoch": 1.03, + "learning_rate": 1.107e-05, + "loss": 1.078, + "step": 3690 + }, + { + "epoch": 1.03, + "learning_rate": 1.11e-05, + "loss": 1.2068, + "step": 3700 + }, + { + "epoch": 1.03, + "learning_rate": 1.113e-05, + "loss": 1.0106, + "step": 3710 + }, + { + "epoch": 1.03, + "learning_rate": 1.116e-05, + "loss": 1.1514, + "step": 3720 + }, + { + "epoch": 1.03, + "learning_rate": 1.119e-05, + "loss": 0.9577, + "step": 3730 + }, + { + "epoch": 1.03, + "learning_rate": 1.1220000000000001e-05, + "loss": 1.2864, + "step": 3740 + }, + { + "epoch": 1.03, + "learning_rate": 1.125e-05, + "loss": 1.1648, + "step": 3750 + }, + { + "epoch": 1.04, + "learning_rate": 1.128e-05, + "loss": 1.1744, + "step": 3760 + }, + { + "epoch": 1.04, + "learning_rate": 1.131e-05, + "loss": 1.0227, + "step": 3770 + }, + { + "epoch": 1.04, + "learning_rate": 1.134e-05, + "loss": 0.8843, + "step": 3780 + }, + { + "epoch": 1.04, + "learning_rate": 1.137e-05, + "loss": 0.9374, + "step": 3790 + }, + { + "epoch": 1.04, + "learning_rate": 1.1400000000000001e-05, + "loss": 1.045, + "step": 3800 + }, + { + "epoch": 1.04, + "learning_rate": 1.143e-05, + "loss": 1.1581, + "step": 3810 + }, + { + "epoch": 1.04, + "learning_rate": 1.146e-05, + "loss": 1.0742, + "step": 3820 + }, + { + "epoch": 1.04, + "learning_rate": 1.149e-05, + "loss": 1.2959, + "step": 3830 + }, + { + "epoch": 1.04, + "learning_rate": 1.152e-05, + "loss": 1.1807, + "step": 3840 + }, + { + "epoch": 1.04, + "learning_rate": 1.1550000000000001e-05, + "loss": 1.0489, + "step": 3850 + }, + { + "epoch": 1.04, + "learning_rate": 1.1580000000000001e-05, + "loss": 1.1499, + "step": 3860 + }, + { + "epoch": 1.04, + "learning_rate": 1.161e-05, + "loss": 1.1995, + "step": 3870 + }, + { + "epoch": 1.04, + "learning_rate": 1.164e-05, + "loss": 0.9318, + "step": 3880 + }, + { + "epoch": 1.04, + "learning_rate": 1.167e-05, + "loss": 1.0526, + "step": 3890 + }, + { + "epoch": 1.04, + "learning_rate": 1.1700000000000001e-05, + "loss": 1.1047, + "step": 3900 + }, + { + "epoch": 1.04, + "learning_rate": 1.1730000000000001e-05, + "loss": 1.0165, + "step": 3910 + }, + { + "epoch": 1.04, + "learning_rate": 1.1760000000000001e-05, + "loss": 1.188, + "step": 3920 + }, + { + "epoch": 1.04, + "learning_rate": 1.179e-05, + "loss": 1.137, + "step": 3930 + }, + { + "epoch": 1.04, + "learning_rate": 1.182e-05, + "loss": 1.1563, + "step": 3940 + }, + { + "epoch": 1.04, + "learning_rate": 1.185e-05, + "loss": 1.1343, + "step": 3950 + }, + { + "epoch": 1.04, + "learning_rate": 1.1880000000000001e-05, + "loss": 1.1096, + "step": 3960 + }, + { + "epoch": 1.04, + "learning_rate": 1.1910000000000001e-05, + "loss": 1.1273, + "step": 3970 + }, + { + "epoch": 1.04, + "learning_rate": 1.1940000000000001e-05, + "loss": 0.976, + "step": 3980 + }, + { + "epoch": 1.04, + "learning_rate": 1.197e-05, + "loss": 0.9634, + "step": 3990 + }, + { + "epoch": 1.04, + "learning_rate": 1.2e-05, + "loss": 1.0115, + "step": 4000 + }, + { + "epoch": 1.04, + "eval_accuracy": 0.6684210526315789, + "eval_f1": 0.6684210526315789, + "eval_loss": 1.0522350072860718, + "eval_runtime": 771.0264, + "eval_samples_per_second": 6.161, + "eval_steps_per_second": 1.541, + "step": 4000 + }, + { + "epoch": 2.0, + "learning_rate": 1.2030000000000002e-05, + "loss": 1.1145, + "step": 4010 + }, + { + "epoch": 2.0, + "learning_rate": 1.2060000000000001e-05, + "loss": 1.378, + "step": 4020 + }, + { + "epoch": 2.0, + "learning_rate": 1.2090000000000001e-05, + "loss": 1.1337, + "step": 4030 + }, + { + "epoch": 2.0, + "learning_rate": 1.2120000000000001e-05, + "loss": 0.8445, + "step": 4040 + }, + { + "epoch": 2.0, + "learning_rate": 1.215e-05, + "loss": 0.8296, + "step": 4050 + }, + { + "epoch": 2.0, + "learning_rate": 1.2180000000000002e-05, + "loss": 1.2409, + "step": 4060 + }, + { + "epoch": 2.0, + "learning_rate": 1.221e-05, + "loss": 1.0398, + "step": 4070 + }, + { + "epoch": 2.0, + "learning_rate": 1.224e-05, + "loss": 0.9204, + "step": 4080 + }, + { + "epoch": 2.0, + "learning_rate": 1.227e-05, + "loss": 0.9676, + "step": 4090 + }, + { + "epoch": 2.0, + "learning_rate": 1.2299999999999999e-05, + "loss": 1.0203, + "step": 4100 + }, + { + "epoch": 2.0, + "learning_rate": 1.2329999999999999e-05, + "loss": 1.1571, + "step": 4110 + }, + { + "epoch": 2.0, + "learning_rate": 1.236e-05, + "loss": 1.2317, + "step": 4120 + }, + { + "epoch": 2.0, + "learning_rate": 1.239e-05, + "loss": 1.004, + "step": 4130 + }, + { + "epoch": 2.0, + "learning_rate": 1.242e-05, + "loss": 1.3242, + "step": 4140 + }, + { + "epoch": 2.0, + "learning_rate": 1.245e-05, + "loss": 1.0648, + "step": 4150 + }, + { + "epoch": 2.0, + "learning_rate": 1.2479999999999999e-05, + "loss": 0.8635, + "step": 4160 + }, + { + "epoch": 2.0, + "learning_rate": 1.251e-05, + "loss": 0.9419, + "step": 4170 + }, + { + "epoch": 2.0, + "learning_rate": 1.254e-05, + "loss": 1.0629, + "step": 4180 + }, + { + "epoch": 2.0, + "learning_rate": 1.257e-05, + "loss": 1.0586, + "step": 4190 + }, + { + "epoch": 2.0, + "learning_rate": 1.26e-05, + "loss": 0.9666, + "step": 4200 + }, + { + "epoch": 2.0, + "learning_rate": 1.263e-05, + "loss": 0.8782, + "step": 4210 + }, + { + "epoch": 2.0, + "learning_rate": 1.2659999999999999e-05, + "loss": 1.1148, + "step": 4220 + }, + { + "epoch": 2.0, + "learning_rate": 1.269e-05, + "loss": 0.9179, + "step": 4230 + }, + { + "epoch": 2.0, + "learning_rate": 1.272e-05, + "loss": 1.1387, + "step": 4240 + }, + { + "epoch": 2.0, + "learning_rate": 1.275e-05, + "loss": 1.1456, + "step": 4250 + }, + { + "epoch": 2.01, + "learning_rate": 1.278e-05, + "loss": 1.1761, + "step": 4260 + }, + { + "epoch": 2.01, + "learning_rate": 1.281e-05, + "loss": 0.7777, + "step": 4270 + }, + { + "epoch": 2.01, + "learning_rate": 1.284e-05, + "loss": 0.9239, + "step": 4280 + }, + { + "epoch": 2.01, + "learning_rate": 1.287e-05, + "loss": 0.8541, + "step": 4290 + }, + { + "epoch": 2.01, + "learning_rate": 1.29e-05, + "loss": 1.1384, + "step": 4300 + }, + { + "epoch": 2.01, + "learning_rate": 1.293e-05, + "loss": 0.9394, + "step": 4310 + }, + { + "epoch": 2.01, + "learning_rate": 1.296e-05, + "loss": 0.9221, + "step": 4320 + }, + { + "epoch": 2.01, + "learning_rate": 1.2990000000000001e-05, + "loss": 1.0862, + "step": 4330 + }, + { + "epoch": 2.01, + "learning_rate": 1.302e-05, + "loss": 1.1334, + "step": 4340 + }, + { + "epoch": 2.01, + "learning_rate": 1.305e-05, + "loss": 1.1778, + "step": 4350 + }, + { + "epoch": 2.01, + "learning_rate": 1.308e-05, + "loss": 0.9096, + "step": 4360 + }, + { + "epoch": 2.01, + "learning_rate": 1.311e-05, + "loss": 1.006, + "step": 4370 + }, + { + "epoch": 2.01, + "learning_rate": 1.314e-05, + "loss": 1.0986, + "step": 4380 + }, + { + "epoch": 2.01, + "learning_rate": 1.3170000000000001e-05, + "loss": 1.0341, + "step": 4390 + }, + { + "epoch": 2.01, + "learning_rate": 1.32e-05, + "loss": 1.1357, + "step": 4400 + }, + { + "epoch": 2.01, + "learning_rate": 1.323e-05, + "loss": 0.9352, + "step": 4410 + }, + { + "epoch": 2.01, + "learning_rate": 1.326e-05, + "loss": 1.1474, + "step": 4420 + }, + { + "epoch": 2.01, + "learning_rate": 1.329e-05, + "loss": 0.8776, + "step": 4430 + }, + { + "epoch": 2.01, + "learning_rate": 1.3320000000000001e-05, + "loss": 0.945, + "step": 4440 + }, + { + "epoch": 2.01, + "learning_rate": 1.3350000000000001e-05, + "loss": 1.067, + "step": 4450 + }, + { + "epoch": 2.01, + "learning_rate": 1.338e-05, + "loss": 0.6447, + "step": 4460 + }, + { + "epoch": 2.01, + "learning_rate": 1.341e-05, + "loss": 1.123, + "step": 4470 + }, + { + "epoch": 2.01, + "learning_rate": 1.344e-05, + "loss": 0.9915, + "step": 4480 + }, + { + "epoch": 2.01, + "learning_rate": 1.3470000000000001e-05, + "loss": 1.0084, + "step": 4490 + }, + { + "epoch": 2.01, + "learning_rate": 1.3500000000000001e-05, + "loss": 1.0558, + "step": 4500 + }, + { + "epoch": 2.01, + "learning_rate": 1.3530000000000001e-05, + "loss": 0.9298, + "step": 4510 + }, + { + "epoch": 2.01, + "learning_rate": 1.356e-05, + "loss": 0.948, + "step": 4520 + }, + { + "epoch": 2.01, + "learning_rate": 1.359e-05, + "loss": 1.0212, + "step": 4530 + }, + { + "epoch": 2.01, + "learning_rate": 1.362e-05, + "loss": 1.1543, + "step": 4540 + }, + { + "epoch": 2.01, + "learning_rate": 1.3650000000000001e-05, + "loss": 1.1628, + "step": 4550 + }, + { + "epoch": 2.01, + "learning_rate": 1.3680000000000001e-05, + "loss": 1.231, + "step": 4560 + }, + { + "epoch": 2.01, + "learning_rate": 1.3710000000000001e-05, + "loss": 1.251, + "step": 4570 + }, + { + "epoch": 2.01, + "learning_rate": 1.374e-05, + "loss": 0.821, + "step": 4580 + }, + { + "epoch": 2.01, + "learning_rate": 1.377e-05, + "loss": 0.786, + "step": 4590 + }, + { + "epoch": 2.01, + "learning_rate": 1.3800000000000002e-05, + "loss": 1.1307, + "step": 4600 + }, + { + "epoch": 2.01, + "learning_rate": 1.3830000000000001e-05, + "loss": 1.1182, + "step": 4610 + }, + { + "epoch": 2.01, + "learning_rate": 1.3860000000000001e-05, + "loss": 1.0388, + "step": 4620 + }, + { + "epoch": 2.01, + "learning_rate": 1.389e-05, + "loss": 1.0585, + "step": 4630 + }, + { + "epoch": 2.01, + "learning_rate": 1.392e-05, + "loss": 0.7796, + "step": 4640 + }, + { + "epoch": 2.01, + "learning_rate": 1.395e-05, + "loss": 1.1808, + "step": 4650 + }, + { + "epoch": 2.01, + "learning_rate": 1.3980000000000002e-05, + "loss": 0.9879, + "step": 4660 + }, + { + "epoch": 2.01, + "learning_rate": 1.4010000000000001e-05, + "loss": 1.0262, + "step": 4670 + }, + { + "epoch": 2.01, + "learning_rate": 1.4040000000000001e-05, + "loss": 0.9567, + "step": 4680 + }, + { + "epoch": 2.01, + "learning_rate": 1.4069999999999999e-05, + "loss": 1.2993, + "step": 4690 + }, + { + "epoch": 2.01, + "learning_rate": 1.4099999999999999e-05, + "loss": 1.024, + "step": 4700 + }, + { + "epoch": 2.01, + "learning_rate": 1.413e-05, + "loss": 1.0662, + "step": 4710 + }, + { + "epoch": 2.01, + "learning_rate": 1.416e-05, + "loss": 1.2413, + "step": 4720 + }, + { + "epoch": 2.01, + "learning_rate": 1.419e-05, + "loss": 1.0022, + "step": 4730 + }, + { + "epoch": 2.01, + "learning_rate": 1.422e-05, + "loss": 0.807, + "step": 4740 + }, + { + "epoch": 2.02, + "learning_rate": 1.4249999999999999e-05, + "loss": 1.2506, + "step": 4750 + }, + { + "epoch": 2.02, + "learning_rate": 1.428e-05, + "loss": 1.1809, + "step": 4760 + }, + { + "epoch": 2.02, + "learning_rate": 1.431e-05, + "loss": 1.0797, + "step": 4770 + }, + { + "epoch": 2.02, + "learning_rate": 1.434e-05, + "loss": 0.9592, + "step": 4780 + }, + { + "epoch": 2.02, + "learning_rate": 1.437e-05, + "loss": 1.0962, + "step": 4790 + }, + { + "epoch": 2.02, + "learning_rate": 1.44e-05, + "loss": 1.1471, + "step": 4800 + }, + { + "epoch": 2.02, + "learning_rate": 1.4429999999999999e-05, + "loss": 0.9697, + "step": 4810 + }, + { + "epoch": 2.02, + "learning_rate": 1.446e-05, + "loss": 1.0342, + "step": 4820 + }, + { + "epoch": 2.02, + "learning_rate": 1.449e-05, + "loss": 0.8653, + "step": 4830 + }, + { + "epoch": 2.02, + "learning_rate": 1.452e-05, + "loss": 1.1813, + "step": 4840 + }, + { + "epoch": 2.02, + "learning_rate": 1.455e-05, + "loss": 1.097, + "step": 4850 + }, + { + "epoch": 2.02, + "learning_rate": 1.458e-05, + "loss": 1.3181, + "step": 4860 + }, + { + "epoch": 2.02, + "learning_rate": 1.461e-05, + "loss": 1.0778, + "step": 4870 + }, + { + "epoch": 2.02, + "learning_rate": 1.464e-05, + "loss": 1.096, + "step": 4880 + }, + { + "epoch": 2.02, + "learning_rate": 1.467e-05, + "loss": 0.8033, + "step": 4890 + }, + { + "epoch": 2.02, + "learning_rate": 1.47e-05, + "loss": 1.0194, + "step": 4900 + }, + { + "epoch": 2.02, + "learning_rate": 1.473e-05, + "loss": 1.0501, + "step": 4910 + }, + { + "epoch": 2.02, + "learning_rate": 1.4760000000000001e-05, + "loss": 0.7855, + "step": 4920 + }, + { + "epoch": 2.02, + "learning_rate": 1.479e-05, + "loss": 1.151, + "step": 4930 + }, + { + "epoch": 2.02, + "learning_rate": 1.482e-05, + "loss": 1.072, + "step": 4940 + }, + { + "epoch": 2.02, + "learning_rate": 1.485e-05, + "loss": 1.0244, + "step": 4950 + }, + { + "epoch": 2.02, + "learning_rate": 1.488e-05, + "loss": 0.9692, + "step": 4960 + }, + { + "epoch": 2.02, + "learning_rate": 1.491e-05, + "loss": 1.0249, + "step": 4970 + }, + { + "epoch": 2.02, + "learning_rate": 1.4940000000000001e-05, + "loss": 0.964, + "step": 4980 + }, + { + "epoch": 2.02, + "learning_rate": 1.497e-05, + "loss": 0.9706, + "step": 4990 + }, + { + "epoch": 2.02, + "learning_rate": 1.5e-05, + "loss": 1.2112, + "step": 5000 + }, + { + "epoch": 2.02, + "learning_rate": 1.4999998172295556e-05, + "loss": 0.8264, + "step": 5010 + }, + { + "epoch": 2.02, + "learning_rate": 1.4999992689183113e-05, + "loss": 0.952, + "step": 5020 + }, + { + "epoch": 2.02, + "learning_rate": 1.4999983550665345e-05, + "loss": 1.3463, + "step": 5030 + }, + { + "epoch": 2.02, + "learning_rate": 1.4999970756746704e-05, + "loss": 0.9856, + "step": 5040 + }, + { + "epoch": 2.02, + "learning_rate": 1.4999954307433428e-05, + "loss": 0.904, + "step": 5050 + }, + { + "epoch": 2.02, + "learning_rate": 1.4999934202733533e-05, + "loss": 0.9741, + "step": 5060 + }, + { + "epoch": 2.02, + "learning_rate": 1.4999910442656817e-05, + "loss": 0.8908, + "step": 5070 + }, + { + "epoch": 2.02, + "learning_rate": 1.4999883027214862e-05, + "loss": 1.2765, + "step": 5080 + }, + { + "epoch": 2.02, + "learning_rate": 1.4999851956421028e-05, + "loss": 0.9875, + "step": 5090 + }, + { + "epoch": 2.02, + "learning_rate": 1.499981723029046e-05, + "loss": 0.7411, + "step": 5100 + }, + { + "epoch": 2.02, + "learning_rate": 1.4999778848840085e-05, + "loss": 0.9148, + "step": 5110 + }, + { + "epoch": 2.02, + "learning_rate": 1.4999736812088606e-05, + "loss": 0.7769, + "step": 5120 + }, + { + "epoch": 2.02, + "learning_rate": 1.4999691120056512e-05, + "loss": 0.8871, + "step": 5130 + }, + { + "epoch": 2.02, + "learning_rate": 1.4999641772766074e-05, + "loss": 0.9924, + "step": 5140 + }, + { + "epoch": 2.02, + "learning_rate": 1.4999588770241342e-05, + "loss": 1.062, + "step": 5150 + }, + { + "epoch": 2.02, + "learning_rate": 1.499953211250815e-05, + "loss": 1.0356, + "step": 5160 + }, + { + "epoch": 2.02, + "learning_rate": 1.499947179959411e-05, + "loss": 0.9518, + "step": 5170 + }, + { + "epoch": 2.02, + "learning_rate": 1.4999407831528622e-05, + "loss": 1.1531, + "step": 5180 + }, + { + "epoch": 2.02, + "learning_rate": 1.4999340208342858e-05, + "loss": 0.9666, + "step": 5190 + }, + { + "epoch": 2.02, + "learning_rate": 1.4999268930069782e-05, + "loss": 1.3574, + "step": 5200 + }, + { + "epoch": 2.02, + "learning_rate": 1.4999193996744131e-05, + "loss": 0.9965, + "step": 5210 + }, + { + "epoch": 2.02, + "learning_rate": 1.4999115408402427e-05, + "loss": 1.0885, + "step": 5220 + }, + { + "epoch": 2.02, + "learning_rate": 1.4999033165082974e-05, + "loss": 0.9979, + "step": 5230 + }, + { + "epoch": 2.02, + "learning_rate": 1.4998947266825853e-05, + "loss": 1.0737, + "step": 5240 + }, + { + "epoch": 2.02, + "learning_rate": 1.4998857713672935e-05, + "loss": 0.9925, + "step": 5250 + }, + { + "epoch": 2.03, + "learning_rate": 1.4998764505667862e-05, + "loss": 1.1046, + "step": 5260 + }, + { + "epoch": 2.03, + "learning_rate": 1.4998667642856068e-05, + "loss": 0.9812, + "step": 5270 + }, + { + "epoch": 2.03, + "learning_rate": 1.4998567125284757e-05, + "loss": 1.0785, + "step": 5280 + }, + { + "epoch": 2.03, + "learning_rate": 1.4998462953002925e-05, + "loss": 1.0161, + "step": 5290 + }, + { + "epoch": 2.03, + "learning_rate": 1.4998355126061342e-05, + "loss": 0.8528, + "step": 5300 + }, + { + "epoch": 2.03, + "learning_rate": 1.499824364451256e-05, + "loss": 1.0053, + "step": 5310 + }, + { + "epoch": 2.03, + "learning_rate": 1.4998128508410916e-05, + "loss": 1.0668, + "step": 5320 + }, + { + "epoch": 2.03, + "learning_rate": 1.4998009717812525e-05, + "loss": 0.9532, + "step": 5330 + }, + { + "epoch": 2.03, + "learning_rate": 1.4997887272775285e-05, + "loss": 0.8229, + "step": 5340 + }, + { + "epoch": 2.03, + "learning_rate": 1.4997761173358875e-05, + "loss": 1.0487, + "step": 5350 + }, + { + "epoch": 2.03, + "learning_rate": 1.499763141962475e-05, + "loss": 0.8786, + "step": 5360 + }, + { + "epoch": 2.03, + "learning_rate": 1.4997498011636154e-05, + "loss": 0.9382, + "step": 5370 + }, + { + "epoch": 2.03, + "learning_rate": 1.499736094945811e-05, + "loss": 1.2307, + "step": 5380 + }, + { + "epoch": 2.03, + "learning_rate": 1.4997220233157415e-05, + "loss": 1.0116, + "step": 5390 + }, + { + "epoch": 2.03, + "learning_rate": 1.4997075862802657e-05, + "loss": 1.0605, + "step": 5400 + }, + { + "epoch": 2.03, + "learning_rate": 1.49969278384642e-05, + "loss": 0.9716, + "step": 5410 + }, + { + "epoch": 2.03, + "learning_rate": 1.4996776160214188e-05, + "loss": 1.1633, + "step": 5420 + }, + { + "epoch": 2.03, + "learning_rate": 1.4996620828126546e-05, + "loss": 0.7762, + "step": 5430 + }, + { + "epoch": 2.03, + "learning_rate": 1.4996461842276982e-05, + "loss": 1.1011, + "step": 5440 + }, + { + "epoch": 2.03, + "learning_rate": 1.4996299202742987e-05, + "loss": 1.0375, + "step": 5450 + }, + { + "epoch": 2.03, + "learning_rate": 1.4996132909603825e-05, + "loss": 1.0724, + "step": 5460 + }, + { + "epoch": 2.03, + "learning_rate": 1.4995962962940547e-05, + "loss": 1.5755, + "step": 5470 + }, + { + "epoch": 2.03, + "learning_rate": 1.4995789362835983e-05, + "loss": 0.9854, + "step": 5480 + }, + { + "epoch": 2.03, + "learning_rate": 1.4995612109374742e-05, + "loss": 0.8795, + "step": 5490 + }, + { + "epoch": 2.03, + "learning_rate": 1.4995431202643219e-05, + "loss": 0.8795, + "step": 5500 + }, + { + "epoch": 2.03, + "learning_rate": 1.499524664272958e-05, + "loss": 0.9471, + "step": 5510 + }, + { + "epoch": 2.03, + "learning_rate": 1.4995058429723783e-05, + "loss": 1.116, + "step": 5520 + }, + { + "epoch": 2.03, + "learning_rate": 1.499486656371756e-05, + "loss": 1.0457, + "step": 5530 + }, + { + "epoch": 2.03, + "learning_rate": 1.4994671044804419e-05, + "loss": 1.1661, + "step": 5540 + }, + { + "epoch": 2.03, + "learning_rate": 1.4994471873079658e-05, + "loss": 0.9537, + "step": 5550 + }, + { + "epoch": 2.03, + "learning_rate": 1.499426904864035e-05, + "loss": 0.9942, + "step": 5560 + }, + { + "epoch": 2.03, + "learning_rate": 1.4994062571585351e-05, + "loss": 1.101, + "step": 5570 + }, + { + "epoch": 2.03, + "learning_rate": 1.4993852442015293e-05, + "loss": 1.3061, + "step": 5580 + }, + { + "epoch": 2.03, + "learning_rate": 1.499363866003259e-05, + "loss": 1.0937, + "step": 5590 + }, + { + "epoch": 2.03, + "learning_rate": 1.4993421225741438e-05, + "loss": 0.8348, + "step": 5600 + }, + { + "epoch": 2.03, + "learning_rate": 1.4993200139247813e-05, + "loss": 1.1291, + "step": 5610 + }, + { + "epoch": 2.03, + "learning_rate": 1.4992975400659466e-05, + "loss": 0.9377, + "step": 5620 + }, + { + "epoch": 2.03, + "learning_rate": 1.4992747010085936e-05, + "loss": 0.9313, + "step": 5630 + }, + { + "epoch": 2.03, + "learning_rate": 1.4992514967638537e-05, + "loss": 1.1957, + "step": 5640 + }, + { + "epoch": 2.03, + "learning_rate": 1.4992279273430361e-05, + "loss": 0.9746, + "step": 5650 + }, + { + "epoch": 2.03, + "learning_rate": 1.4992039927576285e-05, + "loss": 1.1093, + "step": 5660 + }, + { + "epoch": 2.03, + "learning_rate": 1.4991796930192962e-05, + "loss": 0.9546, + "step": 5670 + }, + { + "epoch": 2.03, + "learning_rate": 1.4991550281398828e-05, + "loss": 0.9618, + "step": 5680 + }, + { + "epoch": 2.03, + "learning_rate": 1.4991299981314094e-05, + "loss": 0.9739, + "step": 5690 + }, + { + "epoch": 2.03, + "learning_rate": 1.4991046030060756e-05, + "loss": 0.9386, + "step": 5700 + }, + { + "epoch": 2.03, + "learning_rate": 1.4990788427762585e-05, + "loss": 1.2641, + "step": 5710 + }, + { + "epoch": 2.03, + "learning_rate": 1.4990527174545132e-05, + "loss": 1.0383, + "step": 5720 + }, + { + "epoch": 2.03, + "learning_rate": 1.499026227053573e-05, + "loss": 0.8838, + "step": 5730 + }, + { + "epoch": 2.03, + "learning_rate": 1.498999371586349e-05, + "loss": 1.0993, + "step": 5740 + }, + { + "epoch": 2.04, + "learning_rate": 1.4989721510659303e-05, + "loss": 1.2501, + "step": 5750 + }, + { + "epoch": 2.04, + "learning_rate": 1.498944565505584e-05, + "loss": 1.2304, + "step": 5760 + }, + { + "epoch": 2.04, + "learning_rate": 1.4989166149187544e-05, + "loss": 0.9101, + "step": 5770 + }, + { + "epoch": 2.04, + "learning_rate": 1.498888299319065e-05, + "loss": 1.1794, + "step": 5780 + }, + { + "epoch": 2.04, + "learning_rate": 1.4988596187203158e-05, + "loss": 0.9571, + "step": 5790 + }, + { + "epoch": 2.04, + "learning_rate": 1.4988305731364858e-05, + "loss": 1.2548, + "step": 5800 + }, + { + "epoch": 2.04, + "learning_rate": 1.4988011625817314e-05, + "loss": 1.0551, + "step": 5810 + }, + { + "epoch": 2.04, + "learning_rate": 1.4987713870703869e-05, + "loss": 1.1907, + "step": 5820 + }, + { + "epoch": 2.04, + "learning_rate": 1.4987412466169642e-05, + "loss": 1.0176, + "step": 5830 + }, + { + "epoch": 2.04, + "learning_rate": 1.4987107412361541e-05, + "loss": 1.0289, + "step": 5840 + }, + { + "epoch": 2.04, + "learning_rate": 1.4986798709428242e-05, + "loss": 1.3242, + "step": 5850 + }, + { + "epoch": 2.04, + "learning_rate": 1.49864863575202e-05, + "loss": 1.2081, + "step": 5860 + }, + { + "epoch": 2.04, + "learning_rate": 1.4986170356789654e-05, + "loss": 0.8864, + "step": 5870 + }, + { + "epoch": 2.04, + "learning_rate": 1.4985850707390621e-05, + "loss": 0.7922, + "step": 5880 + }, + { + "epoch": 2.04, + "learning_rate": 1.4985527409478893e-05, + "loss": 0.9615, + "step": 5890 + }, + { + "epoch": 2.04, + "learning_rate": 1.4985200463212038e-05, + "loss": 0.8607, + "step": 5900 + }, + { + "epoch": 2.04, + "learning_rate": 1.498486986874941e-05, + "loss": 0.8566, + "step": 5910 + }, + { + "epoch": 2.04, + "learning_rate": 1.4984535626252133e-05, + "loss": 1.2487, + "step": 5920 + }, + { + "epoch": 2.04, + "learning_rate": 1.4984197735883119e-05, + "loss": 1.1537, + "step": 5930 + }, + { + "epoch": 2.04, + "learning_rate": 1.4983856197807045e-05, + "loss": 1.1069, + "step": 5940 + }, + { + "epoch": 2.04, + "learning_rate": 1.4983511012190374e-05, + "loss": 0.6814, + "step": 5950 + }, + { + "epoch": 2.04, + "learning_rate": 1.498316217920135e-05, + "loss": 1.1646, + "step": 5960 + }, + { + "epoch": 2.04, + "learning_rate": 1.4982809699009982e-05, + "loss": 1.0384, + "step": 5970 + }, + { + "epoch": 2.04, + "learning_rate": 1.4982453571788074e-05, + "loss": 1.0221, + "step": 5980 + }, + { + "epoch": 2.04, + "learning_rate": 1.4982093797709188e-05, + "loss": 1.0354, + "step": 5990 + }, + { + "epoch": 2.04, + "learning_rate": 1.4981730376948682e-05, + "loss": 0.9749, + "step": 6000 + }, + { + "epoch": 2.04, + "eval_accuracy": 0.7536842105263157, + "eval_f1": 0.7536842105263157, + "eval_loss": 0.9298247694969177, + "eval_runtime": 753.1546, + "eval_samples_per_second": 6.307, + "eval_steps_per_second": 1.577, + "step": 6000 + }, + { + "epoch": 3.0, + "learning_rate": 1.4981363309683678e-05, + "loss": 1.0715, + "step": 6010 + }, + { + "epoch": 3.0, + "learning_rate": 1.4980992596093081e-05, + "loss": 0.7671, + "step": 6020 + }, + { + "epoch": 3.0, + "learning_rate": 1.4980618236357574e-05, + "loss": 1.0827, + "step": 6030 + }, + { + "epoch": 3.0, + "learning_rate": 1.4980240230659615e-05, + "loss": 0.8094, + "step": 6040 + }, + { + "epoch": 3.0, + "learning_rate": 1.4979858579183435e-05, + "loss": 1.0124, + "step": 6050 + }, + { + "epoch": 3.0, + "learning_rate": 1.4979473282115054e-05, + "loss": 1.0736, + "step": 6060 + }, + { + "epoch": 3.0, + "learning_rate": 1.4979084339642255e-05, + "loss": 0.9094, + "step": 6070 + }, + { + "epoch": 3.0, + "learning_rate": 1.4978691751954603e-05, + "loss": 0.8073, + "step": 6080 + }, + { + "epoch": 3.0, + "learning_rate": 1.497829551924345e-05, + "loss": 1.0181, + "step": 6090 + }, + { + "epoch": 3.0, + "learning_rate": 1.4977895641701902e-05, + "loss": 0.9183, + "step": 6100 + }, + { + "epoch": 3.0, + "learning_rate": 1.4977492119524863e-05, + "loss": 0.7493, + "step": 6110 + }, + { + "epoch": 3.0, + "learning_rate": 1.4977084952909003e-05, + "loss": 1.0122, + "step": 6120 + }, + { + "epoch": 3.0, + "learning_rate": 1.4976674142052769e-05, + "loss": 0.9298, + "step": 6130 + }, + { + "epoch": 3.0, + "learning_rate": 1.4976259687156385e-05, + "loss": 0.9499, + "step": 6140 + }, + { + "epoch": 3.0, + "learning_rate": 1.4975841588421854e-05, + "loss": 1.0176, + "step": 6150 + }, + { + "epoch": 3.0, + "learning_rate": 1.497541984605295e-05, + "loss": 1.1361, + "step": 6160 + }, + { + "epoch": 3.0, + "learning_rate": 1.4974994460255223e-05, + "loss": 0.7681, + "step": 6170 + }, + { + "epoch": 3.0, + "learning_rate": 1.4974565431236006e-05, + "loss": 0.8797, + "step": 6180 + }, + { + "epoch": 3.0, + "learning_rate": 1.49741327592044e-05, + "loss": 0.9948, + "step": 6190 + }, + { + "epoch": 3.0, + "learning_rate": 1.4973696444371283e-05, + "loss": 1.2721, + "step": 6200 + }, + { + "epoch": 3.0, + "learning_rate": 1.497325648694931e-05, + "loss": 0.9341, + "step": 6210 + }, + { + "epoch": 3.0, + "learning_rate": 1.4972812887152913e-05, + "loss": 0.9105, + "step": 6220 + }, + { + "epoch": 3.0, + "learning_rate": 1.4972365645198294e-05, + "loss": 1.1488, + "step": 6230 + }, + { + "epoch": 3.0, + "learning_rate": 1.4971914761303436e-05, + "loss": 1.1213, + "step": 6240 + }, + { + "epoch": 3.0, + "learning_rate": 1.4971460235688093e-05, + "loss": 0.9571, + "step": 6250 + }, + { + "epoch": 3.01, + "learning_rate": 1.4971002068573793e-05, + "loss": 0.8745, + "step": 6260 + }, + { + "epoch": 3.01, + "learning_rate": 1.4970540260183847e-05, + "loss": 1.1222, + "step": 6270 + }, + { + "epoch": 3.01, + "learning_rate": 1.497007481074333e-05, + "loss": 0.9804, + "step": 6280 + }, + { + "epoch": 3.01, + "learning_rate": 1.4969605720479096e-05, + "loss": 0.933, + "step": 6290 + }, + { + "epoch": 3.01, + "learning_rate": 1.4969132989619776e-05, + "loss": 1.0085, + "step": 6300 + }, + { + "epoch": 3.01, + "learning_rate": 1.4968656618395776e-05, + "loss": 0.8859, + "step": 6310 + }, + { + "epoch": 3.01, + "learning_rate": 1.496817660703927e-05, + "loss": 0.7726, + "step": 6320 + }, + { + "epoch": 3.01, + "learning_rate": 1.4967692955784207e-05, + "loss": 1.1581, + "step": 6330 + }, + { + "epoch": 3.01, + "learning_rate": 1.4967205664866318e-05, + "loss": 0.9762, + "step": 6340 + }, + { + "epoch": 3.01, + "learning_rate": 1.4966714734523101e-05, + "loss": 0.7109, + "step": 6350 + }, + { + "epoch": 3.01, + "learning_rate": 1.4966220164993826e-05, + "loss": 0.9304, + "step": 6360 + }, + { + "epoch": 3.01, + "learning_rate": 1.4965721956519547e-05, + "loss": 0.7953, + "step": 6370 + }, + { + "epoch": 3.01, + "learning_rate": 1.496522010934308e-05, + "loss": 1.2858, + "step": 6380 + }, + { + "epoch": 3.01, + "learning_rate": 1.4964714623709019e-05, + "loss": 1.1374, + "step": 6390 + }, + { + "epoch": 3.01, + "learning_rate": 1.4964205499863734e-05, + "loss": 1.2635, + "step": 6400 + }, + { + "epoch": 3.01, + "learning_rate": 1.4963692738055364e-05, + "loss": 1.2273, + "step": 6410 + }, + { + "epoch": 3.01, + "learning_rate": 1.4963176338533823e-05, + "loss": 0.9485, + "step": 6420 + }, + { + "epoch": 3.01, + "learning_rate": 1.49626563015508e-05, + "loss": 0.8397, + "step": 6430 + }, + { + "epoch": 3.01, + "learning_rate": 1.4962132627359753e-05, + "loss": 0.9709, + "step": 6440 + }, + { + "epoch": 3.01, + "learning_rate": 1.4961605316215913e-05, + "loss": 0.7586, + "step": 6450 + }, + { + "epoch": 3.01, + "learning_rate": 1.4961074368376286e-05, + "loss": 1.2284, + "step": 6460 + }, + { + "epoch": 3.01, + "learning_rate": 1.496053978409965e-05, + "loss": 1.1168, + "step": 6470 + }, + { + "epoch": 3.01, + "learning_rate": 1.4960001563646557e-05, + "loss": 0.9851, + "step": 6480 + }, + { + "epoch": 3.01, + "learning_rate": 1.4959459707279325e-05, + "loss": 1.106, + "step": 6490 + }, + { + "epoch": 3.01, + "learning_rate": 1.495891421526205e-05, + "loss": 1.0387, + "step": 6500 + }, + { + "epoch": 3.01, + "learning_rate": 1.49583650878606e-05, + "loss": 0.8843, + "step": 6510 + }, + { + "epoch": 3.01, + "learning_rate": 1.495781232534261e-05, + "loss": 1.2072, + "step": 6520 + }, + { + "epoch": 3.01, + "learning_rate": 1.4957255927977493e-05, + "loss": 0.7769, + "step": 6530 + }, + { + "epoch": 3.01, + "learning_rate": 1.4956695896036427e-05, + "loss": 0.7896, + "step": 6540 + }, + { + "epoch": 3.01, + "learning_rate": 1.4956132229792366e-05, + "loss": 0.9106, + "step": 6550 + }, + { + "epoch": 3.01, + "learning_rate": 1.4955564929520036e-05, + "loss": 1.1088, + "step": 6560 + }, + { + "epoch": 3.01, + "learning_rate": 1.4954993995495928e-05, + "loss": 1.0579, + "step": 6570 + }, + { + "epoch": 3.01, + "learning_rate": 1.4954419427998312e-05, + "loss": 1.0675, + "step": 6580 + }, + { + "epoch": 3.01, + "learning_rate": 1.4953841227307225e-05, + "loss": 1.046, + "step": 6590 + }, + { + "epoch": 3.01, + "learning_rate": 1.4953259393704474e-05, + "loss": 0.7583, + "step": 6600 + }, + { + "epoch": 3.01, + "learning_rate": 1.4952673927473636e-05, + "loss": 0.9493, + "step": 6610 + }, + { + "epoch": 3.01, + "learning_rate": 1.4952084828900064e-05, + "loss": 0.9263, + "step": 6620 + }, + { + "epoch": 3.01, + "learning_rate": 1.4951492098270874e-05, + "loss": 0.9671, + "step": 6630 + }, + { + "epoch": 3.01, + "learning_rate": 1.4950895735874958e-05, + "loss": 1.1237, + "step": 6640 + }, + { + "epoch": 3.01, + "learning_rate": 1.4950295742002972e-05, + "loss": 1.056, + "step": 6650 + }, + { + "epoch": 3.01, + "learning_rate": 1.4949692116947354e-05, + "loss": 1.439, + "step": 6660 + }, + { + "epoch": 3.01, + "learning_rate": 1.4949084861002293e-05, + "loss": 0.9316, + "step": 6670 + }, + { + "epoch": 3.01, + "learning_rate": 1.4948473974463767e-05, + "loss": 0.6299, + "step": 6680 + }, + { + "epoch": 3.01, + "learning_rate": 1.4947859457629508e-05, + "loss": 0.7169, + "step": 6690 + }, + { + "epoch": 3.01, + "learning_rate": 1.4947241310799028e-05, + "loss": 0.6922, + "step": 6700 + }, + { + "epoch": 3.01, + "learning_rate": 1.4946619534273603e-05, + "loss": 0.8926, + "step": 6710 + }, + { + "epoch": 3.01, + "learning_rate": 1.494599412835628e-05, + "loss": 1.0797, + "step": 6720 + }, + { + "epoch": 3.01, + "learning_rate": 1.4945365093351874e-05, + "loss": 0.8354, + "step": 6730 + }, + { + "epoch": 3.01, + "learning_rate": 1.4944732429566967e-05, + "loss": 0.7327, + "step": 6740 + }, + { + "epoch": 3.02, + "learning_rate": 1.4944096137309916e-05, + "loss": 0.8729, + "step": 6750 + }, + { + "epoch": 3.02, + "learning_rate": 1.4943456216890838e-05, + "loss": 1.0992, + "step": 6760 + }, + { + "epoch": 3.02, + "learning_rate": 1.4942812668621623e-05, + "loss": 0.6533, + "step": 6770 + }, + { + "epoch": 3.02, + "learning_rate": 1.4942165492815934e-05, + "loss": 1.1681, + "step": 6780 + }, + { + "epoch": 3.02, + "learning_rate": 1.494151468978919e-05, + "loss": 0.8496, + "step": 6790 + }, + { + "epoch": 3.02, + "learning_rate": 1.4940860259858585e-05, + "loss": 1.1699, + "step": 6800 + }, + { + "epoch": 3.02, + "learning_rate": 1.4940202203343083e-05, + "loss": 0.9825, + "step": 6810 + }, + { + "epoch": 3.02, + "learning_rate": 1.4939540520563411e-05, + "loss": 0.9348, + "step": 6820 + }, + { + "epoch": 3.02, + "learning_rate": 1.4938875211842066e-05, + "loss": 0.949, + "step": 6830 + }, + { + "epoch": 3.02, + "learning_rate": 1.4938206277503313e-05, + "loss": 1.1664, + "step": 6840 + }, + { + "epoch": 3.02, + "learning_rate": 1.4937533717873178e-05, + "loss": 0.961, + "step": 6850 + }, + { + "epoch": 3.02, + "learning_rate": 1.4936857533279463e-05, + "loss": 1.007, + "step": 6860 + }, + { + "epoch": 3.02, + "learning_rate": 1.4936177724051729e-05, + "loss": 0.9197, + "step": 6870 + }, + { + "epoch": 3.02, + "learning_rate": 1.493549429052131e-05, + "loss": 0.8877, + "step": 6880 + }, + { + "epoch": 3.02, + "learning_rate": 1.49348072330213e-05, + "loss": 0.9528, + "step": 6890 + }, + { + "epoch": 3.02, + "learning_rate": 1.4934116551886563e-05, + "loss": 0.8209, + "step": 6900 + }, + { + "epoch": 3.02, + "learning_rate": 1.493342224745373e-05, + "loss": 0.8172, + "step": 6910 + }, + { + "epoch": 3.02, + "learning_rate": 1.4932724320061195e-05, + "loss": 1.2846, + "step": 6920 + }, + { + "epoch": 3.02, + "learning_rate": 1.493202277004912e-05, + "loss": 1.0496, + "step": 6930 + }, + { + "epoch": 3.02, + "learning_rate": 1.4931317597759435e-05, + "loss": 1.0846, + "step": 6940 + }, + { + "epoch": 3.02, + "learning_rate": 1.4930608803535828e-05, + "loss": 1.0522, + "step": 6950 + }, + { + "epoch": 3.02, + "learning_rate": 1.4929896387723756e-05, + "loss": 0.9628, + "step": 6960 + }, + { + "epoch": 3.02, + "learning_rate": 1.4929180350670445e-05, + "loss": 1.494, + "step": 6970 + }, + { + "epoch": 3.02, + "learning_rate": 1.4928460692724883e-05, + "loss": 1.061, + "step": 6980 + }, + { + "epoch": 3.02, + "learning_rate": 1.4927737414237823e-05, + "loss": 0.6719, + "step": 6990 + }, + { + "epoch": 3.02, + "learning_rate": 1.4927010515561777e-05, + "loss": 1.0067, + "step": 7000 + }, + { + "epoch": 3.02, + "learning_rate": 1.4926279997051033e-05, + "loss": 0.9501, + "step": 7010 + }, + { + "epoch": 3.02, + "learning_rate": 1.4925545859061631e-05, + "loss": 0.8364, + "step": 7020 + }, + { + "epoch": 3.02, + "learning_rate": 1.4924808101951386e-05, + "loss": 0.792, + "step": 7030 + }, + { + "epoch": 3.02, + "learning_rate": 1.4924066726079868e-05, + "loss": 0.7958, + "step": 7040 + }, + { + "epoch": 3.02, + "learning_rate": 1.4923321731808417e-05, + "loss": 0.9504, + "step": 7050 + }, + { + "epoch": 3.02, + "learning_rate": 1.4922573119500133e-05, + "loss": 0.8696, + "step": 7060 + }, + { + "epoch": 3.02, + "learning_rate": 1.492182088951988e-05, + "loss": 1.0283, + "step": 7070 + }, + { + "epoch": 3.02, + "learning_rate": 1.492106504223429e-05, + "loss": 1.0415, + "step": 7080 + }, + { + "epoch": 3.02, + "learning_rate": 1.492030557801175e-05, + "loss": 0.7804, + "step": 7090 + }, + { + "epoch": 3.02, + "learning_rate": 1.4919542497222414e-05, + "loss": 1.0154, + "step": 7100 + }, + { + "epoch": 3.02, + "learning_rate": 1.4918775800238197e-05, + "loss": 1.1701, + "step": 7110 + }, + { + "epoch": 3.02, + "learning_rate": 1.4918005487432779e-05, + "loss": 0.9983, + "step": 7120 + }, + { + "epoch": 3.02, + "learning_rate": 1.4917231559181602e-05, + "loss": 0.8283, + "step": 7130 + }, + { + "epoch": 3.02, + "learning_rate": 1.4916454015861869e-05, + "loss": 0.8256, + "step": 7140 + }, + { + "epoch": 3.02, + "learning_rate": 1.4915672857852545e-05, + "loss": 0.9813, + "step": 7150 + }, + { + "epoch": 3.02, + "learning_rate": 1.4914888085534355e-05, + "loss": 0.9492, + "step": 7160 + }, + { + "epoch": 3.02, + "learning_rate": 1.491409969928979e-05, + "loss": 0.8943, + "step": 7170 + }, + { + "epoch": 3.02, + "learning_rate": 1.49133076995031e-05, + "loss": 0.9886, + "step": 7180 + }, + { + "epoch": 3.02, + "learning_rate": 1.4912512086560295e-05, + "loss": 1.0587, + "step": 7190 + }, + { + "epoch": 3.02, + "learning_rate": 1.4911712860849147e-05, + "loss": 0.9218, + "step": 7200 + }, + { + "epoch": 3.02, + "learning_rate": 1.4910910022759188e-05, + "loss": 0.9411, + "step": 7210 + }, + { + "epoch": 3.02, + "learning_rate": 1.4910103572681715e-05, + "loss": 0.633, + "step": 7220 + }, + { + "epoch": 3.02, + "learning_rate": 1.4909293511009777e-05, + "loss": 0.7656, + "step": 7230 + }, + { + "epoch": 3.02, + "learning_rate": 1.4908479838138192e-05, + "loss": 1.0236, + "step": 7240 + }, + { + "epoch": 3.02, + "learning_rate": 1.4907662554463534e-05, + "loss": 0.7882, + "step": 7250 + }, + { + "epoch": 3.03, + "learning_rate": 1.4906841660384135e-05, + "loss": 0.703, + "step": 7260 + }, + { + "epoch": 3.03, + "learning_rate": 1.4906017156300092e-05, + "loss": 0.9532, + "step": 7270 + }, + { + "epoch": 3.03, + "learning_rate": 1.4905189042613253e-05, + "loss": 0.8584, + "step": 7280 + }, + { + "epoch": 3.03, + "learning_rate": 1.4904357319727236e-05, + "loss": 0.8987, + "step": 7290 + }, + { + "epoch": 3.03, + "learning_rate": 1.490352198804741e-05, + "loss": 0.7435, + "step": 7300 + }, + { + "epoch": 3.03, + "learning_rate": 1.4902683047980905e-05, + "loss": 0.7606, + "step": 7310 + }, + { + "epoch": 3.03, + "learning_rate": 1.4901840499936614e-05, + "loss": 1.0078, + "step": 7320 + }, + { + "epoch": 3.03, + "learning_rate": 1.490099434432518e-05, + "loss": 1.0276, + "step": 7330 + }, + { + "epoch": 3.03, + "learning_rate": 1.490014458155901e-05, + "loss": 0.9141, + "step": 7340 + }, + { + "epoch": 3.03, + "learning_rate": 1.4899291212052271e-05, + "loss": 0.9084, + "step": 7350 + }, + { + "epoch": 3.03, + "learning_rate": 1.4898434236220883e-05, + "loss": 1.0924, + "step": 7360 + }, + { + "epoch": 3.03, + "learning_rate": 1.4897573654482523e-05, + "loss": 1.0189, + "step": 7370 + }, + { + "epoch": 3.03, + "learning_rate": 1.4896709467256632e-05, + "loss": 0.9006, + "step": 7380 + }, + { + "epoch": 3.03, + "learning_rate": 1.4895841674964404e-05, + "loss": 0.9912, + "step": 7390 + }, + { + "epoch": 3.03, + "learning_rate": 1.4894970278028787e-05, + "loss": 1.0447, + "step": 7400 + }, + { + "epoch": 3.03, + "learning_rate": 1.4894095276874493e-05, + "loss": 0.9935, + "step": 7410 + }, + { + "epoch": 3.03, + "learning_rate": 1.4893216671927985e-05, + "loss": 0.9729, + "step": 7420 + }, + { + "epoch": 3.03, + "learning_rate": 1.4892334463617486e-05, + "loss": 0.926, + "step": 7430 + }, + { + "epoch": 3.03, + "learning_rate": 1.4891448652372972e-05, + "loss": 0.9932, + "step": 7440 + }, + { + "epoch": 3.03, + "learning_rate": 1.4890559238626177e-05, + "loss": 0.8552, + "step": 7450 + }, + { + "epoch": 3.03, + "learning_rate": 1.488966622281059e-05, + "loss": 0.8495, + "step": 7460 + }, + { + "epoch": 3.03, + "learning_rate": 1.4888769605361456e-05, + "loss": 0.7872, + "step": 7470 + }, + { + "epoch": 3.03, + "learning_rate": 1.4887869386715777e-05, + "loss": 1.157, + "step": 7480 + }, + { + "epoch": 3.03, + "learning_rate": 1.4886965567312308e-05, + "loss": 1.1723, + "step": 7490 + }, + { + "epoch": 3.03, + "learning_rate": 1.488605814759156e-05, + "loss": 0.9442, + "step": 7500 + }, + { + "epoch": 3.03, + "learning_rate": 1.4885147127995799e-05, + "loss": 0.8961, + "step": 7510 + }, + { + "epoch": 3.03, + "learning_rate": 1.4884232508969042e-05, + "loss": 1.2466, + "step": 7520 + }, + { + "epoch": 3.03, + "learning_rate": 1.4883314290957063e-05, + "loss": 0.9751, + "step": 7530 + }, + { + "epoch": 3.03, + "learning_rate": 1.4882392474407394e-05, + "loss": 0.6522, + "step": 7540 + }, + { + "epoch": 3.03, + "learning_rate": 1.4881467059769314e-05, + "loss": 1.0721, + "step": 7550 + }, + { + "epoch": 3.03, + "learning_rate": 1.4880538047493862e-05, + "loss": 0.8575, + "step": 7560 + }, + { + "epoch": 3.03, + "learning_rate": 1.4879605438033825e-05, + "loss": 1.1339, + "step": 7570 + }, + { + "epoch": 3.03, + "learning_rate": 1.4878669231843745e-05, + "loss": 0.7888, + "step": 7580 + }, + { + "epoch": 3.03, + "learning_rate": 1.4877729429379918e-05, + "loss": 0.7422, + "step": 7590 + }, + { + "epoch": 3.03, + "learning_rate": 1.4876786031100395e-05, + "loss": 1.1973, + "step": 7600 + }, + { + "epoch": 3.03, + "learning_rate": 1.4875839037464973e-05, + "loss": 1.2434, + "step": 7610 + }, + { + "epoch": 3.03, + "learning_rate": 1.4874888448935207e-05, + "loss": 0.8379, + "step": 7620 + }, + { + "epoch": 3.03, + "learning_rate": 1.4873934265974401e-05, + "loss": 0.7247, + "step": 7630 + }, + { + "epoch": 3.03, + "learning_rate": 1.4872976489047615e-05, + "loss": 0.8807, + "step": 7640 + }, + { + "epoch": 3.03, + "learning_rate": 1.4872015118621657e-05, + "loss": 0.879, + "step": 7650 + }, + { + "epoch": 3.03, + "learning_rate": 1.4871050155165086e-05, + "loss": 1.0314, + "step": 7660 + }, + { + "epoch": 3.03, + "learning_rate": 1.4870081599148213e-05, + "loss": 0.8832, + "step": 7670 + }, + { + "epoch": 3.03, + "learning_rate": 1.48691094510431e-05, + "loss": 1.1437, + "step": 7680 + }, + { + "epoch": 3.03, + "learning_rate": 1.4868133711323566e-05, + "loss": 1.0185, + "step": 7690 + }, + { + "epoch": 3.03, + "learning_rate": 1.4867154380465167e-05, + "loss": 0.963, + "step": 7700 + }, + { + "epoch": 3.03, + "learning_rate": 1.4866171458945218e-05, + "loss": 0.7426, + "step": 7710 + }, + { + "epoch": 3.03, + "learning_rate": 1.4865184947242789e-05, + "loss": 0.733, + "step": 7720 + }, + { + "epoch": 3.03, + "learning_rate": 1.486419484583869e-05, + "loss": 1.0429, + "step": 7730 + }, + { + "epoch": 3.03, + "learning_rate": 1.486320115521548e-05, + "loss": 1.0294, + "step": 7740 + }, + { + "epoch": 3.04, + "learning_rate": 1.486220387585748e-05, + "loss": 0.8187, + "step": 7750 + }, + { + "epoch": 3.04, + "learning_rate": 1.4861203008250747e-05, + "loss": 1.0844, + "step": 7760 + }, + { + "epoch": 3.04, + "learning_rate": 1.4860198552883092e-05, + "loss": 0.6466, + "step": 7770 + }, + { + "epoch": 3.04, + "learning_rate": 1.4859190510244076e-05, + "loss": 1.0297, + "step": 7780 + }, + { + "epoch": 3.04, + "learning_rate": 1.4858178880825006e-05, + "loss": 1.0666, + "step": 7790 + }, + { + "epoch": 3.04, + "learning_rate": 1.4857163665118937e-05, + "loss": 0.8843, + "step": 7800 + }, + { + "epoch": 3.04, + "learning_rate": 1.4856144863620673e-05, + "loss": 0.9252, + "step": 7810 + }, + { + "epoch": 3.04, + "learning_rate": 1.4855122476826767e-05, + "loss": 0.6275, + "step": 7820 + }, + { + "epoch": 3.04, + "learning_rate": 1.4854096505235517e-05, + "loss": 0.797, + "step": 7830 + }, + { + "epoch": 3.04, + "learning_rate": 1.4853066949346967e-05, + "loss": 0.8827, + "step": 7840 + }, + { + "epoch": 3.04, + "learning_rate": 1.4852033809662914e-05, + "loss": 1.0379, + "step": 7850 + }, + { + "epoch": 3.04, + "learning_rate": 1.4850997086686896e-05, + "loss": 1.122, + "step": 7860 + }, + { + "epoch": 3.04, + "learning_rate": 1.4849956780924197e-05, + "loss": 0.8421, + "step": 7870 + }, + { + "epoch": 3.04, + "learning_rate": 1.4848912892881852e-05, + "loss": 0.7973, + "step": 7880 + }, + { + "epoch": 3.04, + "learning_rate": 1.4847865423068639e-05, + "loss": 1.1859, + "step": 7890 + }, + { + "epoch": 3.04, + "learning_rate": 1.484681437199508e-05, + "loss": 0.9623, + "step": 7900 + }, + { + "epoch": 3.04, + "learning_rate": 1.4845759740173448e-05, + "loss": 1.1052, + "step": 7910 + }, + { + "epoch": 3.04, + "learning_rate": 1.4844701528117756e-05, + "loss": 0.8889, + "step": 7920 + }, + { + "epoch": 3.04, + "learning_rate": 1.4843639736343764e-05, + "loss": 0.6552, + "step": 7930 + }, + { + "epoch": 3.04, + "learning_rate": 1.4842574365368974e-05, + "loss": 0.9871, + "step": 7940 + }, + { + "epoch": 3.04, + "learning_rate": 1.4841505415712638e-05, + "loss": 0.7968, + "step": 7950 + }, + { + "epoch": 3.04, + "learning_rate": 1.4840432887895748e-05, + "loss": 1.1069, + "step": 7960 + }, + { + "epoch": 3.04, + "learning_rate": 1.483935678244104e-05, + "loss": 1.0628, + "step": 7970 + }, + { + "epoch": 3.04, + "learning_rate": 1.4838277099873e-05, + "loss": 0.9716, + "step": 7980 + }, + { + "epoch": 3.04, + "learning_rate": 1.4837193840717842e-05, + "loss": 1.1399, + "step": 7990 + }, + { + "epoch": 3.04, + "learning_rate": 1.4836107005503543e-05, + "loss": 0.9048, + "step": 8000 + }, + { + "epoch": 3.04, + "eval_accuracy": 0.7863157894736842, + "eval_f1": 0.7863157894736842, + "eval_loss": 0.8679137825965881, + "eval_runtime": 746.5646, + "eval_samples_per_second": 6.362, + "eval_steps_per_second": 1.591, + "step": 8000 + }, + { + "epoch": 4.0, + "learning_rate": 1.4835016594759808e-05, + "loss": 0.9414, + "step": 8010 + }, + { + "epoch": 4.0, + "learning_rate": 1.4833922609018092e-05, + "loss": 0.7321, + "step": 8020 + }, + { + "epoch": 4.0, + "learning_rate": 1.483282504881159e-05, + "loss": 1.1217, + "step": 8030 + }, + { + "epoch": 4.0, + "learning_rate": 1.483172391467524e-05, + "loss": 0.9879, + "step": 8040 + }, + { + "epoch": 4.0, + "learning_rate": 1.483061920714572e-05, + "loss": 0.9518, + "step": 8050 + }, + { + "epoch": 4.0, + "learning_rate": 1.4829510926761451e-05, + "loss": 0.7889, + "step": 8060 + }, + { + "epoch": 4.0, + "learning_rate": 1.4828399074062598e-05, + "loss": 0.757, + "step": 8070 + }, + { + "epoch": 4.0, + "learning_rate": 1.4827283649591061e-05, + "loss": 0.9488, + "step": 8080 + }, + { + "epoch": 4.0, + "learning_rate": 1.4826164653890486e-05, + "loss": 0.7085, + "step": 8090 + }, + { + "epoch": 4.0, + "learning_rate": 1.482504208750626e-05, + "loss": 0.8293, + "step": 8100 + }, + { + "epoch": 4.0, + "learning_rate": 1.4823915950985504e-05, + "loss": 0.9005, + "step": 8110 + }, + { + "epoch": 4.0, + "learning_rate": 1.4822786244877088e-05, + "loss": 1.0684, + "step": 8120 + }, + { + "epoch": 4.0, + "learning_rate": 1.4821652969731612e-05, + "loss": 0.8249, + "step": 8130 + }, + { + "epoch": 4.0, + "learning_rate": 1.4820516126101424e-05, + "loss": 0.8295, + "step": 8140 + }, + { + "epoch": 4.0, + "learning_rate": 1.4819375714540607e-05, + "loss": 1.0305, + "step": 8150 + }, + { + "epoch": 4.0, + "learning_rate": 1.4818231735604982e-05, + "loss": 1.0169, + "step": 8160 + }, + { + "epoch": 4.0, + "learning_rate": 1.4817084189852114e-05, + "loss": 0.9314, + "step": 8170 + }, + { + "epoch": 4.0, + "learning_rate": 1.48159330778413e-05, + "loss": 1.0152, + "step": 8180 + }, + { + "epoch": 4.0, + "learning_rate": 1.4814778400133578e-05, + "loss": 0.8649, + "step": 8190 + }, + { + "epoch": 4.0, + "learning_rate": 1.4813620157291724e-05, + "loss": 0.8508, + "step": 8200 + }, + { + "epoch": 4.0, + "learning_rate": 1.4812458349880255e-05, + "loss": 0.8145, + "step": 8210 + }, + { + "epoch": 4.0, + "learning_rate": 1.4811292978465416e-05, + "loss": 1.0982, + "step": 8220 + }, + { + "epoch": 4.0, + "learning_rate": 1.48101240436152e-05, + "loss": 0.6951, + "step": 8230 + }, + { + "epoch": 4.0, + "learning_rate": 1.480895154589933e-05, + "loss": 0.9137, + "step": 8240 + }, + { + "epoch": 4.0, + "learning_rate": 1.4807775485889265e-05, + "loss": 1.2906, + "step": 8250 + }, + { + "epoch": 4.01, + "learning_rate": 1.4806595864158203e-05, + "loss": 0.8748, + "step": 8260 + }, + { + "epoch": 4.01, + "learning_rate": 1.4805412681281081e-05, + "loss": 1.1232, + "step": 8270 + }, + { + "epoch": 4.01, + "learning_rate": 1.4804225937834564e-05, + "loss": 0.8602, + "step": 8280 + }, + { + "epoch": 4.01, + "learning_rate": 1.4803035634397058e-05, + "loss": 0.8962, + "step": 8290 + }, + { + "epoch": 4.01, + "learning_rate": 1.4801841771548703e-05, + "loss": 0.61, + "step": 8300 + }, + { + "epoch": 4.01, + "learning_rate": 1.4800644349871372e-05, + "loss": 0.8235, + "step": 8310 + }, + { + "epoch": 4.01, + "learning_rate": 1.479944336994867e-05, + "loss": 0.9972, + "step": 8320 + }, + { + "epoch": 4.01, + "learning_rate": 1.479823883236595e-05, + "loss": 0.7722, + "step": 8330 + }, + { + "epoch": 4.01, + "learning_rate": 1.479703073771028e-05, + "loss": 0.9794, + "step": 8340 + }, + { + "epoch": 4.01, + "learning_rate": 1.4795819086570476e-05, + "loss": 1.2318, + "step": 8350 + }, + { + "epoch": 4.01, + "learning_rate": 1.4794603879537076e-05, + "loss": 0.6804, + "step": 8360 + }, + { + "epoch": 4.01, + "learning_rate": 1.4793385117202365e-05, + "loss": 0.7993, + "step": 8370 + }, + { + "epoch": 4.01, + "learning_rate": 1.4792162800160346e-05, + "loss": 0.9275, + "step": 8380 + }, + { + "epoch": 4.01, + "learning_rate": 1.4790936929006766e-05, + "loss": 0.9467, + "step": 8390 + }, + { + "epoch": 4.01, + "learning_rate": 1.4789707504339098e-05, + "loss": 0.8108, + "step": 8400 + }, + { + "epoch": 4.01, + "learning_rate": 1.4788474526756547e-05, + "loss": 0.9307, + "step": 8410 + }, + { + "epoch": 4.01, + "learning_rate": 1.4787237996860056e-05, + "loss": 0.7687, + "step": 8420 + }, + { + "epoch": 4.01, + "learning_rate": 1.478599791525229e-05, + "loss": 0.9544, + "step": 8430 + }, + { + "epoch": 4.01, + "learning_rate": 1.478475428253765e-05, + "loss": 0.968, + "step": 8440 + }, + { + "epoch": 4.01, + "learning_rate": 1.4783507099322271e-05, + "loss": 1.0598, + "step": 8450 + }, + { + "epoch": 4.01, + "learning_rate": 1.4782256366214012e-05, + "loss": 0.7067, + "step": 8460 + }, + { + "epoch": 4.01, + "learning_rate": 1.4781002083822465e-05, + "loss": 0.8091, + "step": 8470 + }, + { + "epoch": 4.01, + "learning_rate": 1.4779744252758955e-05, + "loss": 0.8498, + "step": 8480 + }, + { + "epoch": 4.01, + "learning_rate": 1.4778482873636531e-05, + "loss": 1.1503, + "step": 8490 + }, + { + "epoch": 4.01, + "learning_rate": 1.4777217947069972e-05, + "loss": 0.9158, + "step": 8500 + }, + { + "epoch": 4.01, + "learning_rate": 1.4775949473675794e-05, + "loss": 0.7677, + "step": 8510 + }, + { + "epoch": 4.01, + "learning_rate": 1.477467745407223e-05, + "loss": 0.5724, + "step": 8520 + }, + { + "epoch": 4.01, + "learning_rate": 1.4773401888879247e-05, + "loss": 0.919, + "step": 8530 + }, + { + "epoch": 4.01, + "learning_rate": 1.4772122778718545e-05, + "loss": 0.9545, + "step": 8540 + }, + { + "epoch": 4.01, + "learning_rate": 1.4770840124213543e-05, + "loss": 0.9738, + "step": 8550 + }, + { + "epoch": 4.01, + "learning_rate": 1.476955392598939e-05, + "loss": 0.9716, + "step": 8560 + }, + { + "epoch": 4.01, + "learning_rate": 1.4768264184672965e-05, + "loss": 1.2282, + "step": 8570 + }, + { + "epoch": 4.01, + "learning_rate": 1.4766970900892873e-05, + "loss": 0.8605, + "step": 8580 + }, + { + "epoch": 4.01, + "learning_rate": 1.4765674075279444e-05, + "loss": 0.9213, + "step": 8590 + }, + { + "epoch": 4.01, + "learning_rate": 1.4764373708464733e-05, + "loss": 0.7695, + "step": 8600 + }, + { + "epoch": 4.01, + "learning_rate": 1.4763069801082527e-05, + "loss": 0.6323, + "step": 8610 + }, + { + "epoch": 4.01, + "learning_rate": 1.4761762353768333e-05, + "loss": 0.9472, + "step": 8620 + }, + { + "epoch": 4.01, + "learning_rate": 1.4760451367159385e-05, + "loss": 0.7674, + "step": 8630 + }, + { + "epoch": 4.01, + "learning_rate": 1.475913684189464e-05, + "loss": 0.6949, + "step": 8640 + }, + { + "epoch": 4.01, + "learning_rate": 1.4757818778614786e-05, + "loss": 1.1363, + "step": 8650 + }, + { + "epoch": 4.01, + "learning_rate": 1.4756497177962224e-05, + "loss": 0.9669, + "step": 8660 + }, + { + "epoch": 4.01, + "learning_rate": 1.4755172040581093e-05, + "loss": 0.7092, + "step": 8670 + }, + { + "epoch": 4.01, + "learning_rate": 1.4753843367117248e-05, + "loss": 0.937, + "step": 8680 + }, + { + "epoch": 4.01, + "learning_rate": 1.4752511158218263e-05, + "loss": 1.281, + "step": 8690 + }, + { + "epoch": 4.01, + "learning_rate": 1.4751175414533447e-05, + "loss": 0.8582, + "step": 8700 + }, + { + "epoch": 4.01, + "learning_rate": 1.4749836136713819e-05, + "loss": 1.0818, + "step": 8710 + }, + { + "epoch": 4.01, + "learning_rate": 1.4748493325412132e-05, + "loss": 1.1468, + "step": 8720 + }, + { + "epoch": 4.01, + "learning_rate": 1.4747146981282854e-05, + "loss": 0.9654, + "step": 8730 + }, + { + "epoch": 4.01, + "learning_rate": 1.4745797104982177e-05, + "loss": 0.9186, + "step": 8740 + }, + { + "epoch": 4.01, + "learning_rate": 1.4744443697168013e-05, + "loss": 0.9825, + "step": 8750 + }, + { + "epoch": 4.02, + "learning_rate": 1.4743086758499996e-05, + "loss": 0.7791, + "step": 8760 + }, + { + "epoch": 4.02, + "learning_rate": 1.4741726289639485e-05, + "loss": 0.8682, + "step": 8770 + }, + { + "epoch": 4.02, + "learning_rate": 1.4740362291249555e-05, + "loss": 1.0464, + "step": 8780 + }, + { + "epoch": 4.02, + "learning_rate": 1.4738994763995e-05, + "loss": 0.7383, + "step": 8790 + }, + { + "epoch": 4.02, + "learning_rate": 1.4737623708542336e-05, + "loss": 0.9431, + "step": 8800 + }, + { + "epoch": 4.02, + "learning_rate": 1.47362491255598e-05, + "loss": 0.8399, + "step": 8810 + }, + { + "epoch": 4.02, + "learning_rate": 1.473487101571735e-05, + "loss": 0.7528, + "step": 8820 + }, + { + "epoch": 4.02, + "learning_rate": 1.4733489379686654e-05, + "loss": 1.0939, + "step": 8830 + }, + { + "epoch": 4.02, + "learning_rate": 1.473210421814111e-05, + "loss": 1.1854, + "step": 8840 + }, + { + "epoch": 4.02, + "learning_rate": 1.4730715531755826e-05, + "loss": 0.8147, + "step": 8850 + }, + { + "epoch": 4.02, + "learning_rate": 1.472932332120763e-05, + "loss": 0.9854, + "step": 8860 + }, + { + "epoch": 4.02, + "learning_rate": 1.4727927587175074e-05, + "loss": 0.9375, + "step": 8870 + }, + { + "epoch": 4.02, + "learning_rate": 1.4726528330338416e-05, + "loss": 1.1204, + "step": 8880 + }, + { + "epoch": 4.02, + "learning_rate": 1.4725125551379637e-05, + "loss": 0.7848, + "step": 8890 + }, + { + "epoch": 4.02, + "learning_rate": 1.4723719250982437e-05, + "loss": 0.937, + "step": 8900 + }, + { + "epoch": 4.02, + "learning_rate": 1.4722309429832228e-05, + "loss": 0.6796, + "step": 8910 + }, + { + "epoch": 4.02, + "learning_rate": 1.4720896088616142e-05, + "loss": 0.6637, + "step": 8920 + }, + { + "epoch": 4.02, + "learning_rate": 1.4719479228023022e-05, + "loss": 0.7359, + "step": 8930 + }, + { + "epoch": 4.02, + "learning_rate": 1.471805884874343e-05, + "loss": 0.8415, + "step": 8940 + }, + { + "epoch": 4.02, + "learning_rate": 1.471663495146964e-05, + "loss": 0.8642, + "step": 8950 + }, + { + "epoch": 4.02, + "learning_rate": 1.4715207536895644e-05, + "loss": 1.1598, + "step": 8960 + }, + { + "epoch": 4.02, + "learning_rate": 1.4713776605717146e-05, + "loss": 0.9631, + "step": 8970 + }, + { + "epoch": 4.02, + "learning_rate": 1.4712342158631564e-05, + "loss": 1.028, + "step": 8980 + }, + { + "epoch": 4.02, + "learning_rate": 1.4710904196338032e-05, + "loss": 0.9079, + "step": 8990 + }, + { + "epoch": 4.02, + "learning_rate": 1.4709462719537392e-05, + "loss": 0.7848, + "step": 9000 + }, + { + "epoch": 4.02, + "learning_rate": 1.4708017728932204e-05, + "loss": 0.7513, + "step": 9010 + }, + { + "epoch": 4.02, + "learning_rate": 1.4706569225226741e-05, + "loss": 1.0347, + "step": 9020 + }, + { + "epoch": 4.02, + "learning_rate": 1.470511720912698e-05, + "loss": 0.9487, + "step": 9030 + }, + { + "epoch": 4.02, + "learning_rate": 1.4703661681340624e-05, + "loss": 0.649, + "step": 9040 + }, + { + "epoch": 4.02, + "learning_rate": 1.4702202642577073e-05, + "loss": 0.8312, + "step": 9050 + }, + { + "epoch": 4.02, + "learning_rate": 1.470074009354745e-05, + "loss": 0.883, + "step": 9060 + }, + { + "epoch": 4.02, + "learning_rate": 1.4699274034964577e-05, + "loss": 0.635, + "step": 9070 + }, + { + "epoch": 4.02, + "learning_rate": 1.4697804467543001e-05, + "loss": 1.1506, + "step": 9080 + }, + { + "epoch": 4.02, + "learning_rate": 1.4696331391998966e-05, + "loss": 1.3274, + "step": 9090 + }, + { + "epoch": 4.02, + "learning_rate": 1.4694854809050431e-05, + "loss": 0.4753, + "step": 9100 + }, + { + "epoch": 4.02, + "learning_rate": 1.4693374719417069e-05, + "loss": 0.9106, + "step": 9110 + }, + { + "epoch": 4.02, + "learning_rate": 1.4691891123820253e-05, + "loss": 1.1933, + "step": 9120 + }, + { + "epoch": 4.02, + "learning_rate": 1.469040402298307e-05, + "loss": 1.4015, + "step": 9130 + }, + { + "epoch": 4.02, + "learning_rate": 1.468891341763032e-05, + "loss": 0.9833, + "step": 9140 + }, + { + "epoch": 4.02, + "learning_rate": 1.46874193084885e-05, + "loss": 1.1419, + "step": 9150 + }, + { + "epoch": 4.02, + "learning_rate": 1.4685921696285823e-05, + "loss": 0.7536, + "step": 9160 + }, + { + "epoch": 4.02, + "learning_rate": 1.4684420581752207e-05, + "loss": 0.9597, + "step": 9170 + }, + { + "epoch": 4.02, + "learning_rate": 1.4682915965619275e-05, + "loss": 1.0005, + "step": 9180 + }, + { + "epoch": 4.02, + "learning_rate": 1.4681407848620362e-05, + "loss": 1.1734, + "step": 9190 + }, + { + "epoch": 4.02, + "learning_rate": 1.4679896231490503e-05, + "loss": 0.9856, + "step": 9200 + }, + { + "epoch": 4.02, + "learning_rate": 1.4678381114966447e-05, + "loss": 1.2223, + "step": 9210 + }, + { + "epoch": 4.02, + "learning_rate": 1.4676862499786637e-05, + "loss": 0.8492, + "step": 9220 + }, + { + "epoch": 4.02, + "learning_rate": 1.467534038669123e-05, + "loss": 0.8826, + "step": 9230 + }, + { + "epoch": 4.02, + "learning_rate": 1.4673814776422084e-05, + "loss": 0.5519, + "step": 9240 + }, + { + "epoch": 4.03, + "learning_rate": 1.4672285669722767e-05, + "loss": 1.0553, + "step": 9250 + }, + { + "epoch": 4.03, + "learning_rate": 1.4670753067338543e-05, + "loss": 1.1525, + "step": 9260 + }, + { + "epoch": 4.03, + "learning_rate": 1.4669216970016385e-05, + "loss": 1.0257, + "step": 9270 + }, + { + "epoch": 4.03, + "learning_rate": 1.466767737850497e-05, + "loss": 0.8463, + "step": 9280 + }, + { + "epoch": 4.03, + "learning_rate": 1.4666134293554673e-05, + "loss": 0.6491, + "step": 9290 + }, + { + "epoch": 4.03, + "learning_rate": 1.4664587715917576e-05, + "loss": 0.7512, + "step": 9300 + }, + { + "epoch": 4.03, + "learning_rate": 1.4663037646347467e-05, + "loss": 0.7006, + "step": 9310 + }, + { + "epoch": 4.03, + "learning_rate": 1.4661484085599823e-05, + "loss": 0.9646, + "step": 9320 + }, + { + "epoch": 4.03, + "learning_rate": 1.4659927034431834e-05, + "loss": 0.8665, + "step": 9330 + }, + { + "epoch": 4.03, + "learning_rate": 1.465836649360239e-05, + "loss": 0.5898, + "step": 9340 + }, + { + "epoch": 4.03, + "learning_rate": 1.4656802463872076e-05, + "loss": 0.6902, + "step": 9350 + }, + { + "epoch": 4.03, + "learning_rate": 1.4655234946003185e-05, + "loss": 0.8633, + "step": 9360 + }, + { + "epoch": 4.03, + "learning_rate": 1.4653663940759703e-05, + "loss": 0.8003, + "step": 9370 + }, + { + "epoch": 4.03, + "learning_rate": 1.465208944890732e-05, + "loss": 0.6989, + "step": 9380 + }, + { + "epoch": 4.03, + "learning_rate": 1.4650511471213424e-05, + "loss": 1.0922, + "step": 9390 + }, + { + "epoch": 4.03, + "learning_rate": 1.4648930008447102e-05, + "loss": 0.9699, + "step": 9400 + }, + { + "epoch": 4.03, + "learning_rate": 1.4647345061379142e-05, + "loss": 0.894, + "step": 9410 + }, + { + "epoch": 4.03, + "learning_rate": 1.4645756630782025e-05, + "loss": 0.8662, + "step": 9420 + }, + { + "epoch": 4.03, + "learning_rate": 1.4644164717429931e-05, + "loss": 0.7388, + "step": 9430 + }, + { + "epoch": 4.03, + "learning_rate": 1.4642569322098747e-05, + "loss": 0.5905, + "step": 9440 + }, + { + "epoch": 4.03, + "learning_rate": 1.464097044556604e-05, + "loss": 0.8467, + "step": 9450 + }, + { + "epoch": 4.03, + "learning_rate": 1.463936808861109e-05, + "loss": 1.0525, + "step": 9460 + }, + { + "epoch": 4.03, + "learning_rate": 1.4637762252014863e-05, + "loss": 0.8764, + "step": 9470 + }, + { + "epoch": 4.03, + "learning_rate": 1.4636152936560023e-05, + "loss": 0.7288, + "step": 9480 + }, + { + "epoch": 4.03, + "learning_rate": 1.4634540143030935e-05, + "loss": 0.6504, + "step": 9490 + }, + { + "epoch": 4.03, + "learning_rate": 1.4632923872213653e-05, + "loss": 0.8992, + "step": 9500 + }, + { + "epoch": 4.03, + "learning_rate": 1.4631304124895924e-05, + "loss": 0.652, + "step": 9510 + }, + { + "epoch": 4.03, + "learning_rate": 1.46296809018672e-05, + "loss": 0.9174, + "step": 9520 + }, + { + "epoch": 4.03, + "learning_rate": 1.4628054203918615e-05, + "loss": 1.2839, + "step": 9530 + }, + { + "epoch": 4.03, + "learning_rate": 1.4626424031843006e-05, + "loss": 0.9751, + "step": 9540 + }, + { + "epoch": 4.03, + "learning_rate": 1.4624790386434893e-05, + "loss": 0.8526, + "step": 9550 + }, + { + "epoch": 4.03, + "learning_rate": 1.4623153268490502e-05, + "loss": 0.9823, + "step": 9560 + }, + { + "epoch": 4.03, + "learning_rate": 1.4621512678807738e-05, + "loss": 1.1091, + "step": 9570 + }, + { + "epoch": 4.03, + "learning_rate": 1.461986861818621e-05, + "loss": 0.8197, + "step": 9580 + }, + { + "epoch": 4.03, + "learning_rate": 1.461822108742721e-05, + "loss": 0.6894, + "step": 9590 + }, + { + "epoch": 4.03, + "learning_rate": 1.4616570087333725e-05, + "loss": 1.047, + "step": 9600 + }, + { + "epoch": 4.03, + "learning_rate": 1.4614915618710431e-05, + "loss": 0.8924, + "step": 9610 + }, + { + "epoch": 4.03, + "learning_rate": 1.46132576823637e-05, + "loss": 0.8171, + "step": 9620 + }, + { + "epoch": 4.03, + "learning_rate": 1.4611596279101584e-05, + "loss": 0.9181, + "step": 9630 + }, + { + "epoch": 4.03, + "learning_rate": 1.4609931409733837e-05, + "loss": 0.8818, + "step": 9640 + }, + { + "epoch": 4.03, + "learning_rate": 1.4608263075071894e-05, + "loss": 0.8118, + "step": 9650 + }, + { + "epoch": 4.03, + "learning_rate": 1.4606591275928879e-05, + "loss": 0.9907, + "step": 9660 + }, + { + "epoch": 4.03, + "learning_rate": 1.4604916013119607e-05, + "loss": 0.9498, + "step": 9670 + }, + { + "epoch": 4.03, + "learning_rate": 1.4603237287460582e-05, + "loss": 0.7819, + "step": 9680 + }, + { + "epoch": 4.03, + "learning_rate": 1.4601555099769994e-05, + "loss": 0.6889, + "step": 9690 + }, + { + "epoch": 4.03, + "learning_rate": 1.4599869450867724e-05, + "loss": 0.7433, + "step": 9700 + }, + { + "epoch": 4.03, + "learning_rate": 1.4598180341575332e-05, + "loss": 0.8066, + "step": 9710 + }, + { + "epoch": 4.03, + "learning_rate": 1.459648777271607e-05, + "loss": 0.7443, + "step": 9720 + }, + { + "epoch": 4.03, + "learning_rate": 1.4594791745114878e-05, + "loss": 0.6299, + "step": 9730 + }, + { + "epoch": 4.03, + "learning_rate": 1.4593092259598375e-05, + "loss": 1.0822, + "step": 9740 + }, + { + "epoch": 4.04, + "learning_rate": 1.4591389316994878e-05, + "loss": 0.8876, + "step": 9750 + }, + { + "epoch": 4.04, + "learning_rate": 1.4589682918134371e-05, + "loss": 1.0569, + "step": 9760 + }, + { + "epoch": 4.04, + "learning_rate": 1.4587973063848537e-05, + "loss": 0.8454, + "step": 9770 + }, + { + "epoch": 4.04, + "learning_rate": 1.4586259754970738e-05, + "loss": 0.687, + "step": 9780 + }, + { + "epoch": 4.04, + "learning_rate": 1.4584542992336017e-05, + "loss": 1.2345, + "step": 9790 + }, + { + "epoch": 4.04, + "learning_rate": 1.4582822776781108e-05, + "loss": 0.6698, + "step": 9800 + }, + { + "epoch": 4.04, + "learning_rate": 1.4581099109144421e-05, + "loss": 0.6466, + "step": 9810 + }, + { + "epoch": 4.04, + "learning_rate": 1.457937199026605e-05, + "loss": 1.2157, + "step": 9820 + }, + { + "epoch": 4.04, + "learning_rate": 1.457764142098777e-05, + "loss": 0.9521, + "step": 9830 + }, + { + "epoch": 4.04, + "learning_rate": 1.4575907402153044e-05, + "loss": 1.0293, + "step": 9840 + }, + { + "epoch": 4.04, + "learning_rate": 1.4574169934607006e-05, + "loss": 0.9327, + "step": 9850 + }, + { + "epoch": 4.04, + "learning_rate": 1.4572429019196484e-05, + "loss": 0.7715, + "step": 9860 + }, + { + "epoch": 4.04, + "learning_rate": 1.4570684656769973e-05, + "loss": 1.1282, + "step": 9870 + }, + { + "epoch": 4.04, + "learning_rate": 1.4568936848177657e-05, + "loss": 0.9545, + "step": 9880 + }, + { + "epoch": 4.04, + "learning_rate": 1.4567185594271393e-05, + "loss": 0.7993, + "step": 9890 + }, + { + "epoch": 4.04, + "learning_rate": 1.4565430895904725e-05, + "loss": 0.8394, + "step": 9900 + }, + { + "epoch": 4.04, + "learning_rate": 1.456367275393287e-05, + "loss": 0.6563, + "step": 9910 + }, + { + "epoch": 4.04, + "learning_rate": 1.4561911169212726e-05, + "loss": 0.7107, + "step": 9920 + }, + { + "epoch": 4.04, + "learning_rate": 1.4560146142602868e-05, + "loss": 1.2381, + "step": 9930 + }, + { + "epoch": 4.04, + "learning_rate": 1.4558377674963545e-05, + "loss": 0.6681, + "step": 9940 + }, + { + "epoch": 4.04, + "learning_rate": 1.4556605767156693e-05, + "loss": 1.149, + "step": 9950 + }, + { + "epoch": 4.04, + "learning_rate": 1.455483042004591e-05, + "loss": 0.9799, + "step": 9960 + }, + { + "epoch": 4.04, + "learning_rate": 1.4553051634496486e-05, + "loss": 0.9604, + "step": 9970 + }, + { + "epoch": 4.04, + "learning_rate": 1.4551269411375377e-05, + "loss": 0.681, + "step": 9980 + }, + { + "epoch": 4.04, + "learning_rate": 1.4549483751551216e-05, + "loss": 0.8368, + "step": 9990 + }, + { + "epoch": 4.04, + "learning_rate": 1.4547694655894313e-05, + "loss": 0.7977, + "step": 10000 + }, + { + "epoch": 4.04, + "eval_accuracy": 0.7810526315789473, + "eval_f1": 0.7810526315789474, + "eval_loss": 0.8845712542533875, + "eval_runtime": 758.7878, + "eval_samples_per_second": 6.26, + "eval_steps_per_second": 1.566, + "step": 10000 + }, + { + "epoch": 5.0, + "learning_rate": 1.4545902125276652e-05, + "loss": 1.0388, + "step": 10010 + }, + { + "epoch": 5.0, + "learning_rate": 1.4544106160571887e-05, + "loss": 0.9292, + "step": 10020 + }, + { + "epoch": 5.0, + "learning_rate": 1.4542306762655355e-05, + "loss": 0.897, + "step": 10030 + }, + { + "epoch": 5.0, + "learning_rate": 1.4540503932404057e-05, + "loss": 1.0358, + "step": 10040 + }, + { + "epoch": 5.0, + "learning_rate": 1.453869767069667e-05, + "loss": 1.0391, + "step": 10050 + }, + { + "epoch": 5.0, + "learning_rate": 1.4536887978413547e-05, + "loss": 0.7696, + "step": 10060 + }, + { + "epoch": 5.0, + "learning_rate": 1.4535074856436707e-05, + "loss": 0.6376, + "step": 10070 + }, + { + "epoch": 5.0, + "learning_rate": 1.4533258305649845e-05, + "loss": 0.8613, + "step": 10080 + }, + { + "epoch": 5.0, + "learning_rate": 1.4531438326938328e-05, + "loss": 0.8559, + "step": 10090 + }, + { + "epoch": 5.0, + "learning_rate": 1.4529614921189187e-05, + "loss": 0.6378, + "step": 10100 + }, + { + "epoch": 5.0, + "learning_rate": 1.452778808929113e-05, + "loss": 0.6774, + "step": 10110 + }, + { + "epoch": 5.0, + "learning_rate": 1.4525957832134532e-05, + "loss": 0.6826, + "step": 10120 + }, + { + "epoch": 5.0, + "learning_rate": 1.4524124150611443e-05, + "loss": 0.7114, + "step": 10130 + }, + { + "epoch": 5.0, + "learning_rate": 1.452228704561557e-05, + "loss": 0.8111, + "step": 10140 + }, + { + "epoch": 5.0, + "learning_rate": 1.4520446518042301e-05, + "loss": 0.8248, + "step": 10150 + }, + { + "epoch": 5.0, + "learning_rate": 1.4518602568788683e-05, + "loss": 0.8909, + "step": 10160 + }, + { + "epoch": 5.0, + "learning_rate": 1.4516755198753435e-05, + "loss": 1.0263, + "step": 10170 + }, + { + "epoch": 5.0, + "learning_rate": 1.4514904408836944e-05, + "loss": 0.7914, + "step": 10180 + }, + { + "epoch": 5.0, + "learning_rate": 1.4513050199941261e-05, + "loss": 0.9401, + "step": 10190 + }, + { + "epoch": 5.0, + "learning_rate": 1.4511192572970108e-05, + "loss": 0.7783, + "step": 10200 + }, + { + "epoch": 5.0, + "learning_rate": 1.4509331528828868e-05, + "loss": 0.6487, + "step": 10210 + }, + { + "epoch": 5.0, + "learning_rate": 1.4507467068424591e-05, + "loss": 0.7832, + "step": 10220 + }, + { + "epoch": 5.0, + "learning_rate": 1.4505599192665993e-05, + "loss": 0.7375, + "step": 10230 + }, + { + "epoch": 5.0, + "learning_rate": 1.4503727902463451e-05, + "loss": 0.8463, + "step": 10240 + }, + { + "epoch": 5.0, + "learning_rate": 1.4501853198729012e-05, + "loss": 0.7524, + "step": 10250 + }, + { + "epoch": 5.01, + "learning_rate": 1.4499975082376387e-05, + "loss": 0.5843, + "step": 10260 + }, + { + "epoch": 5.01, + "learning_rate": 1.449809355432094e-05, + "loss": 1.342, + "step": 10270 + }, + { + "epoch": 5.01, + "learning_rate": 1.449620861547971e-05, + "loss": 0.8964, + "step": 10280 + }, + { + "epoch": 5.01, + "learning_rate": 1.4494320266771391e-05, + "loss": 0.8378, + "step": 10290 + }, + { + "epoch": 5.01, + "learning_rate": 1.4492428509116341e-05, + "loss": 0.8489, + "step": 10300 + }, + { + "epoch": 5.01, + "learning_rate": 1.4490533343436581e-05, + "loss": 0.5495, + "step": 10310 + }, + { + "epoch": 5.01, + "learning_rate": 1.4488634770655793e-05, + "loss": 0.9422, + "step": 10320 + }, + { + "epoch": 5.01, + "learning_rate": 1.4486732791699318e-05, + "loss": 1.0297, + "step": 10330 + }, + { + "epoch": 5.01, + "learning_rate": 1.4484827407494154e-05, + "loss": 0.8256, + "step": 10340 + }, + { + "epoch": 5.01, + "learning_rate": 1.4482918618968963e-05, + "loss": 0.5982, + "step": 10350 + }, + { + "epoch": 5.01, + "learning_rate": 1.4481006427054067e-05, + "loss": 0.7247, + "step": 10360 + }, + { + "epoch": 5.01, + "learning_rate": 1.4479090832681445e-05, + "loss": 1.0779, + "step": 10370 + }, + { + "epoch": 5.01, + "learning_rate": 1.4477171836784736e-05, + "loss": 0.7428, + "step": 10380 + }, + { + "epoch": 5.01, + "learning_rate": 1.4475249440299231e-05, + "loss": 0.6429, + "step": 10390 + }, + { + "epoch": 5.01, + "learning_rate": 1.4473323644161886e-05, + "loss": 0.557, + "step": 10400 + }, + { + "epoch": 5.01, + "learning_rate": 1.4471394449311309e-05, + "loss": 0.7514, + "step": 10410 + }, + { + "epoch": 5.01, + "learning_rate": 1.4469461856687769e-05, + "loss": 1.0773, + "step": 10420 + }, + { + "epoch": 5.01, + "learning_rate": 1.4467525867233184e-05, + "loss": 1.1142, + "step": 10430 + }, + { + "epoch": 5.01, + "learning_rate": 1.4465586481891134e-05, + "loss": 0.8702, + "step": 10440 + }, + { + "epoch": 5.01, + "learning_rate": 1.4463643701606852e-05, + "loss": 1.0078, + "step": 10450 + }, + { + "epoch": 5.01, + "learning_rate": 1.4461697527327225e-05, + "loss": 0.6047, + "step": 10460 + }, + { + "epoch": 5.01, + "learning_rate": 1.4459747960000795e-05, + "loss": 0.9054, + "step": 10470 + }, + { + "epoch": 5.01, + "learning_rate": 1.4457795000577756e-05, + "loss": 0.8874, + "step": 10480 + }, + { + "epoch": 5.01, + "learning_rate": 1.4455838650009962e-05, + "loss": 1.1414, + "step": 10490 + }, + { + "epoch": 5.01, + "learning_rate": 1.4453878909250906e-05, + "loss": 0.8461, + "step": 10500 + }, + { + "epoch": 5.01, + "learning_rate": 1.4451915779255748e-05, + "loss": 1.2347, + "step": 10510 + }, + { + "epoch": 5.01, + "learning_rate": 1.4449949260981291e-05, + "loss": 0.9733, + "step": 10520 + }, + { + "epoch": 5.01, + "learning_rate": 1.4447979355385994e-05, + "loss": 0.7622, + "step": 10530 + }, + { + "epoch": 5.01, + "learning_rate": 1.4446006063429966e-05, + "loss": 1.0488, + "step": 10540 + }, + { + "epoch": 5.01, + "learning_rate": 1.4444029386074961e-05, + "loss": 0.6356, + "step": 10550 + }, + { + "epoch": 5.01, + "learning_rate": 1.4442049324284393e-05, + "loss": 0.9436, + "step": 10560 + }, + { + "epoch": 5.01, + "learning_rate": 1.4440065879023313e-05, + "loss": 0.9486, + "step": 10570 + }, + { + "epoch": 5.01, + "learning_rate": 1.4438079051258435e-05, + "loss": 1.1029, + "step": 10580 + }, + { + "epoch": 5.01, + "learning_rate": 1.4436088841958113e-05, + "loss": 1.1475, + "step": 10590 + }, + { + "epoch": 5.01, + "learning_rate": 1.443409525209235e-05, + "loss": 0.7903, + "step": 10600 + }, + { + "epoch": 5.01, + "learning_rate": 1.4432098282632795e-05, + "loss": 0.6607, + "step": 10610 + }, + { + "epoch": 5.01, + "learning_rate": 1.4430097934552751e-05, + "loss": 0.6229, + "step": 10620 + }, + { + "epoch": 5.01, + "learning_rate": 1.4428094208827161e-05, + "loss": 1.006, + "step": 10630 + }, + { + "epoch": 5.01, + "learning_rate": 1.4426087106432617e-05, + "loss": 0.7741, + "step": 10640 + }, + { + "epoch": 5.01, + "learning_rate": 1.4424076628347357e-05, + "loss": 0.5454, + "step": 10650 + }, + { + "epoch": 5.01, + "learning_rate": 1.4422062775551262e-05, + "loss": 0.7863, + "step": 10660 + }, + { + "epoch": 5.01, + "learning_rate": 1.4420045549025862e-05, + "loss": 0.977, + "step": 10670 + }, + { + "epoch": 5.01, + "learning_rate": 1.4418024949754326e-05, + "loss": 0.956, + "step": 10680 + }, + { + "epoch": 5.01, + "learning_rate": 1.441600097872147e-05, + "loss": 0.9236, + "step": 10690 + }, + { + "epoch": 5.01, + "learning_rate": 1.4413973636913754e-05, + "loss": 0.9959, + "step": 10700 + }, + { + "epoch": 5.01, + "learning_rate": 1.441194292531928e-05, + "loss": 1.2392, + "step": 10710 + }, + { + "epoch": 5.01, + "learning_rate": 1.4409908844927792e-05, + "loss": 0.6514, + "step": 10720 + }, + { + "epoch": 5.01, + "learning_rate": 1.4407871396730672e-05, + "loss": 0.9126, + "step": 10730 + }, + { + "epoch": 5.01, + "learning_rate": 1.4405830581720953e-05, + "loss": 0.9703, + "step": 10740 + }, + { + "epoch": 5.01, + "learning_rate": 1.4403786400893304e-05, + "loss": 0.6085, + "step": 10750 + }, + { + "epoch": 5.02, + "learning_rate": 1.4401738855244029e-05, + "loss": 0.7244, + "step": 10760 + }, + { + "epoch": 5.02, + "learning_rate": 1.439968794577108e-05, + "loss": 0.8882, + "step": 10770 + }, + { + "epoch": 5.02, + "learning_rate": 1.4397633673474042e-05, + "loss": 0.584, + "step": 10780 + }, + { + "epoch": 5.02, + "learning_rate": 1.4395576039354148e-05, + "loss": 1.2507, + "step": 10790 + }, + { + "epoch": 5.02, + "learning_rate": 1.4393515044414259e-05, + "loss": 0.8442, + "step": 10800 + }, + { + "epoch": 5.02, + "learning_rate": 1.439145068965888e-05, + "loss": 1.0842, + "step": 10810 + }, + { + "epoch": 5.02, + "learning_rate": 1.4389382976094155e-05, + "loss": 0.9413, + "step": 10820 + }, + { + "epoch": 5.02, + "learning_rate": 1.438731190472786e-05, + "loss": 0.655, + "step": 10830 + }, + { + "epoch": 5.02, + "learning_rate": 1.438523747656941e-05, + "loss": 0.8029, + "step": 10840 + }, + { + "epoch": 5.02, + "learning_rate": 1.4383159692629858e-05, + "loss": 0.5417, + "step": 10850 + }, + { + "epoch": 5.02, + "learning_rate": 1.4381078553921888e-05, + "loss": 1.0177, + "step": 10860 + }, + { + "epoch": 5.02, + "learning_rate": 1.4378994061459826e-05, + "loss": 0.9567, + "step": 10870 + }, + { + "epoch": 5.02, + "learning_rate": 1.4376906216259623e-05, + "loss": 0.7705, + "step": 10880 + }, + { + "epoch": 5.02, + "learning_rate": 1.4374815019338873e-05, + "loss": 0.6263, + "step": 10890 + }, + { + "epoch": 5.02, + "learning_rate": 1.4372720471716797e-05, + "loss": 0.5253, + "step": 10900 + }, + { + "epoch": 5.02, + "learning_rate": 1.4370622574414254e-05, + "loss": 0.8735, + "step": 10910 + }, + { + "epoch": 5.02, + "learning_rate": 1.4368521328453736e-05, + "loss": 0.916, + "step": 10920 + }, + { + "epoch": 5.02, + "learning_rate": 1.4366416734859362e-05, + "loss": 0.8634, + "step": 10930 + }, + { + "epoch": 5.02, + "learning_rate": 1.4364308794656881e-05, + "loss": 0.7812, + "step": 10940 + }, + { + "epoch": 5.02, + "learning_rate": 1.4362197508873688e-05, + "loss": 0.7478, + "step": 10950 + }, + { + "epoch": 5.02, + "learning_rate": 1.4360082878538787e-05, + "loss": 0.7418, + "step": 10960 + }, + { + "epoch": 5.02, + "learning_rate": 1.4357964904682832e-05, + "loss": 0.6585, + "step": 10970 + }, + { + "epoch": 5.02, + "learning_rate": 1.4355843588338092e-05, + "loss": 0.8206, + "step": 10980 + }, + { + "epoch": 5.02, + "learning_rate": 1.4353718930538473e-05, + "loss": 0.6967, + "step": 10990 + }, + { + "epoch": 5.02, + "learning_rate": 1.4351590932319506e-05, + "loss": 0.8266, + "step": 11000 + }, + { + "epoch": 5.02, + "learning_rate": 1.4349459594718354e-05, + "loss": 0.8005, + "step": 11010 + }, + { + "epoch": 5.02, + "learning_rate": 1.4347324918773805e-05, + "loss": 0.9922, + "step": 11020 + }, + { + "epoch": 5.02, + "learning_rate": 1.4345186905526272e-05, + "loss": 0.9101, + "step": 11030 + }, + { + "epoch": 5.02, + "learning_rate": 1.4343045556017798e-05, + "loss": 0.8269, + "step": 11040 + }, + { + "epoch": 5.02, + "learning_rate": 1.4340900871292047e-05, + "loss": 1.0536, + "step": 11050 + }, + { + "epoch": 5.02, + "learning_rate": 1.433875285239432e-05, + "loss": 0.8667, + "step": 11060 + }, + { + "epoch": 5.02, + "learning_rate": 1.4336601500371527e-05, + "loss": 0.8843, + "step": 11070 + }, + { + "epoch": 5.02, + "learning_rate": 1.4334446816272218e-05, + "loss": 0.7773, + "step": 11080 + }, + { + "epoch": 5.02, + "learning_rate": 1.4332288801146554e-05, + "loss": 1.0272, + "step": 11090 + }, + { + "epoch": 5.02, + "learning_rate": 1.4330127456046328e-05, + "loss": 0.739, + "step": 11100 + }, + { + "epoch": 5.02, + "learning_rate": 1.4327962782024956e-05, + "loss": 0.7335, + "step": 11110 + }, + { + "epoch": 5.02, + "learning_rate": 1.4325794780137468e-05, + "loss": 0.7412, + "step": 11120 + }, + { + "epoch": 5.02, + "learning_rate": 1.4323623451440525e-05, + "loss": 1.011, + "step": 11130 + }, + { + "epoch": 5.02, + "learning_rate": 1.4321448796992409e-05, + "loss": 0.9723, + "step": 11140 + }, + { + "epoch": 5.02, + "learning_rate": 1.4319270817853014e-05, + "loss": 1.0028, + "step": 11150 + }, + { + "epoch": 5.02, + "learning_rate": 1.4317089515083866e-05, + "loss": 0.7306, + "step": 11160 + }, + { + "epoch": 5.02, + "learning_rate": 1.4314904889748102e-05, + "loss": 0.9448, + "step": 11170 + }, + { + "epoch": 5.02, + "learning_rate": 1.4312716942910483e-05, + "loss": 0.617, + "step": 11180 + }, + { + "epoch": 5.02, + "learning_rate": 1.4310525675637389e-05, + "loss": 0.7878, + "step": 11190 + }, + { + "epoch": 5.02, + "learning_rate": 1.4308331088996816e-05, + "loss": 1.0331, + "step": 11200 + }, + { + "epoch": 5.02, + "learning_rate": 1.4306133184058378e-05, + "loss": 0.6705, + "step": 11210 + }, + { + "epoch": 5.02, + "learning_rate": 1.4303931961893309e-05, + "loss": 0.7288, + "step": 11220 + }, + { + "epoch": 5.02, + "learning_rate": 1.4301727423574453e-05, + "loss": 1.2383, + "step": 11230 + }, + { + "epoch": 5.02, + "learning_rate": 1.4299519570176284e-05, + "loss": 0.7154, + "step": 11240 + }, + { + "epoch": 5.03, + "learning_rate": 1.4297308402774876e-05, + "loss": 0.6404, + "step": 11250 + }, + { + "epoch": 5.03, + "learning_rate": 1.4295093922447927e-05, + "loss": 0.9904, + "step": 11260 + }, + { + "epoch": 5.03, + "learning_rate": 1.4292876130274747e-05, + "loss": 1.0343, + "step": 11270 + }, + { + "epoch": 5.03, + "learning_rate": 1.4290655027336264e-05, + "loss": 0.7563, + "step": 11280 + }, + { + "epoch": 5.03, + "learning_rate": 1.428843061471501e-05, + "loss": 1.0234, + "step": 11290 + }, + { + "epoch": 5.03, + "learning_rate": 1.4286202893495147e-05, + "loss": 0.6342, + "step": 11300 + }, + { + "epoch": 5.03, + "learning_rate": 1.428397186476243e-05, + "loss": 0.7474, + "step": 11310 + }, + { + "epoch": 5.03, + "learning_rate": 1.428173752960424e-05, + "loss": 0.7082, + "step": 11320 + }, + { + "epoch": 5.03, + "learning_rate": 1.4279499889109563e-05, + "loss": 0.9103, + "step": 11330 + }, + { + "epoch": 5.03, + "learning_rate": 1.4277258944369001e-05, + "loss": 0.6227, + "step": 11340 + }, + { + "epoch": 5.03, + "learning_rate": 1.4275014696474758e-05, + "loss": 0.5647, + "step": 11350 + }, + { + "epoch": 5.03, + "learning_rate": 1.4272767146520655e-05, + "loss": 0.9027, + "step": 11360 + }, + { + "epoch": 5.03, + "learning_rate": 1.4270516295602122e-05, + "loss": 0.7919, + "step": 11370 + }, + { + "epoch": 5.03, + "learning_rate": 1.4268262144816196e-05, + "loss": 0.831, + "step": 11380 + }, + { + "epoch": 5.03, + "learning_rate": 1.426600469526152e-05, + "loss": 0.7439, + "step": 11390 + }, + { + "epoch": 5.03, + "learning_rate": 1.4263743948038355e-05, + "loss": 0.9817, + "step": 11400 + }, + { + "epoch": 5.03, + "learning_rate": 1.4261479904248552e-05, + "loss": 0.6809, + "step": 11410 + }, + { + "epoch": 5.03, + "learning_rate": 1.4259212564995586e-05, + "loss": 0.852, + "step": 11420 + }, + { + "epoch": 5.03, + "learning_rate": 1.4256941931384526e-05, + "loss": 0.6747, + "step": 11430 + }, + { + "epoch": 5.03, + "learning_rate": 1.4254668004522053e-05, + "loss": 1.0233, + "step": 11440 + }, + { + "epoch": 5.03, + "learning_rate": 1.4252390785516453e-05, + "loss": 0.9007, + "step": 11450 + }, + { + "epoch": 5.03, + "learning_rate": 1.4250110275477612e-05, + "loss": 0.9747, + "step": 11460 + }, + { + "epoch": 5.03, + "learning_rate": 1.4247826475517023e-05, + "loss": 1.2008, + "step": 11470 + }, + { + "epoch": 5.03, + "learning_rate": 1.4245539386747784e-05, + "loss": 0.782, + "step": 11480 + }, + { + "epoch": 5.03, + "learning_rate": 1.4243249010284593e-05, + "loss": 0.6924, + "step": 11490 + }, + { + "epoch": 5.03, + "learning_rate": 1.4240955347243754e-05, + "loss": 0.8578, + "step": 11500 + }, + { + "epoch": 5.03, + "learning_rate": 1.4238658398743167e-05, + "loss": 0.6872, + "step": 11510 + }, + { + "epoch": 5.03, + "learning_rate": 1.4236358165902338e-05, + "loss": 0.5779, + "step": 11520 + }, + { + "epoch": 5.03, + "learning_rate": 1.4234054649842377e-05, + "loss": 0.8016, + "step": 11530 + }, + { + "epoch": 5.03, + "learning_rate": 1.4231747851685982e-05, + "loss": 0.7191, + "step": 11540 + }, + { + "epoch": 5.03, + "learning_rate": 1.4229437772557463e-05, + "loss": 0.8966, + "step": 11550 + }, + { + "epoch": 5.03, + "learning_rate": 1.4227124413582726e-05, + "loss": 0.8387, + "step": 11560 + }, + { + "epoch": 5.03, + "learning_rate": 1.422480777588927e-05, + "loss": 0.6938, + "step": 11570 + }, + { + "epoch": 5.03, + "learning_rate": 1.4222487860606197e-05, + "loss": 0.964, + "step": 11580 + }, + { + "epoch": 5.03, + "learning_rate": 1.4220164668864207e-05, + "loss": 0.831, + "step": 11590 + }, + { + "epoch": 5.03, + "learning_rate": 1.4217838201795596e-05, + "loss": 1.0617, + "step": 11600 + }, + { + "epoch": 5.03, + "learning_rate": 1.4215508460534254e-05, + "loss": 0.7678, + "step": 11610 + }, + { + "epoch": 5.03, + "learning_rate": 1.4213175446215669e-05, + "loss": 1.1968, + "step": 11620 + }, + { + "epoch": 5.03, + "learning_rate": 1.4210839159976927e-05, + "loss": 0.8451, + "step": 11630 + }, + { + "epoch": 5.03, + "learning_rate": 1.4208499602956699e-05, + "loss": 0.803, + "step": 11640 + }, + { + "epoch": 5.03, + "learning_rate": 1.420615677629526e-05, + "loss": 0.5882, + "step": 11650 + }, + { + "epoch": 5.03, + "learning_rate": 1.4203810681134479e-05, + "loss": 1.0378, + "step": 11660 + }, + { + "epoch": 5.03, + "learning_rate": 1.4201461318617807e-05, + "loss": 0.8273, + "step": 11670 + }, + { + "epoch": 5.03, + "learning_rate": 1.4199108689890303e-05, + "loss": 0.8917, + "step": 11680 + }, + { + "epoch": 5.03, + "learning_rate": 1.4196752796098601e-05, + "loss": 0.8185, + "step": 11690 + }, + { + "epoch": 5.03, + "learning_rate": 1.4194393638390943e-05, + "loss": 0.7411, + "step": 11700 + }, + { + "epoch": 5.03, + "learning_rate": 1.4192031217917148e-05, + "loss": 0.649, + "step": 11710 + }, + { + "epoch": 5.03, + "learning_rate": 1.4189665535828631e-05, + "loss": 1.0503, + "step": 11720 + }, + { + "epoch": 5.03, + "learning_rate": 1.41872965932784e-05, + "loss": 0.9669, + "step": 11730 + }, + { + "epoch": 5.03, + "learning_rate": 1.418492439142105e-05, + "loss": 0.7626, + "step": 11740 + }, + { + "epoch": 5.04, + "learning_rate": 1.4182548931412758e-05, + "loss": 0.7923, + "step": 11750 + }, + { + "epoch": 5.04, + "learning_rate": 1.41801702144113e-05, + "loss": 0.9244, + "step": 11760 + }, + { + "epoch": 5.04, + "learning_rate": 1.417778824157603e-05, + "loss": 1.2815, + "step": 11770 + }, + { + "epoch": 5.04, + "learning_rate": 1.4175403014067892e-05, + "loss": 1.0134, + "step": 11780 + }, + { + "epoch": 5.04, + "learning_rate": 1.4173014533049422e-05, + "loss": 0.9197, + "step": 11790 + }, + { + "epoch": 5.04, + "learning_rate": 1.4170622799684732e-05, + "loss": 0.9558, + "step": 11800 + }, + { + "epoch": 5.04, + "learning_rate": 1.4168227815139526e-05, + "loss": 0.9409, + "step": 11810 + }, + { + "epoch": 5.04, + "learning_rate": 1.4165829580581085e-05, + "loss": 0.861, + "step": 11820 + }, + { + "epoch": 5.04, + "learning_rate": 1.416342809717829e-05, + "loss": 0.7087, + "step": 11830 + }, + { + "epoch": 5.04, + "learning_rate": 1.4161023366101585e-05, + "loss": 1.0238, + "step": 11840 + }, + { + "epoch": 5.04, + "learning_rate": 1.415861538852301e-05, + "loss": 0.782, + "step": 11850 + }, + { + "epoch": 5.04, + "learning_rate": 1.4156204165616188e-05, + "loss": 0.6829, + "step": 11860 + }, + { + "epoch": 5.04, + "learning_rate": 1.4153789698556311e-05, + "loss": 0.7065, + "step": 11870 + }, + { + "epoch": 5.04, + "learning_rate": 1.4151371988520169e-05, + "loss": 0.7532, + "step": 11880 + }, + { + "epoch": 5.04, + "learning_rate": 1.414895103668612e-05, + "loss": 0.4863, + "step": 11890 + }, + { + "epoch": 5.04, + "learning_rate": 1.414652684423411e-05, + "loss": 1.1852, + "step": 11900 + }, + { + "epoch": 5.04, + "learning_rate": 1.414409941234566e-05, + "loss": 0.7895, + "step": 11910 + }, + { + "epoch": 5.04, + "learning_rate": 1.4141668742203868e-05, + "loss": 0.6872, + "step": 11920 + }, + { + "epoch": 5.04, + "learning_rate": 1.4139234834993416e-05, + "loss": 1.1803, + "step": 11930 + }, + { + "epoch": 5.04, + "learning_rate": 1.4136797691900557e-05, + "loss": 0.5786, + "step": 11940 + }, + { + "epoch": 5.04, + "learning_rate": 1.413435731411313e-05, + "loss": 0.8615, + "step": 11950 + }, + { + "epoch": 5.04, + "learning_rate": 1.4131913702820543e-05, + "loss": 0.7935, + "step": 11960 + }, + { + "epoch": 5.04, + "learning_rate": 1.4129466859213782e-05, + "loss": 0.5791, + "step": 11970 + }, + { + "epoch": 5.04, + "learning_rate": 1.4127016784485411e-05, + "loss": 1.2098, + "step": 11980 + }, + { + "epoch": 5.04, + "learning_rate": 1.4124563479829562e-05, + "loss": 1.1726, + "step": 11990 + }, + { + "epoch": 5.04, + "learning_rate": 1.4122106946441953e-05, + "loss": 0.9259, + "step": 12000 + }, + { + "epoch": 5.04, + "eval_accuracy": 0.8263157894736842, + "eval_f1": 0.8263157894736841, + "eval_loss": 0.8018165230751038, + "eval_runtime": 741.1315, + "eval_samples_per_second": 6.409, + "eval_steps_per_second": 1.603, + "step": 12000 + }, + { + "epoch": 6.0, + "learning_rate": 1.4119647185519863e-05, + "loss": 0.9292, + "step": 12010 + }, + { + "epoch": 6.0, + "learning_rate": 1.4117184198262151e-05, + "loss": 0.9956, + "step": 12020 + }, + { + "epoch": 6.0, + "learning_rate": 1.4114717985869247e-05, + "loss": 0.9384, + "step": 12030 + }, + { + "epoch": 6.0, + "learning_rate": 1.4112248549543151e-05, + "loss": 0.9028, + "step": 12040 + }, + { + "epoch": 6.0, + "learning_rate": 1.410977589048744e-05, + "loss": 0.8509, + "step": 12050 + }, + { + "epoch": 6.0, + "learning_rate": 1.410730000990726e-05, + "loss": 0.6837, + "step": 12060 + }, + { + "epoch": 6.0, + "learning_rate": 1.4104820909009319e-05, + "loss": 0.6524, + "step": 12070 + }, + { + "epoch": 6.0, + "learning_rate": 1.4102338589001901e-05, + "loss": 0.6301, + "step": 12080 + }, + { + "epoch": 6.0, + "learning_rate": 1.4099853051094864e-05, + "loss": 0.8283, + "step": 12090 + }, + { + "epoch": 6.0, + "learning_rate": 1.4097364296499624e-05, + "loss": 1.0106, + "step": 12100 + }, + { + "epoch": 6.0, + "learning_rate": 1.409487232642917e-05, + "loss": 0.8437, + "step": 12110 + }, + { + "epoch": 6.0, + "learning_rate": 1.4092377142098065e-05, + "loss": 0.7118, + "step": 12120 + }, + { + "epoch": 6.0, + "learning_rate": 1.4089878744722421e-05, + "loss": 1.087, + "step": 12130 + }, + { + "epoch": 6.0, + "learning_rate": 1.4087377135519934e-05, + "loss": 0.54, + "step": 12140 + }, + { + "epoch": 6.0, + "learning_rate": 1.4084872315709853e-05, + "loss": 0.785, + "step": 12150 + }, + { + "epoch": 6.0, + "learning_rate": 1.4082364286513003e-05, + "loss": 0.509, + "step": 12160 + }, + { + "epoch": 6.0, + "learning_rate": 1.4079853049151762e-05, + "loss": 0.8785, + "step": 12170 + }, + { + "epoch": 6.0, + "learning_rate": 1.4077338604850075e-05, + "loss": 0.9986, + "step": 12180 + }, + { + "epoch": 6.0, + "learning_rate": 1.4074820954833457e-05, + "loss": 0.8, + "step": 12190 + }, + { + "epoch": 6.0, + "learning_rate": 1.4072300100328976e-05, + "loss": 0.8324, + "step": 12200 + }, + { + "epoch": 6.0, + "learning_rate": 1.4069776042565269e-05, + "loss": 0.3989, + "step": 12210 + }, + { + "epoch": 6.0, + "learning_rate": 1.406724878277253e-05, + "loss": 0.9609, + "step": 12220 + }, + { + "epoch": 6.0, + "learning_rate": 1.4064718322182512e-05, + "loss": 0.9633, + "step": 12230 + }, + { + "epoch": 6.0, + "learning_rate": 1.4062184662028534e-05, + "loss": 0.8126, + "step": 12240 + }, + { + "epoch": 6.0, + "learning_rate": 1.4059647803545468e-05, + "loss": 0.5011, + "step": 12250 + }, + { + "epoch": 6.01, + "learning_rate": 1.4057107747969753e-05, + "loss": 0.5289, + "step": 12260 + }, + { + "epoch": 6.01, + "learning_rate": 1.4054564496539378e-05, + "loss": 1.0525, + "step": 12270 + }, + { + "epoch": 6.01, + "learning_rate": 1.4052018050493892e-05, + "loss": 0.7972, + "step": 12280 + }, + { + "epoch": 6.01, + "learning_rate": 1.4049468411074402e-05, + "loss": 0.7144, + "step": 12290 + }, + { + "epoch": 6.01, + "learning_rate": 1.4046915579523573e-05, + "loss": 0.6966, + "step": 12300 + }, + { + "epoch": 6.01, + "learning_rate": 1.4044359557085624e-05, + "loss": 0.9538, + "step": 12310 + }, + { + "epoch": 6.01, + "learning_rate": 1.4041800345006328e-05, + "loss": 0.7337, + "step": 12320 + }, + { + "epoch": 6.01, + "learning_rate": 1.4039237944533015e-05, + "loss": 0.8238, + "step": 12330 + }, + { + "epoch": 6.01, + "learning_rate": 1.4036672356914567e-05, + "loss": 0.7472, + "step": 12340 + }, + { + "epoch": 6.01, + "learning_rate": 1.4034103583401422e-05, + "loss": 0.7119, + "step": 12350 + }, + { + "epoch": 6.01, + "learning_rate": 1.4031531625245567e-05, + "loss": 0.7508, + "step": 12360 + }, + { + "epoch": 6.01, + "learning_rate": 1.4028956483700542e-05, + "loss": 0.8372, + "step": 12370 + }, + { + "epoch": 6.01, + "learning_rate": 1.4026378160021442e-05, + "loss": 0.8902, + "step": 12380 + }, + { + "epoch": 6.01, + "learning_rate": 1.4023796655464912e-05, + "loss": 0.666, + "step": 12390 + }, + { + "epoch": 6.01, + "learning_rate": 1.4021211971289142e-05, + "loss": 0.9766, + "step": 12400 + }, + { + "epoch": 6.01, + "learning_rate": 1.401862410875388e-05, + "loss": 0.9295, + "step": 12410 + }, + { + "epoch": 6.01, + "learning_rate": 1.4016033069120414e-05, + "loss": 0.8617, + "step": 12420 + }, + { + "epoch": 6.01, + "learning_rate": 1.4013438853651591e-05, + "loss": 0.6898, + "step": 12430 + }, + { + "epoch": 6.01, + "learning_rate": 1.4010841463611795e-05, + "loss": 0.6646, + "step": 12440 + }, + { + "epoch": 6.01, + "learning_rate": 1.4008240900266964e-05, + "loss": 0.4307, + "step": 12450 + }, + { + "epoch": 6.01, + "learning_rate": 1.400563716488458e-05, + "loss": 0.7693, + "step": 12460 + }, + { + "epoch": 6.01, + "learning_rate": 1.4003030258733676e-05, + "loss": 0.735, + "step": 12470 + }, + { + "epoch": 6.01, + "learning_rate": 1.4000420183084823e-05, + "loss": 0.818, + "step": 12480 + }, + { + "epoch": 6.01, + "learning_rate": 1.3997806939210139e-05, + "loss": 1.0814, + "step": 12490 + }, + { + "epoch": 6.01, + "learning_rate": 1.3995190528383292e-05, + "loss": 1.1466, + "step": 12500 + }, + { + "epoch": 6.01, + "learning_rate": 1.3992570951879483e-05, + "loss": 0.8816, + "step": 12510 + }, + { + "epoch": 6.01, + "learning_rate": 1.3989948210975466e-05, + "loss": 0.8825, + "step": 12520 + }, + { + "epoch": 6.01, + "learning_rate": 1.3987322306949532e-05, + "loss": 0.9807, + "step": 12530 + }, + { + "epoch": 6.01, + "learning_rate": 1.3984693241081512e-05, + "loss": 0.8425, + "step": 12540 + }, + { + "epoch": 6.01, + "learning_rate": 1.3982061014652787e-05, + "loss": 0.8364, + "step": 12550 + }, + { + "epoch": 6.01, + "learning_rate": 1.3979425628946263e-05, + "loss": 0.8375, + "step": 12560 + }, + { + "epoch": 6.01, + "learning_rate": 1.3976787085246405e-05, + "loss": 0.7715, + "step": 12570 + }, + { + "epoch": 6.01, + "learning_rate": 1.3974145384839203e-05, + "loss": 0.7157, + "step": 12580 + }, + { + "epoch": 6.01, + "learning_rate": 1.3971500529012188e-05, + "loss": 0.8325, + "step": 12590 + }, + { + "epoch": 6.01, + "learning_rate": 1.396885251905443e-05, + "loss": 0.5132, + "step": 12600 + }, + { + "epoch": 6.01, + "learning_rate": 1.3966201356256543e-05, + "loss": 1.0001, + "step": 12610 + }, + { + "epoch": 6.01, + "learning_rate": 1.3963547041910663e-05, + "loss": 0.8268, + "step": 12620 + }, + { + "epoch": 6.01, + "learning_rate": 1.3960889577310476e-05, + "loss": 0.7412, + "step": 12630 + }, + { + "epoch": 6.01, + "learning_rate": 1.3958228963751197e-05, + "loss": 0.5222, + "step": 12640 + }, + { + "epoch": 6.01, + "learning_rate": 1.3955565202529577e-05, + "loss": 0.6424, + "step": 12650 + }, + { + "epoch": 6.01, + "learning_rate": 1.39528982949439e-05, + "loss": 0.9083, + "step": 12660 + }, + { + "epoch": 6.01, + "learning_rate": 1.3950228242293985e-05, + "loss": 0.6788, + "step": 12670 + }, + { + "epoch": 6.01, + "learning_rate": 1.3947555045881183e-05, + "loss": 0.9128, + "step": 12680 + }, + { + "epoch": 6.01, + "learning_rate": 1.3944878707008378e-05, + "loss": 1.0018, + "step": 12690 + }, + { + "epoch": 6.01, + "learning_rate": 1.3942199226979984e-05, + "loss": 0.9426, + "step": 12700 + }, + { + "epoch": 6.01, + "learning_rate": 1.3939516607101947e-05, + "loss": 0.7346, + "step": 12710 + }, + { + "epoch": 6.01, + "learning_rate": 1.3936830848681743e-05, + "loss": 0.7292, + "step": 12720 + }, + { + "epoch": 6.01, + "learning_rate": 1.393414195302838e-05, + "loss": 0.9039, + "step": 12730 + }, + { + "epoch": 6.01, + "learning_rate": 1.3931449921452392e-05, + "loss": 0.4155, + "step": 12740 + }, + { + "epoch": 6.01, + "learning_rate": 1.3928754755265844e-05, + "loss": 0.5747, + "step": 12750 + }, + { + "epoch": 6.02, + "learning_rate": 1.3926056455782322e-05, + "loss": 0.7444, + "step": 12760 + }, + { + "epoch": 6.02, + "learning_rate": 1.392335502431695e-05, + "loss": 1.033, + "step": 12770 + }, + { + "epoch": 6.02, + "learning_rate": 1.392065046218637e-05, + "loss": 0.5894, + "step": 12780 + }, + { + "epoch": 6.02, + "learning_rate": 1.3917942770708757e-05, + "loss": 0.7544, + "step": 12790 + }, + { + "epoch": 6.02, + "learning_rate": 1.39152319512038e-05, + "loss": 0.6563, + "step": 12800 + }, + { + "epoch": 6.02, + "learning_rate": 1.3912518004992724e-05, + "loss": 0.7536, + "step": 12810 + }, + { + "epoch": 6.02, + "learning_rate": 1.3909800933398273e-05, + "loss": 0.577, + "step": 12820 + }, + { + "epoch": 6.02, + "learning_rate": 1.3907080737744714e-05, + "loss": 0.6801, + "step": 12830 + }, + { + "epoch": 6.02, + "learning_rate": 1.3904357419357838e-05, + "loss": 0.8534, + "step": 12840 + }, + { + "epoch": 6.02, + "learning_rate": 1.3901630979564955e-05, + "loss": 0.9727, + "step": 12850 + }, + { + "epoch": 6.02, + "learning_rate": 1.3898901419694903e-05, + "loss": 0.8376, + "step": 12860 + }, + { + "epoch": 6.02, + "learning_rate": 1.3896168741078033e-05, + "loss": 0.9828, + "step": 12870 + }, + { + "epoch": 6.02, + "learning_rate": 1.3893432945046219e-05, + "loss": 0.81, + "step": 12880 + }, + { + "epoch": 6.02, + "learning_rate": 1.3890694032932857e-05, + "loss": 0.9423, + "step": 12890 + }, + { + "epoch": 6.02, + "learning_rate": 1.3887952006072857e-05, + "loss": 0.9811, + "step": 12900 + }, + { + "epoch": 6.02, + "learning_rate": 1.388520686580265e-05, + "loss": 1.0064, + "step": 12910 + }, + { + "epoch": 6.02, + "learning_rate": 1.388245861346019e-05, + "loss": 1.0164, + "step": 12920 + }, + { + "epoch": 6.02, + "learning_rate": 1.3879707250384934e-05, + "loss": 0.6345, + "step": 12930 + }, + { + "epoch": 6.02, + "learning_rate": 1.3876952777917864e-05, + "loss": 0.8114, + "step": 12940 + }, + { + "epoch": 6.02, + "learning_rate": 1.387419519740148e-05, + "loss": 0.8133, + "step": 12950 + }, + { + "epoch": 6.02, + "learning_rate": 1.3871434510179791e-05, + "loss": 0.7562, + "step": 12960 + }, + { + "epoch": 6.02, + "learning_rate": 1.3868670717598323e-05, + "loss": 0.9805, + "step": 12970 + }, + { + "epoch": 6.02, + "learning_rate": 1.3865903821004115e-05, + "loss": 0.7079, + "step": 12980 + }, + { + "epoch": 6.02, + "learning_rate": 1.3863133821745717e-05, + "loss": 0.9933, + "step": 12990 + }, + { + "epoch": 6.02, + "learning_rate": 1.3860360721173195e-05, + "loss": 0.5244, + "step": 13000 + }, + { + "epoch": 6.02, + "learning_rate": 1.3857584520638124e-05, + "loss": 0.6868, + "step": 13010 + }, + { + "epoch": 6.02, + "learning_rate": 1.3854805221493592e-05, + "loss": 0.7727, + "step": 13020 + }, + { + "epoch": 6.02, + "learning_rate": 1.3852022825094192e-05, + "loss": 1.0346, + "step": 13030 + }, + { + "epoch": 6.02, + "learning_rate": 1.3849237332796034e-05, + "loss": 0.9681, + "step": 13040 + }, + { + "epoch": 6.02, + "learning_rate": 1.384644874595673e-05, + "loss": 0.824, + "step": 13050 + }, + { + "epoch": 6.02, + "learning_rate": 1.3843657065935406e-05, + "loss": 0.9007, + "step": 13060 + }, + { + "epoch": 6.02, + "learning_rate": 1.3840862294092691e-05, + "loss": 1.0339, + "step": 13070 + }, + { + "epoch": 6.02, + "learning_rate": 1.3838064431790724e-05, + "loss": 0.8334, + "step": 13080 + }, + { + "epoch": 6.02, + "learning_rate": 1.3835263480393149e-05, + "loss": 0.9044, + "step": 13090 + }, + { + "epoch": 6.02, + "learning_rate": 1.3832459441265114e-05, + "loss": 0.8958, + "step": 13100 + }, + { + "epoch": 6.02, + "learning_rate": 1.3829652315773276e-05, + "loss": 0.8666, + "step": 13110 + }, + { + "epoch": 6.02, + "learning_rate": 1.3826842105285792e-05, + "loss": 0.9741, + "step": 13120 + }, + { + "epoch": 6.02, + "learning_rate": 1.3824028811172325e-05, + "loss": 0.8652, + "step": 13130 + }, + { + "epoch": 6.02, + "learning_rate": 1.3821212434804042e-05, + "loss": 0.9392, + "step": 13140 + }, + { + "epoch": 6.02, + "learning_rate": 1.3818392977553607e-05, + "loss": 0.658, + "step": 13150 + }, + { + "epoch": 6.02, + "learning_rate": 1.3815570440795194e-05, + "loss": 0.636, + "step": 13160 + }, + { + "epoch": 6.02, + "learning_rate": 1.3812744825904467e-05, + "loss": 0.8125, + "step": 13170 + }, + { + "epoch": 6.02, + "learning_rate": 1.3809916134258603e-05, + "loss": 0.5889, + "step": 13180 + }, + { + "epoch": 6.02, + "learning_rate": 1.3807084367236269e-05, + "loss": 0.6374, + "step": 13190 + }, + { + "epoch": 6.02, + "learning_rate": 1.3804249526217633e-05, + "loss": 0.9499, + "step": 13200 + }, + { + "epoch": 6.02, + "learning_rate": 1.3801411612584363e-05, + "loss": 0.9158, + "step": 13210 + }, + { + "epoch": 6.02, + "learning_rate": 1.3798570627719622e-05, + "loss": 0.4447, + "step": 13220 + }, + { + "epoch": 6.02, + "learning_rate": 1.3795726573008075e-05, + "loss": 0.976, + "step": 13230 + }, + { + "epoch": 6.02, + "learning_rate": 1.3792879449835875e-05, + "loss": 0.7793, + "step": 13240 + }, + { + "epoch": 6.03, + "learning_rate": 1.3790029259590681e-05, + "loss": 0.9342, + "step": 13250 + }, + { + "epoch": 6.03, + "learning_rate": 1.3787176003661635e-05, + "loss": 0.6184, + "step": 13260 + }, + { + "epoch": 6.03, + "learning_rate": 1.3784319683439385e-05, + "loss": 0.7955, + "step": 13270 + }, + { + "epoch": 6.03, + "learning_rate": 1.3781460300316064e-05, + "loss": 0.906, + "step": 13280 + }, + { + "epoch": 6.03, + "learning_rate": 1.37785978556853e-05, + "loss": 0.9102, + "step": 13290 + }, + { + "epoch": 6.03, + "learning_rate": 1.3775732350942213e-05, + "loss": 0.9564, + "step": 13300 + }, + { + "epoch": 6.03, + "learning_rate": 1.3772863787483418e-05, + "loss": 0.734, + "step": 13310 + }, + { + "epoch": 6.03, + "learning_rate": 1.3769992166707014e-05, + "loss": 0.8327, + "step": 13320 + }, + { + "epoch": 6.03, + "learning_rate": 1.3767117490012603e-05, + "loss": 0.6923, + "step": 13330 + }, + { + "epoch": 6.03, + "learning_rate": 1.3764239758801257e-05, + "loss": 0.692, + "step": 13340 + }, + { + "epoch": 6.03, + "learning_rate": 1.376135897447555e-05, + "loss": 0.8945, + "step": 13350 + }, + { + "epoch": 6.03, + "learning_rate": 1.3758475138439543e-05, + "loss": 0.6951, + "step": 13360 + }, + { + "epoch": 6.03, + "learning_rate": 1.3755588252098785e-05, + "loss": 0.6342, + "step": 13370 + }, + { + "epoch": 6.03, + "learning_rate": 1.3752698316860305e-05, + "loss": 0.8719, + "step": 13380 + }, + { + "epoch": 6.03, + "learning_rate": 1.3749805334132624e-05, + "loss": 0.7696, + "step": 13390 + }, + { + "epoch": 6.03, + "learning_rate": 1.3746909305325747e-05, + "loss": 0.721, + "step": 13400 + }, + { + "epoch": 6.03, + "learning_rate": 1.3744010231851161e-05, + "loss": 0.5691, + "step": 13410 + }, + { + "epoch": 6.03, + "learning_rate": 1.3741108115121844e-05, + "loss": 1.035, + "step": 13420 + }, + { + "epoch": 6.03, + "learning_rate": 1.373820295655225e-05, + "loss": 0.7772, + "step": 13430 + }, + { + "epoch": 6.03, + "learning_rate": 1.3735294757558315e-05, + "loss": 0.9524, + "step": 13440 + }, + { + "epoch": 6.03, + "learning_rate": 1.3732383519557461e-05, + "loss": 1.0839, + "step": 13450 + }, + { + "epoch": 6.03, + "learning_rate": 1.3729469243968596e-05, + "loss": 0.4841, + "step": 13460 + }, + { + "epoch": 6.03, + "learning_rate": 1.3726551932212094e-05, + "loss": 0.7338, + "step": 13470 + }, + { + "epoch": 6.03, + "learning_rate": 1.3723631585709822e-05, + "loss": 1.0517, + "step": 13480 + }, + { + "epoch": 6.03, + "learning_rate": 1.3720708205885125e-05, + "loss": 0.9323, + "step": 13490 + }, + { + "epoch": 6.03, + "learning_rate": 1.3717781794162813e-05, + "loss": 0.6849, + "step": 13500 + }, + { + "epoch": 6.03, + "learning_rate": 1.371485235196919e-05, + "loss": 0.3872, + "step": 13510 + }, + { + "epoch": 6.03, + "learning_rate": 1.3711919880732033e-05, + "loss": 0.9672, + "step": 13520 + }, + { + "epoch": 6.03, + "learning_rate": 1.3708984381880584e-05, + "loss": 0.6098, + "step": 13530 + }, + { + "epoch": 6.03, + "learning_rate": 1.3706045856845579e-05, + "loss": 0.7626, + "step": 13540 + }, + { + "epoch": 6.03, + "learning_rate": 1.3703104307059213e-05, + "loss": 0.8963, + "step": 13550 + }, + { + "epoch": 6.03, + "learning_rate": 1.3700159733955166e-05, + "loss": 0.7506, + "step": 13560 + }, + { + "epoch": 6.03, + "learning_rate": 1.3697212138968584e-05, + "loss": 0.9264, + "step": 13570 + }, + { + "epoch": 6.03, + "learning_rate": 1.369426152353609e-05, + "loss": 0.787, + "step": 13580 + }, + { + "epoch": 6.03, + "learning_rate": 1.3691307889095778e-05, + "loss": 0.7793, + "step": 13590 + }, + { + "epoch": 6.03, + "learning_rate": 1.3688351237087214e-05, + "loss": 0.8481, + "step": 13600 + }, + { + "epoch": 6.03, + "learning_rate": 1.3685391568951434e-05, + "loss": 0.8792, + "step": 13610 + }, + { + "epoch": 6.03, + "learning_rate": 1.3682428886130944e-05, + "loss": 0.888, + "step": 13620 + }, + { + "epoch": 6.03, + "learning_rate": 1.367946319006972e-05, + "loss": 0.7437, + "step": 13630 + }, + { + "epoch": 6.03, + "learning_rate": 1.3676494482213206e-05, + "loss": 1.0551, + "step": 13640 + }, + { + "epoch": 6.03, + "learning_rate": 1.3673522764008315e-05, + "loss": 0.9655, + "step": 13650 + }, + { + "epoch": 6.03, + "learning_rate": 1.3670548036903425e-05, + "loss": 0.8299, + "step": 13660 + }, + { + "epoch": 6.03, + "learning_rate": 1.3667570302348384e-05, + "loss": 0.9885, + "step": 13670 + }, + { + "epoch": 6.03, + "learning_rate": 1.3664589561794498e-05, + "loss": 0.6566, + "step": 13680 + }, + { + "epoch": 6.03, + "learning_rate": 1.3661605816694551e-05, + "loss": 0.6017, + "step": 13690 + }, + { + "epoch": 6.03, + "learning_rate": 1.365861906850278e-05, + "loss": 0.6448, + "step": 13700 + }, + { + "epoch": 6.03, + "learning_rate": 1.3655629318674892e-05, + "loss": 0.8787, + "step": 13710 + }, + { + "epoch": 6.03, + "learning_rate": 1.3652636568668053e-05, + "loss": 0.8334, + "step": 13720 + }, + { + "epoch": 6.03, + "learning_rate": 1.3649640819940891e-05, + "loss": 1.1023, + "step": 13730 + }, + { + "epoch": 6.03, + "learning_rate": 1.3646642073953505e-05, + "loss": 0.5216, + "step": 13740 + }, + { + "epoch": 6.04, + "learning_rate": 1.364364033216744e-05, + "loss": 1.0398, + "step": 13750 + }, + { + "epoch": 6.04, + "learning_rate": 1.3640635596045708e-05, + "loss": 0.9455, + "step": 13760 + }, + { + "epoch": 6.04, + "learning_rate": 1.3637627867052786e-05, + "loss": 0.6643, + "step": 13770 + }, + { + "epoch": 6.04, + "learning_rate": 1.3634617146654605e-05, + "loss": 0.8005, + "step": 13780 + }, + { + "epoch": 6.04, + "learning_rate": 1.3631603436318548e-05, + "loss": 1.2044, + "step": 13790 + }, + { + "epoch": 6.04, + "learning_rate": 1.3628586737513463e-05, + "loss": 1.0157, + "step": 13800 + }, + { + "epoch": 6.04, + "learning_rate": 1.3625567051709656e-05, + "loss": 0.4879, + "step": 13810 + }, + { + "epoch": 6.04, + "learning_rate": 1.362254438037888e-05, + "loss": 0.6704, + "step": 13820 + }, + { + "epoch": 6.04, + "learning_rate": 1.3619518724994351e-05, + "loss": 0.8101, + "step": 13830 + }, + { + "epoch": 6.04, + "learning_rate": 1.3616490087030738e-05, + "loss": 0.6362, + "step": 13840 + }, + { + "epoch": 6.04, + "learning_rate": 1.3613458467964156e-05, + "loss": 0.9716, + "step": 13850 + }, + { + "epoch": 6.04, + "learning_rate": 1.3610423869272188e-05, + "loss": 0.6861, + "step": 13860 + }, + { + "epoch": 6.04, + "learning_rate": 1.3607386292433854e-05, + "loss": 1.2453, + "step": 13870 + }, + { + "epoch": 6.04, + "learning_rate": 1.3604345738929636e-05, + "loss": 0.7002, + "step": 13880 + }, + { + "epoch": 6.04, + "learning_rate": 1.360130221024146e-05, + "loss": 0.7556, + "step": 13890 + }, + { + "epoch": 6.04, + "learning_rate": 1.3598255707852707e-05, + "loss": 0.7298, + "step": 13900 + }, + { + "epoch": 6.04, + "learning_rate": 1.3595206233248204e-05, + "loss": 0.7313, + "step": 13910 + }, + { + "epoch": 6.04, + "learning_rate": 1.3592153787914228e-05, + "loss": 0.4009, + "step": 13920 + }, + { + "epoch": 6.04, + "learning_rate": 1.3589098373338507e-05, + "loss": 0.829, + "step": 13930 + }, + { + "epoch": 6.04, + "learning_rate": 1.3586039991010208e-05, + "loss": 0.6719, + "step": 13940 + }, + { + "epoch": 6.04, + "learning_rate": 1.358297864241995e-05, + "loss": 0.5607, + "step": 13950 + }, + { + "epoch": 6.04, + "learning_rate": 1.35799143290598e-05, + "loss": 0.6292, + "step": 13960 + }, + { + "epoch": 6.04, + "learning_rate": 1.3576847052423266e-05, + "loss": 0.9576, + "step": 13970 + }, + { + "epoch": 6.04, + "learning_rate": 1.35737768140053e-05, + "loss": 1.0345, + "step": 13980 + }, + { + "epoch": 6.04, + "learning_rate": 1.35707036153023e-05, + "loss": 0.4929, + "step": 13990 + }, + { + "epoch": 6.04, + "learning_rate": 1.3567627457812107e-05, + "loss": 0.6077, + "step": 14000 + }, + { + "epoch": 6.04, + "eval_accuracy": 0.8189473684210526, + "eval_f1": 0.8189473684210526, + "eval_loss": 0.8212071061134338, + "eval_runtime": 747.465, + "eval_samples_per_second": 6.355, + "eval_steps_per_second": 1.589, + "step": 14000 + }, + { + "epoch": 7.0, + "learning_rate": 1.3564548343034e-05, + "loss": 0.8925, + "step": 14010 + }, + { + "epoch": 7.0, + "learning_rate": 1.3561466272468704e-05, + "loss": 1.021, + "step": 14020 + }, + { + "epoch": 7.0, + "learning_rate": 1.3558381247618381e-05, + "loss": 0.8947, + "step": 14030 + }, + { + "epoch": 7.0, + "learning_rate": 1.3555293269986639e-05, + "loss": 0.7799, + "step": 14040 + }, + { + "epoch": 7.0, + "learning_rate": 1.3552202341078515e-05, + "loss": 0.7529, + "step": 14050 + }, + { + "epoch": 7.0, + "learning_rate": 1.3549108462400494e-05, + "loss": 0.8222, + "step": 14060 + }, + { + "epoch": 7.0, + "learning_rate": 1.3546011635460494e-05, + "loss": 1.1847, + "step": 14070 + }, + { + "epoch": 7.0, + "learning_rate": 1.354291186176787e-05, + "loss": 0.7235, + "step": 14080 + }, + { + "epoch": 7.0, + "learning_rate": 1.3539809142833414e-05, + "loss": 0.8463, + "step": 14090 + }, + { + "epoch": 7.0, + "learning_rate": 1.3536703480169356e-05, + "loss": 0.512, + "step": 14100 + }, + { + "epoch": 7.0, + "learning_rate": 1.3533594875289357e-05, + "loss": 0.8988, + "step": 14110 + }, + { + "epoch": 7.0, + "learning_rate": 1.353048332970851e-05, + "loss": 0.8714, + "step": 14120 + }, + { + "epoch": 7.0, + "learning_rate": 1.3527368844943349e-05, + "loss": 0.8919, + "step": 14130 + }, + { + "epoch": 7.0, + "learning_rate": 1.3524251422511834e-05, + "loss": 0.6822, + "step": 14140 + }, + { + "epoch": 7.0, + "learning_rate": 1.3521131063933359e-05, + "loss": 0.8376, + "step": 14150 + }, + { + "epoch": 7.0, + "learning_rate": 1.351800777072875e-05, + "loss": 0.6984, + "step": 14160 + }, + { + "epoch": 7.0, + "learning_rate": 1.3514881544420259e-05, + "loss": 0.7484, + "step": 14170 + }, + { + "epoch": 7.0, + "learning_rate": 1.3511752386531575e-05, + "loss": 1.064, + "step": 14180 + }, + { + "epoch": 7.0, + "learning_rate": 1.3508620298587809e-05, + "loss": 0.5924, + "step": 14190 + }, + { + "epoch": 7.0, + "learning_rate": 1.3505485282115501e-05, + "loss": 0.812, + "step": 14200 + }, + { + "epoch": 7.0, + "learning_rate": 1.3502347338642623e-05, + "loss": 0.7384, + "step": 14210 + }, + { + "epoch": 7.0, + "learning_rate": 1.3499206469698571e-05, + "loss": 0.8245, + "step": 14220 + }, + { + "epoch": 7.0, + "learning_rate": 1.3496062676814165e-05, + "loss": 0.8636, + "step": 14230 + }, + { + "epoch": 7.0, + "learning_rate": 1.349291596152165e-05, + "loss": 0.6606, + "step": 14240 + }, + { + "epoch": 7.0, + "learning_rate": 1.3489766325354697e-05, + "loss": 0.8448, + "step": 14250 + }, + { + "epoch": 7.01, + "learning_rate": 1.3486613769848403e-05, + "loss": 0.9852, + "step": 14260 + }, + { + "epoch": 7.01, + "learning_rate": 1.3483458296539283e-05, + "loss": 1.052, + "step": 14270 + }, + { + "epoch": 7.01, + "learning_rate": 1.3480299906965276e-05, + "loss": 0.6644, + "step": 14280 + }, + { + "epoch": 7.01, + "learning_rate": 1.3477138602665747e-05, + "loss": 0.8872, + "step": 14290 + }, + { + "epoch": 7.01, + "learning_rate": 1.3473974385181472e-05, + "loss": 1.069, + "step": 14300 + }, + { + "epoch": 7.01, + "learning_rate": 1.3470807256054654e-05, + "loss": 0.7077, + "step": 14310 + }, + { + "epoch": 7.01, + "learning_rate": 1.3467637216828916e-05, + "loss": 0.8647, + "step": 14320 + }, + { + "epoch": 7.01, + "learning_rate": 1.3464464269049293e-05, + "loss": 0.9642, + "step": 14330 + }, + { + "epoch": 7.01, + "learning_rate": 1.3461288414262242e-05, + "loss": 0.7707, + "step": 14340 + }, + { + "epoch": 7.01, + "learning_rate": 1.3458109654015637e-05, + "loss": 0.7427, + "step": 14350 + }, + { + "epoch": 7.01, + "learning_rate": 1.3454927989858766e-05, + "loss": 0.561, + "step": 14360 + }, + { + "epoch": 7.01, + "learning_rate": 1.3451743423342333e-05, + "loss": 1.0471, + "step": 14370 + }, + { + "epoch": 7.01, + "learning_rate": 1.344855595601846e-05, + "loss": 0.9729, + "step": 14380 + }, + { + "epoch": 7.01, + "learning_rate": 1.3445365589440676e-05, + "loss": 0.7116, + "step": 14390 + }, + { + "epoch": 7.01, + "learning_rate": 1.344217232516393e-05, + "loss": 0.8295, + "step": 14400 + }, + { + "epoch": 7.01, + "learning_rate": 1.343897616474458e-05, + "loss": 0.6606, + "step": 14410 + }, + { + "epoch": 7.01, + "learning_rate": 1.3435777109740394e-05, + "loss": 0.8007, + "step": 14420 + }, + { + "epoch": 7.01, + "learning_rate": 1.3432575161710552e-05, + "loss": 1.0616, + "step": 14430 + }, + { + "epoch": 7.01, + "learning_rate": 1.3429370322215648e-05, + "loss": 0.7153, + "step": 14440 + }, + { + "epoch": 7.01, + "learning_rate": 1.3426162592817678e-05, + "loss": 0.7569, + "step": 14450 + }, + { + "epoch": 7.01, + "learning_rate": 1.3422951975080054e-05, + "loss": 0.9862, + "step": 14460 + }, + { + "epoch": 7.01, + "learning_rate": 1.3419738470567587e-05, + "loss": 0.5145, + "step": 14470 + }, + { + "epoch": 7.01, + "learning_rate": 1.3416522080846506e-05, + "loss": 1.1163, + "step": 14480 + }, + { + "epoch": 7.01, + "learning_rate": 1.3413302807484436e-05, + "loss": 0.8015, + "step": 14490 + }, + { + "epoch": 7.01, + "learning_rate": 1.3410080652050414e-05, + "loss": 0.7657, + "step": 14500 + }, + { + "epoch": 7.01, + "learning_rate": 1.340685561611488e-05, + "loss": 0.534, + "step": 14510 + }, + { + "epoch": 7.01, + "learning_rate": 1.3403627701249675e-05, + "loss": 0.8209, + "step": 14520 + }, + { + "epoch": 7.01, + "learning_rate": 1.3400396909028046e-05, + "loss": 0.5046, + "step": 14530 + }, + { + "epoch": 7.01, + "learning_rate": 1.3397163241024641e-05, + "loss": 0.8202, + "step": 14540 + }, + { + "epoch": 7.01, + "learning_rate": 1.3393926698815516e-05, + "loss": 0.7993, + "step": 14550 + }, + { + "epoch": 7.01, + "learning_rate": 1.3390687283978114e-05, + "loss": 0.6522, + "step": 14560 + }, + { + "epoch": 7.01, + "learning_rate": 1.3387444998091294e-05, + "loss": 0.7665, + "step": 14570 + }, + { + "epoch": 7.01, + "learning_rate": 1.33841998427353e-05, + "loss": 0.5005, + "step": 14580 + }, + { + "epoch": 7.01, + "learning_rate": 1.3380951819491785e-05, + "loss": 0.8614, + "step": 14590 + }, + { + "epoch": 7.01, + "learning_rate": 1.3377700929943799e-05, + "loss": 0.4467, + "step": 14600 + }, + { + "epoch": 7.01, + "learning_rate": 1.337444717567578e-05, + "loss": 1.0509, + "step": 14610 + }, + { + "epoch": 7.01, + "learning_rate": 1.3371190558273574e-05, + "loss": 1.0283, + "step": 14620 + }, + { + "epoch": 7.01, + "learning_rate": 1.336793107932441e-05, + "loss": 0.9415, + "step": 14630 + }, + { + "epoch": 7.01, + "learning_rate": 1.3364668740416927e-05, + "loss": 0.6773, + "step": 14640 + }, + { + "epoch": 7.01, + "learning_rate": 1.3361403543141141e-05, + "loss": 0.7324, + "step": 14650 + }, + { + "epoch": 7.01, + "learning_rate": 1.3358135489088473e-05, + "loss": 0.9419, + "step": 14660 + }, + { + "epoch": 7.01, + "learning_rate": 1.3354864579851737e-05, + "loss": 0.4525, + "step": 14670 + }, + { + "epoch": 7.01, + "learning_rate": 1.3351590817025127e-05, + "loss": 0.5833, + "step": 14680 + }, + { + "epoch": 7.01, + "learning_rate": 1.3348314202204241e-05, + "loss": 0.8469, + "step": 14690 + }, + { + "epoch": 7.01, + "learning_rate": 1.3345034736986057e-05, + "loss": 0.8856, + "step": 14700 + }, + { + "epoch": 7.01, + "learning_rate": 1.3341752422968948e-05, + "loss": 0.5665, + "step": 14710 + }, + { + "epoch": 7.01, + "learning_rate": 1.3338467261752677e-05, + "loss": 0.9724, + "step": 14720 + }, + { + "epoch": 7.01, + "learning_rate": 1.3335179254938387e-05, + "loss": 0.6042, + "step": 14730 + }, + { + "epoch": 7.01, + "learning_rate": 1.3331888404128615e-05, + "loss": 0.9173, + "step": 14740 + }, + { + "epoch": 7.01, + "learning_rate": 1.3328594710927282e-05, + "loss": 1.3782, + "step": 14750 + }, + { + "epoch": 7.02, + "learning_rate": 1.3325298176939694e-05, + "loss": 0.876, + "step": 14760 + }, + { + "epoch": 7.02, + "learning_rate": 1.332199880377254e-05, + "loss": 0.876, + "step": 14770 + }, + { + "epoch": 7.02, + "learning_rate": 1.3318696593033896e-05, + "loss": 1.0023, + "step": 14780 + }, + { + "epoch": 7.02, + "learning_rate": 1.3315391546333219e-05, + "loss": 0.6538, + "step": 14790 + }, + { + "epoch": 7.02, + "learning_rate": 1.3312083665281348e-05, + "loss": 0.7284, + "step": 14800 + }, + { + "epoch": 7.02, + "learning_rate": 1.3308772951490503e-05, + "loss": 0.7376, + "step": 14810 + }, + { + "epoch": 7.02, + "learning_rate": 1.330545940657429e-05, + "loss": 0.5036, + "step": 14820 + }, + { + "epoch": 7.02, + "learning_rate": 1.3302143032147687e-05, + "loss": 0.7377, + "step": 14830 + }, + { + "epoch": 7.02, + "learning_rate": 1.3298823829827055e-05, + "loss": 1.1506, + "step": 14840 + }, + { + "epoch": 7.02, + "learning_rate": 1.3295501801230133e-05, + "loss": 0.6578, + "step": 14850 + }, + { + "epoch": 7.02, + "learning_rate": 1.3292176947976038e-05, + "loss": 0.7625, + "step": 14860 + }, + { + "epoch": 7.02, + "learning_rate": 1.3288849271685263e-05, + "loss": 0.64, + "step": 14870 + }, + { + "epoch": 7.02, + "learning_rate": 1.3285518773979677e-05, + "loss": 0.6243, + "step": 14880 + }, + { + "epoch": 7.02, + "learning_rate": 1.3282185456482522e-05, + "loss": 0.9075, + "step": 14890 + }, + { + "epoch": 7.02, + "learning_rate": 1.327884932081842e-05, + "loss": 0.9783, + "step": 14900 + }, + { + "epoch": 7.02, + "learning_rate": 1.327551036861336e-05, + "loss": 0.6165, + "step": 14910 + }, + { + "epoch": 7.02, + "learning_rate": 1.327216860149471e-05, + "loss": 0.9649, + "step": 14920 + }, + { + "epoch": 7.02, + "learning_rate": 1.3268824021091203e-05, + "loss": 0.7211, + "step": 14930 + }, + { + "epoch": 7.02, + "learning_rate": 1.326547662903295e-05, + "loss": 0.9264, + "step": 14940 + }, + { + "epoch": 7.02, + "learning_rate": 1.3262126426951427e-05, + "loss": 0.7827, + "step": 14950 + }, + { + "epoch": 7.02, + "learning_rate": 1.3258773416479483e-05, + "loss": 0.947, + "step": 14960 + }, + { + "epoch": 7.02, + "learning_rate": 1.3255417599251331e-05, + "loss": 0.5499, + "step": 14970 + }, + { + "epoch": 7.02, + "learning_rate": 1.3252058976902563e-05, + "loss": 0.6036, + "step": 14980 + }, + { + "epoch": 7.02, + "learning_rate": 1.3248697551070124e-05, + "loss": 0.8893, + "step": 14990 + }, + { + "epoch": 7.02, + "learning_rate": 1.3245333323392335e-05, + "loss": 0.6627, + "step": 15000 + }, + { + "epoch": 7.02, + "learning_rate": 1.3241966295508879e-05, + "loss": 0.7549, + "step": 15010 + }, + { + "epoch": 7.02, + "learning_rate": 1.3238596469060808e-05, + "loss": 0.7898, + "step": 15020 + }, + { + "epoch": 7.02, + "learning_rate": 1.3235223845690528e-05, + "loss": 0.7913, + "step": 15030 + }, + { + "epoch": 7.02, + "learning_rate": 1.3231848427041817e-05, + "loss": 1.0171, + "step": 15040 + }, + { + "epoch": 7.02, + "learning_rate": 1.3228470214759818e-05, + "loss": 0.9284, + "step": 15050 + }, + { + "epoch": 7.02, + "learning_rate": 1.3225089210491024e-05, + "loss": 1.0654, + "step": 15060 + }, + { + "epoch": 7.02, + "learning_rate": 1.3221705415883297e-05, + "loss": 0.762, + "step": 15070 + }, + { + "epoch": 7.02, + "learning_rate": 1.321831883258586e-05, + "loss": 0.656, + "step": 15080 + }, + { + "epoch": 7.02, + "learning_rate": 1.321492946224929e-05, + "loss": 0.5709, + "step": 15090 + }, + { + "epoch": 7.02, + "learning_rate": 1.3211537306525526e-05, + "loss": 0.8218, + "step": 15100 + }, + { + "epoch": 7.02, + "learning_rate": 1.3208142367067865e-05, + "loss": 0.5841, + "step": 15110 + }, + { + "epoch": 7.02, + "learning_rate": 1.3204744645530956e-05, + "loss": 0.6966, + "step": 15120 + }, + { + "epoch": 7.02, + "learning_rate": 1.3201344143570806e-05, + "loss": 0.9424, + "step": 15130 + }, + { + "epoch": 7.02, + "learning_rate": 1.3197940862844786e-05, + "loss": 0.6475, + "step": 15140 + }, + { + "epoch": 7.02, + "learning_rate": 1.3194534805011606e-05, + "loss": 0.91, + "step": 15150 + }, + { + "epoch": 7.02, + "learning_rate": 1.3191125971731342e-05, + "loss": 0.8327, + "step": 15160 + }, + { + "epoch": 7.02, + "learning_rate": 1.3187714364665415e-05, + "loss": 0.7005, + "step": 15170 + }, + { + "epoch": 7.02, + "learning_rate": 1.31842999854766e-05, + "loss": 0.9117, + "step": 15180 + }, + { + "epoch": 7.02, + "learning_rate": 1.318088283582903e-05, + "loss": 0.6846, + "step": 15190 + }, + { + "epoch": 7.02, + "learning_rate": 1.3177462917388173e-05, + "loss": 0.8622, + "step": 15200 + }, + { + "epoch": 7.02, + "learning_rate": 1.3174040231820863e-05, + "loss": 0.9937, + "step": 15210 + }, + { + "epoch": 7.02, + "learning_rate": 1.3170614780795273e-05, + "loss": 0.5033, + "step": 15220 + }, + { + "epoch": 7.02, + "learning_rate": 1.3167186565980927e-05, + "loss": 1.3306, + "step": 15230 + }, + { + "epoch": 7.02, + "learning_rate": 1.3163755589048693e-05, + "loss": 0.9008, + "step": 15240 + }, + { + "epoch": 7.03, + "learning_rate": 1.316032185167079e-05, + "loss": 0.8008, + "step": 15250 + }, + { + "epoch": 7.03, + "learning_rate": 1.3156885355520778e-05, + "loss": 0.6892, + "step": 15260 + }, + { + "epoch": 7.03, + "learning_rate": 1.3153446102273566e-05, + "loss": 0.7426, + "step": 15270 + }, + { + "epoch": 7.03, + "learning_rate": 1.3150004093605403e-05, + "loss": 0.7207, + "step": 15280 + }, + { + "epoch": 7.03, + "learning_rate": 1.3146559331193879e-05, + "loss": 0.9089, + "step": 15290 + }, + { + "epoch": 7.03, + "learning_rate": 1.3143111816717933e-05, + "loss": 0.8718, + "step": 15300 + }, + { + "epoch": 7.03, + "learning_rate": 1.3139661551857842e-05, + "loss": 0.8146, + "step": 15310 + }, + { + "epoch": 7.03, + "learning_rate": 1.3136208538295221e-05, + "loss": 0.7586, + "step": 15320 + }, + { + "epoch": 7.03, + "learning_rate": 1.3132752777713027e-05, + "loss": 0.6503, + "step": 15330 + }, + { + "epoch": 7.03, + "learning_rate": 1.312929427179556e-05, + "loss": 0.6057, + "step": 15340 + }, + { + "epoch": 7.03, + "learning_rate": 1.3125833022228448e-05, + "loss": 0.7167, + "step": 15350 + }, + { + "epoch": 7.03, + "learning_rate": 1.3122369030698663e-05, + "loss": 0.8433, + "step": 15360 + }, + { + "epoch": 7.03, + "learning_rate": 1.3118902298894515e-05, + "loss": 0.7265, + "step": 15370 + }, + { + "epoch": 7.03, + "learning_rate": 1.3115432828505646e-05, + "loss": 0.6624, + "step": 15380 + }, + { + "epoch": 7.03, + "learning_rate": 1.3111960621223035e-05, + "loss": 0.5974, + "step": 15390 + }, + { + "epoch": 7.03, + "learning_rate": 1.310848567873899e-05, + "loss": 0.9127, + "step": 15400 + }, + { + "epoch": 7.03, + "learning_rate": 1.3105008002747158e-05, + "loss": 0.8466, + "step": 15410 + }, + { + "epoch": 7.03, + "learning_rate": 1.3101527594942517e-05, + "loss": 0.5286, + "step": 15420 + }, + { + "epoch": 7.03, + "learning_rate": 1.3098044457021373e-05, + "loss": 0.6996, + "step": 15430 + }, + { + "epoch": 7.03, + "learning_rate": 1.3094558590681367e-05, + "loss": 0.6488, + "step": 15440 + }, + { + "epoch": 7.03, + "learning_rate": 1.3091069997621466e-05, + "loss": 0.5853, + "step": 15450 + }, + { + "epoch": 7.03, + "learning_rate": 1.3087578679541972e-05, + "loss": 0.7664, + "step": 15460 + }, + { + "epoch": 7.03, + "learning_rate": 1.3084084638144506e-05, + "loss": 0.8834, + "step": 15470 + }, + { + "epoch": 7.03, + "learning_rate": 1.3080587875132024e-05, + "loss": 1.1025, + "step": 15480 + }, + { + "epoch": 7.03, + "learning_rate": 1.3077088392208807e-05, + "loss": 0.82, + "step": 15490 + }, + { + "epoch": 7.03, + "learning_rate": 1.3073586191080456e-05, + "loss": 0.9271, + "step": 15500 + }, + { + "epoch": 7.03, + "learning_rate": 1.3070081273453906e-05, + "loss": 0.8683, + "step": 15510 + }, + { + "epoch": 7.03, + "learning_rate": 1.3066573641037412e-05, + "loss": 0.7035, + "step": 15520 + }, + { + "epoch": 7.03, + "learning_rate": 1.3063063295540545e-05, + "loss": 0.6797, + "step": 15530 + }, + { + "epoch": 7.03, + "learning_rate": 1.3059550238674209e-05, + "loss": 0.8762, + "step": 15540 + }, + { + "epoch": 7.03, + "learning_rate": 1.3056034472150625e-05, + "loss": 0.6074, + "step": 15550 + }, + { + "epoch": 7.03, + "learning_rate": 1.3052515997683336e-05, + "loss": 0.6325, + "step": 15560 + }, + { + "epoch": 7.03, + "learning_rate": 1.3048994816987201e-05, + "loss": 0.7776, + "step": 15570 + }, + { + "epoch": 7.03, + "learning_rate": 1.3045470931778403e-05, + "loss": 0.8261, + "step": 15580 + }, + { + "epoch": 7.03, + "learning_rate": 1.304194434377444e-05, + "loss": 0.6536, + "step": 15590 + }, + { + "epoch": 7.03, + "learning_rate": 1.303841505469413e-05, + "loss": 1.1411, + "step": 15600 + }, + { + "epoch": 7.03, + "learning_rate": 1.3034883066257602e-05, + "loss": 0.6888, + "step": 15610 + }, + { + "epoch": 7.03, + "learning_rate": 1.3031348380186305e-05, + "loss": 0.7335, + "step": 15620 + }, + { + "epoch": 7.03, + "learning_rate": 1.3027810998203005e-05, + "loss": 0.8149, + "step": 15630 + }, + { + "epoch": 7.03, + "learning_rate": 1.3024270922031775e-05, + "loss": 1.115, + "step": 15640 + }, + { + "epoch": 7.03, + "learning_rate": 1.302072815339801e-05, + "loss": 0.6974, + "step": 15650 + }, + { + "epoch": 7.03, + "learning_rate": 1.3017182694028406e-05, + "loss": 1.0305, + "step": 15660 + }, + { + "epoch": 7.03, + "learning_rate": 1.3013634545650983e-05, + "loss": 0.5599, + "step": 15670 + }, + { + "epoch": 7.03, + "learning_rate": 1.3010083709995062e-05, + "loss": 0.7644, + "step": 15680 + }, + { + "epoch": 7.03, + "learning_rate": 1.3006530188791278e-05, + "loss": 0.688, + "step": 15690 + }, + { + "epoch": 7.03, + "learning_rate": 1.3002973983771572e-05, + "loss": 0.8266, + "step": 15700 + }, + { + "epoch": 7.03, + "learning_rate": 1.2999415096669201e-05, + "loss": 0.9208, + "step": 15710 + }, + { + "epoch": 7.03, + "learning_rate": 1.2995853529218718e-05, + "loss": 0.6909, + "step": 15720 + }, + { + "epoch": 7.03, + "learning_rate": 1.2992289283155988e-05, + "loss": 0.9514, + "step": 15730 + }, + { + "epoch": 7.03, + "learning_rate": 1.2988722360218184e-05, + "loss": 0.5879, + "step": 15740 + }, + { + "epoch": 7.04, + "learning_rate": 1.2985152762143779e-05, + "loss": 0.7636, + "step": 15750 + }, + { + "epoch": 7.04, + "learning_rate": 1.2981580490672554e-05, + "loss": 0.6555, + "step": 15760 + }, + { + "epoch": 7.04, + "learning_rate": 1.2978005547545586e-05, + "loss": 0.4848, + "step": 15770 + }, + { + "epoch": 7.04, + "learning_rate": 1.2974427934505262e-05, + "loss": 0.6107, + "step": 15780 + }, + { + "epoch": 7.04, + "learning_rate": 1.2970847653295267e-05, + "loss": 1.2346, + "step": 15790 + }, + { + "epoch": 7.04, + "learning_rate": 1.2967264705660587e-05, + "loss": 0.6772, + "step": 15800 + }, + { + "epoch": 7.04, + "learning_rate": 1.2963679093347507e-05, + "loss": 0.7676, + "step": 15810 + }, + { + "epoch": 7.04, + "learning_rate": 1.296009081810361e-05, + "loss": 0.8187, + "step": 15820 + }, + { + "epoch": 7.04, + "learning_rate": 1.2956499881677777e-05, + "loss": 0.5784, + "step": 15830 + }, + { + "epoch": 7.04, + "learning_rate": 1.295290628582019e-05, + "loss": 0.5904, + "step": 15840 + }, + { + "epoch": 7.04, + "learning_rate": 1.294931003228232e-05, + "loss": 0.7243, + "step": 15850 + }, + { + "epoch": 7.04, + "learning_rate": 1.2945711122816939e-05, + "loss": 0.8472, + "step": 15860 + }, + { + "epoch": 7.04, + "learning_rate": 1.2942109559178113e-05, + "loss": 0.9001, + "step": 15870 + }, + { + "epoch": 7.04, + "learning_rate": 1.2938505343121199e-05, + "loss": 0.6855, + "step": 15880 + }, + { + "epoch": 7.04, + "learning_rate": 1.2934898476402851e-05, + "loss": 0.7127, + "step": 15890 + }, + { + "epoch": 7.04, + "learning_rate": 1.2931288960781008e-05, + "loss": 0.4177, + "step": 15900 + }, + { + "epoch": 7.04, + "learning_rate": 1.2927676798014904e-05, + "loss": 0.8813, + "step": 15910 + }, + { + "epoch": 7.04, + "learning_rate": 1.2924061989865066e-05, + "loss": 0.6631, + "step": 15920 + }, + { + "epoch": 7.04, + "learning_rate": 1.2920444538093305e-05, + "loss": 1.1157, + "step": 15930 + }, + { + "epoch": 7.04, + "learning_rate": 1.2916824444462726e-05, + "loss": 0.9802, + "step": 15940 + }, + { + "epoch": 7.04, + "learning_rate": 1.2913201710737716e-05, + "loss": 0.5958, + "step": 15950 + }, + { + "epoch": 7.04, + "learning_rate": 1.2909576338683956e-05, + "loss": 1.1829, + "step": 15960 + }, + { + "epoch": 7.04, + "learning_rate": 1.2905948330068401e-05, + "loss": 0.7153, + "step": 15970 + }, + { + "epoch": 7.04, + "learning_rate": 1.2902317686659302e-05, + "loss": 1.0184, + "step": 15980 + }, + { + "epoch": 7.04, + "learning_rate": 1.2898684410226192e-05, + "loss": 0.6815, + "step": 15990 + }, + { + "epoch": 7.04, + "learning_rate": 1.2895048502539883e-05, + "loss": 0.7102, + "step": 16000 + }, + { + "epoch": 7.04, + "eval_accuracy": 0.8242105263157895, + "eval_f1": 0.8242105263157895, + "eval_loss": 0.7876191735267639, + "eval_runtime": 761.4492, + "eval_samples_per_second": 6.238, + "eval_steps_per_second": 1.56, + "step": 16000 + }, + { + "epoch": 8.0, + "learning_rate": 1.2891409965372476e-05, + "loss": 0.903, + "step": 16010 + }, + { + "epoch": 8.0, + "learning_rate": 1.2887768800497346e-05, + "loss": 0.7573, + "step": 16020 + }, + { + "epoch": 8.0, + "learning_rate": 1.2884125009689153e-05, + "loss": 0.9222, + "step": 16030 + }, + { + "epoch": 8.0, + "learning_rate": 1.2880478594723841e-05, + "loss": 0.9862, + "step": 16040 + }, + { + "epoch": 8.0, + "learning_rate": 1.287682955737862e-05, + "loss": 0.7781, + "step": 16050 + }, + { + "epoch": 8.0, + "learning_rate": 1.2873177899431994e-05, + "loss": 0.7142, + "step": 16060 + }, + { + "epoch": 8.0, + "learning_rate": 1.2869523622663734e-05, + "loss": 0.9108, + "step": 16070 + }, + { + "epoch": 8.0, + "learning_rate": 1.2865866728854889e-05, + "loss": 0.572, + "step": 16080 + }, + { + "epoch": 8.0, + "learning_rate": 1.2862207219787787e-05, + "loss": 0.5146, + "step": 16090 + }, + { + "epoch": 8.0, + "learning_rate": 1.2858545097246025e-05, + "loss": 0.7091, + "step": 16100 + }, + { + "epoch": 8.0, + "learning_rate": 1.2854880363014482e-05, + "loss": 0.759, + "step": 16110 + }, + { + "epoch": 8.0, + "learning_rate": 1.2851213018879298e-05, + "loss": 0.5985, + "step": 16120 + }, + { + "epoch": 8.0, + "learning_rate": 1.2847543066627897e-05, + "loss": 0.8886, + "step": 16130 + }, + { + "epoch": 8.0, + "learning_rate": 1.284387050804897e-05, + "loss": 1.0709, + "step": 16140 + }, + { + "epoch": 8.0, + "learning_rate": 1.2840195344932474e-05, + "loss": 0.735, + "step": 16150 + }, + { + "epoch": 8.0, + "learning_rate": 1.2836517579069642e-05, + "loss": 0.8514, + "step": 16160 + }, + { + "epoch": 8.0, + "learning_rate": 1.2832837212252966e-05, + "loss": 0.6403, + "step": 16170 + }, + { + "epoch": 8.0, + "learning_rate": 1.2829154246276222e-05, + "loss": 0.6856, + "step": 16180 + }, + { + "epoch": 8.0, + "learning_rate": 1.2825468682934436e-05, + "loss": 0.7865, + "step": 16190 + }, + { + "epoch": 8.0, + "learning_rate": 1.2821780524023907e-05, + "loss": 0.6365, + "step": 16200 + }, + { + "epoch": 8.0, + "learning_rate": 1.28180897713422e-05, + "loss": 0.6259, + "step": 16210 + }, + { + "epoch": 8.0, + "learning_rate": 1.2814396426688147e-05, + "loss": 0.7151, + "step": 16220 + }, + { + "epoch": 8.0, + "learning_rate": 1.2810700491861833e-05, + "loss": 0.7079, + "step": 16230 + }, + { + "epoch": 8.0, + "learning_rate": 1.2807001968664616e-05, + "loss": 0.7594, + "step": 16240 + }, + { + "epoch": 8.01, + "learning_rate": 1.2803300858899106e-05, + "loss": 0.7652, + "step": 16250 + }, + { + "epoch": 8.01, + "learning_rate": 1.2799597164369187e-05, + "loss": 0.8973, + "step": 16260 + }, + { + "epoch": 8.01, + "learning_rate": 1.2795890886879988e-05, + "loss": 0.9693, + "step": 16270 + }, + { + "epoch": 8.01, + "learning_rate": 1.2792182028237907e-05, + "loss": 1.0535, + "step": 16280 + }, + { + "epoch": 8.01, + "learning_rate": 1.2788470590250594e-05, + "loss": 0.7048, + "step": 16290 + }, + { + "epoch": 8.01, + "learning_rate": 1.278475657472696e-05, + "loss": 0.6197, + "step": 16300 + }, + { + "epoch": 8.01, + "learning_rate": 1.2781039983477172e-05, + "loss": 0.6348, + "step": 16310 + }, + { + "epoch": 8.01, + "learning_rate": 1.2777320818312652e-05, + "loss": 0.8078, + "step": 16320 + }, + { + "epoch": 8.01, + "learning_rate": 1.2773599081046076e-05, + "loss": 1.0742, + "step": 16330 + }, + { + "epoch": 8.01, + "learning_rate": 1.276987477349137e-05, + "loss": 0.6463, + "step": 16340 + }, + { + "epoch": 8.01, + "learning_rate": 1.2766147897463718e-05, + "loss": 0.4302, + "step": 16350 + }, + { + "epoch": 8.01, + "learning_rate": 1.2762418454779556e-05, + "loss": 0.751, + "step": 16360 + }, + { + "epoch": 8.01, + "learning_rate": 1.2758686447256568e-05, + "loss": 0.9189, + "step": 16370 + }, + { + "epoch": 8.01, + "learning_rate": 1.2754951876713688e-05, + "loss": 0.8189, + "step": 16380 + }, + { + "epoch": 8.01, + "learning_rate": 1.27512147449711e-05, + "loss": 0.5518, + "step": 16390 + }, + { + "epoch": 8.01, + "learning_rate": 1.2747475053850241e-05, + "loss": 1.0893, + "step": 16400 + }, + { + "epoch": 8.01, + "learning_rate": 1.2743732805173786e-05, + "loss": 0.9392, + "step": 16410 + }, + { + "epoch": 8.01, + "learning_rate": 1.2739988000765664e-05, + "loss": 1.016, + "step": 16420 + }, + { + "epoch": 8.01, + "learning_rate": 1.2736240642451046e-05, + "loss": 0.3563, + "step": 16430 + }, + { + "epoch": 8.01, + "learning_rate": 1.2732490732056348e-05, + "loss": 0.9955, + "step": 16440 + }, + { + "epoch": 8.01, + "learning_rate": 1.2728738271409236e-05, + "loss": 0.5461, + "step": 16450 + }, + { + "epoch": 8.01, + "learning_rate": 1.2724983262338605e-05, + "loss": 0.5001, + "step": 16460 + }, + { + "epoch": 8.01, + "learning_rate": 1.2721225706674609e-05, + "loss": 0.6405, + "step": 16470 + }, + { + "epoch": 8.01, + "learning_rate": 1.2717465606248632e-05, + "loss": 0.7436, + "step": 16480 + }, + { + "epoch": 8.01, + "learning_rate": 1.2713702962893299e-05, + "loss": 0.8613, + "step": 16490 + }, + { + "epoch": 8.01, + "learning_rate": 1.270993777844248e-05, + "loss": 0.929, + "step": 16500 + }, + { + "epoch": 8.01, + "learning_rate": 1.270617005473128e-05, + "loss": 0.504, + "step": 16510 + }, + { + "epoch": 8.01, + "learning_rate": 1.2702399793596037e-05, + "loss": 0.4618, + "step": 16520 + }, + { + "epoch": 8.01, + "learning_rate": 1.2698626996874338e-05, + "loss": 0.6505, + "step": 16530 + }, + { + "epoch": 8.01, + "learning_rate": 1.2694851666404991e-05, + "loss": 0.8792, + "step": 16540 + }, + { + "epoch": 8.01, + "learning_rate": 1.2691073804028052e-05, + "loss": 0.7674, + "step": 16550 + }, + { + "epoch": 8.01, + "learning_rate": 1.26872934115848e-05, + "loss": 1.042, + "step": 16560 + }, + { + "epoch": 8.01, + "learning_rate": 1.2683510490917759e-05, + "loss": 0.9164, + "step": 16570 + }, + { + "epoch": 8.01, + "learning_rate": 1.2679725043870672e-05, + "loss": 0.6523, + "step": 16580 + }, + { + "epoch": 8.01, + "learning_rate": 1.2675937072288526e-05, + "loss": 0.5904, + "step": 16590 + }, + { + "epoch": 8.01, + "learning_rate": 1.2672146578017524e-05, + "loss": 0.8063, + "step": 16600 + }, + { + "epoch": 8.01, + "learning_rate": 1.2668353562905114e-05, + "loss": 0.4042, + "step": 16610 + }, + { + "epoch": 8.01, + "learning_rate": 1.2664558028799963e-05, + "loss": 0.6684, + "step": 16620 + }, + { + "epoch": 8.01, + "learning_rate": 1.2660759977551972e-05, + "loss": 0.6434, + "step": 16630 + }, + { + "epoch": 8.01, + "learning_rate": 1.2656959411012255e-05, + "loss": 0.9733, + "step": 16640 + }, + { + "epoch": 8.01, + "learning_rate": 1.2653156331033174e-05, + "loss": 0.5205, + "step": 16650 + }, + { + "epoch": 8.01, + "learning_rate": 1.2649350739468295e-05, + "loss": 0.4417, + "step": 16660 + }, + { + "epoch": 8.01, + "learning_rate": 1.2645542638172423e-05, + "loss": 0.8051, + "step": 16670 + }, + { + "epoch": 8.01, + "learning_rate": 1.2641732029001577e-05, + "loss": 0.7189, + "step": 16680 + }, + { + "epoch": 8.01, + "learning_rate": 1.2637918913813002e-05, + "loss": 0.7512, + "step": 16690 + }, + { + "epoch": 8.01, + "learning_rate": 1.2634103294465166e-05, + "loss": 1.0825, + "step": 16700 + }, + { + "epoch": 8.01, + "learning_rate": 1.2630285172817751e-05, + "loss": 1.1748, + "step": 16710 + }, + { + "epoch": 8.01, + "learning_rate": 1.2626464550731671e-05, + "loss": 1.0208, + "step": 16720 + }, + { + "epoch": 8.01, + "learning_rate": 1.2622641430069041e-05, + "loss": 0.5634, + "step": 16730 + }, + { + "epoch": 8.01, + "learning_rate": 1.2618815812693213e-05, + "loss": 0.4922, + "step": 16740 + }, + { + "epoch": 8.02, + "learning_rate": 1.2614987700468739e-05, + "loss": 0.8235, + "step": 16750 + }, + { + "epoch": 8.02, + "learning_rate": 1.2611157095261399e-05, + "loss": 0.6315, + "step": 16760 + }, + { + "epoch": 8.02, + "learning_rate": 1.2607323998938181e-05, + "loss": 0.8166, + "step": 16770 + }, + { + "epoch": 8.02, + "learning_rate": 1.260348841336729e-05, + "loss": 0.7631, + "step": 16780 + }, + { + "epoch": 8.02, + "learning_rate": 1.2599650340418144e-05, + "loss": 0.7756, + "step": 16790 + }, + { + "epoch": 8.02, + "learning_rate": 1.2595809781961374e-05, + "loss": 0.7076, + "step": 16800 + }, + { + "epoch": 8.02, + "learning_rate": 1.2591966739868822e-05, + "loss": 0.7914, + "step": 16810 + }, + { + "epoch": 8.02, + "learning_rate": 1.2588121216013537e-05, + "loss": 0.6832, + "step": 16820 + }, + { + "epoch": 8.02, + "learning_rate": 1.2584273212269782e-05, + "loss": 0.547, + "step": 16830 + }, + { + "epoch": 8.02, + "learning_rate": 1.258042273051303e-05, + "loss": 0.9669, + "step": 16840 + }, + { + "epoch": 8.02, + "learning_rate": 1.2576569772619955e-05, + "loss": 0.6243, + "step": 16850 + }, + { + "epoch": 8.02, + "learning_rate": 1.2572714340468445e-05, + "loss": 0.6971, + "step": 16860 + }, + { + "epoch": 8.02, + "learning_rate": 1.256885643593759e-05, + "loss": 0.5598, + "step": 16870 + }, + { + "epoch": 8.02, + "learning_rate": 1.2564996060907683e-05, + "loss": 0.604, + "step": 16880 + }, + { + "epoch": 8.02, + "learning_rate": 1.2561133217260227e-05, + "loss": 0.7139, + "step": 16890 + }, + { + "epoch": 8.02, + "learning_rate": 1.2557267906877925e-05, + "loss": 0.4249, + "step": 16900 + }, + { + "epoch": 8.02, + "learning_rate": 1.255340013164468e-05, + "loss": 0.4992, + "step": 16910 + }, + { + "epoch": 8.02, + "learning_rate": 1.2549529893445603e-05, + "loss": 1.0948, + "step": 16920 + }, + { + "epoch": 8.02, + "learning_rate": 1.2545657194166997e-05, + "loss": 0.6932, + "step": 16930 + }, + { + "epoch": 8.02, + "learning_rate": 1.2541782035696372e-05, + "loss": 0.5878, + "step": 16940 + }, + { + "epoch": 8.02, + "learning_rate": 1.2537904419922428e-05, + "loss": 0.6641, + "step": 16950 + }, + { + "epoch": 8.02, + "learning_rate": 1.253402434873507e-05, + "loss": 0.8987, + "step": 16960 + }, + { + "epoch": 8.02, + "learning_rate": 1.2530141824025399e-05, + "loss": 0.5498, + "step": 16970 + }, + { + "epoch": 8.02, + "learning_rate": 1.2526256847685713e-05, + "loss": 0.7692, + "step": 16980 + }, + { + "epoch": 8.02, + "learning_rate": 1.2522369421609497e-05, + "loss": 0.7997, + "step": 16990 + }, + { + "epoch": 8.02, + "learning_rate": 1.2518479547691437e-05, + "loss": 0.5458, + "step": 17000 + }, + { + "epoch": 8.02, + "learning_rate": 1.2514587227827412e-05, + "loss": 0.8942, + "step": 17010 + }, + { + "epoch": 8.02, + "learning_rate": 1.2510692463914487e-05, + "loss": 0.461, + "step": 17020 + }, + { + "epoch": 8.02, + "learning_rate": 1.250679525785093e-05, + "loss": 0.9453, + "step": 17030 + }, + { + "epoch": 8.02, + "learning_rate": 1.2502895611536185e-05, + "loss": 1.1806, + "step": 17040 + }, + { + "epoch": 8.02, + "learning_rate": 1.2498993526870893e-05, + "loss": 0.8235, + "step": 17050 + }, + { + "epoch": 8.02, + "learning_rate": 1.2495089005756888e-05, + "loss": 0.3497, + "step": 17060 + }, + { + "epoch": 8.02, + "learning_rate": 1.249118205009718e-05, + "loss": 0.9592, + "step": 17070 + }, + { + "epoch": 8.02, + "learning_rate": 1.2487272661795976e-05, + "loss": 0.5981, + "step": 17080 + }, + { + "epoch": 8.02, + "learning_rate": 1.2483360842758662e-05, + "loss": 0.7031, + "step": 17090 + }, + { + "epoch": 8.02, + "learning_rate": 1.2479446594891812e-05, + "loss": 0.8887, + "step": 17100 + }, + { + "epoch": 8.02, + "learning_rate": 1.2475529920103183e-05, + "loss": 0.5698, + "step": 17110 + }, + { + "epoch": 8.02, + "learning_rate": 1.2471610820301714e-05, + "loss": 0.9545, + "step": 17120 + }, + { + "epoch": 8.02, + "learning_rate": 1.2467689297397526e-05, + "loss": 0.5898, + "step": 17130 + }, + { + "epoch": 8.02, + "learning_rate": 1.2463765353301925e-05, + "loss": 0.4993, + "step": 17140 + }, + { + "epoch": 8.02, + "learning_rate": 1.245983898992739e-05, + "loss": 0.4658, + "step": 17150 + }, + { + "epoch": 8.02, + "learning_rate": 1.2455910209187584e-05, + "loss": 0.7944, + "step": 17160 + }, + { + "epoch": 8.02, + "learning_rate": 1.2451979012997347e-05, + "loss": 0.8215, + "step": 17170 + }, + { + "epoch": 8.02, + "learning_rate": 1.2448045403272696e-05, + "loss": 0.5273, + "step": 17180 + }, + { + "epoch": 8.02, + "learning_rate": 1.2444109381930825e-05, + "loss": 0.6735, + "step": 17190 + }, + { + "epoch": 8.02, + "learning_rate": 1.2440170950890103e-05, + "loss": 0.8846, + "step": 17200 + }, + { + "epoch": 8.02, + "learning_rate": 1.2436230112070075e-05, + "loss": 0.5238, + "step": 17210 + }, + { + "epoch": 8.02, + "learning_rate": 1.2432286867391455e-05, + "loss": 0.9863, + "step": 17220 + }, + { + "epoch": 8.02, + "learning_rate": 1.2428341218776135e-05, + "loss": 0.8749, + "step": 17230 + }, + { + "epoch": 8.02, + "learning_rate": 1.2424393168147173e-05, + "loss": 0.5667, + "step": 17240 + }, + { + "epoch": 8.03, + "learning_rate": 1.2420442717428804e-05, + "loss": 0.877, + "step": 17250 + }, + { + "epoch": 8.03, + "learning_rate": 1.241648986854643e-05, + "loss": 0.7394, + "step": 17260 + }, + { + "epoch": 8.03, + "learning_rate": 1.241253462342662e-05, + "loss": 0.6451, + "step": 17270 + }, + { + "epoch": 8.03, + "learning_rate": 1.2408576983997112e-05, + "loss": 0.8236, + "step": 17280 + }, + { + "epoch": 8.03, + "learning_rate": 1.2404616952186811e-05, + "loss": 0.8152, + "step": 17290 + }, + { + "epoch": 8.03, + "learning_rate": 1.2400654529925793e-05, + "loss": 0.8507, + "step": 17300 + }, + { + "epoch": 8.03, + "learning_rate": 1.2396689719145288e-05, + "loss": 0.7373, + "step": 17310 + }, + { + "epoch": 8.03, + "learning_rate": 1.2392722521777699e-05, + "loss": 0.4609, + "step": 17320 + }, + { + "epoch": 8.03, + "learning_rate": 1.2388752939756591e-05, + "loss": 0.7621, + "step": 17330 + }, + { + "epoch": 8.03, + "learning_rate": 1.238478097501669e-05, + "loss": 0.7369, + "step": 17340 + }, + { + "epoch": 8.03, + "learning_rate": 1.2380806629493882e-05, + "loss": 0.6674, + "step": 17350 + }, + { + "epoch": 8.03, + "learning_rate": 1.2376829905125215e-05, + "loss": 0.7949, + "step": 17360 + }, + { + "epoch": 8.03, + "learning_rate": 1.23728508038489e-05, + "loss": 0.9449, + "step": 17370 + }, + { + "epoch": 8.03, + "learning_rate": 1.2368869327604297e-05, + "loss": 0.7967, + "step": 17380 + }, + { + "epoch": 8.03, + "learning_rate": 1.2364885478331933e-05, + "loss": 0.9675, + "step": 17390 + }, + { + "epoch": 8.03, + "learning_rate": 1.2360899257973482e-05, + "loss": 1.2767, + "step": 17400 + }, + { + "epoch": 8.03, + "learning_rate": 1.2356910668471788e-05, + "loss": 0.816, + "step": 17410 + }, + { + "epoch": 8.03, + "learning_rate": 1.2352919711770834e-05, + "loss": 0.8011, + "step": 17420 + }, + { + "epoch": 8.03, + "learning_rate": 1.2348926389815766e-05, + "loss": 0.5407, + "step": 17430 + }, + { + "epoch": 8.03, + "learning_rate": 1.2344930704552883e-05, + "loss": 0.5253, + "step": 17440 + }, + { + "epoch": 8.03, + "learning_rate": 1.234093265792963e-05, + "loss": 0.6741, + "step": 17450 + }, + { + "epoch": 8.03, + "learning_rate": 1.2336932251894608e-05, + "loss": 0.8949, + "step": 17460 + }, + { + "epoch": 8.03, + "learning_rate": 1.2332929488397567e-05, + "loss": 1.0454, + "step": 17470 + }, + { + "epoch": 8.03, + "learning_rate": 1.2328924369389404e-05, + "loss": 0.4574, + "step": 17480 + }, + { + "epoch": 8.03, + "learning_rate": 1.2324916896822166e-05, + "loss": 0.9879, + "step": 17490 + }, + { + "epoch": 8.03, + "learning_rate": 1.2320907072649045e-05, + "loss": 0.9544, + "step": 17500 + }, + { + "epoch": 8.03, + "learning_rate": 1.2316894898824383e-05, + "loss": 1.1716, + "step": 17510 + }, + { + "epoch": 8.03, + "learning_rate": 1.2312880377303664e-05, + "loss": 0.9311, + "step": 17520 + }, + { + "epoch": 8.03, + "learning_rate": 1.2308863510043517e-05, + "loss": 0.5337, + "step": 17530 + }, + { + "epoch": 8.03, + "learning_rate": 1.2304844299001712e-05, + "loss": 0.8078, + "step": 17540 + }, + { + "epoch": 8.03, + "learning_rate": 1.2300822746137167e-05, + "loss": 0.8005, + "step": 17550 + }, + { + "epoch": 8.03, + "learning_rate": 1.2296798853409937e-05, + "loss": 0.8335, + "step": 17560 + }, + { + "epoch": 8.03, + "learning_rate": 1.2292772622781215e-05, + "loss": 0.6197, + "step": 17570 + }, + { + "epoch": 8.03, + "learning_rate": 1.2288744056213343e-05, + "loss": 0.7248, + "step": 17580 + }, + { + "epoch": 8.03, + "learning_rate": 1.228471315566979e-05, + "loss": 0.5651, + "step": 17590 + }, + { + "epoch": 8.03, + "learning_rate": 1.2280679923115173e-05, + "loss": 0.7526, + "step": 17600 + }, + { + "epoch": 8.03, + "learning_rate": 1.2276644360515238e-05, + "loss": 0.3586, + "step": 17610 + }, + { + "epoch": 8.03, + "learning_rate": 1.2272606469836868e-05, + "loss": 0.8768, + "step": 17620 + }, + { + "epoch": 8.03, + "learning_rate": 1.2268566253048081e-05, + "loss": 0.8052, + "step": 17630 + }, + { + "epoch": 8.03, + "learning_rate": 1.2264523712118033e-05, + "loss": 0.748, + "step": 17640 + }, + { + "epoch": 8.03, + "learning_rate": 1.2260478849017007e-05, + "loss": 0.7358, + "step": 17650 + }, + { + "epoch": 8.03, + "learning_rate": 1.225643166571642e-05, + "loss": 0.3771, + "step": 17660 + }, + { + "epoch": 8.03, + "learning_rate": 1.2252382164188825e-05, + "loss": 0.6613, + "step": 17670 + }, + { + "epoch": 8.03, + "learning_rate": 1.2248330346407893e-05, + "loss": 1.1527, + "step": 17680 + }, + { + "epoch": 8.03, + "learning_rate": 1.2244276214348435e-05, + "loss": 0.5922, + "step": 17690 + }, + { + "epoch": 8.03, + "learning_rate": 1.2240219769986382e-05, + "loss": 0.6686, + "step": 17700 + }, + { + "epoch": 8.03, + "learning_rate": 1.2236161015298799e-05, + "loss": 0.6159, + "step": 17710 + }, + { + "epoch": 8.03, + "learning_rate": 1.2232099952263872e-05, + "loss": 0.9876, + "step": 17720 + }, + { + "epoch": 8.03, + "learning_rate": 1.2228036582860917e-05, + "loss": 0.6581, + "step": 17730 + }, + { + "epoch": 8.03, + "learning_rate": 1.2223970909070367e-05, + "loss": 0.613, + "step": 17740 + }, + { + "epoch": 8.04, + "learning_rate": 1.2219902932873782e-05, + "loss": 0.5538, + "step": 17750 + }, + { + "epoch": 8.04, + "learning_rate": 1.2215832656253846e-05, + "loss": 0.7896, + "step": 17760 + }, + { + "epoch": 8.04, + "learning_rate": 1.2211760081194362e-05, + "loss": 0.9046, + "step": 17770 + }, + { + "epoch": 8.04, + "learning_rate": 1.2207685209680254e-05, + "loss": 0.8718, + "step": 17780 + }, + { + "epoch": 8.04, + "learning_rate": 1.2203608043697564e-05, + "loss": 0.6827, + "step": 17790 + }, + { + "epoch": 8.04, + "learning_rate": 1.2199528585233455e-05, + "loss": 0.5505, + "step": 17800 + }, + { + "epoch": 8.04, + "learning_rate": 1.2195446836276202e-05, + "loss": 0.5683, + "step": 17810 + }, + { + "epoch": 8.04, + "learning_rate": 1.2191362798815203e-05, + "loss": 0.946, + "step": 17820 + }, + { + "epoch": 8.04, + "learning_rate": 1.2187276474840968e-05, + "loss": 0.581, + "step": 17830 + }, + { + "epoch": 8.04, + "learning_rate": 1.2183187866345117e-05, + "loss": 0.9227, + "step": 17840 + }, + { + "epoch": 8.04, + "learning_rate": 1.2179096975320398e-05, + "loss": 0.5363, + "step": 17850 + }, + { + "epoch": 8.04, + "learning_rate": 1.217500380376065e-05, + "loss": 0.6947, + "step": 17860 + }, + { + "epoch": 8.04, + "learning_rate": 1.2170908353660844e-05, + "loss": 0.8875, + "step": 17870 + }, + { + "epoch": 8.04, + "learning_rate": 1.216681062701705e-05, + "loss": 0.4628, + "step": 17880 + }, + { + "epoch": 8.04, + "learning_rate": 1.2162710625826446e-05, + "loss": 0.8057, + "step": 17890 + }, + { + "epoch": 8.04, + "learning_rate": 1.2158608352087328e-05, + "loss": 0.9372, + "step": 17900 + }, + { + "epoch": 8.04, + "learning_rate": 1.2154503807799093e-05, + "loss": 0.649, + "step": 17910 + }, + { + "epoch": 8.04, + "learning_rate": 1.2150396994962244e-05, + "loss": 0.835, + "step": 17920 + }, + { + "epoch": 8.04, + "learning_rate": 1.2146287915578394e-05, + "loss": 0.4606, + "step": 17930 + }, + { + "epoch": 8.04, + "learning_rate": 1.2142176571650259e-05, + "loss": 0.7991, + "step": 17940 + }, + { + "epoch": 8.04, + "learning_rate": 1.2138062965181656e-05, + "loss": 0.7018, + "step": 17950 + }, + { + "epoch": 8.04, + "learning_rate": 1.2133947098177508e-05, + "loss": 1.0809, + "step": 17960 + }, + { + "epoch": 8.04, + "learning_rate": 1.2129828972643837e-05, + "loss": 0.4973, + "step": 17970 + }, + { + "epoch": 8.04, + "learning_rate": 1.212570859058777e-05, + "loss": 0.6203, + "step": 17980 + }, + { + "epoch": 8.04, + "learning_rate": 1.2121585954017528e-05, + "loss": 0.5908, + "step": 17990 + }, + { + "epoch": 8.04, + "learning_rate": 1.2117461064942437e-05, + "loss": 0.5726, + "step": 18000 + }, + { + "epoch": 8.04, + "eval_accuracy": 0.8231578947368421, + "eval_f1": 0.8231578947368422, + "eval_loss": 0.8805130124092102, + "eval_runtime": 767.3617, + "eval_samples_per_second": 6.19, + "eval_steps_per_second": 1.548, + "step": 18000 + }, + { + "epoch": 9.0, + "learning_rate": 1.2113333925372919e-05, + "loss": 1.2099, + "step": 18010 + }, + { + "epoch": 9.0, + "learning_rate": 1.2109204537320483e-05, + "loss": 0.937, + "step": 18020 + }, + { + "epoch": 9.0, + "learning_rate": 1.2105072902797753e-05, + "loss": 0.7613, + "step": 18030 + }, + { + "epoch": 9.0, + "learning_rate": 1.2100939023818432e-05, + "loss": 0.8872, + "step": 18040 + }, + { + "epoch": 9.0, + "learning_rate": 1.2096802902397324e-05, + "loss": 0.7832, + "step": 18050 + }, + { + "epoch": 9.0, + "learning_rate": 1.2092664540550323e-05, + "loss": 0.8112, + "step": 18060 + }, + { + "epoch": 9.0, + "learning_rate": 1.2088523940294418e-05, + "loss": 0.6844, + "step": 18070 + }, + { + "epoch": 9.0, + "learning_rate": 1.2084381103647688e-05, + "loss": 0.8863, + "step": 18080 + }, + { + "epoch": 9.0, + "learning_rate": 1.2080236032629298e-05, + "loss": 0.6259, + "step": 18090 + }, + { + "epoch": 9.0, + "learning_rate": 1.207608872925951e-05, + "loss": 0.5141, + "step": 18100 + }, + { + "epoch": 9.0, + "learning_rate": 1.207193919555966e-05, + "loss": 0.7785, + "step": 18110 + }, + { + "epoch": 9.0, + "learning_rate": 1.2067787433552192e-05, + "loss": 0.8503, + "step": 18120 + }, + { + "epoch": 9.0, + "learning_rate": 1.2063633445260615e-05, + "loss": 0.7436, + "step": 18130 + }, + { + "epoch": 9.0, + "learning_rate": 1.2059477232709542e-05, + "loss": 0.8031, + "step": 18140 + }, + { + "epoch": 9.0, + "learning_rate": 1.205531879792465e-05, + "loss": 1.2157, + "step": 18150 + }, + { + "epoch": 9.0, + "learning_rate": 1.2051158142932718e-05, + "loss": 0.8124, + "step": 18160 + }, + { + "epoch": 9.0, + "learning_rate": 1.204699526976159e-05, + "loss": 0.7364, + "step": 18170 + }, + { + "epoch": 9.0, + "learning_rate": 1.2042830180440211e-05, + "loss": 0.7025, + "step": 18180 + }, + { + "epoch": 9.0, + "learning_rate": 1.2038662876998586e-05, + "loss": 0.398, + "step": 18190 + }, + { + "epoch": 9.0, + "learning_rate": 1.2034493361467813e-05, + "loss": 0.9601, + "step": 18200 + }, + { + "epoch": 9.0, + "learning_rate": 1.203032163588006e-05, + "loss": 0.4517, + "step": 18210 + }, + { + "epoch": 9.0, + "learning_rate": 1.2026147702268574e-05, + "loss": 0.6078, + "step": 18220 + }, + { + "epoch": 9.0, + "learning_rate": 1.2021971562667687e-05, + "loss": 0.6127, + "step": 18230 + }, + { + "epoch": 9.0, + "learning_rate": 1.201779321911279e-05, + "loss": 0.9812, + "step": 18240 + }, + { + "epoch": 9.01, + "learning_rate": 1.2013612673640364e-05, + "loss": 0.7009, + "step": 18250 + }, + { + "epoch": 9.01, + "learning_rate": 1.200942992828795e-05, + "loss": 0.5552, + "step": 18260 + }, + { + "epoch": 9.01, + "learning_rate": 1.2005244985094171e-05, + "loss": 0.7145, + "step": 18270 + }, + { + "epoch": 9.01, + "learning_rate": 1.2001057846098717e-05, + "loss": 0.4111, + "step": 18280 + }, + { + "epoch": 9.01, + "learning_rate": 1.1996868513342349e-05, + "loss": 0.5976, + "step": 18290 + }, + { + "epoch": 9.01, + "learning_rate": 1.1992676988866894e-05, + "loss": 0.7484, + "step": 18300 + }, + { + "epoch": 9.01, + "learning_rate": 1.1988483274715256e-05, + "loss": 0.9863, + "step": 18310 + }, + { + "epoch": 9.01, + "learning_rate": 1.1984287372931392e-05, + "loss": 0.3628, + "step": 18320 + }, + { + "epoch": 9.01, + "learning_rate": 1.1980089285560342e-05, + "loss": 0.4903, + "step": 18330 + }, + { + "epoch": 9.01, + "learning_rate": 1.1975889014648195e-05, + "loss": 0.8426, + "step": 18340 + }, + { + "epoch": 9.01, + "learning_rate": 1.197168656224212e-05, + "loss": 0.7967, + "step": 18350 + }, + { + "epoch": 9.01, + "learning_rate": 1.1967481930390335e-05, + "loss": 0.4083, + "step": 18360 + }, + { + "epoch": 9.01, + "learning_rate": 1.196327512114213e-05, + "loss": 0.6116, + "step": 18370 + }, + { + "epoch": 9.01, + "learning_rate": 1.1959066136547851e-05, + "loss": 0.9358, + "step": 18380 + }, + { + "epoch": 9.01, + "learning_rate": 1.1954854978658903e-05, + "loss": 0.6193, + "step": 18390 + }, + { + "epoch": 9.01, + "learning_rate": 1.1950641649527762e-05, + "loss": 0.677, + "step": 18400 + }, + { + "epoch": 9.01, + "learning_rate": 1.1946426151207945e-05, + "loss": 0.6106, + "step": 18410 + }, + { + "epoch": 9.01, + "learning_rate": 1.1942208485754038e-05, + "loss": 0.6699, + "step": 18420 + }, + { + "epoch": 9.01, + "learning_rate": 1.193798865522168e-05, + "loss": 0.5351, + "step": 18430 + }, + { + "epoch": 9.01, + "learning_rate": 1.1933766661667565e-05, + "loss": 0.836, + "step": 18440 + }, + { + "epoch": 9.01, + "learning_rate": 1.192954250714944e-05, + "loss": 0.7118, + "step": 18450 + }, + { + "epoch": 9.01, + "learning_rate": 1.192531619372611e-05, + "loss": 1.0574, + "step": 18460 + }, + { + "epoch": 9.01, + "learning_rate": 1.1921087723457425e-05, + "loss": 0.7786, + "step": 18470 + }, + { + "epoch": 9.01, + "learning_rate": 1.191685709840429e-05, + "loss": 0.9358, + "step": 18480 + }, + { + "epoch": 9.01, + "learning_rate": 1.1912624320628666e-05, + "loss": 0.8795, + "step": 18490 + }, + { + "epoch": 9.01, + "learning_rate": 1.1908389392193549e-05, + "loss": 0.7275, + "step": 18500 + }, + { + "epoch": 9.01, + "learning_rate": 1.1904152315162996e-05, + "loss": 0.487, + "step": 18510 + }, + { + "epoch": 9.01, + "learning_rate": 1.189991309160211e-05, + "loss": 0.6892, + "step": 18520 + }, + { + "epoch": 9.01, + "learning_rate": 1.1895671723577032e-05, + "loss": 0.8501, + "step": 18530 + }, + { + "epoch": 9.01, + "learning_rate": 1.1891428213154956e-05, + "loss": 1.0048, + "step": 18540 + }, + { + "epoch": 9.01, + "learning_rate": 1.1887182562404118e-05, + "loss": 0.579, + "step": 18550 + }, + { + "epoch": 9.01, + "learning_rate": 1.188293477339379e-05, + "loss": 1.0681, + "step": 18560 + }, + { + "epoch": 9.01, + "learning_rate": 1.1878684848194302e-05, + "loss": 0.9328, + "step": 18570 + }, + { + "epoch": 9.01, + "learning_rate": 1.187443278887701e-05, + "loss": 0.8938, + "step": 18580 + }, + { + "epoch": 9.01, + "learning_rate": 1.187017859751432e-05, + "loss": 0.7139, + "step": 18590 + }, + { + "epoch": 9.01, + "learning_rate": 1.1865922276179671e-05, + "loss": 0.7831, + "step": 18600 + }, + { + "epoch": 9.01, + "learning_rate": 1.186166382694754e-05, + "loss": 0.6213, + "step": 18610 + }, + { + "epoch": 9.01, + "learning_rate": 1.185740325189345e-05, + "loss": 0.901, + "step": 18620 + }, + { + "epoch": 9.01, + "learning_rate": 1.1853140553093945e-05, + "loss": 0.5026, + "step": 18630 + }, + { + "epoch": 9.01, + "learning_rate": 1.1848875732626619e-05, + "loss": 0.5141, + "step": 18640 + }, + { + "epoch": 9.01, + "learning_rate": 1.1844608792570091e-05, + "loss": 0.7118, + "step": 18650 + }, + { + "epoch": 9.01, + "learning_rate": 1.1840339735004018e-05, + "loss": 1.1035, + "step": 18660 + }, + { + "epoch": 9.01, + "learning_rate": 1.1836068562009084e-05, + "loss": 0.4183, + "step": 18670 + }, + { + "epoch": 9.01, + "learning_rate": 1.1831795275667007e-05, + "loss": 0.6543, + "step": 18680 + }, + { + "epoch": 9.01, + "learning_rate": 1.1827519878060537e-05, + "loss": 0.5805, + "step": 18690 + }, + { + "epoch": 9.01, + "learning_rate": 1.182324237127345e-05, + "loss": 0.3918, + "step": 18700 + }, + { + "epoch": 9.01, + "learning_rate": 1.1818962757390552e-05, + "loss": 1.0159, + "step": 18710 + }, + { + "epoch": 9.01, + "learning_rate": 1.1814681038497671e-05, + "loss": 0.5038, + "step": 18720 + }, + { + "epoch": 9.01, + "learning_rate": 1.1810397216681665e-05, + "loss": 0.5818, + "step": 18730 + }, + { + "epoch": 9.01, + "learning_rate": 1.1806111294030424e-05, + "loss": 0.8919, + "step": 18740 + }, + { + "epoch": 9.02, + "learning_rate": 1.1801823272632845e-05, + "loss": 0.8671, + "step": 18750 + }, + { + "epoch": 9.02, + "learning_rate": 1.1797533154578866e-05, + "loss": 0.858, + "step": 18760 + }, + { + "epoch": 9.02, + "learning_rate": 1.1793240941959434e-05, + "loss": 0.5649, + "step": 18770 + }, + { + "epoch": 9.02, + "learning_rate": 1.1788946636866518e-05, + "loss": 0.7596, + "step": 18780 + }, + { + "epoch": 9.02, + "learning_rate": 1.1784650241393117e-05, + "loss": 0.8354, + "step": 18790 + }, + { + "epoch": 9.02, + "learning_rate": 1.178035175763324e-05, + "loss": 0.5007, + "step": 18800 + }, + { + "epoch": 9.02, + "learning_rate": 1.1776051187681911e-05, + "loss": 0.9104, + "step": 18810 + }, + { + "epoch": 9.02, + "learning_rate": 1.177174853363518e-05, + "loss": 0.639, + "step": 18820 + }, + { + "epoch": 9.02, + "learning_rate": 1.176744379759011e-05, + "loss": 0.6799, + "step": 18830 + }, + { + "epoch": 9.02, + "learning_rate": 1.1763136981644773e-05, + "loss": 0.9681, + "step": 18840 + }, + { + "epoch": 9.02, + "learning_rate": 1.175882808789826e-05, + "loss": 1.0896, + "step": 18850 + }, + { + "epoch": 9.02, + "learning_rate": 1.1754517118450675e-05, + "loss": 0.5733, + "step": 18860 + }, + { + "epoch": 9.02, + "learning_rate": 1.1750204075403128e-05, + "loss": 0.6242, + "step": 18870 + }, + { + "epoch": 9.02, + "learning_rate": 1.1745888960857749e-05, + "loss": 0.5115, + "step": 18880 + }, + { + "epoch": 9.02, + "learning_rate": 1.1741571776917673e-05, + "loss": 0.5443, + "step": 18890 + }, + { + "epoch": 9.02, + "learning_rate": 1.1737252525687035e-05, + "loss": 0.6259, + "step": 18900 + }, + { + "epoch": 9.02, + "learning_rate": 1.1732931209270995e-05, + "loss": 0.7084, + "step": 18910 + }, + { + "epoch": 9.02, + "learning_rate": 1.17286078297757e-05, + "loss": 0.8891, + "step": 18920 + }, + { + "epoch": 9.02, + "learning_rate": 1.1724282389308324e-05, + "loss": 0.8908, + "step": 18930 + }, + { + "epoch": 9.02, + "learning_rate": 1.1719954889977027e-05, + "loss": 0.8218, + "step": 18940 + }, + { + "epoch": 9.02, + "learning_rate": 1.1715625333890979e-05, + "loss": 0.7136, + "step": 18950 + }, + { + "epoch": 9.02, + "learning_rate": 1.1711293723160359e-05, + "loss": 0.587, + "step": 18960 + }, + { + "epoch": 9.02, + "learning_rate": 1.1706960059896336e-05, + "loss": 0.6956, + "step": 18970 + }, + { + "epoch": 9.02, + "learning_rate": 1.1702624346211084e-05, + "loss": 0.5694, + "step": 18980 + }, + { + "epoch": 9.02, + "learning_rate": 1.1698286584217785e-05, + "loss": 0.5655, + "step": 18990 + }, + { + "epoch": 9.02, + "learning_rate": 1.1693946776030601e-05, + "loss": 0.5769, + "step": 19000 + }, + { + "epoch": 9.02, + "learning_rate": 1.168960492376471e-05, + "loss": 0.7568, + "step": 19010 + }, + { + "epoch": 9.02, + "learning_rate": 1.1685261029536276e-05, + "loss": 0.6725, + "step": 19020 + }, + { + "epoch": 9.02, + "learning_rate": 1.1680915095462456e-05, + "loss": 0.6308, + "step": 19030 + }, + { + "epoch": 9.02, + "learning_rate": 1.167656712366141e-05, + "loss": 0.5472, + "step": 19040 + }, + { + "epoch": 9.02, + "learning_rate": 1.1672217116252287e-05, + "loss": 0.6917, + "step": 19050 + }, + { + "epoch": 9.02, + "learning_rate": 1.1667865075355224e-05, + "loss": 0.8717, + "step": 19060 + }, + { + "epoch": 9.02, + "learning_rate": 1.1663511003091356e-05, + "loss": 0.8956, + "step": 19070 + }, + { + "epoch": 9.02, + "learning_rate": 1.1659154901582805e-05, + "loss": 0.3889, + "step": 19080 + }, + { + "epoch": 9.02, + "learning_rate": 1.165479677295268e-05, + "loss": 0.494, + "step": 19090 + }, + { + "epoch": 9.02, + "learning_rate": 1.1650436619325081e-05, + "loss": 0.3606, + "step": 19100 + }, + { + "epoch": 9.02, + "learning_rate": 1.1646074442825094e-05, + "loss": 0.6802, + "step": 19110 + }, + { + "epoch": 9.02, + "learning_rate": 1.164171024557879e-05, + "loss": 0.6772, + "step": 19120 + }, + { + "epoch": 9.02, + "learning_rate": 1.1637344029713228e-05, + "loss": 0.9367, + "step": 19130 + }, + { + "epoch": 9.02, + "learning_rate": 1.1632975797356445e-05, + "loss": 0.5726, + "step": 19140 + }, + { + "epoch": 9.02, + "learning_rate": 1.1628605550637467e-05, + "loss": 0.5994, + "step": 19150 + }, + { + "epoch": 9.02, + "learning_rate": 1.16242332916863e-05, + "loss": 0.6001, + "step": 19160 + }, + { + "epoch": 9.02, + "learning_rate": 1.1619859022633925e-05, + "loss": 0.7313, + "step": 19170 + }, + { + "epoch": 9.02, + "learning_rate": 1.1615482745612315e-05, + "loss": 0.5976, + "step": 19180 + }, + { + "epoch": 9.02, + "learning_rate": 1.1611104462754406e-05, + "loss": 0.6038, + "step": 19190 + }, + { + "epoch": 9.02, + "learning_rate": 1.1606724176194128e-05, + "loss": 0.7814, + "step": 19200 + }, + { + "epoch": 9.02, + "learning_rate": 1.1602341888066372e-05, + "loss": 0.6553, + "step": 19210 + }, + { + "epoch": 9.02, + "learning_rate": 1.1597957600507019e-05, + "loss": 0.7844, + "step": 19220 + }, + { + "epoch": 9.02, + "learning_rate": 1.1593571315652912e-05, + "loss": 0.4785, + "step": 19230 + }, + { + "epoch": 9.02, + "learning_rate": 1.1589183035641877e-05, + "loss": 0.7918, + "step": 19240 + }, + { + "epoch": 9.03, + "learning_rate": 1.1584792762612705e-05, + "loss": 1.0776, + "step": 19250 + }, + { + "epoch": 9.03, + "learning_rate": 1.1580400498705161e-05, + "loss": 1.1064, + "step": 19260 + }, + { + "epoch": 9.03, + "learning_rate": 1.1576006246059987e-05, + "loss": 0.9584, + "step": 19270 + }, + { + "epoch": 9.03, + "learning_rate": 1.1571610006818883e-05, + "loss": 0.983, + "step": 19280 + }, + { + "epoch": 9.03, + "learning_rate": 1.1567211783124523e-05, + "loss": 0.5213, + "step": 19290 + }, + { + "epoch": 9.03, + "learning_rate": 1.156281157712055e-05, + "loss": 0.8329, + "step": 19300 + }, + { + "epoch": 9.03, + "learning_rate": 1.155840939095157e-05, + "loss": 0.9438, + "step": 19310 + }, + { + "epoch": 9.03, + "learning_rate": 1.1554005226763153e-05, + "loss": 0.5701, + "step": 19320 + }, + { + "epoch": 9.03, + "learning_rate": 1.1549599086701841e-05, + "loss": 0.698, + "step": 19330 + }, + { + "epoch": 9.03, + "learning_rate": 1.1545190972915127e-05, + "loss": 0.5665, + "step": 19340 + }, + { + "epoch": 9.03, + "learning_rate": 1.1540780887551473e-05, + "loss": 0.7165, + "step": 19350 + }, + { + "epoch": 9.03, + "learning_rate": 1.1536368832760304e-05, + "loss": 0.694, + "step": 19360 + }, + { + "epoch": 9.03, + "learning_rate": 1.1531954810692e-05, + "loss": 0.6035, + "step": 19370 + }, + { + "epoch": 9.03, + "learning_rate": 1.1527538823497903e-05, + "loss": 0.7644, + "step": 19380 + }, + { + "epoch": 9.03, + "learning_rate": 1.1523120873330308e-05, + "loss": 0.8886, + "step": 19390 + }, + { + "epoch": 9.03, + "learning_rate": 1.1518700962342475e-05, + "loss": 0.7071, + "step": 19400 + }, + { + "epoch": 9.03, + "learning_rate": 1.1514279092688612e-05, + "loss": 0.8012, + "step": 19410 + }, + { + "epoch": 9.03, + "learning_rate": 1.1509855266523884e-05, + "loss": 0.9929, + "step": 19420 + }, + { + "epoch": 9.03, + "learning_rate": 1.1505429486004414e-05, + "loss": 0.7935, + "step": 19430 + }, + { + "epoch": 9.03, + "learning_rate": 1.150100175328727e-05, + "loss": 0.6962, + "step": 19440 + }, + { + "epoch": 9.03, + "learning_rate": 1.1496572070530475e-05, + "loss": 0.8044, + "step": 19450 + }, + { + "epoch": 9.03, + "learning_rate": 1.1492140439893006e-05, + "loss": 0.7601, + "step": 19460 + }, + { + "epoch": 9.03, + "learning_rate": 1.148770686353478e-05, + "loss": 0.9783, + "step": 19470 + }, + { + "epoch": 9.03, + "learning_rate": 1.1483271343616675e-05, + "loss": 0.5717, + "step": 19480 + }, + { + "epoch": 9.03, + "learning_rate": 1.1478833882300505e-05, + "loss": 0.4403, + "step": 19490 + }, + { + "epoch": 9.03, + "learning_rate": 1.1474394481749037e-05, + "loss": 0.6769, + "step": 19500 + }, + { + "epoch": 9.03, + "learning_rate": 1.1469953144125981e-05, + "loss": 0.628, + "step": 19510 + }, + { + "epoch": 9.03, + "learning_rate": 1.1465509871595986e-05, + "loss": 0.2903, + "step": 19520 + }, + { + "epoch": 9.03, + "learning_rate": 1.1461064666324659e-05, + "loss": 1.0457, + "step": 19530 + }, + { + "epoch": 9.03, + "learning_rate": 1.1456617530478528e-05, + "loss": 0.9678, + "step": 19540 + }, + { + "epoch": 9.03, + "learning_rate": 1.1452168466225084e-05, + "loss": 0.7661, + "step": 19550 + }, + { + "epoch": 9.03, + "learning_rate": 1.1447717475732735e-05, + "loss": 0.7126, + "step": 19560 + }, + { + "epoch": 9.03, + "learning_rate": 1.144326456117085e-05, + "loss": 0.6659, + "step": 19570 + }, + { + "epoch": 9.03, + "learning_rate": 1.1438809724709719e-05, + "loss": 0.5846, + "step": 19580 + }, + { + "epoch": 9.03, + "learning_rate": 1.1434352968520574e-05, + "loss": 1.1135, + "step": 19590 + }, + { + "epoch": 9.03, + "learning_rate": 1.1429894294775594e-05, + "loss": 0.4112, + "step": 19600 + }, + { + "epoch": 9.03, + "learning_rate": 1.1425433705647872e-05, + "loss": 0.6958, + "step": 19610 + }, + { + "epoch": 9.03, + "learning_rate": 1.142097120331145e-05, + "loss": 0.5993, + "step": 19620 + }, + { + "epoch": 9.03, + "learning_rate": 1.1416506789941295e-05, + "loss": 0.9286, + "step": 19630 + }, + { + "epoch": 9.03, + "learning_rate": 1.1412040467713309e-05, + "loss": 1.0423, + "step": 19640 + }, + { + "epoch": 9.03, + "learning_rate": 1.1407572238804325e-05, + "loss": 0.6883, + "step": 19650 + }, + { + "epoch": 9.03, + "learning_rate": 1.1403102105392098e-05, + "loss": 0.5804, + "step": 19660 + }, + { + "epoch": 9.03, + "learning_rate": 1.139863006965532e-05, + "loss": 0.7858, + "step": 19670 + }, + { + "epoch": 9.03, + "learning_rate": 1.139415613377361e-05, + "loss": 0.7412, + "step": 19680 + }, + { + "epoch": 9.03, + "learning_rate": 1.1389680299927506e-05, + "loss": 0.8717, + "step": 19690 + }, + { + "epoch": 9.03, + "learning_rate": 1.1385202570298477e-05, + "loss": 0.6939, + "step": 19700 + }, + { + "epoch": 9.03, + "learning_rate": 1.1380722947068912e-05, + "loss": 0.7415, + "step": 19710 + }, + { + "epoch": 9.03, + "learning_rate": 1.1376241432422127e-05, + "loss": 0.9208, + "step": 19720 + }, + { + "epoch": 9.03, + "learning_rate": 1.1371758028542356e-05, + "loss": 0.9007, + "step": 19730 + }, + { + "epoch": 9.03, + "learning_rate": 1.1367272737614758e-05, + "loss": 0.6171, + "step": 19740 + }, + { + "epoch": 9.04, + "learning_rate": 1.1362785561825407e-05, + "loss": 0.7419, + "step": 19750 + }, + { + "epoch": 9.04, + "learning_rate": 1.13582965033613e-05, + "loss": 0.8097, + "step": 19760 + }, + { + "epoch": 9.04, + "learning_rate": 1.1353805564410347e-05, + "loss": 0.6628, + "step": 19770 + }, + { + "epoch": 9.04, + "learning_rate": 1.1349312747161377e-05, + "loss": 0.5436, + "step": 19780 + }, + { + "epoch": 9.04, + "learning_rate": 1.1344818053804139e-05, + "loss": 0.8017, + "step": 19790 + }, + { + "epoch": 9.04, + "learning_rate": 1.1340321486529287e-05, + "loss": 0.6905, + "step": 19800 + }, + { + "epoch": 9.04, + "learning_rate": 1.1335823047528395e-05, + "loss": 0.4657, + "step": 19810 + }, + { + "epoch": 9.04, + "learning_rate": 1.1331322738993949e-05, + "loss": 0.815, + "step": 19820 + }, + { + "epoch": 9.04, + "learning_rate": 1.1326820563119344e-05, + "loss": 0.6882, + "step": 19830 + }, + { + "epoch": 9.04, + "learning_rate": 1.1322316522098883e-05, + "loss": 0.748, + "step": 19840 + }, + { + "epoch": 9.04, + "learning_rate": 1.1317810618127785e-05, + "loss": 0.572, + "step": 19850 + }, + { + "epoch": 9.04, + "learning_rate": 1.1313302853402172e-05, + "loss": 0.8386, + "step": 19860 + }, + { + "epoch": 9.04, + "learning_rate": 1.130879323011907e-05, + "loss": 0.6839, + "step": 19870 + }, + { + "epoch": 9.04, + "learning_rate": 1.1304281750476418e-05, + "loss": 1.0472, + "step": 19880 + }, + { + "epoch": 9.04, + "learning_rate": 1.1299768416673056e-05, + "loss": 0.6152, + "step": 19890 + }, + { + "epoch": 9.04, + "learning_rate": 1.1295253230908728e-05, + "loss": 0.6518, + "step": 19900 + }, + { + "epoch": 9.04, + "learning_rate": 1.1290736195384084e-05, + "loss": 0.4062, + "step": 19910 + }, + { + "epoch": 9.04, + "learning_rate": 1.1286217312300663e-05, + "loss": 0.5654, + "step": 19920 + }, + { + "epoch": 9.04, + "learning_rate": 1.1281696583860923e-05, + "loss": 0.4624, + "step": 19930 + }, + { + "epoch": 9.04, + "learning_rate": 1.1277174012268207e-05, + "loss": 0.5182, + "step": 19940 + }, + { + "epoch": 9.04, + "learning_rate": 1.1272649599726764e-05, + "loss": 0.8319, + "step": 19950 + }, + { + "epoch": 9.04, + "learning_rate": 1.1268123348441735e-05, + "loss": 0.7917, + "step": 19960 + }, + { + "epoch": 9.04, + "learning_rate": 1.1263595260619163e-05, + "loss": 0.7896, + "step": 19970 + }, + { + "epoch": 9.04, + "learning_rate": 1.1259065338465981e-05, + "loss": 0.5326, + "step": 19980 + }, + { + "epoch": 9.04, + "learning_rate": 1.125453358419002e-05, + "loss": 0.7379, + "step": 19990 + }, + { + "epoch": 9.04, + "learning_rate": 1.125e-05, + "loss": 0.7768, + "step": 20000 + }, + { + "epoch": 9.04, + "eval_accuracy": 0.8589473684210527, + "eval_f1": 0.8589473684210527, + "eval_loss": 0.7489694356918335, + "eval_runtime": 770.3856, + "eval_samples_per_second": 6.166, + "eval_steps_per_second": 1.542, + "step": 20000 + }, + { + "epoch": 10.0, + "learning_rate": 1.1245464588105537e-05, + "loss": 0.4541, + "step": 20010 + }, + { + "epoch": 10.0, + "learning_rate": 1.1240927350717133e-05, + "loss": 0.6959, + "step": 20020 + }, + { + "epoch": 10.0, + "learning_rate": 1.123638829004618e-05, + "loss": 0.8044, + "step": 20030 + }, + { + "epoch": 10.0, + "learning_rate": 1.123184740830497e-05, + "loss": 0.754, + "step": 20040 + }, + { + "epoch": 10.0, + "learning_rate": 1.1227304707706665e-05, + "loss": 0.3744, + "step": 20050 + }, + { + "epoch": 10.0, + "learning_rate": 1.1222760190465327e-05, + "loss": 0.8936, + "step": 20060 + }, + { + "epoch": 10.0, + "learning_rate": 1.1218213858795896e-05, + "loss": 1.1047, + "step": 20070 + }, + { + "epoch": 10.0, + "learning_rate": 1.1213665714914198e-05, + "loss": 0.3212, + "step": 20080 + }, + { + "epoch": 10.0, + "learning_rate": 1.1209115761036948e-05, + "loss": 0.5225, + "step": 20090 + }, + { + "epoch": 10.0, + "learning_rate": 1.1204563999381733e-05, + "loss": 0.5067, + "step": 20100 + }, + { + "epoch": 10.0, + "learning_rate": 1.1200010432167028e-05, + "loss": 0.692, + "step": 20110 + }, + { + "epoch": 10.0, + "learning_rate": 1.1195455061612187e-05, + "loss": 0.4932, + "step": 20120 + }, + { + "epoch": 10.0, + "learning_rate": 1.1190897889937441e-05, + "loss": 0.528, + "step": 20130 + }, + { + "epoch": 10.0, + "learning_rate": 1.1186338919363903e-05, + "loss": 0.5525, + "step": 20140 + }, + { + "epoch": 10.0, + "learning_rate": 1.1181778152113556e-05, + "loss": 0.6876, + "step": 20150 + }, + { + "epoch": 10.0, + "learning_rate": 1.1177215590409265e-05, + "loss": 0.7504, + "step": 20160 + }, + { + "epoch": 10.0, + "learning_rate": 1.1172651236474768e-05, + "loss": 0.7971, + "step": 20170 + }, + { + "epoch": 10.0, + "learning_rate": 1.1168085092534673e-05, + "loss": 0.7278, + "step": 20180 + }, + { + "epoch": 10.0, + "learning_rate": 1.1163517160814464e-05, + "loss": 0.3767, + "step": 20190 + }, + { + "epoch": 10.0, + "learning_rate": 1.1158947443540496e-05, + "loss": 0.924, + "step": 20200 + }, + { + "epoch": 10.0, + "learning_rate": 1.1154375942939992e-05, + "loss": 0.6778, + "step": 20210 + }, + { + "epoch": 10.0, + "learning_rate": 1.1149802661241051e-05, + "loss": 0.7237, + "step": 20220 + }, + { + "epoch": 10.0, + "learning_rate": 1.1145227600672627e-05, + "loss": 0.7282, + "step": 20230 + }, + { + "epoch": 10.0, + "learning_rate": 1.1140650763464555e-05, + "loss": 0.5504, + "step": 20240 + }, + { + "epoch": 10.01, + "learning_rate": 1.1136072151847529e-05, + "loss": 0.7132, + "step": 20250 + }, + { + "epoch": 10.01, + "learning_rate": 1.1131491768053105e-05, + "loss": 0.6256, + "step": 20260 + }, + { + "epoch": 10.01, + "learning_rate": 1.1126909614313711e-05, + "loss": 0.3614, + "step": 20270 + }, + { + "epoch": 10.01, + "learning_rate": 1.1122325692862631e-05, + "loss": 0.3448, + "step": 20280 + }, + { + "epoch": 10.01, + "learning_rate": 1.1117740005934013e-05, + "loss": 0.6783, + "step": 20290 + }, + { + "epoch": 10.01, + "learning_rate": 1.1113152555762865e-05, + "loss": 0.8791, + "step": 20300 + }, + { + "epoch": 10.01, + "learning_rate": 1.1108563344585056e-05, + "loss": 0.7556, + "step": 20310 + }, + { + "epoch": 10.01, + "learning_rate": 1.1103972374637305e-05, + "loss": 0.6908, + "step": 20320 + }, + { + "epoch": 10.01, + "learning_rate": 1.1099379648157206e-05, + "loss": 0.9076, + "step": 20330 + }, + { + "epoch": 10.01, + "learning_rate": 1.1094785167383189e-05, + "loss": 0.6393, + "step": 20340 + }, + { + "epoch": 10.01, + "learning_rate": 1.1090188934554552e-05, + "loss": 0.4002, + "step": 20350 + }, + { + "epoch": 10.01, + "learning_rate": 1.1085590951911442e-05, + "loss": 0.5696, + "step": 20360 + }, + { + "epoch": 10.01, + "learning_rate": 1.108099122169486e-05, + "loss": 0.5739, + "step": 20370 + }, + { + "epoch": 10.01, + "learning_rate": 1.1076389746146659e-05, + "loss": 0.8006, + "step": 20380 + }, + { + "epoch": 10.01, + "learning_rate": 1.1071786527509544e-05, + "loss": 0.7159, + "step": 20390 + }, + { + "epoch": 10.01, + "learning_rate": 1.1067181568027065e-05, + "loss": 0.5583, + "step": 20400 + }, + { + "epoch": 10.01, + "learning_rate": 1.1062574869943623e-05, + "loss": 0.4653, + "step": 20410 + }, + { + "epoch": 10.01, + "learning_rate": 1.1057966435504468e-05, + "loss": 0.8138, + "step": 20420 + }, + { + "epoch": 10.01, + "learning_rate": 1.1053356266955699e-05, + "loss": 0.9701, + "step": 20430 + }, + { + "epoch": 10.01, + "learning_rate": 1.1048744366544248e-05, + "loss": 1.121, + "step": 20440 + }, + { + "epoch": 10.01, + "learning_rate": 1.1044130736517906e-05, + "loss": 0.6044, + "step": 20450 + }, + { + "epoch": 10.01, + "learning_rate": 1.1039515379125297e-05, + "loss": 0.7948, + "step": 20460 + }, + { + "epoch": 10.01, + "learning_rate": 1.1034898296615888e-05, + "loss": 0.7339, + "step": 20470 + }, + { + "epoch": 10.01, + "learning_rate": 1.1030279491239996e-05, + "loss": 0.6781, + "step": 20480 + }, + { + "epoch": 10.01, + "learning_rate": 1.1025658965248762e-05, + "loss": 0.5691, + "step": 20490 + }, + { + "epoch": 10.01, + "learning_rate": 1.1021036720894182e-05, + "loss": 0.7411, + "step": 20500 + }, + { + "epoch": 10.01, + "learning_rate": 1.1016412760429078e-05, + "loss": 0.8556, + "step": 20510 + }, + { + "epoch": 10.01, + "learning_rate": 1.1011787086107109e-05, + "loss": 0.8254, + "step": 20520 + }, + { + "epoch": 10.01, + "learning_rate": 1.100715970018278e-05, + "loss": 0.6314, + "step": 20530 + }, + { + "epoch": 10.01, + "learning_rate": 1.1002530604911416e-05, + "loss": 0.6799, + "step": 20540 + }, + { + "epoch": 10.01, + "learning_rate": 1.0997899802549185e-05, + "loss": 0.7688, + "step": 20550 + }, + { + "epoch": 10.01, + "learning_rate": 1.0993267295353082e-05, + "loss": 0.755, + "step": 20560 + }, + { + "epoch": 10.01, + "learning_rate": 1.0988633085580938e-05, + "loss": 0.6034, + "step": 20570 + }, + { + "epoch": 10.01, + "learning_rate": 1.0983997175491409e-05, + "loss": 0.6137, + "step": 20580 + }, + { + "epoch": 10.01, + "learning_rate": 1.0979359567343977e-05, + "loss": 0.5719, + "step": 20590 + }, + { + "epoch": 10.01, + "learning_rate": 1.0974720263398964e-05, + "loss": 0.6048, + "step": 20600 + }, + { + "epoch": 10.01, + "learning_rate": 1.0970079265917503e-05, + "loss": 0.7313, + "step": 20610 + }, + { + "epoch": 10.01, + "learning_rate": 1.0965436577161566e-05, + "loss": 0.5351, + "step": 20620 + }, + { + "epoch": 10.01, + "learning_rate": 1.0960792199393936e-05, + "loss": 0.71, + "step": 20630 + }, + { + "epoch": 10.01, + "learning_rate": 1.0956146134878232e-05, + "loss": 0.8728, + "step": 20640 + }, + { + "epoch": 10.01, + "learning_rate": 1.0951498385878888e-05, + "loss": 1.0162, + "step": 20650 + }, + { + "epoch": 10.01, + "learning_rate": 1.0946848954661161e-05, + "loss": 0.7644, + "step": 20660 + }, + { + "epoch": 10.01, + "learning_rate": 1.0942197843491125e-05, + "loss": 0.7666, + "step": 20670 + }, + { + "epoch": 10.01, + "learning_rate": 1.0937545054635673e-05, + "loss": 0.4105, + "step": 20680 + }, + { + "epoch": 10.01, + "learning_rate": 1.0932890590362526e-05, + "loss": 0.6279, + "step": 20690 + }, + { + "epoch": 10.01, + "learning_rate": 1.0928234452940207e-05, + "loss": 0.6136, + "step": 20700 + }, + { + "epoch": 10.01, + "learning_rate": 1.0923576644638063e-05, + "loss": 0.7239, + "step": 20710 + }, + { + "epoch": 10.01, + "learning_rate": 1.0918917167726252e-05, + "loss": 0.5398, + "step": 20720 + }, + { + "epoch": 10.01, + "learning_rate": 1.0914256024475743e-05, + "loss": 0.9341, + "step": 20730 + }, + { + "epoch": 10.01, + "learning_rate": 1.090959321715833e-05, + "loss": 0.899, + "step": 20740 + }, + { + "epoch": 10.02, + "learning_rate": 1.0904928748046601e-05, + "loss": 0.5511, + "step": 20750 + }, + { + "epoch": 10.02, + "learning_rate": 1.0900262619413965e-05, + "loss": 0.8734, + "step": 20760 + }, + { + "epoch": 10.02, + "learning_rate": 1.0895594833534635e-05, + "loss": 0.865, + "step": 20770 + }, + { + "epoch": 10.02, + "learning_rate": 1.089092539268363e-05, + "loss": 0.5086, + "step": 20780 + }, + { + "epoch": 10.02, + "learning_rate": 1.0886254299136787e-05, + "loss": 0.9701, + "step": 20790 + }, + { + "epoch": 10.02, + "learning_rate": 1.088158155517073e-05, + "loss": 0.9013, + "step": 20800 + }, + { + "epoch": 10.02, + "learning_rate": 1.0876907163062907e-05, + "loss": 0.8956, + "step": 20810 + }, + { + "epoch": 10.02, + "learning_rate": 1.0872231125091554e-05, + "loss": 0.6707, + "step": 20820 + }, + { + "epoch": 10.02, + "learning_rate": 1.0867553443535718e-05, + "loss": 0.7424, + "step": 20830 + }, + { + "epoch": 10.02, + "learning_rate": 1.0862874120675244e-05, + "loss": 0.7948, + "step": 20840 + }, + { + "epoch": 10.02, + "learning_rate": 1.0858193158790773e-05, + "loss": 0.895, + "step": 20850 + }, + { + "epoch": 10.02, + "learning_rate": 1.0853510560163755e-05, + "loss": 0.5475, + "step": 20860 + }, + { + "epoch": 10.02, + "learning_rate": 1.0848826327076426e-05, + "loss": 0.6243, + "step": 20870 + }, + { + "epoch": 10.02, + "learning_rate": 1.0844140461811832e-05, + "loss": 0.2264, + "step": 20880 + }, + { + "epoch": 10.02, + "learning_rate": 1.0839452966653798e-05, + "loss": 0.8413, + "step": 20890 + }, + { + "epoch": 10.02, + "learning_rate": 1.0834763843886956e-05, + "loss": 0.9677, + "step": 20900 + }, + { + "epoch": 10.02, + "learning_rate": 1.083007309579673e-05, + "loss": 0.4505, + "step": 20910 + }, + { + "epoch": 10.02, + "learning_rate": 1.0825380724669328e-05, + "loss": 0.8381, + "step": 20920 + }, + { + "epoch": 10.02, + "learning_rate": 1.0820686732791763e-05, + "loss": 0.3911, + "step": 20930 + }, + { + "epoch": 10.02, + "learning_rate": 1.081599112245182e-05, + "loss": 0.4558, + "step": 20940 + }, + { + "epoch": 10.02, + "learning_rate": 1.081129389593809e-05, + "loss": 0.7595, + "step": 20950 + }, + { + "epoch": 10.02, + "learning_rate": 1.080659505553994e-05, + "loss": 0.6722, + "step": 20960 + }, + { + "epoch": 10.02, + "learning_rate": 1.0801894603547529e-05, + "loss": 0.6681, + "step": 20970 + }, + { + "epoch": 10.02, + "learning_rate": 1.07971925422518e-05, + "loss": 0.9848, + "step": 20980 + }, + { + "epoch": 10.02, + "learning_rate": 1.0792488873944481e-05, + "loss": 0.8077, + "step": 20990 + }, + { + "epoch": 10.02, + "learning_rate": 1.078778360091808e-05, + "loss": 0.6053, + "step": 21000 + }, + { + "epoch": 10.02, + "learning_rate": 1.0783076725465896e-05, + "loss": 0.2675, + "step": 21010 + }, + { + "epoch": 10.02, + "learning_rate": 1.0778368249881996e-05, + "loss": 0.7754, + "step": 21020 + }, + { + "epoch": 10.02, + "learning_rate": 1.0773658176461242e-05, + "loss": 0.4894, + "step": 21030 + }, + { + "epoch": 10.02, + "learning_rate": 1.0768946507499255e-05, + "loss": 0.9299, + "step": 21040 + }, + { + "epoch": 10.02, + "learning_rate": 1.0764233245292457e-05, + "loss": 0.7061, + "step": 21050 + }, + { + "epoch": 10.02, + "learning_rate": 1.0759518392138026e-05, + "loss": 0.5992, + "step": 21060 + }, + { + "epoch": 10.02, + "learning_rate": 1.0754801950333931e-05, + "loss": 1.0293, + "step": 21070 + }, + { + "epoch": 10.02, + "learning_rate": 1.0750083922178904e-05, + "loss": 0.9621, + "step": 21080 + }, + { + "epoch": 10.02, + "learning_rate": 1.0745364309972454e-05, + "loss": 1.074, + "step": 21090 + }, + { + "epoch": 10.02, + "learning_rate": 1.0740643116014868e-05, + "loss": 1.0342, + "step": 21100 + }, + { + "epoch": 10.02, + "learning_rate": 1.0735920342607193e-05, + "loss": 0.5996, + "step": 21110 + }, + { + "epoch": 10.02, + "learning_rate": 1.0731195992051254e-05, + "loss": 0.4885, + "step": 21120 + }, + { + "epoch": 10.02, + "learning_rate": 1.0726470066649639e-05, + "loss": 0.8302, + "step": 21130 + }, + { + "epoch": 10.02, + "learning_rate": 1.0721742568705713e-05, + "loss": 0.8496, + "step": 21140 + }, + { + "epoch": 10.02, + "learning_rate": 1.0717013500523595e-05, + "loss": 0.6095, + "step": 21150 + }, + { + "epoch": 10.02, + "learning_rate": 1.0712282864408178e-05, + "loss": 0.7966, + "step": 21160 + }, + { + "epoch": 10.02, + "learning_rate": 1.0707550662665117e-05, + "loss": 0.7261, + "step": 21170 + }, + { + "epoch": 10.02, + "learning_rate": 1.0702816897600825e-05, + "loss": 0.5543, + "step": 21180 + }, + { + "epoch": 10.02, + "learning_rate": 1.0698081571522491e-05, + "loss": 0.4776, + "step": 21190 + }, + { + "epoch": 10.02, + "learning_rate": 1.0693344686738045e-05, + "loss": 0.7805, + "step": 21200 + }, + { + "epoch": 10.02, + "learning_rate": 1.0688606245556196e-05, + "loss": 0.4526, + "step": 21210 + }, + { + "epoch": 10.02, + "learning_rate": 1.0683866250286394e-05, + "loss": 0.6296, + "step": 21220 + }, + { + "epoch": 10.02, + "learning_rate": 1.0679124703238862e-05, + "loss": 0.7896, + "step": 21230 + }, + { + "epoch": 10.02, + "learning_rate": 1.0674381606724573e-05, + "loss": 0.75, + "step": 21240 + }, + { + "epoch": 10.03, + "learning_rate": 1.0669636963055247e-05, + "loss": 0.4908, + "step": 21250 + }, + { + "epoch": 10.03, + "learning_rate": 1.0664890774543372e-05, + "loss": 0.5771, + "step": 21260 + }, + { + "epoch": 10.03, + "learning_rate": 1.0660143043502181e-05, + "loss": 0.2833, + "step": 21270 + }, + { + "epoch": 10.03, + "learning_rate": 1.0655393772245661e-05, + "loss": 0.495, + "step": 21280 + }, + { + "epoch": 10.03, + "learning_rate": 1.0650642963088549e-05, + "loss": 0.6387, + "step": 21290 + }, + { + "epoch": 10.03, + "learning_rate": 1.0645890618346329e-05, + "loss": 0.718, + "step": 21300 + }, + { + "epoch": 10.03, + "learning_rate": 1.0641136740335238e-05, + "loss": 0.6408, + "step": 21310 + }, + { + "epoch": 10.03, + "learning_rate": 1.063638133137226e-05, + "loss": 0.7938, + "step": 21320 + }, + { + "epoch": 10.03, + "learning_rate": 1.0631624393775125e-05, + "loss": 0.4977, + "step": 21330 + }, + { + "epoch": 10.03, + "learning_rate": 1.0626865929862303e-05, + "loss": 0.7993, + "step": 21340 + }, + { + "epoch": 10.03, + "learning_rate": 1.062210594195301e-05, + "loss": 0.9612, + "step": 21350 + }, + { + "epoch": 10.03, + "learning_rate": 1.0617344432367208e-05, + "loss": 0.6478, + "step": 21360 + }, + { + "epoch": 10.03, + "learning_rate": 1.0612581403425603e-05, + "loss": 1.1047, + "step": 21370 + }, + { + "epoch": 10.03, + "learning_rate": 1.0607816857449632e-05, + "loss": 0.6164, + "step": 21380 + }, + { + "epoch": 10.03, + "learning_rate": 1.060305079676148e-05, + "loss": 0.5161, + "step": 21390 + }, + { + "epoch": 10.03, + "learning_rate": 1.0598283223684064e-05, + "loss": 0.7603, + "step": 21400 + }, + { + "epoch": 10.03, + "learning_rate": 1.0593514140541044e-05, + "loss": 0.827, + "step": 21410 + }, + { + "epoch": 10.03, + "learning_rate": 1.0588743549656812e-05, + "loss": 0.9353, + "step": 21420 + }, + { + "epoch": 10.03, + "learning_rate": 1.0583971453356499e-05, + "loss": 0.7165, + "step": 21430 + }, + { + "epoch": 10.03, + "learning_rate": 1.057919785396596e-05, + "loss": 0.5806, + "step": 21440 + }, + { + "epoch": 10.03, + "learning_rate": 1.0574422753811796e-05, + "loss": 0.8537, + "step": 21450 + }, + { + "epoch": 10.03, + "learning_rate": 1.056964615522133e-05, + "loss": 0.587, + "step": 21460 + }, + { + "epoch": 10.03, + "learning_rate": 1.0564868060522619e-05, + "loss": 0.6138, + "step": 21470 + }, + { + "epoch": 10.03, + "learning_rate": 1.0560088472044448e-05, + "loss": 0.6889, + "step": 21480 + }, + { + "epoch": 10.03, + "learning_rate": 1.0555307392116327e-05, + "loss": 1.3632, + "step": 21490 + }, + { + "epoch": 10.03, + "learning_rate": 1.0550524823068504e-05, + "loss": 0.5563, + "step": 21500 + }, + { + "epoch": 10.03, + "learning_rate": 1.0545740767231936e-05, + "loss": 0.6569, + "step": 21510 + }, + { + "epoch": 10.03, + "learning_rate": 1.054095522693832e-05, + "loss": 0.4933, + "step": 21520 + }, + { + "epoch": 10.03, + "learning_rate": 1.0536168204520068e-05, + "loss": 1.0152, + "step": 21530 + }, + { + "epoch": 10.03, + "learning_rate": 1.0531379702310317e-05, + "loss": 0.5184, + "step": 21540 + }, + { + "epoch": 10.03, + "learning_rate": 1.0526589722642927e-05, + "loss": 0.9484, + "step": 21550 + }, + { + "epoch": 10.03, + "learning_rate": 1.0521798267852471e-05, + "loss": 0.7081, + "step": 21560 + }, + { + "epoch": 10.03, + "learning_rate": 1.051700534027425e-05, + "loss": 0.6213, + "step": 21570 + }, + { + "epoch": 10.03, + "learning_rate": 1.0512210942244275e-05, + "loss": 0.6249, + "step": 21580 + }, + { + "epoch": 10.03, + "learning_rate": 1.0507415076099281e-05, + "loss": 0.657, + "step": 21590 + }, + { + "epoch": 10.03, + "learning_rate": 1.0502617744176715e-05, + "loss": 0.6751, + "step": 21600 + }, + { + "epoch": 10.03, + "learning_rate": 1.0497818948814732e-05, + "loss": 0.5777, + "step": 21610 + }, + { + "epoch": 10.03, + "learning_rate": 1.0493018692352216e-05, + "loss": 0.6597, + "step": 21620 + }, + { + "epoch": 10.03, + "learning_rate": 1.0488216977128745e-05, + "loss": 0.6365, + "step": 21630 + }, + { + "epoch": 10.03, + "learning_rate": 1.0483413805484625e-05, + "loss": 0.5279, + "step": 21640 + }, + { + "epoch": 10.03, + "learning_rate": 1.0478609179760854e-05, + "loss": 0.2789, + "step": 21650 + }, + { + "epoch": 10.03, + "learning_rate": 1.0473803102299157e-05, + "loss": 0.6284, + "step": 21660 + }, + { + "epoch": 10.03, + "learning_rate": 1.0468995575441954e-05, + "loss": 0.9454, + "step": 21670 + }, + { + "epoch": 10.03, + "learning_rate": 1.0464186601532374e-05, + "loss": 1.0541, + "step": 21680 + }, + { + "epoch": 10.03, + "learning_rate": 1.0459376182914256e-05, + "loss": 0.676, + "step": 21690 + }, + { + "epoch": 10.03, + "learning_rate": 1.0454564321932134e-05, + "loss": 1.0218, + "step": 21700 + }, + { + "epoch": 10.03, + "learning_rate": 1.0449751020931255e-05, + "loss": 0.6263, + "step": 21710 + }, + { + "epoch": 10.03, + "learning_rate": 1.0444936282257564e-05, + "loss": 0.6173, + "step": 21720 + }, + { + "epoch": 10.03, + "learning_rate": 1.0440120108257702e-05, + "loss": 1.0447, + "step": 21730 + }, + { + "epoch": 10.03, + "learning_rate": 1.043530250127902e-05, + "loss": 0.5234, + "step": 21740 + }, + { + "epoch": 10.04, + "learning_rate": 1.0430483463669552e-05, + "loss": 0.6918, + "step": 21750 + }, + { + "epoch": 10.04, + "learning_rate": 1.0425662997778048e-05, + "loss": 0.5981, + "step": 21760 + }, + { + "epoch": 10.04, + "learning_rate": 1.042084110595394e-05, + "loss": 0.6197, + "step": 21770 + }, + { + "epoch": 10.04, + "learning_rate": 1.0416017790547357e-05, + "loss": 0.4061, + "step": 21780 + }, + { + "epoch": 10.04, + "learning_rate": 1.041119305390913e-05, + "loss": 0.8991, + "step": 21790 + }, + { + "epoch": 10.04, + "learning_rate": 1.0406366898390772e-05, + "loss": 0.3839, + "step": 21800 + }, + { + "epoch": 10.04, + "learning_rate": 1.0401539326344498e-05, + "loss": 0.902, + "step": 21810 + }, + { + "epoch": 10.04, + "learning_rate": 1.03967103401232e-05, + "loss": 0.9921, + "step": 21820 + }, + { + "epoch": 10.04, + "learning_rate": 1.0391879942080475e-05, + "loss": 0.7106, + "step": 21830 + }, + { + "epoch": 10.04, + "learning_rate": 1.0387048134570596e-05, + "loss": 0.632, + "step": 21840 + }, + { + "epoch": 10.04, + "learning_rate": 1.0382214919948527e-05, + "loss": 0.6111, + "step": 21850 + }, + { + "epoch": 10.04, + "learning_rate": 1.0377380300569923e-05, + "loss": 0.3693, + "step": 21860 + }, + { + "epoch": 10.04, + "learning_rate": 1.0372544278791114e-05, + "loss": 0.6298, + "step": 21870 + }, + { + "epoch": 10.04, + "learning_rate": 1.0367706856969119e-05, + "loss": 0.474, + "step": 21880 + }, + { + "epoch": 10.04, + "learning_rate": 1.0362868037461638e-05, + "loss": 0.5683, + "step": 21890 + }, + { + "epoch": 10.04, + "learning_rate": 1.0358027822627057e-05, + "loss": 0.8299, + "step": 21900 + }, + { + "epoch": 10.04, + "learning_rate": 1.0353186214824433e-05, + "loss": 0.7203, + "step": 21910 + }, + { + "epoch": 10.04, + "learning_rate": 1.034834321641351e-05, + "loss": 0.3176, + "step": 21920 + }, + { + "epoch": 10.04, + "learning_rate": 1.0343498829754703e-05, + "loss": 0.5711, + "step": 21930 + }, + { + "epoch": 10.04, + "learning_rate": 1.0338653057209111e-05, + "loss": 0.5252, + "step": 21940 + }, + { + "epoch": 10.04, + "learning_rate": 1.0333805901138504e-05, + "loss": 0.5265, + "step": 21950 + }, + { + "epoch": 10.04, + "learning_rate": 1.0328957363905325e-05, + "loss": 0.7713, + "step": 21960 + }, + { + "epoch": 10.04, + "learning_rate": 1.0324107447872695e-05, + "loss": 0.5364, + "step": 21970 + }, + { + "epoch": 10.04, + "learning_rate": 1.03192561554044e-05, + "loss": 0.6878, + "step": 21980 + }, + { + "epoch": 10.04, + "learning_rate": 1.0314403488864907e-05, + "loss": 0.7172, + "step": 21990 + }, + { + "epoch": 10.04, + "learning_rate": 1.0309549450619342e-05, + "loss": 0.6793, + "step": 22000 + }, + { + "epoch": 10.04, + "eval_accuracy": 0.8557894736842105, + "eval_f1": 0.8557894736842105, + "eval_loss": 0.7730118036270142, + "eval_runtime": 761.7959, + "eval_samples_per_second": 6.235, + "eval_steps_per_second": 1.559, + "step": 22000 + }, + { + "epoch": 11.0, + "learning_rate": 1.0304694043033502e-05, + "loss": 0.9001, + "step": 22010 + }, + { + "epoch": 11.0, + "learning_rate": 1.0299837268473863e-05, + "loss": 0.7062, + "step": 22020 + }, + { + "epoch": 11.0, + "learning_rate": 1.0294979129307548e-05, + "loss": 0.4986, + "step": 22030 + }, + { + "epoch": 11.0, + "learning_rate": 1.0290119627902361e-05, + "loss": 0.563, + "step": 22040 + }, + { + "epoch": 11.0, + "learning_rate": 1.0285258766626762e-05, + "loss": 0.7271, + "step": 22050 + }, + { + "epoch": 11.0, + "learning_rate": 1.0280396547849873e-05, + "loss": 0.782, + "step": 22060 + }, + { + "epoch": 11.0, + "learning_rate": 1.0275532973941487e-05, + "loss": 0.686, + "step": 22070 + }, + { + "epoch": 11.0, + "learning_rate": 1.0270668047272045e-05, + "loss": 0.6969, + "step": 22080 + }, + { + "epoch": 11.0, + "learning_rate": 1.0265801770212656e-05, + "loss": 0.6656, + "step": 22090 + }, + { + "epoch": 11.0, + "learning_rate": 1.0260934145135086e-05, + "loss": 0.5161, + "step": 22100 + }, + { + "epoch": 11.0, + "learning_rate": 1.025606517441175e-05, + "loss": 0.9492, + "step": 22110 + }, + { + "epoch": 11.0, + "learning_rate": 1.0251194860415732e-05, + "loss": 0.7005, + "step": 22120 + }, + { + "epoch": 11.0, + "learning_rate": 1.0246323205520757e-05, + "loss": 1.0263, + "step": 22130 + }, + { + "epoch": 11.0, + "learning_rate": 1.0241450212101217e-05, + "loss": 0.4128, + "step": 22140 + }, + { + "epoch": 11.0, + "learning_rate": 1.0236575882532147e-05, + "loss": 0.601, + "step": 22150 + }, + { + "epoch": 11.0, + "learning_rate": 1.0231700219189237e-05, + "loss": 0.391, + "step": 22160 + }, + { + "epoch": 11.0, + "learning_rate": 1.0226823224448825e-05, + "loss": 0.623, + "step": 22170 + }, + { + "epoch": 11.0, + "learning_rate": 1.0221944900687897e-05, + "loss": 0.5038, + "step": 22180 + }, + { + "epoch": 11.0, + "learning_rate": 1.0217065250284094e-05, + "loss": 0.5027, + "step": 22190 + }, + { + "epoch": 11.0, + "learning_rate": 1.0212184275615691e-05, + "loss": 0.7453, + "step": 22200 + }, + { + "epoch": 11.0, + "learning_rate": 1.0207301979061625e-05, + "loss": 0.6545, + "step": 22210 + }, + { + "epoch": 11.0, + "learning_rate": 1.0202418363001462e-05, + "loss": 0.8928, + "step": 22220 + }, + { + "epoch": 11.0, + "learning_rate": 1.0197533429815416e-05, + "loss": 0.759, + "step": 22230 + }, + { + "epoch": 11.0, + "learning_rate": 1.0192647181884347e-05, + "loss": 0.6488, + "step": 22240 + }, + { + "epoch": 11.01, + "learning_rate": 1.018775962158975e-05, + "loss": 0.4284, + "step": 22250 + }, + { + "epoch": 11.01, + "learning_rate": 1.018287075131377e-05, + "loss": 0.7273, + "step": 22260 + }, + { + "epoch": 11.01, + "learning_rate": 1.0177980573439176e-05, + "loss": 0.8202, + "step": 22270 + }, + { + "epoch": 11.01, + "learning_rate": 1.017308909034938e-05, + "loss": 0.6795, + "step": 22280 + }, + { + "epoch": 11.01, + "learning_rate": 1.0168196304428437e-05, + "loss": 0.7264, + "step": 22290 + }, + { + "epoch": 11.01, + "learning_rate": 1.0163302218061028e-05, + "loss": 0.5813, + "step": 22300 + }, + { + "epoch": 11.01, + "learning_rate": 1.0158406833632473e-05, + "loss": 0.6163, + "step": 22310 + }, + { + "epoch": 11.01, + "learning_rate": 1.015351015352872e-05, + "loss": 0.4763, + "step": 22320 + }, + { + "epoch": 11.01, + "learning_rate": 1.0148612180136355e-05, + "loss": 0.5641, + "step": 22330 + }, + { + "epoch": 11.01, + "learning_rate": 1.0143712915842589e-05, + "loss": 0.704, + "step": 22340 + }, + { + "epoch": 11.01, + "learning_rate": 1.0138812363035263e-05, + "loss": 0.4373, + "step": 22350 + }, + { + "epoch": 11.01, + "learning_rate": 1.013391052410285e-05, + "loss": 0.8222, + "step": 22360 + }, + { + "epoch": 11.01, + "learning_rate": 1.0129007401434443e-05, + "loss": 0.7842, + "step": 22370 + }, + { + "epoch": 11.01, + "learning_rate": 1.0124102997419767e-05, + "loss": 0.3431, + "step": 22380 + }, + { + "epoch": 11.01, + "learning_rate": 1.0119197314449169e-05, + "loss": 0.7633, + "step": 22390 + }, + { + "epoch": 11.01, + "learning_rate": 1.0114290354913615e-05, + "loss": 0.5489, + "step": 22400 + }, + { + "epoch": 11.01, + "learning_rate": 1.0109382121204702e-05, + "loss": 0.5858, + "step": 22410 + }, + { + "epoch": 11.01, + "learning_rate": 1.0104472615714642e-05, + "loss": 0.5548, + "step": 22420 + }, + { + "epoch": 11.01, + "learning_rate": 1.0099561840836272e-05, + "loss": 0.6733, + "step": 22430 + }, + { + "epoch": 11.01, + "learning_rate": 1.0094649798963037e-05, + "loss": 0.8389, + "step": 22440 + }, + { + "epoch": 11.01, + "learning_rate": 1.008973649248901e-05, + "loss": 0.4196, + "step": 22450 + }, + { + "epoch": 11.01, + "learning_rate": 1.0084821923808877e-05, + "loss": 0.7719, + "step": 22460 + }, + { + "epoch": 11.01, + "learning_rate": 1.007990609531794e-05, + "loss": 0.4378, + "step": 22470 + }, + { + "epoch": 11.01, + "learning_rate": 1.0074989009412116e-05, + "loss": 0.3773, + "step": 22480 + }, + { + "epoch": 11.01, + "learning_rate": 1.0070070668487926e-05, + "loss": 0.7681, + "step": 22490 + }, + { + "epoch": 11.01, + "learning_rate": 1.0065151074942516e-05, + "loss": 0.4419, + "step": 22500 + }, + { + "epoch": 11.01, + "learning_rate": 1.0060230231173632e-05, + "loss": 0.7388, + "step": 22510 + }, + { + "epoch": 11.01, + "learning_rate": 1.0055308139579639e-05, + "loss": 0.678, + "step": 22520 + }, + { + "epoch": 11.01, + "learning_rate": 1.0050384802559497e-05, + "loss": 0.4424, + "step": 22530 + }, + { + "epoch": 11.01, + "learning_rate": 1.0045460222512785e-05, + "loss": 0.5644, + "step": 22540 + }, + { + "epoch": 11.01, + "learning_rate": 1.0040534401839687e-05, + "loss": 0.9504, + "step": 22550 + }, + { + "epoch": 11.01, + "learning_rate": 1.003560734294098e-05, + "loss": 0.5278, + "step": 22560 + }, + { + "epoch": 11.01, + "learning_rate": 1.003067904821806e-05, + "loss": 0.546, + "step": 22570 + }, + { + "epoch": 11.01, + "learning_rate": 1.0025749520072912e-05, + "loss": 1.0441, + "step": 22580 + }, + { + "epoch": 11.01, + "learning_rate": 1.0020818760908133e-05, + "loss": 0.8724, + "step": 22590 + }, + { + "epoch": 11.01, + "learning_rate": 1.0015886773126914e-05, + "loss": 0.9456, + "step": 22600 + }, + { + "epoch": 11.01, + "learning_rate": 1.0010953559133043e-05, + "loss": 0.662, + "step": 22610 + }, + { + "epoch": 11.01, + "learning_rate": 1.0006019121330913e-05, + "loss": 0.7724, + "step": 22620 + }, + { + "epoch": 11.01, + "learning_rate": 1.0001083462125504e-05, + "loss": 1.0125, + "step": 22630 + }, + { + "epoch": 11.01, + "learning_rate": 9.996146583922401e-06, + "loss": 0.5953, + "step": 22640 + }, + { + "epoch": 11.01, + "learning_rate": 9.991208489127775e-06, + "loss": 0.871, + "step": 22650 + }, + { + "epoch": 11.01, + "learning_rate": 9.986269180148397e-06, + "loss": 0.6984, + "step": 22660 + }, + { + "epoch": 11.01, + "learning_rate": 9.981328659391623e-06, + "loss": 0.6937, + "step": 22670 + }, + { + "epoch": 11.01, + "learning_rate": 9.976386929265403e-06, + "loss": 0.6753, + "step": 22680 + }, + { + "epoch": 11.01, + "learning_rate": 9.971443992178277e-06, + "loss": 0.6009, + "step": 22690 + }, + { + "epoch": 11.01, + "learning_rate": 9.966499850539375e-06, + "loss": 0.9312, + "step": 22700 + }, + { + "epoch": 11.01, + "learning_rate": 9.961554506758408e-06, + "loss": 0.5313, + "step": 22710 + }, + { + "epoch": 11.01, + "learning_rate": 9.956607963245676e-06, + "loss": 0.5486, + "step": 22720 + }, + { + "epoch": 11.01, + "learning_rate": 9.951660222412069e-06, + "loss": 0.6269, + "step": 22730 + }, + { + "epoch": 11.01, + "learning_rate": 9.94671128666905e-06, + "loss": 0.7164, + "step": 22740 + }, + { + "epoch": 11.02, + "learning_rate": 9.941761158428675e-06, + "loss": 0.4487, + "step": 22750 + }, + { + "epoch": 11.02, + "learning_rate": 9.936809840103575e-06, + "loss": 0.851, + "step": 22760 + }, + { + "epoch": 11.02, + "learning_rate": 9.931857334106958e-06, + "loss": 0.6416, + "step": 22770 + }, + { + "epoch": 11.02, + "learning_rate": 9.926903642852621e-06, + "loss": 0.9216, + "step": 22780 + }, + { + "epoch": 11.02, + "learning_rate": 9.921948768754931e-06, + "loss": 0.6046, + "step": 22790 + }, + { + "epoch": 11.02, + "learning_rate": 9.916992714228835e-06, + "loss": 0.6689, + "step": 22800 + }, + { + "epoch": 11.02, + "learning_rate": 9.91203548168985e-06, + "loss": 1.0292, + "step": 22810 + }, + { + "epoch": 11.02, + "learning_rate": 9.90707707355407e-06, + "loss": 0.7029, + "step": 22820 + }, + { + "epoch": 11.02, + "learning_rate": 9.90211749223817e-06, + "loss": 1.2995, + "step": 22830 + }, + { + "epoch": 11.02, + "learning_rate": 9.897156740159384e-06, + "loss": 0.5994, + "step": 22840 + }, + { + "epoch": 11.02, + "learning_rate": 9.892194819735525e-06, + "loss": 0.8809, + "step": 22850 + }, + { + "epoch": 11.02, + "learning_rate": 9.887231733384972e-06, + "loss": 0.8827, + "step": 22860 + }, + { + "epoch": 11.02, + "learning_rate": 9.882267483526669e-06, + "loss": 0.7032, + "step": 22870 + }, + { + "epoch": 11.02, + "learning_rate": 9.877302072580139e-06, + "loss": 0.4283, + "step": 22880 + }, + { + "epoch": 11.02, + "learning_rate": 9.872335502965455e-06, + "loss": 0.5838, + "step": 22890 + }, + { + "epoch": 11.02, + "learning_rate": 9.867367777103269e-06, + "loss": 0.5258, + "step": 22900 + }, + { + "epoch": 11.02, + "learning_rate": 9.862398897414786e-06, + "loss": 0.5202, + "step": 22910 + }, + { + "epoch": 11.02, + "learning_rate": 9.857428866321781e-06, + "loss": 0.5962, + "step": 22920 + }, + { + "epoch": 11.02, + "learning_rate": 9.852457686246583e-06, + "loss": 0.4088, + "step": 22930 + }, + { + "epoch": 11.02, + "learning_rate": 9.847485359612086e-06, + "loss": 0.9281, + "step": 22940 + }, + { + "epoch": 11.02, + "learning_rate": 9.842511888841744e-06, + "loss": 0.5964, + "step": 22950 + }, + { + "epoch": 11.02, + "learning_rate": 9.837537276359564e-06, + "loss": 0.6418, + "step": 22960 + }, + { + "epoch": 11.02, + "learning_rate": 9.832561524590115e-06, + "loss": 0.4186, + "step": 22970 + }, + { + "epoch": 11.02, + "learning_rate": 9.82758463595851e-06, + "loss": 0.6558, + "step": 22980 + }, + { + "epoch": 11.02, + "learning_rate": 9.822606612890431e-06, + "loss": 0.765, + "step": 22990 + }, + { + "epoch": 11.02, + "learning_rate": 9.817627457812105e-06, + "loss": 0.9792, + "step": 23000 + }, + { + "epoch": 11.02, + "learning_rate": 9.812647173150313e-06, + "loss": 0.9115, + "step": 23010 + }, + { + "epoch": 11.02, + "learning_rate": 9.807665761332382e-06, + "loss": 0.3789, + "step": 23020 + }, + { + "epoch": 11.02, + "learning_rate": 9.80268322478619e-06, + "loss": 0.4179, + "step": 23030 + }, + { + "epoch": 11.02, + "learning_rate": 9.797699565940168e-06, + "loss": 0.861, + "step": 23040 + }, + { + "epoch": 11.02, + "learning_rate": 9.792714787223294e-06, + "loss": 0.5827, + "step": 23050 + }, + { + "epoch": 11.02, + "learning_rate": 9.787728891065084e-06, + "loss": 0.6217, + "step": 23060 + }, + { + "epoch": 11.02, + "learning_rate": 9.782741879895602e-06, + "loss": 0.7407, + "step": 23070 + }, + { + "epoch": 11.02, + "learning_rate": 9.777753756145458e-06, + "loss": 0.5362, + "step": 23080 + }, + { + "epoch": 11.02, + "learning_rate": 9.772764522245806e-06, + "loss": 0.6387, + "step": 23090 + }, + { + "epoch": 11.02, + "learning_rate": 9.767774180628335e-06, + "loss": 0.4657, + "step": 23100 + }, + { + "epoch": 11.02, + "learning_rate": 9.762782733725277e-06, + "loss": 0.6733, + "step": 23110 + }, + { + "epoch": 11.02, + "learning_rate": 9.757790183969404e-06, + "loss": 0.6548, + "step": 23120 + }, + { + "epoch": 11.02, + "learning_rate": 9.752796533794022e-06, + "loss": 0.8012, + "step": 23130 + }, + { + "epoch": 11.02, + "learning_rate": 9.747801785632979e-06, + "loss": 0.7032, + "step": 23140 + }, + { + "epoch": 11.02, + "learning_rate": 9.74280594192065e-06, + "loss": 0.7914, + "step": 23150 + }, + { + "epoch": 11.02, + "learning_rate": 9.737809005091952e-06, + "loss": 0.7735, + "step": 23160 + }, + { + "epoch": 11.02, + "learning_rate": 9.732810977582329e-06, + "loss": 0.5257, + "step": 23170 + }, + { + "epoch": 11.02, + "learning_rate": 9.727811861827762e-06, + "loss": 0.7142, + "step": 23180 + }, + { + "epoch": 11.02, + "learning_rate": 9.722811660264757e-06, + "loss": 0.5854, + "step": 23190 + }, + { + "epoch": 11.02, + "learning_rate": 9.717810375330351e-06, + "loss": 1.0973, + "step": 23200 + }, + { + "epoch": 11.02, + "learning_rate": 9.71280800946211e-06, + "loss": 0.4392, + "step": 23210 + }, + { + "epoch": 11.02, + "learning_rate": 9.707804565098127e-06, + "loss": 0.5886, + "step": 23220 + }, + { + "epoch": 11.02, + "learning_rate": 9.702800044677022e-06, + "loss": 0.9332, + "step": 23230 + }, + { + "epoch": 11.02, + "learning_rate": 9.69779445063793e-06, + "loss": 0.639, + "step": 23240 + }, + { + "epoch": 11.03, + "learning_rate": 9.692787785420525e-06, + "loss": 0.4684, + "step": 23250 + }, + { + "epoch": 11.03, + "learning_rate": 9.687780051464993e-06, + "loss": 0.7447, + "step": 23260 + }, + { + "epoch": 11.03, + "learning_rate": 9.682771251212038e-06, + "loss": 0.5868, + "step": 23270 + }, + { + "epoch": 11.03, + "learning_rate": 9.677761387102896e-06, + "loss": 0.7026, + "step": 23280 + }, + { + "epoch": 11.03, + "learning_rate": 9.672750461579305e-06, + "loss": 0.4704, + "step": 23290 + }, + { + "epoch": 11.03, + "learning_rate": 9.667738477083536e-06, + "loss": 0.7248, + "step": 23300 + }, + { + "epoch": 11.03, + "learning_rate": 9.66272543605837e-06, + "loss": 0.7306, + "step": 23310 + }, + { + "epoch": 11.03, + "learning_rate": 9.657711340947096e-06, + "loss": 0.578, + "step": 23320 + }, + { + "epoch": 11.03, + "learning_rate": 9.652696194193527e-06, + "loss": 0.5586, + "step": 23330 + }, + { + "epoch": 11.03, + "learning_rate": 9.647679998241984e-06, + "loss": 0.7175, + "step": 23340 + }, + { + "epoch": 11.03, + "learning_rate": 9.642662755537301e-06, + "loss": 0.4754, + "step": 23350 + }, + { + "epoch": 11.03, + "learning_rate": 9.63764446852482e-06, + "loss": 0.4033, + "step": 23360 + }, + { + "epoch": 11.03, + "learning_rate": 9.632625139650395e-06, + "loss": 1.0351, + "step": 23370 + }, + { + "epoch": 11.03, + "learning_rate": 9.627604771360382e-06, + "loss": 0.5524, + "step": 23380 + }, + { + "epoch": 11.03, + "learning_rate": 9.622583366101652e-06, + "loss": 0.4591, + "step": 23390 + }, + { + "epoch": 11.03, + "learning_rate": 9.617560926321575e-06, + "loss": 0.9319, + "step": 23400 + }, + { + "epoch": 11.03, + "learning_rate": 9.612537454468024e-06, + "loss": 1.0524, + "step": 23410 + }, + { + "epoch": 11.03, + "learning_rate": 9.607512952989385e-06, + "loss": 0.6202, + "step": 23420 + }, + { + "epoch": 11.03, + "learning_rate": 9.602487424334532e-06, + "loss": 0.8657, + "step": 23430 + }, + { + "epoch": 11.03, + "learning_rate": 9.597460870952849e-06, + "loss": 0.8354, + "step": 23440 + }, + { + "epoch": 11.03, + "learning_rate": 9.59243329529422e-06, + "loss": 0.886, + "step": 23450 + }, + { + "epoch": 11.03, + "learning_rate": 9.58740469980902e-06, + "loss": 0.6174, + "step": 23460 + }, + { + "epoch": 11.03, + "learning_rate": 9.58237508694813e-06, + "loss": 0.7236, + "step": 23470 + }, + { + "epoch": 11.03, + "learning_rate": 9.577344459162918e-06, + "loss": 0.4743, + "step": 23480 + }, + { + "epoch": 11.03, + "learning_rate": 9.572312818905252e-06, + "loss": 0.6526, + "step": 23490 + }, + { + "epoch": 11.03, + "learning_rate": 9.567280168627493e-06, + "loss": 1.0537, + "step": 23500 + }, + { + "epoch": 11.03, + "learning_rate": 9.562246510782496e-06, + "loss": 0.1958, + "step": 23510 + }, + { + "epoch": 11.03, + "learning_rate": 9.5572118478236e-06, + "loss": 0.5649, + "step": 23520 + }, + { + "epoch": 11.03, + "learning_rate": 9.55217618220464e-06, + "loss": 0.8306, + "step": 23530 + }, + { + "epoch": 11.03, + "learning_rate": 9.54713951637994e-06, + "loss": 1.0081, + "step": 23540 + }, + { + "epoch": 11.03, + "learning_rate": 9.542101852804307e-06, + "loss": 0.7292, + "step": 23550 + }, + { + "epoch": 11.03, + "learning_rate": 9.537063193933041e-06, + "loss": 0.7931, + "step": 23560 + }, + { + "epoch": 11.03, + "learning_rate": 9.53202354222192e-06, + "loss": 0.4955, + "step": 23570 + }, + { + "epoch": 11.03, + "learning_rate": 9.52698290012721e-06, + "loss": 0.949, + "step": 23580 + }, + { + "epoch": 11.03, + "learning_rate": 9.521941270105657e-06, + "loss": 1.0758, + "step": 23590 + }, + { + "epoch": 11.03, + "learning_rate": 9.516898654614492e-06, + "loss": 1.0021, + "step": 23600 + }, + { + "epoch": 11.03, + "learning_rate": 9.511855056111426e-06, + "loss": 0.7936, + "step": 23610 + }, + { + "epoch": 11.03, + "learning_rate": 9.506810477054645e-06, + "loss": 0.7424, + "step": 23620 + }, + { + "epoch": 11.03, + "learning_rate": 9.501764919902818e-06, + "loss": 0.5233, + "step": 23630 + }, + { + "epoch": 11.03, + "learning_rate": 9.496718387115085e-06, + "loss": 0.5166, + "step": 23640 + }, + { + "epoch": 11.03, + "learning_rate": 9.491670881151067e-06, + "loss": 0.6646, + "step": 23650 + }, + { + "epoch": 11.03, + "learning_rate": 9.486622404470855e-06, + "loss": 0.6662, + "step": 23660 + }, + { + "epoch": 11.03, + "learning_rate": 9.481572959535019e-06, + "loss": 0.618, + "step": 23670 + }, + { + "epoch": 11.03, + "learning_rate": 9.476522548804596e-06, + "loss": 0.6063, + "step": 23680 + }, + { + "epoch": 11.03, + "learning_rate": 9.47147117474109e-06, + "loss": 0.3129, + "step": 23690 + }, + { + "epoch": 11.03, + "learning_rate": 9.466418839806486e-06, + "loss": 0.9168, + "step": 23700 + }, + { + "epoch": 11.03, + "learning_rate": 9.461365546463226e-06, + "loss": 0.7685, + "step": 23710 + }, + { + "epoch": 11.03, + "learning_rate": 9.456311297174228e-06, + "loss": 0.3527, + "step": 23720 + }, + { + "epoch": 11.03, + "learning_rate": 9.451256094402867e-06, + "loss": 0.5965, + "step": 23730 + }, + { + "epoch": 11.03, + "learning_rate": 9.44619994061299e-06, + "loss": 0.9973, + "step": 23740 + }, + { + "epoch": 11.04, + "learning_rate": 9.441142838268906e-06, + "loss": 0.8503, + "step": 23750 + }, + { + "epoch": 11.04, + "learning_rate": 9.436084789835383e-06, + "loss": 0.8314, + "step": 23760 + }, + { + "epoch": 11.04, + "learning_rate": 9.431025797777654e-06, + "loss": 0.4643, + "step": 23770 + }, + { + "epoch": 11.04, + "learning_rate": 9.425965864561408e-06, + "loss": 0.9074, + "step": 23780 + }, + { + "epoch": 11.04, + "learning_rate": 9.420904992652797e-06, + "loss": 0.4391, + "step": 23790 + }, + { + "epoch": 11.04, + "learning_rate": 9.41584318451843e-06, + "loss": 0.9418, + "step": 23800 + }, + { + "epoch": 11.04, + "learning_rate": 9.410780442625368e-06, + "loss": 0.575, + "step": 23810 + }, + { + "epoch": 11.04, + "learning_rate": 9.405716769441129e-06, + "loss": 0.8775, + "step": 23820 + }, + { + "epoch": 11.04, + "learning_rate": 9.400652167433687e-06, + "loss": 0.6625, + "step": 23830 + }, + { + "epoch": 11.04, + "learning_rate": 9.39558663907147e-06, + "loss": 1.0413, + "step": 23840 + }, + { + "epoch": 11.04, + "learning_rate": 9.390520186823354e-06, + "loss": 0.3134, + "step": 23850 + }, + { + "epoch": 11.04, + "learning_rate": 9.385452813158662e-06, + "loss": 0.722, + "step": 23860 + }, + { + "epoch": 11.04, + "learning_rate": 9.380384520547176e-06, + "loss": 0.8009, + "step": 23870 + }, + { + "epoch": 11.04, + "learning_rate": 9.375315311459116e-06, + "loss": 0.6277, + "step": 23880 + }, + { + "epoch": 11.04, + "learning_rate": 9.370245188365156e-06, + "loss": 0.5125, + "step": 23890 + }, + { + "epoch": 11.04, + "learning_rate": 9.365174153736414e-06, + "loss": 0.493, + "step": 23900 + }, + { + "epoch": 11.04, + "learning_rate": 9.360102210044441e-06, + "loss": 0.7453, + "step": 23910 + }, + { + "epoch": 11.04, + "learning_rate": 9.355029359761253e-06, + "loss": 0.439, + "step": 23920 + }, + { + "epoch": 11.04, + "learning_rate": 9.34995560535929e-06, + "loss": 0.691, + "step": 23930 + }, + { + "epoch": 11.04, + "learning_rate": 9.344880949311437e-06, + "loss": 0.6713, + "step": 23940 + }, + { + "epoch": 11.04, + "learning_rate": 9.33980539409102e-06, + "loss": 0.4107, + "step": 23950 + }, + { + "epoch": 11.04, + "learning_rate": 9.334728942171805e-06, + "loss": 0.7445, + "step": 23960 + }, + { + "epoch": 11.04, + "learning_rate": 9.329651596027992e-06, + "loss": 0.5816, + "step": 23970 + }, + { + "epoch": 11.04, + "learning_rate": 9.324573358134219e-06, + "loss": 0.8575, + "step": 23980 + }, + { + "epoch": 11.04, + "learning_rate": 9.319494230965556e-06, + "loss": 0.6872, + "step": 23990 + }, + { + "epoch": 11.04, + "learning_rate": 9.314414216997507e-06, + "loss": 0.5765, + "step": 24000 + }, + { + "epoch": 11.04, + "eval_accuracy": 0.8368421052631579, + "eval_f1": 0.8368421052631579, + "eval_loss": 0.7751592993736267, + "eval_runtime": 757.0231, + "eval_samples_per_second": 6.275, + "eval_steps_per_second": 1.569, + "step": 24000 + }, + { + "epoch": 12.0, + "learning_rate": 9.309333318706012e-06, + "loss": 0.5647, + "step": 24010 + }, + { + "epoch": 12.0, + "learning_rate": 9.304251538567439e-06, + "loss": 0.4817, + "step": 24020 + }, + { + "epoch": 12.0, + "learning_rate": 9.299168879058583e-06, + "loss": 1.1363, + "step": 24030 + }, + { + "epoch": 12.0, + "learning_rate": 9.29408534265667e-06, + "loss": 0.4983, + "step": 24040 + }, + { + "epoch": 12.0, + "learning_rate": 9.289000931839357e-06, + "loss": 0.5477, + "step": 24050 + }, + { + "epoch": 12.0, + "learning_rate": 9.283915649084722e-06, + "loss": 0.5383, + "step": 24060 + }, + { + "epoch": 12.0, + "learning_rate": 9.278829496871273e-06, + "loss": 0.3909, + "step": 24070 + }, + { + "epoch": 12.0, + "learning_rate": 9.273742477677936e-06, + "loss": 0.5127, + "step": 24080 + }, + { + "epoch": 12.0, + "learning_rate": 9.268654593984062e-06, + "loss": 0.6024, + "step": 24090 + }, + { + "epoch": 12.0, + "learning_rate": 9.263565848269425e-06, + "loss": 0.6374, + "step": 24100 + }, + { + "epoch": 12.0, + "learning_rate": 9.258476243014217e-06, + "loss": 0.3111, + "step": 24110 + }, + { + "epoch": 12.0, + "learning_rate": 9.253385780699054e-06, + "loss": 0.8523, + "step": 24120 + }, + { + "epoch": 12.0, + "learning_rate": 9.248294463804958e-06, + "loss": 0.6465, + "step": 24130 + }, + { + "epoch": 12.0, + "learning_rate": 9.24320229481338e-06, + "loss": 0.3356, + "step": 24140 + }, + { + "epoch": 12.0, + "learning_rate": 9.238109276206179e-06, + "loss": 0.4021, + "step": 24150 + }, + { + "epoch": 12.0, + "learning_rate": 9.233015410465636e-06, + "loss": 0.5717, + "step": 24160 + }, + { + "epoch": 12.0, + "learning_rate": 9.22792070007443e-06, + "loss": 0.5375, + "step": 24170 + }, + { + "epoch": 12.0, + "learning_rate": 9.222825147515668e-06, + "loss": 0.5466, + "step": 24180 + }, + { + "epoch": 12.0, + "learning_rate": 9.217728755272858e-06, + "loss": 0.5708, + "step": 24190 + }, + { + "epoch": 12.0, + "learning_rate": 9.212631525829919e-06, + "loss": 0.5068, + "step": 24200 + }, + { + "epoch": 12.0, + "learning_rate": 9.207533461671181e-06, + "loss": 0.3744, + "step": 24210 + }, + { + "epoch": 12.0, + "learning_rate": 9.202434565281376e-06, + "loss": 0.7047, + "step": 24220 + }, + { + "epoch": 12.0, + "learning_rate": 9.197334839145645e-06, + "loss": 0.7185, + "step": 24230 + }, + { + "epoch": 12.0, + "learning_rate": 9.19223428574953e-06, + "loss": 0.6393, + "step": 24240 + }, + { + "epoch": 12.01, + "learning_rate": 9.187132907578989e-06, + "loss": 0.9273, + "step": 24250 + }, + { + "epoch": 12.01, + "learning_rate": 9.18203070712036e-06, + "loss": 0.771, + "step": 24260 + }, + { + "epoch": 12.01, + "learning_rate": 9.176927686860397e-06, + "loss": 0.5174, + "step": 24270 + }, + { + "epoch": 12.01, + "learning_rate": 9.171823849286254e-06, + "loss": 0.1818, + "step": 24280 + }, + { + "epoch": 12.01, + "learning_rate": 9.166719196885473e-06, + "loss": 0.8949, + "step": 24290 + }, + { + "epoch": 12.01, + "learning_rate": 9.161613732146007e-06, + "loss": 0.5596, + "step": 24300 + }, + { + "epoch": 12.01, + "learning_rate": 9.156507457556189e-06, + "loss": 0.8454, + "step": 24310 + }, + { + "epoch": 12.01, + "learning_rate": 9.151400375604762e-06, + "loss": 0.8878, + "step": 24320 + }, + { + "epoch": 12.01, + "learning_rate": 9.146292488780854e-06, + "loss": 0.5003, + "step": 24330 + }, + { + "epoch": 12.01, + "learning_rate": 9.141183799573984e-06, + "loss": 0.7932, + "step": 24340 + }, + { + "epoch": 12.01, + "learning_rate": 9.136074310474071e-06, + "loss": 0.6643, + "step": 24350 + }, + { + "epoch": 12.01, + "learning_rate": 9.130964023971411e-06, + "loss": 0.4153, + "step": 24360 + }, + { + "epoch": 12.01, + "learning_rate": 9.1258529425567e-06, + "loss": 1.0002, + "step": 24370 + }, + { + "epoch": 12.01, + "learning_rate": 9.120741068721017e-06, + "loss": 0.6616, + "step": 24380 + }, + { + "epoch": 12.01, + "learning_rate": 9.115628404955823e-06, + "loss": 0.7308, + "step": 24390 + }, + { + "epoch": 12.01, + "learning_rate": 9.110514953752975e-06, + "loss": 0.6399, + "step": 24400 + }, + { + "epoch": 12.01, + "learning_rate": 9.1054007176047e-06, + "loss": 0.6749, + "step": 24410 + }, + { + "epoch": 12.01, + "learning_rate": 9.10028569900362e-06, + "loss": 0.9301, + "step": 24420 + }, + { + "epoch": 12.01, + "learning_rate": 9.09516990044273e-06, + "loss": 0.7458, + "step": 24430 + }, + { + "epoch": 12.01, + "learning_rate": 9.090053324415412e-06, + "loss": 0.6056, + "step": 24440 + }, + { + "epoch": 12.01, + "learning_rate": 9.084935973415417e-06, + "loss": 0.7213, + "step": 24450 + }, + { + "epoch": 12.01, + "learning_rate": 9.07981784993688e-06, + "loss": 0.4729, + "step": 24460 + }, + { + "epoch": 12.01, + "learning_rate": 9.074698956474321e-06, + "loss": 0.7893, + "step": 24470 + }, + { + "epoch": 12.01, + "learning_rate": 9.069579295522614e-06, + "loss": 0.6198, + "step": 24480 + }, + { + "epoch": 12.01, + "learning_rate": 9.064458869577028e-06, + "loss": 0.5706, + "step": 24490 + }, + { + "epoch": 12.01, + "learning_rate": 9.059337681133194e-06, + "loss": 0.9065, + "step": 24500 + }, + { + "epoch": 12.01, + "learning_rate": 9.054215732687118e-06, + "loss": 0.6079, + "step": 24510 + }, + { + "epoch": 12.01, + "learning_rate": 9.049093026735176e-06, + "loss": 0.4128, + "step": 24520 + }, + { + "epoch": 12.01, + "learning_rate": 9.04396956577411e-06, + "loss": 0.4852, + "step": 24530 + }, + { + "epoch": 12.01, + "learning_rate": 9.038845352301034e-06, + "loss": 0.6561, + "step": 24540 + }, + { + "epoch": 12.01, + "learning_rate": 9.033720388813426e-06, + "loss": 0.3259, + "step": 24550 + }, + { + "epoch": 12.01, + "learning_rate": 9.028594677809138e-06, + "loss": 0.7694, + "step": 24560 + }, + { + "epoch": 12.01, + "learning_rate": 9.023468221786367e-06, + "loss": 0.6811, + "step": 24570 + }, + { + "epoch": 12.01, + "learning_rate": 9.018341023243696e-06, + "loss": 0.6014, + "step": 24580 + }, + { + "epoch": 12.01, + "learning_rate": 9.013213084680053e-06, + "loss": 1.0853, + "step": 24590 + }, + { + "epoch": 12.01, + "learning_rate": 9.008084408594737e-06, + "loss": 0.664, + "step": 24600 + }, + { + "epoch": 12.01, + "learning_rate": 9.002954997487397e-06, + "loss": 0.643, + "step": 24610 + }, + { + "epoch": 12.01, + "learning_rate": 8.997824853858052e-06, + "loss": 0.7941, + "step": 24620 + }, + { + "epoch": 12.01, + "learning_rate": 8.992693980207069e-06, + "loss": 0.7122, + "step": 24630 + }, + { + "epoch": 12.01, + "learning_rate": 8.987562379035175e-06, + "loss": 0.6897, + "step": 24640 + }, + { + "epoch": 12.01, + "learning_rate": 8.982430052843447e-06, + "loss": 0.6138, + "step": 24650 + }, + { + "epoch": 12.01, + "learning_rate": 8.97729700413332e-06, + "loss": 0.6242, + "step": 24660 + }, + { + "epoch": 12.01, + "learning_rate": 8.97216323540658e-06, + "loss": 0.6159, + "step": 24670 + }, + { + "epoch": 12.01, + "learning_rate": 8.967028749165362e-06, + "loss": 0.5243, + "step": 24680 + }, + { + "epoch": 12.01, + "learning_rate": 8.961893547912155e-06, + "loss": 0.5596, + "step": 24690 + }, + { + "epoch": 12.01, + "learning_rate": 8.95675763414979e-06, + "loss": 0.8293, + "step": 24700 + }, + { + "epoch": 12.01, + "learning_rate": 8.951621010381454e-06, + "loss": 0.5012, + "step": 24710 + }, + { + "epoch": 12.01, + "learning_rate": 8.946483679110668e-06, + "loss": 0.6794, + "step": 24720 + }, + { + "epoch": 12.01, + "learning_rate": 8.941345642841312e-06, + "loss": 0.4822, + "step": 24730 + }, + { + "epoch": 12.01, + "learning_rate": 8.936206904077598e-06, + "loss": 0.3251, + "step": 24740 + }, + { + "epoch": 12.02, + "learning_rate": 8.931067465324087e-06, + "loss": 0.3446, + "step": 24750 + }, + { + "epoch": 12.02, + "learning_rate": 8.925927329085677e-06, + "loss": 0.921, + "step": 24760 + }, + { + "epoch": 12.02, + "learning_rate": 8.92078649786761e-06, + "loss": 0.5805, + "step": 24770 + }, + { + "epoch": 12.02, + "learning_rate": 8.915644974175466e-06, + "loss": 0.6757, + "step": 24780 + }, + { + "epoch": 12.02, + "learning_rate": 8.910502760515155e-06, + "loss": 0.8727, + "step": 24790 + }, + { + "epoch": 12.02, + "learning_rate": 8.905359859392936e-06, + "loss": 0.5516, + "step": 24800 + }, + { + "epoch": 12.02, + "learning_rate": 8.900216273315393e-06, + "loss": 0.8247, + "step": 24810 + }, + { + "epoch": 12.02, + "learning_rate": 8.895072004789447e-06, + "loss": 1.006, + "step": 24820 + }, + { + "epoch": 12.02, + "learning_rate": 8.889927056322356e-06, + "loss": 0.6281, + "step": 24830 + }, + { + "epoch": 12.02, + "learning_rate": 8.884781430421703e-06, + "loss": 0.7306, + "step": 24840 + }, + { + "epoch": 12.02, + "learning_rate": 8.879635129595402e-06, + "loss": 0.5848, + "step": 24850 + }, + { + "epoch": 12.02, + "learning_rate": 8.874488156351698e-06, + "loss": 0.5106, + "step": 24860 + }, + { + "epoch": 12.02, + "learning_rate": 8.869340513199166e-06, + "loss": 0.2741, + "step": 24870 + }, + { + "epoch": 12.02, + "learning_rate": 8.864192202646702e-06, + "loss": 0.5024, + "step": 24880 + }, + { + "epoch": 12.02, + "learning_rate": 8.85904322720353e-06, + "loss": 0.6582, + "step": 24890 + }, + { + "epoch": 12.02, + "learning_rate": 8.853893589379202e-06, + "loss": 0.6676, + "step": 24900 + }, + { + "epoch": 12.02, + "learning_rate": 8.848743291683583e-06, + "loss": 0.727, + "step": 24910 + }, + { + "epoch": 12.02, + "learning_rate": 8.843592336626868e-06, + "loss": 0.827, + "step": 24920 + }, + { + "epoch": 12.02, + "learning_rate": 8.83844072671957e-06, + "loss": 0.4957, + "step": 24930 + }, + { + "epoch": 12.02, + "learning_rate": 8.833288464472524e-06, + "loss": 0.7769, + "step": 24940 + }, + { + "epoch": 12.02, + "learning_rate": 8.828135552396875e-06, + "loss": 0.4891, + "step": 24950 + }, + { + "epoch": 12.02, + "learning_rate": 8.822981993004093e-06, + "loss": 1.0911, + "step": 24960 + }, + { + "epoch": 12.02, + "learning_rate": 8.81782778880596e-06, + "loss": 0.4612, + "step": 24970 + }, + { + "epoch": 12.02, + "learning_rate": 8.812672942314572e-06, + "loss": 0.9634, + "step": 24980 + }, + { + "epoch": 12.02, + "learning_rate": 8.807517456042335e-06, + "loss": 0.7368, + "step": 24990 + }, + { + "epoch": 12.02, + "learning_rate": 8.80236133250198e-06, + "loss": 0.5336, + "step": 25000 + }, + { + "epoch": 12.02, + "learning_rate": 8.797204574206529e-06, + "loss": 0.7426, + "step": 25010 + }, + { + "epoch": 12.02, + "learning_rate": 8.792047183669327e-06, + "loss": 0.7129, + "step": 25020 + }, + { + "epoch": 12.02, + "learning_rate": 8.786889163404021e-06, + "loss": 0.7036, + "step": 25030 + }, + { + "epoch": 12.02, + "learning_rate": 8.781730515924576e-06, + "loss": 0.5815, + "step": 25040 + }, + { + "epoch": 12.02, + "learning_rate": 8.776571243745244e-06, + "loss": 1.1234, + "step": 25050 + }, + { + "epoch": 12.02, + "learning_rate": 8.771411349380598e-06, + "loss": 0.9104, + "step": 25060 + }, + { + "epoch": 12.02, + "learning_rate": 8.766250835345503e-06, + "loss": 0.6302, + "step": 25070 + }, + { + "epoch": 12.02, + "learning_rate": 8.761089704155136e-06, + "loss": 0.7441, + "step": 25080 + }, + { + "epoch": 12.02, + "learning_rate": 8.755927958324966e-06, + "loss": 0.9698, + "step": 25090 + }, + { + "epoch": 12.02, + "learning_rate": 8.750765600370768e-06, + "loss": 0.596, + "step": 25100 + }, + { + "epoch": 12.02, + "learning_rate": 8.74560263280861e-06, + "loss": 0.7446, + "step": 25110 + }, + { + "epoch": 12.02, + "learning_rate": 8.740439058154858e-06, + "loss": 0.4741, + "step": 25120 + }, + { + "epoch": 12.02, + "learning_rate": 8.73527487892618e-06, + "loss": 0.884, + "step": 25130 + }, + { + "epoch": 12.02, + "learning_rate": 8.730110097639533e-06, + "loss": 0.5014, + "step": 25140 + }, + { + "epoch": 12.02, + "learning_rate": 8.724944716812167e-06, + "loss": 0.871, + "step": 25150 + }, + { + "epoch": 12.02, + "learning_rate": 8.719778738961629e-06, + "loss": 0.7679, + "step": 25160 + }, + { + "epoch": 12.02, + "learning_rate": 8.714612166605747e-06, + "loss": 0.6464, + "step": 25170 + }, + { + "epoch": 12.02, + "learning_rate": 8.709445002262655e-06, + "loss": 0.4916, + "step": 25180 + }, + { + "epoch": 12.02, + "learning_rate": 8.704277248450758e-06, + "loss": 0.9301, + "step": 25190 + }, + { + "epoch": 12.02, + "learning_rate": 8.699108907688763e-06, + "loss": 0.6673, + "step": 25200 + }, + { + "epoch": 12.02, + "learning_rate": 8.69393998249565e-06, + "loss": 0.7301, + "step": 25210 + }, + { + "epoch": 12.02, + "learning_rate": 8.688770475390698e-06, + "loss": 0.73, + "step": 25220 + }, + { + "epoch": 12.02, + "learning_rate": 8.683600388893454e-06, + "loss": 0.5313, + "step": 25230 + }, + { + "epoch": 12.02, + "learning_rate": 8.67842972552376e-06, + "loss": 0.4326, + "step": 25240 + }, + { + "epoch": 12.03, + "learning_rate": 8.673258487801733e-06, + "loss": 0.6077, + "step": 25250 + }, + { + "epoch": 12.03, + "learning_rate": 8.668086678247771e-06, + "loss": 0.8506, + "step": 25260 + }, + { + "epoch": 12.03, + "learning_rate": 8.662914299382555e-06, + "loss": 0.8871, + "step": 25270 + }, + { + "epoch": 12.03, + "learning_rate": 8.657741353727034e-06, + "loss": 0.6094, + "step": 25280 + }, + { + "epoch": 12.03, + "learning_rate": 8.652567843802442e-06, + "loss": 0.3646, + "step": 25290 + }, + { + "epoch": 12.03, + "learning_rate": 8.647393772130287e-06, + "loss": 0.7661, + "step": 25300 + }, + { + "epoch": 12.03, + "learning_rate": 8.642219141232343e-06, + "loss": 0.7078, + "step": 25310 + }, + { + "epoch": 12.03, + "learning_rate": 8.637043953630668e-06, + "loss": 0.6364, + "step": 25320 + }, + { + "epoch": 12.03, + "learning_rate": 8.63186821184758e-06, + "loss": 0.7387, + "step": 25330 + }, + { + "epoch": 12.03, + "learning_rate": 8.626691918405678e-06, + "loss": 0.7593, + "step": 25340 + }, + { + "epoch": 12.03, + "learning_rate": 8.621515075827822e-06, + "loss": 0.8417, + "step": 25350 + }, + { + "epoch": 12.03, + "learning_rate": 8.616337686637142e-06, + "loss": 0.7509, + "step": 25360 + }, + { + "epoch": 12.03, + "learning_rate": 8.611159753357035e-06, + "loss": 0.5077, + "step": 25370 + }, + { + "epoch": 12.03, + "learning_rate": 8.605981278511162e-06, + "loss": 0.5972, + "step": 25380 + }, + { + "epoch": 12.03, + "learning_rate": 8.60080226462345e-06, + "loss": 0.8684, + "step": 25390 + }, + { + "epoch": 12.03, + "learning_rate": 8.595622714218088e-06, + "loss": 0.8425, + "step": 25400 + }, + { + "epoch": 12.03, + "learning_rate": 8.590442629819523e-06, + "loss": 0.6553, + "step": 25410 + }, + { + "epoch": 12.03, + "learning_rate": 8.585262013952469e-06, + "loss": 1.0543, + "step": 25420 + }, + { + "epoch": 12.03, + "learning_rate": 8.580080869141891e-06, + "loss": 0.5518, + "step": 25430 + }, + { + "epoch": 12.03, + "learning_rate": 8.574899197913021e-06, + "loss": 0.6666, + "step": 25440 + }, + { + "epoch": 12.03, + "learning_rate": 8.569717002791338e-06, + "loss": 0.8162, + "step": 25450 + }, + { + "epoch": 12.03, + "learning_rate": 8.564534286302583e-06, + "loss": 0.5814, + "step": 25460 + }, + { + "epoch": 12.03, + "learning_rate": 8.559351050972751e-06, + "loss": 0.7725, + "step": 25470 + }, + { + "epoch": 12.03, + "learning_rate": 8.554167299328082e-06, + "loss": 0.6521, + "step": 25480 + }, + { + "epoch": 12.03, + "learning_rate": 8.548983033895081e-06, + "loss": 0.6795, + "step": 25490 + }, + { + "epoch": 12.03, + "learning_rate": 8.543798257200491e-06, + "loss": 0.4602, + "step": 25500 + }, + { + "epoch": 12.03, + "learning_rate": 8.538612971771311e-06, + "loss": 0.557, + "step": 25510 + }, + { + "epoch": 12.03, + "learning_rate": 8.533427180134784e-06, + "loss": 0.5068, + "step": 25520 + }, + { + "epoch": 12.03, + "learning_rate": 8.52824088481841e-06, + "loss": 0.7087, + "step": 25530 + }, + { + "epoch": 12.03, + "learning_rate": 8.523054088349913e-06, + "loss": 0.3761, + "step": 25540 + }, + { + "epoch": 12.03, + "learning_rate": 8.517866793257284e-06, + "loss": 0.4812, + "step": 25550 + }, + { + "epoch": 12.03, + "learning_rate": 8.512679002068744e-06, + "loss": 0.7368, + "step": 25560 + }, + { + "epoch": 12.03, + "learning_rate": 8.50749071731276e-06, + "loss": 0.581, + "step": 25570 + }, + { + "epoch": 12.03, + "learning_rate": 8.502301941518045e-06, + "loss": 0.2424, + "step": 25580 + }, + { + "epoch": 12.03, + "learning_rate": 8.497112677213532e-06, + "loss": 0.4681, + "step": 25590 + }, + { + "epoch": 12.03, + "learning_rate": 8.49192292692842e-06, + "loss": 0.8638, + "step": 25600 + }, + { + "epoch": 12.03, + "learning_rate": 8.48673269319212e-06, + "loss": 0.4321, + "step": 25610 + }, + { + "epoch": 12.03, + "learning_rate": 8.481541978534293e-06, + "loss": 0.5152, + "step": 25620 + }, + { + "epoch": 12.03, + "learning_rate": 8.476350785484828e-06, + "loss": 0.5563, + "step": 25630 + }, + { + "epoch": 12.03, + "learning_rate": 8.471159116573852e-06, + "loss": 0.9802, + "step": 25640 + }, + { + "epoch": 12.03, + "learning_rate": 8.465966974331722e-06, + "loss": 1.1415, + "step": 25650 + }, + { + "epoch": 12.03, + "learning_rate": 8.460774361289021e-06, + "loss": 1.0542, + "step": 25660 + }, + { + "epoch": 12.03, + "learning_rate": 8.45558127997657e-06, + "loss": 0.7572, + "step": 25670 + }, + { + "epoch": 12.03, + "learning_rate": 8.450387732925411e-06, + "loss": 0.6368, + "step": 25680 + }, + { + "epoch": 12.03, + "learning_rate": 8.445193722666814e-06, + "loss": 0.8224, + "step": 25690 + }, + { + "epoch": 12.03, + "learning_rate": 8.439999251732282e-06, + "loss": 0.4071, + "step": 25700 + }, + { + "epoch": 12.03, + "learning_rate": 8.434804322653534e-06, + "loss": 0.8619, + "step": 25710 + }, + { + "epoch": 12.03, + "learning_rate": 8.429608937962513e-06, + "loss": 0.3016, + "step": 25720 + }, + { + "epoch": 12.03, + "learning_rate": 8.424413100191391e-06, + "loss": 1.1291, + "step": 25730 + }, + { + "epoch": 12.03, + "learning_rate": 8.419216811872551e-06, + "loss": 0.5899, + "step": 25740 + }, + { + "epoch": 12.04, + "learning_rate": 8.414020075538606e-06, + "loss": 0.3795, + "step": 25750 + }, + { + "epoch": 12.04, + "learning_rate": 8.408822893722379e-06, + "loss": 0.5415, + "step": 25760 + }, + { + "epoch": 12.04, + "learning_rate": 8.403625268956915e-06, + "loss": 0.3862, + "step": 25770 + }, + { + "epoch": 12.04, + "learning_rate": 8.398427203775473e-06, + "loss": 0.7194, + "step": 25780 + }, + { + "epoch": 12.04, + "learning_rate": 8.393228700711524e-06, + "loss": 0.7457, + "step": 25790 + }, + { + "epoch": 12.04, + "learning_rate": 8.388029762298759e-06, + "loss": 0.6828, + "step": 25800 + }, + { + "epoch": 12.04, + "learning_rate": 8.382830391071072e-06, + "loss": 0.4018, + "step": 25810 + }, + { + "epoch": 12.04, + "learning_rate": 8.377630589562581e-06, + "loss": 0.6345, + "step": 25820 + }, + { + "epoch": 12.04, + "learning_rate": 8.3724303603076e-06, + "loss": 0.6341, + "step": 25830 + }, + { + "epoch": 12.04, + "learning_rate": 8.367229705840664e-06, + "loss": 0.4764, + "step": 25840 + }, + { + "epoch": 12.04, + "learning_rate": 8.3620286286965e-06, + "loss": 0.8783, + "step": 25850 + }, + { + "epoch": 12.04, + "learning_rate": 8.356827131410057e-06, + "loss": 0.5031, + "step": 25860 + }, + { + "epoch": 12.04, + "learning_rate": 8.351625216516476e-06, + "loss": 0.8044, + "step": 25870 + }, + { + "epoch": 12.04, + "learning_rate": 8.346422886551112e-06, + "loss": 0.3773, + "step": 25880 + }, + { + "epoch": 12.04, + "learning_rate": 8.341220144049517e-06, + "loss": 0.4815, + "step": 25890 + }, + { + "epoch": 12.04, + "learning_rate": 8.33601699154744e-06, + "loss": 0.6278, + "step": 25900 + }, + { + "epoch": 12.04, + "learning_rate": 8.33081343158084e-06, + "loss": 0.5951, + "step": 25910 + }, + { + "epoch": 12.04, + "learning_rate": 8.325609466685867e-06, + "loss": 0.7307, + "step": 25920 + }, + { + "epoch": 12.04, + "learning_rate": 8.320405099398867e-06, + "loss": 0.4442, + "step": 25930 + }, + { + "epoch": 12.04, + "learning_rate": 8.31520033225639e-06, + "loss": 0.5929, + "step": 25940 + }, + { + "epoch": 12.04, + "learning_rate": 8.309995167795172e-06, + "loss": 0.7052, + "step": 25950 + }, + { + "epoch": 12.04, + "learning_rate": 8.30478960855215e-06, + "loss": 0.5476, + "step": 25960 + }, + { + "epoch": 12.04, + "learning_rate": 8.29958365706445e-06, + "loss": 0.4296, + "step": 25970 + }, + { + "epoch": 12.04, + "learning_rate": 8.29437731586939e-06, + "loss": 0.5321, + "step": 25980 + }, + { + "epoch": 12.04, + "learning_rate": 8.289170587504476e-06, + "loss": 0.7862, + "step": 25990 + }, + { + "epoch": 12.04, + "learning_rate": 8.283963474507402e-06, + "loss": 0.4789, + "step": 26000 + }, + { + "epoch": 12.04, + "eval_accuracy": 0.848421052631579, + "eval_f1": 0.848421052631579, + "eval_loss": 0.7902358770370483, + "eval_runtime": 759.5604, + "eval_samples_per_second": 6.254, + "eval_steps_per_second": 1.564, + "step": 26000 + }, + { + "epoch": 13.0, + "learning_rate": 8.278755979416055e-06, + "loss": 0.8803, + "step": 26010 + }, + { + "epoch": 13.0, + "learning_rate": 8.273548104768505e-06, + "loss": 0.2926, + "step": 26020 + }, + { + "epoch": 13.0, + "learning_rate": 8.268339853103007e-06, + "loss": 0.7337, + "step": 26030 + }, + { + "epoch": 13.0, + "learning_rate": 8.263131226957998e-06, + "loss": 0.714, + "step": 26040 + }, + { + "epoch": 13.0, + "learning_rate": 8.257922228872097e-06, + "loss": 0.8997, + "step": 26050 + }, + { + "epoch": 13.0, + "learning_rate": 8.25271286138411e-06, + "loss": 0.8157, + "step": 26060 + }, + { + "epoch": 13.0, + "learning_rate": 8.247503127033023e-06, + "loss": 0.6588, + "step": 26070 + }, + { + "epoch": 13.0, + "learning_rate": 8.242293028357989e-06, + "loss": 0.7557, + "step": 26080 + }, + { + "epoch": 13.0, + "learning_rate": 8.237082567898349e-06, + "loss": 0.5747, + "step": 26090 + }, + { + "epoch": 13.0, + "learning_rate": 8.231871748193622e-06, + "loss": 0.4681, + "step": 26100 + }, + { + "epoch": 13.0, + "learning_rate": 8.226660571783495e-06, + "loss": 0.662, + "step": 26110 + }, + { + "epoch": 13.0, + "learning_rate": 8.221449041207832e-06, + "loss": 0.5678, + "step": 26120 + }, + { + "epoch": 13.0, + "learning_rate": 8.216237159006672e-06, + "loss": 0.4976, + "step": 26130 + }, + { + "epoch": 13.0, + "learning_rate": 8.21102492772022e-06, + "loss": 0.754, + "step": 26140 + }, + { + "epoch": 13.0, + "learning_rate": 8.20581234988886e-06, + "loss": 0.7292, + "step": 26150 + }, + { + "epoch": 13.0, + "learning_rate": 8.200599428053131e-06, + "loss": 0.7095, + "step": 26160 + }, + { + "epoch": 13.0, + "learning_rate": 8.19538616475375e-06, + "loss": 0.5002, + "step": 26170 + }, + { + "epoch": 13.0, + "learning_rate": 8.190172562531606e-06, + "loss": 0.4976, + "step": 26180 + }, + { + "epoch": 13.0, + "learning_rate": 8.184958623927732e-06, + "loss": 0.833, + "step": 26190 + }, + { + "epoch": 13.0, + "learning_rate": 8.179744351483353e-06, + "loss": 0.3571, + "step": 26200 + }, + { + "epoch": 13.0, + "learning_rate": 8.174529747739829e-06, + "loss": 0.5573, + "step": 26210 + }, + { + "epoch": 13.0, + "learning_rate": 8.169314815238705e-06, + "loss": 0.4676, + "step": 26220 + }, + { + "epoch": 13.0, + "learning_rate": 8.164099556521667e-06, + "loss": 0.3272, + "step": 26230 + }, + { + "epoch": 13.0, + "learning_rate": 8.158883974130576e-06, + "loss": 0.4443, + "step": 26240 + }, + { + "epoch": 13.01, + "learning_rate": 8.153668070607439e-06, + "loss": 0.5747, + "step": 26250 + }, + { + "epoch": 13.01, + "learning_rate": 8.148451848494422e-06, + "loss": 0.7184, + "step": 26260 + }, + { + "epoch": 13.01, + "learning_rate": 8.143235310333854e-06, + "loss": 0.5533, + "step": 26270 + }, + { + "epoch": 13.01, + "learning_rate": 8.13801845866821e-06, + "loss": 0.4728, + "step": 26280 + }, + { + "epoch": 13.01, + "learning_rate": 8.132801296040121e-06, + "loss": 0.3192, + "step": 26290 + }, + { + "epoch": 13.01, + "learning_rate": 8.127583824992369e-06, + "loss": 0.7083, + "step": 26300 + }, + { + "epoch": 13.01, + "learning_rate": 8.12236604806788e-06, + "loss": 0.8348, + "step": 26310 + }, + { + "epoch": 13.01, + "learning_rate": 8.117147967809741e-06, + "loss": 0.9413, + "step": 26320 + }, + { + "epoch": 13.01, + "learning_rate": 8.111929586761183e-06, + "loss": 0.7851, + "step": 26330 + }, + { + "epoch": 13.01, + "learning_rate": 8.106710907465576e-06, + "loss": 0.7636, + "step": 26340 + }, + { + "epoch": 13.01, + "learning_rate": 8.101491932466441e-06, + "loss": 0.5119, + "step": 26350 + }, + { + "epoch": 13.01, + "learning_rate": 8.096272664307448e-06, + "loss": 0.7746, + "step": 26360 + }, + { + "epoch": 13.01, + "learning_rate": 8.091053105532402e-06, + "loss": 0.3223, + "step": 26370 + }, + { + "epoch": 13.01, + "learning_rate": 8.085833258685251e-06, + "loss": 0.8672, + "step": 26380 + }, + { + "epoch": 13.01, + "learning_rate": 8.08061312631009e-06, + "loss": 0.5817, + "step": 26390 + }, + { + "epoch": 13.01, + "learning_rate": 8.07539271095114e-06, + "loss": 0.6081, + "step": 26400 + }, + { + "epoch": 13.01, + "learning_rate": 8.070172015152773e-06, + "loss": 0.5154, + "step": 26410 + }, + { + "epoch": 13.01, + "learning_rate": 8.064951041459496e-06, + "loss": 0.664, + "step": 26420 + }, + { + "epoch": 13.01, + "learning_rate": 8.059729792415942e-06, + "loss": 0.8406, + "step": 26430 + }, + { + "epoch": 13.01, + "learning_rate": 8.054508270566888e-06, + "loss": 0.5142, + "step": 26440 + }, + { + "epoch": 13.01, + "learning_rate": 8.049286478457237e-06, + "loss": 0.7025, + "step": 26450 + }, + { + "epoch": 13.01, + "learning_rate": 8.044064418632032e-06, + "loss": 0.4909, + "step": 26460 + }, + { + "epoch": 13.01, + "learning_rate": 8.038842093636438e-06, + "loss": 0.8021, + "step": 26470 + }, + { + "epoch": 13.01, + "learning_rate": 8.033619506015754e-06, + "loss": 0.8264, + "step": 26480 + }, + { + "epoch": 13.01, + "learning_rate": 8.028396658315402e-06, + "loss": 0.7216, + "step": 26490 + }, + { + "epoch": 13.01, + "learning_rate": 8.02317355308094e-06, + "loss": 0.8995, + "step": 26500 + }, + { + "epoch": 13.01, + "learning_rate": 8.017950192858045e-06, + "loss": 0.6565, + "step": 26510 + }, + { + "epoch": 13.01, + "learning_rate": 8.012726580192515e-06, + "loss": 0.8193, + "step": 26520 + }, + { + "epoch": 13.01, + "learning_rate": 8.007502717630282e-06, + "loss": 0.5443, + "step": 26530 + }, + { + "epoch": 13.01, + "learning_rate": 8.002278607717387e-06, + "loss": 0.6395, + "step": 26540 + }, + { + "epoch": 13.01, + "learning_rate": 7.997054253000003e-06, + "loss": 0.56, + "step": 26550 + }, + { + "epoch": 13.01, + "learning_rate": 7.991829656024412e-06, + "loss": 0.5525, + "step": 26560 + }, + { + "epoch": 13.01, + "learning_rate": 7.986604819337021e-06, + "loss": 0.4242, + "step": 26570 + }, + { + "epoch": 13.01, + "learning_rate": 7.981379745484353e-06, + "loss": 0.533, + "step": 26580 + }, + { + "epoch": 13.01, + "learning_rate": 7.976154437013045e-06, + "loss": 0.597, + "step": 26590 + }, + { + "epoch": 13.01, + "learning_rate": 7.970928896469851e-06, + "loss": 0.7054, + "step": 26600 + }, + { + "epoch": 13.01, + "learning_rate": 7.965703126401631e-06, + "loss": 0.4532, + "step": 26610 + }, + { + "epoch": 13.01, + "learning_rate": 7.960477129355367e-06, + "loss": 0.7798, + "step": 26620 + }, + { + "epoch": 13.01, + "learning_rate": 7.955250907878143e-06, + "loss": 0.9396, + "step": 26630 + }, + { + "epoch": 13.01, + "learning_rate": 7.950024464517157e-06, + "loss": 0.6024, + "step": 26640 + }, + { + "epoch": 13.01, + "learning_rate": 7.944797801819713e-06, + "loss": 0.4973, + "step": 26650 + }, + { + "epoch": 13.01, + "learning_rate": 7.939570922333223e-06, + "loss": 0.7844, + "step": 26660 + }, + { + "epoch": 13.01, + "learning_rate": 7.934343828605207e-06, + "loss": 0.852, + "step": 26670 + }, + { + "epoch": 13.01, + "learning_rate": 7.929116523183287e-06, + "loss": 0.9642, + "step": 26680 + }, + { + "epoch": 13.01, + "learning_rate": 7.923889008615186e-06, + "loss": 0.7534, + "step": 26690 + }, + { + "epoch": 13.01, + "learning_rate": 7.91866128744873e-06, + "loss": 0.6524, + "step": 26700 + }, + { + "epoch": 13.01, + "learning_rate": 7.913433362231847e-06, + "loss": 0.6843, + "step": 26710 + }, + { + "epoch": 13.01, + "learning_rate": 7.908205235512568e-06, + "loss": 0.8911, + "step": 26720 + }, + { + "epoch": 13.01, + "learning_rate": 7.902976909839015e-06, + "loss": 0.711, + "step": 26730 + }, + { + "epoch": 13.01, + "learning_rate": 7.897748387759413e-06, + "loss": 0.6965, + "step": 26740 + }, + { + "epoch": 13.02, + "learning_rate": 7.89251967182208e-06, + "loss": 0.906, + "step": 26750 + }, + { + "epoch": 13.02, + "learning_rate": 7.887290764575424e-06, + "loss": 0.7533, + "step": 26760 + }, + { + "epoch": 13.02, + "learning_rate": 7.882061668567957e-06, + "loss": 0.4359, + "step": 26770 + }, + { + "epoch": 13.02, + "learning_rate": 7.876832386348273e-06, + "loss": 0.3871, + "step": 26780 + }, + { + "epoch": 13.02, + "learning_rate": 7.87160292046506e-06, + "loss": 0.4603, + "step": 26790 + }, + { + "epoch": 13.02, + "learning_rate": 7.8663732734671e-06, + "loss": 0.5004, + "step": 26800 + }, + { + "epoch": 13.02, + "learning_rate": 7.861143447903256e-06, + "loss": 0.7104, + "step": 26810 + }, + { + "epoch": 13.02, + "learning_rate": 7.855913446322486e-06, + "loss": 0.4944, + "step": 26820 + }, + { + "epoch": 13.02, + "learning_rate": 7.850683271273822e-06, + "loss": 0.61, + "step": 26830 + }, + { + "epoch": 13.02, + "learning_rate": 7.845452925306393e-06, + "loss": 0.607, + "step": 26840 + }, + { + "epoch": 13.02, + "learning_rate": 7.840222410969402e-06, + "loss": 0.7068, + "step": 26850 + }, + { + "epoch": 13.02, + "learning_rate": 7.834991730812147e-06, + "loss": 0.3703, + "step": 26860 + }, + { + "epoch": 13.02, + "learning_rate": 7.829760887383987e-06, + "loss": 0.7156, + "step": 26870 + }, + { + "epoch": 13.02, + "learning_rate": 7.824529883234379e-06, + "loss": 0.6012, + "step": 26880 + }, + { + "epoch": 13.02, + "learning_rate": 7.819298720912848e-06, + "loss": 0.8801, + "step": 26890 + }, + { + "epoch": 13.02, + "learning_rate": 7.814067402968998e-06, + "loss": 0.6596, + "step": 26900 + }, + { + "epoch": 13.02, + "learning_rate": 7.808835931952513e-06, + "loss": 0.6712, + "step": 26910 + }, + { + "epoch": 13.02, + "learning_rate": 7.803604310413144e-06, + "loss": 0.3717, + "step": 26920 + }, + { + "epoch": 13.02, + "learning_rate": 7.798372540900723e-06, + "loss": 0.7806, + "step": 26930 + }, + { + "epoch": 13.02, + "learning_rate": 7.793140625965152e-06, + "loss": 0.4922, + "step": 26940 + }, + { + "epoch": 13.02, + "learning_rate": 7.7879085681564e-06, + "loss": 0.2948, + "step": 26950 + }, + { + "epoch": 13.02, + "learning_rate": 7.78267637002451e-06, + "loss": 0.8103, + "step": 26960 + }, + { + "epoch": 13.02, + "learning_rate": 7.77744403411959e-06, + "loss": 0.2707, + "step": 26970 + }, + { + "epoch": 13.02, + "learning_rate": 7.772211562991819e-06, + "loss": 0.6444, + "step": 26980 + }, + { + "epoch": 13.02, + "learning_rate": 7.766978959191438e-06, + "loss": 1.2529, + "step": 26990 + }, + { + "epoch": 13.02, + "learning_rate": 7.76174622526876e-06, + "loss": 0.6869, + "step": 27000 + }, + { + "epoch": 13.02, + "learning_rate": 7.756513363774147e-06, + "loss": 0.5928, + "step": 27010 + }, + { + "epoch": 13.02, + "learning_rate": 7.751280377258036e-06, + "loss": 0.2015, + "step": 27020 + }, + { + "epoch": 13.02, + "learning_rate": 7.746047268270923e-06, + "loss": 0.5429, + "step": 27030 + }, + { + "epoch": 13.02, + "learning_rate": 7.740814039363363e-06, + "loss": 0.7598, + "step": 27040 + }, + { + "epoch": 13.02, + "learning_rate": 7.735580693085962e-06, + "loss": 0.6728, + "step": 27050 + }, + { + "epoch": 13.02, + "learning_rate": 7.730347231989397e-06, + "loss": 0.455, + "step": 27060 + }, + { + "epoch": 13.02, + "learning_rate": 7.725113658624384e-06, + "loss": 0.7046, + "step": 27070 + }, + { + "epoch": 13.02, + "learning_rate": 7.719879975541714e-06, + "loss": 0.4476, + "step": 27080 + }, + { + "epoch": 13.02, + "learning_rate": 7.714646185292211e-06, + "loss": 0.3398, + "step": 27090 + }, + { + "epoch": 13.02, + "learning_rate": 7.709412290426768e-06, + "loss": 0.4458, + "step": 27100 + }, + { + "epoch": 13.02, + "learning_rate": 7.704178293496315e-06, + "loss": 1.237, + "step": 27110 + }, + { + "epoch": 13.02, + "learning_rate": 7.698944197051845e-06, + "loss": 0.495, + "step": 27120 + }, + { + "epoch": 13.02, + "learning_rate": 7.693710003644391e-06, + "loss": 0.918, + "step": 27130 + }, + { + "epoch": 13.02, + "learning_rate": 7.688475715825032e-06, + "loss": 1.0978, + "step": 27140 + }, + { + "epoch": 13.02, + "learning_rate": 7.6832413361449e-06, + "loss": 0.5083, + "step": 27150 + }, + { + "epoch": 13.02, + "learning_rate": 7.678006867155165e-06, + "loss": 0.7703, + "step": 27160 + }, + { + "epoch": 13.02, + "learning_rate": 7.672772311407047e-06, + "loss": 0.4654, + "step": 27170 + }, + { + "epoch": 13.02, + "learning_rate": 7.667537671451803e-06, + "loss": 0.9089, + "step": 27180 + }, + { + "epoch": 13.02, + "learning_rate": 7.662302949840734e-06, + "loss": 0.4724, + "step": 27190 + }, + { + "epoch": 13.02, + "learning_rate": 7.657068149125177e-06, + "loss": 0.752, + "step": 27200 + }, + { + "epoch": 13.02, + "learning_rate": 7.651833271856514e-06, + "loss": 0.6876, + "step": 27210 + }, + { + "epoch": 13.02, + "learning_rate": 7.64659832058616e-06, + "loss": 0.8171, + "step": 27220 + }, + { + "epoch": 13.02, + "learning_rate": 7.641363297865563e-06, + "loss": 0.5231, + "step": 27230 + }, + { + "epoch": 13.02, + "learning_rate": 7.636128206246212e-06, + "loss": 0.6129, + "step": 27240 + }, + { + "epoch": 13.03, + "learning_rate": 7.630893048279627e-06, + "loss": 0.498, + "step": 27250 + }, + { + "epoch": 13.03, + "learning_rate": 7.625657826517362e-06, + "loss": 0.5614, + "step": 27260 + }, + { + "epoch": 13.03, + "learning_rate": 7.620422543510997e-06, + "loss": 0.4612, + "step": 27270 + }, + { + "epoch": 13.03, + "learning_rate": 7.615187201812148e-06, + "loss": 0.3073, + "step": 27280 + }, + { + "epoch": 13.03, + "learning_rate": 7.609951803972455e-06, + "loss": 0.3593, + "step": 27290 + }, + { + "epoch": 13.03, + "learning_rate": 7.604716352543591e-06, + "loss": 0.4811, + "step": 27300 + }, + { + "epoch": 13.03, + "learning_rate": 7.5994808500772465e-06, + "loss": 0.5341, + "step": 27310 + }, + { + "epoch": 13.03, + "learning_rate": 7.594245299125145e-06, + "loss": 0.5838, + "step": 27320 + }, + { + "epoch": 13.03, + "learning_rate": 7.589009702239029e-06, + "loss": 0.6061, + "step": 27330 + }, + { + "epoch": 13.03, + "learning_rate": 7.583774061970667e-06, + "loss": 1.0167, + "step": 27340 + }, + { + "epoch": 13.03, + "learning_rate": 7.578538380871844e-06, + "loss": 0.3962, + "step": 27350 + }, + { + "epoch": 13.03, + "learning_rate": 7.573302661494369e-06, + "loss": 0.7158, + "step": 27360 + }, + { + "epoch": 13.03, + "learning_rate": 7.568066906390064e-06, + "loss": 0.6698, + "step": 27370 + }, + { + "epoch": 13.03, + "learning_rate": 7.56283111811078e-06, + "loss": 0.4631, + "step": 27380 + }, + { + "epoch": 13.03, + "learning_rate": 7.557595299208372e-06, + "loss": 0.6564, + "step": 27390 + }, + { + "epoch": 13.03, + "learning_rate": 7.552359452234712e-06, + "loss": 1.0689, + "step": 27400 + }, + { + "epoch": 13.03, + "learning_rate": 7.547123579741694e-06, + "loss": 0.4199, + "step": 27410 + }, + { + "epoch": 13.03, + "learning_rate": 7.541887684281212e-06, + "loss": 0.1127, + "step": 27420 + }, + { + "epoch": 13.03, + "learning_rate": 7.536651768405185e-06, + "loss": 0.6529, + "step": 27430 + }, + { + "epoch": 13.03, + "learning_rate": 7.53141583466553e-06, + "loss": 0.8881, + "step": 27440 + }, + { + "epoch": 13.03, + "learning_rate": 7.526179885614178e-06, + "loss": 0.6771, + "step": 27450 + }, + { + "epoch": 13.03, + "learning_rate": 7.52094392380307e-06, + "loss": 0.3566, + "step": 27460 + }, + { + "epoch": 13.03, + "learning_rate": 7.515707951784145e-06, + "loss": 0.4737, + "step": 27470 + }, + { + "epoch": 13.03, + "learning_rate": 7.510471972109359e-06, + "loss": 0.5579, + "step": 27480 + }, + { + "epoch": 13.03, + "learning_rate": 7.505235987330656e-06, + "loss": 0.7683, + "step": 27490 + }, + { + "epoch": 13.03, + "learning_rate": 7.5e-06, + "loss": 0.8997, + "step": 27500 + }, + { + "epoch": 13.03, + "learning_rate": 7.494764012669344e-06, + "loss": 0.8071, + "step": 27510 + }, + { + "epoch": 13.03, + "learning_rate": 7.489528027890643e-06, + "loss": 0.5877, + "step": 27520 + }, + { + "epoch": 13.03, + "learning_rate": 7.484292048215854e-06, + "loss": 0.8307, + "step": 27530 + }, + { + "epoch": 13.03, + "learning_rate": 7.479056076196931e-06, + "loss": 0.8793, + "step": 27540 + }, + { + "epoch": 13.03, + "learning_rate": 7.473820114385825e-06, + "loss": 0.5178, + "step": 27550 + }, + { + "epoch": 13.03, + "learning_rate": 7.468584165334472e-06, + "loss": 0.5048, + "step": 27560 + }, + { + "epoch": 13.03, + "learning_rate": 7.463348231594817e-06, + "loss": 0.6739, + "step": 27570 + }, + { + "epoch": 13.03, + "learning_rate": 7.458112315718789e-06, + "loss": 0.5874, + "step": 27580 + }, + { + "epoch": 13.03, + "learning_rate": 7.452876420258308e-06, + "loss": 0.5699, + "step": 27590 + }, + { + "epoch": 13.03, + "learning_rate": 7.447640547765289e-06, + "loss": 0.884, + "step": 27600 + }, + { + "epoch": 13.03, + "learning_rate": 7.442404700791629e-06, + "loss": 0.4479, + "step": 27610 + }, + { + "epoch": 13.03, + "learning_rate": 7.43716888188922e-06, + "loss": 0.4794, + "step": 27620 + }, + { + "epoch": 13.03, + "learning_rate": 7.4319330936099345e-06, + "loss": 0.5059, + "step": 27630 + }, + { + "epoch": 13.03, + "learning_rate": 7.426697338505634e-06, + "loss": 0.5991, + "step": 27640 + }, + { + "epoch": 13.03, + "learning_rate": 7.421461619128159e-06, + "loss": 0.9087, + "step": 27650 + }, + { + "epoch": 13.03, + "learning_rate": 7.4162259380293345e-06, + "loss": 0.8848, + "step": 27660 + }, + { + "epoch": 13.03, + "learning_rate": 7.4109902977609716e-06, + "loss": 0.7439, + "step": 27670 + }, + { + "epoch": 13.03, + "learning_rate": 7.405754700874855e-06, + "loss": 1.0823, + "step": 27680 + }, + { + "epoch": 13.03, + "learning_rate": 7.400519149922755e-06, + "loss": 0.3812, + "step": 27690 + }, + { + "epoch": 13.03, + "learning_rate": 7.395283647456411e-06, + "loss": 0.474, + "step": 27700 + }, + { + "epoch": 13.03, + "learning_rate": 7.390048196027545e-06, + "loss": 0.3511, + "step": 27710 + }, + { + "epoch": 13.03, + "learning_rate": 7.3848127981878525e-06, + "loss": 0.4437, + "step": 27720 + }, + { + "epoch": 13.03, + "learning_rate": 7.379577456489006e-06, + "loss": 0.7786, + "step": 27730 + }, + { + "epoch": 13.03, + "learning_rate": 7.374342173482641e-06, + "loss": 0.7242, + "step": 27740 + }, + { + "epoch": 13.04, + "learning_rate": 7.369106951720374e-06, + "loss": 0.4872, + "step": 27750 + }, + { + "epoch": 13.04, + "learning_rate": 7.36387179375379e-06, + "loss": 0.7691, + "step": 27760 + }, + { + "epoch": 13.04, + "learning_rate": 7.35863670213444e-06, + "loss": 0.7271, + "step": 27770 + }, + { + "epoch": 13.04, + "learning_rate": 7.353401679413842e-06, + "loss": 0.2824, + "step": 27780 + }, + { + "epoch": 13.04, + "learning_rate": 7.348166728143486e-06, + "loss": 0.323, + "step": 27790 + }, + { + "epoch": 13.04, + "learning_rate": 7.342931850874824e-06, + "loss": 0.8196, + "step": 27800 + }, + { + "epoch": 13.04, + "learning_rate": 7.337697050159266e-06, + "loss": 0.7817, + "step": 27810 + }, + { + "epoch": 13.04, + "learning_rate": 7.332462328548198e-06, + "loss": 0.6831, + "step": 27820 + }, + { + "epoch": 13.04, + "learning_rate": 7.327227688592955e-06, + "loss": 0.4716, + "step": 27830 + }, + { + "epoch": 13.04, + "learning_rate": 7.321993132844837e-06, + "loss": 0.4233, + "step": 27840 + }, + { + "epoch": 13.04, + "learning_rate": 7.316758663855102e-06, + "loss": 0.5285, + "step": 27850 + }, + { + "epoch": 13.04, + "learning_rate": 7.31152428417497e-06, + "loss": 0.7737, + "step": 27860 + }, + { + "epoch": 13.04, + "learning_rate": 7.30628999635561e-06, + "loss": 0.4997, + "step": 27870 + }, + { + "epoch": 13.04, + "learning_rate": 7.301055802948154e-06, + "loss": 0.4997, + "step": 27880 + }, + { + "epoch": 13.04, + "learning_rate": 7.295821706503684e-06, + "loss": 0.7782, + "step": 27890 + }, + { + "epoch": 13.04, + "learning_rate": 7.290587709573235e-06, + "loss": 0.8473, + "step": 27900 + }, + { + "epoch": 13.04, + "learning_rate": 7.285353814707791e-06, + "loss": 0.7146, + "step": 27910 + }, + { + "epoch": 13.04, + "learning_rate": 7.280120024458289e-06, + "loss": 0.7538, + "step": 27920 + }, + { + "epoch": 13.04, + "learning_rate": 7.274886341375616e-06, + "loss": 0.5318, + "step": 27930 + }, + { + "epoch": 13.04, + "learning_rate": 7.269652768010605e-06, + "loss": 0.5479, + "step": 27940 + }, + { + "epoch": 13.04, + "learning_rate": 7.264419306914038e-06, + "loss": 0.5562, + "step": 27950 + }, + { + "epoch": 13.04, + "learning_rate": 7.259185960636639e-06, + "loss": 0.6011, + "step": 27960 + }, + { + "epoch": 13.04, + "learning_rate": 7.253952731729076e-06, + "loss": 0.67, + "step": 27970 + }, + { + "epoch": 13.04, + "learning_rate": 7.2487196227419636e-06, + "loss": 0.8015, + "step": 27980 + }, + { + "epoch": 13.04, + "learning_rate": 7.243486636225856e-06, + "loss": 0.528, + "step": 27990 + }, + { + "epoch": 13.04, + "learning_rate": 7.238253774731245e-06, + "loss": 0.7398, + "step": 28000 + }, + { + "epoch": 13.04, + "eval_accuracy": 0.8568421052631578, + "eval_f1": 0.8568421052631578, + "eval_loss": 0.7603176832199097, + "eval_runtime": 752.2156, + "eval_samples_per_second": 6.315, + "eval_steps_per_second": 1.579, + "step": 28000 + }, + { + "epoch": 14.0, + "learning_rate": 7.233021040808562e-06, + "loss": 0.6519, + "step": 28010 + }, + { + "epoch": 14.0, + "learning_rate": 7.227788437008182e-06, + "loss": 0.4855, + "step": 28020 + }, + { + "epoch": 14.0, + "learning_rate": 7.2225559658804115e-06, + "loss": 0.547, + "step": 28030 + }, + { + "epoch": 14.0, + "learning_rate": 7.21732362997549e-06, + "loss": 0.4211, + "step": 28040 + }, + { + "epoch": 14.0, + "learning_rate": 7.212091431843601e-06, + "loss": 0.6844, + "step": 28050 + }, + { + "epoch": 14.0, + "learning_rate": 7.206859374034849e-06, + "loss": 0.6981, + "step": 28060 + }, + { + "epoch": 14.0, + "learning_rate": 7.201627459099275e-06, + "loss": 0.3789, + "step": 28070 + }, + { + "epoch": 14.0, + "learning_rate": 7.196395689586858e-06, + "loss": 0.418, + "step": 28080 + }, + { + "epoch": 14.0, + "learning_rate": 7.19116406804749e-06, + "loss": 0.8616, + "step": 28090 + }, + { + "epoch": 14.0, + "learning_rate": 7.1859325970310044e-06, + "loss": 0.4312, + "step": 28100 + }, + { + "epoch": 14.0, + "learning_rate": 7.1807012790871536e-06, + "loss": 0.3848, + "step": 28110 + }, + { + "epoch": 14.0, + "learning_rate": 7.175470116765623e-06, + "loss": 0.4381, + "step": 28120 + }, + { + "epoch": 14.0, + "learning_rate": 7.170239112616015e-06, + "loss": 0.7408, + "step": 28130 + }, + { + "epoch": 14.0, + "learning_rate": 7.165008269187855e-06, + "loss": 0.5329, + "step": 28140 + }, + { + "epoch": 14.0, + "learning_rate": 7.159777589030597e-06, + "loss": 0.3478, + "step": 28150 + }, + { + "epoch": 14.0, + "learning_rate": 7.1545470746936075e-06, + "loss": 0.6114, + "step": 28160 + }, + { + "epoch": 14.0, + "learning_rate": 7.149316728726182e-06, + "loss": 0.4138, + "step": 28170 + }, + { + "epoch": 14.0, + "learning_rate": 7.144086553677518e-06, + "loss": 0.6431, + "step": 28180 + }, + { + "epoch": 14.0, + "learning_rate": 7.138856552096746e-06, + "loss": 0.7836, + "step": 28190 + }, + { + "epoch": 14.0, + "learning_rate": 7.1336267265329e-06, + "loss": 0.71, + "step": 28200 + }, + { + "epoch": 14.0, + "learning_rate": 7.128397079534941e-06, + "loss": 0.7157, + "step": 28210 + }, + { + "epoch": 14.0, + "learning_rate": 7.123167613651729e-06, + "loss": 0.2906, + "step": 28220 + }, + { + "epoch": 14.0, + "learning_rate": 7.117938331432043e-06, + "loss": 0.548, + "step": 28230 + }, + { + "epoch": 14.0, + "learning_rate": 7.112709235424576e-06, + "loss": 0.3176, + "step": 28240 + }, + { + "epoch": 14.01, + "learning_rate": 7.107480328177922e-06, + "loss": 0.5765, + "step": 28250 + }, + { + "epoch": 14.01, + "learning_rate": 7.102251612240589e-06, + "loss": 0.9896, + "step": 28260 + }, + { + "epoch": 14.01, + "learning_rate": 7.097023090160984e-06, + "loss": 0.5716, + "step": 28270 + }, + { + "epoch": 14.01, + "learning_rate": 7.091794764487433e-06, + "loss": 0.7398, + "step": 28280 + }, + { + "epoch": 14.01, + "learning_rate": 7.086566637768154e-06, + "loss": 0.6816, + "step": 28290 + }, + { + "epoch": 14.01, + "learning_rate": 7.081338712551271e-06, + "loss": 0.5037, + "step": 28300 + }, + { + "epoch": 14.01, + "learning_rate": 7.076110991384817e-06, + "loss": 0.8894, + "step": 28310 + }, + { + "epoch": 14.01, + "learning_rate": 7.070883476816714e-06, + "loss": 0.6717, + "step": 28320 + }, + { + "epoch": 14.01, + "learning_rate": 7.065656171394791e-06, + "loss": 0.4743, + "step": 28330 + }, + { + "epoch": 14.01, + "learning_rate": 7.0604290776667756e-06, + "loss": 0.5505, + "step": 28340 + }, + { + "epoch": 14.01, + "learning_rate": 7.055202198180289e-06, + "loss": 0.6482, + "step": 28350 + }, + { + "epoch": 14.01, + "learning_rate": 7.049975535482847e-06, + "loss": 0.5837, + "step": 28360 + }, + { + "epoch": 14.01, + "learning_rate": 7.044749092121859e-06, + "loss": 0.8117, + "step": 28370 + }, + { + "epoch": 14.01, + "learning_rate": 7.039522870644635e-06, + "loss": 0.4058, + "step": 28380 + }, + { + "epoch": 14.01, + "learning_rate": 7.03429687359837e-06, + "loss": 0.4443, + "step": 28390 + }, + { + "epoch": 14.01, + "learning_rate": 7.02907110353015e-06, + "loss": 0.5815, + "step": 28400 + }, + { + "epoch": 14.01, + "learning_rate": 7.023845562986955e-06, + "loss": 0.3337, + "step": 28410 + }, + { + "epoch": 14.01, + "learning_rate": 7.018620254515645e-06, + "loss": 0.5415, + "step": 28420 + }, + { + "epoch": 14.01, + "learning_rate": 7.013395180662979e-06, + "loss": 0.8324, + "step": 28430 + }, + { + "epoch": 14.01, + "learning_rate": 7.00817034397559e-06, + "loss": 0.4286, + "step": 28440 + }, + { + "epoch": 14.01, + "learning_rate": 7.002945747000001e-06, + "loss": 0.5507, + "step": 28450 + }, + { + "epoch": 14.01, + "learning_rate": 6.997721392282614e-06, + "loss": 0.6761, + "step": 28460 + }, + { + "epoch": 14.01, + "learning_rate": 6.99249728236972e-06, + "loss": 0.8651, + "step": 28470 + }, + { + "epoch": 14.01, + "learning_rate": 6.987273419807486e-06, + "loss": 0.9459, + "step": 28480 + }, + { + "epoch": 14.01, + "learning_rate": 6.982049807141956e-06, + "loss": 0.8787, + "step": 28490 + }, + { + "epoch": 14.01, + "learning_rate": 6.976826446919061e-06, + "loss": 0.759, + "step": 28500 + }, + { + "epoch": 14.01, + "learning_rate": 6.971603341684598e-06, + "loss": 0.3774, + "step": 28510 + }, + { + "epoch": 14.01, + "learning_rate": 6.966380493984251e-06, + "loss": 0.4496, + "step": 28520 + }, + { + "epoch": 14.01, + "learning_rate": 6.961157906363564e-06, + "loss": 0.5926, + "step": 28530 + }, + { + "epoch": 14.01, + "learning_rate": 6.95593558136797e-06, + "loss": 0.231, + "step": 28540 + }, + { + "epoch": 14.01, + "learning_rate": 6.950713521542764e-06, + "loss": 0.4263, + "step": 28550 + }, + { + "epoch": 14.01, + "learning_rate": 6.945491729433113e-06, + "loss": 0.6419, + "step": 28560 + }, + { + "epoch": 14.01, + "learning_rate": 6.940270207584059e-06, + "loss": 0.5247, + "step": 28570 + }, + { + "epoch": 14.01, + "learning_rate": 6.935048958540506e-06, + "loss": 0.5439, + "step": 28580 + }, + { + "epoch": 14.01, + "learning_rate": 6.929827984847225e-06, + "loss": 0.4805, + "step": 28590 + }, + { + "epoch": 14.01, + "learning_rate": 6.9246072890488605e-06, + "loss": 0.3178, + "step": 28600 + }, + { + "epoch": 14.01, + "learning_rate": 6.919386873689914e-06, + "loss": 0.6941, + "step": 28610 + }, + { + "epoch": 14.01, + "learning_rate": 6.91416674131475e-06, + "loss": 0.5058, + "step": 28620 + }, + { + "epoch": 14.01, + "learning_rate": 6.9089468944675996e-06, + "loss": 0.4293, + "step": 28630 + }, + { + "epoch": 14.01, + "learning_rate": 6.903727335692553e-06, + "loss": 0.3477, + "step": 28640 + }, + { + "epoch": 14.01, + "learning_rate": 6.8985080675335594e-06, + "loss": 0.7547, + "step": 28650 + }, + { + "epoch": 14.01, + "learning_rate": 6.893289092534425e-06, + "loss": 0.43, + "step": 28660 + }, + { + "epoch": 14.01, + "learning_rate": 6.888070413238819e-06, + "loss": 0.5564, + "step": 28670 + }, + { + "epoch": 14.01, + "learning_rate": 6.882852032190257e-06, + "loss": 0.9997, + "step": 28680 + }, + { + "epoch": 14.01, + "learning_rate": 6.87763395193212e-06, + "loss": 0.6674, + "step": 28690 + }, + { + "epoch": 14.01, + "learning_rate": 6.8724161750076355e-06, + "loss": 0.485, + "step": 28700 + }, + { + "epoch": 14.01, + "learning_rate": 6.867198703959881e-06, + "loss": 0.7944, + "step": 28710 + }, + { + "epoch": 14.01, + "learning_rate": 6.86198154133179e-06, + "loss": 0.4259, + "step": 28720 + }, + { + "epoch": 14.01, + "learning_rate": 6.856764689666146e-06, + "loss": 0.8059, + "step": 28730 + }, + { + "epoch": 14.01, + "learning_rate": 6.85154815150558e-06, + "loss": 0.5936, + "step": 28740 + }, + { + "epoch": 14.02, + "learning_rate": 6.8463319293925634e-06, + "loss": 0.301, + "step": 28750 + }, + { + "epoch": 14.02, + "learning_rate": 6.841116025869426e-06, + "loss": 0.5836, + "step": 28760 + }, + { + "epoch": 14.02, + "learning_rate": 6.835900443478333e-06, + "loss": 0.673, + "step": 28770 + }, + { + "epoch": 14.02, + "learning_rate": 6.830685184761296e-06, + "loss": 0.3068, + "step": 28780 + }, + { + "epoch": 14.02, + "learning_rate": 6.8254702522601715e-06, + "loss": 0.8024, + "step": 28790 + }, + { + "epoch": 14.02, + "learning_rate": 6.82025564851665e-06, + "loss": 0.5062, + "step": 28800 + }, + { + "epoch": 14.02, + "learning_rate": 6.815041376072268e-06, + "loss": 0.5104, + "step": 28810 + }, + { + "epoch": 14.02, + "learning_rate": 6.809827437468397e-06, + "loss": 0.5171, + "step": 28820 + }, + { + "epoch": 14.02, + "learning_rate": 6.80461383524625e-06, + "loss": 0.6172, + "step": 28830 + }, + { + "epoch": 14.02, + "learning_rate": 6.799400571946872e-06, + "loss": 0.8842, + "step": 28840 + }, + { + "epoch": 14.02, + "learning_rate": 6.7941876501111426e-06, + "loss": 0.7714, + "step": 28850 + }, + { + "epoch": 14.02, + "learning_rate": 6.78897507227978e-06, + "loss": 0.82, + "step": 28860 + }, + { + "epoch": 14.02, + "learning_rate": 6.7837628409933274e-06, + "loss": 0.9166, + "step": 28870 + }, + { + "epoch": 14.02, + "learning_rate": 6.778550958792171e-06, + "loss": 0.4862, + "step": 28880 + }, + { + "epoch": 14.02, + "learning_rate": 6.773339428216507e-06, + "loss": 0.5772, + "step": 28890 + }, + { + "epoch": 14.02, + "learning_rate": 6.76812825180638e-06, + "loss": 0.3489, + "step": 28900 + }, + { + "epoch": 14.02, + "learning_rate": 6.762917432101653e-06, + "loss": 0.7883, + "step": 28910 + }, + { + "epoch": 14.02, + "learning_rate": 6.7577069716420125e-06, + "loss": 0.6087, + "step": 28920 + }, + { + "epoch": 14.02, + "learning_rate": 6.752496872966979e-06, + "loss": 0.588, + "step": 28930 + }, + { + "epoch": 14.02, + "learning_rate": 6.747287138615887e-06, + "loss": 0.5674, + "step": 28940 + }, + { + "epoch": 14.02, + "learning_rate": 6.742077771127902e-06, + "loss": 0.3757, + "step": 28950 + }, + { + "epoch": 14.02, + "learning_rate": 6.7368687730420035e-06, + "loss": 0.4141, + "step": 28960 + }, + { + "epoch": 14.02, + "learning_rate": 6.731660146896996e-06, + "loss": 0.7038, + "step": 28970 + }, + { + "epoch": 14.02, + "learning_rate": 6.726451895231497e-06, + "loss": 0.6075, + "step": 28980 + }, + { + "epoch": 14.02, + "learning_rate": 6.721244020583946e-06, + "loss": 0.6138, + "step": 28990 + }, + { + "epoch": 14.02, + "learning_rate": 6.7160365254926005e-06, + "loss": 0.6848, + "step": 29000 + }, + { + "epoch": 14.02, + "learning_rate": 6.710829412495527e-06, + "loss": 0.5487, + "step": 29010 + }, + { + "epoch": 14.02, + "learning_rate": 6.705622684130612e-06, + "loss": 0.6123, + "step": 29020 + }, + { + "epoch": 14.02, + "learning_rate": 6.700416342935551e-06, + "loss": 0.572, + "step": 29030 + }, + { + "epoch": 14.02, + "learning_rate": 6.69521039144785e-06, + "loss": 1.101, + "step": 29040 + }, + { + "epoch": 14.02, + "learning_rate": 6.690004832204828e-06, + "loss": 0.5468, + "step": 29050 + }, + { + "epoch": 14.02, + "learning_rate": 6.684799667743613e-06, + "loss": 0.4592, + "step": 29060 + }, + { + "epoch": 14.02, + "learning_rate": 6.679594900601136e-06, + "loss": 0.4517, + "step": 29070 + }, + { + "epoch": 14.02, + "learning_rate": 6.674390533314135e-06, + "loss": 0.3175, + "step": 29080 + }, + { + "epoch": 14.02, + "learning_rate": 6.66918656841916e-06, + "loss": 0.5221, + "step": 29090 + }, + { + "epoch": 14.02, + "learning_rate": 6.66398300845256e-06, + "loss": 0.6589, + "step": 29100 + }, + { + "epoch": 14.02, + "learning_rate": 6.658779855950483e-06, + "loss": 0.7721, + "step": 29110 + }, + { + "epoch": 14.02, + "learning_rate": 6.653577113448887e-06, + "loss": 0.5447, + "step": 29120 + }, + { + "epoch": 14.02, + "learning_rate": 6.648374783483521e-06, + "loss": 0.6826, + "step": 29130 + }, + { + "epoch": 14.02, + "learning_rate": 6.643172868589947e-06, + "loss": 0.8476, + "step": 29140 + }, + { + "epoch": 14.02, + "learning_rate": 6.6379713713035015e-06, + "loss": 0.5372, + "step": 29150 + }, + { + "epoch": 14.02, + "learning_rate": 6.63277029415934e-06, + "loss": 0.426, + "step": 29160 + }, + { + "epoch": 14.02, + "learning_rate": 6.627569639692401e-06, + "loss": 0.4579, + "step": 29170 + }, + { + "epoch": 14.02, + "learning_rate": 6.62236941043742e-06, + "loss": 0.3956, + "step": 29180 + }, + { + "epoch": 14.02, + "learning_rate": 6.617169608928927e-06, + "loss": 0.663, + "step": 29190 + }, + { + "epoch": 14.02, + "learning_rate": 6.611970237701242e-06, + "loss": 0.5617, + "step": 29200 + }, + { + "epoch": 14.02, + "learning_rate": 6.606771299288477e-06, + "loss": 0.5615, + "step": 29210 + }, + { + "epoch": 14.02, + "learning_rate": 6.6015727962245286e-06, + "loss": 0.5542, + "step": 29220 + }, + { + "epoch": 14.02, + "learning_rate": 6.596374731043087e-06, + "loss": 0.4503, + "step": 29230 + }, + { + "epoch": 14.02, + "learning_rate": 6.591177106277623e-06, + "loss": 0.7225, + "step": 29240 + }, + { + "epoch": 14.03, + "learning_rate": 6.5859799244613955e-06, + "loss": 0.9153, + "step": 29250 + }, + { + "epoch": 14.03, + "learning_rate": 6.580783188127449e-06, + "loss": 0.6467, + "step": 29260 + }, + { + "epoch": 14.03, + "learning_rate": 6.57558689980861e-06, + "loss": 0.4762, + "step": 29270 + }, + { + "epoch": 14.03, + "learning_rate": 6.570391062037487e-06, + "loss": 0.5648, + "step": 29280 + }, + { + "epoch": 14.03, + "learning_rate": 6.5651956773464675e-06, + "loss": 0.6234, + "step": 29290 + }, + { + "epoch": 14.03, + "learning_rate": 6.560000748267717e-06, + "loss": 0.9244, + "step": 29300 + }, + { + "epoch": 14.03, + "learning_rate": 6.554806277333185e-06, + "loss": 0.3941, + "step": 29310 + }, + { + "epoch": 14.03, + "learning_rate": 6.549612267074592e-06, + "loss": 0.5927, + "step": 29320 + }, + { + "epoch": 14.03, + "learning_rate": 6.544418720023433e-06, + "loss": 0.5772, + "step": 29330 + }, + { + "epoch": 14.03, + "learning_rate": 6.53922563871098e-06, + "loss": 0.4929, + "step": 29340 + }, + { + "epoch": 14.03, + "learning_rate": 6.5340330256682805e-06, + "loss": 0.853, + "step": 29350 + }, + { + "epoch": 14.03, + "learning_rate": 6.5288408834261485e-06, + "loss": 0.516, + "step": 29360 + }, + { + "epoch": 14.03, + "learning_rate": 6.523649214515172e-06, + "loss": 0.5695, + "step": 29370 + }, + { + "epoch": 14.03, + "learning_rate": 6.5184580214657085e-06, + "loss": 0.3876, + "step": 29380 + }, + { + "epoch": 14.03, + "learning_rate": 6.513267306807882e-06, + "loss": 0.6751, + "step": 29390 + }, + { + "epoch": 14.03, + "learning_rate": 6.508077073071581e-06, + "loss": 0.6608, + "step": 29400 + }, + { + "epoch": 14.03, + "learning_rate": 6.502887322786468e-06, + "loss": 0.5213, + "step": 29410 + }, + { + "epoch": 14.03, + "learning_rate": 6.497698058481959e-06, + "loss": 0.9044, + "step": 29420 + }, + { + "epoch": 14.03, + "learning_rate": 6.49250928268724e-06, + "loss": 0.7779, + "step": 29430 + }, + { + "epoch": 14.03, + "learning_rate": 6.487320997931256e-06, + "loss": 0.8036, + "step": 29440 + }, + { + "epoch": 14.03, + "learning_rate": 6.482133206742718e-06, + "loss": 0.9571, + "step": 29450 + }, + { + "epoch": 14.03, + "learning_rate": 6.476945911650088e-06, + "loss": 0.5353, + "step": 29460 + }, + { + "epoch": 14.03, + "learning_rate": 6.471759115181593e-06, + "loss": 0.3019, + "step": 29470 + }, + { + "epoch": 14.03, + "learning_rate": 6.466572819865215e-06, + "loss": 0.4097, + "step": 29480 + }, + { + "epoch": 14.03, + "learning_rate": 6.461387028228688e-06, + "loss": 0.7054, + "step": 29490 + }, + { + "epoch": 14.03, + "learning_rate": 6.456201742799511e-06, + "loss": 0.9272, + "step": 29500 + }, + { + "epoch": 14.03, + "learning_rate": 6.451016966104921e-06, + "loss": 0.7606, + "step": 29510 + }, + { + "epoch": 14.03, + "learning_rate": 6.445832700671919e-06, + "loss": 0.4147, + "step": 29520 + }, + { + "epoch": 14.03, + "learning_rate": 6.440648949027251e-06, + "loss": 0.4601, + "step": 29530 + }, + { + "epoch": 14.03, + "learning_rate": 6.435465713697417e-06, + "loss": 0.4947, + "step": 29540 + }, + { + "epoch": 14.03, + "learning_rate": 6.430282997208663e-06, + "loss": 0.4206, + "step": 29550 + }, + { + "epoch": 14.03, + "learning_rate": 6.425100802086979e-06, + "loss": 0.8854, + "step": 29560 + }, + { + "epoch": 14.03, + "learning_rate": 6.419919130858109e-06, + "loss": 0.4316, + "step": 29570 + }, + { + "epoch": 14.03, + "learning_rate": 6.414737986047532e-06, + "loss": 0.3306, + "step": 29580 + }, + { + "epoch": 14.03, + "learning_rate": 6.409557370180479e-06, + "loss": 1.0803, + "step": 29590 + }, + { + "epoch": 14.03, + "learning_rate": 6.404377285781914e-06, + "loss": 0.5344, + "step": 29600 + }, + { + "epoch": 14.03, + "learning_rate": 6.399197735376551e-06, + "loss": 0.5595, + "step": 29610 + }, + { + "epoch": 14.03, + "learning_rate": 6.394018721488839e-06, + "loss": 0.9271, + "step": 29620 + }, + { + "epoch": 14.03, + "learning_rate": 6.388840246642965e-06, + "loss": 0.328, + "step": 29630 + }, + { + "epoch": 14.03, + "learning_rate": 6.383662313362858e-06, + "loss": 0.6804, + "step": 29640 + }, + { + "epoch": 14.03, + "learning_rate": 6.37848492417218e-06, + "loss": 0.231, + "step": 29650 + }, + { + "epoch": 14.03, + "learning_rate": 6.373308081594322e-06, + "loss": 0.6374, + "step": 29660 + }, + { + "epoch": 14.03, + "learning_rate": 6.368131788152419e-06, + "loss": 0.6959, + "step": 29670 + }, + { + "epoch": 14.03, + "learning_rate": 6.362956046369335e-06, + "loss": 0.346, + "step": 29680 + }, + { + "epoch": 14.03, + "learning_rate": 6.3577808587676585e-06, + "loss": 0.5246, + "step": 29690 + }, + { + "epoch": 14.03, + "learning_rate": 6.3526062278697154e-06, + "loss": 0.8182, + "step": 29700 + }, + { + "epoch": 14.03, + "learning_rate": 6.347432156197558e-06, + "loss": 0.7254, + "step": 29710 + }, + { + "epoch": 14.03, + "learning_rate": 6.342258646272966e-06, + "loss": 0.6114, + "step": 29720 + }, + { + "epoch": 14.03, + "learning_rate": 6.3370857006174464e-06, + "loss": 0.526, + "step": 29730 + }, + { + "epoch": 14.03, + "learning_rate": 6.3319133217522295e-06, + "loss": 0.4828, + "step": 29740 + }, + { + "epoch": 14.04, + "learning_rate": 6.326741512198267e-06, + "loss": 0.5321, + "step": 29750 + }, + { + "epoch": 14.04, + "learning_rate": 6.321570274476245e-06, + "loss": 0.7288, + "step": 29760 + }, + { + "epoch": 14.04, + "learning_rate": 6.316399611106549e-06, + "loss": 0.5285, + "step": 29770 + }, + { + "epoch": 14.04, + "learning_rate": 6.311229524609307e-06, + "loss": 0.656, + "step": 29780 + }, + { + "epoch": 14.04, + "learning_rate": 6.3060600175043494e-06, + "loss": 0.8339, + "step": 29790 + }, + { + "epoch": 14.04, + "learning_rate": 6.300891092311239e-06, + "loss": 0.506, + "step": 29800 + }, + { + "epoch": 14.04, + "learning_rate": 6.295722751549243e-06, + "loss": 0.5651, + "step": 29810 + }, + { + "epoch": 14.04, + "learning_rate": 6.290554997737346e-06, + "loss": 0.4679, + "step": 29820 + }, + { + "epoch": 14.04, + "learning_rate": 6.2853878333942526e-06, + "loss": 0.7864, + "step": 29830 + }, + { + "epoch": 14.04, + "learning_rate": 6.2802212610383735e-06, + "loss": 0.5258, + "step": 29840 + }, + { + "epoch": 14.04, + "learning_rate": 6.2750552831878354e-06, + "loss": 0.7134, + "step": 29850 + }, + { + "epoch": 14.04, + "learning_rate": 6.269889902360468e-06, + "loss": 0.7398, + "step": 29860 + }, + { + "epoch": 14.04, + "learning_rate": 6.26472512107382e-06, + "loss": 0.439, + "step": 29870 + }, + { + "epoch": 14.04, + "learning_rate": 6.259560941845143e-06, + "loss": 0.5584, + "step": 29880 + }, + { + "epoch": 14.04, + "learning_rate": 6.254397367191391e-06, + "loss": 0.6233, + "step": 29890 + }, + { + "epoch": 14.04, + "learning_rate": 6.249234399629234e-06, + "loss": 0.702, + "step": 29900 + }, + { + "epoch": 14.04, + "learning_rate": 6.244072041675034e-06, + "loss": 0.5829, + "step": 29910 + }, + { + "epoch": 14.04, + "learning_rate": 6.238910295844863e-06, + "loss": 0.3531, + "step": 29920 + }, + { + "epoch": 14.04, + "learning_rate": 6.233749164654496e-06, + "loss": 0.6007, + "step": 29930 + }, + { + "epoch": 14.04, + "learning_rate": 6.228588650619405e-06, + "loss": 0.4124, + "step": 29940 + }, + { + "epoch": 14.04, + "learning_rate": 6.223428756254758e-06, + "loss": 0.5734, + "step": 29950 + }, + { + "epoch": 14.04, + "learning_rate": 6.218269484075426e-06, + "loss": 0.6953, + "step": 29960 + }, + { + "epoch": 14.04, + "learning_rate": 6.213110836595978e-06, + "loss": 0.716, + "step": 29970 + }, + { + "epoch": 14.04, + "learning_rate": 6.207952816330676e-06, + "loss": 0.708, + "step": 29980 + }, + { + "epoch": 14.04, + "learning_rate": 6.202795425793473e-06, + "loss": 0.6968, + "step": 29990 + }, + { + "epoch": 14.04, + "learning_rate": 6.197638667498023e-06, + "loss": 0.6807, + "step": 30000 + }, + { + "epoch": 14.04, + "eval_accuracy": 0.871578947368421, + "eval_f1": 0.871578947368421, + "eval_loss": 0.7531183362007141, + "eval_runtime": 743.1398, + "eval_samples_per_second": 6.392, + "eval_steps_per_second": 1.599, + "step": 30000 + }, + { + "epoch": 15.0, + "learning_rate": 6.1924825439576625e-06, + "loss": 0.3591, + "step": 30010 + }, + { + "epoch": 15.0, + "learning_rate": 6.1873270576854295e-06, + "loss": 0.8104, + "step": 30020 + }, + { + "epoch": 15.0, + "learning_rate": 6.182172211194042e-06, + "loss": 0.6743, + "step": 30030 + }, + { + "epoch": 15.0, + "learning_rate": 6.177018006995909e-06, + "loss": 0.3849, + "step": 30040 + }, + { + "epoch": 15.0, + "learning_rate": 6.171864447603126e-06, + "loss": 0.4138, + "step": 30050 + }, + { + "epoch": 15.0, + "learning_rate": 6.1667115355274785e-06, + "loss": 0.252, + "step": 30060 + }, + { + "epoch": 15.0, + "learning_rate": 6.161559273280431e-06, + "loss": 0.5183, + "step": 30070 + }, + { + "epoch": 15.0, + "learning_rate": 6.156407663373133e-06, + "loss": 0.5016, + "step": 30080 + }, + { + "epoch": 15.0, + "learning_rate": 6.1512567083164184e-06, + "loss": 0.7197, + "step": 30090 + }, + { + "epoch": 15.0, + "learning_rate": 6.146106410620801e-06, + "loss": 0.5565, + "step": 30100 + }, + { + "epoch": 15.0, + "learning_rate": 6.140956772796469e-06, + "loss": 0.4194, + "step": 30110 + }, + { + "epoch": 15.0, + "learning_rate": 6.1358077973533e-06, + "loss": 0.4268, + "step": 30120 + }, + { + "epoch": 15.0, + "learning_rate": 6.130659486800836e-06, + "loss": 0.4098, + "step": 30130 + }, + { + "epoch": 15.0, + "learning_rate": 6.125511843648304e-06, + "loss": 0.3364, + "step": 30140 + }, + { + "epoch": 15.0, + "learning_rate": 6.1203648704045986e-06, + "loss": 0.5037, + "step": 30150 + }, + { + "epoch": 15.0, + "learning_rate": 6.115218569578299e-06, + "loss": 0.3156, + "step": 30160 + }, + { + "epoch": 15.0, + "learning_rate": 6.110072943677645e-06, + "loss": 0.6378, + "step": 30170 + }, + { + "epoch": 15.0, + "learning_rate": 6.104927995210551e-06, + "loss": 0.2939, + "step": 30180 + }, + { + "epoch": 15.0, + "learning_rate": 6.099783726684608e-06, + "loss": 0.595, + "step": 30190 + }, + { + "epoch": 15.0, + "learning_rate": 6.094640140607064e-06, + "loss": 0.4013, + "step": 30200 + }, + { + "epoch": 15.0, + "learning_rate": 6.089497239484847e-06, + "loss": 0.751, + "step": 30210 + }, + { + "epoch": 15.0, + "learning_rate": 6.084355025824538e-06, + "loss": 0.7006, + "step": 30220 + }, + { + "epoch": 15.0, + "learning_rate": 6.079213502132392e-06, + "loss": 0.3794, + "step": 30230 + }, + { + "epoch": 15.0, + "learning_rate": 6.074072670914325e-06, + "loss": 0.4284, + "step": 30240 + }, + { + "epoch": 15.01, + "learning_rate": 6.068932534675914e-06, + "loss": 0.7241, + "step": 30250 + }, + { + "epoch": 15.01, + "learning_rate": 6.063793095922403e-06, + "loss": 0.4987, + "step": 30260 + }, + { + "epoch": 15.01, + "learning_rate": 6.058654357158688e-06, + "loss": 0.5056, + "step": 30270 + }, + { + "epoch": 15.01, + "learning_rate": 6.053516320889331e-06, + "loss": 0.7833, + "step": 30280 + }, + { + "epoch": 15.01, + "learning_rate": 6.048378989618548e-06, + "loss": 0.6071, + "step": 30290 + }, + { + "epoch": 15.01, + "learning_rate": 6.043242365850212e-06, + "loss": 0.3674, + "step": 30300 + }, + { + "epoch": 15.01, + "learning_rate": 6.038106452087847e-06, + "loss": 0.6286, + "step": 30310 + }, + { + "epoch": 15.01, + "learning_rate": 6.032971250834639e-06, + "loss": 0.695, + "step": 30320 + }, + { + "epoch": 15.01, + "learning_rate": 6.027836764593422e-06, + "loss": 0.4895, + "step": 30330 + }, + { + "epoch": 15.01, + "learning_rate": 6.022702995866681e-06, + "loss": 0.42, + "step": 30340 + }, + { + "epoch": 15.01, + "learning_rate": 6.017569947156554e-06, + "loss": 0.2868, + "step": 30350 + }, + { + "epoch": 15.01, + "learning_rate": 6.012437620964827e-06, + "loss": 0.4577, + "step": 30360 + }, + { + "epoch": 15.01, + "learning_rate": 6.0073060197929295e-06, + "loss": 0.662, + "step": 30370 + }, + { + "epoch": 15.01, + "learning_rate": 6.0021751461419485e-06, + "loss": 0.6371, + "step": 30380 + }, + { + "epoch": 15.01, + "learning_rate": 5.997045002512604e-06, + "loss": 0.3516, + "step": 30390 + }, + { + "epoch": 15.01, + "learning_rate": 5.991915591405267e-06, + "loss": 0.5922, + "step": 30400 + }, + { + "epoch": 15.01, + "learning_rate": 5.986786915319949e-06, + "loss": 0.7923, + "step": 30410 + }, + { + "epoch": 15.01, + "learning_rate": 5.9816589767563065e-06, + "loss": 0.3149, + "step": 30420 + }, + { + "epoch": 15.01, + "learning_rate": 5.9765317782136346e-06, + "loss": 0.4833, + "step": 30430 + }, + { + "epoch": 15.01, + "learning_rate": 5.971405322190864e-06, + "loss": 0.7275, + "step": 30440 + }, + { + "epoch": 15.01, + "learning_rate": 5.966279611186573e-06, + "loss": 0.6083, + "step": 30450 + }, + { + "epoch": 15.01, + "learning_rate": 5.961154647698965e-06, + "loss": 0.5943, + "step": 30460 + }, + { + "epoch": 15.01, + "learning_rate": 5.956030434225892e-06, + "loss": 0.7019, + "step": 30470 + }, + { + "epoch": 15.01, + "learning_rate": 5.9509069732648255e-06, + "loss": 0.5745, + "step": 30480 + }, + { + "epoch": 15.01, + "learning_rate": 5.945784267312882e-06, + "loss": 0.7729, + "step": 30490 + }, + { + "epoch": 15.01, + "learning_rate": 5.9406623188668065e-06, + "loss": 0.5774, + "step": 30500 + }, + { + "epoch": 15.01, + "learning_rate": 5.935541130422972e-06, + "loss": 0.468, + "step": 30510 + }, + { + "epoch": 15.01, + "learning_rate": 5.930420704477387e-06, + "loss": 0.5499, + "step": 30520 + }, + { + "epoch": 15.01, + "learning_rate": 5.925301043525682e-06, + "loss": 0.6378, + "step": 30530 + }, + { + "epoch": 15.01, + "learning_rate": 5.9201821500631195e-06, + "loss": 1.0126, + "step": 30540 + }, + { + "epoch": 15.01, + "learning_rate": 5.915064026584586e-06, + "loss": 1.0802, + "step": 30550 + }, + { + "epoch": 15.01, + "learning_rate": 5.9099466755845925e-06, + "loss": 0.7932, + "step": 30560 + }, + { + "epoch": 15.01, + "learning_rate": 5.904830099557271e-06, + "loss": 0.5136, + "step": 30570 + }, + { + "epoch": 15.01, + "learning_rate": 5.899714300996381e-06, + "loss": 0.4871, + "step": 30580 + }, + { + "epoch": 15.01, + "learning_rate": 5.8945992823953e-06, + "loss": 0.4485, + "step": 30590 + }, + { + "epoch": 15.01, + "learning_rate": 5.889485046247026e-06, + "loss": 0.5692, + "step": 30600 + }, + { + "epoch": 15.01, + "learning_rate": 5.8843715950441765e-06, + "loss": 0.6649, + "step": 30610 + }, + { + "epoch": 15.01, + "learning_rate": 5.8792589312789855e-06, + "loss": 0.6113, + "step": 30620 + }, + { + "epoch": 15.01, + "learning_rate": 5.874147057443301e-06, + "loss": 0.4845, + "step": 30630 + }, + { + "epoch": 15.01, + "learning_rate": 5.869035976028589e-06, + "loss": 0.6766, + "step": 30640 + }, + { + "epoch": 15.01, + "learning_rate": 5.863925689525933e-06, + "loss": 0.385, + "step": 30650 + }, + { + "epoch": 15.01, + "learning_rate": 5.858816200426018e-06, + "loss": 0.5752, + "step": 30660 + }, + { + "epoch": 15.01, + "learning_rate": 5.853707511219148e-06, + "loss": 0.7434, + "step": 30670 + }, + { + "epoch": 15.01, + "learning_rate": 5.848599624395239e-06, + "loss": 0.8021, + "step": 30680 + }, + { + "epoch": 15.01, + "learning_rate": 5.8434925424438125e-06, + "loss": 0.5919, + "step": 30690 + }, + { + "epoch": 15.01, + "learning_rate": 5.838386267853996e-06, + "loss": 0.7574, + "step": 30700 + }, + { + "epoch": 15.01, + "learning_rate": 5.833280803114527e-06, + "loss": 0.7047, + "step": 30710 + }, + { + "epoch": 15.01, + "learning_rate": 5.828176150713747e-06, + "loss": 0.4341, + "step": 30720 + }, + { + "epoch": 15.01, + "learning_rate": 5.823072313139602e-06, + "loss": 0.8295, + "step": 30730 + }, + { + "epoch": 15.01, + "learning_rate": 5.817969292879642e-06, + "loss": 0.2953, + "step": 30740 + }, + { + "epoch": 15.02, + "learning_rate": 5.812867092421014e-06, + "loss": 0.2567, + "step": 30750 + }, + { + "epoch": 15.02, + "learning_rate": 5.807765714250469e-06, + "loss": 0.6689, + "step": 30760 + }, + { + "epoch": 15.02, + "learning_rate": 5.802665160854357e-06, + "loss": 0.6787, + "step": 30770 + }, + { + "epoch": 15.02, + "learning_rate": 5.797565434718626e-06, + "loss": 0.7291, + "step": 30780 + }, + { + "epoch": 15.02, + "learning_rate": 5.79246653832882e-06, + "loss": 0.4689, + "step": 30790 + }, + { + "epoch": 15.02, + "learning_rate": 5.787368474170081e-06, + "loss": 0.144, + "step": 30800 + }, + { + "epoch": 15.02, + "learning_rate": 5.782271244727144e-06, + "loss": 0.598, + "step": 30810 + }, + { + "epoch": 15.02, + "learning_rate": 5.777174852484333e-06, + "loss": 0.7588, + "step": 30820 + }, + { + "epoch": 15.02, + "learning_rate": 5.772079299925573e-06, + "loss": 0.4024, + "step": 30830 + }, + { + "epoch": 15.02, + "learning_rate": 5.766984589534368e-06, + "loss": 0.6319, + "step": 30840 + }, + { + "epoch": 15.02, + "learning_rate": 5.761890723793821e-06, + "loss": 0.5051, + "step": 30850 + }, + { + "epoch": 15.02, + "learning_rate": 5.75679770518662e-06, + "loss": 0.428, + "step": 30860 + }, + { + "epoch": 15.02, + "learning_rate": 5.751705536195043e-06, + "loss": 0.6227, + "step": 30870 + }, + { + "epoch": 15.02, + "learning_rate": 5.7466142193009485e-06, + "loss": 0.5371, + "step": 30880 + }, + { + "epoch": 15.02, + "learning_rate": 5.7415237569857805e-06, + "loss": 0.5355, + "step": 30890 + }, + { + "epoch": 15.02, + "learning_rate": 5.736434151730575e-06, + "loss": 0.5317, + "step": 30900 + }, + { + "epoch": 15.02, + "learning_rate": 5.731345406015938e-06, + "loss": 0.7229, + "step": 30910 + }, + { + "epoch": 15.02, + "learning_rate": 5.726257522322067e-06, + "loss": 0.5791, + "step": 30920 + }, + { + "epoch": 15.02, + "learning_rate": 5.7211705031287285e-06, + "loss": 0.8684, + "step": 30930 + }, + { + "epoch": 15.02, + "learning_rate": 5.716084350915279e-06, + "loss": 0.2345, + "step": 30940 + }, + { + "epoch": 15.02, + "learning_rate": 5.710999068160645e-06, + "loss": 0.4998, + "step": 30950 + }, + { + "epoch": 15.02, + "learning_rate": 5.70591465734333e-06, + "loss": 0.599, + "step": 30960 + }, + { + "epoch": 15.02, + "learning_rate": 5.7008311209414195e-06, + "loss": 0.5463, + "step": 30970 + }, + { + "epoch": 15.02, + "learning_rate": 5.695748461432562e-06, + "loss": 0.4036, + "step": 30980 + }, + { + "epoch": 15.02, + "learning_rate": 5.690666681293987e-06, + "loss": 0.393, + "step": 30990 + }, + { + "epoch": 15.02, + "learning_rate": 5.685585783002493e-06, + "loss": 0.4896, + "step": 31000 + }, + { + "epoch": 15.02, + "learning_rate": 5.680505769034446e-06, + "loss": 0.4201, + "step": 31010 + }, + { + "epoch": 15.02, + "learning_rate": 5.675426641865783e-06, + "loss": 0.8405, + "step": 31020 + }, + { + "epoch": 15.02, + "learning_rate": 5.6703484039720085e-06, + "loss": 0.5632, + "step": 31030 + }, + { + "epoch": 15.02, + "learning_rate": 5.665271057828196e-06, + "loss": 0.3888, + "step": 31040 + }, + { + "epoch": 15.02, + "learning_rate": 5.66019460590898e-06, + "loss": 0.398, + "step": 31050 + }, + { + "epoch": 15.02, + "learning_rate": 5.655119050688565e-06, + "loss": 0.6701, + "step": 31060 + }, + { + "epoch": 15.02, + "learning_rate": 5.650044394640713e-06, + "loss": 0.5749, + "step": 31070 + }, + { + "epoch": 15.02, + "learning_rate": 5.644970640238747e-06, + "loss": 0.5094, + "step": 31080 + }, + { + "epoch": 15.02, + "learning_rate": 5.63989778995556e-06, + "loss": 0.5268, + "step": 31090 + }, + { + "epoch": 15.02, + "learning_rate": 5.6348258462635905e-06, + "loss": 0.4821, + "step": 31100 + }, + { + "epoch": 15.02, + "learning_rate": 5.629754811634846e-06, + "loss": 0.5058, + "step": 31110 + }, + { + "epoch": 15.02, + "learning_rate": 5.624684688540884e-06, + "loss": 0.2582, + "step": 31120 + }, + { + "epoch": 15.02, + "learning_rate": 5.619615479452826e-06, + "loss": 0.7632, + "step": 31130 + }, + { + "epoch": 15.02, + "learning_rate": 5.614547186841339e-06, + "loss": 0.6387, + "step": 31140 + }, + { + "epoch": 15.02, + "learning_rate": 5.609479813176648e-06, + "loss": 0.4985, + "step": 31150 + }, + { + "epoch": 15.02, + "learning_rate": 5.6044133609285305e-06, + "loss": 0.454, + "step": 31160 + }, + { + "epoch": 15.02, + "learning_rate": 5.5993478325663125e-06, + "loss": 0.7597, + "step": 31170 + }, + { + "epoch": 15.02, + "learning_rate": 5.594283230558874e-06, + "loss": 0.3568, + "step": 31180 + }, + { + "epoch": 15.02, + "learning_rate": 5.589219557374635e-06, + "loss": 0.3692, + "step": 31190 + }, + { + "epoch": 15.02, + "learning_rate": 5.5841568154815725e-06, + "loss": 0.6227, + "step": 31200 + }, + { + "epoch": 15.02, + "learning_rate": 5.579095007347204e-06, + "loss": 0.3188, + "step": 31210 + }, + { + "epoch": 15.02, + "learning_rate": 5.574034135438591e-06, + "loss": 0.5097, + "step": 31220 + }, + { + "epoch": 15.02, + "learning_rate": 5.568974202222348e-06, + "loss": 0.7134, + "step": 31230 + }, + { + "epoch": 15.02, + "learning_rate": 5.5639152101646184e-06, + "loss": 0.6648, + "step": 31240 + }, + { + "epoch": 15.03, + "learning_rate": 5.558857161731094e-06, + "loss": 0.9005, + "step": 31250 + }, + { + "epoch": 15.03, + "learning_rate": 5.55380005938701e-06, + "loss": 0.6035, + "step": 31260 + }, + { + "epoch": 15.03, + "learning_rate": 5.548743905597135e-06, + "loss": 0.3174, + "step": 31270 + }, + { + "epoch": 15.03, + "learning_rate": 5.543688702825775e-06, + "loss": 0.7496, + "step": 31280 + }, + { + "epoch": 15.03, + "learning_rate": 5.538634453536774e-06, + "loss": 0.6956, + "step": 31290 + }, + { + "epoch": 15.03, + "learning_rate": 5.533581160193516e-06, + "loss": 0.6995, + "step": 31300 + }, + { + "epoch": 15.03, + "learning_rate": 5.5285288252589095e-06, + "loss": 0.4295, + "step": 31310 + }, + { + "epoch": 15.03, + "learning_rate": 5.5234774511954065e-06, + "loss": 0.5449, + "step": 31320 + }, + { + "epoch": 15.03, + "learning_rate": 5.5184270404649815e-06, + "loss": 0.7376, + "step": 31330 + }, + { + "epoch": 15.03, + "learning_rate": 5.513377595529143e-06, + "loss": 0.5837, + "step": 31340 + }, + { + "epoch": 15.03, + "learning_rate": 5.508329118848934e-06, + "loss": 1.0156, + "step": 31350 + }, + { + "epoch": 15.03, + "learning_rate": 5.503281612884917e-06, + "loss": 0.6391, + "step": 31360 + }, + { + "epoch": 15.03, + "learning_rate": 5.498235080097185e-06, + "loss": 0.4268, + "step": 31370 + }, + { + "epoch": 15.03, + "learning_rate": 5.493189522945356e-06, + "loss": 0.4658, + "step": 31380 + }, + { + "epoch": 15.03, + "learning_rate": 5.488144943888575e-06, + "loss": 0.7193, + "step": 31390 + }, + { + "epoch": 15.03, + "learning_rate": 5.483101345385508e-06, + "loss": 1.1998, + "step": 31400 + }, + { + "epoch": 15.03, + "learning_rate": 5.478058729894342e-06, + "loss": 0.3759, + "step": 31410 + }, + { + "epoch": 15.03, + "learning_rate": 5.4730170998727915e-06, + "loss": 1.0222, + "step": 31420 + }, + { + "epoch": 15.03, + "learning_rate": 5.4679764577780815e-06, + "loss": 0.6979, + "step": 31430 + }, + { + "epoch": 15.03, + "learning_rate": 5.4629368060669585e-06, + "loss": 0.6705, + "step": 31440 + }, + { + "epoch": 15.03, + "learning_rate": 5.457898147195693e-06, + "loss": 0.2831, + "step": 31450 + }, + { + "epoch": 15.03, + "learning_rate": 5.452860483620062e-06, + "loss": 0.5871, + "step": 31460 + }, + { + "epoch": 15.03, + "learning_rate": 5.447823817795362e-06, + "loss": 0.6708, + "step": 31470 + }, + { + "epoch": 15.03, + "learning_rate": 5.442788152176401e-06, + "loss": 0.3945, + "step": 31480 + }, + { + "epoch": 15.03, + "learning_rate": 5.437753489217505e-06, + "loss": 1.0316, + "step": 31490 + }, + { + "epoch": 15.03, + "learning_rate": 5.432719831372507e-06, + "loss": 0.4664, + "step": 31500 + }, + { + "epoch": 15.03, + "learning_rate": 5.427687181094748e-06, + "loss": 0.4658, + "step": 31510 + }, + { + "epoch": 15.03, + "learning_rate": 5.422655540837083e-06, + "loss": 0.4841, + "step": 31520 + }, + { + "epoch": 15.03, + "learning_rate": 5.417624913051869e-06, + "loss": 0.4642, + "step": 31530 + }, + { + "epoch": 15.03, + "learning_rate": 5.412595300190981e-06, + "loss": 0.5075, + "step": 31540 + }, + { + "epoch": 15.03, + "learning_rate": 5.407566704705782e-06, + "loss": 0.7411, + "step": 31550 + }, + { + "epoch": 15.03, + "learning_rate": 5.402539129047152e-06, + "loss": 1.0875, + "step": 31560 + }, + { + "epoch": 15.03, + "learning_rate": 5.397512575665469e-06, + "loss": 0.7321, + "step": 31570 + }, + { + "epoch": 15.03, + "learning_rate": 5.392487047010618e-06, + "loss": 0.537, + "step": 31580 + }, + { + "epoch": 15.03, + "learning_rate": 5.387462545531976e-06, + "loss": 0.4299, + "step": 31590 + }, + { + "epoch": 15.03, + "learning_rate": 5.382439073678426e-06, + "loss": 0.5034, + "step": 31600 + }, + { + "epoch": 15.03, + "learning_rate": 5.377416633898348e-06, + "loss": 0.7486, + "step": 31610 + }, + { + "epoch": 15.03, + "learning_rate": 5.372395228639619e-06, + "loss": 0.6522, + "step": 31620 + }, + { + "epoch": 15.03, + "learning_rate": 5.367374860349607e-06, + "loss": 0.5415, + "step": 31630 + }, + { + "epoch": 15.03, + "learning_rate": 5.362355531475179e-06, + "loss": 0.592, + "step": 31640 + }, + { + "epoch": 15.03, + "learning_rate": 5.357337244462699e-06, + "loss": 0.2441, + "step": 31650 + }, + { + "epoch": 15.03, + "learning_rate": 5.352320001758017e-06, + "loss": 0.6928, + "step": 31660 + }, + { + "epoch": 15.03, + "learning_rate": 5.3473038058064725e-06, + "loss": 0.3521, + "step": 31670 + }, + { + "epoch": 15.03, + "learning_rate": 5.342288659052905e-06, + "loss": 0.5533, + "step": 31680 + }, + { + "epoch": 15.03, + "learning_rate": 5.337274563941633e-06, + "loss": 0.6759, + "step": 31690 + }, + { + "epoch": 15.03, + "learning_rate": 5.332261522916461e-06, + "loss": 0.4252, + "step": 31700 + }, + { + "epoch": 15.03, + "learning_rate": 5.3272495384206945e-06, + "loss": 0.8168, + "step": 31710 + }, + { + "epoch": 15.03, + "learning_rate": 5.322238612897107e-06, + "loss": 0.619, + "step": 31720 + }, + { + "epoch": 15.03, + "learning_rate": 5.317228748787963e-06, + "loss": 0.8278, + "step": 31730 + }, + { + "epoch": 15.03, + "learning_rate": 5.3122199485350085e-06, + "loss": 0.6556, + "step": 31740 + }, + { + "epoch": 15.04, + "learning_rate": 5.307212214579475e-06, + "loss": 0.3492, + "step": 31750 + }, + { + "epoch": 15.04, + "learning_rate": 5.3022055493620705e-06, + "loss": 0.5529, + "step": 31760 + }, + { + "epoch": 15.04, + "learning_rate": 5.29719995532298e-06, + "loss": 0.5318, + "step": 31770 + }, + { + "epoch": 15.04, + "learning_rate": 5.292195434901873e-06, + "loss": 0.6974, + "step": 31780 + }, + { + "epoch": 15.04, + "learning_rate": 5.2871919905378896e-06, + "loss": 0.4782, + "step": 31790 + }, + { + "epoch": 15.04, + "learning_rate": 5.282189624669653e-06, + "loss": 0.6933, + "step": 31800 + }, + { + "epoch": 15.04, + "learning_rate": 5.277188339735244e-06, + "loss": 0.5515, + "step": 31810 + }, + { + "epoch": 15.04, + "learning_rate": 5.272188138172239e-06, + "loss": 0.4032, + "step": 31820 + }, + { + "epoch": 15.04, + "learning_rate": 5.267189022417672e-06, + "loss": 0.7064, + "step": 31830 + }, + { + "epoch": 15.04, + "learning_rate": 5.2621909949080485e-06, + "loss": 0.814, + "step": 31840 + }, + { + "epoch": 15.04, + "learning_rate": 5.2571940580793506e-06, + "loss": 0.5002, + "step": 31850 + }, + { + "epoch": 15.04, + "learning_rate": 5.252198214367021e-06, + "loss": 0.6971, + "step": 31860 + }, + { + "epoch": 15.04, + "learning_rate": 5.247203466205977e-06, + "loss": 0.6614, + "step": 31870 + }, + { + "epoch": 15.04, + "learning_rate": 5.242209816030597e-06, + "loss": 0.4285, + "step": 31880 + }, + { + "epoch": 15.04, + "learning_rate": 5.237217266274726e-06, + "loss": 0.4315, + "step": 31890 + }, + { + "epoch": 15.04, + "learning_rate": 5.232225819371667e-06, + "loss": 0.4179, + "step": 31900 + }, + { + "epoch": 15.04, + "learning_rate": 5.227235477754197e-06, + "loss": 0.5745, + "step": 31910 + }, + { + "epoch": 15.04, + "learning_rate": 5.222246243854544e-06, + "loss": 0.2488, + "step": 31920 + }, + { + "epoch": 15.04, + "learning_rate": 5.217258120104401e-06, + "loss": 0.5487, + "step": 31930 + }, + { + "epoch": 15.04, + "learning_rate": 5.212271108934919e-06, + "loss": 0.3712, + "step": 31940 + }, + { + "epoch": 15.04, + "learning_rate": 5.207285212776708e-06, + "loss": 0.6056, + "step": 31950 + }, + { + "epoch": 15.04, + "learning_rate": 5.20230043405983e-06, + "loss": 0.3832, + "step": 31960 + }, + { + "epoch": 15.04, + "learning_rate": 5.19731677521381e-06, + "loss": 0.4765, + "step": 31970 + }, + { + "epoch": 15.04, + "learning_rate": 5.192334238667622e-06, + "loss": 0.6145, + "step": 31980 + }, + { + "epoch": 15.04, + "learning_rate": 5.18735282684969e-06, + "loss": 0.3832, + "step": 31990 + }, + { + "epoch": 15.04, + "learning_rate": 5.182372542187895e-06, + "loss": 0.3262, + "step": 32000 + }, + { + "epoch": 15.04, + "eval_accuracy": 0.8768421052631579, + "eval_f1": 0.876842105263158, + "eval_loss": 0.7663307785987854, + "eval_runtime": 750.321, + "eval_samples_per_second": 6.331, + "eval_steps_per_second": 1.583, + "step": 32000 + }, + { + "epoch": 16.0, + "learning_rate": 5.177393387109569e-06, + "loss": 0.4934, + "step": 32010 + }, + { + "epoch": 16.0, + "learning_rate": 5.172415364041492e-06, + "loss": 0.7969, + "step": 32020 + }, + { + "epoch": 16.0, + "learning_rate": 5.167438475409888e-06, + "loss": 0.6713, + "step": 32030 + }, + { + "epoch": 16.0, + "learning_rate": 5.162462723640436e-06, + "loss": 0.4962, + "step": 32040 + }, + { + "epoch": 16.0, + "learning_rate": 5.157488111158255e-06, + "loss": 0.465, + "step": 32050 + }, + { + "epoch": 16.0, + "learning_rate": 5.152514640387912e-06, + "loss": 0.4099, + "step": 32060 + }, + { + "epoch": 16.0, + "learning_rate": 5.147542313753419e-06, + "loss": 0.228, + "step": 32070 + }, + { + "epoch": 16.0, + "learning_rate": 5.142571133678222e-06, + "loss": 0.85, + "step": 32080 + }, + { + "epoch": 16.0, + "learning_rate": 5.137601102585216e-06, + "loss": 0.5891, + "step": 32090 + }, + { + "epoch": 16.0, + "learning_rate": 5.132632222896733e-06, + "loss": 0.3639, + "step": 32100 + }, + { + "epoch": 16.0, + "learning_rate": 5.127664497034546e-06, + "loss": 0.5626, + "step": 32110 + }, + { + "epoch": 16.0, + "learning_rate": 5.122697927419861e-06, + "loss": 0.7871, + "step": 32120 + }, + { + "epoch": 16.0, + "learning_rate": 5.1177325164733296e-06, + "loss": 0.3236, + "step": 32130 + }, + { + "epoch": 16.0, + "learning_rate": 5.11276826661503e-06, + "loss": 0.6567, + "step": 32140 + }, + { + "epoch": 16.0, + "learning_rate": 5.107805180264475e-06, + "loss": 0.5179, + "step": 32150 + }, + { + "epoch": 16.0, + "learning_rate": 5.102843259840616e-06, + "loss": 0.6067, + "step": 32160 + }, + { + "epoch": 16.0, + "learning_rate": 5.097882507761832e-06, + "loss": 0.3634, + "step": 32170 + }, + { + "epoch": 16.0, + "learning_rate": 5.09292292644593e-06, + "loss": 0.6364, + "step": 32180 + }, + { + "epoch": 16.0, + "learning_rate": 5.087964518310153e-06, + "loss": 0.9535, + "step": 32190 + }, + { + "epoch": 16.0, + "learning_rate": 5.083007285771168e-06, + "loss": 0.712, + "step": 32200 + }, + { + "epoch": 16.0, + "learning_rate": 5.07805123124507e-06, + "loss": 0.3794, + "step": 32210 + }, + { + "epoch": 16.0, + "learning_rate": 5.073096357147379e-06, + "loss": 0.4944, + "step": 32220 + }, + { + "epoch": 16.0, + "learning_rate": 5.068142665893042e-06, + "loss": 1.1231, + "step": 32230 + }, + { + "epoch": 16.0, + "learning_rate": 5.063190159896428e-06, + "loss": 0.619, + "step": 32240 + }, + { + "epoch": 16.0, + "learning_rate": 5.058238841571326e-06, + "loss": 0.6341, + "step": 32250 + }, + { + "epoch": 16.01, + "learning_rate": 5.05328871333095e-06, + "loss": 0.6589, + "step": 32260 + }, + { + "epoch": 16.01, + "learning_rate": 5.0483397775879325e-06, + "loss": 0.5169, + "step": 32270 + }, + { + "epoch": 16.01, + "learning_rate": 5.043392036754324e-06, + "loss": 0.4261, + "step": 32280 + }, + { + "epoch": 16.01, + "learning_rate": 5.038445493241593e-06, + "loss": 0.4723, + "step": 32290 + }, + { + "epoch": 16.01, + "learning_rate": 5.033500149460626e-06, + "loss": 0.653, + "step": 32300 + }, + { + "epoch": 16.01, + "learning_rate": 5.028556007821722e-06, + "loss": 0.535, + "step": 32310 + }, + { + "epoch": 16.01, + "learning_rate": 5.023613070734597e-06, + "loss": 0.706, + "step": 32320 + }, + { + "epoch": 16.01, + "learning_rate": 5.01867134060838e-06, + "loss": 0.9763, + "step": 32330 + }, + { + "epoch": 16.01, + "learning_rate": 5.013730819851606e-06, + "loss": 0.6044, + "step": 32340 + }, + { + "epoch": 16.01, + "learning_rate": 5.008791510872227e-06, + "loss": 0.6842, + "step": 32350 + }, + { + "epoch": 16.01, + "learning_rate": 5.003853416077601e-06, + "loss": 0.4799, + "step": 32360 + }, + { + "epoch": 16.01, + "learning_rate": 4.998916537874498e-06, + "loss": 0.7376, + "step": 32370 + }, + { + "epoch": 16.01, + "learning_rate": 4.9939808786690885e-06, + "loss": 0.3854, + "step": 32380 + }, + { + "epoch": 16.01, + "learning_rate": 4.989046440866958e-06, + "loss": 0.8238, + "step": 32390 + }, + { + "epoch": 16.01, + "learning_rate": 4.984113226873088e-06, + "loss": 0.5112, + "step": 32400 + }, + { + "epoch": 16.01, + "learning_rate": 4.979181239091867e-06, + "loss": 0.4544, + "step": 32410 + }, + { + "epoch": 16.01, + "learning_rate": 4.97425047992709e-06, + "loss": 0.5501, + "step": 32420 + }, + { + "epoch": 16.01, + "learning_rate": 4.969320951781942e-06, + "loss": 0.4778, + "step": 32430 + }, + { + "epoch": 16.01, + "learning_rate": 4.964392657059022e-06, + "loss": 0.7256, + "step": 32440 + }, + { + "epoch": 16.01, + "learning_rate": 4.959465598160315e-06, + "loss": 0.5249, + "step": 32450 + }, + { + "epoch": 16.01, + "learning_rate": 4.9545397774872146e-06, + "loss": 0.3695, + "step": 32460 + }, + { + "epoch": 16.01, + "learning_rate": 4.9496151974405045e-06, + "loss": 0.6224, + "step": 32470 + }, + { + "epoch": 16.01, + "learning_rate": 4.944691860420363e-06, + "loss": 0.5238, + "step": 32480 + }, + { + "epoch": 16.01, + "learning_rate": 4.939769768826367e-06, + "loss": 0.5127, + "step": 32490 + }, + { + "epoch": 16.01, + "learning_rate": 4.934848925057485e-06, + "loss": 0.8212, + "step": 32500 + }, + { + "epoch": 16.01, + "learning_rate": 4.929929331512076e-06, + "loss": 0.4451, + "step": 32510 + }, + { + "epoch": 16.01, + "learning_rate": 4.925010990587886e-06, + "loss": 0.472, + "step": 32520 + }, + { + "epoch": 16.01, + "learning_rate": 4.92009390468206e-06, + "loss": 0.9875, + "step": 32530 + }, + { + "epoch": 16.01, + "learning_rate": 4.915178076191123e-06, + "loss": 0.4381, + "step": 32540 + }, + { + "epoch": 16.01, + "learning_rate": 4.9102635075109905e-06, + "loss": 0.4436, + "step": 32550 + }, + { + "epoch": 16.01, + "learning_rate": 4.905350201036965e-06, + "loss": 0.3102, + "step": 32560 + }, + { + "epoch": 16.01, + "learning_rate": 4.9004381591637295e-06, + "loss": 0.7939, + "step": 32570 + }, + { + "epoch": 16.01, + "learning_rate": 4.895527384285357e-06, + "loss": 0.8265, + "step": 32580 + }, + { + "epoch": 16.01, + "learning_rate": 4.890617878795299e-06, + "loss": 0.4009, + "step": 32590 + }, + { + "epoch": 16.01, + "learning_rate": 4.885709645086388e-06, + "loss": 0.4055, + "step": 32600 + }, + { + "epoch": 16.01, + "learning_rate": 4.880802685550836e-06, + "loss": 1.0053, + "step": 32610 + }, + { + "epoch": 16.01, + "learning_rate": 4.875897002580235e-06, + "loss": 0.2254, + "step": 32620 + }, + { + "epoch": 16.01, + "learning_rate": 4.870992598565559e-06, + "loss": 0.3148, + "step": 32630 + }, + { + "epoch": 16.01, + "learning_rate": 4.86608947589715e-06, + "loss": 0.5144, + "step": 32640 + }, + { + "epoch": 16.01, + "learning_rate": 4.861187636964736e-06, + "loss": 0.5433, + "step": 32650 + }, + { + "epoch": 16.01, + "learning_rate": 4.8562870841574115e-06, + "loss": 0.4401, + "step": 32660 + }, + { + "epoch": 16.01, + "learning_rate": 4.851387819863644e-06, + "loss": 0.6315, + "step": 32670 + }, + { + "epoch": 16.01, + "learning_rate": 4.846489846471279e-06, + "loss": 0.5919, + "step": 32680 + }, + { + "epoch": 16.01, + "learning_rate": 4.841593166367529e-06, + "loss": 1.168, + "step": 32690 + }, + { + "epoch": 16.01, + "learning_rate": 4.836697781938973e-06, + "loss": 0.5355, + "step": 32700 + }, + { + "epoch": 16.01, + "learning_rate": 4.831803695571563e-06, + "loss": 0.3735, + "step": 32710 + }, + { + "epoch": 16.01, + "learning_rate": 4.82691090965062e-06, + "loss": 0.4671, + "step": 32720 + }, + { + "epoch": 16.01, + "learning_rate": 4.822019426560827e-06, + "loss": 0.7564, + "step": 32730 + }, + { + "epoch": 16.01, + "learning_rate": 4.817129248686231e-06, + "loss": 0.3753, + "step": 32740 + }, + { + "epoch": 16.02, + "learning_rate": 4.812240378410248e-06, + "loss": 0.5225, + "step": 32750 + }, + { + "epoch": 16.02, + "learning_rate": 4.8073528181156535e-06, + "loss": 0.78, + "step": 32760 + }, + { + "epoch": 16.02, + "learning_rate": 4.802466570184584e-06, + "loss": 0.6062, + "step": 32770 + }, + { + "epoch": 16.02, + "learning_rate": 4.797581636998541e-06, + "loss": 0.6152, + "step": 32780 + }, + { + "epoch": 16.02, + "learning_rate": 4.792698020938377e-06, + "loss": 0.2874, + "step": 32790 + }, + { + "epoch": 16.02, + "learning_rate": 4.787815724384309e-06, + "loss": 0.5955, + "step": 32800 + }, + { + "epoch": 16.02, + "learning_rate": 4.782934749715908e-06, + "loss": 0.654, + "step": 32810 + }, + { + "epoch": 16.02, + "learning_rate": 4.778055099312103e-06, + "loss": 0.3857, + "step": 32820 + }, + { + "epoch": 16.02, + "learning_rate": 4.7731767755511775e-06, + "loss": 0.5285, + "step": 32830 + }, + { + "epoch": 16.02, + "learning_rate": 4.768299780810763e-06, + "loss": 0.7611, + "step": 32840 + }, + { + "epoch": 16.02, + "learning_rate": 4.763424117467852e-06, + "loss": 0.759, + "step": 32850 + }, + { + "epoch": 16.02, + "learning_rate": 4.758549787898785e-06, + "loss": 0.6901, + "step": 32860 + }, + { + "epoch": 16.02, + "learning_rate": 4.753676794479244e-06, + "loss": 0.8388, + "step": 32870 + }, + { + "epoch": 16.02, + "learning_rate": 4.74880513958427e-06, + "loss": 0.3117, + "step": 32880 + }, + { + "epoch": 16.02, + "learning_rate": 4.743934825588252e-06, + "loss": 0.4733, + "step": 32890 + }, + { + "epoch": 16.02, + "learning_rate": 4.739065854864916e-06, + "loss": 0.297, + "step": 32900 + }, + { + "epoch": 16.02, + "learning_rate": 4.734198229787343e-06, + "loss": 0.4909, + "step": 32910 + }, + { + "epoch": 16.02, + "learning_rate": 4.729331952727954e-06, + "loss": 0.4957, + "step": 32920 + }, + { + "epoch": 16.02, + "learning_rate": 4.724467026058512e-06, + "loss": 0.3845, + "step": 32930 + }, + { + "epoch": 16.02, + "learning_rate": 4.719603452150125e-06, + "loss": 0.3681, + "step": 32940 + }, + { + "epoch": 16.02, + "learning_rate": 4.714741233373241e-06, + "loss": 0.5048, + "step": 32950 + }, + { + "epoch": 16.02, + "learning_rate": 4.709880372097642e-06, + "loss": 0.5012, + "step": 32960 + }, + { + "epoch": 16.02, + "learning_rate": 4.705020870692452e-06, + "loss": 0.5737, + "step": 32970 + }, + { + "epoch": 16.02, + "learning_rate": 4.7001627315261395e-06, + "loss": 0.7233, + "step": 32980 + }, + { + "epoch": 16.02, + "learning_rate": 4.695305956966498e-06, + "loss": 0.5549, + "step": 32990 + }, + { + "epoch": 16.02, + "learning_rate": 4.6904505493806595e-06, + "loss": 0.442, + "step": 33000 + }, + { + "epoch": 16.02, + "learning_rate": 4.685596511135094e-06, + "loss": 0.7247, + "step": 33010 + }, + { + "epoch": 16.02, + "learning_rate": 4.6807438445956e-06, + "loss": 0.6389, + "step": 33020 + }, + { + "epoch": 16.02, + "learning_rate": 4.675892552127305e-06, + "loss": 0.4903, + "step": 33030 + }, + { + "epoch": 16.02, + "learning_rate": 4.671042636094677e-06, + "loss": 0.6385, + "step": 33040 + }, + { + "epoch": 16.02, + "learning_rate": 4.666194098861498e-06, + "loss": 0.6107, + "step": 33050 + }, + { + "epoch": 16.02, + "learning_rate": 4.661346942790891e-06, + "loss": 0.8643, + "step": 33060 + }, + { + "epoch": 16.02, + "learning_rate": 4.656501170245297e-06, + "loss": 0.2314, + "step": 33070 + }, + { + "epoch": 16.02, + "learning_rate": 4.651656783586493e-06, + "loss": 0.2656, + "step": 33080 + }, + { + "epoch": 16.02, + "learning_rate": 4.646813785175569e-06, + "loss": 0.5808, + "step": 33090 + }, + { + "epoch": 16.02, + "learning_rate": 4.641972177372944e-06, + "loss": 0.3246, + "step": 33100 + }, + { + "epoch": 16.02, + "learning_rate": 4.637131962538362e-06, + "loss": 0.4211, + "step": 33110 + }, + { + "epoch": 16.02, + "learning_rate": 4.632293143030881e-06, + "loss": 0.599, + "step": 33120 + }, + { + "epoch": 16.02, + "learning_rate": 4.62745572120889e-06, + "loss": 0.5716, + "step": 33130 + }, + { + "epoch": 16.02, + "learning_rate": 4.6226196994300784e-06, + "loss": 0.229, + "step": 33140 + }, + { + "epoch": 16.02, + "learning_rate": 4.6177850800514735e-06, + "loss": 0.5619, + "step": 33150 + }, + { + "epoch": 16.02, + "learning_rate": 4.612951865429405e-06, + "loss": 0.3864, + "step": 33160 + }, + { + "epoch": 16.02, + "learning_rate": 4.6081200579195265e-06, + "loss": 0.6615, + "step": 33170 + }, + { + "epoch": 16.02, + "learning_rate": 4.6032896598768006e-06, + "loss": 0.6474, + "step": 33180 + }, + { + "epoch": 16.02, + "learning_rate": 4.5984606736555045e-06, + "loss": 0.5483, + "step": 33190 + }, + { + "epoch": 16.02, + "learning_rate": 4.5936331016092285e-06, + "loss": 0.3562, + "step": 33200 + }, + { + "epoch": 16.02, + "learning_rate": 4.58880694609087e-06, + "loss": 0.5079, + "step": 33210 + }, + { + "epoch": 16.02, + "learning_rate": 4.583982209452645e-06, + "loss": 0.3317, + "step": 33220 + }, + { + "epoch": 16.02, + "learning_rate": 4.579158894046062e-06, + "loss": 0.3543, + "step": 33230 + }, + { + "epoch": 16.02, + "learning_rate": 4.574337002221953e-06, + "loss": 0.6051, + "step": 33240 + }, + { + "epoch": 16.02, + "learning_rate": 4.569516536330448e-06, + "loss": 0.5412, + "step": 33250 + }, + { + "epoch": 16.03, + "learning_rate": 4.564697498720982e-06, + "loss": 0.8271, + "step": 33260 + }, + { + "epoch": 16.03, + "learning_rate": 4.5598798917422975e-06, + "loss": 0.5189, + "step": 33270 + }, + { + "epoch": 16.03, + "learning_rate": 4.555063717742438e-06, + "loss": 0.4982, + "step": 33280 + }, + { + "epoch": 16.03, + "learning_rate": 4.550248979068744e-06, + "loss": 0.3839, + "step": 33290 + }, + { + "epoch": 16.03, + "learning_rate": 4.5454356780678666e-06, + "loss": 0.7477, + "step": 33300 + }, + { + "epoch": 16.03, + "learning_rate": 4.540623817085747e-06, + "loss": 0.3904, + "step": 33310 + }, + { + "epoch": 16.03, + "learning_rate": 4.535813398467629e-06, + "loss": 0.4538, + "step": 33320 + }, + { + "epoch": 16.03, + "learning_rate": 4.531004424558048e-06, + "loss": 0.3612, + "step": 33330 + }, + { + "epoch": 16.03, + "learning_rate": 4.526196897700844e-06, + "loss": 0.4546, + "step": 33340 + }, + { + "epoch": 16.03, + "learning_rate": 4.521390820239146e-06, + "loss": 0.3839, + "step": 33350 + }, + { + "epoch": 16.03, + "learning_rate": 4.516586194515378e-06, + "loss": 0.8155, + "step": 33360 + }, + { + "epoch": 16.03, + "learning_rate": 4.511783022871256e-06, + "loss": 0.289, + "step": 33370 + }, + { + "epoch": 16.03, + "learning_rate": 4.506981307647786e-06, + "loss": 1.0293, + "step": 33380 + }, + { + "epoch": 16.03, + "learning_rate": 4.502181051185268e-06, + "loss": 0.4668, + "step": 33390 + }, + { + "epoch": 16.03, + "learning_rate": 4.497382255823289e-06, + "loss": 0.4752, + "step": 33400 + }, + { + "epoch": 16.03, + "learning_rate": 4.492584923900722e-06, + "loss": 0.7419, + "step": 33410 + }, + { + "epoch": 16.03, + "learning_rate": 4.487789057755726e-06, + "loss": 0.5496, + "step": 33420 + }, + { + "epoch": 16.03, + "learning_rate": 4.482994659725753e-06, + "loss": 0.3482, + "step": 33430 + }, + { + "epoch": 16.03, + "learning_rate": 4.478201732147531e-06, + "loss": 0.4638, + "step": 33440 + }, + { + "epoch": 16.03, + "learning_rate": 4.4734102773570745e-06, + "loss": 0.6718, + "step": 33450 + }, + { + "epoch": 16.03, + "learning_rate": 4.468620297689682e-06, + "loss": 0.2801, + "step": 33460 + }, + { + "epoch": 16.03, + "learning_rate": 4.463831795479932e-06, + "loss": 0.5753, + "step": 33470 + }, + { + "epoch": 16.03, + "learning_rate": 4.459044773061682e-06, + "loss": 0.6162, + "step": 33480 + }, + { + "epoch": 16.03, + "learning_rate": 4.454259232768066e-06, + "loss": 0.5012, + "step": 33490 + }, + { + "epoch": 16.03, + "learning_rate": 4.4494751769315e-06, + "loss": 0.4228, + "step": 33500 + }, + { + "epoch": 16.03, + "learning_rate": 4.444692607883674e-06, + "loss": 0.5142, + "step": 33510 + }, + { + "epoch": 16.03, + "learning_rate": 4.439911527955554e-06, + "loss": 0.442, + "step": 33520 + }, + { + "epoch": 16.03, + "learning_rate": 4.4351319394773836e-06, + "loss": 0.4225, + "step": 33530 + }, + { + "epoch": 16.03, + "learning_rate": 4.430353844778671e-06, + "loss": 0.5155, + "step": 33540 + }, + { + "epoch": 16.03, + "learning_rate": 4.425577246188204e-06, + "loss": 0.8971, + "step": 33550 + }, + { + "epoch": 16.03, + "learning_rate": 4.420802146034041e-06, + "loss": 0.8625, + "step": 33560 + }, + { + "epoch": 16.03, + "learning_rate": 4.416028546643505e-06, + "loss": 0.619, + "step": 33570 + }, + { + "epoch": 16.03, + "learning_rate": 4.4112564503431904e-06, + "loss": 0.756, + "step": 33580 + }, + { + "epoch": 16.03, + "learning_rate": 4.406485859458957e-06, + "loss": 0.3665, + "step": 33590 + }, + { + "epoch": 16.03, + "learning_rate": 4.401716776315938e-06, + "loss": 0.4867, + "step": 33600 + }, + { + "epoch": 16.03, + "learning_rate": 4.396949203238524e-06, + "loss": 0.8304, + "step": 33610 + }, + { + "epoch": 16.03, + "learning_rate": 4.392183142550369e-06, + "loss": 0.6656, + "step": 33620 + }, + { + "epoch": 16.03, + "learning_rate": 4.387418596574399e-06, + "loss": 0.5113, + "step": 33630 + }, + { + "epoch": 16.03, + "learning_rate": 4.382655567632791e-06, + "loss": 0.4387, + "step": 33640 + }, + { + "epoch": 16.03, + "learning_rate": 4.377894058046992e-06, + "loss": 0.4837, + "step": 33650 + }, + { + "epoch": 16.03, + "learning_rate": 4.3731340701377e-06, + "loss": 0.6714, + "step": 33660 + }, + { + "epoch": 16.03, + "learning_rate": 4.368375606224877e-06, + "loss": 0.7669, + "step": 33670 + }, + { + "epoch": 16.03, + "learning_rate": 4.36361866862774e-06, + "loss": 0.2445, + "step": 33680 + }, + { + "epoch": 16.03, + "learning_rate": 4.358863259664763e-06, + "loss": 0.4635, + "step": 33690 + }, + { + "epoch": 16.03, + "learning_rate": 4.354109381653672e-06, + "loss": 0.4453, + "step": 33700 + }, + { + "epoch": 16.03, + "learning_rate": 4.349357036911453e-06, + "loss": 0.5797, + "step": 33710 + }, + { + "epoch": 16.03, + "learning_rate": 4.344606227754341e-06, + "loss": 0.4042, + "step": 33720 + }, + { + "epoch": 16.03, + "learning_rate": 4.339856956497821e-06, + "loss": 0.4951, + "step": 33730 + }, + { + "epoch": 16.03, + "learning_rate": 4.33510922545663e-06, + "loss": 0.7429, + "step": 33740 + }, + { + "epoch": 16.04, + "learning_rate": 4.330363036944755e-06, + "loss": 0.6737, + "step": 33750 + }, + { + "epoch": 16.04, + "learning_rate": 4.325618393275432e-06, + "loss": 0.5931, + "step": 33760 + }, + { + "epoch": 16.04, + "learning_rate": 4.320875296761139e-06, + "loss": 0.6849, + "step": 33770 + }, + { + "epoch": 16.04, + "learning_rate": 4.316133749713608e-06, + "loss": 0.3549, + "step": 33780 + }, + { + "epoch": 16.04, + "learning_rate": 4.311393754443805e-06, + "loss": 0.6275, + "step": 33790 + }, + { + "epoch": 16.04, + "learning_rate": 4.306655313261955e-06, + "loss": 0.4788, + "step": 33800 + }, + { + "epoch": 16.04, + "learning_rate": 4.301918428477511e-06, + "loss": 0.5819, + "step": 33810 + }, + { + "epoch": 16.04, + "learning_rate": 4.2971831023991745e-06, + "loss": 0.4174, + "step": 33820 + }, + { + "epoch": 16.04, + "learning_rate": 4.292449337334886e-06, + "loss": 0.6669, + "step": 33830 + }, + { + "epoch": 16.04, + "learning_rate": 4.287717135591823e-06, + "loss": 0.4027, + "step": 33840 + }, + { + "epoch": 16.04, + "learning_rate": 4.2829864994764075e-06, + "loss": 0.6037, + "step": 33850 + }, + { + "epoch": 16.04, + "learning_rate": 4.278257431294289e-06, + "loss": 0.4935, + "step": 33860 + }, + { + "epoch": 16.04, + "learning_rate": 4.2735299333503615e-06, + "loss": 0.2093, + "step": 33870 + }, + { + "epoch": 16.04, + "learning_rate": 4.268804007948749e-06, + "loss": 0.4776, + "step": 33880 + }, + { + "epoch": 16.04, + "learning_rate": 4.264079657392807e-06, + "loss": 0.4668, + "step": 33890 + }, + { + "epoch": 16.04, + "learning_rate": 4.259356883985131e-06, + "loss": 0.8342, + "step": 33900 + }, + { + "epoch": 16.04, + "learning_rate": 4.254635690027544e-06, + "loss": 0.569, + "step": 33910 + }, + { + "epoch": 16.04, + "learning_rate": 4.249916077821096e-06, + "loss": 0.3638, + "step": 33920 + }, + { + "epoch": 16.04, + "learning_rate": 4.245198049666069e-06, + "loss": 0.832, + "step": 33930 + }, + { + "epoch": 16.04, + "learning_rate": 4.240481607861973e-06, + "loss": 0.8977, + "step": 33940 + }, + { + "epoch": 16.04, + "learning_rate": 4.235766754707545e-06, + "loss": 0.5267, + "step": 33950 + }, + { + "epoch": 16.04, + "learning_rate": 4.231053492500745e-06, + "loss": 0.4278, + "step": 33960 + }, + { + "epoch": 16.04, + "learning_rate": 4.2263418235387615e-06, + "loss": 0.5325, + "step": 33970 + }, + { + "epoch": 16.04, + "learning_rate": 4.221631750118005e-06, + "loss": 0.6714, + "step": 33980 + }, + { + "epoch": 16.04, + "learning_rate": 4.216923274534104e-06, + "loss": 1.2647, + "step": 33990 + }, + { + "epoch": 16.04, + "learning_rate": 4.212216399081919e-06, + "loss": 0.4387, + "step": 34000 + }, + { + "epoch": 16.04, + "eval_accuracy": 0.88, + "eval_f1": 0.88, + "eval_loss": 0.7549371719360352, + "eval_runtime": 770.409, + "eval_samples_per_second": 6.166, + "eval_steps_per_second": 1.542, + "step": 34000 + }, + { + "epoch": 17.0, + "learning_rate": 4.2075111260555195e-06, + "loss": 0.5721, + "step": 34010 + }, + { + "epoch": 17.0, + "learning_rate": 4.202807457748205e-06, + "loss": 0.4209, + "step": 34020 + }, + { + "epoch": 17.0, + "learning_rate": 4.1981053964524725e-06, + "loss": 0.2641, + "step": 34030 + }, + { + "epoch": 17.0, + "learning_rate": 4.193404944460062e-06, + "loss": 0.4175, + "step": 34040 + }, + { + "epoch": 17.0, + "learning_rate": 4.188706104061912e-06, + "loss": 0.3068, + "step": 34050 + }, + { + "epoch": 17.0, + "learning_rate": 4.18400887754818e-06, + "loss": 0.5447, + "step": 34060 + }, + { + "epoch": 17.0, + "learning_rate": 4.17931326720824e-06, + "loss": 0.4912, + "step": 34070 + }, + { + "epoch": 17.0, + "learning_rate": 4.1746192753306685e-06, + "loss": 0.3235, + "step": 34080 + }, + { + "epoch": 17.0, + "learning_rate": 4.16992690420327e-06, + "loss": 0.695, + "step": 34090 + }, + { + "epoch": 17.0, + "learning_rate": 4.165236156113046e-06, + "loss": 0.7171, + "step": 34100 + }, + { + "epoch": 17.0, + "learning_rate": 4.160547033346206e-06, + "loss": 0.2027, + "step": 34110 + }, + { + "epoch": 17.0, + "learning_rate": 4.15585953818817e-06, + "loss": 0.4179, + "step": 34120 + }, + { + "epoch": 17.0, + "learning_rate": 4.151173672923573e-06, + "loss": 0.545, + "step": 34130 + }, + { + "epoch": 17.0, + "learning_rate": 4.146489439836246e-06, + "loss": 0.4815, + "step": 34140 + }, + { + "epoch": 17.0, + "learning_rate": 4.141806841209228e-06, + "loss": 0.2793, + "step": 34150 + }, + { + "epoch": 17.0, + "learning_rate": 4.137125879324759e-06, + "loss": 0.5699, + "step": 34160 + }, + { + "epoch": 17.0, + "learning_rate": 4.1324465564642845e-06, + "loss": 0.7039, + "step": 34170 + }, + { + "epoch": 17.0, + "learning_rate": 4.127768874908445e-06, + "loss": 0.4332, + "step": 34180 + }, + { + "epoch": 17.0, + "learning_rate": 4.123092836937095e-06, + "loss": 0.3581, + "step": 34190 + }, + { + "epoch": 17.0, + "learning_rate": 4.118418444829271e-06, + "loss": 0.6377, + "step": 34200 + }, + { + "epoch": 17.0, + "learning_rate": 4.1137457008632175e-06, + "loss": 0.8027, + "step": 34210 + }, + { + "epoch": 17.0, + "learning_rate": 4.1090746073163686e-06, + "loss": 0.3202, + "step": 34220 + }, + { + "epoch": 17.0, + "learning_rate": 4.104405166465368e-06, + "loss": 0.325, + "step": 34230 + }, + { + "epoch": 17.0, + "learning_rate": 4.099737380586036e-06, + "loss": 0.3973, + "step": 34240 + }, + { + "epoch": 17.0, + "learning_rate": 4.095071251953399e-06, + "loss": 0.6119, + "step": 34250 + }, + { + "epoch": 17.01, + "learning_rate": 4.090406782841671e-06, + "loss": 0.3482, + "step": 34260 + }, + { + "epoch": 17.01, + "learning_rate": 4.085743975524253e-06, + "loss": 0.2866, + "step": 34270 + }, + { + "epoch": 17.01, + "learning_rate": 4.081082832273752e-06, + "loss": 0.593, + "step": 34280 + }, + { + "epoch": 17.01, + "learning_rate": 4.07642335536194e-06, + "loss": 0.3209, + "step": 34290 + }, + { + "epoch": 17.01, + "learning_rate": 4.071765547059796e-06, + "loss": 0.4869, + "step": 34300 + }, + { + "epoch": 17.01, + "learning_rate": 4.067109409637476e-06, + "loss": 0.5368, + "step": 34310 + }, + { + "epoch": 17.01, + "learning_rate": 4.062454945364326e-06, + "loss": 0.5865, + "step": 34320 + }, + { + "epoch": 17.01, + "learning_rate": 4.057802156508877e-06, + "loss": 0.9716, + "step": 34330 + }, + { + "epoch": 17.01, + "learning_rate": 4.053151045338842e-06, + "loss": 0.5427, + "step": 34340 + }, + { + "epoch": 17.01, + "learning_rate": 4.0485016141211134e-06, + "loss": 0.728, + "step": 34350 + }, + { + "epoch": 17.01, + "learning_rate": 4.04385386512177e-06, + "loss": 0.4432, + "step": 34360 + }, + { + "epoch": 17.01, + "learning_rate": 4.039207800606065e-06, + "loss": 0.6848, + "step": 34370 + }, + { + "epoch": 17.01, + "learning_rate": 4.034563422838437e-06, + "loss": 0.2282, + "step": 34380 + }, + { + "epoch": 17.01, + "learning_rate": 4.029920734082499e-06, + "loss": 0.2571, + "step": 34390 + }, + { + "epoch": 17.01, + "learning_rate": 4.025279736601039e-06, + "loss": 0.4565, + "step": 34400 + }, + { + "epoch": 17.01, + "learning_rate": 4.0206404326560205e-06, + "loss": 0.3911, + "step": 34410 + }, + { + "epoch": 17.01, + "learning_rate": 4.016002824508593e-06, + "loss": 0.4225, + "step": 34420 + }, + { + "epoch": 17.01, + "learning_rate": 4.011366914419062e-06, + "loss": 0.1458, + "step": 34430 + }, + { + "epoch": 17.01, + "learning_rate": 4.006732704646918e-06, + "loss": 0.2513, + "step": 34440 + }, + { + "epoch": 17.01, + "learning_rate": 4.002100197450817e-06, + "loss": 0.5512, + "step": 34450 + }, + { + "epoch": 17.01, + "learning_rate": 3.997469395088586e-06, + "loss": 0.4876, + "step": 34460 + }, + { + "epoch": 17.01, + "learning_rate": 3.992840299817223e-06, + "loss": 0.9696, + "step": 34470 + }, + { + "epoch": 17.01, + "learning_rate": 3.988212913892892e-06, + "loss": 0.4729, + "step": 34480 + }, + { + "epoch": 17.01, + "learning_rate": 3.983587239570926e-06, + "loss": 0.5691, + "step": 34490 + }, + { + "epoch": 17.01, + "learning_rate": 3.978963279105821e-06, + "loss": 0.6559, + "step": 34500 + }, + { + "epoch": 17.01, + "learning_rate": 3.974341034751237e-06, + "loss": 0.5865, + "step": 34510 + }, + { + "epoch": 17.01, + "learning_rate": 3.969720508760006e-06, + "loss": 0.5114, + "step": 34520 + }, + { + "epoch": 17.01, + "learning_rate": 3.965101703384111e-06, + "loss": 0.8371, + "step": 34530 + }, + { + "epoch": 17.01, + "learning_rate": 3.960484620874706e-06, + "loss": 0.7119, + "step": 34540 + }, + { + "epoch": 17.01, + "learning_rate": 3.955869263482096e-06, + "loss": 0.6099, + "step": 34550 + }, + { + "epoch": 17.01, + "learning_rate": 3.951255633455752e-06, + "loss": 0.9088, + "step": 34560 + }, + { + "epoch": 17.01, + "learning_rate": 3.946643733044303e-06, + "loss": 0.6616, + "step": 34570 + }, + { + "epoch": 17.01, + "learning_rate": 3.942033564495532e-06, + "loss": 0.6684, + "step": 34580 + }, + { + "epoch": 17.01, + "learning_rate": 3.937425130056378e-06, + "loss": 0.8524, + "step": 34590 + }, + { + "epoch": 17.01, + "learning_rate": 3.9328184319729346e-06, + "loss": 0.3119, + "step": 34600 + }, + { + "epoch": 17.01, + "learning_rate": 3.9282134724904555e-06, + "loss": 0.8073, + "step": 34610 + }, + { + "epoch": 17.01, + "learning_rate": 3.92361025385334e-06, + "loss": 0.4739, + "step": 34620 + }, + { + "epoch": 17.01, + "learning_rate": 3.919008778305139e-06, + "loss": 0.7186, + "step": 34630 + }, + { + "epoch": 17.01, + "learning_rate": 3.914409048088559e-06, + "loss": 0.3444, + "step": 34640 + }, + { + "epoch": 17.01, + "learning_rate": 3.90981106544545e-06, + "loss": 0.3746, + "step": 34650 + }, + { + "epoch": 17.01, + "learning_rate": 3.905214832616813e-06, + "loss": 0.3366, + "step": 34660 + }, + { + "epoch": 17.01, + "learning_rate": 3.900620351842797e-06, + "loss": 0.5922, + "step": 34670 + }, + { + "epoch": 17.01, + "learning_rate": 3.896027625362695e-06, + "loss": 0.5424, + "step": 34680 + }, + { + "epoch": 17.01, + "learning_rate": 3.891436655414948e-06, + "loss": 0.4661, + "step": 34690 + }, + { + "epoch": 17.01, + "learning_rate": 3.886847444237134e-06, + "loss": 0.5704, + "step": 34700 + }, + { + "epoch": 17.01, + "learning_rate": 3.882259994065986e-06, + "loss": 0.3393, + "step": 34710 + }, + { + "epoch": 17.01, + "learning_rate": 3.877674307137371e-06, + "loss": 0.6378, + "step": 34720 + }, + { + "epoch": 17.01, + "learning_rate": 3.873090385686292e-06, + "loss": 0.2401, + "step": 34730 + }, + { + "epoch": 17.01, + "learning_rate": 3.868508231946894e-06, + "loss": 0.6446, + "step": 34740 + }, + { + "epoch": 17.02, + "learning_rate": 3.8639278481524725e-06, + "loss": 0.7911, + "step": 34750 + }, + { + "epoch": 17.02, + "learning_rate": 3.859349236535446e-06, + "loss": 0.4049, + "step": 34760 + }, + { + "epoch": 17.02, + "learning_rate": 3.854772399327374e-06, + "loss": 0.5855, + "step": 34770 + }, + { + "epoch": 17.02, + "learning_rate": 3.850197338758952e-06, + "loss": 0.5569, + "step": 34780 + }, + { + "epoch": 17.02, + "learning_rate": 3.845624057060006e-06, + "loss": 0.5836, + "step": 34790 + }, + { + "epoch": 17.02, + "learning_rate": 3.841052556459503e-06, + "loss": 0.5529, + "step": 34800 + }, + { + "epoch": 17.02, + "learning_rate": 3.83648283918554e-06, + "loss": 0.413, + "step": 34810 + }, + { + "epoch": 17.02, + "learning_rate": 3.831914907465331e-06, + "loss": 0.4782, + "step": 34820 + }, + { + "epoch": 17.02, + "learning_rate": 3.8273487635252355e-06, + "loss": 0.2445, + "step": 34830 + }, + { + "epoch": 17.02, + "learning_rate": 3.822784409590735e-06, + "loss": 0.7761, + "step": 34840 + }, + { + "epoch": 17.02, + "learning_rate": 3.818221847886444e-06, + "loss": 0.3729, + "step": 34850 + }, + { + "epoch": 17.02, + "learning_rate": 3.813661080636098e-06, + "loss": 0.6889, + "step": 34860 + }, + { + "epoch": 17.02, + "learning_rate": 3.809102110062559e-06, + "loss": 0.6488, + "step": 34870 + }, + { + "epoch": 17.02, + "learning_rate": 3.8045449383878144e-06, + "loss": 0.5869, + "step": 34880 + }, + { + "epoch": 17.02, + "learning_rate": 3.799989567832971e-06, + "loss": 0.6171, + "step": 34890 + }, + { + "epoch": 17.02, + "learning_rate": 3.795436000618269e-06, + "loss": 0.7484, + "step": 34900 + }, + { + "epoch": 17.02, + "learning_rate": 3.7908842389630542e-06, + "loss": 0.7111, + "step": 34910 + }, + { + "epoch": 17.02, + "learning_rate": 3.7863342850858034e-06, + "loss": 0.4369, + "step": 34920 + }, + { + "epoch": 17.02, + "learning_rate": 3.7817861412041044e-06, + "loss": 0.3915, + "step": 34930 + }, + { + "epoch": 17.02, + "learning_rate": 3.7772398095346738e-06, + "loss": 0.3937, + "step": 34940 + }, + { + "epoch": 17.02, + "learning_rate": 3.7726952922933353e-06, + "loss": 0.597, + "step": 34950 + }, + { + "epoch": 17.02, + "learning_rate": 3.768152591695031e-06, + "loss": 0.6374, + "step": 34960 + }, + { + "epoch": 17.02, + "learning_rate": 3.763611709953819e-06, + "loss": 0.5747, + "step": 34970 + }, + { + "epoch": 17.02, + "learning_rate": 3.7590726492828704e-06, + "loss": 0.413, + "step": 34980 + }, + { + "epoch": 17.02, + "learning_rate": 3.7545354118944664e-06, + "loss": 0.7505, + "step": 34990 + }, + { + "epoch": 17.02, + "learning_rate": 3.750000000000002e-06, + "loss": 0.7681, + "step": 35000 + }, + { + "epoch": 17.02, + "learning_rate": 3.7454664158099814e-06, + "loss": 0.7081, + "step": 35010 + }, + { + "epoch": 17.02, + "learning_rate": 3.7409346615340203e-06, + "loss": 0.3066, + "step": 35020 + }, + { + "epoch": 17.02, + "learning_rate": 3.736404739380836e-06, + "loss": 0.7753, + "step": 35030 + }, + { + "epoch": 17.02, + "learning_rate": 3.731876651558265e-06, + "loss": 0.5647, + "step": 35040 + }, + { + "epoch": 17.02, + "learning_rate": 3.727350400273237e-06, + "loss": 0.4773, + "step": 35050 + }, + { + "epoch": 17.02, + "learning_rate": 3.7228259877317942e-06, + "loss": 0.5379, + "step": 35060 + }, + { + "epoch": 17.02, + "learning_rate": 3.7183034161390787e-06, + "loss": 0.2418, + "step": 35070 + }, + { + "epoch": 17.02, + "learning_rate": 3.7137826876993383e-06, + "loss": 0.4506, + "step": 35080 + }, + { + "epoch": 17.02, + "learning_rate": 3.7092638046159198e-06, + "loss": 0.6835, + "step": 35090 + }, + { + "epoch": 17.02, + "learning_rate": 3.704746769091273e-06, + "loss": 0.2637, + "step": 35100 + }, + { + "epoch": 17.02, + "learning_rate": 3.7002315833269455e-06, + "loss": 0.3303, + "step": 35110 + }, + { + "epoch": 17.02, + "learning_rate": 3.695718249523581e-06, + "loss": 0.7317, + "step": 35120 + }, + { + "epoch": 17.02, + "learning_rate": 3.6912067698809306e-06, + "loss": 0.4164, + "step": 35130 + }, + { + "epoch": 17.02, + "learning_rate": 3.6866971465978297e-06, + "loss": 0.5373, + "step": 35140 + }, + { + "epoch": 17.02, + "learning_rate": 3.6821893818722152e-06, + "loss": 0.6486, + "step": 35150 + }, + { + "epoch": 17.02, + "learning_rate": 3.677683477901117e-06, + "loss": 0.8014, + "step": 35160 + }, + { + "epoch": 17.02, + "learning_rate": 3.673179436880658e-06, + "loss": 0.5934, + "step": 35170 + }, + { + "epoch": 17.02, + "learning_rate": 3.6686772610060517e-06, + "loss": 0.8094, + "step": 35180 + }, + { + "epoch": 17.02, + "learning_rate": 3.6641769524716058e-06, + "loss": 0.3018, + "step": 35190 + }, + { + "epoch": 17.02, + "learning_rate": 3.6596785134707146e-06, + "loss": 0.7329, + "step": 35200 + }, + { + "epoch": 17.02, + "learning_rate": 3.6551819461958634e-06, + "loss": 0.3554, + "step": 35210 + }, + { + "epoch": 17.02, + "learning_rate": 3.650687252838622e-06, + "loss": 0.4408, + "step": 35220 + }, + { + "epoch": 17.02, + "learning_rate": 3.6461944355896537e-06, + "loss": 0.457, + "step": 35230 + }, + { + "epoch": 17.02, + "learning_rate": 3.641703496638701e-06, + "loss": 0.4134, + "step": 35240 + }, + { + "epoch": 17.02, + "learning_rate": 3.637214438174593e-06, + "loss": 0.5221, + "step": 35250 + }, + { + "epoch": 17.03, + "learning_rate": 3.632727262385243e-06, + "loss": 0.4694, + "step": 35260 + }, + { + "epoch": 17.03, + "learning_rate": 3.6282419714576444e-06, + "loss": 0.5371, + "step": 35270 + }, + { + "epoch": 17.03, + "learning_rate": 3.6237585675778735e-06, + "loss": 0.3918, + "step": 35280 + }, + { + "epoch": 17.03, + "learning_rate": 3.619277052931089e-06, + "loss": 0.6303, + "step": 35290 + }, + { + "epoch": 17.03, + "learning_rate": 3.6147974297015245e-06, + "loss": 0.7591, + "step": 35300 + }, + { + "epoch": 17.03, + "learning_rate": 3.6103197000724957e-06, + "loss": 0.6012, + "step": 35310 + }, + { + "epoch": 17.03, + "learning_rate": 3.6058438662263894e-06, + "loss": 0.6378, + "step": 35320 + }, + { + "epoch": 17.03, + "learning_rate": 3.6013699303446784e-06, + "loss": 0.5929, + "step": 35330 + }, + { + "epoch": 17.03, + "learning_rate": 3.596897894607905e-06, + "loss": 0.4922, + "step": 35340 + }, + { + "epoch": 17.03, + "learning_rate": 3.5924277611956798e-06, + "loss": 0.5399, + "step": 35350 + }, + { + "epoch": 17.03, + "learning_rate": 3.587959532286692e-06, + "loss": 0.8259, + "step": 35360 + }, + { + "epoch": 17.03, + "learning_rate": 3.583493210058706e-06, + "loss": 0.7079, + "step": 35370 + }, + { + "epoch": 17.03, + "learning_rate": 3.579028796688551e-06, + "loss": 0.6888, + "step": 35380 + }, + { + "epoch": 17.03, + "learning_rate": 3.574566294352129e-06, + "loss": 0.8479, + "step": 35390 + }, + { + "epoch": 17.03, + "learning_rate": 3.5701057052244076e-06, + "loss": 0.6815, + "step": 35400 + }, + { + "epoch": 17.03, + "learning_rate": 3.565647031479422e-06, + "loss": 0.725, + "step": 35410 + }, + { + "epoch": 17.03, + "learning_rate": 3.5611902752902814e-06, + "loss": 1.1404, + "step": 35420 + }, + { + "epoch": 17.03, + "learning_rate": 3.556735438829154e-06, + "loss": 0.3504, + "step": 35430 + }, + { + "epoch": 17.03, + "learning_rate": 3.5522825242672673e-06, + "loss": 0.3773, + "step": 35440 + }, + { + "epoch": 17.03, + "learning_rate": 3.5478315337749183e-06, + "loss": 1.0465, + "step": 35450 + }, + { + "epoch": 17.03, + "learning_rate": 3.543382469521471e-06, + "loss": 0.6458, + "step": 35460 + }, + { + "epoch": 17.03, + "learning_rate": 3.5389353336753426e-06, + "loss": 0.4453, + "step": 35470 + }, + { + "epoch": 17.03, + "learning_rate": 3.5344901284040122e-06, + "loss": 0.2837, + "step": 35480 + }, + { + "epoch": 17.03, + "learning_rate": 3.530046855874021e-06, + "loss": 0.618, + "step": 35490 + }, + { + "epoch": 17.03, + "learning_rate": 3.525605518250964e-06, + "loss": 0.4564, + "step": 35500 + }, + { + "epoch": 17.03, + "learning_rate": 3.521166117699493e-06, + "loss": 0.8331, + "step": 35510 + }, + { + "epoch": 17.03, + "learning_rate": 3.5167286563833268e-06, + "loss": 0.4391, + "step": 35520 + }, + { + "epoch": 17.03, + "learning_rate": 3.512293136465221e-06, + "loss": 0.4842, + "step": 35530 + }, + { + "epoch": 17.03, + "learning_rate": 3.507859560106998e-06, + "loss": 0.7373, + "step": 35540 + }, + { + "epoch": 17.03, + "learning_rate": 3.5034279294695245e-06, + "loss": 0.3786, + "step": 35550 + }, + { + "epoch": 17.03, + "learning_rate": 3.498998246712731e-06, + "loss": 0.322, + "step": 35560 + }, + { + "epoch": 17.03, + "learning_rate": 3.4945705139955865e-06, + "loss": 0.4963, + "step": 35570 + }, + { + "epoch": 17.03, + "learning_rate": 3.490144733476116e-06, + "loss": 0.6952, + "step": 35580 + }, + { + "epoch": 17.03, + "learning_rate": 3.4857209073113893e-06, + "loss": 0.5512, + "step": 35590 + }, + { + "epoch": 17.03, + "learning_rate": 3.4812990376575235e-06, + "loss": 0.4881, + "step": 35600 + }, + { + "epoch": 17.03, + "learning_rate": 3.4768791266696926e-06, + "loss": 0.5135, + "step": 35610 + }, + { + "epoch": 17.03, + "learning_rate": 3.4724611765021e-06, + "loss": 0.7624, + "step": 35620 + }, + { + "epoch": 17.03, + "learning_rate": 3.4680451893080028e-06, + "loss": 0.3704, + "step": 35630 + }, + { + "epoch": 17.03, + "learning_rate": 3.4636311672396958e-06, + "loss": 0.2933, + "step": 35640 + }, + { + "epoch": 17.03, + "learning_rate": 3.4592191124485265e-06, + "loss": 0.5389, + "step": 35650 + }, + { + "epoch": 17.03, + "learning_rate": 3.454809027084874e-06, + "loss": 0.5478, + "step": 35660 + }, + { + "epoch": 17.03, + "learning_rate": 3.4504009132981614e-06, + "loss": 0.5282, + "step": 35670 + }, + { + "epoch": 17.03, + "learning_rate": 3.445994773236847e-06, + "loss": 0.6104, + "step": 35680 + }, + { + "epoch": 17.03, + "learning_rate": 3.4415906090484316e-06, + "loss": 0.5048, + "step": 35690 + }, + { + "epoch": 17.03, + "learning_rate": 3.437188422879452e-06, + "loss": 0.2955, + "step": 35700 + }, + { + "epoch": 17.03, + "learning_rate": 3.4327882168754793e-06, + "loss": 0.404, + "step": 35710 + }, + { + "epoch": 17.03, + "learning_rate": 3.4283899931811203e-06, + "loss": 0.5149, + "step": 35720 + }, + { + "epoch": 17.03, + "learning_rate": 3.4239937539400167e-06, + "loss": 0.7681, + "step": 35730 + }, + { + "epoch": 17.03, + "learning_rate": 3.4195995012948382e-06, + "loss": 0.4023, + "step": 35740 + }, + { + "epoch": 17.04, + "learning_rate": 3.415207237387297e-06, + "loss": 0.5534, + "step": 35750 + }, + { + "epoch": 17.04, + "learning_rate": 3.4108169643581244e-06, + "loss": 0.7659, + "step": 35760 + }, + { + "epoch": 17.04, + "learning_rate": 3.4064286843470882e-06, + "loss": 0.6809, + "step": 35770 + }, + { + "epoch": 17.04, + "learning_rate": 3.4020423994929824e-06, + "loss": 0.4474, + "step": 35780 + }, + { + "epoch": 17.04, + "learning_rate": 3.397658111933628e-06, + "loss": 0.496, + "step": 35790 + }, + { + "epoch": 17.04, + "learning_rate": 3.3932758238058738e-06, + "loss": 0.2837, + "step": 35800 + }, + { + "epoch": 17.04, + "learning_rate": 3.3888955372455947e-06, + "loss": 0.1942, + "step": 35810 + }, + { + "epoch": 17.04, + "learning_rate": 3.3845172543876883e-06, + "loss": 0.5356, + "step": 35820 + }, + { + "epoch": 17.04, + "learning_rate": 3.380140977366077e-06, + "loss": 0.7328, + "step": 35830 + }, + { + "epoch": 17.04, + "learning_rate": 3.3757667083137015e-06, + "loss": 0.2156, + "step": 35840 + }, + { + "epoch": 17.04, + "learning_rate": 3.371394449362533e-06, + "loss": 0.4024, + "step": 35850 + }, + { + "epoch": 17.04, + "learning_rate": 3.367024202643555e-06, + "loss": 0.3176, + "step": 35860 + }, + { + "epoch": 17.04, + "learning_rate": 3.3626559702867738e-06, + "loss": 0.627, + "step": 35870 + }, + { + "epoch": 17.04, + "learning_rate": 3.358289754421211e-06, + "loss": 0.3277, + "step": 35880 + }, + { + "epoch": 17.04, + "learning_rate": 3.353925557174907e-06, + "loss": 0.5174, + "step": 35890 + }, + { + "epoch": 17.04, + "learning_rate": 3.3495633806749203e-06, + "loss": 0.4933, + "step": 35900 + }, + { + "epoch": 17.04, + "learning_rate": 3.345203227047322e-06, + "loss": 0.4456, + "step": 35910 + }, + { + "epoch": 17.04, + "learning_rate": 3.340845098417197e-06, + "loss": 0.6054, + "step": 35920 + }, + { + "epoch": 17.04, + "learning_rate": 3.3364889969086428e-06, + "loss": 0.7091, + "step": 35930 + }, + { + "epoch": 17.04, + "learning_rate": 3.3321349246447753e-06, + "loss": 0.7329, + "step": 35940 + }, + { + "epoch": 17.04, + "learning_rate": 3.327782883747714e-06, + "loss": 0.562, + "step": 35950 + }, + { + "epoch": 17.04, + "learning_rate": 3.323432876338593e-06, + "loss": 0.6282, + "step": 35960 + }, + { + "epoch": 17.04, + "learning_rate": 3.319084904537545e-06, + "loss": 0.574, + "step": 35970 + }, + { + "epoch": 17.04, + "learning_rate": 3.3147389704637263e-06, + "loss": 0.3558, + "step": 35980 + }, + { + "epoch": 17.04, + "learning_rate": 3.310395076235291e-06, + "loss": 0.834, + "step": 35990 + }, + { + "epoch": 17.04, + "learning_rate": 3.3060532239694e-06, + "loss": 0.5013, + "step": 36000 + }, + { + "epoch": 17.04, + "eval_accuracy": 0.8736842105263158, + "eval_f1": 0.8736842105263158, + "eval_loss": 0.7713430523872375, + "eval_runtime": 768.879, + "eval_samples_per_second": 6.178, + "eval_steps_per_second": 1.545, + "step": 36000 + }, + { + "epoch": 18.0, + "learning_rate": 3.3017134157822186e-06, + "loss": 0.3416, + "step": 36010 + }, + { + "epoch": 18.0, + "learning_rate": 3.2973756537889164e-06, + "loss": 0.519, + "step": 36020 + }, + { + "epoch": 18.0, + "learning_rate": 3.2930399401036647e-06, + "loss": 0.7151, + "step": 36030 + }, + { + "epoch": 18.0, + "learning_rate": 3.288706276839642e-06, + "loss": 0.7257, + "step": 36040 + }, + { + "epoch": 18.0, + "learning_rate": 3.2843746661090225e-06, + "loss": 0.3615, + "step": 36050 + }, + { + "epoch": 18.0, + "learning_rate": 3.2800451100229773e-06, + "loss": 0.2905, + "step": 36060 + }, + { + "epoch": 18.0, + "learning_rate": 3.275717610691677e-06, + "loss": 1.0138, + "step": 36070 + }, + { + "epoch": 18.0, + "learning_rate": 3.2713921702242993e-06, + "loss": 0.5712, + "step": 36080 + }, + { + "epoch": 18.0, + "learning_rate": 3.267068790729008e-06, + "loss": 0.4467, + "step": 36090 + }, + { + "epoch": 18.0, + "learning_rate": 3.262747474312966e-06, + "loss": 0.3777, + "step": 36100 + }, + { + "epoch": 18.0, + "learning_rate": 3.2584282230823308e-06, + "loss": 0.5909, + "step": 36110 + }, + { + "epoch": 18.0, + "learning_rate": 3.2541110391422493e-06, + "loss": 0.3593, + "step": 36120 + }, + { + "epoch": 18.0, + "learning_rate": 3.2497959245968696e-06, + "loss": 0.3639, + "step": 36130 + }, + { + "epoch": 18.0, + "learning_rate": 3.2454828815493277e-06, + "loss": 0.6528, + "step": 36140 + }, + { + "epoch": 18.0, + "learning_rate": 3.241171912101743e-06, + "loss": 0.3752, + "step": 36150 + }, + { + "epoch": 18.0, + "learning_rate": 3.23686301835523e-06, + "loss": 0.5945, + "step": 36160 + }, + { + "epoch": 18.0, + "learning_rate": 3.2325562024098906e-06, + "loss": 0.3346, + "step": 36170 + }, + { + "epoch": 18.0, + "learning_rate": 3.2282514663648196e-06, + "loss": 0.3911, + "step": 36180 + }, + { + "epoch": 18.0, + "learning_rate": 3.22394881231809e-06, + "loss": 0.7658, + "step": 36190 + }, + { + "epoch": 18.0, + "learning_rate": 3.219648242366763e-06, + "loss": 0.352, + "step": 36200 + }, + { + "epoch": 18.0, + "learning_rate": 3.215349758606885e-06, + "loss": 0.6865, + "step": 36210 + }, + { + "epoch": 18.0, + "learning_rate": 3.2110533631334805e-06, + "loss": 0.5107, + "step": 36220 + }, + { + "epoch": 18.0, + "learning_rate": 3.20675905804057e-06, + "loss": 0.4335, + "step": 36230 + }, + { + "epoch": 18.0, + "learning_rate": 3.2024668454211373e-06, + "loss": 0.556, + "step": 36240 + }, + { + "epoch": 18.0, + "learning_rate": 3.1981767273671563e-06, + "loss": 0.6473, + "step": 36250 + }, + { + "epoch": 18.01, + "learning_rate": 3.1938887059695775e-06, + "loss": 0.737, + "step": 36260 + }, + { + "epoch": 18.01, + "learning_rate": 3.189602783318334e-06, + "loss": 0.5317, + "step": 36270 + }, + { + "epoch": 18.01, + "learning_rate": 3.185318961502331e-06, + "loss": 1.087, + "step": 36280 + }, + { + "epoch": 18.01, + "learning_rate": 3.1810372426094507e-06, + "loss": 0.9457, + "step": 36290 + }, + { + "epoch": 18.01, + "learning_rate": 3.1767576287265512e-06, + "loss": 0.3877, + "step": 36300 + }, + { + "epoch": 18.01, + "learning_rate": 3.1724801219394645e-06, + "loss": 0.1997, + "step": 36310 + }, + { + "epoch": 18.01, + "learning_rate": 3.1682047243329947e-06, + "loss": 0.4113, + "step": 36320 + }, + { + "epoch": 18.01, + "learning_rate": 3.1639314379909188e-06, + "loss": 0.5148, + "step": 36330 + }, + { + "epoch": 18.01, + "learning_rate": 3.1596602649959844e-06, + "loss": 0.5285, + "step": 36340 + }, + { + "epoch": 18.01, + "learning_rate": 3.1553912074299106e-06, + "loss": 0.3002, + "step": 36350 + }, + { + "epoch": 18.01, + "learning_rate": 3.151124267373381e-06, + "loss": 0.9515, + "step": 36360 + }, + { + "epoch": 18.01, + "learning_rate": 3.1468594469060546e-06, + "loss": 0.3972, + "step": 36370 + }, + { + "epoch": 18.01, + "learning_rate": 3.1425967481065515e-06, + "loss": 0.3017, + "step": 36380 + }, + { + "epoch": 18.01, + "learning_rate": 3.1383361730524596e-06, + "loss": 0.3262, + "step": 36390 + }, + { + "epoch": 18.01, + "learning_rate": 3.13407772382033e-06, + "loss": 0.2876, + "step": 36400 + }, + { + "epoch": 18.01, + "learning_rate": 3.1298214024856807e-06, + "loss": 0.4643, + "step": 36410 + }, + { + "epoch": 18.01, + "learning_rate": 3.1255672111229896e-06, + "loss": 0.2409, + "step": 36420 + }, + { + "epoch": 18.01, + "learning_rate": 3.121315151805699e-06, + "loss": 0.5946, + "step": 36430 + }, + { + "epoch": 18.01, + "learning_rate": 3.11706522660621e-06, + "loss": 0.5023, + "step": 36440 + }, + { + "epoch": 18.01, + "learning_rate": 3.1128174375958836e-06, + "loss": 0.5401, + "step": 36450 + }, + { + "epoch": 18.01, + "learning_rate": 3.1085717868450442e-06, + "loss": 0.8222, + "step": 36460 + }, + { + "epoch": 18.01, + "learning_rate": 3.104328276422968e-06, + "loss": 0.3783, + "step": 36470 + }, + { + "epoch": 18.01, + "learning_rate": 3.100086908397891e-06, + "loss": 0.5229, + "step": 36480 + }, + { + "epoch": 18.01, + "learning_rate": 3.095847684837003e-06, + "loss": 0.2334, + "step": 36490 + }, + { + "epoch": 18.01, + "learning_rate": 3.0916106078064522e-06, + "loss": 0.471, + "step": 36500 + }, + { + "epoch": 18.01, + "learning_rate": 3.0873756793713374e-06, + "loss": 0.4497, + "step": 36510 + }, + { + "epoch": 18.01, + "learning_rate": 3.083142901595711e-06, + "loss": 0.924, + "step": 36520 + }, + { + "epoch": 18.01, + "learning_rate": 3.0789122765425775e-06, + "loss": 0.7265, + "step": 36530 + }, + { + "epoch": 18.01, + "learning_rate": 3.0746838062738935e-06, + "loss": 0.676, + "step": 36540 + }, + { + "epoch": 18.01, + "learning_rate": 3.0704574928505588e-06, + "loss": 0.45, + "step": 36550 + }, + { + "epoch": 18.01, + "learning_rate": 3.066233338332435e-06, + "loss": 0.4074, + "step": 36560 + }, + { + "epoch": 18.01, + "learning_rate": 3.06201134477832e-06, + "loss": 0.4613, + "step": 36570 + }, + { + "epoch": 18.01, + "learning_rate": 3.057791514245965e-06, + "loss": 0.4177, + "step": 36580 + }, + { + "epoch": 18.01, + "learning_rate": 3.0535738487920557e-06, + "loss": 0.3157, + "step": 36590 + }, + { + "epoch": 18.01, + "learning_rate": 3.04935835047224e-06, + "loss": 0.402, + "step": 36600 + }, + { + "epoch": 18.01, + "learning_rate": 3.045145021341097e-06, + "loss": 0.5566, + "step": 36610 + }, + { + "epoch": 18.01, + "learning_rate": 3.0409338634521526e-06, + "loss": 0.8892, + "step": 36620 + }, + { + "epoch": 18.01, + "learning_rate": 3.036724878857873e-06, + "loss": 0.3931, + "step": 36630 + }, + { + "epoch": 18.01, + "learning_rate": 3.032518069609665e-06, + "loss": 0.4067, + "step": 36640 + }, + { + "epoch": 18.01, + "learning_rate": 3.0283134377578804e-06, + "loss": 0.5997, + "step": 36650 + }, + { + "epoch": 18.01, + "learning_rate": 3.024110985351804e-06, + "loss": 0.6039, + "step": 36660 + }, + { + "epoch": 18.01, + "learning_rate": 3.019910714439662e-06, + "loss": 1.072, + "step": 36670 + }, + { + "epoch": 18.01, + "learning_rate": 3.0157126270686113e-06, + "loss": 0.6233, + "step": 36680 + }, + { + "epoch": 18.01, + "learning_rate": 3.011516725284747e-06, + "loss": 0.619, + "step": 36690 + }, + { + "epoch": 18.01, + "learning_rate": 3.007323011133107e-06, + "loss": 0.6432, + "step": 36700 + }, + { + "epoch": 18.01, + "learning_rate": 3.0031314866576535e-06, + "loss": 0.5534, + "step": 36710 + }, + { + "epoch": 18.01, + "learning_rate": 2.9989421539012843e-06, + "loss": 0.8209, + "step": 36720 + }, + { + "epoch": 18.01, + "learning_rate": 2.9947550149058308e-06, + "loss": 0.3964, + "step": 36730 + }, + { + "epoch": 18.01, + "learning_rate": 2.99057007171205e-06, + "loss": 0.2976, + "step": 36740 + }, + { + "epoch": 18.02, + "learning_rate": 2.9863873263596374e-06, + "loss": 0.4838, + "step": 36750 + }, + { + "epoch": 18.02, + "learning_rate": 2.982206780887212e-06, + "loss": 0.6236, + "step": 36760 + }, + { + "epoch": 18.02, + "learning_rate": 2.9780284373323167e-06, + "loss": 0.4662, + "step": 36770 + }, + { + "epoch": 18.02, + "learning_rate": 2.9738522977314257e-06, + "loss": 0.3537, + "step": 36780 + }, + { + "epoch": 18.02, + "learning_rate": 2.9696783641199416e-06, + "loss": 0.5034, + "step": 36790 + }, + { + "epoch": 18.02, + "learning_rate": 2.9655066385321886e-06, + "loss": 0.2501, + "step": 36800 + }, + { + "epoch": 18.02, + "learning_rate": 2.9613371230014146e-06, + "loss": 0.6887, + "step": 36810 + }, + { + "epoch": 18.02, + "learning_rate": 2.9571698195597902e-06, + "loss": 0.5212, + "step": 36820 + }, + { + "epoch": 18.02, + "learning_rate": 2.953004730238409e-06, + "loss": 0.4871, + "step": 36830 + }, + { + "epoch": 18.02, + "learning_rate": 2.9488418570672825e-06, + "loss": 0.3243, + "step": 36840 + }, + { + "epoch": 18.02, + "learning_rate": 2.9446812020753518e-06, + "loss": 0.4076, + "step": 36850 + }, + { + "epoch": 18.02, + "learning_rate": 2.9405227672904613e-06, + "loss": 0.3996, + "step": 36860 + }, + { + "epoch": 18.02, + "learning_rate": 2.9363665547393856e-06, + "loss": 0.7454, + "step": 36870 + }, + { + "epoch": 18.02, + "learning_rate": 2.932212566447809e-06, + "loss": 0.6302, + "step": 36880 + }, + { + "epoch": 18.02, + "learning_rate": 2.928060804440339e-06, + "loss": 0.3576, + "step": 36890 + }, + { + "epoch": 18.02, + "learning_rate": 2.9239112707404932e-06, + "loss": 0.3181, + "step": 36900 + }, + { + "epoch": 18.02, + "learning_rate": 2.9197639673707034e-06, + "loss": 0.608, + "step": 36910 + }, + { + "epoch": 18.02, + "learning_rate": 2.915618896352314e-06, + "loss": 0.2541, + "step": 36920 + }, + { + "epoch": 18.02, + "learning_rate": 2.9114760597055798e-06, + "loss": 0.2384, + "step": 36930 + }, + { + "epoch": 18.02, + "learning_rate": 2.907335459449678e-06, + "loss": 0.529, + "step": 36940 + }, + { + "epoch": 18.02, + "learning_rate": 2.9031970976026783e-06, + "loss": 0.5254, + "step": 36950 + }, + { + "epoch": 18.02, + "learning_rate": 2.89906097618157e-06, + "loss": 0.9418, + "step": 36960 + }, + { + "epoch": 18.02, + "learning_rate": 2.8949270972022473e-06, + "loss": 0.3854, + "step": 36970 + }, + { + "epoch": 18.02, + "learning_rate": 2.8907954626795167e-06, + "loss": 0.5665, + "step": 36980 + }, + { + "epoch": 18.02, + "learning_rate": 2.886666074627084e-06, + "loss": 0.5238, + "step": 36990 + }, + { + "epoch": 18.02, + "learning_rate": 2.882538935057563e-06, + "loss": 0.2699, + "step": 37000 + }, + { + "epoch": 18.02, + "learning_rate": 2.8784140459824717e-06, + "loss": 0.7433, + "step": 37010 + }, + { + "epoch": 18.02, + "learning_rate": 2.8742914094122317e-06, + "loss": 0.6341, + "step": 37020 + }, + { + "epoch": 18.02, + "learning_rate": 2.8701710273561635e-06, + "loss": 0.4695, + "step": 37030 + }, + { + "epoch": 18.02, + "learning_rate": 2.8660529018224937e-06, + "loss": 0.6994, + "step": 37040 + }, + { + "epoch": 18.02, + "learning_rate": 2.861937034818345e-06, + "loss": 0.2551, + "step": 37050 + }, + { + "epoch": 18.02, + "learning_rate": 2.8578234283497423e-06, + "loss": 0.4353, + "step": 37060 + }, + { + "epoch": 18.02, + "learning_rate": 2.8537120844216044e-06, + "loss": 0.5129, + "step": 37070 + }, + { + "epoch": 18.02, + "learning_rate": 2.8496030050377552e-06, + "loss": 0.8072, + "step": 37080 + }, + { + "epoch": 18.02, + "learning_rate": 2.845496192200908e-06, + "loss": 0.6905, + "step": 37090 + }, + { + "epoch": 18.02, + "learning_rate": 2.841391647912673e-06, + "loss": 0.489, + "step": 37100 + }, + { + "epoch": 18.02, + "learning_rate": 2.8372893741735546e-06, + "loss": 0.5638, + "step": 37110 + }, + { + "epoch": 18.02, + "learning_rate": 2.8331893729829527e-06, + "loss": 0.5291, + "step": 37120 + }, + { + "epoch": 18.02, + "learning_rate": 2.8290916463391576e-06, + "loss": 0.5672, + "step": 37130 + }, + { + "epoch": 18.02, + "learning_rate": 2.8249961962393506e-06, + "loss": 0.5564, + "step": 37140 + }, + { + "epoch": 18.02, + "learning_rate": 2.8209030246796054e-06, + "loss": 0.4435, + "step": 37150 + }, + { + "epoch": 18.02, + "learning_rate": 2.8168121336548832e-06, + "loss": 0.3198, + "step": 37160 + }, + { + "epoch": 18.02, + "learning_rate": 2.812723525159033e-06, + "loss": 0.4359, + "step": 37170 + }, + { + "epoch": 18.02, + "learning_rate": 2.808637201184797e-06, + "loss": 0.2043, + "step": 37180 + }, + { + "epoch": 18.02, + "learning_rate": 2.8045531637237984e-06, + "loss": 0.6797, + "step": 37190 + }, + { + "epoch": 18.02, + "learning_rate": 2.8004714147665487e-06, + "loss": 0.6557, + "step": 37200 + }, + { + "epoch": 18.02, + "learning_rate": 2.7963919563024366e-06, + "loss": 0.7913, + "step": 37210 + }, + { + "epoch": 18.02, + "learning_rate": 2.7923147903197473e-06, + "loss": 0.5721, + "step": 37220 + }, + { + "epoch": 18.02, + "learning_rate": 2.7882399188056393e-06, + "loss": 0.7529, + "step": 37230 + }, + { + "epoch": 18.02, + "learning_rate": 2.7841673437461557e-06, + "loss": 0.2419, + "step": 37240 + }, + { + "epoch": 18.02, + "learning_rate": 2.7800970671262205e-06, + "loss": 0.5299, + "step": 37250 + }, + { + "epoch": 18.03, + "learning_rate": 2.7760290909296344e-06, + "loss": 0.8419, + "step": 37260 + }, + { + "epoch": 18.03, + "learning_rate": 2.771963417139084e-06, + "loss": 0.6347, + "step": 37270 + }, + { + "epoch": 18.03, + "learning_rate": 2.7679000477361276e-06, + "loss": 0.6209, + "step": 37280 + }, + { + "epoch": 18.03, + "learning_rate": 2.763838984701204e-06, + "loss": 0.389, + "step": 37290 + }, + { + "epoch": 18.03, + "learning_rate": 2.759780230013618e-06, + "loss": 0.5476, + "step": 37300 + }, + { + "epoch": 18.03, + "learning_rate": 2.755723785651567e-06, + "loss": 0.8579, + "step": 37310 + }, + { + "epoch": 18.03, + "learning_rate": 2.7516696535921084e-06, + "loss": 0.7398, + "step": 37320 + }, + { + "epoch": 18.03, + "learning_rate": 2.7476178358111765e-06, + "loss": 0.5888, + "step": 37330 + }, + { + "epoch": 18.03, + "learning_rate": 2.7435683342835794e-06, + "loss": 0.5358, + "step": 37340 + }, + { + "epoch": 18.03, + "learning_rate": 2.739521150982994e-06, + "loss": 0.4386, + "step": 37350 + }, + { + "epoch": 18.03, + "learning_rate": 2.735476287881967e-06, + "loss": 0.2455, + "step": 37360 + }, + { + "epoch": 18.03, + "learning_rate": 2.731433746951919e-06, + "loss": 0.2797, + "step": 37370 + }, + { + "epoch": 18.03, + "learning_rate": 2.727393530163136e-06, + "loss": 0.5183, + "step": 37380 + }, + { + "epoch": 18.03, + "learning_rate": 2.7233556394847664e-06, + "loss": 0.3987, + "step": 37390 + }, + { + "epoch": 18.03, + "learning_rate": 2.719320076884827e-06, + "loss": 0.4604, + "step": 37400 + }, + { + "epoch": 18.03, + "learning_rate": 2.7152868443302092e-06, + "loss": 0.5954, + "step": 37410 + }, + { + "epoch": 18.03, + "learning_rate": 2.7112559437866583e-06, + "loss": 0.5356, + "step": 37420 + }, + { + "epoch": 18.03, + "learning_rate": 2.707227377218785e-06, + "loss": 0.5328, + "step": 37430 + }, + { + "epoch": 18.03, + "learning_rate": 2.7032011465900655e-06, + "loss": 0.4616, + "step": 37440 + }, + { + "epoch": 18.03, + "learning_rate": 2.6991772538628325e-06, + "loss": 0.4083, + "step": 37450 + }, + { + "epoch": 18.03, + "learning_rate": 2.6951557009982875e-06, + "loss": 0.5883, + "step": 37460 + }, + { + "epoch": 18.03, + "learning_rate": 2.691136489956486e-06, + "loss": 0.4895, + "step": 37470 + }, + { + "epoch": 18.03, + "learning_rate": 2.687119622696339e-06, + "loss": 0.6126, + "step": 37480 + }, + { + "epoch": 18.03, + "learning_rate": 2.683105101175617e-06, + "loss": 0.2373, + "step": 37490 + }, + { + "epoch": 18.03, + "learning_rate": 2.6790929273509547e-06, + "loss": 0.445, + "step": 37500 + }, + { + "epoch": 18.03, + "learning_rate": 2.6750831031778352e-06, + "loss": 0.5903, + "step": 37510 + }, + { + "epoch": 18.03, + "learning_rate": 2.6710756306105973e-06, + "loss": 0.2374, + "step": 37520 + }, + { + "epoch": 18.03, + "learning_rate": 2.6670705116024345e-06, + "loss": 0.5996, + "step": 37530 + }, + { + "epoch": 18.03, + "learning_rate": 2.663067748105393e-06, + "loss": 0.3053, + "step": 37540 + }, + { + "epoch": 18.03, + "learning_rate": 2.6590673420703694e-06, + "loss": 0.325, + "step": 37550 + }, + { + "epoch": 18.03, + "learning_rate": 2.6550692954471202e-06, + "loss": 0.351, + "step": 37560 + }, + { + "epoch": 18.03, + "learning_rate": 2.6510736101842365e-06, + "loss": 0.8695, + "step": 37570 + }, + { + "epoch": 18.03, + "learning_rate": 2.64708028822917e-06, + "loss": 0.4065, + "step": 37580 + }, + { + "epoch": 18.03, + "learning_rate": 2.643089331528214e-06, + "loss": 0.563, + "step": 37590 + }, + { + "epoch": 18.03, + "learning_rate": 2.6391007420265183e-06, + "loss": 0.8252, + "step": 37600 + }, + { + "epoch": 18.03, + "learning_rate": 2.63511452166807e-06, + "loss": 0.2165, + "step": 37610 + }, + { + "epoch": 18.03, + "learning_rate": 2.631130672395705e-06, + "loss": 0.5479, + "step": 37620 + }, + { + "epoch": 18.03, + "learning_rate": 2.6271491961511017e-06, + "loss": 0.5603, + "step": 37630 + }, + { + "epoch": 18.03, + "learning_rate": 2.623170094874782e-06, + "loss": 0.4072, + "step": 37640 + }, + { + "epoch": 18.03, + "learning_rate": 2.619193370506119e-06, + "loss": 0.5133, + "step": 37650 + }, + { + "epoch": 18.03, + "learning_rate": 2.615219024983312e-06, + "loss": 0.6001, + "step": 37660 + }, + { + "epoch": 18.03, + "learning_rate": 2.6112470602434107e-06, + "loss": 0.4305, + "step": 37670 + }, + { + "epoch": 18.03, + "learning_rate": 2.6072774782223036e-06, + "loss": 0.5292, + "step": 37680 + }, + { + "epoch": 18.03, + "learning_rate": 2.6033102808547136e-06, + "loss": 0.538, + "step": 37690 + }, + { + "epoch": 18.03, + "learning_rate": 2.599345470074209e-06, + "loss": 0.4507, + "step": 37700 + }, + { + "epoch": 18.03, + "learning_rate": 2.595383047813189e-06, + "loss": 0.2525, + "step": 37710 + }, + { + "epoch": 18.03, + "learning_rate": 2.591423016002889e-06, + "loss": 0.3462, + "step": 37720 + }, + { + "epoch": 18.03, + "learning_rate": 2.5874653765733816e-06, + "loss": 0.4602, + "step": 37730 + }, + { + "epoch": 18.03, + "learning_rate": 2.5835101314535712e-06, + "loss": 0.5111, + "step": 37740 + }, + { + "epoch": 18.04, + "learning_rate": 2.5795572825711963e-06, + "loss": 0.3537, + "step": 37750 + }, + { + "epoch": 18.04, + "learning_rate": 2.5756068318528278e-06, + "loss": 0.5182, + "step": 37760 + }, + { + "epoch": 18.04, + "learning_rate": 2.571658781223867e-06, + "loss": 0.2725, + "step": 37770 + }, + { + "epoch": 18.04, + "learning_rate": 2.5677131326085447e-06, + "loss": 0.6178, + "step": 37780 + }, + { + "epoch": 18.04, + "learning_rate": 2.5637698879299253e-06, + "loss": 0.4089, + "step": 37790 + }, + { + "epoch": 18.04, + "learning_rate": 2.5598290491098956e-06, + "loss": 0.4844, + "step": 37800 + }, + { + "epoch": 18.04, + "learning_rate": 2.5558906180691747e-06, + "loss": 0.525, + "step": 37810 + }, + { + "epoch": 18.04, + "learning_rate": 2.5519545967273048e-06, + "loss": 0.8549, + "step": 37820 + }, + { + "epoch": 18.04, + "learning_rate": 2.548020987002654e-06, + "loss": 0.4982, + "step": 37830 + }, + { + "epoch": 18.04, + "learning_rate": 2.5440897908124176e-06, + "loss": 0.5018, + "step": 37840 + }, + { + "epoch": 18.04, + "learning_rate": 2.5401610100726124e-06, + "loss": 0.3848, + "step": 37850 + }, + { + "epoch": 18.04, + "learning_rate": 2.536234646698077e-06, + "loss": 0.3812, + "step": 37860 + }, + { + "epoch": 18.04, + "learning_rate": 2.5323107026024746e-06, + "loss": 0.4099, + "step": 37870 + }, + { + "epoch": 18.04, + "learning_rate": 2.528389179698286e-06, + "loss": 0.5027, + "step": 37880 + }, + { + "epoch": 18.04, + "learning_rate": 2.5244700798968166e-06, + "loss": 0.3912, + "step": 37890 + }, + { + "epoch": 18.04, + "learning_rate": 2.520553405108188e-06, + "loss": 0.3629, + "step": 37900 + }, + { + "epoch": 18.04, + "learning_rate": 2.5166391572413403e-06, + "loss": 0.951, + "step": 37910 + }, + { + "epoch": 18.04, + "learning_rate": 2.5127273382040244e-06, + "loss": 0.3341, + "step": 37920 + }, + { + "epoch": 18.04, + "learning_rate": 2.5088179499028205e-06, + "loss": 0.8679, + "step": 37930 + }, + { + "epoch": 18.04, + "learning_rate": 2.5049109942431135e-06, + "loss": 0.8129, + "step": 37940 + }, + { + "epoch": 18.04, + "learning_rate": 2.5010064731291072e-06, + "loss": 0.6507, + "step": 37950 + }, + { + "epoch": 18.04, + "learning_rate": 2.497104388463818e-06, + "loss": 1.0909, + "step": 37960 + }, + { + "epoch": 18.04, + "learning_rate": 2.4932047421490708e-06, + "loss": 0.3817, + "step": 37970 + }, + { + "epoch": 18.04, + "learning_rate": 2.4893075360855116e-06, + "loss": 0.8744, + "step": 37980 + }, + { + "epoch": 18.04, + "learning_rate": 2.485412772172589e-06, + "loss": 0.4002, + "step": 37990 + }, + { + "epoch": 18.04, + "learning_rate": 2.4815204523085656e-06, + "loss": 0.9572, + "step": 38000 + }, + { + "epoch": 18.04, + "eval_accuracy": 0.868421052631579, + "eval_f1": 0.868421052631579, + "eval_loss": 0.761253297328949, + "eval_runtime": 768.4537, + "eval_samples_per_second": 6.181, + "eval_steps_per_second": 1.546, + "step": 38000 + }, + { + "epoch": 19.0, + "learning_rate": 2.4776305783905063e-06, + "loss": 1.176, + "step": 38010 + }, + { + "epoch": 19.0, + "learning_rate": 2.473743152314288e-06, + "loss": 0.3656, + "step": 38020 + }, + { + "epoch": 19.0, + "learning_rate": 2.4698581759746e-06, + "loss": 0.3558, + "step": 38030 + }, + { + "epoch": 19.0, + "learning_rate": 2.465975651264931e-06, + "loss": 0.723, + "step": 38040 + }, + { + "epoch": 19.0, + "learning_rate": 2.462095580077575e-06, + "loss": 0.747, + "step": 38050 + }, + { + "epoch": 19.0, + "learning_rate": 2.4582179643036316e-06, + "loss": 0.5305, + "step": 38060 + }, + { + "epoch": 19.0, + "learning_rate": 2.4543428058330024e-06, + "loss": 0.5172, + "step": 38070 + }, + { + "epoch": 19.0, + "learning_rate": 2.450470106554396e-06, + "loss": 0.4605, + "step": 38080 + }, + { + "epoch": 19.0, + "learning_rate": 2.44659986835532e-06, + "loss": 0.3962, + "step": 38090 + }, + { + "epoch": 19.0, + "learning_rate": 2.442732093122077e-06, + "loss": 0.4428, + "step": 38100 + }, + { + "epoch": 19.0, + "learning_rate": 2.4388667827397724e-06, + "loss": 0.5217, + "step": 38110 + }, + { + "epoch": 19.0, + "learning_rate": 2.4350039390923164e-06, + "loss": 0.6184, + "step": 38120 + }, + { + "epoch": 19.0, + "learning_rate": 2.431143564062411e-06, + "loss": 0.4994, + "step": 38130 + }, + { + "epoch": 19.0, + "learning_rate": 2.427285659531556e-06, + "loss": 0.578, + "step": 38140 + }, + { + "epoch": 19.0, + "learning_rate": 2.4234302273800455e-06, + "loss": 0.2993, + "step": 38150 + }, + { + "epoch": 19.0, + "learning_rate": 2.4195772694869712e-06, + "loss": 0.3344, + "step": 38160 + }, + { + "epoch": 19.0, + "learning_rate": 2.415726787730216e-06, + "loss": 0.4295, + "step": 38170 + }, + { + "epoch": 19.0, + "learning_rate": 2.411878783986465e-06, + "loss": 0.697, + "step": 38180 + }, + { + "epoch": 19.0, + "learning_rate": 2.4080332601311813e-06, + "loss": 0.3132, + "step": 38190 + }, + { + "epoch": 19.0, + "learning_rate": 2.404190218038628e-06, + "loss": 0.5248, + "step": 38200 + }, + { + "epoch": 19.0, + "learning_rate": 2.4003496595818557e-06, + "loss": 0.457, + "step": 38210 + }, + { + "epoch": 19.0, + "learning_rate": 2.396511586632711e-06, + "loss": 0.3919, + "step": 38220 + }, + { + "epoch": 19.0, + "learning_rate": 2.39267600106182e-06, + "loss": 0.2957, + "step": 38230 + }, + { + "epoch": 19.0, + "learning_rate": 2.3888429047386022e-06, + "loss": 0.4636, + "step": 38240 + }, + { + "epoch": 19.0, + "learning_rate": 2.3850122995312623e-06, + "loss": 0.2432, + "step": 38250 + }, + { + "epoch": 19.01, + "learning_rate": 2.3811841873067865e-06, + "loss": 0.4494, + "step": 38260 + }, + { + "epoch": 19.01, + "learning_rate": 2.3773585699309594e-06, + "loss": 0.6232, + "step": 38270 + }, + { + "epoch": 19.01, + "learning_rate": 2.3735354492683315e-06, + "loss": 0.4032, + "step": 38280 + }, + { + "epoch": 19.01, + "learning_rate": 2.3697148271822498e-06, + "loss": 0.4197, + "step": 38290 + }, + { + "epoch": 19.01, + "learning_rate": 2.3658967055348347e-06, + "loss": 0.4868, + "step": 38300 + }, + { + "epoch": 19.01, + "learning_rate": 2.3620810861869978e-06, + "loss": 0.2848, + "step": 38310 + }, + { + "epoch": 19.01, + "learning_rate": 2.3582679709984236e-06, + "loss": 0.3721, + "step": 38320 + }, + { + "epoch": 19.01, + "learning_rate": 2.3544573618275777e-06, + "loss": 0.2844, + "step": 38330 + }, + { + "epoch": 19.01, + "learning_rate": 2.3506492605317054e-06, + "loss": 0.9406, + "step": 38340 + }, + { + "epoch": 19.01, + "learning_rate": 2.3468436689668285e-06, + "loss": 0.6308, + "step": 38350 + }, + { + "epoch": 19.01, + "learning_rate": 2.343040588987745e-06, + "loss": 0.4966, + "step": 38360 + }, + { + "epoch": 19.01, + "learning_rate": 2.3392400224480327e-06, + "loss": 0.2921, + "step": 38370 + }, + { + "epoch": 19.01, + "learning_rate": 2.3354419712000383e-06, + "loss": 0.7538, + "step": 38380 + }, + { + "epoch": 19.01, + "learning_rate": 2.331646437094888e-06, + "loss": 0.146, + "step": 38390 + }, + { + "epoch": 19.01, + "learning_rate": 2.3278534219824766e-06, + "loss": 0.588, + "step": 38400 + }, + { + "epoch": 19.01, + "learning_rate": 2.3240629277114765e-06, + "loss": 0.4177, + "step": 38410 + }, + { + "epoch": 19.01, + "learning_rate": 2.320274956129328e-06, + "loss": 0.1569, + "step": 38420 + }, + { + "epoch": 19.01, + "learning_rate": 2.316489509082243e-06, + "loss": 0.2267, + "step": 38430 + }, + { + "epoch": 19.01, + "learning_rate": 2.3127065884152e-06, + "loss": 0.4758, + "step": 38440 + }, + { + "epoch": 19.01, + "learning_rate": 2.30892619597195e-06, + "loss": 0.7699, + "step": 38450 + }, + { + "epoch": 19.01, + "learning_rate": 2.3051483335950098e-06, + "loss": 0.3688, + "step": 38460 + }, + { + "epoch": 19.01, + "learning_rate": 2.3013730031256647e-06, + "loss": 0.842, + "step": 38470 + }, + { + "epoch": 19.01, + "learning_rate": 2.297600206403964e-06, + "loss": 0.5726, + "step": 38480 + }, + { + "epoch": 19.01, + "learning_rate": 2.2938299452687213e-06, + "loss": 0.339, + "step": 38490 + }, + { + "epoch": 19.01, + "learning_rate": 2.29006222155752e-06, + "loss": 0.5403, + "step": 38500 + }, + { + "epoch": 19.01, + "learning_rate": 2.286297037106701e-06, + "loss": 0.1399, + "step": 38510 + }, + { + "epoch": 19.01, + "learning_rate": 2.282534393751369e-06, + "loss": 0.6494, + "step": 38520 + }, + { + "epoch": 19.01, + "learning_rate": 2.2787742933253936e-06, + "loss": 0.9051, + "step": 38530 + }, + { + "epoch": 19.01, + "learning_rate": 2.2750167376613945e-06, + "loss": 0.98, + "step": 38540 + }, + { + "epoch": 19.01, + "learning_rate": 2.2712617285907668e-06, + "loss": 0.5566, + "step": 38550 + }, + { + "epoch": 19.01, + "learning_rate": 2.2675092679436525e-06, + "loss": 0.3889, + "step": 38560 + }, + { + "epoch": 19.01, + "learning_rate": 2.2637593575489562e-06, + "loss": 0.2252, + "step": 38570 + }, + { + "epoch": 19.01, + "learning_rate": 2.260011999234338e-06, + "loss": 0.8652, + "step": 38580 + }, + { + "epoch": 19.01, + "learning_rate": 2.2562671948262134e-06, + "loss": 0.5661, + "step": 38590 + }, + { + "epoch": 19.01, + "learning_rate": 2.2525249461497585e-06, + "loss": 0.4515, + "step": 38600 + }, + { + "epoch": 19.01, + "learning_rate": 2.2487852550288986e-06, + "loss": 0.8839, + "step": 38610 + }, + { + "epoch": 19.01, + "learning_rate": 2.2450481232863144e-06, + "loss": 0.8046, + "step": 38620 + }, + { + "epoch": 19.01, + "learning_rate": 2.2413135527434324e-06, + "loss": 0.3477, + "step": 38630 + }, + { + "epoch": 19.01, + "learning_rate": 2.2375815452204444e-06, + "loss": 0.1162, + "step": 38640 + }, + { + "epoch": 19.01, + "learning_rate": 2.233852102536282e-06, + "loss": 0.2483, + "step": 38650 + }, + { + "epoch": 19.01, + "learning_rate": 2.230125226508632e-06, + "loss": 0.2738, + "step": 38660 + }, + { + "epoch": 19.01, + "learning_rate": 2.226400918953927e-06, + "loss": 0.2559, + "step": 38670 + }, + { + "epoch": 19.01, + "learning_rate": 2.222679181687349e-06, + "loss": 0.2962, + "step": 38680 + }, + { + "epoch": 19.01, + "learning_rate": 2.2189600165228265e-06, + "loss": 0.5954, + "step": 38690 + }, + { + "epoch": 19.01, + "learning_rate": 2.2152434252730397e-06, + "loss": 0.546, + "step": 38700 + }, + { + "epoch": 19.01, + "learning_rate": 2.2115294097494088e-06, + "loss": 0.5356, + "step": 38710 + }, + { + "epoch": 19.01, + "learning_rate": 2.207817971762097e-06, + "loss": 0.2988, + "step": 38720 + }, + { + "epoch": 19.01, + "learning_rate": 2.2041091131200133e-06, + "loss": 0.3633, + "step": 38730 + }, + { + "epoch": 19.01, + "learning_rate": 2.2004028356308146e-06, + "loss": 0.7466, + "step": 38740 + }, + { + "epoch": 19.02, + "learning_rate": 2.196699141100894e-06, + "loss": 0.8244, + "step": 38750 + }, + { + "epoch": 19.02, + "learning_rate": 2.192998031335387e-06, + "loss": 0.3217, + "step": 38760 + }, + { + "epoch": 19.02, + "learning_rate": 2.1892995081381687e-06, + "loss": 0.643, + "step": 38770 + }, + { + "epoch": 19.02, + "learning_rate": 2.185603573311853e-06, + "loss": 0.9874, + "step": 38780 + }, + { + "epoch": 19.02, + "learning_rate": 2.181910228657798e-06, + "loss": 0.6318, + "step": 38790 + }, + { + "epoch": 19.02, + "learning_rate": 2.1782194759760947e-06, + "loss": 0.5392, + "step": 38800 + }, + { + "epoch": 19.02, + "learning_rate": 2.1745313170655672e-06, + "loss": 0.3199, + "step": 38810 + }, + { + "epoch": 19.02, + "learning_rate": 2.170845753723778e-06, + "loss": 0.5739, + "step": 38820 + }, + { + "epoch": 19.02, + "learning_rate": 2.1671627877470326e-06, + "loss": 0.6472, + "step": 38830 + }, + { + "epoch": 19.02, + "learning_rate": 2.16348242093036e-06, + "loss": 0.6141, + "step": 38840 + }, + { + "epoch": 19.02, + "learning_rate": 2.1598046550675273e-06, + "loss": 0.4404, + "step": 38850 + }, + { + "epoch": 19.02, + "learning_rate": 2.156129491951031e-06, + "loss": 0.6016, + "step": 38860 + }, + { + "epoch": 19.02, + "learning_rate": 2.152456933372103e-06, + "loss": 0.3857, + "step": 38870 + }, + { + "epoch": 19.02, + "learning_rate": 2.1487869811207007e-06, + "loss": 0.5235, + "step": 38880 + }, + { + "epoch": 19.02, + "learning_rate": 2.1451196369855212e-06, + "loss": 0.7967, + "step": 38890 + }, + { + "epoch": 19.02, + "learning_rate": 2.1414549027539765e-06, + "loss": 0.2731, + "step": 38900 + }, + { + "epoch": 19.02, + "learning_rate": 2.1377927802122154e-06, + "loss": 0.4294, + "step": 38910 + }, + { + "epoch": 19.02, + "learning_rate": 2.1341332711451104e-06, + "loss": 0.7456, + "step": 38920 + }, + { + "epoch": 19.02, + "learning_rate": 2.130476377336266e-06, + "loss": 0.5222, + "step": 38930 + }, + { + "epoch": 19.02, + "learning_rate": 2.1268221005680057e-06, + "loss": 0.5666, + "step": 38940 + }, + { + "epoch": 19.02, + "learning_rate": 2.1231704426213795e-06, + "loss": 0.4093, + "step": 38950 + }, + { + "epoch": 19.02, + "learning_rate": 2.1195214052761614e-06, + "loss": 0.7834, + "step": 38960 + }, + { + "epoch": 19.02, + "learning_rate": 2.115874990310845e-06, + "loss": 0.4512, + "step": 38970 + }, + { + "epoch": 19.02, + "learning_rate": 2.1122311995026557e-06, + "loss": 0.4492, + "step": 38980 + }, + { + "epoch": 19.02, + "learning_rate": 2.108590034627526e-06, + "loss": 0.4992, + "step": 38990 + }, + { + "epoch": 19.02, + "learning_rate": 2.104951497460118e-06, + "loss": 0.3091, + "step": 39000 + }, + { + "epoch": 19.02, + "learning_rate": 2.1013155897738106e-06, + "loss": 0.411, + "step": 39010 + }, + { + "epoch": 19.02, + "learning_rate": 2.097682313340698e-06, + "loss": 0.9523, + "step": 39020 + }, + { + "epoch": 19.02, + "learning_rate": 2.0940516699316e-06, + "loss": 0.3464, + "step": 39030 + }, + { + "epoch": 19.02, + "learning_rate": 2.0904236613160463e-06, + "loss": 0.4488, + "step": 39040 + }, + { + "epoch": 19.02, + "learning_rate": 2.086798289262284e-06, + "loss": 0.4607, + "step": 39050 + }, + { + "epoch": 19.02, + "learning_rate": 2.0831755555372753e-06, + "loss": 0.4364, + "step": 39060 + }, + { + "epoch": 19.02, + "learning_rate": 2.0795554619066955e-06, + "loss": 0.5127, + "step": 39070 + }, + { + "epoch": 19.02, + "learning_rate": 2.075938010134936e-06, + "loss": 0.5928, + "step": 39080 + }, + { + "epoch": 19.02, + "learning_rate": 2.072323201985098e-06, + "loss": 0.431, + "step": 39090 + }, + { + "epoch": 19.02, + "learning_rate": 2.068711039218996e-06, + "loss": 0.4404, + "step": 39100 + }, + { + "epoch": 19.02, + "learning_rate": 2.06510152359715e-06, + "loss": 0.1239, + "step": 39110 + }, + { + "epoch": 19.02, + "learning_rate": 2.0614946568788e-06, + "loss": 0.8922, + "step": 39120 + }, + { + "epoch": 19.02, + "learning_rate": 2.057890440821886e-06, + "loss": 0.5615, + "step": 39130 + }, + { + "epoch": 19.02, + "learning_rate": 2.0542888771830605e-06, + "loss": 0.6164, + "step": 39140 + }, + { + "epoch": 19.02, + "learning_rate": 2.050689967717681e-06, + "loss": 0.3161, + "step": 39150 + }, + { + "epoch": 19.02, + "learning_rate": 2.0470937141798125e-06, + "loss": 0.3372, + "step": 39160 + }, + { + "epoch": 19.02, + "learning_rate": 2.0435001183222243e-06, + "loss": 0.6414, + "step": 39170 + }, + { + "epoch": 19.02, + "learning_rate": 2.039909181896393e-06, + "loss": 0.9572, + "step": 39180 + }, + { + "epoch": 19.02, + "learning_rate": 2.036320906652495e-06, + "loss": 0.6156, + "step": 39190 + }, + { + "epoch": 19.02, + "learning_rate": 2.032735294339415e-06, + "loss": 0.1312, + "step": 39200 + }, + { + "epoch": 19.02, + "learning_rate": 2.029152346704733e-06, + "loss": 0.865, + "step": 39210 + }, + { + "epoch": 19.02, + "learning_rate": 2.0255720654947383e-06, + "loss": 0.4385, + "step": 39220 + }, + { + "epoch": 19.02, + "learning_rate": 2.021994452454415e-06, + "loss": 0.4145, + "step": 39230 + }, + { + "epoch": 19.02, + "learning_rate": 2.0184195093274506e-06, + "loss": 0.4148, + "step": 39240 + }, + { + "epoch": 19.02, + "learning_rate": 2.0148472378562215e-06, + "loss": 0.4605, + "step": 39250 + }, + { + "epoch": 19.03, + "learning_rate": 2.0112776397818165e-06, + "loss": 0.4646, + "step": 39260 + }, + { + "epoch": 19.03, + "learning_rate": 2.007710716844012e-06, + "loss": 0.8151, + "step": 39270 + }, + { + "epoch": 19.03, + "learning_rate": 2.0041464707812835e-06, + "loss": 0.3476, + "step": 39280 + }, + { + "epoch": 19.03, + "learning_rate": 2.0005849033308014e-06, + "loss": 0.712, + "step": 39290 + }, + { + "epoch": 19.03, + "learning_rate": 1.9970260162284266e-06, + "loss": 0.566, + "step": 39300 + }, + { + "epoch": 19.03, + "learning_rate": 1.993469811208723e-06, + "loss": 0.6018, + "step": 39310 + }, + { + "epoch": 19.03, + "learning_rate": 1.9899162900049393e-06, + "loss": 0.6175, + "step": 39320 + }, + { + "epoch": 19.03, + "learning_rate": 1.9863654543490208e-06, + "loss": 0.6559, + "step": 39330 + }, + { + "epoch": 19.03, + "learning_rate": 1.982817305971595e-06, + "loss": 0.336, + "step": 39340 + }, + { + "epoch": 19.03, + "learning_rate": 1.9792718466019927e-06, + "loss": 0.7435, + "step": 39350 + }, + { + "epoch": 19.03, + "learning_rate": 1.975729077968226e-06, + "loss": 0.3189, + "step": 39360 + }, + { + "epoch": 19.03, + "learning_rate": 1.972189001796997e-06, + "loss": 0.7507, + "step": 39370 + }, + { + "epoch": 19.03, + "learning_rate": 1.9686516198136965e-06, + "loss": 0.7727, + "step": 39380 + }, + { + "epoch": 19.03, + "learning_rate": 1.9651169337424006e-06, + "loss": 0.3345, + "step": 39390 + }, + { + "epoch": 19.03, + "learning_rate": 1.9615849453058707e-06, + "loss": 0.193, + "step": 39400 + }, + { + "epoch": 19.03, + "learning_rate": 1.958055656225559e-06, + "loss": 0.8061, + "step": 39410 + }, + { + "epoch": 19.03, + "learning_rate": 1.9545290682215987e-06, + "loss": 0.4554, + "step": 39420 + }, + { + "epoch": 19.03, + "learning_rate": 1.9510051830128007e-06, + "loss": 0.2398, + "step": 39430 + }, + { + "epoch": 19.03, + "learning_rate": 1.947484002316664e-06, + "loss": 0.1954, + "step": 39440 + }, + { + "epoch": 19.03, + "learning_rate": 1.943965527849375e-06, + "loss": 0.4979, + "step": 39450 + }, + { + "epoch": 19.03, + "learning_rate": 1.9404497613257914e-06, + "loss": 0.4122, + "step": 39460 + }, + { + "epoch": 19.03, + "learning_rate": 1.9369367044594567e-06, + "loss": 0.501, + "step": 39470 + }, + { + "epoch": 19.03, + "learning_rate": 1.9334263589625904e-06, + "loss": 0.4208, + "step": 39480 + }, + { + "epoch": 19.03, + "learning_rate": 1.929918726546092e-06, + "loss": 0.4463, + "step": 39490 + }, + { + "epoch": 19.03, + "learning_rate": 1.9264138089195424e-06, + "loss": 0.7467, + "step": 39500 + }, + { + "epoch": 19.03, + "learning_rate": 1.922911607791196e-06, + "loss": 0.4804, + "step": 39510 + }, + { + "epoch": 19.03, + "learning_rate": 1.9194121248679772e-06, + "loss": 0.7707, + "step": 39520 + }, + { + "epoch": 19.03, + "learning_rate": 1.915915361855496e-06, + "loss": 0.4711, + "step": 39530 + }, + { + "epoch": 19.03, + "learning_rate": 1.912421320458029e-06, + "loss": 0.5653, + "step": 39540 + }, + { + "epoch": 19.03, + "learning_rate": 1.9089300023785338e-06, + "loss": 0.473, + "step": 39550 + }, + { + "epoch": 19.03, + "learning_rate": 1.9054414093186343e-06, + "loss": 0.4003, + "step": 39560 + }, + { + "epoch": 19.03, + "learning_rate": 1.9019555429786287e-06, + "loss": 0.8618, + "step": 39570 + }, + { + "epoch": 19.03, + "learning_rate": 1.8984724050574857e-06, + "loss": 0.3744, + "step": 39580 + }, + { + "epoch": 19.03, + "learning_rate": 1.8949919972528412e-06, + "loss": 0.3537, + "step": 39590 + }, + { + "epoch": 19.03, + "learning_rate": 1.891514321261012e-06, + "loss": 0.6167, + "step": 39600 + }, + { + "epoch": 19.03, + "learning_rate": 1.888039378776968e-06, + "loss": 0.5296, + "step": 39610 + }, + { + "epoch": 19.03, + "learning_rate": 1.8845671714943557e-06, + "loss": 0.701, + "step": 39620 + }, + { + "epoch": 19.03, + "learning_rate": 1.8810977011054845e-06, + "loss": 0.4249, + "step": 39630 + }, + { + "epoch": 19.03, + "learning_rate": 1.877630969301337e-06, + "loss": 0.4951, + "step": 39640 + }, + { + "epoch": 19.03, + "learning_rate": 1.8741669777715532e-06, + "loss": 0.4541, + "step": 39650 + }, + { + "epoch": 19.03, + "learning_rate": 1.8707057282044417e-06, + "loss": 0.7534, + "step": 39660 + }, + { + "epoch": 19.03, + "learning_rate": 1.867247222286973e-06, + "loss": 0.2315, + "step": 39670 + }, + { + "epoch": 19.03, + "learning_rate": 1.86379146170478e-06, + "loss": 0.6699, + "step": 39680 + }, + { + "epoch": 19.03, + "learning_rate": 1.8603384481421596e-06, + "loss": 0.4909, + "step": 39690 + }, + { + "epoch": 19.03, + "learning_rate": 1.856888183282068e-06, + "loss": 0.4573, + "step": 39700 + }, + { + "epoch": 19.03, + "learning_rate": 1.8534406688061228e-06, + "loss": 0.5582, + "step": 39710 + }, + { + "epoch": 19.03, + "learning_rate": 1.8499959063946007e-06, + "loss": 0.4972, + "step": 39720 + }, + { + "epoch": 19.03, + "learning_rate": 1.8465538977264343e-06, + "loss": 0.1831, + "step": 39730 + }, + { + "epoch": 19.03, + "learning_rate": 1.843114644479221e-06, + "loss": 0.9039, + "step": 39740 + }, + { + "epoch": 19.04, + "learning_rate": 1.83967814832921e-06, + "loss": 0.4582, + "step": 39750 + }, + { + "epoch": 19.04, + "learning_rate": 1.8362444109513072e-06, + "loss": 0.4638, + "step": 39760 + }, + { + "epoch": 19.04, + "learning_rate": 1.8328134340190738e-06, + "loss": 0.4641, + "step": 39770 + }, + { + "epoch": 19.04, + "learning_rate": 1.8293852192047275e-06, + "loss": 0.7425, + "step": 39780 + }, + { + "epoch": 19.04, + "learning_rate": 1.8259597681791374e-06, + "loss": 0.8255, + "step": 39790 + }, + { + "epoch": 19.04, + "learning_rate": 1.822537082611828e-06, + "loss": 0.5125, + "step": 39800 + }, + { + "epoch": 19.04, + "learning_rate": 1.819117164170973e-06, + "loss": 0.5191, + "step": 39810 + }, + { + "epoch": 19.04, + "learning_rate": 1.8157000145233987e-06, + "loss": 0.4591, + "step": 39820 + }, + { + "epoch": 19.04, + "learning_rate": 1.812285635334586e-06, + "loss": 0.2738, + "step": 39830 + }, + { + "epoch": 19.04, + "learning_rate": 1.8088740282686587e-06, + "loss": 0.3304, + "step": 39840 + }, + { + "epoch": 19.04, + "learning_rate": 1.8054651949883941e-06, + "loss": 0.6303, + "step": 39850 + }, + { + "epoch": 19.04, + "learning_rate": 1.8020591371552175e-06, + "loss": 0.7347, + "step": 39860 + }, + { + "epoch": 19.04, + "learning_rate": 1.7986558564291935e-06, + "loss": 0.5348, + "step": 39870 + }, + { + "epoch": 19.04, + "learning_rate": 1.7952553544690462e-06, + "loss": 0.2213, + "step": 39880 + }, + { + "epoch": 19.04, + "learning_rate": 1.791857632932138e-06, + "loss": 0.4525, + "step": 39890 + }, + { + "epoch": 19.04, + "learning_rate": 1.7884626934744759e-06, + "loss": 0.3864, + "step": 39900 + }, + { + "epoch": 19.04, + "learning_rate": 1.785070537750712e-06, + "loss": 0.5979, + "step": 39910 + }, + { + "epoch": 19.04, + "learning_rate": 1.78168116741414e-06, + "loss": 0.3264, + "step": 39920 + }, + { + "epoch": 19.04, + "learning_rate": 1.7782945841167034e-06, + "loss": 0.3334, + "step": 39930 + }, + { + "epoch": 19.04, + "learning_rate": 1.7749107895089774e-06, + "loss": 0.6249, + "step": 39940 + }, + { + "epoch": 19.04, + "learning_rate": 1.7715297852401858e-06, + "loss": 0.3204, + "step": 39950 + }, + { + "epoch": 19.04, + "learning_rate": 1.7681515729581825e-06, + "loss": 0.2486, + "step": 39960 + }, + { + "epoch": 19.04, + "learning_rate": 1.764776154309473e-06, + "loss": 0.3564, + "step": 39970 + }, + { + "epoch": 19.04, + "learning_rate": 1.7614035309391938e-06, + "loss": 0.6261, + "step": 39980 + }, + { + "epoch": 19.04, + "learning_rate": 1.758033704491121e-06, + "loss": 0.5859, + "step": 39990 + }, + { + "epoch": 19.04, + "learning_rate": 1.7546666766076658e-06, + "loss": 1.0645, + "step": 40000 + }, + { + "epoch": 19.04, + "eval_accuracy": 0.8810526315789474, + "eval_f1": 0.8810526315789474, + "eval_loss": 0.7618152499198914, + "eval_runtime": 771.4019, + "eval_samples_per_second": 6.158, + "eval_steps_per_second": 1.54, + "step": 40000 + }, + { + "epoch": 20.0, + "learning_rate": 1.751302448929877e-06, + "loss": 0.2598, + "step": 40010 + }, + { + "epoch": 20.0, + "learning_rate": 1.7479410230974375e-06, + "loss": 0.4122, + "step": 40020 + }, + { + "epoch": 20.0, + "learning_rate": 1.744582400748668e-06, + "loss": 0.283, + "step": 40030 + }, + { + "epoch": 20.0, + "learning_rate": 1.741226583520521e-06, + "loss": 0.5017, + "step": 40040 + }, + { + "epoch": 20.0, + "learning_rate": 1.7378735730485766e-06, + "loss": 0.9872, + "step": 40050 + }, + { + "epoch": 20.0, + "learning_rate": 1.7345233709670513e-06, + "loss": 0.5128, + "step": 40060 + }, + { + "epoch": 20.0, + "learning_rate": 1.7311759789087977e-06, + "loss": 0.4265, + "step": 40070 + }, + { + "epoch": 20.0, + "learning_rate": 1.7278313985052915e-06, + "loss": 0.5808, + "step": 40080 + }, + { + "epoch": 20.0, + "learning_rate": 1.7244896313866404e-06, + "loss": 0.3198, + "step": 40090 + }, + { + "epoch": 20.0, + "learning_rate": 1.7211506791815815e-06, + "loss": 0.5577, + "step": 40100 + }, + { + "epoch": 20.0, + "learning_rate": 1.7178145435174772e-06, + "loss": 0.5626, + "step": 40110 + }, + { + "epoch": 20.0, + "learning_rate": 1.7144812260203234e-06, + "loss": 0.3418, + "step": 40120 + }, + { + "epoch": 20.0, + "learning_rate": 1.7111507283147392e-06, + "loss": 0.2519, + "step": 40130 + }, + { + "epoch": 20.0, + "learning_rate": 1.7078230520239644e-06, + "loss": 0.3026, + "step": 40140 + }, + { + "epoch": 20.0, + "learning_rate": 1.7044981987698672e-06, + "loss": 0.3616, + "step": 40150 + }, + { + "epoch": 20.0, + "learning_rate": 1.7011761701729456e-06, + "loss": 0.4037, + "step": 40160 + }, + { + "epoch": 20.0, + "learning_rate": 1.6978569678523137e-06, + "loss": 0.2516, + "step": 40170 + }, + { + "epoch": 20.0, + "learning_rate": 1.6945405934257107e-06, + "loss": 0.3255, + "step": 40180 + }, + { + "epoch": 20.0, + "learning_rate": 1.6912270485094972e-06, + "loss": 0.3581, + "step": 40190 + }, + { + "epoch": 20.0, + "learning_rate": 1.6879163347186536e-06, + "loss": 0.8509, + "step": 40200 + }, + { + "epoch": 20.0, + "learning_rate": 1.6846084536667816e-06, + "loss": 0.6085, + "step": 40210 + }, + { + "epoch": 20.0, + "learning_rate": 1.681303406966106e-06, + "loss": 0.3777, + "step": 40220 + }, + { + "epoch": 20.0, + "learning_rate": 1.6780011962274622e-06, + "loss": 0.3812, + "step": 40230 + }, + { + "epoch": 20.0, + "learning_rate": 1.6747018230603087e-06, + "loss": 0.9465, + "step": 40240 + }, + { + "epoch": 20.0, + "learning_rate": 1.6714052890727182e-06, + "loss": 0.4388, + "step": 40250 + }, + { + "epoch": 20.01, + "learning_rate": 1.668111595871385e-06, + "loss": 0.6455, + "step": 40260 + }, + { + "epoch": 20.01, + "learning_rate": 1.6648207450616137e-06, + "loss": 0.2788, + "step": 40270 + }, + { + "epoch": 20.01, + "learning_rate": 1.661532738247325e-06, + "loss": 0.7803, + "step": 40280 + }, + { + "epoch": 20.01, + "learning_rate": 1.6582475770310526e-06, + "loss": 0.5762, + "step": 40290 + }, + { + "epoch": 20.01, + "learning_rate": 1.6549652630139425e-06, + "loss": 0.5437, + "step": 40300 + }, + { + "epoch": 20.01, + "learning_rate": 1.6516857977957617e-06, + "loss": 0.5942, + "step": 40310 + }, + { + "epoch": 20.01, + "learning_rate": 1.6484091829748748e-06, + "loss": 0.7837, + "step": 40320 + }, + { + "epoch": 20.01, + "learning_rate": 1.6451354201482654e-06, + "loss": 0.6525, + "step": 40330 + }, + { + "epoch": 20.01, + "learning_rate": 1.6418645109115255e-06, + "loss": 0.5469, + "step": 40340 + }, + { + "epoch": 20.01, + "learning_rate": 1.6385964568588587e-06, + "loss": 0.2445, + "step": 40350 + }, + { + "epoch": 20.01, + "learning_rate": 1.6353312595830746e-06, + "loss": 0.3352, + "step": 40360 + }, + { + "epoch": 20.01, + "learning_rate": 1.6320689206755892e-06, + "loss": 0.3698, + "step": 40370 + }, + { + "epoch": 20.01, + "learning_rate": 1.6288094417264273e-06, + "loss": 0.4792, + "step": 40380 + }, + { + "epoch": 20.01, + "learning_rate": 1.6255528243242198e-06, + "loss": 0.5886, + "step": 40390 + }, + { + "epoch": 20.01, + "learning_rate": 1.622299070056202e-06, + "loss": 0.4715, + "step": 40400 + }, + { + "epoch": 20.01, + "learning_rate": 1.6190481805082146e-06, + "loss": 0.5926, + "step": 40410 + }, + { + "epoch": 20.01, + "learning_rate": 1.615800157264701e-06, + "loss": 0.5368, + "step": 40420 + }, + { + "epoch": 20.01, + "learning_rate": 1.612555001908709e-06, + "loss": 0.2857, + "step": 40430 + }, + { + "epoch": 20.01, + "learning_rate": 1.609312716021886e-06, + "loss": 0.6023, + "step": 40440 + }, + { + "epoch": 20.01, + "learning_rate": 1.6060733011844852e-06, + "loss": 0.3041, + "step": 40450 + }, + { + "epoch": 20.01, + "learning_rate": 1.602836758975358e-06, + "loss": 0.2443, + "step": 40460 + }, + { + "epoch": 20.01, + "learning_rate": 1.5996030909719554e-06, + "loss": 0.4438, + "step": 40470 + }, + { + "epoch": 20.01, + "learning_rate": 1.5963722987503259e-06, + "loss": 0.6301, + "step": 40480 + }, + { + "epoch": 20.01, + "learning_rate": 1.5931443838851215e-06, + "loss": 0.3789, + "step": 40490 + }, + { + "epoch": 20.01, + "learning_rate": 1.5899193479495858e-06, + "loss": 0.7228, + "step": 40500 + }, + { + "epoch": 20.01, + "learning_rate": 1.5866971925155641e-06, + "loss": 0.3848, + "step": 40510 + }, + { + "epoch": 20.01, + "learning_rate": 1.583477919153495e-06, + "loss": 0.2661, + "step": 40520 + }, + { + "epoch": 20.01, + "learning_rate": 1.5802615294324137e-06, + "loss": 0.503, + "step": 40530 + }, + { + "epoch": 20.01, + "learning_rate": 1.577048024919947e-06, + "loss": 0.3205, + "step": 40540 + }, + { + "epoch": 20.01, + "learning_rate": 1.5738374071823219e-06, + "loss": 0.7299, + "step": 40550 + }, + { + "epoch": 20.01, + "learning_rate": 1.5706296777843525e-06, + "loss": 0.4654, + "step": 40560 + }, + { + "epoch": 20.01, + "learning_rate": 1.5674248382894501e-06, + "loss": 0.4685, + "step": 40570 + }, + { + "epoch": 20.01, + "learning_rate": 1.5642228902596076e-06, + "loss": 0.5656, + "step": 40580 + }, + { + "epoch": 20.01, + "learning_rate": 1.5610238352554214e-06, + "loss": 0.3782, + "step": 40590 + }, + { + "epoch": 20.01, + "learning_rate": 1.5578276748360706e-06, + "loss": 0.275, + "step": 40600 + }, + { + "epoch": 20.01, + "learning_rate": 1.5546344105593247e-06, + "loss": 0.6039, + "step": 40610 + }, + { + "epoch": 20.01, + "learning_rate": 1.5514440439815419e-06, + "loss": 0.5187, + "step": 40620 + }, + { + "epoch": 20.01, + "learning_rate": 1.5482565766576661e-06, + "loss": 0.4336, + "step": 40630 + }, + { + "epoch": 20.01, + "learning_rate": 1.5450720101412346e-06, + "loss": 0.6509, + "step": 40640 + }, + { + "epoch": 20.01, + "learning_rate": 1.5418903459843633e-06, + "loss": 0.2785, + "step": 40650 + }, + { + "epoch": 20.01, + "learning_rate": 1.5387115857377602e-06, + "loss": 0.355, + "step": 40660 + }, + { + "epoch": 20.01, + "learning_rate": 1.535535730950708e-06, + "loss": 0.5541, + "step": 40670 + }, + { + "epoch": 20.01, + "learning_rate": 1.5323627831710843e-06, + "loss": 0.2017, + "step": 40680 + }, + { + "epoch": 20.01, + "learning_rate": 1.5291927439453457e-06, + "loss": 0.5485, + "step": 40690 + }, + { + "epoch": 20.01, + "learning_rate": 1.526025614818528e-06, + "loss": 0.6539, + "step": 40700 + }, + { + "epoch": 20.01, + "learning_rate": 1.5228613973342541e-06, + "loss": 1.0763, + "step": 40710 + }, + { + "epoch": 20.01, + "learning_rate": 1.5197000930347237e-06, + "loss": 0.388, + "step": 40720 + }, + { + "epoch": 20.01, + "learning_rate": 1.5165417034607169e-06, + "loss": 0.534, + "step": 40730 + }, + { + "epoch": 20.01, + "learning_rate": 1.5133862301515977e-06, + "loss": 0.3051, + "step": 40740 + }, + { + "epoch": 20.02, + "learning_rate": 1.5102336746453054e-06, + "loss": 0.4692, + "step": 40750 + }, + { + "epoch": 20.02, + "learning_rate": 1.5070840384783534e-06, + "loss": 0.5556, + "step": 40760 + }, + { + "epoch": 20.02, + "learning_rate": 1.5039373231858372e-06, + "loss": 0.5806, + "step": 40770 + }, + { + "epoch": 20.02, + "learning_rate": 1.5007935303014297e-06, + "loss": 0.6341, + "step": 40780 + }, + { + "epoch": 20.02, + "learning_rate": 1.4976526613573772e-06, + "loss": 0.6556, + "step": 40790 + }, + { + "epoch": 20.02, + "learning_rate": 1.4945147178844997e-06, + "loss": 0.4185, + "step": 40800 + }, + { + "epoch": 20.02, + "learning_rate": 1.4913797014121927e-06, + "loss": 0.4563, + "step": 40810 + }, + { + "epoch": 20.02, + "learning_rate": 1.4882476134684245e-06, + "loss": 0.5987, + "step": 40820 + }, + { + "epoch": 20.02, + "learning_rate": 1.4851184555797397e-06, + "loss": 0.3656, + "step": 40830 + }, + { + "epoch": 20.02, + "learning_rate": 1.481992229271252e-06, + "loss": 0.556, + "step": 40840 + }, + { + "epoch": 20.02, + "learning_rate": 1.4788689360666423e-06, + "loss": 0.7573, + "step": 40850 + }, + { + "epoch": 20.02, + "learning_rate": 1.4757485774881682e-06, + "loss": 0.2427, + "step": 40860 + }, + { + "epoch": 20.02, + "learning_rate": 1.4726311550566512e-06, + "loss": 0.5579, + "step": 40870 + }, + { + "epoch": 20.02, + "learning_rate": 1.4695166702914903e-06, + "loss": 0.4394, + "step": 40880 + }, + { + "epoch": 20.02, + "learning_rate": 1.4664051247106443e-06, + "loss": 0.7392, + "step": 40890 + }, + { + "epoch": 20.02, + "learning_rate": 1.4632965198306448e-06, + "loss": 0.6134, + "step": 40900 + }, + { + "epoch": 20.02, + "learning_rate": 1.460190857166586e-06, + "loss": 0.6297, + "step": 40910 + }, + { + "epoch": 20.02, + "learning_rate": 1.4570881382321298e-06, + "loss": 0.5465, + "step": 40920 + }, + { + "epoch": 20.02, + "learning_rate": 1.453988364539508e-06, + "loss": 0.503, + "step": 40930 + }, + { + "epoch": 20.02, + "learning_rate": 1.4508915375995082e-06, + "loss": 0.3181, + "step": 40940 + }, + { + "epoch": 20.02, + "learning_rate": 1.4477976589214873e-06, + "loss": 0.6384, + "step": 40950 + }, + { + "epoch": 20.02, + "learning_rate": 1.4447067300133621e-06, + "loss": 0.6659, + "step": 40960 + }, + { + "epoch": 20.02, + "learning_rate": 1.4416187523816186e-06, + "loss": 0.605, + "step": 40970 + }, + { + "epoch": 20.02, + "learning_rate": 1.4385337275312969e-06, + "loss": 0.5757, + "step": 40980 + }, + { + "epoch": 20.02, + "learning_rate": 1.4354516569660009e-06, + "loss": 0.387, + "step": 40990 + }, + { + "epoch": 20.02, + "learning_rate": 1.432372542187895e-06, + "loss": 0.5196, + "step": 41000 + }, + { + "epoch": 20.02, + "learning_rate": 1.429296384697701e-06, + "loss": 0.3582, + "step": 41010 + }, + { + "epoch": 20.02, + "learning_rate": 1.4262231859947016e-06, + "loss": 0.516, + "step": 41020 + }, + { + "epoch": 20.02, + "learning_rate": 1.423152947576736e-06, + "loss": 0.3667, + "step": 41030 + }, + { + "epoch": 20.02, + "learning_rate": 1.4200856709402014e-06, + "loss": 0.3674, + "step": 41040 + }, + { + "epoch": 20.02, + "learning_rate": 1.4170213575800518e-06, + "loss": 0.7149, + "step": 41050 + }, + { + "epoch": 20.02, + "learning_rate": 1.4139600089897933e-06, + "loss": 0.2172, + "step": 41060 + }, + { + "epoch": 20.02, + "learning_rate": 1.4109016266614934e-06, + "loss": 0.5621, + "step": 41070 + }, + { + "epoch": 20.02, + "learning_rate": 1.4078462120857708e-06, + "loss": 0.4305, + "step": 41080 + }, + { + "epoch": 20.02, + "learning_rate": 1.4047937667517954e-06, + "loss": 0.4987, + "step": 41090 + }, + { + "epoch": 20.02, + "learning_rate": 1.4017442921472933e-06, + "loss": 0.6915, + "step": 41100 + }, + { + "epoch": 20.02, + "learning_rate": 1.3986977897585398e-06, + "loss": 0.5445, + "step": 41110 + }, + { + "epoch": 20.02, + "learning_rate": 1.3956542610703654e-06, + "loss": 0.4355, + "step": 41120 + }, + { + "epoch": 20.02, + "learning_rate": 1.392613707566147e-06, + "loss": 0.7685, + "step": 41130 + }, + { + "epoch": 20.02, + "learning_rate": 1.3895761307278137e-06, + "loss": 0.4018, + "step": 41140 + }, + { + "epoch": 20.02, + "learning_rate": 1.3865415320358427e-06, + "loss": 0.6319, + "step": 41150 + }, + { + "epoch": 20.02, + "learning_rate": 1.3835099129692637e-06, + "loss": 0.6525, + "step": 41160 + }, + { + "epoch": 20.02, + "learning_rate": 1.3804812750056494e-06, + "loss": 0.5524, + "step": 41170 + }, + { + "epoch": 20.02, + "learning_rate": 1.3774556196211206e-06, + "loss": 0.2176, + "step": 41180 + }, + { + "epoch": 20.02, + "learning_rate": 1.3744329482903447e-06, + "loss": 0.4774, + "step": 41190 + }, + { + "epoch": 20.02, + "learning_rate": 1.3714132624865363e-06, + "loss": 0.8767, + "step": 41200 + }, + { + "epoch": 20.02, + "learning_rate": 1.368396563681453e-06, + "loss": 0.9403, + "step": 41210 + }, + { + "epoch": 20.02, + "learning_rate": 1.3653828533453968e-06, + "loss": 0.6222, + "step": 41220 + }, + { + "epoch": 20.02, + "learning_rate": 1.3623721329472139e-06, + "loss": 0.8261, + "step": 41230 + }, + { + "epoch": 20.02, + "learning_rate": 1.3593644039542919e-06, + "loss": 0.5185, + "step": 41240 + }, + { + "epoch": 20.02, + "learning_rate": 1.3563596678325607e-06, + "loss": 0.6334, + "step": 41250 + }, + { + "epoch": 20.03, + "learning_rate": 1.3533579260464956e-06, + "loss": 0.3334, + "step": 41260 + }, + { + "epoch": 20.03, + "learning_rate": 1.3503591800591072e-06, + "loss": 0.3246, + "step": 41270 + }, + { + "epoch": 20.03, + "learning_rate": 1.3473634313319497e-06, + "loss": 0.661, + "step": 41280 + }, + { + "epoch": 20.03, + "learning_rate": 1.3443706813251082e-06, + "loss": 0.5927, + "step": 41290 + }, + { + "epoch": 20.03, + "learning_rate": 1.3413809314972197e-06, + "loss": 0.2087, + "step": 41300 + }, + { + "epoch": 20.03, + "learning_rate": 1.33839418330545e-06, + "loss": 0.3979, + "step": 41310 + }, + { + "epoch": 20.03, + "learning_rate": 1.3354104382055022e-06, + "loss": 0.654, + "step": 41320 + }, + { + "epoch": 20.03, + "learning_rate": 1.3324296976516195e-06, + "loss": 0.5012, + "step": 41330 + }, + { + "epoch": 20.03, + "learning_rate": 1.3294519630965752e-06, + "loss": 0.5119, + "step": 41340 + }, + { + "epoch": 20.03, + "learning_rate": 1.3264772359916855e-06, + "loss": 0.2278, + "step": 41350 + }, + { + "epoch": 20.03, + "learning_rate": 1.3235055177867942e-06, + "loss": 0.5415, + "step": 41360 + }, + { + "epoch": 20.03, + "learning_rate": 1.3205368099302818e-06, + "loss": 0.6114, + "step": 41370 + }, + { + "epoch": 20.03, + "learning_rate": 1.3175711138690582e-06, + "loss": 0.3263, + "step": 41380 + }, + { + "epoch": 20.03, + "learning_rate": 1.3146084310485668e-06, + "loss": 0.4803, + "step": 41390 + }, + { + "epoch": 20.03, + "learning_rate": 1.3116487629127872e-06, + "loss": 0.3701, + "step": 41400 + }, + { + "epoch": 20.03, + "learning_rate": 1.308692110904223e-06, + "loss": 0.5197, + "step": 41410 + }, + { + "epoch": 20.03, + "learning_rate": 1.3057384764639107e-06, + "loss": 0.7112, + "step": 41420 + }, + { + "epoch": 20.03, + "learning_rate": 1.3027878610314173e-06, + "loss": 0.5485, + "step": 41430 + }, + { + "epoch": 20.03, + "learning_rate": 1.2998402660448339e-06, + "loss": 0.3948, + "step": 41440 + }, + { + "epoch": 20.03, + "learning_rate": 1.2968956929407854e-06, + "loss": 0.9255, + "step": 41450 + }, + { + "epoch": 20.03, + "learning_rate": 1.2939541431544224e-06, + "loss": 0.6046, + "step": 41460 + }, + { + "epoch": 20.03, + "learning_rate": 1.2910156181194163e-06, + "loss": 0.5436, + "step": 41470 + }, + { + "epoch": 20.03, + "learning_rate": 1.288080119267969e-06, + "loss": 0.6322, + "step": 41480 + }, + { + "epoch": 20.03, + "learning_rate": 1.2851476480308092e-06, + "loss": 0.8129, + "step": 41490 + }, + { + "epoch": 20.03, + "learning_rate": 1.282218205837188e-06, + "loss": 0.2926, + "step": 41500 + }, + { + "epoch": 20.03, + "learning_rate": 1.2792917941148778e-06, + "loss": 0.4101, + "step": 41510 + }, + { + "epoch": 20.03, + "learning_rate": 1.2763684142901778e-06, + "loss": 0.6326, + "step": 41520 + }, + { + "epoch": 20.03, + "learning_rate": 1.2734480677879066e-06, + "loss": 0.7398, + "step": 41530 + }, + { + "epoch": 20.03, + "learning_rate": 1.270530756031404e-06, + "loss": 0.5005, + "step": 41540 + }, + { + "epoch": 20.03, + "learning_rate": 1.2676164804425391e-06, + "loss": 0.3263, + "step": 41550 + }, + { + "epoch": 20.03, + "learning_rate": 1.2647052424416878e-06, + "loss": 0.3796, + "step": 41560 + }, + { + "epoch": 20.03, + "learning_rate": 1.2617970434477532e-06, + "loss": 0.491, + "step": 41570 + }, + { + "epoch": 20.03, + "learning_rate": 1.258891884878156e-06, + "loss": 0.426, + "step": 41580 + }, + { + "epoch": 20.03, + "learning_rate": 1.2559897681488377e-06, + "loss": 0.712, + "step": 41590 + }, + { + "epoch": 20.03, + "learning_rate": 1.2530906946742544e-06, + "loss": 0.29, + "step": 41600 + }, + { + "epoch": 20.03, + "learning_rate": 1.2501946658673771e-06, + "loss": 0.5593, + "step": 41610 + }, + { + "epoch": 20.03, + "learning_rate": 1.2473016831396962e-06, + "loss": 0.5783, + "step": 41620 + }, + { + "epoch": 20.03, + "learning_rate": 1.2444117479012166e-06, + "loss": 0.5131, + "step": 41630 + }, + { + "epoch": 20.03, + "learning_rate": 1.2415248615604577e-06, + "loss": 0.4384, + "step": 41640 + }, + { + "epoch": 20.03, + "learning_rate": 1.2386410255244518e-06, + "loss": 0.307, + "step": 41650 + }, + { + "epoch": 20.03, + "learning_rate": 1.235760241198747e-06, + "loss": 0.4889, + "step": 41660 + }, + { + "epoch": 20.03, + "learning_rate": 1.2328825099873995e-06, + "loss": 0.3653, + "step": 41670 + }, + { + "epoch": 20.03, + "learning_rate": 1.2300078332929845e-06, + "loss": 0.583, + "step": 41680 + }, + { + "epoch": 20.03, + "learning_rate": 1.2271362125165825e-06, + "loss": 0.5165, + "step": 41690 + }, + { + "epoch": 20.03, + "learning_rate": 1.224267649057788e-06, + "loss": 0.6934, + "step": 41700 + }, + { + "epoch": 20.03, + "learning_rate": 1.2214021443147022e-06, + "loss": 0.4905, + "step": 41710 + }, + { + "epoch": 20.03, + "learning_rate": 1.2185396996839376e-06, + "loss": 0.6703, + "step": 41720 + }, + { + "epoch": 20.03, + "learning_rate": 1.2156803165606156e-06, + "loss": 0.7073, + "step": 41730 + }, + { + "epoch": 20.03, + "learning_rate": 1.2128239963383647e-06, + "loss": 0.314, + "step": 41740 + }, + { + "epoch": 20.04, + "learning_rate": 1.2099707404093204e-06, + "loss": 0.6241, + "step": 41750 + }, + { + "epoch": 20.04, + "learning_rate": 1.2071205501641256e-06, + "loss": 0.6667, + "step": 41760 + }, + { + "epoch": 20.04, + "learning_rate": 1.2042734269919254e-06, + "loss": 0.3184, + "step": 41770 + }, + { + "epoch": 20.04, + "learning_rate": 1.2014293722803782e-06, + "loss": 0.469, + "step": 41780 + }, + { + "epoch": 20.04, + "learning_rate": 1.1985883874156386e-06, + "loss": 0.161, + "step": 41790 + }, + { + "epoch": 20.04, + "learning_rate": 1.1957504737823682e-06, + "loss": 0.4946, + "step": 41800 + }, + { + "epoch": 20.04, + "learning_rate": 1.1929156327637321e-06, + "loss": 0.3183, + "step": 41810 + }, + { + "epoch": 20.04, + "learning_rate": 1.1900838657413978e-06, + "loss": 0.1142, + "step": 41820 + }, + { + "epoch": 20.04, + "learning_rate": 1.187255174095533e-06, + "loss": 0.5159, + "step": 41830 + }, + { + "epoch": 20.04, + "learning_rate": 1.1844295592048086e-06, + "loss": 0.1947, + "step": 41840 + }, + { + "epoch": 20.04, + "learning_rate": 1.181607022446394e-06, + "loss": 0.3758, + "step": 41850 + }, + { + "epoch": 20.04, + "learning_rate": 1.1787875651959606e-06, + "loss": 0.4884, + "step": 41860 + }, + { + "epoch": 20.04, + "learning_rate": 1.175971188827675e-06, + "loss": 0.6049, + "step": 41870 + }, + { + "epoch": 20.04, + "learning_rate": 1.173157894714209e-06, + "loss": 0.6191, + "step": 41880 + }, + { + "epoch": 20.04, + "learning_rate": 1.1703476842267253e-06, + "loss": 0.4537, + "step": 41890 + }, + { + "epoch": 20.04, + "learning_rate": 1.1675405587348885e-06, + "loss": 0.6927, + "step": 41900 + }, + { + "epoch": 20.04, + "learning_rate": 1.1647365196068524e-06, + "loss": 0.1467, + "step": 41910 + }, + { + "epoch": 20.04, + "learning_rate": 1.1619355682092774e-06, + "loss": 0.4122, + "step": 41920 + }, + { + "epoch": 20.04, + "learning_rate": 1.1591377059073103e-06, + "loss": 0.3217, + "step": 41930 + }, + { + "epoch": 20.04, + "learning_rate": 1.1563429340645955e-06, + "loss": 0.8771, + "step": 41940 + }, + { + "epoch": 20.04, + "learning_rate": 1.1535512540432707e-06, + "loss": 0.6063, + "step": 41950 + }, + { + "epoch": 20.04, + "learning_rate": 1.1507626672039656e-06, + "loss": 0.5014, + "step": 41960 + }, + { + "epoch": 20.04, + "learning_rate": 1.1479771749058071e-06, + "loss": 0.5023, + "step": 41970 + }, + { + "epoch": 20.04, + "learning_rate": 1.1451947785064086e-06, + "loss": 0.7961, + "step": 41980 + }, + { + "epoch": 20.04, + "learning_rate": 1.1424154793618775e-06, + "loss": 0.3274, + "step": 41990 + }, + { + "epoch": 20.04, + "learning_rate": 1.1396392788268054e-06, + "loss": 0.4949, + "step": 42000 + }, + { + "epoch": 20.04, + "eval_accuracy": 0.8747368421052631, + "eval_f1": 0.8747368421052631, + "eval_loss": 0.7881549000740051, + "eval_runtime": 742.269, + "eval_samples_per_second": 6.399, + "eval_steps_per_second": 1.6, + "step": 42000 + }, + { + "epoch": 21.0, + "learning_rate": 1.1368661782542842e-06, + "loss": 0.5779, + "step": 42010 + }, + { + "epoch": 21.0, + "learning_rate": 1.1340961789958867e-06, + "loss": 0.7587, + "step": 42020 + }, + { + "epoch": 21.0, + "learning_rate": 1.1313292824016783e-06, + "loss": 0.7537, + "step": 42030 + }, + { + "epoch": 21.0, + "learning_rate": 1.1285654898202107e-06, + "loss": 0.2199, + "step": 42040 + }, + { + "epoch": 21.0, + "learning_rate": 1.1258048025985219e-06, + "loss": 0.4507, + "step": 42050 + }, + { + "epoch": 21.0, + "learning_rate": 1.123047222082135e-06, + "loss": 0.3394, + "step": 42060 + }, + { + "epoch": 21.0, + "learning_rate": 1.120292749615067e-06, + "loss": 0.812, + "step": 42070 + }, + { + "epoch": 21.0, + "learning_rate": 1.117541386539812e-06, + "loss": 0.4893, + "step": 42080 + }, + { + "epoch": 21.0, + "learning_rate": 1.1147931341973493e-06, + "loss": 0.7984, + "step": 42090 + }, + { + "epoch": 21.0, + "learning_rate": 1.1120479939271424e-06, + "loss": 0.3631, + "step": 42100 + }, + { + "epoch": 21.0, + "learning_rate": 1.1093059670671444e-06, + "loss": 0.2635, + "step": 42110 + }, + { + "epoch": 21.0, + "learning_rate": 1.1065670549537818e-06, + "loss": 0.1696, + "step": 42120 + }, + { + "epoch": 21.0, + "learning_rate": 1.1038312589219687e-06, + "loss": 0.4704, + "step": 42130 + }, + { + "epoch": 21.0, + "learning_rate": 1.1010985803050986e-06, + "loss": 0.4442, + "step": 42140 + }, + { + "epoch": 21.0, + "learning_rate": 1.0983690204350433e-06, + "loss": 0.1498, + "step": 42150 + }, + { + "epoch": 21.0, + "learning_rate": 1.0956425806421622e-06, + "loss": 0.4469, + "step": 42160 + }, + { + "epoch": 21.0, + "learning_rate": 1.0929192622552872e-06, + "loss": 0.3605, + "step": 42170 + }, + { + "epoch": 21.0, + "learning_rate": 1.0901990666017283e-06, + "loss": 1.0167, + "step": 42180 + }, + { + "epoch": 21.0, + "learning_rate": 1.087481995007276e-06, + "loss": 0.9402, + "step": 42190 + }, + { + "epoch": 21.0, + "learning_rate": 1.0847680487962005e-06, + "loss": 0.5113, + "step": 42200 + }, + { + "epoch": 21.0, + "learning_rate": 1.0820572292912442e-06, + "loss": 0.2354, + "step": 42210 + }, + { + "epoch": 21.0, + "learning_rate": 1.0793495378136291e-06, + "loss": 0.1514, + "step": 42220 + }, + { + "epoch": 21.0, + "learning_rate": 1.0766449756830507e-06, + "loss": 0.4024, + "step": 42230 + }, + { + "epoch": 21.0, + "learning_rate": 1.0739435442176783e-06, + "loss": 0.1413, + "step": 42240 + }, + { + "epoch": 21.0, + "learning_rate": 1.0712452447341584e-06, + "loss": 0.4313, + "step": 42250 + }, + { + "epoch": 21.01, + "learning_rate": 1.0685500785476093e-06, + "loss": 0.3361, + "step": 42260 + }, + { + "epoch": 21.01, + "learning_rate": 1.0658580469716214e-06, + "loss": 0.5155, + "step": 42270 + }, + { + "epoch": 21.01, + "learning_rate": 1.0631691513182585e-06, + "loss": 0.7166, + "step": 42280 + }, + { + "epoch": 21.01, + "learning_rate": 1.0604833928980537e-06, + "loss": 0.5803, + "step": 42290 + }, + { + "epoch": 21.01, + "learning_rate": 1.0578007730200167e-06, + "loss": 0.2315, + "step": 42300 + }, + { + "epoch": 21.01, + "learning_rate": 1.0551212929916225e-06, + "loss": 0.4389, + "step": 42310 + }, + { + "epoch": 21.01, + "learning_rate": 1.0524449541188174e-06, + "loss": 0.6883, + "step": 42320 + }, + { + "epoch": 21.01, + "learning_rate": 1.0497717577060153e-06, + "loss": 0.5107, + "step": 42330 + }, + { + "epoch": 21.01, + "learning_rate": 1.0471017050560999e-06, + "loss": 0.5452, + "step": 42340 + }, + { + "epoch": 21.01, + "learning_rate": 1.0444347974704235e-06, + "loss": 0.2522, + "step": 42350 + }, + { + "epoch": 21.01, + "learning_rate": 1.0417710362488031e-06, + "loss": 0.2507, + "step": 42360 + }, + { + "epoch": 21.01, + "learning_rate": 1.039110422689525e-06, + "loss": 0.2414, + "step": 42370 + }, + { + "epoch": 21.01, + "learning_rate": 1.036452958089338e-06, + "loss": 1.171, + "step": 42380 + }, + { + "epoch": 21.01, + "learning_rate": 1.0337986437434584e-06, + "loss": 0.3574, + "step": 42390 + }, + { + "epoch": 21.01, + "learning_rate": 1.031147480945569e-06, + "loss": 0.3141, + "step": 42400 + }, + { + "epoch": 21.01, + "learning_rate": 1.0284994709878125e-06, + "loss": 0.8145, + "step": 42410 + }, + { + "epoch": 21.01, + "learning_rate": 1.0258546151607978e-06, + "loss": 0.5917, + "step": 42420 + }, + { + "epoch": 21.01, + "learning_rate": 1.0232129147535943e-06, + "loss": 0.5349, + "step": 42430 + }, + { + "epoch": 21.01, + "learning_rate": 1.0205743710537355e-06, + "loss": 0.5481, + "step": 42440 + }, + { + "epoch": 21.01, + "learning_rate": 1.0179389853472152e-06, + "loss": 0.3898, + "step": 42450 + }, + { + "epoch": 21.01, + "learning_rate": 1.0153067589184893e-06, + "loss": 0.4142, + "step": 42460 + }, + { + "epoch": 21.01, + "learning_rate": 1.0126776930504706e-06, + "loss": 0.4881, + "step": 42470 + }, + { + "epoch": 21.01, + "learning_rate": 1.010051789024535e-06, + "loss": 0.4907, + "step": 42480 + }, + { + "epoch": 21.01, + "learning_rate": 1.0074290481205182e-06, + "loss": 0.5862, + "step": 42490 + }, + { + "epoch": 21.01, + "learning_rate": 1.0048094716167097e-06, + "loss": 0.2149, + "step": 42500 + }, + { + "epoch": 21.01, + "learning_rate": 1.0021930607898611e-06, + "loss": 0.5522, + "step": 42510 + }, + { + "epoch": 21.01, + "learning_rate": 9.995798169151788e-07, + "loss": 0.5393, + "step": 42520 + }, + { + "epoch": 21.01, + "learning_rate": 9.969697412663247e-07, + "loss": 0.4814, + "step": 42530 + }, + { + "epoch": 21.01, + "learning_rate": 9.9436283511542e-07, + "loss": 0.364, + "step": 42540 + }, + { + "epoch": 21.01, + "learning_rate": 9.917590997330377e-07, + "loss": 0.5711, + "step": 42550 + }, + { + "epoch": 21.01, + "learning_rate": 9.891585363882066e-07, + "loss": 0.5678, + "step": 42560 + }, + { + "epoch": 21.01, + "learning_rate": 9.865611463484108e-07, + "loss": 0.1107, + "step": 42570 + }, + { + "epoch": 21.01, + "learning_rate": 9.839669308795846e-07, + "loss": 0.6555, + "step": 42580 + }, + { + "epoch": 21.01, + "learning_rate": 9.813758912461204e-07, + "loss": 0.987, + "step": 42590 + }, + { + "epoch": 21.01, + "learning_rate": 9.787880287108574e-07, + "loss": 0.3986, + "step": 42600 + }, + { + "epoch": 21.01, + "learning_rate": 9.762033445350901e-07, + "loss": 0.498, + "step": 42610 + }, + { + "epoch": 21.01, + "learning_rate": 9.736218399785573e-07, + "loss": 0.5059, + "step": 42620 + }, + { + "epoch": 21.01, + "learning_rate": 9.710435162994585e-07, + "loss": 0.4711, + "step": 42630 + }, + { + "epoch": 21.01, + "learning_rate": 9.68468374754435e-07, + "loss": 0.2611, + "step": 42640 + }, + { + "epoch": 21.01, + "learning_rate": 9.658964165985798e-07, + "loss": 0.4665, + "step": 42650 + }, + { + "epoch": 21.01, + "learning_rate": 9.63327643085434e-07, + "loss": 1.166, + "step": 42660 + }, + { + "epoch": 21.01, + "learning_rate": 9.607620554669846e-07, + "loss": 0.3244, + "step": 42670 + }, + { + "epoch": 21.01, + "learning_rate": 9.581996549936721e-07, + "loss": 0.8249, + "step": 42680 + }, + { + "epoch": 21.01, + "learning_rate": 9.55640442914376e-07, + "loss": 0.5912, + "step": 42690 + }, + { + "epoch": 21.01, + "learning_rate": 9.530844204764286e-07, + "loss": 0.4654, + "step": 42700 + }, + { + "epoch": 21.01, + "learning_rate": 9.505315889256005e-07, + "loss": 0.5017, + "step": 42710 + }, + { + "epoch": 21.01, + "learning_rate": 9.479819495061102e-07, + "loss": 0.2483, + "step": 42720 + }, + { + "epoch": 21.01, + "learning_rate": 9.454355034606241e-07, + "loss": 0.2872, + "step": 42730 + }, + { + "epoch": 21.01, + "learning_rate": 9.428922520302479e-07, + "loss": 0.3335, + "step": 42740 + }, + { + "epoch": 21.02, + "learning_rate": 9.40352196454532e-07, + "loss": 0.6583, + "step": 42750 + }, + { + "epoch": 21.02, + "learning_rate": 9.378153379714682e-07, + "loss": 0.6831, + "step": 42760 + }, + { + "epoch": 21.02, + "learning_rate": 9.352816778174878e-07, + "loss": 0.1119, + "step": 42770 + }, + { + "epoch": 21.02, + "learning_rate": 9.327512172274711e-07, + "loss": 0.6276, + "step": 42780 + }, + { + "epoch": 21.02, + "learning_rate": 9.302239574347323e-07, + "loss": 0.1037, + "step": 42790 + }, + { + "epoch": 21.02, + "learning_rate": 9.276998996710248e-07, + "loss": 0.432, + "step": 42800 + }, + { + "epoch": 21.02, + "learning_rate": 9.251790451665426e-07, + "loss": 0.5301, + "step": 42810 + }, + { + "epoch": 21.02, + "learning_rate": 9.226613951499246e-07, + "loss": 0.2378, + "step": 42820 + }, + { + "epoch": 21.02, + "learning_rate": 9.201469508482394e-07, + "loss": 0.6031, + "step": 42830 + }, + { + "epoch": 21.02, + "learning_rate": 9.176357134869981e-07, + "loss": 0.5935, + "step": 42840 + }, + { + "epoch": 21.02, + "learning_rate": 9.15127684290146e-07, + "loss": 0.752, + "step": 42850 + }, + { + "epoch": 21.02, + "learning_rate": 9.126228644800669e-07, + "loss": 0.4204, + "step": 42860 + }, + { + "epoch": 21.02, + "learning_rate": 9.101212552775793e-07, + "loss": 0.1586, + "step": 42870 + }, + { + "epoch": 21.02, + "learning_rate": 9.076228579019377e-07, + "loss": 0.7899, + "step": 42880 + }, + { + "epoch": 21.02, + "learning_rate": 9.051276735708292e-07, + "loss": 0.1739, + "step": 42890 + }, + { + "epoch": 21.02, + "learning_rate": 9.026357035003774e-07, + "loss": 0.1888, + "step": 42900 + }, + { + "epoch": 21.02, + "learning_rate": 9.001469489051367e-07, + "loss": 0.1448, + "step": 42910 + }, + { + "epoch": 21.02, + "learning_rate": 8.976614109980985e-07, + "loss": 0.4078, + "step": 42920 + }, + { + "epoch": 21.02, + "learning_rate": 8.951790909906829e-07, + "loss": 0.628, + "step": 42930 + }, + { + "epoch": 21.02, + "learning_rate": 8.926999900927413e-07, + "loss": 0.6041, + "step": 42940 + }, + { + "epoch": 21.02, + "learning_rate": 8.902241095125588e-07, + "loss": 0.4989, + "step": 42950 + }, + { + "epoch": 21.02, + "learning_rate": 8.877514504568484e-07, + "loss": 0.4466, + "step": 42960 + }, + { + "epoch": 21.02, + "learning_rate": 8.852820141307555e-07, + "loss": 0.4655, + "step": 42970 + }, + { + "epoch": 21.02, + "learning_rate": 8.828158017378512e-07, + "loss": 0.7187, + "step": 42980 + }, + { + "epoch": 21.02, + "learning_rate": 8.803528144801393e-07, + "loss": 0.8563, + "step": 42990 + }, + { + "epoch": 21.02, + "learning_rate": 8.778930535580476e-07, + "loss": 0.516, + "step": 43000 + }, + { + "epoch": 21.02, + "learning_rate": 8.754365201704373e-07, + "loss": 0.6609, + "step": 43010 + }, + { + "epoch": 21.02, + "learning_rate": 8.729832155145897e-07, + "loss": 0.4027, + "step": 43020 + }, + { + "epoch": 21.02, + "learning_rate": 8.705331407862185e-07, + "loss": 0.2548, + "step": 43030 + }, + { + "epoch": 21.02, + "learning_rate": 8.680862971794575e-07, + "loss": 0.5615, + "step": 43040 + }, + { + "epoch": 21.02, + "learning_rate": 8.656426858868705e-07, + "loss": 0.4992, + "step": 43050 + }, + { + "epoch": 21.02, + "learning_rate": 8.632023080994436e-07, + "loss": 0.388, + "step": 43060 + }, + { + "epoch": 21.02, + "learning_rate": 8.607651650065865e-07, + "loss": 0.5331, + "step": 43070 + }, + { + "epoch": 21.02, + "learning_rate": 8.583312577961339e-07, + "loss": 0.4323, + "step": 43080 + }, + { + "epoch": 21.02, + "learning_rate": 8.559005876543427e-07, + "loss": 0.4964, + "step": 43090 + }, + { + "epoch": 21.02, + "learning_rate": 8.534731557658895e-07, + "loss": 0.3992, + "step": 43100 + }, + { + "epoch": 21.02, + "learning_rate": 8.510489633138788e-07, + "loss": 0.5065, + "step": 43110 + }, + { + "epoch": 21.02, + "learning_rate": 8.486280114798311e-07, + "loss": 0.5718, + "step": 43120 + }, + { + "epoch": 21.02, + "learning_rate": 8.462103014436887e-07, + "loss": 0.3075, + "step": 43130 + }, + { + "epoch": 21.02, + "learning_rate": 8.437958343838146e-07, + "loss": 0.3258, + "step": 43140 + }, + { + "epoch": 21.02, + "learning_rate": 8.413846114769907e-07, + "loss": 0.5287, + "step": 43150 + }, + { + "epoch": 21.02, + "learning_rate": 8.389766338984165e-07, + "loss": 0.8632, + "step": 43160 + }, + { + "epoch": 21.02, + "learning_rate": 8.365719028217128e-07, + "loss": 0.372, + "step": 43170 + }, + { + "epoch": 21.02, + "learning_rate": 8.341704194189148e-07, + "loss": 0.4632, + "step": 43180 + }, + { + "epoch": 21.02, + "learning_rate": 8.317721848604754e-07, + "loss": 0.2295, + "step": 43190 + }, + { + "epoch": 21.02, + "learning_rate": 8.293772003152686e-07, + "loss": 0.5518, + "step": 43200 + }, + { + "epoch": 21.02, + "learning_rate": 8.269854669505783e-07, + "loss": 0.6363, + "step": 43210 + }, + { + "epoch": 21.02, + "learning_rate": 8.245969859321062e-07, + "loss": 0.4671, + "step": 43220 + }, + { + "epoch": 21.02, + "learning_rate": 8.222117584239719e-07, + "loss": 0.5977, + "step": 43230 + }, + { + "epoch": 21.02, + "learning_rate": 8.198297855887005e-07, + "loss": 0.6138, + "step": 43240 + }, + { + "epoch": 21.02, + "learning_rate": 8.174510685872417e-07, + "loss": 0.6577, + "step": 43250 + }, + { + "epoch": 21.03, + "learning_rate": 8.150756085789512e-07, + "loss": 0.4194, + "step": 43260 + }, + { + "epoch": 21.03, + "learning_rate": 8.127034067215999e-07, + "loss": 0.3516, + "step": 43270 + }, + { + "epoch": 21.03, + "learning_rate": 8.103344641713695e-07, + "loss": 0.3407, + "step": 43280 + }, + { + "epoch": 21.03, + "learning_rate": 8.079687820828538e-07, + "loss": 0.5712, + "step": 43290 + }, + { + "epoch": 21.03, + "learning_rate": 8.056063616090581e-07, + "loss": 0.6257, + "step": 43300 + }, + { + "epoch": 21.03, + "learning_rate": 8.032472039013988e-07, + "loss": 0.3842, + "step": 43310 + }, + { + "epoch": 21.03, + "learning_rate": 8.008913101096996e-07, + "loss": 0.684, + "step": 43320 + }, + { + "epoch": 21.03, + "learning_rate": 7.985386813821918e-07, + "loss": 0.7516, + "step": 43330 + }, + { + "epoch": 21.03, + "learning_rate": 7.961893188655217e-07, + "loss": 0.3816, + "step": 43340 + }, + { + "epoch": 21.03, + "learning_rate": 7.938432237047392e-07, + "loss": 0.5124, + "step": 43350 + }, + { + "epoch": 21.03, + "learning_rate": 7.915003970433018e-07, + "loss": 0.6431, + "step": 43360 + }, + { + "epoch": 21.03, + "learning_rate": 7.891608400230749e-07, + "loss": 0.4608, + "step": 43370 + }, + { + "epoch": 21.03, + "learning_rate": 7.868245537843311e-07, + "loss": 0.5772, + "step": 43380 + }, + { + "epoch": 21.03, + "learning_rate": 7.844915394657445e-07, + "loss": 0.6119, + "step": 43390 + }, + { + "epoch": 21.03, + "learning_rate": 7.821617982044033e-07, + "loss": 0.1979, + "step": 43400 + }, + { + "epoch": 21.03, + "learning_rate": 7.798353311357931e-07, + "loss": 0.4184, + "step": 43410 + }, + { + "epoch": 21.03, + "learning_rate": 7.775121393938045e-07, + "loss": 0.6045, + "step": 43420 + }, + { + "epoch": 21.03, + "learning_rate": 7.751922241107309e-07, + "loss": 0.6312, + "step": 43430 + }, + { + "epoch": 21.03, + "learning_rate": 7.728755864172754e-07, + "loss": 0.3845, + "step": 43440 + }, + { + "epoch": 21.03, + "learning_rate": 7.705622274425372e-07, + "loss": 0.2585, + "step": 43450 + }, + { + "epoch": 21.03, + "learning_rate": 7.682521483140187e-07, + "loss": 0.874, + "step": 43460 + }, + { + "epoch": 21.03, + "learning_rate": 7.659453501576258e-07, + "loss": 0.1983, + "step": 43470 + }, + { + "epoch": 21.03, + "learning_rate": 7.636418340976609e-07, + "loss": 0.5186, + "step": 43480 + }, + { + "epoch": 21.03, + "learning_rate": 7.613416012568349e-07, + "loss": 0.5401, + "step": 43490 + }, + { + "epoch": 21.03, + "learning_rate": 7.59044652756249e-07, + "loss": 0.3666, + "step": 43500 + }, + { + "epoch": 21.03, + "learning_rate": 7.567509897154088e-07, + "loss": 0.2822, + "step": 43510 + }, + { + "epoch": 21.03, + "learning_rate": 7.54460613252217e-07, + "loss": 0.3534, + "step": 43520 + }, + { + "epoch": 21.03, + "learning_rate": 7.52173524482978e-07, + "loss": 0.7955, + "step": 43530 + }, + { + "epoch": 21.03, + "learning_rate": 7.498897245223904e-07, + "loss": 0.6602, + "step": 43540 + }, + { + "epoch": 21.03, + "learning_rate": 7.476092144835487e-07, + "loss": 0.2675, + "step": 43550 + }, + { + "epoch": 21.03, + "learning_rate": 7.453319954779478e-07, + "loss": 0.526, + "step": 43560 + }, + { + "epoch": 21.03, + "learning_rate": 7.430580686154751e-07, + "loss": 0.5293, + "step": 43570 + }, + { + "epoch": 21.03, + "learning_rate": 7.407874350044155e-07, + "loss": 0.8674, + "step": 43580 + }, + { + "epoch": 21.03, + "learning_rate": 7.38520095751449e-07, + "loss": 0.8319, + "step": 43590 + }, + { + "epoch": 21.03, + "learning_rate": 7.362560519616474e-07, + "loss": 0.3474, + "step": 43600 + }, + { + "epoch": 21.03, + "learning_rate": 7.339953047384795e-07, + "loss": 0.7456, + "step": 43610 + }, + { + "epoch": 21.03, + "learning_rate": 7.31737855183805e-07, + "loss": 0.3838, + "step": 43620 + }, + { + "epoch": 21.03, + "learning_rate": 7.294837043978786e-07, + "loss": 0.359, + "step": 43630 + }, + { + "epoch": 21.03, + "learning_rate": 7.272328534793465e-07, + "loss": 0.4701, + "step": 43640 + }, + { + "epoch": 21.03, + "learning_rate": 7.249853035252437e-07, + "loss": 0.468, + "step": 43650 + }, + { + "epoch": 21.03, + "learning_rate": 7.227410556310016e-07, + "loss": 0.468, + "step": 43660 + }, + { + "epoch": 21.03, + "learning_rate": 7.205001108904375e-07, + "loss": 0.4356, + "step": 43670 + }, + { + "epoch": 21.03, + "learning_rate": 7.182624703957603e-07, + "loss": 0.1461, + "step": 43680 + }, + { + "epoch": 21.03, + "learning_rate": 7.1602813523757e-07, + "loss": 0.5284, + "step": 43690 + }, + { + "epoch": 21.03, + "learning_rate": 7.137971065048549e-07, + "loss": 0.6941, + "step": 43700 + }, + { + "epoch": 21.03, + "learning_rate": 7.115693852849891e-07, + "loss": 0.7175, + "step": 43710 + }, + { + "epoch": 21.03, + "learning_rate": 7.093449726637369e-07, + "loss": 0.4247, + "step": 43720 + }, + { + "epoch": 21.03, + "learning_rate": 7.071238697252533e-07, + "loss": 0.3809, + "step": 43730 + }, + { + "epoch": 21.03, + "learning_rate": 7.049060775520741e-07, + "loss": 0.6679, + "step": 43740 + }, + { + "epoch": 21.04, + "learning_rate": 7.026915972251254e-07, + "loss": 0.5361, + "step": 43750 + }, + { + "epoch": 21.04, + "learning_rate": 7.004804298237175e-07, + "loss": 0.7257, + "step": 43760 + }, + { + "epoch": 21.04, + "learning_rate": 6.982725764255463e-07, + "loss": 0.3651, + "step": 43770 + }, + { + "epoch": 21.04, + "learning_rate": 6.960680381066936e-07, + "loss": 0.4422, + "step": 43780 + }, + { + "epoch": 21.04, + "learning_rate": 6.938668159416234e-07, + "loss": 0.1049, + "step": 43790 + }, + { + "epoch": 21.04, + "learning_rate": 6.916689110031857e-07, + "loss": 0.4155, + "step": 43800 + }, + { + "epoch": 21.04, + "learning_rate": 6.894743243626112e-07, + "loss": 0.4926, + "step": 43810 + }, + { + "epoch": 21.04, + "learning_rate": 6.872830570895167e-07, + "loss": 0.3949, + "step": 43820 + }, + { + "epoch": 21.04, + "learning_rate": 6.850951102518979e-07, + "loss": 0.2546, + "step": 43830 + }, + { + "epoch": 21.04, + "learning_rate": 6.829104849161344e-07, + "loss": 0.4445, + "step": 43840 + }, + { + "epoch": 21.04, + "learning_rate": 6.80729182146986e-07, + "loss": 0.5695, + "step": 43850 + }, + { + "epoch": 21.04, + "learning_rate": 6.785512030075925e-07, + "loss": 0.6643, + "step": 43860 + }, + { + "epoch": 21.04, + "learning_rate": 6.763765485594744e-07, + "loss": 0.7855, + "step": 43870 + }, + { + "epoch": 21.04, + "learning_rate": 6.742052198625326e-07, + "loss": 0.5017, + "step": 43880 + }, + { + "epoch": 21.04, + "learning_rate": 6.720372179750461e-07, + "loss": 0.3665, + "step": 43890 + }, + { + "epoch": 21.04, + "learning_rate": 6.698725439536723e-07, + "loss": 0.4931, + "step": 43900 + }, + { + "epoch": 21.04, + "learning_rate": 6.677111988534465e-07, + "loss": 0.3041, + "step": 43910 + }, + { + "epoch": 21.04, + "learning_rate": 6.655531837277834e-07, + "loss": 0.2887, + "step": 43920 + }, + { + "epoch": 21.04, + "learning_rate": 6.633984996284725e-07, + "loss": 0.2932, + "step": 43930 + }, + { + "epoch": 21.04, + "learning_rate": 6.61247147605683e-07, + "loss": 0.4111, + "step": 43940 + }, + { + "epoch": 21.04, + "learning_rate": 6.590991287079529e-07, + "loss": 0.4439, + "step": 43950 + }, + { + "epoch": 21.04, + "learning_rate": 6.569544439822045e-07, + "loss": 0.604, + "step": 43960 + }, + { + "epoch": 21.04, + "learning_rate": 6.548130944737294e-07, + "loss": 0.4894, + "step": 43970 + }, + { + "epoch": 21.04, + "learning_rate": 6.526750812261958e-07, + "loss": 0.3146, + "step": 43980 + }, + { + "epoch": 21.04, + "learning_rate": 6.505404052816455e-07, + "loss": 0.2673, + "step": 43990 + }, + { + "epoch": 21.04, + "learning_rate": 6.484090676804927e-07, + "loss": 0.6131, + "step": 44000 + }, + { + "epoch": 21.04, + "eval_accuracy": 0.8705263157894737, + "eval_f1": 0.8705263157894737, + "eval_loss": 0.796357274055481, + "eval_runtime": 747.1435, + "eval_samples_per_second": 6.358, + "eval_steps_per_second": 1.59, + "step": 44000 + }, + { + "epoch": 22.0, + "learning_rate": 6.462810694615273e-07, + "loss": 0.6104, + "step": 44010 + }, + { + "epoch": 22.0, + "learning_rate": 6.441564116619089e-07, + "loss": 0.671, + "step": 44020 + }, + { + "epoch": 22.0, + "learning_rate": 6.420350953171708e-07, + "loss": 0.466, + "step": 44030 + }, + { + "epoch": 22.0, + "learning_rate": 6.399171214612126e-07, + "loss": 0.3888, + "step": 44040 + }, + { + "epoch": 22.0, + "learning_rate": 6.378024911263144e-07, + "loss": 0.428, + "step": 44050 + }, + { + "epoch": 22.0, + "learning_rate": 6.356912053431185e-07, + "loss": 0.177, + "step": 44060 + }, + { + "epoch": 22.0, + "learning_rate": 6.33583265140641e-07, + "loss": 0.6163, + "step": 44070 + }, + { + "epoch": 22.0, + "learning_rate": 6.314786715462656e-07, + "loss": 0.3802, + "step": 44080 + }, + { + "epoch": 22.0, + "learning_rate": 6.293774255857463e-07, + "loss": 0.5927, + "step": 44090 + }, + { + "epoch": 22.0, + "learning_rate": 6.272795282832028e-07, + "loss": 0.2755, + "step": 44100 + }, + { + "epoch": 22.0, + "learning_rate": 6.25184980661129e-07, + "loss": 0.4401, + "step": 44110 + }, + { + "epoch": 22.0, + "learning_rate": 6.230937837403783e-07, + "loss": 0.3299, + "step": 44120 + }, + { + "epoch": 22.0, + "learning_rate": 6.210059385401754e-07, + "loss": 0.2617, + "step": 44130 + }, + { + "epoch": 22.0, + "learning_rate": 6.189214460781104e-07, + "loss": 0.3985, + "step": 44140 + }, + { + "epoch": 22.0, + "learning_rate": 6.168403073701415e-07, + "loss": 0.512, + "step": 44150 + }, + { + "epoch": 22.0, + "learning_rate": 6.14762523430589e-07, + "loss": 0.4434, + "step": 44160 + }, + { + "epoch": 22.0, + "learning_rate": 6.126880952721403e-07, + "loss": 0.4754, + "step": 44170 + }, + { + "epoch": 22.0, + "learning_rate": 6.10617023905845e-07, + "loss": 0.5834, + "step": 44180 + }, + { + "epoch": 22.0, + "learning_rate": 6.085493103411183e-07, + "loss": 0.2878, + "step": 44190 + }, + { + "epoch": 22.0, + "learning_rate": 6.064849555857421e-07, + "loss": 0.3629, + "step": 44200 + }, + { + "epoch": 22.0, + "learning_rate": 6.044239606458543e-07, + "loss": 0.5338, + "step": 44210 + }, + { + "epoch": 22.0, + "learning_rate": 6.023663265259597e-07, + "loss": 0.5599, + "step": 44220 + }, + { + "epoch": 22.0, + "learning_rate": 6.003120542289234e-07, + "loss": 0.5573, + "step": 44230 + }, + { + "epoch": 22.0, + "learning_rate": 5.982611447559722e-07, + "loss": 0.3172, + "step": 44240 + }, + { + "epoch": 22.0, + "learning_rate": 5.962135991066972e-07, + "loss": 0.7887, + "step": 44250 + }, + { + "epoch": 22.01, + "learning_rate": 5.941694182790461e-07, + "loss": 0.4521, + "step": 44260 + }, + { + "epoch": 22.01, + "learning_rate": 5.921286032693277e-07, + "loss": 0.5678, + "step": 44270 + }, + { + "epoch": 22.01, + "learning_rate": 5.900911550722105e-07, + "loss": 0.3744, + "step": 44280 + }, + { + "epoch": 22.01, + "learning_rate": 5.88057074680721e-07, + "loss": 0.576, + "step": 44290 + }, + { + "epoch": 22.01, + "learning_rate": 5.86026363086247e-07, + "loss": 0.5443, + "step": 44300 + }, + { + "epoch": 22.01, + "learning_rate": 5.83999021278531e-07, + "loss": 0.8581, + "step": 44310 + }, + { + "epoch": 22.01, + "learning_rate": 5.819750502456758e-07, + "loss": 0.2708, + "step": 44320 + }, + { + "epoch": 22.01, + "learning_rate": 5.799544509741384e-07, + "loss": 0.7194, + "step": 44330 + }, + { + "epoch": 22.01, + "learning_rate": 5.779372244487374e-07, + "loss": 0.4304, + "step": 44340 + }, + { + "epoch": 22.01, + "learning_rate": 5.759233716526433e-07, + "loss": 0.3599, + "step": 44350 + }, + { + "epoch": 22.01, + "learning_rate": 5.739128935673832e-07, + "loss": 0.259, + "step": 44360 + }, + { + "epoch": 22.01, + "learning_rate": 5.719057911728392e-07, + "loss": 0.2699, + "step": 44370 + }, + { + "epoch": 22.01, + "learning_rate": 5.699020654472497e-07, + "loss": 0.5559, + "step": 44380 + }, + { + "epoch": 22.01, + "learning_rate": 5.679017173672052e-07, + "loss": 0.3535, + "step": 44390 + }, + { + "epoch": 22.01, + "learning_rate": 5.659047479076524e-07, + "loss": 0.3878, + "step": 44400 + }, + { + "epoch": 22.01, + "learning_rate": 5.639111580418885e-07, + "loss": 0.2821, + "step": 44410 + }, + { + "epoch": 22.01, + "learning_rate": 5.619209487415655e-07, + "loss": 0.1983, + "step": 44420 + }, + { + "epoch": 22.01, + "learning_rate": 5.599341209766867e-07, + "loss": 0.5107, + "step": 44430 + }, + { + "epoch": 22.01, + "learning_rate": 5.579506757156091e-07, + "loss": 0.5185, + "step": 44440 + }, + { + "epoch": 22.01, + "learning_rate": 5.559706139250395e-07, + "loss": 0.4973, + "step": 44450 + }, + { + "epoch": 22.01, + "learning_rate": 5.539939365700358e-07, + "loss": 0.3193, + "step": 44460 + }, + { + "epoch": 22.01, + "learning_rate": 5.520206446140058e-07, + "loss": 0.495, + "step": 44470 + }, + { + "epoch": 22.01, + "learning_rate": 5.500507390187093e-07, + "loss": 0.6467, + "step": 44480 + }, + { + "epoch": 22.01, + "learning_rate": 5.480842207442533e-07, + "loss": 0.6714, + "step": 44490 + }, + { + "epoch": 22.01, + "learning_rate": 5.461210907490952e-07, + "loss": 0.3219, + "step": 44500 + }, + { + "epoch": 22.01, + "learning_rate": 5.441613499900405e-07, + "loss": 0.4564, + "step": 44510 + }, + { + "epoch": 22.01, + "learning_rate": 5.422049994222427e-07, + "loss": 0.4278, + "step": 44520 + }, + { + "epoch": 22.01, + "learning_rate": 5.40252039999205e-07, + "loss": 0.3553, + "step": 44530 + }, + { + "epoch": 22.01, + "learning_rate": 5.38302472672775e-07, + "loss": 0.5166, + "step": 44540 + }, + { + "epoch": 22.01, + "learning_rate": 5.363562983931486e-07, + "loss": 0.318, + "step": 44550 + }, + { + "epoch": 22.01, + "learning_rate": 5.344135181088677e-07, + "loss": 0.6389, + "step": 44560 + }, + { + "epoch": 22.01, + "learning_rate": 5.324741327668168e-07, + "loss": 0.635, + "step": 44570 + }, + { + "epoch": 22.01, + "learning_rate": 5.305381433122328e-07, + "loss": 0.6101, + "step": 44580 + }, + { + "epoch": 22.01, + "learning_rate": 5.286055506886911e-07, + "loss": 0.3987, + "step": 44590 + }, + { + "epoch": 22.01, + "learning_rate": 5.266763558381149e-07, + "loss": 0.8255, + "step": 44600 + }, + { + "epoch": 22.01, + "learning_rate": 5.247505597007704e-07, + "loss": 0.2846, + "step": 44610 + }, + { + "epoch": 22.01, + "learning_rate": 5.228281632152651e-07, + "loss": 0.6096, + "step": 44620 + }, + { + "epoch": 22.01, + "learning_rate": 5.209091673185546e-07, + "loss": 0.667, + "step": 44630 + }, + { + "epoch": 22.01, + "learning_rate": 5.18993572945933e-07, + "loss": 0.3633, + "step": 44640 + }, + { + "epoch": 22.01, + "learning_rate": 5.170813810310388e-07, + "loss": 0.4713, + "step": 44650 + }, + { + "epoch": 22.01, + "learning_rate": 5.151725925058478e-07, + "loss": 0.2222, + "step": 44660 + }, + { + "epoch": 22.01, + "learning_rate": 5.132672083006836e-07, + "loss": 0.459, + "step": 44670 + }, + { + "epoch": 22.01, + "learning_rate": 5.113652293442067e-07, + "loss": 0.4248, + "step": 44680 + }, + { + "epoch": 22.01, + "learning_rate": 5.094666565634178e-07, + "loss": 0.602, + "step": 44690 + }, + { + "epoch": 22.01, + "learning_rate": 5.075714908836587e-07, + "loss": 0.4219, + "step": 44700 + }, + { + "epoch": 22.01, + "learning_rate": 5.056797332286103e-07, + "loss": 0.3305, + "step": 44710 + }, + { + "epoch": 22.01, + "learning_rate": 5.037913845202901e-07, + "loss": 0.2996, + "step": 44720 + }, + { + "epoch": 22.01, + "learning_rate": 5.019064456790612e-07, + "loss": 0.2826, + "step": 44730 + }, + { + "epoch": 22.01, + "learning_rate": 5.000249176236151e-07, + "loss": 0.4705, + "step": 44740 + }, + { + "epoch": 22.02, + "learning_rate": 4.981468012709877e-07, + "loss": 0.6573, + "step": 44750 + }, + { + "epoch": 22.02, + "learning_rate": 4.962720975365492e-07, + "loss": 0.4925, + "step": 44760 + }, + { + "epoch": 22.02, + "learning_rate": 4.944008073340086e-07, + "loss": 0.3959, + "step": 44770 + }, + { + "epoch": 22.02, + "learning_rate": 4.925329315754099e-07, + "loss": 0.2424, + "step": 44780 + }, + { + "epoch": 22.02, + "learning_rate": 4.906684711711328e-07, + "loss": 0.3226, + "step": 44790 + }, + { + "epoch": 22.02, + "learning_rate": 4.888074270298917e-07, + "loss": 0.4597, + "step": 44800 + }, + { + "epoch": 22.02, + "learning_rate": 4.869498000587375e-07, + "loss": 0.3997, + "step": 44810 + }, + { + "epoch": 22.02, + "learning_rate": 4.850955911630576e-07, + "loss": 0.3794, + "step": 44820 + }, + { + "epoch": 22.02, + "learning_rate": 4.832448012465665e-07, + "loss": 0.5885, + "step": 44830 + }, + { + "epoch": 22.02, + "learning_rate": 4.813974312113195e-07, + "loss": 0.5058, + "step": 44840 + }, + { + "epoch": 22.02, + "learning_rate": 4.795534819577005e-07, + "loss": 0.5762, + "step": 44850 + }, + { + "epoch": 22.02, + "learning_rate": 4.777129543844297e-07, + "loss": 0.5572, + "step": 44860 + }, + { + "epoch": 22.02, + "learning_rate": 4.7587584938855786e-07, + "loss": 0.9591, + "step": 44870 + }, + { + "epoch": 22.02, + "learning_rate": 4.7404216786546634e-07, + "loss": 0.6308, + "step": 44880 + }, + { + "epoch": 22.02, + "learning_rate": 4.7221191070887013e-07, + "loss": 0.3762, + "step": 44890 + }, + { + "epoch": 22.02, + "learning_rate": 4.703850788108141e-07, + "loss": 0.7058, + "step": 44900 + }, + { + "epoch": 22.02, + "learning_rate": 4.685616730616743e-07, + "loss": 0.167, + "step": 44910 + }, + { + "epoch": 22.02, + "learning_rate": 4.667416943501557e-07, + "loss": 0.4422, + "step": 44920 + }, + { + "epoch": 22.02, + "learning_rate": 4.649251435632945e-07, + "loss": 0.2814, + "step": 44930 + }, + { + "epoch": 22.02, + "learning_rate": 4.631120215864551e-07, + "loss": 0.4397, + "step": 44940 + }, + { + "epoch": 22.02, + "learning_rate": 4.6130232930333056e-07, + "loss": 0.6745, + "step": 44950 + }, + { + "epoch": 22.02, + "learning_rate": 4.5949606759594446e-07, + "loss": 0.0493, + "step": 44960 + }, + { + "epoch": 22.02, + "learning_rate": 4.57693237344646e-07, + "loss": 0.5869, + "step": 44970 + }, + { + "epoch": 22.02, + "learning_rate": 4.5589383942811315e-07, + "loss": 0.5944, + "step": 44980 + }, + { + "epoch": 22.02, + "learning_rate": 4.540978747233493e-07, + "loss": 0.3901, + "step": 44990 + }, + { + "epoch": 22.02, + "learning_rate": 4.5230534410568764e-07, + "loss": 0.572, + "step": 45000 + }, + { + "epoch": 22.02, + "learning_rate": 4.505162484487843e-07, + "loss": 0.3358, + "step": 45010 + }, + { + "epoch": 22.02, + "learning_rate": 4.4873058862462347e-07, + "loss": 0.349, + "step": 45020 + }, + { + "epoch": 22.02, + "learning_rate": 4.469483655035148e-07, + "loss": 0.5895, + "step": 45030 + }, + { + "epoch": 22.02, + "learning_rate": 4.451695799540892e-07, + "loss": 0.2153, + "step": 45040 + }, + { + "epoch": 22.02, + "learning_rate": 4.433942328433091e-07, + "loss": 0.6343, + "step": 45050 + }, + { + "epoch": 22.02, + "learning_rate": 4.4162232503645484e-07, + "loss": 0.2725, + "step": 45060 + }, + { + "epoch": 22.02, + "learning_rate": 4.3985385739713306e-07, + "loss": 0.5323, + "step": 45070 + }, + { + "epoch": 22.02, + "learning_rate": 4.3808883078727437e-07, + "loss": 0.3967, + "step": 45080 + }, + { + "epoch": 22.02, + "learning_rate": 4.363272460671297e-07, + "loss": 0.7175, + "step": 45090 + }, + { + "epoch": 22.02, + "learning_rate": 4.3456910409527483e-07, + "loss": 1.0338, + "step": 45100 + }, + { + "epoch": 22.02, + "learning_rate": 4.328144057286068e-07, + "loss": 0.352, + "step": 45110 + }, + { + "epoch": 22.02, + "learning_rate": 4.3106315182234484e-07, + "loss": 0.4248, + "step": 45120 + }, + { + "epoch": 22.02, + "learning_rate": 4.2931534323002787e-07, + "loss": 0.5194, + "step": 45130 + }, + { + "epoch": 22.02, + "learning_rate": 4.2757098080351606e-07, + "loss": 0.497, + "step": 45140 + }, + { + "epoch": 22.02, + "learning_rate": 4.258300653929928e-07, + "loss": 0.5801, + "step": 45150 + }, + { + "epoch": 22.02, + "learning_rate": 4.2409259784695675e-07, + "loss": 0.2365, + "step": 45160 + }, + { + "epoch": 22.02, + "learning_rate": 4.223585790122306e-07, + "loss": 0.3726, + "step": 45170 + }, + { + "epoch": 22.02, + "learning_rate": 4.2062800973395157e-07, + "loss": 0.6571, + "step": 45180 + }, + { + "epoch": 22.02, + "learning_rate": 4.1890089085557996e-07, + "loss": 0.4289, + "step": 45190 + }, + { + "epoch": 22.02, + "learning_rate": 4.171772232188925e-07, + "loss": 0.9237, + "step": 45200 + }, + { + "epoch": 22.02, + "learning_rate": 4.1545700766398297e-07, + "loss": 0.2547, + "step": 45210 + }, + { + "epoch": 22.02, + "learning_rate": 4.137402450292641e-07, + "loss": 0.3113, + "step": 45220 + }, + { + "epoch": 22.02, + "learning_rate": 4.1202693615146476e-07, + "loss": 0.4242, + "step": 45230 + }, + { + "epoch": 22.02, + "learning_rate": 4.103170818656296e-07, + "loss": 0.433, + "step": 45240 + }, + { + "epoch": 22.02, + "learning_rate": 4.0861068300512364e-07, + "loss": 0.5028, + "step": 45250 + }, + { + "epoch": 22.03, + "learning_rate": 4.0690774040162416e-07, + "loss": 0.6273, + "step": 45260 + }, + { + "epoch": 22.03, + "learning_rate": 4.0520825488512463e-07, + "loss": 0.5756, + "step": 45270 + }, + { + "epoch": 22.03, + "learning_rate": 4.0351222728393087e-07, + "loss": 0.3489, + "step": 45280 + }, + { + "epoch": 22.03, + "learning_rate": 4.0181965842467e-07, + "loss": 0.9094, + "step": 45290 + }, + { + "epoch": 22.03, + "learning_rate": 4.001305491322779e-07, + "loss": 0.4944, + "step": 45300 + }, + { + "epoch": 22.03, + "learning_rate": 3.984449002300053e-07, + "loss": 0.7408, + "step": 45310 + }, + { + "epoch": 22.03, + "learning_rate": 3.9676271253941827e-07, + "loss": 0.7202, + "step": 45320 + }, + { + "epoch": 22.03, + "learning_rate": 3.9508398688039267e-07, + "loss": 0.6042, + "step": 45330 + }, + { + "epoch": 22.03, + "learning_rate": 3.9340872407112234e-07, + "loss": 0.5301, + "step": 45340 + }, + { + "epoch": 22.03, + "learning_rate": 3.917369249281083e-07, + "loss": 0.437, + "step": 45350 + }, + { + "epoch": 22.03, + "learning_rate": 3.900685902661638e-07, + "loss": 0.4105, + "step": 45360 + }, + { + "epoch": 22.03, + "learning_rate": 3.884037208984151e-07, + "loss": 0.2409, + "step": 45370 + }, + { + "epoch": 22.03, + "learning_rate": 3.8674231763630134e-07, + "loss": 0.3427, + "step": 45380 + }, + { + "epoch": 22.03, + "learning_rate": 3.850843812895691e-07, + "loss": 0.5633, + "step": 45390 + }, + { + "epoch": 22.03, + "learning_rate": 3.8342991266627605e-07, + "loss": 0.6687, + "step": 45400 + }, + { + "epoch": 22.03, + "learning_rate": 3.8177891257279136e-07, + "loss": 0.4543, + "step": 45410 + }, + { + "epoch": 22.03, + "learning_rate": 3.8013138181379054e-07, + "loss": 0.5662, + "step": 45420 + }, + { + "epoch": 22.03, + "learning_rate": 3.784873211922604e-07, + "loss": 0.9155, + "step": 45430 + }, + { + "epoch": 22.03, + "learning_rate": 3.768467315094992e-07, + "loss": 0.4995, + "step": 45440 + }, + { + "epoch": 22.03, + "learning_rate": 3.752096135651073e-07, + "loss": 0.4798, + "step": 45450 + }, + { + "epoch": 22.03, + "learning_rate": 3.7357596815699557e-07, + "loss": 0.7374, + "step": 45460 + }, + { + "epoch": 22.03, + "learning_rate": 3.719457960813846e-07, + "loss": 0.3294, + "step": 45470 + }, + { + "epoch": 22.03, + "learning_rate": 3.7031909813279974e-07, + "loss": 0.5075, + "step": 45480 + }, + { + "epoch": 22.03, + "learning_rate": 3.68695875104075e-07, + "loss": 0.3049, + "step": 45490 + }, + { + "epoch": 22.03, + "learning_rate": 3.6707612778634855e-07, + "loss": 0.7697, + "step": 45500 + }, + { + "epoch": 22.03, + "learning_rate": 3.6545985696906535e-07, + "loss": 0.4896, + "step": 45510 + }, + { + "epoch": 22.03, + "learning_rate": 3.6384706343997544e-07, + "loss": 0.531, + "step": 45520 + }, + { + "epoch": 22.03, + "learning_rate": 3.6223774798513825e-07, + "loss": 0.5829, + "step": 45530 + }, + { + "epoch": 22.03, + "learning_rate": 3.6063191138891144e-07, + "loss": 0.7269, + "step": 45540 + }, + { + "epoch": 22.03, + "learning_rate": 3.590295544339606e-07, + "loss": 0.3007, + "step": 45550 + }, + { + "epoch": 22.03, + "learning_rate": 3.574306779012551e-07, + "loss": 0.6541, + "step": 45560 + }, + { + "epoch": 22.03, + "learning_rate": 3.5583528257006755e-07, + "loss": 0.6501, + "step": 45570 + }, + { + "epoch": 22.03, + "learning_rate": 3.542433692179767e-07, + "loss": 0.2508, + "step": 45580 + }, + { + "epoch": 22.03, + "learning_rate": 3.526549386208594e-07, + "loss": 0.3447, + "step": 45590 + }, + { + "epoch": 22.03, + "learning_rate": 3.510699915528981e-07, + "loss": 0.2587, + "step": 45600 + }, + { + "epoch": 22.03, + "learning_rate": 3.494885287865765e-07, + "loss": 0.4874, + "step": 45610 + }, + { + "epoch": 22.03, + "learning_rate": 3.4791055109268053e-07, + "loss": 0.6196, + "step": 45620 + }, + { + "epoch": 22.03, + "learning_rate": 3.4633605924029754e-07, + "loss": 0.7277, + "step": 45630 + }, + { + "epoch": 22.03, + "learning_rate": 3.4476505399681615e-07, + "loss": 0.754, + "step": 45640 + }, + { + "epoch": 22.03, + "learning_rate": 3.431975361279238e-07, + "loss": 0.6357, + "step": 45650 + }, + { + "epoch": 22.03, + "learning_rate": 3.416335063976103e-07, + "loss": 0.1892, + "step": 45660 + }, + { + "epoch": 22.03, + "learning_rate": 3.400729655681659e-07, + "loss": 0.5099, + "step": 45670 + }, + { + "epoch": 22.03, + "learning_rate": 3.3851591440017785e-07, + "loss": 0.6402, + "step": 45680 + }, + { + "epoch": 22.03, + "learning_rate": 3.369623536525343e-07, + "loss": 0.4794, + "step": 45690 + }, + { + "epoch": 22.03, + "learning_rate": 3.354122840824228e-07, + "loss": 0.4491, + "step": 45700 + }, + { + "epoch": 22.03, + "learning_rate": 3.3386570644532736e-07, + "loss": 0.4113, + "step": 45710 + }, + { + "epoch": 22.03, + "learning_rate": 3.323226214950309e-07, + "loss": 0.278, + "step": 45720 + }, + { + "epoch": 22.03, + "learning_rate": 3.307830299836151e-07, + "loss": 0.4852, + "step": 45730 + }, + { + "epoch": 22.03, + "learning_rate": 3.2924693266145804e-07, + "loss": 0.7552, + "step": 45740 + }, + { + "epoch": 22.04, + "learning_rate": 3.2771433027723425e-07, + "loss": 0.5251, + "step": 45750 + }, + { + "epoch": 22.04, + "learning_rate": 3.261852235779153e-07, + "loss": 0.3029, + "step": 45760 + }, + { + "epoch": 22.04, + "learning_rate": 3.246596133087709e-07, + "loss": 0.442, + "step": 45770 + }, + { + "epoch": 22.04, + "learning_rate": 3.2313750021336464e-07, + "loss": 0.7183, + "step": 45780 + }, + { + "epoch": 22.04, + "learning_rate": 3.2161888503355386e-07, + "loss": 0.7049, + "step": 45790 + }, + { + "epoch": 22.04, + "learning_rate": 3.201037685094957e-07, + "loss": 0.7686, + "step": 45800 + }, + { + "epoch": 22.04, + "learning_rate": 3.1859215137963775e-07, + "loss": 0.3101, + "step": 45810 + }, + { + "epoch": 22.04, + "learning_rate": 3.170840343807249e-07, + "loss": 0.6807, + "step": 45820 + }, + { + "epoch": 22.04, + "learning_rate": 3.1557941824779417e-07, + "loss": 0.3282, + "step": 45830 + }, + { + "epoch": 22.04, + "learning_rate": 3.140783037141781e-07, + "loss": 0.5177, + "step": 45840 + }, + { + "epoch": 22.04, + "learning_rate": 3.1258069151150067e-07, + "loss": 0.6931, + "step": 45850 + }, + { + "epoch": 22.04, + "learning_rate": 3.110865823696804e-07, + "loss": 0.4378, + "step": 45860 + }, + { + "epoch": 22.04, + "learning_rate": 3.0959597701692905e-07, + "loss": 0.683, + "step": 45870 + }, + { + "epoch": 22.04, + "learning_rate": 3.0810887617974793e-07, + "loss": 0.3099, + "step": 45880 + }, + { + "epoch": 22.04, + "learning_rate": 3.066252805829323e-07, + "loss": 0.7326, + "step": 45890 + }, + { + "epoch": 22.04, + "learning_rate": 3.0514519094956864e-07, + "loss": 0.6025, + "step": 45900 + }, + { + "epoch": 22.04, + "learning_rate": 3.0366860800103507e-07, + "loss": 0.6772, + "step": 45910 + }, + { + "epoch": 22.04, + "learning_rate": 3.02195532457e-07, + "loss": 0.3919, + "step": 45920 + }, + { + "epoch": 22.04, + "learning_rate": 3.0072596503542244e-07, + "loss": 0.2697, + "step": 45930 + }, + { + "epoch": 22.04, + "learning_rate": 2.99259906452552e-07, + "loss": 0.6047, + "step": 45940 + }, + { + "epoch": 22.04, + "learning_rate": 2.97797357422927e-07, + "loss": 0.3856, + "step": 45950 + }, + { + "epoch": 22.04, + "learning_rate": 2.9633831865937656e-07, + "loss": 0.4051, + "step": 45960 + }, + { + "epoch": 22.04, + "learning_rate": 2.9488279087302004e-07, + "loss": 0.4755, + "step": 45970 + }, + { + "epoch": 22.04, + "learning_rate": 2.9343077477326173e-07, + "loss": 0.3595, + "step": 45980 + }, + { + "epoch": 22.04, + "learning_rate": 2.9198227106779634e-07, + "loss": 0.6075, + "step": 45990 + }, + { + "epoch": 22.04, + "learning_rate": 2.905372804626083e-07, + "loss": 0.628, + "step": 46000 + }, + { + "epoch": 22.04, + "eval_accuracy": 0.8747368421052631, + "eval_f1": 0.8747368421052631, + "eval_loss": 0.8088645339012146, + "eval_runtime": 748.0599, + "eval_samples_per_second": 6.35, + "eval_steps_per_second": 1.588, + "step": 46000 + }, + { + "epoch": 23.0, + "learning_rate": 2.8909580366196926e-07, + "loss": 0.4376, + "step": 46010 + }, + { + "epoch": 23.0, + "learning_rate": 2.8765784136843554e-07, + "loss": 0.7477, + "step": 46020 + }, + { + "epoch": 23.0, + "learning_rate": 2.8622339428285406e-07, + "loss": 0.294, + "step": 46030 + }, + { + "epoch": 23.0, + "learning_rate": 2.847924631043555e-07, + "loss": 0.4043, + "step": 46040 + }, + { + "epoch": 23.0, + "learning_rate": 2.833650485303596e-07, + "loss": 0.4888, + "step": 46050 + }, + { + "epoch": 23.0, + "learning_rate": 2.8194115125657146e-07, + "loss": 0.7153, + "step": 46060 + }, + { + "epoch": 23.0, + "learning_rate": 2.8052077197697925e-07, + "loss": 0.3605, + "step": 46070 + }, + { + "epoch": 23.0, + "learning_rate": 2.791039113838592e-07, + "loss": 0.2711, + "step": 46080 + }, + { + "epoch": 23.0, + "learning_rate": 2.7769057016777234e-07, + "loss": 0.3246, + "step": 46090 + }, + { + "epoch": 23.0, + "learning_rate": 2.7628074901756423e-07, + "loss": 0.696, + "step": 46100 + }, + { + "epoch": 23.0, + "learning_rate": 2.748744486203644e-07, + "loss": 0.6081, + "step": 46110 + }, + { + "epoch": 23.0, + "learning_rate": 2.734716696615863e-07, + "loss": 0.2118, + "step": 46120 + }, + { + "epoch": 23.0, + "learning_rate": 2.720724128249272e-07, + "loss": 0.617, + "step": 46130 + }, + { + "epoch": 23.0, + "learning_rate": 2.7067667879236815e-07, + "loss": 0.6095, + "step": 46140 + }, + { + "epoch": 23.0, + "learning_rate": 2.6928446824417503e-07, + "loss": 0.5906, + "step": 46150 + }, + { + "epoch": 23.0, + "learning_rate": 2.678957818588909e-07, + "loss": 0.7001, + "step": 46160 + }, + { + "epoch": 23.0, + "learning_rate": 2.6651062031334607e-07, + "loss": 0.2159, + "step": 46170 + }, + { + "epoch": 23.0, + "learning_rate": 2.651289842826504e-07, + "loss": 0.3471, + "step": 46180 + }, + { + "epoch": 23.0, + "learning_rate": 2.637508744401987e-07, + "loss": 0.4117, + "step": 46190 + }, + { + "epoch": 23.0, + "learning_rate": 2.623762914576644e-07, + "loss": 0.6562, + "step": 46200 + }, + { + "epoch": 23.0, + "learning_rate": 2.6100523600500163e-07, + "loss": 0.5202, + "step": 46210 + }, + { + "epoch": 23.0, + "learning_rate": 2.596377087504459e-07, + "loss": 0.6931, + "step": 46220 + }, + { + "epoch": 23.0, + "learning_rate": 2.5827371036051486e-07, + "loss": 0.4239, + "step": 46230 + }, + { + "epoch": 23.0, + "learning_rate": 2.5691324150000333e-07, + "loss": 0.8724, + "step": 46240 + }, + { + "epoch": 23.0, + "learning_rate": 2.555563028319885e-07, + "loss": 0.8134, + "step": 46250 + }, + { + "epoch": 23.01, + "learning_rate": 2.542028950178246e-07, + "loss": 0.6479, + "step": 46260 + }, + { + "epoch": 23.01, + "learning_rate": 2.528530187171474e-07, + "loss": 0.3903, + "step": 46270 + }, + { + "epoch": 23.01, + "learning_rate": 2.5150667458786804e-07, + "loss": 0.3458, + "step": 46280 + }, + { + "epoch": 23.01, + "learning_rate": 2.5016386328618077e-07, + "loss": 0.4394, + "step": 46290 + }, + { + "epoch": 23.01, + "learning_rate": 2.488245854665547e-07, + "loss": 0.5828, + "step": 46300 + }, + { + "epoch": 23.01, + "learning_rate": 2.4748884178173754e-07, + "loss": 0.6674, + "step": 46310 + }, + { + "epoch": 23.01, + "learning_rate": 2.461566328827536e-07, + "loss": 0.3149, + "step": 46320 + }, + { + "epoch": 23.01, + "learning_rate": 2.4482795941890677e-07, + "loss": 0.6931, + "step": 46330 + }, + { + "epoch": 23.01, + "learning_rate": 2.435028220377758e-07, + "loss": 0.3118, + "step": 46340 + }, + { + "epoch": 23.01, + "learning_rate": 2.421812213852165e-07, + "loss": 0.6017, + "step": 46350 + }, + { + "epoch": 23.01, + "learning_rate": 2.408631581053602e-07, + "loss": 0.4137, + "step": 46360 + }, + { + "epoch": 23.01, + "learning_rate": 2.3954863284061573e-07, + "loss": 0.395, + "step": 46370 + }, + { + "epoch": 23.01, + "learning_rate": 2.3823764623166712e-07, + "loss": 0.5066, + "step": 46380 + }, + { + "epoch": 23.01, + "learning_rate": 2.3693019891747241e-07, + "loss": 0.4814, + "step": 46390 + }, + { + "epoch": 23.01, + "learning_rate": 2.3562629153526693e-07, + "loss": 0.5267, + "step": 46400 + }, + { + "epoch": 23.01, + "learning_rate": 2.3432592472055732e-07, + "loss": 0.8471, + "step": 46410 + }, + { + "epoch": 23.01, + "learning_rate": 2.3302909910712823e-07, + "loss": 0.3451, + "step": 46420 + }, + { + "epoch": 23.01, + "learning_rate": 2.3173581532703574e-07, + "loss": 0.6432, + "step": 46430 + }, + { + "epoch": 23.01, + "learning_rate": 2.3044607401061152e-07, + "loss": 0.2905, + "step": 46440 + }, + { + "epoch": 23.01, + "learning_rate": 2.2915987578645852e-07, + "loss": 0.4183, + "step": 46450 + }, + { + "epoch": 23.01, + "learning_rate": 2.2787722128145534e-07, + "loss": 0.4307, + "step": 46460 + }, + { + "epoch": 23.01, + "learning_rate": 2.26598111120751e-07, + "loss": 0.4788, + "step": 46470 + }, + { + "epoch": 23.01, + "learning_rate": 2.2532254592777025e-07, + "loss": 0.2118, + "step": 46480 + }, + { + "epoch": 23.01, + "learning_rate": 2.2405052632420652e-07, + "loss": 0.3335, + "step": 46490 + }, + { + "epoch": 23.01, + "learning_rate": 2.2278205293002645e-07, + "loss": 0.1742, + "step": 46500 + }, + { + "epoch": 23.01, + "learning_rate": 2.2151712636346966e-07, + "loss": 0.5771, + "step": 46510 + }, + { + "epoch": 23.01, + "learning_rate": 2.2025574724104548e-07, + "loss": 0.2051, + "step": 46520 + }, + { + "epoch": 23.01, + "learning_rate": 2.189979161775346e-07, + "loss": 0.3724, + "step": 46530 + }, + { + "epoch": 23.01, + "learning_rate": 2.1774363378598838e-07, + "loss": 0.3743, + "step": 46540 + }, + { + "epoch": 23.01, + "learning_rate": 2.1649290067773026e-07, + "loss": 0.5481, + "step": 46550 + }, + { + "epoch": 23.01, + "learning_rate": 2.152457174623501e-07, + "loss": 0.6652, + "step": 46560 + }, + { + "epoch": 23.01, + "learning_rate": 2.1400208474771087e-07, + "loss": 0.4334, + "step": 46570 + }, + { + "epoch": 23.01, + "learning_rate": 2.1276200313994522e-07, + "loss": 0.339, + "step": 46580 + }, + { + "epoch": 23.01, + "learning_rate": 2.115254732434538e-07, + "loss": 0.4181, + "step": 46590 + }, + { + "epoch": 23.01, + "learning_rate": 2.102924956609037e-07, + "loss": 0.3678, + "step": 46600 + }, + { + "epoch": 23.01, + "learning_rate": 2.0906307099323508e-07, + "loss": 0.4751, + "step": 46610 + }, + { + "epoch": 23.01, + "learning_rate": 2.0783719983965443e-07, + "loss": 0.4232, + "step": 46620 + }, + { + "epoch": 23.01, + "learning_rate": 2.0661488279763634e-07, + "loss": 0.3166, + "step": 46630 + }, + { + "epoch": 23.01, + "learning_rate": 2.0539612046292344e-07, + "loss": 0.3519, + "step": 46640 + }, + { + "epoch": 23.01, + "learning_rate": 2.0418091342952638e-07, + "loss": 0.4472, + "step": 46650 + }, + { + "epoch": 23.01, + "learning_rate": 2.0296926228971973e-07, + "loss": 0.6069, + "step": 46660 + }, + { + "epoch": 23.01, + "learning_rate": 2.0176116763405027e-07, + "loss": 0.5496, + "step": 46670 + }, + { + "epoch": 23.01, + "learning_rate": 2.0055663005132868e-07, + "loss": 0.5399, + "step": 46680 + }, + { + "epoch": 23.01, + "learning_rate": 1.9935565012863032e-07, + "loss": 0.2832, + "step": 46690 + }, + { + "epoch": 23.01, + "learning_rate": 1.9815822845129783e-07, + "loss": 0.5219, + "step": 46700 + }, + { + "epoch": 23.01, + "learning_rate": 1.9696436560294184e-07, + "loss": 0.4706, + "step": 46710 + }, + { + "epoch": 23.01, + "learning_rate": 1.9577406216543607e-07, + "loss": 0.3866, + "step": 46720 + }, + { + "epoch": 23.01, + "learning_rate": 1.9458731871891899e-07, + "loss": 0.4084, + "step": 46730 + }, + { + "epoch": 23.01, + "learning_rate": 1.9340413584179623e-07, + "loss": 0.2977, + "step": 46740 + }, + { + "epoch": 23.02, + "learning_rate": 1.9222451411073648e-07, + "loss": 0.6459, + "step": 46750 + }, + { + "epoch": 23.02, + "learning_rate": 1.9104845410067072e-07, + "loss": 0.5983, + "step": 46760 + }, + { + "epoch": 23.02, + "learning_rate": 1.8987595638480042e-07, + "loss": 0.5445, + "step": 46770 + }, + { + "epoch": 23.02, + "learning_rate": 1.887070215345843e-07, + "loss": 0.5333, + "step": 46780 + }, + { + "epoch": 23.02, + "learning_rate": 1.875416501197466e-07, + "loss": 0.2043, + "step": 46790 + }, + { + "epoch": 23.02, + "learning_rate": 1.8637984270827552e-07, + "loss": 0.6342, + "step": 46800 + }, + { + "epoch": 23.02, + "learning_rate": 1.8522159986642306e-07, + "loss": 0.6586, + "step": 46810 + }, + { + "epoch": 23.02, + "learning_rate": 1.8406692215870185e-07, + "loss": 0.3052, + "step": 46820 + }, + { + "epoch": 23.02, + "learning_rate": 1.8291581014788755e-07, + "loss": 0.9379, + "step": 46830 + }, + { + "epoch": 23.02, + "learning_rate": 1.8176826439501887e-07, + "loss": 0.26, + "step": 46840 + }, + { + "epoch": 23.02, + "learning_rate": 1.8062428545939424e-07, + "loss": 0.3086, + "step": 46850 + }, + { + "epoch": 23.02, + "learning_rate": 1.7948387389857767e-07, + "loss": 0.1077, + "step": 46860 + }, + { + "epoch": 23.02, + "learning_rate": 1.7834703026838957e-07, + "loss": 0.3269, + "step": 46870 + }, + { + "epoch": 23.02, + "learning_rate": 1.7721375512291417e-07, + "loss": 0.3113, + "step": 46880 + }, + { + "epoch": 23.02, + "learning_rate": 1.7608404901449548e-07, + "loss": 0.2717, + "step": 46890 + }, + { + "epoch": 23.02, + "learning_rate": 1.749579124937406e-07, + "loss": 0.6674, + "step": 46900 + }, + { + "epoch": 23.02, + "learning_rate": 1.738353461095138e-07, + "loss": 0.2997, + "step": 46910 + }, + { + "epoch": 23.02, + "learning_rate": 1.7271635040893908e-07, + "loss": 0.6477, + "step": 46920 + }, + { + "epoch": 23.02, + "learning_rate": 1.716009259374035e-07, + "loss": 0.3548, + "step": 46930 + }, + { + "epoch": 23.02, + "learning_rate": 1.704890732385489e-07, + "loss": 0.3934, + "step": 46940 + }, + { + "epoch": 23.02, + "learning_rate": 1.6938079285428094e-07, + "loss": 0.3057, + "step": 46950 + }, + { + "epoch": 23.02, + "learning_rate": 1.6827608532476168e-07, + "loss": 0.6208, + "step": 46960 + }, + { + "epoch": 23.02, + "learning_rate": 1.6717495118841048e-07, + "loss": 0.3769, + "step": 46970 + }, + { + "epoch": 23.02, + "learning_rate": 1.6607739098190883e-07, + "loss": 0.331, + "step": 46980 + }, + { + "epoch": 23.02, + "learning_rate": 1.6498340524019218e-07, + "loss": 0.6539, + "step": 46990 + }, + { + "epoch": 23.02, + "learning_rate": 1.6389299449645734e-07, + "loss": 0.6753, + "step": 47000 + }, + { + "epoch": 23.02, + "learning_rate": 1.6280615928215753e-07, + "loss": 0.1063, + "step": 47010 + }, + { + "epoch": 23.02, + "learning_rate": 1.6172290012700235e-07, + "loss": 0.7285, + "step": 47020 + }, + { + "epoch": 23.02, + "learning_rate": 1.606432175589595e-07, + "loss": 0.5733, + "step": 47030 + }, + { + "epoch": 23.02, + "learning_rate": 1.5956711210425218e-07, + "loss": 0.4015, + "step": 47040 + }, + { + "epoch": 23.02, + "learning_rate": 1.584945842873625e-07, + "loss": 0.6097, + "step": 47050 + }, + { + "epoch": 23.02, + "learning_rate": 1.5742563463102654e-07, + "loss": 0.326, + "step": 47060 + }, + { + "epoch": 23.02, + "learning_rate": 1.5636026365623833e-07, + "loss": 0.8333, + "step": 47070 + }, + { + "epoch": 23.02, + "learning_rate": 1.5529847188224504e-07, + "loss": 0.4393, + "step": 47080 + }, + { + "epoch": 23.02, + "learning_rate": 1.5424025982655187e-07, + "loss": 0.9822, + "step": 47090 + }, + { + "epoch": 23.02, + "learning_rate": 1.5318562800491957e-07, + "loss": 0.6041, + "step": 47100 + }, + { + "epoch": 23.02, + "learning_rate": 1.5213457693136201e-07, + "loss": 0.3749, + "step": 47110 + }, + { + "epoch": 23.02, + "learning_rate": 1.5108710711814855e-07, + "loss": 0.4134, + "step": 47120 + }, + { + "epoch": 23.02, + "learning_rate": 1.5004321907580332e-07, + "loss": 0.4031, + "step": 47130 + }, + { + "epoch": 23.02, + "learning_rate": 1.4900291331310518e-07, + "loss": 0.35, + "step": 47140 + }, + { + "epoch": 23.02, + "learning_rate": 1.4796619033708686e-07, + "loss": 0.3272, + "step": 47150 + }, + { + "epoch": 23.02, + "learning_rate": 1.4693305065303252e-07, + "loss": 0.247, + "step": 47160 + }, + { + "epoch": 23.02, + "learning_rate": 1.459034947644844e-07, + "loss": 0.7903, + "step": 47170 + }, + { + "epoch": 23.02, + "learning_rate": 1.4487752317323272e-07, + "loss": 0.3523, + "step": 47180 + }, + { + "epoch": 23.02, + "learning_rate": 1.438551363793267e-07, + "loss": 0.7924, + "step": 47190 + }, + { + "epoch": 23.02, + "learning_rate": 1.4283633488106352e-07, + "loss": 0.3725, + "step": 47200 + }, + { + "epoch": 23.02, + "learning_rate": 1.4182111917499517e-07, + "loss": 0.9139, + "step": 47210 + }, + { + "epoch": 23.02, + "learning_rate": 1.4080948975592423e-07, + "loss": 0.2654, + "step": 47220 + }, + { + "epoch": 23.02, + "learning_rate": 1.398014471169079e-07, + "loss": 0.4064, + "step": 47230 + }, + { + "epoch": 23.02, + "learning_rate": 1.387969917492532e-07, + "loss": 0.3291, + "step": 47240 + }, + { + "epoch": 23.02, + "learning_rate": 1.3779612414252017e-07, + "loss": 0.5799, + "step": 47250 + }, + { + "epoch": 23.03, + "learning_rate": 1.3679884478451948e-07, + "loss": 0.423, + "step": 47260 + }, + { + "epoch": 23.03, + "learning_rate": 1.358051541613131e-07, + "loss": 0.6288, + "step": 47270 + }, + { + "epoch": 23.03, + "learning_rate": 1.3481505275721202e-07, + "loss": 0.6378, + "step": 47280 + }, + { + "epoch": 23.03, + "learning_rate": 1.3382854105478099e-07, + "loss": 0.7306, + "step": 47290 + }, + { + "epoch": 23.03, + "learning_rate": 1.3284561953483541e-07, + "loss": 0.2512, + "step": 47300 + }, + { + "epoch": 23.03, + "learning_rate": 1.3186628867643713e-07, + "loss": 0.4269, + "step": 47310 + }, + { + "epoch": 23.03, + "learning_rate": 1.308905489568993e-07, + "loss": 0.2313, + "step": 47320 + }, + { + "epoch": 23.03, + "learning_rate": 1.2991840085178814e-07, + "loss": 0.7018, + "step": 47330 + }, + { + "epoch": 23.03, + "learning_rate": 1.2894984483491468e-07, + "loss": 0.3318, + "step": 47340 + }, + { + "epoch": 23.03, + "learning_rate": 1.279848813783438e-07, + "loss": 0.2508, + "step": 47350 + }, + { + "epoch": 23.03, + "learning_rate": 1.270235109523843e-07, + "loss": 0.3462, + "step": 47360 + }, + { + "epoch": 23.03, + "learning_rate": 1.2606573402559718e-07, + "loss": 0.1962, + "step": 47370 + }, + { + "epoch": 23.03, + "learning_rate": 1.251115510647932e-07, + "loss": 0.6131, + "step": 47380 + }, + { + "epoch": 23.03, + "learning_rate": 1.241609625350279e-07, + "loss": 0.4011, + "step": 47390 + }, + { + "epoch": 23.03, + "learning_rate": 1.2321396889960645e-07, + "loss": 0.597, + "step": 47400 + }, + { + "epoch": 23.03, + "learning_rate": 1.2227057062008217e-07, + "loss": 0.3469, + "step": 47410 + }, + { + "epoch": 23.03, + "learning_rate": 1.213307681562556e-07, + "loss": 0.5855, + "step": 47420 + }, + { + "epoch": 23.03, + "learning_rate": 1.2039456196617616e-07, + "loss": 0.3312, + "step": 47430 + }, + { + "epoch": 23.03, + "learning_rate": 1.1946195250613885e-07, + "loss": 0.3626, + "step": 47440 + }, + { + "epoch": 23.03, + "learning_rate": 1.1853294023068589e-07, + "loss": 0.3461, + "step": 47450 + }, + { + "epoch": 23.03, + "learning_rate": 1.1760752559260679e-07, + "loss": 0.4803, + "step": 47460 + }, + { + "epoch": 23.03, + "learning_rate": 1.1668570904293657e-07, + "loss": 0.4819, + "step": 47470 + }, + { + "epoch": 23.03, + "learning_rate": 1.1576749103096002e-07, + "loss": 0.9285, + "step": 47480 + }, + { + "epoch": 23.03, + "learning_rate": 1.1485287200420246e-07, + "loss": 0.473, + "step": 47490 + }, + { + "epoch": 23.03, + "learning_rate": 1.1394185240843985e-07, + "loss": 0.271, + "step": 47500 + }, + { + "epoch": 23.03, + "learning_rate": 1.1303443268769114e-07, + "loss": 0.6521, + "step": 47510 + }, + { + "epoch": 23.03, + "learning_rate": 1.1213061328422175e-07, + "loss": 0.6992, + "step": 47520 + }, + { + "epoch": 23.03, + "learning_rate": 1.1123039463854345e-07, + "loss": 0.4707, + "step": 47530 + }, + { + "epoch": 23.03, + "learning_rate": 1.1033377718941112e-07, + "loss": 0.6397, + "step": 47540 + }, + { + "epoch": 23.03, + "learning_rate": 1.0944076137382436e-07, + "loss": 0.3328, + "step": 47550 + }, + { + "epoch": 23.03, + "learning_rate": 1.0855134762702917e-07, + "loss": 0.2288, + "step": 47560 + }, + { + "epoch": 23.03, + "learning_rate": 1.0766553638251464e-07, + "loss": 0.3954, + "step": 47570 + }, + { + "epoch": 23.03, + "learning_rate": 1.0678332807201541e-07, + "loss": 0.4067, + "step": 47580 + }, + { + "epoch": 23.03, + "learning_rate": 1.0590472312550753e-07, + "loss": 0.5821, + "step": 47590 + }, + { + "epoch": 23.03, + "learning_rate": 1.0502972197121347e-07, + "loss": 0.6503, + "step": 47600 + }, + { + "epoch": 23.03, + "learning_rate": 1.0415832503559708e-07, + "loss": 0.5509, + "step": 47610 + }, + { + "epoch": 23.03, + "learning_rate": 1.032905327433678e-07, + "loss": 0.9522, + "step": 47620 + }, + { + "epoch": 23.03, + "learning_rate": 1.0242634551747731e-07, + "loss": 0.6986, + "step": 47630 + }, + { + "epoch": 23.03, + "learning_rate": 1.0156576377911869e-07, + "loss": 0.4621, + "step": 47640 + }, + { + "epoch": 23.03, + "learning_rate": 1.0070878794772975e-07, + "loss": 0.6139, + "step": 47650 + }, + { + "epoch": 23.03, + "learning_rate": 9.985541844098972e-08, + "loss": 0.5695, + "step": 47660 + }, + { + "epoch": 23.03, + "learning_rate": 9.900565567482089e-08, + "loss": 0.5113, + "step": 47670 + }, + { + "epoch": 23.03, + "learning_rate": 9.815950006338697e-08, + "loss": 0.3426, + "step": 47680 + }, + { + "epoch": 23.03, + "learning_rate": 9.731695201909474e-08, + "loss": 0.4428, + "step": 47690 + }, + { + "epoch": 23.03, + "learning_rate": 9.647801195259071e-08, + "loss": 0.4442, + "step": 47700 + }, + { + "epoch": 23.03, + "learning_rate": 9.564268027276446e-08, + "loss": 0.3995, + "step": 47710 + }, + { + "epoch": 23.03, + "learning_rate": 9.4810957386747e-08, + "loss": 0.8283, + "step": 47720 + }, + { + "epoch": 23.03, + "learning_rate": 9.398284369990989e-08, + "loss": 0.438, + "step": 47730 + }, + { + "epoch": 23.03, + "learning_rate": 9.315833961586612e-08, + "loss": 0.5626, + "step": 47740 + }, + { + "epoch": 23.04, + "learning_rate": 9.233744553646756e-08, + "loss": 0.4854, + "step": 47750 + }, + { + "epoch": 23.04, + "learning_rate": 9.152016186180834e-08, + "loss": 0.4819, + "step": 47760 + }, + { + "epoch": 23.04, + "learning_rate": 9.070648899022315e-08, + "loss": 0.769, + "step": 47770 + }, + { + "epoch": 23.04, + "learning_rate": 8.989642731828646e-08, + "loss": 0.4348, + "step": 47780 + }, + { + "epoch": 23.04, + "learning_rate": 8.908997724081241e-08, + "loss": 0.5948, + "step": 47790 + }, + { + "epoch": 23.04, + "learning_rate": 8.828713915085412e-08, + "loss": 0.1465, + "step": 47800 + }, + { + "epoch": 23.04, + "learning_rate": 8.748791343970524e-08, + "loss": 0.3815, + "step": 47810 + }, + { + "epoch": 23.04, + "learning_rate": 8.669230049690003e-08, + "loss": 0.3987, + "step": 47820 + }, + { + "epoch": 23.04, + "learning_rate": 8.590030071020994e-08, + "loss": 0.316, + "step": 47830 + }, + { + "epoch": 23.04, + "learning_rate": 8.511191446564537e-08, + "loss": 0.2271, + "step": 47840 + }, + { + "epoch": 23.04, + "learning_rate": 8.432714214745646e-08, + "loss": 0.3009, + "step": 47850 + }, + { + "epoch": 23.04, + "learning_rate": 8.354598413813141e-08, + "loss": 0.2945, + "step": 47860 + }, + { + "epoch": 23.04, + "learning_rate": 8.276844081839819e-08, + "loss": 0.2503, + "step": 47870 + }, + { + "epoch": 23.04, + "learning_rate": 8.199451256722196e-08, + "loss": 0.5661, + "step": 47880 + }, + { + "epoch": 23.04, + "learning_rate": 8.122419976180434e-08, + "loss": 0.6049, + "step": 47890 + }, + { + "epoch": 23.04, + "learning_rate": 8.045750277758751e-08, + "loss": 0.4348, + "step": 47900 + }, + { + "epoch": 23.04, + "learning_rate": 7.969442198825089e-08, + "loss": 0.8783, + "step": 47910 + }, + { + "epoch": 23.04, + "learning_rate": 7.893495776570947e-08, + "loss": 0.4905, + "step": 47920 + }, + { + "epoch": 23.04, + "learning_rate": 7.8179110480118e-08, + "loss": 0.3333, + "step": 47930 + }, + { + "epoch": 23.04, + "learning_rate": 7.742688049986596e-08, + "loss": 0.4873, + "step": 47940 + }, + { + "epoch": 23.04, + "learning_rate": 7.667826819158257e-08, + "loss": 0.4427, + "step": 47950 + }, + { + "epoch": 23.04, + "learning_rate": 7.593327392013183e-08, + "loss": 0.4111, + "step": 47960 + }, + { + "epoch": 23.04, + "learning_rate": 7.519189804861493e-08, + "loss": 0.6091, + "step": 47970 + }, + { + "epoch": 23.04, + "learning_rate": 7.44541409383695e-08, + "loss": 0.2827, + "step": 47980 + }, + { + "epoch": 23.04, + "learning_rate": 7.372000294896792e-08, + "loss": 0.2771, + "step": 47990 + }, + { + "epoch": 23.04, + "learning_rate": 7.298948443822229e-08, + "loss": 0.5693, + "step": 48000 + }, + { + "epoch": 23.04, + "eval_accuracy": 0.8747368421052631, + "eval_f1": 0.8747368421052631, + "eval_loss": 0.8009958863258362, + "eval_runtime": 732.1449, + "eval_samples_per_second": 6.488, + "eval_steps_per_second": 1.623, + "step": 48000 + }, + { + "epoch": 24.0, + "learning_rate": 7.226258576217865e-08, + "loss": 0.3753, + "step": 48010 + }, + { + "epoch": 24.0, + "learning_rate": 7.15393072751161e-08, + "loss": 0.4069, + "step": 48020 + }, + { + "epoch": 24.0, + "learning_rate": 7.081964932955349e-08, + "loss": 0.4612, + "step": 48030 + }, + { + "epoch": 24.0, + "learning_rate": 7.010361227624357e-08, + "loss": 0.3692, + "step": 48040 + }, + { + "epoch": 24.0, + "learning_rate": 6.939119646417302e-08, + "loss": 0.3168, + "step": 48050 + }, + { + "epoch": 24.0, + "learning_rate": 6.868240224056577e-08, + "loss": 0.5174, + "step": 48060 + }, + { + "epoch": 24.0, + "learning_rate": 6.79772299508788e-08, + "loss": 0.5508, + "step": 48070 + }, + { + "epoch": 24.0, + "learning_rate": 6.727567993880468e-08, + "loss": 0.5056, + "step": 48080 + }, + { + "epoch": 24.0, + "learning_rate": 6.657775254626991e-08, + "loss": 0.4838, + "step": 48090 + }, + { + "epoch": 24.0, + "learning_rate": 6.588344811343738e-08, + "loss": 0.3053, + "step": 48100 + }, + { + "epoch": 24.0, + "learning_rate": 6.519276697870142e-08, + "loss": 0.4235, + "step": 48110 + }, + { + "epoch": 24.0, + "learning_rate": 6.450570947869106e-08, + "loss": 0.5998, + "step": 48120 + }, + { + "epoch": 24.0, + "learning_rate": 6.382227594827012e-08, + "loss": 0.5087, + "step": 48130 + }, + { + "epoch": 24.0, + "learning_rate": 6.314246672053715e-08, + "loss": 0.4317, + "step": 48140 + }, + { + "epoch": 24.0, + "learning_rate": 6.246628212682209e-08, + "loss": 0.8639, + "step": 48150 + }, + { + "epoch": 24.0, + "learning_rate": 6.179372249668802e-08, + "loss": 0.3694, + "step": 48160 + }, + { + "epoch": 24.0, + "learning_rate": 6.112478815793437e-08, + "loss": 0.2338, + "step": 48170 + }, + { + "epoch": 24.0, + "learning_rate": 6.045947943658953e-08, + "loss": 0.2262, + "step": 48180 + }, + { + "epoch": 24.0, + "learning_rate": 5.979779665691826e-08, + "loss": 0.5833, + "step": 48190 + }, + { + "epoch": 24.0, + "learning_rate": 5.9139740141416765e-08, + "loss": 0.3248, + "step": 48200 + }, + { + "epoch": 24.0, + "learning_rate": 5.848531021081266e-08, + "loss": 1.0088, + "step": 48210 + }, + { + "epoch": 24.0, + "learning_rate": 5.7834507184067466e-08, + "loss": 0.3743, + "step": 48220 + }, + { + "epoch": 24.0, + "learning_rate": 5.718733137837578e-08, + "loss": 0.4842, + "step": 48230 + }, + { + "epoch": 24.0, + "learning_rate": 5.6543783109161974e-08, + "loss": 0.2676, + "step": 48240 + }, + { + "epoch": 24.0, + "learning_rate": 5.5903862690085125e-08, + "loss": 0.3801, + "step": 48250 + }, + { + "epoch": 24.01, + "learning_rate": 5.526757043303243e-08, + "loss": 0.7997, + "step": 48260 + }, + { + "epoch": 24.01, + "learning_rate": 5.463490664812748e-08, + "loss": 0.7286, + "step": 48270 + }, + { + "epoch": 24.01, + "learning_rate": 5.4005871643721114e-08, + "loss": 0.2438, + "step": 48280 + }, + { + "epoch": 24.01, + "learning_rate": 5.3380465726398096e-08, + "loss": 0.6707, + "step": 48290 + }, + { + "epoch": 24.01, + "learning_rate": 5.275868920097293e-08, + "loss": 0.5388, + "step": 48300 + }, + { + "epoch": 24.01, + "learning_rate": 5.214054237049321e-08, + "loss": 0.3983, + "step": 48310 + }, + { + "epoch": 24.01, + "learning_rate": 5.152602553623459e-08, + "loss": 0.3292, + "step": 48320 + }, + { + "epoch": 24.01, + "learning_rate": 5.091513899770667e-08, + "loss": 0.5818, + "step": 48330 + }, + { + "epoch": 24.01, + "learning_rate": 5.0307883052647944e-08, + "loss": 0.4123, + "step": 48340 + }, + { + "epoch": 24.01, + "learning_rate": 4.970425799702666e-08, + "loss": 0.389, + "step": 48350 + }, + { + "epoch": 24.01, + "learning_rate": 4.910426412504332e-08, + "loss": 0.258, + "step": 48360 + }, + { + "epoch": 24.01, + "learning_rate": 4.85079017291265e-08, + "loss": 0.6119, + "step": 48370 + }, + { + "epoch": 24.01, + "learning_rate": 4.791517109993704e-08, + "loss": 0.5102, + "step": 48380 + }, + { + "epoch": 24.01, + "learning_rate": 4.732607252636384e-08, + "loss": 0.5128, + "step": 48390 + }, + { + "epoch": 24.01, + "learning_rate": 4.6740606295527236e-08, + "loss": 0.1333, + "step": 48400 + }, + { + "epoch": 24.01, + "learning_rate": 4.615877269277563e-08, + "loss": 0.3087, + "step": 48410 + }, + { + "epoch": 24.01, + "learning_rate": 4.558057200168802e-08, + "loss": 0.6449, + "step": 48420 + }, + { + "epoch": 24.01, + "learning_rate": 4.500600450407233e-08, + "loss": 0.3746, + "step": 48430 + }, + { + "epoch": 24.01, + "learning_rate": 4.4435070479965366e-08, + "loss": 0.5859, + "step": 48440 + }, + { + "epoch": 24.01, + "learning_rate": 4.386777020763455e-08, + "loss": 0.3587, + "step": 48450 + }, + { + "epoch": 24.01, + "learning_rate": 4.330410396357371e-08, + "loss": 0.2082, + "step": 48460 + }, + { + "epoch": 24.01, + "learning_rate": 4.274407202250807e-08, + "loss": 0.3468, + "step": 48470 + }, + { + "epoch": 24.01, + "learning_rate": 4.2187674657390154e-08, + "loss": 0.8718, + "step": 48480 + }, + { + "epoch": 24.01, + "learning_rate": 4.1634912139400514e-08, + "loss": 0.4234, + "step": 48490 + }, + { + "epoch": 24.01, + "learning_rate": 4.108578473795033e-08, + "loss": 0.6711, + "step": 48500 + }, + { + "epoch": 24.01, + "learning_rate": 4.0540292720675495e-08, + "loss": 0.4326, + "step": 48510 + }, + { + "epoch": 24.01, + "learning_rate": 3.999843635344419e-08, + "loss": 0.6955, + "step": 48520 + }, + { + "epoch": 24.01, + "learning_rate": 3.946021590035015e-08, + "loss": 0.3644, + "step": 48530 + }, + { + "epoch": 24.01, + "learning_rate": 3.892563162371521e-08, + "loss": 0.1796, + "step": 48540 + }, + { + "epoch": 24.01, + "learning_rate": 3.839468378408845e-08, + "loss": 0.4076, + "step": 48550 + }, + { + "epoch": 24.01, + "learning_rate": 3.7867372640248697e-08, + "loss": 0.6121, + "step": 48560 + }, + { + "epoch": 24.01, + "learning_rate": 3.734369844920038e-08, + "loss": 0.446, + "step": 48570 + }, + { + "epoch": 24.01, + "learning_rate": 3.6823661466176825e-08, + "loss": 0.4861, + "step": 48580 + }, + { + "epoch": 24.01, + "learning_rate": 3.6307261944636126e-08, + "loss": 0.4031, + "step": 48590 + }, + { + "epoch": 24.01, + "learning_rate": 3.579450013626612e-08, + "loss": 0.3043, + "step": 48600 + }, + { + "epoch": 24.01, + "learning_rate": 3.528537629098022e-08, + "loss": 0.3778, + "step": 48610 + }, + { + "epoch": 24.01, + "learning_rate": 3.477989065692078e-08, + "loss": 0.4012, + "step": 48620 + }, + { + "epoch": 24.01, + "learning_rate": 3.4278043480453216e-08, + "loss": 0.3413, + "step": 48630 + }, + { + "epoch": 24.01, + "learning_rate": 3.377983500617271e-08, + "loss": 0.6273, + "step": 48640 + }, + { + "epoch": 24.01, + "learning_rate": 3.328526547690003e-08, + "loss": 0.7842, + "step": 48650 + }, + { + "epoch": 24.01, + "learning_rate": 3.279433513368235e-08, + "loss": 0.462, + "step": 48660 + }, + { + "epoch": 24.01, + "learning_rate": 3.230704421579328e-08, + "loss": 0.6631, + "step": 48670 + }, + { + "epoch": 24.01, + "learning_rate": 3.1823392960732e-08, + "loss": 0.3991, + "step": 48680 + }, + { + "epoch": 24.01, + "learning_rate": 3.134338160422412e-08, + "loss": 0.7023, + "step": 48690 + }, + { + "epoch": 24.01, + "learning_rate": 3.086701038022249e-08, + "loss": 0.2972, + "step": 48700 + }, + { + "epoch": 24.01, + "learning_rate": 3.039427952090307e-08, + "loss": 0.4932, + "step": 48710 + }, + { + "epoch": 24.01, + "learning_rate": 2.992518925667154e-08, + "loss": 0.3013, + "step": 48720 + }, + { + "epoch": 24.01, + "learning_rate": 2.9459739816154186e-08, + "loss": 0.3875, + "step": 48730 + }, + { + "epoch": 24.01, + "learning_rate": 2.8997931426206214e-08, + "loss": 0.4445, + "step": 48740 + }, + { + "epoch": 24.02, + "learning_rate": 2.8539764311908412e-08, + "loss": 0.5529, + "step": 48750 + }, + { + "epoch": 24.02, + "learning_rate": 2.8085238696565485e-08, + "loss": 0.6926, + "step": 48760 + }, + { + "epoch": 24.02, + "learning_rate": 2.7634354801706896e-08, + "loss": 0.3991, + "step": 48770 + }, + { + "epoch": 24.02, + "learning_rate": 2.7187112847087693e-08, + "loss": 0.9109, + "step": 48780 + }, + { + "epoch": 24.02, + "learning_rate": 2.6743513050690172e-08, + "loss": 0.7538, + "step": 48790 + }, + { + "epoch": 24.02, + "learning_rate": 2.6303555628717225e-08, + "loss": 0.4482, + "step": 48800 + }, + { + "epoch": 24.02, + "learning_rate": 2.5867240795600656e-08, + "loss": 0.3986, + "step": 48810 + }, + { + "epoch": 24.02, + "learning_rate": 2.5434568763993692e-08, + "loss": 0.6185, + "step": 48820 + }, + { + "epoch": 24.02, + "learning_rate": 2.5005539744775984e-08, + "loss": 0.6925, + "step": 48830 + }, + { + "epoch": 24.02, + "learning_rate": 2.45801539470511e-08, + "loss": 0.4379, + "step": 48840 + }, + { + "epoch": 24.02, + "learning_rate": 2.4158411578146523e-08, + "loss": 0.4391, + "step": 48850 + }, + { + "epoch": 24.02, + "learning_rate": 2.3740312843614497e-08, + "loss": 0.89, + "step": 48860 + }, + { + "epoch": 24.02, + "learning_rate": 2.332585794723119e-08, + "loss": 0.3428, + "step": 48870 + }, + { + "epoch": 24.02, + "learning_rate": 2.291504709099751e-08, + "loss": 0.7952, + "step": 48880 + }, + { + "epoch": 24.02, + "learning_rate": 2.2507880475136634e-08, + "loss": 0.6414, + "step": 48890 + }, + { + "epoch": 24.02, + "learning_rate": 2.2104358298098147e-08, + "loss": 0.3109, + "step": 48900 + }, + { + "epoch": 24.02, + "learning_rate": 2.1704480756552237e-08, + "loss": 0.159, + "step": 48910 + }, + { + "epoch": 24.02, + "learning_rate": 2.1308248045395494e-08, + "loss": 0.4725, + "step": 48920 + }, + { + "epoch": 24.02, + "learning_rate": 2.0915660357746778e-08, + "loss": 0.7352, + "step": 48930 + }, + { + "epoch": 24.02, + "learning_rate": 2.05267178849472e-08, + "loss": 0.6739, + "step": 48940 + }, + { + "epoch": 24.02, + "learning_rate": 2.0141420816564282e-08, + "loss": 0.8071, + "step": 48950 + }, + { + "epoch": 24.02, + "learning_rate": 1.9759769340386148e-08, + "loss": 0.5585, + "step": 48960 + }, + { + "epoch": 24.02, + "learning_rate": 1.9381763642425665e-08, + "loss": 0.6838, + "step": 48970 + }, + { + "epoch": 24.02, + "learning_rate": 1.9007403906918797e-08, + "loss": 0.6336, + "step": 48980 + }, + { + "epoch": 24.02, + "learning_rate": 1.8636690316322092e-08, + "loss": 0.5289, + "step": 48990 + }, + { + "epoch": 24.02, + "learning_rate": 1.8269623051318517e-08, + "loss": 0.7095, + "step": 49000 + }, + { + "epoch": 24.02, + "learning_rate": 1.7906202290810803e-08, + "loss": 0.4653, + "step": 49010 + }, + { + "epoch": 24.02, + "learning_rate": 1.7546428211927257e-08, + "loss": 0.2675, + "step": 49020 + }, + { + "epoch": 24.02, + "learning_rate": 1.7190300990016784e-08, + "loss": 0.4124, + "step": 49030 + }, + { + "epoch": 24.02, + "learning_rate": 1.6837820798650538e-08, + "loss": 0.6305, + "step": 49040 + }, + { + "epoch": 24.02, + "learning_rate": 1.6488987809625268e-08, + "loss": 0.6233, + "step": 49050 + }, + { + "epoch": 24.02, + "learning_rate": 1.6143802192955805e-08, + "loss": 0.694, + "step": 49060 + }, + { + "epoch": 24.02, + "learning_rate": 1.580226411688257e-08, + "loss": 0.252, + "step": 49070 + }, + { + "epoch": 24.02, + "learning_rate": 1.5464373747866577e-08, + "loss": 0.4817, + "step": 49080 + }, + { + "epoch": 24.02, + "learning_rate": 1.5130131250591093e-08, + "loss": 0.1697, + "step": 49090 + }, + { + "epoch": 24.02, + "learning_rate": 1.4799536787963308e-08, + "loss": 0.2944, + "step": 49100 + }, + { + "epoch": 24.02, + "learning_rate": 1.4472590521110162e-08, + "loss": 0.7365, + "step": 49110 + }, + { + "epoch": 24.02, + "learning_rate": 1.4149292609380027e-08, + "loss": 0.5104, + "step": 49120 + }, + { + "epoch": 24.02, + "learning_rate": 1.3829643210346854e-08, + "loss": 0.7771, + "step": 49130 + }, + { + "epoch": 24.02, + "learning_rate": 1.3513642479801857e-08, + "loss": 0.5946, + "step": 49140 + }, + { + "epoch": 24.02, + "learning_rate": 1.3201290571760172e-08, + "loss": 0.5192, + "step": 49150 + }, + { + "epoch": 24.02, + "learning_rate": 1.2892587638460018e-08, + "loss": 0.4447, + "step": 49160 + }, + { + "epoch": 24.02, + "learning_rate": 1.258753383035771e-08, + "loss": 0.5295, + "step": 49170 + }, + { + "epoch": 24.02, + "learning_rate": 1.2286129296132653e-08, + "loss": 0.3215, + "step": 49180 + }, + { + "epoch": 24.02, + "learning_rate": 1.1988374182687334e-08, + "loss": 0.8657, + "step": 49190 + }, + { + "epoch": 24.02, + "learning_rate": 1.1694268635142335e-08, + "loss": 0.4053, + "step": 49200 + }, + { + "epoch": 24.02, + "learning_rate": 1.1403812796842161e-08, + "loss": 0.5778, + "step": 49210 + }, + { + "epoch": 24.02, + "learning_rate": 1.1117006809351072e-08, + "loss": 0.4498, + "step": 49220 + }, + { + "epoch": 24.02, + "learning_rate": 1.0833850812455581e-08, + "loss": 0.4014, + "step": 49230 + }, + { + "epoch": 24.02, + "learning_rate": 1.0554344944161132e-08, + "loss": 0.3483, + "step": 49240 + }, + { + "epoch": 24.02, + "learning_rate": 1.027848934069625e-08, + "loss": 0.2246, + "step": 49250 + }, + { + "epoch": 24.03, + "learning_rate": 1.0006284136509224e-08, + "loss": 0.4773, + "step": 49260 + }, + { + "epoch": 24.03, + "learning_rate": 9.737729464269762e-09, + "loss": 0.4115, + "step": 49270 + }, + { + "epoch": 24.03, + "learning_rate": 9.472825454868995e-09, + "loss": 0.5762, + "step": 49280 + }, + { + "epoch": 24.03, + "learning_rate": 9.211572237416145e-09, + "loss": 0.7074, + "step": 49290 + }, + { + "epoch": 24.03, + "learning_rate": 8.953969939245188e-09, + "loss": 0.3273, + "step": 49300 + }, + { + "epoch": 24.03, + "learning_rate": 8.700018685905697e-09, + "loss": 0.2681, + "step": 49310 + }, + { + "epoch": 24.03, + "learning_rate": 8.449718601171997e-09, + "loss": 0.2451, + "step": 49320 + }, + { + "epoch": 24.03, + "learning_rate": 8.203069807038165e-09, + "loss": 0.2841, + "step": 49330 + }, + { + "epoch": 24.03, + "learning_rate": 7.960072423715547e-09, + "loss": 0.3003, + "step": 49340 + }, + { + "epoch": 24.03, + "learning_rate": 7.720726569640235e-09, + "loss": 0.3248, + "step": 49350 + }, + { + "epoch": 24.03, + "learning_rate": 7.48503236146475e-09, + "loss": 0.2326, + "step": 49360 + }, + { + "epoch": 24.03, + "learning_rate": 7.252989914064701e-09, + "loss": 0.2678, + "step": 49370 + }, + { + "epoch": 24.03, + "learning_rate": 7.024599340534621e-09, + "loss": 0.3889, + "step": 49380 + }, + { + "epoch": 24.03, + "learning_rate": 6.7998607521888026e-09, + "loss": 0.6104, + "step": 49390 + }, + { + "epoch": 24.03, + "learning_rate": 6.5787742585621256e-09, + "loss": 0.3541, + "step": 49400 + }, + { + "epoch": 24.03, + "learning_rate": 6.361339967410895e-09, + "loss": 0.4186, + "step": 49410 + }, + { + "epoch": 24.03, + "learning_rate": 6.147557984707841e-09, + "loss": 0.5533, + "step": 49420 + }, + { + "epoch": 24.03, + "learning_rate": 5.937428414648782e-09, + "loss": 0.4036, + "step": 49430 + }, + { + "epoch": 24.03, + "learning_rate": 5.730951359648462e-09, + "loss": 0.8842, + "step": 49440 + }, + { + "epoch": 24.03, + "learning_rate": 5.528126920341381e-09, + "loss": 0.4078, + "step": 49450 + }, + { + "epoch": 24.03, + "learning_rate": 5.3289551955809645e-09, + "loss": 0.3735, + "step": 49460 + }, + { + "epoch": 24.03, + "learning_rate": 5.133436282441228e-09, + "loss": 0.5025, + "step": 49470 + }, + { + "epoch": 24.03, + "learning_rate": 4.941570276215945e-09, + "loss": 0.2423, + "step": 49480 + }, + { + "epoch": 24.03, + "learning_rate": 4.753357270418646e-09, + "loss": 0.5151, + "step": 49490 + }, + { + "epoch": 24.03, + "learning_rate": 4.568797356781784e-09, + "loss": 0.4716, + "step": 49500 + }, + { + "epoch": 24.03, + "learning_rate": 4.387890625257574e-09, + "loss": 0.689, + "step": 49510 + }, + { + "epoch": 24.03, + "learning_rate": 4.210637164017983e-09, + "loss": 0.4343, + "step": 49520 + }, + { + "epoch": 24.03, + "learning_rate": 4.037037059453908e-09, + "loss": 0.1581, + "step": 49530 + }, + { + "epoch": 24.03, + "learning_rate": 3.8670903961751655e-09, + "loss": 0.5421, + "step": 49540 + }, + { + "epoch": 24.03, + "learning_rate": 3.700797257013e-09, + "loss": 0.4423, + "step": 49550 + }, + { + "epoch": 24.03, + "learning_rate": 3.5381577230167437e-09, + "loss": 0.4451, + "step": 49560 + }, + { + "epoch": 24.03, + "learning_rate": 3.3791718734538235e-09, + "loss": 0.4525, + "step": 49570 + }, + { + "epoch": 24.03, + "learning_rate": 3.2238397858122546e-09, + "loss": 0.3258, + "step": 49580 + }, + { + "epoch": 24.03, + "learning_rate": 3.072161535799811e-09, + "loss": 0.4485, + "step": 49590 + }, + { + "epoch": 24.03, + "learning_rate": 2.924137197342358e-09, + "loss": 0.6284, + "step": 49600 + }, + { + "epoch": 24.03, + "learning_rate": 2.7797668425846857e-09, + "loss": 0.3722, + "step": 49610 + }, + { + "epoch": 24.03, + "learning_rate": 2.6390505418913413e-09, + "loss": 0.6805, + "step": 49620 + }, + { + "epoch": 24.03, + "learning_rate": 2.5019883638457973e-09, + "loss": 0.9199, + "step": 49630 + }, + { + "epoch": 24.03, + "learning_rate": 2.368580375250451e-09, + "loss": 0.2866, + "step": 49640 + }, + { + "epoch": 24.03, + "learning_rate": 2.2388266411266234e-09, + "loss": 0.4129, + "step": 49650 + }, + { + "epoch": 24.03, + "learning_rate": 2.1127272247145614e-09, + "loss": 0.718, + "step": 49660 + }, + { + "epoch": 24.03, + "learning_rate": 1.9902821874742684e-09, + "loss": 0.339, + "step": 49670 + }, + { + "epoch": 24.03, + "learning_rate": 1.8714915890838404e-09, + "loss": 0.6156, + "step": 49680 + }, + { + "epoch": 24.03, + "learning_rate": 1.7563554874402975e-09, + "loss": 0.3388, + "step": 49690 + }, + { + "epoch": 24.03, + "learning_rate": 1.644873938658753e-09, + "loss": 0.5114, + "step": 49700 + }, + { + "epoch": 24.03, + "learning_rate": 1.537046997074909e-09, + "loss": 0.8165, + "step": 49710 + }, + { + "epoch": 24.03, + "learning_rate": 1.4328747152417277e-09, + "loss": 0.5025, + "step": 49720 + }, + { + "epoch": 24.03, + "learning_rate": 1.332357143932761e-09, + "loss": 0.2093, + "step": 49730 + }, + { + "epoch": 24.03, + "learning_rate": 1.2354943321371548e-09, + "loss": 0.6465, + "step": 49740 + }, + { + "epoch": 24.04, + "learning_rate": 1.1422863270654781e-09, + "loss": 0.7074, + "step": 49750 + }, + { + "epoch": 24.04, + "learning_rate": 1.0527331741472247e-09, + "loss": 0.3695, + "step": 49760 + }, + { + "epoch": 24.04, + "learning_rate": 9.668349170274814e-10, + "loss": 0.6652, + "step": 49770 + }, + { + "epoch": 24.04, + "learning_rate": 8.845915975735919e-10, + "loss": 0.4753, + "step": 49780 + }, + { + "epoch": 24.04, + "learning_rate": 8.060032558693253e-10, + "loss": 0.3091, + "step": 49790 + }, + { + "epoch": 24.04, + "learning_rate": 7.310699302182089e-10, + "loss": 0.4409, + "step": 49800 + }, + { + "epoch": 24.04, + "learning_rate": 6.597916571418617e-10, + "loss": 0.2497, + "step": 49810 + }, + { + "epoch": 24.04, + "learning_rate": 5.92168471379162e-10, + "loss": 0.5315, + "step": 49820 + }, + { + "epoch": 24.04, + "learning_rate": 5.282004058895784e-10, + "loss": 0.5753, + "step": 49830 + }, + { + "epoch": 24.04, + "learning_rate": 4.678874918515041e-10, + "loss": 0.5933, + "step": 49840 + }, + { + "epoch": 24.04, + "learning_rate": 4.112297586589264e-10, + "loss": 0.6205, + "step": 49850 + }, + { + "epoch": 24.04, + "learning_rate": 3.582272339272552e-10, + "loss": 0.3334, + "step": 49860 + }, + { + "epoch": 24.04, + "learning_rate": 3.088799434891598e-10, + "loss": 0.5659, + "step": 49870 + }, + { + "epoch": 24.04, + "learning_rate": 2.631879113954017e-10, + "loss": 0.6555, + "step": 49880 + }, + { + "epoch": 24.04, + "learning_rate": 2.2115115991566682e-10, + "loss": 0.7503, + "step": 49890 + }, + { + "epoch": 24.04, + "learning_rate": 1.8276970953939875e-10, + "loss": 0.4174, + "step": 49900 + }, + { + "epoch": 24.04, + "learning_rate": 1.48043578971635e-10, + "loss": 0.234, + "step": 49910 + }, + { + "epoch": 24.04, + "learning_rate": 1.1697278513800313e-10, + "loss": 0.521, + "step": 49920 + }, + { + "epoch": 24.04, + "learning_rate": 8.955734318305542e-11, + "loss": 0.5629, + "step": 49930 + }, + { + "epoch": 24.04, + "learning_rate": 6.579726646777085e-11, + "loss": 0.6017, + "step": 49940 + }, + { + "epoch": 24.04, + "learning_rate": 4.5692566572053116e-11, + "loss": 0.6729, + "step": 49950 + }, + { + "epoch": 24.04, + "learning_rate": 2.924325329556332e-11, + "loss": 0.2276, + "step": 49960 + }, + { + "epoch": 24.04, + "learning_rate": 1.6449334655221914e-11, + "loss": 0.4924, + "step": 49970 + }, + { + "epoch": 24.04, + "learning_rate": 7.310816886874072e-12, + "loss": 0.5423, + "step": 49980 + }, + { + "epoch": 24.04, + "learning_rate": 1.8277044444570124e-12, + "loss": 0.3546, + "step": 49990 + }, + { + "epoch": 24.04, + "learning_rate": 0.0, + "loss": 0.4764, + "step": 50000 + }, + { + "epoch": 24.04, + "eval_accuracy": 0.8789473684210526, + "eval_f1": 0.8789473684210526, + "eval_loss": 0.8117492198944092, + "eval_runtime": 766.7022, + "eval_samples_per_second": 6.195, + "eval_steps_per_second": 1.549, + "step": 50000 + }, + { + "epoch": 24.04, + "step": 50000, + "total_flos": 2.49219585441792e+20, + "train_loss": 0.7113624122858048, + "train_runtime": 65187.9107, + "train_samples_per_second": 3.068, + "train_steps_per_second": 0.767 + }, + { + "epoch": 24.04, + "eval_accuracy": 0.8757894736842106, + "eval_f1": 0.8757894736842106, + "eval_loss": 0.7742094993591309, + "eval_runtime": 766.7571, + "eval_samples_per_second": 6.195, + "eval_steps_per_second": 1.549, + "step": 50000 + }, + { + "epoch": 24.04, + "eval_accuracy": 0.870116156282999, + "eval_f1": 0.870116156282999, + "eval_loss": 0.7973663210868835, + "eval_runtime": 771.4511, + "eval_samples_per_second": 6.138, + "eval_steps_per_second": 1.535, + "step": 50000 + } + ], + "logging_steps": 10, + "max_steps": 50000, + "num_train_epochs": 9223372036854775807, + "save_steps": 500, + "total_flos": 2.49219585441792e+20, + "trial_name": null, + "trial_params": null +}